├── src
    ├── win32
    │   └── config.h
    ├── print.h
    ├── scandir.h
    ├── decompress.h
    ├── lang.h
    ├── log.h
    ├── ignore.h
    ├── search.h
    ├── log.c
    ├── scandir.c
    ├── options.h
    ├── util.h
    ├── lang.c
    ├── main.c
    ├── print.c
    ├── decompress.c
    ├── ignore.c
    ├── util.c
    ├── search.c
    └── options.c
├── NOTICE
├── format.sh
├── tests
    ├── setup.sh
    ├── bad_path.t
    ├── stupid_fnmatch.t.disabled
    ├── exitcodes.t
    ├── ignore_subdir.t
    ├── ignore_abs_path.t
    ├── invert_match.t
    ├── passthrough.t
    ├── case_sensitivity.t
    ├── big
    │   ├── big_file.t
    │   └── create_big_file.py
    ├── hidden_option.t
    ├── ignore_backups.t
    └── list_file_types.t
├── .clang-format
├── .travis.yml
├── .gitignore
├── Makefile.am
├── Makefile.w32
├── doc
    ├── generate_man.sh
    ├── ag.1.md
    └── ag.1
├── configure.ac
├── the_silver_searcher.spec.in
├── ag.bashcomp.sh
├── README.md
├── LICENSE
└── m4
    └── ax_pthread.m4


/src/win32/config.h:
--------------------------------------------------------------------------------
1 | #define HAVE_LZMA_H
2 | 


--------------------------------------------------------------------------------
/NOTICE:
--------------------------------------------------------------------------------
1 | The Silver Searcher
2 | Copyright 2011-2014 Geoff Greer
3 | 


--------------------------------------------------------------------------------
/format.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | CLANG_FORMAT=clang-format-3.5
4 | 
5 | $CLANG_FORMAT -i src/*.c
6 | 


--------------------------------------------------------------------------------
/tests/setup.sh:
--------------------------------------------------------------------------------
1 | # All cram tests should use this. Make sure that "ag" runs the version
2 | # of ag we just built, and make the output really simple.
3 | 
4 | alias ag="$TESTDIR/../ag --nocolor --workers=1 --parallel"
5 | 


--------------------------------------------------------------------------------
/tests/bad_path.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 | 
 5 | Complain about nonexistent path:
 6 | 
 7 |   $ ag foo doesnt_exist
 8 |   ERR: Error stat()ing: doesnt_exist
 9 |   ERR: Error opening directory doesnt_exist: No such file or directory
10 |   [1]
11 | 


--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
 1 | #BasedOnStyle: LLVM
 2 | AllowShortIfStatementsOnASingleLine: false
 3 | BreakBeforeBraces: Attach
 4 | ColumnLimit: 0
 5 | IndentWidth: 4
 6 | IndentCaseLabels: true
 7 | Language: Cpp
 8 | MaxEmptyLinesToKeep: 2
 9 | SpaceBeforeParens: ControlStatements
10 | UseTab: Never
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: c
 2 | 
 3 | branches:
 4 |   only:
 5 |     - master
 6 | 
 7 | notifications:
 8 |   email:
 9 |     - geoff@greer.fm
10 | 
11 | install:
12 |   - sudo apt-get install -y automake pkg-config libpcre3-dev zlib1g-dev liblzma-dev
13 | 
14 | script:
15 |   - ./build.sh && make test
16 | 
17 | before_script:
18 |   - sudo pip install cram
19 | 


--------------------------------------------------------------------------------
/tests/stupid_fnmatch.t.disabled:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ mkdir -p ./a/bomb
 5 |   $ echo 'whatever' > ./a/bomb/foo.yml
 6 |   $ echo '*b/foo.yml' > ./.gitignore
 7 | 
 8 | Ignore foo.yml but not blah.yml:
 9 | 
10 |   $ ag whatever .
11 | 
12 | Don't ignore anything (unrestricted search):
13 | 
14 |   $ ag -u whatever .
15 |   a/bomb/foo.yml:1:whatever
16 | 


--------------------------------------------------------------------------------
/tests/exitcodes.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ echo foo > ./exitcodes_test.txt
 5 |   $ echo bar >> ./exitcodes_test.txt
 6 | 
 7 | Normal matching:
 8 | 
 9 |   $ ag foo exitcodes_test.txt
10 |   1:foo
11 |   $ ag zoo exitcodes_test.txt
12 |   [1]
13 | 
14 | Inverted matching:
15 | 
16 |   $ ag -v foo exitcodes_test.txt
17 |   2:bar
18 |   $ ag -v zoo exitcodes_test.txt
19 |   1:foo
20 |   2:bar
21 |   $ ag -v "foo|bar" exitcodes_test.txt
22 |   [1]
23 | 


--------------------------------------------------------------------------------
/tests/ignore_subdir.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ mkdir -p ./a/b/c
 5 |   $ echo 'whatever1' > ./a/b/c/blah.yml
 6 |   $ echo 'whatever2' > ./a/b/foo.yml
 7 |   $ echo 'a/b/*.yml' > ./.gitignore
 8 | 
 9 | Ignore foo.yml but not blah.yml:
10 | 
11 |   $ ag whatever .
12 |   a/b/c/blah.yml:1:whatever1
13 | 
14 | Don't ignore anything (unrestricted search):
15 | 
16 |   $ ag -u whatever . | sort
17 |   a/b/c/blah.yml:1:whatever1
18 |   a/b/foo.yml:1:whatever2
19 | 


--------------------------------------------------------------------------------
/tests/ignore_abs_path.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ mkdir -p ./a/b/c
 5 |   $ echo 'whatever1' > ./a/b/c/blah.yml
 6 |   $ echo 'whatever2' > ./a/b/foo.yml
 7 |   $ echo '/a/b/foo.yml' > ./.gitignore
 8 | 
 9 | Ignore foo.yml but not blah.yml:
10 | 
11 |   $ ag whatever .
12 |   a/b/c/blah.yml:1:whatever1
13 | 
14 | Don't ignore anything (unrestricted search):
15 | 
16 |   $ ag -u whatever . | sort
17 |   a/b/c/blah.yml:1:whatever1
18 |   a/b/foo.yml:1:whatever2
19 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.dSYM
 2 | *.o
 3 | *.plist
 4 | .deps
 5 | .dirstamp
 6 | .DS_Store
 7 | aclocal.m4
 8 | ag
 9 | autom4te.cache
10 | cachegrind.out.*
11 | callgrind.out.*
12 | clang_output_*
13 | compile
14 | config.guess
15 | config.log
16 | config.status
17 | config.sub
18 | configure
19 | depcomp
20 | gmon.out
21 | install-sh
22 | Makefile
23 | Makefile.in
24 | missing
25 | src/config.h*
26 | stamp-h1
27 | tests/*.err
28 | tests/big/*.err
29 | tests/big/big_file.txt
30 | the_silver_searcher.spec


--------------------------------------------------------------------------------
/src/print.h:
--------------------------------------------------------------------------------
 1 | #ifndef PRINT_H
 2 | #define PRINT_H
 3 | 
 4 | #include "util.h"
 5 | 
 6 | void print_path(const char *path, const char sep);
 7 | void print_binary_file_matches(const char *path);
 8 | void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len);
 9 | void print_line_number(size_t line, const char sep);
10 | void print_file_separator(void);
11 | const char *normalize_path(const char *path);
12 | 
13 | #endif
14 | 


--------------------------------------------------------------------------------
/src/scandir.h:
--------------------------------------------------------------------------------
 1 | #ifndef SCANDIR_H
 2 | #define SCANDIR_H
 3 | 
 4 | #include "ignore.h"
 5 | 
 6 | typedef struct {
 7 |     const ignores *ig;
 8 |     const char *base_path;
 9 |     size_t base_path_len;
10 | } scandir_baton_t;
11 | 
12 | typedef int (*filter_fp)(const char *path, const struct dirent *, void *);
13 | 
14 | int ag_scandir(const char *dirname,
15 |                struct dirent ***namelist,
16 |                filter_fp filter,
17 |                void *baton);
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/tests/invert_match.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ echo 'valid: 1' > ./blah.txt
 5 |   $ echo 'some_string' >> ./blah.txt
 6 |   $ echo 'valid: 654' >> ./blah.txt
 7 |   $ echo 'some_other_string' >> ./blah.txt
 8 |   $ echo 'valid: 0' >> ./blah.txt
 9 |   $ echo 'valid: 23' >> ./blah.txt
10 |   $ echo 'valid: 0' >> ./blah.txt
11 | 
12 | Search for lines not matching "valid: " in blah.txt:
13 | 
14 |   $ ag -v 'valid: '
15 |   blah.txt:2:some_string
16 |   blah.txt:4:some_other_string
17 | 


--------------------------------------------------------------------------------
/src/decompress.h:
--------------------------------------------------------------------------------
 1 | #ifndef DECOMPRESS_H
 2 | #define DECOMPRESS_H
 3 | 
 4 | #include "config.h"
 5 | #include "log.h"
 6 | #include "options.h"
 7 | 
 8 | typedef enum {
 9 |     AG_NO_COMPRESSION,
10 |     AG_GZIP,
11 |     AG_COMPRESS,
12 |     AG_ZIP,
13 |     AG_XZ,
14 | } ag_compression_type;
15 | 
16 | ag_compression_type is_zipped(const void *buf, const int buf_len);
17 | 
18 | void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len, const char *dir_full_path, int *new_buf_len);
19 | #endif
20 | 


--------------------------------------------------------------------------------
/src/lang.h:
--------------------------------------------------------------------------------
 1 | #ifndef LANG_H
 2 | #define LANG_H
 3 | 
 4 | #define MAX_EXTENSIONS 12
 5 | #define LANG_COUNT 66
 6 | 
 7 | typedef struct {
 8 |     const char *name;
 9 |     const char *extensions[MAX_EXTENSIONS];
10 | } lang_spec_t;
11 | 
12 | extern lang_spec_t langs[];
13 | 
14 | /**
15 | Convert a NULL-terminated array of language extensions
16 | into a regular expression of the form \.(extension1|extension2...)$
17 | 
18 | Caller is responsible for freeing the returned string.
19 | */
20 | char *make_lang_regex(const char **extensions);
21 | 
22 | #endif
23 | 


--------------------------------------------------------------------------------
/src/log.h:
--------------------------------------------------------------------------------
 1 | #ifndef LOG_H
 2 | #define LOG_H
 3 | 
 4 | #include <stdarg.h>
 5 | 
 6 | enum log_level { /* Severities in ascending numeric order; a larger value is more severe. */
 7 |     LOG_LEVEL_DEBUG = 10,
 8 |     LOG_LEVEL_MSG = 20,
 9 |     LOG_LEVEL_WARN = 30,
10 |     LOG_LEVEL_ERR = 40,
11 |     LOG_LEVEL_NONE = 100 /* Larger than every message level above. */
12 | };
13 | 
14 | void set_log_level(enum log_level threshold);
15 | 
16 | void log_debug(const char *fmt, ...);
17 | void log_msg(const char *fmt, ...);
18 | void log_warn(const char *fmt, ...);
19 | void log_err(const char *fmt, ...);
20 | 
21 | void vplog(const unsigned int level, const char *fmt, va_list args);
22 | void plog(const unsigned int level, const char *fmt, ...);
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/tests/passthrough.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ unalias ag
 5 |   $ alias ag="$TESTDIR/../ag --nocolor --workers=1"
 6 |   $ echo "foo bar" > passthrough_test.txt
 7 |   $ echo "zoo zar" >> passthrough_test.txt
 8 |   $ echo "foo test" >> passthrough_test.txt
 9 | 
10 | No impact on non-stream:
11 | 
12 |   $ ag --passthrough zoo passthrough_test.txt
13 |   zoo zar
14 | 
15 | Match stream with --passthrough:
16 | 
17 |   $ cat passthrough_test.txt | ag --passthrough foo
18 |   foo bar
19 |   zoo zar
20 |   foo test
21 | 
22 | Match stream without --passthrough:
23 | 
24 |   $ cat passthrough_test.txt | ag foo
25 |   foo bar
26 |   foo test
27 | 


--------------------------------------------------------------------------------
/tests/case_sensitivity.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ echo Foo >> ./sample
 5 |   $ echo bar >> ./sample
 6 | 
 7 | Smart case by default:
 8 | 
 9 |   $ ag foo sample
10 |   1:Foo
11 |   $ ag FOO sample
12 |   [1]
13 |   $ ag 'f.o' sample
14 |   1:Foo
15 |   $ ag Foo sample
16 |   1:Foo
17 |   $ ag 'F.o' sample
18 |   1:Foo
19 | 
20 | Case sensitive mode:
21 | 
22 |   $ ag -s foo sample
23 |   [1]
24 |   $ ag -s FOO sample
25 |   [1]
26 |   $ ag -s 'f.o' sample
27 |   [1]
28 |   $ ag -s Foo sample
29 |   1:Foo
30 |   $ ag -s 'F.o' sample
31 |   1:Foo
32 | Case insensitive mode:
33 | 
34 |   $ ag foo -i sample
35 |   1:Foo
36 |   $ ag foo --ignore-case sample
37 |   1:Foo
38 |   $ ag 'f.o' -i sample
39 |   1:Foo
40 | 


--------------------------------------------------------------------------------
/Makefile.am:
--------------------------------------------------------------------------------
 1 | ACLOCAL_AMFLAGS = ${ACLOCAL_FLAGS}
 2 | 
 3 | bin_PROGRAMS = ag
 4 | ag_SOURCES = src/ignore.c src/ignore.h src/log.c src/log.h src/options.c src/options.h src/print.c src/print.h src/scandir.c src/scandir.h src/search.c src/search.h src/lang.c src/lang.h src/util.c src/util.h src/decompress.c src/decompress.h src/uthash.h src/main.c
 5 | ag_LDADD = ${PCRE_LIBS} ${LZMA_LIBS} ${ZLIB_LIBS} $(PTHREAD_LIBS)
 6 | 
 7 | dist_man_MANS = doc/ag.1
 8 | 
 9 | bashcompdir = $(pkgdatadir)/completions
10 | dist_bashcomp_DATA = ag.bashcomp.sh
11 | 
12 | EXTRA_DIST = Makefile.w32 LICENSE NOTICE the_silver_searcher.spec README.md
13 | 
14 | test:
15 | 	cram -v tests/*.t
16 | 
17 | test_big:
18 | 	cram -v tests/big/*.t
19 | 
20 | .PHONY : all test clean
21 | 


--------------------------------------------------------------------------------
/tests/big/big_file.t:
--------------------------------------------------------------------------------
 1 | Setup and create really big file:
 2 | 
 3 |   $ . $TESTDIR/../setup.sh
 4 |   $ python3 $TESTDIR/create_big_file.py $TESTDIR/big_file.txt
 5 | 
 6 | Search a big file:
 7 | 
 8 |   $ $TESTDIR/../../ag --nocolor --workers=1 --parallel hello $TESTDIR/big_file.txt
 9 |   33554432:hello1073741824
10 |   67108864:hello2147483648
11 |   100663296:hello3221225472
12 |   134217728:hello4294967296
13 |   167772160:hello5368709120
14 |   201326592:hello6442450944
15 |   234881024:hello7516192768
16 |   268435456:hello
17 | 
18 | Fail to regex search a big file:
19 |   $ $TESTDIR/../../ag --nocolor --workers=1 --parallel 'hello.*' $TESTDIR/big_file.txt
20 |   ERR: Skipping */big_file.txt: pcre_exec() can't handle files larger than 2147483647 bytes. (glob)
21 |   [1]
22 | 


--------------------------------------------------------------------------------
/tests/big/create_big_file.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | # Create an 8GB file of mostly "abcdefghijklmnopqrstuvwxyz01234",
 4 | # with a few instances of "hello"
 5 | 
 6 | import sys
 7 | 
 8 | if len(sys.argv) != 2:
 9 |     print("Usage: %s big_file.txt" % sys.argv[0])
10 |     sys.exit(1)
11 | 
12 | big_file = sys.argv[1]
13 | 
14 | 
15 | def create_big_file():
16 |     with open(big_file, "w") as fd:
17 |         for i in range(1, 2**28):
18 |             byte = i * 32
19 |             if byte % 2**30 == 0:
20 |                 fd.write("hello%s\n" % byte)
21 |             else:
22 |                 fd.write("abcdefghijklmnopqrstuvwxyz01234\n")
23 |         fd.write("hello\n")
24 | 
25 | 
26 | try:
27 |     fd = open(big_file, "r")
28 | except Exception as e:
29 |     create_big_file()
30 | 


--------------------------------------------------------------------------------
/Makefile.w32:
--------------------------------------------------------------------------------
 1 | VERSION=$(shell grep -Po "(?<=\[)([0-9.]+.[0-9]+.[0-9]+)(?=\])" configure.ac)
 2 | 
 3 | CC=gcc
 4 | 
 5 | SRCS = \
 6 | 	src/decompress.c \
 7 | 	src/ignore.c \
 8 | 	src/lang.c \
 9 | 	src/log.c \
10 | 	src/main.c \
11 | 	src/options.c \
12 | 	src/print.c \
13 | 	src/scandir.c \
14 | 	src/search.c \
15 | 	src/util.c
16 | OBJS = $(subst .c,.o,$(SRCS))
17 | 
18 | CFLAGS = -O2 -Isrc/win32 -DPACKAGE_VERSION=\"$(VERSION)\" -DHAVE_PTHREAD_H
19 | LIBS = -lz -lpthread -lpcre -llzma -lshlwapi
20 | CFLAGS := -Ic:/appl/mingw/local/include $(CFLAGS)
21 | LIBS := -Lc:/appl/mingw/local/lib $(LIBS)
22 | TARGET = ag.exe
23 | 
24 | all : $(TARGET)
25 | 
26 | $(TARGET) : $(OBJS)
27 | 	$(CC) -o $@ $(OBJS) $(LIBS)
28 | 
29 | .c.o :
30 | 	$(CC) -c $(CFLAGS) -Isrc $< -o $@
31 | 
32 | clean :
33 | 	rm -f src/*.o $(TARGET)
34 | 


--------------------------------------------------------------------------------
/tests/hidden_option.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ mkdir hidden_bug
 5 |   $ cd hidden_bug
 6 |   $ echo "test" > a.txt
 7 |   $ git init --quiet
 8 |   $ if [ ! -d .git/info ] ; then mkdir .git/info ; fi
 9 |   $ echo "a.txt" > .git/info/exclude
10 | 
11 |   $ ag --ignore-dir .git test
12 |   [1]
13 | 
14 |   $ ag --hidden --ignore-dir .git test
15 |   [1]
16 | 
17 |   $ ag -U --ignore-dir .git test
18 |   a.txt:1:test
19 | 
20 |   $ ag --hidden -U --ignore-dir .git test
21 |   a.txt:1:test
22 | 
23 |   $ mkdir -p ./.hidden
24 |   $ echo 'whatever' > ./.hidden/a.txt
25 | 
26 |   $ ag whatever
27 |   [1]
28 | 
29 |   $ ag --hidden whatever
30 |   [1]
31 | 
32 |   $ echo "" > .git/info/exclude
33 | 
34 |   $ ag whatever
35 |   [1]
36 | 
37 |   $ ag --hidden whatever
38 |   .hidden/a.txt:1:whatever
39 | 


--------------------------------------------------------------------------------
/src/ignore.h:
--------------------------------------------------------------------------------
 1 | #ifndef IGNORE_H
 2 | #define IGNORE_H
 3 | 
 4 | #include <dirent.h>
 5 | #include <sys/types.h>
 6 | 
 7 | #define SVN_DIR_PROP_BASE "dir-prop-base"
 8 | #define SVN_DIR ".svn"
 9 | #define SVN_PROP_IGNORE "svn:ignore"
10 | 
11 | struct ignores {
12 |     char **names; /* Non-regex ignore lines. Sorted so we can binary search them. */
13 |     size_t names_len;
14 |     char **regexes; /* For patterns that need fnmatch */
15 |     size_t regexes_len;
16 |     struct ignores *parent;
17 | };
18 | typedef struct ignores ignores;
19 | 
20 | ignores *root_ignores; /* NOTE(review): defined here rather than declared extern, so every file including this header emits a tentative definition; linking relies on common-symbol merging (-fcommon) -- confirm. */
21 | 
22 | extern const char *evil_hardcoded_ignore_files[];
23 | extern const char *ignore_pattern_files[];
24 | 
25 | ignores *init_ignore(ignores *parent);
26 | void cleanup_ignore(ignores *ig);
27 | 
28 | void add_ignore_pattern(ignores *ig, const char *pattern);
29 | 
30 | void load_ignore_patterns(ignores *ig, const char *path);
31 | void load_svn_ignore_patterns(ignores *ig, const char *path);
32 | 
33 | int filename_filter(const char *path, const struct dirent *dir, void *baton);
34 | 
35 | #endif
36 | 


--------------------------------------------------------------------------------
/doc/generate_man.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | 
 3 | # ronn is used to turn the markdown into a manpage.
 4 | # Get ronn at https://github.com/rtomayko/ronn
 5 | 
 6 | awk '
 7 | BEGIN{
 8 |   in_options_block = 0; # 1 from the "## OPTIONS" heading until a line starting with "## F"
 9 |   first_item_in_list_of_options = 1; # 1 when the next option name starts a new group
10 | }
11 | 
12 | {
13 |   if ($0 == "## OPTIONS") {
14 |     in_options_block = 1;
15 |   }
16 | 
17 |   if (in_options_block == 1) { # in options block
18 |     first_4_chars = substr($0, 1, 4); # substr() positions start at 1; a start of 0 returns only 3 characters in gawk and mawk
19 | 
20 |     if (first_4_chars == "  * ") { # this line contains the option name
21 | 
22 |       # print only 1 new line for cases like the following
23 |       # * `--[no]group`
24 |       # * `-g PATTERN`:
25 |       if (first_item_in_list_of_options == 1) {
26 |         print "";
27 |         first_item_in_list_of_options = 0;
28 |       }
29 | 
30 |       # end the line with 2 spaces, so a literal <br> is inserted!
31 |       # more info at http://daringfireball.net/projects/markdown/syntax.php#p
32 |       printf("%s  \n", substr($0, 5));
33 | 
34 |     } else if (first_4_chars == "    ") { # we are in a description line
35 |       printf("&nbsp;&nbsp;&nbsp;&nbsp;  %s\n", substr($0, 5));
36 |       first_item_in_list_of_options = 1;
37 |     } else if (first_4_chars == "## F") { # reached the end of #OPTIONS part
38 |       in_options_block = 0;
39 |       print $0;
40 |     } else {
41 |       print $0;
42 |     }
43 |   } else { # outside options block
44 |     print $0;
45 |   }
46 | }' <ag.1.md >ag.1.md.tmp
47 | 
48 | ronn -r ag.1.md.tmp
49 | 
50 | rm -f ag.1.md.tmp
51 | 


--------------------------------------------------------------------------------
/tests/ignore_backups.t:
--------------------------------------------------------------------------------
 1 | Setup:
 2 | 
 3 |   $ . $TESTDIR/setup.sh
 4 |   $ mkdir -p ./a/b/c
 5 |   $ echo 'whatever1'  > ./a/b/c/foo.yml
 6 |   $ echo 'whatever2'  > ./a/b/c/foo.yml~
 7 |   $ echo 'whatever3'  > ./a/b/c/.foo.yml.swp
 8 |   $ echo 'whatever4'  > ./a/b/c/.foo.yml.swo
 9 |   $ echo 'whatever5'  > ./a/b/foo.yml
10 |   $ echo 'whatever6'  > ./a/b/foo.yml~
11 |   $ echo 'whatever7'  > ./a/b/.foo.yml.swp
12 |   $ echo 'whatever8'  > ./a/b/.foo.yml.swo
13 |   $ echo 'whatever9'  > ./a/foo.yml
14 |   $ echo 'whatever10' > ./a/foo.yml~
15 |   $ echo 'whatever11' > ./a/.foo.yml.swp
16 |   $ echo 'whatever12' > ./a/.foo.yml.swo
17 |   $ echo 'whatever13' > ./foo.yml
18 |   $ echo 'whatever14' > ./foo.yml~
19 |   $ echo 'whatever15' > ./.foo.yml.swp
20 |   $ echo 'whatever16' > ./.foo.yml.swo
21 |   $ echo '*~\n*.sw[po]' > ./.gitignore
22 | 
23 | Ignore all files except foo.yml
24 | 
25 |   $ ag whatever . | sort
26 |   a/b/c/foo.yml:1:whatever1
27 |   a/b/foo.yml:1:whatever5
28 |   a/foo.yml:1:whatever9
29 |   foo.yml:1:whatever13
30 | 
31 | Don't ignore anything (unrestricted search):
32 | 
33 |   $ ag -u whatever . | sort
34 |   .foo.yml.swo:1:whatever16
35 |   .foo.yml.swp:1:whatever15
36 |   a/.foo.yml.swo:1:whatever12
37 |   a/.foo.yml.swp:1:whatever11
38 |   a/b/.foo.yml.swo:1:whatever8
39 |   a/b/.foo.yml.swp:1:whatever7
40 |   a/b/c/.foo.yml.swo:1:whatever4
41 |   a/b/c/.foo.yml.swp:1:whatever3
42 |   a/b/c/foo.yml:1:whatever1
43 |   a/b/c/foo.yml~:1:whatever2
44 |   a/b/foo.yml:1:whatever5
45 |   a/b/foo.yml~:1:whatever6
46 |   a/foo.yml:1:whatever9
47 |   a/foo.yml~:1:whatever10
48 |   foo.yml:1:whatever13
49 |   foo.yml~:1:whatever14
50 | 


--------------------------------------------------------------------------------
/src/search.h:
--------------------------------------------------------------------------------
 1 | #ifndef SEARCH_H
 2 | #define SEARCH_H
 3 | 
 4 | #include <dirent.h>
 5 | #include <errno.h>
 6 | #include <fcntl.h>
 7 | #include <limits.h>
 8 | #include <pcre.h>
 9 | #include <stdio.h>
10 | #include <stdlib.h>
11 | #include <string.h>
12 | #ifdef _WIN32
13 | #include <windows.h>
14 | #else
15 | #include <sys/mman.h>
16 | #endif
17 | #include <sys/stat.h>
18 | #include <unistd.h>
19 | 
20 | #include "config.h"
21 | 
22 | #ifdef HAVE_PTHREAD_H
23 | #include <pthread.h>
24 | #endif
25 | 
26 | #include "ignore.h"
27 | #include "log.h"
28 | #include "options.h"
29 | #include "print.h"
30 | #include "util.h"
31 | #include "uthash.h"
32 | 
33 | size_t alpha_skip_lookup[256];
34 | size_t *find_skip_lookup;
35 | 
36 | struct work_queue_t {
37 |     char *path;
38 |     struct work_queue_t *next;
39 | };
40 | typedef struct work_queue_t work_queue_t;
41 | 
42 | work_queue_t *work_queue; /* NOTE(review): these globals are defined (not declared extern) in a header; presumably the head of the list of pending paths -- confirm. */
43 | work_queue_t *work_queue_tail; /* presumably the last node of work_queue -- confirm */
44 | int done_adding_files;
45 | pthread_cond_t files_ready; /* NOTE(review): pthread types are used unconditionally although <pthread.h> is only included under HAVE_PTHREAD_H. */
46 | pthread_mutex_t print_mtx;
47 | pthread_mutex_t stats_mtx;
48 | pthread_mutex_t work_queue_mtx;
49 | 
50 | 
51 | /* For symlink loop detection */
52 | #define SYMLOOP_ERROR (-1)
53 | #define SYMLOOP_OK (0)
54 | #define SYMLOOP_LOOP (1)
55 | 
56 | typedef struct {
57 |     dev_t dev;
58 |     ino_t ino;
59 | } dirkey_t;
60 | 
61 | typedef struct {
62 |     dirkey_t key;
63 |     UT_hash_handle hh;
64 | } symdir_t;
65 | 
66 | symdir_t *symhash;
67 | 
68 | void search_buf(const char *buf, const size_t buf_len,
69 |                 const char *dir_full_path);
70 | void search_stream(FILE *stream, const char *path);
71 | void search_file(const char *file_full_path);
72 | 
73 | void *search_file_worker(void *i);
74 | 
75 | void search_dir(ignores *ig, const char *base_path, const char *path, const int depth);
76 | 
77 | #endif
78 | 


--------------------------------------------------------------------------------
/src/log.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdarg.h>
 3 | 
 4 | #include "log.h"
 5 | #include "util.h"
 6 | 
 7 | static enum log_level log_threshold = LOG_LEVEL_ERR;
 8 | 
 9 | void set_log_level(enum log_level threshold) { /* vplog() drops every message whose level is below `threshold`. */
10 |     log_threshold = threshold;
11 | }
12 | 
13 | void log_debug(const char *fmt, ...) { /* printf-style message at LOG_LEVEL_DEBUG */
14 |     va_list ap;
15 |     va_start(ap, fmt);
16 |     vplog(LOG_LEVEL_DEBUG, fmt, ap);
17 |     va_end(ap);
18 | }
19 | 
20 | void log_msg(const char *fmt, ...) { /* printf-style message at LOG_LEVEL_MSG */
21 |     va_list ap;
22 |     va_start(ap, fmt);
23 |     vplog(LOG_LEVEL_MSG, fmt, ap);
24 |     va_end(ap);
25 | }
26 | 
27 | void log_warn(const char *fmt, ...) { /* printf-style message at LOG_LEVEL_WARN */
28 |     va_list ap;
29 |     va_start(ap, fmt);
30 |     vplog(LOG_LEVEL_WARN, fmt, ap);
31 |     va_end(ap);
32 | }
33 | 
34 | void log_err(const char *fmt, ...) { /* printf-style message at LOG_LEVEL_ERR */
35 |     va_list ap;
36 |     va_start(ap, fmt);
37 |     vplog(LOG_LEVEL_ERR, fmt, ap);
38 |     va_end(ap);
39 | }
40 | 
41 | /* Print "<LEVEL>: <message>\n" unless `level` is below the current threshold.
42 |  * LOG_LEVEL_ERR is written to stderr; every other level goes to out_fd. */
43 | void vplog(const unsigned int level, const char *fmt, va_list args) {
44 |     const char *prefix = NULL;
45 |     FILE *stream = out_fd;
46 | 
47 |     if (level < log_threshold) {
48 |         return;
49 |     }
50 | 
51 |     if (level == LOG_LEVEL_DEBUG) {
52 |         prefix = "DEBUG: ";
53 |     } else if (level == LOG_LEVEL_MSG) {
54 |         prefix = "MSG: ";
55 |     } else if (level == LOG_LEVEL_WARN) {
56 |         prefix = "WARN: ";
57 |     } else if (level == LOG_LEVEL_ERR) {
58 |         prefix = "ERR: ";
59 |         stream = stderr;
60 |     }
61 |     if (prefix != NULL) { /* unrecognised levels get no prefix */
62 |         fprintf(stream, "%s", prefix);
63 |     }
64 |     vfprintf(stream, fmt, args);
65 |     fprintf(stream, "\n");
66 | }
67 | 
68 | void plog(const unsigned int level, const char *fmt, ...) { /* variadic front end to vplog() */
69 |     va_list ap;
70 |     va_start(ap, fmt);
71 |     vplog(level, fmt, ap);
72 |     va_end(ap);
73 | }
74 | 


--------------------------------------------------------------------------------
/configure.ac:
--------------------------------------------------------------------------------
 1 | AC_INIT(
 2 |     [the_silver_searcher],
 3 |     [0.24.1],
 4 |     [https://github.com/ggreer/the_silver_searcher/issues],
 5 |     [the_silver_searcher],
 6 |     [https://github.com/ggreer/the_silver_searcher])
 7 | 
 8 | AM_INIT_AUTOMAKE([no-define foreign subdir-objects])
 9 | 
10 | AC_PROG_CC
11 | AM_PROG_CC_C_O
12 | AC_PREREQ([2.59])
13 | 
14 | m4_ifdef(
15 |     [AM_SILENT_RULES],
16 |     [AM_SILENT_RULES([yes])])
17 | 
18 | PKG_CHECK_MODULES([PCRE], [libpcre])
19 | 
20 | m4_include([m4/ax_pthread.m4])
21 | AX_PTHREAD([
22 |     AC_CHECK_HEADERS([pthread.h])
23 | ])
24 | 
25 | # Run CFLAGS="-pg" ./configure if you want gprof profiling output (use CFLAGS="-g" for debug symbols)
26 | CFLAGS="$CFLAGS $PTHREAD_CFLAGS $PCRE_CFLAGS -Wall -Wextra -Wformat=2 -Wno-format-nonliteral -Wshadow -Wpointer-arith -Wcast-qual -Wmissing-prototypes -Wno-missing-braces -std=gnu89 -D_GNU_SOURCE -O2"
27 | LDFLAGS="$LDFLAGS"
28 | 
29 | LIBS="$PTHREAD_LIBS $LIBS"
30 | 
31 | AC_ARG_ENABLE([zlib],
32 |     AS_HELP_STRING([--disable-zlib], [Disable zlib compressed search support]))
33 | 
34 | AS_IF([test "x$enable_zlib" != "xno"], [
35 |     AC_CHECK_HEADERS([zlib.h])
36 |     AC_SEARCH_LIBS([inflate], [zlib, z])
37 | ])
38 | 
39 | AC_ARG_ENABLE([lzma],
40 |     AS_HELP_STRING([--disable-lzma], [Disable lzma compressed search support]))
41 | 
42 | AS_IF([test "x$enable_lzma" != "xno"], [
43 |     AC_CHECK_HEADERS([lzma.h])
44 |     PKG_CHECK_MODULES([LZMA], [liblzma])
45 | ])
46 | 
47 | AC_CHECK_DECL([PCRE_CONFIG_JIT], [AC_DEFINE([USE_PCRE_JIT], [], [Use PCRE JIT])], [], [#include <pcre.h>])
48 | 
49 | AC_CHECK_MEMBER([struct dirent.d_type], [AC_DEFINE([HAVE_DIRENT_DTYPE], [], [Have dirent struct member d_type])], [], [[#include <dirent.h>]])
50 | 
51 | AC_CHECK_FUNCS(fgetln getline realpath strlcpy strndup vasprintf madvise posix_fadvise)
52 | 
53 | AC_CONFIG_FILES([Makefile the_silver_searcher.spec])
54 | AC_CONFIG_HEADERS([src/config.h])
55 | 
56 | AC_OUTPUT
57 | 


--------------------------------------------------------------------------------
/src/scandir.c:
--------------------------------------------------------------------------------
 1 | #include <dirent.h>
 2 | #include <stdlib.h>
 3 | 
 4 | #include "scandir.h"
 5 | #include "util.h"
 6 | 
 7 | /* Store the entries of `dirname` accepted by `filter` in a malloc()ed array
 8 |  * returned through `*namelist`; the caller owns the array and every entry.
 9 |  * Returns the entry count, or -1 if opendir() or an allocation fails, in
10 |  * which case all partial allocations are freed and `*namelist` is not set. */
11 | int ag_scandir(const char *dirname,
12 |                struct dirent ***namelist,
13 |                filter_fp filter,
14 |                void *baton) {
15 |     DIR *dirp = NULL;
16 |     struct dirent **names = NULL;
17 |     struct dirent *entry, *d;
18 |     size_t entry_size;
19 |     int names_len = 32;
20 |     int results_len = 0;
21 | 
22 |     dirp = opendir(dirname);
23 |     if (dirp == NULL) {
24 |         goto fail;
25 |     }
26 | 
27 |     names = malloc(sizeof(struct dirent *) * names_len);
28 |     if (names == NULL) {
29 |         goto fail;
30 |     }
31 | 
32 |     while ((entry = readdir(dirp)) != NULL) {
33 |         if ((*filter)(dirname, entry, baton) == FALSE) {
34 |             continue;
35 |         }
36 |         if (results_len >= names_len) {
37 |             /* Keep `names` valid on failure so the fail path frees its entries. */
38 |             struct dirent **grown = realloc(names, sizeof(struct dirent *) * names_len * 2);
39 |             if (grown == NULL) {
40 |                 goto fail;
41 |             }
42 |             names = grown;
43 |             names_len *= 2;
44 |         }
45 | 
46 | #if defined(__MINGW32__) || defined(__CYGWIN__)
47 |         entry_size = sizeof(struct dirent);
48 | #else
49 |         entry_size = entry->d_reclen;
50 | #endif
51 |         d = malloc(entry_size);
52 |         if (d == NULL) {
53 |             goto fail;
54 |         }
55 |         memcpy(d, entry, entry_size);
56 | 
57 |         names[results_len] = d;
58 |         results_len++;
59 |     }
60 | 
61 |     closedir(dirp);
62 |     *namelist = names;
63 |     return results_len;
64 | 
65 | fail:
66 |     if (dirp) {
67 |         closedir(dirp);
68 |     }
69 |     if (names != NULL) {
70 |         int i;
71 |         for (i = 0; i < results_len; i++) {
72 |             free(names[i]);
73 |         }
74 |         free(names);
75 |     }
76 |     return -1;
77 | }
78 | 


--------------------------------------------------------------------------------
/the_silver_searcher.spec.in:
--------------------------------------------------------------------------------
 1 | %define _bashcompdir %_sysconfdir/bash_completion.d
 2 | 
 3 | 
 4 | Name:		the_silver_searcher
 5 | Version:	@VERSION@
 6 | Release:	1%{?dist}
 7 | Summary:	A code-searching tool similar to ack, but faster
 8 | 
 9 | Group:		Applications/Utilities
10 | License:	Apache v2.0
11 | URL:		https://github.com/ggreer/%{name}
12 | Source0:	https://github.com/downloads/ggreer/%{name}/%{name}-%{version}.tar.gz
13 | BuildRoot:	%(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
14 | 
15 | BuildRequires:	pcre-devel, xz-devel, zlib-devel
16 | Requires:	pcre, xz, zlib
17 | 
18 | %description
19 | The Silver Searcher
20 | An attempt to make something better than ack (which itself is better than grep).
21 | 
22 | Why use Ag?
23 | * It searches code about 3–5× faster than ack.
24 | * It ignores file patterns from your .gitignore and .hgignore.
25 | * If there are files in your source repo you don't want to search, just add their patterns to a .agignore file. *cough* extern *cough*
26 | * The command name is 33% shorter than ack!
27 | 
28 | How is it so fast?
29 | * Searching for literals (no regex) uses Boyer-Moore-Horspool strstr.
30 | * Files are mmap()ed instead of read into a buffer.
31 | * If you're building with PCRE 8.21 or greater, regex searches use the JIT compiler.
32 | * Ag calls pcre_study() before executing the regex on a jillion files.
33 | * Instead of calling fnmatch() on every pattern in your ignore files, non-regex patterns are loaded into an array and binary searched.
34 | * Ag uses Pthreads to take advantage of multiple CPU cores and search files in parallel.
35 | 
36 | %prep
37 | %setup -q
38 | 
39 | 
40 | %build
41 | aclocal
42 | autoconf
43 | autoheader
44 | automake --add-missing
45 | %configure 
46 | make %{?_smp_mflags}
47 | 
48 | 
49 | %install
50 | rm -rf ${RPM_BUILD_ROOT}
51 | make install DESTDIR=${RPM_BUILD_ROOT}
52 | mkdir -p ${RPM_BUILD_ROOT}%{_bashcompdir}
53 | install -m 644 ag.bashcomp.sh ${RPM_BUILD_ROOT}%{_bashcompdir}
54 | 
55 | %clean
56 | rm -rf ${RPM_BUILD_ROOT}
57 | 
58 | 
59 | %files
60 | %defattr(-,root,root,-)
61 | %{_bindir}/*
62 | %{_mandir}/*
63 | %config %{_bashcompdir}/ag.bashcomp.sh
64 | %config %{_datadir}/%{name}/completions/ag.bashcomp.sh
65 | 
66 | 
67 | %changelog
68 | * Thu Dec 5 2013 Emily Strickland <code@emily.st> - 0.18.1-1
69 | - More accurate build and install requirements
70 | 
71 | * Fri Aug 16 2013 Andrew Seidl <git@aas.io> - 0.15.0-1
72 | - Install bash completion file
73 | 
74 | * Wed Dec 05 2012 Daniel Nelson <packetcollision@gmail.com> - 0.13.1-1
75 | - Initial Build
76 | 


--------------------------------------------------------------------------------
/src/options.h:
--------------------------------------------------------------------------------
 1 | #ifndef OPTIONS_H
 2 | #define OPTIONS_H
 3 | 
 4 | #include <getopt.h>
 5 | #include <sys/stat.h>
 6 | 
 7 | #include <pcre.h>
 8 | 
 9 | #define DEFAULT_AFTER_LEN 2
10 | #define DEFAULT_BEFORE_LEN 2
11 | #define DEFAULT_CONTEXT_LEN 2
12 | 
13 | enum case_behavior { /* how letter case in the query is treated when matching */
14 |     CASE_SENSITIVE,
15 |     CASE_INSENSITIVE, /* main() lowercases a literal query, or adds PCRE_CASELESS for a regex */
16 |     CASE_SMART, /* main() replaces this with CASE_INSENSITIVE if is_lowercase(query), else CASE_SENSITIVE */
17 |     CASE_SENSITIVE_RETRY_INSENSITIVE /* for future use */
18 | };
19 | 
20 | enum path_print_behavior { /* whether and where a file's path is printed; the two DEFAULT values depend on how many files are searched */
21 |     PATH_PRINT_DEFAULT, /* PRINT_TOP if > 1 file being searched, else PRINT_NOTHING */
22 |     PATH_PRINT_DEFAULT_EACH_LINE, /* PRINT_EACH_LINE if > 1 file being searched, else PRINT_NOTHING */
23 |     PATH_PRINT_TOP,
24 |     PATH_PRINT_EACH_LINE,
25 |     PATH_PRINT_NOTHING
26 | };
27 | 
28 | typedef struct { /* all run-time settings; a single instance lives in the global `opts` */
29 |     int ackmate;
30 |     pcre *ackmate_dir_filter;
31 |     pcre_extra *ackmate_dir_filter_extra;
32 |     size_t after;
33 |     size_t before;
34 |     enum case_behavior casing; /* CASE_SMART is resolved to a concrete mode by main() before searching */
35 |     const char *file_search_string;
36 |     int match_files;
37 |     pcre *file_search_regex;
38 |     pcre_extra *file_search_regex_extra;
39 |     int color;
40 |     char *color_line_number;
41 |     char *color_match;
42 |     char *color_path;
43 |     int column;
44 |     int context;
45 |     int follow_symlinks;
46 |     int invert_match;
47 |     int literal; /* nonzero: main() builds Boyer-Moore skip tables for query instead of compiling a PCRE */
48 |     int literal_starts_wordchar; /* set by main() from is_wordchar(query[0]) when literal and word_regexp are both set */
49 |     int literal_ends_wordchar; /* set by main() from is_wordchar() of the last query character, same condition */
50 |     int max_matches_per_file;
51 |     int max_search_depth;
52 |     int null_follows_filename;
53 |     char *path_to_agignore;
54 |     int print_break;
55 |     int print_filename_only;
56 |     int print_path;
57 |     int print_line_numbers;
58 |     int print_long_lines; /* TODO: support this in print.c */
59 |     int passthrough;
60 |     pcre *re; /* filled by compile_study() in main() for non-literal queries */
61 |     pcre_extra *re_extra; /* filled by the same compile_study() call */
62 |     int recurse_dirs;
63 |     int search_all_files;
64 |     int skip_vcs_ignores;
65 |     int search_binary_files;
66 |     int search_zip_files;
67 |     int search_hidden_files;
68 |     int search_stream; /* true if tail -F blah | ag; main() then calls search_stream(stdin, "") instead of starting workers */
69 |     int stats; /* nonzero: main() prints match/file/byte totals and elapsed seconds before exiting */
70 |     size_t stream_line_num; /* This should totally not be in here */
71 |     int match_found; /* This should totally not be in here; main() returns !match_found as the exit status */
72 |     ino_t stdout_inode;
73 |     char *query; /* main() may lowercase it in place or free() it and install a "\b...\b" copy, so it must be writable heap memory */
74 |     int query_len; /* main() refreshes it with strlen() after wrapping the query in \b */
75 |     char *pager; /* when set, main() closes out_fd with pclose() */
76 |     int paths_len;
77 |     int parallel;
78 |     int word_regexp;
79 |     int workers; /* nonzero overrides the worker count main() derives from the processor count */
80 | } cli_options;
81 | 
82 | /* Global options. parse_options gives it sane values, everything else reads from it. */
83 | cli_options opts; /* NOTE(review): this is a tentative definition in a header, so every including .c file defines `opts`; linking relies on common-symbol merging (-fcommon). Consider `extern` here plus one definition in options.c. */
84 | 
85 | typedef struct option option_t;
86 | 
87 | void usage(void);
88 | void print_version(void);
89 | 
90 | void init_options(void);
91 | void parse_options(int argc, char **argv, char **base_paths[], char **paths[]);
92 | void cleanup_options(void);
93 | 
94 | #endif
95 | 


--------------------------------------------------------------------------------
/ag.bashcomp.sh:
--------------------------------------------------------------------------------
  1 | have ag &&
  2 | _ag() {
  3 |   # Bash completion for ag. Uses the bash-completion helpers _get_cword,
  4 |   # _expand, _split_longopt and _filedir; COMPREPLY receives the candidates.
  5 |   local lngopt shtopt split=false
  6 |   local cur prev
  7 | 
  8 |   COMPREPLY=()
  9 |   cur=$(_get_cword "=")
 10 |   prev="${COMP_WORDS[COMP_CWORD-1]}"
 11 | 
 12 |   _expand || return 0
 13 | 
 14 |   lngopt='
 15 |     --ackmate
 16 |     --ackmate-dir-filter
 17 |     --all-text
 18 |     --all-types
 19 |     --after
 20 |     --before
 21 |     --break
 22 |     --nobreak
 23 |     --case-sensitive
 24 |     --color-line-number
 25 |     --color-match
 26 |     --color-path
 27 |     --color
 28 |     --nocolor
 29 |     --column
 30 |     --context
 31 |     --debug
 32 |     --depth
 33 |     --file-search-regex
 34 |     --files-with-matches
 35 |     --files-without-matches
 36 |     --follow
 37 |     --group
 38 |     --nogroup
 39 |     --heading
 40 |     --noheading
 41 |     --help
 42 |     --hidden
 43 |     --ignore
 44 |     --ignore-case
 45 |     --ignore-dir
 46 |     --invert-match
 47 |     --line-numbers
 48 |     --list-file-types
 49 |     --literal
 50 |     --max-count
 51 |     --no-numbers
 52 |     --null
 53 |     --pager
 54 |     --nopager
 55 |     --parallel
 56 |     --passthrough
 57 |     --path-to-agignore
 58 |     --print-long-lines
 59 |     --recurse
 60 |     --no-recurse
 61 |     --search-binary
 62 |     --search-files
 63 |     --search-zip
 64 |     --silent
 65 |     --skip-vcs-ignores
 66 |     --smart-case
 67 |     --stats
 68 |     --unrestricted
 69 |     --version
 70 |     --word-regexp
 71 |     --workers
 72 |   '
 73 |   shtopt='
 74 |     -a -A -B -C -D
 75 |     -f -g -G -h -H -i
 76 |     -l -L -m -n -p
 77 |     -Q -r -R -s -S
 78 |     -t -u -U -v -V
 79 |     -w -z
 80 |   '
 81 | 
 82 |   # these options require an argument
 83 |   if [[ "${prev}" == -@(A|B|C|G|g|m) ]] ; then
 84 |     return 0
 85 |   fi
 86 | 
 87 |   _split_longopt && split=true
 88 | 
 89 |   case "${prev}" in
 90 |     --ignore-dir) # directory completion
 91 |               _filedir -d
 92 |               return 0;;
 93 |     -p|--path-to-agignore) # file completion
 94 |               _filedir
 95 |               return 0;;
 96 |     --pager) # command completion
 97 |               COMPREPLY=( $(compgen -c -- "${cur}") )
 98 |               return 0;;
 99 |     --ackmate-dir-filter|--after|--before|--color-*|--context|--depth\
100 |     |--file-search-regex|--ignore|--max-count|--workers)
101 |               return 0;;
102 |   esac
103 | 
104 |   $split && return 0
105 | 
106 |   case "${cur}" in
107 |     -*) # option completion; the candidate list is the same at every word position
108 |           COMPREPLY=( $(compgen -W \
109 |             "${lngopt} ${shtopt}" -- "${cur}") )
110 |           return 0;;
111 |     *)
112 |           _filedir
113 |           return 0;;
114 |   esac
115 | } &&
116 | complete -F _ag ${nospace} ag
117 | 


--------------------------------------------------------------------------------
/tests/list_file_types.t:
--------------------------------------------------------------------------------
  1 | Setup:
  2 | 
  3 |   $ . $TESTDIR/setup.sh
  4 | 
  5 | Language types are output:
  6 | 
  7 |   $ ag --list-file-types
  8 |   The following file types are supported:
  9 |     --actionscript
 10 |         .as  .mxml
 11 |   
 12 |     --ada
 13 |         .ada  .adb  .ads
 14 |   
 15 |     --asm
 16 |         .asm  .s
 17 |   
 18 |     --batch
 19 |         .bat  .cmd
 20 |   
 21 |     --cc
 22 |         .c  .h  .xs
 23 |   
 24 |     --cfmx
 25 |         .cfc  .cfm  .cfml
 26 |   
 27 |     --clojure
 28 |         .clj
 29 |   
 30 |     --coffee
 31 |         .coffee
 32 |   
 33 |     --cpp
 34 |         .cpp  .cc  .C  .cxx  .m  .hpp  .hh  .h  .H  .hxx
 35 |   
 36 |     --csharp
 37 |         .cs
 38 |   
 39 |     --css
 40 |         .css
 41 |   
 42 |     --delphi
 43 |         .pas  .int  .dfm  .nfm  .dof  .dpk  .dproj  .groupproj  .bdsgroup  .bdsproj
 44 |   
 45 |     --elisp
 46 |         .el
 47 |   
 48 |     --erlang
 49 |         .erl  .hrl
 50 |   
 51 |     --fortran
 52 |         .f  .f77  .f90  .f95  .f03  .for  .ftn  .fpp
 53 |   
 54 |     --gettext
 55 |         .po  .pot  .mo
 56 |   
 57 |     --go
 58 |         .go
 59 |   
 60 |     --groovy
 61 |         .groovy  .gtmpl  .gpp  .grunit
 62 |   
 63 |     --haml
 64 |         .haml
 65 |   
 66 |     --haskell
 67 |         .hs  .lhs
 68 |   
 69 |     --hh
 70 |         .h
 71 |   
 72 |     --html
 73 |         .htm  .html  .shtml  .xhtml
 74 |   
 75 |     --ini
 76 |         .ini
 77 |   
 78 |     --jade
 79 |         .jade
 80 |   
 81 |     --java
 82 |         .java  .properties
 83 |   
 84 |     --js
 85 |         .js
 86 |   
 87 |     --json
 88 |         .json
 89 |   
 90 |     --jsp
 91 |         .jsp  .jspx  .jhtm  .jhtml
 92 |   
 93 |     --less
 94 |         .less
 95 |   
 96 |     --lisp
 97 |         .lisp  .lsp
 98 |   
 99 |     --lua
100 |         .lua
101 |   
102 |     --m4
103 |         .m4
104 |   
105 |     --make
106 |         .Makefiles  .mk  .mak
107 |   
108 |     --mason
109 |         .mas  .mhtml  .mpl  .mtxt
110 |   
111 |     --matlab
112 |         .m
113 |   
114 |     --objc
115 |         .m  .h
116 |   
117 |     --objcpp
118 |         .mm  .h
119 |   
120 |     --ocaml
121 |         .ml  .mli
122 |   
123 |     --octave
124 |         .m
125 |   
126 |     --parrot
127 |         .pir  .pasm  .pmc  .ops  .pod  .pg  .tg
128 |   
129 |     --perl
130 |         .pl  .pm  .pm6  .pod  .t
131 |   
132 |     --php
133 |         .php  .phpt  .php3  .php4  .php5  .phtml
134 |   
135 |     --plone
136 |         .pt  .cpt  .metadata  .cpy  .py
137 |   
138 |     --python
139 |         .py
140 |   
141 |     --rake
142 |         .Rakefiles
143 |   
144 |     --rs
145 |         .rs
146 |   
147 |     --ruby
148 |         .rb  .rhtml  .rjs  .rxml  .erb  .rake  .spec
149 |   
150 |     --rust
151 |         .rs
152 |   
153 |     --salt
154 |         .sls
155 |   
156 |     --sass
157 |         .sass  .scss
158 |   
159 |     --scala
160 |         .scala
161 |   
162 |     --scheme
163 |         .scm  .ss
164 |   
165 |     --shell
166 |         .sh  .bash  .csh  .tcsh  .ksh  .zsh
167 |   
168 |     --smalltalk
169 |         .st
170 |   
171 |     --sql
172 |         .sql  .ctl
173 |   
174 |     --stylus
175 |         .styl
176 |   
177 |     --swift
178 |         .swift
179 |   
180 |     --tcl
181 |         .tcl  .itcl  .itk
182 |   
183 |     --tex
184 |         .tex  .cls  .sty
185 |   
186 |     --tt
187 |         .tt  .tt2  .ttml
188 |   
189 |     --vb
190 |         .bas  .cls  .frm  .ctl  .vb  .resx
191 |   
192 |     --verilog
193 |         .v  .vh  .sv
194 |   
195 |     --vhdl
196 |         .vhd  .vhdl
197 |   
198 |     --vim
199 |         .vim
200 |   
201 |     --xml
202 |         .xml  .dtd  .xsl  .xslt  .ent
203 |   
204 |     --yaml
205 |         .yaml  .yml
206 |   
207 | 


--------------------------------------------------------------------------------
/src/util.h:
--------------------------------------------------------------------------------
  1 | #ifndef UTIL_H
  2 | #define UTIL_H
  3 | 
  4 | #include <dirent.h>
  5 | #include <pcre.h>
  6 | #include <stdio.h>
  7 | #include <string.h>
  8 | #include <stdio.h>
  9 | #include <sys/time.h>
 10 | 
 11 | #include "config.h"
 12 | #include "log.h"
 13 | #include "options.h"
 14 | 
 15 | FILE *out_fd;
 16 | 
 17 | #ifndef TRUE
 18 | #define TRUE 1
 19 | #endif
 20 | 
 21 | #ifndef FALSE
 22 | #define FALSE 0
 23 | #endif
 24 | 
 25 | void *ag_malloc(size_t size);
 26 | void *ag_realloc(void *ptr, size_t size);
 27 | void *ag_calloc(size_t nelem, size_t elsize);
 28 | char *ag_strdup(const char *s);
 29 | char *ag_strndup(const char *s, size_t size);
 30 | 
 31 | typedef struct {
 32 |     size_t start; /* Byte at which the match starts */
 33 |     size_t end;   /* and where it ends */
 34 | } match_t;
 35 | 
 36 | typedef struct { /* run counters; main() zeroes the global `stats` with memset() at startup */
 37 |     long total_bytes; /* printed by main() as "bytes searched" */
 38 |     long total_files; /* printed by main() as "files searched" */
 39 |     long total_matches; /* printed by main() as "matches" */
 40 |     struct timeval time_start; /* taken by main() just before parse_options() */
 41 |     struct timeval time_end; /* taken by main() only when opts.stats is set */
 42 | } ag_stats;
 43 | 
 44 | typedef enum { /* result type of is_zipped(); decompress() takes it as its zip_type argument */
 45 |     AG_NO_COMPRESSION,
 46 |     AG_GZIP,
 47 |     AG_COMPRESS,
 48 |     AG_ZIP
 49 | } ag_compression_type;
 50 | 
 51 | ag_stats stats;
 52 | 
 53 | typedef const char *(*strncmp_fp)(const char *, const char *, const size_t, const size_t, const size_t[], const size_t *);
 54 | 
 55 | void generate_alpha_skip(const char *find, size_t f_len, size_t skip_lookup[], const int case_sensitive);
 56 | int is_prefix(const char *s, const size_t s_len, const size_t pos, const int case_sensitive);
 57 | size_t suffix_len(const char *s, const size_t s_len, const size_t pos, const int case_sensitive);
 58 | void generate_find_skip(const char *find, const size_t f_len, size_t **skip_lookup, const int case_sensitive);
 59 | 
 60 | /* max is already defined on spec-violating compilers such as MinGW */
 61 | size_t ag_max(size_t a, size_t b);
 62 | 
 63 | const char *boyer_moore_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len,
 64 |                                 const size_t alpha_skip_lookup[], const size_t *find_skip_lookup);
 65 | const char *boyer_moore_strncasestr(const char *s, const char *find, const size_t s_len, const size_t f_len,
 66 |                                     const size_t alpha_skip_lookup[], const size_t *find_skip_lookup);
 67 | 
 68 | strncmp_fp get_strstr(enum case_behavior opts);
 69 | 
 70 | size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[], size_t matches_len);
 71 | void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts);
 72 | 
 73 | void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len, const char *dir_full_path, int *new_buf_len);
 74 | ag_compression_type is_zipped(const void *buf, const int buf_len);
 75 | 
 76 | int is_binary(const void *buf, const size_t buf_len);
 77 | int is_regex(const char *query);
 78 | int is_fnmatch(const char *filename);
 79 | int binary_search(const char *needle, char **haystack, int start, int end);
 80 | 
 81 | void init_wordchar_table(void);
 82 | int is_wordchar(char ch);
 83 | 
 84 | int is_lowercase(const char *s);
 85 | 
 86 | int is_directory(const char *path, const struct dirent *d);
 87 | int is_symlink(const char *path, const struct dirent *d);
 88 | int is_named_pipe(const char *path, const struct dirent *d);
 89 | 
 90 | void die(const char *fmt, ...);
 91 | 
 92 | void ag_asprintf(char **ret, const char *fmt, ...);
 93 | 
 94 | #ifndef HAVE_FGETLN /* each prototype below is declared only when its HAVE_* macro is undefined */
 95 | char *fgetln(FILE *fp, size_t *lenp);
 96 | #endif
 97 | #ifndef HAVE_GETLINE
 98 | ssize_t getline(char **lineptr, size_t *n, FILE *stream); /* NOTE(review): ssize_t comes from <sys/types.h>, which is not included directly here; confirm it arrives transitively */
 99 | #endif
100 | #ifndef HAVE_REALPATH
101 | char *realpath(const char *path, char *resolved_path);
102 | #endif
103 | #ifndef HAVE_STRLCPY
104 | size_t strlcpy(char *dest, const char *src, size_t size);
105 | #endif
106 | #ifndef HAVE_VASPRINTF
107 | int vasprintf(char **ret, const char *fmt, va_list args); /* NOTE(review): va_list needs <stdarg.h>, which this header does not include directly; confirm every includer provides it first */
108 | #endif
109 | 
110 | #endif
111 | 


--------------------------------------------------------------------------------
/src/lang.c:
--------------------------------------------------------------------------------
  1 | #include <stdlib.h>
  2 | #include <string.h>
  3 | 
  4 | #include "lang.h"
  5 | #include "util.h"
  6 | 
  7 | lang_spec_t langs[] = {
  8 |     { "actionscript", { "as", "mxml" } },
  9 |     { "ada", { "ada", "adb", "ads" } },
 10 |     { "asm", { "asm", "s" } },
 11 |     { "batch", { "bat", "cmd" } },
 12 |     { "cc", { "c", "h", "xs" } },
 13 |     { "cfmx", { "cfc", "cfm", "cfml" } },
 14 |     { "clojure", { "clj" } },
 15 |     { "coffee", { "coffee" } },
 16 |     { "cpp", { "cpp", "cc", "C", "cxx", "m", "hpp", "hh", "h", "H", "hxx" } },
 17 |     { "csharp", { "cs" } },
 18 |     { "css", { "css" } },
 19 |     { "delphi", { "pas", "int", "dfm", "nfm", "dof", "dpk", "dproj", "groupproj", "bdsgroup", "bdsproj" } },
 20 |     { "elisp", { "el" } },
 21 |     { "erlang", { "erl", "hrl" } },
 22 |     { "fortran", { "f", "f77", "f90", "f95", "f03", "for", "ftn", "fpp" } },
 23 |     { "gettext", { "po", "pot", "mo" } },
 24 |     { "go", { "go" } },
 25 |     { "groovy", { "groovy", "gtmpl", "gpp", "grunit" } },
 26 |     { "haml", { "haml" } },
 27 |     { "haskell", { "hs", "lhs" } },
 28 |     { "hh", { "h" } },
 29 |     { "html", { "htm", "html", "shtml", "xhtml" } },
 30 |     { "ini", { "ini" } },
 31 |     { "jade", { "jade" } },
 32 |     { "java", { "java", "properties" } },
 33 |     { "js", { "js" } },
 34 |     { "json", { "json" } },
 35 |     { "jsp", { "jsp", "jspx", "jhtm", "jhtml" } },
 36 |     { "less", { "less" } },
 37 |     { "lisp", { "lisp", "lsp" } },
 38 |     { "lua", { "lua" } },
 39 |     { "m4", { "m4" } },
 40 |     { "make", { "Makefiles", "mk", "mak" } },
 41 |     { "mason", { "mas", "mhtml", "mpl", "mtxt" } },
 42 |     { "matlab", { "m" } },
 43 |     { "objc", { "m", "h" } },
 44 |     { "objcpp", { "mm", "h" } },
 45 |     { "ocaml", { "ml", "mli" } },
 46 |     { "octave", { "m" } },
 47 |     { "parrot", { "pir", "pasm", "pmc", "ops", "pod", "pg", "tg" } },
 48 |     { "perl", { "pl", "pm", "pm6", "pod", "t" } },
 49 |     { "php", { "php", "phpt", "php3", "php4", "php5", "phtml" } },
 50 |     { "plone", { "pt", "cpt", "metadata", "cpy", "py" } },
 51 |     { "python", { "py" } },
 52 |     { "rake", { "Rakefiles" } },
 53 |     { "rs", { "rs" } },
 54 |     { "ruby", { "rb", "rhtml", "rjs", "rxml", "erb", "rake", "spec" } },
 55 |     { "rust", { "rs" } },
 56 |     { "salt", { "sls" } },
 57 |     { "sass", { "sass", "scss" } },
 58 |     { "scala", { "scala" } },
 59 |     { "scheme", { "scm", "ss" } },
 60 |     { "shell", { "sh", "bash", "csh", "tcsh", "ksh", "zsh" } },
 61 |     { "smalltalk", { "st" } },
 62 |     { "sql", { "sql", "ctl" } },
 63 |     { "stylus", { "styl" } },
 64 |     { "swift", { "swift" } },
 65 |     { "tcl", { "tcl", "itcl", "itk" } },
 66 |     { "tex", { "tex", "cls", "sty" } },
 67 |     { "tt", { "tt", "tt2", "ttml" } },
 68 |     { "vb", { "bas", "cls", "frm", "ctl", "vb", "resx" } },
 69 |     { "verilog", { "v", "vh", "sv" } },
 70 |     { "vhdl", { "vhd", "vhdl" } },
 71 |     { "vim", { "vim" } },
 72 |     { "xml", { "xml", "dtd", "xsl", "xslt", "ent" } },
 73 |     { "yaml", { "yaml", "yml" } },
 74 |     { NULL, { NULL } }
 75 | };
 76 | 
 77 | /* Builds "\.(ext1|ext2|...)$" from a NULL-terminated extension list, in a buffer obtained from ag_malloc(). */
 78 | char *make_lang_regex(const char **extensions) {
 79 |     int capacity = 100;
 80 |     int used = 3; /* length of the "\.(" prefix */
 81 |     int need_separator = 0;
 82 |     char *pattern = ag_malloc(capacity);
 83 |     const char **ext = extensions;
 84 | 
 85 |     memcpy(pattern, "\\.(", 4);
 86 | 
 87 |     while (*ext != NULL) {
 88 |         int ext_len = strlen(*ext);
 89 |         /* keep room for an optional '|', the extension, and the closing ")$" plus NUL */
 90 |         while (used + ext_len + 3 + need_separator > capacity) {
 91 |             capacity *= 2;
 92 |             pattern = ag_realloc(pattern, capacity);
 93 |         }
 94 |         if (need_separator) {
 95 |             pattern[used++] = '|';
 96 |         }
 97 |         need_separator = 1;
 98 |         memcpy(pattern + used, *ext, ext_len + 1);
 99 |         used += ext_len;
100 |         ext++;
101 |     }
102 | 
103 |     memcpy(pattern + used, ")$", 3);
104 |     return pattern;
105 | }
106 | 


--------------------------------------------------------------------------------
/doc/ag.1.md:
--------------------------------------------------------------------------------
  1 | ag(1) -- The Silver Searcher. Like ack, but faster.
  2 | =============================================
  3 | 
  4 | ## SYNOPSIS
  5 | 
  6 | `ag` [<file-type>] [<options>] PATTERN [PATH]
  7 | 
  8 | ## DESCRIPTION
  9 | 
 10 | Recursively search for PATTERN in PATH. Like grep or ack, but faster.
 11 | 
 12 | ## OPTIONS
 13 | 
 14 |   * `--ackmate`:
 15 |     Output results in a format parseable by [AckMate](https://github.com/protocool/AckMate).
 16 |   * `-a --all-types`:
 17 |     Search all files. This doesn't include hidden files, and also doesn't respect any ignore files
 18 |   * `-A --after [LINES]`:
 19 |     Print lines after match. Defaults to 2.
 20 |   * `-B --before [LINES]`:
 21 |     Print lines before match. Defaults to 2.
 22 |   * `--[no]break`:
 23 |     Print a newline between matches in different files. Enabled by default.
 24 |   * `--[no]color`:
 25 |     Print color codes in results. Enabled by default.
 26 |   * `--color-line-number`:
 27 |     Color codes for line numbers. Defaults to 1;33.
 28 |   * `--color-match`:
 29 |     Color codes for result match numbers. Defaults to 30;43.
 30 |   * `--color-path`:
 31 |     Color codes for path names. Defaults to 1;32.
 32 |   * `--column`:
 33 |     Print column numbers in results.
 34 |   * `-C --context [LINES]`:
 35 |     Print lines before and after matches. Defaults to 2.
 36 |   * `-D --debug`:
 37 |     Output ridiculous amounts of debugging info. Probably not useful.
 38 |   * `--depth NUM`:
 39 |     Search up to NUM directories deep. Default is 25.
 40 |   * `-f --follow`:
 41 |     Follow symlinks.
 42 |   * `--[no]group`
 43 |   * `-g PATTERN`:
 44 |     Print filenames matching PATTERN.
 45 |   * `-G`, `--file-search-regex PATTERN`:
 46 |     Only search filenames matching PATTERN.
 47 |   * `-H`, `--[no]heading`:
 48 |     Print file names above matching contents.
 49 |   * `--hidden`:
 50 |     Search hidden files. This option obeys ignore files.
 51 |   * `--ignore PATTERN`:
 52 |     Ignore files/directories matching this pattern. Literal file and directory names are also allowed.
 53 |   * `--ignore-dir NAME`:
 54 |     Alias for --ignore for compatibility with ack.
 55 |   * `-i --ignore-case`:
 56 |     Match case insensitively.
 57 |   * `-l --files-with-matches`:
 58 |     Only print filenames containing matches, not matching lines. An empty query will print all files that would be searched.
 59 |   * `-L --files-without-matches`:
 60 |     Only print filenames that don't contain matches.
 61 |   * `--list-file-types`:
 62 |     See `FILE TYPES` below.
 63 |   * `-m --max-count NUM`:
 64 |     Skip the rest of a file after NUM matches. Default is 10,000.
 65 |   * `--no-numbers`:
 66 |     Don't show line numbers.
 67 |   * `--null`:
 68 |     Separate the filenames printed by -l or -L with \0 rather than \n. This allows 'xargs -0 <command>' to correctly process filenames with spaces.
 69 |   * `-p --path-to-agignore STRING`:
 70 |     Provide a path to a specific .agignore file.
 71 |   * `--pager COMMAND`:
 72 |     Use a pager such as less. Use `--nopager` to override. This option is also ignored if output is piped to another program.
 73 |   * `--print-long-lines`:
 74 |     Print matches on very long lines (> 2k characters by default)
 75 |   * `--passthrough`:
 76 |     When searching a stream, print all lines even if they don't match.
 77 |   * `-Q --literal`:
 78 |     Do not parse PATTERN as a regular expression. Try to match it literally.
 79 |   * `-s --case-sensitive`:
 80 |     Match case sensitively.
 81 |   * `-S --smart-case`:
 82 |     Match case sensitively if there are any uppercase letters in PATTERN, or case insensitively otherwise. Enabled by default.
 83 |   * `--search-binary`:
 84 |     Search binary files for matches.
 85 |   * `--silent`:
 86 |     Suppress all log messages, including errors.
 87 |   * `--stats`:
 88 |     Print stats (files scanned, time taken, etc)
 89 |   * `-t --all-text`:
 90 |     Search all text files. This doesn't include hidden files.
 91 |   * `-u --unrestricted`:
 92 |     Search *all* files. This ignores .agignore, .gitignore, etc. It searches binary and hidden files as well.
 93 |   * `-U --skip-vcs-ignores`:
 94 |     Ignore VCS ignore files (.gitignore, .hgignore, svn:ignore), but still use .agignore.
 95 |   * `-v --invert-match`
 96 |   * `-w --word-regexp`:
 97 |     Only match whole words.
 98 |   * `-z --search-zip`:
 99 |     Search contents of compressed files.
100 | 
101 | ## FILE TYPES
102 | 
103 | It is possible to restrict the types of files searched. For example, passing `--html` as the `file-types` parameter will search only files with the extensions `htm`, `html`, `shtml` or `xhtml`. For a list of supported `file-types` run `ag --list-file-types`.
104 | 
105 | ## IGNORING FILES
106 | 
107 | By default, ag will ignore files matched by patterns in .gitignore, .hgignore,
108 | or .agignore. These files can be anywhere in the directories being searched. Ag
109 | also ignores files matched by the svn:ignore property if `svn --version` is 1.6
110 | or older.  Finally, ag looks in $HOME/.agignore for
111 | ignore patterns. Binary files are ignored by default as well.
112 | 
113 | If you want to ignore .gitignore, .hgignore, and svn:ignore but still take .agignore into account, use `-U`.
114 | 
115 | Use the `-t` option to search all text files, `-a` to search all files, and `-u` to search all including hidden files.
116 | 
117 | ## EXAMPLES
118 | 
119 | `ag printf`:
120 |   Find matches for "printf" in the current directory.
121 | 
122 | `ag foo /bar/`:
123 |   Find matches for "foo" in path /bar/.
124 | 
125 | ## SEE ALSO
126 | 
127 | grep(1)
128 | 


--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
  1 | #include <pcre.h>
  2 | #include <stdarg.h>
  3 | #include <stdio.h>
  4 | #include <string.h>
  5 | #include <ctype.h>
  6 | #include <sys/time.h>
  7 | #include <unistd.h>
  8 | #ifdef _WIN32
  9 | #include <windows.h>
 10 | #endif
 11 | 
 12 | #include "config.h"
 13 | 
 14 | #ifdef HAVE_PTHREAD_H
 15 | #include <pthread.h>
 16 | #endif
 17 | 
 18 | #include "log.h"
 19 | #include "options.h"
 20 | #include "search.h"
 21 | #include "util.h"
 22 | 
 23 | /*
 24 |  * Program entry point. Parses the command line, prepares the matcher
 25 |  * (Boyer-Moore skip tables for a literal query, a studied PCRE otherwise),
 26 |  * then searches either stdin or every requested path using a pool of worker
 27 |  * threads.
 28 |  *
 29 |  * Returns 0 when opts.match_found was set, 1 otherwise.
 30 |  */
 31 | int main(int argc, char **argv) {
 32 |     char **base_paths = NULL;
 33 |     char **paths = NULL;
 34 |     int i;
 35 |     int pcre_opts = PCRE_MULTILINE;
 36 |     int study_opts = 0;
 37 |     double time_diff;
 38 |     pthread_t *workers = NULL;
 39 |     int *worker_ids = NULL;
 40 |     int workers_len;
 41 | 
 42 |     set_log_level(LOG_LEVEL_WARN);
 43 | 
 44 |     work_queue = NULL;
 45 |     work_queue_tail = NULL;
 46 |     memset(&stats, 0, sizeof(stats));
 47 |     root_ignores = init_ignore(NULL);
 48 |     out_fd = stdout;
 49 | #ifdef USE_PCRE_JIT
 50 |     int has_jit = 0;
 51 |     pcre_config(PCRE_CONFIG_JIT, &has_jit);
 52 |     if (has_jit) {
 53 |         study_opts |= PCRE_STUDY_JIT_COMPILE;
 54 |     }
 55 | #endif
 56 | 
 57 |     gettimeofday(&(stats.time_start), NULL);
 58 | 
 59 |     parse_options(argc, argv, &base_paths, &paths);
 60 |     log_debug("PCRE Version: %s", pcre_version());
 61 | 
 62 |     /* Default to one worker per online processor (one fewer for literal searches); opts.workers overrides. */
 63 | #ifdef _WIN32
 64 |     {
 65 |         SYSTEM_INFO si;
 66 |         GetSystemInfo(&si);
 67 |         workers_len = si.dwNumberOfProcessors;
 68 |     }
 69 | #else
 70 |     workers_len = (int)sysconf(_SC_NPROCESSORS_ONLN);
 71 | #endif
 72 |     if (opts.literal) {
 73 |         workers_len--;
 74 |     }
 75 |     if (opts.workers) {
 76 |         workers_len = opts.workers;
 77 |     }
 78 |     if (workers_len < 1) {
 79 |         workers_len = 1;
 80 |     }
 81 | 
 82 |     log_debug("Using %i workers", workers_len);
 83 |     done_adding_files = FALSE;
 84 |     workers = ag_calloc(workers_len, sizeof(pthread_t));
 85 |     if (pthread_cond_init(&files_ready, NULL)) {
 86 |         die("pthread_cond_init failed!");
 87 |     }
 88 |     if (pthread_mutex_init(&print_mtx, NULL)) {
 89 |         die("pthread_mutex_init failed!");
 90 |     }
 91 |     if (pthread_mutex_init(&stats_mtx, NULL)) {
 92 |         die("pthread_mutex_init failed!");
 93 |     }
 94 |     if (pthread_mutex_init(&work_queue_mtx, NULL)) {
 95 |         die("pthread_mutex_init failed!");
 96 |     }
 97 | 
 98 |     if (opts.casing == CASE_SMART) {
 99 |         opts.casing = is_lowercase(opts.query) ? CASE_INSENSITIVE : CASE_SENSITIVE;
100 |     }
101 | 
102 |     if (opts.literal) {
103 |         if (opts.casing == CASE_INSENSITIVE) {
104 |             /* Search routine needs the query to be lowercase */
105 |             char *c = opts.query;
106 |             for (; *c != '\0'; ++c) {
107 |                 /* tolower() is undefined for negative values other than EOF, so go through unsigned char */
108 |                 *c = (char)tolower((unsigned char)*c);
109 |             }
110 |         }
111 |         generate_alpha_skip(opts.query, opts.query_len, alpha_skip_lookup, opts.casing == CASE_SENSITIVE);
112 |         find_skip_lookup = NULL;
113 |         generate_find_skip(opts.query, opts.query_len, &find_skip_lookup, opts.casing == CASE_SENSITIVE);
114 |         if (opts.word_regexp) {
115 |             init_wordchar_table();
116 |             opts.literal_starts_wordchar = is_wordchar(opts.query[0]);
117 |             /* An empty query has no last character; never index query[-1] */
118 |             opts.literal_ends_wordchar = opts.query_len > 0 ? is_wordchar(opts.query[opts.query_len - 1]) : FALSE;
119 |         }
120 |     } else {
121 |         if (opts.casing == CASE_INSENSITIVE) {
122 |             pcre_opts |= PCRE_CASELESS;
123 |         }
124 |         if (opts.word_regexp) {
125 |             char *word_regexp_query;
126 |             ag_asprintf(&word_regexp_query, "\\b%s\\b", opts.query);
127 |             free(opts.query);
128 |             opts.query = word_regexp_query;
129 |             opts.query_len = strlen(opts.query);
130 |         }
131 |         compile_study(&opts.re, &opts.re_extra, opts.query, pcre_opts, study_opts);
132 |     }
133 | 
134 |     if (opts.search_stream) {
135 |         search_stream(stdin, "");
136 |     } else {
137 |         /* Each worker gets a pointer to its own stable id, not to the loop counter that main() keeps changing */
138 |         worker_ids = ag_calloc(workers_len, sizeof(int));
139 |         for (i = 0; i < workers_len; i++) {
140 |             int rv;
141 |             worker_ids[i] = i;
142 |             rv = pthread_create(&(workers[i]), NULL, &search_file_worker, &worker_ids[i]);
143 |             if (rv != 0) {
144 |                 die("error in pthread_create(): %s", strerror(rv));
145 |             }
146 |         }
147 |         for (i = 0; paths[i] != NULL; i++) {
148 |             log_debug("searching path %s for %s", paths[i], opts.query);
149 |             symhash = NULL;
150 |             search_dir(root_ignores, base_paths[i], paths[i], 0);
151 |         }
152 |         /* Set done_adding_files under the queue lock, wake every thread waiting on files_ready, then join them */
153 |         pthread_mutex_lock(&work_queue_mtx);
154 |         done_adding_files = TRUE;
155 |         pthread_cond_broadcast(&files_ready);
156 |         pthread_mutex_unlock(&work_queue_mtx);
157 |         for (i = 0; i < workers_len; i++) {
158 |             if (pthread_join(workers[i], NULL)) {
159 |                 die("pthread_join failed!");
160 |             }
161 |         }
162 |     }
163 | 
164 |     if (opts.stats) {
165 |         gettimeofday(&(stats.time_end), NULL);
166 |         /* Compute in double: tv_sec * 1000000 overflows a 32-bit long */
167 |         time_diff = ((double)stats.time_end.tv_sec * 1000000 + stats.time_end.tv_usec) -
168 |                     ((double)stats.time_start.tv_sec * 1000000 + stats.time_start.tv_usec);
169 |         time_diff /= 1000000;
170 | 
171 |         printf("%ld matches\n%ld files searched\n%ld bytes searched\n%f seconds\n", stats.total_matches, stats.total_files, stats.total_bytes, time_diff);
172 |     }
173 | 
174 |     if (opts.pager) {
175 |         pclose(out_fd);
176 |     }
177 |     cleanup_options();
178 |     pthread_cond_destroy(&files_ready);
179 |     pthread_mutex_destroy(&work_queue_mtx);
180 |     pthread_mutex_destroy(&stats_mtx);
181 |     pthread_mutex_destroy(&print_mtx);
182 |     cleanup_ignore(root_ignores);
183 |     free(workers);
184 |     free(worker_ids);
185 |     for (i = 0; paths[i] != NULL; i++) {
186 |         free(paths[i]);
187 |         free(base_paths[i]);
188 |     }
189 |     free(base_paths);
190 |     free(paths);
191 |     free(find_skip_lookup);
192 |     return !opts.match_found;
193 | }
194 | 


--------------------------------------------------------------------------------
/doc/ag.1:
--------------------------------------------------------------------------------
  1 | .\" generated with Ronn/v0.7.3
  2 | .\" http://github.com/rtomayko/ronn/tree/0.7.3
  3 | .
  4 | .TH "AG" "1" "August 2014" "" ""
  5 | .
  6 | .SH "NAME"
  7 | \fBag\fR \- The Silver Searcher\. Like ack, but faster\.
  8 | .
  9 | .SH "SYNOPSIS"
 10 | \fBag\fR [\fIfile\-type\fR] [\fIoptions\fR] PATTERN [PATH]
 11 | .
 12 | .SH "DESCRIPTION"
 13 | Recursively search for PATTERN in PATH\. Like grep or ack, but faster\.
 14 | .
 15 | .SH "OPTIONS"
 16 | \fB\-\-ackmate\fR:
 17 | .
 18 | .br
 19 | \~\~\~\~ Output results in a format parseable by AckMate \fIhttps://github\.com/protocool/AckMate\fR\.
 20 | .
 21 | .P
 22 | \fB\-a \-\-all\-types\fR:
 23 | .
 24 | .br
 25 | \~\~\~\~ Search all files\. This doesn\'t include hidden files, and also doesn\'t respect any ignore files
 26 | .
 27 | .P
 28 | \fB\-A \-\-after [LINES]\fR:
 29 | .
 30 | .br
 31 | \~\~\~\~ Print lines after match\. Defaults to 2\.
 32 | .
 33 | .P
 34 | \fB\-B \-\-before [LINES]\fR:
 35 | .
 36 | .br
 37 | \~\~\~\~ Print lines before match\. Defaults to 2\.
 38 | .
 39 | .P
 40 | \fB\-\-[no]break\fR:
 41 | .
 42 | .br
 43 | \~\~\~\~ Print a newline between matches in different files\. Enabled by default\.
 44 | .
 45 | .P
 46 | \fB\-\-[no]color\fR:
 47 | .
 48 | .br
 49 | \~\~\~\~ Print color codes in results\. Enabled by default\.
 50 | .
 51 | .P
 52 | \fB\-\-color\-line\-number\fR:
 53 | .
 54 | .br
 55 | \~\~\~\~ Color codes for line numbers\. Defaults to 1;33\.
 56 | .
 57 | .P
 58 | \fB\-\-color\-match\fR:
 59 | .
 60 | .br
 61 | \~\~\~\~ Color codes for result match numbers\. Defaults to 30;43\.
 62 | .
 63 | .P
 64 | \fB\-\-color\-path\fR:
 65 | .
 66 | .br
 67 | \~\~\~\~ Color codes for path names\. Defaults to 1;32\.
 68 | .
 69 | .P
 70 | \fB\-\-column\fR:
 71 | .
 72 | .br
 73 | \~\~\~\~ Print column numbers in results\.
 74 | .
 75 | .P
 76 | \fB\-C \-\-context [LINES]\fR:
 77 | .
 78 | .br
 79 | \~\~\~\~ Print lines before and after matches\. Defaults to 2\.
 80 | .
 81 | .P
 82 | \fB\-D \-\-debug\fR:
 83 | .
 84 | .br
 85 | \~\~\~\~ Output ridiculous amounts of debugging info\. Probably not useful\.
 86 | .
 87 | .P
 88 | \fB\-\-depth NUM\fR:
 89 | .
 90 | .br
 91 | \~\~\~\~ Search up to NUM directories deep\. Default is 25\.
 92 | .
 93 | .P
 94 | \fB\-f \-\-follow\fR:
 95 | .
 96 | .br
 97 | \~\~\~\~ Follow symlinks\.
 98 | .
 99 | .P
100 | \fB\-\-[no]group\fR
101 | .
102 | .br
103 | \fB\-g PATTERN\fR:
104 | .
105 | .br
106 | \~\~\~\~ Print filenames matching PATTERN\.
107 | .
108 | .P
109 | \fB\-G\fR, \fB\-\-file\-search\-regex PATTERN\fR:
110 | .
111 | .br
112 | \~\~\~\~ Only search filenames matching PATTERN\.
113 | .
114 | .P
115 | \fB\-H\fR, \fB\-\-[no]heading\fR:
116 | .
117 | .br
118 | \~\~\~\~ Print file names above matching contents\.
119 | .
120 | .P
121 | \fB\-\-hidden\fR:
122 | .
123 | .br
124 | \~\~\~\~ Search hidden files\. This option obeys ignore files\.
125 | .
126 | .P
127 | \fB\-\-ignore PATTERN\fR:
128 | .
129 | .br
130 | \~\~\~\~ Ignore files/directories matching this pattern\. Literal file and directory names are also allowed\.
131 | .
132 | .P
133 | \fB\-\-ignore\-dir NAME\fR:
134 | .
135 | .br
136 | \~\~\~\~ Alias for \-\-ignore for compatibility with ack\.
137 | .
138 | .P
139 | \fB\-i \-\-ignore\-case\fR:
140 | .
141 | .br
142 | \~\~\~\~ Match case insensitively\.
143 | .
144 | .P
145 | \fB\-l \-\-files\-with\-matches\fR:
146 | .
147 | .br
148 | \~\~\~\~ Only print filenames containing matches, not matching lines\. An empty query will print all files that would be searched\.
149 | .
150 | .P
151 | \fB\-L \-\-files\-without\-matches\fR:
152 | .
153 | .br
154 | \~\~\~\~ Only print filenames that don\'t contain matches\.
155 | .
156 | .P
157 | \fB\-\-list\-file\-types\fR:
158 | .
159 | .br
160 | \~\~\~\~ See \fBFILE TYPES\fR below\.
161 | .
162 | .P
163 | \fB\-m \-\-max\-count NUM\fR:
164 | .
165 | .br
166 | \~\~\~\~ Skip the rest of a file after NUM matches\. Default is 10,000\.
167 | .
168 | .P
169 | \fB\-\-no\-numbers\fR:
170 | .
171 | .br
172 | \~\~\~\~ Don\'t show line numbers
173 | .
174 | .P
175 | \fB\-p \-\-path\-to\-agignore STRING\fR:
176 | .
177 | .br
178 | \~\~\~\~ Provide a path to a specific \.agignore file\.
179 | .
180 | .P
181 | \fB\-\-pager COMMAND\fR:
182 | .
183 | .br
184 | \~\~\~\~ Use a pager such as less\. Use \fB\-\-nopager\fR to override\. This option is also ignored if output is piped to another program\.
185 | .
186 | .P
187 | \fB\-\-print\-long\-lines\fR:
188 | .
189 | .br
190 | \~\~\~\~ Print matches on very long lines (> 2k characters by default)
191 | .
192 | .P
193 | \fB\-\-passthrough\fR:
194 | .
195 | .br
196 | \~\~\~\~ When searching a stream, print all lines even if they don\'t match\.
197 | .
198 | .P
199 | \fB\-Q \-\-literal\fR:
200 | .
201 | .br
202 | \~\~\~\~ Do not parse PATTERN as a regular expression\. Try to match it literally\.
203 | .
204 | .P
205 | \fB\-s \-\-case\-sensitive\fR:
206 | .
207 | .br
208 | \~\~\~\~ Match case sensitively\.
209 | .
210 | .P
211 | \fB\-S \-\-smart\-case\fR:
212 | .
213 | .br
214 | \~\~\~\~ Match case sensitively if there are any uppercase letters in PATTERN, or case insensitively otherwise\. Enabled by default\.
215 | .
216 | .P
217 | \fB\-\-search\-binary\fR:
218 | .
219 | .br
220 | \~\~\~\~ Search binary files for matches\.
221 | .
222 | .P
223 | \fB\-\-silent\fR:
224 | .
225 | .br
226 | \~\~\~\~ Suppress all log messages, including errors\.
227 | .
228 | .P
229 | \fB\-\-stats\fR:
230 | .
231 | .br
232 | \~\~\~\~ Print stats (files scanned, time taken, etc)
233 | .
234 | .P
235 | \fB\-t \-\-all\-text\fR:
236 | .
237 | .br
238 | \~\~\~\~ Search all text files\. This doesn\'t include hidden files\.
239 | .
240 | .P
241 | \fB\-u \-\-unrestricted\fR:
242 | .
243 | .br
244 | \~\~\~\~ Search \fIall\fR files\. This ignores \.agignore, \.gitignore, etc\. It searches binary and hidden files as well\.
245 | .
246 | .P
247 | \fB\-U \-\-skip\-vcs\-ignores\fR:
248 | .
249 | .br
250 | \~\~\~\~ Ignore VCS ignore files (\.gitignore, \.hgignore, svn:ignore), but still use \.agignore\.
251 | .
252 | .P
253 | \fB\-v \-\-invert\-match\fR
254 | .
255 | .br
256 | \fB\-w \-\-word\-regexp\fR:
257 | .
258 | .br
259 | \~\~\~\~ Only match whole words\.
260 | .
261 | .P
262 | \fB\-z \-\-search\-zip\fR:
263 | .
264 | .br
265 | \~\~\~\~ Search contents of compressed files\.
266 | .
267 | .SH "FILE TYPES"
268 | It is possible to restrict the types of files searched\. For example, passing \fB\-\-html\fR as the \fBfile\-types\fR parameter will search only files with the extensions \fBhtm\fR, \fBhtml\fR, \fBshtml\fR or \fBxhtml\fR\. For a list of supported \fBfile\-types\fR run \fBag \-\-list\-file\-types\fR\.
269 | .
270 | .SH "IGNORING FILES"
271 | By default, ag will ignore files matched by patterns in \.gitignore, \.hgignore, or \.agignore\. These files can be anywhere in the directories being searched\. Ag also ignores files matched by the svn:ignore property if \fBsvn \-\-version\fR is 1\.6 or older\. Finally, ag looks in $HOME/\.agignore for ignore patterns\. Binary files are ignored by default as well\.
272 | .
273 | .P
274 | If you want to ignore \.gitignore, \.hgignore, and svn:ignore but still take \.agignore into account, use \fB\-U\fR\.
275 | .
276 | .P
277 | Use the \fB\-t\fR option to search all text files, \fB\-a\fR to search all files, and \fB\-u\fR to search all including hidden files\.
278 | .
279 | .SH "EXAMPLES"
280 | \fBag printf\fR: Find matches for "printf" in the current directory\.
281 | .
282 | .P
283 | \fBag foo /bar/\fR: Find matches for "foo" in path /bar/\.
284 | .
285 | .SH "SEE ALSO"
286 | grep(1)
287 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # The Silver Searcher #
  2 | 
  3 | A code searching tool similar to `ack`, with a focus on speed.
  4 | 
  5 | [![Build Status](https://travis-ci.org/ggreer/the_silver_searcher.svg?branch=master)](https://travis-ci.org/ggreer/the_silver_searcher)
  6 | 
  7 | <a href="https://floobits.com/ggreer/ag/redirect">
  8 |   <img alt="Floobits status" width="100" height="40" src="https://floobits.com/ggreer/ag.png" />
  9 | </a>
 10 | 
 11 | 
 12 | ## What's so great about Ag? ##
 13 | 
 14 | * It searches code about 3–5× faster than `ack`.
 15 | * It ignores file patterns from your `.gitignore` and `.hgignore`.
 16 | * If there are files in your source repo you don't want to search, just add their patterns to a `.agignore` file. \*cough\* extern \*cough\*
 17 | * The command name is 33% shorter than `ack`, and all keys are on the home row!
 18 | 
 19 | 
 20 | ## How is it so fast? ##
 21 | 
 22 | * Searching for literals (no regex) uses [Boyer-Moore-Horspool strstr](http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm).
 23 | * Files are `mmap()`ed instead of read into a buffer.
 24 | * If you're building with PCRE 8.21 or greater, regex searches use [the JIT compiler](http://sljit.sourceforge.net/pcre.html).
 25 | * Ag calls `pcre_study()` before executing the regex on a jillion files.
 26 | * Instead of calling `fnmatch()` on every pattern in your ignore files, non-regex patterns are loaded into an array and binary searched.
 27 | * Ag uses [Pthreads](http://en.wikipedia.org/wiki/POSIX_Threads) to take advantage of multiple CPU cores and search files in parallel.
 28 | 
 29 | I've written several blog posts showing how I've improved performance. These include how I [added pthreads](http://geoff.greer.fm/2012/09/07/the-silver-searcher-adding-pthreads/), [wrote my own `scandir()`](http://geoff.greer.fm/2012/09/03/profiling-ag-writing-my-own-scandir/), [benchmarked every revision to find performance regressions](http://geoff.greer.fm/2012/08/25/the-silver-searcher-benchmarking-revisions/), and profiled with [gprof](http://geoff.greer.fm/2012/02/08/profiling-with-gprof/) and [Valgrind](http://geoff.greer.fm/2012/01/23/making-programs-faster-profiling/).
 30 | 
 31 | 
 32 | ## Installation ##
 33 | 
 34 | ### Gentoo
 35 | 
 36 |     emerge the_silver_searcher
 37 | 
 38 | ### OS X
 39 | 
 40 |     brew install the_silver_searcher
 41 | 
 42 | or
 43 | 
 44 |     port install the_silver_searcher
 45 | 
 46 | ### Arch Linux
 47 | 
 48 |     pacman -S the_silver_searcher
 49 | 
 50 | ### Debian unstable
 51 | 
 52 |     apt-get install silversearcher-ag
 53 | 
 54 | ### Ubuntu 13.10 or later
 55 | 
 56 |     apt-get install silversearcher-ag
 57 | 
 58 | ### FreeBSD
 59 | 
 60 |     pkg install the_silver_searcher
 61 | 
 62 | or
 63 | 
 64 |     pkg_add -r the_silver_searcher
 65 | 
 66 | To build from source on FreeBSD:
 67 | 
 68 |     make -C /usr/ports/textproc/the_silver_searcher install clean
 69 | 
 70 | ### OpenBSD
 71 | 
 72 |     pkg_add the_silver_searcher
 73 | 
 74 | To build from source on OpenBSD:
 75 | 
 76 |     cd /usr/ports/textproc/the_silver_searcher && make install
 77 | 
 78 | 
 79 | If you want a CentOS rpm or Ubuntu deb, take a look at [Vikram Dighe's packages](http://swiftsignal.com/packages/).
 80 | 
 81 | 
 82 | ## Building from source ##
 83 | 
 84 | 1. Install dependencies (Automake, pkg-config, PCRE, LZMA):
 85 |     * Ubuntu:
 86 | 
 87 |             apt-get install -y automake pkg-config libpcre3-dev zlib1g-dev liblzma-dev
 88 |     * Fedora:
 89 | 
 90 |             yum -y install pkgconfig automake gcc zlib-devel pcre-devel xz-devel
 91 |     * CentOS:
 92 | 
 93 |             yum -y groupinstall "Development Tools"
 94 |             yum -y install pcre-devel xz-devel
 95 |     * OS X:
 96 | 
 97 |             brew install automake pkg-config pcre
 98 |         or
 99 | 
100 |             port install automake pkgconfig pcre
101 |     * Windows: It's complicated. See [this wiki page](https://github.com/ggreer/the_silver_searcher/wiki/Windows).
102 | 2. Run the build script (which just runs aclocal, automake, etc):
103 | 
104 |         ./build.sh
105 | 
106 |   On Windows:
107 | 
108 |         mingw32-make -f Makefile.w32
109 | 3. Make install:
110 | 
111 |         sudo make install
112 | 
113 | 
114 | 
115 | ## Current development status ##
116 | 
117 | It's quite stable now. Most changes are new features, minor bug fixes, or performance improvements. It's much faster than Ack in my benchmarks.
118 | 
119 |     ack blahblahblah ~/code  6.59s user 1.94s system 99% cpu 8.547 total
120 | 
121 |     ag blahblahblah ~/code  1.39s user 1.81s system 229% cpu 1.396 total
122 | 
123 | 
124 | ## Editor Integration ##
125 | 
126 | ### TextMate ###
127 | 
128 | TextMate users can use Ag with [my fork](https://github.com/ggreer/AckMate) of the popular AckMate plugin, which lets you use both Ack and Ag for searching. If you already have AckMate and just want to replace Ack with Ag, move or delete `"~/Library/Application Support/TextMate/PlugIns/AckMate.tmplugin/Contents/Resources/ackmate_ack"` and run `ln -s /usr/local/bin/ag "~/Library/Application Support/TextMate/PlugIns/AckMate.tmplugin/Contents/Resources/ackmate_ack"`.
129 | 
130 | ### Vim ###
131 | 
132 | You can use Ag with [ack.vim][] by adding the following line to your `.vimrc`:
133 | 
134 |     let g:ackprg = 'ag --nogroup --nocolor --column'
135 | 
136 | There's also a fork of ack.vim tailored for use with Ag: [ag.vim][]
137 | [ack.vim]: https://github.com/mileszs/ack.vim
138 | [ag.vim]: https://github.com/rking/ag.vim
139 | 
140 | ### Emacs ###
141 | 
142 | You can use [ag.el][] as an Emacs front end to Ag.
143 | 
144 | [ag.el]: https://github.com/Wilfred/ag.el
145 | 
146 | 
147 | ## Contributing ##
148 | 
149 | I like when people send pull requests. It validates my existence. If you want to help out, check the [issue list](https://github.com/ggreer/the_silver_searcher/issues?sort=updated&state=open) or search the codebase for `TODO`. Don't worry if you lack experience writing C. If I think a pull request isn't ready to be merged, I'll give feedback in comments. Once everything looks good, I'll comment on your pull request with a cool animated gif and hit the merge button.
150 | 
151 | 
152 | ## TODO ##
153 | 
154 | A special thanks goes out to Alex Davies. He has given me some excellent recommendations to improve Ag. Many of these things are still on my list:
155 | 
156 | * Optimizations
157 |   * Write a benchmarking script that tweaks various settings to find what's fastest.
158 | * Features
159 |   * Behave better when matching in files with really long lines.
160 |   * Report "match found at position X of line N" if line is > 10k chars.
161 | * Windows support
162 |   * `readdir()` and `stat()` are much slower on Windows. Use `FindNextFile()` instead.
163 |   * Support Visual Studio instead of autotools?
164 |   * Need to use pthreads-win32 or something similar.
165 | 
166 | 
167 | ## Other stuff you might like ##
168 | 
169 | * [Ack](https://github.com/petdance/ack) - Better than grep. Without Ack, Ag would not exist.
170 | * [AckMate](https://github.com/protocool/AckMate) - An ack-powered replacement for TextMate's slow built-in search.
171 | * [ack.vim](https://github.com/mileszs/ack.vim)
172 | * [ag.vim](https://github.com/rking/ag.vim)
173 | * [Exuberant Ctags](http://ctags.sourceforge.net/) - Faster than Ag, but it builds an index beforehand. Good for *really* big codebases.
174 | * [Git-grep](http://git-scm.com/docs/git-grep) - As fast as Ag but only works on git repos.
175 | * [Sack](https://github.com/sampson-chen/sack) - A utility that wraps Ack and Ag. It removes a lot of repetition from searching and opening matching files.
176 | 


--------------------------------------------------------------------------------
/src/print.c:
--------------------------------------------------------------------------------
  1 | #include <stdarg.h>
  2 | #include <stdio.h>
  3 | #include <stdlib.h>
  4 | #include <string.h>
  5 | 
  6 | #include "ignore.h"
  7 | #include "log.h"
  8 | #include "options.h"
  9 | #include "print.h"
 10 | #include "util.h"
 11 | 
 12 | int first_file_match = 1;
 13 | 
 14 | const char *color_reset = "\033[0m\033[K";
 15 | 
 16 | /* Write normalized path + sep to out_fd; AckMate gets a ':' prefix, else optional color. */
 17 | void print_path(const char *path, const char sep) {
 18 |     const char *shown = normalize_path(path);
 19 |     if (opts.ackmate) {
 20 |         fprintf(out_fd, ":%s%c", shown, sep);
 21 |         return;
 22 |     }
 23 |     if (!opts.color) {
 24 |         fprintf(out_fd, "%s%c", shown, sep);
 25 |         return;
 26 |     }
 27 |     fprintf(out_fd, "%s%s%s%c", opts.color_path, shown, color_reset, sep);
 28 | }
 29 | 
 30 | /* Report that a binary file matched, printing only its normalized path. */
 31 | void print_binary_file_matches(const char *path) {
 32 |     print_file_separator();
 33 |     fprintf(out_fd, "Binary file %s matches.\n", normalize_path(path));
 34 | }
 35 | /* Print one file's matches to out_fd: matching lines plus optional before/after context, with path, line/column numbers, colors or AckMate headers as configured in opts. */
 36 | void print_file_matches(const char *path, const char *buf, const size_t buf_len, const match_t matches[], const size_t matches_len) {
 37 |     size_t line = 1; /* 1-based number of the line currently being scanned */
 38 |     char **context_prev_lines = NULL;
 39 |     size_t prev_line = 0;
 40 |     size_t last_prev_line = 0; /* next slot of context_prev_lines to overwrite */
 41 |     size_t prev_line_offset = 0; /* offset in buf where the current line starts */
 42 |     size_t cur_match = 0; /* index of the first match whose end has not been passed yet */
 43 |     /* TODO the line below contains a terrible hack */
 44 |     size_t lines_since_last_match = 1000000; /* if I initialize this to INT_MAX it'll overflow */
 45 |     ssize_t lines_to_print = 0;
 46 |     size_t last_printed_match = 0; /* index of the next match to write out / highlight */
 47 |     char sep = '-';
 48 |     size_t i, j;
 49 |     int in_a_match = FALSE; /* TRUE while i lies between a match's start and end */
 50 |     int printing_a_match = FALSE; /* TRUE while output is inside a highlighted match; persists across lines */
 51 |     /* Context lines use '-' after the line number, or ':' in AckMate mode. */
 52 |     if (opts.ackmate) {
 53 |         sep = ':';
 54 |     }
 55 | 
 56 |     print_file_separator();
 57 |     /* Resolve the *_DEFAULT path modes to concrete ones (note: this writes to the shared opts). */
 58 |     if (opts.print_path == PATH_PRINT_DEFAULT) {
 59 |         opts.print_path = PATH_PRINT_TOP;
 60 |     } else if (opts.print_path == PATH_PRINT_DEFAULT_EACH_LINE) {
 61 |         opts.print_path = PATH_PRINT_EACH_LINE;
 62 |     }
 63 | 
 64 |     if (opts.print_path == PATH_PRINT_TOP) {
 65 |         print_path(path, '\n');
 66 |     }
 67 |     /* Ring buffer (indexed modulo opts.before) holding copies of the most recent lines. */
 68 |     context_prev_lines = ag_calloc(sizeof(char *), (opts.before + 1));
 69 |     /* Scan buf one byte at a time (including index buf_len) while matches or trailing context remain. */
 70 |     for (i = 0; i <= buf_len && (cur_match < matches_len || lines_since_last_match <= opts.after); i++) {
 71 |         if (cur_match < matches_len && i == matches[cur_match].end) {
 72 |             /* We found the end of a match. */
 73 |             cur_match++;
 74 |             in_a_match = FALSE;
 75 |         }
 76 | 
 77 |         if (cur_match < matches_len && i == matches[cur_match].start) {
 78 |             in_a_match = TRUE;
 79 |             /* We found the start of a match */
 80 |             if (cur_match > 0 && opts.context && lines_since_last_match > (opts.before + opts.after + 1)) {
 81 |                 fprintf(out_fd, "--\n");
 82 |             }
 83 | 
 84 |             if (lines_since_last_match > 0 && opts.before > 0) {
 85 |                 /* TODO: better, but still needs work */
 86 |                 /* print the previous line(s) */
 87 |                 lines_to_print = lines_since_last_match - (opts.after + 1);
 88 |                 if (lines_to_print < 0) {
 89 |                     lines_to_print = 0;
 90 |                 } else if ((size_t)lines_to_print > opts.before) {
 91 |                     lines_to_print = opts.before;
 92 |                 }
 93 | 
 94 |                 for (j = (opts.before - lines_to_print); j < opts.before; j++) {
 95 |                     prev_line = (last_prev_line + j) % opts.before;
 96 |                     if (context_prev_lines[prev_line] != NULL) {
 97 |                         if (opts.print_path == PATH_PRINT_EACH_LINE) {
 98 |                             print_path(path, ':');
 99 |                         }
100 |                         print_line_number(line - (opts.before - j), sep);
101 |                         fprintf(out_fd, "%s\n", context_prev_lines[prev_line]);
102 |                     }
103 |                 }
104 |             }
105 |             lines_since_last_match = 0;
106 |         }
107 |         /* NOTE(review): buf[i] is read below with i == buf_len; confirm callers make that byte readable. */
108 |         /* We found the end of a line. */
109 |         if (buf[i] == '\n' && opts.before > 0) {
110 |             if (context_prev_lines[last_prev_line] != NULL) {
111 |                 free(context_prev_lines[last_prev_line]);
112 |             }
113 |             /* We don't want to strcpy the \n */
114 |             context_prev_lines[last_prev_line] =
115 |                 ag_strndup(&buf[prev_line_offset], i - prev_line_offset);
116 |             last_prev_line = (last_prev_line + 1) % opts.before;
117 |         }
118 |         /* At a newline or the end of the buffer: print the line if it held a match or is after-context. */
119 |         if (buf[i] == '\n' || i == buf_len) {
120 |             if (lines_since_last_match == 0) {
121 |                 if (opts.print_path == PATH_PRINT_EACH_LINE && !opts.search_stream) {
122 |                     print_path(path, ':');
123 |                 }
124 | 
125 |                 if (opts.ackmate) {
126 |                     /* print headers for ackmate to parse */
127 |                     print_line_number(line, ';');
128 |                     for (; last_printed_match < cur_match; last_printed_match++) {
129 |                         fprintf(out_fd, "%lu %lu",
130 |                                 (matches[last_printed_match].start - prev_line_offset),
131 |                                 (matches[last_printed_match].end - matches[last_printed_match].start));
132 |                         last_printed_match == cur_match - 1 ? fputc(':', out_fd) : fputc(',', out_fd);
133 |                     }
134 |                     j = prev_line_offset;
135 |                     /* print up to current char */
136 |                     for (; j <= i; j++) {
137 |                         fputc(buf[j], out_fd);
138 |                     }
139 |                 } else {
140 |                     print_line_number(line, ':');
141 |                     if (opts.column) {
142 |                         fprintf(out_fd, "%lu:", (matches[last_printed_match].start - prev_line_offset) + 1);
143 |                     }
144 | 
145 |                     if (printing_a_match && opts.color) {
146 |                         fprintf(out_fd, "%s", opts.color_match);
147 |                     }
148 |                     for (j = prev_line_offset; j <= i; j++) {
149 |                         if (last_printed_match < matches_len && j == matches[last_printed_match].end) {
150 |                             if (opts.color) {
151 |                                 fprintf(out_fd, "%s", color_reset);
152 |                             }
153 |                             printing_a_match = FALSE;
154 |                             last_printed_match++;
155 |                         }
156 |                         if (last_printed_match < matches_len && j == matches[last_printed_match].start) {
157 |                             if (opts.color) {
158 |                                 fprintf(out_fd, "%s", opts.color_match);
159 |                             }
160 |                             printing_a_match = TRUE;
161 |                         }
162 |                         /* Don't print the null terminator */
163 |                         if (j < buf_len) {
164 |                             fputc(buf[j], out_fd);
165 |                         }
166 |                     }
167 |                     if (printing_a_match && opts.color) {
168 |                         fprintf(out_fd, "%s", color_reset);
169 |                     }
170 |                 }
171 |             } else if (lines_since_last_match <= opts.after) {
172 |                 /* print context after matching line */
173 |                 if (opts.print_path == PATH_PRINT_EACH_LINE) {
174 |                     print_path(path, ':');
175 |                 }
176 |                 print_line_number(line, sep);
177 | 
178 |                 for (j = prev_line_offset; j < i; j++) {
179 |                     fputc(buf[j], out_fd);
180 |                 }
181 |                 fputc('\n', out_fd);
182 |             }
183 | 
184 |             prev_line_offset = i + 1; /* skip the newline */
185 |             line++;
186 |             if (!in_a_match) {
187 |                 lines_since_last_match++;
188 |             }
189 |             /* File doesn't end with a newline. Print one so the output is pretty. */
190 |             if (i == buf_len && buf[i] != '\n' && !opts.search_stream) {
191 |                 fputc('\n', out_fd);
192 |             }
193 |         }
194 |     }
195 |     /* Release the saved context lines and the ring buffer itself. */
196 |     for (i = 0; i < opts.before; i++) {
197 |         if (context_prev_lines[i] != NULL) {
198 |             free(context_prev_lines[i]);
199 |         }
200 |     }
201 |     free(context_prev_lines);
202 | }
203 | 
204 | /* Write a line number and sep to out_fd; a non-zero opts.stream_line_num overrides line for streams. */
205 | void print_line_number(size_t line, const char sep) {
206 |     if (opts.print_line_numbers) {
207 |         if (opts.search_stream && opts.stream_line_num) {
208 |             line = opts.stream_line_num;
209 |         }
210 |         if (!opts.color) {
211 |             fprintf(out_fd, "%lu%c", line, sep);
212 |             return;
213 |         }
214 |         fprintf(out_fd, "%s%lu%s%c", opts.color_line_number, line, color_reset, sep);
215 |     }
216 | }
217 | 
218 | /* Emit a blank line before every file's output except the first, when opts.print_break is set. */
219 | void print_file_separator(void) {
220 |     const char *gap = (first_file_match == 0 && opts.print_break) ? "\n" : "";
221 |     first_file_match = 0;
222 |     fputs(gap, out_fd);
223 | }
224 | 
225 | /* Skip a leading "./", or one slash of a leading "//"; paths under 3 chars are returned as-is. */
226 | const char *normalize_path(const char *path) {
227 |     size_t skip = 0;
228 |     if (strlen(path) >= 3 && path[1] == '/') {
229 |         if (path[0] == '.') {
230 |             skip = 2;
231 |         } else if (path[0] == '/') {
232 |             skip = 1;
233 |         }
234 |     }
235 |     return path + skip;
236 | }
237 | 


--------------------------------------------------------------------------------
/src/decompress.c:
--------------------------------------------------------------------------------
  1 | #include <string.h>
  2 | #include <unistd.h>
  3 | 
  4 | #include "decompress.h"
  5 | 
  6 | #ifdef HAVE_LZMA_H
  7 | #include <lzma.h>
  8 | 
  9 | /* Magic bytes for recognizing xz data; see http://tukaani.org/xz/xz-file-format.txt */
 10 | const uint8_t XZ_HEADER_MAGIC[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
 11 | const uint8_t LZMA_HEADER_SOMETIMES[3] = { 0x5D, 0x00, 0x00 }; /* presumably a common (not guaranteed) .lzma prefix -- TODO confirm */
 12 | #endif
 13 | 
 14 | 
 15 | #ifdef HAVE_ZLIB_H
 16 | #define ZLIB_CONST 1
 17 | #include <zlib.h>
 18 | 
 19 | /* Code in decompress_zlib from
 20 |  *
 21 |  * https://raw.github.com/madler/zlib/master/examples/zpipe.c
 22 |  *
 23 |  * zpipe.c: example of proper use of zlib's inflate() and deflate()
 24 |  *    Not copyrighted -- provided to the public domain
 25 |  *    Version 1.4  11 December 2005  Mark Adler 
 26 |  */
 27 | /* Inflate a zlib- or gzip-wrapped buffer into a newly allocated buffer.
 28 |  * Returns a realloc()-allocated buffer and stores its length in *new_buf_len;
 29 |  * on any failure returns NULL, sets *new_buf_len to 0 and frees partial output. */
 30 | static void *decompress_zlib(const void *buf, const int buf_len,
 31 |                              const char *dir_full_path, int *new_buf_len) {
 32 |     int ret = 0;
 33 |     unsigned char *result = NULL;
 34 |     size_t result_size = 0;
 35 |     size_t pagesize = 0;
 36 |     z_stream stream;
 37 | 
 38 |     log_debug("Decompressing zlib file %s", dir_full_path);
 39 |     /* allocate inflate state */
 40 |     stream.zalloc = Z_NULL;
 41 |     stream.zfree = Z_NULL;
 42 |     stream.opaque = Z_NULL;
 43 |     stream.avail_in = 0;
 44 |     stream.next_in = Z_NULL;
 45 | 
 46 |     /* Add 32 to allow zlib and gzip format detection */
 47 |     if (inflateInit2(&stream, 32 + 15) != Z_OK) {
 48 |         log_err("Unable to initialize zlib: %s", stream.msg);
 49 |         goto error_out;
 50 |     }
 51 | 
 52 |     stream.avail_in = buf_len;
 53 |     stream.next_in = buf;
 54 |     pagesize = getpagesize();
 55 |     /* Round up to whole pages, at least one, so realloc() is never asked for 0 bytes */
 56 |     result_size = (((buf_len > 0 ? buf_len : 1) + pagesize - 1) & ~(pagesize - 1));
 57 |     do {
 58 |         do {
 59 |             /* Double the buffer; keep the old pointer valid if realloc() fails */
 60 |             unsigned char *grown;
 61 |             result_size *= 2;
 62 |             grown = realloc(result, result_size);
 63 |             if (grown == NULL) {
 64 |                 log_err("Unable to allocate %lu bytes to decompress file %s", (unsigned long)result_size, dir_full_path);
 65 |                 inflateEnd(&stream);
 66 |                 goto error_out;
 67 |             }
 68 |             result = grown;
 69 | 
 70 |             stream.avail_out = result_size / 2;
 71 |             stream.next_out = &result[stream.total_out];
 72 |             ret = inflate(&stream, Z_SYNC_FLUSH);
 73 |             log_debug("inflate ret = %d", ret);
 74 |             switch (ret) {
 75 |                 case Z_STREAM_ERROR:
 76 |                     log_err("Found stream error while decompressing zlib stream: %s", stream.msg);
 77 |                     inflateEnd(&stream);
 78 |                     goto error_out;
 79 |                 case Z_NEED_DICT:
 80 |                 case Z_DATA_ERROR:
 81 |                 case Z_MEM_ERROR:
 82 |                     log_err("Found mem/data error while decompressing zlib stream: %s", stream.msg);
 83 |                     inflateEnd(&stream);
 84 |                     goto error_out;
 85 |             }
 86 |         } while (stream.avail_out == 0);
 87 |     } while (ret == Z_OK);
 88 | 
 89 |     *new_buf_len = stream.total_out;
 90 |     inflateEnd(&stream);
 91 |     if (ret == Z_STREAM_END) {
 92 |         return result;
 93 |     }
 94 | 
 95 | error_out:
 96 |     free(result); /* NULL-safe; releases partial output on every failure path */
 97 |     *new_buf_len = 0;
 98 |     return NULL;
 99 | }
100 | #endif
101 | 
102 | 
103 | static void *decompress_lzw(const void *buf, const int buf_len,
104 |                             const char *dir_full_path, int *new_buf_len) {
105 |     (void)buf;
106 |     (void)buf_len;
107 |     log_err("LZW (UNIX compress) files not yet supported: %s", dir_full_path);
108 |     *new_buf_len = 0;
109 |     return NULL;
110 | }
111 | 
112 | 
113 | static void *decompress_zip(const void *buf, const int buf_len,
114 |                             const char *dir_full_path, int *new_buf_len) {
115 |     (void)buf;
116 |     (void)buf_len;
117 |     log_err("Zip files not yet supported: %s", dir_full_path);
118 |     *new_buf_len = 0;
119 |     return NULL;
120 | }
121 | 
122 | 
123 | #ifdef HAVE_LZMA_H
124 | /* Decode an xz or lzma buffer (format picked by lzma_auto_decoder) into a new buffer.
125 |  * Returns a realloc()-allocated buffer and stores its length in *new_buf_len;
126 |  * on any failure returns NULL, sets *new_buf_len to 0 and frees partial output. */
127 | static void *decompress_lzma(const void *buf, const int buf_len,
128 |                              const char *dir_full_path, int *new_buf_len) {
129 |     lzma_stream stream = LZMA_STREAM_INIT;
130 |     lzma_ret lzrt;
131 |     unsigned char *result = NULL;
132 |     size_t result_size = 0;
133 |     size_t pagesize = 0;
134 | 
135 |     stream.avail_in = buf_len;
136 |     stream.next_in = buf;
137 | 
138 |     lzrt = lzma_auto_decoder(&stream, -1, 0);
139 |     if (lzrt != LZMA_OK) {
140 |         log_err("Unable to initialize lzma_auto_decoder: %d", lzrt);
141 |         goto error_out;
142 |     }
143 | 
144 |     pagesize = getpagesize();
145 |     /* Round up to whole pages, at least one, so realloc() is never asked for 0 bytes */
146 |     result_size = (((buf_len > 0 ? buf_len : 1) + pagesize - 1) & ~(pagesize - 1));
147 |     do {
148 |         do {
149 |             /* Double the buffer; keep the old pointer valid if realloc() fails */
150 |             unsigned char *grown;
151 |             result_size *= 2;
152 |             grown = realloc(result, result_size);
153 |             if (grown == NULL) {
154 |                 log_err("Unable to allocate %lu bytes to decompress file %s", (unsigned long)result_size, dir_full_path);
155 |                 goto error_out;
156 |             }
157 |             result = grown;
158 | 
159 |             stream.avail_out = result_size / 2;
160 |             stream.next_out = &result[stream.total_out];
161 |             lzrt = lzma_code(&stream, LZMA_RUN);
162 |             log_debug("lzma_code ret = %d", lzrt);
163 |             switch (lzrt) {
164 |                 case LZMA_OK:
165 |                 case LZMA_STREAM_END:
166 |                     break;
167 |                 default:
168 |                     log_err("Found mem/data error while decompressing xz/lzma stream: %d", lzrt);
169 |                     goto error_out;
170 |             }
171 |         } while (stream.avail_out == 0);
172 |     } while (lzrt == LZMA_OK);
173 | 
174 |     *new_buf_len = stream.total_out;
175 | 
176 |     if (lzrt == LZMA_STREAM_END) {
177 |         lzma_end(&stream);
178 |         return result;
179 |     }
180 | 
181 | error_out:
182 |     lzma_end(&stream);
183 |     *new_buf_len = 0;
184 |     free(result);
185 |     return NULL;
186 | }
187 | #endif
188 | 
189 | 
190 | /* This function is very hot. It's called on every file when zip is enabled. */
191 | void *decompress(const ag_compression_type zip_type, const void *buf, const int buf_len,
192 |                  const char *dir_full_path, int *new_buf_len) {
193 | 
194 |     switch (zip_type) {
195 | #ifdef HAVE_ZLIB_H
196 |         case AG_GZIP:
197 |             return decompress_zlib(buf, buf_len, dir_full_path, new_buf_len);
198 | #endif
199 |         case AG_COMPRESS:
200 |             return decompress_lzw(buf, buf_len, dir_full_path, new_buf_len);
201 |         case AG_ZIP:
202 |             return decompress_zip(buf, buf_len, dir_full_path, new_buf_len);
203 | #ifdef HAVE_LZMA_H
204 |         case AG_XZ:
205 |             return decompress_lzma(buf, buf_len, dir_full_path, new_buf_len);
206 | #endif
207 |         case AG_NO_COMPRESSION:
208 |             log_err("File %s is not compressed", dir_full_path);
209 |             break;
210 |         default:
211 |             log_err("Unsupported compression type: %d", zip_type);
212 |     }
213 | 
214 |     *new_buf_len = 0;
215 |     return NULL;
216 | }
217 | 
218 | 
219 | /* This function is very hot. It's called on every file. */
220 | ag_compression_type is_zipped(const void *buf, const int buf_len) {
221 |     /* Zip magic numbers
222 |      * compressed file: { 0x1F, 0x9B }
223 |      * http://en.wikipedia.org/wiki/Compress
224 |      * 
225 |      * gzip file:       { 0x1F, 0x8B }
226 |      * http://www.gzip.org/zlib/rfc-gzip.html#file-format
227 |      *
228 |      * zip file:        { 0x50, 0x4B, 0x03, 0x04 }
229 |      * http://www.pkware.com/documents/casestudies/APPNOTE.TXT (Section 4.3)
230 |      */
231 | 
232 |     const unsigned char *buf_c = buf;
233 | 
234 |     if (buf_len == 0)
235 |         return AG_NO_COMPRESSION;
236 | 
237 |     /* Check for gzip & compress */
238 |     if (buf_len >= 2) {
239 |         if (buf_c[0] == 0x1F) {
240 |             if (buf_c[1] == 0x8B) {
241 | #ifdef HAVE_ZLIB_H
242 |                 log_debug("Found gzip-based stream");
243 |                 return AG_GZIP;
244 | #endif
245 |             } else if (buf_c[1] == 0x9B) {
246 |                 log_debug("Found compress-based stream");
247 |                 return AG_COMPRESS;
248 |             }
249 |         }
250 |     }
251 | 
252 |     /* Check for zip */
253 |     if (buf_len >= 4) {
254 |         if (buf_c[0] == 0x50 && buf_c[1] == 0x4B && buf_c[2] == 0x03 && buf_c[3] == 0x04) {
255 |             log_debug("Found zip-based stream");
256 |             return AG_ZIP;
257 |         }
258 |     }
259 | 
260 | #ifdef HAVE_LZMA_H
261 |     if (buf_len >= 6) {
262 |         if (memcmp(XZ_HEADER_MAGIC, buf_c, 6) == 0) {
263 |             log_debug("Found xz based stream");
264 |             return AG_XZ;
265 |         }
266 |     }
267 | 
268 |     /* LZMA doesn't really have a header: http://www.mail-archive.com/xz-devel@tukaani.org/msg00003.html */
269 |     if (buf_len >= 3) {
270 |         if (memcmp(LZMA_HEADER_SOMETIMES, buf_c, 3) == 0) {
271 |             log_debug("Found lzma-based stream");
272 |             return AG_XZ;
273 |         }
274 |     }
275 | #endif
276 | 
277 |     return AG_NO_COMPRESSION;
278 | }
279 | 


--------------------------------------------------------------------------------
/src/ignore.c:
--------------------------------------------------------------------------------
  1 | #include <ctype.h>
  2 | #include <dirent.h>
  3 | #include <stdio.h>
  4 | #include <stdlib.h>
  5 | #include <string.h>
  6 | #include <sys/stat.h>
  7 | 
  8 | #include "ignore.h"
  9 | #include "log.h"
 10 | #include "options.h"
 11 | #include "scandir.h"
 12 | #include "util.h"
 13 | 
 14 | #ifdef _WIN32
 15 | #include <shlwapi.h>
 16 | #define fnmatch(x, y, z) (!PathMatchSpec(y, x))
 17 | #else
 18 | #include <fnmatch.h>
 19 | const int fnmatch_flags = FNM_PATHNAME;
 20 | #endif
 21 | 
 22 | /* TODO: build a huge-ass list of files we want to ignore by default (build cache stuff, pyc files, etc) */
 23 | 
 24 | const char *evil_hardcoded_ignore_files[] = {
 25 |     ".",
 26 |     "..",
 27 |     NULL
 28 | };
 29 | 
 30 | /* Warning: changing the first string will break skip_vcs_ignores. */
 31 | const char *ignore_pattern_files[] = {
 32 |     ".agignore",
 33 |     ".gitignore",
 34 |     ".git/info/exclude",
 35 |     ".hgignore",
 36 |     ".svn",
 37 |     NULL
 38 | };
 39 | 
 40 | ignores *init_ignore(ignores *parent) {
 41 |     ignores *ig = ag_malloc(sizeof(ignores));
 42 |     ig->names = NULL;
 43 |     ig->names_len = 0;
 44 |     ig->regexes = NULL;
 45 |     ig->regexes_len = 0;
 46 |     ig->parent = parent;
 47 |     return ig;
 48 | }
 49 | 
 50 | void cleanup_ignore(ignores *ig) {
 51 |     size_t i;
 52 | 
 53 |     if (ig) {
 54 |         if (ig->regexes) {
 55 |             for (i = 0; i < ig->regexes_len; i++) {
 56 |                 free(ig->regexes[i]);
 57 |             }
 58 |             free(ig->regexes);
 59 |         }
 60 |         if (ig->names) {
 61 |             for (i = 0; i < ig->names_len; i++) {
 62 |                 free(ig->names[i]);
 63 |             }
 64 |             free(ig->names);
 65 |         }
 66 |         free(ig);
 67 |     }
 68 | }
 69 | 
 70 | void add_ignore_pattern(ignores *ig, const char *pattern) {
 71 |     int i;
 72 |     int pattern_len;
 73 | 
 74 |     /* Strip off the leading dot so that matches are more likely. */
 75 |     if (strncmp(pattern, "./", 2) == 0) {
 76 |         pattern++;
 77 |     }
 78 | 
 79 |     /* Kill trailing whitespace */
 80 |     for (pattern_len = strlen(pattern); pattern_len > 0; pattern_len--) {
 81 |         if (!isspace(pattern[pattern_len - 1])) {
 82 |             break;
 83 |         }
 84 |     }
 85 | 
 86 |     if (pattern_len == 0) {
 87 |         log_debug("Pattern is empty. Not adding any ignores.");
 88 |         return;
 89 |     }
 90 | 
 91 |     /* TODO: de-dupe these patterns */
 92 |     if (is_fnmatch(pattern)) {
 93 |         ig->regexes_len++;
 94 |         ig->regexes = ag_realloc(ig->regexes, ig->regexes_len * sizeof(char *));
 95 |         /* Prepend '/' if the pattern contains '/' but doesn't start with '/' */
 96 |         if ((pattern[0] != '/') && (strchr(pattern, '/') != NULL)) {
 97 |             ag_asprintf(&(ig->regexes[ig->regexes_len - 1]), "/%s", pattern);
 98 |             log_debug("added regex ignore pattern /%s", pattern);
 99 |         } else {
100 |             ig->regexes[ig->regexes_len - 1] = ag_strndup(pattern, pattern_len);
101 |             log_debug("added regex ignore pattern %s", pattern);
102 |         }
103 |     } else {
104 |         /* a balanced binary tree is best for performance, but I'm lazy */
105 |         ig->names_len++;
106 |         ig->names = ag_realloc(ig->names, ig->names_len * sizeof(char *));
107 |         for (i = ig->names_len - 1; i > 0; i--) {
108 |             if (strcmp(pattern, ig->names[i - 1]) > 0) {
109 |                 break;
110 |             }
111 |             ig->names[i] = ig->names[i - 1];
112 |         }
113 |         ig->names[i] = ag_strndup(pattern, pattern_len);
114 |         log_debug("added literal ignore pattern %s", pattern);
115 |     }
116 | }
117 | 
118 | /* For loading git/hg ignore patterns */
119 | void load_ignore_patterns(ignores *ig, const char *path) {
120 |     FILE *fp = NULL;
121 |     fp = fopen(path, "r");
122 |     if (fp == NULL) {
123 |         log_debug("Skipping ignore file %s", path);
124 |         return;
125 |     }
126 | 
127 |     char *line = NULL;
128 |     ssize_t line_len = 0;
129 |     size_t line_cap = 0;
130 | 
131 |     while ((line_len = getline(&line, &line_cap, fp)) > 0) {
132 |         if (line_len == 0 || line[0] == '\n' || line[0] == '#') {
133 |             continue;
134 |         }
135 |         if (line[line_len - 1] == '\n') {
136 |             line[line_len - 1] = '\0'; /* kill the \n */
137 |         }
138 |         add_ignore_pattern(ig, line);
139 |     }
140 | 
141 |     free(line);
142 |     fclose(fp);
143 | }
144 | 
145 | void load_svn_ignore_patterns(ignores *ig, const char *path) {
146 |     FILE *fp = NULL;
147 |     char *dir_prop_base;
148 |     ag_asprintf(&dir_prop_base, "%s/%s", path, SVN_DIR_PROP_BASE);
149 | 
150 |     fp = fopen(dir_prop_base, "r");
151 |     if (fp == NULL) {
152 |         log_debug("Skipping svn ignore file %s", dir_prop_base);
153 |         free(dir_prop_base);
154 |         return;
155 |     }
156 | 
157 |     char *entry = NULL;
158 |     size_t entry_len = 0;
159 |     char *key = ag_malloc(32); /* Sane start for max key length. */
160 |     size_t key_len = 0;
161 |     size_t bytes_read = 0;
162 |     char *entry_line;
163 |     size_t line_len;
164 |     int matches;
165 | 
166 |     while (fscanf(fp, "K %zu\n", &key_len) == 1) {
167 |         key = ag_realloc(key, key_len + 1);
168 |         bytes_read = fread(key, 1, key_len, fp);
169 |         key[key_len] = '\0';
170 |         matches = fscanf(fp, "\nV %zu\n", &entry_len);
171 |         if (matches != 1) {
172 |             log_debug("Unable to parse svnignore file %s: fscanf() got %i matches, expected 1.", dir_prop_base, matches);
173 |             goto cleanup;
174 |         }
175 | 
176 |         if (strncmp(SVN_PROP_IGNORE, key, bytes_read) != 0) {
177 |             log_debug("key is %s, not %s. skipping %u bytes", key, SVN_PROP_IGNORE, entry_len);
178 |             /* Not the key we care about. fseek and repeat */
179 |             fseek(fp, entry_len + 1, SEEK_CUR); /* +1 to account for newline. yes I know this is hacky */
180 |             continue;
181 |         }
182 |         /* Aww yeah. Time to ignore stuff */
183 |         entry = ag_malloc(entry_len + 1);
184 |         bytes_read = fread(entry, 1, entry_len, fp);
185 |         entry[bytes_read] = '\0';
186 |         log_debug("entry: %s", entry);
187 |         break;
188 |     }
189 |     if (entry == NULL) {
190 |         goto cleanup;
191 |     }
192 |     char *patterns = entry;
193 |     size_t patterns_len = strlen(patterns);
194 |     while (*patterns != '\0' && patterns < (entry + bytes_read)) {
195 |         for (line_len = 0; line_len < patterns_len; line_len++) {
196 |             if (patterns[line_len] == '\n') {
197 |                 break;
198 |             }
199 |         }
200 |         if (line_len > 0) {
201 |             entry_line = ag_strndup(patterns, line_len);
202 |             add_ignore_pattern(ig, entry_line);
203 |             free(entry_line);
204 |         }
205 |         patterns += line_len + 1;
206 |         patterns_len -= line_len + 1;
207 |     }
208 |     free(entry);
209 | cleanup:
210 |     free(dir_prop_base);
211 |     free(key);
212 |     fclose(fp);
213 | }
214 | 
215 | static int ackmate_dir_match(const char *dir_name) {
216 |     if (opts.ackmate_dir_filter == NULL) {
217 |         return 0;
218 |     }
219 |     /* we just care about the match, not where the matches are */
220 |     return pcre_exec(opts.ackmate_dir_filter, NULL, dir_name, strlen(dir_name), 0, 0, NULL, 0);
221 | }
222 | 
223 | static int filename_ignore_search(const ignores *ig, const char *filename) {
224 |     size_t i;
225 |     int match_pos;
226 | 
227 |     if (strncmp(filename, "./", 2) == 0) {
228 |         filename++;
229 |     }
230 | 
231 |     match_pos = binary_search(filename, ig->names, 0, ig->names_len);
232 |     if (match_pos >= 0) {
233 |         log_debug("file %s ignored because name matches static pattern %s", filename, ig->names[match_pos]);
234 |         return 1;
235 |     }
236 | 
237 |     for (i = 0; i < ig->regexes_len; i++) {
238 |         if (fnmatch(ig->regexes[i], filename, fnmatch_flags) == 0) {
239 |             log_debug("file %s ignored because name matches regex pattern %s", filename, ig->regexes[i]);
240 |             return 1;
241 |         }
242 |         log_debug("pattern %s doesn't match file %s", ig->regexes[i], filename);
243 |     }
244 | 
245 |     log_debug("file %s not ignored", filename);
246 |     return 0;
247 | }
248 | 
249 | static int path_ignore_search(const ignores *ig, const char *path, const char *filename) {
250 |     char *temp;
251 | 
252 |     if (filename_ignore_search(ig, filename)) {
253 |         return 1;
254 |     }
255 | 
256 |     ag_asprintf(&temp, "%s/%s", path[0] == '.' ? path + 1 : path, filename);
257 | 
258 |     if (filename_ignore_search(ig, temp)) {
259 |         free(temp);
260 |         return 1;
261 |     }
262 | 
263 |     int rv = ackmate_dir_match(temp);
264 |     free(temp);
265 |     return rv;
266 | }
267 | 
268 | /* This function is REALLY HOT. It gets called for every file */
269 | int filename_filter(const char *path, const struct dirent *dir, void *baton) {
270 |     const char *filename = dir->d_name;
271 |     /* TODO: don't call strlen on filename every time we call filename_filter() */
272 |     size_t filename_len = strlen(filename);
273 |     size_t i;
274 |     scandir_baton_t *scandir_baton = (scandir_baton_t *)baton;
275 |     const ignores *ig = scandir_baton->ig;
276 |     const char *base_path = scandir_baton->base_path;
277 |     const size_t base_path_len = scandir_baton->base_path_len;
278 |     const char *path_start = path;
279 |     char *temp;
280 | 
281 |     if (!opts.follow_symlinks && is_symlink(path, dir)) {
282 |         log_debug("File %s ignored becaused it's a symlink", dir->d_name);
283 |         return 0;
284 |     }
285 | 
286 |     if (is_named_pipe(path, dir)) {
287 |         log_debug("%s ignored because it's a named pipe", path);
288 |         return 0;
289 |     }
290 | 
291 |     for (i = 0; evil_hardcoded_ignore_files[i] != NULL; i++) {
292 |         if (strcmp(filename, evil_hardcoded_ignore_files[i]) == 0) {
293 |             return 0;
294 |         }
295 |     }
296 | 
297 |     if (!opts.search_hidden_files && filename[0] == '.') {
298 |         return 0;
299 |     }
300 |     if (opts.search_all_files && !opts.path_to_agignore) {
301 |         return 1;
302 |     }
303 | 
304 |     for (i = 0; base_path[i] == path[i] && i < base_path_len; i++) {
305 |         /* base_path always ends with "/\0" while path doesn't, so this is safe */
306 |         path_start = path + i + 2;
307 |     }
308 |     log_debug("path_start %s filename %s", path_start, filename);
309 | 
310 |     while (ig != NULL) {
311 |         if (path_ignore_search(ig, path_start, filename)) {
312 |             return 0;
313 |         }
314 | 
315 |         if (is_directory(path, dir) && filename[filename_len - 1] != '/') {
316 |             ag_asprintf(&temp, "%s/", filename);
317 |             int rv = path_ignore_search(ig, path_start, temp);
318 |             free(temp);
319 |             if (rv) {
320 |                 return 0;
321 |             }
322 |         }
323 |         ig = ig->parent;
324 |     }
325 | 
326 |     return 1;
327 | }
328 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright [yyyy] [name of copyright owner]
191 | 
192 |    Licensed under the Apache License, Version 2.0 (the "License");
193 |    you may not use this file except in compliance with the License.
194 |    You may obtain a copy of the License at
195 | 
196 |        http://www.apache.org/licenses/LICENSE-2.0
197 | 
198 |    Unless required by applicable law or agreed to in writing, software
199 |    distributed under the License is distributed on an "AS IS" BASIS,
200 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 |    See the License for the specific language governing permissions and
202 |    limitations under the License.
203 | 


--------------------------------------------------------------------------------
/m4/ax_pthread.m4:
--------------------------------------------------------------------------------
  1 | # ===========================================================================
  2 | #        http://www.gnu.org/software/autoconf-archive/ax_pthread.html
  3 | # ===========================================================================
  4 | #
  5 | # SYNOPSIS
  6 | #
  7 | #   AX_PTHREAD([ACTION-IF-FOUND[, ACTION-IF-NOT-FOUND]])
  8 | #
  9 | # DESCRIPTION
 10 | #
 11 | #   This macro figures out how to build C programs using POSIX threads. It
 12 | #   sets the PTHREAD_LIBS output variable to the threads library and linker
 13 | #   flags, and the PTHREAD_CFLAGS output variable to any special C compiler
 14 | #   flags that are needed. (The user can also force certain compiler
 15 | #   flags/libs to be tested by setting these environment variables.)
 16 | #
 17 | #   Also sets PTHREAD_CC to any special C compiler that is needed for
 18 | #   multi-threaded programs (defaults to the value of CC otherwise). (This
 19 | #   is necessary on AIX to use the special cc_r compiler alias.)
 20 | #
 21 | #   NOTE: You are assumed to not only compile your program with these flags,
 22 | #   but also link it with them as well. e.g. you should link with
 23 | #   $PTHREAD_CC $CFLAGS $PTHREAD_CFLAGS $LDFLAGS ... $PTHREAD_LIBS $LIBS
 24 | #
 25 | #   If you are only building threads programs, you may wish to use these
 26 | #   variables in your default LIBS, CFLAGS, and CC:
 27 | #
 28 | #     LIBS="$PTHREAD_LIBS $LIBS"
 29 | #     CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
 30 | #     CC="$PTHREAD_CC"
 31 | #
 32 | #   In addition, if the PTHREAD_CREATE_JOINABLE thread-attribute constant
 33 | #   has a nonstandard name, defines PTHREAD_CREATE_JOINABLE to that name
 34 | #   (e.g. PTHREAD_CREATE_UNDETACHED on AIX).
 35 | #
 36 | #   Also HAVE_PTHREAD_PRIO_INHERIT is defined if pthread is found and the
 37 | #   PTHREAD_PRIO_INHERIT symbol is defined when compiling with
 38 | #   PTHREAD_CFLAGS.
 39 | #
 40 | #   ACTION-IF-FOUND is a list of shell commands to run if a threads library
 41 | #   is found, and ACTION-IF-NOT-FOUND is a list of commands to run it if it
 42 | #   is not found. If ACTION-IF-FOUND is not specified, the default action
 43 | #   will define HAVE_PTHREAD.
 44 | #
 45 | #   Please let the authors know if this macro fails on any platform, or if
 46 | #   you have any other suggestions or comments. This macro was based on work
 47 | #   by SGJ on autoconf scripts for FFTW (http://www.fftw.org/) (with help
 48 | #   from M. Frigo), as well as ac_pthread and hb_pthread macros posted by
 49 | #   Alejandro Forero Cuervo to the autoconf macro repository. We are also
 50 | #   grateful for the helpful feedback of numerous users.
 51 | #
 52 | #   Updated for Autoconf 2.68 by Daniel Richard G.
 53 | #
 54 | # LICENSE
 55 | #
 56 | #   Copyright (c) 2008 Steven G. Johnson <stevenj@alum.mit.edu>
 57 | #   Copyright (c) 2011 Daniel Richard G. <skunk@iSKUNK.ORG>
 58 | #
 59 | #   This program is free software: you can redistribute it and/or modify it
 60 | #   under the terms of the GNU General Public License as published by the
 61 | #   Free Software Foundation, either version 3 of the License, or (at your
 62 | #   option) any later version.
 63 | #
 64 | #   This program is distributed in the hope that it will be useful, but
 65 | #   WITHOUT ANY WARRANTY; without even the implied warranty of
 66 | #   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
 67 | #   Public License for more details.
 68 | #
 69 | #   You should have received a copy of the GNU General Public License along
 70 | #   with this program. If not, see <http://www.gnu.org/licenses/>.
 71 | #
 72 | #   As a special exception, the respective Autoconf Macro's copyright owner
 73 | #   gives unlimited permission to copy, distribute and modify the configure
 74 | #   scripts that are the output of Autoconf when processing the Macro. You
 75 | #   need not follow the terms of the GNU General Public License when using
 76 | #   or distributing such scripts, even though portions of the text of the
 77 | #   Macro appear in them. The GNU General Public License (GPL) does govern
 78 | #   all other use of the material that constitutes the Autoconf Macro.
 79 | #
 80 | #   This special exception to the GPL applies to versions of the Autoconf
 81 | #   Macro released by the Autoconf Archive. When you make and distribute a
 82 | #   modified version of the Autoconf Macro, you may extend this special
 83 | #   exception to the GPL to apply to your modified version as well.
 84 | 
 85 | #serial 21
 86 | 
 87 | AU_ALIAS([ACX_PTHREAD], [AX_PTHREAD])
 88 | AC_DEFUN([AX_PTHREAD], [
# Find the compiler/linker flags needed for POSIX threads: user-supplied
# PTHREAD_LIBS/PTHREAD_CFLAGS are tried first, then the candidate list below.
 89 | AC_REQUIRE([AC_CANONICAL_HOST])
 90 | AC_LANG_PUSH([C])
 91 | ax_pthread_ok=no
 92 | 
 93 | # We used to check for pthread.h first, but this fails if pthread.h
 94 | # requires special compiler flags (e.g. on Tru64 or Sequent).
 95 | # It gets checked for in the link test anyway.
 96 | 
 97 | # First of all, check if the user has set any of the PTHREAD_LIBS,
 98 | # etcetera environment variables, and if threads linking works using
 99 | # them:
100 | if test x"$PTHREAD_LIBS$PTHREAD_CFLAGS" != x; then
101 |         save_CFLAGS="$CFLAGS"
102 |         CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
103 |         save_LIBS="$LIBS"
104 |         LIBS="$PTHREAD_LIBS $LIBS"
105 |         AC_MSG_CHECKING([for pthread_join in LIBS=$PTHREAD_LIBS with CFLAGS=$PTHREAD_CFLAGS])
106 |         AC_TRY_LINK_FUNC([pthread_join], [ax_pthread_ok=yes])
107 |         AC_MSG_RESULT([$ax_pthread_ok])
108 |         if test x"$ax_pthread_ok" = xno; then
109 |                 PTHREAD_LIBS=""
110 |                 PTHREAD_CFLAGS=""
111 |         fi
112 |         LIBS="$save_LIBS"
113 |         CFLAGS="$save_CFLAGS"
114 | fi
115 | 
116 | # We must check for the threads library under a number of different
117 | # names; the ordering is very important because some systems
118 | # (e.g. DEC) have both -lpthread and -lpthreads, where one of the
119 | # libraries is broken (non-POSIX).
120 | 
121 | # Create a list of thread flags to try.  Items starting with a "-" are
122 | # C compiler flags, and other items are library names, except for "none"
123 | # which indicates that we try without any flags at all, and "pthread-config"
124 | # which is a program returning the flags for the Pth emulation library.
125 | 
126 | ax_pthread_flags="pthreads none -Kthread -kthread lthread -pthread -pthreads -mthreads pthread --thread-safe -mt pthread-config"
127 | 
128 | # The ordering *is* (sometimes) important.  Some notes on the
129 | # individual items follow:
130 | 
131 | # pthreads: AIX (must check this before -lpthread)
132 | # none: in case threads are in libc; should be tried before -Kthread and
133 | #       other compiler flags to prevent continual compiler warnings
134 | # -Kthread: Sequent (threads in libc, but -Kthread needed for pthread.h)
135 | # -kthread: FreeBSD kernel threads (preferred to -pthread since SMP-able)
136 | # lthread: LinuxThreads port on FreeBSD (also preferred to -pthread)
137 | # -pthread: Linux/gcc (kernel threads), BSD/gcc (userland threads)
138 | # -pthreads: Solaris/gcc
139 | # -mthreads: Mingw32/gcc, Lynx/gcc
140 | # -mt: Sun Workshop C (may only link SunOS threads [-lthread], but it
141 | #      doesn't hurt to check since this sometimes defines pthreads too;
142 | #      also defines -D_REENTRANT)
143 | #      ... -mt is also the pthreads flag for HP/aCC
144 | # pthread: Linux, etcetera
145 | # --thread-safe: KAI C++
146 | # pthread-config: use pthread-config program (for GNU Pth library)
147 | 
148 | case ${host_os} in
149 |         solaris*)
150 | 
151 |         # On Solaris (at least, for some versions), libc contains stubbed
152 |         # (non-functional) versions of the pthreads routines, so link-based
153 |         # tests will erroneously succeed.  (We need to link with -pthreads/-mt/
154 |         # -lpthread.)  (The stubs are missing pthread_cleanup_push, or rather
155 |         # a function called by this macro, so we could check for that, but
156 |         # who knows whether they'll stub that too in a future libc.)  So,
157 |         # we'll just look for -pthreads and -lpthread first:
158 | 
159 |         ax_pthread_flags="-pthreads pthread -mt -pthread $ax_pthread_flags"
160 |         ;;
161 | 
162 |         darwin*)
163 |         ax_pthread_flags="-pthread $ax_pthread_flags"
164 |         ;;
165 | esac
166 | 
167 | # Clang doesn't consider unrecognized options an error unless we specify
168 | # -Werror. We throw in some extra Clang-specific options to ensure that
169 | # this doesn't happen for GCC, which also accepts -Werror.
170 | 
171 | AC_MSG_CHECKING([if compiler needs -Werror to reject unknown flags])
172 | save_CFLAGS="$CFLAGS"
173 | ax_pthread_extra_flags="-Werror"
174 | CFLAGS="$CFLAGS $ax_pthread_extra_flags -Wunknown-warning-option -Wsizeof-array-argument"
175 | AC_COMPILE_IFELSE([AC_LANG_PROGRAM([int foo(void);],[foo()])],
176 |                   [AC_MSG_RESULT([yes])],
177 |                   [ax_pthread_extra_flags=
178 |                    AC_MSG_RESULT([no])])
179 | CFLAGS="$save_CFLAGS"
180 | 
# Only walk the candidate list when the user-supplied flags (if any) did
# not already produce a working link above.
181 | if test x"$ax_pthread_ok" = xno; then
182 | for flag in $ax_pthread_flags; do
183 | 
184 |         case $flag in
185 |                 none)
186 |                 AC_MSG_CHECKING([whether pthreads work without any flags])
187 |                 ;;
188 | 
189 |                 -*)
190 |                 AC_MSG_CHECKING([whether pthreads work with $flag])
191 |                 PTHREAD_CFLAGS="$flag"
192 |                 ;;
193 | 
194 |                 pthread-config)
195 |                 AC_CHECK_PROG([ax_pthread_config], [pthread-config], [yes], [no])
196 |                 if test x"$ax_pthread_config" = xno; then continue; fi
197 |                 PTHREAD_CFLAGS="`pthread-config --cflags`"
198 |                 PTHREAD_LIBS="`pthread-config --ldflags` `pthread-config --libs`"
199 |                 ;;
200 | 
201 |                 *)
202 |                 AC_MSG_CHECKING([for the pthreads library -l$flag])
203 |                 PTHREAD_LIBS="-l$flag"
204 |                 ;;
205 |         esac
206 | 
207 |         save_LIBS="$LIBS"
208 |         save_CFLAGS="$CFLAGS"
209 |         LIBS="$PTHREAD_LIBS $LIBS"
210 |         CFLAGS="$CFLAGS $PTHREAD_CFLAGS $ax_pthread_extra_flags"
211 | 
212 |         # Check for various functions.  We must include pthread.h,
213 |         # since some functions may be macros.  (On the Sequent, we
214 |         # need a special flag -Kthread to make this header compile.)
215 |         # We check for pthread_join because it is in -lpthread on IRIX
216 |         # while pthread_create is in libc.  We check for pthread_attr_init
217 |         # due to DEC craziness with -lpthreads.  We check for
218 |         # pthread_cleanup_push because it is one of the few pthread
219 |         # functions on Solaris that doesn't have a non-functional libc stub.
220 |         # We try pthread_create on general principles.
221 |         AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>
222 |                         static void routine(void *a) { a = 0; }
223 |                         static void *start_routine(void *a) { return a; }],
224 |                        [pthread_t th; pthread_attr_t attr;
225 |                         pthread_create(&th, 0, start_routine, 0);
226 |                         pthread_join(th, 0);
227 |                         pthread_attr_init(&attr);
228 |                         pthread_cleanup_push(routine, 0);
229 |                         pthread_cleanup_pop(0) /* ; */])],
230 |                 [ax_pthread_ok=yes],
231 |                 [])
232 | 
233 |         LIBS="$save_LIBS"
234 |         CFLAGS="$save_CFLAGS"
235 | 
236 |         AC_MSG_RESULT([$ax_pthread_ok])
237 |         if test "x$ax_pthread_ok" = xyes; then
238 |                 break;
239 |         fi
240 | 
# This candidate failed: clear its flags before trying the next one.
241 |         PTHREAD_LIBS=""
242 |         PTHREAD_CFLAGS=""
243 | done
244 | fi
245 | 
246 | # Various other checks:
247 | if test "x$ax_pthread_ok" = xyes; then
248 |         save_LIBS="$LIBS"
249 |         LIBS="$PTHREAD_LIBS $LIBS"
250 |         save_CFLAGS="$CFLAGS"
251 |         CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
252 | 
253 |         # Detect AIX lossage: JOINABLE attribute is called UNDETACHED.
254 |         AC_MSG_CHECKING([for joinable pthread attribute])
255 |         attr_name=unknown
256 |         for attr in PTHREAD_CREATE_JOINABLE PTHREAD_CREATE_UNDETACHED; do
257 |             AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <pthread.h>],
258 |                            [int attr = $attr; return attr /* ; */])],
259 |                 [attr_name=$attr; break],
260 |                 [])
261 |         done
262 |         AC_MSG_RESULT([$attr_name])
263 |         if test "$attr_name" != PTHREAD_CREATE_JOINABLE; then
264 |             AC_DEFINE_UNQUOTED([PTHREAD_CREATE_JOINABLE], [$attr_name],
265 |                                [Define to necessary symbol if this constant
266 |                                 uses a non-standard name on your system.])
267 |         fi
268 | 
269 |         AC_MSG_CHECKING([if more special flags are required for pthreads])
270 |         flag=no
271 |         case ${host_os} in
272 |             aix* | freebsd* | darwin*) flag="-D_THREAD_SAFE";;
273 |             osf* | hpux*) flag="-D_REENTRANT";;
274 |             solaris*)
275 |             if test "$GCC" = "yes"; then
276 |                 flag="-D_REENTRANT"
277 |             else
278 |                 # TODO: What about Clang on Solaris?
279 |                 flag="-mt -D_REENTRANT"
280 |             fi
281 |             ;;
282 |         esac
283 |         AC_MSG_RESULT([$flag])
284 |         if test "x$flag" != xno; then
285 |             PTHREAD_CFLAGS="$flag $PTHREAD_CFLAGS"
286 |         fi
287 | 
288 |         AC_CACHE_CHECK([for PTHREAD_PRIO_INHERIT],
289 |             [ax_cv_PTHREAD_PRIO_INHERIT], [
290 |                 AC_LINK_IFELSE([AC_LANG_PROGRAM([[#include <pthread.h>]],
291 |                                                 [[int i = PTHREAD_PRIO_INHERIT;]])],
292 |                     [ax_cv_PTHREAD_PRIO_INHERIT=yes],
293 |                     [ax_cv_PTHREAD_PRIO_INHERIT=no])
294 |             ])
295 |         AS_IF([test "x$ax_cv_PTHREAD_PRIO_INHERIT" = "xyes"],
296 |             [AC_DEFINE([HAVE_PTHREAD_PRIO_INHERIT], [1], [Have PTHREAD_PRIO_INHERIT.])])
297 | 
298 |         LIBS="$save_LIBS"
299 |         CFLAGS="$save_CFLAGS"
300 | 
301 |         # More AIX lossage: compile with *_r variant
302 |         if test "x$GCC" != xyes; then
303 |             case $host_os in
304 |                 aix*)
305 |                 AS_CASE(["x/$CC"],
306 |                   [x*/c89|x*/c89_128|x*/c99|x*/c99_128|x*/cc|x*/cc128|x*/xlc|x*/xlc_v6|x*/xlc128|x*/xlc128_v6],
307 |                   [#handle absolute path differently from PATH based program lookup
308 |                    AS_CASE(["x$CC"],
309 |                      [x/*],
310 |                      [AS_IF([AS_EXECUTABLE_P([${CC}_r])],[PTHREAD_CC="${CC}_r"])],
311 |                      [AC_CHECK_PROGS([PTHREAD_CC],[${CC}_r],[$CC])])])
312 |                 ;;
313 |             esac
314 |         fi
315 | fi
316 | 
# Default PTHREAD_CC to the ordinary compiler when nothing special was
# chosen above.
317 | test -n "$PTHREAD_CC" || PTHREAD_CC="$CC"
318 | 
319 | AC_SUBST([PTHREAD_LIBS])
320 | AC_SUBST([PTHREAD_CFLAGS])
321 | AC_SUBST([PTHREAD_CC])
322 | 
323 | # Finally, execute ACTION-IF-FOUND/ACTION-IF-NOT-FOUND:
324 | if test x"$ax_pthread_ok" = xyes; then
325 |         ifelse([$1],,[AC_DEFINE([HAVE_PTHREAD],[1],[Define if you have POSIX threads libraries and header files.])],[$1])
326 |         :
327 | else
328 |         ax_pthread_ok=no
329 |         $2
330 | fi
331 | AC_LANG_POP
332 | ])dnl AX_PTHREAD
333 | 


--------------------------------------------------------------------------------
/src/util.c:
--------------------------------------------------------------------------------
  1 | #include <ctype.h>
  2 | #include <string.h>
  3 | #include <stdio.h>
  4 | #include <stdarg.h>
  5 | #include <stdlib.h>
  6 | #include <sys/stat.h>
  7 | 
  8 | #include "util.h"
  9 | #include "config.h"
 10 | 
 11 | #ifdef _WIN32
 12 | #define flockfile(x)
 13 | #define funlockfile(x)
 14 | #define getc_unlocked(x) getc(x)
 15 | #endif
 16 | 
 17 | #define CHECK_AND_RETURN(ptr)             \
 18 |     if (ptr == NULL) {                    \
 19 |         die("Memory allocation failed."); \
 20 |     }                                     \
 21 |     return ptr;
 22 | 
 23 | void *ag_malloc(size_t size) {
 24 |     void *ptr = malloc(size);
 25 |     CHECK_AND_RETURN(ptr)
 26 | }
 27 | 
 28 | void *ag_realloc(void *ptr, size_t size) {
 29 |     void *new_ptr = realloc(ptr, size);
 30 |     CHECK_AND_RETURN(new_ptr)
 31 | }
 32 | 
 33 | void *ag_calloc(size_t count, size_t size) {
 34 |     void *ptr = calloc(count, size);
 35 |     CHECK_AND_RETURN(ptr)
 36 | }
 37 | 
 38 | char *ag_strdup(const char *s) {
 39 |     char *str = strdup(s);
 40 |     CHECK_AND_RETURN(str)
 41 | }
 42 | 
 43 | char *ag_strndup(const char *s, size_t size) {
 44 |     char *str = NULL;
 45 | #ifdef HAVE_STRNDUP
 46 |     str = strndup(s, size);
 47 |     CHECK_AND_RETURN(str)
 48 | #else
 49 |     str = (char *)ag_malloc(size + 1);
 50 |     strlcpy(str, s, size + 1);
 51 |     return str;
 52 | #endif
 53 | }
 54 | 
 55 | void generate_alpha_skip(const char *find, size_t f_len, size_t skip_lookup[], const int case_sensitive) {
 56 |     size_t i;
 57 | 
 58 |     for (i = 0; i < 256; i++) {
 59 |         skip_lookup[i] = f_len;
 60 |     }
 61 | 
 62 |     f_len--;
 63 | 
 64 |     for (i = 0; i < f_len; i++) {
 65 |         if (case_sensitive) {
 66 |             skip_lookup[(unsigned char)find[i]] = f_len - i;
 67 |         } else {
 68 |             skip_lookup[(unsigned char)tolower(find[i])] = f_len - i;
 69 |             skip_lookup[(unsigned char)toupper(find[i])] = f_len - i;
 70 |         }
 71 |     }
 72 | }
 73 | 
 74 | int is_prefix(const char *s, const size_t s_len, const size_t pos, const int case_sensitive) {
 75 |     size_t i;
 76 | 
 77 |     for (i = 0; pos + i < s_len; i++) {
 78 |         if (case_sensitive) {
 79 |             if (s[i] != s[i + pos]) {
 80 |                 return 0;
 81 |             }
 82 |         } else {
 83 |             if (tolower(s[i]) != tolower(s[i + pos])) {
 84 |                 return 0;
 85 |             }
 86 |         }
 87 |     }
 88 | 
 89 |     return 1;
 90 | }
 91 | 
 92 | size_t suffix_len(const char *s, const size_t s_len, const size_t pos, const int case_sensitive) {
 93 |     size_t i;
 94 | 
 95 |     for (i = 0; i < pos; i++) {
 96 |         if (case_sensitive) {
 97 |             if (s[pos - i] != s[s_len - i - 1]) {
 98 |                 break;
 99 |             }
100 |         } else {
101 |             if (tolower(s[pos - i]) != tolower(s[s_len - i - 1])) {
102 |                 break;
103 |             }
104 |         }
105 |     }
106 | 
107 |     return i;
108 | }
109 | 
/* Build the per-position skip table for the pattern `find` (f_len entries).
 *
 * The table is allocated with ag_malloc() and stored through *skip_lookup.
 * Pass one walks the pattern from the end, recording for each position the
 * skip derived from the most recent offset at which is_prefix() succeeded.
 * Pass two overrides entries using suffix_len() wherever the bytes in front
 * of the two matching runs differ. */
void generate_find_skip(const char *find, const size_t f_len, size_t **skip_lookup, const int case_sensitive) {
    size_t *table = ag_malloc(f_len * sizeof(size_t));
    size_t prefix_at = f_len;
    size_t idx;

    *skip_lookup = table;

    for (idx = f_len; idx > 0; idx--) {
        if (is_prefix(find, f_len, idx, case_sensitive)) {
            prefix_at = idx;
        }
        table[idx - 1] = prefix_at + (f_len - idx);
    }

    for (idx = 0; idx < f_len; idx++) {
        const size_t matched = suffix_len(find, f_len, idx, case_sensitive);

        if (find[idx - matched] != find[f_len - 1 - matched]) {
            table[f_len - 1 - matched] = f_len - 1 - idx + matched;
        }
    }
}
131 | 
/* Return the larger of the two values; `a` is returned when they are equal. */
size_t ag_max(size_t a, size_t b) {
    return (b > a) ? b : a;
}
138 | 
/* Boyer-Moore strstr.
 *
 * Search the first s_len bytes of `s` for the f_len-byte pattern `find`.
 * alpha_skip_lookup is the 256-entry per-byte table and find_skip_lookup the
 * per-pattern-position table; after a mismatch the search advances by the
 * larger of the two entries.
 *
 * Returns a pointer to the first match found, or NULL when there is none.
 * An empty pattern (f_len == 0) yields NULL. */
const char *boyer_moore_strnstr(const char *s, const char *find, const size_t s_len, const size_t f_len,
                                const size_t alpha_skip_lookup[], const size_t *find_skip_lookup) {
    ssize_t i;
    size_t pos;

    if (f_len == 0) {
        return NULL;
    }

    pos = f_len - 1;
    while (pos < s_len) {
        /* Compare right-to-left; pos and i move back in lock-step. */
        for (i = (ssize_t)f_len - 1; i >= 0 && s[pos] == find[i]; pos--, i--) {
        }
        if (i < 0) {
            /* pos has stepped one before the match (wrapping around when the
             * match begins at s[0]), so add 1 before forming the pointer. */
            return s + (pos + 1);
        }
        {
            const size_t by_char = alpha_skip_lookup[(unsigned char)s[pos]];
            const size_t by_suffix = find_skip_lookup[i];

            pos += (by_suffix > by_char) ? by_suffix : by_char;
        }
    }

    return NULL;
}
156 | 
/* Case-insensitive variant of boyer_moore_strnstr (same algorithm, still a
 * copy of it).
 *
 * Each haystack byte is passed through tolower() before being compared with
 * the pattern byte, so the comparison only succeeds for upper-case input when
 * `find` holds the lower-case form. Both bytes are handled as unsigned char:
 * this keeps tolower() within its defined domain and lets bytes >= 0x80
 * match themselves even where plain char is signed.
 *
 * Returns a pointer to the first match found in the first s_len bytes of
 * `s`, or NULL when there is none. An empty pattern (f_len == 0) yields
 * NULL. */
const char *boyer_moore_strncasestr(const char *s, const char *find, const size_t s_len, const size_t f_len,
                                    const size_t alpha_skip_lookup[], const size_t *find_skip_lookup) {
    ssize_t i;
    size_t pos;

    if (f_len == 0) {
        return NULL;
    }

    pos = f_len - 1;
    while (pos < s_len) {
        /* Compare right-to-left; pos and i move back in lock-step. */
        for (i = (ssize_t)f_len - 1;
             i >= 0 && tolower((unsigned char)s[pos]) == (unsigned char)find[i];
             pos--, i--) {
        }
        if (i < 0) {
            /* pos has stepped one before the match (wrapping around when the
             * match begins at s[0]), so add 1 before forming the pointer. */
            return s + (pos + 1);
        }
        {
            const size_t by_char = alpha_skip_lookup[(unsigned char)s[pos]];
            const size_t by_suffix = find_skip_lookup[i];

            pos += (by_suffix > by_char) ? by_suffix : by_char;
        }
    }

    return NULL;
}
174 | 
175 | strncmp_fp get_strstr(enum case_behavior casing) {
176 |     strncmp_fp ag_strncmp_fp = &boyer_moore_strnstr;
177 | 
178 |     if (casing == CASE_INSENSITIVE) {
179 |         ag_strncmp_fp = &boyer_moore_strncasestr;
180 |     }
181 | 
182 |     return ag_strncmp_fp;
183 | }
184 | 
185 | size_t invert_matches(const char *buf, const size_t buf_len, match_t matches[], size_t matches_len) {
186 |     size_t i;
187 |     size_t match_read_index = 0;
188 |     size_t inverted_match_count = 0;
189 |     size_t inverted_match_start = 0;
190 |     size_t last_line_end = 0;
191 |     int in_inverted_match = TRUE;
192 |     match_t next_match;
193 | 
194 |     log_debug("Inverting %u matches.", matches_len);
195 | 
196 |     if (matches_len > 0) {
197 |         next_match = matches[0];
198 |     } else {
199 |         next_match.start = buf_len + 1;
200 |     }
201 | 
202 |     /* No matches, so the whole buffer is now a match. */
203 |     if (matches_len == 0) {
204 |         matches[0].start = 0;
205 |         matches[0].end = buf_len - 1;
206 |         return 1;
207 |     }
208 | 
209 |     for (i = 0; i < buf_len; i++) {
210 |         if (i == next_match.start) {
211 |             i = next_match.end - 1;
212 | 
213 |             match_read_index++;
214 | 
215 |             if (match_read_index < matches_len) {
216 |                 next_match = matches[match_read_index];
217 |             }
218 | 
219 |             if (in_inverted_match && last_line_end > inverted_match_start) {
220 |                 matches[inverted_match_count].start = inverted_match_start;
221 |                 matches[inverted_match_count].end = last_line_end - 1;
222 | 
223 |                 inverted_match_count++;
224 |             }
225 | 
226 |             in_inverted_match = FALSE;
227 |         } else if (i == buf_len - 1 && in_inverted_match) {
228 |             matches[inverted_match_count].start = inverted_match_start;
229 |             matches[inverted_match_count].end = i;
230 | 
231 |             inverted_match_count++;
232 |         } else if (buf[i] == '\n') {
233 |             last_line_end = i + 1;
234 | 
235 |             if (!in_inverted_match) {
236 |                 inverted_match_start = last_line_end;
237 |             }
238 | 
239 |             in_inverted_match = TRUE;
240 |         }
241 |     }
242 | 
243 |     for (i = 0; i < matches_len; i++) {
244 |         log_debug("Inverted match %i start %i end %i.", i, matches[i].start, matches[i].end);
245 |     }
246 | 
247 |     return inverted_match_count;
248 | }
249 | 
250 | void compile_study(pcre **re, pcre_extra **re_extra, char *q, const int pcre_opts, const int study_opts) {
251 |     const char *pcre_err = NULL;
252 |     int pcre_err_offset = 0;
253 | 
254 |     *re = pcre_compile(q, pcre_opts, &pcre_err, &pcre_err_offset, NULL);
255 |     if (*re == NULL) {
256 |         die("pcre_compile failed at position %i. Error: %s", pcre_err_offset, pcre_err);
257 |     }
258 |     *re_extra = pcre_study(*re, study_opts, &pcre_err);
259 |     if (*re_extra == NULL) {
260 |         log_debug("pcre_study returned nothing useful. Error: %s", pcre_err);
261 |     }
262 | }
263 | 
/* This function is very hot. It's called on every file.
 *
 * Heuristic binary check over at most the first 512 bytes of `buf`.
 * Returns 0 for an empty buffer or one that starts with a UTF-8 BOM, 1 as
 * soon as a NUL byte is seen, and otherwise 1 when more than 10% of the
 * inspected bytes are "suspicious" (outside 7..14 and 32..127 and not part
 * of a well-formed 2- or 3-byte UTF-8 sequence). */
int is_binary(const void *buf, const size_t buf_len) {
    const unsigned char *bytes = buf;
    const size_t limit = buf_len > 512 ? 512 : buf_len;
    size_t odd = 0;
    size_t idx;

    if (buf_len == 0) {
        return 0;
    }

    /* UTF-8 BOM. This isn't binary. */
    if (buf_len >= 3 && bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) {
        return 0;
    }

    for (idx = 0; idx < limit; idx++) {
        const unsigned char c = bytes[idx];

        if (c == '\0') {
            /* NULL char. It's binary */
            return 1;
        }

        /* Bytes in 7..14 or 32..127 are never counted as suspicious. */
        if ((c >= 7 && c <= 14) || (c >= 32 && c <= 127)) {
            continue;
        }

        /* UTF-8 detection */
        if (c > 193 && c < 224 && idx + 1 < limit) {
            /* Lead byte of a 2-byte sequence: consume the next byte. */
            idx++;
            if (bytes[idx] > 127 && bytes[idx] < 192) {
                continue;
            }
        } else if (c > 223 && c < 240 && idx + 2 < limit) {
            /* Lead byte of a 3-byte sequence: check both continuation bytes. */
            idx++;
            if (bytes[idx] > 127 && bytes[idx] < 192 && bytes[idx + 1] > 127 && bytes[idx + 1] < 192) {
                idx++;
                continue;
            }
        }

        odd++;
        /* Disk IO is so slow that it's worthwhile to do this calculation after every suspicious byte. */
        /* Read at least 32 bytes before making a decision */
        if (idx >= 32 && (odd * 100) / limit > 10) {
            return 1;
        }
    }

    return ((odd * 100) / limit > 10) ? 1 : 0;
}
313 | 
/* Return 1 when `query` contains at least one of the regex metacharacters
 * $ ( ) * + . ? [ \ ^ { | and 0 otherwise. */
int is_regex(const char *query) {
    static const char regex_chars[] = "$()*+.?[\\^{|";

    if (strpbrk(query, regex_chars) == NULL) {
        return 0;
    }
    return 1;
}
333 | 
/* Return 1 when `filename` contains at least one of the characters
 * ! * ? [ ] and 0 otherwise. */
int is_fnmatch(const char *filename) {
    static const char fnmatch_chars[] = "!*?[]";

    if (strpbrk(filename, fnmatch_chars) == NULL) {
        return 0;
    }
    return 1;
}
346 | 
/* Binary search for `needle` in the strcmp()-sorted string array `haystack`,
 * looking only at indices in the half-open range [start, end).
 *
 * Returns the index of a matching element, or -1 when the range holds no
 * match. An empty or inverted range (start >= end) returns -1.
 *
 * The midpoint is computed as start + (end - start) / 2 so it cannot
 * overflow, and the search is iterative rather than recursive. */
int binary_search(const char *needle, char **haystack, int start, int end) {
    while (start < end) {
        const int mid = start + (end - start) / 2;
        const int rc = strcmp(needle, haystack[mid]);

        if (rc == 0) {
            return mid;
        }
        if (rc < 0) {
            end = mid;
        } else {
            start = mid + 1;
        }
    }

    return -1;
}
366 | 
/* Lookup table indexed by byte value: non-zero for a-z, A-Z, 0-9 and '_'.
 * Filled in by init_wordchar_table(). */
static int wordchar_table[256];

/* Populate wordchar_table; must run before is_wordchar() gives meaningful
 * answers (the table is all zeros until then). */
void init_wordchar_table(void) {
    int code;

    for (code = 0; code < 256; code++) {
        int is_word = 0;

        if (code >= 'a' && code <= 'z') {
            is_word = 1;
        } else if (code >= 'A' && code <= 'Z') {
            is_word = 1;
        } else if (code >= '0' && code <= '9') {
            is_word = 1;
        } else if (code == '_') {
            is_word = 1;
        }
        wordchar_table[code] = is_word;
    }
}

/* Table lookup: non-zero when `ch` is one of a-z, A-Z, 0-9 or '_'. */
int is_wordchar(char ch) {
    const unsigned char slot = (unsigned char)ch;

    return wordchar_table[slot];
}
384 | 
385 | int is_lowercase(const char *s) {
386 |     int i;
387 |     for (i = 0; s[i] != '\0'; i++) {
388 |         if (!isascii(s[i]) || isupper(s[i])) {
389 |             return FALSE;
390 |         }
391 |     }
392 |     return TRUE;
393 | }
394 | 
395 | int is_directory(const char *path, const struct dirent *d) {
396 | #ifdef HAVE_DIRENT_DTYPE
397 |     /* Some filesystems, e.g. ReiserFS, always return a type DT_UNKNOWN from readdir or scandir. */
398 |     /* Call stat if we don't find DT_DIR to get the information we need. */
399 |     /* Also works for symbolic links to directories. */
400 |     if (d->d_type != DT_UNKNOWN && d->d_type != DT_LNK) {
401 |         return d->d_type == DT_DIR;
402 |     }
403 | #endif
404 |     char *full_path;
405 |     struct stat s;
406 |     ag_asprintf(&full_path, "%s/%s", path, d->d_name);
407 |     if (stat(full_path, &s) != 0) {
408 |         free(full_path);
409 |         return FALSE;
410 |     }
411 |     free(full_path);
412 |     return S_ISDIR(s.st_mode);
413 | }
414 | 
415 | int is_symlink(const char *path, const struct dirent *d) {
416 | #ifdef _WIN32
417 |     return 0;
418 | #else
419 | #ifdef HAVE_DIRENT_DTYPE
420 |     /* Some filesystems, e.g. ReiserFS, always return a type DT_UNKNOWN from readdir or scandir. */
421 |     /* Call lstat if we find DT_UNKNOWN to get the information we need. */
422 |     if (d->d_type != DT_UNKNOWN) {
423 |         return (d->d_type == DT_LNK);
424 |     }
425 | #endif
426 |     char *full_path;
427 |     struct stat s;
428 |     ag_asprintf(&full_path, "%s/%s", path, d->d_name);
429 |     if (lstat(full_path, &s) != 0) {
430 |         free(full_path);
431 |         return FALSE;
432 |     }
433 |     free(full_path);
434 |     return S_ISLNK(s.st_mode);
435 | #endif
436 | }
437 | 
438 | int is_named_pipe(const char *path, const struct dirent *d) {
439 | #ifdef HAVE_DIRENT_DTYPE
440 |     if (d->d_type != DT_UNKNOWN) {
441 |         return d->d_type == DT_FIFO;
442 |     }
443 | #endif
444 |     char *full_path;
445 |     struct stat s;
446 |     ag_asprintf(&full_path, "%s/%s", path, d->d_name);
447 |     if (stat(full_path, &s) != 0) {
448 |         free(full_path);
449 |         return FALSE;
450 |     }
451 |     free(full_path);
452 |     return S_ISFIFO(s.st_mode);
453 | }
454 | 
/* printf-style formatting into a string allocated by vasprintf() and stored
 * through `ret`. A -1 result from vasprintf() is reported through die(). */
void ag_asprintf(char **ret, const char *fmt, ...) {
    va_list ap;
    int written;

    va_start(ap, fmt);
    written = vasprintf(ret, fmt, ap);
    if (written == -1) {
        die("vasprintf returned -1");
    }
    va_end(ap);
}
463 | 
464 | void die(const char *fmt, ...) {
465 |     va_list args;
466 |     va_start(args, fmt);
467 |     vplog(LOG_LEVEL_ERR, fmt, args);
468 |     va_end(args);
469 |     exit(2);
470 | }
471 | 
#ifndef HAVE_FGETLN
/* Fallback fgetln(): read one line from `fp`, including the trailing '\n'
 * when present.
 *
 * Returns a heap buffer that is NOT NUL-terminated and that the caller must
 * free(); its length is stored in *lenp. Returns NULL when nothing could be
 * read before EOF (with *lenp set to 0) or when growing the buffer fails
 * (in which case *lenp is left untouched).
 *
 * The stream is locked for the duration of the read and the buffer grows in
 * BUFSIZ steps. Sizes are tracked as size_t so long lines cannot overflow a
 * signed counter. */
char *fgetln(FILE *fp, size_t *lenp) {
    char *buf = NULL;
    size_t cap = 0;
    size_t len = 0;
    int c;

    flockfile(fp);
    while ((c = getc_unlocked(fp)) != EOF) {
        if (len >= cap) {
            const size_t new_cap = cap + BUFSIZ;
            char *grown = realloc(buf, new_cap);

            if (grown == NULL) {
                funlockfile(fp);
                free(buf);
                return NULL;
            }
            buf = grown;
            cap = new_cap;
        }
        buf[len++] = (char)c;
        if (c == '\n') {
            break;
        }
    }
    funlockfile(fp);
    *lenp = len;
    return buf;
}
#endif
502 | 
#ifndef HAVE_GETLINE
/*
 * Do it yourself getline() implementation, built on fgetln().
 *
 * Reads one line from `stream` into *lineptr, growing that buffer (and
 * updating *n) when it is missing or too small, and NUL-terminates it.
 * Returns the number of bytes read, or -1 when no line could be read or the
 * buffer could not be grown.
 *
 * A NULL *lineptr always triggers an allocation, whatever *n holds, and the
 * line obtained from our own fgetln() is released on the allocation-failure
 * path as well as on success.
 */
ssize_t getline(char **lineptr, size_t *n, FILE *stream) {
    size_t len = 0;
    char *srcln = NULL;
    char *newlnptr = NULL;

    /* get line, bail on error */
    if (!(srcln = fgetln(stream, &len))) {
        return -1;
    }

    if (*lineptr == NULL || len >= *n) {
        /* line is too big for buffer, must realloc */
        /* double the buffer, bail on error */
        if (!(newlnptr = realloc(*lineptr, len * 2))) {
#ifndef HAVE_FGETLN
            /* Our own fgetln() hands out a malloc()d buffer: don't leak it. */
            free(srcln);
#endif
            return -1;
        }
        *lineptr = newlnptr;
        *n = len * 2;
    }

    memcpy(*lineptr, srcln, len);

#ifndef HAVE_FGETLN
    /* Our own implementation of fgetln() returns a malloc()d buffer that we
     * must free
     */
    free(srcln);
#endif

    (*lineptr)[len] = '\0';
    return len;
}
#endif
540 | 
541 | #ifndef HAVE_REALPATH
542 | /*
543 |  * realpath() for Windows. Turns slashes into backslashes and calls _fullpath
544 |  */
545 | char *realpath(const char *path, char *resolved_path) {
546 |     char *p;
547 |     char tmp[MAX_PATH + 1];
548 |     strlcpy(tmp, path, sizeof(tmp));
549 |     p = tmp;
550 |     while (*p) {
551 |         if (*p == '/') {
552 |             *p = '\\';
553 |         }
554 |         p++;
555 |     }
556 |     return _fullpath(resolved_path, tmp, _MAX_PATH);
557 | }
558 | #endif
559 | 
#ifndef HAVE_STRLCPY
/* Copy `src` into `dst`, writing at most `size` bytes including the
 * terminating NUL. `dst` is always NUL-terminated unless `size` is 0, in
 * which case nothing is written. Returns the length of `src`, so a result
 * >= size means the copy was truncated. */
size_t strlcpy(char *dst, const char *src, size_t size) {
    size_t idx = 0;

    if (size != 0) {
        /* Copy as many bytes as will fit, leaving room for the NUL */
        while (idx + 1 < size && src[idx] != '\0') {
            dst[idx] = src[idx];
            idx++;
        }
        dst[idx] = '\0';
    }

    /* Traverse whatever is left of src so the full length can be reported */
    while (src[idx] != '\0') {
        idx++;
    }

    return idx; /* count does not include NUL */
}
#endif
588 | 
#ifndef HAVE_VASPRINTF
/* Fallback vasprintf(): format into a newly malloc()ed string.
 *
 * On success *ret points at the string (owned by the caller) and the number
 * of characters written, excluding the NUL, is returned. On failure a
 * negative value is returned and *ret is NULL.
 *
 * The argument list is consumed twice (once to measure, once to format), so
 * a private copy is made for the measuring pass. The caller's `args` is
 * never passed to va_end() here: that is the job of the function that called
 * va_start(). */
int vasprintf(char **ret, const char *fmt, va_list args) {
    int rv;
    va_list args2;

    *ret = NULL;
/* vsnprintf can destroy args, so we need to copy it for the second call */
#ifdef __va_copy
    /* non-standard macro, but usually exists */
    __va_copy(args2, args);
#elif defined(va_copy)
    /* C99 macro. We compile with -std=c89 but you never know */
    va_copy(args2, args);
#else
    /* Ancient compiler. This usually works but there are no guarantees. */
    memcpy(args2, args, sizeof(va_list));
#endif
    /* Measuring pass on the copy. */
    rv = vsnprintf(NULL, 0, fmt, args2);
    va_end(args2);
    if (rv < 0) {
        return rv;
    }
    *ret = malloc((size_t)rv + 1); /* vsnprintf doesn't count \0 */
    if (*ret == NULL) {
        return -1;
    }
    /* Formatting pass on the caller's (still untouched) list. */
    rv = vsnprintf(*ret, (size_t)rv + 1, fmt, args);
    if (rv < 0) {
        free(*ret);
        *ret = NULL; /* don't hand back a dangling pointer */
    }
    return rv;
}
#endif
622 | 


--------------------------------------------------------------------------------
/src/search.c:
--------------------------------------------------------------------------------
  1 | #include "search.h"
  2 | #include "scandir.h"
  3 | 
  4 | void search_buf(const char *buf, const size_t buf_len,
  5 |                 const char *dir_full_path) {
  6 |     int binary = -1; /* 1 = yes, 0 = no, -1 = don't know */
  7 |     size_t buf_offset = 0;
  8 | 
  9 |     if (opts.search_stream) {
 10 |         binary = 0;
 11 |     } else if (!opts.search_binary_files) {
 12 |         binary = is_binary((const void *)buf, buf_len);
 13 |         if (binary) {
 14 |             log_debug("File %s is binary. Skipping...", dir_full_path);
 15 |             return;
 16 |         }
 17 |     }
 18 | 
 19 |     int matches_len = 0;
 20 |     match_t *matches;
 21 |     size_t matches_size;
 22 |     size_t matches_spare;
 23 | 
 24 |     if (opts.invert_match) {
 25 |         /* If we are going to invert the set of matches at the end, we will need
 26 |          * one extra match struct, even if there are no matches at all. So make
 27 |          * sure we have a nonempty array; and make sure we always have spare
 28 |          * capacity for one extra.
 29 |          */
 30 |         matches_size = 100;
 31 |         matches = ag_malloc(matches_size * sizeof(match_t));
 32 |         matches_spare = 1;
 33 |     } else {
 34 |         matches_size = 0;
 35 |         matches = NULL;
 36 |         matches_spare = 0;
 37 |     }
 38 | 
 39 |     if (opts.query_len == 1 && opts.query[0] == '.') {
 40 |         matches_size = 1;
 41 |         matches = ag_malloc(matches_size * sizeof(match_t));
 42 |         matches[0].start = 0;
 43 |         matches[0].end = buf_len;
 44 |         matches_len = 1;
 45 |     } else if (opts.literal) {
 46 |         const char *match_ptr = buf;
 47 |         strncmp_fp ag_strnstr_fp = get_strstr(opts.casing);
 48 | 
 49 |         while (buf_offset < buf_len) {
 50 |             match_ptr = ag_strnstr_fp(match_ptr, opts.query, buf_len - buf_offset, opts.query_len, alpha_skip_lookup, find_skip_lookup);
 51 |             if (match_ptr == NULL) {
 52 |                 break;
 53 |             }
 54 | 
 55 |             if (opts.word_regexp) {
 56 |                 const char *start = match_ptr;
 57 |                 const char *end = match_ptr + opts.query_len;
 58 | 
 59 |                 /* Check whether both start and end of the match lie on a word
 60 |                  * boundary
 61 |                  */
 62 |                 if ((start == buf ||
 63 |                      is_wordchar(*(start - 1)) != opts.literal_starts_wordchar) &&
 64 |                     (end == buf + buf_len ||
 65 |                      is_wordchar(*end) != opts.literal_ends_wordchar)) {
 66 |                     /* It's a match */
 67 |                 } else {
 68 |                     /* It's not a match */
 69 |                     match_ptr += opts.query_len;
 70 |                     buf_offset = end - buf;
 71 |                     continue;
 72 |                 }
 73 |             }
 74 | 
 75 |             if ((size_t)matches_len + matches_spare >= matches_size) {
 76 |                 /* TODO: benchmark initial size of matches. 100 may be too small/big */
 77 |                 matches_size = matches ? matches_size * 2 : 100;
 78 |                 log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
 79 |                 matches = ag_realloc(matches, matches_size * sizeof(match_t));
 80 |             }
 81 | 
 82 |             matches[matches_len].start = match_ptr - buf;
 83 |             matches[matches_len].end = matches[matches_len].start + opts.query_len;
 84 |             buf_offset = matches[matches_len].end;
 85 |             log_debug("Match found. File %s, offset %lu bytes.", dir_full_path, matches[matches_len].start);
 86 |             matches_len++;
 87 |             match_ptr += opts.query_len;
 88 | 
 89 |             if (matches_len >= opts.max_matches_per_file) {
 90 |                 log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
 91 |                 break;
 92 |             }
 93 |         }
 94 |     } else {
 95 |         int offset_vector[3];
 96 |         while (buf_offset < buf_len &&
 97 |                (pcre_exec(opts.re, opts.re_extra, buf, buf_len, buf_offset, 0, offset_vector, 3)) >= 0) {
 98 |             log_debug("Regex match found. File %s, offset %i bytes.", dir_full_path, offset_vector[0]);
 99 |             buf_offset = offset_vector[1];
100 | 
101 |             /* TODO: copy-pasted from above. FIXME */
102 |             if ((size_t)matches_len + matches_spare >= matches_size) {
103 |                 matches_size = matches ? matches_size * 2 : 100;
104 |                 log_debug("Too many matches in %s. Reallocating matches to %zu.", dir_full_path, matches_size);
105 |                 matches = ag_realloc(matches, matches_size * sizeof(match_t));
106 |             }
107 | 
108 |             matches[matches_len].start = offset_vector[0];
109 |             matches[matches_len].end = offset_vector[1];
110 |             matches_len++;
111 | 
112 |             if (matches_len >= opts.max_matches_per_file) {
113 |                 log_err("Too many matches in %s. Skipping the rest of this file.", dir_full_path);
114 |                 break;
115 |             }
116 |         }
117 |     }
118 | 
119 |     if (opts.invert_match) {
120 |         matches_len = invert_matches(buf, buf_len, matches, matches_len);
121 |     }
122 | 
123 |     if (opts.stats) {
124 |         pthread_mutex_lock(&stats_mtx);
125 |         stats.total_bytes += buf_len;
126 |         stats.total_files++;
127 |         stats.total_matches += matches_len;
128 |         pthread_mutex_unlock(&stats_mtx);
129 |     }
130 | 
131 |     if (matches_len > 0) {
132 |         if (binary == -1 && !opts.print_filename_only) {
133 |             binary = is_binary((const void *)buf, buf_len);
134 |         }
135 |         pthread_mutex_lock(&print_mtx);
136 |         if (opts.print_filename_only) {
137 |             /* If the --files-without-matches or -L option in passed we should
138 |              * not print a matching line. This option currently sets
139 |              * opts.print_filename_only and opts.invert_match. Unfortunately
140 |              * setting the latter has the side effect of making matches.len = 1
141 |              * on a file-without-matches which is not desired behaviour. See
142 |              * GitHub issue 206 for the consequences if this behaviour is not
143 |              * checked. */
144 |             if (!opts.invert_match || matches_len < 2) {
145 |                 print_path(dir_full_path, opts.null_follows_filename ? 0 : '\n');
146 |             }
147 |         } else if (binary) {
148 |             print_binary_file_matches(dir_full_path);
149 |         } else {
150 |             print_file_matches(dir_full_path, buf, buf_len, matches, matches_len);
151 |         }
152 |         pthread_mutex_unlock(&print_mtx);
153 |         opts.match_found = 1;
154 |     } else if (opts.search_stream && opts.passthrough) {
155 |         fprintf(out_fd, "%s", buf);
156 |     } else {
157 |         log_debug("No match in %s", dir_full_path);
158 |     }
159 | 
160 |     if (matches_size > 0) {
161 |         free(matches);
162 |     }
163 | }
164 | 
165 | /* TODO: this will only match single lines. multi-line regexes silently don't match */
166 | void search_stream(FILE *stream, const char *path) {
167 |     char *line = NULL;
168 |     ssize_t line_len = 0;
169 |     size_t line_cap = 0;
170 |     size_t i;
171 | 
172 |     for (i = 1; (line_len = getline(&line, &line_cap, stream)) > 0; i++) {
173 |         opts.stream_line_num = i;
174 |         search_buf(line, line_len, path);
175 |     }
176 | 
177 |     free(line);
178 | }
179 | 
180 | void search_file(const char *file_full_path) {
181 |     int fd;
182 |     off_t f_len = 0;
183 |     char *buf = NULL;
184 |     struct stat statbuf;
185 |     int rv = 0;
186 |     FILE *pipe = NULL;
187 | 
188 |     fd = open(file_full_path, O_RDONLY);
189 |     if (fd < 0) {
190 |         /* XXXX: strerror is not thread-safe */
191 |         log_err("Skipping %s: Error opening file: %s", file_full_path, strerror(errno));
192 |         goto cleanup;
193 |     }
194 | 
195 |     rv = fstat(fd, &statbuf);
196 |     if (rv != 0) {
197 |         log_err("Skipping %s: Error fstat()ing file.", file_full_path);
198 |         goto cleanup;
199 |     }
200 | 
201 |     if (opts.stdout_inode != 0 && opts.stdout_inode == statbuf.st_ino) {
202 |         log_debug("Skipping %s: stdout is redirected to it", file_full_path);
203 |         goto cleanup;
204 |     }
205 | 
206 |     if ((statbuf.st_mode & S_IFMT) == 0) {
207 |         log_err("Skipping %s: Mode %u is not a file.", file_full_path, statbuf.st_mode);
208 |         goto cleanup;
209 |     }
210 | 
211 |     if (statbuf.st_mode & S_IFIFO) {
212 |         log_debug("%s is a named pipe. stream searching", file_full_path);
213 |         pipe = fdopen(fd, "r");
214 |         search_stream(pipe, file_full_path);
215 |         fclose(pipe);
216 |         goto cleanup;
217 |     }
218 | 
219 |     f_len = statbuf.st_size;
220 | 
221 |     if (f_len == 0) {
222 |         log_debug("Skipping %s: file is empty.", file_full_path);
223 |         goto cleanup;
224 |     }
225 | 
226 |     if (!opts.literal && f_len > INT_MAX) {
227 |         log_err("Skipping %s: pcre_exec() can't handle files larger than %i bytes.", file_full_path, INT_MAX);
228 |         goto cleanup;
229 |     }
230 | 
231 | #ifdef _WIN32
232 |     {
233 |         HANDLE hmmap = CreateFileMapping(
234 |             (HANDLE)_get_osfhandle(fd), 0, PAGE_READONLY, 0, f_len, NULL);
235 |         buf = (char *)MapViewOfFile(hmmap, FILE_SHARE_READ, 0, 0, f_len);
236 |         if (hmmap != NULL)
237 |             CloseHandle(hmmap);
238 |     }
239 |     if (buf == NULL) {
240 |         FormatMessageA(
241 |             FORMAT_MESSAGE_ALLOCATE_BUFFER |
242 |                 FORMAT_MESSAGE_FROM_SYSTEM |
243 |                 FORMAT_MESSAGE_IGNORE_INSERTS,
244 |             NULL, GetLastError(), 0, (void *)&buf, 0, NULL);
245 |         log_err("File %s failed to load: %s.", file_full_path, buf);
246 |         LocalFree((void *)buf);
247 |         goto cleanup;
248 |     }
249 | #else
250 |     buf = mmap(0, f_len, PROT_READ, MAP_SHARED, fd, 0);
251 |     if (buf == MAP_FAILED) {
252 |         log_err("File %s failed to load: %s.", file_full_path, strerror(errno));
253 |         goto cleanup;
254 |     }
255 | #if HAVE_MADVISE
256 |     madvise(buf, f_len, MADV_SEQUENTIAL);
257 | #elif HAVE_POSIX_FADVISE
258 |     posix_fadvise(fd, 0, f_len, POSIX_MADV_SEQUENTIAL);
259 | #endif
260 | #endif
261 | 
262 |     if (opts.search_zip_files) {
263 |         ag_compression_type zip_type = is_zipped(buf, f_len);
264 |         if (zip_type != AG_NO_COMPRESSION) {
265 |             int _buf_len = (int)f_len;
266 |             char *_buf = decompress(zip_type, buf, f_len, file_full_path, &_buf_len);
267 |             if (_buf == NULL || _buf_len == 0) {
268 |                 log_err("Cannot decompress zipped file %s", file_full_path);
269 |                 goto cleanup;
270 |             }
271 |             search_buf(_buf, _buf_len, file_full_path);
272 |             free(_buf);
273 |             goto cleanup;
274 |         }
275 |     }
276 | 
277 |     search_buf(buf, f_len, file_full_path);
278 | 
279 | cleanup:
280 | 
281 |     if (buf != NULL) {
282 | #ifdef _WIN32
283 |         UnmapViewOfFile(buf);
284 | #else
285 |         munmap(buf, f_len);
286 | #endif
287 |     }
288 |     if (fd != -1) {
289 |         close(fd);
290 |     }
291 | }
292 | 
293 | void *search_file_worker(void *i) {
294 |     work_queue_t *queue_item;
295 |     int worker_id = *(int *)i;
296 | 
297 |     log_debug("Worker %i started", worker_id);
298 |     while (TRUE) {
299 |         pthread_mutex_lock(&work_queue_mtx);
300 |         while (work_queue == NULL) {
301 |             if (done_adding_files) {
302 |                 pthread_mutex_unlock(&work_queue_mtx);
303 |                 log_debug("Worker %i finished.", worker_id);
304 |                 pthread_exit(NULL);
305 |             }
306 |             pthread_cond_wait(&files_ready, &work_queue_mtx);
307 |         }
308 |         queue_item = work_queue;
309 |         work_queue = work_queue->next;
310 |         if (work_queue == NULL) {
311 |             work_queue_tail = NULL;
312 |         }
313 |         pthread_mutex_unlock(&work_queue_mtx);
314 | 
315 |         search_file(queue_item->path);
316 |         free(queue_item->path);
317 |         free(queue_item);
318 |     }
319 | }
320 | 
321 | static int check_symloop_enter(const char *path, dirkey_t *outkey) {
322 | #ifdef _WIN32
323 |     return SYMLOOP_OK;
324 | #else
325 |     struct stat buf;
326 |     symdir_t *item_found = NULL;
327 |     symdir_t *new_item = NULL;
328 | 
329 |     memset(outkey, 0, sizeof(dirkey_t));
330 |     outkey->dev = 0;
331 |     outkey->ino = 0;
332 | 
333 |     int res = stat(path, &buf);
334 |     if (res != 0) {
335 |         log_err("Error stat()ing: %s", path);
336 |         return SYMLOOP_ERROR;
337 |     }
338 | 
339 |     outkey->dev = buf.st_dev;
340 |     outkey->ino = buf.st_ino;
341 | 
342 |     HASH_FIND(hh, symhash, outkey, sizeof(dirkey_t), item_found);
343 |     if (item_found) {
344 |         return SYMLOOP_LOOP;
345 |     }
346 | 
347 |     new_item = (symdir_t *)ag_malloc(sizeof(symdir_t));
348 |     memcpy(&new_item->key, outkey, sizeof(dirkey_t));
349 |     HASH_ADD(hh, symhash, key, sizeof(dirkey_t), new_item);
350 |     return SYMLOOP_OK;
351 | #endif
352 | }
353 | 
354 | static int check_symloop_leave(dirkey_t *dirkey) {
355 | #ifdef _WIN32
356 |     return SYMLOOP_OK;
357 | #else
358 |     symdir_t *item_found = NULL;
359 | 
360 |     if (dirkey->dev == 0 && dirkey->ino == 0) {
361 |         return SYMLOOP_ERROR;
362 |     }
363 | 
364 |     HASH_FIND(hh, symhash, dirkey, sizeof(dirkey_t), item_found);
365 |     if (!item_found) {
366 |         log_err("item not found! weird stuff...\n");
367 |         return SYMLOOP_ERROR;
368 |     }
369 | 
370 |     HASH_DELETE(hh, symhash, item_found);
371 |     free(item_found);
372 |     return SYMLOOP_OK;
373 | #endif
374 | }
375 | 
376 | /* TODO: Append matches to some data structure instead of just printing them out.
377 |  * Then ag can have sweet summaries of matches/files scanned/time/etc.
378 |  */
379 | void search_dir(ignores *ig, const char *base_path, const char *path, const int depth) {
380 |     struct dirent **dir_list = NULL;
381 |     struct dirent *dir = NULL;
382 |     scandir_baton_t scandir_baton;
383 |     int results = 0;
384 | 
385 |     char *dir_full_path = NULL;
386 |     const char *ignore_file = NULL;
387 |     int i;
388 | 
389 |     int symres;
390 |     dirkey_t current_dirkey;
391 | 
392 |     symres = check_symloop_enter(path, &current_dirkey);
393 |     if (symres == SYMLOOP_LOOP) {
394 |         log_err("Recursive directory loop: %s", path);
395 |         return;
396 |     }
397 | 
398 |     /* find agignore/gitignore/hgignore/etc files to load ignore patterns from */
399 |     for (i = 0; opts.skip_vcs_ignores ? (i == 0) : (ignore_pattern_files[i] != NULL); i++) {
400 |         ignore_file = ignore_pattern_files[i];
401 |         ag_asprintf(&dir_full_path, "%s/%s", path, ignore_file);
402 |         if (strcmp(SVN_DIR, ignore_file) == 0) {
403 |             load_svn_ignore_patterns(ig, dir_full_path);
404 |         } else {
405 |             load_ignore_patterns(ig, dir_full_path);
406 |         }
407 |         free(dir_full_path);
408 |         dir_full_path = NULL;
409 |     }
410 | 
411 |     if (opts.path_to_agignore) {
412 |         load_ignore_patterns(ig, opts.path_to_agignore);
413 |     }
414 | 
415 |     scandir_baton.ig = ig;
416 |     scandir_baton.base_path = base_path;
417 |     scandir_baton.base_path_len = base_path ? strlen(base_path) : 0;
418 |     results = ag_scandir(path, &dir_list, &filename_filter, &scandir_baton);
419 |     if (results == 0) {
420 |         log_debug("No results found in directory %s", path);
421 |         goto search_dir_cleanup;
422 |     } else if (results == -1) {
423 |         if (errno == ENOTDIR) {
424 |             /* Not a directory. Probably a file. */
425 |             if (depth == 0 && opts.paths_len == 1) {
426 |                 /* If we're only searching one file, don't print the filename header at the top. */
427 |                 if (opts.print_path == PATH_PRINT_DEFAULT || opts.print_path == PATH_PRINT_DEFAULT_EACH_LINE) {
428 |                     opts.print_path = PATH_PRINT_NOTHING;
429 |                 }
430 |             }
431 |             search_file(path);
432 |         } else {
433 |             log_err("Error opening directory %s: %s", path, strerror(errno));
434 |         }
435 |         goto search_dir_cleanup;
436 |     }
437 | 
438 |     int offset_vector[3];
439 |     int rc = 0;
440 |     work_queue_t *queue_item;
441 | 
442 |     for (i = 0; i < results; i++) {
443 |         queue_item = NULL;
444 |         dir = dir_list[i];
445 |         ag_asprintf(&dir_full_path, "%s/%s", path, dir->d_name);
446 | 
447 |         /* If a link points to a directory then we need to treat it as a directory. */
448 |         if (!opts.follow_symlinks && is_symlink(path, dir)) {
449 |             log_debug("File %s ignored becaused it's a symlink", dir->d_name);
450 |             goto cleanup;
451 |         }
452 | 
453 |         if (!is_directory(path, dir)) {
454 |             if (opts.file_search_regex) {
455 |                 rc = pcre_exec(opts.file_search_regex, NULL, dir_full_path, strlen(dir_full_path),
456 |                                0, 0, offset_vector, 3);
457 |                 if (rc < 0) { /* no match */
458 |                     log_debug("Skipping %s due to file_search_regex.", dir_full_path);
459 |                     goto cleanup;
460 |                 } else if (opts.match_files) {
461 |                     log_debug("match_files: file_search_regex matched for %s.", dir_full_path);
462 |                     pthread_mutex_lock(&print_mtx);
463 |                     print_path(dir_full_path, '\n');
464 |                     pthread_mutex_unlock(&print_mtx);
465 |                     goto cleanup;
466 |                 }
467 |             }
468 | 
469 |             queue_item = ag_malloc(sizeof(work_queue_t));
470 |             queue_item->path = dir_full_path;
471 |             queue_item->next = NULL;
472 |             pthread_mutex_lock(&work_queue_mtx);
473 |             if (work_queue_tail == NULL) {
474 |                 work_queue = queue_item;
475 |             } else {
476 |                 work_queue_tail->next = queue_item;
477 |             }
478 |             work_queue_tail = queue_item;
479 |             pthread_cond_signal(&files_ready);
480 |             pthread_mutex_unlock(&work_queue_mtx);
481 |             log_debug("%s added to work queue", dir_full_path);
482 |         } else if (opts.recurse_dirs) {
483 |             if (depth < opts.max_search_depth) {
484 |                 log_debug("Searching dir %s", dir_full_path);
485 |                 ignores *child_ig = init_ignore(ig);
486 |                 search_dir(child_ig, base_path, dir_full_path, depth + 1);
487 |                 cleanup_ignore(child_ig);
488 |             } else {
489 |                 log_err("Skipping %s. Use the --depth option to search deeper.", dir_full_path);
490 |             }
491 |         }
492 | 
493 |     cleanup:
494 |         free(dir);
495 |         dir = NULL;
496 |         if (queue_item == NULL) {
497 |             free(dir_full_path);
498 |             dir_full_path = NULL;
499 |         }
500 |     }
501 | 
502 | search_dir_cleanup:
503 |     check_symloop_leave(&current_dirkey);
504 |     free(dir_list);
505 |     dir_list = NULL;
506 | }
507 | 


--------------------------------------------------------------------------------
/src/options.c:
--------------------------------------------------------------------------------
  1 | #include <errno.h>
  2 | #include <limits.h>
  3 | #include <stdarg.h>
  4 | #include <stdio.h>
  5 | #include <stdlib.h>
  6 | #include <string.h>
  7 | #include <sys/param.h>
  8 | #include <sys/stat.h>
  9 | #include <unistd.h>
 10 | 
 11 | #include "config.h"
 12 | #include "ignore.h"
 13 | #include "options.h"
 14 | #include "lang.h"
 15 | #include "log.h"
 16 | #include "util.h"
 17 | 
/* Default ANSI escape sequences; init_options() copies them into opts. */
const char *color_line_number = "\033[1;33m"; /* bold yellow */
const char *color_match = "\033[30;43m";      /* black with yellow background */
const char *color_path = "\033[1;32m";        /* bold green */
 21 | 
 22 | /* TODO: try to obey out_fd? */
 23 | void usage(void) {
 24 |     printf("\n");
 25 |     printf("Usage: ag [FILE-TYPE] [OPTIONS] PATTERN [PATH]\n\n");
 26 | 
 27 |     printf("  Recursively search for PATTERN in PATH.\n");
 28 |     printf("  Like grep or ack, but faster.\n\n");
 29 | 
 30 |     printf("Example:\n  ag -i foo /bar/\n\n");
 31 | 
 32 |     printf("\
 33 | Output Options:\n\
 34 |      --ackmate            Print results in AckMate-parseable format\n\
 35 |   -A --after [LINES]      Print lines after match (Default: 2)\n\
 36 |   -B --before [LINES]     Print lines before match (Default: 2)\n\
 37 |      --[no]break          Print newlines between matches in different files\n\
 38 |                           (Enabled by default)\n\
 39 |      --[no]color          Print color codes in results (Enabled by default)\n\
 40 |      --color-line-number  Color codes for line numbers (Default: 1;33)\n\
 41 |      --color-match        Color codes for result match numbers (Default: 30;43)\n\
 42 |      --color-path         Color codes for path names (Default: 1;32)\n\
 43 |      --column             Print column numbers in results\n\
 44 |   -H --[no]heading        Print file names (Enabled unless searching a single file)\n\
 45 |      --line-numbers       Print line numbers even for streams\n\
 46 |   -C --context [LINES]    Print lines before and after matches (Default: 2)\n\
 47 |      --[no]group          Same as --[no]break --[no]heading\n\
 48 |   -g PATTERN              Print filenames matching PATTERN\n\
 49 |   -l --files-with-matches Only print filenames that contain matches\n\
 50 |                           (don't print the matching lines)\n\
 51 |   -L --files-without-matches\n\
 52 |                           Only print filenames that don't contain matches\n\
 53 |      --null               Follow filename (-l|-L) with null for 'xargs -0'\n\
 54 |      --no-numbers         Don't print line numbers\n\
 55 |      --print-long-lines   Print matches on very long lines (Default: >2k characters)\n\
 56 |      --passthrough        When searching a stream, print all lines even if they\n\
 57 |                           don't match\n\
 58 |      --silent             Suppress all log messages, including errors\n\
 59 |      --stats              Print stats (files scanned, time taken, etc.)\n\
 60 | \n\
 61 | Search Options:\n\
 62 |   -a --all-types          Search all files (doesn't include hidden files\n\
 63 |                           or patterns from ignore files)\n\
 64 |   -D --debug              Ridiculous debugging (probably not useful)\n\
 65 |      --depth NUM          Search up to NUM directories deep (Default: 25)\n\
 66 |   -f --follow             Follow symlinks\n\
 67 |   -G --file-search-regex  PATTERN Limit search to filenames matching PATTERN\n\
 68 |      --hidden             Search hidden files (obeys .*ignore files)\n\
 69 |   -i --ignore-case        Match case insensitively\n\
 70 |      --ignore PATTERN     Ignore files/directories matching PATTERN\n\
 71 |                           (literal file/directory names also allowed)\n\
 72 |      --ignore-dir NAME    Alias for --ignore for compatibility with ack.\n\
 73 |   -m --max-count NUM      Skip the rest of a file after NUM matches (Default: 10,000)\n\
 74 |   -p --path-to-agignore STRING\n\
 75 |                           Use .agignore file at STRING\n\
 76 |   -Q --literal            Don't parse PATTERN as a regular expression\n\
 77 |   -s --case-sensitive     Match case sensitively\n\
 78 |   -S --smart-case         Match case insensitively unless PATTERN contains\n\
 79 |                           uppercase characters (Enabled by default)\n\
 80 |      --search-binary      Search binary files for matches\n\
 81 |   -t --all-text           Search all text files (doesn't include hidden files)\n\
 82 |   -u --unrestricted       Search all files (ignore .agignore, .gitignore, etc.;\n\
 83 |                           searches binary and hidden files as well)\n\
 84 |   -U --skip-vcs-ignores   Ignore VCS ignore files\n\
 85 |                           (.gitignore, .hgignore, .svnignore; still obey .agignore)\n\
 86 |   -v --invert-match\n\
 87 |   -w --word-regexp        Only match whole words\n\
 88 |   -z --search-zip         Search contents of compressed (e.g., gzip) files\n\
 89 | \n");
 90 |     printf("File Types:\n\
 91 | The search can be restricted to certain types of files. Example:\n\
 92 |   ag --html needle\n\
 93 |   - Searches for 'needle' in files with suffix .htm, .html, .shtml or .xhtml.\n\
 94 | \n\
 95 | For a list of supported file types run:\n\
 96 |   ag --list-file-types\n\n");
 97 | }
 98 | 
 99 | void print_version(void) {
100 |     printf("ag version %s\n", PACKAGE_VERSION);
101 | }
102 | 
103 | void init_options(void) {
104 |     memset(&opts, 0, sizeof(opts));
105 |     opts.casing = CASE_SMART;
106 | #ifdef _WIN32
107 |     opts.color = getenv("ANSICON") ? TRUE : FALSE;
108 | #else
109 |     opts.color = TRUE;
110 | #endif
111 |     opts.max_matches_per_file = 10000;
112 |     opts.max_search_depth = 25;
113 |     opts.print_break = TRUE;
114 |     opts.print_path = PATH_PRINT_DEFAULT;
115 |     opts.print_line_numbers = TRUE;
116 |     opts.recurse_dirs = TRUE;
117 |     opts.color_path = ag_strdup(color_path);
118 |     opts.color_match = ag_strdup(color_match);
119 |     opts.color_line_number = ag_strdup(color_line_number);
120 | }
121 | 
122 | void cleanup_options(void) {
123 |     free(opts.color_path);
124 |     free(opts.color_match);
125 |     free(opts.color_line_number);
126 | 
127 |     if (opts.query) {
128 |         free(opts.query);
129 |     }
130 | 
131 |     pcre_free(opts.re);
132 |     if (opts.re_extra) {
133 |         /* Using pcre_free_study on pcre_extra* can segfault on some versions of PCRE */
134 |         pcre_free(opts.re_extra);
135 |     }
136 | 
137 |     if (opts.ackmate_dir_filter) {
138 |         pcre_free(opts.ackmate_dir_filter);
139 |     }
140 |     if (opts.ackmate_dir_filter_extra) {
141 |         pcre_free(opts.ackmate_dir_filter_extra);
142 |     }
143 | 
144 |     if (opts.file_search_regex) {
145 |         pcre_free(opts.file_search_regex);
146 |     }
147 |     if (opts.file_search_regex_extra) {
148 |         pcre_free(opts.file_search_regex_extra);
149 |     }
150 | }
151 | 
152 | void parse_options(int argc, char **argv, char **base_paths[], char **paths[]) {
153 |     int ch;
154 |     int i;
155 |     int path_len = 0;
156 |     int useless = 0;
157 |     int group = 1;
158 |     int help = 0;
159 |     int version = 0;
160 |     int list_file_types = 0;
161 |     int opt_index = 0;
162 |     char *num_end;
163 |     const char *home_dir = getenv("HOME");
164 |     char *ignore_file_path = NULL;
165 |     int needs_query = 1;
166 |     struct stat statbuf;
167 |     int rv;
168 | 
169 |     size_t longopts_len, full_len;
170 |     option_t *longopts;
171 |     char *lang_regex = NULL;
172 | 
173 |     init_options();
174 | 
175 |     option_t base_longopts[] = {
176 |         { "ackmate", no_argument, &opts.ackmate, 1 },
177 |         { "ackmate-dir-filter", required_argument, NULL, 0 },
178 |         { "after", optional_argument, NULL, 'A' },
179 |         { "all-text", no_argument, NULL, 't' },
180 |         { "all-types", no_argument, NULL, 'a' },
181 |         { "before", optional_argument, NULL, 'B' },
182 |         { "break", no_argument, &opts.print_break, 1 },
183 |         { "case-sensitive", no_argument, NULL, 's' },
184 |         { "color", no_argument, &opts.color, 1 },
185 |         { "color-line-number", required_argument, NULL, 0 },
186 |         { "color-match", required_argument, NULL, 0 },
187 |         { "color-path", required_argument, NULL, 0 },
188 |         { "column", no_argument, &opts.column, 1 },
189 |         { "context", optional_argument, NULL, 'C' },
190 |         { "debug", no_argument, NULL, 'D' },
191 |         { "depth", required_argument, NULL, 0 },
192 |         { "file-search-regex", required_argument, NULL, 'G' },
193 |         { "files-with-matches", no_argument, NULL, 'l' },
194 |         { "files-without-matches", no_argument, NULL, 'L' },
195 |         { "follow", no_argument, &opts.follow_symlinks, 1 },
196 |         { "group", no_argument, &group, 1 },
197 |         { "heading", no_argument, &opts.print_path, PATH_PRINT_TOP },
198 |         { "help", no_argument, NULL, 'h' },
199 |         { "hidden", no_argument, &opts.search_hidden_files, 1 },
200 |         { "ignore", required_argument, NULL, 0 },
201 |         { "ignore-case", no_argument, NULL, 'i' },
202 |         { "ignore-dir", required_argument, NULL, 0 },
203 |         { "invert-match", no_argument, &opts.invert_match, 1 },
204 |         { "line-numbers", no_argument, &opts.print_line_numbers, 2 },
205 |         { "list-file-types", no_argument, &list_file_types, 1 },
206 |         { "literal", no_argument, NULL, 'Q' },
207 |         { "match", no_argument, &useless, 0 },
208 |         { "max-count", required_argument, NULL, 'm' },
209 |         { "no-numbers", no_argument, NULL, 0 },
210 |         { "no-recurse", no_argument, NULL, 'n' },
211 |         { "nobreak", no_argument, &opts.print_break, 0 },
212 |         { "nocolor", no_argument, &opts.color, 0 },
213 |         { "nofollow", no_argument, &opts.follow_symlinks, 0 },
214 |         { "nogroup", no_argument, &group, 0 },
215 |         { "noheading", no_argument, &opts.print_path, PATH_PRINT_EACH_LINE },
216 |         { "nopager", no_argument, NULL, 0 },
217 |         { "null", no_argument, &opts.null_follows_filename, 1 },
218 |         { "pager", required_argument, NULL, 0 },
219 |         { "parallel", no_argument, &opts.parallel, 1 },
220 |         { "passthrough", no_argument, &opts.passthrough, 1 },
221 |         { "passthru", no_argument, &opts.passthrough, 1 },
222 |         { "path-to-agignore", required_argument, NULL, 'p' },
223 |         { "print-long-lines", no_argument, &opts.print_long_lines, 1 },
224 |         { "recurse", no_argument, NULL, 'r' },
225 |         { "search-binary", no_argument, &opts.search_binary_files, 1 },
226 |         { "search-files", no_argument, &opts.search_stream, 0 },
227 |         { "search-zip", no_argument, &opts.search_zip_files, 1 },
228 |         { "silent", no_argument, NULL, 0 },
229 |         { "skip-vcs-ignores", no_argument, NULL, 'U' },
230 |         { "smart-case", no_argument, NULL, 'S' },
231 |         { "stats", no_argument, &opts.stats, 1 },
232 |         { "unrestricted", no_argument, NULL, 'u' },
233 |         { "version", no_argument, &version, 1 },
234 |         { "word-regexp", no_argument, NULL, 'w' },
235 |         { "workers", required_argument, NULL, 0 },
236 |     };
237 | 
238 |     longopts_len = (sizeof(base_longopts) / sizeof(option_t));
239 |     full_len = (longopts_len + LANG_COUNT + 1);
240 |     longopts = ag_malloc(full_len * sizeof(option_t));
241 |     memcpy(longopts, base_longopts, sizeof(base_longopts));
242 | 
243 |     for (i = 0; i < LANG_COUNT; i++) {
244 |         option_t opt = { langs[i].name, no_argument, NULL, 0 };
245 |         longopts[i + longopts_len] = opt;
246 |     }
247 |     longopts[full_len - 1] = (option_t) { NULL, 0, NULL, 0 };
248 | 
249 |     if (argc < 2) {
250 |         usage();
251 |         cleanup_ignore(root_ignores);
252 |         cleanup_options();
253 |         exit(1);
254 |     }
255 | 
256 |     rv = fstat(fileno(stdin), &statbuf);
257 |     if (rv == 0) {
258 |         if (S_ISFIFO(statbuf.st_mode)) {
259 |             opts.search_stream = 1;
260 |         }
261 |     }
262 | 
263 |     /* If we're not outputting to a terminal, change output to:
264 |         * turn off colors
265 |         * print filenames on every line
266 |      */
267 |     if (!isatty(fileno(stdout))) {
268 |         opts.color = 0;
269 |         group = 0;
270 | 
271 |         /* Don't search the file that stdout is redirected to */
272 |         rv = fstat(fileno(stdout), &statbuf);
273 |         if (rv != 0) {
274 |             die("Error fstat()ing stdout");
275 |         }
276 |         opts.stdout_inode = statbuf.st_ino;
277 |     }
278 | 
279 |     while ((ch = getopt_long(argc, argv, "A:aB:C:DG:g:fHhiLlm:np:QRrSsvVtuUwz", longopts, &opt_index)) != -1) {
280 |         switch (ch) {
281 |             case 'A':
282 |                 if (optarg) {
283 |                     opts.after = strtol(optarg, &num_end, 10);
284 |                     if (num_end == optarg || *num_end != '\0' || errno == ERANGE) {
285 |                         /* This arg must be the search string instead of the after length */
286 |                         optind--;
287 |                         opts.after = DEFAULT_AFTER_LEN;
288 |                     }
289 |                 } else {
290 |                     opts.after = DEFAULT_AFTER_LEN;
291 |                 }
292 |                 break;
293 |             case 'a':
294 |                 opts.search_all_files = 1;
295 |                 opts.search_binary_files = 1;
296 |                 break;
297 |             case 'B':
298 |                 if (optarg) {
299 |                     opts.before = strtol(optarg, &num_end, 10);
300 |                     if (num_end == optarg || *num_end != '\0' || errno == ERANGE) {
301 |                         /* This arg must be the search string instead of the before length */
302 |                         optind--;
303 |                         opts.before = DEFAULT_BEFORE_LEN;
304 |                     }
305 |                 } else {
306 |                     opts.before = DEFAULT_BEFORE_LEN;
307 |                 }
308 |                 break;
309 |             case 'C':
310 |                 if (optarg) {
311 |                     opts.context = strtol(optarg, &num_end, 10);
312 |                     if (num_end == optarg || *num_end != '\0' || errno == ERANGE) {
313 |                         /* This arg must be the search string instead of the context length */
314 |                         optind--;
315 |                         opts.context = DEFAULT_CONTEXT_LEN;
316 |                     }
317 |                 } else {
318 |                     opts.context = DEFAULT_CONTEXT_LEN;
319 |                 }
320 |                 break;
321 |             case 'D':
322 |                 set_log_level(LOG_LEVEL_DEBUG);
323 |                 break;
324 |             case 'f':
325 |                 opts.follow_symlinks = 1;
326 |                 break;
327 |             case 'g':
328 |                 needs_query = 0;
329 |                 opts.match_files = 1;
330 |             /* Fall through and build regex */
331 |             case 'G':
332 |                 compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, optarg, opts.casing & PCRE_CASELESS, 0);
333 |                 opts.casing = CASE_SENSITIVE;
334 |                 break;
335 |             case 'H':
336 |                 opts.print_path = PATH_PRINT_TOP;
337 |                 break;
338 |             case 'h':
339 |                 help = 1;
340 |                 break;
341 |             case 'i':
342 |                 opts.casing = CASE_INSENSITIVE;
343 |                 break;
344 |             case 'L':
345 |                 opts.invert_match = 1;
346 |             /* fall through */
347 |             case 'l':
348 |                 opts.print_filename_only = 1;
349 |                 break;
350 |             case 'm':
351 |                 opts.max_matches_per_file = atoi(optarg);
352 |                 break;
353 |             case 'n':
354 |                 opts.recurse_dirs = 0;
355 |                 break;
356 |             case 'p':
357 |                 opts.path_to_agignore = optarg;
358 |                 break;
359 |             case 'Q':
360 |                 opts.literal = 1;
361 |                 break;
362 |             case 'R':
363 |             case 'r':
364 |                 opts.recurse_dirs = 1;
365 |                 break;
366 |             case 'S':
367 |                 opts.casing = CASE_SMART;
368 |                 break;
369 |             case 's':
370 |                 opts.casing = CASE_SENSITIVE;
371 |                 break;
372 |             case 't':
373 |                 opts.search_all_files = 1;
374 |                 break;
375 |             case 'u':
376 |                 opts.search_binary_files = 1;
377 |                 opts.search_all_files = 1;
378 |                 opts.search_hidden_files = 1;
379 |                 break;
380 |             case 'U':
381 |                 opts.skip_vcs_ignores = 1;
382 |                 break;
383 |             case 'v':
384 |                 opts.invert_match = 1;
385 |                 break;
386 |             case 'V':
387 |                 version = 1;
388 |                 break;
389 |             case 'w':
390 |                 opts.word_regexp = 1;
391 |                 break;
392 |             case 'z':
393 |                 opts.search_zip_files = 1;
394 |                 break;
395 |             case 0: /* Long option */
396 |                 if (strcmp(longopts[opt_index].name, "ackmate-dir-filter") == 0) {
397 |                     compile_study(&opts.ackmate_dir_filter, &opts.ackmate_dir_filter_extra, optarg, 0, 0);
398 |                     break;
399 |                 } else if (strcmp(longopts[opt_index].name, "depth") == 0) {
400 |                     opts.max_search_depth = atoi(optarg);
401 |                     break;
402 |                 } else if (strcmp(longopts[opt_index].name, "no-numbers") == 0) {
403 |                     opts.print_line_numbers = FALSE;
404 |                     break;
405 |                 } else if (strcmp(longopts[opt_index].name, "ignore-dir") == 0) {
406 |                     add_ignore_pattern(root_ignores, optarg);
407 |                     break;
408 |                 } else if (strcmp(longopts[opt_index].name, "ignore") == 0) {
409 |                     add_ignore_pattern(root_ignores, optarg);
410 |                     break;
411 |                 } else if (strcmp(longopts[opt_index].name, "nopager") == 0) {
412 |                     out_fd = stdout;
413 |                     opts.pager = NULL;
414 |                     break;
415 |                 } else if (strcmp(longopts[opt_index].name, "pager") == 0) {
416 |                     opts.pager = optarg;
417 |                     break;
418 |                 } else if (strcmp(longopts[opt_index].name, "workers") == 0) {
419 |                     opts.workers = atoi(optarg);
420 |                     break;
421 |                 } else if (strcmp(longopts[opt_index].name, "color-line-number") == 0) {
422 |                     free(opts.color_line_number);
423 |                     ag_asprintf(&opts.color_line_number, "\033[%sm", optarg);
424 |                     break;
425 |                 } else if (strcmp(longopts[opt_index].name, "color-match") == 0) {
426 |                     free(opts.color_match);
427 |                     ag_asprintf(&opts.color_match, "\033[%sm", optarg);
428 |                     break;
429 |                 } else if (strcmp(longopts[opt_index].name, "color-path") == 0) {
430 |                     free(opts.color_path);
431 |                     ag_asprintf(&opts.color_path, "\033[%sm", optarg);
432 |                     break;
433 |                 } else if (strcmp(longopts[opt_index].name, "silent") == 0) {
434 |                     set_log_level(LOG_LEVEL_NONE);
435 |                     break;
436 |                 }
437 | 
438 |                 /* Continue to usage if we don't recognize the option */
439 |                 if (longopts[opt_index].flag != 0) {
440 |                     break;
441 |                 }
442 | 
443 |                 for (i = 0; i < LANG_COUNT; i++) {
444 |                     if (strcmp(longopts[opt_index].name, langs[i].name) == 0) {
445 |                         lang_regex = make_lang_regex(langs[i].extensions);
446 |                         compile_study(&opts.file_search_regex, &opts.file_search_regex_extra, lang_regex, 0, 0);
447 |                         break;
448 |                     }
449 |                 }
450 |                 if (lang_regex) {
451 |                     free(lang_regex);
452 |                     lang_regex = NULL;
453 |                     break;
454 |                 }
455 | 
456 |                 log_err("option %s does not take a value", longopts[opt_index].name);
457 |             default:
458 |                 usage();
459 |                 exit(1);
460 |         }
461 |     }
462 | 
463 |     free(longopts);
464 | 
465 |     argc -= optind;
466 |     argv += optind;
467 | 
468 |     if (opts.pager) {
469 |         out_fd = popen(opts.pager, "w");
470 |         if (!out_fd) {
471 |             perror("Failed to run pager");
472 |             exit(1);
473 |         }
474 |     }
475 | 
476 |     if (help) {
477 |         usage();
478 |         exit(0);
479 |     }
480 | 
481 |     if (version) {
482 |         print_version();
483 |         exit(0);
484 |     }
485 | 
486 |     if (list_file_types) {
487 |         int lang_index;
488 |         printf("The following file types are supported:\n");
489 |         for (lang_index = 0; lang_index < LANG_COUNT; lang_index++) {
490 |             printf("  --%s\n    ", langs[lang_index].name);
491 |             int j;
492 |             for (j = 0; j < MAX_EXTENSIONS && langs[lang_index].extensions[j]; j++) {
493 |                 printf("  .%s", langs[lang_index].extensions[j]);
494 |             }
495 |             printf("\n\n");
496 |         }
497 |         exit(0);
498 |     }
499 | 
500 |     if (needs_query && argc == 0) {
501 |         log_err("What do you want to search for?");
502 |         exit(1);
503 |     }
504 | 
505 |     if (home_dir && !opts.search_all_files) {
506 |         log_debug("Found user's home dir: %s", home_dir);
507 |         ag_asprintf(&ignore_file_path, "%s/%s", home_dir, ignore_pattern_files[0]);
508 |         load_ignore_patterns(root_ignores, ignore_file_path);
509 |         free(ignore_file_path);
510 |     }
511 | 
512 |     if (!opts.skip_vcs_ignores) {
513 |         FILE *gitconfig_file = NULL;
514 |         size_t buf_len = 0;
515 |         char *gitconfig_res = NULL;
516 | 
517 |         gitconfig_file = popen("git config -z --get core.excludesfile 2>/dev/null", "r");
518 |         if (gitconfig_file != NULL) {
519 |             do {
520 |                 gitconfig_res = ag_realloc(gitconfig_res, buf_len + 65);
521 |                 buf_len += fread(gitconfig_res + buf_len, 1, 64, gitconfig_file);
522 |             } while (!feof(gitconfig_file) && buf_len > 0 && buf_len % 64 == 0);
523 |             gitconfig_res[buf_len] = '\0';
524 |             load_ignore_patterns(root_ignores, gitconfig_res);
525 |             free(gitconfig_res);
526 |             pclose(gitconfig_file);
527 |         }
528 |     }
529 | 
530 |     if (opts.context > 0) {
531 |         opts.before = opts.context;
532 |         opts.after = opts.context;
533 |     }
534 | 
535 |     if (opts.ackmate) {
536 |         opts.color = 0;
537 |         opts.print_break = 1;
538 |         group = 1;
539 |         opts.search_stream = 0;
540 |     }
541 | 
542 |     if (opts.parallel) {
543 |         opts.search_stream = 0;
544 |     }
545 | 
546 |     if (opts.print_path != PATH_PRINT_DEFAULT || opts.print_break == 0) {
547 |         goto skip_group;
548 |     }
549 | 
550 |     if (group) {
551 |         opts.print_break = 1;
552 |     } else {
553 |         opts.print_path = PATH_PRINT_DEFAULT_EACH_LINE;
554 |         opts.print_break = 0;
555 |     }
556 | 
557 | skip_group:
558 |     if (opts.search_stream) {
559 |         opts.print_break = 0;
560 |         opts.print_path = PATH_PRINT_NOTHING;
561 |         if (opts.print_line_numbers != 2) {
562 |             opts.print_line_numbers = 0;
563 |         }
564 |     }
565 | 
566 |     if (needs_query) {
567 |         opts.query = ag_strdup(argv[0]);
568 |         argc--;
569 |         argv++;
570 |     } else {
571 |         opts.query = ag_strdup(".");
572 |     }
573 |     opts.query_len = strlen(opts.query);
574 | 
575 |     log_debug("Query is %s", opts.query);
576 | 
577 |     if (opts.query_len == 0) {
578 |         log_err("Error: No query. What do you want to search for?");
579 |         exit(1);
580 |     }
581 | 
582 |     if (!is_regex(opts.query)) {
583 |         opts.literal = 1;
584 |     }
585 | 
586 |     char *path = NULL;
587 |     char *tmp = NULL;
588 |     opts.paths_len = argc;
589 |     if (argc > 0) {
590 |         *paths = ag_calloc(sizeof(char *), argc + 1);
591 |         *base_paths = ag_calloc(sizeof(char *), argc + 1);
592 |         for (i = 0; i < argc; i++) {
593 |             path = ag_strdup(argv[i]);
594 |             path_len = strlen(path);
595 |             /* kill trailing slash */
596 |             if (path_len > 1 && path[path_len - 1] == '/') {
597 |                 path[path_len - 1] = '\0';
598 |             }
599 |             (*paths)[i] = path;
600 |             tmp = ag_malloc(PATH_MAX);
601 |             (*base_paths)[i] = realpath(path, tmp);
602 |         }
603 |         /* Make sure we search these paths instead of stdin. */
604 |         opts.search_stream = 0;
605 |     } else {
606 |         path = ag_strdup(".");
607 |         *paths = ag_malloc(sizeof(char *) * 2);
608 |         *base_paths = ag_malloc(sizeof(char *) * 2);
609 |         (*paths)[0] = path;
610 |         tmp = ag_malloc(PATH_MAX);
611 |         (*base_paths)[0] = realpath(path, tmp);
612 |         i = 1;
613 |     }
614 |     (*paths)[i] = NULL;
615 |     (*base_paths)[i] = NULL;
616 | }
617 | 


--------------------------------------------------------------------------------