├── .gitignore ├── .goreleaser.yml ├── LICENSE ├── README.md ├── main.go └── p /.gitignore: -------------------------------------------------------------------------------- 1 | dist/* 2 | -------------------------------------------------------------------------------- /.goreleaser.yml: -------------------------------------------------------------------------------- 1 | # This is an example goreleaser.yaml file with some sane defaults. 2 | # Make sure to check the documentation at http://goreleaser.com 3 | builds: 4 | - env: 5 | - CGO_ENABLED=0 6 | archive: 7 | replacements: 8 | darwin: Darwin 9 | linux: Linux 10 | windows: Windows 11 | 386: i386 12 | amd64: x86_64 13 | checksum: 14 | name_template: 'checksums.txt' 15 | snapshot: 16 | name_template: "{{ .Tag }}-next" 17 | changelog: 18 | sort: asc 19 | filters: 20 | exclude: 21 | - '^docs:' 22 | - '^test:' 23 | 24 | nfpm: 25 | # You can change the name of the package. 26 | # Default: `{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}{{ if .Arm }}v{{ .Arm }}{{ end }}` 27 | name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}" 28 | 29 | homepage: https://github.com/bellecp/fast-p 30 | description: Fast commandline pdf fuzzy finder 31 | maintainer: http://github.com/bellecp 32 | 33 | # Formats to be generated. 34 | formats: 35 | - deb 36 | - rpm 37 | license: MIT 38 | 39 | brew: 40 | name: fast-pdf-finder 41 | 42 | github: 43 | owner: bellecp 44 | name: homebrew-fast-p 45 | 46 | # Git author used to commit to the repository. 47 | # Defaults are shown. 48 | commit_author: 49 | name: bellecp 50 | email: bellecp@users.noreply.github.com 51 | 52 | # Your app's homepage. 53 | # Default is empty. 54 | homepage: "https://github.com/bellecp/fast-p" 55 | 56 | # Your app's description. 57 | # Default is empty. 58 | description: "Fast, command-line PDF finder" 59 | 60 | # Packages your package depends on. 
61 | dependencies: 62 | - grep 63 | - fzf 64 | - coreutils 65 | - findutils 66 | - poppler 67 | - pkg-config 68 | - the_silver_searcher 69 | 70 | # Custom install script for brew. 71 | # Default is 'bin.install "program"'. 72 | install: | 73 | bin.install "fast-p" 74 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 bellecp (github.com/bellecp) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fast-p 2 | 3 | Quickly find and open a pdf among a collection of thousands of unsorted pdfs through fzf (fuzzy finder) 4 | 5 | - [Installation on Linux](#installation-on-unix-or-linux-based-systems) 6 | - [Installation on OSX](#installation-on-osx-with-homebrew) 7 | - [Usage](#usage) 8 | - [How to clear the cache?](#how-to-clear-the-cache) 9 | - [Launch with keyboard shortcut in Ubuntu](#launch-with-keyboard-shortcut-in-ubuntu) 10 | - [See it in action](#see-it-in-action) 11 | - [Is the historical bash code still available?](#is-the-historical-bash-code-still-available) 12 | 13 | # Installation on Unix or Linux based systems 14 | 15 | 1. __Requirements.__ Make sure the following requirements are satisfied: 16 | - install ``pdftotext``. This comes with the texlive distribution on Linux. 17 | On Ubuntu, run ``sudo apt-get install poppler-utils``. 18 | - install ``fzf``: https://github.com/junegunn/fzf 19 | - install ``GNU grep``, ``ag`` (silver searcher). 20 | 21 | 2. __Install binary__. Do either one of the two steps below: 22 | - __Compile from source with ``go`` and ``go install``.__ 23 | With a working ``golang`` installation, do 24 | ```go install github.com/bellecp/fast-p@v0.2.5``` 25 | It will fetch the code and its dependencies, 26 | compile and create an executable ``fast-p`` in the ``/bin`` folder of your go 27 | installation, typically ``~/go/bin``. Make sure the command ``fast-p`` can be 28 | found (for instance, add ``~/go/bin`` to your ``$PATH``.) 29 | - Or: __Use the precompiled binary for your architecture.__ Download the binary that corresponds to your 30 | architecture at https://github.com/bellecp/fast-p/releases and make sure that 31 | the command ``fast-p`` can be found. 
For instance, 32 | put the binary file ``fast-p`` in ``~/custom/bin`` and add ``export 33 | PATH=~/custom/bin:$PATH`` to your ``.bashrc``. 34 | 35 | 3. __Tweak your .bashrc__. Add the following code to your ``.bashrc`` 36 | ``` 37 | p () { 38 | open=xdg-open # this will open pdf file with the default PDF viewer on KDE, xfce, LXDE and perhaps on other desktops. 39 | 40 | ag -U -g ".pdf$" \ 41 | | fast-p \ 42 | | fzf --read0 --reverse -e -d $'\t' \ 43 | --preview-window down:80% --preview ' 44 | v=$(echo {q} | tr " " "|"); 45 | echo -e {1}"\n"{2} | grep -E "^|$v" -i --color=always; 46 | ' \ 47 | | cut -z -f 1 -d $'\t' | tr -d '\n' | xargs -r --null $open > /dev/null 2> /dev/null 48 | } 49 | 50 | ``` 51 | - You may replace ``ag -U -g ".pdf$"`` with another command that returns a list of pdf files. 52 | - You may replace ``open=...`` by your favorite PDF viewer, for instance ``open=evince`` or ``open=okular``. 53 | 54 | # Installation on OSX with homebrew 55 | 56 | 1. Install [homebrew](https://brew.sh/) and __run__ 57 | ``` 58 | brew install bellecp/fast-p/fast-pdf-finder 59 | ``` 60 | _The above brew formula is experimental. 61 | Please report any issues/suggestions/feedback at https://github.com/bellecp/fast-p/issues._ 62 | 63 | 64 | 2. __Tweak your .bashrc__. Add the following code to your ``.bashrc`` 65 | ``` 66 | p () { 67 | local open 68 | open=open # on OSX, "open" opens a pdf in preview 69 | ag -U -g ".pdf$" \ 70 | | fast-p \ 71 | | fzf --read0 --reverse -e -d $'\t' \ 72 | --preview-window down:80% --preview ' 73 | v=$(echo {q} | gtr " " "|"); 74 | echo -e {1}"\n"{2} | ggrep -E "^|$v" -i --color=always; 75 | ' \ 76 | | gcut -z -f 1 -d $'\t' | gtr -d '\n' | gxargs -r --null $open > /dev/null 2> /dev/null 77 | } 78 | 79 | ``` 80 | - You may replace ``ag -U -g ".pdf$"`` with another command that returns a list of pdf files. 81 | - You may replace ``open=...`` by your favorite PDF viewer, for instance ``open=evince`` or ``open=okular``. 
82 | 83 | __Remark:__ On OSX, we use the command line tools ``gcut``, ``gxargs``, ``ggrep``, ``gtr`` which are the GNU versions 84 | of the tools ``cut``, ``xargs``, ``grep``, ``tr``. This way, we avoid the specifics of the versions of these tools pre-installed on OSX, 85 | and the same ``.bashrc`` code can be used for both OSX and GNU Linux systems. 86 | 87 | # Usage 88 | 89 | Use the command ``p`` to browse among the PDF files in the current directory and its subdirectories. 90 | 91 | The first run of the command will take some time to cache the text extracted from each pdf. Further runs of the command will be much faster since the text extraction will only apply to new pdfs. 92 | 93 | # How to clear the cache? 94 | 95 | To clear the cache (which contains text extracted from PDF), you can run 'fast-p --clear-cache'. This will safely remove the file located at: 96 | ``~/.cache/fast-p-pdftotext-output/fast-p_cached_pdftotext_output.db`` 97 | 98 | For older versions, please manually delete the cache file found at 99 | ``~/.cache/fast-p_cached_pdftotext_output.db`` 100 | 101 | # Launch with keyboard shortcut in Ubuntu 102 | 103 | On Ubuntu desktop (tested in 18.04), one may add a keyboard shortcut to launch a new terminal running the ``p`` command right away. 104 | With the following script, the new terminal window will automatically close after choosing a PDF. 105 | 106 | Create a file ``~/.fast-p-rc`` with 107 | ``` 108 | source .bashrc 109 | p; 110 | sleep 0.15; exit; 111 | ``` 112 | and in Ubuntu Settings/Keyboard, add a custom shortcut that runs the command 113 | ``gnome-terminal -- sh -c "bash --rcfile .fast-p-rc"``. 114 | 115 | 116 | 117 | # See it in action 118 | 119 | ![illustration of the p command](https://user-images.githubusercontent.com/1019692/34446795-12229072-ecac-11e7-856a-ec0df0de60ae.gif) 120 | 121 | 122 | # Is the historical bash code still available? 
123 | 124 | Yes, see https://github.com/bellecp/fast-p/blob/master/p but using the go binary as explained above is recommended for speed and interoperability. 125 | 126 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "encoding/hex" 6 | "flag" 7 | "fmt" 8 | "github.com/boltdb/bolt" 9 | "github.com/cespare/xxhash" 10 | "github.com/mitchellh/go-homedir" 11 | "io" 12 | "log" 13 | "os" 14 | "os/exec" 15 | "path/filepath" 16 | ) 17 | 18 | func hash_file_xxhash(filePath string) (string, error) { 19 | var returnMD5String string 20 | file, err := os.Open(filePath) 21 | if err != nil { 22 | return returnMD5String, err 23 | } 24 | defer file.Close() 25 | hash := xxhash.New() 26 | if _, err := io.Copy(hash, file); err != nil { 27 | return returnMD5String, err 28 | } 29 | hashInBytes := hash.Sum(nil)[:] 30 | returnMD5String = hex.EncodeToString(hashInBytes) 31 | return returnMD5String, nil 32 | 33 | } 34 | 35 | func main() { 36 | flag.Usage = func() { 37 | fmt.Printf(`Usage: fast-p [OPTIONS] 38 | Reads a list of PDF filenames from STDIN and returns a list of null-byte 39 | separated items of the form 40 | filename[TAB]text 41 | where "text" is the text extracted from the first two pages of the PDF 42 | by pdftotext and [TAB] denotes a tab character "\t". 43 | 44 | Common usage of this tool is to pipe the result to FZF with a command in 45 | your .bashrc as explained in https://github.com/bellecp/fast-p. 
46 | 47 | 48 | `) 49 | flag.PrintDefaults() 50 | } 51 | version := flag.Bool("version", false, "Display program version") 52 | clearCache := flag.Bool("clear-cache", false, "Delete cache file located at: \n~/.cache/fast-p-pdftotext-output/fast-p_cached_pdftotext_output.db") 53 | flag.Parse() 54 | 55 | if *version != false { 56 | fmt.Printf("v.0.2.5 \nhttps://github.com/bellecp/fast-p\n") 57 | os.Exit(0) 58 | } 59 | 60 | if *clearCache != false { 61 | removePath, err := homedir.Expand("~/.cache/fast-p-pdftotext-output/fast-p_cached_pdftotext_output.db") 62 | if err != nil { 63 | log.Fatal(err) 64 | os.Exit(1) 65 | } 66 | os.Remove(removePath) 67 | os.Exit(0) 68 | } 69 | 70 | // Create ~/.cache folder if does not exist 71 | // https://stackoverflow.com/questions/37932551/mkdir-if-not-exists-using-golang 72 | cachePath, err := homedir.Expand("~/.cache/fast-p-pdftotext-output/") 73 | os.MkdirAll(cachePath, os.ModePerm) 74 | 75 | // open BoltDB cache database 76 | scanner := bufio.NewScanner(os.Stdin) 77 | boltDbFilepath := filepath.Join(cachePath, "fast-p_cached_pdftotext_output.db") 78 | if err != nil { 79 | log.Fatal(err) 80 | } 81 | db, err := bolt.Open(boltDbFilepath, 0600, nil) 82 | bucketName := "fast-p_bucket_for_cached_pdftotext_output" 83 | if err != nil { 84 | log.Fatal(err) 85 | } 86 | defer db.Close() 87 | 88 | nullByte := "\u0000" 89 | 90 | db.Update(func(tx *bolt.Tx) error { 91 | _, err := tx.CreateBucketIfNotExists([]byte(bucketName)) 92 | if err != nil { 93 | return fmt.Errorf("create bucket: %s", err) 94 | } 95 | return nil 96 | }) 97 | 98 | missing := make(map[string]string) 99 | alreadySeen := make(map[string]bool) 100 | 101 | for scanner.Scan() { 102 | filepath := scanner.Text() 103 | hash, err := hash_file_xxhash(filepath) 104 | if alreadySeen[hash] != true { 105 | alreadySeen[hash] = true 106 | if err != nil { 107 | log.Println("err", hash) 108 | } 109 | var content string 110 | found := false 111 | err2 := db.View(func(tx *bolt.Tx) error { 112 | 
b := tx.Bucket([]byte(bucketName)) 113 | v := b.Get([]byte(hash)) 114 | if v != nil { 115 | found = true 116 | content = string(v) 117 | } 118 | return nil 119 | }) 120 | if err2 != nil { 121 | log.Println(err2) 122 | } 123 | if found == true { 124 | fmt.Println(filepath + "\t" + content + nullByte) 125 | } else { 126 | missing[hash] = filepath 127 | } 128 | } 129 | } 130 | for hash, filepath := range missing { 131 | cmd := exec.Command("pdftotext", "-l", "2", filepath, "-") 132 | out, err := cmd.CombinedOutput() 133 | content := string(out) 134 | if err != nil { 135 | log.Println(err) 136 | } 137 | fmt.Println(filepath + "\t" + content + nullByte) 138 | db.Update(func(tx *bolt.Tx) error { 139 | b := tx.Bucket([]byte(bucketName)) 140 | err := b.Put([]byte(hash), []byte(content)) 141 | if err != nil { 142 | fmt.Println(err) 143 | } 144 | return nil 145 | }) 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /p: -------------------------------------------------------------------------------- 1 | # This file is kept only for historical reasons. 2 | # It is recommended to use the go binary and the installatoin procedure 3 | # describe at https://github.com/bellecp/fast-p 4 | 5 | ## Installation 6 | # - install ``pdftotext``. This comes with the texlive distribution on linux or with poppler on OSX. 7 | # - install ``fzf``: https://github.com/junegunn/fzf 8 | # - install ``xxhash``: https://github.com/Cyan4973/xxHash 9 | # - install ``GNU grep``, ``ag`` (silver searcher) 10 | # - clone the repository: ``$ git clone https://github.com/bellecp/fast-p.git`` 11 | # - add a line ``source fast-p/p`` to your .bashrc or .bash_profile 12 | # - Run the command ``p``. The first run of the command will take some time to 13 | # cache the text extracted from each pdf. Further runs of the command will be 14 | # much faster since the text extraction will only apply to new pdfs. 
#
## Usage
#
# Run the command ``p`` and start typing keywords to search for pdf.
# Type "enter" to view the pdf in the default viewer

# p: list the PDF files found by ``ag`` below the current directory, pair each
# one with the text of its first two pages (extracted by pdftotext and cached
# in ~/.cache/pdftotext under the file's xxh64 sum), let the user pick one in
# fzf and open the selection with ``open`` (Darwin) or ``gio open`` (otherwise).
p () {
    local DIR open CACHEDLIST PDFLIST TAB SELECTION HASH FILE CACHE
    TAB=$'\t'
    DIR="${HOME}/.cache/pdftotext"
    mkdir -p "${DIR}"
    # presumably here so that the "$DIR"/* glob below always matches a file
    touch "$DIR/NOOP"

    # Private temporary files rather than fixed, predictable names in /tmp.
    PDFLIST=$(mktemp "${TMPDIR:-/tmp}/fast-p-pdflist.XXXXXX") || return 1
    CACHEDLIST=$(mktemp "${TMPDIR:-/tmp}/fast-p-cached.XXXXXX") \
        || { rm -f "$PDFLIST"; return 1; }

    if [ "$(uname)" = "Darwin" ]; then
        open=open
    else
        open="gio open"
    fi

    # escape filenames
    # compute xxh sum
    # replace separator by tab character
    # sort to prepare for join
    # remove duplicates
    ag -U -g ".pdf$" | sed 's/\([ \o47()"&;\\]\)/\\\1/g;s/\o15/\\r/g' \
        | xargs xxh64sum \
        | sed 's/  */\t/' \
        | sort \
        | awk 'BEGIN {FS="\t"; OFS="\t"}; !seen[$1]++ {print $1, $2}' \
        >| "$PDFLIST"

    # print (hashsum, cached text) for every previously cached output of pdftotext
    # remove full path
    # replace separator by tab character
    # sort to prepare for join
    # -H keeps the "file:" prefix even if the glob matches a single file.
    grep -H "" "$DIR"/* \
        | sed 's=.*cache/pdftotext/==' \
        | sed 's/:/\t/' \
        | sort \
        >| "$CACHEDLIST"

    SELECTION=$(
        {
            echo " "; # starting to type query sends it to fzf right away
            join -t "$TAB" "$PDFLIST" "$CACHEDLIST"; # already cached pdfs
            # Next, apply pdftotext to pdfs that haven't been cached yet
            comm -13 \
                <(awk 'BEGIN {FS="\t"; OFS="\t"}; {print $1}' "$CACHEDLIST") \
                <(awk 'BEGIN {FS="\t"; OFS="\t"}; {print $1}' "$PDFLIST") \
                | join -t "$TAB" - "$PDFLIST" \
                | awk 'BEGIN {FS="\t"; OFS="\t"}; !seen[$1]++ {print $1, $2}' \
                | while IFS="$TAB" read -r HASH FILE; do
                    CACHE="$DIR/$HASH"
                    # Newlines become "_" so the text fits on one fzf line;
                    # the preview below turns "_" back into newlines.
                    pdftotext -f 1 -l 2 "$FILE" - 2>/dev/null | tr "\n" "_" >| "$CACHE"
                    # printf, unlike echo -e, does not interpret backslash
                    # sequences that occur in the extracted text.
                    printf '%s\t%s\t%s\n' "$HASH" "$FILE" "$(cat "$CACHE")"
                done
        } | fzf --reverse -e -d '\t' \
                --with-nth=2,3 \
                --preview-window down:80% \
                --preview '
                    v=$(echo {q} | tr " " "|");
                    echo {2} | grep -E "^|$v" -i --color=always;
                    echo {3} | tr "__" "\n" | grep -E "^|$v" -i --color=always;
                ' \
          | awk 'BEGIN {FS="\t"; OFS="\t"}; {print $2}'
    )

    rm -f "$PDFLIST" "$CACHEDLIST"

    # Nothing selected (fzf cancelled, or the blank first entry was chosen).
    [ -n "$SELECTION" ] || return 0

    # $open is intentionally unquoted: "gio open" must split into two words.
    $open "$SELECTION" > /dev/null 2> /dev/null
}