├── .gitignore ├── DNSBinarySearch ├── DNSBinarySearch.go ├── DNSBinarySearch_test.go └── test_data.txt ├── LICENSE ├── README.md ├── dnsgrep.go ├── experimentalServer ├── .gitignore ├── config.json └── experimentalServer.go └── scripts ├── fdns_a.sh └── rdns.sh /.gitignore: -------------------------------------------------------------------------------- 1 | dnsgrep 2 | *.sw* 3 | 4 | -------------------------------------------------------------------------------- /DNSBinarySearch/DNSBinarySearch.go: -------------------------------------------------------------------------------- 1 | package DNSBinarySearch 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "io" 7 | "os" 8 | "sort" 9 | "strings" 10 | 11 | // for string reverse function 12 | "github.com/golang/example/stringutil" 13 | ) 14 | 15 | // we expect every line to be less than 500 bytes (DNS only allows 255) 16 | const MAXLINESIZE = 500 17 | 18 | // scan backwards 10 kilobytes at a time looking for the edge of our matched string 19 | const WALKBYTES = 10000 20 | 21 | // we use 2 limits to limit runtime & output size of these library 22 | type Limits struct { 23 | // the maximum distance to scan backwards (x 10kB) 24 | MaxScan int 25 | // the maximum number of lines of output 26 | MaxOutputLines int 27 | } 28 | 29 | var DefaultLimits = Limits{ 30 | MaxScan: 100, // 10MB 31 | MaxOutputLines: 100000, // 100,000 lines 32 | } 33 | 34 | // fetches a string buffer from a file 35 | func getStringBuffer(f *os.File, offset int) (string, error) { 36 | _, err := f.Seek(int64(offset), 0) 37 | if err != nil { 38 | return "", err 39 | } 40 | returnBuf := make([]byte, MAXLINESIZE) 41 | _, err = io.ReadAtLeast(f, returnBuf, MAXLINESIZE) 42 | if err != nil { 43 | return "", err 44 | } 45 | 46 | return string(returnBuf), nil 47 | } 48 | 49 | // get the next line from a random string buffer 50 | // (the first full line, newline char seperated) 51 | func getNextLine(str string) string { 52 | // get the start of the next line 53 | lines := strings.Split(str, "\n") 54 | if len(lines) < 2 { 55 | // we expect the input file to be sufficiently large that we do not need to handle the EOF/start edge cases 56 | // we also expect that every line is less than 500 chars, that could also trigger this case 57 | return "" 58 | } 59 | 60 | // take out what we are going to compare 61 | // (the first line, after the next newline char, up to the length of the line we are trying to find) 62 | return lines[1] 63 | } 64 | 65 | // intermediary helper function to simplify code below 66 | // takes a file, offset and string to search for 67 | // returns a string compareLine which is fullLine truncated to len(searchStr) 68 | // if err is set, result cannot be trusted 69 | func getLineDetails(f *os.File, offset int, searchStr string) (compareLine string, err error) { 70 | // get the string buffer at this offset 71 | stringBuffer, err := getStringBuffer(f, offset) 72 | if err != nil { 73 | return "", err 74 | } 75 | 76 | // get the next line 77 | fullLine := getNextLine(stringBuffer) 78 | compareLine = fullLine 79 | if fullLine == "" { 80 | return "", fmt.Errorf("Failed to get next line from string buffer: %s\n", stringBuffer) 81 | } 82 | 83 | // filter out up to the length of the search string 84 | if len(compareLine) > len(searchStr) { 85 | compareLine = compareLine[0:len(searchStr)] 86 | } 87 | 88 | return 89 | } 90 | 91 | // pass a file path and search string to search for matches 92 | // expects the file to sorted, with domain names at the start of the file, in reverse order 93 | // example: "moc.elpmaxe.www,1.1.1.1" 94 | // returns a list of matches 95 | // example ["1.1.1.1,www.example.com"] 96 | func DNSBinarySearch(filePath string, searchStr string, limit Limits) (ret []string, err error) { 97 | 98 | // reverse the search string 99 | searchStr = stringutil.Reverse(searchStr) 100 | 101 | // open the file & get it's size 102 | f, err := os.Open(filePath) 103 | if err != nil { 104 | return nil, fmt.Errorf("failed to open file") 105 | } 106 | fi, err := f.Stat() 107 | if err != nil { 108 | return nil, fmt.Errorf("failed to stat file") 109 | } 110 | 111 | // use sort.Search to find a line in our sorted file containing the search string 112 | // A possible enhancement here is to define our own sort.Search with an interface to 113 | // pass in the variables we need, rather than implicitly passing to the sub function here... 114 | foundByteLocation := sort.Search(int(fi.Size()), func(i int) bool { 115 | 116 | // use the intermediary function to get the line details at the offset we are currently considering 117 | searchLineCompare, err := getLineDetails(f, i, searchStr) 118 | if err != nil { 119 | // this should trigger an error in the next phase causing us to fail out quickly 120 | return false 121 | } 122 | 123 | // substring compare 124 | if strings.Compare(searchStr, searchLineCompare) > 0 { 125 | return false 126 | } else { 127 | return true 128 | } 129 | }) // end sort.Search 130 | 131 | // check if we found a match, if we did not, exit out 132 | stringBuffer, err := getStringBuffer(f, foundByteLocation) 133 | if err != nil { 134 | return nil, fmt.Errorf("failed to get matched buffer from file?") 135 | } 136 | fullLine := getNextLine(stringBuffer) 137 | if fullLine == "" || !strings.HasPrefix(fullLine, searchStr) { 138 | return nil, fmt.Errorf("failed to find exact match via binary search") 139 | } 140 | 141 | // walk back 10 kilobytes bytes at a time, searching for a line that does not contain a match 142 | minSearchLocation := foundByteLocation 143 | maxScan := limit.MaxScan 144 | for { 145 | 146 | maxScan-- 147 | if maxScan == 0 { 148 | return nil, fmt.Errorf("scan limit reached!") 149 | } 150 | 151 | // walk backwards in the file 152 | minSearchLocation = minSearchLocation - WALKBYTES 153 | if minSearchLocation < 0 { 154 | return nil, fmt.Errorf("scanned backwards too far! Reached start of file!") 155 | } 156 | 157 | // get the string buffer & next line at this offset 158 | searchLineCompare, err := getLineDetails(f, minSearchLocation, searchStr) 159 | if err != nil { 160 | return nil, fmt.Errorf("unexpected failure, failed to fetch next line while walking backwards?") 161 | } 162 | 163 | // we are looking for the first result that does not contain our substring 164 | if strings.Compare(searchStr, searchLineCompare) != 0 { 165 | break 166 | } 167 | } 168 | 169 | // seek to the minimum search location (this is likely unncessary to repeat as it was already done in getLineDetails above) 170 | _, err = f.Seek(int64(minSearchLocation), 0) 171 | if err != nil { 172 | return nil, err 173 | } 174 | 175 | // now that we have a min-location, use a bufio reader & ReadString('\n') to read the next line until they do not match! 176 | 177 | // call readString once to advance the pointer to the next \n 178 | reader := bufio.NewReader(f) 179 | _, err = reader.ReadString('\n') 180 | if err != nil { 181 | return nil, err 182 | } 183 | 184 | firstHit := false // bool flag is set to true once we start matching, once we stop matching and this flag is true, we can exit! 185 | maxOutputLines := limit.MaxOutputLines 186 | for { 187 | 188 | maxOutputLines-- 189 | if maxOutputLines == 0 { 190 | // we likely could return what we have here already, but the result would be incomplete... 191 | return nil, fmt.Errorf("output limit reached!") 192 | } 193 | 194 | // this will read the next string up to the \n char 195 | nextLine, err := reader.ReadString('\n') 196 | if err != nil { 197 | return nil, err 198 | } 199 | 200 | // remove the newline char 201 | nextLine = strings.TrimSuffix(nextLine, "\n") 202 | 203 | // filter out up to the length of the search string 204 | compareLine := nextLine 205 | if len(compareLine) > len(searchStr) { 206 | compareLine = compareLine[0:len(searchStr)] 207 | } 208 | 209 | // strings match! 210 | if strings.Compare(compareLine, searchStr) == 0 { 211 | // append the reversed line 212 | ret = append(ret, stringutil.Reverse(nextLine)) 213 | 214 | // if this is our first hit, mark it as such! 215 | if firstHit == false { 216 | firstHit = true 217 | } 218 | } else if firstHit == true { 219 | // we've had a string match before, and they no longer match! 220 | // it's time to return 221 | break 222 | } 223 | 224 | } 225 | 226 | // and finally, close the file 227 | f.Close() 228 | 229 | return 230 | } 231 | -------------------------------------------------------------------------------- /DNSBinarySearch/DNSBinarySearch_test.go: -------------------------------------------------------------------------------- 1 | package DNSBinarySearch 2 | 3 | import "testing" 4 | 5 | // a quick sanity check to make sure this library works as expected 6 | func TestResult(t *testing.T) { 7 | 8 | output, err := DNSBinarySearch("test_data.txt", "amiccom.com.tw", DefaultLimits) 9 | if err != nil { 10 | t.Fatalf("unexpected error: %+v", err) 11 | } else { 12 | if len(output) != 6 { 13 | t.Fatalf("unexpected output length: %+v", output) 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Samuel Erb 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DNSGrep 2 | 3 | A fork of @erbbysam's DNSGrep that you can install with `go get`. 4 | 5 | A utility for quickly searching presorted DNS names. Built around the Rapid7 rdns & fdns dataset. 6 | 7 | # How does it work? 8 | 9 | This utility assumes the file provided is presorted (both alphabetical, and symbols). 10 | 11 | The algorithm is pretty simple: 12 | 1) Use a binary search algorithm to seek through the file, looking for a substring match against the query. 13 | 2) Once a match is found, the file is scanned backwards in 10KB increments looking for a non-matching substring. 14 | 3) Once a non-matching substring is found, the file is scanned forwards until all exact matches are returned. 15 | 16 | # Limits 17 | 18 | There is a built-in limit system. This prevents 2 things: 19 | 1) scanning too far backwards (`MaxScan`) 20 | 2) scanning too far forwards after scanning backwards (`MaxOutputLines`) 21 | 22 | This allows for any input while stopping requests that are taking too long. 23 | 24 | Additionally, this utility does not handle the edge cases(start/end) of files and will return an error if encountered. 25 | 26 | # Install 27 | 28 | `go get` the following packages: 29 | 30 | ``` 31 | go get -u github.com/tomnomnom/dnsgrep 32 | 33 | ``` 34 | 35 | # Run 36 | 37 | The following steps were tested with Ubuntu 16.04 & go 1.11.5. 38 | 39 | Generate fdns_a.sort.txt and rdns.sort.txt first using the scripts found in the scripts/ folder: 40 | ``` 41 | # Each of these scripts requires: 42 | # * 3 hours+ on an SSD 43 | # * 300GB+ temp disk space (under the same folder) 44 | # * ~65GB for output output (under the same folder) 45 | # * jq to be installed 46 | ./scripts/fdns_a.sh 47 | ./scripts/rdns.sh 48 | ``` 49 | 50 | 51 | Run the command line utility: 52 | ``` 53 | dnsgrep -f DNSBinarySearch/test_data.txt -i "amiccom.com.tw" 54 | ``` 55 | 56 | Run the experimental server in the same folder as fdns_a.sort & rdns.sort.txt: 57 | ``` 58 | go run experimentalServer.go 59 | ``` 60 | 61 | # Data Source 62 | The source of this data referenced throughout this repository is Rapid7 Labs. Please review the Terms of Service: 63 | https://opendata.rapid7.com/about/ 64 | 65 | https://opendata.rapid7.com/sonar.rdns_v2/ 66 | 67 | https://opendata.rapid7.com/sonar.fdns_v2/ 68 | 69 | # Stack Overflow References 70 | 71 | via https://unix.stackexchange.com/a/35472 72 | * we need to sort with LC_COLLATE=C to also sort ., chars 73 | 74 | via https://unix.stackexchange.com/a/350068 75 | * To sort a large file: split it into chunks, sort the chunks and then simply merge the results 76 | 77 | 78 | 79 | # License 80 | 81 | See LICENSE file. 82 | -------------------------------------------------------------------------------- /dnsgrep.go: -------------------------------------------------------------------------------- 1 | // a lightweight utility to scan a sorted file for a substring at the start of each line 2 | 3 | package main 4 | 5 | import ( 6 | . "github.com/tomnomnom/dnsgrep/DNSBinarySearch" 7 | 8 | "fmt" 9 | "os" 10 | 11 | "github.com/jessevdk/go-flags" 12 | ) 13 | 14 | // command line parsing 15 | type Options struct { 16 | Input string `short:"i" long:"input" description:"A hostname to search" required:"true"` 17 | DNSFile string `short:"f" long:"file" description:"A large file containing sorted, reversed domain names" required:"true"` 18 | } 19 | 20 | // command line parsing 21 | var options Options 22 | var parser = flags.NewParser(&options, flags.Default) 23 | 24 | func main() { 25 | 26 | // command line parsing 27 | _, err := parser.Parse() 28 | if err != nil { 29 | return 30 | } 31 | 32 | // increase our limits x10 as we're running this locally 33 | var limits = Limits{ 34 | MaxScan: 1000, // 100MB 35 | MaxOutputLines: 1000000, // 1,000,000 lines 36 | } 37 | 38 | // main.go is really just a wrapper around this function 39 | output, err := DNSBinarySearch(options.DNSFile, options.Input, limits) 40 | if err != nil { 41 | fmt.Fprintf(os.Stderr, "Error: %+v\n", err) 42 | } else { 43 | for _, result := range output { 44 | fmt.Printf("%s\n", result) 45 | } 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /experimentalServer/.gitignore: -------------------------------------------------------------------------------- 1 | experimentalServer 2 | -------------------------------------------------------------------------------- /experimentalServer/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "Message": "Powered by DNSGrep - https://github.com/erbbysam/DNSGrep", 3 | "FileNames": ["2019-02-15-1550232363-fdns_a.json.gz","2019-02-13-1550077568-rdns.json.gz"], 4 | "TOS":"The source of this data is Rapid7 Labs. Please review the Terms of Service: https://opendata.rapid7.com/about/" 5 | } 6 | -------------------------------------------------------------------------------- /experimentalServer/experimentalServer.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/gorilla/mux" 7 | . "github.com/tomnomnom/dnsgrep/DNSBinarySearch" 8 | "io/ioutil" 9 | "log" 10 | "net/http" 11 | "time" 12 | ) 13 | 14 | const ( 15 | configJSON = "/home/ubuntu/go/src/dnsgrep/experimentalServer/config.json" 16 | ) 17 | 18 | // a struct for the metadata contained in the JSON 19 | type MetaJSON struct { 20 | Runtime string // not the most efficent way to convey this... 21 | Errors []string 22 | Message string `json:"Message"` // custom message to send 23 | FileNames []string `json:"FileNames"` // list of filenames scanned 24 | TOS string `json:"TOS"` 25 | } 26 | 27 | // a struct for the response json 28 | type ResponseJSON struct { 29 | Meta MetaJSON 30 | FDNS_A []string 31 | RDNS []string 32 | } 33 | 34 | // load config 35 | func GetMeta(path string) (MetaConfig *MetaJSON) { 36 | MetaConfig = new(MetaJSON) 37 | data, err := ioutil.ReadFile(path) 38 | if err != nil { 39 | log.Fatalf("Error opening config file: %v", err) 40 | } 41 | err = json.Unmarshal(data, &MetaConfig) 42 | if err != nil { 43 | log.Fatalf("Error unmarshalling config file: %v", err) 44 | } 45 | return MetaConfig 46 | } 47 | 48 | // fetch the DNS info from our files 49 | func fetchDNSInfo(queryString string) (fdns_a []string, rdns []string, errors []string) { 50 | 51 | // fetch from our files 52 | fdns_a, err := DNSBinarySearch("fdns_a.sort.txt", queryString, DefaultLimits) 53 | if err != nil { 54 | errors = append(errors, fmt.Sprintf("fdns_a error: %+v", err)) 55 | } 56 | rdns, err = DNSBinarySearch("rdns.sort.txt", queryString, DefaultLimits) 57 | if err != nil { 58 | errors = append(errors, fmt.Sprintf("rdns error: %+v", err)) 59 | } 60 | 61 | return 62 | } 63 | 64 | // homepage handler 65 | func IndexHandler(w http.ResponseWriter, r *http.Request) { 66 | w.WriteHeader(http.StatusOK) 67 | w.Write([]byte("OK\n")) 68 | } 69 | 70 | // primary DNS handler 71 | func DNSHandler(w http.ResponseWriter, r *http.Request) { 72 | MetaCfg := GetMeta(configJSON) 73 | vals := r.URL.Query() 74 | queryString, ok := vals["q"] 75 | if ok { 76 | 77 | // write out a JSON content-type 78 | w.Header().Set("Content-Type", "application/json") 79 | w.WriteHeader(http.StatusOK) 80 | 81 | // query the two large files 82 | before := time.Now() 83 | fdns_a, rdns, errors := fetchDNSInfo(queryString[0]) 84 | 85 | // get runtime 86 | delta := time.Now().Sub(before) 87 | runtimeStr := fmt.Sprintf("%f seconds", delta.Seconds()) 88 | 89 | // now put together our JSON! 90 | ret := ResponseJSON{ 91 | FDNS_A: fdns_a, 92 | RDNS: rdns, 93 | } 94 | ret.Meta.Runtime = runtimeStr 95 | ret.Meta.Errors = errors 96 | // TODO -- these really should come in via a config file 97 | ret.Meta.Message = MetaCfg.Message 98 | ret.Meta.FileNames = MetaCfg.FileNames 99 | ret.Meta.TOS = MetaCfg.TOS 100 | 101 | // finally, encode the json! 102 | jsonEncoded, err := json.MarshalIndent(ret, "", "\t") 103 | if err != nil { 104 | w.Write([]byte("Unexpected failure to encode json?\n")) 105 | } else { 106 | // success! 107 | w.Write(jsonEncoded) 108 | } 109 | 110 | } else { 111 | w.Write([]byte("Missing query string!\n")) 112 | } 113 | } 114 | 115 | // simple mux server startup 116 | func main() { 117 | r := mux.NewRouter() 118 | r.HandleFunc("/", IndexHandler) 119 | r.HandleFunc("/dns", DNSHandler) 120 | log.Fatal(http.ListenAndServe(":80", r)) 121 | } 122 | -------------------------------------------------------------------------------- /scripts/fdns_a.sh: -------------------------------------------------------------------------------- 1 | # fetch the fdns_a file 2 | wget -O fdns_a.gz https://opendata.rapid7.com/sonar.fdns_v2/2019-01-25-1548417890-fdns_a.json.gz 3 | 4 | # extract and format our data 5 | gunzip -c fdns_a.gz | jq -r '.value + ","+ .name' | tr '[:upper:]' '[:lower:]' | rev > fdns_a.rev.lowercase.txt 6 | 7 | # split the data into chunks ot sort 8 | split -b100M fdns_a.rev.lowercase.txt fileChunk 9 | 10 | # remove the old files 11 | rm fdns_a.gz 12 | rm fdns_a.rev.lowercase.txt 13 | 14 | ## Sort each of the pieces and delete the unsorted one 15 | for f in fileChunk*; do LC_COLLATE=C sort "$f" > "$f".sorted && rm "$f"; done 16 | 17 | ## merge the sorted files with local tmp directory 18 | mkdir -p sorttmp 19 | LC_COLLATE=C sort -T sorttmp/ -muo fdns_a.sort.txt fileChunk*.sorted 20 | 21 | # clean up 22 | rm fileChunk* 23 | -------------------------------------------------------------------------------- /scripts/rdns.sh: -------------------------------------------------------------------------------- 1 | # fetch the rdns file 2 | wget -O rdns.gz https://opendata.rapid7.com/sonar.rdns_v2/2019-01-30-1548868121-rdns.json.gz 3 | 4 | # extract and format our data 5 | gunzip -c rdns.gz | jq -r '.name + ","+ .value' | tr '[:upper:]' '[:lower:]' | rev > rdns.rev.lowercase.txt 6 | 7 | # split the data into chunks ot sort 8 | split -b100M rdns.rev.lowercase.txt fileChunk 9 | 10 | # remove the old files 11 | rm rdns.gz 12 | rm rdns.rev.lowercase.txt 13 | 14 | ## Sort each of the pieces and delete the unsorted one 15 | for f in fileChunk*; do LC_COLLATE=C sort "$f" > "$f".sorted && rm "$f"; done 16 | 17 | ## merge the sorted files with local tmp directory 18 | mkdir -p sorttmp 19 | LC_COLLATE=C sort -T sorttmp/ -muo rdns.sort.txt fileChunk*.sorted 20 | 21 | # clean up 22 | rm fileChunk* 23 | --------------------------------------------------------------------------------