├── .gitignore ├── README.md ├── go.mod ├── go.sum ├── main.go └── pkg ├── config └── config.go ├── matcher ├── extractor.go └── matcher.go └── utils ├── http.go ├── utils.go └── wappalyzer.go /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .idea 3 | 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # whoareyou 2 | whoareyou is a tool to find the underlying technology/software used in a list of URLs 3 | passed through stdin (using the [Wappalyzer](https://github.com/AliasIO/wappalyzer/blob/master/src/apps.json) dataset). It will 4 | make a request to each URL, analyze the data received, and match it against known fingerprints/indicators of technology. 5 | 6 | Custom matches for user-provided regex values in HTTP responses are also supported, either in addition to or independently of Wappalyzer. 7 | 8 | This is useful for understanding what technology a website is using, for easily searching for custom strings/regex, and for finding many different 9 | websites that use a given set of technologies en masse. 10 | 11 | ## Installation 12 | With Go installed, run: 13 | 14 | ``` 15 | go get -u github.com/ameenmaali/whoareyou 16 | ``` 17 | 18 | ## Usage 19 | 20 | ``` 21 | Usage of whoareyou: 22 | -H string 23 | Headers to add in all requests. Multiple should be separated by semi-colon 24 | -V Get the current version of whoareyou 25 | -cookies string 26 | Cookies to add in all requests 27 | -debug 28 | Debug/verbose mode to print more info for failed/malformed URLs or requests 29 | -disable-wappalyzer 30 | Disable Wappalyzer scans (useful for only including custom matches) 31 | -dw 32 | Disable Wappalyzer scans (useful for only including custom matches) 33 | -headers string 34 | Headers to add in all requests. 
Multiple should be separated by semi-colon 35 | -m value 36 | Key value pair (JSON formatted, see README for usage info) of a match source type and regex value (or string) to search for 37 | (i.e. '{"name": {"responseBody": "^http(s)?:\/\/.+"}}'. Available match source types are: responseBody, scriptSrc. Flag can be set more than once. 38 | -match value 39 | Key value pair (JSON formatted, see README for usage info) of a match source type and regex value (or string) to search for 40 | (i.e. '{"name": {"responseBody": "^http(s)?:\/\/.+"}}'. Available match source types are: responseBody, scriptSrc. Flag can be set more than once. 41 | -tech string 42 | The technology to check against (default is all, comma-separated list). 43 | Get names from app keys here: https://github.com/AliasIO/wappalyzer/blob/master/src/apps.json 44 | -technology-lookups string 45 | The technology to check against (default is all, comma-separated list). 46 | Get names from app keys here: https://github.com/AliasIO/wappalyzer/blob/master/src/apps.json 47 | -t int 48 | Set the timeout length (in seconds) for each HTTP request (default 15) 49 | -timeout int 50 | Set the timeout length (in seconds) for each HTTP request (default 15) 51 | -version 52 | Get the current version of whoareyou 53 | -w int 54 | Set the concurrency/worker count (default 25) 55 | -workers int 56 | Set the concurrency/worker count (default 25) 57 | ``` 58 | 59 | ### Custom Matches 60 | Support for custom matches is also included with the `-m|-match` flag. This should be a JSON formatted string which 61 | expects a search name (which you create), the match type (where the search should be), and the regex match values you are looking for. 
62 | 63 | The currently supported match types are: 64 | * `responseBody` - Search the entire response body/HTML 65 | * `scriptSrc` - Search for a value within the `src` attributes of script tags in the designated page 66 | 67 | Data should be formatted as valid JSON, with the following structure: 68 | ``` 69 | {"searchName": {"matchType": "regexValue"}} 70 | {"searchName": {"matchType": ["regexValue1", "regexValue2"]}} 71 | ``` 72 | 73 | * The `searchName` is whatever you want to identify the search as 74 | * The `matchType` is one of the above supported match types 75 | * The `regexValue` entries should be a string or a list of strings (either normal strings or regex values) 76 | 77 | You can have as many `-m|-match` flags as you'd like in a given search. To only include custom matches, and not Wappalyzer data, 78 | make sure to include the `-dw|-disable-wappalyzer` flag. 79 | 80 | ## Examples 81 | 82 | Pass in a list of URLs with no custom matches 83 | 84 | ``` 85 | whoareyou < /path/to/urls.txt 86 | ``` 87 | 88 | Pass in a site to [waybackurls](https://github.com/tomnomnom/waybackurls), run it through [urldedupe](https://github.com/ameenmaali/urldedupe) to deduplicate, then run whoareyou and store the output in results.txt 89 | 90 | ``` 91 | echo "https://google.com" | waybackurls | urldedupe | whoareyou > results.txt 92 | ``` 93 | 94 | Use a custom match to look for the existence of a URL in a response body or script tag 95 | 96 | ``` 97 | whoareyou -m '{"findUrls":{"scriptSrc":"^http(s)?:\/\/mymatch.+", "responseBody":"^http(s)?:\/\/mymatch.+"}}' < urls.txt 98 | ``` 99 | 100 | Use a custom match, without the Wappalyzer dataset, to look for a specific list of strings in a response body 101 | 102 | ``` 103 | whoareyou -m '{"findstring":{"responseBody":["str1","str2","str3"]}}' -dw < /path/to/urls.txt 104 | ``` 105 | 106 | Search for specific technology keys from [Wappalyzer](https://github.com/AliasIO/wappalyzer/blob/master/src/apps.json) 107 | 108 | ``` 109 | whoareyou -tech 
"wordpress,intercom,youtube" < /path/to/urls.txt 110 | ``` 111 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/ameenmaali/whoareyou 2 | 3 | go 1.13 4 | 5 | require ( 6 | github.com/EDDYCJY/fake-useragent v0.2.0 7 | github.com/PuerkitoBio/goquery v1.5.1 8 | github.com/andybalholm/cascadia v1.2.0 // indirect 9 | github.com/fatih/color v1.9.0 10 | github.com/mattn/go-colorable v0.1.6 // indirect 11 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9 // indirect 12 | golang.org/x/sys v0.0.0-20200610111108-226ff32320da // indirect 13 | ) 14 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/EDDYCJY/fake-useragent v0.2.0 h1:Jcnkk2bgXmDpX0z+ELlUErTkoLb/mxFBNd2YdcpvJBs= 2 | github.com/EDDYCJY/fake-useragent v0.2.0/go.mod h1:5wn3zzlDxhKW6NYknushqinPcAqZcAPHy8lLczCdJdc= 3 | github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= 4 | github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= 5 | github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= 6 | github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 7 | github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= 8 | github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= 9 | github.com/fatih/color v1.9.0 h1:8xPHl4/q1VyqGIPif1F+1V3Y3lSmrq01EabUW3CoW5s= 10 | github.com/fatih/color v1.9.0/go.mod h1:eQcE1qtQxscV5RaZvpXrrb8Drkc3/DdQ+uUYCNjL+zU= 11 | github.com/mattn/go-colorable v0.1.4 h1:snbPLB8fVfU9iwbbo30TPtbLRzwWu6aJS6Xh4eaaviA= 12 | github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= 13 | 
github.com/mattn/go-colorable v0.1.6 h1:6Su7aK7lXmJ/U79bYtBjLNaha4Fs1Rg9plHpcH+vvnE= 14 | github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= 15 | github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= 16 | github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= 17 | github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= 18 | github.com/mattn/go-isatty v0.0.12 h1:wuysRhFDzyxgEmMf5xjvJ2M9dZoWAXNNr5LSBS7uHXY= 19 | github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= 20 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 21 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 22 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= 23 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 24 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9 h1:pNX+40auqi2JqRfOP1akLGtYcn15TUbkhwuCO3foqqM= 25 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 26 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 27 | golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 28 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037 h1:YyJpGZS1sBuBCzLAR1VEpK193GlqGZbnPFnPV/5Rsb4= 29 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 30 | golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 31 | golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 32 | golang.org/x/sys 
v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 33 | golang.org/x/sys v0.0.0-20200610111108-226ff32320da h1:bGb80FudwxpeucJUjPYJXuJ8Hk91vNtfvrymzwiei38= 34 | golang.org/x/sys v0.0.0-20200610111108-226ff32320da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 35 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 36 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "strings" 8 | "sync" 9 | 10 | "github.com/ameenmaali/whoareyou/pkg/config" 11 | "github.com/ameenmaali/whoareyou/pkg/matcher" 12 | "github.com/ameenmaali/whoareyou/pkg/utils" 13 | ) 14 | 15 | type Task struct { 16 | Url string 17 | } 18 | 19 | var conf config.Config 20 | var opts config.CliOptions 21 | var failedRequestsSent int 22 | var successfulRequestsSent int 23 | 24 | func main() { 25 | // Create an empty conf object 26 | conf = config.NewConfig() 27 | 28 | // Verify flags are properly formatted/expected 29 | err := conf.VerifyFlags(&opts) 30 | if err != nil { 31 | conf.Utils.PrintRed(os.Stderr, "error parsing flags: %v\n", err) 32 | flag.Usage() 33 | os.Exit(1) 34 | } 35 | 36 | // Get the URLs provided, deduplicate, and load properly formatted ones into slice 37 | urls, err := utils.GetUrlsFromFile(&conf) 38 | if err != nil { 39 | fmt.Println("Error getting URLs from stdin: ", err) 40 | } 41 | 42 | // Create HTTP Transport and Client after parsing flags 43 | conf.HttpClient = utils.CreateClient(opts.Timeout) 44 | 45 | // Fetch the latest wappalyzer data 46 | conf.TechInScope, err = utils.FetchWappalyzerData(&conf) 47 | if err != nil { 48 | fmt.Println("Error fetching data from Wappalyzer: ", err) 49 | } 50 | 51 | // Check if specific technology to lookup, else include all 52 | conf.UpdateTechnologyInScope() 53 | 54 | tasks := 
make(chan Task) 55 | var wg sync.WaitGroup 56 | 57 | for i := 0; i < opts.Concurrency; i++ { 58 | wg.Add(1) 59 | go func() { 60 | for task := range tasks { 61 | task.execute() 62 | } 63 | wg.Done() 64 | }() 65 | } 66 | 67 | for _, u := range urls { 68 | tasks <- Task{Url: u} 69 | } 70 | 71 | close(tasks) 72 | wg.Wait() 73 | } 74 | 75 | func (t Task) execute() { 76 | resp, err := utils.SendRequest(t.Url, &conf) 77 | if err != nil { 78 | failedRequestsSent += 1 79 | if conf.DebugMode { 80 | conf.Utils.PrintRed(os.Stderr, "error sending HTTP request to %v: %v\n", t.Url, err) 81 | } 82 | return 83 | } 84 | successfulRequestsSent += 1 85 | 86 | responseBody := string(resp.Body) 87 | if responseBody == "" { 88 | return 89 | } 90 | 91 | // Extract relevant data from HTML docs 92 | htmlExtractions := matcher.HtmlExtractions{ 93 | ScriptTags: []string{}, 94 | InlineJavaScript: []string{}, 95 | MetaTags: map[string]string{}, 96 | } 97 | htmlExtractions.Parse(resp.GoQueryDoc) 98 | htmlExtractions.RawHtmlBody = &responseBody 99 | 100 | techMatches := map[string][]string{} 101 | matchResult := matcher.MatchResult{ 102 | Url: t.Url, 103 | TechnologyMatches: techMatches, 104 | TechFound: []string{}, 105 | } 106 | 107 | if !opts.DisableWappalyzer { 108 | for key, value := range conf.TechInScope { 109 | value.Matches.HtmlExtractions = htmlExtractions 110 | value.Matches.Evaluate(key, &matchResult) 111 | } 112 | } 113 | 114 | for key, value := range conf.CustomMatch { 115 | value.Matches.HtmlExtractions = htmlExtractions 116 | value.Matches.Evaluate(key, &matchResult) 117 | } 118 | 119 | if len(matchResult.TechFound) > 0 { 120 | conf.Utils.PrintGreen(os.Stdout, "[%v]: [%v]\n", matchResult.Url, strings.Join(matchResult.TechFound, ", ")) 121 | } else { 122 | if conf.DebugMode { 123 | conf.Utils.PrintYellow(os.Stderr, "[%v]: no matches found\n", matchResult.Url) 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- 
/pkg/config/config.go: -------------------------------------------------------------------------------- 1 | package config 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "os" 11 | "regexp" 12 | "strings" 13 | 14 | "github.com/ameenmaali/whoareyou/pkg/matcher" 15 | "github.com/fatih/color" 16 | ) 17 | 18 | const Version = "1.0.0" 19 | 20 | type CliOptions struct { 21 | Cookies string 22 | Headers string 23 | Debug bool 24 | DisableWappalyzer bool 25 | Concurrency int 26 | Timeout int 27 | Version bool 28 | RawTechInScope string 29 | CustomMatch MultiStringFlag 30 | } 31 | 32 | type Config struct { 33 | Cookies string 34 | Headers map[string]string 35 | HttpClient *http.Client 36 | TechProvided []string 37 | CustomMatch map[string]matcher.AppMatch 38 | TechInScope map[string]matcher.AppMatch 39 | Utils Utilities 40 | DebugMode bool 41 | } 42 | 43 | type PrintColor func(w io.Writer, format string, a ...interface{}) 44 | 45 | type Utilities struct { 46 | PrintRed PrintColor 47 | PrintGreen PrintColor 48 | PrintCyan PrintColor 49 | PrintYellow PrintColor 50 | } 51 | 52 | type MultiStringFlag []string 53 | 54 | func NewConfig() Config { 55 | utilities := Utilities{ 56 | PrintGreen: color.New(color.FgGreen).FprintfFunc(), 57 | PrintRed: color.New(color.FgRed).FprintfFunc(), 58 | PrintCyan: color.New(color.FgCyan).FprintfFunc(), 59 | PrintYellow: color.New(color.FgYellow).FprintfFunc(), 60 | } 61 | 62 | config := Config{ 63 | Cookies: "", 64 | Headers: make(map[string]string), 65 | HttpClient: nil, 66 | TechProvided: []string{}, 67 | CustomMatch: make(map[string]matcher.AppMatch), 68 | TechInScope: make(map[string]matcher.AppMatch), 69 | Utils: utilities, 70 | } 71 | return config 72 | } 73 | 74 | func (c *Config) UpdateTechnologyInScope() { 75 | if c.TechProvided != nil { 76 | data := map[string]matcher.AppMatch{} 77 | for _, technology := range c.TechProvided { 78 | if _, ok := c.TechInScope[technology]; ok { 79 | 
data[technology] = c.TechInScope[technology] 80 | } else { 81 | c.Utils.PrintRed(os.Stderr, "Technology provided [%v] was not found\n", technology) 82 | } 83 | } 84 | 85 | if len(data) != 0 { 86 | c.TechInScope = data 87 | } 88 | } 89 | } 90 | 91 | func (c *Config) VerifyFlags(options *CliOptions) error { 92 | flag.StringVar(&options.Cookies, "cookies", "", "Cookies to add in all requests") 93 | 94 | flag.StringVar(&options.Headers, "H", "", "Headers to add in all requests. Multiple should be separated by semi-colon") 95 | flag.StringVar(&options.Headers, "headers", "", "Headers to add in all requests. Multiple should be separated by semi-colon") 96 | 97 | flag.StringVar(&options.RawTechInScope, "tech", "", "The technology to check against (default is all, comma-separated list).\n" + 98 | " Get names from app keys here: https://github.com/AliasIO/wappalyzer/blob/master/src/apps.json") 99 | flag.StringVar(&options.RawTechInScope, "technology-lookups", "", "The technology to check against (default is all, comma-separated list).\n" + 100 | " Get names from app keys here: https://github.com/AliasIO/wappalyzer/blob/master/src/apps.json") 101 | 102 | flag.Var(&options.CustomMatch, "m", "Key value pair (JSON formatted, see README for usage info) of a match source type and regex value (or string) to search for\n" + 103 | " (i.e. '{\"name\": {\"responseBody\": \"^http(s)?:\\/\\/.+\"}}'. Available match source types are: responseBody, scriptSrc. Flag can be set more than once.") 104 | flag.Var(&options.CustomMatch, "match", "Key value pair (JSON formatted, see README for usage info) of a match source type and regex value (or string) to search for\n" + 105 | " (i.e. '{\"name\": {\"responseBody\": \"^http(s)?:\\/\\/.+\"}}'. Available match source types are: responseBody, scriptSrc. 
Flag can be set more than once.") 106 | 107 | flag.BoolVar(&options.DisableWappalyzer, "dw", false, "Disable Wappalyzer scans (useful for only including custom matches)") 108 | flag.BoolVar(&options.DisableWappalyzer, "disable-wappalyzer", false, "Disable Wappalyzer scans (useful for only including custom matches)") 109 | 110 | flag.BoolVar(&options.Debug, "debug", false, "Debug/verbose mode to print more info for failed/malformed URLs or requests") 111 | 112 | flag.IntVar(&options.Concurrency, "w", 25, "Set the concurrency/worker count") 113 | flag.IntVar(&options.Concurrency, "workers", 25, "Set the concurrency/worker count") 114 | 115 | flag.IntVar(&options.Timeout, "t", 15, "Set the timeout length (in seconds) for each HTTP request") 116 | flag.IntVar(&options.Timeout, "timeout", 15, "Set the timeout length (in seconds) for each HTTP request") 117 | 118 | flag.BoolVar(&options.Version, "version", false, "Get the current version of whoareyou") 119 | flag.BoolVar(&options.Version, "V", false, "Get the current version of whoareyou") 120 | 121 | flag.Parse() 122 | 123 | if options.Version { 124 | fmt.Println("whoareyou version: " + Version) 125 | os.Exit(0) 126 | } 127 | 128 | if options.Cookies != "" { 129 | c.Cookies = options.Cookies 130 | } 131 | 132 | if options.Debug { 133 | c.DebugMode = true 134 | } 135 | 136 | if options.Headers != "" { 137 | if !strings.Contains(options.Headers, ":") { 138 | return errors.New("headers flag not formatted properly (no colon to separate header and value)") 139 | } 140 | headers := make(map[string]string) 141 | rawHeaders := strings.Split(options.Headers, ";") 142 | for _, header := range rawHeaders { 143 | var parts []string 144 | if strings.Contains(header, ": ") { 145 | parts = strings.Split(header, ": ") 146 | } else if strings.Contains(header, ":") { 147 | parts = strings.Split(header, ":") 148 | } else { 149 | continue 150 | } 151 | headers[strings.TrimSpace(parts[0])] = strings.TrimSpace(parts[1]) 152 | } 153 | 
c.Headers = headers 154 | 155 | } 156 | 157 | if options.RawTechInScope != "" { 158 | var technology []string 159 | rawTechnology := strings.Split(options.RawTechInScope, ",") 160 | for _, part := range rawTechnology { 161 | technology = append(technology, strings.ToLower(strings.TrimSpace(part))) 162 | } 163 | c.TechProvided = technology 164 | } 165 | 166 | err := c.parseCustomMatches(options.CustomMatch) 167 | if err != nil { 168 | return err 169 | } 170 | 171 | return nil 172 | } 173 | 174 | func (m *MultiStringFlag) String() string { 175 | return "" 176 | } 177 | 178 | func (m *MultiStringFlag) Set(value string) error { 179 | *m = append(*m, value) 180 | return nil 181 | } 182 | 183 | func (c *Config) parseCustomMatches(data MultiStringFlag) error { 184 | for _, value := range data { 185 | var data map[string]map[string]interface{} 186 | err := json.Unmarshal([]byte(value), &data) 187 | if err != nil { 188 | return err 189 | } 190 | 191 | match := matcher.Matcher{} 192 | app := matcher.AppMatch{ 193 | Matches: &match, 194 | } 195 | 196 | for key, value := range data { 197 | app.Name = "custom-" + key 198 | for matchType, matchValue := range value { 199 | var matchValues []*regexp.Regexp 200 | valType := fmt.Sprintf("%T", matchValue) 201 | 202 | // Not a great way to do this, but... 203 | if valType == "string" || valType == "float64" { 204 | str := fmt.Sprintf("%v", matchValue) 205 | re, err := regexp.Compile(str) 206 | if err != nil { 207 | return err 208 | } 209 | matchValues = append(matchValues, re) 210 | 211 | } else if valType == "[]interface {}" { 212 | for _, v := range matchValue.([]interface{}) { 213 | str := fmt.Sprintf("%v", v) 214 | re, err := regexp.Compile(str) 215 | if err != nil { 216 | return err 217 | } 218 | matchValues = append(matchValues, re) 219 | } 220 | } else { 221 | return errors.New(fmt.Sprintf("%v data type is not supported. 
It must be either a string or list of regex values", matchValue)) 222 | } 223 | 224 | matchType = strings.ToLower(matchType) 225 | if matchType == "responsebody" { 226 | match.ResponseContent = matchValues 227 | } else if matchType == "scriptsrc" { 228 | match.Script = matchValues 229 | } else { 230 | return errors.New(fmt.Sprint("%v is not a valid match type. See the usage info and README for current supported types", matchType)) 231 | } 232 | } 233 | c.CustomMatch[app.Name] = app 234 | } 235 | } 236 | return nil 237 | } 238 | -------------------------------------------------------------------------------- /pkg/matcher/extractor.go: -------------------------------------------------------------------------------- 1 | package matcher 2 | 3 | import ( 4 | "github.com/PuerkitoBio/goquery" 5 | ) 6 | 7 | type HtmlExtractions struct { 8 | ScriptTags []string 9 | InlineJavaScript []string 10 | MetaTags map[string]string 11 | RawHtmlBody *string 12 | } 13 | 14 | func (he *HtmlExtractions) getScriptTags(doc *goquery.Document) { 15 | var scripts []string 16 | doc.Find("script").Each(func(i int, item *goquery.Selection) { 17 | if src, exists := item.Attr("src"); exists { 18 | scripts = append(scripts, src) 19 | } 20 | }) 21 | he.ScriptTags = scripts 22 | } 23 | 24 | func (he *HtmlExtractions) getMetaTags(doc *goquery.Document) { 25 | doc.Find("meta").Each(func(i int, item *goquery.Selection) { 26 | attr := item.Get(0) 27 | for _, a := range attr.Attr { 28 | he.MetaTags[a.Key] = a.Val 29 | } 30 | }) 31 | } 32 | 33 | func (he *HtmlExtractions) getInlineJavaScript(doc *goquery.Document) { 34 | var inlineJS []string 35 | doc.Find("script").Each(func(i int, item *goquery.Selection) { 36 | inlineJS = append(inlineJS, item.Text()) 37 | }) 38 | he.InlineJavaScript = inlineJS 39 | } 40 | 41 | func (he *HtmlExtractions) Parse(doc *goquery.Document) { 42 | he.getScriptTags(doc) 43 | he.getMetaTags(doc) 44 | he.getInlineJavaScript(doc) 45 | } 46 | 
-------------------------------------------------------------------------------- /pkg/matcher/matcher.go: -------------------------------------------------------------------------------- 1 | package matcher 2 | 3 | import ( 4 | "regexp" 5 | "strings" 6 | ) 7 | 8 | type Matcher struct { 9 | Cookies map[string]*regexp.Regexp 10 | Headers map[string]*regexp.Regexp 11 | Icon string 12 | ResponseContent []*regexp.Regexp 13 | Script []*regexp.Regexp 14 | JavaScript map[string]*regexp.Regexp 15 | Meta map[string]*regexp.Regexp 16 | HtmlExtractions HtmlExtractions 17 | } 18 | 19 | type AppMatch struct { 20 | Name string 21 | Website string 22 | Matches *Matcher 23 | } 24 | 25 | type MatchResult struct { 26 | Url string 27 | TechnologyMatches map[string][]string 28 | TechFound []string 29 | } 30 | 31 | func (m *Matcher) contentMatch(body *string) bool { 32 | return strAndSliceMatch(body, m.ResponseContent) 33 | } 34 | 35 | func (m *Matcher) headersMatch(header *string) bool { 36 | return strAndMapMatch(header, m.Headers) 37 | } 38 | 39 | func (m *Matcher) cookiesMatch(cookie *string) bool { 40 | return strAndMapMatch(cookie, m.Cookies) 41 | } 42 | 43 | func (m *Matcher) javascriptMatch(js *[]string) bool { 44 | return sliceAndMapMatch(js, m.JavaScript) 45 | } 46 | 47 | func (m *Matcher) scriptMatch(script *[]string) bool { 48 | return sliceAndSliceMatch(script, m.Script) 49 | } 50 | 51 | func (m *Matcher) metaMatch(meta *map[string]string) bool { 52 | return mapAndMapMatch(meta, m.Meta) 53 | } 54 | 55 | func (m *Matcher) Evaluate(tech string, matchResult *MatchResult) { 56 | var matchTypes []string 57 | if contentMatch := m.contentMatch(m.HtmlExtractions.RawHtmlBody); contentMatch { 58 | matchTypes = append(matchTypes, "htmlContent") 59 | matchResult.TechnologyMatches[tech] = matchTypes 60 | matchResult.TechFound = append(matchResult.TechFound, tech) 61 | } 62 | 63 | if scriptMatch := m.scriptMatch(&m.HtmlExtractions.ScriptTags); scriptMatch { 64 | matchTypes = 
append(matchTypes, "scriptTag") 65 | matchResult.TechnologyMatches[tech] = matchTypes 66 | matchResult.TechFound = append(matchResult.TechFound, tech) 67 | } 68 | 69 | if metaMatch := m.metaMatch(&m.HtmlExtractions.MetaTags); metaMatch { 70 | matchTypes = append(matchTypes, "metaTag") 71 | matchResult.TechnologyMatches[tech] = matchTypes 72 | matchResult.TechFound = append(matchResult.TechFound, tech) 73 | } 74 | 75 | if jsMatch := m.javascriptMatch(&m.HtmlExtractions.InlineJavaScript); jsMatch { 76 | matchTypes = append(matchTypes, "javascriptContent") 77 | matchResult.TechnologyMatches[tech] = matchTypes 78 | matchResult.TechFound = append(matchResult.TechFound, tech) 79 | } 80 | } 81 | 82 | func strAndMapMatch(matchStrPtr *string, values map[string]*regexp.Regexp) bool { 83 | matchStr := *matchStrPtr 84 | for key, match := range values { 85 | if match == nil { 86 | continue 87 | } 88 | 89 | if strings.ToLower(matchStr) == strings.ToLower(key) && match.MatchString(matchStr) { 90 | return true 91 | } 92 | } 93 | return false 94 | } 95 | 96 | func strAndSliceMatch(matchStrPtr *string, values []*regexp.Regexp) bool { 97 | matchStr := *matchStrPtr 98 | for _, match := range values { 99 | if match == nil { 100 | continue 101 | } 102 | 103 | if match.MatchString(matchStr) { 104 | return true 105 | } 106 | } 107 | return false 108 | } 109 | 110 | func sliceAndSliceMatch(matchSlicePtr *[]string, values []*regexp.Regexp) bool { 111 | matchSlice := *matchSlicePtr 112 | for _, match := range values { 113 | if match == nil { 114 | continue 115 | } 116 | 117 | for _, val := range matchSlice { 118 | if match.MatchString(val) { 119 | return true 120 | } 121 | } 122 | } 123 | return false 124 | } 125 | 126 | func sliceAndMapMatch(matchSlicePtr *[]string, values map[string]*regexp.Regexp) bool { 127 | matchSlice := *matchSlicePtr 128 | for key, match := range values { 129 | if match == nil { 130 | continue 131 | } 132 | 133 | for _, val := range matchSlice { 134 | if 
strings.ToLower(val) == strings.ToLower(key) && match.MatchString(val) { 135 | return true 136 | } 137 | } 138 | } 139 | return false 140 | } 141 | 142 | func mapAndMapMatch(matchMapPtr *map[string]string, values map[string]*regexp.Regexp) bool { 143 | matchMap := *matchMapPtr 144 | for key, match := range values { 145 | for attr, val := range matchMap { 146 | if strings.ToLower(key) == strings.ToLower(attr) && match.MatchString(val) { 147 | return true 148 | } 149 | } 150 | } 151 | return false 152 | } 153 | -------------------------------------------------------------------------------- /pkg/utils/http.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bytes" 5 | "crypto/tls" 6 | "io/ioutil" 7 | "net" 8 | "net/http" 9 | "time" 10 | 11 | "github.com/EDDYCJY/fake-useragent" 12 | "github.com/PuerkitoBio/goquery" 13 | 14 | "github.com/ameenmaali/whoareyou/pkg/config" 15 | ) 16 | 17 | type Response struct { 18 | StatusCode int 19 | Body []byte 20 | Headers http.Header 21 | ContentLength int 22 | GoQueryDoc *goquery.Document 23 | } 24 | 25 | func CreateClient(timeout int) *http.Client { 26 | transport := &http.Transport{ 27 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 28 | DisableKeepAlives: true, 29 | DialContext: (&net.Dialer{ 30 | Timeout: time.Duration(timeout) * time.Second, 31 | KeepAlive: time.Second, 32 | }).DialContext, 33 | } 34 | 35 | httpClient := &http.Client{ 36 | Transport: transport, 37 | Timeout: time.Duration(timeout+3) * time.Second, 38 | } 39 | return httpClient 40 | } 41 | 42 | func SendRequest(u string, config *config.Config) (Response, error) { 43 | response := Response{} 44 | 45 | request, err := http.NewRequest("GET", u, nil) 46 | if err != nil { 47 | return response, err 48 | } 49 | 50 | request.Header.Add("User-Agent", browser.Random()) 51 | 52 | // Add headers passed in as arguments 53 | for header, value := range config.Headers { 54 | 
request.Header.Add(header, value) 55 | } 56 | 57 | // Add cookies passed in as arguments 58 | request.Header.Add("Cookie", config.Cookies) 59 | 60 | resp, err := config.HttpClient.Do(request) 61 | 62 | if err != nil { 63 | return response, err 64 | } 65 | 66 | if resp.Body == nil { 67 | return response, err 68 | } 69 | 70 | defer resp.Body.Close() 71 | 72 | body, err := ioutil.ReadAll(resp.Body) 73 | if err != nil { 74 | return response, err 75 | } 76 | 77 | // Reset the response body to be read again 78 | resp.Body = ioutil.NopCloser(bytes.NewBuffer(body)) 79 | 80 | doc, err := goquery.NewDocumentFromReader(resp.Body) 81 | if err == nil { 82 | response.GoQueryDoc = doc 83 | } 84 | 85 | response.Body = body 86 | response.Headers = resp.Header 87 | response.StatusCode = resp.StatusCode 88 | response.ContentLength = int(resp.ContentLength) 89 | 90 | return response, err 91 | } 92 | -------------------------------------------------------------------------------- /pkg/utils/utils.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "bufio" 5 | "errors" 6 | "github.com/ameenmaali/whoareyou/pkg/config" 7 | "net/url" 8 | "os" 9 | "regexp" 10 | "strings" 11 | ) 12 | 13 | func GetUrlsFromFile(conf *config.Config) ([]string, error) { 14 | deduplicatedUrls := make(map[string]bool) 15 | var urls []string 16 | 17 | scanner := bufio.NewScanner(os.Stdin) 18 | for scanner.Scan() { 19 | providedUrl := scanner.Text() 20 | 21 | // Only include properly formatted URLs 22 | u, err := url.ParseRequestURI(providedUrl) 23 | if err != nil { 24 | if conf.DebugMode { 25 | conf.Utils.PrintRed(os.Stderr, "url provided [%v] is not a properly formatted URL\n", providedUrl) 26 | } 27 | continue 28 | } 29 | 30 | if deduplicatedUrls[u.String()] { 31 | continue 32 | } 33 | 34 | deduplicatedUrls[u.String()] = true 35 | urls = append(urls, u.String()) 36 | } 37 | 38 | return urls, scanner.Err() 39 | } 40 | 41 | func 
stringToRegex(value interface{}) (*regexp.Regexp, error) { 42 | str, err := cleanString(value) 43 | if err != nil { 44 | return nil, err 45 | } 46 | 47 | re, err := regexp.Compile(str) 48 | if err != nil { 49 | return nil, err 50 | } 51 | 52 | return re, nil 53 | } 54 | 55 | func sliceToRegexSlice(value interface{}, matches []*regexp.Regexp) ([]*regexp.Regexp, error) { 56 | values, ok := value.([]interface{}) 57 | if !ok { 58 | return matches, errors.New("value provided is not a slice of strings") 59 | } 60 | 61 | for _, str := range values { 62 | s, err := cleanString(str) 63 | if err != nil { 64 | continue 65 | } 66 | 67 | re, err := regexp.Compile(s) 68 | if err != nil { 69 | continue 70 | } 71 | matches = append(matches, re) 72 | } 73 | 74 | return matches, nil 75 | } 76 | 77 | func mapToRegexMap(value interface{}) (map[string]*regexp.Regexp, error) { 78 | values, ok := value.(map[string]interface{}) 79 | if !ok { 80 | return nil, errors.New("value provided is not a properly formated map") 81 | } 82 | 83 | regexMap := map[string]*regexp.Regexp{} 84 | for key, val := range values { 85 | re, err := regexp.Compile(val.(string)) 86 | if err != nil { 87 | continue 88 | } 89 | regexMap[key] = re 90 | } 91 | return regexMap, nil 92 | } 93 | 94 | func cleanString(value interface{}) (string, error) { 95 | str, ok := value.(string) 96 | if !ok { 97 | return "", errors.New("value provided is not a string") 98 | } 99 | 100 | splitStr := strings.Split(str, ";") 101 | // Only take the first portion of the string, which contains the regex value 102 | str = splitStr[0] 103 | if endsWithSlash := strings.HasSuffix(str, "\\"); endsWithSlash { 104 | str = strings.TrimSuffix(str, "\\") 105 | } 106 | return str, nil 107 | } 108 | -------------------------------------------------------------------------------- /pkg/utils/wappalyzer.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 
| "os" 7 | "regexp" 8 | "strings" 9 | 10 | "github.com/ameenmaali/whoareyou/pkg/config" 11 | "github.com/ameenmaali/whoareyou/pkg/matcher" 12 | ) 13 | 14 | const WAPPALYZER_SOURCE_URL = "https://raw.githubusercontent.com/AliasIO/wappalyzer/master/src/apps.json" 15 | 16 | func FetchWappalyzerData(conf *config.Config) (map[string]matcher.AppMatch, error) { 17 | wappalyzerData := map[string]matcher.AppMatch{} 18 | resp, err := SendRequest(WAPPALYZER_SOURCE_URL, conf) 19 | if err != nil { 20 | return wappalyzerData, err 21 | } 22 | 23 | responseBody := make(map[string]map[string]map[string]interface{}) 24 | 25 | err = json.Unmarshal(resp.Body, &responseBody) 26 | 27 | for _, value := range responseBody { 28 | for app, apps := range value { 29 | match := matcher.Matcher{ 30 | Cookies: nil, 31 | Icon: "", 32 | Headers: nil, 33 | ResponseContent: nil, 34 | Script: nil, 35 | JavaScript: nil, 36 | Meta: nil, 37 | } 38 | 39 | wapp := matcher.AppMatch{ 40 | Name: app, 41 | Website: "", 42 | Matches: &match, 43 | } 44 | 45 | if apps["website"] != nil { 46 | wapp.Website = apps["website"].(string) 47 | } 48 | 49 | if apps["icon"] != nil { 50 | match.Icon = apps["icon"].(string) 51 | } 52 | 53 | if apps["html"] != nil { 54 | if err := stringOrSliceHandler(apps["html"], &match.ResponseContent); err != nil { 55 | if conf.DebugMode { 56 | conf.Utils.PrintRed(os.Stderr, "error parsing wappalyzer html data", err) 57 | } 58 | } 59 | } 60 | 61 | if apps["headers"] != nil { 62 | if err := mapHandler(apps["headers"], &match.Headers); err != nil { 63 | if conf.DebugMode { 64 | conf.Utils.PrintRed(os.Stderr, "error parsing wappalyzer header data", err) 65 | } 66 | } 67 | } 68 | 69 | if apps["cookies"] != nil { 70 | if err := mapHandler(apps["cookies"], &match.Cookies); err != nil { 71 | if conf.DebugMode { 72 | conf.Utils.PrintRed(os.Stderr, "error parsing wappalyzer cookie data", err) 73 | } 74 | } 75 | } 76 | 77 | if apps["script"] != nil { 78 | if err := 
stringOrSliceHandler(apps["script"], &match.Script); err != nil { 79 | if conf.DebugMode { 80 | conf.Utils.PrintRed(os.Stderr, "error parsing wappalyzer script data", err) 81 | } 82 | } 83 | } 84 | 85 | if apps["js"] != nil { 86 | if err := mapHandler(apps["js"], &match.JavaScript); err != nil { 87 | if conf.DebugMode { 88 | conf.Utils.PrintRed(os.Stderr, "error parsing wappalyzer js data", err) 89 | } 90 | } 91 | } 92 | 93 | if apps["meta"] != nil { 94 | if err := mapHandler(apps["meta"], &match.Meta); err != nil { 95 | if conf.DebugMode { 96 | conf.Utils.PrintRed(os.Stderr, "error parsing wappalyzer meta data", err) 97 | } 98 | } 99 | } 100 | wappalyzerData[strings.ToLower(wapp.Name)] = wapp 101 | } 102 | } 103 | 104 | return wappalyzerData, nil 105 | } 106 | 107 | func stringOrSliceHandler(value interface{}, matchResult *[]*regexp.Regexp) error { 108 | errorCount := 0 109 | matchError := "" 110 | 111 | var matches []*regexp.Regexp 112 | 113 | re, err := stringToRegex(value) 114 | if err != nil { 115 | errorCount += 1 116 | matchError += err.Error() + "\n" 117 | } 118 | matches = append(matches, re) 119 | 120 | matches, err = sliceToRegexSlice(value, matches) 121 | if err != nil { 122 | errorCount += 1 123 | matchError += err.Error() + "\n" 124 | } 125 | 126 | // If both conversions fail, mark as an error and move on 127 | if errorCount >= 2 { 128 | return errors.New(matchError) 129 | } else { 130 | *matchResult = append(matches) 131 | } 132 | return nil 133 | } 134 | 135 | func mapHandler(value interface{}, matchResult *map[string]*regexp.Regexp) error { 136 | headerMap, err := mapToRegexMap(value) 137 | if err != nil { 138 | return err 139 | } else { 140 | *matchResult = headerMap 141 | } 142 | return nil 143 | } 144 | --------------------------------------------------------------------------------