├── .gitignore ├── fingerprints_data.go ├── go.mod ├── .github ├── workflows │ ├── build-test.yml │ ├── lint-test.yml │ ├── codeql-analysis.yml │ ├── autorelease-tag.yml │ └── fingerprint-update.yml └── dependabot.yml ├── fingerprints_test.go ├── examples └── main.go ├── go.sum ├── LICENSE.md ├── fingerprint_headers.go ├── README.md ├── fingerprint_cookies.go ├── wappalyzergo_test.go ├── fingerprint_body.go ├── tech.go ├── cmd └── update-fingerprints │ └── main.go └── fingerprints.go /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | cmd/update-fingerprints/update-fingerprints 3 | -------------------------------------------------------------------------------- /fingerprints_data.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | _ "embed" 5 | ) 6 | 7 | //go:embed fingerprints_data.json 8 | var fingerprints string 9 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/projectdiscovery/wappalyzergo 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/stretchr/testify v1.8.4 7 | golang.org/x/net v0.19.0 8 | ) 9 | 10 | require ( 11 | github.com/davecgh/go-spew v1.1.1 // indirect 12 | github.com/pmezard/go-difflib v1.0.0 // indirect 13 | gopkg.in/yaml.v3 v3.0.1 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /.github/workflows/build-test.yml: -------------------------------------------------------------------------------- 1 | name: 🔨 Build Test 2 | on: 3 | push: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | 8 | jobs: 9 | build: 10 | name: Test Builds 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Set up Go 14 | uses: actions/setup-go@v4 15 | with: 16 | go-version: 1.21.x 17 | 18 | - name: Check out code 19 | uses: actions/checkout@v4 20 | 21 | 
- name: Test 22 | run: go test ./... -------------------------------------------------------------------------------- /.github/workflows/lint-test.yml: -------------------------------------------------------------------------------- 1 | name: 🙏🏻 Lint Test 2 | on: 3 | push: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | lint: 9 | name: Lint Test 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v4 14 | - name: Set up Go 15 | uses: actions/setup-go@v4 16 | with: 17 | go-version: 1.21.x 18 | - name: Run golangci-lint 19 | uses: golangci/golangci-lint-action@v3.5.0 20 | with: 21 | version: latest 22 | args: --timeout 5m 23 | working-directory: . 24 | -------------------------------------------------------------------------------- /fingerprints_test.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestVersionRegex(t *testing.T) { 10 | regex, err := newVersionRegex("JBoss(?:-([\\d.]+))?\\;confidence:50\\;version:\\1") 11 | require.NoError(t, err, "could not create version regex") 12 | 13 | matched, version := regex.MatchString("JBoss-2.3.9") 14 | require.True(t, matched, "could not get version regex match") 15 | require.Equal(t, "2.3.9", version, "could not get correct version") 16 | 17 | t.Run("confidence-only", func(t *testing.T) { 18 | _, err := newVersionRegex("\\;confidence:50") 19 | require.NoError(t, err, "could create invalid version regex") 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /examples/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "net/http" 8 | 9 | wappalyzer "github.com/projectdiscovery/wappalyzergo" 10 | ) 11 | 12 | func main() { 13 | resp, err := 
http.DefaultClient.Get("https://www.hackerone.com") 14 | if err != nil { 15 | log.Fatal(err) 16 | } 17 | data, _ := io.ReadAll(resp.Body) // Ignoring error for example 18 | 19 | wappalyzerClient, err := wappalyzer.New() 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | fingerprints := wappalyzerClient.Fingerprint(resp.Header, data) 24 | fmt.Printf("%v\n", fingerprints) 25 | // Output: map[Acquia Cloud Platform:{} Amazon EC2:{} Apache:{} Cloudflare:{} Drupal:{} PHP:{} Percona:{} React:{} Varnish:{}] 26 | 27 | fingerprintsWithCats := wappalyzerClient.FingerprintWithCats(resp.Header, data) 28 | fmt.Printf("%v\n", fingerprintsWithCats) 29 | } 30 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: 🚨 CodeQL Analysis 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - dev 8 | 9 | jobs: 10 | analyze: 11 | name: Analyze 12 | runs-on: ubuntu-latest 13 | permissions: 14 | actions: read 15 | contents: read 16 | security-events: write 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | language: [ 'go' ] 22 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 23 | 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@v4 27 | 28 | # Initializes the CodeQL tools for scanning. 
29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: ${{ matrix.language }} 33 | 34 | - name: Autobuild 35 | uses: github/codeql-action/autobuild@v2 36 | 37 | - name: Perform CodeQL Analysis 38 | uses: github/codeql-action/analyze@v2 -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 6 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 7 | golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= 8 | golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 ProjectDiscovery, Inc. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 
3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | 9 | # Maintain dependencies for GitHub Actions 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: "weekly" 14 | target-branch: "main" 15 | commit-message: 16 | prefix: "chore" 17 | include: "scope" 18 | 19 | # Maintain dependencies for go modules 20 | - package-ecosystem: "gomod" 21 | directory: "/" 22 | schedule: 23 | interval: "weekly" 24 | target-branch: "main" 25 | commit-message: 26 | prefix: "chore" 27 | include: "scope" 28 | 29 | # Maintain dependencies for docker 30 | - package-ecosystem: "docker" 31 | directory: "/" 32 | schedule: 33 | interval: "weekly" 34 | target-branch: "main" 35 | commit-message: 36 | prefix: "chore" 37 | include: "scope" 38 | -------------------------------------------------------------------------------- /.github/workflows/autorelease-tag.yml: -------------------------------------------------------------------------------- 1 | name: 🔖 Release Tag 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["💡Fingerprints Update"] 6 | types: 7 | - completed 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Get Commit Count 19 | id: get_commit 20 | run: git rev-list `git rev-list --tags --no-walk --max-count=1`..HEAD --count | xargs -I {} echo COMMIT_COUNT={} >> $GITHUB_OUTPUT 21 | 22 | - name: Create release and tag 23 | if: ${{ steps.get_commit.outputs.COMMIT_COUNT > 0 }} 24 | id: tag_version 25 | uses: mathieudutour/github-tag-action@v6.1 26 | with: 27 | github_token: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | - name: Create a GitHub release 30 | if: ${{ steps.get_commit.outputs.COMMIT_COUNT > 0 }} 31 | uses: actions/create-release@v1 32 | env: 33 | GITHUB_TOKEN: ${{ 
// getHeadersMap flattens multi-valued response headers into one string per
// header name. The values of a header are joined with ", " in their original
// order; header names are kept exactly as given. A header with no values maps
// to the empty string. The returned map is never nil.
func getHeadersMap(headersArray map[string][]string) map[string]string {
	headers := make(map[string]string, len(headersArray))
	for key, values := range headersArray {
		headers[key] = strings.Join(values, ", ")
	}
	return headers
}
workflow_dispatch: 5 | schedule: 6 | - cron: '0 0 * * 0' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout Repo 13 | uses: actions/checkout@v4 14 | with: 15 | persist-credentials: false 16 | 17 | - name: Setup golang 18 | uses: actions/setup-go@v4 19 | with: 20 | go-version: 1.21.x 21 | 22 | - name: Installing Update binary 23 | run: | 24 | go install github.com/projectdiscovery/wappalyzergo/cmd/update-fingerprints 25 | shell: bash 26 | 27 | - name: Downloading latest wappalyzer changes 28 | run: | 29 | update-fingerprints -fingerprints fingerprints_data.json 30 | shell: bash 31 | 32 | - name: Create local changes 33 | run: | 34 | git add fingerprints_data.json 35 | 36 | - name: Commit files 37 | run: | 38 | git config --local user.email "action@github.com" 39 | git config --local user.name "GitHub Action" 40 | git commit -m "Weekly fingerprints update [$(date)] :robot:" -a --allow-empty 41 | 42 | - name: Push changes 43 | uses: ad-m/github-push-action@master 44 | with: 45 | github_token: ${{ secrets.GITHUB_TOKEN }} 46 | branch: ${{ github.ref }} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wappalyzergo 2 | 3 | A high performance port of the Wappalyzer Technology Detection Library to Go. Inspired by [Webanalyze](https://github.com/rverton/webanalyze). 4 | 5 | Uses data from https://github.com/AliasIO/wappalyzer 6 | 7 | ## Features 8 | 9 | - Very simple and easy to use, with clean codebase. 10 | - Normalized regexes + auto-updating database of wappalyzer fingerprints. 11 | - Optimized for performance: parsing HTML manually for best speed. 12 | 13 | ### Using *go install* 14 | 15 | ```sh 16 | go install -v github.com/projectdiscovery/wappalyzergo/cmd/update-fingerprints@latest 17 | ``` 18 | 19 | After this command *wappalyzergo* library source will be in your current go.mod. 
20 | 21 | ## Example 22 | Usage Example: 23 | 24 | ``` go 25 | package main 26 | 27 | import ( 28 | "fmt" 29 | "io" 30 | "log" 31 | "net/http" 32 | 33 | wappalyzer "github.com/projectdiscovery/wappalyzergo" 34 | ) 35 | 36 | func main() { 37 | resp, err := http.DefaultClient.Get("https://www.hackerone.com") 38 | if err != nil { 39 | log.Fatal(err) 40 | } 41 | data, _ := io.ReadAll(resp.Body) // Ignoring error for example 42 | 43 | wappalyzerClient, err := wappalyzer.New() 44 | fingerprints := wappalyzerClient.Fingerprint(resp.Header, data) 45 | fmt.Printf("%v\n", fingerprints) 46 | 47 | // Output: map[Acquia Cloud Platform:{} Amazon EC2:{} Apache:{} Cloudflare:{} Drupal:{} PHP:{} Percona:{} React:{} Varnish:{}] 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /fingerprint_cookies.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | // checkCookies checks if the cookies for a target match the fingerprints 8 | // and returns the matched IDs if any. 9 | func (s *Wappalyze) checkCookies(cookies []string) []string { 10 | // Normalize the cookies for further processing 11 | normalized := s.normalizeCookies(cookies) 12 | 13 | technologies := s.fingerprints.matchMapString(normalized, cookiesPart) 14 | return technologies 15 | } 16 | 17 | const keyValuePairLength = 2 18 | 19 | // normalizeCookies normalizes the cookies and returns an 20 | // easily parsed format that can be processed upon. 
21 | func (s *Wappalyze) normalizeCookies(cookies []string) map[string]string { 22 | normalized := make(map[string]string) 23 | 24 | for _, part := range cookies { 25 | parts := strings.SplitN(strings.Trim(part, " "), "=", keyValuePairLength) 26 | if len(parts) < keyValuePairLength { 27 | continue 28 | } 29 | normalized[parts[0]] = parts[1] 30 | } 31 | return normalized 32 | } 33 | 34 | // findSetCookie finds the set cookie header from the normalized headers 35 | func (s *Wappalyze) findSetCookie(headers map[string]string) []string { 36 | value, ok := headers["set-cookie"] 37 | if !ok { 38 | return nil 39 | } 40 | 41 | var values []string 42 | for _, v := range strings.Split(value, " ") { 43 | if v == "" { 44 | continue 45 | } 46 | if strings.Contains(v, ",") { 47 | values = append(values, strings.Split(v, ",")...) 48 | } else if strings.Contains(v, ";") { 49 | values = append(values, strings.Split(v, ";")...) 50 | } else { 51 | values = append(values, v) 52 | } 53 | } 54 | return values 55 | } 56 | -------------------------------------------------------------------------------- /wappalyzergo_test.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestCookiesDetect(t *testing.T) { 10 | wappalyzer, err := New() 11 | require.Nil(t, err, "could not create wappalyzer") 12 | 13 | matches := wappalyzer.Fingerprint(map[string][]string{ 14 | "Set-Cookie": {"_uetsid=ABCDEF"}, 15 | }, []byte("")) 16 | 17 | require.Contains(t, matches, "Microsoft Advertising", "Could not get correct match") 18 | 19 | t.Run("position", func(t *testing.T) { 20 | wappalyzerClient, _ := New() 21 | 22 | fingerprints := wappalyzerClient.Fingerprint(map[string][]string{ 23 | "Set-Cookie": {"path=/; jsessionid=111; path=/, jsessionid=111;"}, 24 | }, []byte("")) 25 | fingerprints1 := wappalyzerClient.Fingerprint(map[string][]string{ 26 | "Set-Cookie": 
{"jsessionid=111; path=/, XSRF-TOKEN=; expires=test, path=/ laravel_session=eyJ*"}, 27 | }, []byte("")) 28 | 29 | require.Equal(t, map[string]struct{}{"Java": {}}, fingerprints, "could not get correct fingerprints") 30 | require.Equal(t, map[string]struct{}{"Java": {}, "Laravel": {}, "PHP": {}}, fingerprints1, "could not get correct fingerprints") 31 | }) 32 | } 33 | 34 | func TestHeadersDetect(t *testing.T) { 35 | wappalyzer, err := New() 36 | require.Nil(t, err, "could not create wappalyzer") 37 | 38 | matches := wappalyzer.Fingerprint(map[string][]string{ 39 | "Server": {"now"}, 40 | }, []byte("")) 41 | 42 | require.Contains(t, matches, "Vercel", "Could not get correct match") 43 | } 44 | 45 | func TestBodyDetect(t *testing.T) { 46 | wappalyzer, err := New() 47 | require.Nil(t, err, "could not create wappalyzer") 48 | 49 | t.Run("meta", func(t *testing.T) { 50 | matches := wappalyzer.Fingerprint(map[string][]string{}, []byte(` 51 |
52 | 53 | 54 | `)) 55 | require.Contains(t, matches, "Mura CMS:1", "Could not get correct match") 56 | }) 57 | 58 | t.Run("html-implied", func(t *testing.T) { 59 | matches := wappalyzer.Fingerprint(map[string][]string{}, []byte(` 60 | 61 | 62 | 63 | 64 | `)) 65 | require.Contains(t, matches, "AngularJS", "Could not get correct implied match") 66 | require.Contains(t, matches, "PHP", "Could not get correct implied match") 67 | require.Contains(t, matches, "Proximis Unified Commerce", "Could not get correct match") 68 | }) 69 | } 70 | 71 | func TestUniqueFingerprints(t *testing.T) { 72 | fingerprints := newUniqueFingerprints() 73 | fingerprints.setIfNotExists("test") 74 | require.Equal(t, map[string]struct{}{"test": {}}, fingerprints.getValues(), "could not get correct values") 75 | 76 | t.Run("linear", func(t *testing.T) { 77 | fingerprints.setIfNotExists("new:2.3.5") 78 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}}, fingerprints.getValues(), "could not get correct values") 79 | 80 | fingerprints.setIfNotExists("new") 81 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}}, fingerprints.getValues(), "could not get correct values") 82 | }) 83 | 84 | t.Run("opposite", func(t *testing.T) { 85 | fingerprints.setIfNotExists("another") 86 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}, "another": {}}, fingerprints.getValues(), "could not get correct values") 87 | 88 | fingerprints.setIfNotExists("another:2.3.5") 89 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}, "another:2.3.5": {}}, fingerprints.getValues(), "could not get correct values") 90 | }) 91 | } 92 | -------------------------------------------------------------------------------- /fingerprint_body.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "bytes" 5 | "unsafe" 6 | 7 | "golang.org/x/net/html" 8 | ) 9 | 10 | // checkBody checks for fingerprints in the 
HTML body 11 | func (s *Wappalyze) checkBody(body []byte) []string { 12 | var technologies []string 13 | 14 | bodyString := unsafeToString(body) 15 | 16 | technologies = append( 17 | technologies, 18 | s.fingerprints.matchString(bodyString, htmlPart)..., 19 | ) 20 | 21 | // Tokenize the HTML document and check for fingerprints as required 22 | tokenizer := html.NewTokenizer(bytes.NewReader(body)) 23 | 24 | for { 25 | tt := tokenizer.Next() 26 | switch tt { 27 | case html.ErrorToken: 28 | return technologies 29 | case html.StartTagToken: 30 | token := tokenizer.Token() 31 | switch token.Data { 32 | case "script": 33 | // Check if the script tag has a source file to check 34 | source, found := getScriptSource(token) 35 | if found { 36 | // Check the script tags for script fingerprints 37 | technologies = append( 38 | technologies, 39 | s.fingerprints.matchString(source, scriptPart)..., 40 | ) 41 | continue 42 | } 43 | 44 | // Check the text attribute of the tag for javascript based technologies. 45 | // The next token should be the contents of the script tag 46 | if tokenType := tokenizer.Next(); tokenType != html.TextToken { 47 | continue 48 | } 49 | 50 | // TODO: JS requires a running VM, for checking properties. Only 51 | // possible with headless for now :( 52 | 53 | // data := tokenizer.Token().Data 54 | // technologies = append( 55 | // technologies, 56 | // s.fingerprints.matchString(data, jsPart)..., 57 | // ) 58 | case "meta": 59 | // For meta tag, we are only interested in name and content attributes. 
60 | name, content, found := getMetaNameAndContent(token) 61 | if !found { 62 | continue 63 | } 64 | technologies = append( 65 | technologies, 66 | s.fingerprints.matchKeyValueString(name, content, metaPart)..., 67 | ) 68 | } 69 | case html.SelfClosingTagToken: 70 | token := tokenizer.Token() 71 | if token.Data != "meta" { 72 | continue 73 | } 74 | 75 | // Parse the meta tag and check for tech 76 | name, content, found := getMetaNameAndContent(token) 77 | if !found { 78 | continue 79 | } 80 | technologies = append( 81 | technologies, 82 | s.fingerprints.matchKeyValueString(name, content, metaPart)..., 83 | ) 84 | } 85 | } 86 | } 87 | 88 | func (s *Wappalyze) getTitle(body []byte) string { 89 | var title string 90 | 91 | // Tokenize the HTML document and check for fingerprints as required 92 | tokenizer := html.NewTokenizer(bytes.NewReader(body)) 93 | 94 | for { 95 | tt := tokenizer.Next() 96 | switch tt { 97 | case html.ErrorToken: 98 | return title 99 | case html.StartTagToken: 100 | token := tokenizer.Token() 101 | switch token.Data { 102 | case "title": 103 | // Next text token will be the actual title of the page 104 | if tokenType := tokenizer.Next(); tokenType != html.TextToken { 105 | continue 106 | } 107 | title = tokenizer.Token().Data 108 | } 109 | } 110 | } 111 | } 112 | 113 | // getMetaNameAndContent gets name and content attributes from meta html token 114 | func getMetaNameAndContent(token html.Token) (string, string, bool) { 115 | if len(token.Attr) < keyValuePairLength { 116 | return "", "", false 117 | } 118 | 119 | var name, content string 120 | for _, attr := range token.Attr { 121 | switch attr.Key { 122 | case "name": 123 | name = attr.Val 124 | case "content": 125 | content = attr.Val 126 | } 127 | } 128 | return name, content, true 129 | } 130 | 131 | // getScriptSource gets src tag from a script tag 132 | func getScriptSource(token html.Token) (string, bool) { 133 | if len(token.Attr) < 1 { 134 | return "", false 135 | } 136 | 137 | var 
// unsafeToString converts a byte slice to a string without copying.
//
// The returned string shares memory with data, so data must not be modified
// for as long as the string is in use. A nil or empty slice yields "".
//
// It uses unsafe.String/unsafe.SliceData (Go 1.20+), the supported way to do
// this conversion, instead of reinterpreting the slice header as a string
// header.
//
// Reference - https://github.com/golang/go/issues/25484
func unsafeToString(data []byte) string {
	return unsafe.String(unsafe.SliceData(data), len(data))
}
// uniqueFingerprints is a set of detected technologies. Each entry is either
// a bare name ("PHP") or a name with a version ("PHP:8.1").
type uniqueFingerprints struct {
	values map[string]struct{}
}

// newUniqueFingerprints returns an empty, ready-to-use set.
func newUniqueFingerprints() uniqueFingerprints {
	return uniqueFingerprints{values: map[string]struct{}{}}
}

// getValues returns the underlying set itself, not a copy.
func (u uniqueFingerprints) getValues() map[string]struct{} {
	return u.values
}

// versionSeparator splits an entry into its name and version parts.
const versionSeparator = ":"

// separateAppVersion splits "name:version" into its two parts. A value that
// does not contain exactly one separator is returned unchanged with an empty
// version.
func separateAppVersion(value string) (string, string) {
	name, version, found := strings.Cut(value, versionSeparator)
	if !found || strings.Contains(version, versionSeparator) {
		return value, ""
	}
	return name, version
}

// setIfNotExists adds value to the set while keeping at most the more
// specific form of a plain/versioned pair:
//   - if the bare name is already stored and value carries a version, the
//     bare entry is replaced by the versioned one;
//   - if the bare name is already stored and value has no version, nothing
//     changes;
//   - if value is a bare name and a "value:version" entry is already stored,
//     nothing changes;
//   - otherwise value is added as is.
func (u uniqueFingerprints) setIfNotExists(value string) {
	name, version := separateAppVersion(value)

	if _, known := u.values[name]; known {
		if version == "" {
			return
		}
		// Upgrade the bare entry to the versioned one.
		delete(u.values, name)
		u.values[name+versionSeparator+version] = struct{}{}
		return
	}

	// Skip value when a single-separator "value:<version>" entry exists.
	for existing := range u.values {
		prefix, rest, versioned := strings.Cut(existing, versionSeparator)
		if versioned && prefix == value && !strings.Contains(rest, versionSeparator) {
			return
		}
	}
	u.values[value] = struct{}{}
}
140 | for _, application := range s.checkHeaders(normalizedHeaders) { 141 | uniqueFingerprints.setIfNotExists(application) 142 | } 143 | 144 | cookies := s.findSetCookie(normalizedHeaders) 145 | // Run cookie based fingerprinting if we have a set-cookie header 146 | if len(cookies) > 0 { 147 | for _, application := range s.checkCookies(cookies) { 148 | uniqueFingerprints.setIfNotExists(application) 149 | } 150 | } 151 | 152 | // Check for stuff in the body finally 153 | if strings.Contains(normalizedHeaders["content-type"], "text/html") { 154 | bodyTech := s.checkBody(normalizedBody) 155 | for _, application := range bodyTech { 156 | uniqueFingerprints.setIfNotExists(application) 157 | } 158 | title := s.getTitle(body) 159 | return uniqueFingerprints.getValues(), title 160 | } 161 | return uniqueFingerprints.getValues(), "" 162 | } 163 | 164 | // FingerprintWithInfo identifies technologies on a target, 165 | // based on the received response headers and body. 166 | // It also returns basic information about the technology, such as description 167 | // and website URL. 168 | // 169 | // Body should not be mutated while this function is being called, or it may 170 | // lead to unexpected things. 171 | func (s *Wappalyze) FingerprintWithInfo(headers map[string][]string, body []byte) map[string]AppInfo { 172 | apps := s.Fingerprint(headers, body) 173 | result := make(map[string]AppInfo, len(apps)) 174 | 175 | for app := range apps { 176 | if fingerprint, ok := s.fingerprints.Apps[app]; ok { 177 | result[app] = AppInfo{ 178 | Description: fingerprint.description, 179 | Website: fingerprint.website, 180 | CPE: fingerprint.cpe, 181 | } 182 | } 183 | } 184 | 185 | return result 186 | } 187 | 188 | // FingerprintWithCats identifies technologies on a target, 189 | // based on the received response headers and body. 
190 | // It also returns categories information about the technology, is there's any 191 | // Body should not be mutated while this function is being called, or it may 192 | // lead to unexpected things. 193 | func (s *Wappalyze) FingerprintWithCats(headers map[string][]string, body []byte) map[string]CatsInfo { 194 | apps := s.Fingerprint(headers, body) 195 | result := make(map[string]CatsInfo, len(apps)) 196 | 197 | for app := range apps { 198 | if fingerprint, ok := s.fingerprints.Apps[app]; ok { 199 | result[app] = CatsInfo{ 200 | Cats: fingerprint.cats, 201 | } 202 | } 203 | } 204 | 205 | return result 206 | } 207 | -------------------------------------------------------------------------------- /cmd/update-fingerprints/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "net/http" 11 | "os" 12 | "reflect" 13 | "sort" 14 | "strings" 15 | ) 16 | 17 | var fingerprints = flag.String("fingerprints", "../../fingerprints_data.json", "File to write wappalyzer fingerprints to") 18 | 19 | // Fingerprints contains a map of fingerprints for tech detection 20 | type Fingerprints struct { 21 | // Apps is organized as