├── .gitignore ├── fingerprints_data.go ├── go.mod ├── .github ├── workflows │ ├── build-test.yml │ ├── lint-test.yml │ ├── codeql-analysis.yml │ ├── autorelease-tag.yml │ └── fingerprint-update.yml └── dependabot.yml ├── fingerprints_test.go ├── examples └── main.go ├── go.sum ├── LICENSE.md ├── fingerprint_headers.go ├── README.md ├── fingerprint_cookies.go ├── wappalyzergo_test.go ├── fingerprint_body.go ├── tech.go ├── cmd └── update-fingerprints │ └── main.go └── fingerprints.go /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | cmd/update-fingerprints/update-fingerprints 3 | -------------------------------------------------------------------------------- /fingerprints_data.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | _ "embed" 5 | ) 6 | 7 | //go:embed fingerprints_data.json 8 | var fingerprints string 9 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/projectdiscovery/wappalyzergo 2 | 3 | go 1.21 4 | 5 | require ( 6 | github.com/stretchr/testify v1.8.4 7 | golang.org/x/net v0.19.0 8 | ) 9 | 10 | require ( 11 | github.com/davecgh/go-spew v1.1.1 // indirect 12 | github.com/pmezard/go-difflib v1.0.0 // indirect 13 | gopkg.in/yaml.v3 v3.0.1 // indirect 14 | ) 15 | -------------------------------------------------------------------------------- /.github/workflows/build-test.yml: -------------------------------------------------------------------------------- 1 | name: 🔨 Build Test 2 | on: 3 | push: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | 8 | jobs: 9 | build: 10 | name: Test Builds 11 | runs-on: ubuntu-latest 12 | steps: 13 | - name: Set up Go 14 | uses: actions/setup-go@v4 15 | with: 16 | go-version: 1.21.x 17 | 18 | - name: Check out code 19 | uses: actions/checkout@v4 20 | 21 | - name: Test 22 | run: go test ./... -------------------------------------------------------------------------------- /.github/workflows/lint-test.yml: -------------------------------------------------------------------------------- 1 | name: 🙏🏻 Lint Test 2 | on: 3 | push: 4 | pull_request: 5 | workflow_dispatch: 6 | 7 | jobs: 8 | lint: 9 | name: Lint Test 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v4 14 | - name: Set up Go 15 | uses: actions/setup-go@v4 16 | with: 17 | go-version: 1.21.x 18 | - name: Run golangci-lint 19 | uses: golangci/golangci-lint-action@v3.5.0 20 | with: 21 | version: latest 22 | args: --timeout 5m 23 | working-directory: . 24 | -------------------------------------------------------------------------------- /fingerprints_test.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestVersionRegex(t *testing.T) { 10 | regex, err := newVersionRegex("JBoss(?:-([\\d.]+))?\\;confidence:50\\;version:\\1") 11 | require.NoError(t, err, "could not create version regex") 12 | 13 | matched, version := regex.MatchString("JBoss-2.3.9") 14 | require.True(t, matched, "could not get version regex match") 15 | require.Equal(t, "2.3.9", version, "could not get correct version") 16 | 17 | t.Run("confidence-only", func(t *testing.T) { 18 | _, err := newVersionRegex("\\;confidence:50") 19 | require.NoError(t, err, "could create invalid version regex") 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /examples/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "log" 7 | "net/http" 8 | 9 | wappalyzer "github.com/projectdiscovery/wappalyzergo" 10 | ) 11 | 12 | func main() { 13 | resp, err := http.DefaultClient.Get("https://www.hackerone.com") 14 | if err != nil { 15 | log.Fatal(err) 16 | } 17 | data, _ := io.ReadAll(resp.Body) // Ignoring error for example 18 | 19 | wappalyzerClient, err := wappalyzer.New() 20 | if err != nil { 21 | log.Fatal(err) 22 | } 23 | fingerprints := wappalyzerClient.Fingerprint(resp.Header, data) 24 | fmt.Printf("%v\n", fingerprints) 25 | // Output: map[Acquia Cloud Platform:{} Amazon EC2:{} Apache:{} Cloudflare:{} Drupal:{} PHP:{} Percona:{} React:{} Varnish:{}] 26 | 27 | fingerprintsWithCats := wappalyzerClient.FingerprintWithCats(resp.Header, data) 28 | fmt.Printf("%v\n", fingerprintsWithCats) 29 | } 30 | -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: 🚨 CodeQL Analysis 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: 7 | - dev 8 | 9 | jobs: 10 | analyze: 11 | name: Analyze 12 | runs-on: ubuntu-latest 13 | permissions: 14 | actions: read 15 | contents: read 16 | security-events: write 17 | 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | language: [ 'go' ] 22 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] 23 | 24 | steps: 25 | - name: Checkout repository 26 | uses: actions/checkout@v4 27 | 28 | # Initializes the CodeQL tools for scanning. 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v2 31 | with: 32 | languages: ${{ matrix.language }} 33 | 34 | - name: Autobuild 35 | uses: github/codeql-action/autobuild@v2 36 | 37 | - name: Perform CodeQL Analysis 38 | uses: github/codeql-action/analyze@v2 -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 2 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 3 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 4 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 5 | github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= 6 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 7 | golang.org/x/net v0.19.0 h1:zTwKpTd2XuCqf8huc7Fo2iSy+4RHPd10s4KzeTnVr1c= 8 | golang.org/x/net v0.19.0/go.mod h1:CfAk/cbD4CthTvqiEl8NpboMuiuOYsAr/7NOjZJtv1U= 9 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 10 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 11 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 12 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 13 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 ProjectDiscovery, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | 9 | # Maintain dependencies for GitHub Actions 10 | - package-ecosystem: "github-actions" 11 | directory: "/" 12 | schedule: 13 | interval: "weekly" 14 | target-branch: "main" 15 | commit-message: 16 | prefix: "chore" 17 | include: "scope" 18 | 19 | # Maintain dependencies for go modules 20 | - package-ecosystem: "gomod" 21 | directory: "/" 22 | schedule: 23 | interval: "weekly" 24 | target-branch: "main" 25 | commit-message: 26 | prefix: "chore" 27 | include: "scope" 28 | 29 | # Maintain dependencies for docker 30 | - package-ecosystem: "docker" 31 | directory: "/" 32 | schedule: 33 | interval: "weekly" 34 | target-branch: "main" 35 | commit-message: 36 | prefix: "chore" 37 | include: "scope" 38 | -------------------------------------------------------------------------------- /.github/workflows/autorelease-tag.yml: -------------------------------------------------------------------------------- 1 | name: 🔖 Release Tag 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["💡Fingerprints Update"] 6 | types: 7 | - completed 8 | workflow_dispatch: 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | 18 | - name: Get Commit Count 19 | id: get_commit 20 | run: git rev-list `git rev-list --tags --no-walk --max-count=1`..HEAD --count | xargs -I {} echo COMMIT_COUNT={} >> $GITHUB_OUTPUT 21 | 22 | - name: Create release and tag 23 | if: ${{ steps.get_commit.outputs.COMMIT_COUNT > 0 }} 24 | id: tag_version 25 | uses: mathieudutour/github-tag-action@v6.1 26 | with: 27 | github_token: ${{ secrets.GITHUB_TOKEN }} 28 | 29 | - name: Create a GitHub release 30 | if: ${{ steps.get_commit.outputs.COMMIT_COUNT > 0 }} 31 | uses: actions/create-release@v1 32 | env: 33 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 34 | with: 35 | tag_name: ${{ steps.tag_version.outputs.new_tag }} 36 | release_name: Release ${{ steps.tag_version.outputs.new_tag }} 37 | body: ${{ steps.tag_version.outputs.changelog }} -------------------------------------------------------------------------------- /fingerprint_headers.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | // checkHeaders checks if the headers for a target match the fingerprints 8 | // and returns the matched IDs if any. 9 | func (s *Wappalyze) checkHeaders(headers map[string]string) []string { 10 | technologies := s.fingerprints.matchMapString(headers, headersPart) 11 | return technologies 12 | } 13 | 14 | // normalizeHeaders normalizes the headers for the tech discovery on headers 15 | func (s *Wappalyze) normalizeHeaders(headers map[string][]string) map[string]string { 16 | normalized := make(map[string]string, len(headers)) 17 | data := getHeadersMap(headers) 18 | 19 | for header, value := range data { 20 | normalized[strings.ToLower(header)] = strings.ToLower(value) 21 | } 22 | return normalized 23 | } 24 | 25 | // GetHeadersMap returns a map[string]string of response headers 26 | func getHeadersMap(headersArray map[string][]string) map[string]string { 27 | headers := make(map[string]string, len(headersArray)) 28 | 29 | builder := &strings.Builder{} 30 | for key, value := range headersArray { 31 | for i, v := range value { 32 | builder.WriteString(v) 33 | if i != len(value)-1 { 34 | builder.WriteString(", ") 35 | } 36 | } 37 | headerValue := builder.String() 38 | 39 | headers[key] = headerValue 40 | builder.Reset() 41 | } 42 | return headers 43 | } 44 | -------------------------------------------------------------------------------- /.github/workflows/fingerprint-update.yml: -------------------------------------------------------------------------------- 1 | name: 💡Fingerprints Update 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 0 * * 0' 7 | 8 | jobs: 9 | build: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout Repo 13 | uses: actions/checkout@v4 14 | with: 15 | persist-credentials: false 16 | 17 | - name: Setup golang 18 | uses: actions/setup-go@v4 19 | with: 20 | go-version: 1.21.x 21 | 22 | - name: Installing Update binary 23 | run: | 24 | go install github.com/projectdiscovery/wappalyzergo/cmd/update-fingerprints 25 | shell: bash 26 | 27 | - name: Downloading latest wappalyzer changes 28 | run: | 29 | update-fingerprints -fingerprints fingerprints_data.json 30 | shell: bash 31 | 32 | - name: Create local changes 33 | run: | 34 | git add fingerprints_data.json 35 | 36 | - name: Commit files 37 | run: | 38 | git config --local user.email "action@github.com" 39 | git config --local user.name "GitHub Action" 40 | git commit -m "Weekly fingerprints update [$(date)] :robot:" -a --allow-empty 41 | 42 | - name: Push changes 43 | uses: ad-m/github-push-action@master 44 | with: 45 | github_token: ${{ secrets.GITHUB_TOKEN }} 46 | branch: ${{ github.ref }} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Wappalyzergo 2 | 3 | A high performance port of the Wappalyzer Technology Detection Library to Go. Inspired by [Webanalyze](https://github.com/rverton/webanalyze). 4 | 5 | Uses data from https://github.com/AliasIO/wappalyzer 6 | 7 | ## Features 8 | 9 | - Very simple and easy to use, with clean codebase. 10 | - Normalized regexes + auto-updating database of wappalyzer fingerprints. 11 | - Optimized for performance: parsing HTML manually for best speed. 12 | 13 | ### Using *go install* 14 | 15 | ```sh 16 | go install -v github.com/projectdiscovery/wappalyzergo/cmd/update-fingerprints@latest 17 | ``` 18 | 19 | After this command *wappalyzergo* library source will be in your current go.mod. 20 | 21 | ## Example 22 | Usage Example: 23 | 24 | ``` go 25 | package main 26 | 27 | import ( 28 | "fmt" 29 | "io" 30 | "log" 31 | "net/http" 32 | 33 | wappalyzer "github.com/projectdiscovery/wappalyzergo" 34 | ) 35 | 36 | func main() { 37 | resp, err := http.DefaultClient.Get("https://www.hackerone.com") 38 | if err != nil { 39 | log.Fatal(err) 40 | } 41 | data, _ := io.ReadAll(resp.Body) // Ignoring error for example 42 | 43 | wappalyzerClient, err := wappalyzer.New() 44 | fingerprints := wappalyzerClient.Fingerprint(resp.Header, data) 45 | fmt.Printf("%v\n", fingerprints) 46 | 47 | // Output: map[Acquia Cloud Platform:{} Amazon EC2:{} Apache:{} Cloudflare:{} Drupal:{} PHP:{} Percona:{} React:{} Varnish:{}] 48 | } 49 | ``` 50 | -------------------------------------------------------------------------------- /fingerprint_cookies.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | // checkCookies checks if the cookies for a target match the fingerprints 8 | // and returns the matched IDs if any. 9 | func (s *Wappalyze) checkCookies(cookies []string) []string { 10 | // Normalize the cookies for further processing 11 | normalized := s.normalizeCookies(cookies) 12 | 13 | technologies := s.fingerprints.matchMapString(normalized, cookiesPart) 14 | return technologies 15 | } 16 | 17 | const keyValuePairLength = 2 18 | 19 | // normalizeCookies normalizes the cookies and returns an 20 | // easily parsed format that can be processed upon. 21 | func (s *Wappalyze) normalizeCookies(cookies []string) map[string]string { 22 | normalized := make(map[string]string) 23 | 24 | for _, part := range cookies { 25 | parts := strings.SplitN(strings.Trim(part, " "), "=", keyValuePairLength) 26 | if len(parts) < keyValuePairLength { 27 | continue 28 | } 29 | normalized[parts[0]] = parts[1] 30 | } 31 | return normalized 32 | } 33 | 34 | // findSetCookie finds the set cookie header from the normalized headers 35 | func (s *Wappalyze) findSetCookie(headers map[string]string) []string { 36 | value, ok := headers["set-cookie"] 37 | if !ok { 38 | return nil 39 | } 40 | 41 | var values []string 42 | for _, v := range strings.Split(value, " ") { 43 | if v == "" { 44 | continue 45 | } 46 | if strings.Contains(v, ",") { 47 | values = append(values, strings.Split(v, ",")...) 48 | } else if strings.Contains(v, ";") { 49 | values = append(values, strings.Split(v, ";")...) 50 | } else { 51 | values = append(values, v) 52 | } 53 | } 54 | return values 55 | } 56 | -------------------------------------------------------------------------------- /wappalyzergo_test.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/require" 7 | ) 8 | 9 | func TestCookiesDetect(t *testing.T) { 10 | wappalyzer, err := New() 11 | require.Nil(t, err, "could not create wappalyzer") 12 | 13 | matches := wappalyzer.Fingerprint(map[string][]string{ 14 | "Set-Cookie": {"_uetsid=ABCDEF"}, 15 | }, []byte("")) 16 | 17 | require.Contains(t, matches, "Microsoft Advertising", "Could not get correct match") 18 | 19 | t.Run("position", func(t *testing.T) { 20 | wappalyzerClient, _ := New() 21 | 22 | fingerprints := wappalyzerClient.Fingerprint(map[string][]string{ 23 | "Set-Cookie": {"path=/; jsessionid=111; path=/, jsessionid=111;"}, 24 | }, []byte("")) 25 | fingerprints1 := wappalyzerClient.Fingerprint(map[string][]string{ 26 | "Set-Cookie": {"jsessionid=111; path=/, XSRF-TOKEN=; expires=test, path=/ laravel_session=eyJ*"}, 27 | }, []byte("")) 28 | 29 | require.Equal(t, map[string]struct{}{"Java": {}}, fingerprints, "could not get correct fingerprints") 30 | require.Equal(t, map[string]struct{}{"Java": {}, "Laravel": {}, "PHP": {}}, fingerprints1, "could not get correct fingerprints") 31 | }) 32 | } 33 | 34 | func TestHeadersDetect(t *testing.T) { 35 | wappalyzer, err := New() 36 | require.Nil(t, err, "could not create wappalyzer") 37 | 38 | matches := wappalyzer.Fingerprint(map[string][]string{ 39 | "Server": {"now"}, 40 | }, []byte("")) 41 | 42 | require.Contains(t, matches, "Vercel", "Could not get correct match") 43 | } 44 | 45 | func TestBodyDetect(t *testing.T) { 46 | wappalyzer, err := New() 47 | require.Nil(t, err, "could not create wappalyzer") 48 | 49 | t.Run("meta", func(t *testing.T) { 50 | matches := wappalyzer.Fingerprint(map[string][]string{}, []byte(` 51 | 52 | 53 | 54 | `)) 55 | require.Contains(t, matches, "Mura CMS:1", "Could not get correct match") 56 | }) 57 | 58 | t.Run("html-implied", func(t *testing.T) { 59 | matches := wappalyzer.Fingerprint(map[string][]string{}, []byte(` 60 | 61 | 62 | 63 | 64 | `)) 65 | require.Contains(t, matches, "AngularJS", "Could not get correct implied match") 66 | require.Contains(t, matches, "PHP", "Could not get correct implied match") 67 | require.Contains(t, matches, "Proximis Unified Commerce", "Could not get correct match") 68 | }) 69 | } 70 | 71 | func TestUniqueFingerprints(t *testing.T) { 72 | fingerprints := newUniqueFingerprints() 73 | fingerprints.setIfNotExists("test") 74 | require.Equal(t, map[string]struct{}{"test": {}}, fingerprints.getValues(), "could not get correct values") 75 | 76 | t.Run("linear", func(t *testing.T) { 77 | fingerprints.setIfNotExists("new:2.3.5") 78 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}}, fingerprints.getValues(), "could not get correct values") 79 | 80 | fingerprints.setIfNotExists("new") 81 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}}, fingerprints.getValues(), "could not get correct values") 82 | }) 83 | 84 | t.Run("opposite", func(t *testing.T) { 85 | fingerprints.setIfNotExists("another") 86 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}, "another": {}}, fingerprints.getValues(), "could not get correct values") 87 | 88 | fingerprints.setIfNotExists("another:2.3.5") 89 | require.Equal(t, map[string]struct{}{"test": {}, "new:2.3.5": {}, "another:2.3.5": {}}, fingerprints.getValues(), "could not get correct values") 90 | }) 91 | } 92 | -------------------------------------------------------------------------------- /fingerprint_body.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "bytes" 5 | "unsafe" 6 | 7 | "golang.org/x/net/html" 8 | ) 9 | 10 | // checkBody checks for fingerprints in the HTML body 11 | func (s *Wappalyze) checkBody(body []byte) []string { 12 | var technologies []string 13 | 14 | bodyString := unsafeToString(body) 15 | 16 | technologies = append( 17 | technologies, 18 | s.fingerprints.matchString(bodyString, htmlPart)..., 19 | ) 20 | 21 | // Tokenize the HTML document and check for fingerprints as required 22 | tokenizer := html.NewTokenizer(bytes.NewReader(body)) 23 | 24 | for { 25 | tt := tokenizer.Next() 26 | switch tt { 27 | case html.ErrorToken: 28 | return technologies 29 | case html.StartTagToken: 30 | token := tokenizer.Token() 31 | switch token.Data { 32 | case "script": 33 | // Check if the script tag has a source file to check 34 | source, found := getScriptSource(token) 35 | if found { 36 | // Check the script tags for script fingerprints 37 | technologies = append( 38 | technologies, 39 | s.fingerprints.matchString(source, scriptPart)..., 40 | ) 41 | continue 42 | } 43 | 44 | // Check the text attribute of the tag for javascript based technologies. 45 | // The next token should be the contents of the script tag 46 | if tokenType := tokenizer.Next(); tokenType != html.TextToken { 47 | continue 48 | } 49 | 50 | // TODO: JS requires a running VM, for checking properties. Only 51 | // possible with headless for now :( 52 | 53 | // data := tokenizer.Token().Data 54 | // technologies = append( 55 | // technologies, 56 | // s.fingerprints.matchString(data, jsPart)..., 57 | // ) 58 | case "meta": 59 | // For meta tag, we are only interested in name and content attributes. 60 | name, content, found := getMetaNameAndContent(token) 61 | if !found { 62 | continue 63 | } 64 | technologies = append( 65 | technologies, 66 | s.fingerprints.matchKeyValueString(name, content, metaPart)..., 67 | ) 68 | } 69 | case html.SelfClosingTagToken: 70 | token := tokenizer.Token() 71 | if token.Data != "meta" { 72 | continue 73 | } 74 | 75 | // Parse the meta tag and check for tech 76 | name, content, found := getMetaNameAndContent(token) 77 | if !found { 78 | continue 79 | } 80 | technologies = append( 81 | technologies, 82 | s.fingerprints.matchKeyValueString(name, content, metaPart)..., 83 | ) 84 | } 85 | } 86 | } 87 | 88 | func (s *Wappalyze) getTitle(body []byte) string { 89 | var title string 90 | 91 | // Tokenize the HTML document and check for fingerprints as required 92 | tokenizer := html.NewTokenizer(bytes.NewReader(body)) 93 | 94 | for { 95 | tt := tokenizer.Next() 96 | switch tt { 97 | case html.ErrorToken: 98 | return title 99 | case html.StartTagToken: 100 | token := tokenizer.Token() 101 | switch token.Data { 102 | case "title": 103 | // Next text token will be the actual title of the page 104 | if tokenType := tokenizer.Next(); tokenType != html.TextToken { 105 | continue 106 | } 107 | title = tokenizer.Token().Data 108 | } 109 | } 110 | } 111 | } 112 | 113 | // getMetaNameAndContent gets name and content attributes from meta html token 114 | func getMetaNameAndContent(token html.Token) (string, string, bool) { 115 | if len(token.Attr) < keyValuePairLength { 116 | return "", "", false 117 | } 118 | 119 | var name, content string 120 | for _, attr := range token.Attr { 121 | switch attr.Key { 122 | case "name": 123 | name = attr.Val 124 | case "content": 125 | content = attr.Val 126 | } 127 | } 128 | return name, content, true 129 | } 130 | 131 | // getScriptSource gets src tag from a script tag 132 | func getScriptSource(token html.Token) (string, bool) { 133 | if len(token.Attr) < 1 { 134 | return "", false 135 | } 136 | 137 | var source string 138 | for _, attr := range token.Attr { 139 | switch attr.Key { 140 | case "src": 141 | source = attr.Val 142 | } 143 | } 144 | return source, true 145 | } 146 | 147 | // unsafeToString converts a byte slice to string and does it with 148 | // zero allocations. 149 | // 150 | // NOTE: This function should only be used if its certain that the underlying 151 | // array has not been manipulated. 152 | // 153 | // Reference - https://github.com/golang/go/issues/25484 154 | func unsafeToString(data []byte) string { 155 | return *(*string)(unsafe.Pointer(&data)) 156 | } 157 | -------------------------------------------------------------------------------- /tech.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "strings" 7 | ) 8 | 9 | // Wappalyze is a client for working with tech detection 10 | type Wappalyze struct { 11 | fingerprints *CompiledFingerprints 12 | } 13 | 14 | // New creates a new tech detection instance 15 | func New() (*Wappalyze, error) { 16 | wappalyze := &Wappalyze{ 17 | fingerprints: &CompiledFingerprints{ 18 | Apps: make(map[string]*CompiledFingerprint), 19 | }, 20 | } 21 | 22 | err := wappalyze.loadFingerprints() 23 | if err != nil { 24 | return nil, err 25 | } 26 | return wappalyze, nil 27 | } 28 | 29 | // loadFingerprints loads the fingerprints and compiles them 30 | func (s *Wappalyze) loadFingerprints() error { 31 | var fingerprintsStruct Fingerprints 32 | err := json.Unmarshal([]byte(fingerprints), &fingerprintsStruct) 33 | if err != nil { 34 | return err 35 | } 36 | 37 | for i, fingerprint := range fingerprintsStruct.Apps { 38 | s.fingerprints.Apps[i] = compileFingerprint(fingerprint) 39 | } 40 | return nil 41 | } 42 | 43 | // Fingerprint identifies technologies on a target, 44 | // based on the received response headers and body. 45 | // 46 | // Body should not be mutated while this function is being called, or it may 47 | // lead to unexpected things. 48 | func (s *Wappalyze) Fingerprint(headers map[string][]string, body []byte) map[string]struct{} { 49 | uniqueFingerprints := newUniqueFingerprints() 50 | 51 | // Lowercase everything that we have received to check 52 | normalizedBody := bytes.ToLower(body) 53 | normalizedHeaders := s.normalizeHeaders(headers) 54 | 55 | // Run header based fingerprinting if the number 56 | // of header checks if more than 0. 57 | for _, application := range s.checkHeaders(normalizedHeaders) { 58 | uniqueFingerprints.setIfNotExists(application) 59 | } 60 | 61 | cookies := s.findSetCookie(normalizedHeaders) 62 | // Run cookie based fingerprinting if we have a set-cookie header 63 | if len(cookies) > 0 { 64 | for _, application := range s.checkCookies(cookies) { 65 | uniqueFingerprints.setIfNotExists(application) 66 | } 67 | } 68 | 69 | // Check for stuff in the body finally 70 | bodyTech := s.checkBody(normalizedBody) 71 | for _, application := range bodyTech { 72 | uniqueFingerprints.setIfNotExists(application) 73 | } 74 | return uniqueFingerprints.getValues() 75 | } 76 | 77 | type uniqueFingerprints struct { 78 | values map[string]struct{} 79 | } 80 | 81 | func newUniqueFingerprints() uniqueFingerprints { 82 | return uniqueFingerprints{ 83 | values: make(map[string]struct{}), 84 | } 85 | } 86 | 87 | func (u uniqueFingerprints) getValues() map[string]struct{} { 88 | return u.values 89 | } 90 | 91 | const versionSeparator = ":" 92 | 93 | // separateAppVersion returns app name and version 94 | func separateAppVersion(value string) (string, string) { 95 | if strings.Contains(value, versionSeparator) { 96 | if parts := strings.Split(value, versionSeparator); len(parts) == 2 { 97 | return parts[0], parts[1] 98 | } 99 | } 100 | return value, "" 101 | } 102 | 103 | func (u uniqueFingerprints) setIfNotExists(value string) { 104 | app, version := separateAppVersion(value) 105 | if _, ok := u.values[app]; ok { 106 | // Handles case when we get additional version information next 107 | if version != "" { 108 | delete(u.values, app) 109 | u.values[strings.Join([]string{app, version}, versionSeparator)] = struct{}{} 110 | } 111 | return 112 | } 113 | 114 | // Handle duplication for : based values 115 | for k := range u.values { 116 | if strings.Contains(k, versionSeparator) { 117 | if parts := strings.Split(k, versionSeparator); len(parts) == 2 && parts[0] == value { 118 | return 119 | } 120 | } 121 | } 122 | u.values[value] = struct{}{} 123 | } 124 | 125 | // FingerprintWithTitle identifies technologies on a target, 126 | // based on the received response headers and body. 127 | // It also returns the title of the page. 128 | // 129 | // Body should not be mutated while this function is being called, or it may 130 | // lead to unexpected things. 131 | func (s *Wappalyze) FingerprintWithTitle(headers map[string][]string, body []byte) (map[string]struct{}, string) { 132 | uniqueFingerprints := newUniqueFingerprints() 133 | 134 | // Lowercase everything that we have received to check 135 | normalizedBody := bytes.ToLower(body) 136 | normalizedHeaders := s.normalizeHeaders(headers) 137 | 138 | // Run header based fingerprinting if the number 139 | // of header checks if more than 0. 140 | for _, application := range s.checkHeaders(normalizedHeaders) { 141 | uniqueFingerprints.setIfNotExists(application) 142 | } 143 | 144 | cookies := s.findSetCookie(normalizedHeaders) 145 | // Run cookie based fingerprinting if we have a set-cookie header 146 | if len(cookies) > 0 { 147 | for _, application := range s.checkCookies(cookies) { 148 | uniqueFingerprints.setIfNotExists(application) 149 | } 150 | } 151 | 152 | // Check for stuff in the body finally 153 | if strings.Contains(normalizedHeaders["content-type"], "text/html") { 154 | bodyTech := s.checkBody(normalizedBody) 155 | for _, application := range bodyTech { 156 | uniqueFingerprints.setIfNotExists(application) 157 | } 158 | title := s.getTitle(body) 159 | return uniqueFingerprints.getValues(), title 160 | } 161 | return uniqueFingerprints.getValues(), "" 162 | } 163 | 164 | // FingerprintWithInfo identifies technologies on a target, 165 | // based on the received response headers and body. 166 | // It also returns basic information about the technology, such as description 167 | // and website URL. 168 | // 169 | // Body should not be mutated while this function is being called, or it may 170 | // lead to unexpected things. 171 | func (s *Wappalyze) FingerprintWithInfo(headers map[string][]string, body []byte) map[string]AppInfo { 172 | apps := s.Fingerprint(headers, body) 173 | result := make(map[string]AppInfo, len(apps)) 174 | 175 | for app := range apps { 176 | if fingerprint, ok := s.fingerprints.Apps[app]; ok { 177 | result[app] = AppInfo{ 178 | Description: fingerprint.description, 179 | Website: fingerprint.website, 180 | CPE: fingerprint.cpe, 181 | } 182 | } 183 | } 184 | 185 | return result 186 | } 187 | 188 | // FingerprintWithCats identifies technologies on a target, 189 | // based on the received response headers and body. 190 | // It also returns categories information about the technology, is there's any 191 | // Body should not be mutated while this function is being called, or it may 192 | // lead to unexpected things. 193 | func (s *Wappalyze) FingerprintWithCats(headers map[string][]string, body []byte) map[string]CatsInfo { 194 | apps := s.Fingerprint(headers, body) 195 | result := make(map[string]CatsInfo, len(apps)) 196 | 197 | for app := range apps { 198 | if fingerprint, ok := s.fingerprints.Apps[app]; ok { 199 | result[app] = CatsInfo{ 200 | Cats: fingerprint.cats, 201 | } 202 | } 203 | } 204 | 205 | return result 206 | } 207 | -------------------------------------------------------------------------------- /cmd/update-fingerprints/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "flag" 7 | "fmt" 8 | "io" 9 | "log" 10 | "net/http" 11 | "os" 12 | "reflect" 13 | "sort" 14 | "strings" 15 | ) 16 | 17 | var fingerprints = flag.String("fingerprints", "../../fingerprints_data.json", "File to write wappalyzer fingerprints to") 18 | 19 | // Fingerprints contains a map of fingerprints for tech detection 20 | type Fingerprints struct { 21 | // Apps is organized as 22 | Apps map[string]Fingerprint `json:"technologies"` 23 | } 24 | 25 | // Fingerprint is a single piece of information about a tech 26 | type Fingerprint struct { 27 | Cats []int `json:"cats"` 28 | CSS interface{} `json:"css"` 29 | Cookies map[string]string `json:"cookies"` 30 | JS map[string]string `json:"js"` 31 | Headers map[string]string `json:"headers"` 32 | HTML interface{} `json:"html"` 33 | Script interface{} `json:"scripts"` 34 | ScriptSrc interface{} `json:"scriptSrc"` 35 | Meta map[string]interface{} `json:"meta"` 36 | Implies interface{} `json:"implies"` 37 | Description string `json:"description"` 38 | Website string `json:"website"` 39 | CPE string `json:"cpe"` 40 | } 41 | 42 | // OutputFingerprints contains a map of fingerprints for tech detection 43 | // optimized and validated for the tech detection package 44 | type OutputFingerprints struct { 45 | // Apps is organized as 46 | Apps map[string]OutputFingerprint `json:"apps"` 47 | } 48 | 49 | // OutputFingerprint is a single piece of information about a tech validated and normalized 50 | type OutputFingerprint struct { 51 | Cats []int `json:"cats,omitempty"` 52 | CSS []string `json:"css,omitempty"` 53 | Cookies map[string]string `json:"cookies,omitempty"` 54 | JS []string `json:"js,omitempty"` 55 | Headers map[string]string `json:"headers,omitempty"` 56 | HTML []string `json:"html,omitempty"` 57 | Script []string `json:"scripts,omitempty"` 58 | ScriptSrc []string `json:"scriptSrc,omitempty"` 59 | Meta map[string][]string `json:"meta,omitempty"` 60 | Implies []string `json:"implies,omitempty"` 61 | Description string `json:"description,omitempty"` 62 | Website string `json:"website,omitempty"` 63 | CPE string `json:"cpe,omitempty"` 64 | } 65 | 66 | const fingerprintURL = "https://raw.githubusercontent.com/Lissy93/wapalyzer/master/src/technologies/%s.json" 67 | 68 | func makeFingerprintURLs() []string { 69 | files := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "_"} 70 | 71 | fingerprints := make([]string, 0, len(files)) 72 | for _, item := range files { 73 | fingerprints = append(fingerprints, fmt.Sprintf(fingerprintURL, item)) 74 | } 75 | return fingerprints 76 | } 77 | 78 | func main() { 79 | flag.Parse() 80 | 81 | fingerprintURLs := makeFingerprintURLs() 82 | 83 | fingerprintsOld := &Fingerprints{ 84 | Apps: make(map[string]Fingerprint), 85 | } 86 | for _, fingerprintItem := range fingerprintURLs { 87 | if err := gatherFingerprintsFromURL(fingerprintItem, fingerprintsOld); err != nil { 88 | log.Fatalf("Could not gather fingerprints %s: %v\n", fingerprintItem, err) 89 | } 90 | } 91 | 92 | log.Printf("Read fingerprints from the server\n") 93 | log.Printf("Starting normalizing of %d fingerprints...\n", len(fingerprintsOld.Apps)) 94 | 95 | outputFingerprints := normalizeFingerprints(fingerprintsOld) 96 | 97 | log.Printf("Got %d valid fingerprints\n", len(outputFingerprints.Apps)) 98 | 99 | fingerprintsFile, err := os.OpenFile(*fingerprints, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o666) 100 | if err != nil { 101 | log.Fatalf("Could not open fingerprints file %s: %s\n", *fingerprints, err) 102 | } 103 | 104 | // sort map keys and pretty print the json to make git diffs useful 105 | 106 | data, err := json.MarshalIndent(outputFingerprints, "", " ") 107 | if err != nil { 108 | log.Fatalf("Could not marshal fingerprints: %s\n", err) 109 | } 110 | _, err = fingerprintsFile.Write(data) 111 | if err != nil { 112 | log.Fatalf("Could not write fingerprints file: %s\n", err) 113 | } 114 | err = fingerprintsFile.Close() 115 | if err != nil { 116 | log.Fatalf("Could not close fingerprints file: %s\n", err) 117 | } 118 | } 119 | 120 | func gatherFingerprintsFromURL(URL string, fingerprints *Fingerprints) error { 121 | req, err := http.NewRequest(http.MethodGet, URL, nil) 122 | if err != nil { 123 | return err 124 | } 125 | 126 | resp, err := http.DefaultClient.Do(req) 127 | if err != nil { 128 | return err 129 | } 130 | defer resp.Body.Close() 131 | 132 | data, err := io.ReadAll(resp.Body) 133 | if err != nil { 134 | return err 135 | } 136 | 137 | fingerprintsOld := &Fingerprints{} 138 | err = json.NewDecoder(bytes.NewReader(data)).Decode(&fingerprintsOld.Apps) 139 | if err != nil { 140 | return err 141 | } 142 | 143 | for k, v := range fingerprintsOld.Apps { 144 | fingerprints.Apps[k] = v 145 | } 146 | return nil 147 | } 148 | 149 | func normalizeFingerprints(fingerprints *Fingerprints) *OutputFingerprints { 150 | outputFingerprints := &OutputFingerprints{Apps: make(map[string]OutputFingerprint)} 151 | 152 | for app, fingerprint := range fingerprints.Apps { 153 | output := OutputFingerprint{ 154 | Cats: fingerprint.Cats, 155 | Cookies: make(map[string]string), 156 | Headers: make(map[string]string), 157 | Meta: make(map[string][]string), 158 | Description: fingerprint.Description, 159 | Website: fingerprint.Website, 160 | CPE: fingerprint.CPE, 161 | } 162 | 163 | for cookie, value := range fingerprint.Cookies { 164 | output.Cookies[strings.ToLower(cookie)] = strings.ToLower(value) 165 | } 166 | for js := range fingerprint.JS { 167 | output.JS = append(output.JS, strings.ToLower(js)) 168 | } 169 | sort.Strings(output.JS) 170 | 171 | for header, pattern := range fingerprint.Headers { 172 | output.Headers[strings.ToLower(header)] = strings.ToLower(pattern) 173 | } 174 | 175 | // Use reflection type switch for determining HTML tag type 176 | if fingerprint.HTML != nil { 177 | v := reflect.ValueOf(fingerprint.HTML) 178 | 179 | switch v.Kind() { 180 | case reflect.String: 181 | data := v.Interface().(string) 182 | output.HTML = append(output.HTML, strings.ToLower(data)) 183 | case reflect.Slice: 184 | data := v.Interface().([]interface{}) 185 | 186 | for _, pattern := range data { 187 | pat := pattern.(string) 188 | output.HTML = append(output.HTML, strings.ToLower(pat)) 189 | } 190 | } 191 | 192 | sort.Strings(output.HTML) 193 | } 194 | 195 | // Use reflection type switch for determining Script type 196 | if fingerprint.Script != nil { 197 | v := reflect.ValueOf(fingerprint.Script) 198 | 199 | switch v.Kind() { 200 | case reflect.String: 201 | data := v.Interface().(string) 202 | output.Script = append(output.Script, strings.ToLower(data)) 203 | case reflect.Slice: 204 | data := v.Interface().([]interface{}) 205 | for _, pattern := range data { 206 | pat := pattern.(string) 207 | output.Script = append(output.Script, strings.ToLower(pat)) 208 | } 209 | } 210 | 211 | sort.Strings(output.Script) 212 | } 213 | 214 | // Use reflection type switch for determining ScriptSrc type 215 | if fingerprint.ScriptSrc != nil { 216 | v := reflect.ValueOf(fingerprint.ScriptSrc) 217 | 218 | switch v.Kind() { 219 | case reflect.String: 220 | data := v.Interface().(string) 221 | output.ScriptSrc = append(output.ScriptSrc, strings.ToLower(data)) 222 | case reflect.Slice: 223 | data := v.Interface().([]interface{}) 224 | for _, pattern := range data { 225 | pat := pattern.(string) 226 | output.ScriptSrc = append(output.ScriptSrc, strings.ToLower(pat)) 227 | } 228 | } 229 | 230 | sort.Strings(output.ScriptSrc) 231 | } 232 | 233 | for header, pattern := range fingerprint.Meta { 234 | v := reflect.ValueOf(pattern) 235 | 236 | switch v.Kind() { 237 | case reflect.String: 238 | data := strings.ToLower(v.Interface().(string)) 239 | if data == "" { 240 | output.Meta[strings.ToLower(header)] = []string{} 241 | } else { 242 | output.Meta[strings.ToLower(header)] = []string{data} 243 | } 244 | case reflect.Slice: 245 | data := v.Interface().([]interface{}) 246 | 247 | final := []string{} 248 | for _, pattern := range data { 249 | pat := pattern.(string) 250 | final = append(final, strings.ToLower(pat)) 251 | } 252 | sort.Strings(final) 253 | output.Meta[strings.ToLower(header)] = final 254 | } 255 | } 256 | 257 | // Use reflection type switch for determining "Implies" tag type 258 | if fingerprint.Implies != nil { 259 | v := reflect.ValueOf(fingerprint.Implies) 260 | 261 | switch v.Kind() { 262 | case reflect.String: 263 | data := v.Interface().(string) 264 | output.Implies = append(output.Implies, data) 265 | case reflect.Slice: 266 | data := v.Interface().([]interface{}) 267 | for _, pattern := range data { 268 | pat := pattern.(string) 269 | output.Implies = append(output.Implies, pat) 270 | } 271 | } 272 | 273 | sort.Strings(output.Implies) 274 | } 275 | 276 | // Use reflection type switch for determining CSS tag type 277 | if fingerprint.CSS != nil { 278 | v := reflect.ValueOf(fingerprint.CSS) 279 | 280 | switch v.Kind() { 281 | case reflect.String: 282 | data := v.Interface().(string) 283 | output.CSS = append(output.CSS, data) 284 | case reflect.Slice: 285 | data := v.Interface().([]interface{}) 286 | for _, pattern := range data { 287 | pat := pattern.(string) 288 | output.CSS = append(output.CSS, pat) 289 | } 290 | } 291 | 292 | sort.Strings(output.CSS) 293 | } 294 | 295 | // Only add if the fingerprint is valid 296 | outputFingerprints.Apps[app] = output 297 | } 298 | return outputFingerprints 299 | } 300 | -------------------------------------------------------------------------------- /fingerprints.go: -------------------------------------------------------------------------------- 1 | package wappalyzer 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strconv" 7 | "strings" 8 | ) 9 | 10 | // Fingerprints contains a map of fingerprints for tech detection 11 | type Fingerprints struct { 12 | // Apps is organized as 13 | Apps map[string]*Fingerprint `json:"apps"` 14 | } 15 | 16 | // Fingerprint is a single piece of information about a tech validated and normalized 17 | type Fingerprint struct { 18 | Cats []int `json:"cats"` 19 | CSS []string `json:"css"` 20 | Cookies map[string]string `json:"cookies"` 21 | JS []string `json:"js"` 22 | Headers map[string]string `json:"headers"` 23 | HTML []string `json:"html"` 24 | Script []string `json:"scripts"` 25 | ScriptSrc []string `json:"scriptSrcs"` 26 | Meta map[string][]string `json:"meta"` 27 | Implies []string `json:"implies"` 28 | Description string `json:"description"` 29 | Website string `json:"website"` 30 | CPE string `json:"cpe"` 31 | } 32 | 33 | // CompiledFingerprints contains a map of fingerprints for tech detection 34 | type CompiledFingerprints struct { 35 | // Apps is organized as 36 | Apps map[string]*CompiledFingerprint 37 | } 38 | 39 | // CompiledFingerprint contains the compiled fingerprints from the tech json 40 | type CompiledFingerprint struct { 41 | // cats contain categories that are implicit with this tech 42 | cats []int 43 | // implies contains technologies that are implicit with this tech 44 | implies []string 45 | // description contains fingerprint description 46 | description string 47 | // website contains a URL associated with the fingerprint 48 | website string 49 | // cookies contains fingerprints for target cookies 50 | cookies map[string]*versionRegex 51 | // js contains fingerprints for the js file 52 | js []*versionRegex 53 | // headers contains fingerprints for target headers 54 | headers map[string]*versionRegex 55 | // html contains fingerprints for the target HTML 56 | html []*versionRegex 57 | // script contains fingerprints for scripts 58 | script []*versionRegex 59 | // scriptSrc contains fingerprints for script srcs 60 | scriptSrc []*versionRegex 61 | // meta contains fingerprints for meta tags 62 | meta map[string][]*versionRegex 63 | // cpe contains the cpe for a fingerpritn 64 | cpe string 65 | } 66 | 67 | // AppInfo contains basic information about an App. 68 | type AppInfo struct { 69 | Description string 70 | Website string 71 | CPE string 72 | } 73 | 74 | // CatsInfo contains basic information about an App. 75 | type CatsInfo struct { 76 | Cats []int 77 | } 78 | 79 | type versionRegex struct { 80 | regex *regexp.Regexp 81 | skipRegex bool 82 | group int 83 | } 84 | 85 | const versionPrefix = "version:\\" 86 | 87 | // newVersionRegex creates a new version matching regex 88 | // TODO: handles simple group cases only as of now (no ternary) 89 | func newVersionRegex(value string) (*versionRegex, error) { 90 | splitted := strings.Split(value, "\\;") 91 | if len(splitted) == 0 { 92 | return nil, nil 93 | } 94 | 95 | compiled, err := regexp.Compile(splitted[0]) 96 | if err != nil { 97 | return nil, err 98 | } 99 | skipRegex := splitted[0] == "" 100 | regex := &versionRegex{regex: compiled, skipRegex: skipRegex} 101 | for _, part := range splitted { 102 | if strings.HasPrefix(part, versionPrefix) { 103 | group := strings.TrimPrefix(part, versionPrefix) 104 | if parsed, err := strconv.Atoi(group); err == nil { 105 | regex.group = parsed 106 | } 107 | } 108 | } 109 | return regex, nil 110 | } 111 | 112 | // MatchString returns true if a version regex matched. 113 | // The found version is also returned if any. 114 | func (v *versionRegex) MatchString(value string) (bool, string) { 115 | if v.skipRegex { 116 | return true, "" 117 | } 118 | matches := v.regex.FindAllStringSubmatch(value, -1) 119 | if len(matches) == 0 { 120 | return false, "" 121 | } 122 | 123 | var version string 124 | if v.group > 0 { 125 | for _, match := range matches { 126 | version = match[v.group] 127 | } 128 | } 129 | return true, version 130 | } 131 | 132 | // part is the part of the fingerprint to match 133 | type part int 134 | 135 | // parts that can be matched 136 | const ( 137 | cookiesPart part = iota + 1 138 | jsPart 139 | headersPart 140 | htmlPart 141 | scriptPart 142 | metaPart 143 | ) 144 | 145 | // loadPatterns loads the fingerprint patterns and compiles regexes 146 | func compileFingerprint(fingerprint *Fingerprint) *CompiledFingerprint { 147 | compiled := &CompiledFingerprint{ 148 | cats: fingerprint.Cats, 149 | implies: fingerprint.Implies, 150 | description: fingerprint.Description, 151 | website: fingerprint.Website, 152 | cookies: make(map[string]*versionRegex), 153 | js: make([]*versionRegex, 0, len(fingerprint.JS)), 154 | headers: make(map[string]*versionRegex), 155 | html: make([]*versionRegex, 0, len(fingerprint.HTML)), 156 | script: make([]*versionRegex, 0, len(fingerprint.Script)), 157 | scriptSrc: make([]*versionRegex, 0, len(fingerprint.ScriptSrc)), 158 | meta: make(map[string][]*versionRegex), 159 | cpe: fingerprint.CPE, 160 | } 161 | 162 | for header, pattern := range fingerprint.Cookies { 163 | fingerprint, err := newVersionRegex(pattern) 164 | if err != nil { 165 | continue 166 | } 167 | compiled.cookies[header] = fingerprint 168 | } 169 | 170 | for _, pattern := range fingerprint.JS { 171 | fingerprint, err := newVersionRegex(pattern) 172 | if err != nil { 173 | continue 174 | } 175 | compiled.js = append(compiled.js, fingerprint) 176 | } 177 | 178 | for header, pattern := range fingerprint.Headers { 179 | fingerprint, err := newVersionRegex(pattern) 180 | if err != nil { 181 | continue 182 | } 183 | compiled.headers[header] = fingerprint 184 | } 185 | 186 | for _, pattern := range fingerprint.HTML { 187 | fingerprint, err := newVersionRegex(pattern) 188 | if err != nil { 189 | continue 190 | } 191 | compiled.html = append(compiled.html, fingerprint) 192 | } 193 | 194 | for _, pattern := range fingerprint.Script { 195 | fingerprint, err := newVersionRegex(pattern) 196 | if err != nil { 197 | continue 198 | } 199 | compiled.script = append(compiled.script, fingerprint) 200 | } 201 | 202 | for _, pattern := range fingerprint.ScriptSrc { 203 | fingerprint, err := newVersionRegex(pattern) 204 | if err != nil { 205 | continue 206 | } 207 | compiled.scriptSrc = append(compiled.scriptSrc, fingerprint) 208 | } 209 | 210 | for meta, patterns := range fingerprint.Meta { 211 | var compiledList []*versionRegex 212 | 213 | for _, pattern := range patterns { 214 | fingerprint, err := newVersionRegex(pattern) 215 | if err != nil { 216 | continue 217 | } 218 | compiledList = append(compiledList, fingerprint) 219 | } 220 | compiled.meta[meta] = compiledList 221 | } 222 | return compiled 223 | } 224 | 225 | // matchString matches a string for the fingerprints 226 | func (f *CompiledFingerprints) matchString(data string, part part) []string { 227 | var matched bool 228 | var technologies []string 229 | 230 | for app, fingerprint := range f.Apps { 231 | var version string 232 | 233 | switch part { 234 | case jsPart: 235 | for _, pattern := range fingerprint.js { 236 | if valid, versionString := pattern.MatchString(data); valid { 237 | matched = true 238 | version = versionString 239 | } 240 | } 241 | case scriptPart: 242 | for _, pattern := range fingerprint.scriptSrc { 243 | if valid, versionString := pattern.MatchString(data); valid { 244 | matched = true 245 | version = versionString 246 | } 247 | } 248 | case htmlPart: 249 | for _, pattern := range fingerprint.html { 250 | if valid, versionString := pattern.MatchString(data); valid { 251 | matched = true 252 | version = versionString 253 | } 254 | } 255 | } 256 | 257 | // If no match, continue with the next fingerprint 258 | if !matched { 259 | continue 260 | } 261 | 262 | if version != "" { 263 | app = formatAppVersion(app, version) 264 | } 265 | // Append the technologies as well as implied ones 266 | technologies = append(technologies, app) 267 | if len(fingerprint.implies) > 0 { 268 | technologies = append(technologies, fingerprint.implies...) 269 | } 270 | matched = false 271 | } 272 | return technologies 273 | } 274 | 275 | // matchKeyValue matches a key-value store map for the fingerprints 276 | func (f *CompiledFingerprints) matchKeyValueString(key, value string, part part) []string { 277 | var matched bool 278 | var technologies []string 279 | 280 | for app, fingerprint := range f.Apps { 281 | var version string 282 | 283 | switch part { 284 | case cookiesPart: 285 | for data, pattern := range fingerprint.cookies { 286 | if data != key { 287 | continue 288 | } 289 | 290 | if valid, versionString := pattern.MatchString(value); valid { 291 | matched = true 292 | version = versionString 293 | break 294 | } 295 | } 296 | case headersPart: 297 | for data, pattern := range fingerprint.headers { 298 | if data != key { 299 | continue 300 | } 301 | 302 | if valid, versionString := pattern.MatchString(value); valid { 303 | matched = true 304 | version = versionString 305 | break 306 | } 307 | } 308 | case metaPart: 309 | for data, patterns := range fingerprint.meta { 310 | if data != key { 311 | continue 312 | } 313 | 314 | for _, pattern := range patterns { 315 | if valid, versionString := pattern.MatchString(value); valid { 316 | matched = true 317 | version = versionString 318 | break 319 | } 320 | } 321 | } 322 | } 323 | 324 | // If no match, continue with the next fingerprint 325 | if !matched { 326 | continue 327 | } 328 | 329 | // Append the technologies as well as implied ones 330 | if version != "" { 331 | app = formatAppVersion(app, version) 332 | } 333 | technologies = append(technologies, app) 334 | if len(fingerprint.implies) > 0 { 335 | technologies = append(technologies, fingerprint.implies...) 336 | } 337 | matched = false 338 | } 339 | return technologies 340 | } 341 | 342 | // matchMapString matches a key-value store map for the fingerprints 343 | func (f *CompiledFingerprints) matchMapString(keyValue map[string]string, part part) []string { 344 | var matched bool 345 | var technologies []string 346 | 347 | for app, fingerprint := range f.Apps { 348 | var version string 349 | 350 | switch part { 351 | case cookiesPart: 352 | for data, pattern := range fingerprint.cookies { 353 | value, ok := keyValue[data] 354 | if !ok { 355 | continue 356 | } 357 | if pattern == nil { 358 | matched = true 359 | } 360 | if valid, versionString := pattern.MatchString(value); valid { 361 | matched = true 362 | version = versionString 363 | break 364 | } 365 | } 366 | case headersPart: 367 | for data, pattern := range fingerprint.headers { 368 | value, ok := keyValue[data] 369 | if !ok { 370 | continue 371 | } 372 | 373 | if valid, versionString := pattern.MatchString(value); valid { 374 | matched = true 375 | version = versionString 376 | break 377 | } 378 | } 379 | case metaPart: 380 | for data, patterns := range fingerprint.meta { 381 | value, ok := keyValue[data] 382 | if !ok { 383 | continue 384 | } 385 | 386 | for _, pattern := range patterns { 387 | if valid, versionString := pattern.MatchString(value); valid { 388 | matched = true 389 | version = versionString 390 | break 391 | } 392 | } 393 | } 394 | } 395 | 396 | // If no match, continue with the next fingerprint 397 | if !matched { 398 | continue 399 | } 400 | 401 | // Append the technologies as well as implied ones 402 | if version != "" { 403 | app = formatAppVersion(app, version) 404 | } 405 | technologies = append(technologies, app) 406 | if len(fingerprint.implies) > 0 { 407 | technologies = append(technologies, fingerprint.implies...) 408 | } 409 | matched = false 410 | } 411 | return technologies 412 | } 413 | 414 | func formatAppVersion(app, version string) string { 415 | return fmt.Sprintf("%s:%s", app, version) 416 | } 417 | 418 | // GetFingerprints returns the fingerprint string from wappalyzer 419 | func GetFingerprints() string { 420 | return fingerprints 421 | } 422 | --------------------------------------------------------------------------------