├── CREDITS ├── Wappalyzer ├── __init__.py ├── webpage │ ├── __init__.py │ ├── _bs4.py │ ├── _stdlib.py │ └── _common.py ├── __main__.py ├── fingerprint.py └── Wappalyzer.py ├── .gitignore ├── Makefile ├── .github └── workflows │ ├── docs.yml │ └── test.yml ├── tox.ini ├── setup.py ├── README.rst ├── tests └── test_wappalyzer.py └── LICENSE /CREDITS: -------------------------------------------------------------------------------- 1 | The code in this repository is based on 2 | Elbert Alias' Wappalyzer utility: 3 | 4 | https://github.com/ElbertF/Wappalyzer 5 | -------------------------------------------------------------------------------- /Wappalyzer/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Welcome to ``python-Wappalyzer`` API documentation! 3 | 4 | :see: `Wappalyzer` and `WebPage`. 5 | """ 6 | 7 | from .Wappalyzer import Wappalyzer, analyze 8 | from .webpage import WebPage 9 | __all__ = ["Wappalyzer", 10 | "WebPage", 11 | "analyze"] 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # VScode 2 | .vscode 3 | 4 | # PyDoctor 5 | apidocs 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | env/ 18 | venv/ 19 | ENV/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | eggs/ 25 | .eggs/ 26 | lib/ 27 | lib64/ 28 | parts/ 29 | sdist/ 30 | var/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | 35 | # Installer logs 36 | pip-log.txt 37 | 38 | # Unit test / coverage reports 39 | .coverage 40 | .tox 41 | nosetests.xml 42 | .noseids 43 | 44 | # Translations 45 | *.mo 46 | 47 | # Mr Developer 48 | .mr.developer.cfg 49 | .project 50 | .pydevproject 51 | -------------------------------------------------------------------------------- 
/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: tests 2 | 3 | default: build 4 | 5 | install: 6 | wget -O Wappalyzer/data/technologies.json https://raw.githubusercontent.com/AliasIO/wappalyzer/master/src/technologies.json 7 | pip install . 8 | 9 | rebuild: clean install tests 10 | 11 | build: install tests 12 | 13 | clean: 14 | rm -f -r build/ 15 | rm -f -r bin/ 16 | rm -f -r dist/ 17 | rm -f -r *.egg-info 18 | find . -name '*.pyc' -exec rm -f {} + 19 | find . -name '*.pyo' -exec rm -f {} + 20 | find . -name '*~' -exec rm -f {} + 21 | find . -name '__pycache__' -exec rm -rf {} + 22 | find . -name '.pytest_cache' -exec rm -rf {} + 23 | 24 | tests: 25 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 26 | python -m pytest 27 | -------------------------------------------------------------------------------- /Wappalyzer/webpage/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | `Wappalyzer.WebPage` uses BeautifulSoup4 by default and falls back to the standard library if it's not available. 3 | 4 | The following objects are importable from this module: `WebPage`, `IWebPage`, `ITag`. 5 | 6 | :Note: You can directly use/subclass one of the ``WebPage`` classes provided 7 | in modules `_bs4` and `_stdlib` if you'd like more control. 8 | Alternatively, you can write your own ``WebPage`` from scratch by subclassing the `IWebPage` interface. 9 | """ 10 | from ._common import IWebPage, ITag 11 | try: 12 | from ._bs4 import WebPage 13 | except Exception: 14 | try: 15 | from ._stdlib import WebPage # type: ignore 16 | except Exception as e: 17 | raise ImportError( 18 | """Cannot use Wappalyzer, missing required parser libraries. 19 | You can either install 'lxml' and 'beautifulsoup4' OR install 'dom_query'. 
20 | The latter option makes Wappalyzer use the standard library HTML parser.""") from e -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Publish API Docs 2 | on: 3 | push: 4 | branches: [ master ] 5 | 6 | jobs: 7 | deploy: 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@master 12 | - name: Set up Python 3.8 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: 3.8 16 | 17 | - name: Install package 18 | run: | 19 | python -m pip install --upgrade pip setuptools wheel 20 | python -m pip install .[dev] 21 | python -m pip install docutils pydoctor 22 | 23 | pydoctor --version 24 | 25 | - name: Generate API documentation with pydoctor 26 | run: | 27 | # Run pydoctor build 28 | tox -e docs -- $(git rev-parse HEAD) 29 | 30 | - name: Publish API documentation to the gh-pages branch 31 | uses: peaceiris/actions-gh-pages@v3 32 | with: 33 | github_token: ${{ secrets.GITHUB_TOKEN }} 34 | publish_dir: ./apidocs 35 | commit_message: "Generate API documentation" 36 | -------------------------------------------------------------------------------- /Wappalyzer/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | from .Wappalyzer import analyze 4 | 5 | def get_parser() -> argparse.ArgumentParser: 6 | """Get the CLI `argparse.ArgumentParser`""" 7 | parser = argparse.ArgumentParser(description="python-Wappalyzer CLI", prog="python -m Wappalyzer") 8 | parser.add_argument('url', help='URL to analyze') 9 | parser.add_argument('--update', action='store_true', help='Use the latest technologies file downloaded from the internet') 10 | parser.add_argument('--user-agent', help='Request user agent', dest='useragent') 11 | parser.add_argument('--timeout', help='Request timeout', type=int, default=10) 12 | 
parser.add_argument('--no-verify', action='store_true', help='Skip SSL cert verify', dest='noverify') 13 | return parser 14 | 15 | def main(args) -> None: 16 | """Entrypoint 17 | :param args: `Namespace` returned by `argparse.ArgumentParser.parse_args`. 18 | """ 19 | result = analyze(args.url, update=args.update, useragent=args.useragent, timeout=args.timeout, verify=not args.noverify) 20 | print(json.dumps(result)) 21 | 22 | if __name__ == '__main__': 23 | main(get_parser().parse_args()) 24 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | minversion=3.20.1 3 | requires= 4 | virtualenv>=20.0.35 5 | envlist = 6 | test,mypy,docs 7 | 8 | [testenv:test] 9 | description = run tests (unittest) 10 | 11 | passenv = * 12 | 13 | extras = dev 14 | 15 | commands = 16 | pytest -vv 17 | 18 | [testenv:mypy] 19 | description = run mypy (static type checker) 20 | 21 | extras = dev 22 | 23 | commands = 24 | mypy \ 25 | --cache-dir="{toxworkdir}/mypy_cache" \ 26 | {tty:--pretty:} \ 27 | {posargs:Wappalyzer} 28 | 29 | [testenv:docs] 30 | description = build the documentation 31 | 32 | extras = docs 33 | 34 | setenv = 35 | TOX_INI_DIR = {toxinidir} 36 | 37 | commands = 38 | pydoctor \ 39 | ./Wappalyzer \ 40 | --project-name=python-Wappalyzer \ 41 | --project-url=https://github.com/chorsley/python-Wappalyzer/ \ 42 | --html-viewsource-base=https://github.com/chorsley/python-Wappalyzer/tree/{posargs:master}/ \ 43 | --make-html --quiet \ 44 | --html-output={toxinidir}/apidocs \ 45 | --project-base-dir={toxinidir} \ 46 | --docformat=restructuredtext \ 47 | --intersphinx=https://docs.python.org/3/objects.inv \ 48 | --intersphinx=https://docs.aiohttp.org/en/stable/objects.inv \ 49 | --intersphinx=https://requests.readthedocs.io/en/stable/objects.inv 50 | -------------------------------------------------------------------------------- /setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | import pathlib 3 | 4 | setup( 5 | name = "python-Wappalyzer", 6 | version = "0.4.0", 7 | description = "Python implementation of the Wappalyzer web application " 8 | "detection utility", 9 | long_description = (pathlib.Path(__file__).parent / "README.rst").read_text(), 10 | long_description_content_type = "text/x-rst", 11 | author = "Chris Horsley (chorsley) and other contributors (See git history)", 12 | url = "https://github.com/chorsley/python-Wappalyzer", 13 | classifiers = [ 14 | 'Development Status :: 3 - Alpha', 15 | 'Intended Audience :: Developers', 16 | 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', 17 | 'Programming Language :: Python :: 3', 18 | 'Topic :: Internet :: WWW/HTTP', 19 | ], 20 | packages = find_packages(exclude='tests'), 21 | package_data = {'Wappalyzer': ['data/technologies.json']}, 22 | install_requires = [ 'beautifulsoup4', 23 | 'lxml', 24 | 'requests', 25 | 'aiohttp', 26 | 'cached_property', ], 27 | extras_require = { 28 | # Pin pydoctor version until https://github.com/twisted/pydoctor/issues/513 is fixed 29 | 'docs': ["pydoctor==21.2.2", "docutils"], 30 | 'dev': ["tox", "mypy>=0.902", "httpretty", "pytest", "pytest-asyncio", 31 | "types-requests", "types-pkg_resources", "aioresponses"] 32 | }, 33 | python_requires = '>=3.6', 34 | ) -------------------------------------------------------------------------------- /Wappalyzer/webpage/_bs4.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implementation of WebPage based on bs4, depends on lxml. 
3 | """ 4 | from typing import Iterator, Mapping 5 | #Just to check if it's available 6 | import lxml # type: ignore 7 | from bs4 import BeautifulSoup, Tag as bs4_Tag # type: ignore 8 | from cached_property import cached_property # type: ignore 9 | 10 | from ._common import BaseWebPage, BaseTag 11 | 12 | class Tag(BaseTag): 13 | 14 | def __init__(self, name: str, attributes: Mapping[str, str], soup: bs4_Tag) -> None: 15 | super().__init__(name, attributes) 16 | self._soup = soup 17 | 18 | @cached_property 19 | def inner_html(self) -> str: 20 | return self._soup.decode_contents() 21 | 22 | class WebPage(BaseWebPage): 23 | """ 24 | Simple representation of a web page, decoupled 25 | from any particular HTTP library's API. 26 | 27 | Well, except for the class methods that use `requests` 28 | or `aiohttp` to create the WebPage. 29 | 30 | This object is designed to be created for each website scanned 31 | by python-Wappalyzer. 32 | It will parse the HTML with BeautifulSoup to find