├── .dockerignore ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── custom.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── ci.yml │ └── codeql-analysis.yml ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests ├── __init__.py ├── test-requirements.txt └── unit │ ├── __init__.py │ ├── data_duplicates_negative.json │ ├── data_html_negative.json │ ├── data_html_positive.json │ ├── data_sitemap_negative.json │ ├── data_sitemap_positive.json │ ├── data_url_negative.json │ ├── data_url_positive.json │ ├── data_visible_tags.json │ ├── data_webpage.json │ ├── test_stop_words.py │ ├── test_webpage_analysis.py │ └── test_website_analysis.py └── webedge ├── __init__.py ├── cli_output.py ├── social_websites.py ├── stop_words.py ├── warnings.py ├── webedge.py ├── webpage_analysis.py └── website_analysis.py /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | #docker should ignore the tests folder 132 | tests -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = 3 | # don't traverse git directory 4 | .git, 5 | # don't traverse cached files 6 | __pycache__, 7 | # don't traverse venv files 8 | bin, 9 | lib, 10 | share, 11 | local, 12 | # don't traverse autogenerated scripts 13 | migrations 14 | max-line-length = 99 15 | 16 | # Specify a list of codes to ignore. 17 | ignore = 18 | E722, W503, E251, E501 -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "pip" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | open-pull-requests-limit: 10 8 | reviewers: 9 | - "HarshCasper" 10 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Python CI Workflow 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | test: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - "3.6" 17 | - "3.7" 18 | - "3.8" 19 | - "3.9" 20 | 21 | steps: 22 | - uses: actions/checkout@v2 23 | 24 | - name: Set up Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | 29 | - name: Upgrade pip version 30 | run: | 31 | python3 -m pip install --upgrade pip 32 | - name: Installs all the Dependencies 33 | run: | 34 | python3 setup.py install 35 | - name: Checks the Application Build 36 | run: | 37 | pip3 install wheel 38 | python3 setup.py sdist bdist_wheel 39 | - name: Tests the Application 40 | run: | 41 | pip3 install -r tests/test-requirements.txt 42 | nosetests --with-coverage --cover-package=webedge tests.unit 43 | - name: Lint with flake8 44 | run: | 45 | pip3 install flake8 46 | flake8 . -------------------------------------------------------------------------------- /.github/workflows/codeql-analysis.yml: -------------------------------------------------------------------------------- 1 | name: "CodeQL Analysis - Python" 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | jobs: 10 | CodeQL-Build: 11 | 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | language: ['python'] 16 | 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Checkout repository 21 | uses: actions/checkout@v2 22 | 23 | - name: Initialize CodeQL 24 | uses: github/codeql-action/init@v1 25 | with: 26 | languages: python 27 | setup-python-dependencies: false 28 | 29 | - name: Perform CodeQL Analysis 30 | uses: github/codeql-action/analyze@v1 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.7 2 | 3 | WORKDIR /app 4 | 5 | COPY requirements.txt ./ 6 | 7 | RUN pip install -r requirements.txt 8 | 9 | COPY . . 10 | 11 | RUN python setup.py install 12 | 13 | CMD ["webedge", "-d", "https://ajitesh13.github.io"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Harsh Bardhan Mishra 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![WebEdge](https://socialify.git.ci/HarshCasper/WebEdge/image?description=1&descriptionEditable=Bringing%20Edge%20to%20your%20Web%20Performance%20%F0%9F%94%A5%E2%9C%A8&forks=1&issues=1&language=1&pulls=1&stargazers=1&theme=Light) 2 | 3 |

4 | We all have inborn talent and also inborn failings,
5 | So often scorn a balance, chained to our own railings,
6 | And our world misses a website that deserved to be a star,
7 | But not unfurled in all its might, merely cowering from afar,
8 | Why not take your Van Dyke, or Rembrandt seen by few,
9 | And let us make it see the light, exposed to global view,
10 | Don't hide them in shadows behind barriers of your mind,
11 | Where pride and jealous arrows make them hard to find,
12 | Instead turn to experts just as good as you would like to be,
13 | Who you'll learn to trust, and who will set your website free.
14 |

15 |

16 | Developed with ❤️ by your friends at MLH Fellowship Team-1. 17 |

18 |

19 | version 1.0.2 20 | license MIT 21 | MLH Fellowship Team 1 22 | GitHub-Actions-Build 23 | Code Format: Black 24 | PyPI version 25 | PyPi downloads 26 |

27 | 28 | ## 💥 Introduction 29 | 30 | > Bringing Edge to your Web Performance 31 | 32 | Rise of Web has heralded the increasing ways in which we optimize Digital Performance. With SEO and Web Performance playing an important part, Developers feel lost around Performance needs. WebEdge aims to fix this 🌐 33 | 34 | WebEdge have been introduced to suggest Web Optimizations for the App that can speed up operations and boost productivity ⚡ 35 | 36 | ## 💡 Why did we build it? 37 | 38 | As Frontend Developers, Performance plays an important part for Ranking and User Experience. The priority is such that it cannot be avoided any longer. WebEdge provides a Python Package for you to scrap you Website and auto-suggest improvements you can make to improve your Optimization Ranking ♾️ 39 | 40 | With this Package, we aim to have a unified tool to improve your SEO Ranking with real-time optimizations, that you can fix as a Developer. Sounds interesting? Well it is 🔥 41 | 42 | ## 🚀 Installation 43 | 44 | To install WebEdge, we can use `pip`: 45 | 46 | ```sh 47 | pip3 install webedge 48 | ``` 49 | 50 | The standard Python package will setup the CLI and you can use the same for local testing and analysis of your website and webpages. 51 | 52 | ```sh 53 | _ __ __ ______ __ 54 | | | / /__ / /_ / ____/___/ /___ ____ 55 | | | /| / / _ \/ __ \/ __/ / __ / __ `/ _ \ 56 | | |/ |/ / __/ /_/ / /___/ /_/ / /_/ / __/ 57 | |__/|__/\___/_.___/_____/\__,_/\__, /\___/ 58 | /____/ 59 | 60 | 61 | usage: webedge [-h] -d DOMAIN [-s SITEMAP] [-p PAGE] 62 | ``` 63 | 64 | ## 🛠️ Local development 65 | 66 | That's pretty easy. To ensure that you are able to install everything properly, we would recommend you to have Git, Python and pip installed. You should ideally work with a Virtual Environment, such as `venv` or the `virtualenv` module, to get the best out of the package. 67 | 68 | We will first start with setting up the Local Project Environment: 69 | 70 | ```sh 71 | git clone https://github.com/HarshCasper/WebEdge.git 72 | cd WebEdge 73 | virtualenv venv 74 | source venv/bin/activate 75 | pip3 install -r requirements.txt 76 | python3 setup.py install 77 | ``` 78 | 79 | Once you run the Commands and get everything fine, we are all set to run the tool ✔️ 80 | 81 | Let's run the tool now: 82 | 83 | ```sh 84 | webedge -d http://[DOMAIN_NAME]/ 85 | ``` 86 | 87 | * For example if your domain is `https://fastcoder.netlify.app/` then your command should be (you can use `http` or `https` in the command according to your needs): 88 | 89 | ```sh 90 | webedge -d https://fastcoder.netlify.app/ 91 | ``` 92 | 93 | Pass your Website to the tool and you will get a generated JSON highlighting all the achievements you have made in SEO Optimization or the warnings being displayed by the same 🔑 94 | 95 | To run the tests, simply push: 96 | 97 | ```sh 98 | nosetests --with-coverage --cover-package=webedge tests.unit 99 | ``` 100 | 101 | To build with Docker, simply push: 102 | 103 | **Building using docker** 104 | ```bash 105 | $ docker build -t 'app:webedge' . 106 | $ docker run app:webedge 107 | ``` 108 | 109 | ## 🛑 External Tools 110 | 111 | The Python Files have been linted using [flake8](https://flake8.pycqa.org/) which automatically suggests linting errors and issues with formatting and styling. You can run the `flake8` command with the given configuration in the Project 🍀 112 | 113 | We are also making use of CodeQL Analysis, which can be viewed [here](.github/workflows/codeql-analysis.yml). 
This allows us to identify potential bugs and anti-patterns with each push to the repository, and potentially fix it 🐛 114 | 115 | For setting up CI/CD, we are making use of [GitHub Actions](https://github.com/features/actions). With a simple configuration set-up, we were able to test each build for specific issues, which can be viewed [here](.github/workflows/ci.yml) 🌱 116 | 117 | ## 📜 LICENSE 118 | 119 | [MIT License](https://github.com/HarshCasper/WebEdge/blob/main/LICENSE) 120 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.3 2 | beautifulsoup4==4.9.3 3 | bs4==0.0.1 4 | CacheControl==0.12.6 5 | certifi==2019.11.28 6 | chardet==3.0.4 7 | clanimate==0.0.1 8 | click==7.1.2 9 | colorama==0.4.4 10 | contextlib2==0.6.0 11 | distlib==0.3.0 12 | distro==1.4.0 13 | html5lib==1.0.1 14 | idna==2.8 15 | ipaddr==2.2.0 16 | joblib==0.17.0 17 | lockfile==0.12.2 18 | msgpack==0.6.2 19 | nltk==3.5 20 | packaging==20.3 21 | pep517==0.8.2 22 | progress==1.5 23 | prompt-toolkit==1.0.14 24 | pyfiglet==0.8.post1 25 | Pygments==2.7.2 26 | PyInquirer==1.0.3 27 | pyparsing==2.4.6 28 | pytoml==0.1.21 29 | pyyaml==5.3.1 30 | regex==2020.11.13 31 | requests==2.25.0 32 | retrying==1.3.3 33 | SentimentAnalysis==0.8 34 | six==1.15.0 35 | soupsieve==2.0.1 36 | tqdm==4.53.0 37 | urllib3==1.26.2 38 | vaderSentiment==3.3.2 39 | wcwidth==0.2.5 40 | webencodings==0.5.1 41 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | cover-branches = true 3 | cover-erase = true 4 | cover-inclusive = true 5 | cover-min-percentage = 90 6 | cover-package = webedge 7 | match = ^test 8 | where = tests 9 | 10 | with-doctest = true -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('requirements.txt') as f: 4 | REQUIREMENTS = f.read().splitlines() 5 | with open('README.md', encoding='utf8') as f: 6 | README = f.read() 7 | 8 | setup( 9 | name='WebEdge', 10 | version='1.0.2', 11 | license='MIT License', 12 | author='MLH Fellowship Team 1', 13 | author_email='erbeusgriffincasper@gmail.com', 14 | description='Bringing Edge to your Web Performance', 15 | long_description=README, 16 | long_description_content_type='text/markdown', 17 | url='https://github.com/HarshCasper/WebEdge', 18 | install_requires=REQUIREMENTS, 19 | packages=find_packages(exclude = ["*.tests", "*.tests.*", "tests.*", "tests"]), 20 | entry_points={ 21 | 'console_scripts': [ 22 | 'webedge = webedge.webedge:main' 23 | ] 24 | } 25 | ) 26 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HarshCasper/WebEdge/3175e89a1753c9ef9a5e69766d355319206f84a3/tests/__init__.py -------------------------------------------------------------------------------- /tests/test-requirements.txt: -------------------------------------------------------------------------------- 1 | nose==1.3.7 2 | coverage==5.3 3 | mock==4.0.2 4 | tox==3.20.1 5 | testtools==2.4.0 6 | ddt==1.4.1 7 | jsonschema==3.2.0 
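
The README above documents the `webedge` console script (wired to `webedge.webedge:main` in `setup.py`), and the unit tests below drive the same analysis objects directly from Python. For orientation, here is a minimal sketch of that programmatic use, assuming only the constructors and methods exercised in `tests/unit/test_website_analysis.py` and `tests/unit/test_webpage_analysis.py`; it is not a documented public API, and the supported entry point remains the `webedge` CLI.

```python
# Sketch based on how the unit tests exercise the package; not a documented API.
from webedge import website_analysis, webpage_analysis

# Whole-site crawl, as in test_website_analysis.py:
# Spider(site_url, sitemap_url_or_None), then crawl(), then inspect results.
spider = website_analysis.Spider("https://fastcoder.netlify.app/", None)
spider.crawl()
print(spider.issues)    # warnings collected across the crawl
print(spider.achieved)  # badges earned across the crawl

# Single-page analysis, as in test_webpage_analysis.py:
# Webpage(url, html, titles_seen, descriptions_seen), then report().
sample_html = "<html><head><title>Sample page</title></head><body></body></html>"  # placeholder markup
page = webpage_analysis.Webpage(
    "https://fastcoder.netlify.app/",
    sample_html,
    {},  # titles seen so far (shared across pages to flag duplicates)
    {},  # descriptions seen so far
)
page_report = page.report()
print(page_report)
```
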
-------------------------------------------------------------------------------- /tests/unit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HarshCasper/WebEdge/3175e89a1753c9ef9a5e69766d355319206f84a3/tests/unit/__init__.py -------------------------------------------------------------------------------- /tests/unit/data_duplicates_negative.json: -------------------------------------------------------------------------------- 1 | { 2 | "TITLE_DUPLICATED": ["The cat in the hat", "TITLE_DUPLICATED"], 3 | "DESCRIPTION_DUPLICATED": ["", "DESCRIPTION_DUPLICATED"] 4 | } 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/unit/data_html_negative.json: -------------------------------------------------------------------------------- 1 | { 2 | "TITLE_MISSING": ["", "TITLE_MISSING"], 3 | "TITLE_MISSING_EMPTY": ["", "TITLE_MISSING"], 4 | "TITLE_TOO_SHORT": ["Short", "TITLE_TOO_SHORT"], 5 | "TITLE_TOO_LONG": ["This title is way too long to be a useful title. You should really try to keep the length to a reasonable size", "TITLE_TOO_LONG"], 6 | "TITLE_TOO_GENERIC": ["This is a Page", "TITLE_TOO_GENERIC"], 7 | "TITLE_TOO_GENERIC_UNTITLED": ["Untitled Page", "TITLE_TOO_GENERIC"], 8 | "TITLE_KEYWORD_STUFFED": ["Web Design, Design, Website Design, Design Websites in Atlanta", "TITLE_KEYWORD_STUFFED"], 9 | "DESCRIPTION_MISSING_EMPTY": ["", "DESCRIPTION_MISSING"], 10 | "DESCRIPTION_MISSING_CONTENT_TAG": ["", "DESCRIPTION_MISSING"], 11 | "DESCRIPTION_MISSING_ABSENT": ["", "DESCRIPTION_MISSING"], 12 | "DESCRIPTION_TOO_SHORT": ["", "DESCRIPTION_TOO_SHORT"], 13 | "DESCRIPTION_TOO_LONG": ["", "DESCRIPTION_TOO_LONG"], 14 | "DESCRIPTION_TOO_GENERIC_PAGE": ["", "DESCRIPTION_TOO_GENERIC"], 15 | "DESCRIPTION_TOO_GENERIC_UNTITLED": ["", "DESCRIPTION_TOO_GENERIC"], 16 | "DESCRIPTION_KEYWORD_STUFFED": ["", "DESCRIPTION_KEYWORD_STUFFED"], 17 | "URL_NOT_CANONICAL": ["", "URL_NOT_CANONICAL"], 18 | "IMAGE_LINK_ALT_MISSING": ["", "IMAGE_LINK_ALT_MISSING"], 19 | "IMAGE_LINK_ALT_MISSING_EMPTY": ["", "IMAGE_LINK_ALT_MISSING"], 20 | "ANCHOR_TEXT_MISSING": ["", "ANCHOR_TEXT_MISSING"], 21 | "ANCHOR_TEXT_TOO_SHORT": ["as", "ANCHOR_TEXT_TOO_SHORT"], 22 | "ANCHOR_TEXT_TOO_LONG": ["Click here if you want to see something really cool. We will do a bunch of magic and you may get spammed.", "ANCHOR_TEXT_TOO_LONG"], 23 | "ANCHOR_TEXT_TOO_GENERIC_PAGE1": ["Page 1", "ANCHOR_TEXT_TOO_GENERIC"], 24 | "ANCHOR_TEXT_TOO_GENERIC_CLICKHERE": ["Click Here!", "ANCHOR_TEXT_TOO_GENERIC"], 25 | "ANCHOR_TEXT_TOO_GENERIC_ARTICLE": ["Article One", "ANCHOR_TEXT_TOO_GENERIC"], 26 | "ANCHOR_HREF_TOO_LONG": ["Lengthy Link", "ANCHOR_HREF_TOO_LONG"], 27 | "ANCHOR_HREF_EQUALS_TEXT": ["404.html", "ANCHOR_HREF_EQUALS_TEXT"], 28 | "BROKEN_LINK_RELATIVE": ["404.html", "BROKEN_LINK"], 29 | "BROKEN_LINK_ABSOLUTE": ["404.html", "BROKEN_LINK"], 30 | "BROKEN_LINK_DUPLICATE": ["404.html404.html", "BROKEN_LINK"], 31 | "ANCHOR_NO_FOLLOW": ["Go External", "ANCHOR_NO_FOLLOW"], 32 | "IMAGE_SRC_MISSING": ["", "IMAGE_SRC_MISSING"], 33 | "IMAGE_ALT_MISSING": ["", "IMAGE_ALT_MISSING"], 34 | "IMAGE_ALT_MISSING_EMPTY": ["", "IMAGE_ALT_MISSING"], 35 | "IMAGE_ALT_TOO_LONG": ["This description is way too long to be a useful description.  You should really try to keep the length to a reasonable size.This description is way too long to be a useful description.  You should really try to keep the length to a reasonable size. 
Its just way too long and try not to do this.", "IMAGE_ALT_TOO_LONG"], 36 | "H1_ONE_PER_PAGE_MISSING": ["

This is a second level heading

", "H1_ONE_PER_PAGE"], 37 | "H1_ONE_PER_PAGE_TWOFOUND": ["

Heading One

Heading Two

", "H1_ONE_PER_PAGE"], 38 | "H1_TOO_SHORT_EMPTY": ["

", "H1_TOO_SHORT"], 39 | "H1_TOO_SHORT": ["

Eg

", "H1_TOO_SHORT"], 40 | "KEYWORDS_META": ["", "KEYWORDS_META"], 41 | "WORDCOUNT_TOO_SHORT": ["

This is a good header

but not enough text

", "WORDCOUNT_TOO_SHORT"] 42 | } 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /tests/unit/data_html_positive.json: -------------------------------------------------------------------------------- 1 | { 2 | "TITLE_LENGTH": ["This is a good length title that is well optimized for SEO", "TITLE_LENGTH"], 3 | "TITLE_INFORMATIVE": ["This is a good length title that is well optimized for SEO", "TITLE_INFORMATIVE"], 4 | "TITLE_UNIQUE": ["This is a good length title that is well optimized for SEO", "TITLE_UNIQUE"], 5 | "DESCRIPTION_LENGTH": ["", "DESCRIPTION_LENGTH"], 6 | "DESCRIPTION_INFORMATIVE": ["", "DESCRIPTION_INFORMATIVE"], 7 | "URL_CANONICAL": ["", "URL_CANONICAL"], 8 | "IMAGE_LINK_ALT": ["Some like to run in the hot hot sun", "IMAGE_LINK_ALT"], 9 | "IMAGE_SRC_TOO_LONG_EXTERNAL": ["", ""], 10 | "ANCHOR_NO_FOLLOW": ["Go External", "ANCHOR_NO_FOLLOW"], 11 | "H1_ONE_PER_PAGE": ["

This is a good header

", "H1_ONE_PER_PAGE"], 12 | "H1_LENGTH": ["

This is a good header

", "H1_LENGTH"], 13 | "MAIL_LINKS": ["Message Me", ""], 14 | "WORDCOUNT": ["

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Aliquam in elit augue. Ut dolor ex, pretium a eros eget, interdum congue eros. Nulla eget vehicula eros. In efficitur sapien vitae vehicula suscipit. Aenean dictum elit eget turpis ornare maximus. Fusce at volutpat dolor. Sed quam ante, volutpat cursus massa nec, vehicula volutpat nulla. Etiam feugiat aliquam lectus et efficitur. Vestibulum porta metus interdum blandit hendrerit. Duis ultrices eu erat sit amet hendrerit. Ut ut diam maximus, luctus eros a, molestie ipsum.

Praesent ultricies orci sit amet lobortis ultrices. Donec consectetur nisl quis dignissim rutrum. Vestibulum vulputate massa dui, id elementum mi sagittis sed. Integer volutpat quis tellus et porta. Cras diam justo, volutpat at sodales at, venenatis et quam. Quisque a magna malesuada eros varius porta vel at nibh. Aliquam ac erat magna. Vestibulum sodales aliquam nulla at gravida. Vivamus ac ullamcorper nibh.

Aenean scelerisque dolor a arcu viverra, quis convallis nulla tincidunt. Curabitur posuere vestibulum mollis. Nunc tincidunt ipsum tellus, ut sollicitudin orci ornare non. Etiam sapien neque, cursus sed purus ut, tincidunt elementum nulla. Sed at gravida magna. Duis euismod blandit placerat. Sed eget efficitur quam. Sed vel scelerisque urna. Praesent nulla sem, eleifend eget luctus eget, vestibulum vel purus. Etiam aliquet feugiat felis, quis convallis sapien commodo sit amet. Duis vel diam eros.

Nunc malesuada et sem ac fringilla. Phasellus pulvinar molestie turpis vel feugiat. Nam consequat congue odio aliquet dictum. Nam rhoncus elementum leo, in varius dolor molestie et. Pellentesque vestibulum ante gravida, malesuada lectus et, scelerisque metus. Curabitur dapibus dolor sem, efficitur bibendum sem dignissim id. Curabitur rhoncus tempor elit, vel ultrices nibh tincidunt vitae. Phasellus imperdiet vel justo non tincidunt. Integer ultrices luctus purus ut eleifend. Duis et sem vel quam dignissim placerat at id erat. Ut a mattis felis.

Proin ut mi sed quam efficitur aliquet. Fusce in velit id diam dignissim facilisis sit amet nec eros. Etiam non est nibh. Mauris ultrices scelerisque mauris, nec dapibus ligula. Nulla eu posuere lectus. Sed accumsan eros eget nulla finibus dignissim. Fusce eleifend congue ipsum, vitae condimentum libero bibendum sit amet. Sed vulputate tincidunt dui, in vehicula odio mattis id. Etiam condimentum venenatis lacus, ut porttitor neque rutrum sit amet. Nulla sit amet urna at nunc gravida convallis eget sed justo. Nulla posuere ultrices eros, molestie placerat enim molestie in. Integer rutrum orci felis, et pulvinar odio fringilla eget. In vehicula sit amet massa et varius. Etiam non ex quis tortor laoreet aliquam.

Pellentesque nec arcu suscipit, facilisis leo quis, viverra velit. Fusce non ultricies nisi, eu auctor felis. Integer vehicula dui a quam tempus, ut venenatis tortor tempus. Donec turpis erat, consequat varius lorem in, fermentum scelerisque enim. Sed laoreet a sapien vel pellentesque. Aliquam nulla ligula, malesuada sit amet enim non, tempus dictum mauris. Fusce congue rutrum ex ut posuere. Interdum et malesuada fames ac ante ipsum primis in faucibus. Proin a nibh porta, consectetur tellus at, lacinia tellus. In consequat massa enim, non condimentum purus tincidunt nec. Proin sapien ipsum, consectetur laoreet mauris in, sagittis ornare turpis.

Morbi interdum rutrum mi ac vehicula. Aenean eget nulla a turpis suscipit accumsan. Nam vehicula eu diam mattis pharetra. In rutrum mi ac ipsum gravida, egestas consequat dui viverra. Cras eleifend sapien sed odio suscipit euismod. Praesent suscipit nec justo convallis gravida. Curabitur ullamcorper, velit cursus placerat facilisis, enim leo interdum tortor, sit amet blandit neque felis sed lectus. Phasellus non ex sed libero finibus rutrum eget molestie turpis. Quisque vehicula vehicula hendrerit.

Aenean ultricies porttitor lobortis. Sed vitae enim ipsum. Praesent feugiat vel dolor eget lacinia. In in porttitor arcu. Quisque consequat augue ut dolor semper vulputate. Donec auctor vulputate lectus quis maximus. Mauris ultricies molestie porta. Nulla facilisi. Suspendisse dignissim diam sollicitudin, imperdiet odio sed, rhoncus arcu.

Fusce sapien sem, blandit ac tellus pulvinar, volutpat accumsan metus. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Quisque vehicula, tortor at lobortis interdum, turpis erat sollicitudin orci, vel egestas dolor ligula at felis. Proin ac vestibulum nisl. Vivamus fermentum tellus volutpat sem tristique, a venenatis dolor volutpat. Nunc a mauris tincidunt, consequat dolor in, bibendum nisi. Quisque venenatis magna massa, et dictum ligula accumsan sed. Sed eget nulla vitae justo ultrices rutrum. Cras sagittis congue congue.

Fusce interdum, metus a volutpat molestie, lacus metus aliquet odio, eget suscipit nibh eros efficitur mi. Nullam ut tristique libero. Duis commodo, nulla at dapibus vulputate, ipsum turpis faucibus ipsum, eu egestas elit dolor sed leo. Nullam porttitor gravida dolor vel tincidunt. Mauris non ante elementum, luctus nisl quis, fermentum dui. Phasellus in fringilla odio. Proin sed rutrum nunc. Curabitur sed suscipit libero, fermentum consequat ligula. Vestibulum consectetur lacus sed mi interdum ullamcorper. Sed non ultrices arcu, sed tincidunt sem. Sed et ante ac eros vulputate luctus.

In vitae ipsum quis justo elementum vestibulum. Aenean finibus, magna vel dapibus molestie, nibh lectus faucibus tellus, ut imperdiet purus nisl ut leo. Ut laoreet nisi vel vehicula pretium. Mauris mi lectus, maximus sed libero id, sollicitudin tristique massa. Aliquam sollicitudin felis eget quam tempus, vel blandit mi facilisis. Nunc facilisis ullamcorper nisi non facilisis. Etiam et porttitor ligula, sed sagittis tellus. Mauris facilisis diam libero, ut fermentum nunc lobortis scelerisque. Etiam a commodo est, aliquet commodo justo.

Integer facilisis hendrerit massa a posuere. Vestibulum accumsan orci nec lorem auctor aliquam. Aliquam erat volutpat. Mauris ligula purus, lacinia nec elit vitae, fermentum porttitor nisl. Nunc finibus non sapien at sagittis. Morbi elementum elit nec justo varius, eu tristique risus egestas. Fusce ipsum purus, eleifend malesuada leo a, auctor vulputate arcu. Pellentesque in dui vitae massa dignissim hendrerit nec quis elit. Quisque dapibus, ex eu molestie vestibulum, dolor orci congue dui, non porta nisi purus in leo. Etiam ac lacus sit amet sem varius mollis. Donec id tempor mauris. Cras vel finibus lorem. Fusce tempor consectetur metus eget pharetra. Pellentesque aliquam bibendum cursus. Pellentesque feugiat tempus sollicitudin.

Donec vitae imperdiet lacus. Duis ac vulputate dolor. Praesent nec mauris luctus, eleifend justo quis, suscipit ipsum. Aenean dui nulla, dapibus nec consectetur eu, semper sit amet libero. Nunc vitae lacus tortor. Nam a commodo ipsum, ut accumsan lorem. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nullam eget sem est. Vivamus non vestibulum mauris. Nullam et ultricies est.

Nullam convallis risus eget ligula maximus aliquam. Pellentesque varius, enim sed volutpat congue, leo augue porttitor arcu, ac hendrerit ex leo ac neque. Curabitur sodales consequat volutpat. Cras ultrices magna non sollicitudin suscipit. Cras dignissim consectetur lacus eu imperdiet. Maecenas est ex, pulvinar quis aliquet a, consectetur vitae ex. Duis sem magna, facilisis vel lacinia auctor, pharetra vel arcu. Sed sit amet velit id ante sagittis blandit. Suspendisse venenatis vehicula massa, et auctor tortor laoreet mattis. Etiam tincidunt arcu vel erat dapibus feugiat. Cras augue metus, suscipit et felis nec, luctus scelerisque augue. Aliquam eu luctus erat. Maecenas. Aliquam eu luctus erat. Maecenas. Suspendisse venenatis vehicula massa, et auctor tortor laoreet mattis. Etiam tincidunt arcu vel erat dapibus feugiat. Cras augue metus, suscipit et felis nec, luctus scelerisque augue. Aliquam eu luctus erat. Maecenas. Aliquam eu luctus erat. Maecenas In rutrum mi ac ipsum gravida, egestas consequat dui viverra. Cras eleifend sapien sed odio suscipit euismod. Praesent suscipit nec justo convallis gravida. Curabitur ullamcorper, velit cursus placerat facilisis, enim leo interdum tortor, sit amet blandit neque felis sed lectus. Phasellus non ex sed libero finibus rutrum eget molestie turpis. Quisque vehicula vehicula hendrerit.

Aenean ultricies porttitor lobortis. Sed vitae enim ipsum. Praesent feugiat vel dolor eget lacinia. In in porttitor arcu. Quisque consequat augue ut dolor semper vulputate. Donec auctor vulputate lectus quis maximus. Mauris ultricies molestie porta. Nulla facilisi. Suspendisse dignissim diam sollicitudin, imperdiet odio sed, rhoncus arcu.

Fusce sapien sem, blandit ac tellus pulvinar, volutpat accumsan metus. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia Curae; Quisque vehicula, tortor at lobortis interdum, turpis erat sollicitudin orci, vel egestas dolor ligula at felis. Proin ac vestibulum nisl. Vivamus fermentum tellus volutpat sem tristique, a venenatis dolor volutpat. Nunc a mauris tincidunt, consequat dolor in, bibendum nisi. Quisque venenatis magna massa, et dictum ligula accumsan sed. Sed eget nulla vitae justo ultrices rutrum. Cras sagittis congue congue.

Fusce interdum, metus a volutpat molestie, lacus metus aliquet odio, eget suscipit nibh eros efficitur mi. Nullam ut tristique libero. Duis commodo, nulla at dapibus vulputate, ipsum turpis faucibus ipsum, eu egestas elit dolor sed leo. Nullam porttitor gravida dolor vel tincidunt. Mauris non ante elementum, luctus nisl quis, fermentum dui. Phasellus in fringilla odio. Proin sed rutrum nunc. Curabitur sed suscipit libero, fermentum consequat ligula. Vestibulum consectetur lacus sed mi interdum ullamcorper. Sed non ultrices arcu, sed tincidunt sem. Sed et ante ac eros vulputate luctus.

In vitae ipsum quis justo elementum vestibulum. Aenean finibus, magna vel dapibus molestie, nibh lectus faucibus tellus, ut imperdiet purus nisl ut leo. Ut laoreet nisi vel vehicula pretium. Mauris mi lectus, maximus sed libero id, sollicitudin tristique massa. Aliquam sollicitudin felis eget quam tempus, vel blandit mi facilisis. Nunc facilisis ullamcorper nisi non facilisis. Etiam et porttitor ligula, sed sagittis tellus. Mauris facilisis diam libero, ut fermentum nunc lobortis scelerisque. Etiam a commodo est, aliquet commodo justo.

Integer facilisis hendrerit massa a posuere. Vestibulum accumsan orci nec lorem auctor aliquam. Aliquam erat volutpat. Mauris ligula purus, lacinia nec elit vitae, fermentum porttitor nisl. Nunc finibus non sapien at sagittis. Morbi elementum elit nec justo varius, eu tristique risus egestas. Fusce ipsum purus, eleifend malesuada leo a, auctor vulputate arcu. Pellentesque in dui vitae massa dignissim hendrerit nec quis elit. Quisque dapibus, ex eu molestie vestibulum, dolor orci congue dui, non porta nisi purus in leo. Etiam ac lacus sit amet sem varius mollis. Donec id tempor mauris. Cras vel finibus lorem. Fusce tempor consectetur metus eget pharetra. Pellentesque aliquam bibendum cursus. Pellentesque feugiat tempus sollicitudin.

Donec vitae imperdiet lacus. Duis ac vulputate dolor. Praesent nec mauris luctus, eleifend justo quis, suscipit ipsum. Aenean dui nulla, dapibus nec consectetur eu, semper sit amet libero. Nunc vitae lacus tortor. Nam a commodo ipsum, ut accumsan lorem. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Nullam eget sem est. Vivamus non vestibulum mauris. Nullam et ultricies est.

Nullam convallis risus eget ligula maximus aliquam. Pellentesque varius, enim sed volutpat congue, leo augue porttitor arcu, ac hendrerit ex leo ac neque. Curabitur sodales consequat volutpat. Cras ultrices magna non sollicitudin suscipit. Cras dignissim consectetur lacus eu imperdiet. Maecenas est ex, pulvinar quis aliquet a, consectetur vitae ex. Duis sem magna, facilisis vel lacinia auctor, pharetra vel arcu. Sed sit amet velit id ante sagittis blandit. Suspendisse venenatis vehicula massa, et auctor tortor laoreet mattis. Etiam tincidunt arcu vel erat dapibus feugiat. Cras augue metus, suscipit et felis nec, luctus scelerisque augue. Aliquam eu luctus erat. Maecenas. Aliquam eu luctus erat. Maecenas. Suspendisse venenatis vehicula massa, et auctor tortor laoreet mattis. Etiam tincidunt arcu vel erat dapibus feugiat. Cras augue metus, suscipit et felis nec, luctus scelerisque augue. Aliquam eu luctus erat. Maecenas. Aliquam eu luctus erat. Maecenas

", "WORDCOUNT"], 15 | "ANCHOR_INTERNAL_HREF": ["Portfolio", ""], 16 | "ANCHOR_INTERNAL_HREF_SLASH": ["About Me", ""], 17 | "ANCHOR_INTERNAL_HREF_NOSLASH": ["About Me", ""], 18 | "ANCHOR_SOCIAL_HREF": ["Follow me on Twitter", ""] 19 | } 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/unit/data_sitemap_negative.json: -------------------------------------------------------------------------------- 1 | { 2 | "invalid_sitemap": "" 3 | } 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/unit/data_sitemap_positive.json: -------------------------------------------------------------------------------- 1 | { 2 | "positive": "https://harshcasper.github.iohttps://harshcasper.github.io/portfolio.html", 3 | "nolocations": "" 4 | } -------------------------------------------------------------------------------- /tests/unit/data_url_negative.json: -------------------------------------------------------------------------------- 1 | { 2 | "URL_TOO_LONG": ["http://www.amazon.com/gp/product/B0007TJ5OG/102-8372974-4064145?v=glance&n=502394&m=ATVPDKIKX0DER&n=3031001&s=photo&v=glance'", "URL_TOO_LONG"], 3 | "URL_TOO_GENERIC": ["http://www.domain.com/page1.html", "URL_TOO_GENERIC"], 4 | "URL_KEYWORD_STUFFED": ["http://www.domain.com/baseball-cards-baseball-cards-baseballcards.htm", "URL_KEYWORD_STUFFED"], 5 | "URL_TOO_DEEP": ["http://www.domain.com/redfish/bluefish/blackfish/bluefish/oldfish/newfish/little_star.html", "URL_TOO_DEEP"], 6 | "URL_CAPITALIZED": ["http://www.domain.com/SomeWhoRun.html", "URL_CAPITALIZED"] 7 | } 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /tests/unit/data_url_positive.json: -------------------------------------------------------------------------------- 1 | { 2 | "URL_CORRECTLY_CASED": ["http://www.domain.com/page1.html", "URL_CORRECTLY_CASED"] 3 | } 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /tests/unit/data_visible_tags.json: -------------------------------------------------------------------------------- 1 | { 2 | "style": ["", false], 3 | "script": ["", false], 4 | "document": ["[document]", false], 5 | "head": ["", false], 6 | "title": ["Title", false], 7 | "meta": ["", false], 8 | "comment": ["
", false], 9 | "p": ["

paragraph

", true], 10 | "a": ["link", true], 11 | "b": ["bold", true], 12 | "strong": ["strong", true], 13 | "em": ["em", true], 14 | "i": ["italic", true], 15 | "h1": ["

header 1

", true], 16 | "h2": ["

header 2

", true] 17 | } 18 | 19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /tests/unit/data_webpage.json: -------------------------------------------------------------------------------- 1 | { 2 | "good_website": "200|This is a title that is good.", 3 | "not_found_website": "404|This is a title that is not found.", 4 | "crashed_website": "500|This is a title for a broken site." 5 | } 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/unit/test_stop_words.py: -------------------------------------------------------------------------------- 1 | import testtools 2 | from webedge import stop_words 3 | 4 | 5 | class StopWordsTests(testtools.TestCase): 6 | 7 | def setUp(self): 8 | super(StopWordsTests, self).setUp() 9 | pass 10 | 11 | def test_stopwords(self): 12 | words = stop_words.ENGLISH_STOP_WORDS 13 | self.assertTrue("able" in words) 14 | self.assertTrue("about" in words) 15 | self.assertTrue("looks" in words) 16 | self.assertTrue("zero" in words) 17 | self.assertEqual(len(words), 635) 18 | -------------------------------------------------------------------------------- /tests/unit/test_webpage_analysis.py: -------------------------------------------------------------------------------- 1 | import bs4 2 | import ddt 3 | import testtools 4 | from webedge import webpage_analysis 5 | from webedge.warnings import BADGES 6 | from webedge.warnings import WARNINGS 7 | 8 | 9 | @ddt.ddt 10 | class WebpageTests(testtools.TestCase): 11 | 12 | def setUp(self): 13 | super(WebpageTests, self).setUp() 14 | self.titles = {} 15 | self.descriptions = {} 16 | 17 | def soup_file(self, html): 18 | soup = bs4.BeautifulSoup(html, "html.parser") 19 | return soup 20 | 21 | @ddt.file_data('data_html_positive.json') 22 | def test_analyze_positive(self, data): 23 | html = data[0] 24 | # badge = data[1] 25 | self.wp = webpage_analysis.Webpage( 26 | "https://harshcasper.github.io", 27 | html, 28 | self.titles, 29 | self.descriptions) 30 | self.wp.report() 31 | 32 | @ddt.file_data('data_html_negative.json') 33 | def test_analyze_negative(self, data): 34 | html = data[0] 35 | expected_error = data[1] 36 | self.wp = webpage_analysis.Webpage( 37 | "https://harshcasper.github.io", 38 | html, 39 | self.titles, 40 | self.descriptions) 41 | self.wp.report() 42 | self.assertTrue(any(issue["warning"] == WARNINGS[expected_error] 43 | for issue in self.wp.issues), 44 | "{0} not raised.".format(WARNINGS[expected_error])) 45 | 46 | @ddt.file_data('data_url_negative.json') 47 | def test_analyze_negative_url(self, data): 48 | url = data[0] 49 | expected_error = data[1] 50 | html = "" 51 | self.wp = webpage_analysis.Webpage( 52 | url, html, self.titles, self.descriptions) 53 | self.wp.report() 54 | self.assertTrue(any(issue["warning"] == WARNINGS[expected_error] 55 | for issue in self.wp.issues), 56 | "{0} not raised.".format(WARNINGS[expected_error])) 57 | 58 | @ddt.file_data('data_url_positive.json') 59 | def test_analyze_positive_url(self, data): 60 | url = data[0] 61 | badge = data[1] 62 | html = "" 63 | self.wp = webpage_analysis.Webpage( 64 | url, html, self.titles, self.descriptions) 65 | self.wp.report() 66 | if badge != "": 67 | self.assertTrue(any(earned["achievement"] == BADGES[badge] 68 | for earned in self.wp.achieved), 69 | "{0} not earned".format(BADGES[badge])) 70 | 71 | @ddt.file_data('data_visible_tags.json') 72 | def test_visible_tags(self, data): 73 | html = "" 74 | self.wp = webpage_analysis.Webpage( 75 | 
"https://harshcasper.github.io", 76 | html, 77 | self.titles, 78 | self.descriptions) 79 | soup = self.soup_file(data[0]) 80 | elements = soup.findAll(text=True) 81 | for tag in elements: 82 | result = self.wp.visible_tags(tag) 83 | self.assertEqual(result, data[1]) 84 | 85 | @ddt.file_data('data_duplicates_negative.json') 86 | def test_analyze_duplicates_negative(self, page): 87 | html = page[0] 88 | expected_error = page[1] 89 | report = {"pages": []} 90 | for i in range(0, 2): 91 | self.wp = webpage_analysis.Webpage( 92 | "https://harshcasper.github.io/page{0}.html".format(i), 93 | html, 94 | self.titles, 95 | self.descriptions) 96 | 97 | page_report = self.wp.report() 98 | report['pages'].append(page_report) 99 | self.assertTrue(any(issue["warning"] == WARNINGS[expected_error] 100 | for p in report['pages'] for issue in p['issues']), 101 | "{0} not raised. {1} {2}".format( 102 | WARNINGS[expected_error], 103 | self.titles, 104 | self.descriptions)) 105 | -------------------------------------------------------------------------------- /tests/unit/test_website_analysis.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import mock 3 | import requests 4 | from bs4 import BeautifulSoup as Soup 5 | from webedge import website_analysis 6 | from webedge.warnings import BADGES 7 | from webedge.warnings import WARNINGS 8 | import ddt 9 | import testtools 10 | 11 | 12 | @ddt.ddt 13 | class WebsiteTests(testtools.TestCase): 14 | 15 | def setUp(self): 16 | super(WebsiteTests, self).setUp() 17 | self.site_url = "http://www.mock{0}.com".format(uuid.uuid4()) 18 | 19 | def test_init_url(self): 20 | web_page = website_analysis.Spider(self.site_url, None) 21 | self.assertEqual(len(web_page.pages_to_crawl), 1) 22 | self.assertEqual(web_page.pages_to_crawl[0], self.site_url) 23 | 24 | @ddt.file_data("data_sitemap_positive.json") 25 | @mock.patch('webedge.website_analysis.requests.get') 26 | def test_init_sitemap_positive(self, sitemap_content, mock_requests): 27 | sitemap_url = "/sitemap.xml" 28 | mock_requests.return_value.status_code = requests.codes.ok 29 | mock_requests.return_value.content = sitemap_content 30 | web_page = website_analysis.Spider(self.site_url, self.site_url + sitemap_url) 31 | self.assertTrue(self.site_url in web_page.pages_to_crawl) 32 | 33 | @ddt.file_data("data_sitemap_negative.json") 34 | @mock.patch('webedge.website_analysis.requests.get') 35 | def test_init_sitemap_negative(self, sitemap_content, mock_requests): 36 | sitemap_url = "/sitemap.xml" 37 | mock_requests.return_value.status_code = requests.codes.not_found 38 | mock_requests.return_value.content = sitemap_content 39 | web_page = website_analysis.Spider(self.site_url, self.site_url + sitemap_url) 40 | self.assertTrue(self.site_url in web_page.pages_to_crawl) 41 | 42 | @ddt.file_data("data_sitemap_positive.json") 43 | def test_parse_sitemap(self, sitemap_content): 44 | web_page = website_analysis.Spider(self.site_url, None) 45 | locations = web_page._parse_sitemap(sitemap_content) 46 | soup = Soup(sitemap_content, "html.parser") 47 | urls = soup.findAll('url') 48 | self.assertEqual(len(locations), len(urls)) 49 | 50 | @ddt.file_data("data_webpage.json") 51 | @mock.patch('webedge.website_analysis.requests.get') 52 | def test_crawl(self, data, mock_requests): 53 | web_page = website_analysis.Spider(self.site_url, None) 54 | web_page._analyze_crawlers = mock.MagicMock(name="_analyze_crawlers") 55 | resp_code, content = data.split("|") 56 | 
mock_requests.return_value.status_code = int(resp_code) 57 | mock_requests.return_value.content = content 58 | web_page.crawl() 59 | if int(resp_code) == requests.codes.ok: 60 | self.assertEqual(len(web_page.issues), 0) 61 | elif int(resp_code) == requests.codes.not_found: 62 | self.assertTrue(any(issue["warning"] == WARNINGS["BROKEN_LINK"] 63 | for issue in web_page.issues), 64 | "{0} not raised.".format(WARNINGS["BROKEN_LINK"])) 65 | else: 66 | self.assertTrue(any(issue["warning"] == WARNINGS["SERVER_ERROR"] 67 | for issue in web_page.issues), 68 | "{0} not raised.".format(WARNINGS["SERVER_ERROR"])) 69 | 70 | @ddt.data("200", "404", "500") 71 | @mock.patch('webedge.website_analysis.requests.get') 72 | def test_analyze_crawlers(self, resp_code, mock_requests): 73 | mock_requests.return_value.status_code = int(resp_code) 74 | web_page = website_analysis.Spider(self.site_url, None) 75 | web_page._analyze_crawlers() 76 | if int(resp_code) == requests.codes.ok: 77 | self.assertTrue(any(earned["achievement"] == BADGES["ROBOTS.TXT"] 78 | for earned in web_page.achieved), 79 | "{0} not earned".format(BADGES["ROBOTS.TXT"])) 80 | else: 81 | self.assertTrue(any(issue["warning"] == WARNINGS["ROBOTS.TXT"] 82 | for issue in web_page.issues), 83 | "{0} not raised.".format(WARNINGS["ROBOTS.TXT"])) 84 | 85 | @ddt.data("200", "404", "500") 86 | @mock.patch('webedge.website_analysis.requests.get') 87 | def test_analyze_blog(self, resp_code, mock_requests): 88 | mock_requests.return_value.status_code = int(resp_code) 89 | web_page = website_analysis.Spider(self.site_url, None) 90 | web_page._analyze_blog() 91 | if int(resp_code) == requests.codes.ok: 92 | self.assertTrue( 93 | any(earned["achievement"] == BADGES["BLOG_DETECTED"] 94 | for earned in web_page.achieved), 95 | "{0} not earned".format(BADGES["BLOG_DETECTED"])) 96 | else: 97 | self.assertTrue( 98 | any(issue["warning"] == WARNINGS["BLOG_MISSING"] 99 | for issue in web_page.issues), 100 | "{0} not raised.".format(WARNINGS["BLOG_MISSING"])) 101 | -------------------------------------------------------------------------------- /webedge/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/HarshCasper/WebEdge/3175e89a1753c9ef9a5e69766d355319206f84a3/webedge/__init__.py -------------------------------------------------------------------------------- /webedge/cli_output.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, unicode_literals 2 | from PyInquirer import style_from_dict, Token, prompt 3 | from pyfiglet import Figlet 4 | from colorama import Fore, Style 5 | import json 6 | import clanimate 7 | import yaml 8 | 9 | jsonData = ["emptyData"] 10 | loadingAnim = clanimate.Animator( 11 | "scroll_text", 12 | 10, 13 | name=" => WebEdge Is Scrapping Your Website ", 14 | animation_frames="===============", 15 | ) 16 | 17 | style = style_from_dict( 18 | { 19 | Token.Separator: "#cc5454", 20 | Token.QuestionMark: "#00ff00 bold", 21 | Token.Selected: "#cc5454", # default 22 | Token.Pointer: "#673ab7 bold", 23 | Token.Instruction: "", # default 24 | Token.Answer: "#0000ff bold", 25 | Token.Question: "", 26 | } 27 | ) 28 | 29 | 30 | def shouldc2(answers): 31 | check1 = answers["c1"] 32 | if check1 != "site": 33 | return True 34 | return False 35 | 36 | 37 | def getm2(answers): 38 | m2 = "Which " + str(answers["c1"] + " you want to review?") 39 | return m2 40 | 41 | 42 | def getc2(answers): 43 | options = [] 44 | 
if answers["c1"] != "pages": 45 | options.append("no pages") 46 | return options 47 | for i in jsonData[answers["c1"]]: 48 | options.append(i["url"]) 49 | if not options: 50 | options.append("no pages") 51 | return options 52 | 53 | 54 | def filterc2(val): 55 | m = 0 56 | for i in jsonData["pages"]: 57 | if i["url"] == val: 58 | return m 59 | m = m + 1 60 | 61 | 62 | def filterc3(val): 63 | if val[0] == "I": 64 | return "issues" 65 | return "achieved" 66 | 67 | 68 | def outputJson(jsonValue): 69 | global jsonData # skipcq PYL-W0603 70 | jsonData = json.loads(jsonValue) 71 | options = [] 72 | for i in jsonData.keys(): 73 | options.append(i) 74 | print() 75 | questions = [ 76 | { 77 | "type": "list", 78 | "name": "c1", 79 | "message": "What do you want to check first ?", 80 | "choices": options, 81 | }, 82 | { 83 | "type": "list", 84 | "name": "c2", 85 | "message": "Which page do you want to review ?", 86 | "choices": getc2, 87 | "filter": filterc2, 88 | "when": shouldc2, 89 | }, 90 | { 91 | "type": "list", 92 | "name": "c3", 93 | "message": "Issues or Achievements?", 94 | "choices": ["Issues", "Achievements"], 95 | "filter": filterc3, 96 | }, 97 | { 98 | "type": "list", 99 | "name": "c4", 100 | "message": "See them all at once or one by one", 101 | "choices": ["All at Once", "One by One"], 102 | }, 103 | ] 104 | answers = prompt(questions, style=style) 105 | k1 = "warning" 106 | if answers["c3"] == "achieved": 107 | k1 = "achievement" 108 | 109 | if answers["c1"] == "pages": 110 | li = jsonData[answers["c1"]][answers["c2"]][answers["c3"]] 111 | else: 112 | li = jsonData[answers["c1"]][answers["c3"]] 113 | 114 | no = 0 115 | didBreak = False 116 | allAtOnce = False 117 | if answers["c4"] == "All at Once": 118 | allAtOnce = True 119 | for i in li: 120 | no = no + 1 121 | ivalue = str(i["value"]) 122 | message = ( 123 | "Point - " + str(no) + "\n Label : " + i[k1] + "\n Current : " + ivalue 124 | ) 125 | if allAtOnce is False: 126 | qn = [ 127 | { 128 | "type": "confirm", 129 | "name": "forward", 130 | "message": message + "\n Go to next?", 131 | "default": True, 132 | } 133 | ] 134 | a = prompt(qn, style=style) 135 | if a["forward"] is False: 136 | didBreak = True 137 | break 138 | else: 139 | if no % 2 == 1: 140 | print(Fore.BLUE + Style.BRIGHT + message + "\n" + Style.RESET_ALL) 141 | else: 142 | print(Fore.CYAN + Style.BRIGHT + message + "\n" + Style.RESET_ALL) 143 | 144 | if didBreak is False and allAtOnce is False: 145 | print("List Ended") 146 | 147 | retry = [ 148 | { 149 | "type": "confirm", 150 | "name": "again", 151 | "message": "Do you want to check other things?", 152 | "default": True, 153 | } 154 | ] 155 | res = prompt(retry, style=style) 156 | if res["again"] is True: 157 | outputJson(jsonValue) 158 | else: 159 | saveFile = [ 160 | { 161 | "type": "confirm", 162 | "name": "fileSave", 163 | "message": "Do you want to save your analysis in a file?", 164 | "default": True, 165 | } 166 | ] 167 | isFileSaved = prompt(saveFile, style=style) 168 | if isFileSaved["fileSave"] is True: 169 | filename = str(jsonData["pages"][0]["url"] + "_webedge_analysis.yaml") 170 | bad_chars = ["/", ":", "\\"] 171 | for i in bad_chars: 172 | filename = filename.replace(i, "") 173 | with open(filename, "w+") as f: 174 | f.write(yaml.dump(yaml.safe_load(json.dumps(json.loads(jsonValue))))) 175 | print(filename + " saved") 176 | 177 | print( 178 | Fore.GREEN 179 | + Style.BRIGHT 180 | + "=====================\nWebEdge Analysis Done\n=====================" 181 | + Style.RESET_ALL 182 | ) 183 | 184 | 185 | 
def outputName(name): 186 | f = Figlet(font="slant") 187 | print(Style.RESET_ALL) 188 | print(Fore.GREEN + Style.BRIGHT + f.renderText(name)) 189 | print(Style.RESET_ALL) 190 | 191 | 192 | def startLoading(): 193 | print(Fore.GREEN + Style.BRIGHT, end="") 194 | loadingAnim.start_animation() 195 | 196 | 197 | def endLoading(): 198 | loadingAnim.end_animation() 199 | print(Style.RESET_ALL, end="") 200 | 201 | 202 | def outputError(): 203 | catsay("WebEdge Couldn't Parse Your Website") 204 | 205 | 206 | def printError(errMessage): 207 | print( 208 | Style.RESET_ALL 209 | + Fore.RED 210 | + Style.BRIGHT 211 | + "\nERROR => " 212 | + errMessage 213 | + Style.RESET_ALL 214 | ) 215 | 216 | 217 | def exitError(): 218 | catsay("Unexpected Exit By User") 219 | 220 | 221 | def catsay(message): 222 | space = len(message) + 4 223 | upBlock = " " + "_" * space + "\n " + "/" + " " * space + "\\ \n |< " 224 | downBlock = " >|\n \\" + "_" * space + "/\n " 225 | catStr = ( 226 | " \\ \n " 227 | + " \\ /\\_/\\ ___\n " 228 | + " \\ = o_o =_______ \\ \\ \n " 229 | + " __^ __( \\.__) )\n " 230 | + " (@)<_____>__(_____)____/\n" 231 | ) 232 | print( 233 | Style.RESET_ALL 234 | + Fore.YELLOW 235 | + Style.BRIGHT 236 | + upBlock 237 | + message 238 | + downBlock 239 | + catStr 240 | + Style.RESET_ALL 241 | ) 242 | -------------------------------------------------------------------------------- /webedge/social_websites.py: -------------------------------------------------------------------------------- 1 | SOCIAL_WEBSITES = [ 2 | "www.facebook.com", 3 | "twitter.com", 4 | "plus.google.com", 5 | "www.instagram.com", 6 | "www.pinterest.com", 7 | "apple.com", 8 | "youtube.com", 9 | "www.google.com", 10 | "play.google.com", 11 | "microsoft.com", 12 | "www.blogger.com", 13 | "en.wikipedia.org", 14 | "wordpress.org", 15 | "maps.google.com", 16 | "docs.google.com", 17 | "linkedin.com", 18 | "mozilla.org", 19 | "youtu.be", 20 | "amazon.com", 21 | "github.com", 22 | "medium.com", 23 | "www.yahoo.com", 24 | "t.me", 25 | "paypal.com", 26 | "slideshare.net", 27 | "whatsapp.com", 28 | "telegram.me", 29 | "bit.ly", 30 | "quora.com", 31 | "discord.gg", 32 | "calendar.google.com", 33 | "outlook.com", 34 | "canva.com", 35 | "ieee.org", 36 | ] 37 | -------------------------------------------------------------------------------- /webedge/stop_words.py: -------------------------------------------------------------------------------- 1 | ENGLISH_STOP_WORDS = [ 2 | "able", 3 | "about", 4 | "above", 5 | "abroad", 6 | "according", 7 | "accordingly", 8 | "across", 9 | "actually", 10 | "adj", 11 | "after", 12 | "afterwards", 13 | "again", 14 | "against", 15 | "ago", 16 | "ahead", 17 | "ain't", 18 | "all", 19 | "allow", 20 | "allows", 21 | "almost", 22 | "alone", 23 | "along", 24 | "alongside", 25 | "already", 26 | "also", 27 | "although", 28 | "always", 29 | "am", 30 | "amid", 31 | "amidst", 32 | "among", 33 | "amongst", 34 | "an", 35 | "and", 36 | "another", 37 | "any", 38 | "anybody", 39 | "anyhow", 40 | "anyone", 41 | "anything", 42 | "anyway", 43 | "anyways", 44 | "anywhere", 45 | "apart", 46 | "appear", 47 | "appreciate", 48 | "appropriate", 49 | "are", 50 | "aren't", 51 | "around", 52 | "as", 53 | "a's", 54 | "aside", 55 | "ask", 56 | "asking", 57 | "associated", 58 | "at", 59 | "available", 60 | "away", 61 | "awfully", 62 | "back", 63 | "backward", 64 | "backwards", 65 | "be", 66 | "became", 67 | "because", 68 | "become", 69 | "becomes", 70 | "becoming", 71 | "been", 72 | "before", 73 | "beforehand", 74 | "begin", 75 | "behind", 76 | 
"being", 77 | "believe", 78 | "below", 79 | "beside", 80 | "besides", 81 | "best", 82 | "better", 83 | "between", 84 | "beyond", 85 | "both", 86 | "brief", 87 | "but", 88 | "by", 89 | "came", 90 | "can", 91 | "cannot", 92 | "cant", 93 | "can't", 94 | "caption", 95 | "cause", 96 | "causes", 97 | "certain", 98 | "certainly", 99 | "changes", 100 | "clearly", 101 | "c'mon", 102 | "co", 103 | "co.", 104 | "com", 105 | "come", 106 | "comes", 107 | "concerning", 108 | "consequently", 109 | "consider", 110 | "considering", 111 | "contain", 112 | "containing", 113 | "contains", 114 | "corresponding", 115 | "could", 116 | "couldn't", 117 | "course", 118 | "c's", 119 | "currently", 120 | "dare", 121 | "daren't", 122 | "definitely", 123 | "described", 124 | "despite", 125 | "did", 126 | "didn't", 127 | "different", 128 | "directly", 129 | "do", 130 | "does", 131 | "doesn't", 132 | "doing", 133 | "done", 134 | "don't", 135 | "down", 136 | "downwards", 137 | "during", 138 | "each", 139 | "edu", 140 | "eg", 141 | "eight", 142 | "eighty", 143 | "either", 144 | "else", 145 | "elsewhere", 146 | "end", 147 | "ending", 148 | "enough", 149 | "entirely", 150 | "especially", 151 | "et", 152 | "etc", 153 | "even", 154 | "ever", 155 | "evermore", 156 | "every", 157 | "everybody", 158 | "everyone", 159 | "everything", 160 | "everywhere", 161 | "ex", 162 | "exactly", 163 | "example", 164 | "except", 165 | "fairly", 166 | "far", 167 | "farther", 168 | "few", 169 | "fewer", 170 | "fifth", 171 | "first", 172 | "five", 173 | "followed", 174 | "following", 175 | "follows", 176 | "for", 177 | "forever", 178 | "former", 179 | "formerly", 180 | "forth", 181 | "forward", 182 | "found", 183 | "four", 184 | "from", 185 | "further", 186 | "furthermore", 187 | "get", 188 | "gets", 189 | "getting", 190 | "given", 191 | "gives", 192 | "go", 193 | "goes", 194 | "going", 195 | "gone", 196 | "got", 197 | "gotten", 198 | "greetings", 199 | "had", 200 | "hadn't", 201 | "half", 202 | "happens", 203 | "hardly", 204 | "has", 205 | "hasn't", 206 | "have", 207 | "haven't", 208 | "having", 209 | "he", 210 | "he'd", 211 | "he'll", 212 | "hello", 213 | "help", 214 | "hence", 215 | "her", 216 | "here", 217 | "hereafter", 218 | "hereby", 219 | "herein", 220 | "here's", 221 | "hereupon", 222 | "hers", 223 | "herself", 224 | "he's", 225 | "hi", 226 | "him", 227 | "himself", 228 | "his", 229 | "hither", 230 | "hopefully", 231 | "how", 232 | "howbeit", 233 | "however", 234 | "hundred", 235 | "i'd", 236 | "ie", 237 | "if", 238 | "ignored", 239 | "i'll", 240 | "i'm", 241 | "immediate", 242 | "in", 243 | "inasmuch", 244 | "inc", 245 | "inc.", 246 | "indeed", 247 | "indicate", 248 | "indicated", 249 | "indicates", 250 | "inner", 251 | "inside", 252 | "insofar", 253 | "instead", 254 | "into", 255 | "inward", 256 | "is", 257 | "isn't", 258 | "it", 259 | "it'd", 260 | "it'll", 261 | "its", 262 | "it's", 263 | "itself", 264 | "i've", 265 | "just", 266 | "k", 267 | "keep", 268 | "keeps", 269 | "kept", 270 | "know", 271 | "known", 272 | "knows", 273 | "last", 274 | "lately", 275 | "later", 276 | "latter", 277 | "latterly", 278 | "least", 279 | "less", 280 | "lest", 281 | "let", 282 | "let's", 283 | "like", 284 | "liked", 285 | "likely", 286 | "likewise", 287 | "little", 288 | "look", 289 | "looking", 290 | "looks", 291 | "low", 292 | "lower", 293 | "ltd", 294 | "made", 295 | "mainly", 296 | "make", 297 | "makes", 298 | "many", 299 | "may", 300 | "maybe", 301 | "mayn't", 302 | "me", 303 | "mean", 304 | "meantime", 305 | "meanwhile", 306 | "merely", 307 | 
"might", 308 | "mightn't", 309 | "mine", 310 | "minus", 311 | "miss", 312 | "more", 313 | "moreover", 314 | "most", 315 | "mostly", 316 | "mr", 317 | "mrs", 318 | "much", 319 | "must", 320 | "mustn't", 321 | "my", 322 | "myself", 323 | "name", 324 | "namely", 325 | "nd", 326 | "near", 327 | "nearly", 328 | "necessary", 329 | "need", 330 | "needn't", 331 | "needs", 332 | "neither", 333 | "never", 334 | "neverf", 335 | "neverless", 336 | "nevertheless", 337 | "new", 338 | "next", 339 | "nine", 340 | "ninety", 341 | "no", 342 | "nobody", 343 | "non", 344 | "none", 345 | "nonetheless", 346 | "noone", 347 | "no-one", 348 | "nor", 349 | "normally", 350 | "not", 351 | "nothing", 352 | "notwithstanding", 353 | "novel", 354 | "now", 355 | "nowhere", 356 | "obviously", 357 | "of", 358 | "off", 359 | "often", 360 | "oh", 361 | "ok", 362 | "okay", 363 | "old", 364 | "on", 365 | "once", 366 | "one", 367 | "ones", 368 | "one's", 369 | "only", 370 | "onto", 371 | "opposite", 372 | "or", 373 | "other", 374 | "others", 375 | "otherwise", 376 | "ought", 377 | "oughtn't", 378 | "our", 379 | "ours", 380 | "ourselves", 381 | "out", 382 | "outside", 383 | "over", 384 | "overall", 385 | "own", 386 | "particular", 387 | "particularly", 388 | "past", 389 | "per", 390 | "perhaps", 391 | "placed", 392 | "please", 393 | "plus", 394 | "possible", 395 | "presumably", 396 | "probably", 397 | "provided", 398 | "provides", 399 | "que", 400 | "quite", 401 | "qv", 402 | "rather", 403 | "rd", 404 | "re", 405 | "really", 406 | "reasonably", 407 | "recent", 408 | "recently", 409 | "regarding", 410 | "regardless", 411 | "regards", 412 | "relatively", 413 | "respectively", 414 | "right", 415 | "round", 416 | "said", 417 | "same", 418 | "saw", 419 | "say", 420 | "saying", 421 | "says", 422 | "second", 423 | "secondly", 424 | "see", 425 | "seeing", 426 | "seem", 427 | "seemed", 428 | "seeming", 429 | "seems", 430 | "seen", 431 | "self", 432 | "selves", 433 | "sensible", 434 | "sent", 435 | "serious", 436 | "seriously", 437 | "seven", 438 | "several", 439 | "shall", 440 | "shan't", 441 | "she", 442 | "she'd", 443 | "she'll", 444 | "she's", 445 | "should", 446 | "shouldn't", 447 | "since", 448 | "six", 449 | "so", 450 | "some", 451 | "somebody", 452 | "someday", 453 | "somehow", 454 | "someone", 455 | "something", 456 | "sometime", 457 | "sometimes", 458 | "somewhat", 459 | "somewhere", 460 | "soon", 461 | "sorry", 462 | "specified", 463 | "specify", 464 | "specifying", 465 | "still", 466 | "sub", 467 | "such", 468 | "sup", 469 | "sure", 470 | "take", 471 | "taken", 472 | "taking", 473 | "tell", 474 | "tends", 475 | "th", 476 | "than", 477 | "thank", 478 | "thanks", 479 | "thanx", 480 | "that", 481 | "that'll", 482 | "thats", 483 | "that's", 484 | "that've", 485 | "the", 486 | "their", 487 | "theirs", 488 | "them", 489 | "themselves", 490 | "then", 491 | "thence", 492 | "there", 493 | "thereafter", 494 | "thereby", 495 | "there'd", 496 | "therefore", 497 | "therein", 498 | "there'll", 499 | "there're", 500 | "theres", 501 | "there's", 502 | "thereupon", 503 | "there've", 504 | "these", 505 | "they", 506 | "they'd", 507 | "they'll", 508 | "they're", 509 | "they've", 510 | "thing", 511 | "things", 512 | "think", 513 | "third", 514 | "thirty", 515 | "this", 516 | "thorough", 517 | "thoroughly", 518 | "those", 519 | "though", 520 | "three", 521 | "through", 522 | "throughout", 523 | "thru", 524 | "thus", 525 | "till", 526 | "to", 527 | "together", 528 | "too", 529 | "took", 530 | "toward", 531 | "towards", 532 | "tried", 533 | "tries", 
534 | "truly", 535 | "try", 536 | "trying", 537 | "t's", 538 | "twice", 539 | "two", 540 | "un", 541 | "under", 542 | "underneath", 543 | "undoing", 544 | "unfortunately", 545 | "unless", 546 | "unlike", 547 | "unlikely", 548 | "until", 549 | "unto", 550 | "up", 551 | "upon", 552 | "upwards", 553 | "us", 554 | "use", 555 | "used", 556 | "useful", 557 | "uses", 558 | "using", 559 | "usually", 560 | "v", 561 | "value", 562 | "various", 563 | "versus", 564 | "very", 565 | "via", 566 | "viz", 567 | "vs", 568 | "want", 569 | "wants", 570 | "was", 571 | "wasn't", 572 | "way", 573 | "we", 574 | "we'd", 575 | "welcome", 576 | "well", 577 | "we'll", 578 | "went", 579 | "were", 580 | "we're", 581 | "weren't", 582 | "we've", 583 | "what", 584 | "whatever", 585 | "what'll", 586 | "what's", 587 | "what've", 588 | "when", 589 | "whence", 590 | "whenever", 591 | "where", 592 | "whereafter", 593 | "whereas", 594 | "whereby", 595 | "wherein", 596 | "where's", 597 | "whereupon", 598 | "wherever", 599 | "whether", 600 | "which", 601 | "whichever", 602 | "while", 603 | "whilst", 604 | "whither", 605 | "who", 606 | "who'd", 607 | "whoever", 608 | "whole", 609 | "who'll", 610 | "whom", 611 | "whomever", 612 | "who's", 613 | "whose", 614 | "why", 615 | "will", 616 | "willing", 617 | "wish", 618 | "with", 619 | "within", 620 | "without", 621 | "wonder", 622 | "won't", 623 | "would", 624 | "wouldn't", 625 | "yes", 626 | "yet", 627 | "you", 628 | "you'd", 629 | "you'll", 630 | "your", 631 | "you're", 632 | "yours", 633 | "yourself", 634 | "yourselves", 635 | "you've", 636 | "zero", 637 | ] 638 | -------------------------------------------------------------------------------- /webedge/warnings.py: -------------------------------------------------------------------------------- 1 | WARNINGS = { 2 | "NEGATIVE_DESCRIPTION": u"Description is too negative", 3 | "NEGATIVE_TITLE": u"Title is too negative", 4 | "TITLE_MISSING": u"Title tag is missing or empty.", 5 | "TITLE_TOO_SHORT": u"Avoid using extremely short titles " 6 | u"that are unhelpful to users (less than 10 characters).", 7 | "TITLE_TOO_LONG": u"Avoid using extremely lengthy titles " 8 | u"that are unhelpful to users (more than 70 characters).", 9 | "TITLE_TOO_GENERIC": u"Avoid using default or vague titles like 'Untitled' or 'New Page 1'.", 10 | "TITLE_KEYWORD_STUFFED": u"Avoid stuffing unneeded keywords in your title tags.", 11 | "TITLE_DUPLICATED": u"Avoid using a duplicate title tag across your website.", 12 | "DESCRIPTION_MISSING": u"Description is missing.", 13 | "DESCRIPTION_TOO_SHORT": u"Description is too short (less than 140 characters). " 14 | u"Descriptions are important as Google may use them as page snippets.", 15 | "DESCRIPTION_TOO_LONG": u"Description is too long (more than 255 characters). 
" 16 | u"Descriptions are important as Google may use them as page snippets.", 17 | "DESCRIPTION_TOO_GENERIC": u"Description is too generic.", 18 | "DESCRIPTION_KEYWORD_STUFFED": u"Avoid keyword stuffing in the description.", 19 | "DESCRIPTION_DUPLICATED": u"Avoid using a duplicate description across your website.", 20 | "URL_TOO_LONG": u"Avoid using URLs with unnecessary parameters and IDs.", 21 | "URL_TOO_GENERIC": u"Avoid choosing generic page names like 'page1.html'.", 22 | "URL_KEYWORD_STUFFED": u"Avoid keyword stuffing in the url.", 23 | "URL_TOO_DEEP": u"Avoid having deep nesting of subdirectories (more than 3 levels deep) " 24 | u"like '.../dir1/dir2/dir3/dir4/dir5/dir6/page.html'.", 25 | "URL_NOT_CANONICAL": u"Only one version of a URL (Canonical URL) " 26 | u"should be used to reach a document", 27 | "URL_CAPITALIZED": u"Avoid using uppercase characters in the URL. " 28 | u"Many users expect lower-case URLs and remember them better.", 29 | "IMAGE_LINK_ALT_MISSING": u"Image link missing Alt tag.", 30 | "ANCHOR_TEXT_MISSING": u"Anchor missing title tag or text.", 31 | "ANCHOR_TEXT_TOO_SHORT": u"Anchor text too short (less than 3 characters).", 32 | "ANCHOR_TEXT_TOO_LONG": u"Avoid using lengthy links with unnecessary parameters " 33 | u"(more than 80 characters).", 34 | "ANCHOR_TEXT_TOO_GENERIC": u"Anchor text contains generic text.", 35 | "ANCHOR_HREF_TOO_LONG": u"Avoid using lengthy links with unnecessary parameters " 36 | u"(more than 100 characters).", 37 | "ANCHOR_HREF_EQUALS_TEXT": u"Avoid using the page URL as the anchor text.", 38 | "ANCHOR_NO_FOLLOW": u"Avoid passing your reputation to low ranking or non relevant websites.", 39 | "IMAGE_SRC_MISSING": u"Image missing src tag.", 40 | "IMAGE_SRC_TOO_LONG": u"Avoid using long filenames in links (more than 15 characters).", 41 | "IMAGE_ALT_MISSING": u"Image missing alt tag.", 42 | "IMAGE_ALT_TOO_LONG": u"Avoid writing excessively long alt text that could be spammy.", 43 | "H1_ONE_PER_PAGE": u"Each page should have only one h1 tag", 44 | "H1_TOO_SHORT": u"Avoid using H1 Tags that are too short (less than 3 characters).", 45 | "KEYWORDS_META": u"The Keywords Metatag should be avoided as they are a spam indicator " 46 | u"and no longer used by Search Engines.", 47 | "WORDCOUNT_TOO_SHORT": u"The average word count for top-ranking content is 1,140 - 1,285 words.", 48 | "ROBOTS.TXT": u"robots.txt is missing. " 49 | u"A 'robots.txt' file tells search engines whether they can " 50 | u"access, and therefore crawl parts of your site", 51 | "BROKEN_LINK": u"Avoid referencing broken links on your site.", 52 | "SERVER_ERROR": u"Avoid referencing pages that error out on your site.", 53 | "BLOG_MISSING": u"Blog was not found on this domain. " 54 | u"Blogging about your expertise helps build trust and relationships. 
" 55 | u"Ensure your blog exists on this domain to build your domain authority.", 56 | } 57 | BADGES = { 58 | "POSITIVE_TITLE": u"Title has positive sentiments", 59 | "NEUTRAL_TITLE": u"Title has neutral sentiments", 60 | "TITLE_LENGTH": u"Title length is between 10 and 70 characters.", 61 | "TITLE_INFORMATIVE": u"Title is informative.", 62 | "TITLE_UNIQUE": u"This page has a unique title tag.", 63 | "NEUTRAL_DESCRIPTION": u"Description has neutral sentiments", 64 | "POSITIVE_DESCRIPTION": u"Description has positive sentiments", 65 | "DESCRIPTION_LENGTH": u"Descriptions are important as Google may use them as page snippets.", 66 | "DESCRIPTION_INFORMATIVE": u"Description is informative and helps give context to " 67 | u"customers trying to get to your page.", 68 | "URL_CANONICAL": u"Using canonical URLs helps avoid duplicate content.", 69 | "URL_CORRECTLY_CASED": u"URL is lowercase. Many users expect lower-case URLs and " 70 | u"remember them better.", 71 | "IMAGE_LINK_ALT": u"Image link contains an alt tag.", 72 | "ANCHOR_NO_FOLLOW": u"Good use of nofollow to nonrelevant websites.", 73 | "H1_ONE_PER_PAGE": u"Page contains a single H1 Heading", 74 | "H1_LENGTH": u"Page contains an H1 with a good length", 75 | "WORDCOUNT": u"You have provided great comprehensive coverage of your topic.", 76 | "ROBOTS.TXT": u"Robots.txt file detected. Robots.txt helps search engines navigate " 77 | u"pages that should be indexed.", 78 | "BLOG_DETECTED": u"Blog was found on this domain. " 79 | u"Blogging about your expertise helps build trust and relationships.", 80 | } 81 | -------------------------------------------------------------------------------- /webedge/webedge.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | from webedge import website_analysis 4 | from webedge import cli_output 5 | import sys 6 | 7 | 8 | def create_parser(): 9 | """ 10 | Creates a Parser to pass Arguement Parser. 11 | Returns: 12 | parser: Arguement Parser through which the User can pass the Website 13 | """ 14 | parser = argparse.ArgumentParser( 15 | description="Search and Analyze the Search Engine Optimization of a Website" 16 | ) 17 | parser.add_argument( 18 | "-d", 19 | "--domain", 20 | type=str, 21 | required=True, 22 | help="Share the Website Domain to analyze", 23 | ) 24 | parser.add_argument( 25 | "-s", "--sitemap", type=str, required=False, help="Sitemap.xml file to use" 26 | ) 27 | 28 | parser.add_argument( 29 | "-p", "--page", type=str, required=False, help="Single Page to analyze" 30 | ) 31 | return parser 32 | 33 | 34 | def analyze(domain, sitemap, page): 35 | """ 36 | Analyzes the Domain/Sitemap/Page passed by the User. 37 | Args: 38 | domain: Uniform Resource Locator of the Web Application 39 | sitempap: An XML Sitemap for a Web Application 40 | page: Uniform Resource Locator for a single Webpage 41 | Returns: 42 | report: JSON Document consisting of all achievements and warnings 43 | """ 44 | spider = website_analysis.Spider(domain, sitemap, page) 45 | raw_report = spider.crawl() 46 | report = json.dumps(raw_report, indent=4, separators=(",", ": ")) 47 | return report 48 | 49 | 50 | def main(): 51 | """ 52 | Main Function to run the Parser and invoke the Scripts. 
53 | Returns: 54 | report: JSON Report of the whole Website/Webpage/Sitemap 55 | """ 56 | cli_output.outputName("WebEdge") 57 | parser = create_parser() 58 | args = parser.parse_args() 59 | err = False 60 | cli_output.startLoading() 61 | try: 62 | report = analyze(args.domain, args.sitemap, args.page) 63 | except (SystemExit, KeyError): 64 | cli_output.exitError() 65 | err = True 66 | except: # skipcq FLK-E722 67 | cli_output.printError(str(sys.exc_info()[0]) + "\n" + str(sys.exc_info()[1])) 68 | cli_output.outputError() 69 | err = True 70 | try: 71 | cli_output.endLoading() 72 | except: # skipcq FLK-E722 73 | sys.exit() 74 | try: 75 | if err is False: 76 | cli_output.outputJson(report) 77 | except (SystemExit, KeyError): 78 | cli_output.exitError() 79 | except: # skipcq FLK-E722 80 | cli_output.printError(str(sys.exc_info()[0]) + "\n" + str(sys.exc_info()[1])) 81 | cli_output.outputError() 82 | 83 | 84 | if __name__ == "__main__": 85 | main() 86 | -------------------------------------------------------------------------------- /webedge/webpage_analysis.py: -------------------------------------------------------------------------------- 1 | import re 2 | import bs4 3 | import requests 4 | from six.moves.urllib import parse 5 | from webedge.stop_words import ENGLISH_STOP_WORDS 6 | from webedge.warnings import BADGES 7 | from webedge.warnings import WARNINGS 8 | from webedge.social_websites import SOCIAL_WEBSITES 9 | from AnalyseSentiment.AnalyseSentiment import AnalyseSentiment 10 | 11 | # REGEX to match the Words on the Markup Document 12 | TOKEN_REGEX = re.compile(r"(?u)\b\w\w+\b") 13 | 14 | 15 | class Webpage: 16 | url = None 17 | title = None 18 | description = None 19 | 20 | website_titles = {} 21 | website_descriptions = {} 22 | 23 | def __init__(self, page_url, html, website_titles, website_descriptions): 24 | self.url = page_url 25 | self.netloc = parse.urlparse(page_url).netloc 26 | self.html = html 27 | self.title = None 28 | self.description = None 29 | self.keywords = {} 30 | self.issues = [] 31 | self.achieved = [] 32 | 33 | self.website_titles = website_titles 34 | self.website_descriptions = website_descriptions 35 | 36 | def report(self): 37 | """ 38 | Analyzes and verifies the Optimizations on the Page.
39 | """ 40 | soup = bs4.BeautifulSoup(self.html, "html.parser") 41 | 42 | # per page analysis 43 | self._analyze_title(soup) 44 | self._analyze_description(soup) 45 | self._analyze_url_structure(soup) 46 | self._analyze_anchors(soup) 47 | self._analyze_images(soup) 48 | self._analyze_headings(soup) 49 | self._analyze_keywords(soup) 50 | self._analyze_wordcount(soup) 51 | 52 | return self._render() 53 | 54 | def _analyze_title(self, doc): 55 | """ 56 | Validate the title 57 | Args: 58 | doc: Beautful Soup Object 59 | Returns: 60 | earned/warn: Returns if the Document Title fall among the prerequisties set 61 | """ 62 | self.title = t = u"" 63 | if doc.title: 64 | self.title = t = doc.title.text 65 | 66 | length = len(t) 67 | if length == 0: 68 | self.warn(WARNINGS["TITLE_MISSING"], self.title) 69 | return 70 | if length < 10: 71 | self.warn(WARNINGS["TITLE_TOO_SHORT"], self.title) 72 | elif length > 70: 73 | self.warn(WARNINGS["TITLE_TOO_LONG"], self.title) 74 | else: 75 | self.earned(BADGES["TITLE_LENGTH"], self.title) 76 | 77 | if any(vague_words in t.lower() for vague_words in ["untitled", "page"]): 78 | self.warn(WARNINGS["TITLE_TOO_GENERIC"], self.title) 79 | else: 80 | self.earned(BADGES["TITLE_INFORMATIVE"], self.title) 81 | 82 | sentimentobj = AnalyseSentiment() 83 | sentimentdata = sentimentobj.Analyse(self.title) 84 | if sentimentdata.get("overall_sentiment") == "Negative": 85 | self.warn(WARNINGS["NEGATIVE_TITLE"], self.title) 86 | elif sentimentdata.get("overall_sentiment") == "Neutral": 87 | self.earned(BADGES["NEUTRAL_TITLE"], self.title) 88 | else: 89 | self.earned(BADGES["POSITIVE_TITLE"], self.title) 90 | 91 | title_words = self.grouped(self.tokenize(t)) 92 | for word, count in title_words: 93 | if count > 3: 94 | self.warn(WARNINGS["TITLE_KEYWORD_STUFFED"], self.title) 95 | 96 | if t in self.website_titles: 97 | self.warn( 98 | WARNINGS["TITLE_DUPLICATED"], 99 | u'"{0}" previously used on pages: {1}'.format( 100 | t, self.website_titles[t] 101 | ), 102 | ) 103 | else: 104 | self.earned(BADGES["TITLE_UNIQUE"], self.title) 105 | self.website_titles[t] = self.url 106 | 107 | def _analyze_description(self, doc): 108 | """ 109 | Analyzes and Validates the description present in the Markup Document. 
110 | Args: 111 | doc: Beautiful Soup Object 112 | Returns: 113 | earned/warn: Returns if Description falls among the prerequisites set 114 | """ 115 | desc = doc.findAll("meta", attrs={"name": "description"}) 116 | 117 | self.description = d = u"" 118 | if len(desc) > 0: 119 | self.description = d = desc[0].get("content", "") 120 | 121 | length = len(d) 122 | if length == 0: 123 | self.warn(WARNINGS["DESCRIPTION_MISSING"]) 124 | return 125 | if length < 140: 126 | self.warn(WARNINGS["DESCRIPTION_TOO_SHORT"], self.description) 127 | elif length > 255: 128 | self.warn(WARNINGS["DESCRIPTION_TOO_LONG"], self.description) 129 | else: 130 | self.earned(BADGES["DESCRIPTION_LENGTH"], self.description) 131 | 132 | if any(vague_words in d.lower() for vague_words in ["web page", "page about"]): 133 | self.warn(WARNINGS["DESCRIPTION_TOO_GENERIC"], self.description) 134 | else: 135 | self.earned(BADGES["DESCRIPTION_INFORMATIVE"], self.description) 136 | 137 | sentimentobj = AnalyseSentiment() 138 | sentimentdata = sentimentobj.Analyse(self.description) 139 | if sentimentdata.get("overall_sentiment") == "Negative": 140 | self.warn(WARNINGS["NEGATIVE_DESCRIPTION"], self.description) 141 | elif sentimentdata.get("overall_sentiment") == "Neutral": 142 | self.earned(BADGES["NEUTRAL_DESCRIPTION"], self.description) 143 | else: 144 | self.earned(BADGES["POSITIVE_DESCRIPTION"], self.description) 145 | 146 | desc_words = self.grouped(self.tokenize(d)) 147 | for word, count in desc_words: 148 | if count > 3: 149 | self.warn(WARNINGS["DESCRIPTION_KEYWORD_STUFFED"], self.description) 150 | 151 | if d in self.website_descriptions: 152 | self.warn( 153 | WARNINGS["DESCRIPTION_DUPLICATED"], 154 | u'"{0}" previously used on pages: {1}'.format( 155 | d, self.website_descriptions[d] 156 | ), 157 | ) 158 | else: 159 | self.website_descriptions[d] = self.url 160 | 161 | def _analyze_url_structure(self, doc): 162 | """ 163 | Analyzes and verifies the URL Structure of the Website. 164 | Args: 165 | doc: Beautiful Soup Object 166 | Returns: 167 | earned/warn: Returns if URL Structure falls in the prerequisites set 168 | """ 169 | 170 | parsed_url = parse.urlparse(self.url) 171 | path = parsed_url.path.split("/") 172 | 173 | if len(self.url) > 100: 174 | self.warn(WARNINGS["URL_TOO_LONG"], self.url) 175 | 176 | if any(vague_words in self.url.lower() for vague_words in ["page"]): 177 | self.warn(WARNINGS["URL_TOO_GENERIC"], self.url) 178 | 179 | url_words = self.grouped(self.tokenize(path[-1])) 180 | for word, count in url_words: 181 | if count >= 2: 182 | self.warn(WARNINGS["URL_KEYWORD_STUFFED"], self.url) 183 | 184 | if len(path) > 3: 185 | self.warn(WARNINGS["URL_TOO_DEEP"], self.url) 186 | 187 | canonical = doc.find("link", rel="canonical") 188 | if canonical: 189 | canonical_url = canonical["href"] 190 | 191 | if canonical_url != self.url: 192 | self.warn(WARNINGS["URL_NOT_CANONICAL"], canonical_url) 193 | else: 194 | self.earned(BADGES["URL_CANONICAL"], self.url) 195 | 196 | if any(x.isupper() for x in self.url): 197 | self.warn(WARNINGS["URL_CAPITALIZED"], self.url) 198 | else: 199 | self.earned(BADGES["URL_CORRECTLY_CASED"], self.url) 200 | 201 | def _analyze_anchors(self, doc): 202 | """ 203 | Analyzes and verifies the Anchor Tags on the Markup. 204 | Args: 205 | doc: Beautiful Soup Object 206 | Returns: 207 | earned/warn: Returns if Anchors are defined and the prerequisites are set.
208 | """ 209 | anchors = doc.find_all("a", href=True) 210 | verified_pages = [] 211 | 212 | for tag in anchors: 213 | tag_href = tag["href"] 214 | tag_text = tag.text.lower().strip() 215 | 216 | image_link = tag.find("img") 217 | 218 | if image_link is not None: 219 | if len(image_link.get("alt", "")) == 0: 220 | self.warn(WARNINGS["IMAGE_LINK_ALT_MISSING"], tag_href) 221 | else: 222 | self.earned(BADGES["IMAGE_LINK_ALT"], image_link.get("alt", "")) 223 | 224 | else: 225 | if len(tag.get("title", "")) == 0 and len(tag_text) == 0: 226 | self.warn(WARNINGS["ANCHOR_TEXT_MISSING"], tag_href) 227 | elif len(tag_text) < 3: 228 | self.warn(WARNINGS["ANCHOR_TEXT_TOO_SHORT"], tag_text) 229 | elif len(tag_text) > 100: 230 | self.warn(WARNINGS["ANCHOR_TEXT_TOO_LONG"], tag_text) 231 | 232 | if any( 233 | vague_words in tag_text.lower() 234 | for vague_words in ["click here", "page", "article"] 235 | ): 236 | self.warn(WARNINGS["ANCHOR_TEXT_TOO_GENERIC"], tag_text) 237 | 238 | if len(tag_href) > 100: 239 | self.warn(WARNINGS["ANCHOR_HREF_TOO_LONG"], tag_href) 240 | 241 | if tag_text == tag_href: 242 | self.warn(WARNINGS["ANCHOR_HREF_EQUALS_TEXT"], tag_text) 243 | 244 | if len(parse.urlparse(tag_href).netloc) > 0: 245 | if self.netloc not in tag_href: 246 | if not ( 247 | any(social_site in tag_href for social_site in SOCIAL_WEBSITES) 248 | ): 249 | if tag.get("rel") is None or "nofollow" not in tag.get("rel"): 250 | self.warn(WARNINGS["ANCHOR_NO_FOLLOW"], tag_href) 251 | else: 252 | self.earned(BADGES["ANCHOR_NO_FOLLOW"], tag_href) 253 | 254 | if not tag_href.startswith("mailto:"): 255 | referenced_href = tag_href 256 | if len(parse.urlparse(tag_href).netloc) == 0: 257 | referenced_href = parse.urljoin(self.url, tag_href) 258 | 259 | if referenced_href not in verified_pages: 260 | resp = requests.head(referenced_href) 261 | if resp.status_code == requests.codes.not_found: 262 | self.warn(WARNINGS["BROKEN_LINK"], referenced_href) 263 | 264 | verified_pages.append(referenced_href) 265 | 266 | def _analyze_images(self, doc): 267 | """ 268 | Analyzes and verifies that each image has an alt and title. 269 | Args: 270 | doc: Beautful Soup Object 271 | Returns: 272 | earned/warn: Returns if Images Alt and Title tag fall in the prerequisties set 273 | """ 274 | images = doc.find_all("img") 275 | 276 | for image in images: 277 | src = image.get("src", image.get("data-src", "")) 278 | 279 | if len(src) == 0: 280 | self.warn(WARNINGS["IMAGE_SRC_MISSING"], str(image)) 281 | else: 282 | if len(image.get("alt", "")) == 0: 283 | self.warn(WARNINGS["IMAGE_ALT_MISSING"], str(image)) 284 | 285 | if len(parse.urlparse(src).netloc) == 0 or self.netloc in src: 286 | if len(src) > 15: 287 | self.warn(WARNINGS["IMAGE_SRC_TOO_LONG"], src) 288 | if len(image.get("alt", "")) > 40: 289 | self.warn(WARNINGS["IMAGE_ALT_TOO_LONG"], image.get("alt", "")) 290 | 291 | def _analyze_headings(self, doc): 292 | """ 293 | Analyzes Headings on the Website and makes sure of atleast one heading tag. 
294 | Args: 295 | doc: Beautiful Soup Object 296 | Returns: 297 | earned/warn: Returns if Headings fall in the prerequisites set 298 | """ 299 | h1tags = doc.find_all("h1") 300 | 301 | self.headers = [] 302 | for h in h1tags: 303 | self.headers.append(h.text) 304 | 305 | if len(h.text) < 3: 306 | self.warn(WARNINGS["H1_TOO_SHORT"], h.text) 307 | else: 308 | self.earned(BADGES["H1_LENGTH"], h.text) 309 | 310 | if len(h1tags) != 1: 311 | self.warn(WARNINGS["H1_ONE_PER_PAGE"], self.headers) 312 | else: 313 | self.earned(BADGES["H1_ONE_PER_PAGE"], self.headers) 314 | 315 | def _analyze_keywords(self, doc): 316 | """ 317 | Analyzes the Keywords on the Website. 318 | Args: 319 | doc: Beautiful Soup Object 320 | Returns: 321 | earned/warn: Returns if Keyword Count falls in the prerequisites set 322 | """ 323 | kw_meta = doc.findAll("meta", attrs={"name": "keywords"}) 324 | 325 | if len(kw_meta) > 0: 326 | self.warn(WARNINGS["KEYWORDS_META"], kw_meta) 327 | 328 | self.keywords = self._get_keywords(doc) 329 | 330 | del self.keywords[5:] 331 | 332 | def _analyze_wordcount(self, doc): 333 | """ 334 | Analyzes the Wordcount on the Website. 335 | Args: 336 | doc: Beautiful Soup Object 337 | Returns: 338 | earned/warn: Returns if Wordcount falls in the prerequisite limit 339 | """ 340 | page_content = self._get_keywords(doc) 341 | count = 0 342 | for word, freq in page_content: 343 | count += freq 344 | 345 | if count < 2416: 346 | self.warn( 347 | WARNINGS["WORDCOUNT_TOO_SHORT"], u"You have {0} words.".format(count) 348 | ) 349 | else: 350 | self.earned(BADGES["WORDCOUNT"], u"You have {0} words.".format(count)) 351 | 352 | def _render(self): 353 | """ 354 | Renders the Result of SEO Analysis 355 | """ 356 | keywords_result = [] 357 | 358 | for word, count in self.keywords: 359 | kw = { 360 | "keyword": word, 361 | "frequency": count, 362 | "in_title": word in self.title.lower(), 363 | "in_description": word in self.description.lower(), 364 | "in_header": word in self.headers, 365 | } 366 | keywords_result.append(kw) 367 | 368 | result = { 369 | "url": self.url, 370 | "keywords": keywords_result, 371 | "issues": self.issues, 372 | "achieved": self.achieved, 373 | "title": self.title, 374 | "description": self.description, 375 | } 376 | 377 | return result 378 | 379 | def warn(self, message, value=None): 380 | """ 381 | Value lost through improper SEO Optimization on the Website. 382 | """ 383 | self.issues.append({"warning": message, "value": value}) 384 | 385 | def earned(self, message, value=None): 386 | """ 387 | Value earned through proper SEO Optimization on the Website. 388 | """ 389 | self.achieved.append({"achievement": message, "value": value}) 390 | 391 | def visible_tags(self, element): 392 | """ 393 | Checks whether an element is visible text in the Markup Document. 394 | Args: 395 | element: Elements from the Markup Document 396 | Returns: 397 | boolean: True/False depending on whether the Element is visible 398 | """ 399 | non_visible_elements = [ 400 | "style", 401 | "script", 402 | "[document]", 403 | "head", 404 | "title", 405 | "meta", 406 | ] 407 | 408 | if element.parent.name in non_visible_elements: 409 | return False 410 | if isinstance(element, bs4.element.Comment): 411 | return False 412 | 413 | return True 414 | 415 | def tokenize(self, rawtext): 416 | """ 417 | Tokenizes the Raw Text passed to it by passing through Regex and removing Stop Words.
418 | Args: 419 | rawtext: Markup Text 420 | Returns: 421 | word: Tokenized Text after removing Stop Words and passing through Regex 422 | """ 423 | return [ 424 | word 425 | for word in TOKEN_REGEX.findall(rawtext.lower()) 426 | if word not in ENGLISH_STOP_WORDS 427 | ] 428 | 429 | def grouped(self, token_list): 430 | """ 431 | Groups the Token List passed to it into Word Frequencies. 432 | Args: 433 | token_list: List Data Structure 434 | Returns: 435 | grouped_list: List of (word, frequency) tuples sorted by descending frequency 436 | """ 437 | grouped_list = {} 438 | for word in token_list: 439 | if word in grouped_list: 440 | grouped_list[word] += 1 441 | else: 442 | grouped_list[word] = 1 443 | 444 | grouped_list = sorted(grouped_list.items(), key=lambda x: x[1], reverse=True) 445 | return grouped_list 446 | 447 | def _get_keywords(self, doc): 448 | """ 449 | Fetches the Keywords present in the given Webpage. 450 | Args: 451 | doc: Beautiful Soup Object 452 | 453 | Returns: 454 | keywords: List of (Keyword, Frequency) tuples 455 | """ 456 | keywords = {} 457 | text_elements = filter(self.visible_tags, doc.findAll(text=True)) 458 | page_text = "" 459 | for element in text_elements: 460 | page_text += element.lower() + " " 461 | 462 | tokens = self.tokenize(page_text) 463 | keywords = self.grouped(tokens) 464 | 465 | return keywords 466 | -------------------------------------------------------------------------------- /webedge/website_analysis.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup as Soup 2 | import requests 3 | from six.moves.urllib import parse 4 | from webedge.warnings import BADGES 5 | from webedge.warnings import WARNINGS 6 | from webedge import webpage_analysis 7 | 8 | 9 | class Spider: 10 | report = {"pages": []} 11 | 12 | def __init__(self, site, sitemap=None, page=None): 13 | parsed_url = parse.urlparse(site) 14 | 15 | self.domain = "{0}://{1}".format(parsed_url.scheme, parsed_url.netloc) 16 | self.pages_crawled = [] 17 | self.pages_to_crawl = [] 18 | self.titles = {} 19 | self.descriptions = {} 20 | self.issues = [] 21 | self.achieved = [] 22 | 23 | if sitemap is not None: 24 | locations = [] 25 | resp = requests.get(self.domain + sitemap) 26 | if resp.status_code == requests.codes.ok: 27 | locations = self._parse_sitemap(resp.content) 28 | 29 | self.pages_to_crawl.append(site) 30 | self.pages_to_crawl.extend(locations) 31 | elif page is not None: 32 | self.pages_to_crawl.append(site + page) 33 | else: 34 | self.pages_to_crawl.append(site) 35 | 36 | def _parse_sitemap(self, sitemap): 37 | """ 38 | Parse the Sitemap for Locations. 39 | Args: 40 | sitemap: XML Sitemap 41 | Returns: 42 | locations 43 | """ 44 | locations = [] 45 | 46 | soup = Soup(sitemap, "html.parser") 47 | urls = soup.findAll("url") 48 | 49 | if len(urls) > 0: 50 | for u in urls: 51 | loc = u.find("loc").string 52 | locations.append(loc) 53 | 54 | return locations 55 | 56 | def _analyze_crawlers(self): 57 | """ 58 | Analyzes Crawlers in the form of a robots.txt file. 59 | Returns: 60 | Badges/Warnings: Depending on whether a Robots.txt exists. 61 | """ 62 | resp = requests.get(self.domain + "/robots.txt") 63 | if resp.status_code == requests.codes.ok: 64 | self.earned(BADGES["ROBOTS.TXT"]) 65 | else: 66 | self.warn(WARNINGS["ROBOTS.TXT"]) 67 | 68 | def _analyze_blog(self): 69 | """ 70 | Analyzes Blogs in the form of a /blog section on the Domain 71 | Returns: 72 | Badges/Warnings: Depending on whether a Blog exists or not.
73 | """ 74 | resp = requests.get(self.domain + "/blog") 75 | if resp.status_code == requests.codes.ok: 76 | self.earned(BADGES["BLOG_DETECTED"], self.domain + u"/blog") 77 | else: 78 | self.warn(WARNINGS["BLOG_MISSING"]) 79 | 80 | def warn(self, message, value=None): 81 | """ 82 | Value lost through improper SEO Optimization on the Website. 83 | """ 84 | self.issues.append({"warning": message, "value": value}) 85 | 86 | def earned(self, message, value=None): 87 | """ 88 | Value earned through proper SEO Optimization on the Website. 89 | """ 90 | self.achieved.append({"achievement": message, "value": value}) 91 | 92 | def crawl(self): 93 | """ 94 | Crawl the Website and analyze different things. 95 | """ 96 | self._analyze_crawlers() 97 | self._analyze_blog() 98 | for page_url in self.pages_to_crawl: 99 | resp = requests.get(page_url) 100 | if resp.status_code == requests.codes.ok: 101 | html = webpage_analysis.Webpage( 102 | page_url, resp.content, self.titles, self.descriptions 103 | ) 104 | page_report = html.report() 105 | self.report["pages"].append(page_report) 106 | self.pages_crawled.append(page_url.strip().lower()) 107 | # print("Crawled {0} Pages of {1}: {2}".format( 108 | # len(self.pages_crawled), len(self.pages_to_crawl), page_url)) 109 | elif resp.status_code == requests.codes.not_found: 110 | self.warn(WARNINGS["BROKEN_LINK"], page_url) 111 | else: 112 | self.warn( 113 | WARNINGS["SERVER_ERROR"], 114 | "HTTP{0} received for {1}".format(resp.status_code, page_url), 115 | ) 116 | self.report["site"] = {} 117 | self.report["site"]["issues"] = self.issues 118 | self.report["site"]["achieved"] = self.achieved 119 | return self.report 120 | --------------------------------------------------------------------------------