├── .gitignore ├── pelican └── plugins │ └── seo │ ├── tests │ ├── __init__.py │ ├── test_canonical_url_creator.py │ ├── test_page_title_analyzer.py │ ├── test_robots_file_creator.py │ ├── test_page_description_analyzer.py │ ├── test_internal_link_analyzer.py │ ├── test_content_title_analyzer.py │ ├── test_seo_report.py │ ├── test_seo_enhancer.py │ ├── test_breadcrumb_schema_creator.py │ ├── conftest.py │ └── test_article_schema_creator.py │ ├── __init__.py │ ├── settings.py │ ├── seo_enhancer │ ├── html_enhancer │ │ ├── canonical_url_creator.py │ │ ├── __init__.py │ │ ├── article_schema_creator.py │ │ └── breadcrumb_schema_creator.py │ ├── robots_file_creator.py │ └── __init__.py │ ├── seo_report │ ├── seo_analyzer │ │ ├── page_title_analyzer.py │ │ ├── page_description_analyzer.py │ │ ├── content_title_analyzer.py │ │ ├── internal_link_analyzer.py │ │ └── __init__.py │ ├── static │ │ └── seo_report.css │ ├── template │ │ └── seo_report.html │ └── __init__.py │ └── seo.py ├── tox.ini ├── docs └── seo-report-example.png ├── .travis.yml ├── .editorconfig ├── CONTRIBUTING.md ├── .pre-commit-config.yaml ├── tasks.py ├── pyproject.toml └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | *__pycache__ -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pelican/plugins/seo/__init__.py: -------------------------------------------------------------------------------- 1 | from .seo import * # NOQA 2 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | ignore = E203, E266, E501, W503 4 | -------------------------------------------------------------------------------- /docs/seo-report-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MaevaBrunelles/pelican-seo/HEAD/docs/seo-report-example.png -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.7" 5 | 6 | install: 7 | - pip3 install pelican pytest bs4 8 | 9 | script: 10 | - pytest 11 | 12 | branches: 13 | only: 14 | - master -------------------------------------------------------------------------------- /pelican/plugins/seo/settings.py: -------------------------------------------------------------------------------- 1 | """ 2 | Plugin settings : activate or deactivate SEO features 3 | """ 4 | 5 | SEO_REPORT = True 6 | SEO_ENHANCER = True 7 | 8 | ARTICLES_LIMIT = 10 9 | PAGES_LIMIT = 10 10 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | charset = utf-8 5 | end_of_line = lf 6 | indent_size = 4 7 | indent_style = space 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.py] 12 | max_line_length = 88 13 | 14 | [*.yml] 15 | indent_size = 2 16 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_enhancer/html_enhancer/canonical_url_creator.py: -------------------------------------------------------------------------------- 1 | """ Canonical URL creator. """ 2 | 3 | import os 4 | 5 | 6 | class CanonicalURLCreator(): 7 | """ Canonical URL creator. """ 8 | 9 | def __init__(self, fileurl, siteurl): 10 | self._fileurl = fileurl 11 | self._siteurl = siteurl 12 | 13 | def create_url(self): 14 | """ Join site URL and file URL to create canonical link. """ 15 | 16 | canonical_url = os.path.join(self._siteurl, self._fileurl) 17 | return canonical_url 18 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. 5 | 6 | To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. 7 | 8 | [existing issues]: https://github.com/pelican-plugins/seo/issues 9 | [Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html 10 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/seo_analyzer/page_title_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Analyze the page title. """ 2 | 3 | 4 | class PageTitleAnalyzer(): 5 | """ Analyze the page title. """ 6 | 7 | def __init__(self, title): 8 | self._title = title 9 | 10 | def has_page_title(self): 11 | """ Return True is there is a page title. """ 12 | 13 | if not self._title: 14 | return False 15 | 16 | return True 17 | 18 | @property 19 | def page_title_length(self): 20 | """ Return page title length. """ 21 | 22 | return len(self._title) 23 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_enhancer/robots_file_creator.py: -------------------------------------------------------------------------------- 1 | """ Return elements to build the robots.txt file. """ 2 | 3 | 4 | class RobotsFileCreator(): 5 | """ Get robots rules from article metadata. Return them. """ 6 | 7 | def __init__(self, metadata): 8 | self.metadata_noindex = metadata.get('noindex') 9 | self.metadata_disallow = metadata.get('disallow') 10 | 11 | @property 12 | def get_noindex(self): 13 | """ Return noindex rules. """ 14 | 15 | return self.metadata_noindex 16 | 17 | @property 18 | def get_disallow(self): 19 | """ Return disallow rules. """ 20 | 21 | return self.metadata_disallow 22 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/seo_analyzer/page_description_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Analyze the page description. """ 2 | 3 | 4 | class PageDescriptionAnalyzer(): 5 | """ Analyze the page description. """ 6 | 7 | def __init__(self, description): 8 | self._description = description 9 | 10 | def has_page_description(self): 11 | """ Return True if there is a page description. """ 12 | 13 | if not self._description: 14 | return False 15 | 16 | return True 17 | 18 | @property 19 | def page_description_length(self): 20 | """ Return page description length. """ 21 | 22 | return len(self._description) 23 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_canonical_url_creator.py: -------------------------------------------------------------------------------- 1 | """ Units tests for Canonical URL Creator. """ 2 | 3 | from seo.seo_enhancer.html_enhancer import CanonicalURLCreator 4 | 5 | 6 | class TestCanonicalURLCreator(): 7 | """ Unit tests for CanonicalURLCreator. """ 8 | 9 | def test_create_url(self, fake_article): 10 | """ Test if create_url() returns the join of site URL and article URL. """ 11 | 12 | canonical = CanonicalURLCreator( 13 | fileurl=fake_article.url, 14 | siteurl=fake_article.settings['SITEURL'] 15 | ) 16 | canonical_link = canonical.create_url() 17 | 18 | assert canonical_link == "fakesite.com/fake-title.html" 19 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/seo_analyzer/content_title_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Analyze the content title. """ 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | class ContentTitleAnalyzer(): 7 | """ Analyze the content title. """ 8 | 9 | def __init__(self, content): 10 | self._soup = BeautifulSoup(content, features="html.parser") 11 | 12 | def has_content_title(self): 13 | """ Return True is there is a content title. """ 14 | 15 | if not self._soup.h1: 16 | return False 17 | 18 | return True 19 | 20 | def is_content_title_unique(self): 21 | """ Return True if content title is unique. """ 22 | 23 | content_titles = self._soup.find_all('h1') 24 | 25 | if len(content_titles) > 1: 26 | return False 27 | 28 | return True 29 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com/hooks.html for info on hooks 2 | repos: 3 | - repo: https://github.com/pre-commit/pre-commit-hooks 4 | rev: v2.3.0 5 | hooks: 6 | - id: check-added-large-files 7 | - id: check-ast 8 | - id: check-toml 9 | - id: check-yaml 10 | - id: debug-statements 11 | - id: detect-private-key 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | 15 | - repo: https://github.com/psf/black 16 | rev: 19.10b0 17 | hooks: 18 | - id: black 19 | 20 | - repo: https://gitlab.com/pycqa/flake8 21 | rev: 3.7.9 22 | hooks: 23 | - id: flake8 24 | args: [--max-line-length=88] 25 | language_version: python3.7 26 | 27 | - repo: https://github.com/pre-commit/mirrors-isort 28 | rev: v4.3.21 29 | hooks: 30 | - id: isort 31 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/seo_analyzer/internal_link_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Analyze the internal link of an article. """ 2 | 3 | from bs4 import BeautifulSoup 4 | 5 | 6 | class InternalLinkAnalyzer(): 7 | """ Analyze internal link of an article. """ 8 | 9 | def __init__(self, content, siteurl): 10 | self._soup = BeautifulSoup(content, features="html.parser") 11 | self._links = self._soup.find_all('a') 12 | self._siteurl = siteurl 13 | 14 | def has_internal_link(self): 15 | """ 16 | Return True is there is a internal link. 17 | Need to have SITEURL parameter declared. 18 | """ 19 | 20 | if not self._links: 21 | return False 22 | 23 | for link in self._links: 24 | if self._siteurl in link['href']: 25 | return True 26 | 27 | return False 28 | 29 | @property 30 | def internal_link_occurrence(self): 31 | """ Return the internal link occurrence. """ 32 | 33 | return len([link for link in self._links if self._siteurl in link['href']]) 34 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_page_title_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for Page Title Analyzer. """ 2 | 3 | from seo.seo_report.seo_analyzer import PageTitleAnalyzer 4 | 5 | 6 | class TestPageTitleAnalyzer(): 7 | """ Units tests for PageTitleAnalyzer. """ 8 | 9 | def test_article_has_page_title(self, fake_article): 10 | """ Test if has_page_title returns True if fake_article has a title. """ 11 | 12 | fake_analysis = PageTitleAnalyzer(title=fake_article.title) 13 | assert fake_analysis.has_page_title() 14 | 15 | def test_article_has_no_page_title(self, fake_article_missing_elements): 16 | """ Test if has_page_title returns False if fake_article has no title. """ 17 | 18 | fake_analysis = PageTitleAnalyzer(title=fake_article_missing_elements.title) 19 | assert not fake_analysis.has_page_title() 20 | 21 | def test_article_page_title_length(self, fake_article): 22 | """ Test if page_title_length returns the good title length. """ 23 | 24 | fake_analysis = PageTitleAnalyzer(title=fake_article.title) 25 | assert fake_analysis.page_title_length == len(fake_article.title) 26 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/seo_analyzer/__init__.py: -------------------------------------------------------------------------------- 1 | """ Launch micro SEO analysis. """ 2 | 3 | from .page_title_analyzer import PageTitleAnalyzer 4 | from .page_description_analyzer import PageDescriptionAnalyzer 5 | from .content_title_analyzer import ContentTitleAnalyzer 6 | from .internal_link_analyzer import InternalLinkAnalyzer 7 | 8 | 9 | class SEOAnalyzer(): 10 | """ Instancy all micro SEO analyzers. """ 11 | 12 | def __init__(self, article): 13 | self._title = getattr(article, 'title', None) 14 | self._description = getattr(article, 'description', None) 15 | self._content = getattr(article, 'content', None) 16 | self._settings = getattr(article, 'settings', None) 17 | 18 | self.page_title_analysis = PageTitleAnalyzer(title=self._title) 19 | self.page_description_analysis = PageDescriptionAnalyzer( 20 | description=self._description 21 | ) 22 | self.content_title_analysis = ContentTitleAnalyzer(content=self._content) 23 | self.internal_link_analysis = InternalLinkAnalyzer( 24 | content=self._content, 25 | siteurl=self._settings['SITEURL'] 26 | ) 27 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/static/seo_report.css: -------------------------------------------------------------------------------- 1 | html, body{ 2 | margin: 0; 3 | padding: 0; 4 | font-family: Arial, Helvetica, sans-serif; 5 | } 6 | 7 | header{ 8 | background-color: #373e49; 9 | width: 100%; 10 | padding: 20px; 11 | } 12 | 13 | section{ 14 | padding: 40px; 15 | background-color: #f2f4f7; 16 | } 17 | 18 | article{ 19 | padding: 20px; 20 | background-color: white; 21 | margin-bottom: 30px; 22 | } 23 | 24 | article:last-child{ 25 | margin-bottom: 0px; 26 | } 27 | 28 | footer{ 29 | background-color: #373e49; 30 | text-align: center; 31 | padding: 20px; 32 | color: white; 33 | } 34 | 35 | footer a{ 36 | color: white; 37 | } 38 | 39 | h1{ 40 | text-align: center; 41 | color: white; 42 | } 43 | 44 | h2{ 45 | color: #373e49; 46 | padding-bottom: 5px; 47 | } 48 | 49 | .article-header{ 50 | display:flex; 51 | justify-content:space-between; 52 | align-items:baseline; 53 | border-bottom: 2px solid #373e49; 54 | } 55 | 56 | .green{ 57 | color:green; 58 | } 59 | 60 | .orange{ 61 | color: orange; 62 | } 63 | 64 | .red{ 65 | color: red; 66 | } 67 | 68 | table{ 69 | border-collapse: collapse; 70 | } 71 | 72 | td, th{ 73 | border: 1px solid black; 74 | padding: 10px; 75 | } 76 | 77 | th{ 78 | text-align: left; 79 | } -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_robots_file_creator.py: -------------------------------------------------------------------------------- 1 | """ Units tests for Robots File Creator. """ 2 | 3 | from seo.seo_enhancer.robots_file_creator import RobotsFileCreator 4 | 5 | 6 | class TestRobotsFileCreator(): 7 | """ Units tests for RobotsFileCreator. """ 8 | 9 | def test_get_all_robots_rules(self, fake_article): 10 | """ 11 | Test if get_noindex and get_disallow return True 12 | if the article has specific rules. 13 | """ 14 | 15 | fake_robots = RobotsFileCreator(fake_article.metadata) 16 | assert fake_robots.get_noindex 17 | assert fake_robots.get_disallow 18 | 19 | def test_get_one_robots_rule(self, fake_article_missing_elements): 20 | """ 21 | Test if only get_noindex or get_disallow return True 22 | if the article has one specific rule. 23 | """ 24 | 25 | fake_robots = RobotsFileCreator(fake_article_missing_elements.metadata) 26 | assert fake_robots.get_noindex 27 | assert not fake_robots.get_disallow 28 | 29 | def test_get_none_robots_rule(self, fake_article_multiple_elements): 30 | """ 31 | Test if get_noindex and get_disallow return None 32 | if the article has no specific rules. 33 | """ 34 | 35 | fake_robots = RobotsFileCreator(fake_article_multiple_elements.metadata) 36 | assert not fake_robots.get_noindex 37 | assert not fake_robots.get_disallow 38 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_page_description_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for Page Description Analyzer. """ 2 | 3 | from seo.seo_report.seo_analyzer import PageDescriptionAnalyzer 4 | 5 | 6 | class TestPageDescriptionAnalyzer(): 7 | """ Units tests for PageDescriptionAnalyzer. """ 8 | 9 | def test_article_has_page_description(self, fake_article): 10 | """ 11 | Test if has_page_description returns True 12 | if fake_article has a description. 13 | """ 14 | 15 | fake_analysis = PageDescriptionAnalyzer(description=fake_article.description) 16 | assert fake_analysis.has_page_description() 17 | 18 | def test_article_has_no_page_description(self, fake_article_missing_elements): 19 | """ 20 | Test if has_page_description returns False 21 | if fake_article has no description. 22 | """ 23 | 24 | fake_analysis = PageDescriptionAnalyzer( 25 | description=fake_article_missing_elements.description 26 | ) 27 | assert not fake_analysis.has_page_description() 28 | 29 | def test_article_page_description_length(self, fake_article): 30 | """ 31 | Test if page_description_length returns 32 | the good description length. 33 | """ 34 | 35 | fake_analysis = PageDescriptionAnalyzer(description=fake_article.description) 36 | assert fake_analysis.page_description_length == len(fake_article.description) 37 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_enhancer/html_enhancer/__init__.py: -------------------------------------------------------------------------------- 1 | """ HTML Enhancer : get instances of HTML enhancements. """ 2 | 3 | from .canonical_url_creator import CanonicalURLCreator 4 | from .article_schema_creator import ArticleSchemaCreator 5 | from .breadcrumb_schema_creator import BreadcrumbSchemaCreator 6 | 7 | 8 | class HTMLEnhancer(): 9 | """ HTML Enhancer : get instances of HTML enhancements. """ 10 | 11 | def __init__(self, file, output_path, path): 12 | _settings = getattr(file, 'settings', None) 13 | _fileurl = getattr(file, 'url', None) 14 | _author = getattr(file, 'author', None) 15 | _date = getattr(file, 'date', None) 16 | _title = getattr(file, 'title', None) 17 | _category = getattr(file, 'category', None) 18 | _image = getattr(file, 'image', None) 19 | 20 | self.article_schema = ArticleSchemaCreator( 21 | author=_author, 22 | title=_title, 23 | category=_category, 24 | date=_date, 25 | logo=_settings.get('LOGO'), 26 | image=_image, 27 | sitename=_settings.get('SITENAME'), 28 | ) 29 | 30 | self.canonical_link = CanonicalURLCreator( 31 | siteurl=_settings.get('SITEURL'), 32 | fileurl=_fileurl, 33 | ) 34 | 35 | self.breadcrumb_schema = BreadcrumbSchemaCreator( 36 | output_path=output_path, 37 | path=path, 38 | sitename=_settings.get('SITENAME'), 39 | siteurl=_settings.get('SITEURL'), 40 | ) 41 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_internal_link_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for Internal Link Analyzer. """ 2 | 3 | from seo.seo_report.seo_analyzer import InternalLinkAnalyzer 4 | 5 | 6 | class TestInternalLinkAnalyzer(): 7 | """ Units tests for InternalLinkAnalyzer. """ 8 | 9 | def test_article_has_internal_link(self, fake_article): 10 | """ 11 | Test if has_internal_link returns True 12 | if fake_article has at least one internal link. 13 | """ 14 | 15 | fake_analysis = InternalLinkAnalyzer( 16 | content=fake_article.content, 17 | siteurl=fake_article.settings['SITEURL'], 18 | ) 19 | assert fake_analysis.has_internal_link() 20 | 21 | def test_article_has_no_internal_link(self, fake_article_missing_elements): 22 | """ 23 | Test if has_internal_link returns False 24 | if fake_article has no internal link. 25 | """ 26 | 27 | fake_analysis = InternalLinkAnalyzer( 28 | content=fake_article_missing_elements.content, 29 | siteurl=fake_article_missing_elements.settings['SITEURL'], 30 | ) 31 | assert not fake_analysis.has_internal_link() 32 | 33 | def test_article_internal_link_occurrence(self, fake_article_multiple_elements): 34 | """ Test if internal_link_occurrence returns the rigth length. """ 35 | 36 | fake_analysis = InternalLinkAnalyzer( 37 | content=fake_article_multiple_elements.content, 38 | siteurl=fake_article_multiple_elements.settings['SITEURL'], 39 | ) 40 | assert fake_analysis.internal_link_occurrence == 2 41 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_content_title_analyzer.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for Content Title Analyzer. """ 2 | 3 | from seo.seo_report.seo_analyzer import ContentTitleAnalyzer 4 | 5 | 6 | class TestContentTitleAnalyzer(): 7 | """ Units tests for ContentTitleAnalyzer. """ 8 | 9 | def test_article_has_content_title(self, fake_article): 10 | """ 11 | Test if has_content_title returns True 12 | if fake_article has a content title. 13 | """ 14 | 15 | fake_analysis = ContentTitleAnalyzer(content=fake_article.content) 16 | assert fake_analysis.has_content_title() 17 | 18 | def test_article_has_no_content_title(self, fake_article_missing_elements): 19 | """ 20 | Test if has_content_title returns False 21 | if fake_article has no content title. 22 | """ 23 | 24 | fake_analysis = ContentTitleAnalyzer( 25 | content=fake_article_missing_elements.content 26 | ) 27 | assert not fake_analysis.has_content_title() 28 | 29 | def test_article_content_title_is_unique(self, fake_article): 30 | """ Test if is_content_title_unique returns True if content title is unique. """ 31 | 32 | fake_analysis = ContentTitleAnalyzer(content=fake_article.content) 33 | assert fake_analysis.is_content_title_unique 34 | 35 | def test_article_content_title_is_not_unique(self, fake_article_multiple_elements): 36 | """ 37 | Test if is_content_title_unique returns False 38 | if content title is not unique. 39 | """ 40 | 41 | fake_analysis = ContentTitleAnalyzer( 42 | content=fake_article_multiple_elements.content 43 | ) 44 | assert not fake_analysis.is_content_title_unique() 45 | -------------------------------------------------------------------------------- /tasks.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | from shutil import which 4 | 5 | from invoke import task 6 | 7 | PKG_NAME = "seo" 8 | PKG_PATH = Path(f"pelican/plugins/{PKG_NAME}") 9 | ACTIVE_VENV = os.environ.get("VIRTUAL_ENV", None) 10 | VENV_HOME = Path(os.environ.get("WORKON_HOME", "~/.local/share/virtualenvs")) 11 | VENV_PATH = Path(ACTIVE_VENV) if ACTIVE_VENV else (VENV_HOME / PKG_NAME) 12 | VENV = str(VENV_PATH.expanduser()) 13 | 14 | TOOLS = ["poetry", "pre-commit"] 15 | POETRY = which("poetry") if which("poetry") else (VENV / Path("bin") / "poetry") 16 | PRECOMMIT = ( 17 | which("pre-commit") if which("pre-commit") else (VENV / Path("bin") / "pre-commit") 18 | ) 19 | 20 | 21 | @task 22 | def tests(c): 23 | """Run the test suite""" 24 | c.run(f"{VENV}/bin/pytest", pty=True) 25 | 26 | 27 | @task 28 | def black(c, check=False, diff=False): 29 | """Run Black auto-formatter, optionally with --check or --diff""" 30 | check_flag, diff_flag = "", "" 31 | if check: 32 | check_flag = "--check" 33 | if diff: 34 | diff_flag = "--diff" 35 | c.run(f"{VENV}/bin/black {check_flag} {diff_flag} {PKG_PATH} tasks.py") 36 | 37 | 38 | @task 39 | def isort(c, check=False, diff=False): 40 | check_flag, diff_flag = "", "" 41 | if check: 42 | check_flag = "-c" 43 | if diff: 44 | diff_flag = "--diff" 45 | c.run( 46 | f"{VENV}/bin/isort {check_flag} {diff_flag} --recursive {PKG_PATH}/* tasks.py" 47 | ) 48 | 49 | 50 | @task 51 | def flake8(c): 52 | c.run(f"{VENV}/bin/flake8 {PKG_PATH} tasks.py") 53 | 54 | 55 | @task 56 | def lint(c): 57 | isort(c, check=True) 58 | black(c, check=True) 59 | flake8(c) 60 | 61 | 62 | @task 63 | def tools(c): 64 | """Install tools in the virtual environment if not already on PATH""" 65 | for tool in TOOLS: 66 | if not which(tool): 67 | c.run(f"{VENV}/bin/pip install {tool}") 68 | 69 | 70 | @task 71 | def precommit(c): 72 | """Install pre-commit hooks to .git/hooks/pre-commit""" 73 | c.run(f"{PRECOMMIT} install") 74 | 75 | 76 | @task 77 | def setup(c): 78 | c.run(f"{VENV}/bin/pip install -U pip") 79 | tools(c) 80 | c.run(f"{POETRY} install") 81 | precommit(c) 82 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "pelican-seo" 3 | version = "0.1.0" 4 | description = "This plugin helps you improve your Pelican site SEO (Search Engine Optimization) to reach the top positions on search engines." 5 | authors = ["Maëva Brunelles "] 6 | license = "AGPL-3.0" 7 | readme = "README.md" 8 | keywords = ["pelican", "plugin", "seo"] 9 | repository = "https://github.com/pelican-plugins/seo" 10 | packages = [ 11 | { include = "pelican" }, 12 | ] 13 | 14 | classifiers = [ 15 | "Development Status :: 3 - Alpha", 16 | "Environment :: Console", 17 | "Framework :: Pelican", 18 | "Framework :: Pelican :: Plugins", 19 | "Intended Audience :: End Users/Desktop", 20 | "License :: OSI Approved :: GNU Affero General Public License v3", 21 | "Operating System :: OS Independent", 22 | "Programming Language :: Python", 23 | "Programming Language :: Python :: 3", 24 | "Programming Language :: Python :: 3.6", 25 | "Programming Language :: Python :: 3.7", 26 | "Programming Language :: Python :: 3.8", 27 | "Topic :: Internet :: WWW/HTTP", 28 | "Topic :: Software Development :: Libraries :: Python Modules", 29 | ] 30 | 31 | [tool.poetry.urls] 32 | "Documentation" = "https://docs.getpelican.com/" 33 | "Funding" = "https://donate.getpelican.com/" 34 | "Source" = "https://github.com/pelican-plugins/seo" 35 | "Tracker" = "https://github.com/pelican-plugins/seo/issues" 36 | 37 | [tool.poetry.dependencies] 38 | python = "3.7" 39 | pelican = "^4.2" 40 | markdown = {version = "^3.1.1",optional = true} 41 | 42 | [tool.poetry.dev-dependencies] 43 | black = {version = "^19.10b0",allow-prereleases = true} 44 | flake8 = "^3.7" 45 | flake8-black = "^0.1.0" 46 | invoke = "^1.3" 47 | isort = "^4.3" 48 | livereload = "^2.6" 49 | markdown = "^3.1.1" 50 | pytest = "^5.0" 51 | pytest-cov = "^2.7" 52 | pytest-pythonpath = "^0.7.3" 53 | pytest-sugar = "^0.9.2" 54 | Werkzeug = "^0.15.5" 55 | 56 | [tool.poetry.extras] 57 | markdown = ["markdown"] 58 | 59 | [tool.isort] 60 | # Maintain compatibility with Black 61 | combine_as_imports = true 62 | force_grid_wrap = 0 63 | include_trailing_comma = true 64 | line_length = 88 65 | multi_line_output = 3 66 | 67 | # Sort imports within their section independent of the import type 68 | force_sort_within_sections = true 69 | 70 | [build-system] 71 | requires = ["poetry>=1.0"] 72 | build-backend = "poetry.masonry.api" 73 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/template/seo_report.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | SEO Report 4 | 5 | 6 | 7 | 8 |
9 |

SEO report - {{ site_name }}

10 |
11 | 12 |
13 | 14 | {% for documents in seo_reports %} 15 |
16 | 17 |
18 |

{{ documents.url }}

19 | {% if documents.date %} 20 | {{ documents.date }} 21 | {% endif %} 22 |
23 | 24 | {% for report in documents.seo_reports %} 25 | 26 |

{{ report.title }}

27 | 28 | 29 | {% if report.content.good %} 30 | 31 | 32 | {% for good_point in report.content.good %} 33 | 34 | {% endfor %} 35 | 36 | {% endif %} 37 | 38 | {% if report.content.to_improve %} 39 | 40 | 41 | {% for improvment in report.content.to_improve %} 42 | 43 | {% endfor %} 44 | 45 | {% endif %} 46 | 47 | {% if report.content.problems %} 48 | 49 | 50 | {% for problem in report.content.problems %} 51 | 52 | {% endfor %} 53 | 54 | {% endif %} 55 | 56 |
Good{{ good_point }}
To improve{{ improvment }}
Problems{{ problem }}
57 | 58 | {% endfor %} 59 | 60 |
61 | {% endfor %} 62 | 63 |
64 | 65 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_enhancer/html_enhancer/article_schema_creator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Article Schema.org creator : Improve articles display in Google 3 | if all fields are fill in. 4 | https://schema.org/Article : JSON-LD format. 5 | """ 6 | 7 | import datetime 8 | 9 | 10 | class ArticleSchemaCreator(): 11 | """ 12 | Get all field values and build the Article schema compliant 13 | to https://schema.org/Article and Google requirements. 14 | """ 15 | 16 | def __init__(self, author, title, category, date, logo, image, sitename): 17 | self._author = author.name 18 | self._title = title 19 | self._category = category.name 20 | self._publication_date = date 21 | self._logo = logo 22 | self._image = image 23 | self._sitename = sitename 24 | 25 | def _convert_date(self, date): 26 | """ Get SafeDate Pelican object and return date in string. """ 27 | 28 | date_time = datetime.datetime( 29 | date.year, 30 | date.month, 31 | date.day, 32 | date.hour, 33 | date.minute 34 | ) 35 | return date_time.strftime("%Y-%m-%d %H:%M") 36 | 37 | def create_schema(self): 38 | """ 39 | Create Article schema. 40 | Schema : { 41 | "@context": "https://schema.org", 42 | "@type": "Article", 43 | "author": { 44 | "@type": "Person", 45 | "name": :author: 46 | }, 47 | "publisher": { 48 | "@type": "Organization", 49 | "name": :sitename:, 50 | "logo": { 51 | "@type": "ImageObject", 52 | "url": :logo: 53 | } 54 | }, 55 | "headline": :title:, 56 | "about": :category:, 57 | "datePublished": :publication_date:, 58 | "image": :image: 59 | } 60 | """ 61 | 62 | schema_article = { 63 | "@context": "https://schema.org", 64 | "@type": "Article", 65 | } 66 | 67 | if self._author: 68 | schema_article["author"] = { 69 | "@type": "Person", 70 | "name": self._author 71 | } 72 | 73 | if self._sitename: 74 | schema_article["publisher"] = { 75 | "@type": "Organization", 76 | "name": self._sitename, 77 | } 78 | 79 | if self._logo: 80 | schema_article["publisher"]["logo"] = { 81 | "@type": "ImageObject", 82 | "url": self._logo 83 | } 84 | 85 | if self._title: 86 | schema_article["headline"] = self._title 87 | 88 | if self._category: 89 | schema_article["about"] = self._category 90 | 91 | if self._publication_date: 92 | schema_article["datePublished"] = self._convert_date(self._publication_date) 93 | 94 | if self._image: 95 | schema_article["image"] = self._image 96 | 97 | return schema_article 98 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_seo_report.py: -------------------------------------------------------------------------------- 1 | """ Unit tests for SEO Report. """ 2 | 3 | from unittest.mock import mock_open, patch 4 | 5 | from seo.seo_report.seo_analyzer import ( 6 | InternalLinkAnalyzer, 7 | ContentTitleAnalyzer, 8 | PageTitleAnalyzer, 9 | PageDescriptionAnalyzer, 10 | ) 11 | 12 | 13 | class TestSEOReport(): 14 | """ Units tests for SEOReport. """ 15 | 16 | def test_launch_analysis_returns_dict(self, fake_article, fake_seo_report): 17 | """ Test if launch_analysis return a dict with expected keys. """ 18 | 19 | fake_articles_analysis = fake_seo_report.launch_analysis(fake_article) 20 | 21 | assert fake_articles_analysis['url'] 22 | assert fake_articles_analysis['date'] 23 | assert fake_articles_analysis['seo_analysis'] 24 | assert fake_articles_analysis['seo_analysis']['page_title_analysis'] 25 | assert fake_articles_analysis['seo_analysis']['page_description_analysis'] 26 | assert fake_articles_analysis['seo_analysis']['content_title_analysis'] 27 | assert fake_articles_analysis['seo_analysis']['internal_link_analysis'] 28 | 29 | def test_launch_analysis_values_are_instances_of_expected_analysis_objects( 30 | self, fake_article, fake_seo_report): 31 | """ 32 | Test if the dict returned by launch_analysis 33 | contained expected analysis objects. 34 | """ 35 | 36 | fake_articles_analysis = fake_seo_report.launch_analysis(fake_article) 37 | fake_seo_analysis = fake_articles_analysis['seo_analysis'] 38 | 39 | page_title_analysis = fake_seo_analysis['page_title_analysis'] 40 | page_description_analysis = fake_seo_analysis['page_description_analysis'] 41 | content_title_analysis = fake_seo_analysis['content_title_analysis'] 42 | internal_link_analysis = fake_seo_analysis['internal_link_analysis'] 43 | 44 | assert isinstance(page_title_analysis, PageTitleAnalyzer) 45 | assert isinstance(page_description_analysis, PageDescriptionAnalyzer) 46 | assert isinstance(content_title_analysis, ContentTitleAnalyzer) 47 | assert isinstance(internal_link_analysis, InternalLinkAnalyzer) 48 | 49 | def test_generate_create_report_file_and_write_output( 50 | self, fake_seo_report, fake_articles_analysis): 51 | """ 52 | Test that generate create a HTML file and write SEO report on it. 53 | Need mock_open to test this. 54 | """ 55 | 56 | with patch('seo.seo_report.open', mock_open()) as mocked_open: 57 | # Get a reference to the MagicMock that will be returned 58 | # when mock_open will be called 59 | # => When we do open("seo_report", "w") as report in generate, report 60 | # will also be a reference to the same MagicMock 61 | mocked_file_handle = mocked_open.return_value 62 | 63 | # When generate is executed, mock_open() is call instead of open() 64 | fake_seo_report.generate('Fake site', fake_articles_analysis) 65 | 66 | # mocked_open and the file handle got all 67 | # executed calls, and can assert them 68 | mocked_open.assert_called_once_with('seo_report.html', 'w') 69 | mocked_file_handle.write.assert_called_once() 70 | 71 | # Get all arguments in the mocked write call and select the first 72 | # true arg (output) 73 | args, _ = mocked_file_handle.write.call_args_list[0] 74 | output = args[0] 75 | assert "

SEO report - Fake site

" in output 76 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_seo_enhancer.py: -------------------------------------------------------------------------------- 1 | """ Units tests for SEO Enhancer. """ 2 | 3 | from unittest.mock import mock_open, patch 4 | 5 | 6 | class TestSEOEnhancer(): 7 | """ Units tests for SEOEnhancer. """ 8 | 9 | def test_populate_robots_return_dict_with_rules_for_an_url( 10 | self, fake_seo_enhancer, fake_article): 11 | """ 12 | Test that populate_robots return a dict with document_url, 13 | noindex and disallow rules. 14 | """ 15 | 16 | fake_robots_rules = fake_seo_enhancer.populate_robots(fake_article) 17 | 18 | assert fake_robots_rules['document_url'] 19 | assert fake_robots_rules['noindex'] 20 | assert fake_robots_rules['disallow'] 21 | 22 | def test_generate_robots_file(self, fake_seo_enhancer, fake_robots_rules): 23 | """ Test if generate_robots create a robots.txt file by mocking open(). """ 24 | 25 | with patch('os.mkdir'): 26 | with patch('seo.seo_enhancer.open', mock_open()) as mocked_open: 27 | mocked_file_handle = mocked_open.return_value 28 | 29 | fake_seo_enhancer.generate_robots( 30 | rules=fake_robots_rules, 31 | output_path='fake_output' 32 | ) 33 | 34 | mocked_open.assert_called_once_with('fake_output/robots.txt', 'w+') 35 | mocked_file_handle.write.assert_called() 36 | # 4 : 1 fix write + 3 generated write 37 | assert len(mocked_file_handle.write.call_args_list) == 4 38 | 39 | args, _ = mocked_file_handle.write.call_args_list[1] 40 | fake_rule = args[0] 41 | assert "Noindex: fake-title.html" in fake_rule 42 | 43 | def test_launch_html_enhancemer_returns_dict(self, fake_article, fake_seo_enhancer): 44 | """ Test if launch_html_enhancemer returns a dict with expected keys. """ 45 | 46 | fake_html_enhancements = fake_seo_enhancer.launch_html_enhancer( 47 | file=fake_article, 48 | output_path='fake_output', 49 | path='fake_dir/fake_output/fake_file.html', 50 | ) 51 | 52 | assert fake_html_enhancements['canonical_tag'] 53 | assert fake_html_enhancements['article_schema'] 54 | assert fake_html_enhancements['breadcrumb_schema'] 55 | 56 | def test_add_html_enhancements_to_file(self, fake_article, fake_seo_enhancer): 57 | """ 58 | Test if add_html_to_file add SEO enhancements 59 | to HTML files by mocking open(). 60 | """ 61 | 62 | path = "fake_dir/fake_output/fake_file.html" 63 | fake_html_enhancements = fake_seo_enhancer.launch_html_enhancer( 64 | file=fake_article, 65 | output_path='fake_output', 66 | path=path, 67 | ) 68 | 69 | with patch( 70 | 'seo.seo_enhancer.open', 71 | mock_open(read_data=fake_article.content) 72 | ) as mocked_open: 73 | mocked_file_handle = mocked_open.return_value 74 | 75 | fake_seo_enhancer.add_html_to_file( 76 | enhancements=fake_html_enhancements, 77 | path=path 78 | ) 79 | assert len(mocked_open.call_args_list) == 2 80 | mocked_file_handle.read.assert_called_once() 81 | mocked_file_handle.write.assert_called_once() 82 | 83 | write_args, _ = mocked_file_handle.write.call_args_list[0] 84 | fake_html_content = write_args[0] 85 | assert '' \ 86 | in fake_html_content 87 | assert '{"@context": "https://schema.org", "@type": "Article"' \ 88 | in fake_html_content 89 | assert '{"@context": "https://schema.org", "@type": "BreadcrumbList"' \ 90 | in fake_html_content 91 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_enhancer/__init__.py: -------------------------------------------------------------------------------- 1 | """ Improve SEO technical for each article and page : HTML code and robots.txt file. """ 2 | 3 | import json 4 | import logging 5 | import os 6 | 7 | from bs4 import BeautifulSoup 8 | 9 | from .html_enhancer import HTMLEnhancer 10 | from .robots_file_creator import RobotsFileCreator 11 | 12 | 13 | logger = logging.getLogger(__name__) 14 | 15 | 16 | class SEOEnhancer(): 17 | """ 18 | Improve SEO technical for each article and page : HTML code and robots.txt file. 19 | """ 20 | 21 | def launch_html_enhancer(self, file, output_path, path): 22 | """ 23 | Call HTMLEnhancer for each article and page. 24 | Return a dict with all HTML enhancements. 25 | """ 26 | 27 | html_enhancer = HTMLEnhancer(file, output_path, path) 28 | 29 | html_enhancements = { 30 | 'canonical_tag': html_enhancer.canonical_link.create_url(), 31 | 'breadcrumb_schema': html_enhancer.breadcrumb_schema.create_schema(), 32 | } 33 | 34 | if 'pages' not in file.url: 35 | article_schema = html_enhancer.article_schema.create_schema() 36 | html_enhancements['article_schema'] = article_schema 37 | 38 | return html_enhancements 39 | 40 | def populate_robots(self, document): 41 | """ 42 | Get all robots rules in document.metadata. 43 | Return a dict with rules per url. 44 | """ 45 | 46 | robots_file = RobotsFileCreator(document.metadata) 47 | 48 | return { 49 | 'document_url': document.url, 50 | 'noindex': robots_file.get_noindex, 51 | 'disallow': robots_file.get_disallow, 52 | } 53 | 54 | def generate_robots(self, rules, output_path): 55 | """ 56 | Create robots.txt file, with noindex and disallow rules for each document URL. 57 | """ 58 | if not os.path.isdir(output_path): 59 | os.mkdir(output_path) 60 | 61 | robots_path = os.path.join(output_path, 'robots.txt') 62 | 63 | with open(robots_path, 'w+') as robots_file: 64 | robots_file.write('User-agent: *') 65 | for rule in rules: 66 | if rule.get('noindex'): 67 | robots_file.write('\n' + 'Noindex: ' + rule.get('document_url')) 68 | if rule.get('disallow'): 69 | robots_file.write('\n' + 'Disallow: ' + rule.get('document_url')) 70 | 71 | logger.info("SEO plugin - SEO Enhancement: robots.txt file created") 72 | 73 | def add_html_to_file(self, enhancements, path): 74 | """ 75 | Open HTML file, add HTML enhancements with bs4 and create the new HTML files. 76 | """ 77 | 78 | with open(path) as html_file: 79 | html_content = html_file.read() 80 | soup = BeautifulSoup(html_content, features="html.parser") 81 | 82 | canonical_tag = soup.new_tag( 83 | "link", 84 | rel="canonical", 85 | href=enhancements.get('canonical_tag') 86 | ) 87 | soup.head.append(canonical_tag) 88 | 89 | position = 0 90 | for enhancement in enhancements: 91 | 92 | if enhancement.endswith('_schema'): 93 | schema = enhancement 94 | 95 | schema_script = soup.new_tag("script", type="application/ld+json") 96 | soup.head.append(schema_script) 97 | 98 | schema_script = soup.findAll('script')[position] 99 | # Json dumps permit to keep dict double quotes instead of simples 100 | # Google valids schema only with double quotes 101 | schema_script.append( 102 | json.dumps(enhancements[schema], ensure_ascii=False) 103 | ) 104 | 105 | position += 1 106 | 107 | with open(path, 'w') as html_file: 108 | html_file.write(soup.prettify()) 109 | 110 | logger.info(f"SEO plugin - SEO Enhancement: Done for {path}") 111 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_breadcrumb_schema_creator.py: -------------------------------------------------------------------------------- 1 | """ Units tests for Breadcrumb Schema Creator. """ 2 | 3 | from seo.seo_enhancer.html_enhancer import BreadcrumbSchemaCreator 4 | 5 | 6 | class TestBreadcrumbSchemaCreator(): 7 | """ Unit tests for BreadcrumbSchemaCreator. """ 8 | 9 | def test_create_schema(self, fake_article): 10 | """ 11 | Test that create_schema returns a valid 12 | schema.org (dict) for breadcrumb. 13 | """ 14 | 15 | breadcrumb = BreadcrumbSchemaCreator( 16 | output_path='fake_output', 17 | path='fake_dir/fake_output/fake-file.html', 18 | sitename=fake_article.settings['SITENAME'], 19 | siteurl=fake_article.settings['SITEURL'], 20 | ) 21 | 22 | fake_breadcrumb_schema = breadcrumb.create_schema() 23 | 24 | assert fake_breadcrumb_schema['@context'] == "https://schema.org" 25 | assert fake_breadcrumb_schema['@type'] == "BreadcrumbList" 26 | 27 | assert len(fake_breadcrumb_schema['itemListElement']) == 2 28 | 29 | assert fake_breadcrumb_schema['itemListElement'][0]['@type'] == "ListItem" 30 | assert fake_breadcrumb_schema['itemListElement'][0]['position'] == 1 31 | assert fake_breadcrumb_schema['itemListElement'][0]['name'] == "Fake Site Name" 32 | assert fake_breadcrumb_schema['itemListElement'][0]['item'] == "fakesite.com" 33 | 34 | assert fake_breadcrumb_schema['itemListElement'][1]['@type'] == "ListItem" 35 | assert fake_breadcrumb_schema['itemListElement'][1]['position'] == 2 36 | assert fake_breadcrumb_schema['itemListElement'][1]['name'] == "Fake file" 37 | assert fake_breadcrumb_schema['itemListElement'][1]['item'] == \ 38 | "fakesite.com/fake-file.html" 39 | 40 | def test_create_schema_with_x_elements_in_path(self, fake_article): 41 | """ 42 | Test that create_schema returns a valid 43 | schema.org (dict) for a path with x elements. 44 | """ 45 | 46 | breadcrumb = BreadcrumbSchemaCreator( 47 | output_path='fake_output', 48 | path='fake_dir/fake_output/test/blabla/other/kiwi/fake-file.html', 49 | sitename=fake_article.settings['SITENAME'], 50 | siteurl=fake_article.settings['SITEURL'], 51 | ) 52 | 53 | fake_breadcrumb_schema = breadcrumb.create_schema() 54 | 55 | assert len(fake_breadcrumb_schema['itemListElement']) == 6 56 | 57 | def test_create_schema_with_no_sitename_no_siteurl(self, fake_article): 58 | """ Test that create_schema returns incomplete schema.org. """ 59 | 60 | breadcrumb = BreadcrumbSchemaCreator( 61 | output_path='fake_output', 62 | path='fake_dir/fake_output/fake-file.html', 63 | sitename='', 64 | siteurl='', 65 | ) 66 | 67 | fake_breadcrumb_schema = breadcrumb.create_schema() 68 | 69 | assert not fake_breadcrumb_schema['itemListElement'][0]['position'] == 1 70 | 71 | def test_create_schema_with_no_sitename(self, fake_article): 72 | """ 73 | Test that create_schema with siteurl but no 74 | sitename returns incomplete schema.org. 75 | """ 76 | 77 | breadcrumb = BreadcrumbSchemaCreator( 78 | output_path='fake_output', 79 | path='fake_dir/fake_output/fake-file.html', 80 | sitename='', 81 | siteurl=fake_article.settings['SITEURL'], 82 | ) 83 | 84 | fake_breadcrumb_schema = breadcrumb.create_schema() 85 | 86 | assert not fake_breadcrumb_schema['itemListElement'][0]['position'] == 1 87 | 88 | def test_create_schema_with_no_siteurl(self, fake_article): 89 | """ 90 | Test that create_schema with sitename but no 91 | siteurl returns incomplete schema.org. 92 | """ 93 | 94 | breadcrumb = BreadcrumbSchemaCreator( 95 | output_path='fake_output', 96 | path='fake_dir/fake_output/fake-file.html', 97 | sitename=fake_article.settings['SITENAME'], 98 | siteurl='', 99 | ) 100 | 101 | fake_breadcrumb_schema = breadcrumb.create_schema() 102 | 103 | assert not fake_breadcrumb_schema['itemListElement'][0]['position'] == 1 104 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_enhancer/html_enhancer/breadcrumb_schema_creator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Breadcrumb Schema.org creator : Improve URLs display in Google 3 | thanks to breadcrumb set up. 4 | https://schema.org/BreadcrumbList : JSON-LD format. 5 | """ 6 | 7 | import os 8 | 9 | 10 | class BreadcrumbSchemaCreator(): 11 | """ 12 | Get all URLs for a path and build the Breadcrumb schema compliant 13 | to https://schema.org/BreadcrumbList and Google requirements. 14 | """ 15 | 16 | def __init__(self, output_path, path, sitename, siteurl): 17 | self._output_path = output_path 18 | self._path = path 19 | self._sitename = sitename 20 | self._siteurl = siteurl 21 | 22 | def _create_paths(self): 23 | """ 24 | Split the file path, get all elements after the output path. 25 | By default, output path is 'output/' but it can be changes in Pelican settings. 26 | Build all paths, for example : 27 | Path = 'test-dir/output/category/file.html' 28 | Split path = ['output', 'category', 'file.html'] 29 | Position begins at 2, as number 1 is dedicated to the index page. 30 | Returns list of dicts : 31 | [ 32 | { 33 | 'position': 2, 34 | 'name': 'Category', 35 | 'url': ':siteurl:/category' 36 | }, 37 | { 38 | 'position': 3, 39 | 'name': 'File', 40 | 'url': ':siteurl:/category/file.html' 41 | }, 42 | ] 43 | """ 44 | 45 | split_path = self._path.split('/') 46 | 47 | if self._output_path in split_path: 48 | max_index = split_path.index(self._output_path) + 1 49 | 50 | # Delete all elements before output path, including it 51 | del split_path[0:max_index] 52 | 53 | breadcrumb_paths = [] 54 | position = 2 55 | 56 | for item in range(1, len(split_path) + 1): 57 | 58 | name = split_path[item-1] 59 | name = name.replace('-', ' ').capitalize() 60 | if name.endswith('.html'): 61 | name = name[:-5] 62 | 63 | full_path = '/'.join(split_path[:item]) 64 | url = os.path.join(self._siteurl, full_path) 65 | 66 | breadcrumb_paths.append({ 67 | 'name': name, 68 | 'url': url, 69 | 'position': position 70 | }) 71 | 72 | position += 1 73 | 74 | return breadcrumb_paths 75 | 76 | def create_schema(self): 77 | """ 78 | Schema = { 79 | "@context": "https://schema.org", 80 | "@type": "BreadcrumbList", 81 | "itemListElement": [ 82 | { 83 | "@type": "ListItem", 84 | "position": :n=1:, 85 | "name": :Sitename:, 86 | "item": :SITEURL: 87 | }, 88 | { 89 | "@type": "ListItem", 90 | "position": :n+1:, 91 | "name": :name:, 92 | "item": :url: 93 | }, 94 | { 95 | "@type": "ListItem", 96 | "position": :n+x:, 97 | "name": :name:, 98 | "item": :url: 99 | } 100 | ] 101 | } 102 | """ 103 | 104 | breadcrumb_items = self._create_paths() 105 | 106 | breadcrumb_schema = { 107 | "@context": "https://schema.org", 108 | "@type": "BreadcrumbList", 109 | "itemListElement": [], 110 | } 111 | 112 | if self._sitename and self._siteurl: 113 | breadcrumb_schema['itemListElement'].append( 114 | { 115 | "@type": "ListItem", 116 | "position": 1, 117 | "name": self._sitename, 118 | "item": self._siteurl 119 | } 120 | ) 121 | 122 | for item in breadcrumb_items: 123 | breadcrumb_schema['itemListElement'].append( 124 | { 125 | "@type": "ListItem", 126 | "position": item['position'], 127 | "name": item['name'], 128 | "item": item['url'] 129 | } 130 | ) 131 | 132 | return breadcrumb_schema 133 | -------------------------------------------------------------------------------- /pelican/plugins/seo/seo.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | SEO is a Pelican plugin to helps you improve your Pelican site SEO to 4 | reach the tops positions on search engines like Qwant, DuckDuckGo or Google. 5 | =================================================================================== 6 | 7 | It generates a SEO report and SEO enhancements. 8 | You can enable / disable the main features in the plugin settings. 9 | For the SEO report, you can limit the number of analysis 10 | in the plugin settings too. 11 | 12 | Author : Maëva Brunelles 13 | License : GNU AFFERO GENERAL PUBLIC LICENSE Version 3 14 | """ 15 | 16 | import logging 17 | 18 | from pelican import signals 19 | from pelican.generators import ArticlesGenerator, PagesGenerator 20 | 21 | from .settings import SEO_REPORT, SEO_ENHANCER, ARTICLES_LIMIT, PAGES_LIMIT 22 | from .seo_report import SEOReport 23 | from .seo_enhancer import SEOEnhancer 24 | 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | 29 | def plugin_initializer(settings): 30 | """ Raises if SITEURL parameter is not set in Pelican settings """ 31 | 32 | if not settings.settings.get('SITEURL'): 33 | raise Exception("You must fill in SITEURL variable in pelicanconf.py \ 34 | to use SEO plugin.") 35 | 36 | logger.info("SEO plugin initialized") 37 | 38 | 39 | def run_seo_report(generators): 40 | """ Run SEO report creation if SEO_REPORT is enabled in settings. """ 41 | 42 | seo_report = SEOReport() 43 | documents_analysis = [] 44 | 45 | site_name = None 46 | 47 | for generator in generators: 48 | 49 | if isinstance(generator, ArticlesGenerator): 50 | # Launch analysis for each article. User can limit this number. 51 | for _, article in zip(range(ARTICLES_LIMIT), generator.articles): 52 | analysis = seo_report.launch_analysis(document=article) 53 | documents_analysis.append(analysis) 54 | 55 | if not site_name: 56 | site_name = generator.settings.get('SITENAME') 57 | 58 | if isinstance(generator, PagesGenerator): 59 | # Launch analysis each page. User can limit this number. 60 | for _, page in zip(range(PAGES_LIMIT), generator.pages): 61 | analysis = seo_report.launch_analysis(document=page) 62 | documents_analysis.append(analysis) 63 | 64 | if not site_name: 65 | site_name = generator.settings.get('SITENAME') 66 | 67 | seo_report.generate( 68 | site_name=site_name, 69 | documents_analysis=documents_analysis 70 | ) 71 | 72 | 73 | def run_robots_file(generators): 74 | """ 75 | Run robots.txt file creation if SEO_ENHANCER 76 | is enabled in settings. 77 | """ 78 | 79 | seo_enhancer = SEOEnhancer() 80 | robots_rules = [] 81 | 82 | for generator in generators: 83 | 84 | output_path = generator.output_path 85 | 86 | if isinstance(generator, ArticlesGenerator): 87 | for article in generator.articles: 88 | article_metadata = seo_enhancer.populate_robots( 89 | document=article 90 | ) 91 | robots_rules.append(article_metadata) 92 | 93 | if isinstance(generator, PagesGenerator): 94 | for page in generator.pages: 95 | page_metadata = seo_enhancer.populate_robots(document=page) 96 | robots_rules.append(page_metadata) 97 | 98 | seo_enhancer.generate_robots( 99 | rules=robots_rules, 100 | output_path=output_path, 101 | ) 102 | 103 | 104 | def run_html_enhancer(path, context): 105 | """ Run HTML enhancements if SEO_ENHANCER is enabled in settings. """ 106 | 107 | if context.get('article'): 108 | seo_enhancer = SEOEnhancer() 109 | html_enhancements = seo_enhancer.launch_html_enhancer( 110 | file=context['article'], 111 | output_path=context.get('OUTPUT_PATH'), 112 | path=path, 113 | ) 114 | seo_enhancer.add_html_to_file( 115 | enhancements=html_enhancements, 116 | path=path, 117 | ) 118 | 119 | elif context.get('page'): 120 | seo_enhancer = SEOEnhancer() 121 | html_enhancements = seo_enhancer.launch_html_enhancer( 122 | file=context['page'], 123 | output_path=context.get('OUTPUT_PATH'), 124 | path=path, 125 | ) 126 | seo_enhancer.add_html_to_file( 127 | enhancements=html_enhancements, 128 | path=path, 129 | ) 130 | 131 | 132 | def register(): 133 | 134 | signals.initialized.connect(plugin_initializer) 135 | 136 | if SEO_REPORT and SEO_ENHANCER: 137 | signals.all_generators_finalized.connect(run_seo_report) 138 | signals.all_generators_finalized.connect(run_robots_file) 139 | signals.content_written.connect(run_html_enhancer) 140 | 141 | elif SEO_REPORT: 142 | signals.all_generators_finalized.connect(run_seo_report) 143 | 144 | elif SEO_ENHANCER: 145 | signals.all_generators_finalized.connect(run_robots_file) 146 | signals.content_written.connect(run_html_enhancer) 147 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/conftest.py: -------------------------------------------------------------------------------- 1 | """ Mocks Pelican objects required for the units tests. """ 2 | 3 | import pytest 4 | 5 | from seo.seo_report import SEOReport 6 | from seo.seo_enhancer import SEOEnhancer 7 | 8 | 9 | class FakeArticle(): 10 | """ Mock Pelican Article object. """ 11 | 12 | def __init__(self, settings, metadata, title, 13 | description, url, date, content, author, category): 14 | self.settings = settings 15 | self.metadata = metadata 16 | self.title = title 17 | self.description = description 18 | self.url = url 19 | self.date = date 20 | self.content = content 21 | self.author = author 22 | self.category = category 23 | 24 | 25 | class FakeDate(): 26 | """ Mock Pelican SafeDate object. """ 27 | 28 | def __init__(self, year, month, day, hour, minute): 29 | self.year = int(year) 30 | self.month = int(month) 31 | self.day = int(day) 32 | self.hour = int(hour) 33 | self.minute = int(minute) 34 | 35 | 36 | class FakeAuthor(): 37 | """ Mock Pelican Author object. """ 38 | 39 | def __init__(self, name): 40 | self.name = name 41 | 42 | 43 | class FakeCategory(): 44 | """ Mock Pelican Category object. """ 45 | 46 | def __init__(self, name): 47 | self.name = name 48 | 49 | 50 | @pytest.fixture() 51 | def fake_article(): 52 | """ Create a fake article. """ 53 | 54 | settings = { 55 | 'SITEURL': 'fakesite.com', 56 | 'SITENAME': 'Fake Site Name', 57 | 'LOGO': 'https://www.fakesite.com/fake-logo.jpg', 58 | } 59 | metadata = { 60 | 'noindex': True, 61 | 'disallow': True, 62 | 'image': 'https://www.fakesite.com/fake-image.jpg', 63 | } 64 | title = 'Fake Title' 65 | description = 'Fake description' 66 | url = 'fake-title.html' 67 | date = FakeDate('2019', '04', '03', '23', '49') 68 | author = FakeAuthor(name='Fake author') 69 | category = FakeCategory(name='Fake category') 70 | content = """ 71 | 72 | Fake Title 73 | 74 | 75 | 76 |

Fake content title

77 |

Fake content

78 | Fake internal link 79 | 80 | """ 81 | 82 | return FakeArticle( 83 | settings=settings, 84 | metadata=metadata, 85 | title=title, 86 | description=description, 87 | url=url, 88 | date=date, 89 | content=content, 90 | author=author, 91 | category=category 92 | ) 93 | 94 | 95 | @pytest.fixture() 96 | def fake_article_missing_elements(): 97 | """ Create a fake article with missing elements. """ 98 | 99 | settings = { 100 | 'SITEURL': 'fakesite.com', 101 | 'SITENAME': '', 102 | 'LOGO': '', 103 | } 104 | metadata = { 105 | 'noindex': True, 106 | 'image': '', 107 | } 108 | title = '' 109 | description = '' 110 | url = 'fake-title.html' 111 | date = FakeDate('2019', '04', '03', '23', '49') 112 | author = FakeAuthor(name='') 113 | category = FakeCategory(name='') 114 | content = """ 115 | 116 | 117 | 118 |

Fake content

119 | 120 | """ 121 | 122 | return FakeArticle( 123 | settings=settings, 124 | metadata=metadata, 125 | title=title, 126 | description=description, 127 | url=url, 128 | date=date, 129 | content=content, 130 | author=author, 131 | category=category 132 | ) 133 | 134 | 135 | @pytest.fixture() 136 | def fake_article_multiple_elements(): 137 | """ Create a fake article with multiple elements. """ 138 | 139 | settings = { 140 | 'SITEURL': 'fakesite.com', 141 | 'SITENAME': 'Fake Site Name', 142 | 'LOGO': 'https://www.fakesite.com/fake-logo.jpg', 143 | } 144 | metadata = {} 145 | title = 'Fake Title' 146 | description = 'Fake description' 147 | url = 'fake-title.html' 148 | date = FakeDate('2019', '04', '03', '23', '49') 149 | author = FakeAuthor(name='Fake author') 150 | category = FakeCategory(name='Fake category') 151 | content = """ 152 | 153 | Fake Title 154 | 155 | 156 | 157 |

Content title

158 |

Fake content

159 |

Multiple content title

160 | Fake internal link 161 | Fake external link 162 | Fake internal path link 163 | 164 | """ 165 | 166 | return FakeArticle( 167 | settings=settings, 168 | metadata=metadata, 169 | title=title, 170 | description=description, 171 | url=url, 172 | date=date, 173 | content=content, 174 | author=author, 175 | category=category 176 | ) 177 | 178 | 179 | @pytest.fixture() 180 | def fake_seo_report(): 181 | """ Create a fake seo report instance. """ 182 | 183 | return SEOReport() 184 | 185 | 186 | @pytest.fixture() 187 | def fake_robots_rules(fake_seo_enhancer, fake_article, 188 | fake_article_multiple_elements, 189 | fake_article_missing_elements): 190 | """ Create a fake robots rules. """ 191 | 192 | robots_rules = [] 193 | 194 | robots_rules.append( 195 | fake_seo_enhancer.populate_robots(fake_article) 196 | ) 197 | robots_rules.append( 198 | fake_seo_enhancer.populate_robots(fake_article_missing_elements) 199 | ) 200 | robots_rules.append( 201 | fake_seo_enhancer.populate_robots(fake_article_multiple_elements) 202 | ) 203 | 204 | return robots_rules 205 | 206 | 207 | @pytest.fixture() 208 | def fake_articles_analysis(fake_seo_report, fake_article, 209 | fake_article_multiple_elements, 210 | fake_article_missing_elements): 211 | """ Create a fake articles analysis. """ 212 | 213 | articles_analysis = [] 214 | 215 | articles_analysis.append( 216 | fake_seo_report.launch_analysis(fake_article) 217 | ) 218 | articles_analysis.append( 219 | fake_seo_report.launch_analysis(fake_article_missing_elements) 220 | ) 221 | articles_analysis.append( 222 | fake_seo_report.launch_analysis(fake_article_multiple_elements) 223 | ) 224 | 225 | return articles_analysis 226 | 227 | 228 | @pytest.fixture() 229 | def fake_seo_enhancer(): 230 | """ Create a fake seo enhancer instance. """ 231 | 232 | return SEOEnhancer() 233 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SEO: A plugin for Pelican :fr: 2 | 3 | [![Build Status](https://travis-ci.com/MaevaBrunelles/pelican-seo.svg?branch=master)](https://travis-ci.com/MaevaBrunelles/pelican-seo) 4 | 5 | > Warning: This plugin is now released on the new official Pelican plugins workspace. If you want to use it, you must install https://github.com/pelican-plugins/seo. For contributions, issues or anything else, see the [contribution part](#contributing). 6 | 7 | This plugin helps you improve your Pelican site SEO (Search Engine Optimization) to reach the top positions on search engines. To see what can do SEO for you, go directly to [Usage](#usage) section. 8 | 9 | | Author | GitHub | 10 | | :-------------: | :-------------------------------: | 11 | | Maëva Brunelles | https://github.com/MaevaBrunelles | 12 | 13 | ## Why do you need SEO 14 | 15 | If : 16 | * you need some help to improve the SEO of your Pelican site 17 | * you are not familiar with SEO 18 | * you want a simple site with all SEO basis 19 | 20 | This plugin is for you ! 21 | 22 | SEO comes with two complete features to help you : 23 | * [SEO Report](#seo-report) : generation of an HTML report based on articles analysis. Provides you what is good, what should be improved and what is problematic. Useful if you want to improve the referencement of your articles. 24 | * [SEO Enhancer](#seo-enhancer) : generation of robots indexation hints, HTML tag and structured data. Usefull if you want to control the appareance of your site in the search engine. 25 | 26 | ## Installation 27 | 28 | This plugin can be installed via: 29 | 30 | pip install pelican-seo 31 | 32 | ## Requirements 33 | 34 | SEO needs Beautiful Soup 4 : 35 | 36 | ``` 37 | pip install bs4 38 | ``` 39 | 40 | `SITEURL` Pelican parameter must be defined as features are based on it. 41 | 42 | ## Usage 43 | 44 | You can choose which feature to enable or disable in the plugin settings. Default is `True` for both. 45 | 46 | ``` 47 | # settings.py 48 | SEO_REPORT = True # To enable this feature 49 | SEO_ENHANCER = False # To disable this feature 50 | ``` 51 | 52 | SEO runs when you [generate your site](https://docs.getpelican.com/en/stable/quickstart.html#generate-your-site). If you want to see SEO logs, you should use `--verbose` Pelican argument: 53 | ``` 54 | $ pelican content --verbose 55 | -> SEO plugin initialized 56 | -> SEO plugin - SEO Report: seo_report.html file created 57 | -> SEO plugin - SEO Enhancement: robots.txt file created 58 | -> Writing /output/my-first-review.html 59 | -> SEO plugin - SEO Enhancement: Done for /output/my-first-review.html 60 | Done: Processed 1 articles, 0 drafts, 0 pages, 0 hidden pages and 0 draft pages in 0.17 seconds. 61 | ``` 62 | 63 | ### SEO report 64 | 65 | SEO analyzes all your articles and pages, and generate an HTML SEO report in your Pelican root project : `seo-report.html` 66 | 67 | Example : 68 | 69 | ![SEO report example](docs/seo-report-example.png) 70 | 71 | You can set up a limit for article and page analysis in the plugin settings.py. By default, it's set up to 10 articles and 10 pages. 72 | 73 | ``` 74 | ARTICLES_LIMIT = 10 75 | PAGES_LIMIT = 10 76 | ``` 77 | 78 | The analysis works from the most recents articles or pages to the olders according to the `date` metadata. 79 | Analysis is focused on : 80 | * Page title `` 81 | * Page description `` 82 | * Title content `

` 83 | * Internal link `` 84 | 85 | Which are declared by article and page metadata and content : 86 | 87 | ```markdown 88 | # article.md 89 | Title: Title page 90 | Description: Description page 91 | 92 | # Title content 93 | [Internal link](https://example.com/about.html) 94 | ``` 95 | 96 | Theses elements form the basis of page SEO optimization. Have a look to the report, optimize your articles and pages according to analysis, and restart the process to see if all elements are now in green. 97 | 98 | ### SEO Enhancer 99 | 100 | SEO generates for you : 101 | * HTML enhancements 102 | * Structured data 103 | * Robots file 104 | 105 | #### Robots.txt file 106 | 107 | Indicates to search engines which pages they are allowed to access to or not. By default, all pages are authorized to be explored by all the existing robots. 108 | 109 | ``` 110 | User-agent: * 111 | ``` 112 | 113 | To disallow the exploration or to forbid the indexation of a specific ressource, add theses metadata : 114 | 115 | ``` 116 | Disallow: True 117 | Noindex: True 118 | ``` 119 | 120 | A `robots.txt` file is added at the website root, in the `OUTPUT_PATH` setting (Pelican's default is `output/`). 121 | 122 | ``` 123 | # robots.txt 124 | User-agent: * 125 | 126 | Disallow: example.html 127 | Noindex: other-example.html 128 | ``` 129 | 130 | #### Canonical URL tag 131 | 132 | SEO automatically adds for each article the canonical URL tag in the `` to avoid duplicate content. 133 | 134 | ```html 135 | 136 | ``` 137 | 138 | #### Structured data 139 | 140 | SEO automatically adds structured data in the `` to improve the display of result snippet in search engines. Articles will have both article schema and breadcrumb schema, while pages will only have breadcrumb schema. 141 | Structured data are based on [Schema.org](https://schema.org/) vocabulary, with `JSON-LD` encoding. Note that schemas generated by default are Schema.org compliant, but not automatically Google compliant. Additionnal metadata are required for it. 142 | 143 | ##### Breadcrumb schema 144 | 145 | Based on [BreadcrumbList schema](https://schema.org/BreadcrumbList) : 146 | 147 | ``` 148 | { 149 | "@context": "https://schema.org", 150 | "@type": "BreadcrumbList", 151 | "itemListElement": [ 152 | { 153 | "@type": "ListItem", 154 | "position": :n=1:, 155 | "name": :Sitename:, 156 | "item": :SITEURL: 157 | }, 158 | { 159 | "@type": "ListItem", 160 | "position": :n+1:, 161 | "name": :name:, 162 | "item": :url: 163 | }, 164 | { 165 | "@type": "ListItem", 166 | "position": :n+x:, 167 | "name": :name:, 168 | "item": :url: 169 | } 170 | ] 171 | } 172 | ``` 173 | 174 | Each element of the file path has his `ListItem`, even folders, so it's better to create a user friendly page for those (otherwise you'll get your server's default page). 175 | 176 | ##### Article schema 177 | 178 | Based on [Article schema](https://schema.org/Article) : 179 | 180 | ``` 181 | { 182 | "@context": "https://schema.org", 183 | "@type": "Article", 184 | "author": { 185 | "@type": "Person", 186 | "name": :author: 187 | }, 188 | "publisher": { 189 | "@type": "Organization", 190 | "name": :sitename:, 191 | "logo": { 192 | "@type": "ImageObject", 193 | "url": :logo: 194 | } 195 | }, 196 | "headline": :title:, 197 | "about": :category:, 198 | "datePublished": :publication_date:, 199 | "image": :image: 200 | } 201 | ``` 202 | 203 | `:logo:` and `:image:` fields are not required by Schema.org but they are by Google. 204 | 205 | To fill `:logo:`, add a parameter in the `pelicanconf.py` : 206 | 207 | ``` 208 | # pelicanconf.py 209 | LOGO = 'https://www.example.com/logo.jpg' 210 | ``` 211 | 212 | To fill `:image:`, add a metadata for each Markdown or reST article : 213 | 214 | ``` 215 | # article.md 216 | Image: https://www.example.com/article-image.jpg 217 | 218 | # article.rst 219 | :image: https://www.example.com/article-image.jpg 220 | ``` 221 | 222 | ## Contributing 223 | 224 | Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. 225 | 226 | To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. 227 | 228 | [existing issues]: https://github.com/pelican-plugins/seo/issues 229 | [Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html -------------------------------------------------------------------------------- /pelican/plugins/seo/seo_report/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate a SEO report by calling SEO analyzers for each content. 3 | """ 4 | 5 | import datetime 6 | import logging 7 | import os 8 | 9 | from jinja2 import Environment, FileSystemLoader 10 | 11 | from .seo_analyzer import SEOAnalyzer 12 | 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | class SEOReport(): 18 | """ Generate a SEO report by calling SEO analyzers for each content. """ 19 | 20 | PAGE_TITLE_RECOMMENDED_LENGTH = range(60, 71) 21 | PAGE_DESCRIPTION_RECOMMENDED_LENGTH = range(150, 161) 22 | 23 | def _convert_date(self, date): 24 | """ Get SafeDate Pelican object and return date in string. """ 25 | 26 | date_time = datetime.datetime( 27 | date.year, 28 | date.month, 29 | date.day, 30 | date.hour, 31 | date.minute 32 | ) 33 | return date_time.strftime("%Y-%m-%d %H:%M") 34 | 35 | def launch_analysis(self, document): 36 | """ 37 | Launch SEO analysis for a document (either article or page). 38 | Return a dict with document informations and his analysis. 39 | """ 40 | 41 | seo_analysis = SEOAnalyzer(document) 42 | 43 | date = None 44 | if hasattr(document, "date"): 45 | date = self._convert_date(document.date) 46 | 47 | document_analysis = { 48 | "url": document.url, 49 | "date": date, 50 | "seo_analysis": { 51 | "page_title_analysis": seo_analysis.page_title_analysis, 52 | "page_description_analysis": seo_analysis.page_description_analysis, 53 | "content_title_analysis": seo_analysis.content_title_analysis, 54 | "internal_link_analysis": seo_analysis.internal_link_analysis, 55 | }, 56 | } 57 | 58 | return document_analysis 59 | 60 | def _page_title_report(self, page_title_analysis): 61 | """ 62 | Create report for page title thanks to dedicated analysis. 63 | Return a dict with details. 64 | """ 65 | 66 | report = { 67 | 'title': 'Page title analysis', 68 | 'content': { 69 | 'good': [], 70 | 'to_improve': [], 71 | 'problems': [], 72 | }, 73 | } 74 | 75 | if page_title_analysis.has_page_title: 76 | report['content']['good'].append( 77 | 'You have declared a title. Nice job !' 78 | ) 79 | 80 | if page_title_analysis.page_title_length in \ 81 | SEOReport.PAGE_TITLE_RECOMMENDED_LENGTH: 82 | report['content']['good'].append('Your title has a good longer.') 83 | 84 | elif page_title_analysis.page_title_length < \ 85 | SEOReport.PAGE_TITLE_RECOMMENDED_LENGTH[0]: 86 | report['content']['to_improve'].append( 87 | 'Your title is too short. The recommended length is 70 characters.' 88 | ) 89 | 90 | elif page_title_analysis.page_title_length > \ 91 | SEOReport.PAGE_TITLE_RECOMMENDED_LENGTH[-1]: 92 | report['content']['to_improve'].append( 93 | 'Your title is too long. The maximum recommended \ 94 | length is 70 characters.' 95 | ) 96 | 97 | else: 98 | report['content']['problems'].append( 99 | 'Title is missing. Create one to improve your SEO.' 100 | ) 101 | 102 | return report 103 | 104 | def _page_description_report(self, page_description_analysis): 105 | """ 106 | Create report for page description thanks to dedicated analysis. 107 | Return a dict with details. 108 | """ 109 | 110 | report = { 111 | 'title': 'Page description analysis', 112 | 'content': { 113 | 'good': [], 114 | 'to_improve': [], 115 | 'problems': [], 116 | }, 117 | } 118 | 119 | if page_description_analysis.has_page_description(): 120 | report['content']['good'].append( 121 | 'You have declared a description. Nice job !' 122 | ) 123 | 124 | if page_description_analysis.page_description_length in \ 125 | SEOReport.PAGE_DESCRIPTION_RECOMMENDED_LENGTH: 126 | report['content']['good'].append('Your description has a good longer') 127 | 128 | elif page_description_analysis.page_description_length < \ 129 | SEOReport.PAGE_DESCRIPTION_RECOMMENDED_LENGTH[0]: 130 | report['content']['to_improve'].append( 131 | 'Your description is too short. The minimum recommended \ 132 | length is 150 characters.' 133 | ) 134 | 135 | elif page_description_analysis.page_description_length > \ 136 | SEOReport.PAGE_DESCRIPTION_RECOMMENDED_LENGTH[-1]: 137 | report['content']['to_improve'].append( 138 | 'Your description is too long. The maximum recommended \ 139 | length is 160 characters.' 140 | ) 141 | 142 | else: 143 | report['content']['problems'].append( 144 | 'You need to declare a description to improve SEO.' 145 | ) 146 | 147 | return report 148 | 149 | def _content_title_report(self, content_title_analysis): 150 | """ 151 | Create report for content title thanks to dedicated analysis. 152 | Return a dict with details. 153 | """ 154 | 155 | report = { 156 | 'title': 'Content title analysis', 157 | 'content': { 158 | 'good': [], 159 | 'to_improve': [], 160 | 'problems': [], 161 | }, 162 | } 163 | 164 | if content_title_analysis.has_content_title(): 165 | report['content']['good'].append( 166 | 'You have declared a content title. Nice job !' 167 | ) 168 | 169 | if not content_title_analysis.is_content_title_unique(): 170 | report['content']['to_improve'].append( 171 | 'Your content title must be unique.' 172 | ) 173 | else: 174 | report['content']['problems'].append('You\'re missing a content title.') 175 | 176 | return report 177 | 178 | def _internal_link_report(self, internal_link_analysis): 179 | """ 180 | Create report for internal links thanks to dedicated analysis. 181 | Return a dict with details. 182 | """ 183 | 184 | report = { 185 | 'title': 'Internal link analysis', 186 | 'content': { 187 | 'good': [], 188 | 'to_improve': [], 189 | 'problems': [], 190 | } 191 | } 192 | 193 | internal_link_occurrence = internal_link_analysis.internal_link_occurrence 194 | 195 | if internal_link_analysis.has_internal_link(): 196 | report['content']['good'].append( 197 | 'You\'ve included ' + str(internal_link_occurrence) + ' internal \ 198 | links. Nice job !' 199 | ) 200 | 201 | else: 202 | report['content']['problems'].append( 203 | 'It\'s better to include internal links.' 204 | ) 205 | 206 | return report 207 | 208 | def _launch_report(self, document_analysis): 209 | """ 210 | Get all adocuments analysis and launch dedicated report for each. 211 | Return a dict with all micro-reports. 212 | """ 213 | seo_analysis = document_analysis['seo_analysis'] 214 | 215 | page_title_analysis = seo_analysis['page_title_analysis'] 216 | page_description_analysis = seo_analysis['page_description_analysis'] 217 | content_title_analysis = seo_analysis['content_title_analysis'] 218 | internal_link_analysis = seo_analysis['internal_link_analysis'] 219 | 220 | page_title_report = self._page_title_report( 221 | page_title_analysis=page_title_analysis 222 | ) 223 | page_description_report = self._page_description_report( 224 | page_description_analysis=page_description_analysis 225 | ) 226 | content_title_report = self._content_title_report( 227 | content_title_analysis=content_title_analysis 228 | ) 229 | internal_link_report = self._internal_link_report( 230 | internal_link_analysis=internal_link_analysis 231 | ) 232 | 233 | document_report = [ 234 | page_title_report, 235 | page_description_report, 236 | content_title_report, 237 | internal_link_report, 238 | ] 239 | 240 | return document_report 241 | 242 | def generate(self, site_name, documents_analysis): 243 | """ 244 | Generate the SEO report. 245 | Return HTML file generated by Jinja2. 246 | """ 247 | 248 | seo_reports = [] 249 | for document_analysis in documents_analysis: 250 | 251 | document_report = self._launch_report(document_analysis) 252 | 253 | documents_reports = { 254 | 'url': document_analysis.get('url'), 255 | 'date': document_analysis.get('date'), 256 | 'seo_reports': document_report, 257 | } 258 | 259 | seo_reports.append(documents_reports) 260 | 261 | # Sort documents by publication date, from recent to oldest. 262 | # Document whithout a date are sorted at the end of the report. 263 | seo_reports = sorted( 264 | seo_reports, 265 | key=lambda k: (k['date'] is not None, k['date']), 266 | reverse=True 267 | ) 268 | 269 | # Get Jinja HTML template 270 | plugin_path = os.path.dirname(os.path.realpath(__file__)) 271 | file_loader = FileSystemLoader(plugin_path + '/template') 272 | env = Environment(loader=file_loader) 273 | 274 | template = env.get_template('seo_report.html') 275 | css_file = plugin_path + '/static/seo_report.css' 276 | output = template.render( 277 | site_name=site_name, 278 | seo_reports=seo_reports, 279 | css_file=css_file 280 | ) 281 | 282 | # Create HTML file 283 | with open("seo_report.html", 'w') as report: 284 | report.write(output) 285 | 286 | logger.info( 287 | "SEO plugin - SEO Report: seo_report.html file created" 288 | ) 289 | -------------------------------------------------------------------------------- /pelican/plugins/seo/tests/test_article_schema_creator.py: -------------------------------------------------------------------------------- 1 | """ Units tests for Article Schema Creator. """ 2 | 3 | from seo.seo_enhancer.html_enhancer import ArticleSchemaCreator 4 | 5 | 6 | class TestArticleSchemaCreator(): 7 | """ Unit tests for ArticleSchemaCreator. """ 8 | 9 | def test_create_schema(self, fake_article): 10 | """ Test that create_schema returns a valid schema.org (dict). """ 11 | 12 | article = ArticleSchemaCreator( 13 | author=fake_article.author, 14 | title=fake_article.title, 15 | category=fake_article.category, 16 | date=fake_article.date, 17 | logo=fake_article.settings['LOGO'], 18 | image=fake_article.metadata['image'], 19 | sitename=fake_article.settings['SITENAME'], 20 | ) 21 | 22 | fake_article_schema = article.create_schema() 23 | 24 | assert fake_article_schema['@context'] == "https://schema.org" 25 | assert fake_article_schema['@type'] == "Article" 26 | 27 | assert fake_article_schema['author']['@type'] == 'Person' 28 | assert fake_article_schema['author']['name'] == 'Fake author' 29 | 30 | assert fake_article_schema['publisher']['@type'] == 'Organization' 31 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 32 | assert fake_article_schema['publisher']['logo']['@type'] == 'ImageObject' 33 | assert fake_article_schema['publisher']['logo']['url'] == \ 34 | 'https://www.fakesite.com/fake-logo.jpg' 35 | 36 | assert fake_article_schema['headline'] == 'Fake Title' 37 | 38 | assert fake_article_schema['about'] == 'Fake category' 39 | 40 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 41 | 42 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 43 | 44 | def test_create_schema_with_incomplete_article(self, fake_article_missing_elements): 45 | """ 46 | Test that create_schema returns a schema.org, 47 | even if article is incomplete. 48 | """ 49 | 50 | article = ArticleSchemaCreator( 51 | author=fake_article_missing_elements.author, 52 | title=fake_article_missing_elements.title, 53 | category=fake_article_missing_elements.category, 54 | date='', 55 | logo=fake_article_missing_elements.settings['LOGO'], 56 | image=fake_article_missing_elements.metadata['image'], 57 | sitename=fake_article_missing_elements.settings['SITENAME'], 58 | ) 59 | 60 | fake_article_schema = article.create_schema() 61 | 62 | assert fake_article_schema['@context'] == "https://schema.org" 63 | assert fake_article_schema['@type'] == "Article" 64 | 65 | assert 'author' not in fake_article_schema 66 | assert 'publisher' not in fake_article_schema 67 | assert 'headline' not in fake_article_schema 68 | assert 'about' not in fake_article_schema 69 | assert 'datePublished' not in fake_article_schema 70 | assert 'image' not in fake_article_schema 71 | 72 | def test_create_schema_with_author_missing(self, fake_article, 73 | fake_article_missing_elements): 74 | """ Test that create_schema returns a schema.org, with author missing. """ 75 | 76 | article = ArticleSchemaCreator( 77 | author=fake_article_missing_elements.author, 78 | title=fake_article.title, 79 | category=fake_article.category, 80 | date=fake_article.date, 81 | logo=fake_article.settings['LOGO'], 82 | image=fake_article.metadata['image'], 83 | sitename=fake_article.settings['SITENAME'], 84 | ) 85 | 86 | fake_article_schema = article.create_schema() 87 | 88 | assert 'Fake author' not in fake_article_schema 89 | 90 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 91 | assert fake_article_schema['publisher']['logo']['url'] == \ 92 | 'https://www.fakesite.com/fake-logo.jpg' 93 | assert fake_article_schema['headline'] == 'Fake Title' 94 | assert fake_article_schema['about'] == 'Fake category' 95 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 96 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 97 | 98 | def test_create_schema_with_title_missing(self, fake_article, 99 | fake_article_missing_elements): 100 | """ Test that create_schema returns a schema.org, with title missing. """ 101 | 102 | article = ArticleSchemaCreator( 103 | author=fake_article.author, 104 | title=fake_article_missing_elements.title, 105 | category=fake_article.category, 106 | date=fake_article.date, 107 | logo=fake_article.settings['LOGO'], 108 | image=fake_article.metadata['image'], 109 | sitename=fake_article.settings['SITENAME'], 110 | ) 111 | 112 | fake_article_schema = article.create_schema() 113 | 114 | assert 'Fake Title' not in fake_article_schema 115 | 116 | assert fake_article_schema['author']['name'] == 'Fake author' 117 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 118 | assert fake_article_schema['publisher']['logo']['url'] == \ 119 | 'https://www.fakesite.com/fake-logo.jpg' 120 | assert fake_article_schema['about'] == 'Fake category' 121 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 122 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 123 | 124 | def test_create_schema_with_category_missing(self, fake_article, 125 | fake_article_missing_elements): 126 | """ Test that create_schema returns a schema.org, with category missing. """ 127 | 128 | article = ArticleSchemaCreator( 129 | author=fake_article.author, 130 | title=fake_article.title, 131 | category=fake_article_missing_elements.category, 132 | date=fake_article.date, 133 | logo=fake_article.settings['LOGO'], 134 | image=fake_article.metadata['image'], 135 | sitename=fake_article.settings['SITENAME'], 136 | ) 137 | 138 | fake_article_schema = article.create_schema() 139 | 140 | assert 'Fake category' not in fake_article_schema 141 | 142 | assert fake_article_schema['author']['name'] == 'Fake author' 143 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 144 | assert fake_article_schema['publisher']['logo']['url'] == \ 145 | 'https://www.fakesite.com/fake-logo.jpg' 146 | assert fake_article_schema['headline'] == 'Fake Title' 147 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 148 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 149 | 150 | def test_create_schema_with_date_missing(self, fake_article, 151 | fake_article_missing_elements): 152 | """ Test that create_schema returns a schema.org, with date missing. """ 153 | 154 | article = ArticleSchemaCreator( 155 | author=fake_article.author, 156 | title=fake_article.title, 157 | category=fake_article.category, 158 | date='', 159 | logo=fake_article.settings['LOGO'], 160 | image=fake_article.metadata['image'], 161 | sitename=fake_article.settings['SITENAME'], 162 | ) 163 | 164 | fake_article_schema = article.create_schema() 165 | 166 | assert '2019-04-03 23:49' not in fake_article_schema 167 | 168 | assert fake_article_schema['author']['name'] == 'Fake author' 169 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 170 | assert fake_article_schema['publisher']['logo']['url'] == \ 171 | 'https://www.fakesite.com/fake-logo.jpg' 172 | assert fake_article_schema['headline'] == 'Fake Title' 173 | assert fake_article_schema['about'] == 'Fake category' 174 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 175 | 176 | def test_create_schema_with_logo_missing(self, fake_article, 177 | fake_article_missing_elements): 178 | """ Test that create_schema returns a schema.org, with logo missing. """ 179 | 180 | article = ArticleSchemaCreator( 181 | author=fake_article.author, 182 | title=fake_article.title, 183 | category=fake_article.category, 184 | date=fake_article.date, 185 | logo=fake_article_missing_elements.settings['LOGO'], 186 | image=fake_article.metadata['image'], 187 | sitename=fake_article.settings['SITENAME'], 188 | ) 189 | 190 | fake_article_schema = article.create_schema() 191 | 192 | assert 'https://www.fakesite.com/fake-logo.jpg' not in fake_article_schema 193 | 194 | assert fake_article_schema['author']['name'] == 'Fake author' 195 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 196 | assert fake_article_schema['headline'] == 'Fake Title' 197 | assert fake_article_schema['about'] == 'Fake category' 198 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 199 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 200 | 201 | def test_create_schema_with_image_missing(self, fake_article, 202 | fake_article_missing_elements): 203 | """ Test that create_schema returns a schema.org, with image missing. """ 204 | 205 | article = ArticleSchemaCreator( 206 | author=fake_article.author, 207 | title=fake_article.title, 208 | category=fake_article.category, 209 | date=fake_article.date, 210 | logo=fake_article.settings['LOGO'], 211 | image=fake_article_missing_elements.metadata['image'], 212 | sitename=fake_article.settings['SITENAME'], 213 | ) 214 | 215 | fake_article_schema = article.create_schema() 216 | 217 | assert 'https://www.fakesite.com/fake-image.jpg' not in fake_article_schema 218 | 219 | assert fake_article_schema['author']['name'] == 'Fake author' 220 | assert fake_article_schema['publisher']['name'] == 'Fake Site Name' 221 | assert fake_article_schema['publisher']['logo']['url'] == \ 222 | 'https://www.fakesite.com/fake-logo.jpg' 223 | assert fake_article_schema['headline'] == 'Fake Title' 224 | assert fake_article_schema['about'] == 'Fake category' 225 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 226 | 227 | def test_create_schema_with_sitename_missing(self, fake_article, 228 | fake_article_missing_elements): 229 | """ Test that create_schema returns a schema.org, with sitename missing. """ 230 | 231 | article = ArticleSchemaCreator( 232 | author=fake_article.author, 233 | title=fake_article.title, 234 | category=fake_article.category, 235 | date=fake_article.date, 236 | logo=fake_article.settings['LOGO'], 237 | image=fake_article.metadata['image'], 238 | sitename=fake_article_missing_elements.settings['SITENAME'], 239 | ) 240 | 241 | fake_article_schema = article.create_schema() 242 | 243 | assert 'Fake Site Name' not in fake_article_schema 244 | assert 'logo' not in fake_article_schema 245 | 246 | assert fake_article_schema['author']['name'] == 'Fake author' 247 | assert fake_article_schema['headline'] == 'Fake Title' 248 | assert fake_article_schema['about'] == 'Fake category' 249 | assert fake_article_schema['datePublished'] == '2019-04-03 23:49' 250 | assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg' 251 | --------------------------------------------------------------------------------