├── .gitignore
├── pelican
    └── plugins
    │   └── seo
    │       ├── tests
    │           ├── __init__.py
    │           ├── test_canonical_url_creator.py
    │           ├── test_page_title_analyzer.py
    │           ├── test_robots_file_creator.py
    │           ├── test_page_description_analyzer.py
    │           ├── test_internal_link_analyzer.py
    │           ├── test_content_title_analyzer.py
    │           ├── test_seo_report.py
    │           ├── test_seo_enhancer.py
    │           ├── test_breadcrumb_schema_creator.py
    │           ├── conftest.py
    │           └── test_article_schema_creator.py
    │       ├── __init__.py
    │       ├── settings.py
    │       ├── seo_enhancer
    │           ├── html_enhancer
    │           │   ├── canonical_url_creator.py
    │           │   ├── __init__.py
    │           │   ├── article_schema_creator.py
    │           │   └── breadcrumb_schema_creator.py
    │           ├── robots_file_creator.py
    │           └── __init__.py
    │       ├── seo_report
    │           ├── seo_analyzer
    │           │   ├── page_title_analyzer.py
    │           │   ├── page_description_analyzer.py
    │           │   ├── content_title_analyzer.py
    │           │   ├── internal_link_analyzer.py
    │           │   └── __init__.py
    │           ├── static
    │           │   └── seo_report.css
    │           ├── template
    │           │   └── seo_report.html
    │           └── __init__.py
    │       └── seo.py
├── tox.ini
├── docs
    └── seo-report-example.png
├── .travis.yml
├── .editorconfig
├── CONTRIBUTING.md
├── .pre-commit-config.yaml
├── tasks.py
├── pyproject.toml
└── README.md


/.gitignore:
--------------------------------------------------------------------------------
1 | .vscode/
2 | *__pycache__


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/__init__.py:
--------------------------------------------------------------------------------
1 | from .seo import *  # NOQA
2 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [flake8]
2 | max-line-length = 88
3 | ignore = E203, E266, E501, W503
4 | 


--------------------------------------------------------------------------------
/docs/seo-report-example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MaevaBrunelles/pelican-seo/HEAD/docs/seo-report-example.png


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - "3.7"
 5 | 
 6 | install:
 7 |   - pip3 install pelican pytest bs4
 8 | 
 9 | script:
10 |   - pytest
11 | 
12 | branches:
13 |   only:
14 |     - master


--------------------------------------------------------------------------------
/pelican/plugins/seo/settings.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Plugin settings : activate or deactivate SEO features
 3 | """
 4 | 
 5 | SEO_REPORT = True
 6 | SEO_ENHANCER = True
 7 | 
 8 | ARTICLES_LIMIT = 10
 9 | PAGES_LIMIT = 10
10 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | charset = utf-8
 5 | end_of_line = lf
 6 | indent_size = 4
 7 | indent_style = space
 8 | insert_final_newline = true
 9 | trim_trailing_whitespace = true
10 | 
11 | [*.py]
12 | max_line_length = 88
13 | 
14 | [*.yml]
15 | indent_size = 2
16 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_enhancer/html_enhancer/canonical_url_creator.py:
--------------------------------------------------------------------------------
 1 | """ Canonical URL creator. """
 2 | 
 3 | import os
 4 | 
 5 | 
 6 | class CanonicalURLCreator():
 7 |     """ Canonical URL creator. """
 8 | 
 9 |     def __init__(self, fileurl, siteurl):
10 |         self._fileurl = fileurl
11 |         self._siteurl = siteurl
12 | 
13 |     def create_url(self):
14 |         """ Join site URL and file URL to create canonical link. """
15 | 
16 |         canonical_url = os.path.join(self._siteurl, self._fileurl)
17 |         return canonical_url
18 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | Contributing
 2 | ============
 3 | 
 4 | Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][].
 5 | 
 6 | To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section.
 7 | 
 8 | [existing issues]: https://github.com/pelican-plugins/seo/issues
 9 | [Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html
10 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/seo_analyzer/page_title_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Analyze the page title. """
 2 | 
 3 | 
 4 | class PageTitleAnalyzer():
 5 |     """ Analyze the page title. """
 6 | 
 7 |     def __init__(self, title):
 8 |         self._title = title
 9 | 
10 |     def has_page_title(self):
11 |         """ Return True is there is a page title. """
12 | 
13 |         if not self._title:
14 |             return False
15 | 
16 |         return True
17 | 
18 |     @property
19 |     def page_title_length(self):
20 |         """ Return page title length. """
21 | 
22 |         return len(self._title)
23 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_enhancer/robots_file_creator.py:
--------------------------------------------------------------------------------
 1 | """ Return elements to build the robots.txt file. """
 2 | 
 3 | 
 4 | class RobotsFileCreator():
 5 |     """ Get robots rules from article metadata. Return them. """
 6 | 
 7 |     def __init__(self, metadata):
 8 |         self.metadata_noindex = metadata.get('noindex')
 9 |         self.metadata_disallow = metadata.get('disallow')
10 | 
11 |     @property
12 |     def get_noindex(self):
13 |         """ Return noindex rules. """
14 | 
15 |         return self.metadata_noindex
16 | 
17 |     @property
18 |     def get_disallow(self):
19 |         """ Return disallow rules. """
20 | 
21 |         return self.metadata_disallow
22 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/seo_analyzer/page_description_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Analyze the page description. """
 2 | 
 3 | 
 4 | class PageDescriptionAnalyzer():
 5 |     """ Analyze the page description. """
 6 | 
 7 |     def __init__(self, description):
 8 |         self._description = description
 9 | 
10 |     def has_page_description(self):
11 |         """ Return True if there is a page description. """
12 | 
13 |         if not self._description:
14 |             return False
15 | 
16 |         return True
17 | 
18 |     @property
19 |     def page_description_length(self):
20 |         """ Return page description length. """
21 | 
22 |         return len(self._description)
23 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_canonical_url_creator.py:
--------------------------------------------------------------------------------
 1 | """ Units tests for Canonical URL Creator. """
 2 | 
 3 | from seo.seo_enhancer.html_enhancer import CanonicalURLCreator
 4 | 
 5 | 
 6 | class TestCanonicalURLCreator():
 7 |     """ Unit tests for CanonicalURLCreator. """
 8 | 
 9 |     def test_create_url(self, fake_article):
10 |         """ Test if create_url() returns the join of site URL and article URL. """
11 | 
12 |         canonical = CanonicalURLCreator(
13 |             fileurl=fake_article.url,
14 |             siteurl=fake_article.settings['SITEURL']
15 |         )
16 |         canonical_link = canonical.create_url()
17 | 
18 |         assert canonical_link == "fakesite.com/fake-title.html"
19 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/seo_analyzer/content_title_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Analyze the content title. """
 2 | 
 3 | from bs4 import BeautifulSoup
 4 | 
 5 | 
 6 | class ContentTitleAnalyzer():
 7 |     """ Analyze the content title. """
 8 | 
 9 |     def __init__(self, content):
10 |         self._soup = BeautifulSoup(content, features="html.parser")
11 | 
12 |     def has_content_title(self):
13 |         """ Return True is there is a content title. """
14 | 
15 |         if not self._soup.h1:
16 |             return False
17 | 
18 |         return True
19 | 
20 |     def is_content_title_unique(self):
21 |         """ Return True if content title is unique. """
22 | 
23 |         content_titles = self._soup.find_all('h1')
24 | 
25 |         if len(content_titles) > 1:
26 |             return False
27 | 
28 |         return True
29 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # See https://pre-commit.com/hooks.html for info on hooks
 2 | repos:
 3 |   - repo: https://github.com/pre-commit/pre-commit-hooks
 4 |     rev: v2.3.0
 5 |     hooks:
 6 |       - id: check-added-large-files
 7 |       - id: check-ast
 8 |       - id: check-toml
 9 |       - id: check-yaml
10 |       - id: debug-statements
11 |       - id: detect-private-key
12 |       - id: end-of-file-fixer
13 |       - id: trailing-whitespace
14 | 
15 |   - repo: https://github.com/psf/black
16 |     rev: 19.10b0
17 |     hooks:
18 |       - id: black
19 | 
20 |   - repo: https://gitlab.com/pycqa/flake8
21 |     rev: 3.7.9
22 |     hooks:
23 |       - id: flake8
24 |         args: [--max-line-length=88]
25 |         language_version: python3.7
26 | 
27 |   - repo: https://github.com/pre-commit/mirrors-isort
28 |     rev: v4.3.21
29 |     hooks:
30 |       - id: isort
31 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/seo_analyzer/internal_link_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Analyze the internal link of an article. """
 2 | 
 3 | from bs4 import BeautifulSoup
 4 | 
 5 | 
 6 | class InternalLinkAnalyzer():
 7 |     """ Analyze internal link of an article. """
 8 | 
 9 |     def __init__(self, content, siteurl):
10 |         self._soup = BeautifulSoup(content, features="html.parser")
11 |         self._links = self._soup.find_all('a')
12 |         self._siteurl = siteurl
13 | 
14 |     def has_internal_link(self):
15 |         """
16 |         Return True is there is a internal link.
17 |         Need to have SITEURL parameter declared.
18 |         """
19 | 
20 |         if not self._links:
21 |             return False
22 | 
23 |         for link in self._links:
24 |             if self._siteurl in link['href']:
25 |                 return True
26 | 
27 |         return False
28 | 
29 |     @property
30 |     def internal_link_occurrence(self):
31 |         """ Return the internal link occurrence. """
32 | 
33 |         return len([link for link in self._links if self._siteurl in link['href']])
34 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_page_title_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Unit tests for Page Title Analyzer. """
 2 | 
 3 | from seo.seo_report.seo_analyzer import PageTitleAnalyzer
 4 | 
 5 | 
 6 | class TestPageTitleAnalyzer():
 7 |     """ Units tests for PageTitleAnalyzer. """
 8 | 
 9 |     def test_article_has_page_title(self, fake_article):
10 |         """ Test if has_page_title returns True if fake_article has a title. """
11 | 
12 |         fake_analysis = PageTitleAnalyzer(title=fake_article.title)
13 |         assert fake_analysis.has_page_title()
14 | 
15 |     def test_article_has_no_page_title(self, fake_article_missing_elements):
16 |         """ Test if has_page_title returns False if fake_article has no title. """
17 | 
18 |         fake_analysis = PageTitleAnalyzer(title=fake_article_missing_elements.title)
19 |         assert not fake_analysis.has_page_title()
20 | 
21 |     def test_article_page_title_length(self, fake_article):
22 |         """ Test if page_title_length returns the good title length. """
23 | 
24 |         fake_analysis = PageTitleAnalyzer(title=fake_article.title)
25 |         assert fake_analysis.page_title_length == len(fake_article.title)
26 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/seo_analyzer/__init__.py:
--------------------------------------------------------------------------------
 1 | """ Launch micro SEO analysis. """
 2 | 
 3 | from .page_title_analyzer import PageTitleAnalyzer
 4 | from .page_description_analyzer import PageDescriptionAnalyzer
 5 | from .content_title_analyzer import ContentTitleAnalyzer
 6 | from .internal_link_analyzer import InternalLinkAnalyzer
 7 | 
 8 | 
 9 | class SEOAnalyzer():
10 |     """ Instancy all micro SEO analyzers. """
11 | 
12 |     def __init__(self, article):
13 |         self._title = getattr(article, 'title', None)
14 |         self._description = getattr(article, 'description', None)
15 |         self._content = getattr(article, 'content', None)
16 |         self._settings = getattr(article, 'settings', None)
17 | 
18 |         self.page_title_analysis = PageTitleAnalyzer(title=self._title)
19 |         self.page_description_analysis = PageDescriptionAnalyzer(
20 |             description=self._description
21 |         )
22 |         self.content_title_analysis = ContentTitleAnalyzer(content=self._content)
23 |         self.internal_link_analysis = InternalLinkAnalyzer(
24 |             content=self._content,
25 |             siteurl=self._settings['SITEURL']
26 |         )
27 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/static/seo_report.css:
--------------------------------------------------------------------------------
 1 | html, body{
 2 |     margin: 0;
 3 |     padding: 0;
 4 |     font-family: Arial, Helvetica, sans-serif;
 5 | }
 6 | 
 7 | header{
 8 |     background-color: #373e49;
 9 |     width: 100%;
10 |     padding: 20px;
11 | }
12 | 
13 | section{
14 |     padding: 40px;
15 |     background-color: #f2f4f7;
16 | }
17 | 
18 | article{
19 |     padding: 20px;
20 |     background-color: white;
21 |     margin-bottom: 30px;
22 | }
23 | 
24 | article:last-child{
25 |     margin-bottom: 0px;
26 | }
27 | 
28 | footer{
29 |     background-color: #373e49;
30 |     text-align: center;
31 |     padding: 20px;
32 |     color: white;
33 | }
34 | 
35 | footer a{
36 |     color: white;
37 | }
38 | 
39 | h1{
40 |     text-align: center;
41 |     color: white;
42 | }
43 | 
44 | h2{
45 |     color: #373e49;
46 |     padding-bottom: 5px;
47 | }
48 | 
49 | .article-header{
50 |     display:flex;
51 |     justify-content:space-between;
52 |     align-items:baseline;
53 |     border-bottom: 2px solid #373e49;
54 | }
55 | 
56 | .green{
57 |     color:green;
58 | }
59 | 
60 | .orange{
61 |     color: orange;
62 | }
63 | 
64 | .red{
65 |     color: red;
66 | }
67 | 
68 | table{
69 |     border-collapse: collapse;
70 | }
71 | 
72 | td, th{
73 |     border: 1px solid black;
74 |     padding: 10px;
75 | }
76 | 
77 | th{
78 |     text-align: left;
79 | }


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_robots_file_creator.py:
--------------------------------------------------------------------------------
 1 | """ Units tests for Robots File Creator. """
 2 | 
 3 | from seo.seo_enhancer.robots_file_creator import RobotsFileCreator
 4 | 
 5 | 
 6 | class TestRobotsFileCreator():
 7 |     """ Units tests for RobotsFileCreator. """
 8 | 
 9 |     def test_get_all_robots_rules(self, fake_article):
10 |         """
11 |         Test if get_noindex and get_disallow return True
12 |         if the article has specific rules.
13 |         """
14 | 
15 |         fake_robots = RobotsFileCreator(fake_article.metadata)
16 |         assert fake_robots.get_noindex
17 |         assert fake_robots.get_disallow
18 | 
19 |     def test_get_one_robots_rule(self, fake_article_missing_elements):
20 |         """
21 |         Test if only get_noindex or get_disallow return True
22 |         if the article has one specific rule.
23 |         """
24 | 
25 |         fake_robots = RobotsFileCreator(fake_article_missing_elements.metadata)
26 |         assert fake_robots.get_noindex
27 |         assert not fake_robots.get_disallow
28 | 
29 |     def test_get_none_robots_rule(self, fake_article_multiple_elements):
30 |         """
31 |         Test if get_noindex and get_disallow return None
32 |         if the article has no specific rules.
33 |         """
34 | 
35 |         fake_robots = RobotsFileCreator(fake_article_multiple_elements.metadata)
36 |         assert not fake_robots.get_noindex
37 |         assert not fake_robots.get_disallow
38 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_page_description_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Unit tests for Page Description Analyzer. """
 2 | 
 3 | from seo.seo_report.seo_analyzer import PageDescriptionAnalyzer
 4 | 
 5 | 
 6 | class TestPageDescriptionAnalyzer():
 7 |     """ Units tests for PageDescriptionAnalyzer. """
 8 | 
 9 |     def test_article_has_page_description(self, fake_article):
10 |         """
11 |         Test if has_page_description returns True
12 |         if fake_article has a description.
13 |         """
14 | 
15 |         fake_analysis = PageDescriptionAnalyzer(description=fake_article.description)
16 |         assert fake_analysis.has_page_description()
17 | 
18 |     def test_article_has_no_page_description(self, fake_article_missing_elements):
19 |         """
20 |         Test if has_page_description returns False
21 |         if fake_article has no description.
22 |         """
23 | 
24 |         fake_analysis = PageDescriptionAnalyzer(
25 |             description=fake_article_missing_elements.description
26 |         )
27 |         assert not fake_analysis.has_page_description()
28 | 
29 |     def test_article_page_description_length(self, fake_article):
30 |         """
31 |         Test if page_description_length returns
32 |         the good description length.
33 |         """
34 | 
35 |         fake_analysis = PageDescriptionAnalyzer(description=fake_article.description)
36 |         assert fake_analysis.page_description_length == len(fake_article.description)
37 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_enhancer/html_enhancer/__init__.py:
--------------------------------------------------------------------------------
 1 | """ HTML Enhancer : get instances of HTML enhancements. """
 2 | 
 3 | from .canonical_url_creator import CanonicalURLCreator
 4 | from .article_schema_creator import ArticleSchemaCreator
 5 | from .breadcrumb_schema_creator import BreadcrumbSchemaCreator
 6 | 
 7 | 
 8 | class HTMLEnhancer():
 9 |     """ HTML Enhancer : get instances of HTML enhancements. """
10 | 
11 |     def __init__(self, file, output_path, path):
12 |         _settings = getattr(file, 'settings', None)
13 |         _fileurl = getattr(file, 'url', None)
14 |         _author = getattr(file, 'author', None)
15 |         _date = getattr(file, 'date', None)
16 |         _title = getattr(file, 'title', None)
17 |         _category = getattr(file, 'category', None)
18 |         _image = getattr(file, 'image', None)
19 | 
20 |         self.article_schema = ArticleSchemaCreator(
21 |             author=_author,
22 |             title=_title,
23 |             category=_category,
24 |             date=_date,
25 |             logo=_settings.get('LOGO'),
26 |             image=_image,
27 |             sitename=_settings.get('SITENAME'),
28 |         )
29 | 
30 |         self.canonical_link = CanonicalURLCreator(
31 |             siteurl=_settings.get('SITEURL'),
32 |             fileurl=_fileurl,
33 |         )
34 | 
35 |         self.breadcrumb_schema = BreadcrumbSchemaCreator(
36 |             output_path=output_path,
37 |             path=path,
38 |             sitename=_settings.get('SITENAME'),
39 |             siteurl=_settings.get('SITEURL'),
40 |         )
41 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_internal_link_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Unit tests for Internal Link Analyzer. """
 2 | 
 3 | from seo.seo_report.seo_analyzer import InternalLinkAnalyzer
 4 | 
 5 | 
 6 | class TestInternalLinkAnalyzer():
 7 |     """ Units tests for InternalLinkAnalyzer. """
 8 | 
 9 |     def test_article_has_internal_link(self, fake_article):
10 |         """
11 |         Test if has_internal_link returns True
12 |         if fake_article has at least one internal link.
13 |         """
14 | 
15 |         fake_analysis = InternalLinkAnalyzer(
16 |             content=fake_article.content,
17 |             siteurl=fake_article.settings['SITEURL'],
18 |         )
19 |         assert fake_analysis.has_internal_link()
20 | 
21 |     def test_article_has_no_internal_link(self, fake_article_missing_elements):
22 |         """
23 |         Test if has_internal_link returns False
24 |         if fake_article has no internal link.
25 |         """
26 | 
27 |         fake_analysis = InternalLinkAnalyzer(
28 |             content=fake_article_missing_elements.content,
29 |             siteurl=fake_article_missing_elements.settings['SITEURL'],
30 |         )
31 |         assert not fake_analysis.has_internal_link()
32 | 
33 |     def test_article_internal_link_occurrence(self, fake_article_multiple_elements):
34 |         """ Test if internal_link_occurrence returns the rigth length. """
35 | 
36 |         fake_analysis = InternalLinkAnalyzer(
37 |             content=fake_article_multiple_elements.content,
38 |             siteurl=fake_article_multiple_elements.settings['SITEURL'],
39 |         )
40 |         assert fake_analysis.internal_link_occurrence == 2
41 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_content_title_analyzer.py:
--------------------------------------------------------------------------------
 1 | """ Unit tests for Content Title Analyzer. """
 2 | 
 3 | from seo.seo_report.seo_analyzer import ContentTitleAnalyzer
 4 | 
 5 | 
 6 | class TestContentTitleAnalyzer():
 7 |     """ Units tests for ContentTitleAnalyzer. """
 8 | 
 9 |     def test_article_has_content_title(self, fake_article):
10 |         """
11 |         Test if has_content_title returns True
12 |         if fake_article has a content title.
13 |         """
14 | 
15 |         fake_analysis = ContentTitleAnalyzer(content=fake_article.content)
16 |         assert fake_analysis.has_content_title()
17 | 
18 |     def test_article_has_no_content_title(self, fake_article_missing_elements):
19 |         """
20 |         Test if has_content_title returns False
21 |         if fake_article has no content title.
22 |         """
23 | 
24 |         fake_analysis = ContentTitleAnalyzer(
25 |             content=fake_article_missing_elements.content
26 |         )
27 |         assert not fake_analysis.has_content_title()
28 | 
29 |     def test_article_content_title_is_unique(self, fake_article):
30 |         """ Test if is_content_title_unique returns True if content title is unique. """
31 | 
32 |         fake_analysis = ContentTitleAnalyzer(content=fake_article.content)
33 |         assert fake_analysis.is_content_title_unique
34 | 
35 |     def test_article_content_title_is_not_unique(self, fake_article_multiple_elements):
36 |         """
37 |         Test if is_content_title_unique returns False
38 |         if content title is not unique.
39 |         """
40 | 
41 |         fake_analysis = ContentTitleAnalyzer(
42 |             content=fake_article_multiple_elements.content
43 |         )
44 |         assert not fake_analysis.is_content_title_unique()
45 | 


--------------------------------------------------------------------------------
/tasks.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from pathlib import Path
 3 | from shutil import which
 4 | 
 5 | from invoke import task
 6 | 
 7 | PKG_NAME = "seo"
 8 | PKG_PATH = Path(f"pelican/plugins/{PKG_NAME}")
 9 | ACTIVE_VENV = os.environ.get("VIRTUAL_ENV", None)
10 | VENV_HOME = Path(os.environ.get("WORKON_HOME", "~/.local/share/virtualenvs"))
11 | VENV_PATH = Path(ACTIVE_VENV) if ACTIVE_VENV else (VENV_HOME / PKG_NAME)
12 | VENV = str(VENV_PATH.expanduser())
13 | 
14 | TOOLS = ["poetry", "pre-commit"]
15 | POETRY = which("poetry") if which("poetry") else (VENV / Path("bin") / "poetry")
16 | PRECOMMIT = (
17 |     which("pre-commit") if which("pre-commit") else (VENV / Path("bin") / "pre-commit")
18 | )
19 | 
20 | 
21 | @task
22 | def tests(c):
23 |     """Run the test suite"""
24 |     c.run(f"{VENV}/bin/pytest", pty=True)
25 | 
26 | 
27 | @task
28 | def black(c, check=False, diff=False):
29 |     """Run Black auto-formatter, optionally with --check or --diff"""
30 |     check_flag, diff_flag = "", ""
31 |     if check:
32 |         check_flag = "--check"
33 |     if diff:
34 |         diff_flag = "--diff"
35 |     c.run(f"{VENV}/bin/black {check_flag} {diff_flag} {PKG_PATH} tasks.py")
36 | 
37 | 
38 | @task
39 | def isort(c, check=False, diff=False):
40 |     check_flag, diff_flag = "", ""
41 |     if check:
42 |         check_flag = "-c"
43 |     if diff:
44 |         diff_flag = "--diff"
45 |     c.run(
46 |         f"{VENV}/bin/isort {check_flag} {diff_flag} --recursive {PKG_PATH}/* tasks.py"
47 |     )
48 | 
49 | 
50 | @task
51 | def flake8(c):
52 |     c.run(f"{VENV}/bin/flake8 {PKG_PATH} tasks.py")
53 | 
54 | 
55 | @task
56 | def lint(c):
57 |     isort(c, check=True)
58 |     black(c, check=True)
59 |     flake8(c)
60 | 
61 | 
62 | @task
63 | def tools(c):
64 |     """Install tools in the virtual environment if not already on PATH"""
65 |     for tool in TOOLS:
66 |         if not which(tool):
67 |             c.run(f"{VENV}/bin/pip install {tool}")
68 | 
69 | 
70 | @task
71 | def precommit(c):
72 |     """Install pre-commit hooks to .git/hooks/pre-commit"""
73 |     c.run(f"{PRECOMMIT} install")
74 | 
75 | 
76 | @task
77 | def setup(c):
78 |     c.run(f"{VENV}/bin/pip install -U pip")
79 |     tools(c)
80 |     c.run(f"{POETRY} install")
81 |     precommit(c)
82 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [tool.poetry]
 2 | name = "pelican-seo"
 3 | version = "0.1.0"
 4 | description = "This plugin helps you improve your Pelican site SEO (Search Engine Optimization) to reach the top positions on search engines."
 5 | authors = ["Maëva Brunelles <contact@tremacorp.fr>"]
 6 | license = "AGPL-3.0"
 7 | readme = "README.md"
 8 | keywords = ["pelican", "plugin", "seo"]
 9 | repository = "https://github.com/pelican-plugins/seo"
10 | packages = [
11 |     { include = "pelican" },
12 | ]
13 | 
14 | classifiers = [
15 |     "Development Status :: 3 - Alpha",
16 |     "Environment :: Console",
17 |     "Framework :: Pelican",
18 |     "Framework :: Pelican :: Plugins",
19 |     "Intended Audience :: End Users/Desktop",
20 |     "License :: OSI Approved :: GNU Affero General Public License v3",
21 |     "Operating System :: OS Independent",
22 |     "Programming Language :: Python",
23 |     "Programming Language :: Python :: 3",
24 |     "Programming Language :: Python :: 3.6",
25 |     "Programming Language :: Python :: 3.7",
26 |     "Programming Language :: Python :: 3.8",
27 |     "Topic :: Internet :: WWW/HTTP",
28 |     "Topic :: Software Development :: Libraries :: Python Modules",
29 | ]
30 | 
31 | [tool.poetry.urls]
32 | "Documentation" = "https://docs.getpelican.com/"
33 | "Funding" = "https://donate.getpelican.com/"
34 | "Source" = "https://github.com/pelican-plugins/seo"
35 | "Tracker" = "https://github.com/pelican-plugins/seo/issues"
36 | 
37 | [tool.poetry.dependencies]
38 | python = "3.7"
39 | pelican = "^4.2"
40 | markdown = {version = "^3.1.1",optional = true}
41 | 
42 | [tool.poetry.dev-dependencies]
43 | black = {version = "^19.10b0",allow-prereleases = true}
44 | flake8 = "^3.7"
45 | flake8-black = "^0.1.0"
46 | invoke = "^1.3"
47 | isort = "^4.3"
48 | livereload = "^2.6"
49 | markdown = "^3.1.1"
50 | pytest = "^5.0"
51 | pytest-cov = "^2.7"
52 | pytest-pythonpath = "^0.7.3"
53 | pytest-sugar = "^0.9.2"
54 | Werkzeug = "^0.15.5"
55 | 
56 | [tool.poetry.extras]
57 | markdown = ["markdown"]
58 | 
59 | [tool.isort]
60 | # Maintain compatibility with Black
61 | combine_as_imports = true
62 | force_grid_wrap = 0
63 | include_trailing_comma = true
64 | line_length = 88
65 | multi_line_output = 3
66 | 
67 | # Sort imports within their section independent of the import type
68 | force_sort_within_sections = true
69 | 
70 | [build-system]
71 | requires = ["poetry>=1.0"]
72 | build-backend = "poetry.masonry.api"
73 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/template/seo_report.html:
--------------------------------------------------------------------------------
 1 | <html>
 2 |     <head>
 3 |         <title>SEO Report</title>
 4 | 
 5 |         <link href="{{ css_file }}" rel="stylesheet" media="all" type="text/css">
 6 |     </head>
 7 |     <body>
 8 |         <header>
 9 |             <h1>SEO report - {{ site_name }}</h1>
10 |         </header>
11 | 
12 |         <section>
13 | 
14 |             {% for documents in seo_reports %}
15 |             <article>
16 | 
17 |                 <div class="article-header">
18 |                     <h2>{{ documents.url }}</h2>
19 |                     {% if documents.date %}
20 |                     <span>{{ documents.date }}</span>
21 |                     {% endif %}
22 |                 </div>
23 | 
24 |                 {% for report in documents.seo_reports %}
25 | 
26 |                     <h3>{{ report.title }}</h3>
27 | 
28 |                     <table>
29 |                         {% if report.content.good %}
30 |                         <tr>
31 |                             <th class="green">Good</th>
32 |                             {% for good_point in report.content.good %}
33 |                             <td>{{ good_point }}</td>
34 |                             {% endfor %}
35 |                         </tr>
36 |                         {% endif %}
37 | 
38 |                         {% if report.content.to_improve %}
39 |                         <tr>
40 |                             <th class="orange">To improve</th>
41 |                             {% for improvment in report.content.to_improve %}
42 |                             <td>{{ improvment }}</td>
43 |                             {% endfor %}
44 |                         </tr>
45 |                         {% endif %}
46 | 
47 |                         {% if report.content.problems %}
48 |                         <tr>
49 |                             <th class="red">Problems</th>
50 |                             {% for problem in report.content.problems %}
51 |                             <td>{{ problem }}</td>
52 |                             {% endfor %}
53 |                         </tr>
54 |                         {% endif %}
55 | 
56 |                     </table>
57 | 
58 |                 {% endfor %}
59 | 
60 |             </article>
61 |             {% endfor %}
62 | 
63 |         </section>
64 | 
65 |         <footer>
66 |             <p>Powered by <a href="https://github.com/MaevaBrunelles/pelican-seo">SEO</a>, a Pelican plugin, with much salt.</p>
67 |         </footer>
68 | 
69 |     </body>
70 | </html>
71 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_enhancer/html_enhancer/article_schema_creator.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Article Schema.org creator : Improve articles display in Google
 3 | if all fields are fill in.
 4 | https://schema.org/Article : JSON-LD format.
 5 | """
 6 | 
 7 | import datetime
 8 | 
 9 | 
10 | class ArticleSchemaCreator():
11 |     """
12 |     Get all field values and build the Article schema compliant
13 |     to https://schema.org/Article and Google requirements.
14 |     """
15 | 
16 |     def __init__(self, author, title, category, date, logo, image, sitename):
17 |         self._author = author.name
18 |         self._title = title
19 |         self._category = category.name
20 |         self._publication_date = date
21 |         self._logo = logo
22 |         self._image = image
23 |         self._sitename = sitename
24 | 
25 |     def _convert_date(self, date):
26 |         """ Get SafeDate Pelican object and return date in string. """
27 | 
28 |         date_time = datetime.datetime(
29 |             date.year,
30 |             date.month,
31 |             date.day,
32 |             date.hour,
33 |             date.minute
34 |         )
35 |         return date_time.strftime("%Y-%m-%d %H:%M")
36 | 
37 |     def create_schema(self):
38 |         """
39 |         Create Article schema.
40 |         Schema : {
41 |             "@context": "https://schema.org",
42 |             "@type": "Article",
43 |             "author": {
44 |                 "@type": "Person",
45 |                 "name": :author:
46 |             },
47 |             "publisher": {
48 |                 "@type": "Organization",
49 |                 "name": :sitename:,
50 |                 "logo": {
51 |                     "@type": "ImageObject",
52 |                     "url": :logo:
53 |                 }
54 |             },
55 |             "headline": :title:,
56 |             "about": :category:,
57 |             "datePublished": :publication_date:,
58 |             "image": :image:
59 |         }
60 |         """
61 | 
62 |         schema_article = {
63 |             "@context": "https://schema.org",
64 |             "@type": "Article",
65 |         }
66 | 
67 |         if self._author:
68 |             schema_article["author"] = {
69 |                 "@type": "Person",
70 |                 "name": self._author
71 |             }
72 | 
73 |         if self._sitename:
74 |             schema_article["publisher"] = {
75 |                 "@type": "Organization",
76 |                 "name": self._sitename,
77 |             }
78 | 
79 |             if self._logo:
80 |                 schema_article["publisher"]["logo"] = {
81 |                     "@type": "ImageObject",
82 |                     "url": self._logo
83 |                 }
84 | 
85 |         if self._title:
86 |             schema_article["headline"] = self._title
87 | 
88 |         if self._category:
89 |             schema_article["about"] = self._category
90 | 
91 |         if self._publication_date:
92 |             schema_article["datePublished"] = self._convert_date(self._publication_date)
93 | 
94 |         if self._image:
95 |             schema_article["image"] = self._image
96 | 
97 |         return schema_article
98 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_seo_report.py:
--------------------------------------------------------------------------------
 1 | """ Unit tests for SEO Report. """
 2 | 
 3 | from unittest.mock import mock_open, patch
 4 | 
 5 | from seo.seo_report.seo_analyzer import (
 6 |     InternalLinkAnalyzer,
 7 |     ContentTitleAnalyzer,
 8 |     PageTitleAnalyzer,
 9 |     PageDescriptionAnalyzer,
10 | )
11 | 
12 | 
13 | class TestSEOReport():
14 |     """ Units tests for SEOReport. """
15 | 
16 |     def test_launch_analysis_returns_dict(self, fake_article, fake_seo_report):
17 |         """ Test if launch_analysis return a dict with expected keys. """
18 | 
19 |         fake_articles_analysis = fake_seo_report.launch_analysis(fake_article)
20 | 
21 |         assert fake_articles_analysis['url']
22 |         assert fake_articles_analysis['date']
23 |         assert fake_articles_analysis['seo_analysis']
24 |         assert fake_articles_analysis['seo_analysis']['page_title_analysis']
25 |         assert fake_articles_analysis['seo_analysis']['page_description_analysis']
26 |         assert fake_articles_analysis['seo_analysis']['content_title_analysis']
27 |         assert fake_articles_analysis['seo_analysis']['internal_link_analysis']
28 | 
29 |     def test_launch_analysis_values_are_instances_of_expected_analysis_objects(
30 |             self, fake_article, fake_seo_report):
31 |         """
32 |         Test if the dict returned by launch_analysis
33 |         contained expected analysis objects.
34 |         """
35 | 
36 |         fake_articles_analysis = fake_seo_report.launch_analysis(fake_article)
37 |         fake_seo_analysis = fake_articles_analysis['seo_analysis']
38 | 
39 |         page_title_analysis = fake_seo_analysis['page_title_analysis']
40 |         page_description_analysis = fake_seo_analysis['page_description_analysis']
41 |         content_title_analysis = fake_seo_analysis['content_title_analysis']
42 |         internal_link_analysis = fake_seo_analysis['internal_link_analysis']
43 | 
44 |         assert isinstance(page_title_analysis, PageTitleAnalyzer)
45 |         assert isinstance(page_description_analysis, PageDescriptionAnalyzer)
46 |         assert isinstance(content_title_analysis, ContentTitleAnalyzer)
47 |         assert isinstance(internal_link_analysis, InternalLinkAnalyzer)
48 | 
49 |     def test_generate_create_report_file_and_write_output(
50 |             self, fake_seo_report, fake_articles_analysis):
51 |         """
52 |         Test that generate create a HTML file and write SEO report on it.
53 |         Need mock_open to test this.
54 |         """
55 | 
56 |         with patch('seo.seo_report.open', mock_open()) as mocked_open:
57 |             # Get a reference to the MagicMock that will be returned
58 |             # when mock_open will be called
59 |             # => When we do open("seo_report", "w") as report in generate, report
60 |             # will also be a reference to the same MagicMock
61 |             mocked_file_handle = mocked_open.return_value
62 | 
63 |             # When generate is executed, mock_open() is call instead of open()
64 |             fake_seo_report.generate('Fake site', fake_articles_analysis)
65 | 
66 |             # mocked_open and the file handle got all
67 |             # executed calls, and can assert them
68 |             mocked_open.assert_called_once_with('seo_report.html', 'w')
69 |             mocked_file_handle.write.assert_called_once()
70 | 
71 |             # Get all arguments in the mocked write call and select the first
72 |             # true arg (output)
73 |             args, _ = mocked_file_handle.write.call_args_list[0]
74 |             output = args[0]
75 |             assert "<h1>SEO report - Fake site</h1>" in output
76 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_seo_enhancer.py:
--------------------------------------------------------------------------------
 1 | """ Units tests for SEO Enhancer. """
 2 | 
 3 | from unittest.mock import mock_open, patch
 4 | 
 5 | 
 6 | class TestSEOEnhancer():
 7 |     """ Units tests for SEOEnhancer. """
 8 | 
 9 |     def test_populate_robots_return_dict_with_rules_for_an_url(
10 |             self, fake_seo_enhancer, fake_article):
11 |         """
12 |         Test that populate_robots return a dict with document_url,
13 |         noindex and disallow rules.
14 |         """
15 | 
16 |         fake_robots_rules = fake_seo_enhancer.populate_robots(fake_article)
17 | 
18 |         assert fake_robots_rules['document_url']
19 |         assert fake_robots_rules['noindex']
20 |         assert fake_robots_rules['disallow']
21 | 
22 |     def test_generate_robots_file(self, fake_seo_enhancer, fake_robots_rules):
23 |         """ Test if generate_robots create a robots.txt file by mocking open(). """
24 | 
25 |         with patch('os.mkdir'):
26 |             with patch('seo.seo_enhancer.open', mock_open()) as mocked_open:
27 |                 mocked_file_handle = mocked_open.return_value
28 | 
29 |                 fake_seo_enhancer.generate_robots(
30 |                     rules=fake_robots_rules,
31 |                     output_path='fake_output'
32 |                 )
33 | 
34 |                 mocked_open.assert_called_once_with('fake_output/robots.txt', 'w+')
35 |                 mocked_file_handle.write.assert_called()
36 |                 # 4 : 1 fix write + 3 generated write
37 |                 assert len(mocked_file_handle.write.call_args_list) == 4
38 | 
39 |                 args, _ = mocked_file_handle.write.call_args_list[1]
40 |                 fake_rule = args[0]
41 |                 assert "Noindex: fake-title.html" in fake_rule
42 | 
43 |     def test_launch_html_enhancemer_returns_dict(self, fake_article, fake_seo_enhancer):
44 |         """ Test if launch_html_enhancemer returns a dict with expected keys. """
45 | 
46 |         fake_html_enhancements = fake_seo_enhancer.launch_html_enhancer(
47 |             file=fake_article,
48 |             output_path='fake_output',
49 |             path='fake_dir/fake_output/fake_file.html',
50 |         )
51 | 
52 |         assert fake_html_enhancements['canonical_tag']
53 |         assert fake_html_enhancements['article_schema']
54 |         assert fake_html_enhancements['breadcrumb_schema']
55 | 
56 |     def test_add_html_enhancements_to_file(self, fake_article, fake_seo_enhancer):
57 |         """
58 |         Test if add_html_to_file add SEO enhancements
59 |         to HTML files by mocking open().
60 |         """
61 | 
62 |         path = "fake_dir/fake_output/fake_file.html"
63 |         fake_html_enhancements = fake_seo_enhancer.launch_html_enhancer(
64 |             file=fake_article,
65 |             output_path='fake_output',
66 |             path=path,
67 |         )
68 | 
69 |         with patch(
70 |             'seo.seo_enhancer.open',
71 |             mock_open(read_data=fake_article.content)
72 |         ) as mocked_open:
73 |             mocked_file_handle = mocked_open.return_value
74 | 
75 |             fake_seo_enhancer.add_html_to_file(
76 |                 enhancements=fake_html_enhancements,
77 |                 path=path
78 |             )
79 |             assert len(mocked_open.call_args_list) == 2
80 |             mocked_file_handle.read.assert_called_once()
81 |             mocked_file_handle.write.assert_called_once()
82 | 
83 |             write_args, _ = mocked_file_handle.write.call_args_list[0]
84 |             fake_html_content = write_args[0]
85 |             assert '<link href="fakesite.com/fake-title.html" rel="canonical"/>' \
86 |                 in fake_html_content
87 |             assert '{"@context": "https://schema.org", "@type": "Article"' \
88 |                 in fake_html_content
89 |             assert '{"@context": "https://schema.org", "@type": "BreadcrumbList"' \
90 |                 in fake_html_content
91 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_enhancer/__init__.py:
--------------------------------------------------------------------------------
  1 | """ Improve SEO technical for each article and page : HTML code and robots.txt file. """
  2 | 
  3 | import json
  4 | import logging
  5 | import os
  6 | 
  7 | from bs4 import BeautifulSoup
  8 | 
  9 | from .html_enhancer import HTMLEnhancer
 10 | from .robots_file_creator import RobotsFileCreator
 11 | 
 12 | 
 13 | logger = logging.getLogger(__name__)
 14 | 
 15 | 
 16 | class SEOEnhancer():
 17 |     """
 18 |     Improve SEO technical for each article and page : HTML code and robots.txt file.
 19 |     """
 20 | 
 21 |     def launch_html_enhancer(self, file, output_path, path):
 22 |         """
 23 |         Call HTMLEnhancer for each article and page.
 24 |         Return a dict with all HTML enhancements.
 25 |         """
 26 | 
 27 |         html_enhancer = HTMLEnhancer(file, output_path, path)
 28 | 
 29 |         html_enhancements = {
 30 |             'canonical_tag': html_enhancer.canonical_link.create_url(),
 31 |             'breadcrumb_schema': html_enhancer.breadcrumb_schema.create_schema(),
 32 |         }
 33 | 
 34 |         if 'pages' not in file.url:
 35 |             article_schema = html_enhancer.article_schema.create_schema()
 36 |             html_enhancements['article_schema'] = article_schema
 37 | 
 38 |         return html_enhancements
 39 | 
 40 |     def populate_robots(self, document):
 41 |         """
 42 |         Get all robots rules in document.metadata.
 43 |         Return a dict with rules per url.
 44 |         """
 45 | 
 46 |         robots_file = RobotsFileCreator(document.metadata)
 47 | 
 48 |         return {
 49 |             'document_url': document.url,
 50 |             'noindex': robots_file.get_noindex,
 51 |             'disallow': robots_file.get_disallow,
 52 |         }
 53 | 
 54 |     def generate_robots(self, rules, output_path):
 55 |         """
 56 |         Create robots.txt file, with noindex and disallow rules for each document URL.
 57 |         """
 58 |         if not os.path.isdir(output_path):
 59 |             os.mkdir(output_path)
 60 | 
 61 |         robots_path = os.path.join(output_path, 'robots.txt')
 62 | 
 63 |         with open(robots_path, 'w+') as robots_file:
 64 |             robots_file.write('User-agent: *')
 65 |             for rule in rules:
 66 |                 if rule.get('noindex'):
 67 |                     robots_file.write('\n' + 'Noindex: ' + rule.get('document_url'))
 68 |                 if rule.get('disallow'):
 69 |                     robots_file.write('\n' + 'Disallow: ' + rule.get('document_url'))
 70 | 
 71 |         logger.info("SEO plugin - SEO Enhancement: robots.txt file created")
 72 | 
 73 |     def add_html_to_file(self, enhancements, path):
 74 |         """
 75 |         Open HTML file, add HTML enhancements with bs4 and create the new HTML files.
 76 |         """
 77 | 
 78 |         with open(path) as html_file:
 79 |             html_content = html_file.read()
 80 |             soup = BeautifulSoup(html_content, features="html.parser")
 81 | 
 82 |         canonical_tag = soup.new_tag(
 83 |             "link",
 84 |             rel="canonical",
 85 |             href=enhancements.get('canonical_tag')
 86 |         )
 87 |         soup.head.append(canonical_tag)
 88 | 
 89 |         position = 0
 90 |         for enhancement in enhancements:
 91 | 
 92 |             if enhancement.endswith('_schema'):
 93 |                 schema = enhancement
 94 | 
 95 |                 schema_script = soup.new_tag("script", type="application/ld+json")
 96 |                 soup.head.append(schema_script)
 97 | 
 98 |                 schema_script = soup.findAll('script')[position]
 99 |                 # Json dumps permit to keep dict double quotes instead of simples
100 |                 # Google valids schema only with double quotes
101 |                 schema_script.append(
102 |                     json.dumps(enhancements[schema], ensure_ascii=False)
103 |                 )
104 | 
105 |                 position += 1
106 | 
107 |         with open(path, 'w') as html_file:
108 |             html_file.write(soup.prettify())
109 | 
110 |         logger.info(f"SEO plugin - SEO Enhancement: Done for {path}")
111 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_breadcrumb_schema_creator.py:
--------------------------------------------------------------------------------
  1 | """ Units tests for Breadcrumb Schema Creator. """
  2 | 
  3 | from seo.seo_enhancer.html_enhancer import BreadcrumbSchemaCreator
  4 | 
  5 | 
  6 | class TestBreadcrumbSchemaCreator():
  7 |     """ Unit tests for BreadcrumbSchemaCreator. """
  8 | 
  9 |     def test_create_schema(self, fake_article):
 10 |         """
 11 |         Test that create_schema returns a valid
 12 |         schema.org (dict) for breadcrumb.
 13 |         """
 14 | 
 15 |         breadcrumb = BreadcrumbSchemaCreator(
 16 |             output_path='fake_output',
 17 |             path='fake_dir/fake_output/fake-file.html',
 18 |             sitename=fake_article.settings['SITENAME'],
 19 |             siteurl=fake_article.settings['SITEURL'],
 20 |         )
 21 | 
 22 |         fake_breadcrumb_schema = breadcrumb.create_schema()
 23 | 
 24 |         assert fake_breadcrumb_schema['@context'] == "https://schema.org"
 25 |         assert fake_breadcrumb_schema['@type'] == "BreadcrumbList"
 26 | 
 27 |         assert len(fake_breadcrumb_schema['itemListElement']) == 2
 28 | 
 29 |         assert fake_breadcrumb_schema['itemListElement'][0]['@type'] == "ListItem"
 30 |         assert fake_breadcrumb_schema['itemListElement'][0]['position'] == 1
 31 |         assert fake_breadcrumb_schema['itemListElement'][0]['name'] == "Fake Site Name"
 32 |         assert fake_breadcrumb_schema['itemListElement'][0]['item'] == "fakesite.com"
 33 | 
 34 |         assert fake_breadcrumb_schema['itemListElement'][1]['@type'] == "ListItem"
 35 |         assert fake_breadcrumb_schema['itemListElement'][1]['position'] == 2
 36 |         assert fake_breadcrumb_schema['itemListElement'][1]['name'] == "Fake file"
 37 |         assert fake_breadcrumb_schema['itemListElement'][1]['item'] == \
 38 |             "fakesite.com/fake-file.html"
 39 | 
 40 |     def test_create_schema_with_x_elements_in_path(self, fake_article):
 41 |         """
 42 |         Test that create_schema returns a valid
 43 |         schema.org (dict) for a path with x elements.
 44 |         """
 45 | 
 46 |         breadcrumb = BreadcrumbSchemaCreator(
 47 |             output_path='fake_output',
 48 |             path='fake_dir/fake_output/test/blabla/other/kiwi/fake-file.html',
 49 |             sitename=fake_article.settings['SITENAME'],
 50 |             siteurl=fake_article.settings['SITEURL'],
 51 |         )
 52 | 
 53 |         fake_breadcrumb_schema = breadcrumb.create_schema()
 54 | 
 55 |         assert len(fake_breadcrumb_schema['itemListElement']) == 6
 56 | 
 57 |     def test_create_schema_with_no_sitename_no_siteurl(self, fake_article):
 58 |         """ Test that create_schema returns incomplete schema.org. """
 59 | 
 60 |         breadcrumb = BreadcrumbSchemaCreator(
 61 |             output_path='fake_output',
 62 |             path='fake_dir/fake_output/fake-file.html',
 63 |             sitename='',
 64 |             siteurl='',
 65 |         )
 66 | 
 67 |         fake_breadcrumb_schema = breadcrumb.create_schema()
 68 | 
 69 |         assert not fake_breadcrumb_schema['itemListElement'][0]['position'] == 1
 70 | 
 71 |     def test_create_schema_with_no_sitename(self, fake_article):
 72 |         """
 73 |         Test that create_schema with siteurl but no
 74 |         sitename returns incomplete schema.org.
 75 |         """
 76 | 
 77 |         breadcrumb = BreadcrumbSchemaCreator(
 78 |             output_path='fake_output',
 79 |             path='fake_dir/fake_output/fake-file.html',
 80 |             sitename='',
 81 |             siteurl=fake_article.settings['SITEURL'],
 82 |         )
 83 | 
 84 |         fake_breadcrumb_schema = breadcrumb.create_schema()
 85 | 
 86 |         assert not fake_breadcrumb_schema['itemListElement'][0]['position'] == 1
 87 | 
 88 |     def test_create_schema_with_no_siteurl(self, fake_article):
 89 |         """
 90 |         Test that create_schema with sitename but no
 91 |         siteurl returns incomplete schema.org.
 92 |         """
 93 | 
 94 |         breadcrumb = BreadcrumbSchemaCreator(
 95 |             output_path='fake_output',
 96 |             path='fake_dir/fake_output/fake-file.html',
 97 |             sitename=fake_article.settings['SITENAME'],
 98 |             siteurl='',
 99 |         )
100 | 
101 |         fake_breadcrumb_schema = breadcrumb.create_schema()
102 | 
103 |         assert not fake_breadcrumb_schema['itemListElement'][0]['position'] == 1
104 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_enhancer/html_enhancer/breadcrumb_schema_creator.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Breadcrumb Schema.org creator : Improve URLs display in Google
  3 | thanks to breadcrumb set up.
  4 | https://schema.org/BreadcrumbList : JSON-LD format.
  5 | """
  6 | 
  7 | import os
  8 | 
  9 | 
 10 | class BreadcrumbSchemaCreator():
 11 |     """
 12 |     Get all URLs for a path and build the Breadcrumb schema compliant
 13 |     to https://schema.org/BreadcrumbList and Google requirements.
 14 |     """
 15 | 
 16 |     def __init__(self, output_path, path, sitename, siteurl):
 17 |         self._output_path = output_path
 18 |         self._path = path
 19 |         self._sitename = sitename
 20 |         self._siteurl = siteurl
 21 | 
 22 |     def _create_paths(self):
 23 |         """
 24 |         Split the file path, get all elements after the output path.
 25 |         By default, output path is 'output/' but it can be changes in Pelican settings.
 26 |         Build all paths, for example :
 27 |         Path = 'test-dir/output/category/file.html'
 28 |         Split path = ['output', 'category', 'file.html']
 29 |         Position begins at 2, as number 1 is dedicated to the index page.
 30 |         Returns list of dicts :
 31 |         [
 32 |             {
 33 |                 'position': 2,
 34 |                 'name': 'Category',
 35 |                 'url': ':siteurl:/category'
 36 |             },
 37 |             {
 38 |                 'position': 3,
 39 |                 'name': 'File',
 40 |                 'url': ':siteurl:/category/file.html'
 41 |             },
 42 |         ]
 43 |         """
 44 | 
 45 |         split_path = self._path.split('/')
 46 | 
 47 |         if self._output_path in split_path:
 48 |             max_index = split_path.index(self._output_path) + 1
 49 | 
 50 |         # Delete all elements before output path, including it
 51 |         del split_path[0:max_index]
 52 | 
 53 |         breadcrumb_paths = []
 54 |         position = 2
 55 | 
 56 |         for item in range(1, len(split_path) + 1):
 57 | 
 58 |             name = split_path[item-1]
 59 |             name = name.replace('-', ' ').capitalize()
 60 |             if name.endswith('.html'):
 61 |                 name = name[:-5]
 62 | 
 63 |             full_path = '/'.join(split_path[:item])
 64 |             url = os.path.join(self._siteurl, full_path)
 65 | 
 66 |             breadcrumb_paths.append({
 67 |                 'name': name,
 68 |                 'url': url,
 69 |                 'position': position
 70 |             })
 71 | 
 72 |             position += 1
 73 | 
 74 |         return breadcrumb_paths
 75 | 
 76 |     def create_schema(self):
 77 |         """
 78 |         Schema = {
 79 |             "@context": "https://schema.org",
 80 |             "@type": "BreadcrumbList",
 81 |             "itemListElement": [
 82 |                 {
 83 |                     "@type": "ListItem",
 84 |                     "position": :n=1:,
 85 |                     "name": :Sitename:,
 86 |                     "item": :SITEURL:
 87 |                 },
 88 |                 {
 89 |                     "@type": "ListItem",
 90 |                     "position": :n+1:,
 91 |                     "name": :name:,
 92 |                     "item": :url:
 93 |                 },
 94 |                 {
 95 |                     "@type": "ListItem",
 96 |                     "position": :n+x:,
 97 |                     "name": :name:,
 98 |                     "item": :url:
 99 |                 }
100 |             ]
101 |         }
102 |         """
103 | 
104 |         breadcrumb_items = self._create_paths()
105 | 
106 |         breadcrumb_schema = {
107 |             "@context": "https://schema.org",
108 |             "@type": "BreadcrumbList",
109 |             "itemListElement": [],
110 |         }
111 | 
112 |         if self._sitename and self._siteurl:
113 |             breadcrumb_schema['itemListElement'].append(
114 |                 {
115 |                     "@type": "ListItem",
116 |                     "position": 1,
117 |                     "name": self._sitename,
118 |                     "item": self._siteurl
119 |                 }
120 |             )
121 | 
122 |         for item in breadcrumb_items:
123 |             breadcrumb_schema['itemListElement'].append(
124 |                 {
125 |                     "@type": "ListItem",
126 |                     "position": item['position'],
127 |                     "name": item['name'],
128 |                     "item": item['url']
129 |                 }
130 |             )
131 | 
132 |         return breadcrumb_schema
133 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | """
  3 | SEO is a Pelican plugin to helps you improve your Pelican site SEO to
  4 | reach the tops positions on search engines like Qwant, DuckDuckGo or Google.
  5 | ===================================================================================
  6 | 
  7 | It generates a SEO report and SEO enhancements.
  8 | You can enable / disable the main features in the plugin settings.
  9 | For the SEO report, you can limit the number of analysis
 10 | in the plugin settings too.
 11 | 
 12 | Author : Maëva Brunelles <https://github.com/MaevaBrunelles>
 13 | License : GNU AFFERO GENERAL PUBLIC LICENSE Version 3
 14 | """
 15 | 
 16 | import logging
 17 | 
 18 | from pelican import signals
 19 | from pelican.generators import ArticlesGenerator, PagesGenerator
 20 | 
 21 | from .settings import SEO_REPORT, SEO_ENHANCER, ARTICLES_LIMIT, PAGES_LIMIT
 22 | from .seo_report import SEOReport
 23 | from .seo_enhancer import SEOEnhancer
 24 | 
 25 | 
 26 | logger = logging.getLogger(__name__)
 27 | 
 28 | 
 29 | def plugin_initializer(settings):
 30 |     """ Raises if SITEURL parameter is not set in Pelican settings """
 31 | 
 32 |     if not settings.settings.get('SITEURL'):
 33 |         raise Exception("You must fill in SITEURL variable in pelicanconf.py \
 34 |             to use SEO plugin.")
 35 | 
 36 |     logger.info("SEO plugin initialized")
 37 | 
 38 | 
 39 | def run_seo_report(generators):
 40 |     """ Run SEO report creation if SEO_REPORT is enabled in settings. """
 41 | 
 42 |     seo_report = SEOReport()
 43 |     documents_analysis = []
 44 | 
 45 |     site_name = None
 46 | 
 47 |     for generator in generators:
 48 | 
 49 |         if isinstance(generator, ArticlesGenerator):
 50 |             # Launch analysis for each article. User can limit this number.
 51 |             for _, article in zip(range(ARTICLES_LIMIT), generator.articles):
 52 |                 analysis = seo_report.launch_analysis(document=article)
 53 |                 documents_analysis.append(analysis)
 54 | 
 55 |             if not site_name:
 56 |                 site_name = generator.settings.get('SITENAME')
 57 | 
 58 |         if isinstance(generator, PagesGenerator):
 59 |             # Launch analysis each page. User can limit this number.
 60 |             for _, page in zip(range(PAGES_LIMIT), generator.pages):
 61 |                 analysis = seo_report.launch_analysis(document=page)
 62 |                 documents_analysis.append(analysis)
 63 | 
 64 |             if not site_name:
 65 |                 site_name = generator.settings.get('SITENAME')
 66 | 
 67 |     seo_report.generate(
 68 |         site_name=site_name,
 69 |         documents_analysis=documents_analysis
 70 |     )
 71 | 
 72 | 
 73 | def run_robots_file(generators):
 74 |     """
 75 |     Run robots.txt file creation if SEO_ENHANCER
 76 |     is enabled in settings.
 77 |     """
 78 | 
 79 |     seo_enhancer = SEOEnhancer()
 80 |     robots_rules = []
 81 | 
 82 |     for generator in generators:
 83 | 
 84 |         output_path = generator.output_path
 85 | 
 86 |         if isinstance(generator, ArticlesGenerator):
 87 |             for article in generator.articles:
 88 |                 article_metadata = seo_enhancer.populate_robots(
 89 |                     document=article
 90 |                 )
 91 |                 robots_rules.append(article_metadata)
 92 | 
 93 |         if isinstance(generator, PagesGenerator):
 94 |             for page in generator.pages:
 95 |                 page_metadata = seo_enhancer.populate_robots(document=page)
 96 |                 robots_rules.append(page_metadata)
 97 | 
 98 |     seo_enhancer.generate_robots(
 99 |         rules=robots_rules,
100 |         output_path=output_path,
101 |     )
102 | 
103 | 
104 | def run_html_enhancer(path, context):
105 |     """ Run HTML enhancements if SEO_ENHANCER is enabled in settings. """
106 | 
107 |     if context.get('article'):
108 |         seo_enhancer = SEOEnhancer()
109 |         html_enhancements = seo_enhancer.launch_html_enhancer(
110 |             file=context['article'],
111 |             output_path=context.get('OUTPUT_PATH'),
112 |             path=path,
113 |         )
114 |         seo_enhancer.add_html_to_file(
115 |             enhancements=html_enhancements,
116 |             path=path,
117 |         )
118 | 
119 |     elif context.get('page'):
120 |         seo_enhancer = SEOEnhancer()
121 |         html_enhancements = seo_enhancer.launch_html_enhancer(
122 |             file=context['page'],
123 |             output_path=context.get('OUTPUT_PATH'),
124 |             path=path,
125 |         )
126 |         seo_enhancer.add_html_to_file(
127 |             enhancements=html_enhancements,
128 |             path=path,
129 |         )
130 | 
131 | 
132 | def register():
133 | 
134 |     signals.initialized.connect(plugin_initializer)
135 | 
136 |     if SEO_REPORT and SEO_ENHANCER:
137 |         signals.all_generators_finalized.connect(run_seo_report)
138 |         signals.all_generators_finalized.connect(run_robots_file)
139 |         signals.content_written.connect(run_html_enhancer)
140 | 
141 |     elif SEO_REPORT:
142 |         signals.all_generators_finalized.connect(run_seo_report)
143 | 
144 |     elif SEO_ENHANCER:
145 |         signals.all_generators_finalized.connect(run_robots_file)
146 |         signals.content_written.connect(run_html_enhancer)
147 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/conftest.py:
--------------------------------------------------------------------------------
  1 | """ Mocks Pelican objects required for the units tests. """
  2 | 
  3 | import pytest
  4 | 
  5 | from seo.seo_report import SEOReport
  6 | from seo.seo_enhancer import SEOEnhancer
  7 | 
  8 | 
  9 | class FakeArticle():
 10 |     """ Mock Pelican Article object. """
 11 | 
 12 |     def __init__(self, settings, metadata, title,
 13 |                  description, url, date, content, author, category):
 14 |         self.settings = settings
 15 |         self.metadata = metadata
 16 |         self.title = title
 17 |         self.description = description
 18 |         self.url = url
 19 |         self.date = date
 20 |         self.content = content
 21 |         self.author = author
 22 |         self.category = category
 23 | 
 24 | 
 25 | class FakeDate():
 26 |     """ Mock Pelican SafeDate object. """
 27 | 
 28 |     def __init__(self, year, month, day, hour, minute):
 29 |         self.year = int(year)
 30 |         self.month = int(month)
 31 |         self.day = int(day)
 32 |         self.hour = int(hour)
 33 |         self.minute = int(minute)
 34 | 
 35 | 
 36 | class FakeAuthor():
 37 |     """ Mock Pelican Author object. """
 38 | 
 39 |     def __init__(self, name):
 40 |         self.name = name
 41 | 
 42 | 
 43 | class FakeCategory():
 44 |     """ Mock Pelican Category object. """
 45 | 
 46 |     def __init__(self, name):
 47 |         self.name = name
 48 | 
 49 | 
 50 | @pytest.fixture()
 51 | def fake_article():
 52 |     """ Create a fake article. """
 53 | 
 54 |     settings = {
 55 |         'SITEURL': 'fakesite.com',
 56 |         'SITENAME': 'Fake Site Name',
 57 |         'LOGO': 'https://www.fakesite.com/fake-logo.jpg',
 58 |     }
 59 |     metadata = {
 60 |         'noindex': True,
 61 |         'disallow': True,
 62 |         'image': 'https://www.fakesite.com/fake-image.jpg',
 63 |     }
 64 |     title = 'Fake Title'
 65 |     description = 'Fake description'
 66 |     url = 'fake-title.html'
 67 |     date = FakeDate('2019', '04', '03', '23', '49')
 68 |     author = FakeAuthor(name='Fake author')
 69 |     category = FakeCategory(name='Fake category')
 70 |     content = """<html>
 71 |                     <head>
 72 |                         <title>Fake Title</title>
 73 |                         <meta name='description' content='Fake description' />
 74 |                     </head>
 75 |                     <body>
 76 |                         <h1>Fake content title</h1>
 77 |                         <p>Fake content</p>
 78 |                         <a href='https://www.fakesite.com'>Fake internal link</a>
 79 |                     </body>
 80 |                 </html>"""
 81 | 
 82 |     return FakeArticle(
 83 |         settings=settings,
 84 |         metadata=metadata,
 85 |         title=title,
 86 |         description=description,
 87 |         url=url,
 88 |         date=date,
 89 |         content=content,
 90 |         author=author,
 91 |         category=category
 92 |     )
 93 | 
 94 | 
 95 | @pytest.fixture()
 96 | def fake_article_missing_elements():
 97 |     """ Create a fake article with missing elements. """
 98 | 
 99 |     settings = {
100 |         'SITEURL': 'fakesite.com',
101 |         'SITENAME': '',
102 |         'LOGO': '',
103 |     }
104 |     metadata = {
105 |         'noindex': True,
106 |         'image': '',
107 |     }
108 |     title = ''
109 |     description = ''
110 |     url = 'fake-title.html'
111 |     date = FakeDate('2019', '04', '03', '23', '49')
112 |     author = FakeAuthor(name='')
113 |     category = FakeCategory(name='')
114 |     content = """<html>
115 |                     <head>
116 |                     </head>
117 |                     <body>
118 |                         <p>Fake content</p>
119 |                     </body>
120 |                 </html>"""
121 | 
122 |     return FakeArticle(
123 |         settings=settings,
124 |         metadata=metadata,
125 |         title=title,
126 |         description=description,
127 |         url=url,
128 |         date=date,
129 |         content=content,
130 |         author=author,
131 |         category=category
132 |     )
133 | 
134 | 
135 | @pytest.fixture()
136 | def fake_article_multiple_elements():
137 |     """ Create a fake article with multiple elements. """
138 | 
139 |     settings = {
140 |         'SITEURL': 'fakesite.com',
141 |         'SITENAME': 'Fake Site Name',
142 |         'LOGO': 'https://www.fakesite.com/fake-logo.jpg',
143 |     }
144 |     metadata = {}
145 |     title = 'Fake Title'
146 |     description = 'Fake description'
147 |     url = 'fake-title.html'
148 |     date = FakeDate('2019', '04', '03', '23', '49')
149 |     author = FakeAuthor(name='Fake author')
150 |     category = FakeCategory(name='Fake category')
151 |     content = """<html>
152 |                     <head>
153 |                         <title>Fake Title</title>
154 |                         <meta name='description' content='Fake description' />
155 |                     </head>
156 |                     <body>
157 |                         <h1>Content title</h1>
158 |                         <p>Fake content</p>
159 |                         <h1>Multiple content title</h1>
160 |                         <a href='https://www.fakesite.com'>Fake internal link</a>
161 |                         <a href='https://www.test.com'>Fake external link</a>
162 |                         <a href='www.fakesite.com/test/'>Fake internal path link</a>
163 |                     </body>
164 |                 </html>"""
165 | 
166 |     return FakeArticle(
167 |         settings=settings,
168 |         metadata=metadata,
169 |         title=title,
170 |         description=description,
171 |         url=url,
172 |         date=date,
173 |         content=content,
174 |         author=author,
175 |         category=category
176 |     )
177 | 
178 | 
179 | @pytest.fixture()
180 | def fake_seo_report():
181 |     """ Create a fake seo report instance. """
182 | 
183 |     return SEOReport()
184 | 
185 | 
186 | @pytest.fixture()
187 | def fake_robots_rules(fake_seo_enhancer, fake_article,
188 |                       fake_article_multiple_elements,
189 |                       fake_article_missing_elements):
190 |     """ Create a fake robots rules. """
191 | 
192 |     robots_rules = []
193 | 
194 |     robots_rules.append(
195 |         fake_seo_enhancer.populate_robots(fake_article)
196 |     )
197 |     robots_rules.append(
198 |         fake_seo_enhancer.populate_robots(fake_article_missing_elements)
199 |     )
200 |     robots_rules.append(
201 |         fake_seo_enhancer.populate_robots(fake_article_multiple_elements)
202 |     )
203 | 
204 |     return robots_rules
205 | 
206 | 
207 | @pytest.fixture()
208 | def fake_articles_analysis(fake_seo_report, fake_article,
209 |                            fake_article_multiple_elements,
210 |                            fake_article_missing_elements):
211 |     """ Create a fake articles analysis. """
212 | 
213 |     articles_analysis = []
214 | 
215 |     articles_analysis.append(
216 |         fake_seo_report.launch_analysis(fake_article)
217 |     )
218 |     articles_analysis.append(
219 |         fake_seo_report.launch_analysis(fake_article_missing_elements)
220 |     )
221 |     articles_analysis.append(
222 |         fake_seo_report.launch_analysis(fake_article_multiple_elements)
223 |     )
224 | 
225 |     return articles_analysis
226 | 
227 | 
228 | @pytest.fixture()
229 | def fake_seo_enhancer():
230 |     """ Create a fake seo enhancer instance. """
231 | 
232 |     return SEOEnhancer()
233 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # SEO: A plugin for Pelican :fr:
  2 | 
  3 | [![Build Status](https://travis-ci.com/MaevaBrunelles/pelican-seo.svg?branch=master)](https://travis-ci.com/MaevaBrunelles/pelican-seo)
  4 | 
  5 | > Warning: This plugin is now released on the new official Pelican plugins workspace. If you want to use it, you must install https://github.com/pelican-plugins/seo. For contributions, issues or anything else, see the [contribution part](#contributing).
  6 | 
  7 | This plugin helps you improve your Pelican site SEO (Search Engine Optimization) to reach the top positions on search engines. To see what can do SEO for you, go directly to [Usage](#usage) section.
  8 | 
  9 | | Author          | GitHub                            |
 10 | | :-------------: | :-------------------------------: |
 11 | | Maëva Brunelles | https://github.com/MaevaBrunelles |
 12 | 
 13 | ## Why do you need SEO
 14 | 
 15 | If :
 16 | * you need some help to improve the SEO of your Pelican site 
 17 | * you are not familiar with SEO 
 18 | * you want a simple site with all SEO basis
 19 | 
 20 | This plugin is for you !
 21 | 
 22 | SEO comes with two complete features to help you :
 23 | * [SEO Report](#seo-report) : generation of an HTML report based on articles analysis. Provides you what is good, what should be improved and what is problematic. Useful if you want to improve the referencement of your articles. 
 24 | * [SEO Enhancer](#seo-enhancer) : generation of robots indexation hints, HTML tag and structured data. Usefull if you want to control the appareance of your site in the search engine.
 25 | 
 26 | ## Installation
 27 | 
 28 | This plugin can be installed via:
 29 | 
 30 |     pip install pelican-seo
 31 | 
 32 | ## Requirements
 33 | 
 34 | SEO needs Beautiful Soup 4 :
 35 | 
 36 | ```
 37 | pip install bs4
 38 | ```
 39 | 
 40 | `SITEURL` Pelican parameter must be defined as features are based on it.
 41 | 
 42 | ## Usage
 43 | 
 44 | You can choose which feature to enable or disable in the plugin settings. Default is `True` for both.
 45 | 
 46 | ```
 47 | # settings.py
 48 | SEO_REPORT = True # To enable this feature
 49 | SEO_ENHANCER = False # To disable this feature
 50 | ```
 51 | 
 52 | SEO runs when you [generate your site](https://docs.getpelican.com/en/stable/quickstart.html#generate-your-site). If you want to see SEO logs, you should use `--verbose` Pelican argument:
 53 | ```
 54 | $ pelican content --verbose
 55 | -> SEO plugin initialized
 56 | -> SEO plugin - SEO Report: seo_report.html file created
 57 | -> SEO plugin - SEO Enhancement: robots.txt file created
 58 | -> Writing /output/my-first-review.html
 59 | -> SEO plugin - SEO Enhancement: Done for /output/my-first-review.html
 60 | Done: Processed 1 articles, 0 drafts, 0 pages, 0 hidden pages and 0 draft pages in 0.17 seconds.
 61 | ```
 62 | 
 63 | ### SEO report
 64 | 
 65 | SEO analyzes all your articles and pages, and generate an HTML SEO report in your Pelican root project : `seo-report.html`
 66 | 
 67 | Example :
 68 | 
 69 | ![SEO report example](docs/seo-report-example.png)
 70 | 
 71 | You can set up a limit for article and page analysis in the plugin settings.py. By default, it's set up to 10 articles and 10 pages.
 72 | 
 73 | ```
 74 | ARTICLES_LIMIT = 10
 75 | PAGES_LIMIT = 10
 76 | ```
 77 | 
 78 | The analysis works from the most recents articles or pages to the olders according to the `date` metadata. 
 79 | Analysis is focused on :
 80 | * Page title `<title></title>`
 81 | * Page description `<meta name="description" content="" />`
 82 | * Title content `<h1></h1>`
 83 | * Internal link `<a href="SITEURL/..."></a>`
 84 | 
 85 | Which are declared by article and page metadata and content :
 86 | 
 87 | ```markdown
 88 | # article.md
 89 | Title: Title page
 90 | Description: Description page
 91 | 
 92 | # Title content
 93 | [Internal link](https://example.com/about.html)
 94 | ```
 95 | 
 96 | Theses elements form the basis of page SEO optimization. Have a look to the report, optimize your articles and pages according to analysis, and restart the process to see if all elements are now in green. 
 97 | 
 98 | ### SEO Enhancer
 99 | 
100 | SEO generates for you : 
101 | * HTML enhancements 
102 | * Structured data
103 | * Robots file
104 | 
105 | #### Robots.txt file
106 | 
107 | Indicates to search engines which pages they are allowed to access to or not. By default, all pages are authorized to be explored by all the existing robots.
108 | 
109 | ```
110 | User-agent: *
111 | ```
112 | 
113 | To disallow the exploration or to forbid the indexation of a specific ressource, add theses metadata :
114 | 
115 | ```
116 | Disallow: True
117 | Noindex: True
118 | ```
119 | 
120 | A `robots.txt` file is added at the website root, in the `OUTPUT_PATH` setting (Pelican's default is `output/`).
121 | 
122 | ```
123 | # robots.txt
124 | User-agent: *
125 | 
126 | Disallow: example.html
127 | Noindex: other-example.html
128 | ```
129 | 
130 | #### Canonical URL tag
131 | 
132 | SEO automatically adds for each article the canonical URL tag in the `<head>` to avoid duplicate content.
133 | 
134 | ```html
135 | <link rel="canonical" href="<SITEURL>/example.html" />
136 | ```
137 | 
138 | #### Structured data
139 | 
140 | SEO automatically adds structured data in the `<head>` to improve the display of result snippet in search engines. Articles will have both article schema and breadcrumb schema, while pages will only have breadcrumb schema.
141 | Structured data are based on [Schema.org](https://schema.org/) vocabulary, with `JSON-LD` encoding. Note that schemas generated by default are Schema.org compliant, but not automatically Google compliant. Additionnal metadata are required for it.
142 | 
143 | ##### Breadcrumb schema
144 | 
145 | Based on [BreadcrumbList schema](https://schema.org/BreadcrumbList) :
146 | 
147 | ```
148 | {
149 |     "@context": "https://schema.org",
150 |     "@type": "BreadcrumbList",
151 |     "itemListElement": [
152 |         {
153 |             "@type": "ListItem",
154 |             "position": :n=1:,
155 |             "name": :Sitename:,
156 |             "item": :SITEURL:
157 |         },
158 |         {
159 |             "@type": "ListItem",
160 |             "position": :n+1:,
161 |             "name": :name:,
162 |             "item": :url:
163 |         },
164 |         {
165 |             "@type": "ListItem",
166 |             "position": :n+x:,
167 |             "name": :name:,
168 |             "item": :url:
169 |         }
170 |     ]
171 | }
172 | ```
173 | 
174 | Each element of the file path has his `ListItem`, even folders, so it's better to create a user friendly page for those (otherwise you'll get your server's default page). 
175 | 
176 | ##### Article schema
177 | 
178 | Based on [Article schema](https://schema.org/Article) :
179 | 
180 | ```
181 | {
182 |     "@context": "https://schema.org",
183 |     "@type": "Article",
184 |     "author": {
185 |         "@type": "Person",
186 |         "name": :author:
187 |     },
188 |     "publisher": {
189 |         "@type": "Organization",
190 |         "name": :sitename:,
191 |         "logo": {
192 |             "@type": "ImageObject",
193 |             "url": :logo:
194 |         }
195 |     },
196 |     "headline": :title:,
197 |     "about": :category:,
198 |     "datePublished": :publication_date:,
199 |     "image": :image:
200 | }
201 | ```
202 | 
203 | `:logo:` and `:image:` fields are not required by Schema.org but they are by Google. 
204 | 
205 | To fill `:logo:`, add a parameter in the `pelicanconf.py` :
206 | 
207 | ```
208 | # pelicanconf.py
209 | LOGO = 'https://www.example.com/logo.jpg'
210 | ```
211 | 
212 | To fill `:image:`, add a metadata for each Markdown or reST article :
213 | 
214 | ```
215 | # article.md
216 | Image: https://www.example.com/article-image.jpg
217 | 
218 | # article.rst
219 | :image: https://www.example.com/article-image.jpg
220 | ```
221 | 
222 | ## Contributing
223 | 
224 | Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][].
225 | 
226 | To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section.
227 | 
228 | [existing issues]: https://github.com/pelican-plugins/seo/issues
229 | [Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html


--------------------------------------------------------------------------------
/pelican/plugins/seo/seo_report/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Generate a SEO report by calling SEO analyzers for each content.
  3 | """
  4 | 
  5 | import datetime
  6 | import logging
  7 | import os
  8 | 
  9 | from jinja2 import Environment, FileSystemLoader
 10 | 
 11 | from .seo_analyzer import SEOAnalyzer
 12 | 
 13 | 
 14 | logger = logging.getLogger(__name__)
 15 | 
 16 | 
 17 | class SEOReport():
 18 |     """ Generate a SEO report by calling SEO analyzers for each content. """
 19 | 
 20 |     PAGE_TITLE_RECOMMENDED_LENGTH = range(60, 71)
 21 |     PAGE_DESCRIPTION_RECOMMENDED_LENGTH = range(150, 161)
 22 | 
 23 |     def _convert_date(self, date):
 24 |         """ Get SafeDate Pelican object and return date in string. """
 25 | 
 26 |         date_time = datetime.datetime(
 27 |             date.year,
 28 |             date.month,
 29 |             date.day,
 30 |             date.hour,
 31 |             date.minute
 32 |         )
 33 |         return date_time.strftime("%Y-%m-%d %H:%M")
 34 | 
 35 |     def launch_analysis(self, document):
 36 |         """
 37 |         Launch SEO analysis for a document (either article or page).
 38 |         Return a dict with document informations and his analysis.
 39 |         """
 40 | 
 41 |         seo_analysis = SEOAnalyzer(document)
 42 | 
 43 |         date = None
 44 |         if hasattr(document, "date"):
 45 |             date = self._convert_date(document.date)
 46 | 
 47 |         document_analysis = {
 48 |             "url": document.url,
 49 |             "date": date,
 50 |             "seo_analysis": {
 51 |                 "page_title_analysis": seo_analysis.page_title_analysis,
 52 |                 "page_description_analysis": seo_analysis.page_description_analysis,
 53 |                 "content_title_analysis": seo_analysis.content_title_analysis,
 54 |                 "internal_link_analysis": seo_analysis.internal_link_analysis,
 55 |                 },
 56 |             }
 57 | 
 58 |         return document_analysis
 59 | 
 60 |     def _page_title_report(self, page_title_analysis):
 61 |         """
 62 |         Create report for page title thanks to dedicated analysis.
 63 |         Return a dict with details.
 64 |         """
 65 | 
 66 |         report = {
 67 |             'title': 'Page title analysis',
 68 |             'content': {
 69 |                 'good': [],
 70 |                 'to_improve': [],
 71 |                 'problems': [],
 72 |             },
 73 |         }
 74 | 
 75 |         if page_title_analysis.has_page_title:
 76 |             report['content']['good'].append(
 77 |                 'You have declared a title. Nice job !'
 78 |             )
 79 | 
 80 |             if page_title_analysis.page_title_length in \
 81 |                     SEOReport.PAGE_TITLE_RECOMMENDED_LENGTH:
 82 |                 report['content']['good'].append('Your title has a good longer.')
 83 | 
 84 |             elif page_title_analysis.page_title_length < \
 85 |                     SEOReport.PAGE_TITLE_RECOMMENDED_LENGTH[0]:
 86 |                 report['content']['to_improve'].append(
 87 |                     'Your title is too short. The recommended length is 70 characters.'
 88 |                 )
 89 | 
 90 |             elif page_title_analysis.page_title_length > \
 91 |                     SEOReport.PAGE_TITLE_RECOMMENDED_LENGTH[-1]:
 92 |                 report['content']['to_improve'].append(
 93 |                     'Your title is too long. The maximum recommended \
 94 |                         length is 70 characters.'
 95 |                 )
 96 | 
 97 |         else:
 98 |             report['content']['problems'].append(
 99 |                 'Title is missing. Create one to improve your SEO.'
100 |             )
101 | 
102 |         return report
103 | 
104 |     def _page_description_report(self, page_description_analysis):
105 |         """
106 |         Create report for page description thanks to dedicated analysis.
107 |         Return a dict with details.
108 |         """
109 | 
110 |         report = {
111 |             'title': 'Page description analysis',
112 |             'content': {
113 |                 'good': [],
114 |                 'to_improve': [],
115 |                 'problems': [],
116 |             },
117 |         }
118 | 
119 |         if page_description_analysis.has_page_description():
120 |             report['content']['good'].append(
121 |                 'You have declared a description. Nice job !'
122 |             )
123 | 
124 |             if page_description_analysis.page_description_length in \
125 |                     SEOReport.PAGE_DESCRIPTION_RECOMMENDED_LENGTH:
126 |                 report['content']['good'].append('Your description has a good longer')
127 | 
128 |             elif page_description_analysis.page_description_length < \
129 |                     SEOReport.PAGE_DESCRIPTION_RECOMMENDED_LENGTH[0]:
130 |                 report['content']['to_improve'].append(
131 |                     'Your description is too short. The minimum recommended \
132 |                         length is 150 characters.'
133 |                 )
134 | 
135 |             elif page_description_analysis.page_description_length > \
136 |                     SEOReport.PAGE_DESCRIPTION_RECOMMENDED_LENGTH[-1]:
137 |                 report['content']['to_improve'].append(
138 |                     'Your description is too long. The maximum recommended \
139 |                         length is 160 characters.'
140 |                 )
141 | 
142 |         else:
143 |             report['content']['problems'].append(
144 |                 'You need to declare a description to improve SEO.'
145 |             )
146 | 
147 |         return report
148 | 
149 |     def _content_title_report(self, content_title_analysis):
150 |         """
151 |         Create report for content title thanks to dedicated analysis.
152 |         Return a dict with details.
153 |         """
154 | 
155 |         report = {
156 |             'title': 'Content title analysis',
157 |             'content': {
158 |                 'good': [],
159 |                 'to_improve': [],
160 |                 'problems': [],
161 |             },
162 |         }
163 | 
164 |         if content_title_analysis.has_content_title():
165 |             report['content']['good'].append(
166 |                 'You have declared a content title. Nice job !'
167 |             )
168 | 
169 |             if not content_title_analysis.is_content_title_unique():
170 |                 report['content']['to_improve'].append(
171 |                     'Your content title must be unique.'
172 |                 )
173 |         else:
174 |             report['content']['problems'].append('You\'re missing a content title.')
175 | 
176 |         return report
177 | 
178 |     def _internal_link_report(self, internal_link_analysis):
179 |         """
180 |         Create report for internal links thanks to dedicated analysis.
181 |         Return a dict with details.
182 |         """
183 | 
184 |         report = {
185 |             'title': 'Internal link analysis',
186 |             'content': {
187 |                 'good': [],
188 |                 'to_improve': [],
189 |                 'problems': [],
190 |             }
191 |         }
192 | 
193 |         internal_link_occurrence = internal_link_analysis.internal_link_occurrence
194 | 
195 |         if internal_link_analysis.has_internal_link():
196 |             report['content']['good'].append(
197 |                 'You\'ve included ' + str(internal_link_occurrence) + ' internal \
198 |                     links. Nice job !'
199 |             )
200 | 
201 |         else:
202 |             report['content']['problems'].append(
203 |                 'It\'s better to include internal links.'
204 |             )
205 | 
206 |         return report
207 | 
208 |     def _launch_report(self, document_analysis):
209 |         """
210 |         Get all adocuments analysis and launch dedicated report for each.
211 |         Return a dict with all micro-reports.
212 |         """
213 |         seo_analysis = document_analysis['seo_analysis']
214 | 
215 |         page_title_analysis = seo_analysis['page_title_analysis']
216 |         page_description_analysis = seo_analysis['page_description_analysis']
217 |         content_title_analysis = seo_analysis['content_title_analysis']
218 |         internal_link_analysis = seo_analysis['internal_link_analysis']
219 | 
220 |         page_title_report = self._page_title_report(
221 |             page_title_analysis=page_title_analysis
222 |         )
223 |         page_description_report = self._page_description_report(
224 |             page_description_analysis=page_description_analysis
225 |         )
226 |         content_title_report = self._content_title_report(
227 |             content_title_analysis=content_title_analysis
228 |         )
229 |         internal_link_report = self._internal_link_report(
230 |             internal_link_analysis=internal_link_analysis
231 |         )
232 | 
233 |         document_report = [
234 |             page_title_report,
235 |             page_description_report,
236 |             content_title_report,
237 |             internal_link_report,
238 |         ]
239 | 
240 |         return document_report
241 | 
242 |     def generate(self, site_name, documents_analysis):
243 |         """
244 |         Generate the SEO report.
245 |         Return HTML file generated by Jinja2.
246 |         """
247 | 
248 |         seo_reports = []
249 |         for document_analysis in documents_analysis:
250 | 
251 |             document_report = self._launch_report(document_analysis)
252 | 
253 |             documents_reports = {
254 |                 'url': document_analysis.get('url'),
255 |                 'date': document_analysis.get('date'),
256 |                 'seo_reports': document_report,
257 |             }
258 | 
259 |             seo_reports.append(documents_reports)
260 | 
261 |         # Sort documents by publication date, from recent to oldest.
262 |         # Document whithout a date are sorted at the end of the report.
263 |         seo_reports = sorted(
264 |             seo_reports,
265 |             key=lambda k: (k['date'] is not None, k['date']),
266 |             reverse=True
267 |         )
268 | 
269 |         # Get Jinja HTML template
270 |         plugin_path = os.path.dirname(os.path.realpath(__file__))
271 |         file_loader = FileSystemLoader(plugin_path + '/template')
272 |         env = Environment(loader=file_loader)
273 | 
274 |         template = env.get_template('seo_report.html')
275 |         css_file = plugin_path + '/static/seo_report.css'
276 |         output = template.render(
277 |             site_name=site_name,
278 |             seo_reports=seo_reports,
279 |             css_file=css_file
280 |         )
281 | 
282 |         # Create HTML file
283 |         with open("seo_report.html", 'w') as report:
284 |             report.write(output)
285 | 
286 |         logger.info(
287 |             "SEO plugin - SEO Report: seo_report.html file created"
288 |         )
289 | 


--------------------------------------------------------------------------------
/pelican/plugins/seo/tests/test_article_schema_creator.py:
--------------------------------------------------------------------------------
  1 | """ Units tests for Article Schema Creator. """
  2 | 
  3 | from seo.seo_enhancer.html_enhancer import ArticleSchemaCreator
  4 | 
  5 | 
  6 | class TestArticleSchemaCreator():
  7 |     """ Unit tests for ArticleSchemaCreator. """
  8 | 
  9 |     def test_create_schema(self, fake_article):
 10 |         """ Test that create_schema returns a valid schema.org (dict). """
 11 | 
 12 |         article = ArticleSchemaCreator(
 13 |             author=fake_article.author,
 14 |             title=fake_article.title,
 15 |             category=fake_article.category,
 16 |             date=fake_article.date,
 17 |             logo=fake_article.settings['LOGO'],
 18 |             image=fake_article.metadata['image'],
 19 |             sitename=fake_article.settings['SITENAME'],
 20 |         )
 21 | 
 22 |         fake_article_schema = article.create_schema()
 23 | 
 24 |         assert fake_article_schema['@context'] == "https://schema.org"
 25 |         assert fake_article_schema['@type'] == "Article"
 26 | 
 27 |         assert fake_article_schema['author']['@type'] == 'Person'
 28 |         assert fake_article_schema['author']['name'] == 'Fake author'
 29 | 
 30 |         assert fake_article_schema['publisher']['@type'] == 'Organization'
 31 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
 32 |         assert fake_article_schema['publisher']['logo']['@type'] == 'ImageObject'
 33 |         assert fake_article_schema['publisher']['logo']['url'] == \
 34 |             'https://www.fakesite.com/fake-logo.jpg'
 35 | 
 36 |         assert fake_article_schema['headline'] == 'Fake Title'
 37 | 
 38 |         assert fake_article_schema['about'] == 'Fake category'
 39 | 
 40 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
 41 | 
 42 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
 43 | 
 44 |     def test_create_schema_with_incomplete_article(self, fake_article_missing_elements):
 45 |         """
 46 |         Test that create_schema returns a schema.org,
 47 |         even if article is incomplete.
 48 |         """
 49 | 
 50 |         article = ArticleSchemaCreator(
 51 |             author=fake_article_missing_elements.author,
 52 |             title=fake_article_missing_elements.title,
 53 |             category=fake_article_missing_elements.category,
 54 |             date='',
 55 |             logo=fake_article_missing_elements.settings['LOGO'],
 56 |             image=fake_article_missing_elements.metadata['image'],
 57 |             sitename=fake_article_missing_elements.settings['SITENAME'],
 58 |         )
 59 | 
 60 |         fake_article_schema = article.create_schema()
 61 | 
 62 |         assert fake_article_schema['@context'] == "https://schema.org"
 63 |         assert fake_article_schema['@type'] == "Article"
 64 | 
 65 |         assert 'author' not in fake_article_schema
 66 |         assert 'publisher' not in fake_article_schema
 67 |         assert 'headline' not in fake_article_schema
 68 |         assert 'about' not in fake_article_schema
 69 |         assert 'datePublished' not in fake_article_schema
 70 |         assert 'image' not in fake_article_schema
 71 | 
 72 |     def test_create_schema_with_author_missing(self, fake_article,
 73 |                                                fake_article_missing_elements):
 74 |         """ Test that create_schema returns a schema.org, with author missing. """
 75 | 
 76 |         article = ArticleSchemaCreator(
 77 |             author=fake_article_missing_elements.author,
 78 |             title=fake_article.title,
 79 |             category=fake_article.category,
 80 |             date=fake_article.date,
 81 |             logo=fake_article.settings['LOGO'],
 82 |             image=fake_article.metadata['image'],
 83 |             sitename=fake_article.settings['SITENAME'],
 84 |         )
 85 | 
 86 |         fake_article_schema = article.create_schema()
 87 | 
 88 |         assert 'Fake author' not in fake_article_schema
 89 | 
 90 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
 91 |         assert fake_article_schema['publisher']['logo']['url'] == \
 92 |             'https://www.fakesite.com/fake-logo.jpg'
 93 |         assert fake_article_schema['headline'] == 'Fake Title'
 94 |         assert fake_article_schema['about'] == 'Fake category'
 95 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
 96 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
 97 | 
 98 |     def test_create_schema_with_title_missing(self, fake_article,
 99 |                                               fake_article_missing_elements):
100 |         """ Test that create_schema returns a schema.org, with title missing. """
101 | 
102 |         article = ArticleSchemaCreator(
103 |             author=fake_article.author,
104 |             title=fake_article_missing_elements.title,
105 |             category=fake_article.category,
106 |             date=fake_article.date,
107 |             logo=fake_article.settings['LOGO'],
108 |             image=fake_article.metadata['image'],
109 |             sitename=fake_article.settings['SITENAME'],
110 |         )
111 | 
112 |         fake_article_schema = article.create_schema()
113 | 
114 |         assert 'Fake Title' not in fake_article_schema
115 | 
116 |         assert fake_article_schema['author']['name'] == 'Fake author'
117 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
118 |         assert fake_article_schema['publisher']['logo']['url'] == \
119 |             'https://www.fakesite.com/fake-logo.jpg'
120 |         assert fake_article_schema['about'] == 'Fake category'
121 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
122 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
123 | 
124 |     def test_create_schema_with_category_missing(self, fake_article,
125 |                                                  fake_article_missing_elements):
126 |         """ Test that create_schema returns a schema.org, with category missing. """
127 | 
128 |         article = ArticleSchemaCreator(
129 |             author=fake_article.author,
130 |             title=fake_article.title,
131 |             category=fake_article_missing_elements.category,
132 |             date=fake_article.date,
133 |             logo=fake_article.settings['LOGO'],
134 |             image=fake_article.metadata['image'],
135 |             sitename=fake_article.settings['SITENAME'],
136 |         )
137 | 
138 |         fake_article_schema = article.create_schema()
139 | 
140 |         assert 'Fake category' not in fake_article_schema
141 | 
142 |         assert fake_article_schema['author']['name'] == 'Fake author'
143 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
144 |         assert fake_article_schema['publisher']['logo']['url'] == \
145 |             'https://www.fakesite.com/fake-logo.jpg'
146 |         assert fake_article_schema['headline'] == 'Fake Title'
147 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
148 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
149 | 
150 |     def test_create_schema_with_date_missing(self, fake_article,
151 |                                              fake_article_missing_elements):
152 |         """ Test that create_schema returns a schema.org, with date missing. """
153 | 
154 |         article = ArticleSchemaCreator(
155 |             author=fake_article.author,
156 |             title=fake_article.title,
157 |             category=fake_article.category,
158 |             date='',
159 |             logo=fake_article.settings['LOGO'],
160 |             image=fake_article.metadata['image'],
161 |             sitename=fake_article.settings['SITENAME'],
162 |         )
163 | 
164 |         fake_article_schema = article.create_schema()
165 | 
166 |         assert '2019-04-03 23:49' not in fake_article_schema
167 | 
168 |         assert fake_article_schema['author']['name'] == 'Fake author'
169 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
170 |         assert fake_article_schema['publisher']['logo']['url'] == \
171 |             'https://www.fakesite.com/fake-logo.jpg'
172 |         assert fake_article_schema['headline'] == 'Fake Title'
173 |         assert fake_article_schema['about'] == 'Fake category'
174 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
175 | 
176 |     def test_create_schema_with_logo_missing(self, fake_article,
177 |                                              fake_article_missing_elements):
178 |         """ Test that create_schema returns a schema.org, with logo missing. """
179 | 
180 |         article = ArticleSchemaCreator(
181 |             author=fake_article.author,
182 |             title=fake_article.title,
183 |             category=fake_article.category,
184 |             date=fake_article.date,
185 |             logo=fake_article_missing_elements.settings['LOGO'],
186 |             image=fake_article.metadata['image'],
187 |             sitename=fake_article.settings['SITENAME'],
188 |         )
189 | 
190 |         fake_article_schema = article.create_schema()
191 | 
192 |         assert 'https://www.fakesite.com/fake-logo.jpg' not in fake_article_schema
193 | 
194 |         assert fake_article_schema['author']['name'] == 'Fake author'
195 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
196 |         assert fake_article_schema['headline'] == 'Fake Title'
197 |         assert fake_article_schema['about'] == 'Fake category'
198 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
199 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
200 | 
201 |     def test_create_schema_with_image_missing(self, fake_article,
202 |                                               fake_article_missing_elements):
203 |         """ Test that create_schema returns a schema.org, with image missing. """
204 | 
205 |         article = ArticleSchemaCreator(
206 |             author=fake_article.author,
207 |             title=fake_article.title,
208 |             category=fake_article.category,
209 |             date=fake_article.date,
210 |             logo=fake_article.settings['LOGO'],
211 |             image=fake_article_missing_elements.metadata['image'],
212 |             sitename=fake_article.settings['SITENAME'],
213 |         )
214 | 
215 |         fake_article_schema = article.create_schema()
216 | 
217 |         assert 'https://www.fakesite.com/fake-image.jpg' not in fake_article_schema
218 | 
219 |         assert fake_article_schema['author']['name'] == 'Fake author'
220 |         assert fake_article_schema['publisher']['name'] == 'Fake Site Name'
221 |         assert fake_article_schema['publisher']['logo']['url'] == \
222 |             'https://www.fakesite.com/fake-logo.jpg'
223 |         assert fake_article_schema['headline'] == 'Fake Title'
224 |         assert fake_article_schema['about'] == 'Fake category'
225 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
226 | 
227 |     def test_create_schema_with_sitename_missing(self, fake_article,
228 |                                                  fake_article_missing_elements):
229 |         """ Test that create_schema returns a schema.org, with sitename missing. """
230 | 
231 |         article = ArticleSchemaCreator(
232 |             author=fake_article.author,
233 |             title=fake_article.title,
234 |             category=fake_article.category,
235 |             date=fake_article.date,
236 |             logo=fake_article.settings['LOGO'],
237 |             image=fake_article.metadata['image'],
238 |             sitename=fake_article_missing_elements.settings['SITENAME'],
239 |         )
240 | 
241 |         fake_article_schema = article.create_schema()
242 | 
243 |         assert 'Fake Site Name' not in fake_article_schema
244 |         assert 'logo' not in fake_article_schema
245 | 
246 |         assert fake_article_schema['author']['name'] == 'Fake author'
247 |         assert fake_article_schema['headline'] == 'Fake Title'
248 |         assert fake_article_schema['about'] == 'Fake category'
249 |         assert fake_article_schema['datePublished'] == '2019-04-03 23:49'
250 |         assert fake_article_schema['image'] == 'https://www.fakesite.com/fake-image.jpg'
251 | 


--------------------------------------------------------------------------------