"""Public API of the ``jsonfeedvalidator`` package."""
from .validator import (validate_str, validate_feed, format_errors, ErrorTree)

# ``__all__`` must list the exported names as *strings*.  Listing the objects
# themselves makes ``from jsonfeedvalidator import *`` fail with a TypeError.
__all__ = ("validate_str", "validate_feed", "format_errors", "ErrorTree")

# Parsed by setup.py with a regular expression; keep it a plain string literal.
__version__ = "0.0.3"
"""Regenerate ``jsonfeedvalidator/schema.py`` from the published JSON schema.

Run as a script; the generated module source is written to stdout.
"""
import pprint

try:  # Python 2: ``str`` and ``unicode`` both derive from ``basestring``.
    _STRING_TYPES = (basestring,)
except NameError:  # Python 3: ``basestring`` no longer exists.
    _STRING_TYPES = (str,)

SCHEMA_URL = "http://json.schemastore.org/feed"


def coerce_key(data):
    """
    Recursively convert decoded JSON into native ``str`` keys and strings.

    Dict keys are passed through ``str()``, lists are converted element by
    element, and text values are returned as the interpreter's native ``str``
    (UTF-8 encoded on Python 2, unchanged on Python 3).  Any other value
    (numbers, booleans, ``None``) is returned as-is.
    """
    if isinstance(data, dict):
        return {str(key): coerce_key(val) for key, val in data.items()}

    if isinstance(data, list):
        return [coerce_key(x) for x in data]

    if isinstance(data, _STRING_TYPES):
        if isinstance(data, str):
            # Already the native string type; encoding it on Python 3 would
            # produce a ``bytes`` object (and a "b'...'" repr in the output).
            return data
        # Python 2 ``unicode`` -> native byte string.
        return data.encode('utf-8')

    return data


def main():
    """Download the schema and print it as a ``SCHEMA = {...}`` assignment."""
    # Imported here so that importing this module needs neither the
    # third-party dependency nor network access.
    import requests

    schema = coerce_key(requests.get(SCHEMA_URL).json())
    formatted_string = "SCHEMA = %s" % (pprint.pformat(schema),)

    # Function-call form works on both Python 2 and Python 3.
    print(formatted_string)


if __name__ == "__main__":
    main()
#!/usr/bin/env python
"""Packaging script for jsonfeedvalidator."""

import re

from codecs import open

from setuptools import setup

packages = [
    'jsonfeedvalidator',
]

requires = [
    "jsonschema",
]
test_requirements = ['pytest>=2.8.0', "requests"]

# Extract ``__version__`` from the package source with a regular expression.
with open('jsonfeedvalidator/__init__.py', 'r') as fd:
    version_match = re.search(r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
                              fd.read(), re.MULTILINE)

# ``re.search`` returns None when nothing matches, so test the match object
# before calling ``.group()``; otherwise a missing ``__version__`` surfaces as
# an AttributeError instead of the intended RuntimeError.
version = version_match.group(1) if version_match else None

if not version:
    raise RuntimeError('Cannot find version information')

with open('README.rst', 'r', 'utf-8') as f:
    readme = f.read()


setup(
    name='jsonfeedvalidator',
    version=version,
    description='A JSON feed validator',
    long_description=readme,
    author='Alex Kessinger',
    author_email='voidfiles@gmail.com',
    url="https://github.com/voidfiles/jsonfeedvalidator",
    packages=packages,
    package_data={'': ['LICENSE']},
    package_dir={'jsonfeedvalidator': 'jsonfeedvalidator'},
    include_package_data=True,
    install_requires=requires,
    license='MIT',
    zip_safe=False,
    tests_require=test_requirements,
)
-------------------------------------------------------------------------------- 1 | JSON Feed Validator 2 | =================== 3 | 4 | This is a preview release of a JSON Feed validator 5 | 6 | 7 | Example 8 | ------- 9 | 10 | .. code-block:: python 11 | 12 | >>> import requests 13 | >>> from jsonfeedvalidator import validate_feed, format_errors, ErrorTree 14 | >>> resp = requests.get("https://daringfireball.net/feeds/json") 15 | >>> validate_feed(resp.json()) 16 | [] 17 | >>> feed = {"items": [{"attachments": [{}]}]} 18 | >>> errors = validate_feed(feed) 19 | >>> format_errors(feed, ErrorTree(errors)) 20 | { 21 | 'items': { 22 | 0: { 23 | 'errors': { 24 | 'required': [ 25 | "'id' is a required property" 26 | ] 27 | }, 28 | 'attachments': { 29 | 0: { 30 | 'errors': { 31 | 'required': [ 32 | "'mime_type' is a required property", 33 | "'url' is a required property" 34 | ] 35 | } 36 | } 37 | } 38 | } 39 | }, 40 | 'errors': { 41 | 'required': [ 42 | "'title' is a required property", 43 | "'version' is a required property" 44 | ] 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /tests/test_serializer.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | from jsonfeedvalidator.validator import validate_feed, format_errors, ErrorTree 4 | 5 | GOOD_DATA = { 6 | "version": "https://jsonfeed.org/version/1", 7 | "title": "My Example Feed", 8 | "home_page_url": "https://example.org/", 9 | "feed_url": "https://example.org/feed.json", 10 | "items": [ 11 | { 12 | "id": "2", 13 | "content_text": "This is a second item.", 14 | "url": "https://example.org/second-item" 15 | }, 16 | { 17 | "id": "1", 18 | "content_html": "

Hello, world!

", 19 | "url": "https://example.org/initial-post", 20 | "attachments": [{ 21 | "url": "http://example.org", 22 | "mime_type": "text/html", 23 | "title": "This is an amazing website", 24 | "size_in_bytes": 1000, 25 | "duration_in_seconds": 10, 26 | }] 27 | } 28 | ] 29 | } 30 | 31 | 32 | BAD_DATA = { 33 | "home_page_url": "https://example.org/", 34 | "feed_url": "https://example.org/feed.json", 35 | "items": [ 36 | { 37 | "blah": "awesome", 38 | "attachments": [{ 39 | "url": "http://example.org", 40 | }] 41 | } 42 | ] 43 | } 44 | 45 | 46 | def test_validate_good_feed(): 47 | assert validate_feed(GOOD_DATA) == [] 48 | 49 | 50 | def test_validate_bad_feed(): 51 | errors = validate_feed(BAD_DATA) 52 | assert len(errors) > 0 53 | 54 | def test_validate_format_feed_errors(): 55 | errors = validate_feed(BAD_DATA) 56 | assert len(errors) > 0 57 | 58 | blah = format_errors(BAD_DATA, ErrorTree(errors)) 59 | assert blah == { 60 | 'items': { 61 | 0: { 62 | 'errors': { 63 | 'additionalProperties': [ 64 | "'blah' does not match any of the regexes: '^_[a-zA-Z]([^.]+)$'" 65 | ], 66 | 'required': [ 67 | "'id' is a required property" 68 | ] 69 | }, 70 | 'attachments': { 71 | 0: { 72 | 'errors': { 73 | 'required': [ 74 | "'mime_type' is a required property" 75 | ] 76 | } 77 | } 78 | 79 | } 80 | } 81 | }, 82 | 'errors': { 83 | 'required': [ 84 | "'title' is a required property", 85 | "'version' is a required property" 86 | ] 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /jsonfeedvalidator/validator.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | 4 | from collections import defaultdict 5 | 6 | from jsonschema import Draft4Validator 7 | from jsonschema.exceptions import relevance 8 | 9 | from jsonfeedvalidator.schema import SCHEMA 10 | 11 | json_schema_validator = Draft4Validator(SCHEMA) 12 | 13 | 14 | class Unset(object): 15 | """ 16 | An as-of-yet unset attribute 
or unprovided default parameter. 17 | """ 18 | 19 | def __repr__(self): 20 | return "" 21 | 22 | _unset = Unset() 23 | 24 | 25 | class ErrorTree(object): 26 | """ 27 | ErrorTrees make it easier to check which validations failed. 28 | """ 29 | 30 | _instance = _unset 31 | 32 | def __init__(self, errors=()): 33 | self.errors = defaultdict(list) 34 | self._contents = defaultdict(self.__class__) 35 | 36 | for error in errors: 37 | container = self 38 | for element in error.path: 39 | container = container[element] 40 | container.errors[error.validator] += [error] 41 | 42 | container._instance = error.instance 43 | 44 | def __contains__(self, index): 45 | """ 46 | Check whether ``instance[index]`` has any errors. 47 | """ 48 | 49 | return index in self._contents 50 | 51 | def __getitem__(self, index): 52 | """ 53 | Retrieve the child tree one level down at the given ``index``. 54 | If the index is not in the instance that this tree corresponds to and 55 | is not known by this tree, whatever error would be raised by 56 | ``instance.__getitem__`` will be propagated (usually this is some 57 | subclass of :class:`LookupError`. 58 | """ 59 | 60 | if self._instance is not _unset and index not in self: 61 | self._instance[index] 62 | 63 | return self._contents[index] 64 | 65 | def __setitem__(self, index, value): 66 | self._contents[index] = value 67 | 68 | def __iter__(self): 69 | """ 70 | Iterate (non-recursively) over the indices in the instance with errors. 
71 | """ 72 | 73 | return iter(self._contents) 74 | 75 | def __repr__(self): 76 | return "<%s (%s total errors)>" % (self.__class__.__name__, len(self)) 77 | 78 | 79 | def validate_feed(feed): 80 | errors = json_schema_validator.iter_errors(feed) 81 | errors = sorted(errors, key=relevance, reverse=True) 82 | errors = (error for error in errors if not error.context) 83 | 84 | return list(errors) 85 | 86 | 87 | def validate_str(json_str): 88 | data = json.loads(json_str) 89 | validate_feed(data) 90 | 91 | 92 | def path_to_shape(path, value): 93 | 94 | container = { 95 | path.pop(): value 96 | } 97 | 98 | for part in reversed(path): 99 | _container = { 100 | part: container 101 | } 102 | container = _container 103 | 104 | return container 105 | 106 | REQUIRED_PARSER = re.compile(r"u'(.*)' is a required property") 107 | 108 | 109 | def format_message(error): 110 | if error.validator == 'required': 111 | key = REQUIRED_PARSER.match(error.message).items[0] 112 | return "A feed must %s is a required property" % (key) 113 | 114 | 115 | def format_errors(instance, error_tree): 116 | context = {} 117 | 118 | if isinstance(instance, dict): 119 | for key, val in instance.items(): 120 | if key in error_tree: 121 | context[key] = format_errors(val, error_tree[key]) 122 | else: 123 | for i, val in enumerate(instance): 124 | if i in error_tree: 125 | context[i] = format_errors(val, error_tree[i]) 126 | 127 | if error_tree.errors: 128 | context["errors"] = { 129 | key: [e.message for e in val] for key, val in error_tree.errors.items() 130 | } 131 | 132 | return context 133 | -------------------------------------------------------------------------------- /jsonfeedvalidator/schema.py: -------------------------------------------------------------------------------- 1 | SCHEMA = {'$schema': 'http://json-schema.org/draft-04/schema#', 2 | 'additionalProperties': False, 3 | 'definitions': {'attachment': {'additionalProperties': False, 4 | 'patternProperties': {'^_[a-zA-Z]([^.]+)$': 
{'$ref': '#/definitions/extension'}}, 5 | 'properties': {'duration_in_seconds': {'description': 'Specifies how long the attachment takes to listen to or watch.', 6 | 'type': 'number'}, 7 | 'mime_type': {'description': 'Specifies the type of the attachment, such as "audio/mpeg".', 8 | 'type': 'string'}, 9 | 'size_in_bytes': {'description': 'Specifies how large the file is.', 10 | 'type': 'number'}, 11 | 'title': {'description': 'Is a name for the attachment. Important: if there are multiple attachments, and two or more have the exact same title (when title is present), then they are considered as alternate representations of the same thing. In this way a podcaster, for instance, might provide an audio recording in different formats.', 12 | 'type': 'string'}, 13 | 'url': {'allOf': [{'$ref': '#/definitions/uri'}], 14 | 'description': 'Specifies the location of the attachment'}}, 15 | 'required': ['mime_type', 'url'], 16 | 'type': 'object'}, 17 | 'author': {'additionalProperties': False, 18 | 'description': 'Specifies the feed author', 19 | 'minProperties': 1, 20 | 'patternProperties': {'^_[a-zA-Z]([^.]+)$': {'$ref': '#/definitions/extension'}}, 21 | 'properties': {'avatar': {'allOf': [{'$ref': '#/definitions/uri'}], 22 | 'description': 'the URL for an image for the author. As with icon, it should be square and relatively large \xe2\x80\x94 such as 512 x 512 \xe2\x80\x94 and should use transparency where appropriate, since it may be rendered on a non-white background.'}, 23 | 'name': {'description': "Is the author's name", 24 | 'type': 'string'}, 25 | 'url': {'allOf': [{'$ref': '#/definitions/uri'}], 26 | 'description': 'Is the URL of a site owned by the author. It could be a blog, micro-blog, Twitter account, and so on. 
Ideally the linked-to page provides a way to contact the author, but that\xe2\x80\x99s not required.'}}, 27 | 'type': 'object'}, 28 | 'extension': {'description': 'Custom extension to the JSON Feed format', 29 | 'properties': {'about': {'description': 'A description or URL to description of the custom extension.', 30 | 'type': 'string'}}, 31 | 'type': 'object'}, 32 | 'item': {'additionalProperties': False, 33 | 'oneOf': [{'not': {'required': ['content_text']}, 34 | 'required': ['content_html']}, 35 | {'not': {'required': ['content_html']}, 36 | 'required': ['content_text']}, 37 | {'required': ['content_text', 38 | 'content_html']}], 39 | 'patternProperties': {'^_[a-zA-Z]([^.]+)$': {'$ref': '#/definitions/extension'}}, 40 | 'properties': {'attachments': {'description': 'Lists related resources', 41 | 'items': {'$ref': '#/definitions/attachment'}, 42 | 'type': 'array'}, 43 | 'author': {'$ref': '#/definitions/author'}, 44 | 'banner_image': {'allOf': [{'$ref': '#/definitions/uri'}], 45 | 'description': 'The URL of an image to use as a banner. Some blogging systems (such as Medium) display a different banner image chosen to go with each post, but that image wouldn\xe2\x80\x99t otherwise appear in the "content_html". A feed reader with a detail view may choose to show this banner image at the top of the detail view, possibly with the title overlaid.'}, 46 | 'content_html': {'description': 'The HTML representation of the content', 47 | 'type': 'string'}, 48 | 'content_text': {'description': 'The plain text representation of the content', 49 | 'type': 'string'}, 50 | 'date_modified': {'description': 'Specifies the date in ISO 8601 format. (Example: 2010-02-07T14:04:00-05:00.)', 51 | 'format': 'date-time', 52 | 'type': 'string'}, 53 | 'date_published': {'description': 'Specifies the date in ISO 8601 format. 
(Example: 2010-02-07T14:04:00-05:00.)', 54 | 'format': 'date-time', 55 | 'type': 'string'}, 56 | 'external_url': {'allOf': [{'$ref': '#/definitions/uri'}], 57 | 'description': 'Is the URL of a page elsewhere. This is especially useful for linkblogs. If "url" links to where you\xe2\x80\x99re talking about a thing, then "external_url" links to the thing you\xe2\x80\x99re talking about.'}, 58 | 'id': {'description': 'Is unique for that item for that feed over time. If an item is ever updated, the id should be unchanged. New items should never use a previously-used id. If an id is presented as a number or other type, a JSON Feed reader must coerce it to a string. Ideally, the id is the full URL of the resource described by the item, since URLs make great unique identifiers.', 59 | 'type': ['string', 60 | 'number']}, 61 | 'image': {'allOf': [{'$ref': '#/definitions/uri'}], 62 | 'description': 'The URL of the main image for the item. This image may also appear in the "content_html" \xe2\x80\x94 if so, it\xe2\x80\x99s a hint to the feed reader that this is the main, featured image. Feed readers may use the image as a preview.'}, 63 | 'summary': {'description': 'A plain text sentence or two describing the item. This might be presented in a timeline, for instance, where a detail view would display all of "content_html" or "content_text".', 64 | 'type': 'string'}, 65 | 'tags': {'description': 'Can have any plain text values you want. Tags tend to be just one word, but they may be anything. Note: they are not the equivalent of Twitter hashtags. Some blogging systems and other feed formats call these categories.', 66 | 'items': {'type': 'string'}, 67 | 'type': 'array', 68 | 'uniqueItems': True}, 69 | 'title': {'description': 'Is plain text. Microblog items in particular may omit titles.', 70 | 'type': 'string'}, 71 | 'url': {'allOf': [{'$ref': '#/definitions/uri'}], 72 | 'description': 'Is the URL of the resource described by the item. It\xe2\x80\x99s the permalink. 
This may be the same as the id \xe2\x80\x94 but should be present regardless.'}}, 73 | 'required': ['id']}, 74 | 'uri': {'format': 'uri', 'type': 'string'}}, 75 | 'patternProperties': {'^_[a-zA-Z]([^.]+)$': {'$ref': '#/definitions/extension'}}, 76 | 'properties': {'author': {'$ref': '#/definitions/author'}, 77 | 'description': {'description': 'Provides more detail, beyond the title, on what the feed is about. A feed reader may display this text.', 78 | 'type': 'string'}, 79 | 'expired': {'description': 'Says whether or not the feed is finished \xe2\x80\x94 that is, whether or not it will ever update again. A feed for a temporary event, such as an instance of the Olympics, could expire. If the value is true, then it\xe2\x80\x99s expired. Any other value, or the absence of expired, means the feed may continue to update.', 80 | 'type': 'boolean'}, 81 | 'favicon': {'allOf': [{'$ref': '#/definitions/uri'}], 82 | 'description': 'The URL of an image for the feed suitable to be used in a source list. It should be square and relatively small, but not smaller than 64 x 64 (so that it can look good on retina displays). As with icon, this image should use transparency where appropriate, since it may be rendered on a non-white background.'}, 83 | 'feed_url': {'description': "The URL of the feed, and serves as the unique identifier for the feed. As with 'home_page_url', this should be considered required for feeds on the public web."}, 84 | 'home_page_url': {'allOf': [{'$ref': '#/definitions/uri'}], 85 | 'description': 'The URL of the resource that the feed describes. This resource may or may not actually be a \xe2\x80\x9chome\xe2\x80\x9d page, but it should be an HTML page. If a feed is published on the public web, this should be considered as required. 
But it may not make sense in the case of a file created on a desktop computer, when that file is not shared or is shared only privately.'}, 86 | 'hubs': {'description': 'Describes endpoints that can be used to subscribe to real-time notifications from the publisher of this feed', 87 | 'items': {'properties': {'type': {'description': 'Describes the protocol used to talk with the hub, such as "rssCloud" or "WebSub".', 88 | 'type': 'string'}, 89 | 'url': {'allOf': [{'$ref': '#/definitions/uri'}]}}, 90 | 'required': ['type', 'url'], 91 | 'type': 'object'}, 92 | 'type': 'array'}, 93 | 'icon': {'allOf': [{'$ref': '#/definitions/uri'}], 94 | 'description': 'The URL of an image for the feed suitable to be used in a timeline, much the way an avatar might be used. It should be square and relatively large \xe2\x80\x94\xc2\xa0such as 512 x 512 \xe2\x80\x94 so that it can be scaled-down and so that it can look good on retina displays. It should use transparency where appropriate, since it may be rendered on a non-white background.'}, 95 | 'items': {'items': {'$ref': '#/definitions/item'}, 96 | 'type': 'array'}, 97 | 'next_url': {'allOf': [{'$ref': '#/definitions/uri'}], 98 | 'description': 'The URL of a feed that provides the next n items, where n is determined by the publisher. This allows for pagination, but with the expectation that reader software is not required to use it and probably won\xe2\x80\x99t use it very often. next_url must not be the same as feed_url, and it must not be the same as a previous next_url (to avoid infinite loops).'}, 99 | 'title': {'description': 'The name of the feed, which will often correspond to the name of the website (blog, for instance), though not necessarily.', 100 | 'type': 'string'}, 101 | 'user_comment': {'description': 'A description of the purpose of the feed. 
This is for the use of people looking at the raw JSON, and should be ignored by feed readers', 102 | 'type': 'string'}, 103 | 'version': {'anyOf': [{'enum': ['https://jsonfeed.org/version/1']}, 104 | {'$ref': '#/definitions/uri'}], 105 | 'description': 'The URL of the version of the format the feed uses. This should appear at the very top, though we recognize that not all JSON generators allow for ordering.'}}, 106 | 'required': ['items', 'title', 'version'], 107 | 'title': 'JSON schema for the JSON Feed format', 108 | 'type': 'object'} 109 | --------------------------------------------------------------------------------