├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── doc └── source │ ├── conf.py │ └── index.rst ├── mf2util.py ├── setup.cfg ├── setup.py └── tests ├── __init__.py ├── authorship ├── h-card_with_u-url_equal_to_self.html ├── h-card_with_u-url_equal_to_u-uid_equal_to_self.html ├── h-card_with_u-url_that_is_also_rel-me.html ├── h-entry_with_p-author_h-card.html ├── h-entry_with_rel-author.html ├── h-entry_with_u-author.html ├── h-feed_with_p-author_h-card.html ├── h-feed_with_u-author.html └── no_h-card.html ├── interpret ├── article_naive_datetime.json ├── article_no_p-name.json ├── article_non_ascii_content.json ├── article_two_published_dates.json ├── follow.json ├── hwc-event.json ├── location_h-adr.json ├── location_h-card.json ├── location_h-geo.json ├── location_top_level.json ├── note_with_comment_and_like.json ├── relative_paths.json ├── reply_h-cite.json ├── reply_invite.json ├── reply_rsvp.json ├── reply_u-in-reply-to.json └── unusual_properties.json ├── posttype ├── hcard_no_name.json ├── hcard_org.json ├── only_html_content.json └── tantek_photo.json ├── test_authorship.py ├── test_classify_comments.py ├── test_datetime.py ├── test_interpret.py ├── test_is_name_a_title.py ├── test_post_type_discovery.py └── test_representative_hcard.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .cache 3 | local 4 | mf2util.egg-info 5 | TAGS 6 | .eggs 7 | build 8 | dist 9 | .vscode 10 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.4" 5 | - "3.5" 6 | - "3.6" 7 | install: 8 | - pip install --upgrade setuptools 9 | - pip install pytest 10 | - pip install mf2py 11 | script: 12 | pytest tests 13 | sudo: false 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Simplifed BSD License 2 | 3 | Copyright (c) 2014, Kyle Mahan 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | 1. Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the 16 | distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Microformats2 Utilities 2 | ======================= 3 | 4 | [![Build 5 | Status](https://travis-ci.org/kylewm/mf2util.svg?branch=master)](https://travis-ci.org/kylewm/mf2util) 6 | [![Documentation 7 | Status](https://readthedocs.org/projects/mf2util/badge/?version=latest)](https://readthedocs.org/projects/mf2util/?badge=latest) 8 | 9 | Microformats2 provides an extremely flexible way to mark up HTML 10 | documents, so that human-centric data is machine-discoverable. This 11 | utility can be used to interpret a microformatted post or event, for 12 | display as a [comment](http://indiewebcamp.com/comments-presentation) or 13 | [reply-context](http://indiewebcamp.com/reply-context). 14 | 15 | The library itself has no dependencies, but it won't do you much good 16 | without an mf2 parser. I use and recommend 17 | [mf2py](https://github.com/tommorris/mf2py). 18 | 19 | Compatibility: Python 2.6, 2.7, 3.3+ 20 | 21 | License: [Simplified BSD](http://opensource.org/licenses/BSD-2-Clause) 22 | 23 | Installation 24 | ------------ 25 | 26 | I've done my best to create appropriate unit tests for this library, but 27 | it is very much alpha software at this point. 28 | 29 | Install via pip 30 | 31 | pip install mf2util 32 | 33 | or add as a submodule to your own project. 34 | 35 | I've used pytest for running unit tests (These are also run 36 | automatically by Travis-CI) 37 | 38 | pip install pytest 39 | python -m pytest 40 | 41 | Quick Start 42 | ----------- 43 | 44 | For received webmentions, use the method `mf2util.interpret_comment`. 45 | This will return a dictionary with the fields necessary to display the 46 | comment. For example: 47 | 48 | ```python 49 | import mf2py 50 | import mf2util 51 | 52 | # source_url = source_url of incoming webmention 53 | # target_url = target_url of incoming webmention 54 | 55 | parsed = mf2py.Parser(url=source_url).to_dict() 56 | comment = mf2util.interpret_comment(parsed, source_url, [target_url]) 57 | 58 | # result 59 | { 60 | 'type': 'entry', 61 | 'name': 'Re: How to make toast', 62 | 'content': '
<p>This solved my problem, thanks!</p>
', 63 | 'url': 'http://facebook.com/posts/0123456789', 64 | 'published': datetime.datetime(2014, 11, 24, 13, 24), 65 | 'author': { 66 | 'name': 'John Doe', 67 | 'url': 'http://facebook.com/john.doe', 68 | 'photo': 'http://img.facebook.com/johndoe-profile-picture.jpg' 69 | }, 70 | 'comment_type': ['reply'] 71 | } 72 | ``` 73 | 74 | When displaying a reply-context, you may not know the precise type of the 75 | source document. Use the method `mf2util.interpret` to interpret the 76 | document; it will figure out the document's primary h- type and return 77 | the appropriate fields for display. Currently supports h-entry and 78 | h-event style documents. 79 | 80 | ```python 81 | import mf2py 82 | import mf2util 83 | 84 | # reply_to_url = url being replied to 85 | 86 | parsed = mf2py.Parser(url=reply_to_url).to_dict() 87 | entry = mf2util.interpret(parsed, reply_to_url) 88 | 89 | # result 90 | { 91 | 'type': 'event', 92 | 'name': 'Homebrew Website Club', 93 | 'start': datetime.datetime(2014, 5, 7, 18, 30), 94 | 'end': datetime.datetime(2014, 5, 7, 19, 30), 95 | 'content': '
<p>Exchange information, swap ideas, talk shop, help work on a project ...</p>
' 96 | } 97 | ``` 98 | 99 | For most users, these two methods alone may be sufficient. 100 | 101 | Comments 102 | -------- 103 | 104 | When processing an incoming webmention, you can use the 105 | `mf2util.classify_comment` method to classify it as a reply, like, or 106 | repost (or a combination thereof). The method returns a list of zero or 107 | more strings (one of 'like', 'repost', or 'reply'). 108 | 109 | ### Usage 110 | 111 | ```python 112 | import mf2py 113 | import mf2util 114 | 115 | # receive webmention from source_url to target_url 116 | target_url = 'http://my-domain.com/2014/04/12/1' 117 | alternate_url = 'http://doma.in/V4ls' 118 | parsed = mf2py.Parser(url=source_url).to_dict() 119 | mentions = mf2util.classify_comment(parsed, [target_url, alternate_url]) 120 | ``` 121 | 122 | Datetimes 123 | --------- 124 | 125 | The `mf2util.parse_datetime` function is useful for parsing microformats2 126 | dates and datetimes. It can be used as a microformats-specific 127 | alternative to larger, more general libraries like python-dateutil. 128 | 129 | The definitions for microformats2 dt-\* properties are fairly lenient. 130 | This module will convert an mf2 date string into either a datetime.date 131 | or datetime.datetime object. Datetimes will be naive unless a timezone 132 | is specified. 133 | 134 | Timezones are specified as fixed offsets from UTC. 135 | 136 | ### Usage 137 | 138 | ```python 139 | import mf2py 140 | import mf2util 141 | 142 | parsed = mf2py.Parser(url=…) 143 | publishedstr = parsed.to_dict()['items'][0]['properties']['published'][0] 144 | published = mf2util.parse_datetime(publishedstr) # --> datetime.datetime 145 | ``` 146 | 147 | Authorship 148 | ---------- 149 | 150 | Use `mf2util.find_author` to determine an h-entry's author name, url, and 151 | photo. Uses the [authorship 152 | algorithm](https://indiewebcamp.com/authorship) described on the 153 | IndieWebCamp wiki. 154 | 155 | Contributing 156 | ------------ 157 | 158 | If you find a bug or deficiency, feel free to file an issue, pull 159 | request, or just message me in the \#indiewebcamp channel on freenode. 160 | 161 | Changes 162 | ------- 163 | 164 | All notable changes to this project will be documented here. 165 | 166 | ### 0.5.2 - 2023-01-15 167 | 168 | - Bugfix: post-type-discovery should only return org if name and org properties are present. Thanks @snarfed! 169 | 170 | ### 0.5.1 - 2018-11-04 171 | 172 | - Add `follow` to `post_type_discovery()`. 173 | 174 | ### 0.5.0 - 2016-10-27 175 | 176 | - Fully implement location parsing based on https://indieweb.org/location#How_to_determine_the_location_of_a_microformat 177 | thanks to @snarfed 178 | 179 | ### 0.4.3 - 2016-08-20 180 | 181 | - representative_hcard now includes h-cards that are properties of 182 | other h-* entities, thanks to @angelogladding 183 | 184 | ### 0.4.2 - 2016-05-09 185 | 186 | - Added properties "dt-deleted", "u-logo", "u-featured" 187 | 188 | ### 0.4.1 - 2016-05-04 189 | 190 | - Minor bugfix: interpret was passing parameters in the wrong order 191 | when parsing nested reply contexts and comments, which meant (in 192 | practice) `want_json` was always false, and dates were included as 193 | strings rather than datetimes. 194 | 195 | ### 0.4.0 - 2016-04-23 196 | #### Added 197 | 198 | - Update authorship implementation (`find_author`) to support fetching 199 | a separate page to find the author's h-card. 200 | - Added a new optional parameter to all `interpret_*` methods called 201 | `fetch_mf2_func`.
A good value for this is `lambda url: mf2py.parse(url=url)` 202 | 203 | ### 0.3.3 - 2016-04-07 204 | #### Changed 205 | 206 | - minor bugfixes to prevent throwing errors on bad mf2 input 207 | - when a value (e.g. "name") is expected to be simple and we get a 208 | dict instead 209 | - when a e-* value has "html" but not "value" 210 | 211 | ### 0.3.2 - 2016-03-01 212 | #### Changed 213 | 214 | - `interpret_feed` now skips rel=syndication when parsing syndication 215 | values for individual entries. This value should be empty for feeds, 216 | but if it isn't, it will almost always be wrong. 217 | 218 | ### 0.3.1 - 2016-02-17 219 | #### Changed 220 | 221 | - Added "poster" to the recognized URL properties of a video tag. 222 | 223 | ### 0.3.0 - 2016-02-17 224 | #### Changed 225 | 226 | - Added `base_href` parameter to all interpret methods. Now when 227 | content is normalized, it will take into account the base tag if 228 | it's given. 229 | - Added `audio`, `video`, and `source` tags to the list of tags that 230 | might contain URL attributes. 231 | 232 | ### 0.2.12 - 2016-02-15 233 | #### Added 234 | 235 | - Added "photo" to common URL properties. 236 | 237 | ### 0.2.11 - 2016-01-02 238 | #### Changed 239 | 240 | - `is_name_a_title` accepts bytestrings now, no longer throws an error 241 | if the input is not unicode. 242 | 243 | ### 0.2.10 - 2015-11-27 244 | #### Added 245 | 246 | - `representative_hcard()` implementation of 247 | http://microformats.org/wiki/representative-h-card-parsing. Search 248 | all h-cards on a page and find the one that represents the page's 249 | author/owner. 250 | 251 | ### 0.2.9 - 2015-10-28 252 | #### Changed 253 | 254 | - Guard against mf2 required fields being None to make it a little 255 | easier for third parties (in this case Bridgy) to write unit tests. 256 | 257 | ### 0.2.8 - 2015-10-28 258 | #### Added 259 | 260 | - `post_type_discovery()` implementation that takes an h-event or 261 | h-entry and returns a string defining the post type (e.g. "article", 262 | "note", "like", etc.) 263 | 264 | #### Changed 265 | 266 | - Consolidated modules into one flat file for simplicity 267 | - Renamed `parse_dt` to `parse_datetime` (old name still works for 268 | backcompat) 269 | - In python 3, use builtin timezone implementation instead of 270 | mf2util's custom implementation 271 | 272 | ### 0.2.7 - 2015-10-05 273 | #### Added 274 | 275 | - add parsing for comment, like, and repost h-cites nested inside an 276 | h-entry 277 | 278 | ### 0.2.6 - 2015-09-24 279 | #### Added 280 | 281 | - added property content-plain to preserve the e-content value 282 | 283 | ### 0.2.5 - 2015-09-14 284 | #### Changed 285 | 286 | - minor bugfix: interpret should pass want_json recursively when 287 | fetching reply contexts. 288 | 289 | ### 0.2.4 - 2015-09-14 290 | 291 | #### Added 292 | 293 | - interpret methods now have an optional want_json argument. If true, 294 | result will be pure json with no Python-only objects (i.e. 
datetimes) 295 | 296 | ### 0.2.3 - 2015-08-27 297 | 298 | #### Added 299 | 300 | - parse simple location name and url from events and entries \#\#\# 301 | Changed 302 | - accept complex-valued "url" properties and fallback to their "value" 303 | 304 | ### 0.2.1 - 2015-06-08 305 | 306 | #### Changed 307 | 308 | - more lenient parsing of content as either e-content or p-content 309 | 310 | ### 0.2.0 - 2015-06-08 311 | 312 | #### Added 313 | 314 | - parse nested h-cite comments as entries under the top-level entry 315 | - check for bookmark-of \#\#\# Changed 316 | - in-reply-to, repost-of, like-of all parse into a list of objects now 317 | instead of a list of urls 318 | 319 | ### 0.1.9 - 2015-04-01 320 | 321 | #### Added 322 | 323 | - Parse event invitations as type ['invite', 'reply']. 324 | - Parse the list of invitees. 325 | 326 | ### 0.1.5 - 2015-02-18 327 | 328 | #### Added 329 | 330 | - in-reply-to, like-of, and repost-of properties added to the 331 | interpret\_entry result. 332 | 333 | ### 0.1.4 - 2015-01-27 334 | 335 | #### Changed 336 | 337 | - Authorship algorithm was incorrectly using the first h-entry on a 338 | page, even when parsing an h-feed that has many. 339 | 340 | ### 0.1.3 - 2014-12-14 341 | 342 | #### Changed 343 | 344 | - RSVP replies are now classified as type 'rsvp' instead of 'reply' 345 | 346 | ### 0.1.2 - 2014-09-20 347 | 348 | #### Added 349 | 350 | - Utility methods for interpreting h-feeds that contain one or more 351 | entries. 352 | 353 | #### Changed 354 | 355 | - Handle parsing errors more gracefully. 356 | - Distinguish between explicit h-entry titles and auto-generated 357 | p-names (junk) when determining whether a post has a title 358 | 359 | ### 0.1.1 - 2014-06-21 360 | 361 | #### Added 362 | 363 | - Include "syndication" attribute for including syndication URLs (e.g. 364 | for de-duplicating received comments) 365 | - Convert URL attributes from relative paths to absolute URLs for 366 | displaying foreign content. 367 | 368 | ### 0.1.0 - 2014-05-11 369 | 370 | #### Added 371 | 372 | - Migrated code from Red Wind for reasoning about raw microformats2 373 | data into this library. 374 | - Methods for interpreting h-entry, h-event, and received comments (to 375 | decide whether they are replies, likes, reposts, etc.) 376 | - No-dependency method for parsing datetimes described in 377 | [http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing](http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing) 378 | -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # mf2util documentation build configuration file, created by 4 | # sphinx-quickstart on Mon May 12 08:02:11 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 
21 | #sys.path.insert(0, os.path.abspath('.')) 22 | sys.path.insert(0, os.path.abspath('../..')) 23 | 24 | # -- General configuration ------------------------------------------------ 25 | 26 | # If your documentation needs a minimal Sphinx version, state it here. 27 | #needs_sphinx = '1.0' 28 | 29 | # Add any Sphinx extension module names here, as strings. They can be 30 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 31 | # ones. 32 | extensions = [ 33 | 'sphinx.ext.autodoc', 34 | 'sphinx.ext.ifconfig', 35 | 'sphinx.ext.viewcode', 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # The suffix of source filenames. 42 | source_suffix = '.rst' 43 | 44 | # The encoding of source files. 45 | #source_encoding = 'utf-8-sig' 46 | 47 | # The master toctree document. 48 | master_doc = 'index' 49 | 50 | # General information about the project. 51 | project = u'mf2util' 52 | copyright = u'2014, Kyle Mahan' 53 | 54 | # The version info for the project you're documenting, acts as replacement for 55 | # |version| and |release|, also used in various other places throughout the 56 | # built documents. 57 | # 58 | # The short X.Y version. 59 | version = '0.1.0' 60 | # The full version, including alpha/beta/rc tags. 61 | release = '0.1.0' 62 | 63 | # The language for content autogenerated by Sphinx. Refer to documentation 64 | # for a list of supported languages. 65 | #language = None 66 | 67 | # There are two options for replacing |today|: either, you set today to some 68 | # non-false value, then it is used: 69 | #today = '' 70 | # Else, today_fmt is used as the format for a strftime call. 71 | #today_fmt = '%B %d, %Y' 72 | 73 | # List of patterns, relative to source directory, that match files and 74 | # directories to ignore when looking for source files. 75 | exclude_patterns = [] 76 | 77 | # The reST default role (used for this markup: `text`) to use for all 78 | # documents. 79 | #default_role = None 80 | 81 | # If true, '()' will be appended to :func: etc. cross-reference text. 82 | #add_function_parentheses = True 83 | 84 | # If true, the current module name will be prepended to all description 85 | # unit titles (such as .. function::). 86 | #add_module_names = True 87 | 88 | # If true, sectionauthor and moduleauthor directives will be shown in the 89 | # output. They are ignored by default. 90 | #show_authors = False 91 | 92 | # The name of the Pygments (syntax highlighting) style to use. 93 | pygments_style = 'sphinx' 94 | 95 | # A list of ignored prefixes for module index sorting. 96 | #modindex_common_prefix = [] 97 | 98 | # If true, keep warnings as "system message" paragraphs in the built documents. 99 | #keep_warnings = False 100 | 101 | 102 | # -- Options for HTML output ---------------------------------------------- 103 | 104 | # The theme to use for HTML and HTML Help pages. See the documentation for 105 | # a list of builtin themes. 106 | html_theme = 'default' 107 | 108 | # Theme options are theme-specific and customize the look and feel of a theme 109 | # further. For a list of options available for each theme, see the 110 | # documentation. 111 | #html_theme_options = {} 112 | 113 | # Add any paths that contain custom themes here, relative to this directory. 114 | #html_theme_path = [] 115 | 116 | # The name for this set of Sphinx documents. If None, it defaults to 117 | # " v documentation". 118 | #html_title = None 119 | 120 | # A shorter title for the navigation bar. 
Default is the same as html_title. 121 | #html_short_title = None 122 | 123 | # The name of an image file (relative to this directory) to place at the top 124 | # of the sidebar. 125 | #html_logo = None 126 | 127 | # The name of an image file (within the static path) to use as favicon of the 128 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 129 | # pixels large. 130 | #html_favicon = None 131 | 132 | # Add any paths that contain custom static files (such as style sheets) here, 133 | # relative to this directory. They are copied after the builtin static files, 134 | # so a file named "default.css" will overwrite the builtin "default.css". 135 | html_static_path = ['_static'] 136 | 137 | # Add any extra paths that contain custom files (such as robots.txt or 138 | # .htaccess) here, relative to this directory. These files are copied 139 | # directly to the root of the documentation. 140 | #html_extra_path = [] 141 | 142 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 143 | # using the given strftime format. 144 | #html_last_updated_fmt = '%b %d, %Y' 145 | 146 | # If true, SmartyPants will be used to convert quotes and dashes to 147 | # typographically correct entities. 148 | #html_use_smartypants = True 149 | 150 | # Custom sidebar templates, maps document names to template names. 151 | #html_sidebars = {} 152 | 153 | # Additional templates that should be rendered to pages, maps page names to 154 | # template names. 155 | #html_additional_pages = {} 156 | 157 | # If false, no module index is generated. 158 | #html_domain_indices = True 159 | 160 | # If false, no index is generated. 161 | #html_use_index = True 162 | 163 | # If true, the index is split into individual pages for each letter. 164 | #html_split_index = False 165 | 166 | # If true, links to the reST sources are added to the pages. 167 | #html_show_sourcelink = True 168 | 169 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 170 | #html_show_sphinx = True 171 | 172 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 173 | #html_show_copyright = True 174 | 175 | # If true, an OpenSearch description file will be output, and all pages will 176 | # contain a tag referring to it. The value of this option must be the 177 | # base URL from which the finished HTML is served. 178 | #html_use_opensearch = '' 179 | 180 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 181 | #html_file_suffix = None 182 | 183 | # Output file base name for HTML help builder. 184 | htmlhelp_basename = 'mf2utildoc' 185 | 186 | 187 | # -- Options for LaTeX output --------------------------------------------- 188 | 189 | latex_elements = { 190 | # The paper size ('letterpaper' or 'a4paper'). 191 | #'papersize': 'letterpaper', 192 | 193 | # The font size ('10pt', '11pt' or '12pt'). 194 | #'pointsize': '10pt', 195 | 196 | # Additional stuff for the LaTeX preamble. 197 | #'preamble': '', 198 | } 199 | 200 | # Grouping the document tree into LaTeX files. List of tuples 201 | # (source start file, target name, title, 202 | # author, documentclass [howto, manual, or own class]). 203 | latex_documents = [ 204 | ('index', 'mf2util.tex', u'mf2util Documentation', 205 | u'Kyle Mahan', 'manual'), 206 | ] 207 | 208 | # The name of an image file (relative to this directory) to place at the top of 209 | # the title page. 
210 | #latex_logo = None 211 | 212 | # For "manual" documents, if this is true, then toplevel headings are parts, 213 | # not chapters. 214 | #latex_use_parts = False 215 | 216 | # If true, show page references after internal links. 217 | #latex_show_pagerefs = False 218 | 219 | # If true, show URL addresses after external links. 220 | #latex_show_urls = False 221 | 222 | # Documents to append as an appendix to all manuals. 223 | #latex_appendices = [] 224 | 225 | # If false, no module index is generated. 226 | #latex_domain_indices = True 227 | 228 | 229 | # -- Options for manual page output --------------------------------------- 230 | 231 | # One entry per manual page. List of tuples 232 | # (source start file, name, description, authors, manual section). 233 | man_pages = [ 234 | ('index', 'mf2util', u'mf2util Documentation', 235 | [u'Kyle Mahan'], 1) 236 | ] 237 | 238 | # If true, show URL addresses after external links. 239 | #man_show_urls = False 240 | 241 | 242 | # -- Options for Texinfo output ------------------------------------------- 243 | 244 | # Grouping the document tree into Texinfo files. List of tuples 245 | # (source start file, target name, title, author, 246 | # dir menu entry, description, category) 247 | texinfo_documents = [ 248 | ('index', 'mf2util', u'mf2util Documentation', 249 | u'Kyle Mahan', 'mf2util', 'One line description of project.', 250 | 'Miscellaneous'), 251 | ] 252 | 253 | # Documents to append as an appendix to all manuals. 254 | #texinfo_appendices = [] 255 | 256 | # If false, no module index is generated. 257 | #texinfo_domain_indices = True 258 | 259 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 260 | #texinfo_show_urls = 'footnote' 261 | 262 | # If true, do not generate a @detailmenu in the "Top" node's menu. 263 | #texinfo_no_detailmenu = False 264 | 265 | 266 | # Example configuration for intersphinx: refer to the Python standard library. 267 | intersphinx_mapping = {'http://docs.python.org/': None} 268 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. mf2util documentation master file, created by 2 | sphinx-quickstart on Mon May 12 08:02:11 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | mf2util API reference 7 | ===================== 8 | 9 | .. automodule:: mf2util 10 | :members: 11 | -------------------------------------------------------------------------------- /mf2util.py: -------------------------------------------------------------------------------- 1 | """Utilities for interpreting mf2 data. 2 | 3 | Microformats2 is a general way to mark up any HTML document with 4 | classes and propeties. 
This module uses domain-specific assumptions 5 | about the classes (specifically h-entry and h-event) to extract 6 | certain interesting properties.""" 7 | 8 | 9 | from __future__ import unicode_literals 10 | from collections import deque 11 | from datetime import tzinfo, timedelta, datetime, date 12 | import logging 13 | import re 14 | import string 15 | 16 | import unicodedata 17 | import sys 18 | 19 | PY3 = sys.version_info[0] >= 3 20 | 21 | # 2/3 compatibility 22 | if PY3: 23 | from urllib.parse import urljoin 24 | from datetime import timezone 25 | utc = timezone.utc 26 | timezone_from_offset = timezone 27 | string_type = str 28 | else: 29 | from urlparse import urljoin 30 | string_type = unicode 31 | 32 | # timezone shims for py2 33 | 34 | class UTC(tzinfo): 35 | """UTC timezone, from Python documentation 36 | https://docs.python.org/2/library/datetime.html#tzinfo-objects""" 37 | 38 | def utcoffset(self, dt): 39 | return timedelta(0) 40 | 41 | def tzname(self, dt): 42 | return "UTC" 43 | 44 | def dst(self, dt): 45 | return timedelta(0) 46 | 47 | class FixedOffset(tzinfo): 48 | """A class building tzinfo objects for fixed-offset time zones. 49 | Note that FixedOffset(0, "UTC") is a different way to build a 50 | UTC tzinfo object. 51 | 52 | Fixed offset in minutes east from UTC. from Python 2 documentation 53 | https://docs.python.org/2/library/datetime.html#tzinfo-objects""" 54 | 55 | def __init__(self, offset, name): 56 | self.__offset = offset 57 | self.__name = name 58 | 59 | def utcoffset(self, dt): 60 | return self.__offset 61 | 62 | def tzname(self, dt): 63 | return self.__name 64 | 65 | def dst(self, dt): 66 | return timedelta(0) 67 | 68 | utc = UTC() 69 | timezone_from_offset = FixedOffset 70 | 71 | 72 | URL_ATTRIBUTES = { 73 | 'a': ['href'], 74 | 'link': ['href'], 75 | 'img': ['src'], 76 | 'audio': ['src'], 77 | 'video': ['src', 'poster'], 78 | 'source': ['src'], 79 | } 80 | 81 | # From https://indieweb.org/location#How_to_determine_the_location_of_a_microformat 82 | LOCATION_PROPERTIES = frozenset(( 83 | 'street-address', 84 | 'extended-address', 85 | 'post-office-box', 86 | 'locality', 87 | 'region', 88 | 'postal-code', 89 | 'country-name', 90 | 'label', 91 | 'latitude', 92 | 'longitude', 93 | 'altitude', 94 | 'name', 95 | )) 96 | 97 | 98 | def find_first_entry(parsed, types): 99 | """Find the first interesting h-* object in BFS-order 100 | 101 | :param dict parsed: a mf2py parsed dict 102 | :param list types: target types, e.g. ['h-entry', 'h-event'] 103 | :return: an mf2py item that is one of `types`, or None 104 | """ 105 | return next(_find_all_entries(parsed, types, False), None) 106 | 107 | 108 | def find_all_entries(parsed, types, include_properties=False): 109 | """Find all h-* objects of a given type in BFS-order. Traverses the 110 | top-level items and their children and descendents. Includes property 111 | values (e.g. finding all h-cards would not find values of 112 | "p-author h-card") only if `include_properties` is True. 113 | 114 | :param dict parsed: a mf2py parsed dict 115 | :param list types: target types, e.g. 
['h-entry', 'h-event'] 116 | :param boolean include_properties: include properties in search of entries 117 | :return: all entries with any of the the target types 118 | """ 119 | return list(_find_all_entries(parsed, types, include_properties)) 120 | 121 | 122 | def _find_all_entries(parsed, types, include_properties): 123 | queue = deque(item for item in parsed['items']) 124 | while queue: 125 | item = queue.popleft() 126 | if any(h_class in item.get('type', []) for h_class in types): 127 | yield item 128 | queue.extend(item.get('children', [])) 129 | if include_properties: 130 | queue.extend(prop for props in item.get('properties', {}).values() 131 | for prop in props if isinstance(prop, dict)) 132 | 133 | 134 | def find_datetimes(parsed): 135 | """Find published, updated, start, and end dates. 136 | 137 | :param dict parsed: a mf2py parsed dict 138 | :return: a dictionary from property type to datetime or date 139 | """ 140 | hentry = find_first_entry(parsed) 141 | result = {} 142 | 143 | if hentry: 144 | for prop in ('published', 'updated', 'start', 'end'): 145 | date_strs = hentry['properties'].get(prop, []) 146 | result[prop] = parse_datetime(' '.join(date_strs)) 147 | 148 | 149 | def get_plain_text(values, strip=True): 150 | """Get the first value in a list of values that we expect to be plain-text. 151 | If it is a dict, then return the value of "value". 152 | 153 | :param list values: a list of values 154 | :param boolean strip: true if we should strip the plaintext value 155 | :return: a string or None 156 | """ 157 | if values: 158 | v = values[0] 159 | if isinstance(v, dict): 160 | v = v.get('value', '') 161 | if strip: 162 | v = v.strip() 163 | return v 164 | 165 | 166 | def classify_comment(parsed, target_urls): 167 | """Find and categorize comments that reference any of a collection of 168 | target URLs. Looks for references of type reply, like, and repost. 169 | 170 | :param dict parsed: a mf2py parsed dict 171 | :param list target_urls: a collection of urls that represent the 172 | target post. this can include alternate or shortened URLs. 173 | :return: a list of applicable comment types ['like', 'reply', 'repost'] 174 | """ 175 | def process_references(objs, reftypes, result): 176 | for obj in objs: 177 | if isinstance(obj, dict): 178 | if any(url in target_urls for url 179 | in obj.get('properties', {}).get('url', [])): 180 | result += (r for r in reftypes if r not in result) 181 | elif obj in target_urls: 182 | result += (r for r in reftypes if r not in result) 183 | 184 | result = [] 185 | hentry = find_first_entry(parsed, ['h-entry']) 186 | if hentry: 187 | reply_type = [] 188 | if 'rsvp' in hentry['properties']: 189 | reply_type.append('rsvp') 190 | if 'invitee' in hentry['properties']: 191 | reply_type.append('invite') 192 | reply_type.append('reply') 193 | 194 | # TODO handle rel=in-reply-to 195 | for prop in ('in-reply-to', 'reply-to', 'reply'): 196 | process_references( 197 | hentry['properties'].get(prop, []), reply_type, result) 198 | 199 | for prop in ('like-of', 'like'): 200 | process_references( 201 | hentry['properties'].get(prop, []), ('like',), result) 202 | 203 | for prop in ('repost-of', 'repost'): 204 | process_references( 205 | hentry['properties'].get(prop, []), ('repost',), result) 206 | 207 | return result 208 | 209 | 210 | def parse_author(obj): 211 | """Parse the value of a u-author property, can either be a compound 212 | h-card or a single name or url. 
213 | 214 | :param object obj: the mf2 property value, either a dict or a string 215 | :result: a dict containing the author's name, photo, and url 216 | """ 217 | result = {} 218 | if isinstance(obj, dict): 219 | names = obj['properties'].get('name') 220 | photos = obj['properties'].get('photo') 221 | urls = obj['properties'].get('url') 222 | if names: 223 | result['name'] = names[0] 224 | if photos: 225 | result['photo'] = photos[0] 226 | if urls: 227 | result['url'] = urls[0] 228 | elif obj: 229 | if obj.startswith('http://') or obj.startswith('https://'): 230 | result['url'] = obj 231 | else: 232 | result['name'] = obj 233 | return result 234 | 235 | 236 | def find_author(parsed, source_url=None, hentry=None, fetch_mf2_func=None): 237 | """Use the authorship discovery algorithm 238 | https://indiewebcamp.com/authorship to determine an h-entry's 239 | author. 240 | 241 | :param dict parsed: an mf2py parsed dict. 242 | :param str source_url: the source of the parsed document. 243 | :param hentry dict: optional, the h-entry we're examining, if omitted, 244 | we'll just use the first one 245 | :param fetch_mf2_func callable: optional function that takes a URL 246 | and returns parsed mf2 247 | :return: a dict containing the author's name, photo, and url 248 | """ 249 | def find_hentry_author(hentry): 250 | for obj in hentry['properties'].get('author', []): 251 | return parse_author(obj) 252 | 253 | def find_parent_hfeed_author(hentry): 254 | for hfeed in _find_all_entries(parsed, ['h-feed'], False): 255 | # find the h-entry's parent h-feed 256 | if hentry in hfeed.get('children', []): 257 | for obj in hfeed['properties'].get('author', []): 258 | return parse_author(obj) 259 | 260 | if not hentry: 261 | hentry = find_first_entry(parsed, ['h-entry']) 262 | if not hentry: 263 | return None 264 | 265 | author_page = None 266 | 267 | # 3. if the h-entry has an author property, use that 268 | author = find_hentry_author(hentry) 269 | 270 | # 4. otherwise if the h-entry has a parent h-feed with author property, 271 | # use that 272 | if not author: 273 | author = find_parent_hfeed_author(hentry) 274 | 275 | # 5. if an author property was found 276 | if author: 277 | # 5.2 otherwise if author property is an http(s) URL, let the 278 | # author-page have that URL 279 | if list(author.keys()) == ['url']: 280 | author_page = author['url'] 281 | # 5.1 if it has an h-card, use it, exit. 282 | # 5.3 otherwise use the author property as the author name, 283 | # exit. 284 | else: 285 | return author 286 | 287 | # 6. if there is no author-page and the h-entry's page is a permalink page 288 | if not author_page: 289 | # 6.1 if the page has a rel-author link, let the author-page's 290 | # URL be the href of the rel-author link 291 | rel_authors = parsed.get('rels', {}).get('author', []) 292 | if rel_authors: 293 | author_page = rel_authors[0] 294 | 295 | # 7. if there is an author-page URL 296 | if author_page: 297 | if not fetch_mf2_func: 298 | return {'url': author_page} 299 | 300 | # 7.1 get the author-page from that URL and parse it for microformats2 301 | parsed = fetch_mf2_func(author_page) 302 | hcards = find_all_entries(parsed, ['h-card']) 303 | 304 | # 7.2 if author-page has 1+ h-card with url == uid == 305 | # author-page's URL, then use first such h-card, exit. 
306 | for hcard in hcards: 307 | hcard_url = get_plain_text(hcard['properties'].get('url')) 308 | hcard_uid = get_plain_text(hcard['properties'].get('uid')) 309 | if (hcard_url and hcard_uid and hcard_url == hcard_uid 310 | and hcard_url == author_page): 311 | return parse_author(hcard) 312 | 313 | # 7.3 else if author-page has 1+ h-card with url property 314 | # which matches the href of a rel-me link on the author-page 315 | # (perhaps the same hyperlink element as the u-url, though not 316 | # required to be), use first such h-card, exit. 317 | rel_mes = parsed.get('rels', {}).get('me', []) 318 | for hcard in hcards: 319 | hcard_url = get_plain_text(hcard['properties'].get('url')) 320 | if hcard_url and hcard_url in rel_mes: 321 | return parse_author(hcard) 322 | 323 | # 7.4 if the h-entry's page has 1+ h-card with url == 324 | # author-page URL, use first such h-card, exit. 325 | for hcard in hcards: 326 | hcard_url = get_plain_text(hcard['properties'].get('url')) 327 | if hcard_url and hcard_url == author_page: 328 | return parse_author(hcard) 329 | 330 | # 8. otherwise no deterministic author can be found. 331 | return None 332 | 333 | 334 | def representative_hcard(parsed, source_url): 335 | """Find the representative h-card for a URL 336 | 337 | http://microformats.org/wiki/representative-h-card-parsing 338 | 339 | :param dict parsed: an mf2 parsed dict 340 | :param str source_url: the source of the parsed document. 341 | :return: the representative h-card if one is found 342 | """ 343 | hcards = find_all_entries(parsed, ['h-card'], include_properties=True) 344 | # uid and url both match source_url 345 | for hcard in hcards: 346 | if (source_url in hcard['properties'].get('uid', []) 347 | and source_url in hcard['properties'].get('url', [])): 348 | return hcard 349 | # url that is also a rel=me 350 | for hcard in hcards: 351 | if any(url in parsed.get('rels', {}).get('me', []) 352 | for url in hcard['properties'].get('url', [])): 353 | return hcard 354 | # single hcard with matching url 355 | found = None 356 | count = 0 357 | for hcard in hcards: 358 | if source_url in hcard['properties'].get('url', []): 359 | found = hcard 360 | count += 1 361 | if count == 1: 362 | return found 363 | 364 | 365 | def convert_relative_paths_to_absolute(source_url, base_href, html): 366 | """Attempt to convert relative paths in foreign content 367 | to absolute based on the source url of the document. Useful for 368 | displaying images or links in reply contexts and comments. 369 | 370 | Gets list of tags/attributes from `URL_ATTRIBUTES`. Note that this 371 | function uses a regular expression to avoid adding a library 372 | dependency on a proper parser. 373 | 374 | :param str source_url: the source of the parsed document. 
375 | :param str html: the text of the source document 376 | :return: the document with relative urls replaced with absolute ones 377 | """ 378 | def do_convert(match): 379 | base_url = urljoin(source_url, base_href) if base_href else source_url 380 | return (match.string[match.start(0):match.start(1)] + 381 | urljoin(base_url, match.group(1)) + 382 | match.string[match.end(1):match.end(0)]) 383 | 384 | if source_url: 385 | for tagname, attributes in URL_ATTRIBUTES.items(): 386 | for attribute in attributes: 387 | pattern = re.compile( 388 | '<%s[^>]*?%s\s*=\s*[\'"](.*?)[\'"]' % (tagname, attribute), 389 | flags=re.DOTALL | re.MULTILINE | re.IGNORECASE) 390 | html = pattern.sub(do_convert, html) 391 | 392 | return html 393 | 394 | 395 | def is_name_a_title(name, content): 396 | """Determine whether the name property represents an explicit title. 397 | 398 | Typically when parsing an h-entry, we check whether p-name == 399 | e-content (value). If they are non-equal, then p-name likely 400 | represents a title. 401 | 402 | However, occasionally we come across an h-entry that does not 403 | provide an explicit p-name. In this case, the name is 404 | automatically generated by converting the entire h-entry content 405 | to plain text. This definitely does not represent a title, and 406 | looks very bad when displayed as such. 407 | 408 | To handle this case, we broaden the equality check to see if 409 | content is a subset of name. We also strip out non-alphanumeric 410 | characters just to make the check a little more forgiving. 411 | 412 | :param str name: the p-name property that may represent a title 413 | :param str content: the plain-text version of an e-content property 414 | :return: True if the name likely represents a separate, explicit title 415 | """ 416 | def normalize(s): 417 | if not isinstance(s, string_type): 418 | s = s.decode('utf-8') 419 | s = unicodedata.normalize('NFKD', s) 420 | s = s.lower() 421 | s = re.sub('[' + string.whitespace + string.punctuation + ']', '', s) 422 | return s 423 | if not content: 424 | return True 425 | if not name: 426 | return False 427 | return normalize(content) not in normalize(name) 428 | 429 | 430 | def post_type_discovery(hentry): 431 | """Implementation of the post-type discovery algorithm 432 | defined here https://indiewebcamp.com/post-type-discovery#Algorithm 433 | 434 | :param dict hentry: mf2 item representing the entry to test 435 | 436 | :return: string, one of: 'org', 'person', 'event', 'rsvp', 437 | 'invite', 'reply', 'repost', 'like', 'photo', 438 | 'article', 'note', 'follow' 439 | 440 | """ 441 | props = hentry.get('properties', {}) 442 | if 'h-card' in hentry.get('type', []): 443 | name = get_plain_text(props.get('name')) 444 | org = get_plain_text(props.get('org')) 445 | if name and org and name == org: 446 | return 'org' 447 | return 'person' 448 | 449 | if 'h-event' in hentry.get('type', []): 450 | return 'event' 451 | 452 | for prop, implied_type in [ 453 | ('rsvp', 'rsvp'), 454 | ('invitee', 'invite'), 455 | ('in-reply-to', 'reply'), 456 | ('repost-of', 'repost'), 457 | ('like-of', 'like'), 458 | ('follow-of', 'follow'), 459 | ('photo', 'photo'), 460 | ]: 461 | if props.get(prop) is not None: 462 | return implied_type 463 | # check name ~= content 464 | name = get_plain_text(props.get('name')) 465 | content = get_plain_text(props.get('content')) 466 | if not content: 467 | content = get_plain_text(props.get('summary')) 468 | if content and name and is_name_a_title(name, content): 469 | return 'article' 470 | return 
'note' 471 | 472 | 473 | def parse_datetime(s): 474 | """The definitions for microformats2 dt-* properties are fairly 475 | lenient. This method converts an mf2 date string into either a 476 | datetime.date or datetime.datetime object. Datetimes will be naive 477 | unless a timezone is specified. 478 | 479 | :param str s: an mf2 string representation of a date or datetime 480 | :return: datetime.date or datetime.datetime 481 | :raises ValueError: if the string is not recognizable 482 | """ 483 | 484 | if not s: 485 | return None 486 | 487 | s = re.sub('\s+', ' ', s) 488 | date_re = "(?P<year>\d{4,})-(?P<month>\d{1,2})-(?P<day>\d{1,2})" 489 | time_re = "(?P<hour>\d{1,2}):(?P<minute>\d{2})(:(?P<second>\d{2})(\.(?P<microsecond>\d+))?)?" 490 | tz_re = "(?P<tzz>Z)|(?P<tzsign>[+-])(?P<tzhour>\d{1,2}):?(?P<tzminute>\d{2})" 491 | dt_re = "%s((T| )%s ?(%s)?)?$" % (date_re, time_re, tz_re) 492 | 493 | m = re.match(dt_re, s) 494 | if not m: 495 | raise ValueError('unrecognized datetime %s' % s) 496 | 497 | year = m.group('year') 498 | month = m.group('month') 499 | day = m.group('day') 500 | 501 | hour = m.group('hour') 502 | 503 | if not hour: 504 | return date(int(year), int(month), int(day)) 505 | 506 | minute = m.group('minute') or "00" 507 | second = m.group('second') or "00" 508 | 509 | if hour: 510 | dt = datetime(int(year), int(month), int(day), int(hour), 511 | int(minute), int(second)) 512 | if m.group('tzz'): 513 | dt = dt.replace(tzinfo=utc) 514 | else: 515 | tzsign = m.group('tzsign') 516 | tzhour = m.group('tzhour') 517 | tzminute = m.group('tzminute') or "00" 518 | 519 | if tzsign and tzhour: 520 | offset = timedelta(hours=int(tzhour), 521 | minutes=int(tzminute)) 522 | if tzsign == '-': 523 | offset = -offset 524 | dt = dt.replace(tzinfo=timezone_from_offset( 525 | offset, '%s%s:%s' % (tzsign, tzhour, tzminute))) 526 | 527 | return dt 528 | 529 | 530 | parse_dt = parse_datetime # backcompat 531 | 532 | 533 | def _interpret_common_properties( 534 | parsed, source_url, base_href, hentry, use_rel_syndication, 535 | want_json, fetch_mf2_func): 536 | result = {} 537 | props = hentry['properties'] 538 | 539 | for prop in ('url', 'uid', 'photo', 'featured', 'logo'): 540 | value = get_plain_text(props.get(prop)) 541 | if value: 542 | result[prop] = value 543 | 544 | for prop in ('start', 'end', 'published', 'updated', 'deleted'): 545 | date_str = get_plain_text(props.get(prop)) 546 | if date_str: 547 | if want_json: 548 | result[prop] = date_str 549 | else: 550 | result[prop + '-str'] = date_str 551 | try: 552 | date = parse_datetime(date_str) 553 | if date: 554 | result[prop] = date 555 | except ValueError: 556 | logging.warn('Failed to parse datetime %s', date_str) 557 | 558 | author = find_author(parsed, source_url, hentry, fetch_mf2_func) 559 | if author: 560 | result['author'] = author 561 | 562 | content_prop = props.get('content') 563 | content_value = None 564 | if content_prop: 565 | if isinstance(content_prop[0], dict): 566 | content_html = content_prop[0].get('html', '').strip() 567 | content_value = content_prop[0].get('value', '').strip() 568 | else: 569 | content_value = content_html = content_prop[0] 570 | result['content'] = convert_relative_paths_to_absolute( 571 | source_url, base_href, content_html) 572 | result['content-plain'] = content_value 573 | 574 | summary_prop = props.get('summary') 575 | if summary_prop: 576 | if isinstance(summary_prop[0], dict): 577 | result['summary'] = summary_prop[0]['value'] 578 | else: 579 | result['summary'] = summary_prop[0] 580 | 581 | 582 | # Collect location objects, then follow this algorithm to consolidate their 583 |
# properties: 584 | # https://indieweb.org/location#How_to_determine_the_location_of_a_microformat 585 | location_stack = [props] 586 | 587 | for prop in 'location', 'adr': 588 | vals = props.get(prop) 589 | if vals: 590 | if isinstance(vals[0], string_type): 591 | location_stack.append({'name': vals}) 592 | else: 593 | location_stack.append(vals[0].get('properties', {})) 594 | 595 | geo = props.get('geo') 596 | if geo: 597 | if isinstance(geo[0], dict): 598 | location_stack.append(geo[0].get('properties', {})) 599 | else: 600 | if geo[0].startswith('geo:'): 601 | # a geo: URL. try to parse it. https://tools.ietf.org/html/rfc5870 602 | parts = geo[0][len('geo:'):].split(';')[0].split(',') 603 | if len(parts) >= 2: 604 | location_stack.append({ 605 | 'latitude': [parts[0]], 606 | 'longitude': [parts[1]], 607 | 'altitude': [parts[2]] if len(parts) >= 3 else [], 608 | }) 609 | 610 | for prop in LOCATION_PROPERTIES: 611 | for obj in location_stack: 612 | if obj and obj.get(prop) and not (obj == props and prop == 'name'): 613 | result.setdefault('location', {})[prop] = obj[prop][0] 614 | 615 | if use_rel_syndication: 616 | result['syndication'] = list(set( 617 | parsed.get('rels', {}).get('syndication', []) + 618 | hentry['properties'].get('syndication', []))) 619 | else: 620 | result['syndication'] = hentry['properties'].get('syndication', []) 621 | 622 | return result 623 | 624 | 625 | def interpret_event( 626 | parsed, source_url, base_href=None, hevent=None, 627 | use_rel_syndication=True, want_json=False, fetch_mf2_func=None): 628 | """Given a document containing an h-event, return a dictionary:: 629 | 630 | { 631 | 'type': 'event', 632 | 'url': the permalink url of the document (may be different than source_url), 633 | 'start': datetime or date, 634 | 'end': datetime or date, 635 | 'name': plain-text event name, 636 | 'content': body of event description (contains HTML) 637 | } 638 | 639 | :param dict parsed: the result of parsing a document containing mf2 markup 640 | :param str source_url: the URL of the parsed document, not currently used 641 | :param str base_href: (optional) the href value of the base tag 642 | :param dict hevent: (optional) the item in the above document representing 643 | the h-event. if provided, we can avoid a redundant call to 644 | find_first_entry 645 | :param boolean use_rel_syndication: (optional, default True) Whether 646 | to include rel=syndication in the list of syndication sources. Sometimes 647 | useful to set this to False when parsing h-feeds that erroneously include 648 | rel=syndication on each entry. 649 | :param boolean want_json: (optional, default false) if true, the result 650 | will be pure json with datetimes as strings instead of python objects 651 | :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed 652 | output for a given URL. 
653 | :return: a dict with some or all of the described properties 654 | """ 655 | # find the h-event if it wasn't provided 656 | if not hevent: 657 | hevent = find_first_entry(parsed, ['h-event']) 658 | if not hevent: 659 | return {} 660 | 661 | result = _interpret_common_properties( 662 | parsed, source_url, base_href, hevent, use_rel_syndication, want_json, 663 | fetch_mf2_func) 664 | result['type'] = 'event' 665 | name_value = get_plain_text(hevent['properties'].get('name')) 666 | if name_value: 667 | result['name'] = name_value 668 | return result 669 | 670 | 671 | def interpret_entry( 672 | parsed, source_url, base_href=None, hentry=None, 673 | use_rel_syndication=True, want_json=False, fetch_mf2_func=None): 674 | """Given a document containing an h-entry, return a dictionary:: 675 | 676 | { 677 | 'type': 'entry', 678 | 'url': the permalink url of the document (may be different than source_url), 679 | 'published': datetime or date, 680 | 'updated': datetime or date, 681 | 'name': title of the entry, 682 | 'content': body of entry (contains HTML), 683 | 'author': { 684 | 'name': author name, 685 | 'url': author url, 686 | 'photo': author photo 687 | }, 688 | 'syndication': [ 689 | 'syndication url', 690 | ... 691 | ], 692 | 'in-reply-to': [...], 693 | 'like-of': [...], 694 | 'repost-of': [...], 695 | } 696 | 697 | :param dict parsed: the result of parsing a document containing mf2 markup 698 | :param str source_url: the URL of the parsed document, used by the 699 | authorship algorithm 700 | :param str base_href: (optional) the href value of the base tag 701 | :param dict hentry: (optional) the item in the above document 702 | representing the h-entry. if provided, we can avoid a redundant 703 | call to find_first_entry 704 | :param boolean use_rel_syndication: (optional, default True) Whether 705 | to include rel=syndication in the list of syndication sources. Sometimes 706 | useful to set this to False when parsing h-feeds that erroneously include 707 | rel=syndication on each entry. 708 | :param boolean want_json: (optional, default False) if true, the result 709 | will be pure json with datetimes as strings instead of python objects 710 | :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed 711 | output for a given URL. 
712 | :return: a dict with some or all of the described properties 713 | """ 714 | 715 | # find the h-entry if it wasn't provided 716 | if not hentry: 717 | hentry = find_first_entry(parsed, ['h-entry']) 718 | if not hentry: 719 | return {} 720 | 721 | result = _interpret_common_properties( 722 | parsed, source_url, base_href, hentry, use_rel_syndication, want_json, 723 | fetch_mf2_func) 724 | if 'h-cite' in hentry.get('type', []): 725 | result['type'] = 'cite' 726 | else: 727 | result['type'] = 'entry' 728 | 729 | title = get_plain_text(hentry['properties'].get('name')) 730 | if title and is_name_a_title(title, result.get('content-plain')): 731 | result['name'] = title 732 | 733 | for prop in ('in-reply-to', 'like-of', 'repost-of', 'bookmark-of', 734 | 'comment', 'like', 'repost'): 735 | for url_val in hentry['properties'].get(prop, []): 736 | if isinstance(url_val, dict): 737 | result.setdefault(prop, []).append( 738 | interpret(parsed, source_url, base_href, url_val, 739 | use_rel_syndication=False, 740 | want_json=want_json, 741 | fetch_mf2_func=fetch_mf2_func)) 742 | else: 743 | result.setdefault(prop, []).append({ 744 | 'url': url_val, 745 | }) 746 | 747 | return result 748 | 749 | 750 | def interpret_feed(parsed, source_url, base_href=None, hfeed=None, 751 | want_json=False, fetch_mf2_func=None): 752 | """Interpret a source page as an h-feed or as an top-level collection 753 | of h-entries. 754 | 755 | :param dict parsed: the result of parsing a mf2 document 756 | :param str source_url: the URL of the source document (used for authorship 757 | discovery) 758 | :param str base_href: (optional) the href value of the base tag 759 | :param dict hfedd: (optional) the h-feed to be parsed. If provided, 760 | this will be used instead of the first h-feed on the page. 761 | :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed 762 | output for a given URL. 763 | :return: a dict containing 'entries', a list of entries, and possibly other 764 | feed properties (like 'name'). 765 | """ 766 | result = {} 767 | # find the first feed if it wasn't provided 768 | if not hfeed: 769 | hfeed = find_first_entry(parsed, ['h-feed']) 770 | 771 | if hfeed: 772 | names = hfeed['properties'].get('name') 773 | if names: 774 | result['name'] = names[0] 775 | children = hfeed.get('children', []) 776 | # just use the top level 'items' as the feed children 777 | else: 778 | children = parsed.get('items', []) 779 | 780 | entries = [] 781 | for child in children: 782 | entry = interpret( 783 | parsed, source_url, base_href, item=child, 784 | use_rel_syndication=False, want_json=want_json, 785 | fetch_mf2_func=fetch_mf2_func) 786 | if entry: 787 | entries.append(entry) 788 | result['entries'] = entries 789 | return result 790 | 791 | 792 | def interpret(parsed, source_url, base_href=None, item=None, 793 | use_rel_syndication=True, want_json=False, fetch_mf2_func=None): 794 | """Interpret a permalink of unknown type. Finds the first interesting 795 | h-* element, and delegates to :func:`interpret_entry` if it is an 796 | h-entry or :func:`interpret_event` for an h-event 797 | 798 | :param dict parsed: the result of parsing a mf2 document 799 | :param str source_url: the URL of the source document (used for authorship 800 | discovery) 801 | :param str base_href: (optional) the href value of the base tag 802 | :param dict item: (optional) the item to be parsed. If provided, 803 | this will be used instead of the first element on the page. 
804 | :param boolean use_rel_syndication: (optional, default True) Whether 805 | to include rel=syndication in the list of syndication sources. Sometimes 806 | useful to set this to False when parsing h-feeds that erroneously include 807 | rel=syndication on each entry. 808 | :param boolean want_json: (optional, default False) If true, the result 809 | will be pure json with datetimes as strings instead of python objects 810 | :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed 811 | output for a given URL. 812 | :return: a dict as described by interpret_entry or interpret_event, or None 813 | """ 814 | if not item: 815 | item = find_first_entry(parsed, ['h-entry', 'h-event']) 816 | 817 | if item: 818 | types = item.get('type', []) 819 | if 'h-event' in types: 820 | return interpret_event( 821 | parsed, source_url, base_href=base_href, hevent=item, 822 | use_rel_syndication=use_rel_syndication, want_json=want_json, 823 | fetch_mf2_func=fetch_mf2_func) 824 | elif 'h-entry' in types or 'h-cite' in types: 825 | return interpret_entry( 826 | parsed, source_url, base_href=base_href, hentry=item, 827 | use_rel_syndication=use_rel_syndication, want_json=want_json, 828 | fetch_mf2_func=fetch_mf2_func) 829 | 830 | 831 | def interpret_comment(parsed, source_url, target_urls, base_href=None, 832 | want_json=False, fetch_mf2_func=None): 833 | """Interpret received webmentions, and classify as like, reply, or 834 | repost (or a combination thereof). Returns a dict as described 835 | in :func:`interpret_entry`, with the additional fields:: 836 | 837 | { 838 | 'comment_type': a list of strings, zero or more of 839 | 'like', 'reply', or 'repost' 840 | 'rsvp': a string containing the rsvp response (optional) 841 | } 842 | 843 | :param dict parsed: a parsed mf2 parsed document 844 | :param str source_url: the URL of the source document 845 | :param list target_urls: a collection containing the URL of the target\ 846 | document, and any alternate URLs (e.g., shortened links) that should\ 847 | be considered equivalent when looking for references 848 | :param str base_href: (optional) the href value of the base tag 849 | :param boolean want_json: (optional, default False) If true, the result 850 | will be pure json with datetimes as strings instead of python objects 851 | :param callable fetch_mf2_func: (optional) function to fetch mf2 parsed 852 | output for a given URL. 
853 | :return: a dict as described above, or None 854 | """ 855 | item = find_first_entry(parsed, ['h-entry']) 856 | if item: 857 | result = interpret_entry(parsed, source_url, base_href=base_href, 858 | hentry=item, want_json=want_json, 859 | fetch_mf2_func=fetch_mf2_func) 860 | if result: 861 | result['comment_type'] = classify_comment(parsed, target_urls) 862 | rsvp = get_plain_text(item['properties'].get('rsvp')) 863 | if rsvp: 864 | result['rsvp'] = rsvp.lower() 865 | 866 | invitees = item['properties'].get('invitee') 867 | if invitees: 868 | result['invitees'] = [ 869 | parse_author(inv) for inv in invitees] 870 | 871 | return result 872 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | norecursedirs = venv* 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from setuptools import setup, find_packages 3 | from setuptools.command.test import test as TestCommand 4 | import sys 5 | 6 | 7 | class PyTest(TestCommand): 8 | user_options = [('pytest-args=', 'a', "Arguments to pass to py.test")] 9 | 10 | def initialize_options(self): 11 | TestCommand.initialize_options(self) 12 | self.pytest_args = [] 13 | 14 | def finalize_options(self): 15 | TestCommand.finalize_options(self) 16 | self.test_args = [] 17 | self.test_suite = True 18 | 19 | def run_tests(self): 20 | # import here, cause outside the eggs aren't loaded 21 | import pytest 22 | errno = pytest.main(self.pytest_args) 23 | sys.exit(errno) 24 | 25 | 26 | setup(name='mf2util', 27 | version='0.5.2', 28 | description='Python Microformats2 utilities, a companion to mf2py', 29 | long_description=""" 30 | Microformats2 Utilities 31 | ======================= 32 | 33 | Microformats2 provides an extremely flexible way to mark up HTML 34 | documents, so that human-centric data is machine-discoverable. This 35 | utility can be used to interpret a microformatted post or event, for 36 | display as a `comment <http://indiewebcamp.com/comments-presentation>`__ 37 | or `reply-context <http://indiewebcamp.com/reply-context>`__. 38 | 39 | The library itself has no dependencies, but it won't do you much good 40 | without an mf2 parser. I use and recommend 41 | `mf2py <https://github.com/tommorris/mf2py>`__. 42 | 43 | Full `documentation is on GitHub 44 | `__. 
45 | 46 | Compatibility: Python 2.6, 2.7, 3.3+ 47 | 48 | License: `Simplified BSD <http://opensource.org/licenses/BSD-2-Clause>`__ 49 | """, 50 | author='Kyle Mahan', 51 | author_email='kyle.mahan@gmail.com', 52 | url='http://indiewebcamp.com/mf2util', 53 | py_modules=['mf2util'], 54 | tests_require=['pytest', 'mf2py'], 55 | cmdclass={'test': PyTest}, 56 | classifiers=[ 57 | 'Development Status :: 3 - Alpha', 58 | 'Intended Audience :: Developers', 59 | 'License :: OSI Approved :: BSD License', 60 | 'Natural Language :: English', 61 | 'Programming Language :: Python', 62 | 'Programming Language :: Python :: 3', 63 | 'Topic :: Text Processing :: Markup :: HTML' 64 | ] 65 | ) 66 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karadaisy/mf2util/b1acda62ea5b0d500dc5a6770b2c681825a01e41/tests/__init__.py -------------------------------------------------------------------------------- /tests/authorship/h-card_with_u-url_equal_to_self.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | h-card with u-url == self 6 | 7 | 8 | 9 |
10 | 11 |

Wrong One

12 |
13 | 14 |
15 | 16 | 17 | 18 |

John Doe

19 |
20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /tests/authorship/h-card_with_u-url_equal_to_u-uid_equal_to_self.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | h-card with u-url == u-uid == self 6 | 7 | 8 | 9 | 10 |
11 | 12 |

Wrong One

13 |
14 | 15 |
16 | 17 | 18 | 19 |

John Doe

20 |
21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /tests/authorship/h-card_with_u-url_that_is_also_rel-me.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | h-card with u-url == rel-me 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 |

Wrong One

14 |
15 | 16 |
17 | 18 | 19 | 20 |

John Doe

21 |
22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /tests/authorship/h-entry_with_p-author_h-card.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | h-entry with p-author 6 | 7 | 8 | 9 | 10 |
11 |
12 | 13 | 14 | 15 |

John Doe

16 |
17 |
Hello World!
18 |
19 | 20 | 21 | 22 | -------------------------------------------------------------------------------- /tests/authorship/h-entry_with_rel-author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | h-entry with rel-author 6 | 7 | 8 | 9 |
10 | Hello World! 11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /tests/authorship/h-entry_with_u-author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | h-entry with u-author pointing to an author page 5 | 6 | 7 | 8 |
9 | Not Here 10 | 11 | Follow the u-author URL to find the full h-card 12 | 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/authorship/h-feed_with_p-author_h-card.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | h-feed with p-author h-card 5 | 6 | 7 | 8 |
9 | 15 | 16 |
17 | 18 | First entry 19 | 20 |
21 | 22 |
23 | 24 | Second entry 25 | 26 |
27 | 28 |
29 | 30 | Third entry 31 | 32 |
33 | 34 |
35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /tests/authorship/h-feed_with_u-author.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | h-feed with u-author 5 | 6 | 7 | 8 |
9 | 10 | Not Here 11 | 12 |
13 | 14 | First entry 15 | 16 |
17 | 18 |
19 | 20 | Second entry 21 | 22 |
23 | 24 |
25 | 26 | Third entry 27 | 28 |
29 | 30 |
31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /tests/authorship/no_h-card.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | h-entry with p-author 6 | 7 | 8 | 9 |

Nothing to see here. Move along.

10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/interpret/article_naive_datetime.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "name": [ 6 | "\nMarkup For People Focused Mobile Communication \n" 7 | ], 8 | "published": [ 9 | "2014-04-30T12:11" 10 | ], 11 | "uid": [ 12 | "http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication" 13 | ], 14 | "url": [ 15 | "http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication" 16 | ], 17 | "updated": [ 18 | "2014-04-30T12:11" 19 | ], 20 | "content": [ 21 | { 22 | "html": "\n

\nAll functionality on web pages and applications starts with markup. The previous post in this series, URLs For People Focused Mobile Communication, documented the various URL schemes for launching the communication apps shown in the mockups, as well as results of testing them on mobile devices. Those tests used minimal markup.\n

\n

\nThis post documents and explains that markup, building up element by element from a simple hyperlink to the structure implied by this mockup:\n

\n

\"mobile\n

\n

\nOr if you want, you may jump directly to the complete markup example.\n

\n

A hyperlink

\n

\nA hyperlink provides a way for the user to navigate to other web pages. Using a URL scheme for a communication app, a hyperlink can start a message, resume a conversation, or start an audio/video call. Here's a simple hyperlink that uses the first URL scheme documented in the previous post, sms:\n

\n
<a href=\"sms:user@example.com\">txt message</a>\n
\n

\nLive example: txt message\n

\n

\nActivating that live example likely won't do much, as user@example.com does not belong to anyone. Example.com is a domain registered purely for the purpose of examples like this one. To make this hyperlink work, you'd have to use a registered AppleID email address, which would send a txt on iOS, and fallback to email via phone provider on Android.\n

\n

Action labels not app names

\n

\nI use the link text \"txt message\" to indicate its user-centered function: the action of creating a txt message, from one human to another.\n

\n

\nContrast that with the mockup above (which I \"built\" using an iOS7 home screen folder), which uses the label \"Messages\", the name of the application it launches. \n

\n

\nThis deliberate change from \"Messages\" (application) to \"txt message\" (action) reflects the larger purpose of this exercise: people-focused rather than app-focused communication. Subsequent labels follow a similar approach.\n

\n

An image hyperlink

\n

\nA simple text hyperlink is functional, yet does not provide the immediate association and recognition conveyed by the Messages icon in the mockup. There are two methods of providing an image hyperlink:\n

\n
    \n
  • An <img> element inside the hyperlink
  • \n
  • A CSS background-image
  • \n
\n

\nThe question of when to use markup for an image and when to use CSS is usually easily answered by the question: is the image meaningful content (like a photograph) or purely decorative (like a flourish)? Or by asking, is any meaning lost if the image is dropped?\n

\n

\nThe Messages image is neither content nor decorative. It's a button, and it's also a standard iOS user interface element, which means it does convey meaning to those users, above and beyond any text label. Here's the minimum markup for an image hyperlink, with the link text moved into the alt attribute as a fallback:\n

\n
<a href=\"sms:user@example.com\">\n  <img src=\"ios7-messages-icon.png\" \n       alt=\"txt message\"/>\n</a>\n
\n

\nLive example:
\n\"txt\n

\n

Image and text hyperlink

\n

\nThere is a third option, as implied by the mockup, and that is to use both an image and a text label. That's a simple matter of moving the alt text outside the image:\n

\n
<a href=\"sms:user@example.com\">\n  <img src=\"ios7-messages-icon.png\" \n       alt=\"\"/>\n  txt message\n</a>\n
\n

\nLive example:
\n\"\"\ntxt message\n

\n

\nThe alt attribute is left deliberately empty since putting anything there would not add to the usability of the link, and could in fact detract from it.\n

\n

\nUnlike the mockup, the link text is next to (instead of underneath) the image, and is blue & underlined. These are all presentational aspects and will be addressed in the next post on CSS for People Focused Mobile Communication.\n

\n

A list of communication options

\n

\nThe mockup also shows multiple communication buttons in a list laid out as a grid. We can assign meaning to the order of the buttons - the site owner's preferred order of communication methods. Thus we use an ordered list to convey that their order is significant. Here's a few image+text links wrapped in list items inside an ordered list:\n

\n
<ol>\n  <li><a href=\"sms:user@example.com\">\n      <img src=\"ios7-messages-icon.png\" \n           alt=\"\"/>\n      txt message\n  </a></li>\n  <li><a href=\"fb-messenger://user-thread/4\">\n      <img src=\"fb-messenger-icon.png\" \n           alt=\"\"/>\n      <abbr title=\"Facebook\">FB</abbr> message\n  </a></li>\n  <li><a href=\"aim:goim?screenname=tantekc&message=hi\">\n      <img src=\"aim-icon.png\" \n           alt=\"\"/>\n      AIM chat\n  </a></li>\n</ol>\n
\n

\nNote the use of an <abbr> element to abbreviate \"Facebook\" just to \"FB\" to shorten the overall \"FB message\" link text.\n

\n

\nLive example: \n

\n
    \n
  1. \"\"\ntxt message
  2. \n
  3. \"\"\nFB message
  4. \n
  5. \n\"\" AIM chat
  6. \n
\n

\nJust as in the previous URLs post, the FB message link uses Zuck's ID, and the AIM chat link uses the same nickname I've had in the sidebar for a while.\n

\n\n

List heading

\n\n

\nThe mockup labels the entire grid \"Contact\" (also deliberately chosen as an action, rather than the \"Contacts\" application). This makes sense as a heading, and in the context of a home page, a second level heading:\n

\n\n
<h2>Contact</h2>\n
\n\n

No need for a separate live example - the subheads above are all <h2> elements. As is this one:

\n\n

Putting it all together

\n\n

\nCombining the Contact heading with the previous ordered list, and adding the remaining buttons:\n

\n\n
<h2>Contact</h2>\n<ol>\n  <li><a href=\"sms:user@example.com\">\n      <img src=\"ios7-messages-icon.png\" \n           alt=\"\"/>\n      txt message\n  </a></li>\n  <li><a href=\"fb-messenger://user-thread/4\">\n      <img src=\"fb-messenger-icon.png\" \n           alt=\"\"/>\n      <abbr title=\"Facebook\">FB</abbr> message\n  </a></li>\n  <li><a href=\"aim:goim?screenname=tantekc&message=hi\">\n      <img src=\"aim-icon.png\" \n           alt=\"\"/>\n      AIM chat\n  </a></li>\n  <li><a href=\"facetime:user@example.com\">\n      <img src=\"facetime-icon.png\" \n           alt=\"\"/>\n      FaceTime call\n  </a></li>\n  <li><a href=\"skype:echo123?call\">\n      <img src=\"skype-icon.png\" \n           alt=\"\"/>\n      Skype call\n  </a></li>\n  <li><a href=\"https://mobile.twitter.com/t/messages\">\n      <img src=\"twitter-dm-icon.png\" \n           alt=\"\"/>\n      Twitter DM\n  </a></li>\n</ol>\n
\n\n

\nIn this final code example I've highlighted (using orange bold tags), the key pieces you need to change to your own identifiers on each service.\n

\n\n

\nLive example once more, including heading:\n

\n\n

Contact

\n
    \n
  1. \"\"\ntxt message
  2. \n
  3. \"\"\nFB message
  4. \n
  5. \n\"\" AIM chat
  6. \n
  7. \n\"\" FaceTime call
  8. \n\n
  9. \n\"\" Skype call
  10. \n\n
  11. \n\"\" Twitter DM
  12. \n\n
\n\n

\nI dropped the Google Hangouts icon since that application lacks support for any URL schemes (as noted in the previous post). Also I've re-ordered a bit from the mockup, having found that I prefer FaceTime over Skype. Pick your own from among the documented URL schemes, and order them to your preference.\n

\n\n

Next Steps

\n\n

\nAll the essential structure is there, yet it clearly needs some CSS. There's plenty to fix from inconsistent image sizes (all but the Messages & FaceTime icons are from Apple's iTunes store web pages), to blue underlined link text. And there's plenty to clean up to approach the look of the mockup: from the clustered center-aligned image+text button layout, to the grid layout of the buttons, to white text on the gray rounded corner ordered list background.

\n

\nThat's all for the next post in this series.\n

\n\n", 23 | "value": "\n\nAll functionality on web pages and applications starts with markup. The previous post in this series, URLs For People Focused Mobile Communication, documented the various URL schemes for launching the communication apps shown in the mockups, as well as results of testing them on mobile devices. Those tests used minimal markup.\n\n\nThis post documents and explains that markup, building up element by element from a simple hyperlink to the structure implied by this mockup:\n\n\n\n\nOr if you want, you may jump directly to the complete markup example.\n\nA hyperlink\n\nA hyperlink provides a way for the user to navigate to other web pages. Using a URL scheme for a communication app, a hyperlink can start a message, resume a conversation, or start an audio/video call. Here's a simple hyperlink that uses the first URL scheme documented in the previous post, sms:\n\ntxt message\n\n\nLive example: txt message\n\n\nActivating that live example likely won't do much, as user@example.com does not belong to anyone. Example.com is a domain registered purely for the purpose of examples like this one. To make this hyperlink work, you'd have to use a registered AppleID email address, which would send a txt on iOS, and fallback to email via phone provider on Android.\n\nAction labels not app names\n\nI use the link text \"txt message\" to indicate its user-centered function: the action of creating a txt message, from one human to another.\n\n\nContrast that with the mockup above (which I \"built\" using an iOS7 home screen folder), which uses the label \"Messages\", the name of the application it launches. \n\n\nThis deliberate change from \"Messages\" (application) to \"txt message\" (action) reflects the larger purpose of this exercise: people-focused rather than app-focused communication. Subsequent labels follow a similar approach.\n\nAn image hyperlink\n\nA simple text hyperlink is functional, yet does not provide the immediate association and recognition conveyed by the Messages icon in the mockup. There are two methods of providing an image hyperlink:\n\n\nAn element inside the hyperlink\nA CSS background-image\n\n\nThe question of when to use markup for an image and when to use CSS is usually easily answered by the question: is the image meaningful content (like a photograph) or purely decorative (like a flourish)? Or by asking, is any meaning lost if the image is dropped?\n\n\nThe Messages image is neither content nor decorative. It's a button, and it's also a standard iOS user interface element, which means it does convey meaning to those users, above and beyond any text label. Here's the minimum markup for an image hyperlink, with the link text moved into the alt attribute as a fallback:\n\n\n \"txt\n\n\n\nLive example: \n\n\nImage and text hyperlink\n\nThere is a third option, as implied by the mockup, and that is to use both an image and a text label. That's a simple matter of moving the alt text outside the image:\n\n\n \"\"/\n txt message\n\n\n\nLive example: \n\ntxt message\n\n\nThe alt attribute is left deliberately empty since putting anything there would not add to the usability of the link, and could in fact detract from it.\n\n\nUnlike the mockup, the link text is next to (instead of underneath) the image, and is blue & underlined. 
These are all presentational aspects and will be addressed in the next post on CSS for People Focused Mobile Communication.\n\nA list of communication options\n\nThe mockup also shows multiple communication buttons in a list laid out as a grid. We can assign meaning to the order of the buttons - the site owner's preferred order of communication methods. Thus we use an ordered list to convey that their order is significant. Here's a few image+text links wrapped in list items inside an ordered list:\n\n
    \n
  1. \n \"\"/\n txt message\n
  2. \n
  3. \n \"\"/\n FB message\n
  4. \n
  5. \n \"\"/\n AIM chat\n
  6. \n
\n\n\nNote the use of an element to abbreviate \"Facebook\" just to \"FB\" to shorten the overall \"FB message\" link text.\n\n\nLive example: \n\n\n\ntxt message\n\nFB message\n\n AIM chat\n\n\nJust as in the previous URLs post, the FB message link uses Zuck's ID, and the AIM chat link uses the same nickname I've had in the sidebar for a while.\n\n\nList heading\n\n\nThe mockup labels the entire grid \"Contact\" (also deliberately chosen as an action, rather than the \"Contacts\" application). This makes sense as a heading, and in the context of a home page, a second level heading:\n\n\n

Contact

\n\n\nNo need for a separate live example - the subheads above are all

elements. As is this one:\n\nPutting it all together\n\n\nCombining the Contact heading with the previous ordered list, and adding the remaining buttons:\n\n\n

Contact

\n
    \n
  1. \n \"\"/\n txt message\n
  2. \n
  3. \n \"\"/\n FB message\n
  4. \n
  5. \n \"\"/\n AIM chat\n
  6. \n
  7. \n \"\"/\n FaceTime call\n
  8. \n
  9. \n \"\"/\n Skype call\n
  10. \n
  11. \n \"\"/\n Twitter DM\n
  12. \n
\n\n\n\nIn this final code example I've highlighted (using orange bold tags), the key pieces you need to change to your own identifiers on each service.\n\n\n\nLive example once more, including heading:\n\n\nContact\n\n\ntxt message\n\nFB message\n\n AIM chat\n\n FaceTime call\n\n\n Skype call\n\n\n Twitter DM\n\n\n\n\nI dropped the Google Hangouts icon since that application lacks support for any URL schemes (as noted in the previous post). Also I've re-ordered a bit from the mockup, having found that I prefer FaceTime over Skype. Pick your own from among the documented URL schemes, and order them to your preference.\n\n\nNext Steps\n\n\nAll the essential structure is there, yet it clearly needs some CSS. There's plenty to fix from inconsistent image sizes (all but the Messages & FaceTime icons are from Apple's iTunes store web pages), to blue underlined link text. And there's plenty to clean up to approach the look of the mockup: from the clustered center-aligned image+text button layout, to the grid layout of the buttons, to white text on the gray rounded corner ordered list background.\n\nThat's all for the next post in this series.\n\n\n" 24 | } 25 | ], 26 | "author": [ 27 | { 28 | "value": "", 29 | "properties": { 30 | "name": [ 31 | "Tantek \u00c7elik" 32 | ], 33 | "photo": [ 34 | "http://tantek.com/logo.jpg" 35 | ], 36 | "url": [ 37 | "http://tantek.com/" 38 | ] 39 | }, 40 | "type": [ 41 | "h-card" 42 | ] 43 | } 44 | ] 45 | }, 46 | "type": [ 47 | "h-entry", 48 | "h-as-article" 49 | ] 50 | } 51 | ], 52 | "rels": { 53 | "author": [ 54 | "http://tantek.com/", 55 | "http://tantek.com/" 56 | ], 57 | "next": [ 58 | "http://tantek.com/2014/120/t1/great-wdc14-talk-removed-twitter-follow-button-js" 59 | ], 60 | "home": [ 61 | "http://tantek.com/" 62 | ], 63 | "prev": [ 64 | "http://tantek.com/2014/118/t2/indiewebcamp-nyc-lunch-sponsor-dinner" 65 | ], 66 | "hub": [ 67 | "http://pubsubhubbub.appspot.com/" 68 | ], 69 | "webmention": [ 70 | "http://webmention.io/tantek.com/webmention" 71 | ] 72 | }, 73 | "alternates": [ 74 | { 75 | "type": "application/atom+xml", 76 | "url": "http://tantek.com/updates.atom", 77 | "rel": "home" 78 | } 79 | ] 80 | } -------------------------------------------------------------------------------- /tests/interpret/article_no_p-name.json: -------------------------------------------------------------------------------- 1 | { 2 | "alternates": [ 3 | { 4 | "rel": "feed", 5 | "type": "application/rss+xml", 6 | "url": "http://werd.io/2014/egg-brooklyn?_t=rss" 7 | }, 8 | { 9 | "rel": "feed", 10 | "type": "application/rss+xml", 11 | "url": "http://werd.io/content/all?_t=rss" 12 | } 13 | ], 14 | "items": [ 15 | { 16 | "children": [ 17 | { 18 | "properties": { 19 | "name": [ 20 | "" 21 | ], 22 | "photo": [ 23 | "http://werd.io/gfx/users/default-00.png" 24 | ], 25 | "url": [ 26 | "\n" 27 | ] 28 | }, 29 | "type": [ 30 | "h-card" 31 | ], 32 | "value": "\n\n\n" 33 | } 34 | ], 35 | "properties": { 36 | "author": [ 37 | { 38 | "properties": { 39 | "name": [ 40 | "Ben Werdm\u00fcller" 41 | ], 42 | "photo": [ 43 | "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg" 44 | ], 45 | "url": [ 46 | "http://werd.io/profile/benwerd", 47 | "http://werd.io/profile/benwerd", 48 | "http://werd.io/profile/benwerd" 49 | ] 50 | }, 51 | "type": [ 52 | "h-card" 53 | ], 54 | "value": "\n\nBen Werdm\u00fcller\n\n\n" 55 | } 56 | ], 57 | "content": [ 58 | { 59 | "html": "\n

Egg, Brooklyn

\n

\n\n

\n

Give me crayons and I will draw a rocketship. Fact.

\n", 60 | "value": "\nEgg, Brooklyn\n\n\n\nGive me crayons and I will draw a rocketship. Fact.\n" 61 | } 62 | ], 63 | "name": [ 64 | "Ben Werdm\u00fcller\n\n\n\n\u00a0\n\n\n\n\n2014-09-19T14:16:45+00:00\n\n\n\n\nEgg, Brooklyn\n\n\n\nGive me crayons and I will draw a rocketship. Fact.\n\n\n\n\n\n\n\n 1 star\n 1 comment\n\n\n\n\n\n\n\n\n\n\n\n\n\nsorry to break it to you, that is a squid.\nKyle Mahan,\n Sep 20 2014\n on kylewm.com\n\n\n\n\n\n\n\n\n\n\nDavid Walker\n liked this post\n \nSep 20 2014 on facebook.com\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n\n\n\n\n\n\n $(document).ready(function () {\n\n $('#extrafield').html('');\n\n })\n \n\n\n\n Also on:\n flickr facebook" 65 | ], 66 | "photo": [ 67 | "Egg, Brooklyn", 68 | "http://werd.io/file/541c3accbed7ded9797e59bb/thumb.jpg" 69 | ], 70 | "published": [ 71 | "2014-09-19T14:16:45+0000" 72 | ], 73 | "syndication": [ 74 | "https://www.flickr.com/photos/benwerd/15264900606/", 75 | "https://facebook.com/10100928029673259" 76 | ], 77 | "url": [ 78 | "http://werd.io/2014/egg-brooklyn" 79 | ] 80 | }, 81 | "type": [ 82 | "h-entry" 83 | ] 84 | } 85 | ], 86 | "rels": { 87 | "apple-touch-icon": [ 88 | "http://werd.io/gfx/logos/apple-icon-57x57.png", 89 | "http://werd.io/gfx/logos/apple-icon-72x72.png", 90 | "http://werd.io/gfx/logos/apple-icon-114x114.png", 91 | "http://werd.io/gfx/logos/apple-icon-144x144.png", 92 | "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg" 93 | ], 94 | "apple-touch-icon-precomposed": [ 95 | "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg" 96 | ], 97 | "author": [ 98 | "http://werd.io/humans.txt" 99 | ], 100 | "authorization_endpoint": [ 101 | "https://indieauth.com/auth" 102 | ], 103 | "feed": [ 104 | "http://werd.io/content/all" 105 | ], 106 | "http://webmention.org/": [ 107 | "http://werd.io/webmention/" 108 | ], 109 | "hub": [ 110 | "http://benwerd.superfeedr.com/" 111 | ], 112 | "icon": [ 113 | "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg" 114 | ], 115 | "micropub": [ 116 | "http://werd.io/micropub/endpoint" 117 | ], 118 | "openid.delegate": [ 119 | "http://werd.io/" 120 | ], 121 | "openid.server": [ 122 | "https://indieauth.com/openid" 123 | ], 124 | "permalink": [ 125 | "http://werd.io/2014/egg-brooklyn" 126 | ], 127 | "schema.DC": [ 128 | "http://purl.org/dc/elements/1.1/" 129 | ], 130 | "shortcut": [ 131 | "http://werd.io/file/538d0a4cbed7de5111a1ad31/thumb.jpg" 132 | ], 133 | "stylesheet": [ 134 | "http://werd.io/external/bootstrap/assets/css/bootstrap.css", 135 | "http://werd.io/external/font-awesome/css/font-awesome.min.css", 136 | "http://werd.io/external/bootstrap/assets/css/bootstrap-responsive.css", 137 | "http://werd.io/css/default.css", 138 | "http://cdn.leafletjs.com/leaflet-0.5/leaflet.css", 139 | "http://werd.io/styles/site/", 140 | "http://werd.io/Themes/Cherwell/css/default.css", 141 | "http://werd.io/external/mediaelement/build/mediaelementplayer.css", 142 | "http://werd.io/external/summernote/dist/summernote.css", 143 | "http://werd.io/external/mention/recommended-styles.css" 144 | ], 145 | "syndication": [ 146 | "https://www.flickr.com/photos/benwerd/15264900606/", 147 | "https://facebook.com/10100928029673259" 148 | ], 149 | "token_endpoint": [ 150 | "http://werd.io/indieauth/token" 151 | ], 152 | "webmention": [ 153 | "http://werd.io/webmention/" 154 | ] 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /tests/interpret/article_non_ascii_content.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-entry" 6 | ], 7 | "properties": { 8 | "name": [ 9 | "foo" 10 | ], 11 | "content": [ 12 | "Поч" 13 | ] 14 | } 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /tests/interpret/article_two_published_dates.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "name": [ 6 | "Test Article with Two Published Dates" 7 | ], 8 | "published": [ 9 | "2014-04-30T12:11:00-0800", 10 | "2014-04-30T12:11:00-0800" 11 | ], 12 | "content": [ 13 | { 14 | "html": "", 15 | "value": "" 16 | } 17 | ], 18 | "author": [ 19 | { 20 | "value": "", 21 | "properties": { 22 | "name": [ 23 | "Aaron Parecki" 24 | ], 25 | "url": [ 26 | "http://aaronparecki.com/" 27 | ] 28 | }, 29 | "type": [ 30 | "h-card" 31 | ] 32 | } 33 | ] 34 | }, 35 | "type": [ 36 | "h-entry", 37 | "h-as-article" 38 | ] 39 | } 40 | ], 41 | "rels": { 42 | }, 43 | "alternates": [ 44 | { 45 | } 46 | ] 47 | } 48 | -------------------------------------------------------------------------------- /tests/interpret/follow.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "name": [ 6 | "I follow thee" 7 | ], 8 | "published": [ 9 | "2014-05-05T10:10:53-07:00" 10 | ], 11 | "author": [ 12 | { 13 | "value": "Ryan Barrett", 14 | "properties": { 15 | "name": [ 16 | "Ryan Barrett" 17 | ], 18 | "photo": [ 19 | "https://secure.gravatar.com/avatar/947b5f3f323da0ef785b6f02d9c265d6?s=96&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&r=G" 20 | ], 21 | "url": [ 22 | "http://snarfed.org/" 23 | ] 24 | }, 25 | "type": [ 26 | "h-card" 27 | ] 28 | } 29 | ], 30 | "follow-of": [ 31 | "http://other/person" 32 | ], 33 | "url": [ 34 | "https://snarfed.org/2014-05-05_follow" 35 | ], 36 | "content": [ 37 | { 38 | "html": "I follow thee", 39 | "value": "I follow thee" 40 | } 41 | ] 42 | }, 43 | "type": [ 44 | "h-entry" 45 | ] 46 | } 47 | ] 48 | } 49 | -------------------------------------------------------------------------------- /tests/interpret/hwc-event.json: -------------------------------------------------------------------------------- 1 | { 2 | "alternates": [ 3 | { 4 | "type": "application/rss+xml", 5 | "url": "http://werd.io/2014/homebrew-website-club-4?_t=rss", 6 | "rel": "feed" 7 | }, 8 | { 9 | "type": "application/rss+xml", 10 | "url": "http://werd.io/content/all?_t=rss", 11 | "rel": "feed" 12 | } 13 | ], 14 | "items": [ 15 | { 16 | "type": [ 17 | "h-card" 18 | ], 19 | "properties": { 20 | "photo": [ 21 | "http://werd.io/file/52be39babed7deb701668dd8" 22 | ], 23 | "name": [ 24 | "Ben Werdm\u00fcller" 25 | ], 26 | "url": [ 27 | "http://werd.io/profile/benwerd", 28 | "http://werd.io/profile/benwerd" 29 | ] 30 | } 31 | }, 32 | { 33 | "type": [ 34 | "h-event" 35 | ], 36 | "properties": { 37 | "published": [ 38 | "2014-05-05T16:34:30+00:00" 39 | ], 40 | "end": [ 41 | "2014-05-07T19:30:00+00:00" 42 | ], 43 | "start": [ 44 | "2014-05-07T18:30:00+00:00" 45 | ], 46 | "content": [ 47 | { 48 | "value": "\n \n \n Homebrew Website Club\n \n \n \n Discuss progress; meet up; make new friends. \n \n Location: Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA \n \n \n Time: May 7th, 6:30pm\n \n \n Ends: May 7th, 7:30pm\n \n \n\n \nAre you building your own website? 
Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with like-minded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project ...\n\nSee the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.\n\nOriginally posted on indiewebcamp.com. There's also a companion event at Mozilla Portland.\n\nHere's the Facebook event, if you prefer.\n ", 49 | "html": "\n
\n

\n Homebrew Website Club\n

\n
\n

\n Discuss progress; meet up; make new friends.

\n

\n Location: Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA \n

\n

\n Time: \n

\n

\n Ends: \n

\n
\n\n \n

Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with like-minded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project ...

\n\n

See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.

\n\n

Originally posted on indiewebcamp.com. There's also a companion event at Mozilla Portland.

\n\n

Here's the Facebook event, if you prefer.

\n
" 50 | } 51 | ], 52 | "location": [ 53 | "Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA " 54 | ], 55 | "author": [ 56 | { 57 | "value": "\n \n Ben Werdm\u00fcller\n \n ", 58 | "type": [ 59 | "h-card" 60 | ], 61 | "properties": { 62 | "photo": [ 63 | "http://werd.io/file/52be39babed7deb701668dd8" 64 | ], 65 | "logo": [ 66 | "http://werd.io/file/52be39babed7deb701668dd8" 67 | ], 68 | "name": [ 69 | "Ben Werdm\u00fcller" 70 | ], 71 | "url": [ 72 | "http://werd.io/profile/benwerd", 73 | "http://werd.io/profile/benwerd" 74 | ] 75 | } 76 | } 77 | ], 78 | "name": [ 79 | "\n Homebrew Website Club\n " 80 | ], 81 | "syndication": [ 82 | "https://www.facebook.com/events/1430990723825351/" 83 | ], 84 | "summary": [ 85 | "\n Discuss progress; meet up; make new friends. " 86 | ], 87 | "url": [ 88 | "http://werd.io/2014/homebrew-website-club-4", 89 | "http://werd.io/2014/homebrew-website-club-4" 90 | ] 91 | } 92 | } 93 | ], 94 | "rels": { 95 | "http://webmention.org/": [ 96 | "http://werd.io/webmention/" 97 | ], 98 | "icon": [ 99 | "http://werd.io/file/52be39babed7deb701668dd8" 100 | ], 101 | "syndication": [ 102 | "https://www.facebook.com/events/1430990723825351/" 103 | ], 104 | "stylesheet": [ 105 | "http://werd.io/external/bootstrap/assets/css/bootstrap.css", 106 | "http://werd.io/external/font-awesome/css/font-awesome.min.css", 107 | "http://werd.io/external/bootstrap/assets/css/bootstrap-responsive.css", 108 | "http://werd.io/css/default.css", 109 | "http://werd.io/IdnoPlugins/Checkin/external/leaflet/leaflet.css", 110 | "http://werd.io/styles/site/" 111 | ], 112 | "webmention": [ 113 | "http://werd.io/webmention/" 114 | ], 115 | "feed": [ 116 | "http://werd.io/content/all" 117 | ], 118 | "shortcut": [ 119 | "http://werd.io/file/52be39babed7deb701668dd8" 120 | ], 121 | "author": [ 122 | "http://werd.io/humans.txt" 123 | ], 124 | "apple-touch-icon-precomposed": [ 125 | "http://werd.io/file/52be39babed7deb701668dd8" 126 | ], 127 | "permalink": [ 128 | "http://werd.io/2014/homebrew-website-club-4" 129 | ] 130 | } 131 | } -------------------------------------------------------------------------------- /tests/interpret/location_h-adr.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-entry" 6 | ], 7 | "properties": { 8 | "adr": [ 9 | { 10 | "type": [ 11 | "h-adr" 12 | ], 13 | "properties": { 14 | "street-address": ["17 Austerstræti"], 15 | "locality": ["Reykjavík"], 16 | "country-name": ["Iceland"], 17 | "postal-code": ["107"], 18 | "name": ["17 Austerstræti Reykjavík Iceland 107"] 19 | } 20 | } 21 | ] 22 | } 23 | } 24 | ] 25 | } 26 | -------------------------------------------------------------------------------- /tests/interpret/location_h-card.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-entry" 6 | ], 7 | "properties": { 8 | "location": [ 9 | { 10 | "type": [ 11 | "h-card" 12 | ], 13 | "properties": { 14 | "name": ["Timeless Coffee Roasters"], 15 | "latitude": ["37.83"], 16 | "longitude": ["-122.25"] 17 | } 18 | } 19 | ] 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /tests/interpret/location_h-geo.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-entry" 6 | ], 7 | "properties": { 8 | "geo": [ 9 | { 10 | "type": [ 11 | "h-geo" 12 | ], 13 | "properties": 
{ 14 | "altitude": ["123.0"], 15 | "latitude": ["37.83"], 16 | "longitude": ["-122.25"] 17 | } 18 | } 19 | ] 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /tests/interpret/location_top_level.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-entry" 6 | ], 7 | "properties": { 8 | "latitude": ["37.83"], 9 | "longitude": ["-122.25"] 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /tests/interpret/note_with_comment_and_like.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "author": [ 6 | { 7 | "properties": { 8 | "name": [ 9 | "Kyle Mahan" 10 | ], 11 | "photo": [ 12 | "https://kylewm.com/static/img/users/kyle.jpg" 13 | ], 14 | "url": [ 15 | "https://kylewm.com" 16 | ] 17 | }, 18 | "type": [ 19 | "h-card" 20 | ], 21 | "value": "Kyle Mahan" 22 | } 23 | ], 24 | "category": [ 25 | "indieweb" 26 | ], 27 | "comment": [ 28 | { 29 | "properties": { 30 | "author": [ 31 | { 32 | "properties": { 33 | "name": [ 34 | "Aaron Parecki" 35 | ], 36 | "photo": [ 37 | "https://twitter.com/aaronpk/profile_image?size=original" 38 | ], 39 | "url": [ 40 | "http://aaronparecki.com" 41 | ] 42 | }, 43 | "type": [ 44 | "h-card" 45 | ], 46 | "value": "Aaron Parecki" 47 | } 48 | ], 49 | "content": [ 50 | { 51 | "html": "\n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n ", 52 | "value": "\n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n " 53 | } 54 | ], 55 | "name": [ 56 | "Aaron Parecki\n \n\n \n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n \n \n on twitter.com" 57 | ], 58 | "published": [ 59 | "2015-10-05T17:07:20-07:00" 60 | ], 61 | "url": [ 62 | "https://twitter.com/aaronpk/status/651186942114050050" 63 | ] 64 | }, 65 | "type": [ 66 | "h-cite" 67 | ], 68 | "value": "Aaron Parecki\n \n\n \n @kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb\n \n \n on twitter.com" 69 | } 70 | ], 71 | "content": [ 72 | { 73 | "html": "\n

big thing missing from my #indieweb experience is being able to see other people\u2019s comments before replying. tough problem to solve.

\n \n ", 74 | "value": "\n big thing missing from my #indieweb experience is being able to see other people\u2019s comments before replying. tough problem to solve.\n \n " 75 | } 76 | ], 77 | "like": [ 78 | { 79 | "properties": { 80 | "author": [ 81 | { 82 | "properties": { 83 | "name": [ 84 | "" 85 | ], 86 | "photo": [ 87 | "https://kylewm.com/imageproxy?url=https%3A%2F%2Ftwitter.com%2Fbenwerd%2Fprofile_image%3Fsize%3Doriginal&size=48&sig=fde7ce5635f5ea132a2545ff5c7d3d33" 88 | ], 89 | "url": [ 90 | "https://twitter.com/benwerd" 91 | ] 92 | }, 93 | "type": [ 94 | "h-card" 95 | ], 96 | "value": "" 97 | } 98 | ], 99 | "name": [ 100 | "" 101 | ], 102 | "url": [ 103 | "https://twitter.com/kylewmahan/status/651186266701107200" 104 | ] 105 | }, 106 | "type": [ 107 | "h-cite" 108 | ], 109 | "value": "https://twitter.com/kylewmahan/status/651186266701107200" 110 | } 111 | ], 112 | "name": [ 113 | "big thing missing from my #indieweb experience is being able to see other people\u2019s comments before replying. tough problem to solve." 114 | ], 115 | "published": [ 116 | "2015-10-05T17:04:35-07:00" 117 | ], 118 | "shortlink": [ 119 | "https://kylewm.com/n/4d_1" 120 | ], 121 | "syndication": [ 122 | "https://twitter.com/kylewmahan/status/651186266701107200" 123 | ], 124 | "uid": [ 125 | "https://kylewm.com/2015/10/big-thing-missing-from-my-indieweb-experience-is" 126 | ], 127 | "url": [ 128 | "https://kylewm.com/2015/10/big-thing-missing-from-my-indieweb-experience-is", 129 | "https://kylewm.com/n/4d_1" 130 | ] 131 | }, 132 | "type": [ 133 | "h-entry" 134 | ] 135 | } 136 | ], 137 | "rel-urls": { 138 | "https://indieauth.com/auth": { 139 | "rels": [ 140 | "authorization_endpoint" 141 | ], 142 | "text": "" 143 | }, 144 | "https://indieauth.com/openid": { 145 | "rels": [ 146 | "openid.server" 147 | ], 148 | "text": "" 149 | }, 150 | "https://keybase.io/kylewm/key.asc": { 151 | "rels": [ 152 | "pgpkey" 153 | ], 154 | "text": "", 155 | "type": "application/pgp-keys" 156 | }, 157 | "https://kylewm.com": { 158 | "rels": [ 159 | "openid.delegate" 160 | ], 161 | "text": "" 162 | }, 163 | "https://kylewm.com/_themes/boxy/style.css?version=2015-06-25": { 164 | "rels": [ 165 | "stylesheet" 166 | ], 167 | "text": "" 168 | }, 169 | "https://kylewm.com/everything": { 170 | "rels": [ 171 | "feed" 172 | ], 173 | "text": "", 174 | "type": "text/html" 175 | }, 176 | "https://kylewm.com/foaf.rdf": { 177 | "rels": [ 178 | "meta" 179 | ], 180 | "text": "", 181 | "title": "Contact", 182 | "type": "application/rdf+xml" 183 | }, 184 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=114&sig=b57d1f32eb45988e4b1e7f5a53afd072": { 185 | "rels": [ 186 | "apple-touch-icon" 187 | ], 188 | "text": "" 189 | }, 190 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=152&sig=cb27d9fb6b285da683bb869ba974ee53": { 191 | "rels": [ 192 | "apple-touch-icon" 193 | ], 194 | "text": "" 195 | }, 196 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=60&sig=deebbb906749f01b98a4291e7b2cff7d": { 197 | "rels": [ 198 | "apple-touch-icon" 199 | ], 200 | "text": "" 201 | }, 202 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=76&sig=7606f9576a5cdbfeac9fe773b19d5bf1": { 203 | "rels": [ 204 | "apple-touch-icon" 205 | ], 206 | "text": "" 207 | }, 208 | "https://kylewm.com/micropub": { 209 | "rels": [ 210 | "micropub" 211 | ], 212 | "text": "" 213 | }, 214 | "https://kylewm.com/static/img/users/kyle.jpg": { 215 | "rels": [ 216 | "shortcut", 217 | 
"icon", 218 | "apple-touch-icon" 219 | ], 220 | "text": "" 221 | }, 222 | "https://kylewm.com/static/pygments.css": { 223 | "rels": [ 224 | "stylesheet" 225 | ], 226 | "text": "" 227 | }, 228 | "https://kylewm.com/token": { 229 | "rels": [ 230 | "token_endpoint" 231 | ], 232 | "text": "" 233 | }, 234 | "https://kylewm.com/webmention": { 235 | "rels": [ 236 | "webmention" 237 | ], 238 | "text": "" 239 | }, 240 | "https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css": { 241 | "rels": [ 242 | "stylesheet" 243 | ], 244 | "text": "" 245 | }, 246 | "https://twitter.com/kylewmahan/status/651186266701107200": { 247 | "rels": [ 248 | "syndication" 249 | ], 250 | "text": "" 251 | }, 252 | "https://webmention.io/webmention?forward=https://kylewm.com/webmention": { 253 | "rels": [ 254 | "pingback" 255 | ], 256 | "text": "" 257 | }, 258 | "ni:///sha-256;LXQj307VecrQ7BPxkMhuI-rM14CktmXjy16DjI0MMAE?ct=application/x-x509-user-cert": { 259 | "rels": [ 260 | "me" 261 | ], 262 | "text": "" 263 | } 264 | }, 265 | "rels": { 266 | "apple-touch-icon": [ 267 | "https://kylewm.com/static/img/users/kyle.jpg", 268 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=60&sig=deebbb906749f01b98a4291e7b2cff7d", 269 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=76&sig=7606f9576a5cdbfeac9fe773b19d5bf1", 270 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=114&sig=b57d1f32eb45988e4b1e7f5a53afd072", 271 | "https://kylewm.com/imageproxy?url=%2Fstatic%2Fimg%2Fusers%2Fkyle.jpg&size=152&sig=cb27d9fb6b285da683bb869ba974ee53" 272 | ], 273 | "authorization_endpoint": [ 274 | "https://indieauth.com/auth" 275 | ], 276 | "feed": [ 277 | "https://kylewm.com/everything" 278 | ], 279 | "icon": [ 280 | "https://kylewm.com/static/img/users/kyle.jpg" 281 | ], 282 | "me": [ 283 | "ni:///sha-256;LXQj307VecrQ7BPxkMhuI-rM14CktmXjy16DjI0MMAE?ct=application/x-x509-user-cert" 284 | ], 285 | "meta": [ 286 | "https://kylewm.com/foaf.rdf" 287 | ], 288 | "micropub": [ 289 | "https://kylewm.com/micropub" 290 | ], 291 | "openid.delegate": [ 292 | "https://kylewm.com" 293 | ], 294 | "openid.server": [ 295 | "https://indieauth.com/openid" 296 | ], 297 | "pgpkey": [ 298 | "https://keybase.io/kylewm/key.asc" 299 | ], 300 | "pingback": [ 301 | "https://webmention.io/webmention?forward=https://kylewm.com/webmention" 302 | ], 303 | "shortcut": [ 304 | "https://kylewm.com/static/img/users/kyle.jpg" 305 | ], 306 | "stylesheet": [ 307 | "https://kylewm.com/_themes/boxy/style.css?version=2015-06-25", 308 | "https://kylewm.com/static/pygments.css", 309 | "https://maxcdn.bootstrapcdn.com/font-awesome/4.3.0/css/font-awesome.min.css" 310 | ], 311 | "syndication": [ 312 | "https://twitter.com/kylewmahan/status/651186266701107200" 313 | ], 314 | "token_endpoint": [ 315 | "https://kylewm.com/token" 316 | ], 317 | "webmention": [ 318 | "https://kylewm.com/webmention" 319 | ] 320 | } 321 | } 322 | -------------------------------------------------------------------------------- /tests/interpret/relative_paths.json: -------------------------------------------------------------------------------- 1 | { 2 | "rels": {}, 3 | "alternates": [], 4 | "items": [{ 5 | "type": ["h-entry"], 6 | "properties": { 7 | "name": ["Example title"], 8 | "content": [{ 9 | "value": "This is an example document", 10 | "html": "This is an \"alt example document with relative paths." 
11 | }] 12 | } 13 | }] 14 | } 15 | -------------------------------------------------------------------------------- /tests/interpret/reply_h-cite.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "published": [ 6 | "2014-05-10T14:48:33-07:00" 7 | ], 8 | "author": [ 9 | { 10 | "properties": { 11 | "photo": [ 12 | "https://aaronparecki.com/images/aaronpk.png" 13 | ], 14 | "logo": [ 15 | "https://aaronparecki.com/images/aaronpk.png" 16 | ], 17 | "url": [ 18 | "http://aaronparecki.com/" 19 | ], 20 | "name": [ 21 | "Aaron Parecki" 22 | ] 23 | }, 24 | "value": "\n \n aaronparecki.com\n Aaron Parecki\n Aaron Parecki\n ", 25 | "type": [ 26 | "h-card" 27 | ] 28 | } 29 | ], 30 | "name": [ 31 | "@thedatahive We're working on it ;-) http://indiewebcamp.com/generations" 32 | ], 33 | "url": [ 34 | "http://aaronparecki.com/replies/2014/05/10/1/indieweb" 35 | ], 36 | "shortlink": [ 37 | "http://aaron.pk/r4W01" 38 | ], 39 | "category": [ 40 | "indieweb " 41 | ], 42 | "syndication": [ 43 | "https://twitter.com/aaronpk/status/465247041078034432" 44 | ], 45 | "location": [ 46 | { 47 | "properties": { 48 | "latitude": [ 49 | "45.512284" 50 | ], 51 | "name": [ 52 | "Portland, Oregon, USA" 53 | ], 54 | "longitude": [ 55 | "-122.612955" 56 | ] 57 | }, 58 | "value": " Portland, Oregon, USA", 59 | "type": [ 60 | "h-card" 61 | ] 62 | } 63 | ], 64 | "content": [ 65 | { 66 | "value": "@thedatahive We're working on it ;-) http://indiewebcamp.com/generations", 67 | "html": "@thedatahive We're working on it ;-) http://indiewebcamp.com/generations" 68 | } 69 | ], 70 | "in-reply-to": [ 71 | { 72 | "properties": { 73 | "author": [ 74 | { 75 | "properties": { 76 | "photo": [ 77 | "http://aaronparecki.com/images/nouns/user.svg" 78 | ], 79 | "logo": [ 80 | "http://aaronparecki.com/images/nouns/user.svg" 81 | ], 82 | "url": [ 83 | "http://datahiveconsulting.com/author/lynne/" 84 | ], 85 | "name": [ 86 | "Lynne Baer" 87 | ] 88 | }, 89 | "value": "\n \n \n \n datahiveconsulting.com/author/lynne\n Lynne Baer\n ", 90 | "type": [ 91 | "h-card" 92 | ] 93 | } 94 | ], 95 | "name": [ 96 | "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ..." 97 | ], 98 | "url": [ 99 | "http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/" 100 | ], 101 | "content": [ 102 | { 103 | "value": "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. 
So, I\u2019m biased towards an ...", 104 | "html": "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ..." 105 | } 106 | ] 107 | }, 108 | "value": " \n \n \n \n datahiveconsulting.com/author/lynne\n Lynne Baer\n \nLast week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ...permalink", 109 | "type": [ 110 | "h-cite" 111 | ] 112 | } 113 | ] 114 | }, 115 | "type": [ 116 | "h-entry" 117 | ] 118 | } 119 | ], 120 | "rels": { 121 | "pingback": [ 122 | "http://webmention.io/webmention?forward=http%3A%2F%2Faaronparecki.com%2Fwebmention.php" 123 | ], 124 | "icon": [ 125 | "/favicon.ico", 126 | "/images/aaronpk-256.jpg" 127 | ], 128 | "search": [ 129 | "/opensearch.xml" 130 | ], 131 | "authorization_endpoint": [ 132 | "https://indieauth.com/auth" 133 | ], 134 | "apple-touch-icon-precomposed": [ 135 | "/images/aaronpk-256.jpg" 136 | ], 137 | "openid.server": [ 138 | "https://indieauth.com/openid" 139 | ], 140 | "author": [ 141 | "http://aaronparecki.com/", 142 | "https://plus.google.com/117847912875913905493" 143 | ], 144 | "token_endpoint": [ 145 | "https://tokens.oauth.net/token" 146 | ], 147 | "prev": [ 148 | "/replies/2014/05/09/1/" 149 | ], 150 | "micropub": [ 151 | "https://aaronparecki.com/api/post" 152 | ], 153 | "webmention": [ 154 | "https://aaronparecki.com/webmention.php" 155 | ], 156 | "stylesheet": [ 157 | "/bootstrap-2.2.2/css/bootstrap.min.css", 158 | "/bootstrap-2.2.2/css/bootstrap-responsive.min.css", 159 | "/css/style.css?body=1", 160 | "/css/font-awesome/css/font-awesome.min.css", 161 | "/css/aaronpk.css" 162 | ], 163 | "openid.delegate": [ 164 | "https://aaronparecki.com/" 165 | ], 166 | "shortcut": [ 167 | "/favicon.ico", 168 | "/images/aaronpk-256.jpg" 169 | ], 170 | "license": [ 171 | "http://creativecommons.org/licenses/by/3.0/" 172 | ], 173 | "tag": [ 174 | "/tag/indieweb" 175 | ] 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /tests/interpret/reply_invite.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "in-reply-to": [ 6 | "https://indiewebcamp.com/events/2015-03-25-homebrew-website-club", 7 | "https://kylewm.com/2015/03/homebrew-website-club-2015-march-25" 8 | ], 9 | "invitee": [ 10 | { 11 | "properties": { 12 | "name": [ 13 | "Silona Bonewald" 14 | ], 15 | "photo": [ 16 | "https://graph.facebook.com/v2.2/10155109753190015/picture?type=large" 17 | ], 18 | "url": [ 19 | "https://www.facebook.com/10155109753190015" 20 | ] 21 | }, 22 | "type": [ 23 | "h-card" 24 | ], 25 | "value": "Silona Bonewald" 26 | } 27 | ], 28 | "name": [ 29 | "invited" 30 | 
], 31 | "uid": [ 32 | "tag:facebook.com,2013:1565113317092307_rsvp_10155109753190015" 33 | ], 34 | "url": [ 35 | "https://www.facebook.com/1565113317092307#10155109753190015" 36 | ] 37 | }, 38 | "type": [ 39 | "h-entry" 40 | ] 41 | } 42 | ], 43 | "rels": {} 44 | } 45 | -------------------------------------------------------------------------------- /tests/interpret/reply_rsvp.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "children": [ 5 | { 6 | "value": "\n\nI'm Ryan Barrett.\nI live, work, and play in\n San FranciscoCalifornia.\nI code, write, and post pictures here.\n\n\n\n\npublic@ryanb.org\npublic PGP key\n\n\n\n \n \n Home\n \n \n \n \n Search\n \n \n \n \n Archives\n \n \n \n \n Twitter\n \n \n \n \n Facebook\n \n \n \n \n Google+\n \n \n \n \n GitHub\n \n \n \n \n RSS Feed\n \n \n\n", 7 | "properties": { 8 | "name": [ 9 | "Ryan Barrett" 10 | ], 11 | "photo": [ 12 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 13 | ], 14 | "url": [ 15 | "https://snarfed.org/" 16 | ], 17 | "key": [ 18 | "https://snarfed.org/pubkey.txt" 19 | ], 20 | "region": [ 21 | "California" 22 | ], 23 | "locality": [ 24 | "San Francisco" 25 | ], 26 | "email": [ 27 | "mailto:public@ryanb.org" 28 | ] 29 | }, 30 | "type": [ 31 | "h-card" 32 | ] 33 | } 34 | ], 35 | "properties": { 36 | "name": [ 37 | "Homebrew Website Club", 38 | "Homebrew Website Club" 39 | ], 40 | "published": [ 41 | "2014-05-05T10:10:53-07:00" 42 | ], 43 | "author": [ 44 | { 45 | "value": " Ryan Barrett", 46 | "properties": { 47 | "name": [ 48 | "Ryan Barrett" 49 | ], 50 | "photo": [ 51 | "https://secure.gravatar.com/avatar/947b5f3f323da0ef785b6f02d9c265d6?s=96&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&r=G" 52 | ], 53 | "url": [ 54 | "http://snarfed.org/" 55 | ] 56 | }, 57 | "type": [ 58 | "h-card" 59 | ] 60 | } 61 | ], 62 | "in-reply-to": [ 63 | "http://werd.io/2014/homebrew-website-club-4" 64 | ], 65 | "url": [ 66 | "https://snarfed.org/2014-05-05_homebrew-website-club-3" 67 | ], 68 | "content": [ 69 | { 70 | "html": "\n\t\t\t

RSVPs yes to Homebrew Website Club

\n
\t\t\t\t\t", 71 | "value": "\n\t\t\tRSVPs yes to Homebrew Website Club\n\t\t\t\t\t" 72 | }, 73 | { 74 | "html": "

RSVPs yes to Homebrew Website Club

\n", 75 | "value": "RSVPs yes to Homebrew Website Club\n" 76 | } 77 | ], 78 | "rsvp": [ 79 | "yes" 80 | ] 81 | }, 82 | "type": [ 83 | "h-entry", 84 | "h-as-article" 85 | ] 86 | } 87 | ], 88 | "rels": { 89 | "nofollow": [ 90 | "https://snarfed.org/2014-05-05_homebrew-website-club-3#respond" 91 | ], 92 | "generator": [ 93 | "http://wordpress.org/" 94 | ], 95 | "prev": [ 96 | "https://snarfed.org/2014-05-05_9325" 97 | ], 98 | "stylesheet": [ 99 | "https://snarfed.org/w/wp-content/plugins/nextgen-gallery/products/photocrati_nextgen/modules/nextgen_gallery_display/static/nextgen_gallery_related_images.css?ver=3.9.1", 100 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/subscriptions/subscriptions.css?ver=3.9.1", 101 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/widgets/widgets.css?ver=20121003", 102 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/carousel/jetpack-carousel.css?ver=20120629", 103 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/tiled-gallery/tiled-gallery/tiled-gallery.css?ver=2012-09-21", 104 | "https://snarfed.org/w/wp-includes/js/mediaelement/mediaelementplayer.min.css?ver=2.13.0", 105 | "https://snarfed.org/w/wp-includes/js/mediaelement/wp-mediaelement.css?ver=3.9.1", 106 | "https://snarfed.org/w/wp-content/themes/snarfed-ryu/style.css?ver=3.9.1", 107 | "https://fonts.googleapis.com/css?family=Lato:100,300,400,700,900,100italic,300italic,400italic,700italic,900italic", 108 | "https://fonts.googleapis.com/css?family=Playfair+Display:400,700,900,400italic,700italic,900italic&subset=latin,latin-ext" 109 | ], 110 | "canonical": [ 111 | "https://snarfed.org/2014-05-05_homebrew-website-club-3" 112 | ], 113 | "tag": [ 114 | "https://snarfed.org/category/indieweb_rsvp" 115 | ], 116 | "webmention": [ 117 | "https://snarfed.org/w/?webmention=endpoint" 118 | ], 119 | "shortcut": [ 120 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 121 | ], 122 | "openid2.local_id": [ 123 | "http://www.google.com/profiles/heaven" 124 | ], 125 | "pingback": [ 126 | "https://snarfed.org/w/xmlrpc.php" 127 | ], 128 | "http://webmention.org/": [ 129 | "https://snarfed.org/w/?webmention=endpoint" 130 | ], 131 | "apple-touch-icon-precomposed": [ 132 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 133 | ], 134 | "shortlink": [ 135 | "http://wp.me/p3EDAq-2qr" 136 | ], 137 | "author": [ 138 | "http://snarfed.org/" 139 | ], 140 | "publisher": [ 141 | "https://plus.google.com/103651231634018158746" 142 | ], 143 | "key": [ 144 | "https://snarfed.org/pubkey.txt" 145 | ], 146 | "designer": [ 147 | "http://theme.wordpress.com/" 148 | ], 149 | "icon": [ 150 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 151 | ], 152 | "bookmark": [ 153 | "https://snarfed.org/2014-05-05_homebrew-website-club-3" 154 | ], 155 | "category": [ 156 | "https://snarfed.org/category/indieweb_rsvp" 157 | ], 158 | "home": [ 159 | "https://snarfed.org/", 160 | "https://snarfed.org/" 161 | ], 162 | "me": [ 163 | "mailto:public@ryanb.org", 164 | "https://twitter.com/schnarfed", 165 | "https://www.facebook.com/snarfed.org", 166 | "https://plus.google.com/+RyanBarrett", 167 | "https://github.com/snarfed" 168 | ], 169 | "openid2.provider": [ 170 | "https://www.google.com/accounts/o8/ud?source=profiles" 171 | ], 172 | "profile": [ 173 | "http://gmpg.org/xfn/11" 174 | ], 175 | "next": [ 176 | "https://snarfed.org/2014-05-05_i-still-think-automattic-is-undervalued-at-1-16bn-httprecode-net20140505wordpress-parent-automattic-has-raised-160-million-now-valued-at-1-16-billion-post-money" 177 | 
] 178 | }, 179 | "alternates": [ 180 | { 181 | "url": "https://snarfed.org/feed", 182 | "type": "application/rss+xml" 183 | }, 184 | { 185 | "url": "https://snarfed.org/comments/feed", 186 | "type": "application/rss+xml" 187 | }, 188 | { 189 | "url": "https://snarfed.org/2014-05-05_homebrew-website-club-3/feed", 190 | "type": "application/rss+xml" 191 | } 192 | ] 193 | } -------------------------------------------------------------------------------- /tests/interpret/reply_u-in-reply-to.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "children": [ 5 | { 6 | "value": "\n\nI'm Ryan Barrett.\nI live, work, and play in\n San FranciscoCalifornia.\nI code, write, and post pictures here.\n\n\n\n\npublic@ryanb.org\npublic PGP key\n\n\n\n \n \n Home\n \n \n \n \n Search\n \n \n \n \n Archives\n \n \n \n \n Twitter\n \n \n \n \n Facebook\n \n \n \n \n Google+\n \n \n \n \n GitHub\n \n \n \n \n RSS Feed\n \n \n\n", 7 | "properties": { 8 | "name": [ 9 | "Ryan Barrett" 10 | ], 11 | "photo": [ 12 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 13 | ], 14 | "url": [ 15 | "https://snarfed.org/" 16 | ], 17 | "key": [ 18 | "https://snarfed.org/pubkey.txt" 19 | ], 20 | "region": [ 21 | "California" 22 | ], 23 | "locality": [ 24 | "San Francisco" 25 | ], 26 | "email": [ 27 | "mailto:public@ryanb.org" 28 | ] 29 | }, 30 | "type": [ 31 | "h-card" 32 | ] 33 | } 34 | ], 35 | "properties": { 36 | "name": [ 37 | "Re: Display likes in a facepile", 38 | "Re: Display likes in a facepile" 39 | ], 40 | "published": [ 41 | "2014-03-09T22:48:22-07:00" 42 | ], 43 | "author": [ 44 | { 45 | "value": " Ryan Barrett", 46 | "properties": { 47 | "name": [ 48 | "Ryan Barrett" 49 | ], 50 | "photo": [ 51 | "https://secure.gravatar.com/avatar/947b5f3f323da0ef785b6f02d9c265d6?s=96&d=https%3A%2F%2Fsecure.gravatar.com%2Favatar%2Fad516503a11cd5ca435acc9bb6523536%3Fs%3D96&r=G" 52 | ], 53 | "url": [ 54 | "http://snarfed.org/" 55 | ] 56 | }, 57 | "type": [ 58 | "h-card" 59 | ] 60 | } 61 | ], 62 | "in-reply-to": [ 63 | "https://willnorris.com/2014/03/display-likes-in-a-facepile" 64 | ], 65 | "url": [ 66 | "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile" 67 | ], 68 | "content": [ 69 | { 70 | "html": "\n\t\t\t

oh man, so cool! thanks for doing this. can\u2019t wait to try it myself!

\n\n

\n
\t\t\t\t\t", 71 | "value": "\n\t\t\toh man, so cool! thanks for doing this. can\u2019t wait to try it myself!\n\n\n\t\t\t\t\t" 72 | }, 73 | { 74 | "html": "

oh man, so cool! thanks for doing this. can\u2019t wait to try it myself!

\n\n

\n", 75 | "value": "oh man, so cool! thanks for doing this. can\u2019t wait to try it myself!\n\n\n" 76 | } 77 | ] 78 | }, 79 | "type": [ 80 | "h-entry", 81 | "h-as-article" 82 | ] 83 | } 84 | ], 85 | "rels": { 86 | "nofollow": [ 87 | "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile#respond" 88 | ], 89 | "generator": [ 90 | "http://wordpress.org/" 91 | ], 92 | "prev": [ 93 | "https://snarfed.org/2014-03-09_re-oh-in-duboce-park-theyre-entering-the-medical-tricorder-s" 94 | ], 95 | "stylesheet": [ 96 | "https://snarfed.org/w/wp-content/plugins/nextgen-gallery/products/photocrati_nextgen/modules/nextgen_gallery_display/static/nextgen_gallery_related_images.css?ver=3.9.1", 97 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/subscriptions/subscriptions.css?ver=3.9.1", 98 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/widgets/widgets.css?ver=20121003", 99 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/carousel/jetpack-carousel.css?ver=20120629", 100 | "https://snarfed.org/w/wp-content/plugins/jetpack/modules/tiled-gallery/tiled-gallery/tiled-gallery.css?ver=2012-09-21", 101 | "https://snarfed.org/w/wp-includes/js/mediaelement/mediaelementplayer.min.css?ver=2.13.0", 102 | "https://snarfed.org/w/wp-includes/js/mediaelement/wp-mediaelement.css?ver=3.9.1", 103 | "https://snarfed.org/w/wp-content/themes/snarfed-ryu/style.css?ver=3.9.1", 104 | "https://fonts.googleapis.com/css?family=Lato:100,300,400,700,900,100italic,300italic,400italic,700italic,900italic", 105 | "https://fonts.googleapis.com/css?family=Playfair+Display:400,700,900,400italic,700italic,900italic&subset=latin,latin-ext" 106 | ], 107 | "canonical": [ 108 | "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile" 109 | ], 110 | "tag": [ 111 | "https://snarfed.org/category/indieweb" 112 | ], 113 | "webmention": [ 114 | "https://snarfed.org/w/?webmention=endpoint" 115 | ], 116 | "shortcut": [ 117 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 118 | ], 119 | "openid2.local_id": [ 120 | "http://www.google.com/profiles/heaven" 121 | ], 122 | "pingback": [ 123 | "https://snarfed.org/w/xmlrpc.php" 124 | ], 125 | "http://webmention.org/": [ 126 | "https://snarfed.org/w/?webmention=endpoint" 127 | ], 128 | "apple-touch-icon-precomposed": [ 129 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 130 | ], 131 | "shortlink": [ 132 | "http://wp.me/p3EDAq-2ak" 133 | ], 134 | "author": [ 135 | "http://snarfed.org/" 136 | ], 137 | "publisher": [ 138 | "https://plus.google.com/103651231634018158746" 139 | ], 140 | "key": [ 141 | "https://snarfed.org/pubkey.txt" 142 | ], 143 | "designer": [ 144 | "http://theme.wordpress.com/" 145 | ], 146 | "icon": [ 147 | "https://snarfed.org/ryan_profile_square_thumb.jpg" 148 | ], 149 | "bookmark": [ 150 | "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile" 151 | ], 152 | "category": [ 153 | "https://snarfed.org/category/indieweb" 154 | ], 155 | "home": [ 156 | "https://snarfed.org/", 157 | "https://snarfed.org/" 158 | ], 159 | "me": [ 160 | "mailto:public@ryanb.org", 161 | "https://twitter.com/schnarfed", 162 | "https://www.facebook.com/snarfed.org", 163 | "https://plus.google.com/+RyanBarrett", 164 | "https://github.com/snarfed" 165 | ], 166 | "openid2.provider": [ 167 | "https://www.google.com/accounts/o8/ud?source=profiles" 168 | ], 169 | "profile": [ 170 | "http://gmpg.org/xfn/11" 171 | ], 172 | "next": [ 173 | "https://snarfed.org/2014-03-10_re-joining-the-indie-web-my-motivation" 174 | ] 175 | }, 176 | "alternates": [ 177 | { 178 | 
"url": "https://snarfed.org/feed", 179 | "type": "application/rss+xml" 180 | }, 181 | { 182 | "url": "https://snarfed.org/comments/feed", 183 | "type": "application/rss+xml" 184 | }, 185 | { 186 | "url": "https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile/feed", 187 | "type": "application/rss+xml" 188 | } 189 | ] 190 | } -------------------------------------------------------------------------------- /tests/interpret/unusual_properties.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "name": [ 6 | { 7 | "properties": { 8 | "name": ["This is wrong"] 9 | }, 10 | "value": "Rocky Raccoon", 11 | "type": ["h-card"] 12 | }, 13 | "This is also wrong" 14 | ], 15 | "url": [ 16 | { 17 | "properties": { 18 | "url": ["This is probably wrong"] 19 | }, 20 | "value": "https://foo.bar/", 21 | "type": ["h-event"] 22 | }, 23 | "This is wrong too" 24 | ], 25 | "uid": ["https://foo.bar/"] 26 | }, 27 | "type": [ 28 | "h-entry" 29 | ] 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /tests/posttype/hcard_no_name.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-card" 6 | ], 7 | "properties": { 8 | "url": [ 9 | "https://tmichellemoore.com/" 10 | ], 11 | "uid": [ 12 | "https://tmichellemoore.com/" 13 | ], 14 | "photo": [ 15 | "https://tmichellemoore.com/pic.jpg" 16 | ] 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /tests/posttype/hcard_org.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": [ 5 | "h-card" 6 | ], 7 | "properties": { 8 | "url": [ 9 | "https://tmichellemoore.com/" 10 | ], 11 | "uid": [ 12 | "https://tmichellemoore.com/" 13 | ], 14 | "name": [ 15 | "Foo Foundation" 16 | ], 17 | "org": [ 18 | "Foo Foundation" 19 | ] 20 | } 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /tests/posttype/only_html_content.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "properties": { 5 | "content": [ 6 | { 7 | "html": "some
html" 8 | } 9 | ] 10 | }, 11 | "type": [ 12 | "h-entry" 13 | ] 14 | } 15 | ] 16 | } 17 | -------------------------------------------------------------------------------- /tests/posttype/tantek_photo.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "children": [ 5 | { 6 | "properties": { 7 | "name": [ 8 | "@W3C" 9 | ], 10 | "url": [ 11 | "https://twitter.com/W3C" 12 | ] 13 | }, 14 | "type": [ 15 | "h-x-username" 16 | ] 17 | } 18 | ], 19 | "properties": { 20 | "author": [ 21 | { 22 | "properties": { 23 | "name": [ 24 | "Tantek \u00c7elik" 25 | ], 26 | "photo": [ 27 | "http://tantek.com/logo.jpg" 28 | ], 29 | "url": [ 30 | "http://tantek.com/" 31 | ] 32 | }, 33 | "type": [ 34 | "h-card" 35 | ], 36 | "value": "" 37 | } 38 | ], 39 | "content": [ 40 | { 41 | "html": "\"a https://instagram.com/p/9XVBIRA9cj/

Social Web session @W3C #TPAC2015 in Sapporo, Hokkaido, Japan.", 42 | "value": " https://instagram.com/p/9XVBIRA9cj/Social Web session @W3C #TPAC2015 in Sapporo, Hokkaido, Japan." 43 | } 44 | ], 45 | "name": [ 46 | "https://instagram.com/p/9XVBIRA9cj/Social Web session @W3C #TPAC2015 in Sapporo, Hokkaido, Japan." 47 | ], 48 | "photo": [ 49 | "https://igcdn-photos-b-a.akamaihd.net/hphotos-ak-xaf1/t51.2885-15/e35/12145332_1662314194043465_2009449288_n.jpg" 50 | ], 51 | "published": [ 52 | "2015-10-27T19:48:00-0700" 53 | ], 54 | "syndication": [ 55 | "https://www.facebook.com/photo.php?fbid=10101948228396473", 56 | "https://twitter.com/t/status/659200761427980288" 57 | ], 58 | "uid": [ 59 | "http://tantek.com/2015/300/t1/social-web-session-w3c-tpac2015" 60 | ], 61 | "updated": [ 62 | "2015-10-27T19:48:00-0700" 63 | ], 64 | "url": [ 65 | "http://tantek.com/2015/300/t1/social-web-session-w3c-tpac2015" 66 | ] 67 | }, 68 | "type": [ 69 | "h-entry" 70 | ] 71 | } 72 | ], 73 | "rel-urls": { 74 | "http://tantek.com/": { 75 | "rels": [ 76 | "author", 77 | "home" 78 | ], 79 | "text": "tantek.com", 80 | "title": "Tantek \u00c7elik" 81 | }, 82 | "http://tantek.com/2015/298/t1/portable-planetarium-sciencehackday-best-hardware": { 83 | "rels": [ 84 | "prev" 85 | ], 86 | "text": "\u2190", 87 | "title": "View the previous (older) item in the stream." 88 | }, 89 | "http://tantek.com/2015/301/f1": { 90 | "rels": [ 91 | "next" 92 | ], 93 | "text": "\u2192", 94 | "title": "View the next (newer) item in the stream" 95 | }, 96 | "http://tantek.com/logo.jpg": { 97 | "rels": [ 98 | "icon" 99 | ], 100 | "text": "" 101 | }, 102 | "http://webmention.io/tantek.com/webmention": { 103 | "rels": [ 104 | "webmention" 105 | ], 106 | "text": "" 107 | }, 108 | "https://twitter.com/t/status/659200761427980288": { 109 | "rels": [ 110 | "syndication" 111 | ], 112 | "text": "View \n on Twitter" 113 | }, 114 | "https://www.facebook.com/photo.php?fbid=10101948228396473": { 115 | "rels": [ 116 | "syndication" 117 | ], 118 | "text": "View on Facebook" 119 | } 120 | }, 121 | "rels": { 122 | "author": [ 123 | "http://tantek.com/" 124 | ], 125 | "home": [ 126 | "http://tantek.com/" 127 | ], 128 | "icon": [ 129 | "http://tantek.com/logo.jpg" 130 | ], 131 | "next": [ 132 | "http://tantek.com/2015/301/f1" 133 | ], 134 | "prev": [ 135 | "http://tantek.com/2015/298/t1/portable-planetarium-sciencehackday-best-hardware" 136 | ], 137 | "syndication": [ 138 | "https://www.facebook.com/photo.php?fbid=10101948228396473", 139 | "https://twitter.com/t/status/659200761427980288" 140 | ], 141 | "webmention": [ 142 | "http://webmention.io/tantek.com/webmention" 143 | ] 144 | } 145 | } 146 | -------------------------------------------------------------------------------- /tests/test_authorship.py: -------------------------------------------------------------------------------- 1 | """Test the authorship discovery algorithm. 
Credit for test cases to 2 | Sandeep Shetty https://github.com/sandeepshetty/authorship-test-cases 3 | """ 4 | 5 | from __future__ import print_function 6 | import mf2util 7 | import mf2py 8 | 9 | 10 | def load_test(testname, hentry_func=None): 11 | def fetch_mf2(url): 12 | testname = url 13 | prefix = 'http://example.com/' 14 | if testname.startswith(prefix): 15 | testname = testname[len(prefix):] 16 | 17 | with open('tests/authorship/' + testname) as f: 18 | return mf2py.parse(url=url, doc=f.read()) 19 | 20 | url = 'http://example.com/' + testname 21 | parsed = fetch_mf2(url) 22 | hentry = hentry_func and hentry_func(parsed) 23 | 24 | return mf2util.find_author( 25 | parsed, url, hentry=hentry, fetch_mf2_func=fetch_mf2) 26 | 27 | 28 | def test_p_author_string(): 29 | blob = { 30 | 'items': [ 31 | { 32 | 'type': ['h-entry'], 33 | 'properties': { 34 | 'author': ['John Doe'] 35 | } 36 | } 37 | ] 38 | } 39 | assert mf2util.find_author(blob) == {'name': 'John Doe'} 40 | 41 | 42 | def test_h_entry_with_p_author_h_card(): 43 | assert load_test('h-entry_with_p-author_h-card.html') == { 44 | 'name': 'John Doe', 45 | 'url': 'http://example.com/johndoe/', 46 | 'photo': 'http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm' 47 | } 48 | 49 | 50 | def test_h_entry_with_rel_author(): 51 | assert load_test('h-entry_with_rel-author.html') == { 52 | 'name': 'John Doe', 53 | 'url': 'http://example.com/h-card_with_u-url_that_is_also_rel-me.html', 54 | 'photo': 'http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm', 55 | } 56 | 57 | 58 | def test_h_entry_with_u_author(): 59 | assert load_test('h-entry_with_u-author.html') == { 60 | 'name': 'John Doe', 61 | 'url': 'http://example.com/h-card_with_u-url_equal_to_self.html', 62 | 'photo': 'http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm' 63 | } 64 | 65 | 66 | def test_h_feed_with_p_author_h_card(): 67 | def select_h_entry(parsed): 68 | hfeed = parsed['items'][0] 69 | assert hfeed['type'] == ['h-feed'] 70 | assert len(hfeed['children']) == 3 71 | return hfeed['children'][1] 72 | 73 | assert load_test('h-feed_with_p-author_h-card.html', select_h_entry) == { 74 | 'name': 'John Doe', 75 | 'url': 'http://example.com/johndoe/', 76 | 'photo': 'http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm' 77 | } 78 | 79 | 80 | def test_h_feed_with_u_author(): 81 | def select_h_entry(parsed): 82 | hfeed = parsed['items'][0] 83 | assert hfeed['type'] == ['h-feed'] 84 | assert len(hfeed['children']) == 3 85 | return hfeed['children'][2] 86 | 87 | assert load_test('h-feed_with_u-author.html', select_h_entry) == { 88 | 'name': 'John Doe', 89 | 'url': 'http://example.com/h-card_with_u-url_equal_to_u-uid_equal_to_self.html', 90 | 'photo': 'http://www.gravatar.com/avatar/fd876f8cd6a58277fc664d47ea10ad19.jpg?s=80&d=mm' 91 | } 92 | -------------------------------------------------------------------------------- /tests/test_classify_comments.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import mf2util 3 | 4 | TEST_BLOB = { 5 | "alternates": [ 6 | ], 7 | "items": [ 8 | { 9 | "properties": { 10 | "name": ["Author"], 11 | "photo": ["http://example.com/author_img.jpg"], 12 | "url": ["http://example.com"] 13 | }, 14 | "type": ["h-card"], 15 | "value": "Author LastName" 16 | }, 17 | { 18 | "properties": { 19 | "content": [ 20 | { 21 | "html": "some content", 22 | "value": "some content" 23 | } 24 | ], 25 | "name": ["some title"], 26 
| "published": ["2014-05-07T17:15:44+00:00"], 27 | "url": ["http://example.com/reply/2014/05/07/1"] 28 | }, 29 | "type": [ 30 | "h-entry" 31 | ] 32 | } 33 | ], 34 | "rels": { 35 | } 36 | } 37 | 38 | 39 | def test_no_reference(): 40 | blob = copy.deepcopy(TEST_BLOB) 41 | assert mf2util.classify_comment(blob, ('http://example.com',)) == [] 42 | 43 | # add some irrelevant references 44 | blob['items'][1]['in-reply-to'] = [ 45 | "http://werd.io/2014/homebrew-website-club-4", 46 | "https://www.facebook.com/events/1430990723825351/" 47 | ] 48 | assert mf2util.classify_comment(blob, ('http://example.com',)) == [] 49 | 50 | # no target url 51 | assert mf2util.classify_comment(blob, ()) == [] 52 | 53 | 54 | def test_rsvps(): 55 | blob = copy.deepcopy(TEST_BLOB) 56 | 57 | blob['items'][1]['properties'].update({ 58 | 'in-reply-to': ['http://mydomain.com/my-post'], 59 | 'rsvp': ['yes'], 60 | }) 61 | 62 | assert mf2util.classify_comment( 63 | blob, ('http://mydoma.in/short', 'http://mydomain.com/my-post')) \ 64 | == ['rsvp', 'reply'] 65 | 66 | 67 | def test_invites(): 68 | blob = copy.deepcopy(TEST_BLOB) 69 | 70 | blob['items'][1]['properties'].update({ 71 | 'in-reply-to': ['http://mydomain.com/my-post'], 72 | 'invitee': [{ 73 | 'name': 'Kyle Mahan', 74 | 'url': 'https://kylewm.com', 75 | }], 76 | }) 77 | 78 | assert mf2util.classify_comment( 79 | blob, ('http://mydoma.in/short', 'http://mydomain.com/my-post')) \ 80 | == ['invite', 'reply'] 81 | 82 | 83 | def test_likes(): 84 | """make sure we find likes""" 85 | blob = copy.deepcopy(TEST_BLOB) 86 | 87 | # add some references 88 | blob['items'][1]['properties'].update({ 89 | 'in-reply-to': ['http://someoneelse.com/post'], 90 | 'like-of': ['http://mydomain.com/my-post'], 91 | }) 92 | 93 | assert mf2util.classify_comment( 94 | blob, ('http://mydoma.in/short', 'http://mydomain.com/my-post')) \ 95 | == ['like'] 96 | 97 | 98 | def test_reposts(): 99 | """make sure we find reposts""" 100 | blob = copy.deepcopy(TEST_BLOB) 101 | 102 | # add some references 103 | blob['items'][1]['properties'].update({ 104 | 'repost-of': ['http://mydomain.com/my-post'], 105 | 'like-of': ['http://someoneelse.com/post'], 106 | }) 107 | 108 | assert mf2util.classify_comment( 109 | blob, ('http://mydoma.in/short', 'http://mydomain.com/my-post')) \ 110 | == ['repost'] 111 | 112 | 113 | def test_multireply(): 114 | """check behavior if our post is one among several posts 115 | in a multireply""" 116 | blob = copy.deepcopy(TEST_BLOB) 117 | 118 | # add some references 119 | blob['items'][1]['properties'].update({ 120 | 'in-reply-to': [ 121 | 'http://someoneelse.com/post', 122 | 'http://mydomain.com/my-post', 123 | 'http://athirddomain.org/permalink', 124 | ], 125 | }) 126 | 127 | assert mf2util.classify_comment(blob, ('http://mydomain.com/my-post')) \ 128 | == ['reply'] 129 | 130 | 131 | def test_multimodal(): 132 | """a mention can have more than one classification, make sure we find 133 | all of them. 
also tests some of the alternate/historical classnames""" 134 | blob = copy.deepcopy(TEST_BLOB) 135 | 136 | # add some references 137 | blob['items'][1]['properties'].update({ 138 | 'reply-to': ['http://noone.im/'], 139 | 'repost-of': [ 140 | 'http://someoneelse.com', 141 | 'http://mydomain.com/my-post', 142 | ], 143 | 'like': [ 144 | 'http://mydoma.in/short', 145 | 'http://someoneelse.com/post', 146 | ], 147 | }) 148 | 149 | assert sorted( 150 | mf2util.classify_comment( 151 | blob, ('http://mydoma.in/short', 'http://mydomain.com/my-post')))\ 152 | == ['like', 'repost'] 153 | 154 | 155 | def test_h_cite(): 156 | """Test object references (e.g., class="p-in-reply-to h-cite")""" 157 | blob = copy.deepcopy(TEST_BLOB) 158 | 159 | # add some references 160 | blob['items'][1]['properties'].update({ 161 | 'in-reply-to': [{ 162 | 'type': 'h-cite', 163 | 'properties': { 164 | 'url': ['http://mydomain.com/my-post'], 165 | }, 166 | }], 167 | }) 168 | 169 | assert mf2util.classify_comment(blob, ('http://mydomain.com/my-post',))\ 170 | == ['reply'] 171 | -------------------------------------------------------------------------------- /tests/test_datetime.py: -------------------------------------------------------------------------------- 1 | import mf2util 2 | from datetime import timedelta, date, datetime 3 | import pytest 4 | 5 | 6 | def test_none(): 7 | assert mf2util.parse_datetime(None) is None 8 | 9 | 10 | def test_parse_dates(): 11 | assert mf2util.parse_datetime('2014-04-27') == date(2014, 4, 27) 12 | assert mf2util.parse_datetime('2014-9-2') == date(2014, 9, 2) 13 | assert mf2util.parse_datetime('1982-11-24') == date(1982, 11, 24) 14 | 15 | with pytest.raises(ValueError): 16 | # day/month switched 17 | mf2util.parse_datetime('2014-24-11') 18 | 19 | with pytest.raises(ValueError): 20 | # 2-character year 21 | mf2util.parse_datetime('14-09-27') 22 | 23 | 24 | def test_parse_datetimes_no_tz(): 25 | # tantek.com -- no seconds, no timezone 26 | assert mf2util.parse_datetime('2014-05-09T17:53') == datetime(2014, 5, 9, 17, 53) 27 | # same as above without 'T' 28 | assert mf2util.parse_datetime('2014-05-09 17:53') == datetime(2014, 5, 9, 17, 53) 29 | # Homebrew Website Club 30 | assert mf2util.parse_datetime('2014-04-23T18:30') == datetime(2014, 4, 23, 18, 30) 31 | 32 | with pytest.raises(ValueError): 33 | # hour only 34 | mf2util.parse_datetime('2012-09-01T12') 35 | 36 | with pytest.raises(ValueError): 37 | # invalid hour minute 38 | mf2util.parse_datetime('2014-04-23T30:90') 39 | 40 | 41 | def test_parse_datetimes(): 42 | def assert_with_tz(dt, naive, offset): 43 | """return a tuple with naive datetime, and an timedelta tz offset""" 44 | assert naive == dt.replace(tzinfo=None) 45 | assert offset == dt.utcoffset() 46 | 47 | # waterpigs.co.uk -- utc time 48 | assert_with_tz(mf2util.parse_datetime('2014-05-10T10:48:28+00:00'), 49 | datetime(2014, 5, 10, 10, 48, 28), timedelta(hours=0)) 50 | 51 | # same as above with Zulu time 52 | assert_with_tz(mf2util.parse_datetime('2014-05-10T10:48:28Z'), 53 | datetime(2014, 5, 10, 10, 48, 28), timedelta(hours=0)) 54 | 55 | # snarfed.org -- pacific time 56 | assert_with_tz(mf2util.parse_datetime('2014-05-05T09:59:08-07:00'), 57 | datetime(2014, 5, 5, 9, 59, 8), timedelta(hours=-7)) 58 | 59 | # same as above, no colon in tz 60 | assert_with_tz(mf2util.parse_datetime('2014-05-05T09:59:08-0700'), 61 | datetime(2014, 5, 5, 9, 59, 8), timedelta(hours=-7)) 62 | 63 | with pytest.raises(ValueError): 64 | # cannot read timezones by name 65 | 
mf2util.parse_datetime('2013-07-04T11:22 PST') 66 | -------------------------------------------------------------------------------- /tests/test_interpret.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """Test the interpret module, the unification of the other utility methods. 3 | Uses test cases from around the indieweb. 4 | """ 5 | from __future__ import unicode_literals 6 | from datetime import datetime, date, timedelta 7 | import mf2util 8 | import json 9 | 10 | 11 | def load_test(testname): 12 | return json.load(open('tests/interpret/%s.json' % testname)) 13 | 14 | 15 | def test_event(): 16 | # HWC event from werd.io 17 | parsed = load_test('hwc-event') 18 | result = mf2util.interpret( 19 | parsed, 'http://werd.io/2014/homebrew-website-club-4') 20 | 21 | assert result['type'] == 'event' 22 | assert result['name'] == 'Homebrew Website Club' 23 | assert 'Are you building your own website?' in result['content'] 24 | assert result['start'].replace(tzinfo=None) == datetime(2014, 5, 7, 18, 30) 25 | assert result['start'].utcoffset() == timedelta(hours=0) 26 | assert result['end'].replace(tzinfo=None) == datetime(2014, 5, 7, 19, 30) 27 | assert result['end'].utcoffset() == timedelta(hours=0) 28 | assert result['location'] == { 29 | 'name': 'Mozilla SF, 1st floor, 2 Harrison st. (at Embarcadero), San Francisco, CA ', 30 | } 31 | 32 | 33 | def test_reply_h_cite(): 34 | # reply with reply-context from aaronnparecki.com 35 | parsed = load_test('reply_h-cite') 36 | result = mf2util.interpret_comment( 37 | parsed, 'http://aaronparecki.com/replies/2014/05/10/1/indieweb', 38 | ['http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/', 'http://datahiveconsulting.com/2014/04/10']) 39 | 40 | assert result['type'] == 'entry' 41 | assert not result.get('name') 42 | assert "We're working on it ;-)" in result.get('content') 43 | assert result['published'].replace(tzinfo=None)\ 44 | == datetime(2014, 5, 10, 14, 48, 33) 45 | assert result['published'].utcoffset() == timedelta(hours=-7) 46 | assert result['comment_type'] == ['reply'] 47 | assert result['in-reply-to'] == [{ 48 | 'type': 'cite', 49 | 'author': { 50 | 'name': 'Lynne Baer', 51 | 'photo': 'http://aaronparecki.com/images/nouns/user.svg', 52 | 'url': 'http://datahiveconsulting.com/author/lynne/', 53 | }, 54 | 'content': "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. So, I\u2019m biased towards an ...", 55 | 'content-plain': "Last week, a friend asked me what I thought of IndieWebify.Me, a movement intended to allow people to publish on the web without relying on the tools and storage of the giant corporations that currently control the majority of the social web. I\u2019m the kind of person who gladly supports her local independent bookstores and farmers\u2019 markets and food purveyors, links to IndieBound.org instead of Amazon to buy books, and admires the ideals of Open Source Software. 
So, I\u2019m biased towards an ...", 56 | 'url': 'http://datahiveconsulting.com/2014/04/10/indiewebify-me-and-the-knowledge-gap/', 57 | 'syndication': [], 58 | }] 59 | assert result['syndication'] == ['https://twitter.com/aaronpk/status/465247041078034432'] 60 | 61 | 62 | def test_u_in_reply_to(): 63 | # reply with simple u-in-reply-to link from snarfed.org 64 | parsed = load_test('reply_u-in-reply-to') 65 | result = mf2util.interpret_comment( 66 | parsed, 'https://snarfed.org/2014-03-09_re-display-likes-in-a-facepile', 67 | ['https://willnorris.com/2014/03/display-likes-in-a-facepile']) 68 | 69 | assert result['type'] == 'entry' 70 | assert result['name'] == 'Re: Display likes in a facepile' 71 | assert 'oh man, so cool!' in result.get('content') 72 | assert result['published'].replace(tzinfo=None)\ 73 | == datetime(2014, 3, 9, 22, 48, 22) 74 | assert result['published'].utcoffset() == timedelta(hours=-7) 75 | assert result['in-reply-to'] == [{'url': 'https://willnorris.com/2014/03/display-likes-in-a-facepile'}] 76 | assert result['comment_type'] == ['reply'] 77 | 78 | 79 | def test_reply_rsvp(): 80 | parsed = load_test('reply_rsvp') 81 | result = mf2util.interpret_comment( 82 | parsed, 'https://snarfed.org/2014-05-05_homebrew-website-club-3', 83 | ['http://werd.io/2014/homebrew-website-club-4']) 84 | 85 | assert result['type'] == 'entry' 86 | assert result['name'] == 'Homebrew Website Club' 87 | assert '@kylewmahan I usually click through a couple levels up looking to see if any of the URLs up the chain show comments #indieweb' 124 | 125 | assert len(result['like']) == 1 126 | assert result['like'][0]['type'] == 'cite' 127 | assert result['like'][0]['author'] == { 128 | 'name': '', 129 | 'url': 'https://twitter.com/benwerd', 130 | 'photo': 'https://kylewm.com/imageproxy?url=https%3A%2F%2Ftwitter.com%2Fbenwerd%2Fprofile_image%3Fsize%3Doriginal&size=48&sig=fde7ce5635f5ea132a2545ff5c7d3d33', 131 | } 132 | 133 | 134 | def test_article_naive_datetime(): 135 | parsed = load_test('article_naive_datetime') 136 | result = mf2util.interpret( 137 | parsed, 'http://tantek.com/2014/120/b1/markup-people-focused-mobile-communication') 138 | assert result['type'] == 'entry' 139 | assert result['name'] == 'Markup For People Focused Mobile Communication' 140 | assert '

Action labels not app names

' in result['content'] 141 | assert result['published'] == datetime(2014, 4, 30, 12, 11) 142 | assert result['updated'] == datetime(2014, 4, 30, 12, 11) 143 | 144 | 145 | def test_article_two_published_dates(): 146 | """Test for a case that was throwing exceptions. Could not interpret 147 | datetime on posts with two dt-published dates because I was 148 | concatenating them. Should just take the first instead. 149 | """ 150 | parsed = load_test('article_two_published_dates') 151 | result = mf2util.interpret( 152 | parsed, 'article.html') 153 | assert result['type'] == 'entry' 154 | assert result['name'] == 'Test Article with Two Published Dates' 155 | assert result['published'].replace(tzinfo=None) == datetime(2014, 4, 30, 12, 11, 00) 156 | assert result['published'].utcoffset() == timedelta(hours=-8) 157 | 158 | 159 | def test_convert_relative_paths(): 160 | parsed = load_test('relative_paths') 161 | result = mf2util.interpret( 162 | parsed, 'http://example.com/blog/', base_href='../') 163 | assert result['content'] == 'This is an alt text example document with relative paths.' 164 | 165 | 166 | def test_no_p_name(): 167 | parsed = load_test('article_no_p-name') 168 | result = mf2util.interpret( 169 | parsed, 'http://example.com') 170 | assert 'Give me crayons and I will draw a rocketship.' in result['content'] 171 | assert 'name' not in result 172 | 173 | 174 | def test_p_content(): 175 | """make sure p-content (instead of the usual e-content) doesn't cause 176 | us to throw an exception 177 | """ 178 | parsed = {"items": [{"properties": {"author": [{"properties": {"name": ["Kyle"], 179 | "url": ["https://kylewm.com"]}, 180 | "type": ["h-card"], "value": "Kyle"}], 181 | "content": ["Thanks for hosting!"], 182 | "in-reply-to": ["https://snarfed.org/2014-06-16_homebrew-website-club-at-quip"], 183 | "name": ["I'm attending\n Homebrew Website Club at Quip\n Thanks for hosting!\n Kyle"], 184 | "rsvp": ["yes"]}, 185 | "type": ["h-entry"]}], 186 | "rel-urls": {}, "rels": {}} 187 | result = mf2util.interpret(parsed, 'http://kylewm.com/test/rsvp.html') 188 | assert 'Thanks for hosting!' == result.get('content') 189 | 190 | 191 | def test_unusual_properties(): 192 | parsed = load_test('unusual_properties') 193 | result = mf2util.interpret(parsed, 'https://example.com/') 194 | assert 'Rocky Raccoon' == result.get('name') 195 | assert 'https://foo.bar/' == result.get('url') 196 | assert 'https://foo.bar/' == result.get('uid') 197 | 198 | 199 | def test_h_feed_excludes_rel_syndication(): 200 | """Represents a feed that (incorrectly) includes page-scoped 201 | rel=syndication values in the feed itself. If we're not careful, 202 | these values will be slurped into every entry in the feed. 
203 | """ 204 | parsed = { 205 | "items":[{ 206 | "type": ["h-entry"], "properties": { 207 | "name": ["First Post"], 208 | "url": ["http://example.com/first-post"], 209 | "content": [{ 210 | "html": "This is the body of the first post", 211 | "value": "This is the body of the first post", 212 | }], 213 | "syndication": [ 214 | "https://twitter.com/example_com/123456", 215 | "https://www.facebook.com/example.com/123456", 216 | ], 217 | }, 218 | }, { 219 | "type": ["h-event"], "properties": { 220 | "name": ["Second Post"], 221 | "url": ["http://example.com/second-post"], 222 | "content": [{ 223 | "html": "This is the body of the second post", 224 | "value": "This is the body of the second post", 225 | }], 226 | "syndication": [ 227 | "https://twitter.com/example_com/7891011", 228 | "https://www.facebook.com/example.com/7891011", 229 | ], 230 | }, 231 | }], "rels": { 232 | "syndication": [ 233 | "https://twitter.com/example_com/123456", 234 | "https://twitter.com/example_com/7891011", 235 | "https://www.facebook.com/example.com/123456", 236 | "https://www.facebook.com/example.com/7891011" 237 | ], 238 | } 239 | } 240 | result = mf2util.interpret_feed(parsed, 'http://example.com') 241 | assert result['entries'][0]['syndication'] == ["https://twitter.com/example_com/123456", "https://www.facebook.com/example.com/123456"] 242 | assert result['entries'][1]['syndication'] == ["https://twitter.com/example_com/7891011", "https://www.facebook.com/example.com/7891011"] 243 | 244 | 245 | def test_location_hcard(): 246 | """Test the location algorithm with an h-card. 247 | 248 | https://indieweb.org/location#How_to_determine_the_location_of_a_microformat 249 | """ 250 | parsed = load_test('location_h-card') 251 | result = mf2util.interpret(parsed, 'http://example.com/') 252 | assert result['location'] == { 253 | 'name': 'Timeless Coffee Roasters', 254 | 'latitude': '37.83', 255 | 'longitude': '-122.25', 256 | } 257 | 258 | 259 | def test_location_geo(): 260 | """Test the location algorithm with an h-geo.""" 261 | parsed = load_test('location_h-geo') 262 | result = mf2util.interpret(parsed, 'http://example.com/') 263 | assert result['location'] == { 264 | 'altitude': '123.0', 265 | 'latitude': '37.83', 266 | 'longitude': '-122.25', 267 | } 268 | 269 | 270 | def test_location_geo_url(): 271 | """Test the location algorithm with a u-geo geo: URL. 
272 | 273 | http://microformats.org/wiki/microformats2#h-card 274 | https://tools.ietf.org/html/rfc5870 275 | """ 276 | parsed = { 277 | 'items': [{ 278 | 'type': ['h-entry'], 279 | 'properties': { 280 | 'geo': [u'geo:48.2010,16.3695,183;crs=wgs84;u=40'], 281 | }, 282 | }]} 283 | result = mf2util.interpret(parsed, 'http://example.com/') 284 | assert result['location'] == { 285 | 'altitude': '183', 286 | 'latitude': '48.2010', 287 | 'longitude': '16.3695', 288 | } 289 | 290 | parsed['items'][0]['properties']['geo'] = ['geo:48.2010,16.3695'] 291 | result = mf2util.interpret(parsed, 'http://example.com/') 292 | assert result['location'] == { 293 | 'latitude': '48.2010', 294 | 'longitude': '16.3695', 295 | } 296 | 297 | def test_location_adr(): 298 | """Test the location algorithm with an h-adr.""" 299 | parsed = load_test('location_h-adr') 300 | result = mf2util.interpret(parsed, 'http://example.com/') 301 | assert result['location'] == { 302 | 'street-address': '17 Austerstræti', 303 | 'locality': 'Reykjavík', 304 | 'country-name': 'Iceland', 305 | 'postal-code': '107', 306 | 'name': '17 Austerstræti Reykjavík Iceland 107', 307 | } 308 | 309 | 310 | def test_location_top_level(): 311 | """Test the location algorithm with top level properties.""" 312 | parsed = load_test('location_top_level') 313 | result = mf2util.interpret(parsed, 'http://example.com/') 314 | assert result['location'] == { 315 | 'latitude': '37.83', 316 | 'longitude': '-122.25', 317 | } 318 | -------------------------------------------------------------------------------- /tests/test_is_name_a_title.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import mf2util 3 | 4 | PY3 = sys.version_info[0] >= 3 5 | 6 | 7 | def test_is_name_a_title(): 8 | for name, content, expected in [ 9 | # simple 10 | ('this is the content', 'this is the content', False), 11 | ('This is a title', 'This is some content', True), 12 | # common case with no explicit p-name 13 | ('nonsensethe contentnonsense', 'the content', False), 14 | # ignore case, punctuation 15 | ('the content', 'ThE cONTeNT...', False), 16 | # test bytestrings 17 | (b'This is a title', b'This is some content', True), 18 | ]: 19 | assert expected == mf2util.is_name_a_title(name, content) 20 | -------------------------------------------------------------------------------- /tests/test_post_type_discovery.py: -------------------------------------------------------------------------------- 1 | """Tests for post_type_discovery 2 | """ 3 | 4 | import json 5 | import mf2util 6 | 7 | 8 | def test_post_type_discovery(): 9 | for test, implied_type in [ 10 | ('interpret/hwc-event', 'event'), 11 | ('interpret/reply_h-cite', 'reply'), 12 | ('interpret/reply_u-in-reply-to', 'reply'), 13 | ('interpret/reply_rsvp', 'rsvp'), 14 | ('interpret/note_with_comment_and_like', 'note'), 15 | ('interpret/article_naive_datetime', 'article'), 16 | ('interpret/article_non_ascii_content', 'article'), 17 | ('interpret/follow', 'follow'), 18 | ('posttype/tantek_photo', 'photo'), 19 | ('posttype/only_html_content', 'note'), 20 | ('posttype/hcard_no_name', 'person'), 21 | ('posttype/hcard_org', 'org'), 22 | # TODO add more tests 23 | ]: 24 | parsed = json.load(open('tests/' + test + '.json')) 25 | types = (['h-card'] if implied_type in ('person', 'org') 26 | else ['h-entry', 'h-event']) 27 | entry = mf2util.find_first_entry(parsed, types) 28 | assert implied_type == mf2util.post_type_discovery(entry) 29 | 
-------------------------------------------------------------------------------- /tests/test_representative_hcard.py: -------------------------------------------------------------------------------- 1 | """Test representative h-card parsing 2 | """ 3 | 4 | import mf2util 5 | 6 | 7 | def test_url_matches_uid(): 8 | p = { 9 | 'rels': {}, 10 | 'items': [ 11 | { 12 | 'type': ['h-card'], 13 | 'properties': { 14 | 'url': ['http://foo.com/bar', 'http://tilde.club/~foobar'], 15 | 'name': ['Bad'], 16 | } 17 | }, { 18 | 'type': ['h-card'], 19 | 'properties': { 20 | 'url': ['http://foo.com/bar', 'http://tilde.club/~foobar'], 21 | 'uid': ['http://foo.com/bar'], 22 | 'name': ['Good'], 23 | } 24 | }, 25 | ] 26 | } 27 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 28 | assert hcard 29 | assert hcard['properties']['name'][0] == 'Good' 30 | 31 | # removing the uid should prevent us from finding the h-card 32 | del p['items'][1]['properties']['uid'] 33 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 34 | assert not hcard 35 | 36 | 37 | def test_nested_hcard(): 38 | p = { 39 | 'rels': {}, 40 | 'items': [ 41 | { 42 | 'type': ['h-card'], 43 | 'properties': { 44 | 'url': ['http://foo.com/bar', 'http://tilde.club/~foobar'], 45 | 'name': ['Bad'], 46 | } 47 | }, { 48 | 'type': ['h-entry'], 49 | 'children': [ 50 | { 51 | 'type': ['h-card'], 52 | 'properties': { 53 | 'url': ['http://foo.com/bar', 'http://tilde.club/~foobar'], 54 | 'uid': ['http://foo.com/bar'], 55 | 'name': ['Good'], 56 | } 57 | }, 58 | ] 59 | }, 60 | ] 61 | } 62 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 63 | assert hcard 64 | assert hcard['properties']['name'][0] == 'Good' 65 | 66 | 67 | def test_url_matches_rel_me(): 68 | # rel-me points to identity hosted on about.me 69 | p = { 70 | 'rels': { 71 | 'me': ['http://about.me/foobar'], 72 | }, 73 | 'items': [ 74 | { 75 | 'type': ['h-card'], 76 | 'properties': { 77 | 'url': ['http://tilde.club/~foobar'], 78 | 'name': ['Bad'], 79 | } 80 | }, { 81 | 'type': ['h-card'], 82 | 'properties': { 83 | 'url': ['http://about.me/foobar', 'http://tilde.club/~foobar'], 84 | 'name': ['Good'], 85 | } 86 | }, 87 | ] 88 | } 89 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 90 | assert hcard 91 | assert hcard['properties']['name'][0] == 'Good' 92 | 93 | 94 | def test_one_matching_url(): 95 | p = { 96 | 'rels': {}, 97 | 'items': [ 98 | { 99 | 'type': ['h-card'], 100 | 'properties': { 101 | 'url': ['http://tilde.club/~foobar'], 102 | 'name': ['Bad'], 103 | } 104 | }, { 105 | 'type': ['h-card'], 106 | 'properties': { 107 | 'url': ['http://foo.com/bar', 'http://tilde.club/~foobar'], 108 | 'name': ['Good'], 109 | } 110 | }, 111 | ] 112 | } 113 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 114 | assert hcard 115 | assert hcard['properties']['name'][0] == 'Good' 116 | 117 | p['items'].append({ 118 | 'type': ['h-card'], 119 | 'properties': { 120 | 'url': ['http://foo.com/bar', 'http://flickr.com/photos/foobar'], 121 | 'name': ['Too Many Cooks'], 122 | } 123 | }) 124 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 125 | assert not hcard 126 | 127 | def test_hcard_as_a_property(): 128 | """h-card is the p-author of the primary h-feed 129 | """ 130 | p = { 131 | 'rels': {}, 132 | 'items': [ 133 | { 134 | 'type': ['h-feed'], 135 | 'properties': { 136 | 'author': [ 137 | { 138 | 'type': ['h-card'], 139 | 'properties': { 140 | 'name': ['Elliot Alderson'], 141 | 'url': ['http://foo.com/bar'] 142 | } 143 | } 144 | ] 145 | } 146 | } 
147 | ] 148 | } 149 | hcard = mf2util.representative_hcard(p, 'http://foo.com/bar') 150 | assert hcard 151 | assert hcard['properties']['name'][0] == 'Elliot Alderson' 152 | --------------------------------------------------------------------------------
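
The test modules above double as the clearest usage reference for the library's discovery helpers. The following is a brief illustrative sketch, not a file from the repository: it combines the calls exercised by `tests/test_post_type_discovery.py` and `tests/test_representative_hcard.py`, and assumes it is run from the repository root so the fixture path resolves (any mf2py parse result could stand in for the loaded fixture).

```python
import json

import mf2util

# Load a pre-parsed microformats2 fixture exactly as the tests do
# (the output of an mf2py parse would work in its place).
with open('tests/posttype/tantek_photo.json') as f:
    parsed = json.load(f)

# Find the first h-entry/h-event and classify the post;
# the test suite expects 'photo' for this particular fixture.
entry = mf2util.find_first_entry(parsed, ['h-entry', 'h-event'])
if entry:
    print(mf2util.post_type_discovery(entry))

# Look for the representative h-card for a given page URL;
# this may return None if no card matches the heuristics.
hcard = mf2util.representative_hcard(parsed, 'http://tantek.com/')
if hcard:
    print(hcard['properties']['name'][0])
```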