├── docs ├── index.md ├── changelog.md ├── parser.md ├── banner.png └── stylesheets │ └── styles.css ├── mf2py ├── temp_fixes.py ├── backcompat-rules │ ├── geo.json │ ├── hfeed.json │ ├── adr.json │ ├── hresume.json │ ├── hproduct.json │ ├── hentry.json │ ├── hrecipe.json │ ├── vevent.json │ ├── hreview-aggregate.json │ ├── hreview.json │ └── vcard.json ├── version.py ├── __init__.py ├── mf_helpers.py ├── mf2_classes.py ├── datetime_helpers.py ├── metaformats.py ├── value_class_pattern.py ├── parse_property.py ├── backcompat.py ├── dom_helpers.py ├── implied_properties.py └── parser.py ├── test ├── examples │ ├── rel_enclosure.html │ ├── tag_whitespace_inside_p_value.html │ ├── link_with_u-url.html │ ├── backcompat │ │ ├── hentry_with_rel_tag_entry_title.html │ │ ├── nested_mf2_in_mf1.html │ │ ├── nested_mf1_in_mf2.html │ │ ├── nested_mf1_in_mf2_e_content.html │ │ ├── hentry.html │ │ ├── ignore_mf1_properties_in_mf2_root.html │ │ ├── ignore_mf2_properties_in_mf1_root.html │ │ ├── no_implied_properties_mf1_root.html │ │ ├── ignore_mf1_root_if_mf2_present.html │ │ ├── hentry_content_html.html │ │ ├── hentry_with_rel_bookmark.html │ │ ├── hreview_nested_card_event_product.html │ │ ├── hfeed_with_rel_tag.html │ │ ├── hentry_with_rel_tag.html │ │ ├── hrecipe_with_rel_tag.html │ │ ├── hproduct.html │ │ ├── hproduct_hreview_nested.html │ │ ├── hreview_hentry_with_rel_tag_bookmark.html │ │ ├── hreview_with_rel_tag_bookmark.html │ │ └── feed_with_rel_bookmark.html │ ├── implied_properties │ │ ├── implied_name_empty_alt.html │ │ ├── implied_name_alt.html │ │ ├── implied_photo_relative_url.html │ │ ├── simple_person_reference_implied.html │ │ ├── stop_implied_name_e_content.html │ │ ├── stop_implied_name_p_content.html │ │ ├── stop_implied_name_nested_h.html │ │ ├── implied_relative_datetimes.html │ │ ├── implied_properties.html │ │ ├── implied_properties_silo_pub.html │ │ ├── stop_implied_url.html │ │ ├── implied_url.html │ │ └── implied_photo.html │ ├── rsvp.html │ ├── 
filter_roots_custom.html │ ├── template_tag.html │ ├── empty.html │ ├── string_stripping.html │ ├── base.html │ ├── class_names_format.html │ ├── simple_person_reference_same_element.html │ ├── complex_e_content.html │ ├── parse_id.html │ ├── filter_roots.html │ ├── nested_hcards.html │ ├── simple_person_reference.html │ ├── metaformats_html_meta.html │ ├── template_tag_inside_e_value.html │ ├── hfeed_on_html_tag.html │ ├── img_with_srcset_with_base.html │ ├── rel.html │ ├── person_with_url.html │ ├── plaintext_p_whitespace.html │ ├── relative_url_in_e.html │ ├── hcard_with_empty_url.html │ ├── embedded.html │ ├── nested_complex_values.html │ ├── value_class_person.html │ ├── metaformats_twitter.html │ ├── festivus.html │ ├── broken_url.html │ ├── plaintext_img_whitespace.html │ ├── ordering_dedup.html │ ├── language.html │ ├── nested_multiple_classnames.html │ ├── eras.html │ ├── nested_values.html │ ├── area.html │ ├── metaformats_ogp.html │ ├── u-test.html │ ├── img_with_alt.html │ ├── test_src_equiv.html │ ├── link-rel-minimal.html │ ├── value_name_whitespace.html │ ├── img_with_srcset.html │ ├── u_all_cases.html │ └── datetimes.html ├── test_dom_addins.py ├── test_suite.py └── test_parser.py ├── .gitignore ├── Makefile ├── CONTRIBUTORS.md ├── pyproject.toml ├── LICENSE ├── mkdocs.yml ├── CONTRIBUTING.md ├── .github └── workflows │ └── tests.yml ├── README.md └── CHANGELOG.md /docs/index.md: -------------------------------------------------------------------------------- 1 | ../README.md -------------------------------------------------------------------------------- /docs/changelog.md: -------------------------------------------------------------------------------- 1 | ../CHANGELOG.md -------------------------------------------------------------------------------- /docs/parser.md: -------------------------------------------------------------------------------- 1 | # Parser Object 2 | 3 | :::mf2py.Parser 4 | 
-------------------------------------------------------------------------------- /docs/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microformats/mf2py/HEAD/docs/banner.png -------------------------------------------------------------------------------- /docs/stylesheets/styles.css: -------------------------------------------------------------------------------- 1 | :root { 2 | --md-primary-fg-color: #6CA300; 3 | } -------------------------------------------------------------------------------- /mf2py/temp_fixes.py: -------------------------------------------------------------------------------- 1 | def rm_templates(doc): 2 | for el in doc.find_all("template"): 3 | el.extract() 4 | -------------------------------------------------------------------------------- /test/examples/rel_enclosure.html: -------------------------------------------------------------------------------- 1 |

my movie

2 | -------------------------------------------------------------------------------- /test/examples/tag_whitespace_inside_p_value.html: -------------------------------------------------------------------------------- 1 |
2 |
foo bar
3 |
4 | -------------------------------------------------------------------------------- /test/examples/link_with_u-url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.swp 3 | *.pyc 4 | .idea/ 5 | .eggs/ 6 | build/ 7 | dist/ 8 | local/ 9 | mf2py.egg-info/ 10 | nbproject/ 11 | venv/ 12 | *~ 13 | poetry.lock 14 | site/ 15 | -------------------------------------------------------------------------------- /test/examples/backcompat/hentry_with_rel_tag_entry_title.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | -------------------------------------------------------------------------------- /test/examples/implied_properties/implied_name_empty_alt.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | @kylewmahan 4 | 5 | -------------------------------------------------------------------------------- /test/examples/implied_properties/implied_name_alt.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | Avatar of Stephen 4 |
5 |
6 | -------------------------------------------------------------------------------- /test/examples/rsvp.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | is attending. 4 |
5 | -------------------------------------------------------------------------------- /test/examples/filter_roots_custom.html: -------------------------------------------------------------------------------- 1 |

Custom root filter

2 |
fnord
3 |
fnord
4 |
fnord
5 |
fnord
6 |
fnord
7 | -------------------------------------------------------------------------------- /test/examples/implied_properties/implied_photo_relative_url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Jane Doe 4 | Jane Doe 5 | 6 | -------------------------------------------------------------------------------- /test/examples/template_tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Template tag test 5 | 6 | 7 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /test/examples/empty.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello World 6 | 7 | 8 |

Hello world!

9 | 10 | 11 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/geo.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-geo" 4 | ], 5 | "properties": { 6 | "latitude": [ 7 | "p-latitude" 8 | ], 9 | "longitude": [ 10 | "p-longitude" 11 | ] 12 | } 13 | } -------------------------------------------------------------------------------- /test/examples/string_stripping.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | String Stripping example 5 | 6 | 7 |
8 | Tom Morris 9 |
10 | 11 | 12 | -------------------------------------------------------------------------------- /test/examples/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello World 6 | 7 | 8 | 9 |

Hello world!

10 | 11 | 12 | -------------------------------------------------------------------------------- /test/examples/class_names_format.html: -------------------------------------------------------------------------------- 1 |
2 | URL 3 | name 4 |
5 | 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | install: 2 | poetry install 3 | tests: 4 | poetry run pytest -s -vv --doctest-modules --doctest-glob README* 5 | lint: 6 | poetry run black . 7 | poetry run isort . 8 | docs_dev: 9 | poetry run mkdocs serve 10 | docs_deploy: 11 | poetry run mkdocs gh-deploy 12 | publish: 13 | poetry publish --build 14 | -------------------------------------------------------------------------------- /test/examples/simple_person_reference_same_element.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Simple_person_reference 6 | 7 | 8 | Frances Berriman 9 | 10 | 11 | -------------------------------------------------------------------------------- /mf2py/version.py: -------------------------------------------------------------------------------- 1 | # Define the version number. This class is exec'd by setup.py to read 2 | # the value without loading mf2py (loading mf2py is bad if its dependencies 3 | # haven't been installed yet, which is common during setup) 4 | 5 | import importlib.metadata 6 | 7 | __version__ = importlib.metadata.metadata("mf2py")["Version"] 8 | -------------------------------------------------------------------------------- /test/examples/complex_e_content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Complex e-content test 6 | 7 | 8 |
9 |

Hello

10 | 11 | 12 | -------------------------------------------------------------------------------- /test/examples/implied_properties/simple_person_reference_implied.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Simple_person_reference 6 | 7 | 8 | Frances Berriman 9 | 10 | 11 | -------------------------------------------------------------------------------- /test/examples/parse_id.html: -------------------------------------------------------------------------------- 1 |
2 |

Recent Articles

3 |
Lorem Ipsum
4 |
empty id is invalid and should not be parsed
5 |
Max Mustermann
6 |
-------------------------------------------------------------------------------- /test/examples/backcompat/nested_mf2_in_mf1.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Nested mf2 in mf1 5 | 6 | 7 |
8 | Correct name 9 | 10 | Correct summary 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/filter_roots.html: -------------------------------------------------------------------------------- 1 |

Tailwind root filter

2 |
fnord
3 |
fnord
4 |
fnord
5 |
fnord
6 |
fnord
7 |
fnord
8 |
fnord
9 |
fnord
10 | -------------------------------------------------------------------------------- /test/examples/nested_hcards.html: -------------------------------------------------------------------------------- 1 | 5 |
6 |

KP

7 |

KP1

8 |
9 | -------------------------------------------------------------------------------- /test/examples/backcompat/nested_mf1_in_mf2.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Nested mf1 in mf2 5 | 6 | 7 |
8 | Correct name 9 | 10 | Correct summary 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/simple_person_reference.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Simple_person_reference 6 | 7 | 8 | 9 | Frances Berriman 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /test/test_dom_addins.py: -------------------------------------------------------------------------------- 1 | from mf2py.parser import Parser 2 | 3 | 4 | def test_getElementsByClassName(): 5 | p = Parser(doc=open("test/examples/person_with_url.html")) 6 | dom = p.__doc__ 7 | assert len(dom.find_all(class_="u-url")) == 1 8 | expected_el = dom.find_all(class_="u-url")[0] 9 | assert expected_el["class"] == ["u-url"] 10 | -------------------------------------------------------------------------------- /test/examples/metaformats_html_meta.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello World 6 | 7 | 8 | 9 | 10 |

Hello world!

11 | 12 | 13 | -------------------------------------------------------------------------------- /test/examples/template_tag_inside_e_value.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Template tag test 5 | 6 | 7 |
8 |
This is a Test with a template tag after this:
9 |
10 | 11 | 12 | -------------------------------------------------------------------------------- /test/examples/hfeed_on_html_tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | html tag with class h-feed 4 | 5 | 6 | 7 |
8 |

entry1

9 |
10 | 11 |
12 |

entry2

13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test/examples/img_with_srcset_with_base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | Elva dressed as a fairy 11 |
12 | -------------------------------------------------------------------------------- /test/examples/rel.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | post 1 4 | post 2 5 | French mobile homepage 9 | -------------------------------------------------------------------------------- /test/examples/person_with_url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Simple_person_reference 6 | 7 | 8 | 9 | Tom Morris 10 | tommorris.org 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /test/examples/plaintext_p_whitespace.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

foo

barbaz 4 |
5 |
6 | 7 |
8 |
9 |

foo

bar baz 10 |
11 |
12 | 13 |
14 |
15 | foo bar

baz

16 |
17 |
18 | -------------------------------------------------------------------------------- /test/examples/backcompat/nested_mf1_in_mf2_e_content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Nested mf1 in mf2 e-content 5 | 6 | 7 |
8 |
9 | Correct name 10 | 11 | Correct summary 12 |
13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test/examples/relative_url_in_e.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Relative URLs in e-content 6 | 7 | 8 | 9 |
10 |

Cat

11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /mf2py/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Microformats2 is a general way to mark up any HTML document with 3 | classes and properties. This library parses structured data from 4 | a microformatted HTML document and returns a well-formed JSON 5 | dictionary. 6 | """ 7 | 8 | from .mf_helpers import get_url 9 | from .parser import Parser, parse 10 | from .version import __version__ 11 | 12 | __all__ = ["Parser", "parse", "get_url", "__version__"] 13 | -------------------------------------------------------------------------------- /test/examples/hcard_with_empty_url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 |
8 | Cheri Oteri 9 | Blank URL 10 |
11 |
12 | Maya Rudolph 13 | 14 |
15 | 16 | 17 | -------------------------------------------------------------------------------- /test/examples/backcompat/hentry.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat Properties 6 | 7 | 8 | 9 |
10 | Tom Morris 11 | A Title 12 |
Some Content
13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test/examples/backcompat/ignore_mf1_properties_in_mf2_root.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Ignore mf2 properties for mf1 roots 5 | 6 | 7 |
8 | Correct name 9 | Wrong name 10 | 11 | Correct summary 12 | Wrong summary 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test/examples/backcompat/ignore_mf2_properties_in_mf1_root.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Ignore mf2 properties for mf1 roots 5 | 6 | 7 |
8 | Wrong name 9 | Correct name 10 | 11 | Wrong summary 12 | Correct summary 13 |
14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /test/examples/embedded.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello World 6 | 7 | 8 |
9 |

A post with embedded markup in

10 | 11 |
12 |

Blah blah blah blah blah.

13 |

Blah.

14 |

Blah blah blah.

15 |
16 |
17 | 18 | 19 | -------------------------------------------------------------------------------- /test/examples/backcompat/no_implied_properties_mf1_root.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | No implied properties for mf1 roots 5 | 6 | 7 |
8 | This should not be in the name property 9 | http://example.com should not be in url property 10 | http://example.com/photo should not be in photo property 11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/nested_complex_values.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Nested complex h-* value parsing test 5 | 6 | 7 |
8 |
9 | Example Author 10 | Example Post 11 |
12 |
13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/value_class_person.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Value Class Pattern 6 | 7 | 8 |
9 |
10 | Tom Morris 11 |
12 |
13 | fake 14 | +44 1234 567890 15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /test/examples/implied_properties/stop_implied_name_e_content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Stop implied name due to e-content 6 | 7 | 8 |
9 |
10 |

Wanted content.

11 |
12 |
13 |

Footer to be ignored.

14 |
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /test/examples/implied_properties/stop_implied_name_p_content.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Stop implied name due to p-content 6 | 7 | 8 |
9 |
10 |

Wanted content.

11 |
12 |
13 |

Footer to be ignored.

14 |
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /test/examples/metaformats_twitter.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello World 6 | 7 | 8 | 9 | 10 | 11 | 12 |

Hello world!

13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/backcompat/ignore_mf1_root_if_mf2_present.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Ignore mf1 root class if mf2 root present test 5 | 6 | 7 |
8 |

My awesome event

9 | 10 |

This will be an awesome event

11 |
12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/festivus.html: -------------------------------------------------------------------------------- 1 | 2 |

Jerry

3 |

Happy Festivus!

4 |

Frank

5 |

It's time for the Festivus feats of 6 | strength.

7 |

The tradition of Festivus begins with 8 | the airing of grievances.

9 |

Cosmo

10 |

It's a Festivus miracle!

11 | -------------------------------------------------------------------------------- /test/examples/broken_url.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Hello World 4 | 5 | 6 |
7 |

urls with broken domains

8 | Should not change: http://www.[w3.org/ 9 | Should be relative to base url 10 | 11 |
12 | 13 | -------------------------------------------------------------------------------- /CONTRIBUTORS.md: -------------------------------------------------------------------------------- 1 | # Contributors 2 | 3 | - Tom Morris https://tommorris.org 4 | - Barnaby Walters https://waterpigs.co.uk 5 | - Kartik Prabhu https://kartikprabhu.com 6 | - Kyle Mahan https://github.com/kylewm 7 | - Kevin Marks https://www.kevinmarks.com 8 | - James https://jamesg.blog 9 | - Angelo Gladding https://ragt.ag 10 | - Paweł Miech https://pawelmhm.github.io 11 | - Sven Knebel https://www.svenknebel.de 12 | -------------------------------------------------------------------------------- /test/examples/backcompat/hentry_content_html.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat Properties 6 | 7 | 8 | 9 |
10 |
11 |

This is a summary

12 |

This is inside content.

13 |
14 |
15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /test/examples/plaintext_img_whitespace.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |
5 | selfieAt some tourist spot 6 |
7 |
8 | 9 |
10 |
11 | At another tourist spot 12 |
13 |
14 | 15 |
16 |
17 | At yet another tourist spot 18 |
19 |
20 | -------------------------------------------------------------------------------- /test/examples/implied_properties/stop_implied_name_nested_h.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Stop implied name due to embedded h-* 6 | 7 | 8 |
9 |
10 |

I really like Microformats

11 |
12 |
13 |

Footer to be ignored.

14 |
15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /test/examples/ordering_dedup.html: -------------------------------------------------------------------------------- 1 | 10 | 11 | -------------------------------------------------------------------------------- /test/examples/backcompat/hentry_with_rel_bookmark.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat rel=bookmark 6 | 7 | 8 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hfeed.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-feed" 4 | ], 5 | "properties": { 6 | "category": [ 7 | "p-category" 8 | ], 9 | "site-description": [ 10 | "p-summary" 11 | ], 12 | "description": [ 13 | "p-summary" 14 | ], 15 | "site-title": [ 16 | "p-name" 17 | ], 18 | "title": [ 19 | "p-name" 20 | ] 21 | }, 22 | "rels": { 23 | "tag": [ 24 | "p-category" 25 | ] 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /test/examples/language.html: -------------------------------------------------------------------------------- 1 | 2 |
3 |

Romero

4 |
5 |
6 |

Un titolo italiano

7 |
With an english summary
8 |
Con un riassunto italiano
9 |
10 |
11 |

En svensk titel

12 |
With an english summary
13 |
Och svensk huvudtext
14 |
15 | 16 | -------------------------------------------------------------------------------- /test/examples/backcompat/hreview_nested_card_event_product.html: -------------------------------------------------------------------------------- 1 | 17 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/adr.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-adr" 4 | ], 5 | "properties": { 6 | "locality": [ 7 | "p-locality" 8 | ], 9 | "region": [ 10 | "p-region" 11 | ], 12 | "extended-address": [ 13 | "p-extended-address" 14 | ], 15 | "post-office-box": [ 16 | "p-post-office-box" 17 | ], 18 | "street-address": [ 19 | "p-street-address" 20 | ], 21 | "postal-code": [ 22 | "p-postal-code" 23 | ], 24 | "country-name": [ 25 | "p-country-name" 26 | ] 27 | } 28 | } -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hresume.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-resume" 4 | ], 5 | "properties": { 6 | "experience": [ 7 | "h-event", 8 | "p-experience" 9 | ], 10 | "summary": [ 11 | "p-summary" 12 | ], 13 | "affiliation": [ 14 | "p-affiliation", 15 | "h-card" 16 | ], 17 | "contact": [ 18 | "h-card", 19 | "p-contact" 20 | ], 21 | "skill": [ 22 | "p-skill" 23 | ], 24 | "education": [ 25 | "h-event", 26 | "p-education" 27 | ] 28 | } 29 | } -------------------------------------------------------------------------------- /test/examples/nested_multiple_classnames.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Nested + Multiple Classnames 6 | 7 | 8 |
9 |

A BLOG POST

10 |
11 | Tom Morris 12 | tommorris.org 13 |

14 | London 15 |

16 |

17 |
18 |

Some Citation

19 |
20 | 21 | 22 | -------------------------------------------------------------------------------- /test/examples/eras.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 |

Excited for the Taylor Swift Eras Tour

10 |

Published by James 11 |

12 |
Eras tour poster
13 |

I can't decide which era is my favorite.

14 |

music, Taylor Swift

15 |
16 | 17 | 18 | -------------------------------------------------------------------------------- /test/examples/nested_values.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Nested h-* value parsing test 5 | 6 | 7 |
8 |

9 | 10 | Kyle 11 |

12 |

13 | 14 | Liked foobar. 15 |

16 | 17 | Oh by the way I should mention 18 | George 19 | who will not have a nested value. 20 |
21 | 22 | 23 | -------------------------------------------------------------------------------- /test/examples/area.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hCard Area Example 6 | 7 | 8 |

Hey Brian.

9 | Brian Suda at dConstruct 10 | 11 | Brian Suda 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /test/examples/implied_properties/implied_relative_datetimes.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Implied_relative_datetimes 6 | 7 | 8 |
9 |
This is a post.
10 | 11 | (updated: ) 12 |
13 |

14 | Explanation: this is to test for the behaviour described 15 | here. 16 |

17 | 18 | 19 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hproduct.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-product" 4 | ], 5 | "properties": { 6 | "category": [ 7 | "p-category" 8 | ], 9 | "price": [ 10 | "p-price" 11 | ], 12 | "description": [ 13 | "p-description" 14 | ], 15 | "url": [ 16 | "u-url" 17 | ], 18 | "photo": [ 19 | "u-photo" 20 | ], 21 | "brand": [ 22 | "p-brand" 23 | ], 24 | "identifier": [ 25 | "u-identifier" 26 | ], 27 | "review": [ 28 | "p-review", 29 | "h-review" 30 | ], 31 | "fn": [ 32 | "p-name" 33 | ] 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /test/test_suite.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os.path 4 | import sys 5 | 6 | from test_parser import check_unicode 7 | 8 | import mf2py 9 | 10 | 11 | def test_mf2tests(): 12 | allfiles = glob.glob(os.path.join(".", "testsuite", "tests", "*", "*", "*.json")) 13 | for jsonfile in allfiles: 14 | htmlfile = jsonfile[:-4] + "html" 15 | with open(htmlfile) as f: 16 | p = mf2py.parse(doc=f, url="http://example.com") 17 | check_unicode(htmlfile, p) 18 | with open(jsonfile) as jsonf: 19 | try: 20 | s = json.load(jsonf) 21 | except: 22 | s = "bad file: " + jsonfile + sys.exc_info()[0] 23 | check_mf2(htmlfile, p, s) 24 | 25 | 26 | def check_mf2(htmlfile, p, s): 27 | # TODO ignore extra keys in p that are not in s 28 | assert p == s 29 | -------------------------------------------------------------------------------- /test/examples/metaformats_ogp.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello World 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 |

Hello world!

19 | 20 | 21 | -------------------------------------------------------------------------------- /test/examples/backcompat/hfeed_with_rel_tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat rel=tag on hfeed 6 | 7 | 8 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /test/examples/backcompat/hentry_with_rel_tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat rel=tag on hentry 6 | 7 | 8 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /test/examples/backcompat/hrecipe_with_rel_tag.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat rel=tag on hrecipe 6 | 7 | 8 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hentry.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-entry" 4 | ], 5 | "properties": { 6 | "category": [ 7 | "p-category" 8 | ], 9 | "entry-title": [ 10 | "p-name" 11 | ], 12 | "published": [ 13 | "dt-published" 14 | ], 15 | "entry-content": [ 16 | "e-content" 17 | ], 18 | "entry-summary": [ 19 | "p-summary" 20 | ], 21 | "author": [ 22 | "p-author", 23 | "h-card" 24 | ], 25 | "geo": [ 26 | "p-geo", 27 | "h-geo" 28 | ], 29 | "updated": [ 30 | "dt-updated" 31 | ] 32 | }, 33 | "rels": { 34 | "bookmark": [ 35 | "u-url" 36 | ], 37 | "tag": [ 38 | "p-category" 39 | ] 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /test/examples/backcompat/hproduct.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hProduct Backcompat Test 5 | 6 | 7 |
8 |

Tom's Magical Quack Tincture

9 |
10 |
Brand
11 |
Quacktastic Products
12 |
Category
13 |
bullshit
14 |
Price
15 |
£299.99
16 |
Description
17 |
Magical tasty sugar pills that don't do anything.
18 |
Identifier
19 |
BULLSHIT-001
20 |
21 |

22 | I'm a gullible idiot and I love giving money to random developers on the internet 23 | who are pretending to be quacks for a laugh while writing tests. I love this product! 24 |

25 |
26 | 27 | 28 | -------------------------------------------------------------------------------- /test/examples/implied_properties/implied_properties.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Implied Properties 6 | 7 | 8 | Tom Morris 9 | 10 | Tom Morris 11 | 12 | Tom Morris 13 | 14 | Tom Morris 15 | 16 | Tom Morris 17 | 18 | Tom Morris 19 | 20 | 21 | Tom Morris 22 | 23 | 24 | -------------------------------------------------------------------------------- /test/examples/backcompat/hproduct_hreview_nested.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | hProduct Backcompat Test 5 | 6 | 7 |
8 |

Tom's Magical Quack Tincture

9 |
10 |
Brand
11 |
Quacktastic Products
12 |
Category
13 |
bullshit
14 |
Price
15 |
£299.99
16 |
Description
17 |
Magical tasty sugar pills that don't do anything.
18 |
Identifier
19 |
BULLSHIT-001
20 |
21 |

22 | I'm a gullible idiot and I love giving money to random developers on the internet 23 | who are pretending to be quacks for a laugh while writing tests. I love this product! 24 |

25 |
26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hrecipe.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-recipe" 4 | ], 5 | "properties": { 6 | "nutrition": [ 7 | "p-nutrition" 8 | ], 9 | "yield": [ 10 | "p-yield" 11 | ], 12 | "author": [ 13 | "p-author", 14 | "h-card" 15 | ], 16 | "duration": [ 17 | "dt-duration" 18 | ], 19 | "photo": [ 20 | "u-photo" 21 | ], 22 | "instructions": [ 23 | "e-instructions" 24 | ], 25 | "summary": [ 26 | "p-summary" 27 | ], 28 | "fn": [ 29 | "p-name" 30 | ], 31 | "ingredient": [ 32 | "p-ingredient" 33 | ], 34 | "category": [ 35 | "p-category" 36 | ] 37 | }, 38 | "rels": { 39 | "tag": [ 40 | "p-category" 41 | ] 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/vevent.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-event" 4 | ], 5 | "properties": { 6 | "attendee": [ 7 | "p-attendee" 8 | ], 9 | "description": [ 10 | "p-description" 11 | ], 12 | "duration": [ 13 | "dt-duration" 14 | ], 15 | "dtend": [ 16 | "dt-end" 17 | ], 18 | "dtstart": [ 19 | "dt-start" 20 | ], 21 | "geo": [ 22 | "p-location h-geo" 23 | ], 24 | "organizer": [ 25 | "p-organizer" 26 | ], 27 | "category": [ 28 | "p-category" 29 | ], 30 | "url": [ 31 | "u-url" 32 | ], 33 | "summary": [ 34 | "p-name" 35 | ], 36 | "contact": [ 37 | "p-contact" 38 | ], 39 | "location": [ 40 | "p-location" 41 | ] 42 | } 43 | } -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hreview-aggregate.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-review-aggregate" 4 | ], 5 | "properties": { 6 | "rating": [ 7 | "p-rating" 8 | ], 9 | "description": [ 10 | "p-description" 11 | ], 12 | "photo": [ 13 | 
"u-photo" 14 | ], 15 | "worst": [ 16 | "p-worst" 17 | ], 18 | "reviewer": [ 19 | "p-reviewer", 20 | "p-author", 21 | "h-card" 22 | ], 23 | "best": [ 24 | "p-best" 25 | ], 26 | "count": [ 27 | "p-count" 28 | ], 29 | "votes": [ 30 | "p-votes" 31 | ], 32 | "dtreviewed": [ 33 | "dt-reviewed" 34 | ], 35 | "url": [ 36 | "u-url" 37 | ], 38 | "summary": [ 39 | "p-name" 40 | ], 41 | "fn": [ 42 | "p-item", 43 | "h-item", 44 | "p-name" 45 | ] 46 | } 47 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "mf2py" 3 | version = "2.0.1" 4 | description = "Microformats parser" 5 | readme = "README.md" 6 | authors = ["Tom Morris "] 7 | license = "MIT" 8 | classifiers = [ 9 | "Intended Audience :: Developers", 10 | "License :: OSI Approved :: MIT License", 11 | "Programming Language :: Python :: 3", 12 | "Topic :: Text Processing :: Markup :: HTML" 13 | ] 14 | 15 | [tool.poetry.dependencies] 16 | python = ">=3.8" 17 | html5lib = "^1.1" 18 | requests = "^2.28.2" 19 | beautifulsoup4 = "^4.11.1" 20 | 21 | [tool.poetry.group.dev.dependencies] 22 | lxml = "^4.9.2" 23 | pytest = "^7.2.1" 24 | black = "^23.3.0" 25 | isort = "^5.12.0" 26 | mkdocs = "^1.5.3" 27 | mkdocs-material = "^9.5.0" 28 | mkdocstrings = {extras = ["python"], version = "^0.24.0"} 29 | 30 | [tool.pytest.ini_options] 31 | doctest_optionflags = "NORMALIZE_WHITESPACE IGNORE_EXCEPTION_DETAIL" 32 | 33 | [tool.isort] 34 | profile = "black" 35 | 36 | [build-system] 37 | requires = ["poetry-core>=1.0.0"] 38 | build-backend = "poetry.core.masonry.api" 39 | -------------------------------------------------------------------------------- /mf2py/mf_helpers.py: -------------------------------------------------------------------------------- 1 | # don't need anymore defer to mf2util instead (mf2util does not have this functionality) 2 | 3 | 4 | def get_url(mf): 5 | """Given a 
property value that may be a list of simple URLs or complex 6 | h-* dicts (with a url property), extract a list of URLs. This is useful 7 | when parsing e.g., in-reply-to. 8 | 9 | Args: 10 | mf (string or dict): URL or h-cite-style dict 11 | 12 | Returns: 13 | list: a list of URLs 14 | """ 15 | 16 | urls = [] 17 | for item in mf: 18 | if isinstance(item, str): 19 | urls.append(item) 20 | elif isinstance(item, dict) and any( 21 | x.startswith("h-") for x in item.get("type", []) 22 | ): 23 | urls.extend(item.get("properties", {}).get("url", [])) 24 | 25 | return urls 26 | 27 | 28 | def unordered_list(l): 29 | """given a list, returns another list with unique and alphabetically sorted elements. 30 | use for HTML attributes that have no semantics to their order e.g. class, rel. 31 | """ 32 | return sorted(set(l)) 33 | -------------------------------------------------------------------------------- /test/examples/u-test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Hello World 5 | 6 | 7 | 8 | 21 | 22 | -------------------------------------------------------------------------------- /test/examples/img_with_alt.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Photo with alt attribute 6 | 7 | 8 |
9 | name 10 | 11 |
12 | 13 |
14 | name 15 | alt text 16 |
17 | 18 |
19 | name 20 | 21 |
22 | 23 |
24 | name 25 | 26 |
27 | 28 |
29 | name 30 | alt text 31 |
32 | 33 |
34 | name 35 | 36 |
37 | 38 | 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, 2014 Tom Morris and contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /test/examples/test_src_equiv.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | audio/video/source equiv 7 | 8 | 9 | 10 |
11 |

Alice

12 | 13 |
14 | 15 |
16 |

Bob

17 |
19 | 20 |
21 |

Clarissa

22 |
24 | 25 |
26 |

David

27 | 30 |
31 | 32 |
33 |

David

34 | 37 |
38 | 39 | 40 | -------------------------------------------------------------------------------- /test/examples/implied_properties/implied_properties_silo_pub.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | Micropub proxy for 15 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /test/examples/backcompat/hreview_hentry_with_rel_tag_bookmark.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat rel=tag and rel=bookmark on hreview and hentry 6 | 7 | 8 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: mf2py 2 | site_url: https://microformats.github.io/mf2py 3 | site_author: Microformats 4 | site_description: A microformats2 parser written in Python. 
5 | repo_name: microformats/mf2py 6 | repo_url: https://github.com/microformats/mf2py 7 | edit_uri: https://github.com/microformats/mf2py/tree/main/docs 8 | copyright: MIT License 9 | 10 | extra_css: 11 | - stylesheets/styles.css 12 | 13 | nav: 14 | - Home: index.md 15 | - Parser Object: parser.md 16 | - Change Log: changelog.md 17 | 18 | theme: 19 | name: 'material' 20 | logo: https://microformats.org/microformats-logo.png 21 | favicon: https://microformats.org/microformats-logo.png 22 | palette: 23 | # Palette for light mode 24 | - scheme: default 25 | primary: 'custom' 26 | toggle: 27 | icon: material/brightness-7 28 | name: Switch to dark mode 29 | font: 30 | text: Roboto 31 | code: Roboto Mono 32 | 33 | plugins: 34 | - mkdocstrings 35 | - search 36 | 37 | markdown_extensions: 38 | - admonition 39 | - pymdownx.details 40 | - pymdownx.superfences 41 | - attr_list 42 | - md_in_html 43 | - pymdownx.tabbed: 44 | alternate_style: true 45 | - toc: 46 | permalink: true 47 | -------------------------------------------------------------------------------- /mf2py/mf2_classes.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | _mf2_classes_re = re.compile("(p|e|u|dt|h)-((:?[a-z0-9]+-)?[a-z]+(:?-[a-z]+)*)$") 4 | _mf2_roots_re = re.compile("h-(:?[a-z0-9]+-)?[a-z]+(:?-[a-z]+)*$") 5 | _mf2_properties_re = re.compile("(p|e|u|dt)-(:?[a-z0-9]+-)?[a-z]+(:?-[a-z]+)*$") 6 | _mf2_e_properties_re = re.compile("e-(:?[a-z0-9]+-)?[a-z]+(:?-[a-z]+)*$") 7 | 8 | CONFLICTING_ROOTS_TAILWIND = {"auto", "fit", "full", "max", "min", "px", "screen"} 9 | 10 | 11 | def filter_classes(classes, regex=_mf2_classes_re): 12 | """detect classes that are valid names for mf2, sort in dictionary by prefix""" 13 | 14 | types = {x: set() for x in ("u", "p", "dt", "e", "h")} 15 | for c in classes: 16 | match = regex.match(c) 17 | if match: 18 | if c[0] == "h": 19 | types["h"].add(c) 20 | else: 21 | types[match.group(1)].add(match.group(2)) 22 | return 
types 23 | 24 | 25 | def root(classes, filtered_roots): 26 | return { 27 | c for c in classes if _mf2_roots_re.match(c) and c[2:] not in filtered_roots 28 | } 29 | 30 | 31 | def is_property_class(class_): 32 | return _mf2_properties_re.match(class_) 33 | 34 | 35 | def has_embedded_class(classes): 36 | return any(_mf2_e_properties_re.match(c) for c in classes) 37 | -------------------------------------------------------------------------------- /test/examples/backcompat/hreview_with_rel_tag_bookmark.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Backcompat rel=tag and rel=bookmark on hreview 6 | 7 | 8 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /mf2py/backcompat-rules/hreview.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": [ 3 | "h-review" 4 | ], 5 | "properties": { 6 | "rating": [ 7 | "p-rating" 8 | ], 9 | "worst": [ 10 | "p-worst" 11 | ], 12 | "dtreviewed": [ 13 | "dt-reviewed" 14 | ], 15 | "reviewer": [ 16 | "p-author", 17 | "h-card" 18 | ], 19 | "url": [ 20 | "p-item", 21 | "h-item", 22 | "u-url" 23 | ], 24 | "photo": [ 25 | "p-item", 26 | "h-item", 27 | "u-photo" 28 | ], 29 | "best": [ 30 | "p-best" 31 | ], 32 | "description": [ 33 | "p-description" 34 | ], 35 | "fn": [ 36 | "p-item", 37 | "h-item", 38 | "p-name" 39 | ], 40 | "summary": [ 41 | "p-name" 42 | ], 43 | "item vcard": [ 44 | "p-item", 45 | "vcard" 46 | ], 47 | "item vevent": [ 48 | "p-item", 49 | "vevent" 50 | ], 51 | "item hproduct": [ 52 | "p-item", 53 | "hproduct" 54 | ] 55 | }, 56 | "rels": { 57 | "self bookmark": [ 58 | "u-url" 59 | ], 60 | "tag": [ 61 | "p-category" 62 | ] 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /test/examples/link-rel-minimal.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 
| 11 | 12 | 13 | 14 | 15 | 16 | 17 | this should not be inside the links at all 18 | 19 | -------------------------------------------------------------------------------- /test/examples/value_name_whitespace.html: -------------------------------------------------------------------------------- 1 |
2 |

Hello World

3 |
4 | 5 |
6 |
7 |

Hello World

8 |
9 |
10 | 11 |
12 |
Hello 13 | World
14 |
15 | 16 |
17 |

Hello
World

18 |
19 | 20 |
21 |

Hello
22 | World

23 |
24 | 25 |
26 |
Hello
27 | World
28 |
29 | 30 |
31 |

Hello
World
32 |
33 | 34 |
35 |

Hello

World

36 |
37 | 38 |
39 |
40 |
One
41 | Two
42 | Three
43 |
44 |
45 | 46 |
47 |
48 |

One

49 |

Two

50 |

Three

51 |
52 |
53 | 54 |
55 |
56 | Hello World 57 |
58 |       one
59 |       two
60 |       three
61 |     
62 |
63 |
64 | 65 |
66 |
67 | Correct name 68 | 69 | Correct summary 70 |
71 |
72 | -------------------------------------------------------------------------------- /test/examples/backcompat/feed_with_rel_bookmark.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Backcompat test for hEntry with nested rel=bookmark 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | Lee Adama 14 | Jumping Rope for Weight Loss 15 |
Some Content
16 | Nov 24, 2014 17 |
18 | 19 |
20 | Kara Thrace 21 | Abstract Art in Graffiti 22 |
More Content
23 | Nov 23, 2014 24 |
25 | 26 |
27 | President Roslyn 28 | Dreams of Earth 29 |
Additional Content
30 | Nov 21, 2014 31 |
32 | 33 |
34 | Chief Tyrrol 35 | Organized Labor in Mining Colonies 36 |
More Content
37 | Nov 19, 2014 38 |
39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /mf2py/datetime_helpers.py: -------------------------------------------------------------------------------- 1 | """helper functions to deal wit datetime strings""" 2 | 3 | import re 4 | from datetime import datetime 5 | 6 | # REGEX! 7 | 8 | DATE_RE = r"(\d{4}-\d{2}-\d{2})|(\d{4}-\d{3})" 9 | SEC_RE = r"(:(?P\d{2})(\.\d+)?)" 10 | RAWTIME_RE = r"(?P\d{1,2})(:(?P\d{2})%s?)?" % (SEC_RE) 11 | AMPM_RE = r"am|pm|a\.m\.|p\.m\.|AM|PM|A\.M\.|P\.M\." 12 | TIMEZONE_RE = r"Z|[+-]\d{1,2}:?\d{2}?" 13 | TIME_RE = r"(?P%s)( ?(?P%s))?( ?(?P%s))?" % ( 14 | RAWTIME_RE, 15 | AMPM_RE, 16 | TIMEZONE_RE, 17 | ) 18 | DATETIME_RE = r"(?P%s)(?P[T ])(?P