├── .github └── workflows │ └── tests.yml ├── .gitignore ├── CHANGELOG.md ├── CONTRIBUTING.md ├── CONTRIBUTORS.md ├── LICENSE ├── Makefile ├── README.md ├── docs ├── banner.png ├── changelog.md ├── index.md ├── parser.md └── stylesheets │ └── styles.css ├── mf2py ├── __init__.py ├── backcompat-rules │ ├── adr.json │ ├── geo.json │ ├── hentry.json │ ├── hfeed.json │ ├── hproduct.json │ ├── hrecipe.json │ ├── hresume.json │ ├── hreview-aggregate.json │ ├── hreview.json │ ├── vcard.json │ └── vevent.json ├── backcompat.py ├── datetime_helpers.py ├── dom_helpers.py ├── implied_properties.py ├── metaformats.py ├── mf2_classes.py ├── mf_helpers.py ├── parse_property.py ├── parser.py ├── temp_fixes.py ├── value_class_pattern.py └── version.py ├── mkdocs.yml ├── pyproject.toml └── test ├── examples ├── area.html ├── backcompat │ ├── feed_with_rel_bookmark.html │ ├── hentry.html │ ├── hentry_content_html.html │ ├── hentry_with_rel_bookmark.html │ ├── hentry_with_rel_tag.html │ ├── hentry_with_rel_tag_entry_title.html │ ├── hfeed_with_rel_tag.html │ ├── hproduct.html │ ├── hproduct_hreview_nested.html │ ├── hrecipe_with_rel_tag.html │ ├── hreview_hentry_with_rel_tag_bookmark.html │ ├── hreview_nested_card_event_product.html │ ├── hreview_with_rel_tag_bookmark.html │ ├── ignore_mf1_properties_in_mf2_root.html │ ├── ignore_mf1_root_if_mf2_present.html │ ├── ignore_mf2_properties_in_mf1_root.html │ ├── nested_mf1_in_mf2.html │ ├── nested_mf1_in_mf2_e_content.html │ ├── nested_mf2_in_mf1.html │ └── no_implied_properties_mf1_root.html ├── base.html ├── broken_url.html ├── class_names_format.html ├── complex_e_content.html ├── datetimes.html ├── embedded.html ├── empty.html ├── eras.html ├── festivus.html ├── filter_roots.html ├── filter_roots_custom.html ├── hcard_with_empty_url.html ├── hfeed_on_html_tag.html ├── img_with_alt.html ├── img_with_srcset.html ├── img_with_srcset_with_base.html ├── implied_properties │ ├── implied_name_alt.html │ ├── implied_name_empty_alt.html │ ├── 
implied_photo.html │ ├── implied_photo_relative_url.html │ ├── implied_properties.html │ ├── implied_properties_silo_pub.html │ ├── implied_relative_datetimes.html │ ├── implied_url.html │ ├── simple_person_reference_implied.html │ ├── stop_implied_name_e_content.html │ ├── stop_implied_name_nested_h.html │ ├── stop_implied_name_p_content.html │ └── stop_implied_url.html ├── language.html ├── link-rel-minimal.html ├── link_with_u-url.html ├── metaformats_html_meta.html ├── metaformats_ogp.html ├── metaformats_twitter.html ├── nested_complex_values.html ├── nested_hcards.html ├── nested_multiple_classnames.html ├── nested_values.html ├── ordering_dedup.html ├── parse_id.html ├── person_with_url.html ├── plaintext_img_whitespace.html ├── plaintext_p_whitespace.html ├── rel.html ├── rel_enclosure.html ├── relative_url_in_e.html ├── rsvp.html ├── simple_person_reference.html ├── simple_person_reference_same_element.html ├── string_stripping.html ├── tag_whitespace_inside_p_value.html ├── template_tag.html ├── template_tag_inside_e_value.html ├── test_src_equiv.html ├── u-test.html ├── u_all_cases.html ├── value_class_person.html └── value_name_whitespace.html ├── test_dom_addins.py ├── test_parser.py └── test_suite.py /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Run Python Tests 2 | on: push 3 | 4 | jobs: 5 | build-macos: 6 | strategy: 7 | matrix: 8 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 9 | runs-on: "macos-latest" 10 | steps: 11 | - name: Install md5sha1sum 12 | run: brew install md5sha1sum 13 | - uses: actions/checkout@v3 14 | with: 15 | python-version: ${{ matrix.python-version }} 16 | - name: Install Python 17 | uses: actions/setup-python@v4 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install Poetry 21 | uses: snok/install-poetry@v1 22 | with: 23 | version: 1.5.1 24 | virtualenvs-in-project: true 25 | - name: Install dependencies 26 | run: 
poetry install --no-interaction --no-root 27 | - name: Install library 28 | run: poetry install --no-interaction 29 | - name: Run tests 30 | run: poetry run make tests 31 | - uses: psf/black@stable 32 | with: 33 | options: "--check --verbose" 34 | src: "./" 35 | version: "23.3" 36 | - uses: isort/isort-action@v1 37 | build-linux: 38 | strategy: 39 | matrix: 40 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 41 | runs-on: "ubuntu-latest" 42 | steps: 43 | - name: Install libxml2 44 | run: | 45 | sudo apt-get update 46 | sudo apt-get install libxml2 libxml2-dev libxslt1-dev 47 | - uses: actions/checkout@v3 48 | with: 49 | python-version: ${{ matrix.python-version }} 50 | - name: Install Python 51 | uses: actions/setup-python@v4 52 | with: 53 | python-version: ${{ matrix.python-version }} 54 | - name: Install Poetry 55 | uses: snok/install-poetry@v1 56 | with: 57 | version: 1.5.1 58 | virtualenvs-in-project: true 59 | - name: Install dependencies 60 | run: poetry install --no-interaction --no-root 61 | - name: Install library 62 | run: poetry install --no-interaction 63 | - name: Run tests 64 | run: poetry run make tests 65 | - uses: psf/black@stable 66 | with: 67 | options: "--check --verbose" 68 | src: "./" 69 | version: "23.3" 70 | - uses: isort/isort-action@v1 71 | build-windows: 72 | strategy: 73 | matrix: 74 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 75 | runs-on: "windows-latest" 76 | defaults: 77 | run: 78 | shell: bash 79 | steps: 80 | - uses: actions/checkout@v3 81 | with: 82 | python-version: ${{ matrix.python-version }} 83 | - name: Install Python 84 | uses: actions/setup-python@v4 85 | with: 86 | python-version: ${{ matrix.python-version }} 87 | - name: Install Poetry 88 | uses: snok/install-poetry@v1 89 | with: 90 | version: 1.5.1 91 | virtualenvs-in-project: true 92 | - name: Install dependencies 93 | run: poetry install --no-interaction --no-root 94 | - name: Install library 95 | run: poetry install --no-interaction 96 | - name: 
Run tests 97 | run: poetry run make tests 98 | - uses: psf/black@stable 99 | with: 100 | options: "--check --verbose" 101 | src: "./" 102 | version: "23.3" 103 | # - uses: isort/isort-action@v1 104 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.swp 3 | *.pyc 4 | .idea/ 5 | .eggs/ 6 | build/ 7 | dist/ 8 | local/ 9 | mf2py.egg-info/ 10 | nbproject/ 11 | venv/ 12 | *~ 13 | poetry.lock 14 | site/ 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. 3 | 4 | ## 2.0.1 - 2023-12-07 5 | The mf2py library is excited to transition into 2.0. This version increase incorporates months of work from contributors, informed by active discussions among implementers and users. 6 | 7 | This release officially deprecates support for versions of Python lower than 3.8. 8 | 9 | Below are the changes we have made in this release. 
10 | 11 | ### New Features 12 | - Enable `img_with_alt` by default (#184) 13 | - Add timezone offset normalisation (#206) 14 | - Add option for exposing DOM for embedded properties (#208) 15 | - Add srcset support (#209) 16 | - Add language support (#210) 17 | - Add option for filtering root class names (#211) 18 | - Add option for metaformats support (#213) 19 | 20 | ### Changes 21 | - Remove `img_with_alt` option entirely (#200) 22 | - Resolve implied photo relative paths (#205) 23 | - Make relative URLs in embedded properties absolute (#201) 24 | - Fix whitespace in plaintext conversion (#207) 25 | - Replace `dict_class` with standard `dict` (#196) 26 | 27 | ### Tests, Library and Documentation Maintenance 28 | - Update tests to include alt texts by default (#190) 29 | - Add Windows and macOS tests (#198) 30 | - Use poetry for dependency management (#189) 31 | - Deprecate Python 2 support (#179) 32 | - Lint code with `black` and `isort` 33 | - Add linting CI actions (#193) 34 | - Move from `nosetests` to `pytest` (#186) 35 | - Add 3.11, 3.12 and drop pypy from test matrix; upgrade poetry action (#204) 36 | - Prepare tests to test options (#214) 37 | - Bring README doctests up-to-date (#215) 38 | 39 | ## 1.1.3 - 2022-06-28 40 | - reduce instances where photo is implied (#135) 41 | - always do relative URL resolution (#138) 42 | - VCP now handles tz offsets without leading zeros (#142) 43 | - implement id parsing (#143) 44 | - fix outdated syntax causing SyntaxWarning (#157) 45 | 46 | ## 1.1.2 - 2018-08-08 47 | - add parsing for iframe.u-*[src] (#116) 48 | - bug fix: reduced implied urls (#117) 49 | - bug fix: don't collapse whitespace between tags 50 | - specify explicit versions for dependencies 51 | - revert BeautifulSoup copying added in 1.1.1 due to bugs (eg #108) 52 | - misc performance improvements 53 | 54 | ## 1.1.1 - 2018-06-15 55 | - streamline backcompat to use JSON only. 
56 | - fix multiple mf1 root rel-tag parsing 57 | - correct url and photo for hreview. 58 | - add rules for nested hreview. update backcompat to use multiple matches in old properties. 59 | - fix `rel-tag` to `p-category` conversion so that other classes are not lost. 60 | - use original authored html for `e-*` parsing in backcompat 61 | - make classes and rels into unordered (alphabetically ordered) deduped arrays. 62 | - only use class names for mf2 which follow the naming rules 63 | - fix `parse` method to use default html parser. 64 | - always use the first value for attributes for rels. 65 | - correct AM/PM conversion in datetime value class pattern. 66 | - add ordinal date parsing to datetimes value class pattern. ordinal date is normalised to YYYY-MM-DD 67 | - remove hack for html tag classes since that is fixed in new BS 68 | - better whitespace algorithm for `name` and `html.value` parsing 69 | - experimental flag for including `alt` in `u-photo` parsing 70 | - make a copy of the BeautifulSoup given by user to work on for parsing to prevent changes to original doc 71 | - bump version to 1.1.1 72 | 73 | ## 1.1.0 - 2018-03-16 74 | - bump version to 1.1.0 since it is a "major" change 75 | - added tests for new implied name rules 76 | - modified earlier tests to accommodate new rules 77 | - use space separator instead of "T" 78 | - Don't add "00" seconds unless authored 79 | - use TZ authored in separate `value` element 80 | - only use first found `value` of a particular type `date`, `time`, or `timezone`. 81 | - move backcompat rules into JSON files 82 | - reorganise value class pattern parsing into new files 83 | - add datetime_helpers to organise datetime parsing rules 84 | - reorganise tests 85 | - remove Heroku frontend, point to mf2py-web and python.microformats.io instead in README. 
86 | - remove Flask and gunicorn requirements 87 | - add debug info with description, version, url and the html parser used 88 | 89 | ## 1.0.6 - 2018-03-04 90 | - strip leading/trailing white space for `e-*[html]`. update the corresponding tests 91 | - blank values explicitly authored are allowed as property values 92 | - include `alt` or `src` from `<img>` in parsing for `p-*` and `e-*[value]` 93 | - parse `title` from `<link>` for `p-*` resolves #84 94 | - and `poster` from `