├── .nvmrc ├── .husky └── pre-commit ├── .prettierignore ├── test ├── utils │ ├── dirname.d.ts │ ├── loadScenarios.d.ts │ ├── dirname.js │ └── loadScenarios.js ├── suites │ ├── experimental │ │ ├── metaformats-missing-head.json │ │ ├── metaformats-missing-head.html │ │ ├── lang-meta.html │ │ ├── metaformats-prefer-mf.json │ │ ├── lang-meta.json │ │ ├── lang-feed.html │ │ ├── metaformats-twitter-article.json │ │ ├── metaformats-standard.html │ │ ├── metaformats-og-article.json │ │ ├── metaformats-standard.json │ │ ├── metaformats-twitter-article.html │ │ ├── metaformats-og-audio-soundcloud.json │ │ ├── lang-feed.json │ │ ├── text-content.html │ │ ├── metaformats-og-video-vimeo.json │ │ ├── metaformats-og-profile-linkedin.json │ │ ├── text-content.json │ │ ├── metaformats-prefer-mf.html │ │ ├── metaformats-og-article.html │ │ ├── lang.html │ │ ├── lang.json │ │ ├── metaformats-og-audio-soundcloud.html │ │ ├── metaformats-og-video-vimeo.html │ │ └── metaformats-og-profile-linkedin.html │ ├── local │ │ ├── microformats-v2 │ │ │ ├── empty-property.html │ │ │ ├── implied-name.html │ │ │ ├── empty-property.json │ │ │ ├── implied-name.json │ │ │ ├── lang.html │ │ │ ├── rel-urls.html │ │ │ ├── dates.html │ │ │ ├── rel-urls.json │ │ │ ├── dates.json │ │ │ ├── lang.json │ │ │ ├── nested.html │ │ │ ├── urls.html │ │ │ ├── nested.json │ │ │ └── urls.json │ │ └── microformats-v1 │ │ │ ├── includes.json │ │ │ └── includes.html │ └── README.md ├── package.cjs.spec.js ├── package.mjs.spec.js ├── scenarios.spec.ts └── validation.spec.ts ├── src ├── helpers │ ├── array.ts │ ├── url.ts │ ├── experimental.ts │ ├── images.ts │ ├── includes.ts │ ├── findChildren.ts │ ├── attributes.ts │ ├── valueClassPattern.ts │ ├── nodeMatchers.ts │ ├── documentSetup.ts │ ├── textContent.ts │ └── metaformats.ts ├── backcompat │ ├── geo.ts │ ├── item.ts │ ├── hfeed.ts │ ├── hnews.ts │ ├── hresume.ts │ ├── hreview-aggregate.ts │ ├── adr.ts │ ├── vevent.ts │ ├── hentry.ts │ ├── hproduct.ts │ ├── hreview.ts │ ├── vcard.ts │ └── index.ts ├── index.ts ├── implied │ ├── name.ts │ ├── url.ts │ └── photo.ts ├── parser.ts ├── rels │ └── rels.ts ├── types.ts ├── microformats │ ├── properties.ts │ ├── parse.ts │ └── property.ts └── validator.ts ├── .mocharc.json ├── .editorconfig ├── .c8rc.json ├── .github ├── workflows │ ├── release-drafter.yml │ ├── publish.yaml │ ├── dependabot.yml │ └── build_and_test.yaml ├── ISSUE_TEMPLATE │ ├── parsing-bug-report.md │ └── feature_request.md ├── dependabot.yml ├── PULL_REQUEST_TEMPLATE.md └── release-drafter.yml ├── tsconfig.json ├── .eslintrc.json ├── LICENSE ├── demo ├── demo.js ├── demo.css └── index.tpl.html ├── .gitignore ├── package.json ├── rollup.config.js ├── README.md └── CONTRIBUTING.md /.nvmrc: -------------------------------------------------------------------------------- 1 | 20 2 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | yarn lint-staged 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | .nyc_output/ 3 | .cache 4 | -------------------------------------------------------------------------------- /test/utils/dirname.d.ts: -------------------------------------------------------------------------------- 1 | export function dirname(url: string): string; 2 | -------------------------------------------------------------------------------- /src/helpers/array.ts: -------------------------------------------------------------------------------- 1 | export const flatten = (prev: T[], curr: T[]): T[] => [...prev, ...curr]; 2 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-missing-head.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [], 3 | "rels": {}, 4 | "rel-urls": {} 5 | } 6 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/empty-property.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |
4 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/implied-name.html: -------------------------------------------------------------------------------- 1 | 2 | Author 3 | 4 | 5 | -------------------------------------------------------------------------------- /.mocharc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extension": ["ts", "mjs", "cjs", "js"], 3 | "node-option": [ 4 | "experimental-specifier-resolution=node", 5 | "loader=ts-node/esm" 6 | ] 7 | } 8 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | insert_final_newline = true 8 | trim_trailing_whitespace = true 9 | charset = utf-8 -------------------------------------------------------------------------------- /test/utils/loadScenarios.d.ts: -------------------------------------------------------------------------------- 1 | interface Scenario { 2 | name: string; 3 | input: string; 4 | expected: string; 5 | } 6 | 7 | export function loadScenarios(baseDir: string, dir: string): Scenario[]; 8 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v1/includes.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { "properties": {}, "type": ["h-card"] }, 4 | { "properties": {}, "type": ["h-card"] } 5 | ], 6 | "rel-urls": {}, 7 | "rels": {} 8 | } 9 | -------------------------------------------------------------------------------- /test/utils/dirname.js: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import path from "path"; 3 | 4 | export function dirname(url) { 5 | const __filename = fileURLToPath(url); 6 | return path.dirname(__filename); 7 | } 8 | -------------------------------------------------------------------------------- /.c8rc.json: -------------------------------------------------------------------------------- 1 | { 2 | "all": true, 3 | "check-coverage": true, 4 | "include": ["src/**"], 5 | "exclude": ["src/types.ts"], 6 | "statements": 99, 7 | "branches": 98.5, 8 | "functions": 100, 9 | "lines": 99 10 | } 11 | -------------------------------------------------------------------------------- /src/backcompat/geo.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const geo: Backcompat = { 4 | type: ["h-geo"], 5 | properties: { 6 | latitude: "p-latitude", 7 | longitude: "p-longitude", 8 | }, 9 | }; 10 | -------------------------------------------------------------------------------- /src/backcompat/item.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const item: Backcompat = { 4 | type: ["h-item"], 5 | properties: { 6 | fn: "p-name", 7 | photo: "u-photo", 8 | url: "u-url", 9 | }, 10 | }; 11 | -------------------------------------------------------------------------------- /src/helpers/url.ts: -------------------------------------------------------------------------------- 1 | export const isLocalLink = (link: string): boolean => 2 | !link.includes("://") && !link.startsWith("#"); 3 | 4 | export const applyBaseUrl = (link: string, baseUrl: string): string => 5 | new URL(link, baseUrl).toString(); 6 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/empty-property.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "properties": { 6 | "name": [""] 7 | } 8 | } 9 | ], 10 | "rels": {}, 11 | "rel-urls": {} 12 | } 13 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-missing-head.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |

Missing Head

5 |

Shouldn't return any items if properties are not found.

6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /src/backcompat/hfeed.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hfeed: Backcompat = { 4 | type: ["h-feed"], 5 | properties: { 6 | author: "p-author", 7 | photo: "u-photo", 8 | url: "u-url", 9 | }, 10 | rels: { 11 | tag: "p-category", 12 | }, 13 | }; 14 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/implied-name.json: -------------------------------------------------------------------------------- 1 | { 2 | "rels": {}, 3 | "rel-urls": {}, 4 | "items": [ 5 | { 6 | "type": ["h-card"], 7 | "properties": { 8 | "photo": ["http://example.com/photo.jpg"], 9 | "name": ["Author"] 10 | } 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /test/suites/experimental/lang-meta.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |
Esta prueba está en español.
7 |
This test is in English.
8 | 9 | 10 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v1/includes.html: -------------------------------------------------------------------------------- 1 |
2 | Example vard 3 |
4 | 5 |
6 | Example vard 7 |
8 | 9 | 10 | Name 11 | 12 | -------------------------------------------------------------------------------- /src/backcompat/hnews.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hnews: Backcompat = { 4 | type: ["h-news"], 5 | properties: { 6 | entry: "p-entry", 7 | "source-org": "p-source-org", 8 | dateline: "p-dateline", 9 | geo: "p-geo", 10 | }, 11 | rels: { 12 | principles: "u-principles", 13 | }, 14 | }; 15 | -------------------------------------------------------------------------------- /src/helpers/experimental.ts: -------------------------------------------------------------------------------- 1 | import { ExperimentalName, ParserOptions } from "../types"; 2 | 3 | export const isEnabled = ( 4 | options: ParserOptions, 5 | flag: ExperimentalName, 6 | ): boolean => { 7 | if (!options || !options.experimental) { 8 | return false; 9 | } 10 | 11 | return options.experimental[flag] || false; 12 | }; 13 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/lang.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 |

En svensk titel

5 |
With an english summary
6 |
Och svensk huvudtext
7 |
8 | -------------------------------------------------------------------------------- /src/backcompat/hresume.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hresume: Backcompat = { 4 | type: ["h-resume"], 5 | properties: { 6 | contact: "p-contact", 7 | experience: "p-experience", 8 | summary: "p-summary", 9 | skill: "p-skill", 10 | education: "p-education", 11 | affiliation: "p-affiliation", 12 | }, 13 | }; 14 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/rel-urls.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | My name 4 | 5 | Go back home 6 | 7 | This URL should be trimmed 8 | 9 | This should be ignored 10 | -------------------------------------------------------------------------------- /src/backcompat/hreview-aggregate.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hreviewAggregate: Backcompat = { 4 | type: ["h-review-aggregate"], 5 | properties: { 6 | rating: "p-rating", 7 | average: "p-average", 8 | best: "p-best", 9 | count: "p-count", 10 | item: "p-item", 11 | url: "u-url", 12 | fn: "p-name", 13 | }, 14 | }; 15 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/dates.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | 7pm 4 | 5 |
6 | 7 |
8 | 9 | on 10 | 11 | 12 |
13 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/rel-urls.json: -------------------------------------------------------------------------------- 1 | { 2 | "rels": { 3 | "me": ["http://example.com"], 4 | "home": ["http://example.com"], 5 | "example": ["http://example.com"] 6 | }, 7 | "rel-urls": { 8 | "http://example.com": { 9 | "rels": ["example", "home", "me"], 10 | "text": "My name", 11 | "type": "text/html" 12 | } 13 | }, 14 | "items": [] 15 | } 16 | -------------------------------------------------------------------------------- /src/backcompat/adr.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const adr: Backcompat = { 4 | type: ["h-adr"], 5 | properties: { 6 | "country-name": "p-country-name", 7 | locality: "p-locality", 8 | region: "p-region", 9 | "street-address": "p-street-address", 10 | "postal-code": "p-postal-code", 11 | "extended-address": "p-extended-address", 12 | }, 13 | }; 14 | -------------------------------------------------------------------------------- /src/backcompat/vevent.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const vevent: Backcompat = { 4 | type: ["h-event"], 5 | properties: { 6 | summary: "p-name", 7 | dtstart: "dt-start", 8 | dtend: "dt-end", 9 | duration: "dt-duration", 10 | description: "p-description", 11 | attendee: "p-attendee", 12 | location: "p-location", 13 | url: "u-url", 14 | }, 15 | }; 16 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-prefer-mf.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "lang": "en", 6 | "properties": { 7 | "name": ["Microformats"], 8 | "content": ["True microformats should prevent metaformats"], 9 | "photo": ["http://example.com/mf-image.png"] 10 | } 11 | } 12 | ], 13 | "rels": {}, 14 | "rel-urls": {} 15 | } 16 | -------------------------------------------------------------------------------- /src/backcompat/hentry.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hentry: Backcompat = { 4 | type: ["h-entry"], 5 | properties: { 6 | author: "p-author", 7 | "entry-content": "e-content", 8 | "entry-summary": "p-summary", 9 | "entry-title": "p-name", 10 | updated: "dt-updated", 11 | }, 12 | rels: { 13 | bookmark: "u-url", 14 | tag: "p-category", 15 | }, 16 | }; 17 | -------------------------------------------------------------------------------- /src/backcompat/hproduct.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hproduct: Backcompat = { 4 | type: ["h-product"], 5 | properties: { 6 | price: "p-price", 7 | description: "p-description", 8 | fn: "p-name", 9 | review: "p-review", 10 | brand: "p-brand", 11 | url: "u-url", 12 | photo: "u-photo", 13 | }, 14 | rels: { 15 | tag: "p-category", 16 | }, 17 | }; 18 | -------------------------------------------------------------------------------- /src/backcompat/hreview.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const hreview: Backcompat = { 4 | type: ["h-review"], 5 | properties: { 6 | item: "p-item", 7 | rating: "p-rating", 8 | reviewer: "p-author", 9 | summary: "p-name", 10 | url: "u-url", 11 | description: "e-content", 12 | }, 13 | rels: { 14 | bookmark: "u-url", 15 | tag: "p-category", 16 | }, 17 | }; 18 | -------------------------------------------------------------------------------- /test/suites/experimental/lang-meta.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "lang": "es", 6 | "properties": { 7 | "name": ["Esta prueba está en español."] 8 | } 9 | }, 10 | { 11 | "type": ["h-entry"], 12 | "lang": "en", 13 | "properties": { 14 | "name": ["This test is in English."] 15 | } 16 | } 17 | ], 18 | "rels": {}, 19 | "rel-urls": {} 20 | } 21 | -------------------------------------------------------------------------------- /.github/workflows/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name: Release drafter 2 | on: 3 | push: 4 | branches: [main] 5 | pull_request: 6 | types: [opened, reopened, synchronize] 7 | 8 | jobs: 9 | update_release_draft: 10 | permissions: 11 | contents: write 12 | pull-requests: write 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: release-drafter/release-drafter@v6 16 | env: 17 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 18 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import { parser } from "./parser"; 2 | import { validator } from "./validator"; 3 | import { ParsedDocument } from "./types"; 4 | 5 | export interface Options { 6 | baseUrl: string; 7 | experimental?: { 8 | lang?: boolean; 9 | textContent?: boolean; 10 | metaformats?: boolean; 11 | }; 12 | } 13 | 14 | export const mf2 = (html: string, options: Options): ParsedDocument => { 15 | validator(html, options); 16 | return parser(html, options); 17 | }; 18 | -------------------------------------------------------------------------------- /test/suites/experimental/lang-feed.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 |

Test Feed

6 |
This test is in English.
7 |
Esta prueba está en español.
8 |
Ce test est en français.
9 |
10 | 11 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/dates.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "properties": { 6 | "name": ["7pm"], 7 | "end": ["19:00"] 8 | } 9 | }, 10 | { 11 | "type": ["h-entry"], 12 | "properties": { 13 | "name": ["23:24 on\n 2020-03-20"], 14 | "published": ["2020-03-20 23:24-0700"], 15 | "updated": ["2020-03-20 23:24-0700"] 16 | } 17 | } 18 | ], 19 | "rels": {}, 20 | "rel-urls": {} 21 | } 22 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2018", 4 | "lib": ["es2019.array", "DOM"], 5 | "module": "esnext", 6 | "moduleResolution": "node", 7 | "declaration": true, 8 | "outDir": "./types", 9 | "noEmit": true, 10 | "strict": true, 11 | "noUnusedLocals": true, 12 | "noUnusedParameters": true, 13 | "noImplicitReturns": true, 14 | "noFallthroughCasesInSwitch": true, 15 | "esModuleInterop": true, 16 | "forceConsistentCasingInFileNames": true 17 | }, 18 | "include": ["src"] 19 | } 20 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/lang.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "id": "postfrag123", 6 | "properties": { 7 | "name": ["En svensk titel"], 8 | "content": [ 9 | { 10 | "html": "With an english summary", 11 | "value": "With an english summary" 12 | }, 13 | { 14 | "html": "Och svensk huvudtext", 15 | "value": "Och svensk huvudtext" 16 | } 17 | ] 18 | } 19 | } 20 | ], 21 | "rels": {}, 22 | "rel-urls": {} 23 | } 24 | -------------------------------------------------------------------------------- /src/helpers/images.ts: -------------------------------------------------------------------------------- 1 | import { Element } from "../types"; 2 | 3 | import { getAttributeValue } from "./attributes"; 4 | import { Image, ParsingOptions } from "../types"; 5 | 6 | export const parseImage = ( 7 | node: Element, 8 | { inherited }: Partial = {}, 9 | ): Image | string | undefined => { 10 | if (node.tagName !== "img") { 11 | return; 12 | } 13 | 14 | const alt = 15 | (!inherited || !inherited.roots || !inherited.roots.length) && 16 | getAttributeValue(node, "alt"); 17 | const value = getAttributeValue(node, "src"); 18 | return alt ? { alt, value } : value; 19 | }; 20 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/nested.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

Jane Doe

4 |
5 |
6 | 7 |
8 | My name 9 |
10 | 11 |
12 | 13 |
14 | 15 |
16 |
17 |
18 | 19 |
20 |
21 | 22 |
23 |
24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/parsing-bug-report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Parsing bug report 3 | about: Create a bug report for when microformats are incorrectly parsed 4 | title: "" 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | **Describe the bug** 10 | 11 | A clear and concise description of what the bug is. Please include a reference to the specification, other discussions or other parser behaviour. 12 | 13 | **To Reproduce** 14 | 15 | HTML input: 16 | 17 | ```html 18 | 19 | ``` 20 | 21 | **Expected behavior** 22 | 23 | Correct JSON output: 24 | 25 | ```json 26 | 27 | ``` 28 | 29 | **Additional context** 30 | 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-twitter-article.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "lang": "en", 6 | "properties": { 7 | "name": ["Title 4 Twitter"], 8 | "summary": ["Twitter tags are used if no OGP tags are found."], 9 | "featured": [ 10 | { 11 | "value": "http://example.com/twitter-image.png", 12 | "alt": "This is alt text for an image. " 13 | } 14 | ], 15 | "published": ["2015-05-29"], 16 | "updated": ["2016-02-14"], 17 | "author": ["Glenn Jones"] 18 | } 19 | } 20 | ], 21 | "rels": {}, 22 | "rel-urls": {} 23 | } 24 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-standard.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Standard Meta Tags 6 | 7 | 8 | 9 | 10 | 14 | 15 | 16 | 17 |

Test

18 | 19 | 20 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-article.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "lang": "en", 6 | "properties": { 7 | "name": ["Open Graph Protocol"], 8 | "url": ["https://ogp.me/"], 9 | "summary": ["OG is preferred over other meta tags"], 10 | "featured": [ 11 | { 12 | "value": "http://example.com/image.png", 13 | "alt": "Alt text for image. " 14 | } 15 | ], 16 | "published": ["2015-05-29"], 17 | "updated": ["2016-02-14"], 18 | "author": ["Glenn Jones", "Ghost Writer"] 19 | } 20 | } 21 | ], 22 | "rels": {}, 23 | "rel-urls": {} 24 | } 25 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-standard.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "lang": "en", 6 | "properties": { 7 | "name": ["Standard Meta Tags"], 8 | "summary": ["Standard meta tags are also parsed"], 9 | "published": ["2023-08-02"], 10 | "url": ["https://microformats.org/wiki/rel-canonical"], 11 | "author": ["Ted Lasso"], 12 | "publication": ["microformats-parser"] 13 | } 14 | } 15 | ], 16 | "rels": { 17 | "canonical": ["https://microformats.org/wiki/rel-canonical"] 18 | }, 19 | "rel-urls": { 20 | "https://microformats.org/wiki/rel-canonical": { 21 | "rels": ["canonical"], 22 | "text": "" 23 | } 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-twitter-article.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 |

Test

18 | 19 | 20 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: npm 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "weekly" 12 | versioning-strategy: increase-if-necessary 13 | commit-message: 14 | prefix: "chore(deps): " 15 | - package-ecosystem: "github-actions" 16 | directory: "/" 17 | schedule: 18 | interval: "weekly" 19 | commit-message: 20 | prefix: "chore(deps): " 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for new microformats support 4 | title: "" 5 | labels: enhancement 6 | assignees: "" 7 | --- 8 | 9 | **What type of feature is it?** 10 | 11 | Is it an experimental feature or a new addition to the specification? 12 | 13 | **Describe the feature** 14 | 15 | Please provide a couple of sentences describing what will change with this feature. 16 | 17 | **Example of input** 18 | 19 | Provide clear examples of input HTML that covers the proposed feature. 20 | 21 | ```html 22 | 23 | ``` 24 | 25 | **Example of output** 26 | 27 | Please provide the expected JSON output for the provided HTML. 28 | 29 | ```json 30 | 31 | ``` 32 | 33 | **Additional context** 34 | 35 | Add any other context or information about the feature request here. 36 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Fixes # . 2 | 3 | **Checklist** 4 | 5 | 6 | 7 | - [ ] Added validaton to any changes in the parser API. 8 | - [ ] Added tests covering the parsing behaviour changes. 9 | - [ ] Linked to any relevant issues this will close. 10 | - [ ] Tested the output using the [demo](../CONTRIBUTING.md#testing-your-changes). 11 | 12 | **Changes to parsing behaviour** 13 | 14 | 15 | 16 | A brief summary of any changes to the parser behaviour. 17 | 18 | **Example input covered by new behaviour** 19 | 20 | 21 | 22 | ```html 23 | 24 | ``` 25 | 26 | **Example output from new behaviour** 27 | 28 | 29 | 30 | ```json 31 | 32 | ``` 33 | 34 | **Other changes** 35 | 36 | 37 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-audio-soundcloud.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "lang": "en", 5 | "properties": { 6 | "name": ["Over The Moon"], 7 | "featured": [ 8 | "https://i1.sndcdn.com/artworks-92VWfGsSB6dA-0-t500x500.jpg" 9 | ], 10 | "publication": ["SoundCloud"], 11 | "summary": [ 12 | "Surprise Chef’s music is based on evoking mood; their vivid arrangements utilize time and space to build soundscapes that invite the listener into their world. The quintet’s distinct sound pulls from " 13 | ], 14 | "audio": ["https://soundcloud.com/surprisechef/over-the-moon.mp3"], 15 | "url": ["https://soundcloud.com/surprisechef/over-the-moon"] 16 | }, 17 | "type": ["h-cite"] 18 | } 19 | ], 20 | "rels": {}, 21 | "rel-urls": {} 22 | } 23 | -------------------------------------------------------------------------------- /test/suites/README.md: -------------------------------------------------------------------------------- 1 | # Test suites 2 | 3 | We use the [microformats test suite](https://github.com/microformats/tests) to test this package. Occasionally it can miss some test cases, or not provide 100% coverage of the code. To fill in these gaps, we have extra test cases defined here. 4 | 5 | ## Test folders 6 | 7 | We have 2 main test folders: 8 | 9 | - `local` - additional tests to compliment the global test suite. 10 | - `experimental` - tests designed to cover experimental features that are not yet part of the microformats specification. 11 | 12 | ## Adding a test 13 | 14 | 1. Create a HTML document in the appropriate containing folder with the input for the test. 15 | 2. Create a JSON document in the same folder with the expected test output 16 | 17 | We can run multiple tests cases within a file, so if there's a file you can add your case to, please do. 18 | -------------------------------------------------------------------------------- /test/suites/experimental/lang-feed.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-feed"], 5 | "lang": "en", 6 | "properties": { 7 | "name": ["Test Feed"] 8 | }, 9 | "children": [ 10 | { 11 | "type": ["h-entry"], 12 | "lang": "en", 13 | "properties": { 14 | "name": ["This test is in English."] 15 | } 16 | }, 17 | { 18 | "type": ["h-entry"], 19 | "lang": "es", 20 | "properties": { 21 | "name": ["Esta prueba está en español."] 22 | } 23 | }, 24 | { 25 | "type": ["h-entry"], 26 | "lang": "fr", 27 | "properties": { 28 | "name": ["Ce test est en français."] 29 | } 30 | } 31 | ] 32 | } 33 | ], 34 | "rels": {}, 35 | "rel-urls": {} 36 | } 37 | -------------------------------------------------------------------------------- /test/utils/loadScenarios.js: -------------------------------------------------------------------------------- 1 | import { glob } from "glob"; 2 | import path from "path"; 3 | import { readFileSync } from "fs"; 4 | 5 | export const loadScenarios = (baseDir, dir) => { 6 | const scenarios = glob 7 | .sync(`${baseDir}/${dir}/**/*.json`) 8 | .map((testFile) => path.relative(path.join(baseDir, dir), testFile)) 9 | .map((testFile) => testFile.replace(".json", "")) 10 | .map((name) => { 11 | const inputPath = path.resolve(baseDir, dir, `${name}.html`); 12 | const expectedPath = path.resolve(baseDir, dir, `${name}.json`); 13 | 14 | return { 15 | name, 16 | input: readFileSync(inputPath, "utf8"), 17 | expected: JSON.parse(readFileSync(expectedPath, "utf8")), 18 | }; 19 | }); 20 | 21 | if (!scenarios.length) { 22 | throw new Error("No scenarios found"); 23 | } 24 | 25 | return scenarios; 26 | }; 27 | -------------------------------------------------------------------------------- /test/suites/experimental/text-content.html: -------------------------------------------------------------------------------- 1 |
2 | 3 | Berlin, 4 | Berlin, 5 | DE 6 | 7 | 8 | 9 |
10 | 11 |
12 |

13 | 16 | 21 | Bob Smith 22 |

23 |
24 | Senior 25 |

26 | General 27 |
28 | Waste 29 |

30 |
31 |
32 | 33 |
34 |

35 | Bob Smith 36 |

37 |
38 | 39 | 40 | 41 | Joe Bloggs 42 | 43 | -------------------------------------------------------------------------------- /src/helpers/includes.ts: -------------------------------------------------------------------------------- 1 | import { Element } from "../types"; 2 | 3 | import { 4 | isMicroformatV2Root, 5 | isElement, 6 | isMicroformatRoot, 7 | } from "./nodeMatchers"; 8 | import { ParsingOptions } from "../types"; 9 | import { getV1IncludeNames } from "../backcompat"; 10 | 11 | const applyIncludes = (node: Element, options: ParsingOptions): void => { 12 | const includeNames = getV1IncludeNames(node); 13 | 14 | includeNames.forEach((name) => { 15 | const include = options.idRefs[name]; 16 | if (include) { 17 | node.childNodes.push(include); 18 | } 19 | }); 20 | 21 | node.childNodes.forEach( 22 | (child) => 23 | isElement(child) && 24 | !isMicroformatRoot(child) && 25 | applyIncludes(child, options), 26 | ); 27 | }; 28 | 29 | export const applyIncludesToRoot = ( 30 | node: Element, 31 | options: ParsingOptions, 32 | ): void => { 33 | if (isMicroformatV2Root(node)) { 34 | return; 35 | } 36 | 37 | applyIncludes(node, options); 38 | }; 39 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish NPM package 2 | 3 | on: 4 | release: 5 | types: 6 | - released 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v4 13 | - uses: actions/setup-node@v4 14 | with: 15 | node-version-file: ".nvmrc" 16 | registry-url: https://registry.npmjs.org/ 17 | - name: Install dependencies 18 | run: yarn 19 | - name: Set correct version 20 | run: npm --no-git-tag-version version $VERSION 21 | env: 22 | VERSION: ${{ github.event.release.tag_name }} 23 | - name: Build package 24 | run: yarn build 25 | - name: Publish to NPM 26 | run: yarn publish --non-interactive 27 | env: 28 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 29 | - name: Deploy pages 30 | uses: peaceiris/actions-gh-pages@v4 31 | with: 32 | github_token: ${{ secrets.GITHUB_TOKEN }} 33 | publish_dir: ./public 34 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "parser": "@typescript-eslint/parser", 3 | "extends": [ 4 | "plugin:@typescript-eslint/recommended", 5 | "plugin:import/errors", 6 | "plugin:import/typescript", 7 | "prettier" 8 | ], 9 | "ignorePatterns": ["node_modules/", "dist/", "public/", "**/*.html"], 10 | "settings": { 11 | "import/resolver": { 12 | "node": { "extensions": [".ts"] } 13 | } 14 | }, 15 | "rules": { 16 | "arrow-body-style": ["error", "as-needed"], 17 | "import/order": [ 18 | "error", 19 | { 20 | "groups": [["builtin", "external", "internal"]], 21 | "newlines-between": "always-and-inside-groups" 22 | } 23 | ] 24 | }, 25 | "overrides": [ 26 | { 27 | "files": ["./demo/**/*.js", "./rollup.config.js"], 28 | "rules": { 29 | "@typescript-eslint/no-var-requires": "off", 30 | "@typescript-eslint/explicit-function-return-type": "off", 31 | "@typescript-eslint/explicit-module-boundary-types": "off" 32 | } 33 | } 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /src/backcompat/vcard.ts: -------------------------------------------------------------------------------- 1 | import { Backcompat } from "../types"; 2 | 3 | export const vcard: Backcompat = { 4 | type: ["h-card"], 5 | properties: { 6 | fn: "p-name", 7 | url: "u-url", 8 | org: "p-org", 9 | adr: "p-adr", 10 | tel: "p-tel", 11 | title: "p-job-title", 12 | email: "u-email", 13 | photo: "u-photo", 14 | agent: "p-agent", 15 | "family-name": "p-family-name", 16 | "given-name": "p-given-name", 17 | "additional-name": "p-additional-name", 18 | "honorific-prefix": "p-honorific-prefix", 19 | "honorific-suffix": "p-honorific-suffix", 20 | key: "p-key", 21 | label: "p-label", 22 | logo: "u-logo", 23 | mailer: "p-mailer", 24 | nickname: "p-nickname", 25 | note: "p-note", 26 | sound: "u-sound", 27 | geo: "p-geo", 28 | bday: "dt-bday", 29 | class: "p-class", 30 | rev: "p-rev", 31 | role: "p-role", 32 | "sort-string": "p-sort-string", 33 | tz: "p-tz", 34 | uid: "u-uid", 35 | }, 36 | rels: { 37 | tag: "p-category", 38 | }, 39 | }; 40 | -------------------------------------------------------------------------------- /.github/workflows/dependabot.yml: -------------------------------------------------------------------------------- 1 | name: Dependabot auto-approve 2 | on: pull_request 3 | 4 | permissions: 5 | contents: write 6 | pull-requests: write 7 | 8 | jobs: 9 | dependabot: 10 | runs-on: ubuntu-latest 11 | if: ${{ github.actor == 'dependabot[bot]' }} 12 | steps: 13 | - name: Dependabot metadata 14 | id: metadata 15 | uses: dependabot/fetch-metadata@v2 16 | with: 17 | github-token: "${{ secrets.GITHUB_TOKEN }}" 18 | - name: Approve PR 19 | run: gh pr review --approve "$PR_URL" 20 | env: 21 | PR_URL: ${{github.event.pull_request.html_url}} 22 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 23 | - name: Auto-merge PR 24 | if: ${{steps.metadata.outputs.update-type == 'version-update:semver-patch'}} 25 | run: | 26 | gh pr review --approve "$PR_URL" | 27 | gh pr merge --auto --squash "$PR_URL" 28 | env: 29 | PR_URL: ${{github.event.pull_request.html_url}} 30 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 31 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-video-vimeo.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "lang": "en", 5 | "type": ["h-cite"], 6 | "properties": { 7 | "name": ["Ultromedia Please (Interactive)"], 8 | "url": ["https://vimeo.com/789006133"], 9 | "summary": [ 10 | "Ultromedia Please is a winner of the 2022 Best of the Year award. To explore the full list of winners, check out vimeo.com/bestoftheyear What begins as a helpful…" 11 | ], 12 | "featured": [ 13 | { 14 | "value": "https://i.vimeocdn.com/video/1586931541-9f193de8dc4391b9676499e272f48c10669bc145876d549fb70c917c0cb1a7dd-d", 15 | "alt": "Image alt text." 16 | } 17 | ], 18 | "video": [ 19 | "https://player.vimeo.com/video/789006133?autoplay=1&h=82e9bae2d0" 20 | ], 21 | "publication": ["Vimeo"] 22 | } 23 | } 24 | ], 25 | "rels": { 26 | "canonical": ["https://vimeo.com/789006133"] 27 | }, 28 | "rel-urls": { 29 | "https://vimeo.com/789006133": { 30 | "rels": ["canonical"], 31 | "text": "" 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Aimee Gamble-Milner 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-profile-linkedin.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "lang": "en", 5 | "type": ["h-card"], 6 | "properties": { 7 | "name": [ 8 | "Tantek Çelik - Web Standards Lead - Mozilla Corporation | LinkedIn" 9 | ], 10 | "summary": [ 11 | "OG: View Tantek Çelik’s profile on LinkedIn, the world’s largest professional community. Tantek has 9 jobs listed on their profile. See the complete profile on LinkedIn and discover Tantek’s connections and jobs at similar companies." 12 | ], 13 | "featured": [ 14 | "https://media.licdn.com/dms/image/C4E03AQFCJlpMj8yLiA/profile-displayphoto-shrink_800_800/0/1516166857593?e=2147483647&v=beta&t=JKdFRqEQmtFMizqPGG-yegxmy0vCycdFZMDVS4elPSY" 15 | ], 16 | "given-name": ["Tantek"], 17 | "family-name": ["Çelik"], 18 | "url": ["https://www.linkedin.com/in/tantek"] 19 | } 20 | } 21 | ], 22 | "rels": { 23 | "canonical": ["https://www.linkedin.com/in/tantek"] 24 | }, 25 | "rel-urls": { 26 | "https://www.linkedin.com/in/tantek": { 27 | "rels": ["canonical"], 28 | "text": "" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /test/suites/experimental/text-content.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "properties": { 6 | "location": [ 7 | { 8 | "type": ["h-adr"], 9 | "properties": { 10 | "locality": ["Berlin"], 11 | "region": ["Berlin"], 12 | "country-name": ["DE"], 13 | "latitude": ["52.518606"], 14 | "longitude": ["13.376127"] 15 | }, 16 | "value": "Berlin, Berlin, DE" 17 | } 18 | ] 19 | } 20 | }, 21 | { 22 | "type": ["h-card"], 23 | "properties": { 24 | "name": ["Bob Smith"], 25 | "title": ["Senior\nGeneral\nWaste"] 26 | } 27 | }, 28 | { 29 | "type": ["h-card"], 30 | "properties": { 31 | "name": ["Bob Smith"], 32 | "photo": [ 33 | { 34 | "alt": "Bob Smith", 35 | "value": "http://example.com/profile-picture.jpg" 36 | } 37 | ] 38 | } 39 | } 40 | ], 41 | "rels": { 42 | "me": ["http://example.com/"] 43 | }, 44 | "rel-urls": { 45 | "http://example.com/": { 46 | "rels": ["me"], 47 | "text": "Joe Bloggs" 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | name-template: "v$RESOLVED_VERSION" 2 | tag-template: "v$RESOLVED_VERSION" 3 | template: | 4 | ## What's Changed 5 | 6 | $CHANGES 7 | categories: 8 | - title: Breaking changes 9 | labels: 10 | - major 11 | - breaking change 12 | - title: Features 13 | labels: 14 | - enhancement 15 | - minor 16 | - title: Bug fixes 17 | labels: 18 | - bug 19 | - title: Maintenance 20 | labels: 21 | - maintenance 22 | - documentation 23 | version-resolver: 24 | major: 25 | labels: 26 | - major 27 | - breaking change 28 | minor: 29 | labels: 30 | - minor 31 | - enhancement 32 | patch: 33 | labels: 34 | - bug 35 | - maintenance 36 | default: patch 37 | autolabeler: 38 | - label: bug 39 | title: 40 | - "/fix/i" 41 | - label: maintenance 42 | title: 43 | - "/perf/i" 44 | - "/refactor/i" 45 | - "/style/i" 46 | - "/test/i" 47 | - "/build/i" 48 | - "/chore/i" 49 | - "/ci/i" 50 | - label: enhancement 51 | title: 52 | - "/feat/i" 53 | - label: breaking change 54 | title: 55 | - "/breaking change/i" 56 | - label: documentation 57 | title: 58 | - "/docs/i" 59 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-prefer-mf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 |

Microformats

21 |

True microformats should prevent metaformats

22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /src/implied/name.ts: -------------------------------------------------------------------------------- 1 | import { impliedTextContent } from "../helpers/textContent"; 2 | import { isElement } from "../helpers/nodeMatchers"; 3 | import { getClassNames, getAttributeIfTag } from "../helpers/attributes"; 4 | import { ParsingOptions, Element } from "../types"; 5 | 6 | const parseNode = (node: Element): string | undefined => 7 | getAttributeIfTag(node, ["img", "area"], "alt") ?? 8 | getAttributeIfTag(node, ["abbr"], "title"); 9 | 10 | const parseChild = (node: Element): string | undefined => { 11 | const children = node.childNodes.filter(isElement); 12 | return children.length ? parseNode(children[0]) : undefined; 13 | }; 14 | 15 | const parseGrandchild = (node: Element): string | undefined => { 16 | const children = node.childNodes.filter(isElement); 17 | return children.length === 1 ? parseChild(children[0]) : undefined; 18 | }; 19 | 20 | export const impliedName = ( 21 | node: Element, 22 | children: Element[], 23 | options: ParsingOptions, 24 | ): string | undefined => { 25 | if (children.some((child) => getClassNames(child, /^(p|e|h)-/).length)) { 26 | return; 27 | } 28 | 29 | return ( 30 | parseNode(node) ?? 31 | parseChild(node) ?? 32 | parseGrandchild(node) ?? 33 | impliedTextContent(node, options) 34 | ); 35 | }; 36 | -------------------------------------------------------------------------------- /src/implied/url.ts: -------------------------------------------------------------------------------- 1 | import { Element } from "../types"; 2 | 3 | import { getClassNames, getAttributeIfTag } from "../helpers/attributes"; 4 | import { isElement, isMicroformatV2Root } from "../helpers/nodeMatchers"; 5 | 6 | const parseNode = (node: Element): string | undefined => 7 | getAttributeIfTag(node, ["a", "area"], "href"); 8 | 9 | const parseChild = (node: Element): string | undefined => { 10 | const children = node.childNodes.filter(isElement); 11 | const a = children.filter((child) => child.tagName === "a"); 12 | const area = children.filter((child) => child.tagName === "area"); 13 | 14 | for (const list of [a, area]) { 15 | if (list.length === 1 && !isMicroformatV2Root(list[0])) { 16 | return parseNode(list[0]); 17 | } 18 | } 19 | 20 | return; 21 | }; 22 | 23 | const parseGrandchild = (node: Element): string | undefined => { 24 | const children = node.childNodes.filter(isElement); 25 | return children.length === 1 ? parseChild(children[0]) : undefined; 26 | }; 27 | 28 | export const impliedUrl = ( 29 | node: Element, 30 | children: Element[], 31 | ): string | undefined => { 32 | if (children.some((child) => getClassNames(child, "u-").length)) { 33 | return; 34 | } 35 | 36 | return parseNode(node) ?? parseChild(node) ?? parseGrandchild(node); 37 | }; 38 | -------------------------------------------------------------------------------- /demo/demo.js: -------------------------------------------------------------------------------- 1 | // eslint-disable-next-line import/no-unresolved 2 | import { mf2 } from "../dist/index.mjs"; 3 | import "./demo.css"; 4 | 5 | const setResult = (result) => { 6 | const escaped = JSON.stringify(result, null, 2) 7 | .replace(//g, ">"); 9 | document.getElementById("result").innerHTML = escaped; 10 | }; 11 | 12 | const setError = (error) => { 13 | const el = document.getElementById("error"); 14 | el.innerHTML = `Error: ${error}`; 15 | el.classList.remove("hide"); 16 | }; 17 | 18 | const parse = (html, options) => { 19 | document.getElementById("error").classList.add("hide"); 20 | 21 | try { 22 | const result = mf2(html, options); 23 | setResult(result); 24 | } catch (err) { 25 | setError(err.message); 26 | } 27 | 28 | return false; 29 | }; 30 | 31 | window.parseHtml = () => { 32 | const html = document.getElementById("html").value; 33 | const baseUrl = document.getElementById("base-url").value; 34 | const lang = document.getElementById("lang").checked; 35 | const textContent = document.getElementById("textContent").checked; 36 | const metaformats = document.getElementById("metaformats").checked; 37 | 38 | return parse(html, { 39 | baseUrl, 40 | experimental: { lang, textContent, metaformats }, 41 | }); 42 | }; 43 | -------------------------------------------------------------------------------- /src/parser.ts: -------------------------------------------------------------------------------- 1 | import { parse } from "parse5"; 2 | 3 | import { findChildren } from "./helpers/findChildren"; 4 | import { parseMicroformat } from "./microformats/parse"; 5 | import { isMicroformatRoot } from "./helpers/nodeMatchers"; 6 | import { ParsedDocument, ParserOptions, ParsingOptions } from "./types"; 7 | import { validateParsedHtml } from "./validator"; 8 | import { documentSetup } from "./helpers/documentSetup"; 9 | import { parseMetaformats } from "./helpers/metaformats"; 10 | import { isEnabled } from "./helpers/experimental"; 11 | 12 | export const parser = ( 13 | html: string, 14 | options: ParserOptions, 15 | ): ParsedDocument => { 16 | const doc = parse(html); 17 | validateParsedHtml(doc); 18 | 19 | const { idRefs, rels, relUrls, baseUrl, lang } = documentSetup(doc, options); 20 | 21 | const parsingOptions: ParsingOptions = { 22 | ...options, 23 | baseUrl, 24 | idRefs, 25 | inherited: { roots: [], lang }, 26 | }; 27 | let items = findChildren(doc, isMicroformatRoot).map((mf) => 28 | parseMicroformat(mf, parsingOptions), 29 | ); 30 | 31 | if (items.length === 0 && isEnabled(parsingOptions, "metaformats")) { 32 | items = parseMetaformats(doc, parsingOptions); 33 | } 34 | 35 | return { 36 | rels, 37 | "rel-urls": relUrls, 38 | items, 39 | }; 40 | }; 41 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/urls.html: -------------------------------------------------------------------------------- 1 |
2 |
3 | microformats 4 |
5 |
6 | 7 |
8 | 9 |
10 | 11 |
12 | 13 |
14 | 15 |
16 | Example post 17 |
18 | 19 |
20 | Example post 21 |
22 | 23 |
24 |
25 | Example post 26 |
27 |
28 | 29 |
30 |
31 | Example post 32 |
33 |
34 | 35 |
36 | A data value 37 |
38 | 39 |
40 | 41 | An image to be trimmed 42 |
43 | 44 |
45 | Photo 46 | An image to be trimmed 47 |
48 | 49 |
50 | 51 |
52 | microformats 53 |
54 |
55 | -------------------------------------------------------------------------------- /.github/workflows/build_and_test.yaml: -------------------------------------------------------------------------------- 1 | name: Build and test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | 9 | env: 10 | TZ: Europe/London 11 | 12 | jobs: 13 | build_and_test: 14 | runs-on: ubuntu-latest 15 | timeout-minutes: 5 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: actions/setup-node@v4 19 | with: 20 | node-version-file: ".nvmrc" 21 | cache: "npm" 22 | - name: Install dependencies 23 | run: yarn 24 | - name: Lint code 25 | run: yarn lint 26 | - name: Run prettier list 27 | run: yarn prettier:list 28 | - name: Test code 29 | run: yarn test 30 | - name: Build package 31 | run: yarn build 32 | - name: Upload build artifacts 33 | uses: actions/upload-artifact@v4 34 | with: 35 | name: dist 36 | path: dist 37 | test_dist: 38 | runs-on: ubuntu-latest 39 | timeout-minutes: 5 40 | needs: [build_and_test] 41 | strategy: 42 | matrix: 43 | node: [18, 20, 22, 24] 44 | steps: 45 | - uses: actions/checkout@v4 46 | - uses: actions/setup-node@v4 47 | with: 48 | node-version: ${{ matrix.node }} 49 | - name: Install dependencies 50 | run: yarn 51 | - name: Download build artifacts 52 | uses: actions/download-artifact@v4 53 | with: 54 | name: dist 55 | path: dist 56 | - name: Test package 57 | run: yarn test:package 58 | -------------------------------------------------------------------------------- /src/implied/photo.ts: -------------------------------------------------------------------------------- 1 | import { Image, Element } from "../types"; 2 | import { parseImage } from "../helpers/images"; 3 | import { getAttributeValue, getClassNames } from "../helpers/attributes"; 4 | import { isElement, isMicroformatV2Root } from "../helpers/nodeMatchers"; 5 | 6 | const parseNode = (node: Element): Image | string | undefined => { 7 | if (node.tagName === "img") { 8 | return parseImage(node); 9 | } 10 | 11 | if (node.tagName === "object") { 12 | return getAttributeValue(node, "data"); 13 | } 14 | 15 | return; 16 | }; 17 | 18 | const parseChild = (node: Element): Image | string | undefined => { 19 | const children = node.childNodes.filter(isElement); 20 | const imgs = children.filter((child) => child.tagName === "img"); 21 | const objects = children.filter((child) => child.tagName === "object"); 22 | 23 | for (const list of [imgs, objects]) { 24 | if (list.length === 1 && !isMicroformatV2Root(list[0])) { 25 | return parseNode(list[0]); 26 | } 27 | } 28 | 29 | return; 30 | }; 31 | 32 | const parseGrandchild = (node: Element): string | Image | undefined => { 33 | const children = node.childNodes.filter(isElement); 34 | return children.length === 1 ? parseChild(children[0]) : undefined; 35 | }; 36 | 37 | export const impliedPhoto = ( 38 | node: Element, 39 | children: Element[], 40 | ): Image | string | undefined => { 41 | if (children.some((child) => getClassNames(child, "u-").length)) { 42 | return; 43 | } 44 | 45 | return parseNode(node) ?? parseChild(node) ?? parseGrandchild(node); 46 | }; 47 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-article.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | OGP | Title 5 | 6 | 7 | 8 | 9 | 10 | 14 | 15 | 16 | 17 | 18 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 |

OGP

32 |

OGP tags are read if no microformats are found

33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /test/suites/experimental/lang.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
This test is in English.
4 | 5 | 6 | 7 | 8 |
This test is in English.
9 | 10 | 11 | 12 | 13 |
Esta prueba está en español.
14 | 15 | 16 | 17 |
This test is in English.
18 | 19 | 20 | 21 |
This test is in English.
22 |
Esta prueba está en español.
23 | 24 | 25 | 26 |
27 |

En svensk titel

28 |
With an english summary
29 |
Och svensk huvudtext
30 |
31 | -------------------------------------------------------------------------------- /test/suites/experimental/lang.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "lang": "en", 6 | "properties": { 7 | "name": ["This test is in English."] 8 | } 9 | }, 10 | { 11 | "type": ["h-entry"], 12 | "lang": "en", 13 | "properties": { 14 | "name": ["This test is in English."] 15 | } 16 | }, 17 | { 18 | "type": ["h-entry"], 19 | "lang": "es", 20 | "properties": { 21 | "name": ["Esta prueba está en español."] 22 | } 23 | }, 24 | { 25 | "type": ["h-entry"], 26 | "lang": "en", 27 | "properties": { 28 | "name": ["This test is in English."] 29 | } 30 | }, 31 | { 32 | "type": ["h-entry"], 33 | "lang": "en", 34 | "properties": { 35 | "name": ["This test is in English."] 36 | } 37 | }, 38 | { 39 | "type": ["h-entry"], 40 | "lang": "es", 41 | "properties": { 42 | "name": ["Esta prueba está en español."] 43 | } 44 | }, 45 | { 46 | "type": ["h-entry"], 47 | "lang": "sv", 48 | "id": "postfrag123", 49 | "properties": { 50 | "name": ["En svensk titel"], 51 | "content": [ 52 | { 53 | "html": "With an english summary", 54 | "value": "With an english summary", 55 | "lang": "en" 56 | }, 57 | { 58 | "html": "Och svensk huvudtext", 59 | "value": "Och svensk huvudtext", 60 | "lang": "sv" 61 | } 62 | ] 63 | } 64 | } 65 | ], 66 | "rels": {}, 67 | "rel-urls": {} 68 | } 69 | -------------------------------------------------------------------------------- /src/helpers/findChildren.ts: -------------------------------------------------------------------------------- 1 | import { Document, Element } from "../types"; 2 | import { isMicroformatRoot, isElement } from "./nodeMatchers"; 3 | import { BackcompatRoot, getBackcompatRootClassNames } from "../backcompat"; 4 | 5 | type Matcher = 6 | | ((node: Element) => boolean) 7 | | ((node: Element, roots: BackcompatRoot[]) => boolean); 8 | 9 | interface ReducerOptions { 10 | matcher: Matcher; 11 | roots: BackcompatRoot[]; 12 | } 13 | 14 | const getElementChildren = (node: Element | Document): Element[] => 15 | node.childNodes.filter(Boolean).filter(isElement); 16 | 17 | const reducer = ( 18 | microformats: Element[], 19 | node: Element, 20 | options: ReducerOptions, 21 | ): Element[] => { 22 | const { matcher, roots } = options; 23 | const match = matcher(node, roots) && node; 24 | 25 | // if we have a match and it's a h- element, stop looking 26 | if (match && isMicroformatRoot(node)) { 27 | return [...microformats, node]; 28 | } 29 | 30 | if (isMicroformatRoot(node)) { 31 | return microformats; 32 | } 33 | 34 | const childMicroformats = getElementChildren(node).reduce( 35 | (prev, curr) => reducer(prev, curr, options), 36 | match ? [match] : [], 37 | ); 38 | 39 | return [...microformats, ...childMicroformats]; 40 | }; 41 | 42 | export const findChildren = ( 43 | parent: Element | Document, 44 | matcher: Matcher, 45 | ): Element[] => { 46 | const findOptions = { 47 | roots: isElement(parent) ? getBackcompatRootClassNames(parent) : [], 48 | stopAtRoot: true, 49 | matcher, 50 | }; 51 | 52 | return getElementChildren(parent).reduce( 53 | (prev, curr) => reducer(prev, curr, findOptions), 54 | [], 55 | ); 56 | }; 57 | -------------------------------------------------------------------------------- /src/rels/rels.ts: -------------------------------------------------------------------------------- 1 | import { Rels, RelUrls, ParserOptions, Element } from "../types"; 2 | import { getAttributeValue } from "../helpers/attributes"; 3 | import { relTextContent } from "../helpers/textContent"; 4 | 5 | interface ParseRelOptions { 6 | rels: Rels; 7 | relUrls: RelUrls; 8 | } 9 | 10 | export const parseRel = ( 11 | child: Element, 12 | { rels, relUrls }: ParseRelOptions, 13 | options: ParserOptions, 14 | ): void => { 15 | /** 16 | * Ignores used as this method is only ever called if they are defined 17 | * But required for TS typechecking 18 | */ 19 | const text = relTextContent(child, options); 20 | const rel = getAttributeValue(child, "rel"); 21 | const href = getAttributeValue(child, "href")?.trim(); 22 | const title = getAttributeValue(child, "title"); 23 | const media = getAttributeValue(child, "media"); 24 | const hreflang = getAttributeValue(child, "hreflang"); 25 | const type = getAttributeValue(child, "type"); 26 | 27 | if (!rel || !href) { 28 | return; 29 | } 30 | 31 | rel.split(" ").forEach((rel) => { 32 | if (!rels[rel]) { 33 | rels[rel] = []; 34 | } 35 | 36 | if (!rels[rel].includes(href)) { 37 | rels[rel].push(href); 38 | } 39 | 40 | if (!relUrls[href]) { 41 | relUrls[href] = { rels: [rel], text }; 42 | } else if (!relUrls[href].rels.includes(rel)) { 43 | relUrls[href].rels.push(rel); 44 | relUrls[href].rels.sort(); 45 | } 46 | 47 | if (text && !relUrls[href].text) { 48 | relUrls[href].text = text; 49 | } 50 | 51 | if (title && !relUrls[href].title) { 52 | relUrls[href].title = title; 53 | } 54 | 55 | if (media && !relUrls[href].media) { 56 | relUrls[href].media = media; 57 | } 58 | 59 | if (hreflang && !relUrls[href].hreflang) { 60 | relUrls[href].hreflang = hreflang; 61 | } 62 | 63 | if (type && !relUrls[href].type) { 64 | relUrls[href].type = type; 65 | } 66 | }); 67 | }; 68 | -------------------------------------------------------------------------------- /test/package.cjs.spec.js: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import path from "path"; 3 | import { readFileSync } from "fs"; 4 | 5 | import { loadScenarios } from "./utils/loadScenarios"; 6 | import { dirname } from "./utils/dirname"; 7 | 8 | const __dirname = dirname(import.meta.url); 9 | const { main: modulePath } = JSON.parse( 10 | readFileSync(path.resolve(__dirname, "../package.json")) 11 | ); 12 | 13 | // get the correct module value from package.json and test that 14 | const { mf2 } = await import(path.resolve(__dirname, "../", modulePath)); 15 | 16 | const scenarioDir = path.resolve( 17 | __dirname, 18 | `../node_modules/microformat-tests/tests` 19 | ); 20 | 21 | const v1 = loadScenarios(scenarioDir, "microformats-v1"); 22 | const v2 = loadScenarios(scenarioDir, "microformats-v2"); 23 | const mixed = loadScenarios(scenarioDir, "microformats-mixed"); 24 | 25 | const options = { 26 | baseUrl: "http://example.com", 27 | }; 28 | 29 | describe("package // cjs // scenarios", () => { 30 | it("should have a .cjs extension", () => { 31 | expect(modulePath).to.match(/\.cjs$/); 32 | }); 33 | 34 | describe("microformats-v1", () => { 35 | v1.forEach(({ name, input, expected }) => { 36 | it(`should correctly parse ${name}`, () => { 37 | const result = mf2(input, options); 38 | expect(result).to.deep.equal(expected); 39 | }); 40 | }); 41 | }); 42 | 43 | describe("microformats-v2", () => { 44 | v2.forEach(({ name, input, expected }) => { 45 | it(`should correctly parse ${name}`, () => { 46 | const result = mf2(input, options); 47 | expect(result).to.deep.equal(expected); 48 | }); 49 | }); 50 | }); 51 | 52 | describe("microformats-mixed", () => { 53 | mixed.forEach(({ name, input, expected }) => { 54 | it(`should correctly parse ${name}`, () => { 55 | const result = mf2(input, options); 56 | expect(result).to.deep.equal(expected); 57 | }); 58 | }); 59 | }); 60 | }); 61 | -------------------------------------------------------------------------------- /src/helpers/attributes.ts: -------------------------------------------------------------------------------- 1 | import { Attribute, Element } from "../types"; 2 | 3 | export const getAttribute = ( 4 | node: Element, 5 | name: string, 6 | ): Attribute | undefined => node.attrs.find((attr) => attr.name === name); 7 | 8 | export const getAttributeValue = ( 9 | node: Element, 10 | name: string, 11 | ): string | undefined => { 12 | const attr = getAttribute(node, name)?.value; 13 | return attr?.length ? attr : undefined; 14 | }; 15 | 16 | export const getClassNames = ( 17 | node: Element, 18 | matcher?: RegExp | string, 19 | ): string[] => { 20 | const classNames = getAttributeValue(node, "class")?.split(" ") || []; 21 | 22 | return matcher 23 | ? classNames.filter((name) => 24 | typeof matcher === "string" 25 | ? name.startsWith(matcher) 26 | : name.match(matcher), 27 | ) 28 | : classNames; 29 | }; 30 | 31 | export const getClassNameIntersect = ( 32 | node: Element, 33 | toCompare: T[], 34 | ): T[] => 35 | getClassNames(node).filter((name: string): name is T => 36 | toCompare.includes(name as T), 37 | ); 38 | 39 | export const hasClassName = (node: Element, className: string): boolean => 40 | getClassNames(node).some((name) => name === className); 41 | 42 | export const hasClassNameIntersect = ( 43 | node: Element, 44 | toCompare: string[], 45 | ): boolean => getClassNames(node).some((name) => toCompare.includes(name)); 46 | 47 | export const getAttributeIfTag = ( 48 | node: Element, 49 | tagNames: string[], 50 | attr: string, 51 | ): string | undefined => 52 | tagNames.includes(node.tagName) ? getAttributeValue(node, attr) : undefined; 53 | 54 | export const hasRelIntersect = (node: Element, toCompare: string[]): boolean => 55 | Boolean( 56 | getAttributeValue(node, "rel") 57 | ?.split(" ") 58 | .some((name) => toCompare.includes(name)), 59 | ); 60 | 61 | export const getRelIntersect = (node: Element, toCompare: string[]): string[] => 62 | getAttributeValue(node, "rel") 63 | ?.split(" ") 64 | .filter((name) => toCompare.includes(name)) || []; 65 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/nested.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "properties": { 6 | "content": [ 7 | { 8 | "type": ["h-card"], 9 | "properties": { 10 | "name": ["Jane Doe"] 11 | }, 12 | "html": "

Jane Doe

", 13 | "value": "Jane Doe" 14 | } 15 | ] 16 | } 17 | }, 18 | { 19 | "type": ["h-entry"], 20 | "properties": { 21 | "photo": [ 22 | { 23 | "type": ["h-card"], 24 | "properties": { 25 | "name": ["My name"], 26 | "photo": [ 27 | { "alt": "My name", "value": "http://example.com/photo.jpg" } 28 | ] 29 | }, 30 | "value": "http://example.com/photo.jpg" 31 | } 32 | ] 33 | } 34 | }, 35 | { 36 | "type": ["h-entry"], 37 | "properties": { 38 | "photo": [ 39 | { 40 | "type": ["h-card"], 41 | "properties": { 42 | "name": [""], 43 | "photo": ["http://example.com/photo.jpg"] 44 | }, 45 | "value": "http://example.com/photo.jpg" 46 | } 47 | ] 48 | } 49 | }, 50 | { 51 | "type": ["h-entry"], 52 | "properties": { 53 | "photo": [ 54 | { 55 | "type": ["h-card"], 56 | "properties": { 57 | "name": [""] 58 | }, 59 | "value": "" 60 | } 61 | ] 62 | } 63 | }, 64 | { 65 | "type": ["h-card"], 66 | "properties": { 67 | "bday": [ 68 | { 69 | "type": ["h-event"], 70 | "properties": { 71 | "name": ["2nd May"], 72 | "start": ["2010-05-02"], 73 | "end": ["2010-05-02"] 74 | }, 75 | "value": "2nd May" 76 | } 77 | ] 78 | } 79 | } 80 | ], 81 | "rels": {}, 82 | "rel-urls": {} 83 | } 84 | -------------------------------------------------------------------------------- /test/package.mjs.spec.js: -------------------------------------------------------------------------------- 1 | import { expect } from "chai"; 2 | import path from "path"; 3 | import { readFileSync } from "fs"; 4 | 5 | import { loadScenarios } from "./utils/loadScenarios"; 6 | import { dirname } from "./utils/dirname"; 7 | 8 | const __dirname = dirname(import.meta.url); 9 | const { module: modulePath } = JSON.parse( 10 | readFileSync(path.resolve(__dirname, "../package.json")) 11 | ); 12 | 13 | // get the correct module value from package.json and test that 14 | const { mf2, __esModule } = await import( 15 | path.resolve(__dirname, "../", modulePath) 16 | ); 17 | 18 | const scenarioDir = path.resolve( 19 | __dirname, 20 | `../node_modules/microformat-tests/tests` 21 | ); 22 | 23 | const v1 = loadScenarios(scenarioDir, "microformats-v1"); 24 | const v2 = loadScenarios(scenarioDir, "microformats-v2"); 25 | const mixed = loadScenarios(scenarioDir, "microformats-mixed"); 26 | 27 | const options = { 28 | baseUrl: "http://example.com", 29 | }; 30 | 31 | describe("package // esm // scenarios", () => { 32 | it("should have a .mjs extension", () => { 33 | expect(modulePath).to.match(/\.mjs$/); 34 | }); 35 | 36 | it("should have __esModule = undefined", () => { 37 | expect(__esModule).to.equal(undefined); 38 | }); 39 | 40 | describe("microformats-v1", () => { 41 | v1.forEach(({ name, input, expected }) => { 42 | it(`should correctly parse ${name}`, () => { 43 | const result = mf2(input, options); 44 | expect(result).to.deep.equal(expected); 45 | }); 46 | }); 47 | }); 48 | 49 | describe("microformats-v2", () => { 50 | v2.forEach(({ name, input, expected }) => { 51 | it(`should correctly parse ${name}`, () => { 52 | const result = mf2(input, options); 53 | expect(result).to.deep.equal(expected); 54 | }); 55 | }); 56 | }); 57 | 58 | describe("microformats-mixed", () => { 59 | mixed.forEach(({ name, input, expected }) => { 60 | it(`should correctly parse ${name}`, () => { 61 | const result = mf2(input, options); 62 | expect(result).to.deep.equal(expected); 63 | }); 64 | }); 65 | }); 66 | }); 67 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | 9 | # Diagnostic reports (https://nodejs.org/api/report.html) 10 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 11 | 12 | # Runtime data 13 | pids 14 | *.pid 15 | *.seed 16 | *.pid.lock 17 | 18 | # Directory for instrumented libs generated by jscoverage/JSCover 19 | lib-cov 20 | 21 | # Coverage directory used by tools like istanbul 22 | coverage 23 | *.lcov 24 | 25 | # nyc test coverage 26 | .nyc_output 27 | 28 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 29 | .grunt 30 | 31 | # Bower dependency directory (https://bower.io/) 32 | bower_components 33 | 34 | # node-waf configuration 35 | .lock-wscript 36 | 37 | # Compiled binary addons (https://nodejs.org/api/addons.html) 38 | build/Release 39 | 40 | # Dependency directories 41 | node_modules/ 42 | jspm_packages/ 43 | 44 | # TypeScript v1 declaration files 45 | typings/ 46 | 47 | # TypeScript cache 48 | *.tsbuildinfo 49 | 50 | # Optional npm cache directory 51 | .npm 52 | 53 | # Optional eslint cache 54 | .eslintcache 55 | 56 | # Microbundle cache 57 | .rpt2_cache/ 58 | .rts2_cache_cjs/ 59 | .rts2_cache_es/ 60 | .rts2_cache_umd/ 61 | 62 | # Optional REPL history 63 | .node_repl_history 64 | 65 | # Output of 'npm pack' 66 | *.tgz 67 | 68 | # Yarn Integrity file 69 | .yarn-integrity 70 | 71 | # dotenv environment variables file 72 | .env 73 | .env.test 74 | 75 | # parcel-bundler cache (https://parceljs.org/) 76 | .cache 77 | 78 | # Next.js build output 79 | .next 80 | 81 | # Nuxt.js build / generate output 82 | .nuxt 83 | dist 84 | 85 | # Gatsby files 86 | .cache/ 87 | # Comment in the public line in if your project uses Gatsby and *not* Next.js 88 | # https://nextjs.org/blog/next-9-1#public-directory-support 89 | # public 90 | 91 | # vuepress build output 92 | .vuepress/dist 93 | 94 | # Serverless directories 95 | .serverless/ 96 | 97 | # FuseBox cache 98 | .fusebox/ 99 | 100 | # DynamoDB Local files 101 | .dynamodb/ 102 | 103 | # TernJS port file 104 | .tern-port 105 | 106 | public 107 | demo/dist 108 | 109 | .DS_Store 110 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | import { DefaultTreeAdapterMap } from "parse5"; 2 | 3 | export type Element = DefaultTreeAdapterMap["element"]; 4 | export type Document = DefaultTreeAdapterMap["document"]; 5 | export type Attribute = Element["attrs"][number]; 6 | export type Node = DefaultTreeAdapterMap["node"]; 7 | export type TextNode = DefaultTreeAdapterMap["textNode"]; 8 | 9 | import { BackcompatRoot } from "./backcompat"; 10 | 11 | export interface ParserOptions { 12 | baseUrl: string; 13 | experimental?: { 14 | lang?: boolean; 15 | textContent?: boolean; 16 | metaformats?: boolean; 17 | }; 18 | } 19 | 20 | export type ExperimentalName = keyof NonNullable; 21 | 22 | export interface ParsingOptions extends ParserOptions { 23 | implyProperties?: boolean; 24 | idRefs: IdRefs; 25 | inherited: { 26 | roots: BackcompatRoot[]; 27 | lang?: string; 28 | }; 29 | } 30 | 31 | export interface ParsedDocument { 32 | rels: Rels; 33 | "rel-urls": RelUrls; 34 | items: MicroformatRoot[]; 35 | } 36 | 37 | export type MicroformatProperties = Record; 38 | 39 | export interface MicroformatRoot { 40 | id?: string; 41 | lang?: string; 42 | type?: string[]; 43 | properties: MicroformatProperties; 44 | children?: MicroformatRoot[]; 45 | value?: MicroformatProperty; 46 | } 47 | 48 | export interface Image { 49 | alt: string; 50 | value?: string; 51 | } 52 | 53 | export interface Html { 54 | html: string; 55 | value: string; 56 | lang?: string; 57 | } 58 | 59 | export type MicroformatProperty = MicroformatRoot | Image | Html | string; 60 | 61 | export type Rels = Record; 62 | 63 | export type RelUrls = Record< 64 | string, 65 | { 66 | rels: string[]; 67 | text: string; 68 | title?: string; 69 | media?: string; 70 | hreflang?: string; 71 | type?: string; 72 | } 73 | >; 74 | 75 | export type IdRefs = Record; 76 | 77 | export type PropertyType = "p" | "u" | "e" | "dt"; 78 | 79 | export interface ParsedProperty { 80 | key: string; 81 | value: MicroformatProperty | undefined; 82 | type: PropertyType; 83 | } 84 | 85 | export interface Backcompat { 86 | type: string[]; 87 | properties: Record; 88 | rels?: Record; 89 | } 90 | -------------------------------------------------------------------------------- /src/helpers/valueClassPattern.ts: -------------------------------------------------------------------------------- 1 | import { getAttributeValue, hasClassName } from "./attributes"; 2 | import { textContent } from "./textContent"; 3 | import { findChildren } from "./findChildren"; 4 | import { isValueClass } from "./nodeMatchers"; 5 | import { ParsingOptions, Element } from "../types"; 6 | 7 | interface Options { 8 | datetime: boolean; 9 | } 10 | 11 | const datetimeProp = (node: Element): string | undefined => 12 | getAttributeValue(node, "datetime"); 13 | 14 | const valueTitle = (node: Element): string | undefined => { 15 | if (hasClassName(node, "value-title")) { 16 | return getAttributeValue(node, "title"); 17 | } 18 | 19 | return; 20 | }; 21 | 22 | const handleDate = (dateStrings: string[]): string | undefined => 23 | dateStrings 24 | .sort((a) => 25 | // Sort the date elements to move date components to the start 26 | a.match(/^[0-9]{4}/) ? -1 : 1, 27 | ) 28 | .join(" ") 29 | .trim() 30 | .replace( 31 | // remove ":" from timezones 32 | /((\+|-)[0-2][0-9]):([0-5][0-9])$/, 33 | (s) => s.replace(":", ""), 34 | ) 35 | .replace( 36 | // handle am and pm times 37 | /([0-2]?[0-9])(:[0-5][0-9])?(:[0-5][0-9])?(a\.?m\.?|p\.?m\.?)/i, 38 | (_s, hour, min, sec, ampm) => { 39 | const isAm = /a/i.test(ampm); 40 | 41 | // if the time is: 42 | // - am, zero pad 43 | // - pm, add 12 hours 44 | const newHour = isAm 45 | ? hour.padStart(2, "0") 46 | : `${parseInt(hour, 10) + 12}`; 47 | 48 | // reconstruct, and add mins if any are missing 49 | return `${newHour}${min ? min : ":00"}${sec || ""}`; 50 | }, 51 | ) 52 | .toUpperCase(); 53 | 54 | export const valueClassPattern = ( 55 | node: Element, 56 | options: ParsingOptions & Partial, 57 | ): string | undefined => { 58 | const values = findChildren(node, isValueClass); 59 | 60 | if (!values.length) { 61 | return; 62 | } 63 | 64 | if (options.datetime) { 65 | const date = values.map( 66 | (node) => 67 | datetimeProp(node) ?? valueTitle(node) ?? textContent(node, options), 68 | ); 69 | return handleDate(date); 70 | } 71 | 72 | return values 73 | .map((node) => valueTitle(node) ?? textContent(node, options)) 74 | .join("") 75 | .trim(); 76 | }; 77 | -------------------------------------------------------------------------------- /src/helpers/nodeMatchers.ts: -------------------------------------------------------------------------------- 1 | import { TextNode, Node, Element } from "../types"; 2 | import { 3 | getAttribute, 4 | hasClassNameIntersect, 5 | getClassNames, 6 | } from "./attributes"; 7 | import { 8 | backcompatRoots, 9 | hasBackcompatMicroformatProperty, 10 | BackcompatRoot, 11 | } from "../backcompat"; 12 | 13 | const classRegex = (prefix: string): RegExp => 14 | new RegExp(`^${prefix}-([a-z0-9]+-)?([a-z]+-)*[a-z]+$`); 15 | 16 | const rootClassRegex = classRegex("h"); 17 | const propClassRegex = classRegex("(p|e|u|dt)"); 18 | 19 | export const isElement = (node: Node): node is Element => 20 | "tagName" in node && "childNodes" in node; 21 | 22 | export const isTag = 23 | (tagName: string) => 24 | (node: Node): node is Element => 25 | isElement(node) && node.tagName === tagName; 26 | 27 | export const isTextNode = (node: Node): node is TextNode => "value" in node; 28 | 29 | export const isMicroformatV2Root = (node: Element): boolean => 30 | getClassNames(node).some((cl) => cl.match(rootClassRegex)); 31 | 32 | const isMicroformatV1Root = (node: Element): boolean => 33 | hasClassNameIntersect(node, backcompatRoots); 34 | 35 | export const isMicroformatRoot = (node: Element): boolean => 36 | isMicroformatV2Root(node) || isMicroformatV1Root(node); 37 | 38 | export const isMicroformatV1Property = ( 39 | node: Element, 40 | roots: BackcompatRoot[], 41 | ): boolean => hasBackcompatMicroformatProperty(node, roots); 42 | 43 | export const isMicroformatV2Property = (node: Element): boolean => 44 | getClassNames(node, propClassRegex).length > 0; 45 | 46 | export const isMicroformatChild = ( 47 | node: Element, 48 | roots: BackcompatRoot[], 49 | ): boolean => 50 | !isMicroformatV2Property(node) && 51 | !isMicroformatV1Property(node, roots) && 52 | isMicroformatRoot(node); 53 | 54 | export const isBase = (node: Element): boolean => 55 | Boolean( 56 | isElement(node) && node.tagName === "base" && getAttribute(node, "href"), 57 | ); 58 | 59 | export const isValueClass = (node: Element): boolean => 60 | isElement(node) && hasClassNameIntersect(node, ["value", "value-title"]); 61 | 62 | export const isRel = (node: Element): boolean => 63 | Boolean( 64 | isElement(node) && 65 | node.attrs.some((attr) => attr.name === "rel") && 66 | node.attrs.some((attr) => attr.name === "href"), 67 | ); 68 | -------------------------------------------------------------------------------- /src/microformats/properties.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ParsedProperty, 3 | MicroformatProperties, 4 | ParsingOptions, 5 | Element, 6 | } from "../types"; 7 | import { findChildren } from "../helpers/findChildren"; 8 | import { impliedName } from "../implied/name"; 9 | import { impliedUrl } from "../implied/url"; 10 | import { 11 | isMicroformatV1Property, 12 | isMicroformatV2Property, 13 | } from "../helpers/nodeMatchers"; 14 | import { impliedPhoto } from "../implied/photo"; 15 | import { parseProperty, postParseNode } from "./property"; 16 | import { flatten } from "../helpers/array"; 17 | 18 | const addProperty = ( 19 | properties: MicroformatProperties, 20 | { key, value }: Pick, 21 | ): void => { 22 | if (typeof value === "undefined") { 23 | return; 24 | } 25 | 26 | if (!properties[key] && !Array.isArray(properties[key])) { 27 | properties[key] = [value]; 28 | return; 29 | } 30 | 31 | properties[key].push(value); 32 | }; 33 | 34 | const getPropertyNodes = (node: Element, options: ParsingOptions): Element[] => 35 | !options.inherited.roots.length 36 | ? findChildren(node, isMicroformatV2Property) 37 | : findChildren(node, isMicroformatV1Property); 38 | 39 | export const microformatProperties = ( 40 | node: Element, 41 | options: ParsingOptions, 42 | ): MicroformatProperties => { 43 | const properties: MicroformatProperties = {}; 44 | 45 | const propertyNodes = getPropertyNodes(node, options); 46 | 47 | propertyNodes 48 | .map((child) => parseProperty(child, options)) 49 | .reduce(flatten, []) 50 | .map(postParseNode) 51 | .forEach((prop) => addProperty(properties, prop)); 52 | 53 | if (options.implyProperties && !options.inherited.roots.length) { 54 | if (typeof properties.name === "undefined") { 55 | addProperty(properties, { 56 | key: "name", 57 | value: impliedName(node, propertyNodes, options), 58 | }); 59 | } 60 | 61 | if (typeof properties.url === "undefined") { 62 | addProperty(properties, { 63 | key: "url", 64 | value: impliedUrl(node, propertyNodes), 65 | }); 66 | } 67 | 68 | if (typeof properties.photo === "undefined") { 69 | addProperty(properties, { 70 | key: "photo", 71 | value: impliedPhoto(node, propertyNodes), 72 | }); 73 | } 74 | } 75 | 76 | return properties; 77 | }; 78 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "microformats-parser", 3 | "version": "2.0.1", 4 | "description": "A JavaScript microformats v2 parser for the browser and node.js", 5 | "type": "module", 6 | "main": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "module": "dist/index.mjs", 9 | "homepage": "https://microformats.github.io/microformats-parser/", 10 | "repository": "https://github.com/microformats/microformats-parser.git", 11 | "author": "Aimee Gamble-Milner (https://github.com/aimee-gm)", 12 | "license": "MIT", 13 | "keywords": [ 14 | "microformats", 15 | "parser", 16 | "mf2", 17 | "indieweb" 18 | ], 19 | "scripts": { 20 | "prepare": "husky", 21 | "prebuild": "rm -rf ./dist", 22 | "build": "rollup -c", 23 | "demo": "http-server public", 24 | "lint": "eslint --cache './**/*.{ts,js}'", 25 | "prettier:list": "prettier '**/*.{ts,json,md,html}' --list-different", 26 | "prettier:fix": "prettier '**/*.{ts,json,md,html}' --write", 27 | "test": "c8 mocha ./test/*.spec.ts", 28 | "test:package": "mocha ./test/package.*.spec.js" 29 | }, 30 | "engines": { 31 | "node": ">=18" 32 | }, 33 | "files": [ 34 | "/dist/index.cjs", 35 | "/dist/index.cjs.map", 36 | "/dist/index.mjs", 37 | "/dist/index.mjs.map", 38 | "/dist/index.d.ts", 39 | "/CONTRIBUTING.md" 40 | ], 41 | "dependencies": { 42 | "parse5": "^7.1.2" 43 | }, 44 | "devDependencies": { 45 | "@rollup/plugin-commonjs": "^25.0.4", 46 | "@rollup/plugin-html": "^2.0.0", 47 | "@rollup/plugin-node-resolve": "^15.2.1", 48 | "@rollup/plugin-terser": "^0.4.3", 49 | "@rollup/plugin-typescript": "^11.1.3", 50 | "@types/chai": "^4.2.11", 51 | "@types/glob": "^8.1.0", 52 | "@types/mocha": "^10.0.1", 53 | "@types/node": "^20.8.6", 54 | "@typescript-eslint/eslint-plugin": "^7.0.0", 55 | "@typescript-eslint/parser": "^6.0.0", 56 | "c8": "^9.0.0", 57 | "chai": "^5.0.0", 58 | "eslint": "^8.16.0", 59 | "eslint-config-prettier": "^9.0.0", 60 | "eslint-plugin-import": "^2.26.0", 61 | "glob": "^10.3.4", 62 | "http-server": "^14.1.1", 63 | "husky": ">=4", 64 | "lint-staged": ">=10", 65 | "microformat-tests": "https://github.com/microformats/tests", 66 | "mocha": "^11.0.0", 67 | "prettier": "^3.0.3", 68 | "rollup": "^4.1.5", 69 | "rollup-plugin-dts": "^6.0.2", 70 | "rollup-plugin-import-css": "^4.0.1", 71 | "source-map-support": "^0.5.19", 72 | "ts-node": "^10.8.0", 73 | "tslib": "^2.6.2", 74 | "typescript": "^5.2.2" 75 | }, 76 | "lint-staged": { 77 | "*.{js,ts,json,css,md,html}": "prettier --write", 78 | "*.{js,ts}": "eslint --fix" 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /demo/demo.css: -------------------------------------------------------------------------------- 1 | *, 2 | *:before, 3 | *:after { 4 | box-sizing: border-box; 5 | } 6 | 7 | h1, 8 | p, 9 | ul, 10 | li, 11 | body, 12 | html { 13 | margin: 0; 14 | padding: 0; 15 | } 16 | 17 | html, 18 | body { 19 | font-size: 16px; 20 | font-family: "Source Sans Pro", sans-serif; 21 | } 22 | 23 | a { 24 | color: #177e89; 25 | text-decoration: none; 26 | } 27 | 28 | a:hover { 29 | color: #2b3238; 30 | } 31 | 32 | nav { 33 | background: #2b3238; 34 | } 35 | 36 | nav ul { 37 | margin: 0 -1rem; 38 | } 39 | 40 | nav ul li { 41 | display: inline-block; 42 | padding: 0.5rem; 43 | } 44 | 45 | nav a { 46 | color: #fff; 47 | } 48 | 49 | nav a:hover { 50 | color: #177e89; 51 | } 52 | 53 | header { 54 | background: #e9ecef; 55 | text-align: center; 56 | padding: 2rem 1rem; 57 | } 58 | 59 | header h1 { 60 | margin-bottom: 2rem; 61 | } 62 | 63 | footer { 64 | margin-top: 4rem; 65 | margin-bottom: 2rem; 66 | text-align: center; 67 | } 68 | 69 | .description { 70 | margin-bottom: 2rem; 71 | } 72 | 73 | .container { 74 | max-width: 750px; 75 | margin: 0 auto; 76 | padding: 0 1rem; 77 | } 78 | 79 | .documentation, 80 | button[type="submit"] { 81 | background: #08605f; 82 | color: #fff; 83 | display: inline-block; 84 | border-radius: 0.25rem; 85 | border: none; 86 | padding: 0.5rem 1rem; 87 | } 88 | 89 | .submit { 90 | text-align: center; 91 | } 92 | 93 | .documentation { 94 | padding: 0.75rem 1.5rem; 95 | font-size: 1.25rem; 96 | } 97 | 98 | .documentation:hover, 99 | button[type="submit"]:hover { 100 | background: #2b3238; 101 | color: #fff; 102 | cursor: pointer; 103 | } 104 | 105 | #result { 106 | border-radius: 0.25rem; 107 | border: 1px solid #ccc; 108 | background: #f4f4f4; 109 | min-height: 10rem; 110 | overflow: scroll; 111 | padding: 0.5rem; 112 | font-size: 0.8rem; 113 | } 114 | 115 | form label { 116 | display: block; 117 | padding: 1rem 0; 118 | } 119 | 120 | form input[type="text"], 121 | form textarea { 122 | width: 100%; 123 | display: block; 124 | border: 1px solid #177e89; 125 | background: #fff; 126 | border-radius: 0.25rem; 127 | padding: 0.5rem; 128 | } 129 | 130 | form textarea { 131 | min-height: 10rem; 132 | min-width: 100%; 133 | max-width: 100%; 134 | } 135 | 136 | .error { 137 | border: 1px solid #df3b57; 138 | border-radius: 0.25rem; 139 | padding: 0.5rem 1rem; 140 | color: #df3b57; 141 | margin: 2rem 0; 142 | } 143 | 144 | .hide { 145 | display: none; 146 | } 147 | 148 | .experimental label { 149 | display: inline-block; 150 | } 151 | 152 | h3 { 153 | font-size: 1rem; 154 | margin: 0; 155 | } 156 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-audio-soundcloud.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Stream Over The Moon by Surprise Chef | Listen online for free on 9 | SoundCloud 10 | 11 | 15 | 16 | 17 | 21 | 22 | 26 | 27 | 31 | 35 | 36 | 37 | 41 | 42 | 43 | 47 | 51 | 52 | 56 | 57 | 58 | 62 | 63 | 64 |

Over The Moon

65 |

66 | 67 | 68 | -------------------------------------------------------------------------------- /test/suites/local/microformats-v2/urls.json: -------------------------------------------------------------------------------- 1 | { 2 | "items": [ 3 | { 4 | "type": ["h-entry"], 5 | "properties": { 6 | "url": [ 7 | { 8 | "properties": { 9 | "name": ["microformats"] 10 | }, 11 | "type": ["h-card"], 12 | "value": "microformats" 13 | } 14 | ] 15 | } 16 | }, 17 | { 18 | "type": ["h-entry"], 19 | "properties": { 20 | "name": [""], 21 | "url": ["http://example.com/"] 22 | } 23 | }, 24 | { 25 | "type": ["h-entry"], 26 | "properties": { 27 | "name": [""], 28 | "url": ["http://example.com/"] 29 | } 30 | }, 31 | { 32 | "type": ["h-entry"], 33 | "properties": { 34 | "name": ["Example post"], 35 | "url": ["http://example.com/"] 36 | } 37 | }, 38 | { 39 | "type": ["h-entry"], 40 | "properties": { 41 | "name": ["Example post"], 42 | "url": ["http://example.com/relative/path.html"] 43 | } 44 | }, 45 | { 46 | "type": ["h-entry"], 47 | "properties": { 48 | "url": ["http://example.com/"], 49 | "content": [ 50 | { 51 | "value": "Example post", 52 | "html": "Example post" 53 | } 54 | ] 55 | } 56 | }, 57 | { 58 | "type": ["h-entry"], 59 | "properties": { 60 | "url": ["http://example.com/relative/path.html"], 61 | "content": [ 62 | { 63 | "value": "Example post", 64 | "html": "Example post" 65 | } 66 | ] 67 | } 68 | }, 69 | { 70 | "type": ["h-entry"], 71 | "properties": { 72 | "name": ["A data value"], 73 | "url": ["http://example.com/"] 74 | } 75 | }, 76 | 77 | { 78 | "properties": { 79 | "name": ["An image to be trimmed"], 80 | "photo": ["http://example.com/photo.jpg"] 81 | }, 82 | "type": ["h-entry"] 83 | }, 84 | { 85 | "properties": { 86 | "name": ["Photo"], 87 | "photo": [ 88 | { 89 | "alt": "Photo", 90 | "value": "http://example.com/photo.jpg" 91 | } 92 | ] 93 | }, 94 | "type": ["h-entry"] 95 | }, 96 | { 97 | "properties": { 98 | "author": [ 99 | { 100 | "properties": { 101 | "name": ["microformats"] 102 | }, 103 | "type": ["h-card"], 104 | "value": "microformats" 105 | } 106 | ] 107 | }, 108 | "type": ["h-entry"] 109 | } 110 | ], 111 | "rels": {}, 112 | "rel-urls": {} 113 | } 114 | -------------------------------------------------------------------------------- /test/scenarios.spec.ts: -------------------------------------------------------------------------------- 1 | import { expect, assert } from "chai"; 2 | import * as path from "path"; 3 | 4 | import { mf2 } from "../src"; 5 | import { dirname } from "./utils/dirname"; 6 | import { loadScenarios } from "./utils/loadScenarios"; 7 | 8 | const __dirname = dirname(import.meta.url); 9 | 10 | const scenarioDir = path.resolve( 11 | __dirname, 12 | `../node_modules/microformat-tests/tests`, 13 | ); 14 | 15 | const suitesDir = path.resolve(__dirname, `./suites`); 16 | 17 | const v1 = loadScenarios(scenarioDir, "microformats-v1"); 18 | const v2 = loadScenarios(scenarioDir, "microformats-v2"); 19 | const mixed = loadScenarios(scenarioDir, "microformats-mixed"); 20 | const local = loadScenarios(suitesDir, "local"); 21 | const experimental = loadScenarios(suitesDir, "experimental"); 22 | 23 | const options = { 24 | baseUrl: "http://example.com", 25 | }; 26 | 27 | describe("mf2() // scenarios", () => { 28 | describe("microformats-v1", () => { 29 | v1.forEach(({ name, input, expected }) => { 30 | it(`should correctly parse ${name}`, () => { 31 | const result = mf2(input, options); 32 | expect(result).to.deep.equal(expected); 33 | }); 34 | }); 35 | }); 36 | 37 | describe("microformats-v2", () => { 38 | v2.forEach(({ name, input, expected }) => { 39 | it(`should correctly parse ${name}`, () => { 40 | const result = mf2(input, options); 41 | expect(result).to.deep.equal(expected); 42 | }); 43 | }); 44 | }); 45 | 46 | describe("microformats-mixed", () => { 47 | mixed.forEach(({ name, input, expected }) => { 48 | it(`should correctly parse ${name}`, () => { 49 | const result = mf2(input, options); 50 | expect(result).to.deep.equal(expected); 51 | }); 52 | }); 53 | }); 54 | }); 55 | 56 | describe("mf2() // local scenarios", () => { 57 | local.forEach(({ name, input, expected }) => { 58 | it(`should correctly parse ${name}`, () => { 59 | const result = mf2(input, { ...options, experimental: {} }); 60 | expect(result).to.deep.equal(expected); 61 | }); 62 | }); 63 | }); 64 | 65 | describe("mf2() // experimental scenarios", () => { 66 | experimental.forEach(({ name, input, expected }) => { 67 | it(`should correctly parse ${name}`, () => { 68 | const result = mf2(input, { 69 | ...options, 70 | experimental: { lang: true, textContent: true, metaformats: true }, 71 | }); 72 | expect(result).to.deep.equal(expected); 73 | }); 74 | }); 75 | 76 | it("should respect the experimental flag", () => { 77 | const findTestCase = (searchName: string) => 78 | experimental.find(({ name }) => name === searchName) ?? 79 | assert.fail(`Test case "${searchName}" not found`); 80 | const { input } = findTestCase("metaformats-og-article"); 81 | const { expected: emptyMfResult } = findTestCase( 82 | "metaformats-missing-head", 83 | ); 84 | 85 | const result = mf2(input, { 86 | ...options, 87 | }); 88 | expect(result).to.deep.equal(emptyMfResult); 89 | }); 90 | }); 91 | -------------------------------------------------------------------------------- /test/suites/experimental/metaformats-og-video-vimeo.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 14 | 15 | 16 | 20 | 21 | 25 | 26 | 27 | 28 | 32 | 36 | 37 | 38 | 39 | 43 | 44 | 45 | 46 | 50 | 54 | 58 | 59 | 60 | 61 | 62 | 63 | Ultromedia Please (Interactive) on Vimeo 64 | 65 | 66 |

Test

67 | 68 | 69 | -------------------------------------------------------------------------------- /demo/index.tpl.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {{ title }} 5 | 9 | {{ links }} 10 | 11 | 12 | 13 | {{ metas }} 14 | 15 | 16 | 31 |
32 |
33 |

{{ name }}

34 |

{{ description }}

35 |

36 | Documentation 37 |

38 |
39 |
40 |
41 |

Try it out

42 | 49 |
50 | 54 | 55 | 59 | 60 |

Experimental options

61 |

62 | 66 | 76 | 86 |

87 | 88 |
89 | 90 |
91 |
92 |
93 |
94 |

Output

95 |

 96 |       
97 |
98 | 103 | {{ scripts }} 104 | 105 | 106 | -------------------------------------------------------------------------------- /src/backcompat/index.ts: -------------------------------------------------------------------------------- 1 | import { Element } from "../types"; 2 | import { adr } from "./adr"; 3 | import { geo } from "./geo"; 4 | import { hentry } from "./hentry"; 5 | import { hfeed } from "./hfeed"; 6 | import { hnews } from "./hnews"; 7 | import { hproduct } from "./hproduct"; 8 | import { hreview } from "./hreview"; 9 | import { vcard } from "./vcard"; 10 | import { 11 | getClassNameIntersect, 12 | hasClassNameIntersect, 13 | getRelIntersect, 14 | hasRelIntersect, 15 | getAttributeValue, 16 | getClassNames, 17 | } from "../helpers/attributes"; 18 | import { hreviewAggregate } from "./hreview-aggregate"; 19 | import { hresume } from "./hresume"; 20 | import { vevent } from "./vevent"; 21 | import { item } from "./item"; 22 | import { flatten } from "../helpers/array"; 23 | 24 | export const backcompat = { 25 | adr, 26 | geo, 27 | hentry, 28 | hfeed, 29 | hnews, 30 | hproduct, 31 | hreview, 32 | vcard, 33 | hresume, 34 | vevent, 35 | item, 36 | "hreview-aggregate": hreviewAggregate, 37 | }; 38 | 39 | export type BackcompatRoot = keyof typeof backcompat; 40 | 41 | export const backcompatRoots = Object.keys(backcompat) as BackcompatRoot[]; 42 | 43 | export const getBackcompatRootClassNames = (node: Element): BackcompatRoot[] => 44 | getClassNameIntersect(node, backcompatRoots); 45 | 46 | export const convertV1RootClassNames = (node: Element): string[] => { 47 | const classNames = getBackcompatRootClassNames(node) 48 | .map((cl) => backcompat[cl].type) 49 | .reduce(flatten); 50 | 51 | return classNames.length > 1 52 | ? classNames.filter((cl) => cl !== "h-item") 53 | : classNames; 54 | }; 55 | 56 | export const hasBackcompatMicroformatProperty = ( 57 | node: Element, 58 | roots: BackcompatRoot[], 59 | ): boolean => 60 | roots.some((root) => { 61 | const { properties, rels } = backcompat[root]; 62 | return ( 63 | hasClassNameIntersect(node, Object.keys(properties)) || 64 | (rels && hasRelIntersect(node, Object.keys(rels))) 65 | ); 66 | }); 67 | 68 | export const convertV1PropertyClassNames = ( 69 | node: Element, 70 | roots: BackcompatRoot[], 71 | ): string[] => [ 72 | ...new Set( 73 | roots 74 | .map((root) => { 75 | const { properties, rels } = backcompat[root]; 76 | 77 | const classes = getClassNameIntersect( 78 | node, 79 | Object.keys(properties), 80 | ).map((cl) => properties[cl]); 81 | 82 | const relClasses = 83 | (rels && 84 | getRelIntersect(node, Object.keys(rels)).map((cl) => rels[cl])) || 85 | []; 86 | 87 | return [...classes, ...relClasses]; 88 | }) 89 | .reduce(flatten), 90 | ), 91 | ]; 92 | 93 | export const getV1IncludeNames = (node: Element): string[] => { 94 | const itemref = getAttributeValue(node, "itemref"); 95 | 96 | if (itemref) { 97 | return itemref.split(" "); 98 | } 99 | 100 | if (getClassNames(node).includes("include")) { 101 | const hrefAttr = node.tagName === "object" ? "data" : "href"; 102 | 103 | const href = getAttributeValue(node, hrefAttr); 104 | 105 | if (href && href.startsWith("#")) { 106 | return [href.substring(1)]; 107 | } 108 | } 109 | 110 | const headers = node.tagName === "td" && getAttributeValue(node, "headers"); 111 | 112 | if (headers) { 113 | return [headers]; 114 | } 115 | 116 | return []; 117 | }; 118 | -------------------------------------------------------------------------------- /src/helpers/documentSetup.ts: -------------------------------------------------------------------------------- 1 | import { ParserOptions, IdRefs, Rels, RelUrls } from "../types"; 2 | import { getAttribute, getAttributeValue } from "./attributes"; 3 | import { isLocalLink, applyBaseUrl } from "./url"; 4 | import { isElement, isRel, isBase } from "./nodeMatchers"; 5 | import { parseRel } from "../rels/rels"; 6 | import { Document, Element } from "../types"; 7 | 8 | interface DocumentSetupResult { 9 | idRefs: IdRefs; 10 | rels: Rels; 11 | relUrls: RelUrls; 12 | baseUrl: string; 13 | lang?: string; 14 | } 15 | 16 | export const findBase = (node: Element | Document): string | undefined => { 17 | for (const child of node.childNodes) { 18 | if (!isElement(child)) { 19 | continue; 20 | } 21 | 22 | if (isBase(child)) { 23 | return getAttributeValue(child, "href"); 24 | } 25 | 26 | const base = findBase(child); 27 | 28 | if (base) { 29 | return base; 30 | } 31 | } 32 | 33 | return; 34 | }; 35 | 36 | // this is mutating the object, and will mutate it for everything else :-/ 37 | 38 | const handleNode = ( 39 | node: Element | Document, 40 | result: DocumentSetupResult, 41 | options: ParserOptions, 42 | ): void => { 43 | for (const i in node.childNodes) { 44 | const child = node.childNodes[i]; 45 | 46 | if (!isElement(child)) { 47 | continue; 48 | } 49 | 50 | /** 51 | * Delete