├── .eslintrc.js ├── .gitignore ├── LICENSE ├── jest.config.js ├── package-lock.json ├── package.json ├── src ├── __tests__ │ ├── __snapshots__ │ │ └── parse.test.ts.snap │ ├── parse.test.ts │ ├── roundtrip.test.ts │ └── version.test.ts ├── config.ts ├── emit.ts ├── example │ ├── convert.ts │ ├── parse.ts │ ├── rules.ts │ └── schema.ts ├── expression │ ├── __tests__ │ │ ├── expression.test.ts │ │ └── heap.test.ts │ ├── acceptor.ts │ ├── heap.ts │ ├── index.ts │ ├── parse.ts │ ├── types.ts │ └── util.ts ├── index.ts ├── meta.ts ├── pandocUtils.ts ├── parse.ts ├── transform │ ├── __tests__ │ │ └── util.test.ts │ ├── fluent.ts │ ├── fromPandoc │ │ ├── __tests__ │ │ │ ├── __snapshots__ │ │ │ │ ├── fromPandoc.test.ts.snap │ │ │ │ └── heal.test.ts.snap │ │ │ ├── fromPandoc.test.ts │ │ │ └── heal.test.ts │ │ ├── fromPandoc.ts │ │ ├── heal.ts │ │ ├── index.ts │ │ └── marks.ts │ ├── fromProsemirror │ │ ├── __tests__ │ │ │ ├── __snapshots__ │ │ │ │ └── fromProsemirror.test.ts.snap │ │ │ └── fromProsemirror.test.ts │ │ ├── fromProsemirror.ts │ │ ├── index.ts │ │ └── marks.ts │ ├── inference │ │ ├── index.ts │ │ ├── inferPandocType.ts │ │ ├── inferProsemirrorType.ts │ │ └── shared.ts │ ├── ruleset.ts │ ├── transformers │ │ ├── bare.ts │ │ ├── common.ts │ │ ├── doc.ts │ │ ├── index.ts │ │ ├── list.ts │ │ └── table │ │ │ ├── fromPandoc.ts │ │ │ ├── fromProsemirror.ts │ │ │ └── index.ts │ ├── types.ts │ └── util.ts ├── types.ts └── util.ts └── tsconfig.json /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "browser": true, 4 | "es6": true 5 | }, 6 | "extends": [ 7 | "eslint:recommended", 8 | "plugin:@typescript-eslint/recommended", 9 | "prettier/@typescript-eslint", 10 | "plugin:prettier/recommended", 11 | ], 12 | "plugins": [ 13 | "@typescript-eslint", 14 | "prettier" 15 | ], 16 | "globals": { 17 | "Atomics": "readonly", 18 | "SharedArrayBuffer": "readonly" 19 | }, 20 | "parser": 
"@typescript-eslint/parser", 21 | "parserOptions": { 22 | "ecmaVersion": 2018, 23 | "sourceType": "module", 24 | "project": './tsconfig.json' 25 | }, 26 | "rules": { 27 | "@typescript-eslint/ban-ts-ignore": 0, 28 | "@typescript-eslint/ban-ts-comment": 0, 29 | "@typescript-eslint/camelcase": 0, 30 | "@typescript-eslint/explicit-function-return-type": 0, 31 | "@typescript-eslint/explicit-module-boundary-types": 0, 32 | "@typescript-eslint/indent": 0, 33 | "@typescript-eslint/no-explicit-any": 0, 34 | "@typescript-eslint/no-inferrable-types": 0, 35 | "@typescript-eslint/no-use-before-define": 0, 36 | "no-constant-condition": 0, 37 | "prettier/prettier": "error", 38 | }, 39 | }; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .vscode 3 | dist -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Ian Reynolds 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ["/src"], 3 | transform: { 4 | "^.+\\.tsx?$": "ts-jest", 5 | }, 6 | moduleDirectories: ['node_modules', 'src'] 7 | }; 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pubpub/prosemirror-pandoc", 3 | "version": "1.1.5", 4 | "description": "Convert between Prosemirror schemas and the Pandoc AST", 5 | "main": "dist/index.js", 6 | "devDependencies": { 7 | "@types/jest": "^24.0.18", 8 | "@types/katex": "^0.10.2", 9 | "@types/node": "^12.7.4", 10 | "@types/prosemirror-model": "^1.13.2", 11 | "@typescript-eslint/eslint-plugin": "^4.29.2", 12 | "@typescript-eslint/parser": "^4.29.2", 13 | "eslint": "^6.0.1", 14 | "eslint-config-prettier": "^6.0.0", 15 | "eslint-plugin-prettier": "^3.1.0", 16 | "jest": "^24.9.0", 17 | "katex": "^0.11.1", 18 | "prettier": "^2.3.2", 19 | "prosemirror-model": "^1.14.3", 20 | "prosemirror-tables": "^0.9.5", 21 | "ts-jest": "^24.0.2", 22 | "ts-node": "^8.6.2", 23 | "tsc-alias": "^1.3.9", 24 | "typescript": "^4.3.5", 25 | "yargs": "^14.0.0" 26 | }, 27 | "scripts": { 28 | "test": "jest", 29 | "build": "tsc && tsc-alias", 30 | "build:watch": "tsc -w & tsc-alias -w", 31 | "lint": "eslint src/**/*.ts", 32 | "prepublishOnly": "npm run lint && npm test && npm run build", 33 | "convert": "ts-node src/example/convert.ts", 34 | "parse": "ts-node src/example/parse.ts" 35 | }, 36 | "repository": { 37 | "type": "git", 38 
| "url": "git+https://github.com/pubpub/prosemirror-pandoc.git" 39 | }, 40 | "author": "Ian Reynolds", 41 | "license": "MIT", 42 | "bugs": { 43 | "url": "https://github.com/pubpub/prosemirror-pandoc/issues" 44 | }, 45 | "homepage": "https://github.com/pubpub/prosemirror-pandoc#readme", 46 | "prettier": { 47 | "tabWidth": 4, 48 | "useTabs": false, 49 | "trailingComma": "es5" 50 | }, 51 | "dependencies": {} 52 | } 53 | -------------------------------------------------------------------------------- /src/__tests__/__snapshots__/parse.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`parseBlock parses an OrderedList 1`] = ` 4 | Object { 5 | "content": Array [ 6 | Array [ 7 | Object { 8 | "content": Array [ 9 | Object { 10 | "content": "One!", 11 | "type": "Str", 12 | }, 13 | ], 14 | "type": "Para", 15 | }, 16 | ], 17 | Array [ 18 | Object { 19 | "content": Array [ 20 | Object { 21 | "content": "Two!", 22 | "type": "Str", 23 | }, 24 | ], 25 | "type": "Para", 26 | }, 27 | ], 28 | Array [ 29 | Object { 30 | "content": Array [ 31 | Object { 32 | "content": "Three!", 33 | "type": "Str", 34 | }, 35 | ], 36 | "type": "Para", 37 | }, 38 | ], 39 | ], 40 | "listAttributes": Object { 41 | "listNumberDelim": "Period", 42 | "listNumberStyle": "Decimal", 43 | "startNumber": 3, 44 | }, 45 | "type": "OrderedList", 46 | } 47 | `; 48 | 49 | exports[`parseInline parses a Link 1`] = ` 50 | Object { 51 | "attr": Object { 52 | "classes": Array [ 53 | "these", 54 | "are", 55 | "classes", 56 | ], 57 | "identifier": "this-is-an-identifier", 58 | "properties": Object { 59 | "key1": "val1", 60 | "key2": "val2", 61 | }, 62 | }, 63 | "content": Array [ 64 | Object { 65 | "content": Array [ 66 | Object { 67 | "content": "It's", 68 | "type": "Str", 69 | }, 70 | Object { 71 | "type": "Space", 72 | }, 73 | Object { 74 | "content": "a", 75 | "type": "Str", 76 | }, 77 | Object { 78 | "type": 
"Space", 79 | }, 80 | Object { 81 | "content": "link!", 82 | "type": "Str", 83 | }, 84 | ], 85 | "type": "Strong", 86 | }, 87 | ], 88 | "target": Object { 89 | "title": "This is a title??", 90 | "url": "https://url.com", 91 | }, 92 | "type": "Link", 93 | } 94 | `; 95 | -------------------------------------------------------------------------------- /src/__tests__/parse.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { parseInline, parseBlock } from "../parse"; 3 | 4 | describe("parseInline", () => { 5 | it("parses a Str", () => { 6 | expect(parseInline({ t: "Str", c: "Hello!" })).toEqual({ 7 | type: "Str", 8 | content: "Hello!", 9 | }); 10 | }); 11 | 12 | it("parses a simple inline elements", () => { 13 | [ 14 | "Emph", 15 | "Strong", 16 | "Strikeout", 17 | "Superscript", 18 | "Subscript", 19 | "SmallCaps", 20 | ].forEach((type) => { 21 | expect( 22 | // @ts-ignore 23 | parseInline({ t: type, c: [{ t: "Str", c: "Testing!" }] }) 24 | ).toEqual({ 25 | type, 26 | content: [{ type: "Str", content: "Testing!" }], 27 | }); 28 | }); 29 | }); 30 | 31 | it("parses an atom element", () => { 32 | // @ts-ignore 33 | expect(parseInline({ t: "LineBreak" })).toEqual({ type: "LineBreak" }); 34 | }); 35 | 36 | it("parses a Link", () => { 37 | expect( 38 | parseInline({ 39 | t: "Link", 40 | c: [ 41 | [ 42 | "this-is-an-identifier", 43 | ["these", "are", "classes"], 44 | [ 45 | ["key1", "val1"], 46 | ["key2", "val2"], 47 | ], 48 | ], 49 | [ 50 | { 51 | t: "Strong", 52 | c: [ 53 | { t: "Str", c: "It's" }, 54 | { t: "Space" }, 55 | { t: "Str", c: "a" }, 56 | { t: "Space" }, 57 | { t: "Str", c: "link!" 
}, 58 | ], 59 | }, 60 | ], 61 | ["https://url.com", "This is a title??"], 62 | ], 63 | }) 64 | ).toMatchSnapshot(); 65 | }); 66 | }); 67 | 68 | describe("parseBlock", () => { 69 | it("parses a Para with some stuff in it", () => { 70 | expect( 71 | parseBlock({ 72 | t: "Para", 73 | c: [ 74 | { t: "Strong", c: [{ t: "Str", c: "Hello," }] }, 75 | { t: "Space" }, 76 | { t: "Str", c: "world!" }, 77 | ], 78 | }) 79 | ).toEqual({ 80 | type: "Para", 81 | content: [ 82 | { 83 | type: "Strong", 84 | content: [{ type: "Str", content: "Hello," }], 85 | }, 86 | { type: "Space" }, 87 | { type: "Str", content: "world!" }, 88 | ], 89 | }); 90 | }); 91 | 92 | it("parses an OrderedList", () => { 93 | expect( 94 | parseBlock({ 95 | t: "OrderedList", 96 | c: [ 97 | [3, { t: "Decimal" }, { t: "Period" }], 98 | [ 99 | [ 100 | { 101 | t: "Para", 102 | c: [{ t: "Str", c: "One!" }], 103 | }, 104 | ], 105 | [ 106 | { 107 | t: "Para", 108 | c: [{ t: "Str", c: "Two!" }], 109 | }, 110 | ], 111 | [ 112 | { 113 | t: "Para", 114 | c: [{ t: "Str", c: "Three!" }], 115 | }, 116 | ], 117 | ], 118 | ], 119 | }) 120 | ).toMatchSnapshot(); 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /src/__tests__/roundtrip.test.ts: -------------------------------------------------------------------------------- 1 | import { callPandoc } from "../util"; 2 | import { parsePandocJson } from "../parse"; 3 | import { emitPandocJson } from "../emit"; 4 | 5 | /* global describe, it, expect */ 6 | 7 | const testRoundtrip = (str: string, format: string = "markdown") => { 8 | const json = JSON.parse(callPandoc(str, format, "json")); 9 | const pandocAst = parsePandocJson(json); 10 | const emittedJson = emitPandocJson(pandocAst); 11 | expect(json).toEqual(emittedJson); 12 | }; 13 | 14 | const simpleTest = ` 15 | 16 | # Test document 17 | 18 | This is my _test document!_ You're gonna love it. 19 | 20 | ## A section 21 | 22 | Here is a section. 
*This is bold, I think?* 23 | [This is a link](https://pubpub.org) for more information. 24 | 25 | ## Another section 26 | 27 | ![Aha, this is an image](https://knowyourapples.com/jazz.jpg) 28 | 29 | 1. This is a numbered list 30 | 2. This is a second entry 31 | - Oh no 32 | - We're going deeper 33 | - And deeper still 34 | - Please stop 35 | 3. Okay, that's better [^1] 36 | 37 | > This is a quote from \`someone\` famous 38 | 39 | | This is also that? [^2] 40 | 41 | \`\`\` 42 | this_is_a_code_block(); 43 | \`\`\` 44 | 45 | And now, a table of information: 46 | 47 | | Apple | Rating | 48 | |---------------|---------------------------------| 49 | | Red Delicious | Terrible | 50 | | Granny Smith | Bad, except for *pie*! | 51 | | Jazz | It's fine | 52 | | Macintosh | Hey, not bad! | 53 | | Honeycrisp | That's the hometown apple, baby | 54 | 55 | 56 | That is the end of our document. 57 | 58 | [^1]: It's actually still not very good. 59 | [^2]: Maybe. Lol. 60 | `; 61 | 62 | describe("parse/emit roundtrip", () => { 63 | it("handles a simple document", () => { 64 | testRoundtrip(simpleTest); 65 | }); 66 | }); 67 | -------------------------------------------------------------------------------- /src/__tests__/version.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { callPandoc } from "../util"; 3 | import { emitPandocJson } from "../emit"; 4 | import { PANDOC_API_VERSION, setPandocApiVersion } from "../config"; 5 | 6 | describe("PANDOC_API_VERSION", () => { 7 | it("Matches the version produced by the Pandoc executable (update it if not!)", () => { 8 | const testJson = JSON.parse(callPandoc("", "html", "json")); 9 | expect(testJson["pandoc-api-version"]).toEqual(PANDOC_API_VERSION); 10 | }); 11 | }); 12 | 13 | describe("setPandocApiVersion", () => { 14 | it("Sets the Pandoc API version specified in emitted JSON", () => { 15 | const newPandocApiVersion = [2, 30]; 16 | 
setPandocApiVersion(newPandocApiVersion); 17 | expect(PANDOC_API_VERSION).toEqual(newPandocApiVersion); 18 | const testJson = emitPandocJson({ type: "Doc", blocks: [], meta: {} }); 19 | expect(testJson["pandoc-api-version"]).toEqual(newPandocApiVersion); 20 | }); 21 | }); 22 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | export let PANDOC_API_VERSION = [1, 22, 2]; 2 | 3 | export const setPandocApiVersion = (version) => { 4 | PANDOC_API_VERSION = version; 5 | }; 6 | -------------------------------------------------------------------------------- /src/emit.ts: -------------------------------------------------------------------------------- 1 | import { PANDOC_API_VERSION } from "./config"; 2 | import { 3 | Alignment, 4 | Attr, 5 | Block, 6 | BlockQuote, 7 | BulletList, 8 | Caption, 9 | Cell, 10 | CitationMode, 11 | Cite, 12 | Code, 13 | CodeBlock, 14 | ColSpec, 15 | DefinitionList, 16 | Div, 17 | Doc, 18 | Format, 19 | Header, 20 | Image, 21 | Inline, 22 | LineBlock, 23 | Link, 24 | ListAttributes, 25 | ListNumberDelim, 26 | ListNumberStyle, 27 | Math, 28 | MathType, 29 | MetaBlocks, 30 | MetaBool, 31 | MetaInlines, 32 | MetaList, 33 | MetaMap, 34 | MetaString, 35 | MetaValue, 36 | Note, 37 | OrderedList, 38 | PandocJson, 39 | PandocNode, 40 | Para, 41 | Plain, 42 | Quoted, 43 | QuoteType, 44 | RawBlock, 45 | RawInline, 46 | Row, 47 | SimpleInline, 48 | Span, 49 | Str, 50 | Table, 51 | TableBody, 52 | TableFoot, 53 | TableHead, 54 | Target, 55 | } from "./types"; 56 | 57 | const wrapEnum = (instance: T): { t: T } => { 58 | return { t: instance }; 59 | }; 60 | 61 | const wrapAttr = (attr: Attr) => { 62 | const { identifier, classes, properties } = attr; 63 | return [ 64 | identifier ?? "", 65 | classes ?? [], 66 | properties ? 
Object.entries(properties) : [], 67 | ]; 68 | }; 69 | 70 | const wrapTarget = (target: Target) => { 71 | const { url, title } = target; 72 | return [url, title]; 73 | }; 74 | 75 | const wrapFormat = (format: Format) => { 76 | return format; 77 | }; 78 | 79 | const wrapListAttributes = (listAttributes: ListAttributes) => { 80 | const { 81 | startNumber = 1, 82 | listNumberStyle, 83 | listNumberDelim, 84 | } = listAttributes; 85 | return [ 86 | startNumber, 87 | wrapEnum(listNumberStyle), 88 | wrapEnum(listNumberDelim), 89 | ]; 90 | }; 91 | 92 | const emitAtom = (n: PandocNode) => { 93 | return { t: n.type }; 94 | }; 95 | 96 | const emitStr = (str: Str) => { 97 | const { content } = str; 98 | return { 99 | t: "Str", 100 | c: content, 101 | }; 102 | }; 103 | 104 | const emitSimpleInline = (node: SimpleInline) => { 105 | const { type, content } = node; 106 | return { 107 | t: type, 108 | c: content.map(emitInline), 109 | }; 110 | }; 111 | 112 | const emitQuoted = (quoted: Quoted) => { 113 | const { quoteType, content } = quoted; 114 | return { 115 | t: "Quoted", 116 | c: [wrapEnum(quoteType), content.map(emitInline)], 117 | }; 118 | }; 119 | 120 | const emitCite = (cite: Cite) => { 121 | const { citations, content } = cite; 122 | return { 123 | t: "Cite", 124 | c: [ 125 | citations.map((citation) => { 126 | const { 127 | citationHash, 128 | citationId, 129 | citationMode, 130 | citationNoteNum, 131 | citationPrefix, 132 | citationSuffix, 133 | } = citation; 134 | return { 135 | citationHash, 136 | citationId, 137 | citationMode: wrapEnum(citationMode), 138 | citationNoteNum, 139 | citationPrefix: citationPrefix.map(emitInline), 140 | citationSuffix: citationSuffix.map(emitInline), 141 | }; 142 | }), 143 | content.map(emitInline), 144 | ], 145 | }; 146 | }; 147 | 148 | const emitCode = (code: Code) => { 149 | const { attr, content } = code; 150 | return { 151 | t: "Code", 152 | c: [wrapAttr(attr), content], 153 | }; 154 | }; 155 | 156 | const emitMath = (math: Math) => 
{ 157 | const { mathType, content } = math; 158 | return { 159 | t: "Math", 160 | c: [wrapEnum(mathType), content], 161 | }; 162 | }; 163 | 164 | const emitRawInline = (rawInline: RawInline) => { 165 | const { format, content } = rawInline; 166 | return { 167 | t: "RawInline", 168 | c: [wrapFormat(format), content], 169 | }; 170 | }; 171 | 172 | const emitImage = (image: Image) => { 173 | const { attr, content, target } = image; 174 | return { 175 | t: "Image", 176 | c: [wrapAttr(attr), content.map(emitInline), wrapTarget(target)], 177 | }; 178 | }; 179 | 180 | const emitLink = (link: Link) => { 181 | const { attr, content, target } = link; 182 | return { 183 | t: "Link", 184 | c: [wrapAttr(attr), content.map(emitInline), wrapTarget(target)], 185 | }; 186 | }; 187 | 188 | const emitNote = (note: Note) => { 189 | const { content } = note; 190 | return { 191 | t: "Note", 192 | c: content.map(emitBlock), 193 | }; 194 | }; 195 | 196 | const emitSpan = (span: Span) => { 197 | const { attr, content } = span; 198 | return { 199 | t: "Span", 200 | c: [wrapAttr(attr), content.map(emitInline)], 201 | }; 202 | }; 203 | 204 | export const emitInline = (n: Inline): { t: string; c?: string | any[] } => { 205 | switch (n.type) { 206 | case "Str": 207 | return emitStr(n); 208 | case "Emph": 209 | case "Strong": 210 | case "Underline": 211 | case "Strikeout": 212 | case "Superscript": 213 | case "Subscript": 214 | case "SmallCaps": 215 | return emitSimpleInline(n); 216 | case "Quoted": 217 | return emitQuoted(n); 218 | case "Cite": 219 | return emitCite(n); 220 | case "Code": 221 | return emitCode(n); 222 | case "Space": 223 | case "SoftBreak": 224 | case "LineBreak": 225 | return emitAtom(n); 226 | case "Math": 227 | return emitMath(n); 228 | case "RawInline": 229 | return emitRawInline(n); 230 | case "Link": 231 | return emitLink(n); 232 | case "Image": 233 | return emitImage(n); 234 | case "Note": 235 | return emitNote(n); 236 | case "Span": 237 | return emitSpan(n); 238 | } 239 
| }; 240 | 241 | const emitPlain = (plain: Plain) => { 242 | const { content } = plain; 243 | return { 244 | t: "Plain", 245 | c: content.map(emitInline), 246 | }; 247 | }; 248 | 249 | const emitPara = (para: Para) => { 250 | const { content } = para; 251 | return { 252 | t: "Para", 253 | c: content.map(emitInline), 254 | }; 255 | }; 256 | 257 | const emitLineBlock = (lineBlock: LineBlock) => { 258 | const { content } = lineBlock; 259 | return { 260 | t: "LineBlock", 261 | c: content.map((line) => line.map(emitInline)), 262 | }; 263 | }; 264 | 265 | const emitCodeBlock = (codeBlock: CodeBlock) => { 266 | const { attr, content } = codeBlock; 267 | return { 268 | t: "CodeBlock", 269 | c: [wrapAttr(attr), content], 270 | }; 271 | }; 272 | 273 | const emitRawBlock = (rawBlock: RawBlock) => { 274 | const { format, content } = rawBlock; 275 | return { 276 | t: "RawBlock", 277 | c: [wrapFormat(format), content], 278 | }; 279 | }; 280 | 281 | const emitBlockQuote = (blockQuote: BlockQuote) => { 282 | const { content } = blockQuote; 283 | return { 284 | t: "BlockQuote", 285 | c: content.map(emitBlock), 286 | }; 287 | }; 288 | 289 | const emitOrderedList = (orderedList: OrderedList) => { 290 | const { content, listAttributes } = orderedList; 291 | return { 292 | t: "OrderedList", 293 | c: [ 294 | wrapListAttributes(listAttributes), 295 | content.map((entry) => entry.map(emitBlock)), 296 | ], 297 | }; 298 | }; 299 | 300 | const emitBulletList = (bulletList: BulletList) => { 301 | const { content } = bulletList; 302 | return { 303 | t: "BulletList", 304 | c: content.map((entry) => entry.map(emitBlock)), 305 | }; 306 | }; 307 | 308 | const emitDefinitionList = (definitionList: DefinitionList) => { 309 | const { entries } = definitionList; 310 | return { 311 | t: "DefinitionList", 312 | c: [ 313 | entries.map((entry) => { 314 | const { term, definitions } = entry; 315 | return [ 316 | term.map(emitInline), 317 | definitions.map((definition) => definition.map(emitBlock)), 318 | 
]; 319 | }), 320 | ], 321 | }; 322 | }; 323 | 324 | const emitHeader = (header: Header) => { 325 | const { level, attr, content } = header; 326 | return { 327 | t: "Header", 328 | c: [level, wrapAttr(attr), content.map(emitInline)], 329 | }; 330 | }; 331 | 332 | const emitDiv = (div: Div) => { 333 | const { attr, content } = div; 334 | return { 335 | t: "Div", 336 | c: [wrapAttr(attr), content.map(emitBlock)], 337 | }; 338 | }; 339 | 340 | const emitCell = (cell: Cell) => { 341 | const { attr, alignment, rowSpan, colSpan, content } = cell; 342 | return [ 343 | wrapAttr(attr), 344 | wrapEnum(alignment), 345 | rowSpan, 346 | colSpan, 347 | content.map(emitBlock), 348 | ]; 349 | }; 350 | 351 | const emitRow = (row: Row) => { 352 | const { attr, cells } = row; 353 | return [wrapAttr(attr), cells.map(emitCell)]; 354 | }; 355 | 356 | const emitTableHead = (head: TableHead) => { 357 | const { attr, rows } = head; 358 | return [wrapAttr(attr), rows.map(emitRow)]; 359 | }; 360 | 361 | const emitTableFoot = (foot: TableFoot) => { 362 | const { attr, rows } = foot; 363 | return [wrapAttr(attr), rows.map(emitRow)]; 364 | }; 365 | 366 | const emitTableBody = (body: TableBody) => { 367 | const { attr, rowHeadColumns, headRows, bodyRows } = body; 368 | return [ 369 | wrapAttr(attr), 370 | rowHeadColumns, 371 | headRows.map(emitRow), 372 | bodyRows.map(emitRow), 373 | ]; 374 | }; 375 | 376 | const emitColSpec = (colSpec: ColSpec) => { 377 | const { alignment } = colSpec; 378 | return [ 379 | wrapEnum(alignment), 380 | "defaultWidth" in colSpec 381 | ? { t: "ColWidthDefault" } 382 | : { t: "ColWidth", c: colSpec.width }, 383 | ]; 384 | }; 385 | 386 | const emitCaption = (caption: Caption) => { 387 | const { shortCaption, content } = caption; 388 | return [ 389 | shortCaption ? 
shortCaption.map(emitInline) : null, 390 | content.map(emitBlock), 391 | ]; 392 | }; 393 | 394 | const emitTable = (table: Table) => { 395 | const { attr, caption, colSpecs, head, bodies, foot } = table; 396 | return { 397 | t: "Table", 398 | c: [ 399 | wrapAttr(attr), 400 | emitCaption(caption), 401 | colSpecs.map(emitColSpec), 402 | emitTableHead(head), 403 | bodies.map(emitTableBody), 404 | emitTableFoot(foot), 405 | ], 406 | }; 407 | }; 408 | 409 | export const emitBlock = (n: Block): { t: string; c?: any[] } => { 410 | switch (n.type) { 411 | case "Plain": 412 | return emitPlain(n); 413 | case "Para": 414 | return emitPara(n); 415 | case "LineBlock": 416 | return emitLineBlock(n); 417 | case "CodeBlock": 418 | return emitCodeBlock(n); 419 | case "RawBlock": 420 | return emitRawBlock(n); 421 | case "BlockQuote": 422 | return emitBlockQuote(n); 423 | case "OrderedList": 424 | return emitOrderedList(n); 425 | case "BulletList": 426 | return emitBulletList(n); 427 | case "DefinitionList": 428 | return emitDefinitionList(n); 429 | case "Header": 430 | return emitHeader(n); 431 | case "HorizontalRule": 432 | case "Null": 433 | return emitAtom(n); 434 | case "Div": 435 | return emitDiv(n); 436 | case "Table": 437 | return emitTable(n); 438 | } 439 | }; 440 | 441 | const emitMetaMap = (n: MetaMap) => { 442 | const mappedValues: Record = {}; 443 | Object.entries(n.values).forEach(([key, value]) => { 444 | mappedValues[key] = emitMetaValue(value); 445 | }); 446 | return { 447 | t: "MetaMap", 448 | c: mappedValues, 449 | }; 450 | }; 451 | 452 | const emitMetaBlocks = (n: MetaBlocks) => { 453 | return { 454 | t: "MetaBlocks", 455 | c: n.content.map((block) => emitBlock(block)), 456 | }; 457 | }; 458 | 459 | const emitMetaInlines = (n: MetaInlines) => { 460 | return { 461 | t: "MetaInlines", 462 | c: n.content.map((inline) => emitInline(inline)), 463 | }; 464 | }; 465 | 466 | const emitMetaList = (n: MetaList) => { 467 | return { 468 | t: "MetaList", 469 | c: 
n.content.map((item) => emitMetaValue(item)), 470 | }; 471 | }; 472 | 473 | const emitMetaString = (n: MetaString) => { 474 | return { 475 | t: "MetaString", 476 | c: n.content, 477 | }; 478 | }; 479 | 480 | const emitMetaBool = (n: MetaBool) => { 481 | return { 482 | t: "MetaBool", 483 | c: n.content, 484 | }; 485 | }; 486 | 487 | const emitMetaValue = (n: MetaValue) => { 488 | switch (n.type) { 489 | case "MetaMap": 490 | return emitMetaMap(n); 491 | case "MetaList": 492 | return emitMetaList(n); 493 | case "MetaBool": 494 | return emitMetaBool(n); 495 | case "MetaString": 496 | return emitMetaString(n); 497 | case "MetaInlines": 498 | return emitMetaInlines(n); 499 | case "MetaBlocks": 500 | return emitMetaBlocks(n); 501 | } 502 | }; 503 | 504 | const emitMeta = (n: Doc["meta"]) => { 505 | const res: Record = {}; 506 | Object.entries(n).forEach(([key, value]) => { 507 | res[key] = emitMetaValue(value); 508 | }); 509 | return res; 510 | }; 511 | 512 | export const emitPandocJson = (doc: Doc): PandocJson => { 513 | const { blocks, meta } = doc; 514 | return { 515 | "pandoc-api-version": PANDOC_API_VERSION, 516 | blocks: blocks.map(emitBlock), 517 | meta: emitMeta(meta), 518 | }; 519 | }; 520 | -------------------------------------------------------------------------------- /src/example/convert.ts: -------------------------------------------------------------------------------- 1 | import { argv } from "yargs"; 2 | 3 | import { loadAndTransformFromPandoc } from "../util"; 4 | 5 | import { rules } from "./rules"; 6 | 7 | const main = async () => { 8 | const { 9 | _: [filePath], 10 | } = argv; 11 | console.log( 12 | JSON.stringify(loadAndTransformFromPandoc(filePath as string, rules)) 13 | ); 14 | }; 15 | 16 | main().catch((e) => console.error(e)); 17 | -------------------------------------------------------------------------------- /src/example/parse.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | 
import { argv } from "yargs"; 3 | import { parsePandocJson } from "../parse"; 4 | 5 | const main = async () => { 6 | const { 7 | _: [filePath], 8 | } = argv; 9 | const fileJson = JSON.parse(fs.readFileSync(filePath).toString()); 10 | const parsed = parsePandocJson(fileJson); 11 | console.log(JSON.stringify(parsed)); 12 | }; 13 | 14 | main().catch((e) => console.error(e)); 15 | -------------------------------------------------------------------------------- /src/example/rules.ts: -------------------------------------------------------------------------------- 1 | import * as katex from "katex"; 2 | 3 | import { Inline, Para, Plain } from "types"; 4 | import { 5 | bareMarkTransformer, 6 | docTransformer, 7 | nullTransformer, 8 | bareContentTransformer, 9 | pandocPassThroughTransformer, 10 | createListTransformer, 11 | definitionListTransformer, 12 | bareLeafTransformer, 13 | pandocQuotedTransformer, 14 | pandocTableTransformer, 15 | prosemirrorTableTransformer, 16 | } from "transform/transformers"; 17 | import { 18 | createAttr, 19 | flatten, 20 | intersperse, 21 | textFromStrSpace, 22 | textToStrSpace, 23 | } from "transform/util"; 24 | import { RuleSet } from "transform/ruleset"; 25 | 26 | import { prosemirrorSchema } from "./schema"; 27 | import { 28 | getPandocDocForHtmlString, 29 | htmlStringToPandocBlocks, 30 | htmlStringToPandocInline, 31 | pandocBlocksToHtmlString, 32 | pandocInlineToHtmlString, 33 | pandocInlineToPlainString, 34 | } from "../pandocUtils"; 35 | 36 | const rules = new RuleSet(prosemirrorSchema); 37 | 38 | // Top-level transformer for a doc 39 | rules.transform("Doc", "doc", docTransformer); 40 | 41 | // Do nothing with nothing 42 | rules.toProsemirrorNode("Null", nullTransformer); 43 | 44 | // Paragraphs are paragraphs. So are "Plain", until proven otherwise. 
45 | rules.transform( 46 | "Para | Plain", 47 | "paragraph", 48 | bareContentTransformer("Para", "paragraph") 49 | ); 50 | 51 | // Divs are just boxes of other content 52 | rules.toProsemirrorNode("Div", pandocPassThroughTransformer); 53 | 54 | // I'm not really sure what a LineBlock is, but let's just call it a single paragraph 55 | // with some hard breaks thrown in. 56 | rules.toProsemirrorNode("LineBlock", (node, { transform }) => { 57 | const lines = node.content.map((line) => transform(line).asArray()); 58 | return { 59 | type: "paragraph", 60 | content: flatten( 61 | intersperse(lines, () => ({ 62 | type: "hard_break", 63 | })) 64 | ), 65 | }; 66 | }); 67 | 68 | rules.transform("CodeBlock", "code_block", { 69 | toProsemirrorNode: (node) => { 70 | return { 71 | type: "code_block", 72 | content: [{ type: "text", text: node.content }], 73 | }; 74 | }, 75 | fromProsemirrorNode: (node) => { 76 | return { 77 | type: "CodeBlock", 78 | content: node.content.map((text) => text.text).join(""), 79 | attr: createAttr(""), 80 | }; 81 | }, 82 | }); 83 | 84 | rules.transform("BlockQuote", "blockquote", bareContentTransformer); 85 | 86 | // Use a listTransformer to take care of OrderedList and BulletList 87 | const ensureFirstElementIsParagraph = (listItem) => { 88 | if ( 89 | listItem.content.length === 0 || 90 | listItem.content[0].type !== "paragraph" 91 | ) { 92 | listItem.content.unshift({ type: "paragraph", content: [] }); 93 | } 94 | return listItem; 95 | }; 96 | 97 | rules.transform( 98 | "OrderedList", 99 | "ordered_list", 100 | createListTransformer("list_item", ensureFirstElementIsParagraph) 101 | ); 102 | 103 | rules.transform( 104 | "BulletList", 105 | "bullet_list", 106 | createListTransformer("list_item", ensureFirstElementIsParagraph) 107 | ); 108 | 109 | rules.toProsemirrorNode( 110 | "DefinitionList", 111 | definitionListTransformer("bullet_list", "list_item") 112 | ); 113 | 114 | // Tranform headers 115 | rules.transform("Header", "heading", { 116 | 
toProsemirrorNode: (node, { transform }) => { 117 | return { 118 | type: "heading", 119 | attrs: { 120 | level: node.level, 121 | id: node.attr.identifier, 122 | }, 123 | content: transform(node.content).asArray(), 124 | }; 125 | }, 126 | fromProsemirrorNode: (node, { transform }) => { 127 | return { 128 | type: "Header", 129 | level: parseInt(node.attrs.level.toString()), 130 | attr: createAttr(node.attrs.id.toString()), 131 | content: transform(node.content).asArray() as Inline[], 132 | }; 133 | }, 134 | }); 135 | 136 | rules.transform("HorizontalRule", "horizontal_rule", bareLeafTransformer); 137 | 138 | const bareMarkTransformPairs = [ 139 | ["Strong", "strong"], 140 | ["Emph", "em"], 141 | ["Strikeout", "strike"], 142 | ["Superscript", "sup"], 143 | ["Subscript", "sub"], 144 | ["Code", "code"], 145 | ] as const; 146 | 147 | bareMarkTransformPairs.forEach(([from, to]) => 148 | rules.transform(from, to, bareMarkTransformer) 149 | ); 150 | 151 | rules.transform("Link", "link", { 152 | toProsemirrorMark: (link) => { 153 | return { 154 | type: "link", 155 | attrs: { 156 | href: link.target.url, 157 | title: link.target.title, 158 | }, 159 | }; 160 | }, 161 | fromProsemirrorMark: (link, content) => { 162 | return { 163 | type: "Link", 164 | attr: createAttr(), 165 | content: content, 166 | target: { 167 | url: link.attrs.href.toString(), 168 | title: link.attrs.title.toString(), 169 | }, 170 | }; 171 | }, 172 | }); 173 | 174 | // We don't support small caps right now 175 | rules.toProsemirrorNode("SmallCaps", pandocPassThroughTransformer); 176 | 177 | // Tell the transformer how to deal with typical content-level nodes 178 | rules.toProsemirrorNode("(Str | Space)+", (nodes) => { 179 | return { 180 | type: "text", 181 | text: textFromStrSpace(nodes), 182 | }; 183 | }); 184 | 185 | // Tell the transformer how to turn Prosemirror text back into Pandoc 186 | rules.fromProsemirrorNode("text", (node) => textToStrSpace(node.text)); 187 | 188 | // Deal with line breaks 189 
| rules.transform("LineBreak", "hard_break", bareLeafTransformer); 190 | rules.toProsemirrorNode("SoftBreak", nullTransformer); 191 | 192 | // Stuff we don't have equivalents for 193 | rules.toProsemirrorNode("Span", pandocPassThroughTransformer); 194 | rules.toProsemirrorNode("Underline", pandocPassThroughTransformer); 195 | 196 | // Anything in quotation marks is its own node, to Pandoc 197 | rules.toProsemirrorNode("Quoted", pandocQuotedTransformer); 198 | 199 | rules.toProsemirrorNode("RawBlock", (node) => { 200 | return { 201 | type: "paragraph", 202 | content: [{ type: "text", text: node.content }], 203 | }; 204 | }); 205 | 206 | rules.toProsemirrorNode("RawInline", (node) => { 207 | const { format, content } = node; 208 | if (format === "tex") { 209 | return { 210 | type: "equation", 211 | attrs: { 212 | value: content, 213 | html: katex.renderToString(content, { 214 | displayMode: false, 215 | throwOnError: false, 216 | }), 217 | }, 218 | }; 219 | } 220 | return { type: "text", text: content }; 221 | }); 222 | 223 | // These next rules for images don't use transform() because they're not inverses of each other -- 224 | // the Prosemirror->Pandoc direction wraps an Image in a Para to make it block-level 225 | 226 | rules.toProsemirrorNode("Image", (node, { resources }) => { 227 | return { 228 | type: "image", 229 | attrs: { 230 | url: resources.image(node.target.url), 231 | altText: pandocInlineToPlainString(node.content), 232 | // TODO(ian): is there anything we can do about the image size here? 233 | }, 234 | }; 235 | }); 236 | 237 | rules.fromProsemirrorNode("image", (node) => { 238 | const maybeAltTextDoc = getPandocDocForHtmlString( 239 | node.attrs.altText as string 240 | ); 241 | const altTextInlines = (maybeAltTextDoc.blocks[0] as Para)?.content ?? 
[]; 242 | const captionBlocks = htmlStringToPandocBlocks( 243 | node.attrs.caption as string 244 | ); 245 | const imageWrappedInPlain: Plain = { 246 | type: "Plain", 247 | content: [ 248 | { 249 | type: "Image", 250 | content: altTextInlines, 251 | target: { 252 | url: node.attrs.url.toString(), 253 | title: "", 254 | }, 255 | attr: createAttr(""), 256 | }, 257 | ], 258 | }; 259 | if (captionBlocks.length > 0) { 260 | return [imageWrappedInPlain, ...captionBlocks]; 261 | } 262 | return imageWrappedInPlain; 263 | }); 264 | 265 | rules.transform("Cite", "citation", { 266 | toProsemirrorNode: (node, { count }) => { 267 | const { content } = node; 268 | const unstructuredValue = pandocInlineToHtmlString(content); 269 | return { 270 | type: "citation", 271 | attrs: { 272 | unstructuredValue, 273 | count: 1 + count("Cite"), 274 | }, 275 | }; 276 | }, 277 | fromProsemirrorNode: (node) => { 278 | const inputHtml = (node.attrs.html || 279 | node.attrs.unstructuredValue) as string; 280 | const citationNumber = 281 | typeof node.attrs.count === "number" 282 | ? 
node.attrs.count 283 | : parseInt(node.attrs.count as string); 284 | return { 285 | type: "Cite", 286 | content: htmlStringToPandocInline(inputHtml), 287 | citations: [ 288 | { 289 | citationId: "", 290 | citationPrefix: [], 291 | citationSuffix: [], 292 | citationNoteNum: citationNumber, 293 | citationHash: citationNumber, 294 | citationMode: "NormalCitation", 295 | }, 296 | ], 297 | }; 298 | }, 299 | }); 300 | 301 | rules.transform("Note", "footnote", { 302 | toProsemirrorNode: (node, { count }) => { 303 | const { content } = node; 304 | return { 305 | type: "footnote", 306 | attrs: { 307 | unstructuredValue: pandocBlocksToHtmlString(content), 308 | count: 1 + count("Note"), 309 | }, 310 | }; 311 | }, 312 | fromProsemirrorNode: (node) => { 313 | const noteContent = (node.attrs.unstructuredValue || "") as string; 314 | return { 315 | type: "Note", 316 | content: htmlStringToPandocBlocks(noteContent), 317 | }; 318 | }, 319 | }); 320 | 321 | rules.toProsemirrorNode("Math", (node) => { 322 | const { mathType, content } = node; 323 | const isDisplay = mathType === "DisplayMath"; 324 | const prosemirrorType = isDisplay ? 
"block_equation" : "equation"; 325 | return { 326 | type: prosemirrorType, 327 | attrs: { 328 | value: content, 329 | html: katex.renderToString(content, { 330 | displayMode: isDisplay, 331 | throwOnError: false, 332 | }), 333 | }, 334 | }; 335 | }); 336 | 337 | rules.fromProsemirrorNode("equation", (node) => { 338 | return { 339 | type: "Math", 340 | mathType: "InlineMath", 341 | content: node.attrs.value.toString(), 342 | }; 343 | }); 344 | 345 | rules.fromProsemirrorNode("block_equation", (node) => { 346 | return { 347 | type: "Plain", 348 | content: [ 349 | { 350 | type: "Math", 351 | mathType: "DisplayMath", 352 | content: node.attrs.value.toString(), 353 | }, 354 | ], 355 | }; 356 | }); 357 | 358 | rules.toProsemirrorNode("Table", pandocTableTransformer); 359 | rules.fromProsemirrorNode("table", prosemirrorTableTransformer); 360 | 361 | rules.validate(); 362 | 363 | export { rules }; 364 | -------------------------------------------------------------------------------- /src/example/schema.ts: -------------------------------------------------------------------------------- 1 | import { Schema } from "prosemirror-model"; 2 | import { tableNodes } from "prosemirror-tables"; 3 | 4 | const nodes = { 5 | doc: { 6 | content: "block+", 7 | attrs: { 8 | meta: { default: {} }, 9 | }, 10 | }, 11 | paragraph: { 12 | content: "inline*", 13 | group: "block", 14 | attrs: { 15 | class: { default: null }, 16 | }, 17 | }, 18 | blockquote: { 19 | content: "block+", 20 | group: "block", 21 | }, 22 | horizontal_rule: { 23 | group: "block", 24 | }, 25 | heading: { 26 | attrs: { 27 | level: { default: 1 }, 28 | id: { default: "" }, 29 | }, 30 | content: "inline*", 31 | group: "block", 32 | defining: true, 33 | }, 34 | image: { 35 | atom: true, 36 | attrs: { 37 | url: { default: null }, 38 | size: { default: 50 }, // number as percentage 39 | align: { default: "center" }, 40 | caption: { default: "" }, 41 | altText: { default: "" }, 42 | }, 43 | inline: false, 44 | group: "block", 
45 | }, 46 | ordered_list: { 47 | content: "list_item+", 48 | group: "block", 49 | attrs: { order: { default: 1 } }, 50 | }, 51 | bullet_list: { 52 | content: "list_item+", 53 | group: "block", 54 | }, 55 | list_item: { 56 | content: "paragraph block*", 57 | defining: true, 58 | }, 59 | code_block: { 60 | content: "text*", 61 | group: "block", 62 | }, 63 | text: { 64 | inline: true, 65 | group: "inline", 66 | }, 67 | hard_break: { 68 | inline: true, 69 | group: "inline", 70 | }, 71 | equation: { 72 | atom: true, 73 | inline: true, 74 | attrs: { 75 | value: { default: "" }, 76 | html: { default: "" }, 77 | }, 78 | group: "inline", 79 | }, 80 | block_equation: { 81 | atom: true, 82 | attrs: { 83 | value: { default: "" }, 84 | html: { default: "" }, 85 | }, 86 | inline: false, 87 | group: "block", 88 | }, 89 | citation: { 90 | atom: true, 91 | attrs: { 92 | value: { default: "" }, 93 | unstructuredValue: { default: "" }, 94 | count: { default: 0 }, 95 | }, 96 | inline: true, 97 | group: "inline", 98 | }, 99 | footnote: { 100 | atom: true, 101 | attrs: { 102 | value: { default: "" }, 103 | structuredValue: { default: "" }, 104 | count: { default: 0 }, 105 | }, 106 | inline: true, 107 | group: "inline", 108 | }, 109 | ...tableNodes({ 110 | tableGroup: "block", 111 | cellContent: "block+", 112 | cellAttributes: {}, 113 | }), 114 | }; 115 | 116 | const marks = { 117 | em: {}, 118 | strong: {}, 119 | link: { 120 | inclusive: false, 121 | attrs: { 122 | href: { default: "" }, 123 | title: { default: null }, 124 | target: { default: null }, 125 | }, 126 | }, 127 | sub: {}, 128 | sup: {}, 129 | strike: {}, 130 | code: {}, 131 | }; 132 | 133 | export const prosemirrorSchema = new Schema({ nodes, marks, topNode: "doc" }); 134 | -------------------------------------------------------------------------------- /src/expression/__tests__/expression.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | 
import { acceptItems, parseExpr } from "expression"; 3 | 4 | type Node = { 5 | type: string; 6 | }; 7 | 8 | const n = (type: string): Node => ({ type }); 9 | 10 | describe("parseRegexp", () => { 11 | it("handles a simple identifier", () => { 12 | expect(parseExpr("Foo")).toEqual({ 13 | type: "identifier", 14 | identifier: "Foo", 15 | }); 16 | }); 17 | 18 | it("handles a sequence of identifiers", () => { 19 | expect(parseExpr("Foo Bar Baz")).toEqual({ 20 | type: "sequence", 21 | children: [ 22 | { 23 | type: "identifier", 24 | identifier: "Foo", 25 | }, 26 | { 27 | type: "identifier", 28 | identifier: "Bar", 29 | }, 30 | { 31 | type: "identifier", 32 | identifier: "Baz", 33 | }, 34 | ], 35 | }); 36 | }); 37 | 38 | it("handles zero-or-more quantifiers (*)", () => { 39 | expect(parseExpr("Foo*")).toEqual({ 40 | type: "zeroOrMore", 41 | child: { 42 | type: "identifier", 43 | identifier: "Foo", 44 | }, 45 | }); 46 | }); 47 | 48 | it("handles zero-or-more quantifiers (*) in context", () => { 49 | expect(parseExpr("Foo Bar* Baz")).toEqual({ 50 | type: "sequence", 51 | children: [ 52 | { 53 | type: "identifier", 54 | identifier: "Foo", 55 | }, 56 | { 57 | type: "zeroOrMore", 58 | child: { 59 | type: "identifier", 60 | identifier: "Bar", 61 | }, 62 | }, 63 | { 64 | type: "identifier", 65 | identifier: "Baz", 66 | }, 67 | ], 68 | }); 69 | }); 70 | 71 | it("handles one-or-more quantifiers (+)", () => { 72 | expect(parseExpr("Foo+")).toEqual({ 73 | type: "oneOrMore", 74 | child: { 75 | type: "identifier", 76 | identifier: "Foo", 77 | }, 78 | }); 79 | }); 80 | 81 | it("handles one-or-more quantifiers (+) in context", () => { 82 | expect(parseExpr("Foo Bar Baz+")).toEqual({ 83 | type: "sequence", 84 | children: [ 85 | { 86 | type: "identifier", 87 | identifier: "Foo", 88 | }, 89 | { 90 | type: "identifier", 91 | identifier: "Bar", 92 | }, 93 | { 94 | type: "oneOrMore", 95 | child: { 96 | type: "identifier", 97 | identifier: "Baz", 98 | }, 99 | }, 100 | ], 101 | }); 102 | }); 103 
| 104 | it("handles a range quantifier with a lower and upper bound", () => { 105 | expect(parseExpr("(Foo){3, 5}")).toEqual({ 106 | type: "range", 107 | lowerBound: 3, 108 | upperBound: 5, 109 | child: { 110 | type: "identifier", 111 | identifier: "Foo", 112 | }, 113 | }); 114 | }); 115 | 116 | it("handles a range quantifier with only a lower bound", () => { 117 | expect(parseExpr("(Foo){10,}")).toEqual({ 118 | type: "range", 119 | lowerBound: 10, 120 | upperBound: null, 121 | child: { 122 | type: "identifier", 123 | identifier: "Foo", 124 | }, 125 | }); 126 | }); 127 | 128 | it("handles an exact range quantifier", () => { 129 | expect(parseExpr("(Foo){99}")).toEqual({ 130 | type: "range", 131 | lowerBound: 99, 132 | upperBound: 99, 133 | child: { 134 | type: "identifier", 135 | identifier: "Foo", 136 | }, 137 | }); 138 | }); 139 | 140 | it("handles a choice of identifiers", () => { 141 | expect(parseExpr("Foo | Bar | Baz")).toEqual({ 142 | type: "choice", 143 | children: [ 144 | { 145 | type: "identifier", 146 | identifier: "Foo", 147 | }, 148 | { 149 | type: "identifier", 150 | identifier: "Bar", 151 | }, 152 | { 153 | type: "identifier", 154 | identifier: "Baz", 155 | }, 156 | ], 157 | }); 158 | }); 159 | 160 | it("throws an error when there is an ambiguous mix of sequence and choice markers", () => { 161 | expect(() => parseExpr("Foo | Bar Baz")).toThrow(); 162 | }); 163 | 164 | it("handles grouped choices and sequences", () => { 165 | expect(parseExpr("Foo (Bar | Baz)")).toEqual({ 166 | type: "sequence", 167 | children: [ 168 | { 169 | type: "identifier", 170 | identifier: "Foo", 171 | }, 172 | { 173 | type: "choice", 174 | children: [ 175 | { 176 | type: "identifier", 177 | identifier: "Bar", 178 | }, 179 | { 180 | type: "identifier", 181 | identifier: "Baz", 182 | }, 183 | ], 184 | }, 185 | ], 186 | }); 187 | }); 188 | 189 | it("handles grouped choices and sequences with quantifiers", () => { 190 | expect(parseExpr("(Foo Bar)+ (Bar | Baz)*")).toEqual({ 191 
| type: "sequence", 192 | children: [ 193 | { 194 | type: "oneOrMore", 195 | child: { 196 | type: "sequence", 197 | children: [ 198 | { 199 | type: "identifier", 200 | identifier: "Foo", 201 | }, 202 | { 203 | type: "identifier", 204 | identifier: "Bar", 205 | }, 206 | ], 207 | }, 208 | }, 209 | { 210 | type: "zeroOrMore", 211 | child: { 212 | type: "choice", 213 | children: [ 214 | { 215 | type: "identifier", 216 | identifier: "Bar", 217 | }, 218 | { 219 | type: "identifier", 220 | identifier: "Baz", 221 | }, 222 | ], 223 | }, 224 | }, 225 | ], 226 | }); 227 | }); 228 | 229 | it("handles a very complicated expression", () => { 230 | expect( 231 | parseExpr( 232 | "(Foo Bar+ (Qux* | Baz){10}){3,5} (Bar* | (Baz{6,} Foo))*" 233 | ) 234 | ).toEqual({ 235 | type: "sequence", 236 | children: [ 237 | { 238 | type: "range", 239 | lowerBound: 3, 240 | upperBound: 5, 241 | child: { 242 | type: "sequence", 243 | children: [ 244 | { 245 | type: "identifier", 246 | identifier: "Foo", 247 | }, 248 | { 249 | type: "oneOrMore", 250 | child: { 251 | type: "identifier", 252 | identifier: "Bar", 253 | }, 254 | }, 255 | { 256 | type: "range", 257 | upperBound: 10, 258 | lowerBound: 10, 259 | child: { 260 | type: "choice", 261 | children: [ 262 | { 263 | type: "zeroOrMore", 264 | child: { 265 | type: "identifier", 266 | identifier: "Qux", 267 | }, 268 | }, 269 | { 270 | type: "identifier", 271 | identifier: "Baz", 272 | }, 273 | ], 274 | }, 275 | }, 276 | ], 277 | }, 278 | }, 279 | { 280 | type: "zeroOrMore", 281 | child: { 282 | type: "choice", 283 | children: [ 284 | { 285 | type: "zeroOrMore", 286 | child: { 287 | type: "identifier", 288 | identifier: "Bar", 289 | }, 290 | }, 291 | { 292 | type: "sequence", 293 | children: [ 294 | { 295 | type: "range", 296 | lowerBound: 6, 297 | upperBound: null, 298 | child: { 299 | type: "identifier", 300 | identifier: "Baz", 301 | }, 302 | }, 303 | 304 | { 305 | type: "identifier", 306 | identifier: "Foo", 307 | }, 308 | ], 309 | }, 310 | ], 
311 | }, 312 | }, 313 | ], 314 | }); 315 | }); 316 | 317 | it("normalizes odd spacing and extra parens from an expression", () => { 318 | expect( 319 | parseExpr( 320 | " ((Foo Bar+ ((Qux* |Baz)) )+ ( (Bar)* | (Baz Foo))* ) " 321 | ) 322 | ).toEqual({ 323 | type: "sequence", 324 | children: [ 325 | { 326 | type: "oneOrMore", 327 | child: { 328 | type: "sequence", 329 | children: [ 330 | { 331 | type: "identifier", 332 | identifier: "Foo", 333 | }, 334 | { 335 | type: "oneOrMore", 336 | child: { 337 | type: "identifier", 338 | identifier: "Bar", 339 | }, 340 | }, 341 | { 342 | type: "choice", 343 | children: [ 344 | { 345 | type: "zeroOrMore", 346 | child: { 347 | type: "identifier", 348 | identifier: "Qux", 349 | }, 350 | }, 351 | { 352 | type: "identifier", 353 | identifier: "Baz", 354 | }, 355 | ], 356 | }, 357 | ], 358 | }, 359 | }, 360 | { 361 | type: "zeroOrMore", 362 | child: { 363 | type: "choice", 364 | children: [ 365 | { 366 | type: "zeroOrMore", 367 | child: { 368 | type: "identifier", 369 | identifier: "Bar", 370 | }, 371 | }, 372 | { 373 | type: "sequence", 374 | children: [ 375 | { 376 | type: "identifier", 377 | identifier: "Baz", 378 | }, 379 | 380 | { 381 | type: "identifier", 382 | identifier: "Foo", 383 | }, 384 | ], 385 | }, 386 | ], 387 | }, 388 | }, 389 | ], 390 | }); 391 | }); 392 | }); 393 | 394 | describe("accepts", () => { 395 | const acceptExpr = (pattern, nodes) => 396 | acceptItems( 397 | parseExpr(pattern), 398 | nodes, 399 | (str: string) => (node: Node) => node.type === str 400 | ); 401 | 402 | it("accepts an empty node array where expected", () => { 403 | expect(acceptExpr("Foo*", [])).toEqual(0); 404 | }); 405 | 406 | it("handles an array of empty nodes", () => { 407 | expect(acceptExpr("Foo", [])).toEqual(0); 408 | }); 409 | 410 | it("accepts a simple identifier", () => { 411 | expect(acceptExpr("Foo", [n("Foo")])).toEqual(1); 412 | }); 413 | 414 | it("rejects a mismatched identifier", () => { 415 | expect(acceptExpr("Foo", 
[n("Bar")])).toEqual(0); 416 | }); 417 | 418 | it("accepts a sequence of identifiers", () => { 419 | expect(acceptExpr("Foo Bar", [n("Foo"), n("Bar")])).toEqual(2); 420 | }); 421 | 422 | it("rejects a sequence of mismatched identifiers", () => { 423 | expect(acceptExpr("Foo Bar", [n("Foo"), n("Baz")])).toEqual(0); 424 | }); 425 | 426 | it("accepts a choice of identifiers", () => { 427 | expect(acceptExpr("Foo | Bar", [n("Foo")])).toEqual(1); 428 | expect(acceptExpr("Foo | Bar", [n("Bar")])).toEqual(1); 429 | }); 430 | 431 | it("rejects a mismatched choice of identifiers", () => { 432 | expect(acceptExpr("Foo | Bar", [n("Baz")])).toEqual(0); 433 | expect(acceptExpr("Foo | Bar", [n("Qux")])).toEqual(0); 434 | }); 435 | 436 | it("accepts zero or more identifiers", () => { 437 | expect(acceptExpr("Foo*", [])).toEqual(0); 438 | expect(acceptExpr("Foo*", [n("Foo")])).toEqual(1); 439 | expect(acceptExpr("Foo*", [n("Foo"), n("Foo")])).toEqual(2); 440 | expect(acceptExpr("Foo*", [n("Foo"), n("Foo"), n("Foo")])).toEqual(3); 441 | }); 442 | 443 | it("accepts one or more identifiers", () => { 444 | expect(acceptExpr("Foo+", [])).toEqual(0); 445 | expect(acceptExpr("Foo+", [n("Foo")])).toEqual(1); 446 | expect(acceptExpr("Foo+", [n("Foo"), n("Foo")])).toEqual(2); 447 | expect(acceptExpr("Foo+", [n("Foo"), n("Foo"), n("Foo")])).toEqual(3); 448 | }); 449 | 450 | it("accepts the number of nodes specified by a range quantifier", () => { 451 | expect(acceptExpr("Foo{1,3}", [])).toEqual(0); 452 | expect(acceptExpr("Foo{1,3}", [n("Foo")])).toEqual(1); 453 | expect(acceptExpr("Foo{1,3}", [n("Foo"), n("Foo")])).toEqual(2); 454 | expect(acceptExpr("Foo{1,3}", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 455 | 3 456 | ); 457 | expect( 458 | acceptExpr("Foo{1,3}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 459 | ).toEqual(3); 460 | }); 461 | 462 | it("accepts the number of nodes specified by an exact range quantifier", () => { 463 | expect(acceptExpr("Foo{3}", [])).toEqual(0); 464 | 
expect(acceptExpr("Foo{3}", [n("Foo")])).toEqual(0); 465 | expect(acceptExpr("Foo{3}", [n("Foo"), n("Foo")])).toEqual(0); 466 | expect(acceptExpr("Foo{3}", [n("Foo"), n("Foo"), n("Foo")])).toEqual(3); 467 | expect( 468 | acceptExpr("Foo{3}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 469 | ).toEqual(3); 470 | }); 471 | 472 | it("accepts the number of nodes specified by an unbounded range quantifier", () => { 473 | expect(acceptExpr("Foo{2,}", [])).toEqual(0); 474 | expect(acceptExpr("Foo{2,}", [n("Foo")])).toEqual(0); 475 | expect(acceptExpr("Foo{2,}", [n("Foo"), n("Foo")])).toEqual(2); 476 | expect(acceptExpr("Foo{2,}", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 477 | 3 478 | ); 479 | expect( 480 | acceptExpr("Foo{2,}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 481 | ).toEqual(4); 482 | }); 483 | 484 | it("composes quantifiers", () => { 485 | expect(acceptExpr("(Foo{2})+", [])).toEqual(0); 486 | expect(acceptExpr("(Foo{2})+", [n("Foo")])).toEqual(0); 487 | expect(acceptExpr("(Foo{2})+", [n("Foo"), n("Foo")])).toEqual(2); 488 | expect(acceptExpr("(Foo{2})+", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 489 | 2 490 | ); 491 | expect( 492 | acceptExpr("(Foo{2})+", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 493 | ).toEqual(4); 494 | }); 495 | 496 | it("handles range quantifiers in a sequence", () => { 497 | expect(acceptExpr("Foo Bar{1,2} Baz", [n("Foo"), n("Baz")])).toEqual(0); 498 | expect( 499 | acceptExpr("Foo Bar{1,2} Baz", [n("Foo"), n("Bar"), n("Baz")]) 500 | ).toEqual(3); 501 | expect( 502 | acceptExpr("Foo Bar{1,2} Baz", [ 503 | n("Foo"), 504 | n("Bar"), 505 | n("Bar"), 506 | n("Baz"), 507 | ]) 508 | ).toEqual(4); 509 | expect( 510 | acceptExpr("Foo Bar{1,2} Baz", [ 511 | n("Foo"), 512 | n("Bar"), 513 | n("Bar"), 514 | n("Bar"), 515 | n("Baz"), 516 | ]) 517 | ).toEqual(0); 518 | }); 519 | 520 | it("can use an unbounded range quantifier like a zeroOrMore", () => { 521 | expect(acceptExpr("Foo{0,}", [])).toEqual(0); 522 | expect(acceptExpr("Foo{0,}", 
[n("Foo")])).toEqual(1); 523 | expect(acceptExpr("Foo{0,}", [n("Foo"), n("Foo")])).toEqual(2); 524 | expect(acceptExpr("Foo{0,}", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 525 | 3 526 | ); 527 | expect( 528 | acceptExpr("Foo{0,}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 529 | ).toEqual(4); 530 | }); 531 | 532 | it("returns a correct value when there are leftover nodes", () => { 533 | expect(acceptExpr("Foo+", [n("Bar")])).toEqual(0); 534 | expect(acceptExpr("Foo Foo", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 535 | 2 536 | ); 537 | }); 538 | 539 | it("accepts a sequence of multiple identifiers", () => { 540 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Foo")])).toEqual(2); 541 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Foo"), n("Bar")])).toEqual( 542 | 3 543 | ); 544 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Bar")])).toEqual(2); 545 | }); 546 | 547 | it("handles a sequence of multiple identifiers with quantifiers", () => { 548 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Foo"), n("Baz")])).toEqual( 549 | 2 550 | ); 551 | expect(acceptExpr("Foo+ Bar*", [n("Bar")])).toEqual(0); 552 | }); 553 | 554 | it("accepts a combination of choices and sequences", () => { 555 | expect( 556 | acceptExpr("(Foo | Bar) (Bar Baz)+", [n("Foo"), n("Bar"), n("Baz")]) 557 | ).toEqual(3); 558 | expect( 559 | acceptExpr("(Foo | Bar) (Bar Baz)+", [ 560 | n("Foo"), 561 | n("Bar"), 562 | n("Baz"), 563 | n("Bar"), 564 | n("Baz"), 565 | ]) 566 | ).toEqual(5); 567 | }); 568 | 569 | it("handles a combination of choices and sequences that matches some nodes", () => { 570 | expect( 571 | acceptExpr("(Foo | Bar) (Bar Baz)+", [n("Qux"), n("Bar"), n("Baz")]) 572 | ).toEqual(0); 573 | expect( 574 | acceptExpr("(Foo | Bar) (Bar Baz)+", [ 575 | n("Foo"), 576 | n("Bar"), 577 | n("Baz"), 578 | n("Bar"), 579 | ]) 580 | ).toEqual(3); 581 | }); 582 | 583 | it("handles a combination of quantifiers", () => { 584 | expect( 585 | acceptExpr("(Foo | Bar)* (Bar Baz)+ Qux", [ 586 | n("Foo"), 587 | 
n("Bar"), 588 | n("Bar"), 589 | n("Bar"), 590 | n("Bar"), 591 | n("Baz"), 592 | n("Bar"), 593 | n("Baz"), 594 | n("Qux"), 595 | ]) 596 | ).toEqual(9); 597 | }); 598 | 599 | it("handles nodes that might be swallowed by a greedy quantifier", () => { 600 | expect( 601 | acceptExpr("(Foo | Bar)* Bar", [ 602 | n("Foo"), 603 | n("Bar"), 604 | n("Bar"), 605 | n("Bar"), 606 | n("Bar"), 607 | ]) 608 | ).toEqual(5); 609 | }); 610 | 611 | it("handles an unnecessarily complicated expression", () => { 612 | expect( 613 | acceptExpr("(Foo | Bar)* Baz (Bar | Qux{2,5} | Baz)+ Qux", [ 614 | n("Foo"), 615 | n("Bar"), 616 | n("Baz"), 617 | n("Qux"), 618 | n("Qux"), 619 | n("Baz"), 620 | n("Qux"), 621 | n("Qux"), 622 | n("Qux"), 623 | n("Bar"), 624 | n("Qux"), 625 | n("Foo"), 626 | n("Bar"), 627 | ]) 628 | ).toEqual(11); 629 | }); 630 | }); 631 | -------------------------------------------------------------------------------- /src/expression/__tests__/heap.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import Heap from "../heap"; 3 | 4 | const consumeHeap = (heap) => { 5 | const res = []; 6 | while (heap.length()) { 7 | res.push(heap.pop()); 8 | } 9 | return res; 10 | }; 11 | 12 | describe("Heap", () => { 13 | it("works as a min-heap", () => { 14 | const heap = new Heap((x) => x, [8, 6, 7, 5, 3, 0, 9]); 15 | expect(consumeHeap(heap)).toEqual([0, 3, 5, 6, 7, 8, 9]); 16 | }); 17 | 18 | it("works as a max-heap", () => { 19 | const heap = new Heap((x) => -x, [8, 6, 7, 5, 3, 0, 9]); 20 | expect(consumeHeap(heap)).toEqual([9, 8, 7, 6, 5, 3, 0]); 21 | }); 22 | }); 23 | -------------------------------------------------------------------------------- /src/expression/acceptor.ts: -------------------------------------------------------------------------------- 1 | import Heap from "./heap"; 2 | import { Expr, IdentifierMatch } from "./types"; 3 | 4 | type State = { 5 | addSuccessor: (s: State) => void; 6 | 
getSuccessors: (n: Item) => State[]; 7 | consumesItem: () => boolean; 8 | }; 9 | 10 | type Machine = { 11 | startState: State; 12 | acceptState: State; 13 | }; 14 | 15 | type SearchPosition = { 16 | state: State; 17 | consumedItems: number; 18 | }; 19 | 20 | type DiscoveryState = { 21 | discoveredPositions: SearchPosition[]; 22 | positionsHeap: Heap>; 23 | }; 24 | 25 | const state = (guard?: (n: Item) => boolean): State => { 26 | const successors: Set> = new Set(); 27 | 28 | const addSuccessor = (s: State) => { 29 | successors.add(s); 30 | }; 31 | 32 | const getSuccessors = (currentItem: Item) => { 33 | const passesGuard = !guard || (currentItem && guard(currentItem)); 34 | return passesGuard ? Array.from(successors) : []; 35 | }; 36 | 37 | const consumesItem = () => { 38 | return !!guard; 39 | }; 40 | 41 | return { 42 | addSuccessor, 43 | getSuccessors, 44 | consumesItem, 45 | }; 46 | }; 47 | 48 | const createAcceptanceMachine = ( 49 | expr: Expr, 50 | matcher: IdentifierMatch 51 | ): Machine => { 52 | const startState = state(); 53 | const acceptState = state(); 54 | 55 | if (expr.type === "identifier") { 56 | const identifierState = state(matcher(expr.identifier)); 57 | startState.addSuccessor(identifierState); 58 | identifierState.addSuccessor(acceptState); 59 | } else if (expr.type === "choice") { 60 | const choiceMachines = expr.children.map((x) => 61 | createAcceptanceMachine(x, matcher) 62 | ); 63 | choiceMachines.forEach((machine) => { 64 | startState.addSuccessor(machine.startState); 65 | machine.acceptState.addSuccessor(acceptState); 66 | }); 67 | } else if (expr.type === "sequence") { 68 | const sequenceMachines = expr.children.map((x) => 69 | createAcceptanceMachine(x, matcher) 70 | ); 71 | const finalAcceptState = sequenceMachines.reduce( 72 | (intermediateAcceptState, nextMachine) => { 73 | intermediateAcceptState.addSuccessor(nextMachine.startState); 74 | return nextMachine.acceptState; 75 | }, 76 | startState 77 | ); 78 | 
finalAcceptState.addSuccessor(acceptState); 79 | } else if (expr.type === "range") { 80 | const { lowerBound, upperBound, child } = expr; 81 | if ( 82 | (upperBound !== null && upperBound < lowerBound) || 83 | (lowerBound === 0 && upperBound === 0) || 84 | lowerBound < 0 85 | ) { 86 | throw new Error(`Invalid range: [${lowerBound},${upperBound}]`); 87 | } 88 | const make = () => createAcceptanceMachine(child, matcher); 89 | const machines: Machine[] = []; 90 | const machineCount = Math.max( 91 | 1, 92 | upperBound !== null ? upperBound : lowerBound 93 | ); 94 | for (let i = 0; i < machineCount; i++) { 95 | const machine = make(); 96 | machines.push(machine); 97 | if (i > 0) { 98 | const prev = machines[i - 1]; 99 | prev.acceptState.addSuccessor(machine.startState); 100 | } 101 | if (i + 1 >= lowerBound) { 102 | machine.acceptState.addSuccessor(acceptState); 103 | } 104 | } 105 | if (upperBound === null) { 106 | const last = machines[machines.length - 1]; 107 | last.acceptState.addSuccessor(last.startState); 108 | } 109 | const first = machines[0]; 110 | startState.addSuccessor(first.startState); 111 | if (lowerBound === 0) { 112 | startState.addSuccessor(acceptState); 113 | } 114 | } else if (expr.type === "zeroOrMore") { 115 | const innerMachine = createAcceptanceMachine(expr.child, matcher); 116 | startState.addSuccessor(innerMachine.startState); 117 | innerMachine.acceptState.addSuccessor(acceptState); 118 | startState.addSuccessor(acceptState); 119 | acceptState.addSuccessor(startState); 120 | } else if (expr.type === "oneOrMore") { 121 | const innerMachine = createAcceptanceMachine(expr.child, matcher); 122 | startState.addSuccessor(innerMachine.startState); 123 | innerMachine.acceptState.addSuccessor(acceptState); 124 | acceptState.addSuccessor(startState); 125 | } else { 126 | startState.addSuccessor(acceptState); 127 | } 128 | 129 | return { startState, acceptState }; 130 | }; 131 | 132 | // Adds a position to a DiscoveryState if it hasn't already been 
discovered. 133 | const maybeEnqueuePosition = ( 134 | position: SearchPosition, 135 | discoveryState: DiscoveryState 136 | ) => { 137 | const { discoveredPositions, positionsHeap } = discoveryState; 138 | const hasAlreadyDiscoveredPosition = discoveredPositions.some( 139 | (discoveredPosition) => 140 | discoveredPosition.state === position.state && 141 | discoveredPosition.consumedItems === position.consumedItems 142 | ); 143 | if (!hasAlreadyDiscoveredPosition) { 144 | positionsHeap.push(position); 145 | } 146 | }; 147 | 148 | // Mark a position discovered in a DiscoveryState. 149 | const discoverPosition = ( 150 | position: SearchPosition, 151 | discoveryState: DiscoveryState 152 | ) => { 153 | discoveryState.discoveredPositions.push(position); 154 | }; 155 | 156 | // We want SearchPositions with high `consumedItems` to come out first, so they get low scores. 157 | const heapScore = (item: SearchPosition) => 0 - item.consumedItems; 158 | 159 | export const createItemAcceptor = ( 160 | expr: Expr, 161 | matchTest: IdentifierMatch 162 | ) => { 163 | // An "item acceptor" takes an Expr like (A | B)* and tests it against a list of items like 164 | // [A, B, B, A...]. We do this by transforming the expression into a state machine and 165 | // performing a graph search through the states, with the understanding that some edges between 166 | // states "consume" items in the list. A few bits of terminology first: 167 | // 168 | // - an expression or Expr is a pattern matching tool to test a list of items. The term item 169 | // is intentionally generic, and while an expression is ultimately a composition of string 170 | // itentifiers, the `matchTest` argument can be used to do any kind of string =?= Item 171 | // comparison that you like. 172 | // 173 | // - an expression is transformed into a graph called a state machine. The nodes or vertices of 174 | // this graph are called states. 
175 | // 176 | // - a position or SearchPosition is a pair of values (state, consumedItems) that uniquely 177 | // defines a "moment" (I'm really trying to avoid using the word "state" here) in the search 178 | // that should not be repeated. In other words, the position (X, 3) means we've arrived at 179 | // state X and we've seen three items so far, and if the search takes us to this position 180 | // again, we ought not to explore that branch further because it's already been done. 181 | // 182 | // The "online" version of this algorithm is a little tricky to understand, so I'm going to lay 183 | // out how it works in some comments here. The problem is that we have a stream of Items, and 184 | // we want to see how many of them `expr` will accept. Say we have S = [I1, I2, I3]...we can 185 | // just check accepts(expr, [I1]), then accepts(expr, [I1, I2]), and finally 186 | // accepts(expr, [I1, I2, I3]). This is a reasonable solution for small batches of items, but 187 | // when checking hundreds of children (as the heal algorithm sometimes does), this quadratic 188 | // behavior is unacceptably slow. So we need a way to hold on to what we've learned about the 189 | // batch of items 0....(n - 1) when checking if the nth item is accepted. Let's look at a simple 190 | // state machine for the expr "I*". It has the structure: 191 | // 192 | // (S0) --> [ (S1) --- C(I) ---> (A1) ] ---> (A0) 193 | // ^ | 194 | // | | 195 | // ----------------- 196 | // 197 | // Where S0 and S1 are the start and accept states for the overall state machine, and S1 and A1 198 | // are the start and accept states for the actual expression N*. Note that the edge from S1 to 199 | // A1 is labelled "C(I)" to indicate that it consumes an item. Running this on the first item is 200 | // straightforward enough...we just search all the positions reachable from S0 with [I] and 201 | // see whether we arrive at the state A0. 
When checking [I, I], we might be tempted to use the 202 | // fact that for the first I, we found the accept state, and start the search against the second 203 | // A at state A0. But this won't work because A0 is a terminal state with no outbound edges. 204 | // 205 | // Instead, when we're given [I, I], we have no choice but to restart the search from _all_ 206 | // positions we explored in the search against [I]. The only reason this is faster than running 207 | // the whole search from scratch multiple times is the hunch that search positions with higher 208 | // nodeCount values are likely to bring us to a solution more quickly. Concretely, when we 209 | // test [I] against I* we explore the following (state, consumedItems) positions: 210 | // 211 | // (S0, 0) (S1, 0) (A1, 1) (A0, 1) 212 | // 213 | // When we test against [I, I], success means finding the position (A0, 2) -- which is only 214 | // reachable by exploring (A1, 1) from the last search. We'll use a max-heap to keep track of 215 | // positions reached in previous iterations of the search, so that positions with higher 216 | // consumedItems values are explored more readily. So that, in broad strokes, is the goal of 217 | // this code -- to test an expression against a list of items, one item at a time, while 218 | // providing partial results along the way and replicating as little work as possible. 219 | 220 | // Build a state machine graph to traverse. 221 | const { startState, acceptState } = createAcceptanceMachine( 222 | expr, 223 | matchTest 224 | ); 225 | 226 | // Keep a running list of items we're given. 227 | const items = []; 228 | 229 | // Our initial position is the start state, with no items consumed. 230 | const initialPosition = { 231 | state: startState, 232 | consumedItems: 0, 233 | }; 234 | 235 | // A DiscoveryState is a heap of positions to explore next, and a list of positions that we've 236 | // already discovered and shouldn't explore further. 
237 | const globalDiscoveryState: DiscoveryState = { 238 | positionsHeap: new Heap(heapScore, [initialPosition]), 239 | discoveredPositions: [], 240 | }; 241 | 242 | return function acceptsNextItem(nextItem: Item): boolean { 243 | // Create a DiscoveryState that's a shallow copy of the global discovery state. We'll 244 | // use this to exhaust positions locally (in this acceptsNextItem call) while preserving a 245 | // monotonically-growing heap of positions and list of discovered items (for the lifetime 246 | // of the parent createItemAcceptor frame). 247 | // Keep a local discovery state object that's a shallow copy of the global one. 248 | const localDiscoveryState: DiscoveryState = { 249 | positionsHeap: new Heap( 250 | heapScore, 251 | globalDiscoveryState.positionsHeap.toArray() 252 | ), 253 | discoveredPositions: [...globalDiscoveryState.discoveredPositions], 254 | }; 255 | // We'll pop items only off the local discovery heap. 256 | const { positionsHeap: localHeap } = localDiscoveryState; 257 | // Mark the next item as consumable. 258 | items.push(nextItem); 259 | while (localHeap.length() > 0) { 260 | // Get the next best candidate position 261 | const position = localHeap.pop(); 262 | const { state, consumedItems } = position; 263 | // If we're in the accept state and consumed all the items we have, we're successful. 264 | if (state === acceptState && consumedItems === items.length) { 265 | return true; 266 | } 267 | // Mark this position as discovered so we don't bother to explore it again. 268 | discoverPosition(position, globalDiscoveryState); 269 | discoverPosition(position, localDiscoveryState); 270 | const currentItem = items[consumedItems]; 271 | // Get all the successors of this position... 272 | const successors = state.getSuccessors(currentItem); 273 | // And for each one... 274 | for (const successor of successors) { 275 | // See whether it consumes an item or not... 
276 | const nextPosition = { 277 | state: successor, 278 | consumedItems: state.consumesItem() 279 | ? consumedItems + 1 280 | : consumedItems, 281 | }; 282 | // And mark it to be explored, either in this call to acceptsNextItem, or later. 283 | maybeEnqueuePosition(nextPosition, globalDiscoveryState); 284 | maybeEnqueuePosition(nextPosition, localDiscoveryState); 285 | } 286 | } 287 | // Looks like we didn't find any acceptable states. 288 | return false; 289 | }; 290 | }; 291 | 292 | const quickAcceptItems = ( 293 | expr: Expr, 294 | items: Item[], 295 | matchTest: IdentifierMatch 296 | ): number => { 297 | if ( 298 | expr.type === "oneOrMore" && 299 | expr.child.type === "choice" && 300 | expr.child.children.every((child) => child.type === "identifier") 301 | ) { 302 | const choice = expr.child; 303 | const validIdentifiers = choice.children 304 | .map((child) => child.type === "identifier" && child.identifier) 305 | .filter((x) => x); 306 | let ptr = 0; 307 | while ( 308 | ptr < items.length && 309 | validIdentifiers.some((id) => matchTest(id)(items[ptr])) 310 | ) { 311 | ++ptr; 312 | } 313 | return ptr; 314 | } 315 | return 0; 316 | }; 317 | 318 | export const acceptItems = ( 319 | expr: Expr, 320 | items: Item[], 321 | matchTest: IdentifierMatch 322 | ): number => { 323 | const quickAcceptedItems = quickAcceptItems(expr, items, matchTest); 324 | if (quickAcceptedItems > 0) { 325 | return quickAcceptedItems; 326 | } 327 | const { startState, acceptState } = createAcceptanceMachine( 328 | expr, 329 | matchTest 330 | ); 331 | const positions: SearchPosition[] = [ 332 | { state: startState, consumedItems: 0 }, 333 | ]; 334 | const discoveredPositions: SearchPosition[] = []; 335 | let maxConsumedItems = 0; 336 | 337 | const maybePushPosition = (p: SearchPosition) => { 338 | const hasAlreadyDiscoveredPosition = discoveredPositions.some( 339 | (discoveredPosition) => 340 | discoveredPosition.state === p.state && 341 | discoveredPosition.consumedItems === 
p.consumedItems 342 | ); 343 | if (!hasAlreadyDiscoveredPosition) { 344 | positions.push(p); 345 | } 346 | }; 347 | 348 | while (positions.length > 0) { 349 | const position = positions.shift(); 350 | const { state, consumedItems: consumedItems } = position; 351 | const currentItem = items[consumedItems]; 352 | const successors = state.getSuccessors(currentItem); 353 | discoveredPositions.push(position); 354 | if (state === acceptState) { 355 | maxConsumedItems = Math.max(maxConsumedItems, consumedItems); 356 | } 357 | for (const successor of successors) { 358 | const nextconsumedItems = state.consumesItem() 359 | ? consumedItems + 1 360 | : consumedItems; 361 | maybePushPosition({ 362 | state: successor, 363 | consumedItems: nextconsumedItems, 364 | }); 365 | } 366 | } 367 | 368 | return maxConsumedItems; 369 | }; 370 | -------------------------------------------------------------------------------- /src/expression/heap.ts: -------------------------------------------------------------------------------- 1 | // "Inspired" by https://eloquentjavascript.net/1st_edition/appendix2.html 2 | 3 | type ScoreFn = (t: T) => number; 4 | 5 | export default class Heap { 6 | private scoreFn: ScoreFn; 7 | private content: T[]; 8 | 9 | constructor(scoreFn: ScoreFn, initialItems: T[] = []) { 10 | this.scoreFn = scoreFn; 11 | this.content = []; 12 | for (const item of initialItems) { 13 | this.push(item); 14 | } 15 | } 16 | 17 | push(element: T) { 18 | this.content.push(element); 19 | this.bubbleUp(this.content.length - 1); 20 | } 21 | 22 | pop() { 23 | const [result] = this.content; 24 | const end = this.content.pop(); 25 | if (this.content.length > 0) { 26 | this.content[0] = end; 27 | this.sinkDown(0); 28 | } 29 | return result; 30 | } 31 | 32 | length() { 33 | return this.content.length; 34 | } 35 | 36 | toArray() { 37 | return [...this.content]; 38 | } 39 | 40 | private bubbleUp(index: number) { 41 | const element = this.content[index]; 42 | const score = this.scoreFn(element); 
43 | while (index > 0) { 44 | const parentIndex = Math.floor((index + 1) / 2) - 1; 45 | const parent = this.content[parentIndex]; 46 | if (score >= this.scoreFn(parent)) { 47 | break; 48 | } 49 | this.content[parentIndex] = element; 50 | this.content[index] = parent; 51 | index = parentIndex; 52 | } 53 | } 54 | 55 | private sinkDown(index: number) { 56 | const { length } = this.content; 57 | const element = this.content[index]; 58 | const elemScore = this.scoreFn(element); 59 | while (true) { 60 | const child2N = (index + 1) * 2; 61 | const child1N = child2N - 1; 62 | let swap: null | number = null; 63 | let child1Score; 64 | if (child1N < length) { 65 | const child1 = this.content[child1N]; 66 | child1Score = this.scoreFn(child1); 67 | if (child1Score < elemScore) { 68 | swap = child1N; 69 | } 70 | } 71 | if (child2N < length) { 72 | const child2 = this.content[child2N]; 73 | const child2Score = this.scoreFn(child2); 74 | const thresholdScore = swap === null ? elemScore : child1Score; 75 | if (child2Score < thresholdScore) { 76 | swap = child2N; 77 | } 78 | } 79 | if (swap === null) { 80 | break; 81 | } 82 | this.content[index] = this.content[swap]; 83 | this.content[swap] = element; 84 | index = swap; 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/expression/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * This package implements a regular expression-like language parser, and a finite state machine 3 | * generator for evaluating lists of items against such an expression. For example: 4 | * 5 | * const expr = parseExpr("(A | B)+ C") 6 | * const acceptedItemsCount = acceptItems( 7 | * expr, 8 | * ['A', 'B', 'A', 'C', 'A'], 9 | * id => str => id === str 10 | * ) // === 4 because only the first four elements of the input array match the expression. 
11 | */ 12 | 13 | export * from "./acceptor"; 14 | export * from "./parse"; 15 | export * from "./types"; 16 | export * from "./util"; 17 | -------------------------------------------------------------------------------- /src/expression/parse.ts: -------------------------------------------------------------------------------- 1 | import { Expr } from "./types"; 2 | 3 | // A simple recursive-descent parser to turn expressions like `(Space | Str)+` into syntax trees 4 | export const parseExpr = (str: string): Expr => { 5 | str = str.trim(); 6 | // Remove spaces around choice separators 7 | str = str.replace(/\s*\|\s*/g, "|"); 8 | // Remove extraneous spaces 9 | str = str.replace(/\s+/g, " "); 10 | 11 | // Keep track of the separator type we have at this level. It should either be choice separators 12 | // ("|") or sequential separators (" "). 13 | let separator; 14 | // Find separators 15 | const separators = []; 16 | // Keep track of open and close parens 17 | let parenCount = 0; 18 | // Keep track of open and close curlies 19 | let curlyCount = 0; 20 | for (let ptr = 0; ptr < str.length; ++ptr) { 21 | const char = str.charAt(ptr); 22 | if (char === "(") { 23 | ++parenCount; 24 | } else if (char === ")") { 25 | --parenCount; 26 | } else if (char === "{") { 27 | ++curlyCount; 28 | } else if (char === "}") { 29 | --curlyCount; 30 | } else if ( 31 | parenCount === 0 && 32 | curlyCount === 0 && 33 | (char === "|" || char === " ") 34 | ) { 35 | if (separator && separator !== char) { 36 | throw new Error( 37 | "Please surround mixed separators with parentheses!" + 38 | ` e.g. prefer '(Foo | Bar) Baz' over 'Foo | Bar Baz'. 
(at ${ptr}, parsing '${str}')` 39 | ); 40 | } else { 41 | separator = char; 42 | separators.push(ptr); 43 | } 44 | } 45 | } 46 | if (separators.length > 0) { 47 | const separated: string[] = []; 48 | let substring = ""; 49 | for (let ptr = 0; ptr < str.length; ++ptr) { 50 | if (separators.includes(ptr)) { 51 | separated.push(substring); 52 | substring = ""; 53 | } else { 54 | substring += str.charAt(ptr); 55 | } 56 | } 57 | if (substring.length > 0) { 58 | separated.push(substring); 59 | } 60 | return { 61 | type: separator === " " ? "sequence" : "choice", 62 | children: separated.map(parseExpr), 63 | }; 64 | } else if (str.endsWith("}")) { 65 | let ptr = str.length - 1; 66 | while (str.charAt(ptr) !== "{") { 67 | ptr--; 68 | } 69 | const rangeStrs = str.slice(ptr + 1, str.length - 1).split(","); 70 | const hasTwo = rangeStrs.length === 2; 71 | const range = rangeStrs.map((str) => parseInt(str.trim())); 72 | const [lowerBound, upperBound] = range; 73 | return { 74 | type: "range", 75 | lowerBound, 76 | upperBound: hasTwo 77 | ? isNaN(upperBound) 78 | ? 
null 79 | : upperBound 80 | : lowerBound, 81 | child: parseExpr(str.slice(0, ptr)), 82 | }; 83 | } else if (str.endsWith("+")) { 84 | return { 85 | type: "oneOrMore", 86 | child: parseExpr(str.slice(0, str.length - 1)), 87 | }; 88 | } else if (str.endsWith("*")) { 89 | return { 90 | type: "zeroOrMore", 91 | child: parseExpr(str.slice(0, str.length - 1)), 92 | }; 93 | } else if (str.startsWith("(") && str.endsWith(")")) { 94 | return parseExpr(str.slice(1, str.length - 1)); 95 | } 96 | return { type: "identifier", identifier: str }; 97 | }; 98 | -------------------------------------------------------------------------------- /src/expression/types.ts: -------------------------------------------------------------------------------- 1 | export type Identifier = { 2 | type: "identifier"; 3 | identifier: string; 4 | }; 5 | 6 | export type OneOrMore = { 7 | type: "oneOrMore"; 8 | child: Expr; 9 | }; 10 | 11 | export type ZeroOrMore = { 12 | type: "zeroOrMore"; 13 | child: Expr; 14 | }; 15 | 16 | export type Range = { 17 | type: "range"; 18 | lowerBound: number; 19 | upperBound: number | null; 20 | child: Expr; 21 | }; 22 | 23 | export type Sequence = { 24 | type: "sequence"; 25 | children: Expr[]; 26 | }; 27 | export type Choice = { 28 | type: "choice"; 29 | children: Expr[]; 30 | }; 31 | 32 | export type IdentifierMatch = (id: string) => (item: Item) => boolean; 33 | 34 | export type Expr = 35 | | Identifier 36 | | OneOrMore 37 | | ZeroOrMore 38 | | Sequence 39 | | Choice 40 | | Range; 41 | -------------------------------------------------------------------------------- /src/expression/util.ts: -------------------------------------------------------------------------------- 1 | import { Expr } from "./types"; 2 | 3 | export const exprAcceptsMultiple = (expr: Expr): boolean => { 4 | if (expr.type === "identifier") { 5 | return false; 6 | } else if (expr.type === "sequence") { 7 | return true; 8 | } else if (expr.type === "oneOrMore") { 9 | return true; 10 | } else if 
(expr.type === "zeroOrMore") { 11 | return true; 12 | } else if (expr.type === "range") { 13 | return expr.upperBound === null || expr.upperBound > 1; 14 | } else if (expr.type === "choice") { 15 | return expr.children.some((child) => exprAcceptsMultiple(child)); 16 | } 17 | }; 18 | 19 | export const exprWillAlwaysMatchSingleIdentifier = (expr: Expr, id: string) => { 20 | if (expr.type === "identifier") { 21 | return expr.identifier === id; 22 | } else if (expr.type === "sequence") { 23 | return false; 24 | } else if (expr.type === "choice") { 25 | return expr.children.some((child) => 26 | exprWillAlwaysMatchSingleIdentifier(child, id) 27 | ); 28 | } else if (expr.type === "range") { 29 | return ( 30 | expr.lowerBound === 1 && 31 | exprWillAlwaysMatchSingleIdentifier(expr.child, id) 32 | ); 33 | } else { 34 | return exprWillAlwaysMatchSingleIdentifier(expr.child, id); 35 | } 36 | }; 37 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import * as transformUtils from "./transform/util"; 2 | import * as transformers from "./transform/transformers"; 3 | import * as pandocUtils from "./pandocUtils"; 4 | 5 | export { transformUtils, transformers, pandocUtils }; 6 | export { fromPandoc } from "./transform/fromPandoc"; 7 | export { fromProsemirror } from "./transform/fromProsemirror"; 8 | export { RuleSet } from "./transform/ruleset"; 9 | export { emitPandocJson } from "./emit"; 10 | export { parsePandocJson } from "./parse"; 11 | export { metaValueToString, metaValueToJsonSerializable } from "./meta"; 12 | export { setPandocApiVersion } from "./config"; 13 | export { callPandoc, callPandocWithFile } from "./util"; 14 | -------------------------------------------------------------------------------- /src/meta.ts: -------------------------------------------------------------------------------- 1 | import { MetaValue, Inline, Block } from 
"./types"; 2 | 3 | const extractStringFromInline = (item: Inline): string => { 4 | if (item.type === "Space") { 5 | return " "; 6 | } 7 | if ("content" in item) { 8 | if ( 9 | item.type === "Str" || 10 | item.type === "Code" || 11 | item.type === "RawInline" || 12 | item.type === "Math" 13 | ) { 14 | return item.content; 15 | } else { 16 | if (item.type === "Note") { 17 | return ""; 18 | } else { 19 | return extractStringFromInlines(item.content); 20 | } 21 | } 22 | } 23 | return ""; 24 | }; 25 | 26 | const extractStringFromInlines = (inlines: Inline[]): string => { 27 | return inlines.map(extractStringFromInline).join(""); 28 | }; 29 | 30 | const extractStringFromBlock = (item: Block): string => { 31 | if (item.type === "Table") { 32 | return ""; 33 | } 34 | if ("content" in item) { 35 | if (item.type === "RawBlock" || item.type === "CodeBlock") { 36 | return item.content; 37 | } 38 | if ( 39 | item.type === "Para" || 40 | item.type === "Plain" || 41 | item.type === "Header" 42 | ) { 43 | return extractStringFromInlines(item.content); 44 | } 45 | if (item.type === "Div" || item.type === "BlockQuote") { 46 | return extractStringFromBlocks(item.content); 47 | } 48 | if (item.type === "LineBlock") { 49 | return item.content 50 | .map((inlines) => extractStringFromInlines(inlines)) 51 | .join("\n"); 52 | } 53 | return item.content 54 | .map((blocks) => extractStringFromBlocks(blocks)) 55 | .join("\n"); 56 | } 57 | }; 58 | 59 | const extractStringFromBlocks = (blocks: Block[]): string => { 60 | return blocks.map(extractStringFromBlock).join("\n"); 61 | }; 62 | 63 | export const metaValueToString = (m: MetaValue): string => { 64 | if (m.type === "MetaString") { 65 | return m.content; 66 | } 67 | if (m.type === "MetaBool") { 68 | return m.content.toString(); 69 | } 70 | if (m.type === "MetaBlocks") { 71 | return extractStringFromBlocks(m.content); 72 | } 73 | if (m.type === "MetaInlines") { 74 | return extractStringFromInlines(m.content); 75 | } 76 | if (m.type === 
"MetaList") { 77 | return m.content.map(metaValueToString).join(", "); 78 | } 79 | if (m.type === "MetaMap") { 80 | return Object.entries(m.values) 81 | .map(([key, value]) => `${key}: ${metaValueToString(value)}`) 82 | .join(", "); 83 | } 84 | return ""; 85 | }; 86 | 87 | export const metaValueToJsonSerializable = (m: MetaValue) => { 88 | if (m.type === "MetaBool") { 89 | return m.content; 90 | } 91 | if (m.type === "MetaList") { 92 | return m.content.map(metaValueToJsonSerializable); 93 | } 94 | if (m.type === "MetaMap") { 95 | const entries: [string, any][] = Object.entries(m.values).map( 96 | ([key, value]) => { 97 | return [key, metaValueToJsonSerializable(value)]; 98 | } 99 | ); 100 | const res: { [key: string]: any } = {}; 101 | entries.forEach((entry) => { 102 | const [key, value] = entry; 103 | res[key] = value; 104 | }); 105 | return res; 106 | } 107 | return metaValueToString(m); 108 | }; 109 | -------------------------------------------------------------------------------- /src/pandocUtils.ts: -------------------------------------------------------------------------------- 1 | import { Inline, Doc, Plain, Para, Block } from "./types"; 2 | import { callPandoc } from "./util"; 3 | import { emitPandocJson } from "./emit"; 4 | import { parsePandocJson } from "./parse"; 5 | import { flatten } from "./transform/util"; 6 | 7 | export const getOutputStringForPandocDoc = ( 8 | document: Doc, 9 | format: string 10 | ): string => 11 | callPandoc(JSON.stringify(emitPandocJson(document)), "json", format).trim(); 12 | 13 | export const getPandocDocForInputString = ( 14 | input: string, 15 | format: string 16 | ): Doc => { 17 | if (!input) { 18 | return { type: "Doc", blocks: [], meta: {} }; 19 | } 20 | return parsePandocJson(JSON.parse(callPandoc(input, format, "json"))); 21 | }; 22 | 23 | export const getHtmlStringForPandocDoc = (document: Doc): string => 24 | getOutputStringForPandocDoc(document, "html"); 25 | 26 | export const getPandocDocForHtmlString = 
(htmlString: string): Doc => 27 | getPandocDocForInputString(htmlString, "html"); 28 | 29 | export const pandocBlocksToOutputString = (blocks: Block[], format: string) => { 30 | if (blocks.length === 0) { 31 | return ""; 32 | } 33 | const document: Doc = { 34 | type: "Doc", 35 | blocks, 36 | meta: {}, 37 | }; 38 | return getOutputStringForPandocDoc(document, format); 39 | }; 40 | 41 | export const pandocInlineToOutputString = ( 42 | content: Inline[], 43 | format: string 44 | ) => { 45 | return pandocBlocksToOutputString([{ type: "Para", content }], format); 46 | }; 47 | 48 | export const pandocInlineToHtmlString = (nodes: Inline[]) => 49 | pandocInlineToOutputString(nodes, "html"); 50 | 51 | export const pandocInlineToPlainString = (nodes: Inline[]) => 52 | pandocInlineToOutputString(nodes, "plain"); 53 | 54 | export const pandocBlocksToHtmlString = (blocks: Block[]) => { 55 | if (blocks.length === 0) { 56 | return ""; 57 | } 58 | const document: Doc = { 59 | type: "Doc", 60 | blocks, 61 | meta: {}, 62 | }; 63 | return getHtmlStringForPandocDoc(document); 64 | }; 65 | 66 | export const htmlStringToPandocInline = (htmlString: string): Inline[] => { 67 | if (!htmlString) { 68 | return []; 69 | } 70 | const pandocAst = getPandocDocForHtmlString(htmlString); 71 | return flatten( 72 | ( 73 | pandocAst.blocks.filter( 74 | (block) => block.type === "Plain" || block.type === "Para" 75 | ) as (Plain | Para)[] 76 | ).map((block) => block.content) 77 | ); 78 | }; 79 | 80 | export const htmlStringToPandocBlocks = (htmlString: string): Block[] => { 81 | if (!htmlString) { 82 | return []; 83 | } 84 | const pandocAst = getPandocDocForHtmlString(htmlString); 85 | return pandocAst.blocks; 86 | }; 87 | -------------------------------------------------------------------------------- /src/parse.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Alignment, 3 | Attr, 4 | Block, 5 | BlockQuote, 6 | BulletList, 7 | Caption, 8 | Cell, 9 | 
CitationMode, 10 | Cite, 11 | Code, 12 | CodeBlock, 13 | ColSpec, 14 | DefinitionList, 15 | Div, 16 | Doc, 17 | Format, 18 | Header, 19 | Image, 20 | Inline, 21 | LineBlock, 22 | Link, 23 | ListAttributes, 24 | ListNumberDelim, 25 | ListNumberStyle, 26 | Math, 27 | MathType, 28 | MetaBlocks, 29 | MetaBool, 30 | MetaInlines, 31 | MetaList, 32 | MetaMap, 33 | MetaString, 34 | MetaValue, 35 | Note, 36 | OrderedList, 37 | PandocJson, 38 | Para, 39 | Plain, 40 | Quoted, 41 | QuoteType, 42 | RawBlock, 43 | RawInline, 44 | Row, 45 | SimpleInline, 46 | Span, 47 | Str, 48 | Table, 49 | TableBody, 50 | TableFoot, 51 | TableHead, 52 | Target, 53 | } from "./types"; 54 | 55 | const unwrapEnum = (instance: { t: T }): T => { 56 | return instance.t; 57 | }; 58 | 59 | const unwrapAttr = (attr: [string, string[], [string, string][]]): Attr => { 60 | const [identifier, classes, propertiesList] = attr; 61 | const properties = {}; 62 | propertiesList.forEach(([key, value]) => { 63 | properties[key] = value; 64 | }); 65 | return { 66 | identifier, 67 | classes, 68 | properties, 69 | }; 70 | }; 71 | 72 | const unwrapTarget = (target: [string, string]): Target => { 73 | const [url, title] = target; 74 | return { 75 | url, 76 | title, 77 | }; 78 | }; 79 | 80 | const unwrapFormat = (format: any): Format => { 81 | // TODO(ian): Figure out what to do here 82 | return format; 83 | }; 84 | 85 | const unwrapListAttributes = ( 86 | listAttributes: [number, any, any] 87 | ): ListAttributes => { 88 | const [startNumber = 1, listNumberStyle, listNumberDelim] = listAttributes; 89 | return { 90 | startNumber, 91 | listNumberStyle: unwrapEnum(listNumberStyle), 92 | listNumberDelim: unwrapEnum(listNumberDelim), 93 | }; 94 | }; 95 | 96 | const parseAtom = (n: { t }) => { 97 | return { type: n.t }; 98 | }; 99 | 100 | const parseStr = (n: { c: string }): Str => { 101 | const string = n.c; 102 | return { 103 | type: "Str", 104 | content: string, 105 | }; 106 | }; 107 | 108 | const parseSimpleInline = ( 109 
| n: { 110 | c: any[]; 111 | }, 112 | nodeType: SimpleInline["type"] 113 | ): SimpleInline => { 114 | const inline = n.c; 115 | return { 116 | type: nodeType, 117 | content: inline.map(parseInline), 118 | }; 119 | }; 120 | 121 | const parseQuoted = (n: { c: [any, any[]] }): Quoted => { 122 | const [quoteType, inline] = n.c; 123 | return { 124 | type: "Quoted", 125 | quoteType: unwrapEnum(quoteType), 126 | content: inline.map(parseInline), 127 | }; 128 | }; 129 | 130 | const parseCite = (n: { c: [any[], any[]] }): Cite => { 131 | const [citations, inline] = n.c; 132 | return { 133 | type: "Cite", 134 | citations: citations.map((citation) => { 135 | const { 136 | citationHash, 137 | citationId, 138 | citationMode, 139 | citationNoteNum, 140 | citationPrefix, 141 | citationSuffix, 142 | } = citation; 143 | return { 144 | citationHash, 145 | citationId, 146 | citationMode: unwrapEnum(citationMode), 147 | citationNoteNum, 148 | citationPrefix: citationPrefix.map(parseInline), 149 | citationSuffix: citationSuffix.map(parseInline), 150 | }; 151 | }), 152 | content: inline.map(parseInline), 153 | }; 154 | }; 155 | 156 | const parseCode = (n: { c: [any, string] }): Code => { 157 | const [attr, code] = n.c; 158 | return { 159 | type: "Code", 160 | attr: unwrapAttr(attr), 161 | content: code, 162 | }; 163 | }; 164 | 165 | const parseMath = (n: { c: [any, string] }): Math => { 166 | const [mathType, content] = n.c; 167 | return { 168 | type: "Math", 169 | mathType: unwrapEnum(mathType), 170 | content: content, 171 | }; 172 | }; 173 | 174 | const parseRawInline = (n: { c: [any, string] }): RawInline => { 175 | const [format, content] = n.c; 176 | return { 177 | type: "RawInline", 178 | format: unwrapFormat(format), 179 | content, 180 | }; 181 | }; 182 | 183 | const parseImage = (n: { c: [any, any[], any] }): Image => { 184 | const [attr, inline, target] = n.c; 185 | return { 186 | type: "Image", 187 | attr: unwrapAttr(attr), 188 | content: inline.map(parseInline), 189 | target: 
unwrapTarget(target), 190 | }; 191 | }; 192 | 193 | const parseLink = (n: { c: [any, any[], any] }): Link => { 194 | const [attr, inline, target] = n.c; 195 | return { 196 | type: "Link", 197 | attr: unwrapAttr(attr), 198 | content: inline.map(parseInline), 199 | target: unwrapTarget(target), 200 | }; 201 | }; 202 | 203 | const parseNote = (n: { c: any[] }): Note => { 204 | const blocks = n.c; 205 | return { 206 | type: "Note", 207 | content: blocks.map(parseBlock), 208 | }; 209 | }; 210 | 211 | const parseSpan = (n: { c: [any, any[]] }): Span => { 212 | const [attr, inline] = n.c; 213 | return { 214 | type: "Span", 215 | attr: unwrapAttr(attr), 216 | content: inline.map(parseInline), 217 | }; 218 | }; 219 | 220 | export const parseInline = (n: { t: Inline["type"]; c: any }): Inline => { 221 | switch (n.t) { 222 | case "Str": 223 | return parseStr(n); 224 | case "Emph": 225 | case "Strong": 226 | case "Underline": 227 | case "Strikeout": 228 | case "Superscript": 229 | case "Subscript": 230 | case "SmallCaps": 231 | return parseSimpleInline(n, n.t); 232 | case "Quoted": 233 | return parseQuoted(n); 234 | case "Cite": 235 | return parseCite(n); 236 | case "Code": 237 | return parseCode(n); 238 | case "Space": 239 | case "SoftBreak": 240 | case "LineBreak": 241 | return parseAtom(n); 242 | case "Math": 243 | return parseMath(n); 244 | case "RawInline": 245 | return parseRawInline(n); 246 | case "Link": 247 | return parseLink(n); 248 | case "Image": 249 | return parseImage(n); 250 | case "Note": 251 | return parseNote(n); 252 | case "Span": 253 | return parseSpan(n); 254 | } 255 | }; 256 | 257 | const parsePlain = (n: { c: any[] }): Plain => { 258 | const inline = n.c; 259 | return { 260 | type: "Plain", 261 | content: inline.map(parseInline), 262 | }; 263 | }; 264 | 265 | const parsePara = (n: { c: any[] }): Para => { 266 | const inline = n.c; 267 | return { 268 | type: "Para", 269 | content: inline.map(parseInline), 270 | }; 271 | }; 272 | 273 | const parseLineBlock 
= (n: { c: any[][] }): LineBlock => {
    // Each entry of n.c is one line, itself a list of raw inline nodes.
    return {
        type: "LineBlock",
        content: n.c.map((line) => line.map((inline) => parseInline(inline))),
    };
};

/** Parses a CodeBlock: c = [attr, source string]. */
const parseCodeBlock = (n: { c: [any, string] }): CodeBlock => {
    const [attr, source] = n.c;
    return {
        type: "CodeBlock",
        attr: unwrapAttr(attr),
        content: source,
    };
};

/** Parses a RawBlock: c = [format, raw string]. */
const parseRawBlock = (n: { c: [any, string] }): RawBlock => {
    const [format, raw] = n.c;
    return {
        type: "RawBlock",
        format: unwrapFormat(format),
        content: raw,
    };
};

/** Parses a BlockQuote, whose content is a list of blocks. */
const parseBlockQuote = (n: { c: any[] }): BlockQuote => {
    return {
        type: "BlockQuote",
        content: n.c.map(parseBlock),
    };
};

/** Parses an OrderedList: c = [listAttributes, items]; items are Block[][]. */
const parseOrderedList = (n: { c: [any, any[][]] }): OrderedList => {
    const [rawListAttributes, rawItems] = n.c;
    return {
        type: "OrderedList",
        listAttributes: unwrapListAttributes(rawListAttributes),
        content: rawItems.map((item) => item.map(parseBlock)),
    };
};

/** Parses a BulletList; each item is a list of blocks. */
const parseBulletList = (n: { c: any[][] }): BulletList => {
    return {
        type: "BulletList",
        content: n.c.map((item) => item.map(parseBlock)),
    };
};

/**
 * Parses a DefinitionList. Each item pairs a term (inlines) with one or
 * more definitions (each a list of blocks).
 */
const parseDefinitionList = (n: { c: [any[], any[][]][] }): DefinitionList => {
    const entries = n.c.map(([term, definitions]) => ({
        term: term.map(parseInline),
        definitions: definitions.map((definition) =>
            definition.map(parseBlock)
        ),
    }));
    return {
        type: "DefinitionList",
        entries,
    };
};

/** Parses a Header: c = [level, attr, inlines]. */
const parseHeader = (n: { c: [number, any, any[]] }): Header => {
    const [level, attr, inline] = n.c;
    return {
        type: "Header",
        level,
        attr: unwrapAttr(attr),
content: inline.map(parseInline), 348 | }; 349 | }; 350 | 351 | const parseDiv = (n: { c: [any, any[]] }): Div => { 352 | const [attr, blocks] = n.c; 353 | return { 354 | type: "Div", 355 | attr: unwrapAttr(attr), 356 | content: blocks.map(parseBlock), 357 | }; 358 | }; 359 | 360 | const parseCell = (n: [any, any, any, any, any[]]): Cell => { 361 | const [attr, alignment, rowSpan, colSpan, blocks] = n; 362 | return { 363 | type: "Cell", 364 | attr: unwrapAttr(attr), 365 | alignment: unwrapEnum(alignment), 366 | rowSpan, 367 | colSpan, 368 | content: blocks.map(parseBlock), 369 | }; 370 | }; 371 | 372 | const parseRow = (n: [any, any[]]): Row => { 373 | const [attr, cells] = n; 374 | return { 375 | type: "Row", 376 | attr: unwrapAttr(attr), 377 | cells: cells.map(parseCell), 378 | }; 379 | }; 380 | 381 | const parseTableHead = (n: [any, any[]]): TableHead => { 382 | const [attr, rows] = n; 383 | return { 384 | type: "TableHead", 385 | attr: unwrapAttr(attr), 386 | rows: rows.map(parseRow), 387 | }; 388 | }; 389 | 390 | const parseTableFoot = (n: [any, any[]]): TableFoot => { 391 | const [attr, rows] = n; 392 | return { 393 | type: "TableFoot", 394 | attr: unwrapAttr(attr), 395 | rows: rows.map(parseRow), 396 | }; 397 | }; 398 | 399 | const parseTableBody = (n: [any, any, any[], any[]]): TableBody => { 400 | const [attr, rowHeadColumns, head, body] = n; 401 | return { 402 | type: "TableBody", 403 | rowHeadColumns, 404 | attr: unwrapAttr(attr), 405 | headRows: head.map(parseRow), 406 | bodyRows: body.map(parseRow), 407 | }; 408 | }; 409 | 410 | const parseColSpec = (n: [any, any]): ColSpec => { 411 | const [alignment, colWidth] = n; 412 | const base = { 413 | type: "ColSpec" as const, 414 | alignment: unwrapEnum(alignment), 415 | }; 416 | if (colWidth.t === "ColWidthDefault") { 417 | return { ...base, defaultWidth: true }; 418 | } 419 | return { ...base, width: colWidth.c }; 420 | }; 421 | 422 | const parseCaption = (n: [null | any[], any[]]): Caption => { 423 | const 
[shortCaption, content] = n; 424 | const baseCaption: Caption = { 425 | type: "Caption", 426 | content: content.map(parseBlock), 427 | }; 428 | if (shortCaption) { 429 | return { 430 | ...baseCaption, 431 | shortCaption: shortCaption.map(parseInline), 432 | }; 433 | } 434 | return baseCaption; 435 | }; 436 | 437 | const parseTable = (n: { c: [any, any, any[], any, any[], any] }): Table => { 438 | const [attr, caption, colSpecs, head, bodies, foot] = n.c; 439 | return { 440 | type: "Table", 441 | attr: unwrapAttr(attr), 442 | caption: parseCaption(caption), 443 | colSpecs: colSpecs.map(parseColSpec), 444 | head: parseTableHead(head), 445 | bodies: bodies.map(parseTableBody), 446 | foot: parseTableFoot(foot), 447 | }; 448 | }; 449 | 450 | export const parseBlock = (n: { t: Block["type"]; c: any }): Block => { 451 | switch (n.t) { 452 | case "Plain": 453 | return parsePlain(n); 454 | case "Para": 455 | return parsePara(n); 456 | case "LineBlock": 457 | return parseLineBlock(n); 458 | case "CodeBlock": 459 | return parseCodeBlock(n); 460 | case "RawBlock": 461 | return parseRawBlock(n); 462 | case "BlockQuote": 463 | return parseBlockQuote(n); 464 | case "OrderedList": 465 | return parseOrderedList(n); 466 | case "BulletList": 467 | return parseBulletList(n); 468 | case "DefinitionList": 469 | return parseDefinitionList(n); 470 | case "Header": 471 | return parseHeader(n); 472 | case "HorizontalRule": 473 | case "Null": 474 | return parseAtom(n); 475 | case "Div": 476 | return parseDiv(n); 477 | case "Table": 478 | return parseTable(n); 479 | } 480 | }; 481 | 482 | const parseMetaMap = (n: { c: { [key: string]: any } }): MetaMap => { 483 | const values = {}; 484 | Object.entries(n.c).forEach(([key, value]) => { 485 | values[key] = parseMetaValue(value); 486 | }); 487 | return { type: "MetaMap", values }; 488 | }; 489 | 490 | const parseMetaList = (n: { c: any[] }): MetaList => { 491 | return { 492 | type: "MetaList", 493 | content: n.c.map(parseMetaValue), 494 | }; 495 
| }; 496 | 497 | const parseMetaBool = (n: { c: boolean }): MetaBool => { 498 | return { 499 | type: "MetaBool", 500 | content: n.c, 501 | }; 502 | }; 503 | 504 | const parseMetaString = (n: { c: string }): MetaString => { 505 | return { 506 | type: "MetaString", 507 | content: n.c, 508 | }; 509 | }; 510 | 511 | const parseMetaInlines = (n: { c: any[] }): MetaInlines => { 512 | return { 513 | type: "MetaInlines", 514 | content: n.c.map(parseInline), 515 | }; 516 | }; 517 | 518 | const parseMetaBlocks = (n: { c: any[] }): MetaBlocks => { 519 | return { 520 | type: "MetaBlocks", 521 | content: n.c.map(parseBlock), 522 | }; 523 | }; 524 | 525 | const parseMetaValue = (n: { t: string; c: any }): MetaValue => { 526 | switch (n.t) { 527 | case "MetaMap": 528 | return parseMetaMap(n); 529 | case "MetaList": 530 | return parseMetaList(n); 531 | case "MetaBool": 532 | return parseMetaBool(n); 533 | case "MetaString": 534 | return parseMetaString(n); 535 | case "MetaInlines": 536 | return parseMetaInlines(n); 537 | case "MetaBlocks": 538 | return parseMetaBlocks(n); 539 | } 540 | }; 541 | 542 | const parseMeta = (meta: PandocJson["meta"]) => { 543 | const parsedMeta: Record = {}; 544 | Object.entries(meta).forEach(([key, value]) => { 545 | parsedMeta[key] = parseMetaValue(value); 546 | }); 547 | return parsedMeta; 548 | }; 549 | 550 | export const parsePandocJson = (json: PandocJson): Doc => { 551 | const { meta, blocks } = json; 552 | return { 553 | type: "Doc", 554 | blocks: blocks.map(parseBlock), 555 | meta: parseMeta(meta), 556 | }; 557 | }; 558 | -------------------------------------------------------------------------------- /src/transform/__tests__/util.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { flatten } from "../util"; 3 | 4 | describe("flatten", () => { 5 | it("turns a non-array input into an array with a single element", () => { 6 | expect(flatten(1)).toEqual([1]); 7 
| }); 8 | 9 | it("handles a flat array by returning an element-wise identical array", () => { 10 | expect(flatten([1, 2, 3])).toEqual([1, 2, 3]); 11 | }); 12 | 13 | it("flattens an array two layers deep", () => { 14 | expect(flatten([1, [2, 3], 4, [5], 6])).toEqual([1, 2, 3, 4, 5, 6]); 15 | }); 16 | 17 | it("flattens an array many layers deep", () => { 18 | expect(flatten([1, [2, [3]], 4, [[5], 6], [7, [8, 9]]])).toEqual([ 19 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 20 | ]); 21 | }); 22 | }); 23 | -------------------------------------------------------------------------------- /src/transform/fluent.ts: -------------------------------------------------------------------------------- 1 | import { asNode, asArray } from "./util"; 2 | import { ProsemirrorNode, PandocNode } from "../types"; 3 | 4 | const FLUENT_SYMBOL = Symbol(); 5 | 6 | export type Fluent = { 7 | asArray: () => T[]; 8 | asNode: () => T; 9 | fluent: typeof FLUENT_SYMBOL; 10 | }; 11 | 12 | const isFluent = (item: any): item is Fluent => 13 | "fluent" in item && item.fluent === FLUENT_SYMBOL; 14 | 15 | export const fluent = ( 16 | item: T | T[] | Fluent 17 | ): Fluent => { 18 | if (isFluent(item)) { 19 | return item; 20 | } else { 21 | return { 22 | asArray: () => asArray(item), 23 | asNode: () => asNode(item), 24 | fluent: FLUENT_SYMBOL, 25 | }; 26 | } 27 | }; 28 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/__tests__/__snapshots__/heal.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`healNaiveTokenList heals a real-world example of a misplaced image element 1`] = `"O(doc) O*(bullet_list) O(list_item) O(paragraph) O(text) C(text) C(paragraph) C(list_item) O*(list_item) C(list_item) C(bullet_list) O(image) C(image) O*(bullet_list) O*(list_item) C(list_item) O(list_item) O(paragraph) O(text) C(text) C(paragraph) C(list_item) C(bullet_list) 
C(doc)"`; 4 | 5 | exports[`healNaiveTokenList heals a real-world example of a misplaced image element that is less dire 1`] = `"O(doc) O(bullet_list) O(list_item) O(paragraph) O(text) C(text) C(paragraph) O*(paragraph) O(text) C(text) C(paragraph) O(image) C(image) O*(paragraph) O(text) C(text) C(paragraph) C(list_item) O(list_item) O(paragraph) O(text) C(text) C(paragraph) C(list_item) C(bullet_list) C(doc)"`; 6 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/__tests__/heal.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { ProsemirrorSchema } from "../../../types"; 3 | import { prosemirrorSchema } from "../../../example/schema"; 4 | 5 | import { getNaiveTokenList, healNaiveTokenList, Token, heal } from "../heal"; 6 | 7 | const toyProsemirrorSchema: ProsemirrorSchema = new ProsemirrorSchema({ 8 | nodes: { 9 | A: { 10 | defining: true, 11 | content: "B*", 12 | }, 13 | B: { 14 | defining: true, 15 | content: "(C|D)*", 16 | }, 17 | C: { 18 | defining: true, 19 | content: "D*", 20 | }, 21 | D: { 22 | defining: true, 23 | content: "text*", 24 | }, 25 | text: { 26 | inline: true, 27 | group: "inline", 28 | }, 29 | }, 30 | marks: {}, 31 | topNode: "A", 32 | }); 33 | 34 | const stringify = (tokens: Token[]) => 35 | tokens 36 | .map((t: Token) => { 37 | const prefix = t.type === "open" ? "O" : "C"; 38 | const maybeAsterisk = 39 | t.type === "open" && t.createdFromSplit ? 
"*" : ""; 40 | return `${prefix}${maybeAsterisk}(${t.node.type})`; 41 | }) 42 | .join(" "); 43 | 44 | describe("getNaiveTokenList", () => { 45 | it("tokenizes a Prosemirror tree", () => { 46 | expect( 47 | stringify( 48 | getNaiveTokenList({ 49 | type: "A", 50 | content: [ 51 | { 52 | type: "B", 53 | content: [{ type: "C", content: [{ type: "D" }] }], 54 | }, 55 | ], 56 | }) 57 | ) 58 | ).toEqual("O(A) O(B) O(C) O(D) C(D) C(C) C(B) C(A)"); 59 | }); 60 | }); 61 | 62 | describe("healNaiveTokenList", () => { 63 | it("passes through a valid tree", () => { 64 | expect( 65 | stringify( 66 | healNaiveTokenList( 67 | getNaiveTokenList({ 68 | type: "A", 69 | content: [ 70 | { 71 | type: "B", 72 | content: [ 73 | { type: "D" }, 74 | { 75 | type: "C", 76 | content: [{ type: "D" }, { type: "D" }], 77 | }, 78 | ], 79 | }, 80 | ], 81 | }), 82 | toyProsemirrorSchema 83 | ) 84 | ) 85 | ).toEqual( 86 | "O(A) O(B) O(D) C(D) O(C) O(D) C(D) O(D) C(D) C(C) C(B) C(A)" 87 | ); 88 | }); 89 | 90 | it("heals an invalid Prosemirror tree", () => { 91 | const naiveList = getNaiveTokenList({ 92 | type: "A", 93 | content: [{ type: "B", content: [{ type: "B" }] }], 94 | }); 95 | expect( 96 | stringify(healNaiveTokenList(naiveList, toyProsemirrorSchema)) 97 | ).toEqual("O(A) O*(B) C(B) O(B) C(B) O*(B) C(B) C(A)"); 98 | }); 99 | 100 | it("heals a more complicated invalid Prosemirror tree", () => { 101 | const naiveList = getNaiveTokenList({ 102 | type: "A", 103 | content: [ 104 | { 105 | type: "B", 106 | content: [ 107 | { type: "B" }, 108 | { type: "D", content: [{ type: "C" }] }, 109 | ], 110 | }, 111 | ], 112 | }); 113 | expect( 114 | stringify(healNaiveTokenList(naiveList, toyProsemirrorSchema)) 115 | ).toEqual( 116 | "O(A) O*(B) C(B) O(B) C(B) O*(B) O*(D) C(D) O(C) C(C) O*(D) C(D) C(B) C(A)" 117 | ); 118 | }); 119 | 120 | it("heals a real-world example of a misplaced image element", () => { 121 | const naiveList = getNaiveTokenList({ 122 | type: "doc", 123 | content: [ 124 | { 125 | type: 
"bullet_list", 126 | content: [ 127 | { 128 | type: "list_item", 129 | content: [ 130 | { 131 | type: "paragraph", 132 | content: [{ type: "text" }], 133 | }, 134 | ], 135 | }, 136 | { type: "list_item", content: [{ type: "image" }] }, 137 | { 138 | type: "list_item", 139 | content: [ 140 | { 141 | type: "paragraph", 142 | content: [{ type: "text" }], 143 | }, 144 | ], 145 | }, 146 | ], 147 | }, 148 | ], 149 | }); 150 | expect( 151 | stringify(healNaiveTokenList(naiveList, prosemirrorSchema)) 152 | ).toMatchSnapshot(); 153 | }); 154 | 155 | it("heals a real-world example of a misplaced image element that is less dire", () => { 156 | const naiveList = getNaiveTokenList({ 157 | type: "doc", 158 | content: [ 159 | { 160 | type: "bullet_list", 161 | content: [ 162 | { 163 | type: "list_item", 164 | content: [ 165 | { 166 | type: "paragraph", 167 | content: [{ type: "text" }], 168 | }, 169 | { 170 | type: "paragraph", 171 | content: [ 172 | { type: "text" }, 173 | { type: "image" }, 174 | { type: "text" }, 175 | ], 176 | }, 177 | ], 178 | }, 179 | { 180 | type: "list_item", 181 | content: [ 182 | { 183 | type: "paragraph", 184 | content: [{ type: "text" }], 185 | }, 186 | ], 187 | }, 188 | ], 189 | }, 190 | ], 191 | }); 192 | expect( 193 | stringify(healNaiveTokenList(naiveList, prosemirrorSchema)) 194 | ).toMatchSnapshot(); 195 | }); 196 | }); 197 | 198 | describe("heal", () => { 199 | it("heals an improper Prosemirror document", () => { 200 | expect( 201 | heal( 202 | { 203 | type: "doc", 204 | content: [ 205 | { 206 | type: "bullet_list", 207 | content: [ 208 | { 209 | type: "list_item", 210 | content: [ 211 | { 212 | type: "paragraph", 213 | content: [{ type: "text" }], 214 | }, 215 | ], 216 | }, 217 | { 218 | type: "list_item", 219 | content: [{ type: "image" }], 220 | }, 221 | { 222 | type: "list_item", 223 | content: [ 224 | { 225 | type: "paragraph", 226 | content: [{ type: "text" }], 227 | }, 228 | ], 229 | }, 230 | ], 231 | }, 232 | ], 233 | }, 234 | 
prosemirrorSchema 235 | ) 236 | ).toEqual({ 237 | type: "doc", 238 | content: [ 239 | { 240 | type: "bullet_list", 241 | content: [ 242 | { 243 | type: "list_item", 244 | content: [ 245 | { 246 | type: "paragraph", 247 | content: [{ type: "text" }], 248 | }, 249 | ], 250 | }, 251 | ], 252 | }, 253 | { type: "image" }, 254 | { 255 | type: "bullet_list", 256 | content: [ 257 | { 258 | type: "list_item", 259 | content: [ 260 | { 261 | type: "paragraph", 262 | content: [{ type: "text" }], 263 | }, 264 | ], 265 | }, 266 | ], 267 | }, 268 | ], 269 | }); 270 | }); 271 | 272 | it("heals a broken Prosemirror document that requires less intervention", () => { 273 | expect( 274 | heal( 275 | { 276 | type: "doc", 277 | content: [ 278 | { 279 | type: "bullet_list", 280 | content: [ 281 | { 282 | type: "list_item", 283 | content: [ 284 | { 285 | type: "paragraph", 286 | content: [{ type: "text" }], 287 | }, 288 | { 289 | type: "paragraph", 290 | content: [ 291 | { type: "text" }, 292 | { type: "image" }, 293 | { type: "text" }, 294 | ], 295 | }, 296 | ], 297 | }, 298 | { 299 | type: "list_item", 300 | content: [ 301 | { 302 | type: "paragraph", 303 | content: [{ type: "text" }], 304 | }, 305 | ], 306 | }, 307 | ], 308 | }, 309 | ], 310 | }, 311 | prosemirrorSchema 312 | ) 313 | ).toEqual({ 314 | type: "doc", 315 | content: [ 316 | { 317 | type: "bullet_list", 318 | content: [ 319 | { 320 | type: "list_item", 321 | content: [ 322 | { 323 | type: "paragraph", 324 | content: [{ type: "text" }], 325 | }, 326 | { 327 | type: "paragraph", 328 | content: [{ type: "text" }], 329 | }, 330 | { type: "image" }, 331 | { 332 | type: "paragraph", 333 | content: [{ type: "text" }], 334 | }, 335 | ], 336 | }, 337 | { 338 | type: "list_item", 339 | content: [ 340 | { 341 | type: "paragraph", 342 | content: [{ type: "text" }], 343 | }, 344 | ], 345 | }, 346 | ], 347 | }, 348 | ], 349 | }); 350 | }); 351 | }); 352 | -------------------------------------------------------------------------------- 
/src/transform/fromPandoc/fromPandoc.ts:
--------------------------------------------------------------------------------
import {
    PandocNode,
    ProsemirrorNode,
    ProsemirrorMark,
    Inline,
    Block,
} from "types";

import { asArray, makeCounter } from "transform/util";
import { fluent, Fluent } from "transform/fluent";
import {
    FromPandocTransformContext,
    FromPandocTransformConfig,
} from "transform/types";
import { RuleSet } from "transform/ruleset";

import { applyMarksToNodes } from "./marks";
import { heal } from "./heal";

/**
 * Transforms Pandoc node(s) into Prosemirror nodes by repeatedly matching
 * the remaining elements against the ruleset. Marks produced by marks-rules
 * are recorded into context.marksMap rather than applied inline;
 * applyMarksToNodes attaches them afterwards.
 */
const fromPandocInner = (
    elementOrArray: PandocNode | PandocNode[],
    context: FromPandocTransformContext
    // NOTE(review): generic argument restored — the dump stripped `<...>`;
    // the function returns fluent(transformed: ProsemirrorNode[]).
): Fluent<ProsemirrorNode> => {
    if (!elementOrArray) {
        return fluent([] as ProsemirrorNode[]);
    }
    const { ruleset, marksMap } = context;
    const elements = asArray(elementOrArray);
    const transformed: ProsemirrorNode[] = [];
    // Marks gathered during this call only; merged into the shared map below.
    const localMarksMap = new Map<ProsemirrorNode, ProsemirrorMark[]>();
    let ptr = 0;
    while (ptr < elements.length) {
        const remaining = elements.slice(ptr);
        const { rule, acceptedCount } = ruleset.matchPandocNodes(remaining);
        if (rule.isMarksRule) {
            const accepted = elements[ptr];
            const marks = asArray(rule.transformer(accepted, context));
            if ("content" in accepted) {
                const innerTransformed =
                    typeof accepted.content === "string"
                        ? [{ type: "text", text: accepted.content }]
                        : fromPandocInner(
                              // This cast works around the fact that some Pandoc nodes have nested arrays
                              // as their content property (e.g. OrderedList has Block[][]). This shouldn't
                              // be a problem in practice unless you're trying to do something very stupid
47 | accepted.content as Block[] | Inline[], 48 | context 49 | ).asArray(); 50 | for (const node of innerTransformed) { 51 | localMarksMap.set(node, marks); 52 | } 53 | transformed.push(...innerTransformed); 54 | } 55 | } else if (rule.isMarksRule === false) { 56 | const accepted = rule.acceptsMultiple 57 | ? elements.slice(ptr, ptr + acceptedCount) 58 | : elements[ptr]; 59 | const addition = rule.transformer(accepted, context); 60 | transformed.push(...asArray(addition)); 61 | } 62 | ptr += acceptedCount; 63 | } 64 | for (const [node, localMarks] of localMarksMap.entries()) { 65 | const currentMarks = marksMap.get(node) || []; 66 | marksMap.set(node, [...currentMarks, ...localMarks]); 67 | } 68 | return fluent(transformed); 69 | }; 70 | 71 | export const fromPandoc = ( 72 | elementOrArray: PandocNode | PandocNode[], 73 | ruleset: RuleSet, 74 | config: Partial = {} 75 | ): Fluent => { 76 | const { 77 | resources = {}, 78 | useSmartQuotes = false, 79 | prosemirrorDocWidth = 1000, 80 | } = config; 81 | const context: FromPandocTransformContext = { 82 | ruleset, 83 | resources, 84 | useSmartQuotes, 85 | count: makeCounter(), 86 | transform: (element, parentContext = {}) => 87 | fromPandocInner(element, { ...context, ...parentContext }), 88 | marksMap: new Map(), 89 | prosemirrorDocWidth, 90 | }; 91 | const nodes = context.transform(elementOrArray); 92 | const nodesWithMarks = applyMarksToNodes( 93 | nodes.asArray(), 94 | ruleset.prosemirrorSchema, 95 | context.marksMap 96 | ); 97 | const healed = nodesWithMarks.map((node) => 98 | heal(node, ruleset.prosemirrorSchema) 99 | ); 100 | return fluent(healed); 101 | }; 102 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/heal.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorNode, ProsemirrorSchema } from "types"; 2 | import { parseExpr, Expr, createItemAcceptor } from "expression"; 3 | 4 | type OpenToken = 
{
    type: "open";
    node: ProsemirrorNode;
    // Set when this token was produced by splitting an ancestor open/close
    // pair during healing, rather than taken from the original tree.
    createdFromSplit?: boolean;
};

// A token marking the end of a node's content.
type CloseToken = {
    type: "close";
    node: ProsemirrorNode;
};

// Per-open-token acceptance state: a stateful content-expression matcher
// plus the children it has accepted so far.
type AcceptedState = {
    consumeNode: (node: ProsemirrorNode) => boolean;
    acceptedNodes: ProsemirrorNode[];
};

export type Token = OpenToken | CloseToken;

/**
 * Flattens a Prosemirror tree into a depth-first stream of open/close
 * tokens, with no validity checking ("naive").
 */
export const getNaiveTokenList = (node: ProsemirrorNode): Token[] => {
    const tokens: Token[] = [];

    const visit = (node: ProsemirrorNode) => {
        tokens.push({ type: "open", node });
        if (node.content) {
            for (const child of node.content) {
                visit(child);
            }
        }
        tokens.push({ type: "close", node });
    };

    visit(node);
    return tokens;
};

/**
 * Repairs a naive token stream so every node's children satisfy its schema
 * content expression. When a child is rejected by all open ancestors, the
 * offending ancestors are closed (split) before the child and reopened
 * after it.
 */
export const healNaiveTokenList = (
    tokens: Token[],
    schema: ProsemirrorSchema
): Token[] => {
    const openTokens: OpenToken[] = [];
    const nextTokens: Token[] = [];

    // NOTE(review): generic arguments restored from usage — the dump shows
    // bare `Map` annotations, which are invalid; each map's key/value types
    // follow from its set() calls below.
    const tokenToAcceptorMap: Map<OpenToken, AcceptedState> = new Map();
    const newOpenTokensMap: Map<ProsemirrorNode, OpenToken[]> = new Map();
    const acceptorExpressions: Map<string, Expr> = new Map();
    const nodeToTokenMap: Map<ProsemirrorNode, Token[]> = new Map();

    // Pre-parse each node type's content expression (leaf types have none).
    Object.entries(schema.nodes).forEach(([key, entry]) => {
        if (entry.spec.content) {
            acceptorExpressions.set(key, parseExpr(entry.spec.content));
        }
    });

    // A node matches a content-expression atom when the atom names either
    // its type or its schema group.
    const matchProsemirrorNode = (group: string) => (node: ProsemirrorNode) => {
        const schemaEntry = schema.nodes[node.type];
        const matchRes =
            node.type === group || schemaEntry.spec.group === group;
        return matchRes;
    };

    // Emits an open token and records it in the bookkeeping structures.
    const openToken = (token: OpenToken) => {
        nextTokens.push(token);
        openTokens.push(token);
        nodeToTokenMap.set(token.node, [
            ...(nodeToTokenMap.get(token.node) || []),
            token,
        ]);
    };

    // Lazily creates the acceptance state for an open token. (Parameter was
    // previously implicitly `any`.)
    const getOrAddAcceptedStateToTokenMap = (
        token: OpenToken
    ): AcceptedState => {
        const existing = tokenToAcceptorMap.get(token);
        if (existing) {
            return existing;
        }
        const
next = { 79 | consumeNode: createItemAcceptor( 80 | acceptorExpressions.get(token.node.type), 81 | matchProsemirrorNode 82 | ), 83 | acceptedNodes: [] as ProsemirrorNode[], 84 | }; 85 | tokenToAcceptorMap.set(token, next); 86 | return next; 87 | }; 88 | 89 | for (const token of tokens) { 90 | if (token.type === "open") { 91 | const toReopen: OpenToken[] = []; 92 | let acceptingParentDepth = openTokens.length - 1; 93 | let accepted = token === tokens[0]; 94 | while (!accepted) { 95 | if (acceptingParentDepth < 0) { 96 | throw new Error( 97 | `Prosemirror healer cannot find a suitable parent node for ${token.node.type}` + 98 | ` (closed ${toReopen 99 | .concat() 100 | .reverse() 101 | .map((t) => t.node.type) 102 | .join(", ")})` 103 | ); 104 | } 105 | const testingToken = openTokens[acceptingParentDepth]; 106 | const { acceptedNodes, consumeNode } = 107 | getOrAddAcceptedStateToTokenMap(testingToken); 108 | accepted = consumeNode(token.node); 109 | if (accepted) { 110 | acceptedNodes.push(token.node); 111 | } else { 112 | nextTokens.push({ 113 | type: "close", 114 | node: testingToken.node, 115 | }); 116 | openTokens.pop(); 117 | toReopen.unshift({ 118 | type: "open", 119 | node: testingToken.node, 120 | }); 121 | --acceptingParentDepth; 122 | } 123 | } 124 | openToken(token); 125 | if (toReopen.length > 0) { 126 | newOpenTokensMap.set(token.node, toReopen); 127 | } 128 | } else if (token.type === "close") { 129 | nextTokens.push(token); 130 | openTokens.pop(); 131 | const mustOpenNow = newOpenTokensMap.get(token.node); 132 | if (mustOpenNow) { 133 | for (const tokenToOpen of mustOpenNow) { 134 | openToken(tokenToOpen); 135 | } 136 | } 137 | } 138 | } 139 | return nextTokens.map((token) => { 140 | const tokensForNode = nodeToTokenMap.get(token.node); 141 | if (tokensForNode && tokensForNode.length > 1) { 142 | return { ...token, createdFromSplit: true }; 143 | } 144 | return token; 145 | }); 146 | }; 147 | 148 | export const heal = ( 149 | node: ProsemirrorNode, 150 
| schema: ProsemirrorSchema 151 | ): ProsemirrorNode => { 152 | const naiveTokens = getNaiveTokenList(node); 153 | const tokens = healNaiveTokenList(naiveTokens, schema); 154 | const parentStack = []; 155 | let rootNode: ProsemirrorNode; 156 | for (let i = 0; i < tokens.length; i++) { 157 | const token = tokens[i]; 158 | const nextToken = tokens[i + 1]; 159 | if (token.type === "open") { 160 | if ( 161 | nextToken && 162 | nextToken.type === "close" && 163 | nextToken.node === token.node && 164 | token.createdFromSplit 165 | ) { 166 | // We found an open(X), close(X) pair that is empty because it was created while 167 | // spliting an element during healing. Just ignore these nodes. 168 | ++i; 169 | continue; 170 | } 171 | const contentProp = token.node.content ? { content: [] } : {}; 172 | const nextNode = { ...token.node, ...contentProp }; 173 | if (parentStack.length === 0) { 174 | rootNode = nextNode; 175 | } else { 176 | parentStack[parentStack.length - 1].content.push(nextNode); 177 | } 178 | parentStack.push(nextNode); 179 | } else if (token.type === "close") { 180 | parentStack.pop(); 181 | } 182 | } 183 | if (parentStack.length !== 0) { 184 | throw new Error( 185 | "Mismatched tokens encountered while healing document." 
186 | ); 187 | } 188 | return rootNode; 189 | }; 190 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/index.ts: -------------------------------------------------------------------------------- 1 | export { fromPandoc } from "./fromPandoc"; 2 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/marks.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorNode, ProsemirrorMark, ProsemirrorSchema } from "types"; 2 | 3 | const compareMarks = (first: ProsemirrorMark, second: ProsemirrorMark) => 4 | // Tell no one what you saw here 5 | JSON.stringify(first) === JSON.stringify(second); 6 | 7 | const dedupeMarks = (marks: ProsemirrorMark[]): ProsemirrorMark[] => { 8 | const collected: ProsemirrorMark[] = []; 9 | for (const mark of marks) { 10 | if ( 11 | !collected.some((existingMark) => compareMarks(existingMark, mark)) 12 | ) { 13 | collected.push(mark); 14 | } 15 | } 16 | return collected; 17 | }; 18 | 19 | const nodeAcceptsMarks = (node: ProsemirrorNode, schema: ProsemirrorSchema) => { 20 | const definition = schema.nodes[node.type]; 21 | if (!definition) { 22 | throw new Error(`No Prosemirror schema entry for node ${node.type}`); 23 | } 24 | return definition.spec.group === "inline"; 25 | }; 26 | 27 | export const applyMarksToNodes = ( 28 | nodes: ProsemirrorNode[], 29 | schema: ProsemirrorSchema, 30 | marksMap: Map 31 | ): ProsemirrorNode[] => { 32 | const applyInner = ( 33 | node: ProsemirrorNode, 34 | appliedMarks: ProsemirrorMark[], 35 | pendingMarks: ProsemirrorMark[] 36 | ): ProsemirrorNode => { 37 | const marksAtNode = marksMap.get(node) || []; 38 | const cumulativeMarks = dedupeMarks([...pendingMarks, ...marksAtNode]); 39 | const acceptMarksHere = nodeAcceptsMarks(node, schema); 40 | const marksProps = 41 | acceptMarksHere && cumulativeMarks.length > 0 42 | ? 
{ marks: cumulativeMarks } 43 | : {}; 44 | if (!node.content && !acceptMarksHere && marksAtNode.length > 0) { 45 | console.warn( 46 | `Dropping marks at leaf node ${node.type}. This node should probably have group="inline".` 47 | ); 48 | } 49 | const nextAppliedMarks = [ 50 | ...appliedMarks, 51 | ...(acceptMarksHere ? marksAtNode : []), 52 | ]; 53 | const nextPendingMarks = acceptMarksHere ? [] : cumulativeMarks; 54 | const contentProps = node.content 55 | ? { 56 | content: node.content.map((child) => 57 | applyInner(child, nextAppliedMarks, nextPendingMarks) 58 | ), 59 | } 60 | : {}; 61 | return { 62 | ...node, 63 | ...marksProps, 64 | ...contentProps, 65 | }; 66 | }; 67 | 68 | return nodes.map((node) => applyInner(node, [], [])); 69 | }; 70 | -------------------------------------------------------------------------------- /src/transform/fromProsemirror/__tests__/__snapshots__/fromProsemirror.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`fromProsemirror converts a table 1`] = ` 4 | Object { 5 | "attr": Object { 6 | "classes": Array [], 7 | "identifier": "", 8 | "properties": Object {}, 9 | }, 10 | "bodies": Array [ 11 | Object { 12 | "attr": Object { 13 | "classes": Array [], 14 | "identifier": "", 15 | "properties": Object {}, 16 | }, 17 | "bodyRows": Array [ 18 | Object { 19 | "attr": Object { 20 | "classes": Array [], 21 | "identifier": "", 22 | "properties": Object {}, 23 | }, 24 | "cells": Array [ 25 | Object { 26 | "alignment": "AlignDefault", 27 | "attr": Object { 28 | "classes": Array [], 29 | "identifier": "", 30 | "properties": Object {}, 31 | }, 32 | "colSpan": 1, 33 | "content": Array [ 34 | Object { 35 | "content": Array [ 36 | Object { 37 | "content": "Role", 38 | "type": "Str", 39 | }, 40 | ], 41 | "type": "Para", 42 | }, 43 | ], 44 | "rowSpan": 1, 45 | "type": "Cell", 46 | }, 47 | Object { 48 | "alignment": "AlignDefault", 49 | "attr": 
Object { 50 | "classes": Array [], 51 | "identifier": "", 52 | "properties": Object {}, 53 | }, 54 | "colSpan": 1, 55 | "content": Array [ 56 | Object { 57 | "content": Array [ 58 | Object { 59 | "content": "Hero", 60 | "type": "Str", 61 | }, 62 | ], 63 | "type": "Para", 64 | }, 65 | ], 66 | "rowSpan": 1, 67 | "type": "Cell", 68 | }, 69 | Object { 70 | "alignment": "AlignDefault", 71 | "attr": Object { 72 | "classes": Array [], 73 | "identifier": "", 74 | "properties": Object {}, 75 | }, 76 | "colSpan": 1, 77 | "content": Array [ 78 | Object { 79 | "content": Array [ 80 | Object { 81 | "content": "Villain", 82 | "type": "Str", 83 | }, 84 | ], 85 | "type": "Para", 86 | }, 87 | ], 88 | "rowSpan": 1, 89 | "type": "Cell", 90 | }, 91 | ], 92 | "type": "Row", 93 | }, 94 | Object { 95 | "attr": Object { 96 | "classes": Array [], 97 | "identifier": "", 98 | "properties": Object {}, 99 | }, 100 | "cells": Array [ 101 | Object { 102 | "alignment": "AlignDefault", 103 | "attr": Object { 104 | "classes": Array [], 105 | "identifier": "", 106 | "properties": Object {}, 107 | }, 108 | "colSpan": 1, 109 | "content": Array [ 110 | Object { 111 | "content": Array [ 112 | Object { 113 | "content": "Weapon", 114 | "type": "Str", 115 | }, 116 | ], 117 | "type": "Para", 118 | }, 119 | ], 120 | "rowSpan": 1, 121 | "type": "Cell", 122 | }, 123 | Object { 124 | "alignment": "AlignDefault", 125 | "attr": Object { 126 | "classes": Array [], 127 | "identifier": "", 128 | "properties": Object {}, 129 | }, 130 | "colSpan": 1, 131 | "content": Array [ 132 | Object { 133 | "content": Array [ 134 | Object { 135 | "content": "Power", 136 | "type": "Str", 137 | }, 138 | Object { 139 | "type": "Space", 140 | }, 141 | Object { 142 | "content": "Sword", 143 | "type": "Str", 144 | }, 145 | ], 146 | "type": "Para", 147 | }, 148 | ], 149 | "rowSpan": 1, 150 | "type": "Cell", 151 | }, 152 | Object { 153 | "alignment": "AlignDefault", 154 | "attr": Object { 155 | "classes": Array [], 156 | "identifier": 
"", 157 | "properties": Object {}, 158 | }, 159 | "colSpan": 1, 160 | "content": Array [ 161 | Object { 162 | "content": Array [ 163 | Object { 164 | "content": "Havoc", 165 | "type": "Str", 166 | }, 167 | Object { 168 | "type": "Space", 169 | }, 170 | Object { 171 | "content": "Staff", 172 | "type": "Str", 173 | }, 174 | ], 175 | "type": "Para", 176 | }, 177 | ], 178 | "rowSpan": 1, 179 | "type": "Cell", 180 | }, 181 | ], 182 | "type": "Row", 183 | }, 184 | Object { 185 | "attr": Object { 186 | "classes": Array [], 187 | "identifier": "", 188 | "properties": Object {}, 189 | }, 190 | "cells": Array [ 191 | Object { 192 | "alignment": "AlignDefault", 193 | "attr": Object { 194 | "classes": Array [], 195 | "identifier": "", 196 | "properties": Object {}, 197 | }, 198 | "colSpan": 1, 199 | "content": Array [ 200 | Object { 201 | "content": Array [ 202 | Object { 203 | "content": "Dark", 204 | "type": "Str", 205 | }, 206 | Object { 207 | "type": "Space", 208 | }, 209 | Object { 210 | "content": "secret", 211 | "type": "Str", 212 | }, 213 | ], 214 | "type": "Para", 215 | }, 216 | ], 217 | "rowSpan": 1, 218 | "type": "Cell", 219 | }, 220 | Object { 221 | "alignment": "AlignDefault", 222 | "attr": Object { 223 | "classes": Array [], 224 | "identifier": "", 225 | "properties": Object {}, 226 | }, 227 | "colSpan": 1, 228 | "content": Array [ 229 | Object { 230 | "content": Array [ 231 | Object { 232 | "content": "Expert", 233 | "type": "Str", 234 | }, 235 | Object { 236 | "type": "Space", 237 | }, 238 | Object { 239 | "content": "florist", 240 | "type": "Str", 241 | }, 242 | ], 243 | "type": "Para", 244 | }, 245 | ], 246 | "rowSpan": 1, 247 | "type": "Cell", 248 | }, 249 | Object { 250 | "alignment": "AlignDefault", 251 | "attr": Object { 252 | "classes": Array [], 253 | "identifier": "", 254 | "properties": Object {}, 255 | }, 256 | "colSpan": 1, 257 | "content": Array [ 258 | Object { 259 | "content": Array [ 260 | Object { 261 | "content": "Cries", 262 | "type": "Str", 
263 | }, 264 | Object { 265 | "type": "Space", 266 | }, 267 | Object { 268 | "content": "at", 269 | "type": "Str", 270 | }, 271 | Object { 272 | "type": "Space", 273 | }, 274 | Object { 275 | "content": "romcoms", 276 | "type": "Str", 277 | }, 278 | ], 279 | "type": "Para", 280 | }, 281 | ], 282 | "rowSpan": 1, 283 | "type": "Cell", 284 | }, 285 | ], 286 | "type": "Row", 287 | }, 288 | Object { 289 | "attr": Object { 290 | "classes": Array [], 291 | "identifier": "", 292 | "properties": Object {}, 293 | }, 294 | "cells": Array [ 295 | Object { 296 | "alignment": "AlignDefault", 297 | "attr": Object { 298 | "classes": Array [], 299 | "identifier": "", 300 | "properties": Object {}, 301 | }, 302 | "colSpan": 3, 303 | "content": Array [ 304 | Object { 305 | "content": Array [ 306 | Object { 307 | "content": "Some", 308 | "type": "Str", 309 | }, 310 | Object { 311 | "type": "Space", 312 | }, 313 | Object { 314 | "content": "stuff", 315 | "type": "Str", 316 | }, 317 | Object { 318 | "type": "Space", 319 | }, 320 | Object { 321 | "content": "at", 322 | "type": "Str", 323 | }, 324 | Object { 325 | "type": "Space", 326 | }, 327 | Object { 328 | "content": "the", 329 | "type": "Str", 330 | }, 331 | Object { 332 | "type": "Space", 333 | }, 334 | Object { 335 | "content": "bottom", 336 | "type": "Str", 337 | }, 338 | ], 339 | "type": "Para", 340 | }, 341 | ], 342 | "rowSpan": 1, 343 | "type": "Cell", 344 | }, 345 | ], 346 | "type": "Row", 347 | }, 348 | ], 349 | "headRows": Array [], 350 | "rowHeadColumns": 1, 351 | "type": "TableBody", 352 | }, 353 | ], 354 | "caption": Object { 355 | "content": Array [], 356 | "type": "Caption", 357 | }, 358 | "colSpecs": Array [ 359 | Object { 360 | "alignment": "AlignDefault", 361 | "type": "ColSpec", 362 | "width": 0.1985185185185185, 363 | }, 364 | Object { 365 | "alignment": "AlignDefault", 366 | "type": "ColSpec", 367 | "width": 0.49333333333333335, 368 | }, 369 | Object { 370 | "alignment": "AlignDefault", 371 | "type": "ColSpec", 
372 | "width": 0.3985185185185185, 373 | }, 374 | ], 375 | "foot": Object { 376 | "attr": Object { 377 | "classes": Array [], 378 | "identifier": "", 379 | "properties": Object {}, 380 | }, 381 | "rows": Array [], 382 | "type": "TableFoot", 383 | }, 384 | "head": Object { 385 | "attr": Object { 386 | "classes": Array [], 387 | "identifier": "", 388 | "properties": Object {}, 389 | }, 390 | "rows": Array [ 391 | Object { 392 | "attr": Object { 393 | "classes": Array [], 394 | "identifier": "", 395 | "properties": Object {}, 396 | }, 397 | "cells": Array [ 398 | Object { 399 | "alignment": "AlignDefault", 400 | "attr": Object { 401 | "classes": Array [], 402 | "identifier": "", 403 | "properties": Object {}, 404 | }, 405 | "colSpan": 1, 406 | "content": Array [ 407 | Object { 408 | "content": Array [], 409 | "type": "Para", 410 | }, 411 | ], 412 | "rowSpan": 1, 413 | "type": "Cell", 414 | }, 415 | Object { 416 | "alignment": "AlignDefault", 417 | "attr": Object { 418 | "classes": Array [], 419 | "identifier": "", 420 | "properties": Object {}, 421 | }, 422 | "colSpan": 1, 423 | "content": Array [ 424 | Object { 425 | "content": Array [ 426 | Object { 427 | "content": "He-Man", 428 | "type": "Str", 429 | }, 430 | ], 431 | "type": "Para", 432 | }, 433 | ], 434 | "rowSpan": 1, 435 | "type": "Cell", 436 | }, 437 | Object { 438 | "alignment": "AlignDefault", 439 | "attr": Object { 440 | "classes": Array [], 441 | "identifier": "", 442 | "properties": Object {}, 443 | }, 444 | "colSpan": 1, 445 | "content": Array [ 446 | Object { 447 | "content": Array [ 448 | Object { 449 | "content": "Skeletor", 450 | "type": "Str", 451 | }, 452 | ], 453 | "type": "Para", 454 | }, 455 | ], 456 | "rowSpan": 1, 457 | "type": "Cell", 458 | }, 459 | ], 460 | "type": "Row", 461 | }, 462 | ], 463 | "type": "TableHead", 464 | }, 465 | "type": "Table", 466 | } 467 | `; 468 | -------------------------------------------------------------------------------- 
/src/transform/fromProsemirror/fromProsemirror.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorNode, PandocNode } from "types"; 2 | 3 | import { asArray, flatten, makeCounter } from "transform/util"; 4 | import { RuleSet } from "transform/ruleset"; 5 | import { Fluent, fluent } from "transform/fluent"; 6 | import { 7 | FromProsemirrorTransformConfig, 8 | FromProsemirrorTransformContext, 9 | } from "transform/types"; 10 | 11 | import { createWrapperNodeFromMarks, splitNodesByMarks } from "./marks"; 12 | 13 | const fromProsemirrorInner = ( 14 | elementOrArray: ProsemirrorNode | ProsemirrorNode[], 15 | context: FromProsemirrorTransformContext 16 | ): Fluent => { 17 | if (!elementOrArray) { 18 | return fluent([]); 19 | } 20 | const { ruleset } = context; 21 | const nodesAndAssociatedMarks = splitNodesByMarks(asArray(elementOrArray)); 22 | const transformed: PandocNode[] = []; 23 | for (const { nodes, marks } of nodesAndAssociatedMarks) { 24 | let ptr = 0; 25 | const innerTransformed = []; 26 | while (ptr < nodes.length) { 27 | const remaining = nodes.slice(ptr); 28 | const { rule, acceptedCount } = 29 | ruleset.matchProsemirrorNodes(remaining); 30 | const addition: PandocNode[] = flatten( 31 | rule.acceptsMultiple 32 | ? 
rule.transformer( 33 | nodes.slice(ptr, ptr + acceptedCount), 34 | context 35 | ) 36 | : rule.transformer(nodes[ptr], context) 37 | ); 38 | innerTransformed.push(...addition); 39 | ptr += acceptedCount; 40 | } 41 | const maybeWrappedNodes = createWrapperNodeFromMarks( 42 | innerTransformed, 43 | marks, 44 | context 45 | ); 46 | transformed.push(...asArray(maybeWrappedNodes)); 47 | } 48 | return fluent(transformed); 49 | }; 50 | 51 | export const fromProsemirror = ( 52 | elementOrArray: ProsemirrorNode | ProsemirrorNode[], 53 | ruleset: RuleSet, 54 | config: Partial = {} 55 | ): Fluent => { 56 | const { resources = {}, prosemirrorDocWidth = 1000 } = config; 57 | const context: FromProsemirrorTransformContext = { 58 | ruleset, 59 | resources, 60 | prosemirrorDocWidth, 61 | count: makeCounter(), 62 | transform: (element) => fromProsemirrorInner(element, context), 63 | }; 64 | return context.transform(elementOrArray); 65 | }; 66 | -------------------------------------------------------------------------------- /src/transform/fromProsemirror/index.ts: -------------------------------------------------------------------------------- 1 | export { fromProsemirror } from "./fromProsemirror"; 2 | -------------------------------------------------------------------------------- /src/transform/fromProsemirror/marks.ts: -------------------------------------------------------------------------------- 1 | import { PandocNode, ProsemirrorNode, ProsemirrorMark } from "types"; 2 | import { FromProsemirrorTransformContext, OneOrMany } from "transform/types"; 3 | import { asArray } from "transform/util"; 4 | 5 | type NodesAndMarksBucket = { 6 | nodes: ProsemirrorNode[]; 7 | marks: ProsemirrorMark[]; 8 | }; 9 | 10 | export const createWrapperNodeFromMarks = ( 11 | innerNode: OneOrMany, 12 | marks: ProsemirrorMark[], 13 | context: FromProsemirrorTransformContext 14 | ): OneOrMany => { 15 | const { ruleset } = context; 16 | return marks.reduce((node, mark) => { 17 | const { rule } = 
ruleset.matchProsemirrorMarks([mark]); 18 | return rule.transformer(mark, asArray(node), context); 19 | }, innerNode); 20 | }; 21 | 22 | const createNodesAndMarksBucket = ( 23 | marks: ProsemirrorMark[] 24 | ): NodesAndMarksBucket => { 25 | return { 26 | nodes: [], 27 | marks, 28 | }; 29 | }; 30 | 31 | const alphabetizeObjectProps = >( 32 | object: T 33 | ): T => { 34 | const next: Partial = {}; 35 | Object.keys(object) 36 | .sort() 37 | .forEach((key: keyof T) => { 38 | next[key] = object[key]; 39 | }); 40 | return next as T; 41 | }; 42 | 43 | const canonicalizeMarks = (marks: ProsemirrorMark[]) => { 44 | const canonicalized = marks 45 | .concat() 46 | .sort((a, b) => (a.type > b.type ? 1 : -1)) 47 | .map((mark) => { 48 | if (mark.attrs) { 49 | return { 50 | ...mark, 51 | attrs: alphabetizeObjectProps(mark.attrs), 52 | }; 53 | } 54 | return mark; 55 | }); 56 | return JSON.stringify(canonicalized); 57 | }; 58 | 59 | export const splitNodesByMarks = ( 60 | nodes: ProsemirrorNode[] 61 | ): NodesAndMarksBucket[] => { 62 | let currentCanonicalizedMarks: null | string = null; 63 | let currentBucket: null | NodesAndMarksBucket = null; 64 | const buckets: NodesAndMarksBucket[] = []; 65 | for (const node of nodes) { 66 | const currentMarks = node.marks || []; 67 | const canonicalizedMarks = canonicalizeMarks(currentMarks); 68 | const useNewBucket = canonicalizedMarks !== currentCanonicalizedMarks; 69 | if (useNewBucket) { 70 | if (currentBucket) { 71 | buckets.push(currentBucket); 72 | } 73 | currentBucket = createNodesAndMarksBucket(currentMarks); 74 | currentCanonicalizedMarks = canonicalizedMarks; 75 | } 76 | currentBucket.nodes.push(node); 77 | } 78 | if (currentBucket) { 79 | buckets.push(currentBucket); 80 | } 81 | return buckets; 82 | }; 83 | -------------------------------------------------------------------------------- /src/transform/inference/index.ts: -------------------------------------------------------------------------------- 1 | export * from 
"./inferPandocType"; 2 | export * from "./inferProsemirrorType"; 3 | -------------------------------------------------------------------------------- /src/transform/inference/inferPandocType.ts: -------------------------------------------------------------------------------- 1 | import { PandocNode } from "../../types"; 2 | 3 | import { OneOrMore, Trim } from "./shared"; 4 | 5 | type Resolve = 6 | | ResolveZeroOrMore 7 | | ResolveOneOrMore 8 | | ResolveParens 9 | | ResolveChoice 10 | | ResolveIdentifier; 11 | 12 | type ResolveZeroOrMore = Str extends `${infer Some}*` 13 | ? Resolve extends never 14 | ? never 15 | : Resolve[] 16 | : never; 17 | 18 | type ResolveOneOrMore = Str extends `${infer Some}+` 19 | ? Resolve extends never 20 | ? never 21 | : OneOrMore> 22 | : never; 23 | 24 | type ResolveParens = Str extends `(${infer Some})` ? Resolve : never; 25 | 26 | type ResolveChoice = Str extends `${infer Some}|${infer Rest}` 27 | ? Resolve> | Resolve> 28 | : never; 29 | 30 | type ResolveIdentifier = Str extends PandocNode["type"] 31 | ? Readonly 32 | : never; 33 | 34 | export type InferPandocPattern = Resolve; 35 | export type InferPandocNodeType = ResolveIdentifier; 36 | -------------------------------------------------------------------------------- /src/transform/inference/inferProsemirrorType.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorMark, ProsemirrorNode, ProsemirrorSchema } from "types"; 2 | 3 | import { OneOrMore, Trim } from "./shared"; 4 | 5 | type Resolve = 6 | | ResolveZeroOrMore 7 | | ResolveOneOrMore 8 | | ResolveParens 9 | | ResolveChoice 10 | | ResolveIdentifier; 11 | 12 | type ResolveZeroOrMore< 13 | Str, 14 | Schema extends ProsemirrorSchema 15 | > = Str extends `${infer Some}*` 16 | ? Resolve extends never 17 | ? never 18 | : Resolve[] 19 | : never; 20 | 21 | type ResolveOneOrMore< 22 | Str, 23 | Schema extends ProsemirrorSchema 24 | > = Str extends `${infer Some}+` 25 | ? 
Resolve extends never 26 | ? never 27 | : OneOrMore> 28 | : never; 29 | 30 | type ResolveParens< 31 | Str, 32 | Schema extends ProsemirrorSchema 33 | > = Str extends `(${infer Some})` ? Resolve : never; 34 | 35 | type ResolveChoice< 36 | Str, 37 | Schema extends ProsemirrorSchema 38 | > = Str extends `${infer Some}|${infer Rest}` 39 | ? Resolve, Schema> | Resolve, Schema> 40 | : never; 41 | 42 | // TODO(ian): Maybe add some real inference here if it turns out to be possible 43 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 44 | type ResolveIdentifier< 45 | Str, 46 | Schema extends ProsemirrorSchema 47 | > = Str extends Nodes ? ProsemirrorNode : never; 48 | 49 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 50 | type Marks = Str extends ProsemirrorSchema 51 | ? FoundMarks 52 | : never; 53 | 54 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 55 | type Nodes = Str extends ProsemirrorSchema 56 | ? FoundNodes 57 | : never; 58 | 59 | export type InferProsemirrorNodePattern< 60 | Str, 61 | Schema extends ProsemirrorSchema 62 | > = Resolve; 63 | 64 | export type InferProsemirrorNodeType< 65 | Str, 66 | Schema extends ProsemirrorSchema 67 | > = Str extends Nodes ? ProsemirrorNode : never; 68 | 69 | export type InferProsemirrorMarkType< 70 | Str, 71 | Schema extends ProsemirrorSchema 72 | > = Str extends Marks ? ProsemirrorMark : never; 73 | 74 | export type InferProsemirrorElementType< 75 | Str, 76 | Schema extends ProsemirrorSchema 77 | > = 78 | | InferProsemirrorNodeType 79 | | InferProsemirrorMarkType; 80 | -------------------------------------------------------------------------------- /src/transform/inference/shared.ts: -------------------------------------------------------------------------------- 1 | export type OneOrMore = [T, ...T[]]; 2 | 3 | export type Trim = T extends ` ${infer U}` 4 | ? Trim 5 | : T extends `${infer U} ` 6 | ? 
Trim 7 | : T; 8 | -------------------------------------------------------------------------------- /src/transform/ruleset.ts: -------------------------------------------------------------------------------- 1 | import { 2 | PANDOC_NODE_TYPES, 3 | PandocNode, 4 | ProsemirrorMark, 5 | ProsemirrorNode, 6 | ProsemirrorSchema, 7 | } from "types"; 8 | import { 9 | parseExpr, 10 | exprAcceptsMultiple, 11 | exprWillAlwaysMatchSingleIdentifier, 12 | acceptItems, 13 | Expr, 14 | } from "expression"; 15 | 16 | import { 17 | InferPandocPattern, 18 | InferProsemirrorMarkType, 19 | InferProsemirrorElementType, 20 | InferProsemirrorNodePattern, 21 | InferPandocNodeType, 22 | } from "./inference"; 23 | import { 24 | BidirectionalTransformer, 25 | PandocNodeToProsemirrorMarkTransformer, 26 | PandocNodeToProsemirrorNodeTransformer, 27 | ParameterizedBidirectionalTransformer, 28 | ProsemirrorMarkToPandocNodeTransformer, 29 | ProsemirrorNodeToPandocNodeTransformer, 30 | Rule, 31 | } from "./types"; 32 | import { flatten } from "./util"; 33 | 34 | type AcceptResult = { 35 | acceptedCount: number; 36 | rule: Rule; 37 | }; 38 | 39 | const matchItemWithType = 40 | (identifier: string) => 41 | (item: { type: string }): boolean => 42 | identifier === item.type; 43 | 44 | const gatherExpressionsForCapturedNodeAssertions = ( 45 | ...items: (string[] | undefined)[] 46 | ): Expr[] => { 47 | return flatten( 48 | items 49 | .filter((x): x is string[] => !!x) 50 | .map((strs) => strs.map((str) => parseExpr(str))) 51 | ); 52 | }; 53 | 54 | const assertExpressionsSafeForParameterizedTransformer = ( 55 | pandocPattern: string, 56 | prosemirrorPattern: string 57 | ) => { 58 | const pdExpr = parseExpr(pandocPattern); 59 | const pmExpr = parseExpr(prosemirrorPattern); 60 | if (pdExpr.type !== "identifier" && pmExpr.type !== "identifier") { 61 | throw new Error( 62 | "Cannot use a transformer that takes node names as arguments in a rule that accepts patterns." 
+ 63 | " For instance, calling rules.transform('A | B', 'a', tr) will fail if tr is a function" + 64 | " of two arguments (pandocNodeType, prosemirrorNodeType), because 'A | B' is not" + 65 | " a valid Pandoc node name. You will need to call the transformer with two statically known" + 66 | " argument types and pass the result into the transform rule instead," + 67 | " e.g. rules.transform('A | B', 'a', tr('A', 'a')).\n" + 68 | `(Attempting to transform between ${pandocPattern} and ${prosemirrorPattern}` 69 | ); 70 | } 71 | }; 72 | 73 | const throwFailedMatchError = ( 74 | items: WithType[] 75 | ) => { 76 | throw new Error( 77 | `Could not find transform rule for items: ${ 78 | items 79 | .map((item) => item.type) 80 | .slice(0, 3) 81 | .join(", ") + (items.length > 3 ? "..." : "") 82 | }` 83 | ); 84 | }; 85 | 86 | const throwMarkMatchingError = (pattern: string) => { 87 | throw new Error( 88 | `Pattern for mark conversion must accept exactly one Pandoc node or Prosemirror mark (${pattern} was supplied)` 89 | ); 90 | }; 91 | 92 | const warnAboutMissingMatchesForRules = ( 93 | matchNoun: string, 94 | requiredTypes: string[], 95 | rules: Rule[] 96 | ) => { 97 | const matchingExpressions = rules 98 | .map((rule) => [rule.expression, ...rule.capturedExpressions]) 99 | .reduce((a, b) => [...a, ...b]); 100 | const missingTypes = requiredTypes.filter( 101 | (type) => 102 | !matchingExpressions.some((expr) => 103 | exprWillAlwaysMatchSingleIdentifier(expr, type) 104 | ) 105 | ); 106 | if (missingTypes.length > 0) { 107 | console.warn( 108 | `Cannot find rules that are guaranteed to match on a ${matchNoun} of these types: ` + 109 | `${missingTypes.join(", ")}.` + 110 | " You may want to add or modify rules so that the transformer does not break" + 111 | ` if it encounters one of these ${matchNoun}s.` 112 | ); 113 | } 114 | }; 115 | 116 | export class RuleSet { 117 | readonly pandocNodeToProsemirrorRules: ( 118 | | Rule 119 | | Rule 120 | )[] = []; 121 | readonly 
prosemirrorNodeToPandocNodeRules: Rule[] = 122 | []; 123 | readonly prosemirrorMarkToPandocNodeRules: Rule[] = 124 | []; 125 | readonly prosemirrorSchema: Schema; 126 | 127 | constructor(schema: Schema) { 128 | this.prosemirrorSchema = schema; 129 | } 130 | 131 | toProsemirrorNode( 132 | pattern: PandocNodePattern, 133 | transformer: PandocNodeToProsemirrorNodeTransformer< 134 | InferPandocPattern 135 | >, 136 | assertCapturedPandocNodes: string[] = [] 137 | ) { 138 | const expression = parseExpr(pattern); 139 | const capturedExpressions = gatherExpressionsForCapturedNodeAssertions( 140 | assertCapturedPandocNodes, 141 | transformer.assertCapturedPandocNodes 142 | ); 143 | this.pandocNodeToProsemirrorRules.push({ 144 | isMarksRule: false, 145 | acceptsMultiple: exprAcceptsMultiple(expression), 146 | expression, 147 | transformer, 148 | capturedExpressions, 149 | }); 150 | } 151 | 152 | toProsemirrorMark( 153 | pattern: PandocNodePattern, 154 | transformer: PandocNodeToProsemirrorMarkTransformer< 155 | InferPandocPattern 156 | >, 157 | assertCapturedPandocNodes: string[] = [] 158 | ) { 159 | const expression = parseExpr(pattern); 160 | const capturedExpressions = gatherExpressionsForCapturedNodeAssertions( 161 | assertCapturedPandocNodes, 162 | transformer.assertCapturedPandocNodes 163 | ); 164 | const acceptsMultiple = exprAcceptsMultiple(expression); 165 | if (acceptsMultiple) { 166 | throwMarkMatchingError(pattern); 167 | } 168 | this.pandocNodeToProsemirrorRules.push({ 169 | isMarksRule: true, 170 | acceptsMultiple: false, 171 | expression, 172 | transformer, 173 | capturedExpressions, 174 | }); 175 | } 176 | 177 | fromProsemirrorNode( 178 | pattern: ProsemirrorNodePattern, 179 | transformer: ProsemirrorNodeToPandocNodeTransformer< 180 | InferProsemirrorNodePattern 181 | >, 182 | assertCapturedProsemirrorNodes: string[] = [] 183 | ) { 184 | const expression = parseExpr(pattern); 185 | const capturedExpressions = gatherExpressionsForCapturedNodeAssertions( 186 | 
assertCapturedProsemirrorNodes, 187 | transformer.assertCapturedProsemirrorNodes 188 | ); 189 | const acceptsMultiple = exprAcceptsMultiple(expression); 190 | this.prosemirrorNodeToPandocNodeRules.push({ 191 | isMarksRule: false, 192 | acceptsMultiple, 193 | expression, 194 | transformer, 195 | capturedExpressions, 196 | }); 197 | } 198 | 199 | fromProsemirrorMark( 200 | pattern: ProsemirrorMarkPattern, 201 | transformer: ProsemirrorMarkToPandocNodeTransformer< 202 | InferProsemirrorMarkType 203 | > 204 | ) { 205 | const expression = parseExpr(pattern); 206 | const acceptsMultiple = exprAcceptsMultiple(expression); 207 | if (acceptsMultiple) { 208 | throwMarkMatchingError(pattern); 209 | } 210 | this.prosemirrorMarkToPandocNodeRules.push({ 211 | isMarksRule: true, 212 | acceptsMultiple: false, 213 | expression, 214 | transformer, 215 | capturedExpressions: [], 216 | }); 217 | } 218 | 219 | transform( 220 | pandocPattern: PandocPattern, 221 | prosemirrorPattern: ProsemirrorPattern, 222 | bidirectionalTransformer: 223 | | BidirectionalTransformer< 224 | InferPandocNodeType, 225 | InferProsemirrorElementType 226 | > 227 | | ParameterizedBidirectionalTransformer< 228 | PandocPattern, 229 | ProsemirrorPattern, 230 | Schema 231 | > 232 | ) { 233 | if (typeof bidirectionalTransformer === "function") { 234 | assertExpressionsSafeForParameterizedTransformer( 235 | pandocPattern, 236 | prosemirrorPattern 237 | ); 238 | bidirectionalTransformer = bidirectionalTransformer( 239 | pandocPattern, 240 | prosemirrorPattern 241 | ); 242 | } 243 | if ("toProsemirrorNode" in bidirectionalTransformer) { 244 | const { toProsemirrorNode, assertCapturedPandocNodes = [] } = 245 | bidirectionalTransformer; 246 | this.toProsemirrorNode( 247 | pandocPattern, 248 | toProsemirrorNode, 249 | assertCapturedPandocNodes 250 | ); 251 | } 252 | if ("toProsemirrorMark" in bidirectionalTransformer) { 253 | const { toProsemirrorMark, assertCapturedPandocNodes = [] } = 254 | bidirectionalTransformer; 255 
| this.toProsemirrorMark( 256 | pandocPattern, 257 | toProsemirrorMark, 258 | assertCapturedPandocNodes 259 | ); 260 | } 261 | if ("fromProsemirrorNode" in bidirectionalTransformer) { 262 | const { fromProsemirrorNode, assertCapturedProsemirrorNodes = [] } = 263 | bidirectionalTransformer; 264 | this.fromProsemirrorNode( 265 | prosemirrorPattern, 266 | fromProsemirrorNode, 267 | assertCapturedProsemirrorNodes 268 | ); 269 | } 270 | if ("fromProsemirrorMark" in bidirectionalTransformer) { 271 | const { fromProsemirrorMark } = bidirectionalTransformer; 272 | this.fromProsemirrorMark(prosemirrorPattern, fromProsemirrorMark); 273 | } 274 | } 275 | 276 | validate() { 277 | const { 278 | pandocNodeToProsemirrorRules, 279 | prosemirrorMarkToPandocNodeRules, 280 | prosemirrorNodeToPandocNodeRules, 281 | prosemirrorSchema, 282 | } = this; 283 | 284 | warnAboutMissingMatchesForRules( 285 | "Pandoc node", 286 | PANDOC_NODE_TYPES, 287 | pandocNodeToProsemirrorRules 288 | ); 289 | warnAboutMissingMatchesForRules( 290 | "Prosemirror node", 291 | Object.keys(prosemirrorSchema.nodes), 292 | prosemirrorNodeToPandocNodeRules 293 | ); 294 | warnAboutMissingMatchesForRules( 295 | "Prosemirror mark", 296 | Object.keys(prosemirrorSchema.marks), 297 | prosemirrorMarkToPandocNodeRules 298 | ); 299 | } 300 | 301 | private matchItems< 302 | ItemType extends { type: string }, 303 | RuleType extends Rule 304 | >(items: ItemType[], rules: RuleType[]): AcceptResult { 305 | for (const rule of rules) { 306 | const acceptedCount = acceptItems( 307 | rule.expression, 308 | items, 309 | matchItemWithType 310 | ); 311 | if (acceptedCount > 0) { 312 | return { 313 | acceptedCount, 314 | rule, 315 | }; 316 | } 317 | } 318 | throwFailedMatchError(items); 319 | } 320 | 321 | matchPandocNodes(nodes: PandocNode[]) { 322 | return this.matchItems(nodes, this.pandocNodeToProsemirrorRules); 323 | } 324 | 325 | matchProsemirrorNodes(nodes: ProsemirrorNode[]) { 326 | return this.matchItems(nodes, 
this.prosemirrorNodeToPandocNodeRules); 327 | } 328 | 329 | matchProsemirrorMarks(marks: ProsemirrorMark[]) { 330 | return this.matchItems(marks, this.prosemirrorMarkToPandocNodeRules); 331 | } 332 | } 333 | -------------------------------------------------------------------------------- /src/transform/transformers/bare.ts: -------------------------------------------------------------------------------- 1 | import { 2 | BlockQuote, 3 | Emph, 4 | SmallCaps, 5 | Strikeout, 6 | Strong, 7 | Subscript, 8 | Superscript, 9 | Underline, 10 | Plain, 11 | Para, 12 | LineBreak, 13 | } from "types"; 14 | 15 | type BareBlock = Plain | Para | BlockQuote; 16 | 17 | type BareLeaf = LineBreak; 18 | 19 | type BareInline = 20 | | Emph 21 | | Underline 22 | | Strong 23 | | Strikeout 24 | | Superscript 25 | | Subscript 26 | | SmallCaps; 27 | 28 | // A transformer appropriate for simple container nodes. Typically, these are 29 | // correspondences between Pandoc elements with a content property and 30 | // Prosemirror elements with a content property 31 | export const bareContentTransformer = ( 32 | pdNodeType: BareBlock["type"], 33 | pmNodeType 34 | ) => { 35 | return { 36 | toProsemirrorNode: (node, { transform }) => { 37 | return { 38 | type: pmNodeType, 39 | content: transform(node.content).asArray(), 40 | }; 41 | }, 42 | fromProsemirrorNode: (node, { transform }) => { 43 | return { 44 | type: pdNodeType, 45 | content: transform(node.content).asArray(), 46 | }; 47 | }, 48 | }; 49 | }; 50 | 51 | // A transformer between Pandoc inline nodes and Prosemirror marks with no attrs 52 | export const bareMarkTransformer = ( 53 | pdNodeType: BareInline["type"], 54 | pmMarkType 55 | ) => { 56 | return { 57 | toProsemirrorMark: () => { 58 | return { 59 | type: pmMarkType, 60 | }; 61 | }, 62 | fromProsemirrorMark: (_, content) => { 63 | return { 64 | type: pdNodeType, 65 | content, 66 | }; 67 | }, 68 | }; 69 | }; 70 | 71 | // A transformer that does type -> type conversion for simple leaf nodes 
72 | export const bareLeafTransformer = ( 73 | pdNodeType: BareLeaf["type"], 74 | pmNodeType 75 | ) => { 76 | return { 77 | toProsemirrorNode: () => { 78 | return { 79 | type: pmNodeType, 80 | }; 81 | }, 82 | fromProsemirrorNode: () => { 83 | return { 84 | type: pdNodeType, 85 | }; 86 | }, 87 | }; 88 | }; 89 | -------------------------------------------------------------------------------- /src/transform/transformers/common.ts: -------------------------------------------------------------------------------- 1 | import { RawBlock, RawInline } from "types"; 2 | import { getQuoteChar } from "../util"; 3 | 4 | // A transformer that converts between Pandoc elements with string content and Prosemirror 5 | // elements that accept {type: 'text', text: string}[] as their content. 6 | export const textTransformer = (pdNodeName: "Str", pmNodeName: string) => { 7 | return { 8 | toProsemirrorNode: (node) => { 9 | return { 10 | type: pmNodeName, 11 | text: node.content, 12 | }; 13 | }, 14 | fromProsemirrorNode: (node) => { 15 | return { 16 | type: pdNodeName, 17 | content: node.content.join(""), 18 | }; 19 | }, 20 | }; 21 | }; 22 | 23 | // A one-way transformer that ignores a Pandoc node and passes its content through. 24 | export const pandocPassThroughTransformer = (node, { transform }) => { 25 | return transform(node.content).asArray(); 26 | }; 27 | 28 | // A one-way transformer that converts Pandoc's Quoted inline elements to quoted text. 
29 | export const pandocQuotedTransformer = ( 30 | node, 31 | { transform, useSmartQuotes } 32 | ) => { 33 | const isSingleQuote = node.quoteType === "SingleQuote"; 34 | return [ 35 | { 36 | type: "text", 37 | text: getQuoteChar(isSingleQuote, true, useSmartQuotes), 38 | }, 39 | ...transform(node.content).asArray(), 40 | { 41 | type: "text", 42 | text: getQuoteChar(isSingleQuote, false, useSmartQuotes), 43 | }, 44 | ]; 45 | }; 46 | 47 | // A transformer that returns an empty array 48 | export const nullTransformer = () => []; 49 | 50 | // A transformer that turns a Pandoc RawBlock or RawInline into a paragraph 51 | export const pandocRawTransformer = ( 52 | pmInlineNodeName: string, 53 | pmBlockNodeName: string = null 54 | ) => { 55 | return (node: RawBlock | RawInline) => { 56 | const { content } = node; 57 | const textNode = { type: pmInlineNodeName, text: content }; 58 | if (pmBlockNodeName) { 59 | const blockNode = { type: pmBlockNodeName, content: [textNode] }; 60 | return blockNode; 61 | } 62 | return textNode; 63 | }; 64 | }; 65 | -------------------------------------------------------------------------------- /src/transform/transformers/doc.ts: -------------------------------------------------------------------------------- 1 | import { Doc, ProsemirrorNode } from "types"; 2 | 3 | // A transformer that turns Pandoc root-level documents into Prosemirror ones. 
4 | export const docTransformer = (pdNodeType: "Doc", pmNodeType) => { 5 | return { 6 | toProsemirrorNode: (node: Doc, { transform }): ProsemirrorNode => { 7 | const { blocks } = node; 8 | return { 9 | type: pmNodeType, 10 | content: transform(blocks).asArray(), 11 | }; 12 | }, 13 | fromProsemirrorNode: (node: ProsemirrorNode, { transform }): Doc => { 14 | const { content } = node; 15 | return { 16 | type: pdNodeType, 17 | blocks: transform(content).asArray(), 18 | meta: {}, 19 | }; 20 | }, 21 | }; 22 | }; 23 | -------------------------------------------------------------------------------- /src/transform/transformers/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./common"; 2 | export * from "./table"; 3 | export * from "./bare"; 4 | export * from "./doc"; 5 | export * from "./list"; 6 | -------------------------------------------------------------------------------- /src/transform/transformers/list.ts: -------------------------------------------------------------------------------- 1 | import { 2 | DefinitionList, 3 | Block, 4 | Para, 5 | Plain, 6 | OrderedList, 7 | BulletList, 8 | } from "types"; 9 | 10 | import { flatten } from "../util"; 11 | 12 | type SimpleList = OrderedList | BulletList; 13 | 14 | // Returns a transformer appropriate for converting between Pandoc OrderedLists and BulletLists and 15 | // the equivalent types in a Prosemirror schema -- basically, anything like an
    or a
      . 16 | export const createListTransformer = 17 | (pmInnerNodeType: string, processListItem: (n: N) => N = (x) => x) => 18 | (pdNodeType: SimpleList["type"], pmNodeType) => { 19 | return { 20 | toProsemirrorNode: (node, { transform }) => { 21 | const content = node.content.map((blocks) => { 22 | return processListItem({ 23 | type: pmInnerNodeType, 24 | content: transform(blocks).asArray(), 25 | }); 26 | }); 27 | const hasOrder = 28 | node.listAttributes && 29 | typeof node.listAttributes.startNumber === "number"; 30 | const attrs = hasOrder 31 | ? { order: node.listAttributes.startNumber } 32 | : {}; 33 | return { 34 | type: pmNodeType, 35 | attrs, 36 | content, 37 | }; 38 | }, 39 | fromProsemirrorNode: (node, { transform }): SimpleList => { 40 | const content = node.content.map((listItem) => 41 | transform(listItem.content).asArray() 42 | ); 43 | if (pdNodeType === "OrderedList") { 44 | return { 45 | type: pdNodeType, 46 | content, 47 | listAttributes: { 48 | startNumber: node.attrs.order, 49 | listNumberStyle: "DefaultStyle", 50 | listNumberDelim: "DefaultDelim", 51 | }, 52 | }; 53 | } 54 | return { 55 | type: pdNodeType, 56 | content, 57 | }; 58 | }, 59 | assertCapturedProsemirrorNodes: [pmInnerNodeType], 60 | }; 61 | }; 62 | 63 | // A one-way transformer that takes the cursed DefinitionList and turns it into an unordered list. 
64 | export const definitionListTransformer = 65 | (pmOuterNodeType, pmInnerNodeType) => 66 | (node: DefinitionList, { transform }) => { 67 | const content = node.entries.map((value) => { 68 | const { term, definitions } = value; 69 | const blocks = flatten(definitions); 70 | const firstBlock = blocks[0]; 71 | let prependableBlock: Para | Plain; 72 | if ( 73 | firstBlock && 74 | (firstBlock.type === "Para" || firstBlock.type === "Plain") 75 | ) { 76 | prependableBlock = firstBlock as Para | Plain; 77 | } else { 78 | prependableBlock = { type: "Para", content: [] }; 79 | blocks.unshift(prependableBlock); 80 | } 81 | prependableBlock.content.unshift({ 82 | type: "Strong", 83 | content: [ 84 | ...term, 85 | { type: "Str", content: ":" }, 86 | { type: "Space" }, 87 | ], 88 | }); 89 | return { 90 | type: pmInnerNodeType, 91 | content: transform(blocks).asArray(), 92 | }; 93 | }); 94 | return { 95 | type: pmOuterNodeType, 96 | content, 97 | }; 98 | }; 99 | -------------------------------------------------------------------------------- /src/transform/transformers/table/fromPandoc.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Block, 3 | Caption, 4 | Cell, 5 | ColSpec, 6 | ProsemirrorNode, 7 | Row, 8 | Table, 9 | } from "types"; 10 | import { FromPandocTransformContext } from "transform/types"; 11 | 12 | const resolveCaption = ( 13 | caption: Caption, 14 | context: FromPandocTransformContext 15 | ): ProsemirrorNode[] => { 16 | const { shortCaption, content } = caption; 17 | return [ 18 | ...context.transform(content).asArray(), 19 | ...(shortCaption ? context.transform(shortCaption).asArray() : []), 20 | ]; 21 | }; 22 | 23 | const resolveCellAttrs = ( 24 | cell: Cell, 25 | colSpecs: ColSpec[], 26 | prosemirrorDocWidth: number 27 | ) => { 28 | const colWidths = colSpecs 29 | .map((colSpec) => ("width" in colSpec ? 
colSpec.width : 0)) 30 | // Subtract 1 from the total width here to account for 1px column dividers 31 | .map((percentageWidth) => -1 + percentageWidth * prosemirrorDocWidth); 32 | const widthAttr = colWidths.some((width) => width > 0) 33 | ? { colwidth: colWidths } 34 | : {}; 35 | return { 36 | ...widthAttr, 37 | rowspan: cell.rowSpan, 38 | colspan: cell.colSpan, 39 | }; 40 | }; 41 | 42 | const cellFromPandoc = ( 43 | cell: Cell, 44 | colSpecs: ColSpec[], 45 | isHead: boolean, 46 | context: FromPandocTransformContext 47 | ): ProsemirrorNode<"table_cell" | "table_header"> => { 48 | // Don't pass empty content into table_header or table_cell, which expect block+ 49 | const contentToTransform: Block[] = 50 | cell.content.length > 0 51 | ? cell.content 52 | : [{ type: "Para", content: [] }]; 53 | return { 54 | type: isHead ? "table_header" : "table_cell", 55 | attrs: resolveCellAttrs(cell, colSpecs, context.prosemirrorDocWidth), 56 | content: context.transform(contentToTransform).asArray(), 57 | }; 58 | }; 59 | 60 | const rowFromPandoc = ( 61 | row: Row, 62 | colSpecs: ColSpec[], 63 | headColumns: number | "all", 64 | context: FromPandocTransformContext 65 | ): ProsemirrorNode<"table_row"> => { 66 | const headCutoff = headColumns === "all" ? 
Infinity : headColumns; 67 | return { 68 | type: "table_row", 69 | content: row.cells.map((cell, idx) => 70 | cellFromPandoc( 71 | cell, 72 | colSpecs.slice(idx, idx + cell.colSpan), 73 | idx < headCutoff, 74 | context 75 | ) 76 | ), 77 | }; 78 | }; 79 | 80 | export const pandocTableTransformer = ( 81 | node: Table, 82 | context: FromPandocTransformContext 83 | ): 84 | | ProsemirrorNode<"table"> 85 | | [ProsemirrorNode<"table">, ...ProsemirrorNode[]] => { 86 | const { head, foot, bodies, caption, colSpecs } = node; 87 | 88 | const renderMyRow = (row: Row, headColumns: number | "all") => 89 | rowFromPandoc(row, colSpecs, headColumns, context); 90 | 91 | const headRows = head.rows.map((row) => renderMyRow(row, "all")); 92 | const bodyRows = bodies 93 | .map((body) => [ 94 | ...body.headRows.map((row) => renderMyRow(row, "all")), 95 | ...body.bodyRows.map((row) => 96 | renderMyRow(row, body.rowHeadColumns) 97 | ), 98 | ]) 99 | .reduce((a, b) => [...a, ...b]); 100 | const footRows = foot.rows.map((row) => renderMyRow(row, 0)); 101 | const prosemirrorCaption = resolveCaption(caption, context); 102 | 103 | const table: ProsemirrorNode<"table"> = { 104 | type: "table" as const, 105 | content: [...headRows, ...bodyRows, ...footRows], 106 | }; 107 | 108 | if (prosemirrorCaption.length > 0) { 109 | return [table, ...prosemirrorCaption]; 110 | } 111 | 112 | return table; 113 | }; 114 | -------------------------------------------------------------------------------- /src/transform/transformers/table/fromProsemirror.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Block, 3 | Caption, 4 | Cell, 5 | ColSpec, 6 | ProsemirrorNode, 7 | Row, 8 | Table, 9 | } from "types"; 10 | import { FromProsemirrorTransformContext } from "transform/types"; 11 | import { createAttr } from "transform/util"; 12 | 13 | const getDefaultColSpec = (): ColSpec => ({ 14 | type: "ColSpec", 15 | alignment: "AlignDefault", 16 | defaultWidth: true, 17 | }); 
18 | 19 | const getDefaultCaption = (): Caption => ({ 20 | type: "Caption", 21 | content: [], 22 | }); 23 | 24 | const getColumnCountFromRow = (row: ProsemirrorNode<"table_row">) => { 25 | return row.content 26 | .map((cell) => cell.attrs?.colspan ?? 1) 27 | .map((attr) => Number(attr)) 28 | .reduce((a, b) => a + b); 29 | }; 30 | 31 | const getColSpecsForTable = ( 32 | header: ProsemirrorNode<"table_row">, 33 | context: FromProsemirrorTransformContext 34 | ) => { 35 | const { prosemirrorDocWidth } = context; 36 | const columnCount = getColumnCountFromRow(header); 37 | const colSpecs: ColSpec[] = new Array(columnCount) 38 | .fill(0) 39 | .map(() => getDefaultColSpec()); 40 | header.content.forEach((cell, index) => { 41 | if ("colwidth" in cell.attrs && cell.attrs.colwidth) { 42 | const colWidth = cell.attrs.colwidth as (number | null)[]; 43 | colWidth.forEach((width, cellIndex) => { 44 | const realColumnIndex = index + cellIndex; 45 | if (typeof width === "number") { 46 | colSpecs[realColumnIndex] = { 47 | type: "ColSpec", 48 | alignment: "AlignDefault", 49 | width: width / prosemirrorDocWidth, 50 | }; 51 | } 52 | }); 53 | } 54 | }); 55 | return colSpecs; 56 | }; 57 | 58 | const transformCell = ( 59 | cell: ProsemirrorNode<"table_cell" | "table_header">, 60 | context: FromProsemirrorTransformContext 61 | ): Cell => { 62 | const { transform } = context; 63 | const { colspan = 1, rowspan = 1 } = cell.attrs; 64 | return { 65 | type: "Cell", 66 | attr: createAttr(), 67 | alignment: "AlignDefault", 68 | rowSpan: Number(rowspan), 69 | colSpan: Number(colspan), 70 | content: transform(cell.content).asArray() as Block[], 71 | }; 72 | }; 73 | 74 | const transformRow = ( 75 | row: ProsemirrorNode<"table_row">, 76 | context: FromProsemirrorTransformContext 77 | ): Row => { 78 | const cells = row.content as undefined | ProsemirrorNode< 79 | "table_cell" | "table_header" 80 | >[]; 81 | return { 82 | type: "Row", 83 | attr: createAttr(), 84 | // Table rows may be devoid of 
content, for example phantom rows that 85 | // are automatically added to satisfy a row where each element has 86 | // rowspan >1 87 | cells: cells?.map((cell) => transformCell(cell, context)) ?? [] , 88 | }; 89 | }; 90 | 91 | export const prosemirrorTableTransformer = ( 92 | table: ProsemirrorNode<"table">, 93 | context: FromProsemirrorTransformContext 94 | ): Table => { 95 | const [header, ...body] = table.content as ProsemirrorNode<"table_row">[]; 96 | return { 97 | type: "Table", 98 | attr: createAttr("id" in table.attrs ? String(table.attrs.id) : ""), 99 | caption: getDefaultCaption(), 100 | colSpecs: getColSpecsForTable(header, context), 101 | head: { 102 | type: "TableHead", 103 | attr: createAttr(), 104 | rows: [transformRow(header, context)], 105 | }, 106 | bodies: [ 107 | { 108 | type: "TableBody", 109 | attr: createAttr(), 110 | rowHeadColumns: 1, 111 | headRows: [], 112 | bodyRows: body.map((row) => transformRow(row, context)), 113 | }, 114 | ], 115 | foot: { 116 | type: "TableFoot", 117 | attr: createAttr(), 118 | rows: [], 119 | }, 120 | }; 121 | }; 122 | 123 | prosemirrorTableTransformer.assertCapturedProsemirrorNodes = [ 124 | "table_row", 125 | "table_cell", 126 | "table_header", 127 | ]; 128 | -------------------------------------------------------------------------------- /src/transform/transformers/table/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./fromPandoc"; 2 | export * from "./fromProsemirror"; 3 | -------------------------------------------------------------------------------- /src/transform/types.ts: -------------------------------------------------------------------------------- 1 | import { 2 | PandocNode, 3 | ProsemirrorElement, 4 | ProsemirrorMark, 5 | ProsemirrorNode, 6 | ProsemirrorSchema, 7 | } from "types"; 8 | import { Expr } from "expression"; 9 | 10 | import { Fluent } from "./fluent"; 11 | import { InferPandocNodeType, InferProsemirrorElementType } from 
"./inference"; 12 | import { RuleSet } from "./ruleset"; 13 | 14 | export type OneOrMany = T | T[]; 15 | 16 | // Function type that allows rules to transform their child nodes and pass appropriate context 17 | // into this sub-transformation 18 | type TransformCallback< 19 | From extends ProsemirrorNode | PandocNode, 20 | To extends ProsemirrorNode | PandocNode, 21 | TransformParentContext extends Record = Record 22 | > = ( 23 | from: OneOrMany, 24 | context?: Partial 25 | ) => Fluent; 26 | 27 | // Options passed into the transform process in both directions 28 | type SharedTransformConfig = { 29 | resources: Record; 30 | prosemirrorDocWidth: number; 31 | }; 32 | 33 | // Items that are available from the transform context in both directions 34 | type SharedTransformContext = { 35 | ruleset: RuleSet; 36 | count: (label: string) => number; 37 | }; 38 | 39 | // Transform config with Pandoc-specific options 40 | export type FromPandocTransformConfig = SharedTransformConfig & { 41 | useSmartQuotes: boolean; 42 | }; 43 | 44 | // Transform config with Prosemirror-specific options 45 | export type FromProsemirrorTransformConfig = SharedTransformConfig; 46 | 47 | // Transform context for Pandoc 48 | export type FromPandocTransformContext = FromPandocTransformConfig & 49 | SharedTransformContext & { 50 | marksMap: Map; 51 | transform: TransformCallback; 52 | }; 53 | 54 | // Transform context for Prosemirror 55 | export type FromProsemirrorTransformContext = FromProsemirrorTransformConfig & 56 | SharedTransformContext & { 57 | transform: TransformCallback; 58 | }; 59 | 60 | export type PandocNodeToProsemirrorNodeTransformer< 61 | From extends OneOrMany = OneOrMany, 62 | To extends ProsemirrorNode = ProsemirrorNode 63 | > = ((node: From, context: FromPandocTransformContext) => OneOrMany) & { 64 | assertCapturedPandocNodes?: string[]; 65 | }; 66 | 67 | export type PandocNodeToProsemirrorMarkTransformer< 68 | From extends OneOrMany = OneOrMany, 69 | To extends ProsemirrorMark = 
ProsemirrorMark 70 | > = ((node: From, context: FromPandocTransformContext) => OneOrMany) & { 71 | assertCapturedPandocNodes?: string[]; 72 | }; 73 | 74 | export type ProsemirrorNodeToPandocNodeTransformer< 75 | From extends OneOrMany = OneOrMany, 76 | To extends PandocNode = PandocNode 77 | > = (( 78 | node: From, 79 | context: FromProsemirrorTransformContext 80 | ) => OneOrMany) & { 81 | assertCapturedProsemirrorNodes?: string[]; 82 | }; 83 | 84 | export type ProsemirrorMarkToPandocNodeTransformer< 85 | From extends ProsemirrorMark = ProsemirrorMark, 86 | To extends PandocNode = PandocNode 87 | > = ( 88 | mark: From, 89 | content: any, 90 | context: FromProsemirrorTransformContext 91 | ) => OneOrMany; 92 | 93 | export type NodeTransformer = 94 | | PandocNodeToProsemirrorNodeTransformer 95 | | ProsemirrorNodeToPandocNodeTransformer; 96 | 97 | export type MarksTransformer = 98 | | PandocNodeToProsemirrorMarkTransformer 99 | | ProsemirrorMarkToPandocNodeTransformer; 100 | 101 | export type AnyTransformer = NodeTransformer | MarksTransformer; 102 | 103 | export type Rule = 104 | | Readonly<{ 105 | transformer: Transformer extends NodeTransformer 106 | ? Transformer 107 | : never; 108 | expression: Expr; 109 | capturedExpressions: Expr[]; 110 | acceptsMultiple: boolean; 111 | isMarksRule: false; 112 | }> 113 | | Readonly<{ 114 | transformer: Transformer extends MarksTransformer 115 | ? 
Transformer 116 | : never; 117 | expression: Expr; 118 | capturedExpressions: Expr[]; 119 | acceptsMultiple: false; 120 | isMarksRule: true; 121 | }>; 122 | 123 | export type BidirectionalTransformer< 124 | PandocType extends PandocNode, 125 | ProsemirrorType extends ProsemirrorElement 126 | > = ( 127 | | { 128 | fromProsemirrorNode: ProsemirrorNodeToPandocNodeTransformer< 129 | Extract, 130 | PandocType 131 | >; 132 | toProsemirrorNode: PandocNodeToProsemirrorNodeTransformer< 133 | PandocType, 134 | Extract 135 | >; 136 | } 137 | | { 138 | fromProsemirrorMark: ProsemirrorMarkToPandocNodeTransformer< 139 | Extract, 140 | PandocType 141 | >; 142 | toProsemirrorMark: PandocNodeToProsemirrorMarkTransformer< 143 | PandocType, 144 | Extract 145 | >; 146 | } 147 | ) & { 148 | assertCapturedProsemirrorNodes?: string[]; 149 | assertCapturedPandocNodes?: string[]; 150 | }; 151 | 152 | export type ParameterizedBidirectionalTransformer< 153 | PandocPattern extends string, 154 | ProsemirrorPattern extends string, 155 | Schema extends ProsemirrorSchema 156 | > = ( 157 | pandocPattern: PandocPattern, 158 | prosemirrorPattern: ProsemirrorPattern 159 | ) => BidirectionalTransformer< 160 | InferPandocNodeType, 161 | InferProsemirrorElementType 162 | >; 163 | -------------------------------------------------------------------------------- /src/transform/util.ts: -------------------------------------------------------------------------------- 1 | import { Attr, Str, Space } from "../types"; 2 | 3 | export const createAttr = ( 4 | identifier: string = "", 5 | classes: string[] = [], 6 | properties: Record = {} 7 | ): Attr => { 8 | return { identifier, classes, properties }; 9 | }; 10 | 11 | export const textFromStrSpace = (nodes: (Str | Space)[]) => { 12 | let text = ""; 13 | for (const entry of nodes) { 14 | if (entry.type === "Str") { 15 | text = text + entry.content; 16 | } else { 17 | text = text + " "; 18 | } 19 | } 20 | return text; 21 | }; 22 | 23 | export const intersperse = ( 
24 | arr: any[], 25 | intersperseFn: (index?: number) => any 26 | ): any[] => 27 | (Array.isArray(arr) ? arr : [arr]).reduce( 28 | (accumulated: any[], next: any, index: number): any[] => { 29 | const added: any[] = [next]; 30 | if (index !== (Array.isArray(arr) ? arr : [arr]).length - 1) { 31 | added.push(intersperseFn(index)); 32 | } 33 | return [...accumulated, ...added]; 34 | }, 35 | [] 36 | ); 37 | 38 | export const textToStrSpace = (text: string): (Str | Space)[] => 39 | intersperse( 40 | text.split(" ").map((word) => ({ type: "Str", content: word })), 41 | () => ({ type: "Space" }) 42 | ).filter((node) => { 43 | if (node.type === "Str" && node.content.length === 0) { 44 | return false; 45 | } 46 | return true; 47 | }); 48 | 49 | export const asArray = (item: T | T[]): T[] => { 50 | return Array.isArray(item) ? item : [item]; 51 | }; 52 | 53 | export const asNode = (item: T | T[]): T => { 54 | return Array.isArray(item) ? item[0] : item; 55 | }; 56 | 57 | export const flatten = (input: any): T[] => { 58 | if (!Array.isArray(input)) { 59 | return [input]; 60 | } 61 | return input.reduce((arr: T[], next: T | T[]) => { 62 | if (Array.isArray(next)) { 63 | return [...arr, ...flatten(next)]; 64 | } 65 | return [...arr, next]; 66 | }, [] as T[]) as T[]; 67 | }; 68 | 69 | export const getQuoteChar = ( 70 | single: boolean, 71 | opening: boolean, 72 | smart: boolean 73 | ) => { 74 | if (smart) { 75 | if (single) { 76 | return opening ? "‘" : "’"; 77 | } else { 78 | return opening ? "“" : "”"; 79 | } 80 | } else { 81 | return single ? 
"'" : '"'; 82 | } 83 | }; 84 | 85 | export const makeCounter = () => { 86 | const countMap: Map = new Map(); 87 | return (type: string) => { 88 | const count = countMap.get(type) || 0; 89 | countMap.set(type, count + 1); 90 | return count; 91 | }; 92 | }; 93 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Definitions for the Pandoc AST 3 | * See https://hackage.haskell.org/package/pandoc-types-1.22/docs/Text-Pandoc-Definition.html 4 | */ 5 | 6 | export { Schema as ProsemirrorSchema } from "prosemirror-model"; 7 | 8 | export type ProsemirrorAttr = 9 | | undefined 10 | | null 11 | | number 12 | | string 13 | | ProsemirrorAttr[]; 14 | 15 | export type ProsemirrorNode = { 16 | __isMark?: false; 17 | type: Type; 18 | content?: ProsemirrorNode[]; 19 | text?: string; 20 | attrs?: Record; 21 | marks?: ProsemirrorMark[]; 22 | }; 23 | 24 | export type ProsemirrorMark = { 25 | __isMark?: true; 26 | type: Type; 27 | attrs?: Record; 28 | }; 29 | 30 | export type ProsemirrorElement = ProsemirrorNode | ProsemirrorMark; 31 | 32 | export type PandocJson = { 33 | "pandoc-api-version": number[]; 34 | meta: { 35 | [key: string]: any; 36 | }; 37 | blocks: { [key: string]: any }[]; 38 | }; 39 | export type Doc = { 40 | type: "Doc"; 41 | blocks: Block[]; 42 | meta: { 43 | [key: string]: MetaValue; 44 | }; 45 | }; 46 | 47 | export type Alignment = 48 | | "AlignLeft" 49 | | "AlignRight" 50 | | "AlignCenter" 51 | | "AlignDefault"; 52 | 53 | export type QuoteType = "SingleQuote" | "DoubleQuote"; 54 | export type MathType = "DisplayMath" | "InlineMath"; 55 | 56 | export type ListNumberStyle = 57 | | "DefaultStyle" 58 | | "Example" 59 | | "Decimal" 60 | | "LowerRoman" 61 | | "UpperRoman" 62 | | "LowerAlpha" 63 | | "UpperAlpha"; 64 | 65 | export type ListNumberDelim = 66 | | "DefaultDelim" 67 | | "Period" 68 | | "OneParen" 69 | | "TwoParens"; 70 | 71 | 
export type ListAttributes = { 72 | startNumber: number; 73 | listNumberStyle: ListNumberStyle; 74 | listNumberDelim: ListNumberDelim; 75 | }; 76 | 77 | export type Format = string; 78 | 79 | export type Attr = { 80 | identifier: string; 81 | classes: string[]; 82 | properties: { [key: string]: string }; 83 | }; 84 | 85 | export type Target = { 86 | url: string; 87 | title: string; 88 | }; 89 | 90 | export type CitationMode = "AuthorInText" | "SuppressAuthor" | "NormalCitation"; 91 | 92 | export type Citation = { 93 | citationId: string; 94 | citationPrefix: Inline[]; 95 | citationSuffix: Inline[]; 96 | citationMode: CitationMode; 97 | citationNoteNum: number; 98 | citationHash: number; 99 | }; 100 | 101 | /* ~~~ Block-level definitions ~~~ */ 102 | 103 | /* Plain text, not a paragraph */ 104 | export type Plain = { 105 | type: "Plain"; 106 | content: Inline[]; 107 | }; 108 | 109 | /* Paragraph */ 110 | export type Para = { 111 | type: "Para"; 112 | content: Inline[]; 113 | }; 114 | 115 | /* Multiple non-breaking lines */ 116 | export type LineBlock = { 117 | type: "LineBlock"; 118 | content: Inline[][]; 119 | }; 120 | 121 | /* Code block (literal) with attributes */ 122 | export type CodeBlock = { 123 | type: "CodeBlock"; 124 | attr: Attr; 125 | content: string; 126 | }; 127 | 128 | /* Raw block */ 129 | export type RawBlock = { 130 | type: "RawBlock"; 131 | format: Format; 132 | content: string; 133 | }; 134 | 135 | /* Block quote (list of blocks) */ 136 | export type BlockQuote = { 137 | type: "BlockQuote"; 138 | content: Block[]; 139 | }; 140 | 141 | /* Ordered list (attributes and a list of items, each a list of blocks) */ 142 | export type OrderedList = { 143 | type: "OrderedList"; 144 | listAttributes: ListAttributes; 145 | content: Block[][]; 146 | }; 147 | 148 | /* Bullet list (list of items, each a list of blocks) */ 149 | export type BulletList = { 150 | type: "BulletList"; 151 | content: Block[][]; 152 | }; 153 | 154 | /* Definition list 155 | Each list 
item is a pair consisting of a term (a list of inlines) 156 | and one or more definitions (each a list of blocks) */ 157 | export type DefinitionList = { 158 | type: "DefinitionList"; 159 | entries: { 160 | term: Inline[]; 161 | definitions: Block[][]; 162 | }[]; 163 | }; 164 | 165 | /* Header - level (integer) and text (inlines) */ 166 | export type Header = { 167 | type: "Header"; 168 | level: number; 169 | attr: Attr; 170 | content: Inline[]; 171 | }; 172 | 173 | /* Horizontal rule */ 174 | export type HorizontalRule = { 175 | type: "HorizontalRule"; 176 | }; 177 | 178 | /* Table stuff */ 179 | export type Caption = { 180 | type: "Caption"; 181 | shortCaption?: Inline[]; 182 | content: Block[]; 183 | }; 184 | 185 | export type ColSpec = { 186 | type: "ColSpec"; 187 | alignment: Alignment; 188 | } & ({ width: number } | { defaultWidth: true }); 189 | 190 | export type Cell = { 191 | type: "Cell"; 192 | attr: Attr; 193 | alignment: Alignment; 194 | rowSpan: number; 195 | colSpan: number; 196 | content: Block[]; 197 | }; 198 | 199 | export type Row = { 200 | type: "Row"; 201 | attr: Attr; 202 | cells: Cell[]; 203 | }; 204 | 205 | export type TableHead = { 206 | type: "TableHead"; 207 | attr: Attr; 208 | rows: Row[]; 209 | }; 210 | 211 | export type TableFoot = { 212 | type: "TableFoot"; 213 | attr: Attr; 214 | rows: Row[]; 215 | }; 216 | 217 | export type TableBody = { 218 | type: "TableBody"; 219 | attr: Attr; 220 | rowHeadColumns: number; 221 | headRows: Row[]; 222 | bodyRows: Row[]; 223 | }; 224 | 225 | export type Table = { 226 | type: "Table"; 227 | attr: Attr; 228 | caption: Caption; 229 | colSpecs: ColSpec[]; 230 | head: TableHead; 231 | bodies: TableBody[]; 232 | foot: TableFoot; 233 | }; 234 | 235 | export type TableRow = { 236 | attr: Attr; 237 | rowHeadColumns: number; 238 | }; 239 | 240 | /* Generic block container with attributes */ 241 | export type Div = { 242 | type: "Div"; 243 | attr: Attr; 244 | content: Block[]; 245 | }; 246 | 247 | /* Nothing */ 
248 | export type Null = { 249 | type: "Null"; 250 | }; 251 | 252 | export type Block = 253 | | Plain 254 | | Para 255 | | LineBlock 256 | | CodeBlock 257 | | RawBlock 258 | | BlockQuote 259 | | OrderedList 260 | | BulletList 261 | | DefinitionList 262 | | Header 263 | | HorizontalRule 264 | | Table 265 | | Div 266 | | Null; 267 | 268 | /* ~~~ Inline-level definitions ~~~ */ 269 | 270 | /* Text (string) */ 271 | export type Str = { 272 | type: "Str"; 273 | content: string; 274 | }; 275 | 276 | /* Emphasized text (list of inlines) */ 277 | export type Emph = { 278 | type: "Emph"; 279 | content: Inline[]; 280 | }; 281 | 282 | /* Underlined text (list of inlines) */ 283 | export type Underline = { 284 | type: "Underline"; 285 | content: Inline[]; 286 | }; 287 | 288 | /* Strongly emphasized text (list of inlines) */ 289 | export type Strong = { 290 | type: "Strong"; 291 | content: Inline[]; 292 | }; 293 | 294 | /* Strikeout text (list of inlines) */ 295 | export type Strikeout = { 296 | type: "Strikeout"; 297 | content: Inline[]; 298 | }; 299 | 300 | /* Superscripted text (list of inlines) */ 301 | export type Superscript = { 302 | type: "Superscript"; 303 | content: Inline[]; 304 | }; 305 | 306 | /* Subscripted text (list of inlines) */ 307 | export type Subscript = { 308 | type: "Subscript"; 309 | content: Inline[]; 310 | }; 311 | 312 | /* Small caps text (list of inlines) */ 313 | export type SmallCaps = { 314 | type: "SmallCaps"; 315 | content: Inline[]; 316 | }; 317 | 318 | /* Quoted text (list of inlines) */ 319 | export type Quoted = { 320 | type: "Quoted"; 321 | quoteType: QuoteType; 322 | content: Inline[]; 323 | }; 324 | 325 | /* Citation (list of inlines) */ 326 | export type Cite = { 327 | type: "Cite"; 328 | citations: Citation[]; 329 | content: Inline[]; 330 | }; 331 | 332 | /* Inline code (literal) */ 333 | export type Code = { 334 | type: "Code"; 335 | attr: Attr; 336 | content: string; 337 | }; 338 | 339 | /* Inter-word space */ 340 | export type Space 
= { 341 | type: "Space"; 342 | }; 343 | 344 | /* Soft line break */ 345 | export type SoftBreak = { 346 | type: "SoftBreak"; 347 | }; 348 | 349 | /* Hard line break */ 350 | export type LineBreak = { 351 | type: "LineBreak"; 352 | }; 353 | 354 | /* TeX math (literal) */ 355 | export type Math = { 356 | type: "Math"; 357 | mathType: MathType; 358 | content: string; 359 | }; 360 | 361 | /* Raw inline */ 362 | export type RawInline = { 363 | type: "RawInline"; 364 | format: Format; 365 | content: string; 366 | }; 367 | 368 | /* Hyperlink: alt text (list of inlines), target */ 369 | export type Link = { 370 | type: "Link"; 371 | attr: Attr; 372 | content: Inline[]; 373 | target: Target; 374 | }; 375 | 376 | /* Image: alt text (list of inlines), target */ 377 | export type Image = { 378 | type: "Image"; 379 | attr: Attr; 380 | content: Inline[]; 381 | target: Target; 382 | }; 383 | 384 | /* Footnote or endnote */ 385 | export type Note = { 386 | type: "Note"; 387 | content: Block[]; 388 | }; 389 | 390 | /* Generic inline container with attributes */ 391 | export type Span = { 392 | type: "Span"; 393 | attr: Attr; 394 | content: Inline[]; 395 | }; 396 | 397 | /* Meta types */ 398 | 399 | export type MetaMap = { 400 | type: "MetaMap"; 401 | values: { [key: string]: MetaValue }; 402 | }; 403 | 404 | export type MetaList = { 405 | type: "MetaList"; 406 | content: MetaValue[]; 407 | }; 408 | 409 | export type MetaBool = { 410 | type: "MetaBool"; 411 | content: boolean; 412 | }; 413 | 414 | export type MetaString = { 415 | type: "MetaString"; 416 | content: string; 417 | }; 418 | export type MetaInlines = { 419 | type: "MetaInlines"; 420 | content: Inline[]; 421 | }; 422 | 423 | export type MetaBlocks = { 424 | type: "MetaBlocks"; 425 | content: Block[]; 426 | }; 427 | 428 | export type MetaValue = 429 | | MetaMap 430 | | MetaList 431 | | MetaBool 432 | | MetaString 433 | | MetaInlines 434 | | MetaBlocks; 435 | 436 | export type SimpleInline = 437 | | Emph 438 | | Underline 
439 | | Strong 440 | | Strikeout 441 | | Superscript 442 | | Subscript 443 | | SmallCaps; 444 | 445 | export type Inline = 446 | | Str 447 | | SimpleInline 448 | | Quoted 449 | | Cite 450 | | Code 451 | | Space 452 | | SoftBreak 453 | | LineBreak 454 | | Math 455 | | RawInline 456 | | Link 457 | | Image 458 | | Note 459 | | Span; 460 | 461 | export type PandocNode = Doc | Block | Inline; 462 | 463 | export const PANDOC_NODE_TYPES = [ 464 | "BlockQuote", 465 | "BulletList", 466 | "Cite", 467 | "Code", 468 | "CodeBlock", 469 | "DefinitionList", 470 | "Doc", 471 | "Div", 472 | "Emph", 473 | "Header", 474 | "HorizontalRule", 475 | "Image", 476 | "LineBlock", 477 | "LineBreak", 478 | "Link", 479 | "Math", 480 | "Note", 481 | "Null", 482 | "OrderedList", 483 | "Para", 484 | "Plain", 485 | "Quoted", 486 | "RawBlock", 487 | "RawInline", 488 | "SmallCaps", 489 | "SoftBreak", 490 | "Space", 491 | "Span", 492 | "Str", 493 | "Strikeout", 494 | "Strong", 495 | "Subscript", 496 | "Superscript", 497 | "Table", 498 | "Underline", 499 | ]; 500 | -------------------------------------------------------------------------------- /src/util.ts: -------------------------------------------------------------------------------- 1 | import { execSync, spawnSync } from "child_process"; 2 | 3 | import { parsePandocJson } from "./parse"; 4 | import { RuleSet } from "./transform/ruleset"; 5 | import { fromPandoc } from "./transform/fromPandoc/fromPandoc"; 6 | import { PandocJson } from "./types"; 7 | 8 | const MAX_BUFFER = 0 * 1024 * 1024; 9 | 10 | export const callPandoc = ( 11 | source: string, 12 | inputFormat: string, 13 | outputFormat: string = "json", 14 | extraArgs: string[] = [] 15 | ) => { 16 | return spawnSync( 17 | "pandoc", 18 | [ 19 | "-f", 20 | inputFormat, 21 | "-t", 22 | outputFormat, 23 | "--quiet", 24 | "--wrap=none", 25 | ...extraArgs, 26 | ], 27 | { input: source, maxBuffer: MAX_BUFFER } 28 | ).stdout.toString(); 29 | }; 30 | 31 | export const callPandocWithFile = ( 32 | 
sourcePath: string, 33 | outputFormat: string = "json", 34 | inputFormat: string = null, 35 | extraArgs: string[] = [] 36 | ) => { 37 | const extraArgsString = extraArgs.join(" "); 38 | const inputFormatString = inputFormat ? `-f ${inputFormat}` : ""; 39 | return execSync( 40 | `pandoc ${sourcePath} ${inputFormatString} -t ${outputFormat} ${extraArgsString}`, 41 | { maxBuffer: MAX_BUFFER } 42 | ).toString(); 43 | }; 44 | 45 | export const loadAndTransformFromPandoc = ( 46 | sourcePath: string, 47 | rules: RuleSet 48 | ) => { 49 | const pandocResult = callPandocWithFile(sourcePath); 50 | let json: PandocJson; 51 | try { 52 | json = JSON.parse(pandocResult); 53 | } catch (err) { 54 | if (pandocResult) { 55 | console.error(`Couldn't parse Pandoc result: ${pandocResult}`); 56 | } else { 57 | console.error(err); 58 | } 59 | } 60 | const pandocAst = parsePandocJson(json); 61 | return fromPandoc(pandocAst, rules).asNode(); 62 | }; 63 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowJs": true, 4 | "downlevelIteration": true, 5 | "esModuleInterop": true, 6 | "outDir": "dist", 7 | "lib": ["es6", "dom"], 8 | "baseUrl": "src", 9 | "noErrorTruncation": true, 10 | "paths": { 11 | "transform/": ["transform/index.ts"], 12 | "transform/*": ["transform/*"], 13 | "expression": ["expression/index.ts"], 14 | "expression/*": ["expression/*"], 15 | "example": ["example/index.ts"], 16 | "example/*": ["example/*"], 17 | "types": ["types.ts"], 18 | } 19 | }, 20 | "include": ["src"], 21 | "exclude": ["node_modules"] 22 | } --------------------------------------------------------------------------------