├── .eslintrc.js ├── .gitignore ├── LICENSE ├── jest.config.js ├── package-lock.json ├── package.json ├── src ├── __tests__ │ ├── __snapshots__ │ │ └── parse.test.ts.snap │ ├── parse.test.ts │ ├── roundtrip.test.ts │ └── version.test.ts ├── config.ts ├── emit.ts ├── example │ ├── convert.ts │ ├── parse.ts │ ├── rules.ts │ └── schema.ts ├── expression │ ├── __tests__ │ │ ├── expression.test.ts │ │ └── heap.test.ts │ ├── acceptor.ts │ ├── heap.ts │ ├── index.ts │ ├── parse.ts │ ├── types.ts │ └── util.ts ├── index.ts ├── meta.ts ├── pandocUtils.ts ├── parse.ts ├── transform │ ├── __tests__ │ │ └── util.test.ts │ ├── fluent.ts │ ├── fromPandoc │ │ ├── __tests__ │ │ │ ├── __snapshots__ │ │ │ │ ├── fromPandoc.test.ts.snap │ │ │ │ └── heal.test.ts.snap │ │ │ ├── fromPandoc.test.ts │ │ │ └── heal.test.ts │ │ ├── fromPandoc.ts │ │ ├── heal.ts │ │ ├── index.ts │ │ └── marks.ts │ ├── fromProsemirror │ │ ├── __tests__ │ │ │ ├── __snapshots__ │ │ │ │ └── fromProsemirror.test.ts.snap │ │ │ └── fromProsemirror.test.ts │ │ ├── fromProsemirror.ts │ │ ├── index.ts │ │ └── marks.ts │ ├── inference │ │ ├── index.ts │ │ ├── inferPandocType.ts │ │ ├── inferProsemirrorType.ts │ │ └── shared.ts │ ├── ruleset.ts │ ├── transformers │ │ ├── bare.ts │ │ ├── common.ts │ │ ├── doc.ts │ │ ├── index.ts │ │ ├── list.ts │ │ └── table │ │ │ ├── fromPandoc.ts │ │ │ ├── fromProsemirror.ts │ │ │ └── index.ts │ ├── types.ts │ └── util.ts ├── types.ts └── util.ts └── tsconfig.json /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | "env": { 3 | "browser": true, 4 | "es6": true 5 | }, 6 | "extends": [ 7 | "eslint:recommended", 8 | "plugin:@typescript-eslint/recommended", 9 | "prettier/@typescript-eslint", 10 | "plugin:prettier/recommended", 11 | ], 12 | "plugins": [ 13 | "@typescript-eslint", 14 | "prettier" 15 | ], 16 | "globals": { 17 | "Atomics": "readonly", 18 | "SharedArrayBuffer": "readonly" 19 | }, 20 | "parser": 
"@typescript-eslint/parser", 21 | "parserOptions": { 22 | "ecmaVersion": 2018, 23 | "sourceType": "module", 24 | "project": './tsconfig.json' 25 | }, 26 | "rules": { 27 | "@typescript-eslint/ban-ts-ignore": 0, 28 | "@typescript-eslint/ban-ts-comment": 0, 29 | "@typescript-eslint/camelcase": 0, 30 | "@typescript-eslint/explicit-function-return-type": 0, 31 | "@typescript-eslint/explicit-module-boundary-types": 0, 32 | "@typescript-eslint/indent": 0, 33 | "@typescript-eslint/no-explicit-any": 0, 34 | "@typescript-eslint/no-inferrable-types": 0, 35 | "@typescript-eslint/no-use-before-define": 0, 36 | "no-constant-condition": 0, 37 | "prettier/prettier": "error", 38 | }, 39 | }; -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | .vscode 3 | dist -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Ian Reynolds 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | roots: ["/src"], 3 | transform: { 4 | "^.+\\.tsx?$": "ts-jest", 5 | }, 6 | moduleDirectories: ['node_modules', 'src'] 7 | }; 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@pubpub/prosemirror-pandoc", 3 | "version": "1.1.5", 4 | "description": "Convert between Prosemirror schemas and the Pandoc AST", 5 | "main": "dist/index.js", 6 | "devDependencies": { 7 | "@types/jest": "^24.0.18", 8 | "@types/katex": "^0.10.2", 9 | "@types/node": "^12.7.4", 10 | "@types/prosemirror-model": "^1.13.2", 11 | "@typescript-eslint/eslint-plugin": "^4.29.2", 12 | "@typescript-eslint/parser": "^4.29.2", 13 | "eslint": "^6.0.1", 14 | "eslint-config-prettier": "^6.0.0", 15 | "eslint-plugin-prettier": "^3.1.0", 16 | "jest": "^24.9.0", 17 | "katex": "^0.11.1", 18 | "prettier": "^2.3.2", 19 | "prosemirror-model": "^1.14.3", 20 | "prosemirror-tables": "^0.9.5", 21 | "ts-jest": "^24.0.2", 22 | "ts-node": "^8.6.2", 23 | "tsc-alias": "^1.3.9", 24 | "typescript": "^4.3.5", 25 | "yargs": "^14.0.0" 26 | }, 27 | "scripts": { 28 | "test": "jest", 29 | "build": "tsc && tsc-alias", 30 | "build:watch": "tsc -w & tsc-alias -w", 31 | "lint": "eslint src/**/*.ts", 32 | "prepublishOnly": "npm run lint && npm test && npm run build", 33 | "convert": "ts-node src/example/convert.ts", 34 | "parse": "ts-node src/example/parse.ts" 35 | }, 36 | "repository": { 37 | "type": "git", 38 
| "url": "git+https://github.com/pubpub/prosemirror-pandoc.git" 39 | }, 40 | "author": "Ian Reynolds", 41 | "license": "MIT", 42 | "bugs": { 43 | "url": "https://github.com/pubpub/prosemirror-pandoc/issues" 44 | }, 45 | "homepage": "https://github.com/pubpub/prosemirror-pandoc#readme", 46 | "prettier": { 47 | "tabWidth": 4, 48 | "useTabs": false, 49 | "trailingComma": "es5" 50 | }, 51 | "dependencies": {} 52 | } 53 | -------------------------------------------------------------------------------- /src/__tests__/__snapshots__/parse.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`parseBlock parses an OrderedList 1`] = ` 4 | Object { 5 | "content": Array [ 6 | Array [ 7 | Object { 8 | "content": Array [ 9 | Object { 10 | "content": "One!", 11 | "type": "Str", 12 | }, 13 | ], 14 | "type": "Para", 15 | }, 16 | ], 17 | Array [ 18 | Object { 19 | "content": Array [ 20 | Object { 21 | "content": "Two!", 22 | "type": "Str", 23 | }, 24 | ], 25 | "type": "Para", 26 | }, 27 | ], 28 | Array [ 29 | Object { 30 | "content": Array [ 31 | Object { 32 | "content": "Three!", 33 | "type": "Str", 34 | }, 35 | ], 36 | "type": "Para", 37 | }, 38 | ], 39 | ], 40 | "listAttributes": Object { 41 | "listNumberDelim": "Period", 42 | "listNumberStyle": "Decimal", 43 | "startNumber": 3, 44 | }, 45 | "type": "OrderedList", 46 | } 47 | `; 48 | 49 | exports[`parseInline parses a Link 1`] = ` 50 | Object { 51 | "attr": Object { 52 | "classes": Array [ 53 | "these", 54 | "are", 55 | "classes", 56 | ], 57 | "identifier": "this-is-an-identifier", 58 | "properties": Object { 59 | "key1": "val1", 60 | "key2": "val2", 61 | }, 62 | }, 63 | "content": Array [ 64 | Object { 65 | "content": Array [ 66 | Object { 67 | "content": "It's", 68 | "type": "Str", 69 | }, 70 | Object { 71 | "type": "Space", 72 | }, 73 | Object { 74 | "content": "a", 75 | "type": "Str", 76 | }, 77 | Object { 78 | "type": 
"Space", 79 | }, 80 | Object { 81 | "content": "link!", 82 | "type": "Str", 83 | }, 84 | ], 85 | "type": "Strong", 86 | }, 87 | ], 88 | "target": Object { 89 | "title": "This is a title??", 90 | "url": "https://url.com", 91 | }, 92 | "type": "Link", 93 | } 94 | `; 95 | -------------------------------------------------------------------------------- /src/__tests__/parse.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { parseInline, parseBlock } from "../parse"; 3 | 4 | describe("parseInline", () => { 5 | it("parses a Str", () => { 6 | expect(parseInline({ t: "Str", c: "Hello!" })).toEqual({ 7 | type: "Str", 8 | content: "Hello!", 9 | }); 10 | }); 11 | 12 | it("parses a simple inline elements", () => { 13 | [ 14 | "Emph", 15 | "Strong", 16 | "Strikeout", 17 | "Superscript", 18 | "Subscript", 19 | "SmallCaps", 20 | ].forEach((type) => { 21 | expect( 22 | // @ts-ignore 23 | parseInline({ t: type, c: [{ t: "Str", c: "Testing!" }] }) 24 | ).toEqual({ 25 | type, 26 | content: [{ type: "Str", content: "Testing!" }], 27 | }); 28 | }); 29 | }); 30 | 31 | it("parses an atom element", () => { 32 | // @ts-ignore 33 | expect(parseInline({ t: "LineBreak" })).toEqual({ type: "LineBreak" }); 34 | }); 35 | 36 | it("parses a Link", () => { 37 | expect( 38 | parseInline({ 39 | t: "Link", 40 | c: [ 41 | [ 42 | "this-is-an-identifier", 43 | ["these", "are", "classes"], 44 | [ 45 | ["key1", "val1"], 46 | ["key2", "val2"], 47 | ], 48 | ], 49 | [ 50 | { 51 | t: "Strong", 52 | c: [ 53 | { t: "Str", c: "It's" }, 54 | { t: "Space" }, 55 | { t: "Str", c: "a" }, 56 | { t: "Space" }, 57 | { t: "Str", c: "link!" 
}, 58 | ], 59 | }, 60 | ], 61 | ["https://url.com", "This is a title??"], 62 | ], 63 | }) 64 | ).toMatchSnapshot(); 65 | }); 66 | }); 67 | 68 | describe("parseBlock", () => { 69 | it("parses a Para with some stuff in it", () => { 70 | expect( 71 | parseBlock({ 72 | t: "Para", 73 | c: [ 74 | { t: "Strong", c: [{ t: "Str", c: "Hello," }] }, 75 | { t: "Space" }, 76 | { t: "Str", c: "world!" }, 77 | ], 78 | }) 79 | ).toEqual({ 80 | type: "Para", 81 | content: [ 82 | { 83 | type: "Strong", 84 | content: [{ type: "Str", content: "Hello," }], 85 | }, 86 | { type: "Space" }, 87 | { type: "Str", content: "world!" }, 88 | ], 89 | }); 90 | }); 91 | 92 | it("parses an OrderedList", () => { 93 | expect( 94 | parseBlock({ 95 | t: "OrderedList", 96 | c: [ 97 | [3, { t: "Decimal" }, { t: "Period" }], 98 | [ 99 | [ 100 | { 101 | t: "Para", 102 | c: [{ t: "Str", c: "One!" }], 103 | }, 104 | ], 105 | [ 106 | { 107 | t: "Para", 108 | c: [{ t: "Str", c: "Two!" }], 109 | }, 110 | ], 111 | [ 112 | { 113 | t: "Para", 114 | c: [{ t: "Str", c: "Three!" }], 115 | }, 116 | ], 117 | ], 118 | ], 119 | }) 120 | ).toMatchSnapshot(); 121 | }); 122 | }); 123 | -------------------------------------------------------------------------------- /src/__tests__/roundtrip.test.ts: -------------------------------------------------------------------------------- 1 | import { callPandoc } from "../util"; 2 | import { parsePandocJson } from "../parse"; 3 | import { emitPandocJson } from "../emit"; 4 | 5 | /* global describe, it, expect */ 6 | 7 | const testRoundtrip = (str: string, format: string = "markdown") => { 8 | const json = JSON.parse(callPandoc(str, format, "json")); 9 | const pandocAst = parsePandocJson(json); 10 | const emittedJson = emitPandocJson(pandocAst); 11 | expect(json).toEqual(emittedJson); 12 | }; 13 | 14 | const simpleTest = ` 15 | 16 | # Test document 17 | 18 | This is my _test document!_ You're gonna love it. 19 | 20 | ## A section 21 | 22 | Here is a section. 
*This is bold, I think?* 23 | [This is a link](https://pubpub.org) for more information. 24 | 25 | ## Another section 26 | 27 | ![Aha, this is an image](https://knowyourapples.com/jazz.jpg) 28 | 29 | 1. This is a numbered list 30 | 2. This is a second entry 31 | - Oh no 32 | - We're going deeper 33 | - And deeper still 34 | - Please stop 35 | 3. Okay, that's better [^1] 36 | 37 | > This is a quote from \`someone\` famous 38 | 39 | | This is also that? [^2] 40 | 41 | \`\`\` 42 | this_is_a_code_block(); 43 | \`\`\` 44 | 45 | And now, a table of information: 46 | 47 | | Apple | Rating | 48 | |---------------|---------------------------------| 49 | | Red Delicious | Terrible | 50 | | Granny Smith | Bad, except for *pie*! | 51 | | Jazz | It's fine | 52 | | Macintosh | Hey, not bad! | 53 | | Honeycrisp | That's the hometown apple, baby | 54 | 55 | 56 | That is the end of our document. 57 | 58 | [^1]: It's actually still not very good. 59 | [^2]: Maybe. Lol. 60 | `; 61 | 62 | describe("parse/emit roundtrip", () => { 63 | it("handles a simple document", () => { 64 | testRoundtrip(simpleTest); 65 | }); 66 | }); 67 | -------------------------------------------------------------------------------- /src/__tests__/version.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { callPandoc } from "../util"; 3 | import { emitPandocJson } from "../emit"; 4 | import { PANDOC_API_VERSION, setPandocApiVersion } from "../config"; 5 | 6 | describe("PANDOC_API_VERSION", () => { 7 | it("Matches the version produced by the Pandoc executable (update it if not!)", () => { 8 | const testJson = JSON.parse(callPandoc("", "html", "json")); 9 | expect(testJson["pandoc-api-version"]).toEqual(PANDOC_API_VERSION); 10 | }); 11 | }); 12 | 13 | describe("setPandocApiVersion", () => { 14 | it("Sets the Pandoc API version specified in emitted JSON", () => { 15 | const newPandocApiVersion = [2, 30]; 16 | 
setPandocApiVersion(newPandocApiVersion); 17 | expect(PANDOC_API_VERSION).toEqual(newPandocApiVersion); 18 | const testJson = emitPandocJson({ type: "Doc", blocks: [], meta: {} }); 19 | expect(testJson["pandoc-api-version"]).toEqual(newPandocApiVersion); 20 | }); 21 | }); 22 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | export let PANDOC_API_VERSION = [1, 22, 2]; 2 | 3 | export const setPandocApiVersion = (version) => { 4 | PANDOC_API_VERSION = version; 5 | }; 6 | -------------------------------------------------------------------------------- /src/emit.ts: -------------------------------------------------------------------------------- 1 | import { PANDOC_API_VERSION } from "./config"; 2 | import { 3 | Alignment, 4 | Attr, 5 | Block, 6 | BlockQuote, 7 | BulletList, 8 | Caption, 9 | Cell, 10 | CitationMode, 11 | Cite, 12 | Code, 13 | CodeBlock, 14 | ColSpec, 15 | DefinitionList, 16 | Div, 17 | Doc, 18 | Format, 19 | Header, 20 | Image, 21 | Inline, 22 | LineBlock, 23 | Link, 24 | ListAttributes, 25 | ListNumberDelim, 26 | ListNumberStyle, 27 | Math, 28 | MathType, 29 | MetaBlocks, 30 | MetaBool, 31 | MetaInlines, 32 | MetaList, 33 | MetaMap, 34 | MetaString, 35 | MetaValue, 36 | Note, 37 | OrderedList, 38 | PandocJson, 39 | PandocNode, 40 | Para, 41 | Plain, 42 | Quoted, 43 | QuoteType, 44 | RawBlock, 45 | RawInline, 46 | Row, 47 | SimpleInline, 48 | Span, 49 | Str, 50 | Table, 51 | TableBody, 52 | TableFoot, 53 | TableHead, 54 | Target, 55 | } from "./types"; 56 | 57 | const wrapEnum = (instance: T): { t: T } => { 58 | return { t: instance }; 59 | }; 60 | 61 | const wrapAttr = (attr: Attr) => { 62 | const { identifier, classes, properties } = attr; 63 | return [ 64 | identifier ?? "", 65 | classes ?? [], 66 | properties ? 
Object.entries(properties) : [], 67 | ]; 68 | }; 69 | 70 | const wrapTarget = (target: Target) => { 71 | const { url, title } = target; 72 | return [url, title]; 73 | }; 74 | 75 | const wrapFormat = (format: Format) => { 76 | return format; 77 | }; 78 | 79 | const wrapListAttributes = (listAttributes: ListAttributes) => { 80 | const { 81 | startNumber = 1, 82 | listNumberStyle, 83 | listNumberDelim, 84 | } = listAttributes; 85 | return [ 86 | startNumber, 87 | wrapEnum(listNumberStyle), 88 | wrapEnum(listNumberDelim), 89 | ]; 90 | }; 91 | 92 | const emitAtom = (n: PandocNode) => { 93 | return { t: n.type }; 94 | }; 95 | 96 | const emitStr = (str: Str) => { 97 | const { content } = str; 98 | return { 99 | t: "Str", 100 | c: content, 101 | }; 102 | }; 103 | 104 | const emitSimpleInline = (node: SimpleInline) => { 105 | const { type, content } = node; 106 | return { 107 | t: type, 108 | c: content.map(emitInline), 109 | }; 110 | }; 111 | 112 | const emitQuoted = (quoted: Quoted) => { 113 | const { quoteType, content } = quoted; 114 | return { 115 | t: "Quoted", 116 | c: [wrapEnum(quoteType), content.map(emitInline)], 117 | }; 118 | }; 119 | 120 | const emitCite = (cite: Cite) => { 121 | const { citations, content } = cite; 122 | return { 123 | t: "Cite", 124 | c: [ 125 | citations.map((citation) => { 126 | const { 127 | citationHash, 128 | citationId, 129 | citationMode, 130 | citationNoteNum, 131 | citationPrefix, 132 | citationSuffix, 133 | } = citation; 134 | return { 135 | citationHash, 136 | citationId, 137 | citationMode: wrapEnum(citationMode), 138 | citationNoteNum, 139 | citationPrefix: citationPrefix.map(emitInline), 140 | citationSuffix: citationSuffix.map(emitInline), 141 | }; 142 | }), 143 | content.map(emitInline), 144 | ], 145 | }; 146 | }; 147 | 148 | const emitCode = (code: Code) => { 149 | const { attr, content } = code; 150 | return { 151 | t: "Code", 152 | c: [wrapAttr(attr), content], 153 | }; 154 | }; 155 | 156 | const emitMath = (math: Math) => 
{ 157 | const { mathType, content } = math; 158 | return { 159 | t: "Math", 160 | c: [wrapEnum(mathType), content], 161 | }; 162 | }; 163 | 164 | const emitRawInline = (rawInline: RawInline) => { 165 | const { format, content } = rawInline; 166 | return { 167 | t: "RawInline", 168 | c: [wrapFormat(format), content], 169 | }; 170 | }; 171 | 172 | const emitImage = (image: Image) => { 173 | const { attr, content, target } = image; 174 | return { 175 | t: "Image", 176 | c: [wrapAttr(attr), content.map(emitInline), wrapTarget(target)], 177 | }; 178 | }; 179 | 180 | const emitLink = (link: Link) => { 181 | const { attr, content, target } = link; 182 | return { 183 | t: "Link", 184 | c: [wrapAttr(attr), content.map(emitInline), wrapTarget(target)], 185 | }; 186 | }; 187 | 188 | const emitNote = (note: Note) => { 189 | const { content } = note; 190 | return { 191 | t: "Note", 192 | c: content.map(emitBlock), 193 | }; 194 | }; 195 | 196 | const emitSpan = (span: Span) => { 197 | const { attr, content } = span; 198 | return { 199 | t: "Span", 200 | c: [wrapAttr(attr), content.map(emitInline)], 201 | }; 202 | }; 203 | 204 | export const emitInline = (n: Inline): { t: string; c?: string | any[] } => { 205 | switch (n.type) { 206 | case "Str": 207 | return emitStr(n); 208 | case "Emph": 209 | case "Strong": 210 | case "Underline": 211 | case "Strikeout": 212 | case "Superscript": 213 | case "Subscript": 214 | case "SmallCaps": 215 | return emitSimpleInline(n); 216 | case "Quoted": 217 | return emitQuoted(n); 218 | case "Cite": 219 | return emitCite(n); 220 | case "Code": 221 | return emitCode(n); 222 | case "Space": 223 | case "SoftBreak": 224 | case "LineBreak": 225 | return emitAtom(n); 226 | case "Math": 227 | return emitMath(n); 228 | case "RawInline": 229 | return emitRawInline(n); 230 | case "Link": 231 | return emitLink(n); 232 | case "Image": 233 | return emitImage(n); 234 | case "Note": 235 | return emitNote(n); 236 | case "Span": 237 | return emitSpan(n); 238 | } 239 
| }; 240 | 241 | const emitPlain = (plain: Plain) => { 242 | const { content } = plain; 243 | return { 244 | t: "Plain", 245 | c: content.map(emitInline), 246 | }; 247 | }; 248 | 249 | const emitPara = (para: Para) => { 250 | const { content } = para; 251 | return { 252 | t: "Para", 253 | c: content.map(emitInline), 254 | }; 255 | }; 256 | 257 | const emitLineBlock = (lineBlock: LineBlock) => { 258 | const { content } = lineBlock; 259 | return { 260 | t: "LineBlock", 261 | c: content.map((line) => line.map(emitInline)), 262 | }; 263 | }; 264 | 265 | const emitCodeBlock = (codeBlock: CodeBlock) => { 266 | const { attr, content } = codeBlock; 267 | return { 268 | t: "CodeBlock", 269 | c: [wrapAttr(attr), content], 270 | }; 271 | }; 272 | 273 | const emitRawBlock = (rawBlock: RawBlock) => { 274 | const { format, content } = rawBlock; 275 | return { 276 | t: "RawBlock", 277 | c: [wrapFormat(format), content], 278 | }; 279 | }; 280 | 281 | const emitBlockQuote = (blockQuote: BlockQuote) => { 282 | const { content } = blockQuote; 283 | return { 284 | t: "BlockQuote", 285 | c: content.map(emitBlock), 286 | }; 287 | }; 288 | 289 | const emitOrderedList = (orderedList: OrderedList) => { 290 | const { content, listAttributes } = orderedList; 291 | return { 292 | t: "OrderedList", 293 | c: [ 294 | wrapListAttributes(listAttributes), 295 | content.map((entry) => entry.map(emitBlock)), 296 | ], 297 | }; 298 | }; 299 | 300 | const emitBulletList = (bulletList: BulletList) => { 301 | const { content } = bulletList; 302 | return { 303 | t: "BulletList", 304 | c: content.map((entry) => entry.map(emitBlock)), 305 | }; 306 | }; 307 | 308 | const emitDefinitionList = (definitionList: DefinitionList) => { 309 | const { entries } = definitionList; 310 | return { 311 | t: "DefinitionList", 312 | c: [ 313 | entries.map((entry) => { 314 | const { term, definitions } = entry; 315 | return [ 316 | term.map(emitInline), 317 | definitions.map((definition) => definition.map(emitBlock)), 318 | 
]; 319 | }), 320 | ], 321 | }; 322 | }; 323 | 324 | const emitHeader = (header: Header) => { 325 | const { level, attr, content } = header; 326 | return { 327 | t: "Header", 328 | c: [level, wrapAttr(attr), content.map(emitInline)], 329 | }; 330 | }; 331 | 332 | const emitDiv = (div: Div) => { 333 | const { attr, content } = div; 334 | return { 335 | t: "Div", 336 | c: [wrapAttr(attr), content.map(emitBlock)], 337 | }; 338 | }; 339 | 340 | const emitCell = (cell: Cell) => { 341 | const { attr, alignment, rowSpan, colSpan, content } = cell; 342 | return [ 343 | wrapAttr(attr), 344 | wrapEnum(alignment), 345 | rowSpan, 346 | colSpan, 347 | content.map(emitBlock), 348 | ]; 349 | }; 350 | 351 | const emitRow = (row: Row) => { 352 | const { attr, cells } = row; 353 | return [wrapAttr(attr), cells.map(emitCell)]; 354 | }; 355 | 356 | const emitTableHead = (head: TableHead) => { 357 | const { attr, rows } = head; 358 | return [wrapAttr(attr), rows.map(emitRow)]; 359 | }; 360 | 361 | const emitTableFoot = (foot: TableFoot) => { 362 | const { attr, rows } = foot; 363 | return [wrapAttr(attr), rows.map(emitRow)]; 364 | }; 365 | 366 | const emitTableBody = (body: TableBody) => { 367 | const { attr, rowHeadColumns, headRows, bodyRows } = body; 368 | return [ 369 | wrapAttr(attr), 370 | rowHeadColumns, 371 | headRows.map(emitRow), 372 | bodyRows.map(emitRow), 373 | ]; 374 | }; 375 | 376 | const emitColSpec = (colSpec: ColSpec) => { 377 | const { alignment } = colSpec; 378 | return [ 379 | wrapEnum(alignment), 380 | "defaultWidth" in colSpec 381 | ? { t: "ColWidthDefault" } 382 | : { t: "ColWidth", c: colSpec.width }, 383 | ]; 384 | }; 385 | 386 | const emitCaption = (caption: Caption) => { 387 | const { shortCaption, content } = caption; 388 | return [ 389 | shortCaption ? 
shortCaption.map(emitInline) : null, 390 | content.map(emitBlock), 391 | ]; 392 | }; 393 | 394 | const emitTable = (table: Table) => { 395 | const { attr, caption, colSpecs, head, bodies, foot } = table; 396 | return { 397 | t: "Table", 398 | c: [ 399 | wrapAttr(attr), 400 | emitCaption(caption), 401 | colSpecs.map(emitColSpec), 402 | emitTableHead(head), 403 | bodies.map(emitTableBody), 404 | emitTableFoot(foot), 405 | ], 406 | }; 407 | }; 408 | 409 | export const emitBlock = (n: Block): { t: string; c?: any[] } => { 410 | switch (n.type) { 411 | case "Plain": 412 | return emitPlain(n); 413 | case "Para": 414 | return emitPara(n); 415 | case "LineBlock": 416 | return emitLineBlock(n); 417 | case "CodeBlock": 418 | return emitCodeBlock(n); 419 | case "RawBlock": 420 | return emitRawBlock(n); 421 | case "BlockQuote": 422 | return emitBlockQuote(n); 423 | case "OrderedList": 424 | return emitOrderedList(n); 425 | case "BulletList": 426 | return emitBulletList(n); 427 | case "DefinitionList": 428 | return emitDefinitionList(n); 429 | case "Header": 430 | return emitHeader(n); 431 | case "HorizontalRule": 432 | case "Null": 433 | return emitAtom(n); 434 | case "Div": 435 | return emitDiv(n); 436 | case "Table": 437 | return emitTable(n); 438 | } 439 | }; 440 | 441 | const emitMetaMap = (n: MetaMap) => { 442 | const mappedValues: Record = {}; 443 | Object.entries(n.values).forEach(([key, value]) => { 444 | mappedValues[key] = emitMetaValue(value); 445 | }); 446 | return { 447 | t: "MetaMap", 448 | c: mappedValues, 449 | }; 450 | }; 451 | 452 | const emitMetaBlocks = (n: MetaBlocks) => { 453 | return { 454 | t: "MetaBlocks", 455 | c: n.content.map((block) => emitBlock(block)), 456 | }; 457 | }; 458 | 459 | const emitMetaInlines = (n: MetaInlines) => { 460 | return { 461 | t: "MetaInlines", 462 | c: n.content.map((inline) => emitInline(inline)), 463 | }; 464 | }; 465 | 466 | const emitMetaList = (n: MetaList) => { 467 | return { 468 | t: "MetaList", 469 | c: 
n.content.map((item) => emitMetaValue(item)), 470 | }; 471 | }; 472 | 473 | const emitMetaString = (n: MetaString) => { 474 | return { 475 | t: "MetaString", 476 | c: n.content, 477 | }; 478 | }; 479 | 480 | const emitMetaBool = (n: MetaBool) => { 481 | return { 482 | t: "MetaBool", 483 | c: n.content, 484 | }; 485 | }; 486 | 487 | const emitMetaValue = (n: MetaValue) => { 488 | switch (n.type) { 489 | case "MetaMap": 490 | return emitMetaMap(n); 491 | case "MetaList": 492 | return emitMetaList(n); 493 | case "MetaBool": 494 | return emitMetaBool(n); 495 | case "MetaString": 496 | return emitMetaString(n); 497 | case "MetaInlines": 498 | return emitMetaInlines(n); 499 | case "MetaBlocks": 500 | return emitMetaBlocks(n); 501 | } 502 | }; 503 | 504 | const emitMeta = (n: Doc["meta"]) => { 505 | const res: Record = {}; 506 | Object.entries(n).forEach(([key, value]) => { 507 | res[key] = emitMetaValue(value); 508 | }); 509 | return res; 510 | }; 511 | 512 | export const emitPandocJson = (doc: Doc): PandocJson => { 513 | const { blocks, meta } = doc; 514 | return { 515 | "pandoc-api-version": PANDOC_API_VERSION, 516 | blocks: blocks.map(emitBlock), 517 | meta: emitMeta(meta), 518 | }; 519 | }; 520 | -------------------------------------------------------------------------------- /src/example/convert.ts: -------------------------------------------------------------------------------- 1 | import { argv } from "yargs"; 2 | 3 | import { loadAndTransformFromPandoc } from "../util"; 4 | 5 | import { rules } from "./rules"; 6 | 7 | const main = async () => { 8 | const { 9 | _: [filePath], 10 | } = argv; 11 | console.log( 12 | JSON.stringify(loadAndTransformFromPandoc(filePath as string, rules)) 13 | ); 14 | }; 15 | 16 | main().catch((e) => console.error(e)); 17 | -------------------------------------------------------------------------------- /src/example/parse.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | 
import { argv } from "yargs"; 3 | import { parsePandocJson } from "../parse"; 4 | 5 | const main = async () => { 6 | const { 7 | _: [filePath], 8 | } = argv; 9 | const fileJson = JSON.parse(fs.readFileSync(filePath).toString()); 10 | const parsed = parsePandocJson(fileJson); 11 | console.log(JSON.stringify(parsed)); 12 | }; 13 | 14 | main().catch((e) => console.error(e)); 15 | -------------------------------------------------------------------------------- /src/example/rules.ts: -------------------------------------------------------------------------------- 1 | import * as katex from "katex"; 2 | 3 | import { Inline, Para, Plain } from "types"; 4 | import { 5 | bareMarkTransformer, 6 | docTransformer, 7 | nullTransformer, 8 | bareContentTransformer, 9 | pandocPassThroughTransformer, 10 | createListTransformer, 11 | definitionListTransformer, 12 | bareLeafTransformer, 13 | pandocQuotedTransformer, 14 | pandocTableTransformer, 15 | prosemirrorTableTransformer, 16 | } from "transform/transformers"; 17 | import { 18 | createAttr, 19 | flatten, 20 | intersperse, 21 | textFromStrSpace, 22 | textToStrSpace, 23 | } from "transform/util"; 24 | import { RuleSet } from "transform/ruleset"; 25 | 26 | import { prosemirrorSchema } from "./schema"; 27 | import { 28 | getPandocDocForHtmlString, 29 | htmlStringToPandocBlocks, 30 | htmlStringToPandocInline, 31 | pandocBlocksToHtmlString, 32 | pandocInlineToHtmlString, 33 | pandocInlineToPlainString, 34 | } from "../pandocUtils"; 35 | 36 | const rules = new RuleSet(prosemirrorSchema); 37 | 38 | // Top-level transformer for a doc 39 | rules.transform("Doc", "doc", docTransformer); 40 | 41 | // Do nothing with nothing 42 | rules.toProsemirrorNode("Null", nullTransformer); 43 | 44 | // Paragraphs are paragraphs. So are "Plain", until proven otherwise. 
45 | rules.transform( 46 | "Para | Plain", 47 | "paragraph", 48 | bareContentTransformer("Para", "paragraph") 49 | ); 50 | 51 | // Divs are just boxes of other content 52 | rules.toProsemirrorNode("Div", pandocPassThroughTransformer); 53 | 54 | // I'm not really sure what a LineBlock is, but let's just call it a single paragraph 55 | // with some hard breaks thrown in. 56 | rules.toProsemirrorNode("LineBlock", (node, { transform }) => { 57 | const lines = node.content.map((line) => transform(line).asArray()); 58 | return { 59 | type: "paragraph", 60 | content: flatten( 61 | intersperse(lines, () => ({ 62 | type: "hard_break", 63 | })) 64 | ), 65 | }; 66 | }); 67 | 68 | rules.transform("CodeBlock", "code_block", { 69 | toProsemirrorNode: (node) => { 70 | return { 71 | type: "code_block", 72 | content: [{ type: "text", text: node.content }], 73 | }; 74 | }, 75 | fromProsemirrorNode: (node) => { 76 | return { 77 | type: "CodeBlock", 78 | content: node.content.map((text) => text.text).join(""), 79 | attr: createAttr(""), 80 | }; 81 | }, 82 | }); 83 | 84 | rules.transform("BlockQuote", "blockquote", bareContentTransformer); 85 | 86 | // Use a listTransformer to take care of OrderedList and BulletList 87 | const ensureFirstElementIsParagraph = (listItem) => { 88 | if ( 89 | listItem.content.length === 0 || 90 | listItem.content[0].type !== "paragraph" 91 | ) { 92 | listItem.content.unshift({ type: "paragraph", content: [] }); 93 | } 94 | return listItem; 95 | }; 96 | 97 | rules.transform( 98 | "OrderedList", 99 | "ordered_list", 100 | createListTransformer("list_item", ensureFirstElementIsParagraph) 101 | ); 102 | 103 | rules.transform( 104 | "BulletList", 105 | "bullet_list", 106 | createListTransformer("list_item", ensureFirstElementIsParagraph) 107 | ); 108 | 109 | rules.toProsemirrorNode( 110 | "DefinitionList", 111 | definitionListTransformer("bullet_list", "list_item") 112 | ); 113 | 114 | // Tranform headers 115 | rules.transform("Header", "heading", { 116 | 
toProsemirrorNode: (node, { transform }) => { 117 | return { 118 | type: "heading", 119 | attrs: { 120 | level: node.level, 121 | id: node.attr.identifier, 122 | }, 123 | content: transform(node.content).asArray(), 124 | }; 125 | }, 126 | fromProsemirrorNode: (node, { transform }) => { 127 | return { 128 | type: "Header", 129 | level: parseInt(node.attrs.level.toString()), 130 | attr: createAttr(node.attrs.id.toString()), 131 | content: transform(node.content).asArray() as Inline[], 132 | }; 133 | }, 134 | }); 135 | 136 | rules.transform("HorizontalRule", "horizontal_rule", bareLeafTransformer); 137 | 138 | const bareMarkTransformPairs = [ 139 | ["Strong", "strong"], 140 | ["Emph", "em"], 141 | ["Strikeout", "strike"], 142 | ["Superscript", "sup"], 143 | ["Subscript", "sub"], 144 | ["Code", "code"], 145 | ] as const; 146 | 147 | bareMarkTransformPairs.forEach(([from, to]) => 148 | rules.transform(from, to, bareMarkTransformer) 149 | ); 150 | 151 | rules.transform("Link", "link", { 152 | toProsemirrorMark: (link) => { 153 | return { 154 | type: "link", 155 | attrs: { 156 | href: link.target.url, 157 | title: link.target.title, 158 | }, 159 | }; 160 | }, 161 | fromProsemirrorMark: (link, content) => { 162 | return { 163 | type: "Link", 164 | attr: createAttr(), 165 | content: content, 166 | target: { 167 | url: link.attrs.href.toString(), 168 | title: link.attrs.title.toString(), 169 | }, 170 | }; 171 | }, 172 | }); 173 | 174 | // We don't support small caps right now 175 | rules.toProsemirrorNode("SmallCaps", pandocPassThroughTransformer); 176 | 177 | // Tell the transformer how to deal with typical content-level nodes 178 | rules.toProsemirrorNode("(Str | Space)+", (nodes) => { 179 | return { 180 | type: "text", 181 | text: textFromStrSpace(nodes), 182 | }; 183 | }); 184 | 185 | // Tell the transformer how to turn Prosemirror text back into Pandoc 186 | rules.fromProsemirrorNode("text", (node) => textToStrSpace(node.text)); 187 | 188 | // Deal with line breaks 189 
| rules.transform("LineBreak", "hard_break", bareLeafTransformer); 190 | rules.toProsemirrorNode("SoftBreak", nullTransformer); 191 | 192 | // Stuff we don't have equivalents for 193 | rules.toProsemirrorNode("Span", pandocPassThroughTransformer); 194 | rules.toProsemirrorNode("Underline", pandocPassThroughTransformer); 195 | 196 | // Anything in quotation marks is its own node, to Pandoc 197 | rules.toProsemirrorNode("Quoted", pandocQuotedTransformer); 198 | 199 | rules.toProsemirrorNode("RawBlock", (node) => { 200 | return { 201 | type: "paragraph", 202 | content: [{ type: "text", text: node.content }], 203 | }; 204 | }); 205 | 206 | rules.toProsemirrorNode("RawInline", (node) => { 207 | const { format, content } = node; 208 | if (format === "tex") { 209 | return { 210 | type: "equation", 211 | attrs: { 212 | value: content, 213 | html: katex.renderToString(content, { 214 | displayMode: false, 215 | throwOnError: false, 216 | }), 217 | }, 218 | }; 219 | } 220 | return { type: "text", text: content }; 221 | }); 222 | 223 | // These next rules for images don't use transform() because they're not inverses of each other -- 224 | // the Prosemirror->Pandoc direction wraps an Image in a Para to make it block-level 225 | 226 | rules.toProsemirrorNode("Image", (node, { resources }) => { 227 | return { 228 | type: "image", 229 | attrs: { 230 | url: resources.image(node.target.url), 231 | altText: pandocInlineToPlainString(node.content), 232 | // TODO(ian): is there anything we can do about the image size here? 233 | }, 234 | }; 235 | }); 236 | 237 | rules.fromProsemirrorNode("image", (node) => { 238 | const maybeAltTextDoc = getPandocDocForHtmlString( 239 | node.attrs.altText as string 240 | ); 241 | const altTextInlines = (maybeAltTextDoc.blocks[0] as Para)?.content ?? 
[]; 242 | const captionBlocks = htmlStringToPandocBlocks( 243 | node.attrs.caption as string 244 | ); 245 | const imageWrappedInPlain: Plain = { 246 | type: "Plain", 247 | content: [ 248 | { 249 | type: "Image", 250 | content: altTextInlines, 251 | target: { 252 | url: node.attrs.url.toString(), 253 | title: "", 254 | }, 255 | attr: createAttr(""), 256 | }, 257 | ], 258 | }; 259 | if (captionBlocks.length > 0) { 260 | return [imageWrappedInPlain, ...captionBlocks]; 261 | } 262 | return imageWrappedInPlain; 263 | }); 264 | 265 | rules.transform("Cite", "citation", { 266 | toProsemirrorNode: (node, { count }) => { 267 | const { content } = node; 268 | const unstructuredValue = pandocInlineToHtmlString(content); 269 | return { 270 | type: "citation", 271 | attrs: { 272 | unstructuredValue, 273 | count: 1 + count("Cite"), 274 | }, 275 | }; 276 | }, 277 | fromProsemirrorNode: (node) => { 278 | const inputHtml = (node.attrs.html || 279 | node.attrs.unstructuredValue) as string; 280 | const citationNumber = 281 | typeof node.attrs.count === "number" 282 | ? 
node.attrs.count 283 | : parseInt(node.attrs.count as string); 284 | return { 285 | type: "Cite", 286 | content: htmlStringToPandocInline(inputHtml), 287 | citations: [ 288 | { 289 | citationId: "", 290 | citationPrefix: [], 291 | citationSuffix: [], 292 | citationNoteNum: citationNumber, 293 | citationHash: citationNumber, 294 | citationMode: "NormalCitation", 295 | }, 296 | ], 297 | }; 298 | }, 299 | }); 300 | 301 | rules.transform("Note", "footnote", { 302 | toProsemirrorNode: (node, { count }) => { 303 | const { content } = node; 304 | return { 305 | type: "footnote", 306 | attrs: { 307 | unstructuredValue: pandocBlocksToHtmlString(content), 308 | count: 1 + count("Note"), 309 | }, 310 | }; 311 | }, 312 | fromProsemirrorNode: (node) => { 313 | const noteContent = (node.attrs.unstructuredValue || "") as string; 314 | return { 315 | type: "Note", 316 | content: htmlStringToPandocBlocks(noteContent), 317 | }; 318 | }, 319 | }); 320 | 321 | rules.toProsemirrorNode("Math", (node) => { 322 | const { mathType, content } = node; 323 | const isDisplay = mathType === "DisplayMath"; 324 | const prosemirrorType = isDisplay ? 
"block_equation" : "equation"; 325 | return { 326 | type: prosemirrorType, 327 | attrs: { 328 | value: content, 329 | html: katex.renderToString(content, { 330 | displayMode: isDisplay, 331 | throwOnError: false, 332 | }), 333 | }, 334 | }; 335 | }); 336 | 337 | rules.fromProsemirrorNode("equation", (node) => { 338 | return { 339 | type: "Math", 340 | mathType: "InlineMath", 341 | content: node.attrs.value.toString(), 342 | }; 343 | }); 344 | 345 | rules.fromProsemirrorNode("block_equation", (node) => { 346 | return { 347 | type: "Plain", 348 | content: [ 349 | { 350 | type: "Math", 351 | mathType: "DisplayMath", 352 | content: node.attrs.value.toString(), 353 | }, 354 | ], 355 | }; 356 | }); 357 | 358 | rules.toProsemirrorNode("Table", pandocTableTransformer); 359 | rules.fromProsemirrorNode("table", prosemirrorTableTransformer); 360 | 361 | rules.validate(); 362 | 363 | export { rules }; 364 | -------------------------------------------------------------------------------- /src/example/schema.ts: -------------------------------------------------------------------------------- 1 | import { Schema } from "prosemirror-model"; 2 | import { tableNodes } from "prosemirror-tables"; 3 | 4 | const nodes = { 5 | doc: { 6 | content: "block+", 7 | attrs: { 8 | meta: { default: {} }, 9 | }, 10 | }, 11 | paragraph: { 12 | content: "inline*", 13 | group: "block", 14 | attrs: { 15 | class: { default: null }, 16 | }, 17 | }, 18 | blockquote: { 19 | content: "block+", 20 | group: "block", 21 | }, 22 | horizontal_rule: { 23 | group: "block", 24 | }, 25 | heading: { 26 | attrs: { 27 | level: { default: 1 }, 28 | id: { default: "" }, 29 | }, 30 | content: "inline*", 31 | group: "block", 32 | defining: true, 33 | }, 34 | image: { 35 | atom: true, 36 | attrs: { 37 | url: { default: null }, 38 | size: { default: 50 }, // number as percentage 39 | align: { default: "center" }, 40 | caption: { default: "" }, 41 | altText: { default: "" }, 42 | }, 43 | inline: false, 44 | group: "block", 
45 | }, 46 | ordered_list: { 47 | content: "list_item+", 48 | group: "block", 49 | attrs: { order: { default: 1 } }, 50 | }, 51 | bullet_list: { 52 | content: "list_item+", 53 | group: "block", 54 | }, 55 | list_item: { 56 | content: "paragraph block*", 57 | defining: true, 58 | }, 59 | code_block: { 60 | content: "text*", 61 | group: "block", 62 | }, 63 | text: { 64 | inline: true, 65 | group: "inline", 66 | }, 67 | hard_break: { 68 | inline: true, 69 | group: "inline", 70 | }, 71 | equation: { 72 | atom: true, 73 | inline: true, 74 | attrs: { 75 | value: { default: "" }, 76 | html: { default: "" }, 77 | }, 78 | group: "inline", 79 | }, 80 | block_equation: { 81 | atom: true, 82 | attrs: { 83 | value: { default: "" }, 84 | html: { default: "" }, 85 | }, 86 | inline: false, 87 | group: "block", 88 | }, 89 | citation: { 90 | atom: true, 91 | attrs: { 92 | value: { default: "" }, 93 | unstructuredValue: { default: "" }, 94 | count: { default: 0 }, 95 | }, 96 | inline: true, 97 | group: "inline", 98 | }, 99 | footnote: { 100 | atom: true, 101 | attrs: { 102 | value: { default: "" }, 103 | structuredValue: { default: "" }, 104 | count: { default: 0 }, 105 | }, 106 | inline: true, 107 | group: "inline", 108 | }, 109 | ...tableNodes({ 110 | tableGroup: "block", 111 | cellContent: "block+", 112 | cellAttributes: {}, 113 | }), 114 | }; 115 | 116 | const marks = { 117 | em: {}, 118 | strong: {}, 119 | link: { 120 | inclusive: false, 121 | attrs: { 122 | href: { default: "" }, 123 | title: { default: null }, 124 | target: { default: null }, 125 | }, 126 | }, 127 | sub: {}, 128 | sup: {}, 129 | strike: {}, 130 | code: {}, 131 | }; 132 | 133 | export const prosemirrorSchema = new Schema({ nodes, marks, topNode: "doc" }); 134 | -------------------------------------------------------------------------------- /src/expression/__tests__/expression.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | 
import { acceptItems, parseExpr } from "expression"; 3 | 4 | type Node = { 5 | type: string; 6 | }; 7 | 8 | const n = (type: string): Node => ({ type }); 9 | 10 | describe("parseRegexp", () => { 11 | it("handles a simple identifier", () => { 12 | expect(parseExpr("Foo")).toEqual({ 13 | type: "identifier", 14 | identifier: "Foo", 15 | }); 16 | }); 17 | 18 | it("handles a sequence of identifiers", () => { 19 | expect(parseExpr("Foo Bar Baz")).toEqual({ 20 | type: "sequence", 21 | children: [ 22 | { 23 | type: "identifier", 24 | identifier: "Foo", 25 | }, 26 | { 27 | type: "identifier", 28 | identifier: "Bar", 29 | }, 30 | { 31 | type: "identifier", 32 | identifier: "Baz", 33 | }, 34 | ], 35 | }); 36 | }); 37 | 38 | it("handles zero-or-more quantifiers (*)", () => { 39 | expect(parseExpr("Foo*")).toEqual({ 40 | type: "zeroOrMore", 41 | child: { 42 | type: "identifier", 43 | identifier: "Foo", 44 | }, 45 | }); 46 | }); 47 | 48 | it("handles zero-or-more quantifiers (*) in context", () => { 49 | expect(parseExpr("Foo Bar* Baz")).toEqual({ 50 | type: "sequence", 51 | children: [ 52 | { 53 | type: "identifier", 54 | identifier: "Foo", 55 | }, 56 | { 57 | type: "zeroOrMore", 58 | child: { 59 | type: "identifier", 60 | identifier: "Bar", 61 | }, 62 | }, 63 | { 64 | type: "identifier", 65 | identifier: "Baz", 66 | }, 67 | ], 68 | }); 69 | }); 70 | 71 | it("handles one-or-more quantifiers (+)", () => { 72 | expect(parseExpr("Foo+")).toEqual({ 73 | type: "oneOrMore", 74 | child: { 75 | type: "identifier", 76 | identifier: "Foo", 77 | }, 78 | }); 79 | }); 80 | 81 | it("handles one-or-more quantifiers (+) in context", () => { 82 | expect(parseExpr("Foo Bar Baz+")).toEqual({ 83 | type: "sequence", 84 | children: [ 85 | { 86 | type: "identifier", 87 | identifier: "Foo", 88 | }, 89 | { 90 | type: "identifier", 91 | identifier: "Bar", 92 | }, 93 | { 94 | type: "oneOrMore", 95 | child: { 96 | type: "identifier", 97 | identifier: "Baz", 98 | }, 99 | }, 100 | ], 101 | }); 102 | }); 103 
| 104 | it("handles a range quantifier with a lower and upper bound", () => { 105 | expect(parseExpr("(Foo){3, 5}")).toEqual({ 106 | type: "range", 107 | lowerBound: 3, 108 | upperBound: 5, 109 | child: { 110 | type: "identifier", 111 | identifier: "Foo", 112 | }, 113 | }); 114 | }); 115 | 116 | it("handles a range quantifier with only a lower bound", () => { 117 | expect(parseExpr("(Foo){10,}")).toEqual({ 118 | type: "range", 119 | lowerBound: 10, 120 | upperBound: null, 121 | child: { 122 | type: "identifier", 123 | identifier: "Foo", 124 | }, 125 | }); 126 | }); 127 | 128 | it("handles an exact range quantifier", () => { 129 | expect(parseExpr("(Foo){99}")).toEqual({ 130 | type: "range", 131 | lowerBound: 99, 132 | upperBound: 99, 133 | child: { 134 | type: "identifier", 135 | identifier: "Foo", 136 | }, 137 | }); 138 | }); 139 | 140 | it("handles a choice of identifiers", () => { 141 | expect(parseExpr("Foo | Bar | Baz")).toEqual({ 142 | type: "choice", 143 | children: [ 144 | { 145 | type: "identifier", 146 | identifier: "Foo", 147 | }, 148 | { 149 | type: "identifier", 150 | identifier: "Bar", 151 | }, 152 | { 153 | type: "identifier", 154 | identifier: "Baz", 155 | }, 156 | ], 157 | }); 158 | }); 159 | 160 | it("throws an error when there is an ambiguous mix of sequence and choice markers", () => { 161 | expect(() => parseExpr("Foo | Bar Baz")).toThrow(); 162 | }); 163 | 164 | it("handles grouped choices and sequences", () => { 165 | expect(parseExpr("Foo (Bar | Baz)")).toEqual({ 166 | type: "sequence", 167 | children: [ 168 | { 169 | type: "identifier", 170 | identifier: "Foo", 171 | }, 172 | { 173 | type: "choice", 174 | children: [ 175 | { 176 | type: "identifier", 177 | identifier: "Bar", 178 | }, 179 | { 180 | type: "identifier", 181 | identifier: "Baz", 182 | }, 183 | ], 184 | }, 185 | ], 186 | }); 187 | }); 188 | 189 | it("handles grouped choices and sequences with quantifiers", () => { 190 | expect(parseExpr("(Foo Bar)+ (Bar | Baz)*")).toEqual({ 191 
| type: "sequence", 192 | children: [ 193 | { 194 | type: "oneOrMore", 195 | child: { 196 | type: "sequence", 197 | children: [ 198 | { 199 | type: "identifier", 200 | identifier: "Foo", 201 | }, 202 | { 203 | type: "identifier", 204 | identifier: "Bar", 205 | }, 206 | ], 207 | }, 208 | }, 209 | { 210 | type: "zeroOrMore", 211 | child: { 212 | type: "choice", 213 | children: [ 214 | { 215 | type: "identifier", 216 | identifier: "Bar", 217 | }, 218 | { 219 | type: "identifier", 220 | identifier: "Baz", 221 | }, 222 | ], 223 | }, 224 | }, 225 | ], 226 | }); 227 | }); 228 | 229 | it("handles a very complicated expression", () => { 230 | expect( 231 | parseExpr( 232 | "(Foo Bar+ (Qux* | Baz){10}){3,5} (Bar* | (Baz{6,} Foo))*" 233 | ) 234 | ).toEqual({ 235 | type: "sequence", 236 | children: [ 237 | { 238 | type: "range", 239 | lowerBound: 3, 240 | upperBound: 5, 241 | child: { 242 | type: "sequence", 243 | children: [ 244 | { 245 | type: "identifier", 246 | identifier: "Foo", 247 | }, 248 | { 249 | type: "oneOrMore", 250 | child: { 251 | type: "identifier", 252 | identifier: "Bar", 253 | }, 254 | }, 255 | { 256 | type: "range", 257 | upperBound: 10, 258 | lowerBound: 10, 259 | child: { 260 | type: "choice", 261 | children: [ 262 | { 263 | type: "zeroOrMore", 264 | child: { 265 | type: "identifier", 266 | identifier: "Qux", 267 | }, 268 | }, 269 | { 270 | type: "identifier", 271 | identifier: "Baz", 272 | }, 273 | ], 274 | }, 275 | }, 276 | ], 277 | }, 278 | }, 279 | { 280 | type: "zeroOrMore", 281 | child: { 282 | type: "choice", 283 | children: [ 284 | { 285 | type: "zeroOrMore", 286 | child: { 287 | type: "identifier", 288 | identifier: "Bar", 289 | }, 290 | }, 291 | { 292 | type: "sequence", 293 | children: [ 294 | { 295 | type: "range", 296 | lowerBound: 6, 297 | upperBound: null, 298 | child: { 299 | type: "identifier", 300 | identifier: "Baz", 301 | }, 302 | }, 303 | 304 | { 305 | type: "identifier", 306 | identifier: "Foo", 307 | }, 308 | ], 309 | }, 310 | ], 
311 | }, 312 | }, 313 | ], 314 | }); 315 | }); 316 | 317 | it("normalizes odd spacing and extra parens from an expression", () => { 318 | expect( 319 | parseExpr( 320 | " ((Foo Bar+ ((Qux* |Baz)) )+ ( (Bar)* | (Baz Foo))* ) " 321 | ) 322 | ).toEqual({ 323 | type: "sequence", 324 | children: [ 325 | { 326 | type: "oneOrMore", 327 | child: { 328 | type: "sequence", 329 | children: [ 330 | { 331 | type: "identifier", 332 | identifier: "Foo", 333 | }, 334 | { 335 | type: "oneOrMore", 336 | child: { 337 | type: "identifier", 338 | identifier: "Bar", 339 | }, 340 | }, 341 | { 342 | type: "choice", 343 | children: [ 344 | { 345 | type: "zeroOrMore", 346 | child: { 347 | type: "identifier", 348 | identifier: "Qux", 349 | }, 350 | }, 351 | { 352 | type: "identifier", 353 | identifier: "Baz", 354 | }, 355 | ], 356 | }, 357 | ], 358 | }, 359 | }, 360 | { 361 | type: "zeroOrMore", 362 | child: { 363 | type: "choice", 364 | children: [ 365 | { 366 | type: "zeroOrMore", 367 | child: { 368 | type: "identifier", 369 | identifier: "Bar", 370 | }, 371 | }, 372 | { 373 | type: "sequence", 374 | children: [ 375 | { 376 | type: "identifier", 377 | identifier: "Baz", 378 | }, 379 | 380 | { 381 | type: "identifier", 382 | identifier: "Foo", 383 | }, 384 | ], 385 | }, 386 | ], 387 | }, 388 | }, 389 | ], 390 | }); 391 | }); 392 | }); 393 | 394 | describe("accepts", () => { 395 | const acceptExpr = (pattern, nodes) => 396 | acceptItems( 397 | parseExpr(pattern), 398 | nodes, 399 | (str: string) => (node: Node) => node.type === str 400 | ); 401 | 402 | it("accepts an empty node array where expected", () => { 403 | expect(acceptExpr("Foo*", [])).toEqual(0); 404 | }); 405 | 406 | it("handles an array of empty nodes", () => { 407 | expect(acceptExpr("Foo", [])).toEqual(0); 408 | }); 409 | 410 | it("accepts a simple identifier", () => { 411 | expect(acceptExpr("Foo", [n("Foo")])).toEqual(1); 412 | }); 413 | 414 | it("rejects a mismatched identifier", () => { 415 | expect(acceptExpr("Foo", 
[n("Bar")])).toEqual(0); 416 | }); 417 | 418 | it("accepts a sequence of identifiers", () => { 419 | expect(acceptExpr("Foo Bar", [n("Foo"), n("Bar")])).toEqual(2); 420 | }); 421 | 422 | it("rejects a sequence of mismatched identifiers", () => { 423 | expect(acceptExpr("Foo Bar", [n("Foo"), n("Baz")])).toEqual(0); 424 | }); 425 | 426 | it("accepts a choice of identifiers", () => { 427 | expect(acceptExpr("Foo | Bar", [n("Foo")])).toEqual(1); 428 | expect(acceptExpr("Foo | Bar", [n("Bar")])).toEqual(1); 429 | }); 430 | 431 | it("rejects a mismatched choice of identifiers", () => { 432 | expect(acceptExpr("Foo | Bar", [n("Baz")])).toEqual(0); 433 | expect(acceptExpr("Foo | Bar", [n("Qux")])).toEqual(0); 434 | }); 435 | 436 | it("accepts zero or more identifiers", () => { 437 | expect(acceptExpr("Foo*", [])).toEqual(0); 438 | expect(acceptExpr("Foo*", [n("Foo")])).toEqual(1); 439 | expect(acceptExpr("Foo*", [n("Foo"), n("Foo")])).toEqual(2); 440 | expect(acceptExpr("Foo*", [n("Foo"), n("Foo"), n("Foo")])).toEqual(3); 441 | }); 442 | 443 | it("accepts one or more identifiers", () => { 444 | expect(acceptExpr("Foo+", [])).toEqual(0); 445 | expect(acceptExpr("Foo+", [n("Foo")])).toEqual(1); 446 | expect(acceptExpr("Foo+", [n("Foo"), n("Foo")])).toEqual(2); 447 | expect(acceptExpr("Foo+", [n("Foo"), n("Foo"), n("Foo")])).toEqual(3); 448 | }); 449 | 450 | it("accepts the number of nodes specified by a range quantifier", () => { 451 | expect(acceptExpr("Foo{1,3}", [])).toEqual(0); 452 | expect(acceptExpr("Foo{1,3}", [n("Foo")])).toEqual(1); 453 | expect(acceptExpr("Foo{1,3}", [n("Foo"), n("Foo")])).toEqual(2); 454 | expect(acceptExpr("Foo{1,3}", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 455 | 3 456 | ); 457 | expect( 458 | acceptExpr("Foo{1,3}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 459 | ).toEqual(3); 460 | }); 461 | 462 | it("accepts the number of nodes specified by an exact range quantifier", () => { 463 | expect(acceptExpr("Foo{3}", [])).toEqual(0); 464 | 
expect(acceptExpr("Foo{3}", [n("Foo")])).toEqual(0); 465 | expect(acceptExpr("Foo{3}", [n("Foo"), n("Foo")])).toEqual(0); 466 | expect(acceptExpr("Foo{3}", [n("Foo"), n("Foo"), n("Foo")])).toEqual(3); 467 | expect( 468 | acceptExpr("Foo{3}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 469 | ).toEqual(3); 470 | }); 471 | 472 | it("accepts the number of nodes specified by an unbounded range quantifier", () => { 473 | expect(acceptExpr("Foo{2,}", [])).toEqual(0); 474 | expect(acceptExpr("Foo{2,}", [n("Foo")])).toEqual(0); 475 | expect(acceptExpr("Foo{2,}", [n("Foo"), n("Foo")])).toEqual(2); 476 | expect(acceptExpr("Foo{2,}", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 477 | 3 478 | ); 479 | expect( 480 | acceptExpr("Foo{2,}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 481 | ).toEqual(4); 482 | }); 483 | 484 | it("composes quantifiers", () => { 485 | expect(acceptExpr("(Foo{2})+", [])).toEqual(0); 486 | expect(acceptExpr("(Foo{2})+", [n("Foo")])).toEqual(0); 487 | expect(acceptExpr("(Foo{2})+", [n("Foo"), n("Foo")])).toEqual(2); 488 | expect(acceptExpr("(Foo{2})+", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 489 | 2 490 | ); 491 | expect( 492 | acceptExpr("(Foo{2})+", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 493 | ).toEqual(4); 494 | }); 495 | 496 | it("handles range quantifiers in a sequence", () => { 497 | expect(acceptExpr("Foo Bar{1,2} Baz", [n("Foo"), n("Baz")])).toEqual(0); 498 | expect( 499 | acceptExpr("Foo Bar{1,2} Baz", [n("Foo"), n("Bar"), n("Baz")]) 500 | ).toEqual(3); 501 | expect( 502 | acceptExpr("Foo Bar{1,2} Baz", [ 503 | n("Foo"), 504 | n("Bar"), 505 | n("Bar"), 506 | n("Baz"), 507 | ]) 508 | ).toEqual(4); 509 | expect( 510 | acceptExpr("Foo Bar{1,2} Baz", [ 511 | n("Foo"), 512 | n("Bar"), 513 | n("Bar"), 514 | n("Bar"), 515 | n("Baz"), 516 | ]) 517 | ).toEqual(0); 518 | }); 519 | 520 | it("can use an unbounded range quantifier like a zeroOrMore", () => { 521 | expect(acceptExpr("Foo{0,}", [])).toEqual(0); 522 | expect(acceptExpr("Foo{0,}", 
[n("Foo")])).toEqual(1); 523 | expect(acceptExpr("Foo{0,}", [n("Foo"), n("Foo")])).toEqual(2); 524 | expect(acceptExpr("Foo{0,}", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 525 | 3 526 | ); 527 | expect( 528 | acceptExpr("Foo{0,}", [n("Foo"), n("Foo"), n("Foo"), n("Foo")]) 529 | ).toEqual(4); 530 | }); 531 | 532 | it("returns a correct value when there are leftover nodes", () => { 533 | expect(acceptExpr("Foo+", [n("Bar")])).toEqual(0); 534 | expect(acceptExpr("Foo Foo", [n("Foo"), n("Foo"), n("Foo")])).toEqual( 535 | 2 536 | ); 537 | }); 538 | 539 | it("accepts a sequence of multiple identifiers", () => { 540 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Foo")])).toEqual(2); 541 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Foo"), n("Bar")])).toEqual( 542 | 3 543 | ); 544 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Bar")])).toEqual(2); 545 | }); 546 | 547 | it("handles a sequence of multiple identifiers with quantifiers", () => { 548 | expect(acceptExpr("Foo+ Bar*", [n("Foo"), n("Foo"), n("Baz")])).toEqual( 549 | 2 550 | ); 551 | expect(acceptExpr("Foo+ Bar*", [n("Bar")])).toEqual(0); 552 | }); 553 | 554 | it("accepts a combination of choices and sequences", () => { 555 | expect( 556 | acceptExpr("(Foo | Bar) (Bar Baz)+", [n("Foo"), n("Bar"), n("Baz")]) 557 | ).toEqual(3); 558 | expect( 559 | acceptExpr("(Foo | Bar) (Bar Baz)+", [ 560 | n("Foo"), 561 | n("Bar"), 562 | n("Baz"), 563 | n("Bar"), 564 | n("Baz"), 565 | ]) 566 | ).toEqual(5); 567 | }); 568 | 569 | it("handles a combination of choices and sequences that matches some nodes", () => { 570 | expect( 571 | acceptExpr("(Foo | Bar) (Bar Baz)+", [n("Qux"), n("Bar"), n("Baz")]) 572 | ).toEqual(0); 573 | expect( 574 | acceptExpr("(Foo | Bar) (Bar Baz)+", [ 575 | n("Foo"), 576 | n("Bar"), 577 | n("Baz"), 578 | n("Bar"), 579 | ]) 580 | ).toEqual(3); 581 | }); 582 | 583 | it("handles a combination of quantifiers", () => { 584 | expect( 585 | acceptExpr("(Foo | Bar)* (Bar Baz)+ Qux", [ 586 | n("Foo"), 587 | 
n("Bar"), 588 | n("Bar"), 589 | n("Bar"), 590 | n("Bar"), 591 | n("Baz"), 592 | n("Bar"), 593 | n("Baz"), 594 | n("Qux"), 595 | ]) 596 | ).toEqual(9); 597 | }); 598 | 599 | it("handles nodes that might be swallowed by a greedy quantifier", () => { 600 | expect( 601 | acceptExpr("(Foo | Bar)* Bar", [ 602 | n("Foo"), 603 | n("Bar"), 604 | n("Bar"), 605 | n("Bar"), 606 | n("Bar"), 607 | ]) 608 | ).toEqual(5); 609 | }); 610 | 611 | it("handles an unnecessarily complicated expression", () => { 612 | expect( 613 | acceptExpr("(Foo | Bar)* Baz (Bar | Qux{2,5} | Baz)+ Qux", [ 614 | n("Foo"), 615 | n("Bar"), 616 | n("Baz"), 617 | n("Qux"), 618 | n("Qux"), 619 | n("Baz"), 620 | n("Qux"), 621 | n("Qux"), 622 | n("Qux"), 623 | n("Bar"), 624 | n("Qux"), 625 | n("Foo"), 626 | n("Bar"), 627 | ]) 628 | ).toEqual(11); 629 | }); 630 | }); 631 | -------------------------------------------------------------------------------- /src/expression/__tests__/heap.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import Heap from "../heap"; 3 | 4 | const consumeHeap = (heap) => { 5 | const res = []; 6 | while (heap.length()) { 7 | res.push(heap.pop()); 8 | } 9 | return res; 10 | }; 11 | 12 | describe("Heap", () => { 13 | it("works as a min-heap", () => { 14 | const heap = new Heap((x) => x, [8, 6, 7, 5, 3, 0, 9]); 15 | expect(consumeHeap(heap)).toEqual([0, 3, 5, 6, 7, 8, 9]); 16 | }); 17 | 18 | it("works as a max-heap", () => { 19 | const heap = new Heap((x) => -x, [8, 6, 7, 5, 3, 0, 9]); 20 | expect(consumeHeap(heap)).toEqual([9, 8, 7, 6, 5, 3, 0]); 21 | }); 22 | }); 23 | -------------------------------------------------------------------------------- /src/expression/acceptor.ts: -------------------------------------------------------------------------------- 1 | import Heap from "./heap"; 2 | import { Expr, IdentifierMatch } from "./types"; 3 | 4 | type State = { 5 | addSuccessor: (s: State) => void; 6 | 
getSuccessors: (n: Item) => State[]; 7 | consumesItem: () => boolean; 8 | }; 9 | 10 | type Machine = { 11 | startState: State; 12 | acceptState: State; 13 | }; 14 | 15 | type SearchPosition = { 16 | state: State; 17 | consumedItems: number; 18 | }; 19 | 20 | type DiscoveryState = { 21 | discoveredPositions: SearchPosition[]; 22 | positionsHeap: Heap>; 23 | }; 24 | 25 | const state = (guard?: (n: Item) => boolean): State => { 26 | const successors: Set> = new Set(); 27 | 28 | const addSuccessor = (s: State) => { 29 | successors.add(s); 30 | }; 31 | 32 | const getSuccessors = (currentItem: Item) => { 33 | const passesGuard = !guard || (currentItem && guard(currentItem)); 34 | return passesGuard ? Array.from(successors) : []; 35 | }; 36 | 37 | const consumesItem = () => { 38 | return !!guard; 39 | }; 40 | 41 | return { 42 | addSuccessor, 43 | getSuccessors, 44 | consumesItem, 45 | }; 46 | }; 47 | 48 | const createAcceptanceMachine = ( 49 | expr: Expr, 50 | matcher: IdentifierMatch 51 | ): Machine => { 52 | const startState = state(); 53 | const acceptState = state(); 54 | 55 | if (expr.type === "identifier") { 56 | const identifierState = state(matcher(expr.identifier)); 57 | startState.addSuccessor(identifierState); 58 | identifierState.addSuccessor(acceptState); 59 | } else if (expr.type === "choice") { 60 | const choiceMachines = expr.children.map((x) => 61 | createAcceptanceMachine(x, matcher) 62 | ); 63 | choiceMachines.forEach((machine) => { 64 | startState.addSuccessor(machine.startState); 65 | machine.acceptState.addSuccessor(acceptState); 66 | }); 67 | } else if (expr.type === "sequence") { 68 | const sequenceMachines = expr.children.map((x) => 69 | createAcceptanceMachine(x, matcher) 70 | ); 71 | const finalAcceptState = sequenceMachines.reduce( 72 | (intermediateAcceptState, nextMachine) => { 73 | intermediateAcceptState.addSuccessor(nextMachine.startState); 74 | return nextMachine.acceptState; 75 | }, 76 | startState 77 | ); 78 | 
finalAcceptState.addSuccessor(acceptState); 79 | } else if (expr.type === "range") { 80 | const { lowerBound, upperBound, child } = expr; 81 | if ( 82 | (upperBound !== null && upperBound < lowerBound) || 83 | (lowerBound === 0 && upperBound === 0) || 84 | lowerBound < 0 85 | ) { 86 | throw new Error(`Invalid range: [${lowerBound},${upperBound}]`); 87 | } 88 | const make = () => createAcceptanceMachine(child, matcher); 89 | const machines: Machine[] = []; 90 | const machineCount = Math.max( 91 | 1, 92 | upperBound !== null ? upperBound : lowerBound 93 | ); 94 | for (let i = 0; i < machineCount; i++) { 95 | const machine = make(); 96 | machines.push(machine); 97 | if (i > 0) { 98 | const prev = machines[i - 1]; 99 | prev.acceptState.addSuccessor(machine.startState); 100 | } 101 | if (i + 1 >= lowerBound) { 102 | machine.acceptState.addSuccessor(acceptState); 103 | } 104 | } 105 | if (upperBound === null) { 106 | const last = machines[machines.length - 1]; 107 | last.acceptState.addSuccessor(last.startState); 108 | } 109 | const first = machines[0]; 110 | startState.addSuccessor(first.startState); 111 | if (lowerBound === 0) { 112 | startState.addSuccessor(acceptState); 113 | } 114 | } else if (expr.type === "zeroOrMore") { 115 | const innerMachine = createAcceptanceMachine(expr.child, matcher); 116 | startState.addSuccessor(innerMachine.startState); 117 | innerMachine.acceptState.addSuccessor(acceptState); 118 | startState.addSuccessor(acceptState); 119 | acceptState.addSuccessor(startState); 120 | } else if (expr.type === "oneOrMore") { 121 | const innerMachine = createAcceptanceMachine(expr.child, matcher); 122 | startState.addSuccessor(innerMachine.startState); 123 | innerMachine.acceptState.addSuccessor(acceptState); 124 | acceptState.addSuccessor(startState); 125 | } else { 126 | startState.addSuccessor(acceptState); 127 | } 128 | 129 | return { startState, acceptState }; 130 | }; 131 | 132 | // Adds a position to a DiscoveryState if it hasn't already been 
discovered. 133 | const maybeEnqueuePosition = ( 134 | position: SearchPosition, 135 | discoveryState: DiscoveryState 136 | ) => { 137 | const { discoveredPositions, positionsHeap } = discoveryState; 138 | const hasAlreadyDiscoveredPosition = discoveredPositions.some( 139 | (discoveredPosition) => 140 | discoveredPosition.state === position.state && 141 | discoveredPosition.consumedItems === position.consumedItems 142 | ); 143 | if (!hasAlreadyDiscoveredPosition) { 144 | positionsHeap.push(position); 145 | } 146 | }; 147 | 148 | // Mark a position discovered in a DiscoveryState. 149 | const discoverPosition = ( 150 | position: SearchPosition, 151 | discoveryState: DiscoveryState 152 | ) => { 153 | discoveryState.discoveredPositions.push(position); 154 | }; 155 | 156 | // We want SearchPositions with high `consumedItems` to come out first, so they get low scores. 157 | const heapScore = (item: SearchPosition) => 0 - item.consumedItems; 158 | 159 | export const createItemAcceptor = ( 160 | expr: Expr, 161 | matchTest: IdentifierMatch 162 | ) => { 163 | // An "item acceptor" takes an Expr like (A | B)* and tests it against a list of items like 164 | // [A, B, B, A...]. We do this by transforming the expression into a state machine and 165 | // performing a graph search through the states, with the understanding that some edges between 166 | // states "consume" items in the list. A few bits of terminology first: 167 | // 168 | // - an expression or Expr is a pattern matching tool to test a list of items. The term item 169 | // is intentionally generic, and while an expression is ultimately a composition of string 170 | // itentifiers, the `matchTest` argument can be used to do any kind of string =?= Item 171 | // comparison that you like. 172 | // 173 | // - an expression is transformed into a graph called a state machine. The nodes or vertices of 174 | // this graph are called states. 
175 | // 176 | // - a position or SearchPosition is a pair of values (state, consumedItems) that uniquely 177 | // defines a "moment" (I'm really trying to avoid using the word "state" here) in the search 178 | // that should not be repeated. In other words, the position (X, 3) means we've arrived at 179 | // state X and we've seen three items so far, and if the search takes us to this position 180 | // again, we ought not to explore that branch further because it's already been done. 181 | // 182 | // The "online" version of this algorithm is a little tricky to understand, so I'm going to lay 183 | // out how it works in some comments here. The problem is that we have a stream of Items, and 184 | // we want to see how many of them `expr` will accept. Say we have S = [I1, I2, I3]...we can 185 | // just check accepts(expr, [I1]), then accepts(expr, [I1, I2]), and finally 186 | // accepts(expr, [I1, I2, I3]). This is a reasonable solution for small batches of items, but 187 | // when checking hundreds of children (as the heal algorithm sometimes does), this quadratic 188 | // behavior is unacceptably slow. So we need a way to hold on to what we've learned about the 189 | // batch of items 0....(n - 1) when checking if the nth item is accepted. Let's look at a simple 190 | // state machine for the expr "I*". It has the structure: 191 | // 192 | // (S0) --> [ (S1) --- C(I) ---> (A1) ] ---> (A0) 193 | // ^ | 194 | // | | 195 | // ----------------- 196 | // 197 | // Where S0 and S1 are the start and accept states for the overall state machine, and S1 and A1 198 | // are the start and accept states for the actual expression N*. Note that the edge from S1 to 199 | // A1 is labelled "C(I)" to indicate that it consumes an item. Running this on the first item is 200 | // straightforward enough...we just search all the positions reachable from S0 with [I] and 201 | // see whether we arrive at the state A0. 
When checking [I, I], we might be tempted to use the 202 | // fact that for the first I, we found the accept state, and start the search against the second 203 | // A at state A0. But this won't work because A0 is a terminal state with no outbound edges. 204 | // 205 | // Instead, when we're given [I, I], we have no choice but to restart the search from _all_ 206 | // positions we explored in the search against [I]. The only reason this is faster than running 207 | // the whole search from scratch multiple times is the hunch that search positions with higher 208 | // nodeCount values are likely to bring us to a solution more quickly. Concretely, when we 209 | // test [I] against I* we explore the following (state, consumedItems) positions: 210 | // 211 | // (S0, 0) (S1, 0) (A1, 1) (A0, 1) 212 | // 213 | // When we test against [I, I], success means finding the position (A0, 2) -- which is only 214 | // reachable by exploring (A1, 1) from the last search. We'll use a max-heap to keep track of 215 | // positions reached in previous iterations of the search, so that positions with higher 216 | // consumedItems values are explored more readily. So that, in broad strokes, is the goal of 217 | // this code -- to test an expression against a list of items, one item at a time, while 218 | // providing partial results along the way and replicating as little work as possible. 219 | 220 | // Build a state machine graph to traverse. 221 | const { startState, acceptState } = createAcceptanceMachine( 222 | expr, 223 | matchTest 224 | ); 225 | 226 | // Keep a running list of items we're given. 227 | const items = []; 228 | 229 | // Our initial position is the start state, with no items consumed. 230 | const initialPosition = { 231 | state: startState, 232 | consumedItems: 0, 233 | }; 234 | 235 | // A DiscoveryState is a heap of positions to explore next, and a list of positions that we've 236 | // already discovered and shouldn't explore further. 
237 | const globalDiscoveryState: DiscoveryState = { 238 | positionsHeap: new Heap(heapScore, [initialPosition]), 239 | discoveredPositions: [], 240 | }; 241 | 242 | return function acceptsNextItem(nextItem: Item): boolean { 243 | // Create a DiscoveryState that's a shallow copy of the global discovery state. We'll 244 | // use this to exhaust positions locally (in this acceptsNextItem call) while preserving a 245 | // monotonically-growing heap of positions and list of discovered items (for the lifetime 246 | // of the parent createItemAcceptor frame). 247 | // Keep a local discovery state object that's a shallow copy of the global one. 248 | const localDiscoveryState: DiscoveryState = { 249 | positionsHeap: new Heap( 250 | heapScore, 251 | globalDiscoveryState.positionsHeap.toArray() 252 | ), 253 | discoveredPositions: [...globalDiscoveryState.discoveredPositions], 254 | }; 255 | // We'll pop items only off the local discovery heap. 256 | const { positionsHeap: localHeap } = localDiscoveryState; 257 | // Mark the next item as consumable. 258 | items.push(nextItem); 259 | while (localHeap.length() > 0) { 260 | // Get the next best candidate position 261 | const position = localHeap.pop(); 262 | const { state, consumedItems } = position; 263 | // If we're in the accept state and consumed all the items we have, we're successful. 264 | if (state === acceptState && consumedItems === items.length) { 265 | return true; 266 | } 267 | // Mark this position as discovered so we don't bother to explore it again. 268 | discoverPosition(position, globalDiscoveryState); 269 | discoverPosition(position, localDiscoveryState); 270 | const currentItem = items[consumedItems]; 271 | // Get all the successors of this position... 272 | const successors = state.getSuccessors(currentItem); 273 | // And for each one... 274 | for (const successor of successors) { 275 | // See whether it consumes an item or not... 
276 | const nextPosition = { 277 | state: successor, 278 | consumedItems: state.consumesItem() 279 | ? consumedItems + 1 280 | : consumedItems, 281 | }; 282 | // And mark it to be explored, either in this call to acceptsNextItem, or later. 283 | maybeEnqueuePosition(nextPosition, globalDiscoveryState); 284 | maybeEnqueuePosition(nextPosition, localDiscoveryState); 285 | } 286 | } 287 | // Looks like we didn't find any acceptable states. 288 | return false; 289 | }; 290 | }; 291 | 292 | const quickAcceptItems = ( 293 | expr: Expr, 294 | items: Item[], 295 | matchTest: IdentifierMatch 296 | ): number => { 297 | if ( 298 | expr.type === "oneOrMore" && 299 | expr.child.type === "choice" && 300 | expr.child.children.every((child) => child.type === "identifier") 301 | ) { 302 | const choice = expr.child; 303 | const validIdentifiers = choice.children 304 | .map((child) => child.type === "identifier" && child.identifier) 305 | .filter((x) => x); 306 | let ptr = 0; 307 | while ( 308 | ptr < items.length && 309 | validIdentifiers.some((id) => matchTest(id)(items[ptr])) 310 | ) { 311 | ++ptr; 312 | } 313 | return ptr; 314 | } 315 | return 0; 316 | }; 317 | 318 | export const acceptItems = ( 319 | expr: Expr, 320 | items: Item[], 321 | matchTest: IdentifierMatch 322 | ): number => { 323 | const quickAcceptedItems = quickAcceptItems(expr, items, matchTest); 324 | if (quickAcceptedItems > 0) { 325 | return quickAcceptedItems; 326 | } 327 | const { startState, acceptState } = createAcceptanceMachine( 328 | expr, 329 | matchTest 330 | ); 331 | const positions: SearchPosition[] = [ 332 | { state: startState, consumedItems: 0 }, 333 | ]; 334 | const discoveredPositions: SearchPosition[] = []; 335 | let maxConsumedItems = 0; 336 | 337 | const maybePushPosition = (p: SearchPosition) => { 338 | const hasAlreadyDiscoveredPosition = discoveredPositions.some( 339 | (discoveredPosition) => 340 | discoveredPosition.state === p.state && 341 | discoveredPosition.consumedItems === 
p.consumedItems 342 | ); 343 | if (!hasAlreadyDiscoveredPosition) { 344 | positions.push(p); 345 | } 346 | }; 347 | 348 | while (positions.length > 0) { 349 | const position = positions.shift(); 350 | const { state, consumedItems: consumedItems } = position; 351 | const currentItem = items[consumedItems]; 352 | const successors = state.getSuccessors(currentItem); 353 | discoveredPositions.push(position); 354 | if (state === acceptState) { 355 | maxConsumedItems = Math.max(maxConsumedItems, consumedItems); 356 | } 357 | for (const successor of successors) { 358 | const nextconsumedItems = state.consumesItem() 359 | ? consumedItems + 1 360 | : consumedItems; 361 | maybePushPosition({ 362 | state: successor, 363 | consumedItems: nextconsumedItems, 364 | }); 365 | } 366 | } 367 | 368 | return maxConsumedItems; 369 | }; 370 | -------------------------------------------------------------------------------- /src/expression/heap.ts: -------------------------------------------------------------------------------- 1 | // "Inspired" by https://eloquentjavascript.net/1st_edition/appendix2.html 2 | 3 | type ScoreFn = (t: T) => number; 4 | 5 | export default class Heap { 6 | private scoreFn: ScoreFn; 7 | private content: T[]; 8 | 9 | constructor(scoreFn: ScoreFn, initialItems: T[] = []) { 10 | this.scoreFn = scoreFn; 11 | this.content = []; 12 | for (const item of initialItems) { 13 | this.push(item); 14 | } 15 | } 16 | 17 | push(element: T) { 18 | this.content.push(element); 19 | this.bubbleUp(this.content.length - 1); 20 | } 21 | 22 | pop() { 23 | const [result] = this.content; 24 | const end = this.content.pop(); 25 | if (this.content.length > 0) { 26 | this.content[0] = end; 27 | this.sinkDown(0); 28 | } 29 | return result; 30 | } 31 | 32 | length() { 33 | return this.content.length; 34 | } 35 | 36 | toArray() { 37 | return [...this.content]; 38 | } 39 | 40 | private bubbleUp(index: number) { 41 | const element = this.content[index]; 42 | const score = this.scoreFn(element); 
43 | while (index > 0) { 44 | const parentIndex = Math.floor((index + 1) / 2) - 1; 45 | const parent = this.content[parentIndex]; 46 | if (score >= this.scoreFn(parent)) { 47 | break; 48 | } 49 | this.content[parentIndex] = element; 50 | this.content[index] = parent; 51 | index = parentIndex; 52 | } 53 | } 54 | 55 | private sinkDown(index: number) { 56 | const { length } = this.content; 57 | const element = this.content[index]; 58 | const elemScore = this.scoreFn(element); 59 | while (true) { 60 | const child2N = (index + 1) * 2; 61 | const child1N = child2N - 1; 62 | let swap: null | number = null; 63 | let child1Score; 64 | if (child1N < length) { 65 | const child1 = this.content[child1N]; 66 | child1Score = this.scoreFn(child1); 67 | if (child1Score < elemScore) { 68 | swap = child1N; 69 | } 70 | } 71 | if (child2N < length) { 72 | const child2 = this.content[child2N]; 73 | const child2Score = this.scoreFn(child2); 74 | const thresholdScore = swap === null ? elemScore : child1Score; 75 | if (child2Score < thresholdScore) { 76 | swap = child2N; 77 | } 78 | } 79 | if (swap === null) { 80 | break; 81 | } 82 | this.content[index] = this.content[swap]; 83 | this.content[swap] = element; 84 | index = swap; 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/expression/index.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * This package implements a regular expression-like language parser, and a finite state machine 3 | * generator for evaluating lists of items against such an expression. For example: 4 | * 5 | * const expr = parseExpr("(A | B)+ C") 6 | * const acceptedItemsCount = acceptItems( 7 | * expr, 8 | * ['A', 'B', 'A', 'C', 'A'], 9 | * id => str => id === str 10 | * ) // === 4 because only the first four elements of the input array match the expression. 
11 | */ 12 | 13 | export * from "./acceptor"; 14 | export * from "./parse"; 15 | export * from "./types"; 16 | export * from "./util"; 17 | -------------------------------------------------------------------------------- /src/expression/parse.ts: -------------------------------------------------------------------------------- 1 | import { Expr } from "./types"; 2 | 3 | // A simple recursive-descent parser to turn expressions like `(Space | Str)+` into syntax trees 4 | export const parseExpr = (str: string): Expr => { 5 | str = str.trim(); 6 | // Remove spaces around choice separators 7 | str = str.replace(/\s*\|\s*/g, "|"); 8 | // Remove extraneous spaces 9 | str = str.replace(/\s+/g, " "); 10 | 11 | // Keep track of the separator type we have at this level. It should either be choice separators 12 | // ("|") or sequential separators (" "). 13 | let separator; 14 | // Find separators 15 | const separators = []; 16 | // Keep track of open and close parens 17 | let parenCount = 0; 18 | // Keep track of open and close curlies 19 | let curlyCount = 0; 20 | for (let ptr = 0; ptr < str.length; ++ptr) { 21 | const char = str.charAt(ptr); 22 | if (char === "(") { 23 | ++parenCount; 24 | } else if (char === ")") { 25 | --parenCount; 26 | } else if (char === "{") { 27 | ++curlyCount; 28 | } else if (char === "}") { 29 | --curlyCount; 30 | } else if ( 31 | parenCount === 0 && 32 | curlyCount === 0 && 33 | (char === "|" || char === " ") 34 | ) { 35 | if (separator && separator !== char) { 36 | throw new Error( 37 | "Please surround mixed separators with parentheses!" + 38 | ` e.g. prefer '(Foo | Bar) Baz' over 'Foo | Bar Baz'. 
(at ${ptr}, parsing '${str}')` 39 | ); 40 | } else { 41 | separator = char; 42 | separators.push(ptr); 43 | } 44 | } 45 | } 46 | if (separators.length > 0) { 47 | const separated: string[] = []; 48 | let substring = ""; 49 | for (let ptr = 0; ptr < str.length; ++ptr) { 50 | if (separators.includes(ptr)) { 51 | separated.push(substring); 52 | substring = ""; 53 | } else { 54 | substring += str.charAt(ptr); 55 | } 56 | } 57 | if (substring.length > 0) { 58 | separated.push(substring); 59 | } 60 | return { 61 | type: separator === " " ? "sequence" : "choice", 62 | children: separated.map(parseExpr), 63 | }; 64 | } else if (str.endsWith("}")) { 65 | let ptr = str.length - 1; 66 | while (str.charAt(ptr) !== "{") { 67 | ptr--; 68 | } 69 | const rangeStrs = str.slice(ptr + 1, str.length - 1).split(","); 70 | const hasTwo = rangeStrs.length === 2; 71 | const range = rangeStrs.map((str) => parseInt(str.trim())); 72 | const [lowerBound, upperBound] = range; 73 | return { 74 | type: "range", 75 | lowerBound, 76 | upperBound: hasTwo 77 | ? isNaN(upperBound) 78 | ? 
null 79 | : upperBound 80 | : lowerBound, 81 | child: parseExpr(str.slice(0, ptr)), 82 | }; 83 | } else if (str.endsWith("+")) { 84 | return { 85 | type: "oneOrMore", 86 | child: parseExpr(str.slice(0, str.length - 1)), 87 | }; 88 | } else if (str.endsWith("*")) { 89 | return { 90 | type: "zeroOrMore", 91 | child: parseExpr(str.slice(0, str.length - 1)), 92 | }; 93 | } else if (str.startsWith("(") && str.endsWith(")")) { 94 | return parseExpr(str.slice(1, str.length - 1)); 95 | } 96 | return { type: "identifier", identifier: str }; 97 | }; 98 | -------------------------------------------------------------------------------- /src/expression/types.ts: -------------------------------------------------------------------------------- 1 | export type Identifier = { 2 | type: "identifier"; 3 | identifier: string; 4 | }; 5 | 6 | export type OneOrMore = { 7 | type: "oneOrMore"; 8 | child: Expr; 9 | }; 10 | 11 | export type ZeroOrMore = { 12 | type: "zeroOrMore"; 13 | child: Expr; 14 | }; 15 | 16 | export type Range = { 17 | type: "range"; 18 | lowerBound: number; 19 | upperBound: number | null; 20 | child: Expr; 21 | }; 22 | 23 | export type Sequence = { 24 | type: "sequence"; 25 | children: Expr[]; 26 | }; 27 | export type Choice = { 28 | type: "choice"; 29 | children: Expr[]; 30 | }; 31 | 32 | export type IdentifierMatch = (id: string) => (item: Item) => boolean; 33 | 34 | export type Expr = 35 | | Identifier 36 | | OneOrMore 37 | | ZeroOrMore 38 | | Sequence 39 | | Choice 40 | | Range; 41 | -------------------------------------------------------------------------------- /src/expression/util.ts: -------------------------------------------------------------------------------- 1 | import { Expr } from "./types"; 2 | 3 | export const exprAcceptsMultiple = (expr: Expr): boolean => { 4 | if (expr.type === "identifier") { 5 | return false; 6 | } else if (expr.type === "sequence") { 7 | return true; 8 | } else if (expr.type === "oneOrMore") { 9 | return true; 10 | } else if 
(expr.type === "zeroOrMore") { 11 | return true; 12 | } else if (expr.type === "range") { 13 | return expr.upperBound === null || expr.upperBound > 1; 14 | } else if (expr.type === "choice") { 15 | return expr.children.some((child) => exprAcceptsMultiple(child)); 16 | } 17 | }; 18 | 19 | export const exprWillAlwaysMatchSingleIdentifier = (expr: Expr, id: string) => { 20 | if (expr.type === "identifier") { 21 | return expr.identifier === id; 22 | } else if (expr.type === "sequence") { 23 | return false; 24 | } else if (expr.type === "choice") { 25 | return expr.children.some((child) => 26 | exprWillAlwaysMatchSingleIdentifier(child, id) 27 | ); 28 | } else if (expr.type === "range") { 29 | return ( 30 | expr.lowerBound === 1 && 31 | exprWillAlwaysMatchSingleIdentifier(expr.child, id) 32 | ); 33 | } else { 34 | return exprWillAlwaysMatchSingleIdentifier(expr.child, id); 35 | } 36 | }; 37 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | import * as transformUtils from "./transform/util"; 2 | import * as transformers from "./transform/transformers"; 3 | import * as pandocUtils from "./pandocUtils"; 4 | 5 | export { transformUtils, transformers, pandocUtils }; 6 | export { fromPandoc } from "./transform/fromPandoc"; 7 | export { fromProsemirror } from "./transform/fromProsemirror"; 8 | export { RuleSet } from "./transform/ruleset"; 9 | export { emitPandocJson } from "./emit"; 10 | export { parsePandocJson } from "./parse"; 11 | export { metaValueToString, metaValueToJsonSerializable } from "./meta"; 12 | export { setPandocApiVersion } from "./config"; 13 | export { callPandoc, callPandocWithFile } from "./util"; 14 | -------------------------------------------------------------------------------- /src/meta.ts: -------------------------------------------------------------------------------- 1 | import { MetaValue, Inline, Block } from 
"./types"; 2 | 3 | const extractStringFromInline = (item: Inline): string => { 4 | if (item.type === "Space") { 5 | return " "; 6 | } 7 | if ("content" in item) { 8 | if ( 9 | item.type === "Str" || 10 | item.type === "Code" || 11 | item.type === "RawInline" || 12 | item.type === "Math" 13 | ) { 14 | return item.content; 15 | } else { 16 | if (item.type === "Note") { 17 | return ""; 18 | } else { 19 | return extractStringFromInlines(item.content); 20 | } 21 | } 22 | } 23 | return ""; 24 | }; 25 | 26 | const extractStringFromInlines = (inlines: Inline[]): string => { 27 | return inlines.map(extractStringFromInline).join(""); 28 | }; 29 | 30 | const extractStringFromBlock = (item: Block): string => { 31 | if (item.type === "Table") { 32 | return ""; 33 | } 34 | if ("content" in item) { 35 | if (item.type === "RawBlock" || item.type === "CodeBlock") { 36 | return item.content; 37 | } 38 | if ( 39 | item.type === "Para" || 40 | item.type === "Plain" || 41 | item.type === "Header" 42 | ) { 43 | return extractStringFromInlines(item.content); 44 | } 45 | if (item.type === "Div" || item.type === "BlockQuote") { 46 | return extractStringFromBlocks(item.content); 47 | } 48 | if (item.type === "LineBlock") { 49 | return item.content 50 | .map((inlines) => extractStringFromInlines(inlines)) 51 | .join("\n"); 52 | } 53 | return item.content 54 | .map((blocks) => extractStringFromBlocks(blocks)) 55 | .join("\n"); 56 | } 57 | }; 58 | 59 | const extractStringFromBlocks = (blocks: Block[]): string => { 60 | return blocks.map(extractStringFromBlock).join("\n"); 61 | }; 62 | 63 | export const metaValueToString = (m: MetaValue): string => { 64 | if (m.type === "MetaString") { 65 | return m.content; 66 | } 67 | if (m.type === "MetaBool") { 68 | return m.content.toString(); 69 | } 70 | if (m.type === "MetaBlocks") { 71 | return extractStringFromBlocks(m.content); 72 | } 73 | if (m.type === "MetaInlines") { 74 | return extractStringFromInlines(m.content); 75 | } 76 | if (m.type === 
"MetaList") { 77 | return m.content.map(metaValueToString).join(", "); 78 | } 79 | if (m.type === "MetaMap") { 80 | return Object.entries(m.values) 81 | .map(([key, value]) => `${key}: ${metaValueToString(value)}`) 82 | .join(", "); 83 | } 84 | return ""; 85 | }; 86 | 87 | export const metaValueToJsonSerializable = (m: MetaValue) => { 88 | if (m.type === "MetaBool") { 89 | return m.content; 90 | } 91 | if (m.type === "MetaList") { 92 | return m.content.map(metaValueToJsonSerializable); 93 | } 94 | if (m.type === "MetaMap") { 95 | const entries: [string, any][] = Object.entries(m.values).map( 96 | ([key, value]) => { 97 | return [key, metaValueToJsonSerializable(value)]; 98 | } 99 | ); 100 | const res: { [key: string]: any } = {}; 101 | entries.forEach((entry) => { 102 | const [key, value] = entry; 103 | res[key] = value; 104 | }); 105 | return res; 106 | } 107 | return metaValueToString(m); 108 | }; 109 | -------------------------------------------------------------------------------- /src/pandocUtils.ts: -------------------------------------------------------------------------------- 1 | import { Inline, Doc, Plain, Para, Block } from "./types"; 2 | import { callPandoc } from "./util"; 3 | import { emitPandocJson } from "./emit"; 4 | import { parsePandocJson } from "./parse"; 5 | import { flatten } from "./transform/util"; 6 | 7 | export const getOutputStringForPandocDoc = ( 8 | document: Doc, 9 | format: string 10 | ): string => 11 | callPandoc(JSON.stringify(emitPandocJson(document)), "json", format).trim(); 12 | 13 | export const getPandocDocForInputString = ( 14 | input: string, 15 | format: string 16 | ): Doc => { 17 | if (!input) { 18 | return { type: "Doc", blocks: [], meta: {} }; 19 | } 20 | return parsePandocJson(JSON.parse(callPandoc(input, format, "json"))); 21 | }; 22 | 23 | export const getHtmlStringForPandocDoc = (document: Doc): string => 24 | getOutputStringForPandocDoc(document, "html"); 25 | 26 | export const getPandocDocForHtmlString = 
(htmlString: string): Doc => 27 | getPandocDocForInputString(htmlString, "html"); 28 | 29 | export const pandocBlocksToOutputString = (blocks: Block[], format: string) => { 30 | if (blocks.length === 0) { 31 | return ""; 32 | } 33 | const document: Doc = { 34 | type: "Doc", 35 | blocks, 36 | meta: {}, 37 | }; 38 | return getOutputStringForPandocDoc(document, format); 39 | }; 40 | 41 | export const pandocInlineToOutputString = ( 42 | content: Inline[], 43 | format: string 44 | ) => { 45 | return pandocBlocksToOutputString([{ type: "Para", content }], format); 46 | }; 47 | 48 | export const pandocInlineToHtmlString = (nodes: Inline[]) => 49 | pandocInlineToOutputString(nodes, "html"); 50 | 51 | export const pandocInlineToPlainString = (nodes: Inline[]) => 52 | pandocInlineToOutputString(nodes, "plain"); 53 | 54 | export const pandocBlocksToHtmlString = (blocks: Block[]) => { 55 | if (blocks.length === 0) { 56 | return ""; 57 | } 58 | const document: Doc = { 59 | type: "Doc", 60 | blocks, 61 | meta: {}, 62 | }; 63 | return getHtmlStringForPandocDoc(document); 64 | }; 65 | 66 | export const htmlStringToPandocInline = (htmlString: string): Inline[] => { 67 | if (!htmlString) { 68 | return []; 69 | } 70 | const pandocAst = getPandocDocForHtmlString(htmlString); 71 | return flatten( 72 | ( 73 | pandocAst.blocks.filter( 74 | (block) => block.type === "Plain" || block.type === "Para" 75 | ) as (Plain | Para)[] 76 | ).map((block) => block.content) 77 | ); 78 | }; 79 | 80 | export const htmlStringToPandocBlocks = (htmlString: string): Block[] => { 81 | if (!htmlString) { 82 | return []; 83 | } 84 | const pandocAst = getPandocDocForHtmlString(htmlString); 85 | return pandocAst.blocks; 86 | }; 87 | -------------------------------------------------------------------------------- /src/parse.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Alignment, 3 | Attr, 4 | Block, 5 | BlockQuote, 6 | BulletList, 7 | Caption, 8 | Cell, 9 | 
CitationMode, 10 | Cite, 11 | Code, 12 | CodeBlock, 13 | ColSpec, 14 | DefinitionList, 15 | Div, 16 | Doc, 17 | Format, 18 | Header, 19 | Image, 20 | Inline, 21 | LineBlock, 22 | Link, 23 | ListAttributes, 24 | ListNumberDelim, 25 | ListNumberStyle, 26 | Math, 27 | MathType, 28 | MetaBlocks, 29 | MetaBool, 30 | MetaInlines, 31 | MetaList, 32 | MetaMap, 33 | MetaString, 34 | MetaValue, 35 | Note, 36 | OrderedList, 37 | PandocJson, 38 | Para, 39 | Plain, 40 | Quoted, 41 | QuoteType, 42 | RawBlock, 43 | RawInline, 44 | Row, 45 | SimpleInline, 46 | Span, 47 | Str, 48 | Table, 49 | TableBody, 50 | TableFoot, 51 | TableHead, 52 | Target, 53 | } from "./types"; 54 | 55 | const unwrapEnum = (instance: { t: T }): T => { 56 | return instance.t; 57 | }; 58 | 59 | const unwrapAttr = (attr: [string, string[], [string, string][]]): Attr => { 60 | const [identifier, classes, propertiesList] = attr; 61 | const properties = {}; 62 | propertiesList.forEach(([key, value]) => { 63 | properties[key] = value; 64 | }); 65 | return { 66 | identifier, 67 | classes, 68 | properties, 69 | }; 70 | }; 71 | 72 | const unwrapTarget = (target: [string, string]): Target => { 73 | const [url, title] = target; 74 | return { 75 | url, 76 | title, 77 | }; 78 | }; 79 | 80 | const unwrapFormat = (format: any): Format => { 81 | // TODO(ian): Figure out what to do here 82 | return format; 83 | }; 84 | 85 | const unwrapListAttributes = ( 86 | listAttributes: [number, any, any] 87 | ): ListAttributes => { 88 | const [startNumber = 1, listNumberStyle, listNumberDelim] = listAttributes; 89 | return { 90 | startNumber, 91 | listNumberStyle: unwrapEnum(listNumberStyle), 92 | listNumberDelim: unwrapEnum(listNumberDelim), 93 | }; 94 | }; 95 | 96 | const parseAtom = (n: { t }) => { 97 | return { type: n.t }; 98 | }; 99 | 100 | const parseStr = (n: { c: string }): Str => { 101 | const string = n.c; 102 | return { 103 | type: "Str", 104 | content: string, 105 | }; 106 | }; 107 | 108 | const parseSimpleInline = ( 109 
| n: { 110 | c: any[]; 111 | }, 112 | nodeType: SimpleInline["type"] 113 | ): SimpleInline => { 114 | const inline = n.c; 115 | return { 116 | type: nodeType, 117 | content: inline.map(parseInline), 118 | }; 119 | }; 120 | 121 | const parseQuoted = (n: { c: [any, any[]] }): Quoted => { 122 | const [quoteType, inline] = n.c; 123 | return { 124 | type: "Quoted", 125 | quoteType: unwrapEnum(quoteType), 126 | content: inline.map(parseInline), 127 | }; 128 | }; 129 | 130 | const parseCite = (n: { c: [any[], any[]] }): Cite => { 131 | const [citations, inline] = n.c; 132 | return { 133 | type: "Cite", 134 | citations: citations.map((citation) => { 135 | const { 136 | citationHash, 137 | citationId, 138 | citationMode, 139 | citationNoteNum, 140 | citationPrefix, 141 | citationSuffix, 142 | } = citation; 143 | return { 144 | citationHash, 145 | citationId, 146 | citationMode: unwrapEnum(citationMode), 147 | citationNoteNum, 148 | citationPrefix: citationPrefix.map(parseInline), 149 | citationSuffix: citationSuffix.map(parseInline), 150 | }; 151 | }), 152 | content: inline.map(parseInline), 153 | }; 154 | }; 155 | 156 | const parseCode = (n: { c: [any, string] }): Code => { 157 | const [attr, code] = n.c; 158 | return { 159 | type: "Code", 160 | attr: unwrapAttr(attr), 161 | content: code, 162 | }; 163 | }; 164 | 165 | const parseMath = (n: { c: [any, string] }): Math => { 166 | const [mathType, content] = n.c; 167 | return { 168 | type: "Math", 169 | mathType: unwrapEnum(mathType), 170 | content: content, 171 | }; 172 | }; 173 | 174 | const parseRawInline = (n: { c: [any, string] }): RawInline => { 175 | const [format, content] = n.c; 176 | return { 177 | type: "RawInline", 178 | format: unwrapFormat(format), 179 | content, 180 | }; 181 | }; 182 | 183 | const parseImage = (n: { c: [any, any[], any] }): Image => { 184 | const [attr, inline, target] = n.c; 185 | return { 186 | type: "Image", 187 | attr: unwrapAttr(attr), 188 | content: inline.map(parseInline), 189 | target: 
unwrapTarget(target), 190 | }; 191 | }; 192 | 193 | const parseLink = (n: { c: [any, any[], any] }): Link => { 194 | const [attr, inline, target] = n.c; 195 | return { 196 | type: "Link", 197 | attr: unwrapAttr(attr), 198 | content: inline.map(parseInline), 199 | target: unwrapTarget(target), 200 | }; 201 | }; 202 | 203 | const parseNote = (n: { c: any[] }): Note => { 204 | const blocks = n.c; 205 | return { 206 | type: "Note", 207 | content: blocks.map(parseBlock), 208 | }; 209 | }; 210 | 211 | const parseSpan = (n: { c: [any, any[]] }): Span => { 212 | const [attr, inline] = n.c; 213 | return { 214 | type: "Span", 215 | attr: unwrapAttr(attr), 216 | content: inline.map(parseInline), 217 | }; 218 | }; 219 | 220 | export const parseInline = (n: { t: Inline["type"]; c: any }): Inline => { 221 | switch (n.t) { 222 | case "Str": 223 | return parseStr(n); 224 | case "Emph": 225 | case "Strong": 226 | case "Underline": 227 | case "Strikeout": 228 | case "Superscript": 229 | case "Subscript": 230 | case "SmallCaps": 231 | return parseSimpleInline(n, n.t); 232 | case "Quoted": 233 | return parseQuoted(n); 234 | case "Cite": 235 | return parseCite(n); 236 | case "Code": 237 | return parseCode(n); 238 | case "Space": 239 | case "SoftBreak": 240 | case "LineBreak": 241 | return parseAtom(n); 242 | case "Math": 243 | return parseMath(n); 244 | case "RawInline": 245 | return parseRawInline(n); 246 | case "Link": 247 | return parseLink(n); 248 | case "Image": 249 | return parseImage(n); 250 | case "Note": 251 | return parseNote(n); 252 | case "Span": 253 | return parseSpan(n); 254 | } 255 | }; 256 | 257 | const parsePlain = (n: { c: any[] }): Plain => { 258 | const inline = n.c; 259 | return { 260 | type: "Plain", 261 | content: inline.map(parseInline), 262 | }; 263 | }; 264 | 265 | const parsePara = (n: { c: any[] }): Para => { 266 | const inline = n.c; 267 | return { 268 | type: "Para", 269 | content: inline.map(parseInline), 270 | }; 271 | }; 272 | 273 | const parseLineBlock 
= (n: { c: any[][] }): LineBlock => {
    // Each entry of n.c is one line, itself a list of raw inline nodes.
    return {
        type: "LineBlock",
        content: n.c.map((line) => line.map((inline) => parseInline(inline))),
    };
};

/** Parses a CodeBlock: c = [attr, source string]. */
const parseCodeBlock = (n: { c: [any, string] }): CodeBlock => {
    const [attr, source] = n.c;
    return {
        type: "CodeBlock",
        attr: unwrapAttr(attr),
        content: source,
    };
};

/** Parses a RawBlock: c = [format, raw string]. */
const parseRawBlock = (n: { c: [any, string] }): RawBlock => {
    const [format, raw] = n.c;
    return {
        type: "RawBlock",
        format: unwrapFormat(format),
        content: raw,
    };
};

/** Parses a BlockQuote, whose content is a list of blocks. */
const parseBlockQuote = (n: { c: any[] }): BlockQuote => {
    return {
        type: "BlockQuote",
        content: n.c.map(parseBlock),
    };
};

/** Parses an OrderedList: c = [listAttributes, items]; items are Block[][]. */
const parseOrderedList = (n: { c: [any, any[][]] }): OrderedList => {
    const [rawListAttributes, rawItems] = n.c;
    return {
        type: "OrderedList",
        listAttributes: unwrapListAttributes(rawListAttributes),
        content: rawItems.map((item) => item.map(parseBlock)),
    };
};

/** Parses a BulletList; each item is a list of blocks. */
const parseBulletList = (n: { c: any[][] }): BulletList => {
    return {
        type: "BulletList",
        content: n.c.map((item) => item.map(parseBlock)),
    };
};

/**
 * Parses a DefinitionList. Each item pairs a term (inlines) with one or
 * more definitions (each a list of blocks).
 */
const parseDefinitionList = (n: { c: [any[], any[][]][] }): DefinitionList => {
    const entries = n.c.map(([term, definitions]) => ({
        term: term.map(parseInline),
        definitions: definitions.map((definition) =>
            definition.map(parseBlock)
        ),
    }));
    return {
        type: "DefinitionList",
        entries,
    };
};

/** Parses a Header: c = [level, attr, inlines]. */
const parseHeader = (n: { c: [number, any, any[]] }): Header => {
    const [level, attr, inline] = n.c;
    return {
        type: "Header",
        level,
        attr: unwrapAttr(attr),
content: inline.map(parseInline), 348 | }; 349 | }; 350 | 351 | const parseDiv = (n: { c: [any, any[]] }): Div => { 352 | const [attr, blocks] = n.c; 353 | return { 354 | type: "Div", 355 | attr: unwrapAttr(attr), 356 | content: blocks.map(parseBlock), 357 | }; 358 | }; 359 | 360 | const parseCell = (n: [any, any, any, any, any[]]): Cell => { 361 | const [attr, alignment, rowSpan, colSpan, blocks] = n; 362 | return { 363 | type: "Cell", 364 | attr: unwrapAttr(attr), 365 | alignment: unwrapEnum(alignment), 366 | rowSpan, 367 | colSpan, 368 | content: blocks.map(parseBlock), 369 | }; 370 | }; 371 | 372 | const parseRow = (n: [any, any[]]): Row => { 373 | const [attr, cells] = n; 374 | return { 375 | type: "Row", 376 | attr: unwrapAttr(attr), 377 | cells: cells.map(parseCell), 378 | }; 379 | }; 380 | 381 | const parseTableHead = (n: [any, any[]]): TableHead => { 382 | const [attr, rows] = n; 383 | return { 384 | type: "TableHead", 385 | attr: unwrapAttr(attr), 386 | rows: rows.map(parseRow), 387 | }; 388 | }; 389 | 390 | const parseTableFoot = (n: [any, any[]]): TableFoot => { 391 | const [attr, rows] = n; 392 | return { 393 | type: "TableFoot", 394 | attr: unwrapAttr(attr), 395 | rows: rows.map(parseRow), 396 | }; 397 | }; 398 | 399 | const parseTableBody = (n: [any, any, any[], any[]]): TableBody => { 400 | const [attr, rowHeadColumns, head, body] = n; 401 | return { 402 | type: "TableBody", 403 | rowHeadColumns, 404 | attr: unwrapAttr(attr), 405 | headRows: head.map(parseRow), 406 | bodyRows: body.map(parseRow), 407 | }; 408 | }; 409 | 410 | const parseColSpec = (n: [any, any]): ColSpec => { 411 | const [alignment, colWidth] = n; 412 | const base = { 413 | type: "ColSpec" as const, 414 | alignment: unwrapEnum(alignment), 415 | }; 416 | if (colWidth.t === "ColWidthDefault") { 417 | return { ...base, defaultWidth: true }; 418 | } 419 | return { ...base, width: colWidth.c }; 420 | }; 421 | 422 | const parseCaption = (n: [null | any[], any[]]): Caption => { 423 | const 
[shortCaption, content] = n; 424 | const baseCaption: Caption = { 425 | type: "Caption", 426 | content: content.map(parseBlock), 427 | }; 428 | if (shortCaption) { 429 | return { 430 | ...baseCaption, 431 | shortCaption: shortCaption.map(parseInline), 432 | }; 433 | } 434 | return baseCaption; 435 | }; 436 | 437 | const parseTable = (n: { c: [any, any, any[], any, any[], any] }): Table => { 438 | const [attr, caption, colSpecs, head, bodies, foot] = n.c; 439 | return { 440 | type: "Table", 441 | attr: unwrapAttr(attr), 442 | caption: parseCaption(caption), 443 | colSpecs: colSpecs.map(parseColSpec), 444 | head: parseTableHead(head), 445 | bodies: bodies.map(parseTableBody), 446 | foot: parseTableFoot(foot), 447 | }; 448 | }; 449 | 450 | export const parseBlock = (n: { t: Block["type"]; c: any }): Block => { 451 | switch (n.t) { 452 | case "Plain": 453 | return parsePlain(n); 454 | case "Para": 455 | return parsePara(n); 456 | case "LineBlock": 457 | return parseLineBlock(n); 458 | case "CodeBlock": 459 | return parseCodeBlock(n); 460 | case "RawBlock": 461 | return parseRawBlock(n); 462 | case "BlockQuote": 463 | return parseBlockQuote(n); 464 | case "OrderedList": 465 | return parseOrderedList(n); 466 | case "BulletList": 467 | return parseBulletList(n); 468 | case "DefinitionList": 469 | return parseDefinitionList(n); 470 | case "Header": 471 | return parseHeader(n); 472 | case "HorizontalRule": 473 | case "Null": 474 | return parseAtom(n); 475 | case "Div": 476 | return parseDiv(n); 477 | case "Table": 478 | return parseTable(n); 479 | } 480 | }; 481 | 482 | const parseMetaMap = (n: { c: { [key: string]: any } }): MetaMap => { 483 | const values = {}; 484 | Object.entries(n.c).forEach(([key, value]) => { 485 | values[key] = parseMetaValue(value); 486 | }); 487 | return { type: "MetaMap", values }; 488 | }; 489 | 490 | const parseMetaList = (n: { c: any[] }): MetaList => { 491 | return { 492 | type: "MetaList", 493 | content: n.c.map(parseMetaValue), 494 | }; 495 
| }; 496 | 497 | const parseMetaBool = (n: { c: boolean }): MetaBool => { 498 | return { 499 | type: "MetaBool", 500 | content: n.c, 501 | }; 502 | }; 503 | 504 | const parseMetaString = (n: { c: string }): MetaString => { 505 | return { 506 | type: "MetaString", 507 | content: n.c, 508 | }; 509 | }; 510 | 511 | const parseMetaInlines = (n: { c: any[] }): MetaInlines => { 512 | return { 513 | type: "MetaInlines", 514 | content: n.c.map(parseInline), 515 | }; 516 | }; 517 | 518 | const parseMetaBlocks = (n: { c: any[] }): MetaBlocks => { 519 | return { 520 | type: "MetaBlocks", 521 | content: n.c.map(parseBlock), 522 | }; 523 | }; 524 | 525 | const parseMetaValue = (n: { t: string; c: any }): MetaValue => { 526 | switch (n.t) { 527 | case "MetaMap": 528 | return parseMetaMap(n); 529 | case "MetaList": 530 | return parseMetaList(n); 531 | case "MetaBool": 532 | return parseMetaBool(n); 533 | case "MetaString": 534 | return parseMetaString(n); 535 | case "MetaInlines": 536 | return parseMetaInlines(n); 537 | case "MetaBlocks": 538 | return parseMetaBlocks(n); 539 | } 540 | }; 541 | 542 | const parseMeta = (meta: PandocJson["meta"]) => { 543 | const parsedMeta: Record = {}; 544 | Object.entries(meta).forEach(([key, value]) => { 545 | parsedMeta[key] = parseMetaValue(value); 546 | }); 547 | return parsedMeta; 548 | }; 549 | 550 | export const parsePandocJson = (json: PandocJson): Doc => { 551 | const { meta, blocks } = json; 552 | return { 553 | type: "Doc", 554 | blocks: blocks.map(parseBlock), 555 | meta: parseMeta(meta), 556 | }; 557 | }; 558 | -------------------------------------------------------------------------------- /src/transform/__tests__/util.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { flatten } from "../util"; 3 | 4 | describe("flatten", () => { 5 | it("turns a non-array input into an array with a single element", () => { 6 | expect(flatten(1)).toEqual([1]); 7 
| }); 8 | 9 | it("handles a flat array by returning an element-wise identical array", () => { 10 | expect(flatten([1, 2, 3])).toEqual([1, 2, 3]); 11 | }); 12 | 13 | it("flattens an array two layers deep", () => { 14 | expect(flatten([1, [2, 3], 4, [5], 6])).toEqual([1, 2, 3, 4, 5, 6]); 15 | }); 16 | 17 | it("flattens an array many layers deep", () => { 18 | expect(flatten([1, [2, [3]], 4, [[5], 6], [7, [8, 9]]])).toEqual([ 19 | 1, 2, 3, 4, 5, 6, 7, 8, 9, 20 | ]); 21 | }); 22 | }); 23 | -------------------------------------------------------------------------------- /src/transform/fluent.ts: -------------------------------------------------------------------------------- 1 | import { asNode, asArray } from "./util"; 2 | import { ProsemirrorNode, PandocNode } from "../types"; 3 | 4 | const FLUENT_SYMBOL = Symbol(); 5 | 6 | export type Fluent = { 7 | asArray: () => T[]; 8 | asNode: () => T; 9 | fluent: typeof FLUENT_SYMBOL; 10 | }; 11 | 12 | const isFluent = (item: any): item is Fluent => 13 | "fluent" in item && item.fluent === FLUENT_SYMBOL; 14 | 15 | export const fluent = ( 16 | item: T | T[] | Fluent 17 | ): Fluent => { 18 | if (isFluent(item)) { 19 | return item; 20 | } else { 21 | return { 22 | asArray: () => asArray(item), 23 | asNode: () => asNode(item), 24 | fluent: FLUENT_SYMBOL, 25 | }; 26 | } 27 | }; 28 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/__tests__/__snapshots__/heal.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`healNaiveTokenList heals a real-world example of a misplaced image element 1`] = `"O(doc) O*(bullet_list) O(list_item) O(paragraph) O(text) C(text) C(paragraph) C(list_item) O*(list_item) C(list_item) C(bullet_list) O(image) C(image) O*(bullet_list) O*(list_item) C(list_item) O(list_item) O(paragraph) O(text) C(text) C(paragraph) C(list_item) C(bullet_list) 
C(doc)"`; 4 | 5 | exports[`healNaiveTokenList heals a real-world example of a misplaced image element that is less dire 1`] = `"O(doc) O(bullet_list) O(list_item) O(paragraph) O(text) C(text) C(paragraph) O*(paragraph) O(text) C(text) C(paragraph) O(image) C(image) O*(paragraph) O(text) C(text) C(paragraph) C(list_item) O(list_item) O(paragraph) O(text) C(text) C(paragraph) C(list_item) C(bullet_list) C(doc)"`; 6 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/__tests__/heal.test.ts: -------------------------------------------------------------------------------- 1 | /* global describe, it, expect */ 2 | import { ProsemirrorSchema } from "../../../types"; 3 | import { prosemirrorSchema } from "../../../example/schema"; 4 | 5 | import { getNaiveTokenList, healNaiveTokenList, Token, heal } from "../heal"; 6 | 7 | const toyProsemirrorSchema: ProsemirrorSchema = new ProsemirrorSchema({ 8 | nodes: { 9 | A: { 10 | defining: true, 11 | content: "B*", 12 | }, 13 | B: { 14 | defining: true, 15 | content: "(C|D)*", 16 | }, 17 | C: { 18 | defining: true, 19 | content: "D*", 20 | }, 21 | D: { 22 | defining: true, 23 | content: "text*", 24 | }, 25 | text: { 26 | inline: true, 27 | group: "inline", 28 | }, 29 | }, 30 | marks: {}, 31 | topNode: "A", 32 | }); 33 | 34 | const stringify = (tokens: Token[]) => 35 | tokens 36 | .map((t: Token) => { 37 | const prefix = t.type === "open" ? "O" : "C"; 38 | const maybeAsterisk = 39 | t.type === "open" && t.createdFromSplit ? 
"*" : ""; 40 | return `${prefix}${maybeAsterisk}(${t.node.type})`; 41 | }) 42 | .join(" "); 43 | 44 | describe("getNaiveTokenList", () => { 45 | it("tokenizes a Prosemirror tree", () => { 46 | expect( 47 | stringify( 48 | getNaiveTokenList({ 49 | type: "A", 50 | content: [ 51 | { 52 | type: "B", 53 | content: [{ type: "C", content: [{ type: "D" }] }], 54 | }, 55 | ], 56 | }) 57 | ) 58 | ).toEqual("O(A) O(B) O(C) O(D) C(D) C(C) C(B) C(A)"); 59 | }); 60 | }); 61 | 62 | describe("healNaiveTokenList", () => { 63 | it("passes through a valid tree", () => { 64 | expect( 65 | stringify( 66 | healNaiveTokenList( 67 | getNaiveTokenList({ 68 | type: "A", 69 | content: [ 70 | { 71 | type: "B", 72 | content: [ 73 | { type: "D" }, 74 | { 75 | type: "C", 76 | content: [{ type: "D" }, { type: "D" }], 77 | }, 78 | ], 79 | }, 80 | ], 81 | }), 82 | toyProsemirrorSchema 83 | ) 84 | ) 85 | ).toEqual( 86 | "O(A) O(B) O(D) C(D) O(C) O(D) C(D) O(D) C(D) C(C) C(B) C(A)" 87 | ); 88 | }); 89 | 90 | it("heals an invalid Prosemirror tree", () => { 91 | const naiveList = getNaiveTokenList({ 92 | type: "A", 93 | content: [{ type: "B", content: [{ type: "B" }] }], 94 | }); 95 | expect( 96 | stringify(healNaiveTokenList(naiveList, toyProsemirrorSchema)) 97 | ).toEqual("O(A) O*(B) C(B) O(B) C(B) O*(B) C(B) C(A)"); 98 | }); 99 | 100 | it("heals a more complicated invalid Prosemirror tree", () => { 101 | const naiveList = getNaiveTokenList({ 102 | type: "A", 103 | content: [ 104 | { 105 | type: "B", 106 | content: [ 107 | { type: "B" }, 108 | { type: "D", content: [{ type: "C" }] }, 109 | ], 110 | }, 111 | ], 112 | }); 113 | expect( 114 | stringify(healNaiveTokenList(naiveList, toyProsemirrorSchema)) 115 | ).toEqual( 116 | "O(A) O*(B) C(B) O(B) C(B) O*(B) O*(D) C(D) O(C) C(C) O*(D) C(D) C(B) C(A)" 117 | ); 118 | }); 119 | 120 | it("heals a real-world example of a misplaced image element", () => { 121 | const naiveList = getNaiveTokenList({ 122 | type: "doc", 123 | content: [ 124 | { 125 | type: 
"bullet_list", 126 | content: [ 127 | { 128 | type: "list_item", 129 | content: [ 130 | { 131 | type: "paragraph", 132 | content: [{ type: "text" }], 133 | }, 134 | ], 135 | }, 136 | { type: "list_item", content: [{ type: "image" }] }, 137 | { 138 | type: "list_item", 139 | content: [ 140 | { 141 | type: "paragraph", 142 | content: [{ type: "text" }], 143 | }, 144 | ], 145 | }, 146 | ], 147 | }, 148 | ], 149 | }); 150 | expect( 151 | stringify(healNaiveTokenList(naiveList, prosemirrorSchema)) 152 | ).toMatchSnapshot(); 153 | }); 154 | 155 | it("heals a real-world example of a misplaced image element that is less dire", () => { 156 | const naiveList = getNaiveTokenList({ 157 | type: "doc", 158 | content: [ 159 | { 160 | type: "bullet_list", 161 | content: [ 162 | { 163 | type: "list_item", 164 | content: [ 165 | { 166 | type: "paragraph", 167 | content: [{ type: "text" }], 168 | }, 169 | { 170 | type: "paragraph", 171 | content: [ 172 | { type: "text" }, 173 | { type: "image" }, 174 | { type: "text" }, 175 | ], 176 | }, 177 | ], 178 | }, 179 | { 180 | type: "list_item", 181 | content: [ 182 | { 183 | type: "paragraph", 184 | content: [{ type: "text" }], 185 | }, 186 | ], 187 | }, 188 | ], 189 | }, 190 | ], 191 | }); 192 | expect( 193 | stringify(healNaiveTokenList(naiveList, prosemirrorSchema)) 194 | ).toMatchSnapshot(); 195 | }); 196 | }); 197 | 198 | describe("heal", () => { 199 | it("heals an improper Prosemirror document", () => { 200 | expect( 201 | heal( 202 | { 203 | type: "doc", 204 | content: [ 205 | { 206 | type: "bullet_list", 207 | content: [ 208 | { 209 | type: "list_item", 210 | content: [ 211 | { 212 | type: "paragraph", 213 | content: [{ type: "text" }], 214 | }, 215 | ], 216 | }, 217 | { 218 | type: "list_item", 219 | content: [{ type: "image" }], 220 | }, 221 | { 222 | type: "list_item", 223 | content: [ 224 | { 225 | type: "paragraph", 226 | content: [{ type: "text" }], 227 | }, 228 | ], 229 | }, 230 | ], 231 | }, 232 | ], 233 | }, 234 | 
prosemirrorSchema 235 | ) 236 | ).toEqual({ 237 | type: "doc", 238 | content: [ 239 | { 240 | type: "bullet_list", 241 | content: [ 242 | { 243 | type: "list_item", 244 | content: [ 245 | { 246 | type: "paragraph", 247 | content: [{ type: "text" }], 248 | }, 249 | ], 250 | }, 251 | ], 252 | }, 253 | { type: "image" }, 254 | { 255 | type: "bullet_list", 256 | content: [ 257 | { 258 | type: "list_item", 259 | content: [ 260 | { 261 | type: "paragraph", 262 | content: [{ type: "text" }], 263 | }, 264 | ], 265 | }, 266 | ], 267 | }, 268 | ], 269 | }); 270 | }); 271 | 272 | it("heals a broken Prosemirror document that requires less intervention", () => { 273 | expect( 274 | heal( 275 | { 276 | type: "doc", 277 | content: [ 278 | { 279 | type: "bullet_list", 280 | content: [ 281 | { 282 | type: "list_item", 283 | content: [ 284 | { 285 | type: "paragraph", 286 | content: [{ type: "text" }], 287 | }, 288 | { 289 | type: "paragraph", 290 | content: [ 291 | { type: "text" }, 292 | { type: "image" }, 293 | { type: "text" }, 294 | ], 295 | }, 296 | ], 297 | }, 298 | { 299 | type: "list_item", 300 | content: [ 301 | { 302 | type: "paragraph", 303 | content: [{ type: "text" }], 304 | }, 305 | ], 306 | }, 307 | ], 308 | }, 309 | ], 310 | }, 311 | prosemirrorSchema 312 | ) 313 | ).toEqual({ 314 | type: "doc", 315 | content: [ 316 | { 317 | type: "bullet_list", 318 | content: [ 319 | { 320 | type: "list_item", 321 | content: [ 322 | { 323 | type: "paragraph", 324 | content: [{ type: "text" }], 325 | }, 326 | { 327 | type: "paragraph", 328 | content: [{ type: "text" }], 329 | }, 330 | { type: "image" }, 331 | { 332 | type: "paragraph", 333 | content: [{ type: "text" }], 334 | }, 335 | ], 336 | }, 337 | { 338 | type: "list_item", 339 | content: [ 340 | { 341 | type: "paragraph", 342 | content: [{ type: "text" }], 343 | }, 344 | ], 345 | }, 346 | ], 347 | }, 348 | ], 349 | }); 350 | }); 351 | }); 352 | -------------------------------------------------------------------------------- 
/src/transform/fromPandoc/fromPandoc.ts:
--------------------------------------------------------------------------------
import {
    PandocNode,
    ProsemirrorNode,
    ProsemirrorMark,
    Inline,
    Block,
} from "types";

import { asArray, makeCounter } from "transform/util";
import { fluent, Fluent } from "transform/fluent";
import {
    FromPandocTransformContext,
    FromPandocTransformConfig,
} from "transform/types";
import { RuleSet } from "transform/ruleset";

import { applyMarksToNodes } from "./marks";
import { heal } from "./heal";

/**
 * Transforms Pandoc node(s) into Prosemirror nodes by repeatedly matching
 * the remaining elements against the ruleset. Marks produced by marks-rules
 * are recorded into context.marksMap rather than applied inline;
 * applyMarksToNodes attaches them afterwards.
 */
const fromPandocInner = (
    elementOrArray: PandocNode | PandocNode[],
    context: FromPandocTransformContext
    // NOTE(review): generic argument restored — the dump stripped `<...>`;
    // the function returns fluent(transformed: ProsemirrorNode[]).
): Fluent<ProsemirrorNode> => {
    if (!elementOrArray) {
        return fluent([] as ProsemirrorNode[]);
    }
    const { ruleset, marksMap } = context;
    const elements = asArray(elementOrArray);
    const transformed: ProsemirrorNode[] = [];
    // Marks gathered during this call only; merged into the shared map below.
    const localMarksMap = new Map<ProsemirrorNode, ProsemirrorMark[]>();
    let ptr = 0;
    while (ptr < elements.length) {
        const remaining = elements.slice(ptr);
        const { rule, acceptedCount } = ruleset.matchPandocNodes(remaining);
        if (rule.isMarksRule) {
            const accepted = elements[ptr];
            const marks = asArray(rule.transformer(accepted, context));
            if ("content" in accepted) {
                const innerTransformed =
                    typeof accepted.content === "string"
                        ? [{ type: "text", text: accepted.content }]
                        : fromPandocInner(
                              // This cast works around the fact that some Pandoc nodes have nested arrays
                              // as their content property (e.g. OrderedList has Block[][]). This shouldn't
                              // be a problem in practice unless you're trying to do something very stupid
47 | accepted.content as Block[] | Inline[], 48 | context 49 | ).asArray(); 50 | for (const node of innerTransformed) { 51 | localMarksMap.set(node, marks); 52 | } 53 | transformed.push(...innerTransformed); 54 | } 55 | } else if (rule.isMarksRule === false) { 56 | const accepted = rule.acceptsMultiple 57 | ? elements.slice(ptr, ptr + acceptedCount) 58 | : elements[ptr]; 59 | const addition = rule.transformer(accepted, context); 60 | transformed.push(...asArray(addition)); 61 | } 62 | ptr += acceptedCount; 63 | } 64 | for (const [node, localMarks] of localMarksMap.entries()) { 65 | const currentMarks = marksMap.get(node) || []; 66 | marksMap.set(node, [...currentMarks, ...localMarks]); 67 | } 68 | return fluent(transformed); 69 | }; 70 | 71 | export const fromPandoc = ( 72 | elementOrArray: PandocNode | PandocNode[], 73 | ruleset: RuleSet, 74 | config: Partial = {} 75 | ): Fluent => { 76 | const { 77 | resources = {}, 78 | useSmartQuotes = false, 79 | prosemirrorDocWidth = 1000, 80 | } = config; 81 | const context: FromPandocTransformContext = { 82 | ruleset, 83 | resources, 84 | useSmartQuotes, 85 | count: makeCounter(), 86 | transform: (element, parentContext = {}) => 87 | fromPandocInner(element, { ...context, ...parentContext }), 88 | marksMap: new Map(), 89 | prosemirrorDocWidth, 90 | }; 91 | const nodes = context.transform(elementOrArray); 92 | const nodesWithMarks = applyMarksToNodes( 93 | nodes.asArray(), 94 | ruleset.prosemirrorSchema, 95 | context.marksMap 96 | ); 97 | const healed = nodesWithMarks.map((node) => 98 | heal(node, ruleset.prosemirrorSchema) 99 | ); 100 | return fluent(healed); 101 | }; 102 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/heal.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorNode, ProsemirrorSchema } from "types"; 2 | import { parseExpr, Expr, createItemAcceptor } from "expression"; 3 | 4 | type OpenToken = 
{
    type: "open";
    node: ProsemirrorNode;
    // Set when this token was produced by splitting an ancestor open/close
    // pair during healing, rather than taken from the original tree.
    createdFromSplit?: boolean;
};

// A token marking the end of a node's content.
type CloseToken = {
    type: "close";
    node: ProsemirrorNode;
};

// Per-open-token acceptance state: a stateful content-expression matcher
// plus the children it has accepted so far.
type AcceptedState = {
    consumeNode: (node: ProsemirrorNode) => boolean;
    acceptedNodes: ProsemirrorNode[];
};

export type Token = OpenToken | CloseToken;

/**
 * Flattens a Prosemirror tree into a depth-first stream of open/close
 * tokens, with no validity checking ("naive").
 */
export const getNaiveTokenList = (node: ProsemirrorNode): Token[] => {
    const tokens: Token[] = [];

    const visit = (node: ProsemirrorNode) => {
        tokens.push({ type: "open", node });
        if (node.content) {
            for (const child of node.content) {
                visit(child);
            }
        }
        tokens.push({ type: "close", node });
    };

    visit(node);
    return tokens;
};

/**
 * Repairs a naive token stream so every node's children satisfy its schema
 * content expression. When a child is rejected by all open ancestors, the
 * offending ancestors are closed (split) before the child and reopened
 * after it.
 */
export const healNaiveTokenList = (
    tokens: Token[],
    schema: ProsemirrorSchema
): Token[] => {
    const openTokens: OpenToken[] = [];
    const nextTokens: Token[] = [];

    // NOTE(review): generic arguments restored from usage — the dump shows
    // bare `Map` annotations, which are invalid; each map's key/value types
    // follow from its set() calls below.
    const tokenToAcceptorMap: Map<OpenToken, AcceptedState> = new Map();
    const newOpenTokensMap: Map<ProsemirrorNode, OpenToken[]> = new Map();
    const acceptorExpressions: Map<string, Expr> = new Map();
    const nodeToTokenMap: Map<ProsemirrorNode, Token[]> = new Map();

    // Pre-parse each node type's content expression (leaf types have none).
    Object.entries(schema.nodes).forEach(([key, entry]) => {
        if (entry.spec.content) {
            acceptorExpressions.set(key, parseExpr(entry.spec.content));
        }
    });

    // A node matches a content-expression atom when the atom names either
    // its type or its schema group.
    const matchProsemirrorNode = (group: string) => (node: ProsemirrorNode) => {
        const schemaEntry = schema.nodes[node.type];
        const matchRes =
            node.type === group || schemaEntry.spec.group === group;
        return matchRes;
    };

    // Emits an open token and records it in the bookkeeping structures.
    const openToken = (token: OpenToken) => {
        nextTokens.push(token);
        openTokens.push(token);
        nodeToTokenMap.set(token.node, [
            ...(nodeToTokenMap.get(token.node) || []),
            token,
        ]);
    };

    // Lazily creates the acceptance state for an open token. (Parameter was
    // previously implicitly `any`.)
    const getOrAddAcceptedStateToTokenMap = (
        token: OpenToken
    ): AcceptedState => {
        const existing = tokenToAcceptorMap.get(token);
        if (existing) {
            return existing;
        }
        const
next = { 79 | consumeNode: createItemAcceptor( 80 | acceptorExpressions.get(token.node.type), 81 | matchProsemirrorNode 82 | ), 83 | acceptedNodes: [] as ProsemirrorNode[], 84 | }; 85 | tokenToAcceptorMap.set(token, next); 86 | return next; 87 | }; 88 | 89 | for (const token of tokens) { 90 | if (token.type === "open") { 91 | const toReopen: OpenToken[] = []; 92 | let acceptingParentDepth = openTokens.length - 1; 93 | let accepted = token === tokens[0]; 94 | while (!accepted) { 95 | if (acceptingParentDepth < 0) { 96 | throw new Error( 97 | `Prosemirror healer cannot find a suitable parent node for ${token.node.type}` + 98 | ` (closed ${toReopen 99 | .concat() 100 | .reverse() 101 | .map((t) => t.node.type) 102 | .join(", ")})` 103 | ); 104 | } 105 | const testingToken = openTokens[acceptingParentDepth]; 106 | const { acceptedNodes, consumeNode } = 107 | getOrAddAcceptedStateToTokenMap(testingToken); 108 | accepted = consumeNode(token.node); 109 | if (accepted) { 110 | acceptedNodes.push(token.node); 111 | } else { 112 | nextTokens.push({ 113 | type: "close", 114 | node: testingToken.node, 115 | }); 116 | openTokens.pop(); 117 | toReopen.unshift({ 118 | type: "open", 119 | node: testingToken.node, 120 | }); 121 | --acceptingParentDepth; 122 | } 123 | } 124 | openToken(token); 125 | if (toReopen.length > 0) { 126 | newOpenTokensMap.set(token.node, toReopen); 127 | } 128 | } else if (token.type === "close") { 129 | nextTokens.push(token); 130 | openTokens.pop(); 131 | const mustOpenNow = newOpenTokensMap.get(token.node); 132 | if (mustOpenNow) { 133 | for (const tokenToOpen of mustOpenNow) { 134 | openToken(tokenToOpen); 135 | } 136 | } 137 | } 138 | } 139 | return nextTokens.map((token) => { 140 | const tokensForNode = nodeToTokenMap.get(token.node); 141 | if (tokensForNode && tokensForNode.length > 1) { 142 | return { ...token, createdFromSplit: true }; 143 | } 144 | return token; 145 | }); 146 | }; 147 | 148 | export const heal = ( 149 | node: ProsemirrorNode, 150 
| schema: ProsemirrorSchema 151 | ): ProsemirrorNode => { 152 | const naiveTokens = getNaiveTokenList(node); 153 | const tokens = healNaiveTokenList(naiveTokens, schema); 154 | const parentStack = []; 155 | let rootNode: ProsemirrorNode; 156 | for (let i = 0; i < tokens.length; i++) { 157 | const token = tokens[i]; 158 | const nextToken = tokens[i + 1]; 159 | if (token.type === "open") { 160 | if ( 161 | nextToken && 162 | nextToken.type === "close" && 163 | nextToken.node === token.node && 164 | token.createdFromSplit 165 | ) { 166 | // We found an open(X), close(X) pair that is empty because it was created while 167 | // spliting an element during healing. Just ignore these nodes. 168 | ++i; 169 | continue; 170 | } 171 | const contentProp = token.node.content ? { content: [] } : {}; 172 | const nextNode = { ...token.node, ...contentProp }; 173 | if (parentStack.length === 0) { 174 | rootNode = nextNode; 175 | } else { 176 | parentStack[parentStack.length - 1].content.push(nextNode); 177 | } 178 | parentStack.push(nextNode); 179 | } else if (token.type === "close") { 180 | parentStack.pop(); 181 | } 182 | } 183 | if (parentStack.length !== 0) { 184 | throw new Error( 185 | "Mismatched tokens encountered while healing document." 
186 | ); 187 | } 188 | return rootNode; 189 | }; 190 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/index.ts: -------------------------------------------------------------------------------- 1 | export { fromPandoc } from "./fromPandoc"; 2 | -------------------------------------------------------------------------------- /src/transform/fromPandoc/marks.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorNode, ProsemirrorMark, ProsemirrorSchema } from "types"; 2 | 3 | const compareMarks = (first: ProsemirrorMark, second: ProsemirrorMark) => 4 | // Tell no one what you saw here 5 | JSON.stringify(first) === JSON.stringify(second); 6 | 7 | const dedupeMarks = (marks: ProsemirrorMark[]): ProsemirrorMark[] => { 8 | const collected: ProsemirrorMark[] = []; 9 | for (const mark of marks) { 10 | if ( 11 | !collected.some((existingMark) => compareMarks(existingMark, mark)) 12 | ) { 13 | collected.push(mark); 14 | } 15 | } 16 | return collected; 17 | }; 18 | 19 | const nodeAcceptsMarks = (node: ProsemirrorNode, schema: ProsemirrorSchema) => { 20 | const definition = schema.nodes[node.type]; 21 | if (!definition) { 22 | throw new Error(`No Prosemirror schema entry for node ${node.type}`); 23 | } 24 | return definition.spec.group === "inline"; 25 | }; 26 | 27 | export const applyMarksToNodes = ( 28 | nodes: ProsemirrorNode[], 29 | schema: ProsemirrorSchema, 30 | marksMap: Map 31 | ): ProsemirrorNode[] => { 32 | const applyInner = ( 33 | node: ProsemirrorNode, 34 | appliedMarks: ProsemirrorMark[], 35 | pendingMarks: ProsemirrorMark[] 36 | ): ProsemirrorNode => { 37 | const marksAtNode = marksMap.get(node) || []; 38 | const cumulativeMarks = dedupeMarks([...pendingMarks, ...marksAtNode]); 39 | const acceptMarksHere = nodeAcceptsMarks(node, schema); 40 | const marksProps = 41 | acceptMarksHere && cumulativeMarks.length > 0 42 | ? 
{ marks: cumulativeMarks } 43 | : {}; 44 | if (!node.content && !acceptMarksHere && marksAtNode.length > 0) { 45 | console.warn( 46 | `Dropping marks at leaf node ${node.type}. This node should probably have group="inline".` 47 | ); 48 | } 49 | const nextAppliedMarks = [ 50 | ...appliedMarks, 51 | ...(acceptMarksHere ? marksAtNode : []), 52 | ]; 53 | const nextPendingMarks = acceptMarksHere ? [] : cumulativeMarks; 54 | const contentProps = node.content 55 | ? { 56 | content: node.content.map((child) => 57 | applyInner(child, nextAppliedMarks, nextPendingMarks) 58 | ), 59 | } 60 | : {}; 61 | return { 62 | ...node, 63 | ...marksProps, 64 | ...contentProps, 65 | }; 66 | }; 67 | 68 | return nodes.map((node) => applyInner(node, [], [])); 69 | }; 70 | -------------------------------------------------------------------------------- /src/transform/fromProsemirror/__tests__/__snapshots__/fromProsemirror.test.ts.snap: -------------------------------------------------------------------------------- 1 | // Jest Snapshot v1, https://goo.gl/fbAQLP 2 | 3 | exports[`fromProsemirror converts a table 1`] = ` 4 | Object { 5 | "attr": Object { 6 | "classes": Array [], 7 | "identifier": "", 8 | "properties": Object {}, 9 | }, 10 | "bodies": Array [ 11 | Object { 12 | "attr": Object { 13 | "classes": Array [], 14 | "identifier": "", 15 | "properties": Object {}, 16 | }, 17 | "bodyRows": Array [ 18 | Object { 19 | "attr": Object { 20 | "classes": Array [], 21 | "identifier": "", 22 | "properties": Object {}, 23 | }, 24 | "cells": Array [ 25 | Object { 26 | "alignment": "AlignDefault", 27 | "attr": Object { 28 | "classes": Array [], 29 | "identifier": "", 30 | "properties": Object {}, 31 | }, 32 | "colSpan": 1, 33 | "content": Array [ 34 | Object { 35 | "content": Array [ 36 | Object { 37 | "content": "Role", 38 | "type": "Str", 39 | }, 40 | ], 41 | "type": "Para", 42 | }, 43 | ], 44 | "rowSpan": 1, 45 | "type": "Cell", 46 | }, 47 | Object { 48 | "alignment": "AlignDefault", 49 | "attr": 
Object { 50 | "classes": Array [], 51 | "identifier": "", 52 | "properties": Object {}, 53 | }, 54 | "colSpan": 1, 55 | "content": Array [ 56 | Object { 57 | "content": Array [ 58 | Object { 59 | "content": "Hero", 60 | "type": "Str", 61 | }, 62 | ], 63 | "type": "Para", 64 | }, 65 | ], 66 | "rowSpan": 1, 67 | "type": "Cell", 68 | }, 69 | Object { 70 | "alignment": "AlignDefault", 71 | "attr": Object { 72 | "classes": Array [], 73 | "identifier": "", 74 | "properties": Object {}, 75 | }, 76 | "colSpan": 1, 77 | "content": Array [ 78 | Object { 79 | "content": Array [ 80 | Object { 81 | "content": "Villain", 82 | "type": "Str", 83 | }, 84 | ], 85 | "type": "Para", 86 | }, 87 | ], 88 | "rowSpan": 1, 89 | "type": "Cell", 90 | }, 91 | ], 92 | "type": "Row", 93 | }, 94 | Object { 95 | "attr": Object { 96 | "classes": Array [], 97 | "identifier": "", 98 | "properties": Object {}, 99 | }, 100 | "cells": Array [ 101 | Object { 102 | "alignment": "AlignDefault", 103 | "attr": Object { 104 | "classes": Array [], 105 | "identifier": "", 106 | "properties": Object {}, 107 | }, 108 | "colSpan": 1, 109 | "content": Array [ 110 | Object { 111 | "content": Array [ 112 | Object { 113 | "content": "Weapon", 114 | "type": "Str", 115 | }, 116 | ], 117 | "type": "Para", 118 | }, 119 | ], 120 | "rowSpan": 1, 121 | "type": "Cell", 122 | }, 123 | Object { 124 | "alignment": "AlignDefault", 125 | "attr": Object { 126 | "classes": Array [], 127 | "identifier": "", 128 | "properties": Object {}, 129 | }, 130 | "colSpan": 1, 131 | "content": Array [ 132 | Object { 133 | "content": Array [ 134 | Object { 135 | "content": "Power", 136 | "type": "Str", 137 | }, 138 | Object { 139 | "type": "Space", 140 | }, 141 | Object { 142 | "content": "Sword", 143 | "type": "Str", 144 | }, 145 | ], 146 | "type": "Para", 147 | }, 148 | ], 149 | "rowSpan": 1, 150 | "type": "Cell", 151 | }, 152 | Object { 153 | "alignment": "AlignDefault", 154 | "attr": Object { 155 | "classes": Array [], 156 | "identifier": 
"", 157 | "properties": Object {}, 158 | }, 159 | "colSpan": 1, 160 | "content": Array [ 161 | Object { 162 | "content": Array [ 163 | Object { 164 | "content": "Havoc", 165 | "type": "Str", 166 | }, 167 | Object { 168 | "type": "Space", 169 | }, 170 | Object { 171 | "content": "Staff", 172 | "type": "Str", 173 | }, 174 | ], 175 | "type": "Para", 176 | }, 177 | ], 178 | "rowSpan": 1, 179 | "type": "Cell", 180 | }, 181 | ], 182 | "type": "Row", 183 | }, 184 | Object { 185 | "attr": Object { 186 | "classes": Array [], 187 | "identifier": "", 188 | "properties": Object {}, 189 | }, 190 | "cells": Array [ 191 | Object { 192 | "alignment": "AlignDefault", 193 | "attr": Object { 194 | "classes": Array [], 195 | "identifier": "", 196 | "properties": Object {}, 197 | }, 198 | "colSpan": 1, 199 | "content": Array [ 200 | Object { 201 | "content": Array [ 202 | Object { 203 | "content": "Dark", 204 | "type": "Str", 205 | }, 206 | Object { 207 | "type": "Space", 208 | }, 209 | Object { 210 | "content": "secret", 211 | "type": "Str", 212 | }, 213 | ], 214 | "type": "Para", 215 | }, 216 | ], 217 | "rowSpan": 1, 218 | "type": "Cell", 219 | }, 220 | Object { 221 | "alignment": "AlignDefault", 222 | "attr": Object { 223 | "classes": Array [], 224 | "identifier": "", 225 | "properties": Object {}, 226 | }, 227 | "colSpan": 1, 228 | "content": Array [ 229 | Object { 230 | "content": Array [ 231 | Object { 232 | "content": "Expert", 233 | "type": "Str", 234 | }, 235 | Object { 236 | "type": "Space", 237 | }, 238 | Object { 239 | "content": "florist", 240 | "type": "Str", 241 | }, 242 | ], 243 | "type": "Para", 244 | }, 245 | ], 246 | "rowSpan": 1, 247 | "type": "Cell", 248 | }, 249 | Object { 250 | "alignment": "AlignDefault", 251 | "attr": Object { 252 | "classes": Array [], 253 | "identifier": "", 254 | "properties": Object {}, 255 | }, 256 | "colSpan": 1, 257 | "content": Array [ 258 | Object { 259 | "content": Array [ 260 | Object { 261 | "content": "Cries", 262 | "type": "Str", 
263 | }, 264 | Object { 265 | "type": "Space", 266 | }, 267 | Object { 268 | "content": "at", 269 | "type": "Str", 270 | }, 271 | Object { 272 | "type": "Space", 273 | }, 274 | Object { 275 | "content": "romcoms", 276 | "type": "Str", 277 | }, 278 | ], 279 | "type": "Para", 280 | }, 281 | ], 282 | "rowSpan": 1, 283 | "type": "Cell", 284 | }, 285 | ], 286 | "type": "Row", 287 | }, 288 | Object { 289 | "attr": Object { 290 | "classes": Array [], 291 | "identifier": "", 292 | "properties": Object {}, 293 | }, 294 | "cells": Array [ 295 | Object { 296 | "alignment": "AlignDefault", 297 | "attr": Object { 298 | "classes": Array [], 299 | "identifier": "", 300 | "properties": Object {}, 301 | }, 302 | "colSpan": 3, 303 | "content": Array [ 304 | Object { 305 | "content": Array [ 306 | Object { 307 | "content": "Some", 308 | "type": "Str", 309 | }, 310 | Object { 311 | "type": "Space", 312 | }, 313 | Object { 314 | "content": "stuff", 315 | "type": "Str", 316 | }, 317 | Object { 318 | "type": "Space", 319 | }, 320 | Object { 321 | "content": "at", 322 | "type": "Str", 323 | }, 324 | Object { 325 | "type": "Space", 326 | }, 327 | Object { 328 | "content": "the", 329 | "type": "Str", 330 | }, 331 | Object { 332 | "type": "Space", 333 | }, 334 | Object { 335 | "content": "bottom", 336 | "type": "Str", 337 | }, 338 | ], 339 | "type": "Para", 340 | }, 341 | ], 342 | "rowSpan": 1, 343 | "type": "Cell", 344 | }, 345 | ], 346 | "type": "Row", 347 | }, 348 | ], 349 | "headRows": Array [], 350 | "rowHeadColumns": 1, 351 | "type": "TableBody", 352 | }, 353 | ], 354 | "caption": Object { 355 | "content": Array [], 356 | "type": "Caption", 357 | }, 358 | "colSpecs": Array [ 359 | Object { 360 | "alignment": "AlignDefault", 361 | "type": "ColSpec", 362 | "width": 0.1985185185185185, 363 | }, 364 | Object { 365 | "alignment": "AlignDefault", 366 | "type": "ColSpec", 367 | "width": 0.49333333333333335, 368 | }, 369 | Object { 370 | "alignment": "AlignDefault", 371 | "type": "ColSpec", 
372 | "width": 0.3985185185185185, 373 | }, 374 | ], 375 | "foot": Object { 376 | "attr": Object { 377 | "classes": Array [], 378 | "identifier": "", 379 | "properties": Object {}, 380 | }, 381 | "rows": Array [], 382 | "type": "TableFoot", 383 | }, 384 | "head": Object { 385 | "attr": Object { 386 | "classes": Array [], 387 | "identifier": "", 388 | "properties": Object {}, 389 | }, 390 | "rows": Array [ 391 | Object { 392 | "attr": Object { 393 | "classes": Array [], 394 | "identifier": "", 395 | "properties": Object {}, 396 | }, 397 | "cells": Array [ 398 | Object { 399 | "alignment": "AlignDefault", 400 | "attr": Object { 401 | "classes": Array [], 402 | "identifier": "", 403 | "properties": Object {}, 404 | }, 405 | "colSpan": 1, 406 | "content": Array [ 407 | Object { 408 | "content": Array [], 409 | "type": "Para", 410 | }, 411 | ], 412 | "rowSpan": 1, 413 | "type": "Cell", 414 | }, 415 | Object { 416 | "alignment": "AlignDefault", 417 | "attr": Object { 418 | "classes": Array [], 419 | "identifier": "", 420 | "properties": Object {}, 421 | }, 422 | "colSpan": 1, 423 | "content": Array [ 424 | Object { 425 | "content": Array [ 426 | Object { 427 | "content": "He-Man", 428 | "type": "Str", 429 | }, 430 | ], 431 | "type": "Para", 432 | }, 433 | ], 434 | "rowSpan": 1, 435 | "type": "Cell", 436 | }, 437 | Object { 438 | "alignment": "AlignDefault", 439 | "attr": Object { 440 | "classes": Array [], 441 | "identifier": "", 442 | "properties": Object {}, 443 | }, 444 | "colSpan": 1, 445 | "content": Array [ 446 | Object { 447 | "content": Array [ 448 | Object { 449 | "content": "Skeletor", 450 | "type": "Str", 451 | }, 452 | ], 453 | "type": "Para", 454 | }, 455 | ], 456 | "rowSpan": 1, 457 | "type": "Cell", 458 | }, 459 | ], 460 | "type": "Row", 461 | }, 462 | ], 463 | "type": "TableHead", 464 | }, 465 | "type": "Table", 466 | } 467 | `; 468 | -------------------------------------------------------------------------------- 
/src/transform/fromProsemirror/fromProsemirror.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorNode, PandocNode } from "types"; 2 | 3 | import { asArray, flatten, makeCounter } from "transform/util"; 4 | import { RuleSet } from "transform/ruleset"; 5 | import { Fluent, fluent } from "transform/fluent"; 6 | import { 7 | FromProsemirrorTransformConfig, 8 | FromProsemirrorTransformContext, 9 | } from "transform/types"; 10 | 11 | import { createWrapperNodeFromMarks, splitNodesByMarks } from "./marks"; 12 | 13 | const fromProsemirrorInner = ( 14 | elementOrArray: ProsemirrorNode | ProsemirrorNode[], 15 | context: FromProsemirrorTransformContext 16 | ): Fluent => { 17 | if (!elementOrArray) { 18 | return fluent([]); 19 | } 20 | const { ruleset } = context; 21 | const nodesAndAssociatedMarks = splitNodesByMarks(asArray(elementOrArray)); 22 | const transformed: PandocNode[] = []; 23 | for (const { nodes, marks } of nodesAndAssociatedMarks) { 24 | let ptr = 0; 25 | const innerTransformed = []; 26 | while (ptr < nodes.length) { 27 | const remaining = nodes.slice(ptr); 28 | const { rule, acceptedCount } = 29 | ruleset.matchProsemirrorNodes(remaining); 30 | const addition: PandocNode[] = flatten( 31 | rule.acceptsMultiple 32 | ? 
rule.transformer( 33 | nodes.slice(ptr, ptr + acceptedCount), 34 | context 35 | ) 36 | : rule.transformer(nodes[ptr], context) 37 | ); 38 | innerTransformed.push(...addition); 39 | ptr += acceptedCount; 40 | } 41 | const maybeWrappedNodes = createWrapperNodeFromMarks( 42 | innerTransformed, 43 | marks, 44 | context 45 | ); 46 | transformed.push(...asArray(maybeWrappedNodes)); 47 | } 48 | return fluent(transformed); 49 | }; 50 | 51 | export const fromProsemirror = ( 52 | elementOrArray: ProsemirrorNode | ProsemirrorNode[], 53 | ruleset: RuleSet, 54 | config: Partial = {} 55 | ): Fluent => { 56 | const { resources = {}, prosemirrorDocWidth = 1000 } = config; 57 | const context: FromProsemirrorTransformContext = { 58 | ruleset, 59 | resources, 60 | prosemirrorDocWidth, 61 | count: makeCounter(), 62 | transform: (element) => fromProsemirrorInner(element, context), 63 | }; 64 | return context.transform(elementOrArray); 65 | }; 66 | -------------------------------------------------------------------------------- /src/transform/fromProsemirror/index.ts: -------------------------------------------------------------------------------- 1 | export { fromProsemirror } from "./fromProsemirror"; 2 | -------------------------------------------------------------------------------- /src/transform/fromProsemirror/marks.ts: -------------------------------------------------------------------------------- 1 | import { PandocNode, ProsemirrorNode, ProsemirrorMark } from "types"; 2 | import { FromProsemirrorTransformContext, OneOrMany } from "transform/types"; 3 | import { asArray } from "transform/util"; 4 | 5 | type NodesAndMarksBucket = { 6 | nodes: ProsemirrorNode[]; 7 | marks: ProsemirrorMark[]; 8 | }; 9 | 10 | export const createWrapperNodeFromMarks = ( 11 | innerNode: OneOrMany, 12 | marks: ProsemirrorMark[], 13 | context: FromProsemirrorTransformContext 14 | ): OneOrMany => { 15 | const { ruleset } = context; 16 | return marks.reduce((node, mark) => { 17 | const { rule } = 
ruleset.matchProsemirrorMarks([mark]); 18 | return rule.transformer(mark, asArray(node), context); 19 | }, innerNode); 20 | }; 21 | 22 | const createNodesAndMarksBucket = ( 23 | marks: ProsemirrorMark[] 24 | ): NodesAndMarksBucket => { 25 | return { 26 | nodes: [], 27 | marks, 28 | }; 29 | }; 30 | 31 | const alphabetizeObjectProps = >( 32 | object: T 33 | ): T => { 34 | const next: Partial = {}; 35 | Object.keys(object) 36 | .sort() 37 | .forEach((key: keyof T) => { 38 | next[key] = object[key]; 39 | }); 40 | return next as T; 41 | }; 42 | 43 | const canonicalizeMarks = (marks: ProsemirrorMark[]) => { 44 | const canonicalized = marks 45 | .concat() 46 | .sort((a, b) => (a.type > b.type ? 1 : -1)) 47 | .map((mark) => { 48 | if (mark.attrs) { 49 | return { 50 | ...mark, 51 | attrs: alphabetizeObjectProps(mark.attrs), 52 | }; 53 | } 54 | return mark; 55 | }); 56 | return JSON.stringify(canonicalized); 57 | }; 58 | 59 | export const splitNodesByMarks = ( 60 | nodes: ProsemirrorNode[] 61 | ): NodesAndMarksBucket[] => { 62 | let currentCanonicalizedMarks: null | string = null; 63 | let currentBucket: null | NodesAndMarksBucket = null; 64 | const buckets: NodesAndMarksBucket[] = []; 65 | for (const node of nodes) { 66 | const currentMarks = node.marks || []; 67 | const canonicalizedMarks = canonicalizeMarks(currentMarks); 68 | const useNewBucket = canonicalizedMarks !== currentCanonicalizedMarks; 69 | if (useNewBucket) { 70 | if (currentBucket) { 71 | buckets.push(currentBucket); 72 | } 73 | currentBucket = createNodesAndMarksBucket(currentMarks); 74 | currentCanonicalizedMarks = canonicalizedMarks; 75 | } 76 | currentBucket.nodes.push(node); 77 | } 78 | if (currentBucket) { 79 | buckets.push(currentBucket); 80 | } 81 | return buckets; 82 | }; 83 | -------------------------------------------------------------------------------- /src/transform/inference/index.ts: -------------------------------------------------------------------------------- 1 | export * from 
"./inferPandocType"; 2 | export * from "./inferProsemirrorType"; 3 | -------------------------------------------------------------------------------- /src/transform/inference/inferPandocType.ts: -------------------------------------------------------------------------------- 1 | import { PandocNode } from "../../types"; 2 | 3 | import { OneOrMore, Trim } from "./shared"; 4 | 5 | type Resolve = 6 | | ResolveZeroOrMore 7 | | ResolveOneOrMore 8 | | ResolveParens 9 | | ResolveChoice 10 | | ResolveIdentifier; 11 | 12 | type ResolveZeroOrMore = Str extends `${infer Some}*` 13 | ? Resolve extends never 14 | ? never 15 | : Resolve[] 16 | : never; 17 | 18 | type ResolveOneOrMore = Str extends `${infer Some}+` 19 | ? Resolve extends never 20 | ? never 21 | : OneOrMore> 22 | : never; 23 | 24 | type ResolveParens = Str extends `(${infer Some})` ? Resolve : never; 25 | 26 | type ResolveChoice = Str extends `${infer Some}|${infer Rest}` 27 | ? Resolve> | Resolve> 28 | : never; 29 | 30 | type ResolveIdentifier = Str extends PandocNode["type"] 31 | ? Readonly 32 | : never; 33 | 34 | export type InferPandocPattern = Resolve; 35 | export type InferPandocNodeType = ResolveIdentifier; 36 | -------------------------------------------------------------------------------- /src/transform/inference/inferProsemirrorType.ts: -------------------------------------------------------------------------------- 1 | import { ProsemirrorMark, ProsemirrorNode, ProsemirrorSchema } from "types"; 2 | 3 | import { OneOrMore, Trim } from "./shared"; 4 | 5 | type Resolve = 6 | | ResolveZeroOrMore 7 | | ResolveOneOrMore 8 | | ResolveParens 9 | | ResolveChoice 10 | | ResolveIdentifier; 11 | 12 | type ResolveZeroOrMore< 13 | Str, 14 | Schema extends ProsemirrorSchema 15 | > = Str extends `${infer Some}*` 16 | ? Resolve extends never 17 | ? never 18 | : Resolve[] 19 | : never; 20 | 21 | type ResolveOneOrMore< 22 | Str, 23 | Schema extends ProsemirrorSchema 24 | > = Str extends `${infer Some}+` 25 | ? 
Resolve extends never 26 | ? never 27 | : OneOrMore> 28 | : never; 29 | 30 | type ResolveParens< 31 | Str, 32 | Schema extends ProsemirrorSchema 33 | > = Str extends `(${infer Some})` ? Resolve : never; 34 | 35 | type ResolveChoice< 36 | Str, 37 | Schema extends ProsemirrorSchema 38 | > = Str extends `${infer Some}|${infer Rest}` 39 | ? Resolve, Schema> | Resolve, Schema> 40 | : never; 41 | 42 | // TODO(ian): Maybe add some real inference here if it turns out to be possible 43 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 44 | type ResolveIdentifier< 45 | Str, 46 | Schema extends ProsemirrorSchema 47 | > = Str extends Nodes ? ProsemirrorNode : never; 48 | 49 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 50 | type Marks = Str extends ProsemirrorSchema 51 | ? FoundMarks 52 | : never; 53 | 54 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 55 | type Nodes = Str extends ProsemirrorSchema 56 | ? FoundNodes 57 | : never; 58 | 59 | export type InferProsemirrorNodePattern< 60 | Str, 61 | Schema extends ProsemirrorSchema 62 | > = Resolve; 63 | 64 | export type InferProsemirrorNodeType< 65 | Str, 66 | Schema extends ProsemirrorSchema 67 | > = Str extends Nodes ? ProsemirrorNode : never; 68 | 69 | export type InferProsemirrorMarkType< 70 | Str, 71 | Schema extends ProsemirrorSchema 72 | > = Str extends Marks ? ProsemirrorMark : never; 73 | 74 | export type InferProsemirrorElementType< 75 | Str, 76 | Schema extends ProsemirrorSchema 77 | > = 78 | | InferProsemirrorNodeType 79 | | InferProsemirrorMarkType; 80 | -------------------------------------------------------------------------------- /src/transform/inference/shared.ts: -------------------------------------------------------------------------------- 1 | export type OneOrMore = [T, ...T[]]; 2 | 3 | export type Trim = T extends ` ${infer U}` 4 | ? Trim 5 | : T extends `${infer U} ` 6 | ? 
Trim 7 | : T; 8 | -------------------------------------------------------------------------------- /src/transform/ruleset.ts: -------------------------------------------------------------------------------- 1 | import { 2 | PANDOC_NODE_TYPES, 3 | PandocNode, 4 | ProsemirrorMark, 5 | ProsemirrorNode, 6 | ProsemirrorSchema, 7 | } from "types"; 8 | import { 9 | parseExpr, 10 | exprAcceptsMultiple, 11 | exprWillAlwaysMatchSingleIdentifier, 12 | acceptItems, 13 | Expr, 14 | } from "expression"; 15 | 16 | import { 17 | InferPandocPattern, 18 | InferProsemirrorMarkType, 19 | InferProsemirrorElementType, 20 | InferProsemirrorNodePattern, 21 | InferPandocNodeType, 22 | } from "./inference"; 23 | import { 24 | BidirectionalTransformer, 25 | PandocNodeToProsemirrorMarkTransformer, 26 | PandocNodeToProsemirrorNodeTransformer, 27 | ParameterizedBidirectionalTransformer, 28 | ProsemirrorMarkToPandocNodeTransformer, 29 | ProsemirrorNodeToPandocNodeTransformer, 30 | Rule, 31 | } from "./types"; 32 | import { flatten } from "./util"; 33 | 34 | type AcceptResult = { 35 | acceptedCount: number; 36 | rule: Rule; 37 | }; 38 | 39 | const matchItemWithType = 40 | (identifier: string) => 41 | (item: { type: string }): boolean => 42 | identifier === item.type; 43 | 44 | const gatherExpressionsForCapturedNodeAssertions = ( 45 | ...items: (string[] | undefined)[] 46 | ): Expr[] => { 47 | return flatten( 48 | items 49 | .filter((x): x is string[] => !!x) 50 | .map((strs) => strs.map((str) => parseExpr(str))) 51 | ); 52 | }; 53 | 54 | const assertExpressionsSafeForParameterizedTransformer = ( 55 | pandocPattern: string, 56 | prosemirrorPattern: string 57 | ) => { 58 | const pdExpr = parseExpr(pandocPattern); 59 | const pmExpr = parseExpr(prosemirrorPattern); 60 | if (pdExpr.type !== "identifier" && pmExpr.type !== "identifier") { 61 | throw new Error( 62 | "Cannot use a transformer that takes node names as arguments in a rule that accepts patterns." 
+ 63 | " For instance, calling rules.transform('A | B', 'a', tr) will fail if tr is a function" + 64 | " of two arguments (pandocNodeType, prosemirrorNodeType), because 'A | B' is not" + 65 | " a valid Pandoc node name. You will need to call the transformer with two statically known" + 66 | " argument types and pass the result into the transform rule instead," + 67 | " e.g. rules.transform('A | B', 'a', tr('A', 'a')).\n" + 68 | `(Attempting to transform between ${pandocPattern} and ${prosemirrorPattern}` 69 | ); 70 | } 71 | }; 72 | 73 | const throwFailedMatchError = ( 74 | items: WithType[] 75 | ) => { 76 | throw new Error( 77 | `Could not find transform rule for items: ${ 78 | items 79 | .map((item) => item.type) 80 | .slice(0, 3) 81 | .join(", ") + (items.length > 3 ? "..." : "") 82 | }` 83 | ); 84 | }; 85 | 86 | const throwMarkMatchingError = (pattern: string) => { 87 | throw new Error( 88 | `Pattern for mark conversion must accept exactly one Pandoc node or Prosemirror mark (${pattern} was supplied)` 89 | ); 90 | }; 91 | 92 | const warnAboutMissingMatchesForRules = ( 93 | matchNoun: string, 94 | requiredTypes: string[], 95 | rules: Rule[] 96 | ) => { 97 | const matchingExpressions = rules 98 | .map((rule) => [rule.expression, ...rule.capturedExpressions]) 99 | .reduce((a, b) => [...a, ...b]); 100 | const missingTypes = requiredTypes.filter( 101 | (type) => 102 | !matchingExpressions.some((expr) => 103 | exprWillAlwaysMatchSingleIdentifier(expr, type) 104 | ) 105 | ); 106 | if (missingTypes.length > 0) { 107 | console.warn( 108 | `Cannot find rules that are guaranteed to match on a ${matchNoun} of these types: ` + 109 | `${missingTypes.join(", ")}.` + 110 | " You may want to add or modify rules so that the transformer does not break" + 111 | ` if it encounters one of these ${matchNoun}s.` 112 | ); 113 | } 114 | }; 115 | 116 | export class RuleSet { 117 | readonly pandocNodeToProsemirrorRules: ( 118 | | Rule 119 | | Rule 120 | )[] = []; 121 | readonly 
prosemirrorNodeToPandocNodeRules: Rule[] = 122 | []; 123 | readonly prosemirrorMarkToPandocNodeRules: Rule[] = 124 | []; 125 | readonly prosemirrorSchema: Schema; 126 | 127 | constructor(schema: Schema) { 128 | this.prosemirrorSchema = schema; 129 | } 130 | 131 | toProsemirrorNode( 132 | pattern: PandocNodePattern, 133 | transformer: PandocNodeToProsemirrorNodeTransformer< 134 | InferPandocPattern 135 | >, 136 | assertCapturedPandocNodes: string[] = [] 137 | ) { 138 | const expression = parseExpr(pattern); 139 | const capturedExpressions = gatherExpressionsForCapturedNodeAssertions( 140 | assertCapturedPandocNodes, 141 | transformer.assertCapturedPandocNodes 142 | ); 143 | this.pandocNodeToProsemirrorRules.push({ 144 | isMarksRule: false, 145 | acceptsMultiple: exprAcceptsMultiple(expression), 146 | expression, 147 | transformer, 148 | capturedExpressions, 149 | }); 150 | } 151 | 152 | toProsemirrorMark( 153 | pattern: PandocNodePattern, 154 | transformer: PandocNodeToProsemirrorMarkTransformer< 155 | InferPandocPattern 156 | >, 157 | assertCapturedPandocNodes: string[] = [] 158 | ) { 159 | const expression = parseExpr(pattern); 160 | const capturedExpressions = gatherExpressionsForCapturedNodeAssertions( 161 | assertCapturedPandocNodes, 162 | transformer.assertCapturedPandocNodes 163 | ); 164 | const acceptsMultiple = exprAcceptsMultiple(expression); 165 | if (acceptsMultiple) { 166 | throwMarkMatchingError(pattern); 167 | } 168 | this.pandocNodeToProsemirrorRules.push({ 169 | isMarksRule: true, 170 | acceptsMultiple: false, 171 | expression, 172 | transformer, 173 | capturedExpressions, 174 | }); 175 | } 176 | 177 | fromProsemirrorNode( 178 | pattern: ProsemirrorNodePattern, 179 | transformer: ProsemirrorNodeToPandocNodeTransformer< 180 | InferProsemirrorNodePattern 181 | >, 182 | assertCapturedProsemirrorNodes: string[] = [] 183 | ) { 184 | const expression = parseExpr(pattern); 185 | const capturedExpressions = gatherExpressionsForCapturedNodeAssertions( 186 | 
assertCapturedProsemirrorNodes, 187 | transformer.assertCapturedProsemirrorNodes 188 | ); 189 | const acceptsMultiple = exprAcceptsMultiple(expression); 190 | this.prosemirrorNodeToPandocNodeRules.push({ 191 | isMarksRule: false, 192 | acceptsMultiple, 193 | expression, 194 | transformer, 195 | capturedExpressions, 196 | }); 197 | } 198 | 199 | fromProsemirrorMark( 200 | pattern: ProsemirrorMarkPattern, 201 | transformer: ProsemirrorMarkToPandocNodeTransformer< 202 | InferProsemirrorMarkType 203 | > 204 | ) { 205 | const expression = parseExpr(pattern); 206 | const acceptsMultiple = exprAcceptsMultiple(expression); 207 | if (acceptsMultiple) { 208 | throwMarkMatchingError(pattern); 209 | } 210 | this.prosemirrorMarkToPandocNodeRules.push({ 211 | isMarksRule: true, 212 | acceptsMultiple: false, 213 | expression, 214 | transformer, 215 | capturedExpressions: [], 216 | }); 217 | } 218 | 219 | transform( 220 | pandocPattern: PandocPattern, 221 | prosemirrorPattern: ProsemirrorPattern, 222 | bidirectionalTransformer: 223 | | BidirectionalTransformer< 224 | InferPandocNodeType, 225 | InferProsemirrorElementType 226 | > 227 | | ParameterizedBidirectionalTransformer< 228 | PandocPattern, 229 | ProsemirrorPattern, 230 | Schema 231 | > 232 | ) { 233 | if (typeof bidirectionalTransformer === "function") { 234 | assertExpressionsSafeForParameterizedTransformer( 235 | pandocPattern, 236 | prosemirrorPattern 237 | ); 238 | bidirectionalTransformer = bidirectionalTransformer( 239 | pandocPattern, 240 | prosemirrorPattern 241 | ); 242 | } 243 | if ("toProsemirrorNode" in bidirectionalTransformer) { 244 | const { toProsemirrorNode, assertCapturedPandocNodes = [] } = 245 | bidirectionalTransformer; 246 | this.toProsemirrorNode( 247 | pandocPattern, 248 | toProsemirrorNode, 249 | assertCapturedPandocNodes 250 | ); 251 | } 252 | if ("toProsemirrorMark" in bidirectionalTransformer) { 253 | const { toProsemirrorMark, assertCapturedPandocNodes = [] } = 254 | bidirectionalTransformer; 255 
| this.toProsemirrorMark( 256 | pandocPattern, 257 | toProsemirrorMark, 258 | assertCapturedPandocNodes 259 | ); 260 | } 261 | if ("fromProsemirrorNode" in bidirectionalTransformer) { 262 | const { fromProsemirrorNode, assertCapturedProsemirrorNodes = [] } = 263 | bidirectionalTransformer; 264 | this.fromProsemirrorNode( 265 | prosemirrorPattern, 266 | fromProsemirrorNode, 267 | assertCapturedProsemirrorNodes 268 | ); 269 | } 270 | if ("fromProsemirrorMark" in bidirectionalTransformer) { 271 | const { fromProsemirrorMark } = bidirectionalTransformer; 272 | this.fromProsemirrorMark(prosemirrorPattern, fromProsemirrorMark); 273 | } 274 | } 275 | 276 | validate() { 277 | const { 278 | pandocNodeToProsemirrorRules, 279 | prosemirrorMarkToPandocNodeRules, 280 | prosemirrorNodeToPandocNodeRules, 281 | prosemirrorSchema, 282 | } = this; 283 | 284 | warnAboutMissingMatchesForRules( 285 | "Pandoc node", 286 | PANDOC_NODE_TYPES, 287 | pandocNodeToProsemirrorRules 288 | ); 289 | warnAboutMissingMatchesForRules( 290 | "Prosemirror node", 291 | Object.keys(prosemirrorSchema.nodes), 292 | prosemirrorNodeToPandocNodeRules 293 | ); 294 | warnAboutMissingMatchesForRules( 295 | "Prosemirror mark", 296 | Object.keys(prosemirrorSchema.marks), 297 | prosemirrorMarkToPandocNodeRules 298 | ); 299 | } 300 | 301 | private matchItems< 302 | ItemType extends { type: string }, 303 | RuleType extends Rule 304 | >(items: ItemType[], rules: RuleType[]): AcceptResult { 305 | for (const rule of rules) { 306 | const acceptedCount = acceptItems( 307 | rule.expression, 308 | items, 309 | matchItemWithType 310 | ); 311 | if (acceptedCount > 0) { 312 | return { 313 | acceptedCount, 314 | rule, 315 | }; 316 | } 317 | } 318 | throwFailedMatchError(items); 319 | } 320 | 321 | matchPandocNodes(nodes: PandocNode[]) { 322 | return this.matchItems(nodes, this.pandocNodeToProsemirrorRules); 323 | } 324 | 325 | matchProsemirrorNodes(nodes: ProsemirrorNode[]) { 326 | return this.matchItems(nodes, 
this.prosemirrorNodeToPandocNodeRules); 327 | } 328 | 329 | matchProsemirrorMarks(marks: ProsemirrorMark[]) { 330 | return this.matchItems(marks, this.prosemirrorMarkToPandocNodeRules); 331 | } 332 | } 333 | -------------------------------------------------------------------------------- /src/transform/transformers/bare.ts: -------------------------------------------------------------------------------- 1 | import { 2 | BlockQuote, 3 | Emph, 4 | SmallCaps, 5 | Strikeout, 6 | Strong, 7 | Subscript, 8 | Superscript, 9 | Underline, 10 | Plain, 11 | Para, 12 | LineBreak, 13 | } from "types"; 14 | 15 | type BareBlock = Plain | Para | BlockQuote; 16 | 17 | type BareLeaf = LineBreak; 18 | 19 | type BareInline = 20 | | Emph 21 | | Underline 22 | | Strong 23 | | Strikeout 24 | | Superscript 25 | | Subscript 26 | | SmallCaps; 27 | 28 | // A transformer appropriate for simple container nodes. Typically, these are 29 | // correspondences between Pandoc elements with a content property and 30 | // Prosemirror elements with a content property 31 | export const bareContentTransformer = ( 32 | pdNodeType: BareBlock["type"], 33 | pmNodeType 34 | ) => { 35 | return { 36 | toProsemirrorNode: (node, { transform }) => { 37 | return { 38 | type: pmNodeType, 39 | content: transform(node.content).asArray(), 40 | }; 41 | }, 42 | fromProsemirrorNode: (node, { transform }) => { 43 | return { 44 | type: pdNodeType, 45 | content: transform(node.content).asArray(), 46 | }; 47 | }, 48 | }; 49 | }; 50 | 51 | // A transformer between Pandoc inline nodes and Prosemirror marks with no attrs 52 | export const bareMarkTransformer = ( 53 | pdNodeType: BareInline["type"], 54 | pmMarkType 55 | ) => { 56 | return { 57 | toProsemirrorMark: () => { 58 | return { 59 | type: pmMarkType, 60 | }; 61 | }, 62 | fromProsemirrorMark: (_, content) => { 63 | return { 64 | type: pdNodeType, 65 | content, 66 | }; 67 | }, 68 | }; 69 | }; 70 | 71 | // A transformer that does type -> type conversion for simple leaf nodes 
72 | export const bareLeafTransformer = ( 73 | pdNodeType: BareLeaf["type"], 74 | pmNodeType 75 | ) => { 76 | return { 77 | toProsemirrorNode: () => { 78 | return { 79 | type: pmNodeType, 80 | }; 81 | }, 82 | fromProsemirrorNode: () => { 83 | return { 84 | type: pdNodeType, 85 | }; 86 | }, 87 | }; 88 | }; 89 | -------------------------------------------------------------------------------- /src/transform/transformers/common.ts: -------------------------------------------------------------------------------- 1 | import { RawBlock, RawInline } from "types"; 2 | import { getQuoteChar } from "../util"; 3 | 4 | // A transformer that converts between Pandoc elements with string content and Prosemirror 5 | // elements that accept {type: 'text', text: string}[] as their content. 6 | export const textTransformer = (pdNodeName: "Str", pmNodeName: string) => { 7 | return { 8 | toProsemirrorNode: (node) => { 9 | return { 10 | type: pmNodeName, 11 | text: node.content, 12 | }; 13 | }, 14 | fromProsemirrorNode: (node) => { 15 | return { 16 | type: pdNodeName, 17 | content: node.content.join(""), 18 | }; 19 | }, 20 | }; 21 | }; 22 | 23 | // A one-way transformer that ignores a Pandoc node and passes its content through. 24 | export const pandocPassThroughTransformer = (node, { transform }) => { 25 | return transform(node.content).asArray(); 26 | }; 27 | 28 | // A one-way transformer that converts Pandoc's Quoted inline elements to quoted text. 
29 | export const pandocQuotedTransformer = ( 30 | node, 31 | { transform, useSmartQuotes } 32 | ) => { 33 | const isSingleQuote = node.quoteType === "SingleQuote"; 34 | return [ 35 | { 36 | type: "text", 37 | text: getQuoteChar(isSingleQuote, true, useSmartQuotes), 38 | }, 39 | ...transform(node.content).asArray(), 40 | { 41 | type: "text", 42 | text: getQuoteChar(isSingleQuote, false, useSmartQuotes), 43 | }, 44 | ]; 45 | }; 46 | 47 | // A transformer that returns an empty array 48 | export const nullTransformer = () => []; 49 | 50 | // A transformer that turns a Pandoc RawBlock or RawInline into a paragraph 51 | export const pandocRawTransformer = ( 52 | pmInlineNodeName: string, 53 | pmBlockNodeName: string = null 54 | ) => { 55 | return (node: RawBlock | RawInline) => { 56 | const { content } = node; 57 | const textNode = { type: pmInlineNodeName, text: content }; 58 | if (pmBlockNodeName) { 59 | const blockNode = { type: pmBlockNodeName, content: [textNode] }; 60 | return blockNode; 61 | } 62 | return textNode; 63 | }; 64 | }; 65 | -------------------------------------------------------------------------------- /src/transform/transformers/doc.ts: -------------------------------------------------------------------------------- 1 | import { Doc, ProsemirrorNode } from "types"; 2 | 3 | // A transformer that turns Pandoc root-level documents into Prosemirror ones. 
4 | export const docTransformer = (pdNodeType: "Doc", pmNodeType) => { 5 | return { 6 | toProsemirrorNode: (node: Doc, { transform }): ProsemirrorNode => { 7 | const { blocks } = node; 8 | return { 9 | type: pmNodeType, 10 | content: transform(blocks).asArray(), 11 | }; 12 | }, 13 | fromProsemirrorNode: (node: ProsemirrorNode, { transform }): Doc => { 14 | const { content } = node; 15 | return { 16 | type: pdNodeType, 17 | blocks: transform(content).asArray(), 18 | meta: {}, 19 | }; 20 | }, 21 | }; 22 | }; 23 | -------------------------------------------------------------------------------- /src/transform/transformers/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./common"; 2 | export * from "./table"; 3 | export * from "./bare"; 4 | export * from "./doc"; 5 | export * from "./list"; 6 | -------------------------------------------------------------------------------- /src/transform/transformers/list.ts: -------------------------------------------------------------------------------- 1 | import { 2 | DefinitionList, 3 | Block, 4 | Para, 5 | Plain, 6 | OrderedList, 7 | BulletList, 8 | } from "types"; 9 | 10 | import { flatten } from "../util"; 11 | 12 | type SimpleList = OrderedList | BulletList; 13 | 14 | // Returns a transformer appropriate for converting between Pandoc OrderedLists and BulletLists and 15 | // the equivalent types in a Prosemirror schema -- basically, anything like an
    or a
      . 16 | export const createListTransformer = 17 | (pmInnerNodeType: string, processListItem: (n: N) => N = (x) => x) => 18 | (pdNodeType: SimpleList["type"], pmNodeType) => { 19 | return { 20 | toProsemirrorNode: (node, { transform }) => { 21 | const content = node.content.map((blocks) => { 22 | return processListItem({ 23 | type: pmInnerNodeType, 24 | content: transform(blocks).asArray(), 25 | }); 26 | }); 27 | const hasOrder = 28 | node.listAttributes && 29 | typeof node.listAttributes.startNumber === "number"; 30 | const attrs = hasOrder 31 | ? { order: node.listAttributes.startNumber } 32 | : {}; 33 | return { 34 | type: pmNodeType, 35 | attrs, 36 | content, 37 | }; 38 | }, 39 | fromProsemirrorNode: (node, { transform }): SimpleList => { 40 | const content = node.content.map((listItem) => 41 | transform(listItem.content).asArray() 42 | ); 43 | if (pdNodeType === "OrderedList") { 44 | return { 45 | type: pdNodeType, 46 | content, 47 | listAttributes: { 48 | startNumber: node.attrs.order, 49 | listNumberStyle: "DefaultStyle", 50 | listNumberDelim: "DefaultDelim", 51 | }, 52 | }; 53 | } 54 | return { 55 | type: pdNodeType, 56 | content, 57 | }; 58 | }, 59 | assertCapturedProsemirrorNodes: [pmInnerNodeType], 60 | }; 61 | }; 62 | 63 | // A one-way transformer that takes the cursed DefinitionList and turns it into an unordered list. 
64 | export const definitionListTransformer = 65 | (pmOuterNodeType, pmInnerNodeType) => 66 | (node: DefinitionList, { transform }) => { 67 | const content = node.entries.map((value) => { 68 | const { term, definitions } = value; 69 | const blocks = flatten(definitions); 70 | const firstBlock = blocks[0]; 71 | let prependableBlock: Para | Plain; 72 | if ( 73 | firstBlock && 74 | (firstBlock.type === "Para" || firstBlock.type === "Plain") 75 | ) { 76 | prependableBlock = firstBlock as Para | Plain; 77 | } else { 78 | prependableBlock = { type: "Para", content: [] }; 79 | blocks.unshift(prependableBlock); 80 | } 81 | prependableBlock.content.unshift({ 82 | type: "Strong", 83 | content: [ 84 | ...term, 85 | { type: "Str", content: ":" }, 86 | { type: "Space" }, 87 | ], 88 | }); 89 | return { 90 | type: pmInnerNodeType, 91 | content: transform(blocks).asArray(), 92 | }; 93 | }); 94 | return { 95 | type: pmOuterNodeType, 96 | content, 97 | }; 98 | }; 99 | -------------------------------------------------------------------------------- /src/transform/transformers/table/fromPandoc.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Block, 3 | Caption, 4 | Cell, 5 | ColSpec, 6 | ProsemirrorNode, 7 | Row, 8 | Table, 9 | } from "types"; 10 | import { FromPandocTransformContext } from "transform/types"; 11 | 12 | const resolveCaption = ( 13 | caption: Caption, 14 | context: FromPandocTransformContext 15 | ): ProsemirrorNode[] => { 16 | const { shortCaption, content } = caption; 17 | return [ 18 | ...context.transform(content).asArray(), 19 | ...(shortCaption ? context.transform(shortCaption).asArray() : []), 20 | ]; 21 | }; 22 | 23 | const resolveCellAttrs = ( 24 | cell: Cell, 25 | colSpecs: ColSpec[], 26 | prosemirrorDocWidth: number 27 | ) => { 28 | const colWidths = colSpecs 29 | .map((colSpec) => ("width" in colSpec ? 
colSpec.width : 0)) 30 | // Subtract 1 from the total width here to account for 1px column dividers 31 | .map((percentageWidth) => -1 + percentageWidth * prosemirrorDocWidth); 32 | const widthAttr = colWidths.some((width) => width > 0) 33 | ? { colwidth: colWidths } 34 | : {}; 35 | return { 36 | ...widthAttr, 37 | rowspan: cell.rowSpan, 38 | colspan: cell.colSpan, 39 | }; 40 | }; 41 | 42 | const cellFromPandoc = ( 43 | cell: Cell, 44 | colSpecs: ColSpec[], 45 | isHead: boolean, 46 | context: FromPandocTransformContext 47 | ): ProsemirrorNode<"table_cell" | "table_header"> => { 48 | // Don't pass empty content into table_header or table_cell, which expect block+ 49 | const contentToTransform: Block[] = 50 | cell.content.length > 0 51 | ? cell.content 52 | : [{ type: "Para", content: [] }]; 53 | return { 54 | type: isHead ? "table_header" : "table_cell", 55 | attrs: resolveCellAttrs(cell, colSpecs, context.prosemirrorDocWidth), 56 | content: context.transform(contentToTransform).asArray(), 57 | }; 58 | }; 59 | 60 | const rowFromPandoc = ( 61 | row: Row, 62 | colSpecs: ColSpec[], 63 | headColumns: number | "all", 64 | context: FromPandocTransformContext 65 | ): ProsemirrorNode<"table_row"> => { 66 | const headCutoff = headColumns === "all" ? 
Infinity : headColumns; 67 | return { 68 | type: "table_row", 69 | content: row.cells.map((cell, idx) => 70 | cellFromPandoc( 71 | cell, 72 | colSpecs.slice(idx, idx + cell.colSpan), 73 | idx < headCutoff, 74 | context 75 | ) 76 | ), 77 | }; 78 | }; 79 | 80 | export const pandocTableTransformer = ( 81 | node: Table, 82 | context: FromPandocTransformContext 83 | ): 84 | | ProsemirrorNode<"table"> 85 | | [ProsemirrorNode<"table">, ...ProsemirrorNode[]] => { 86 | const { head, foot, bodies, caption, colSpecs } = node; 87 | 88 | const renderMyRow = (row: Row, headColumns: number | "all") => 89 | rowFromPandoc(row, colSpecs, headColumns, context); 90 | 91 | const headRows = head.rows.map((row) => renderMyRow(row, "all")); 92 | const bodyRows = bodies 93 | .map((body) => [ 94 | ...body.headRows.map((row) => renderMyRow(row, "all")), 95 | ...body.bodyRows.map((row) => 96 | renderMyRow(row, body.rowHeadColumns) 97 | ), 98 | ]) 99 | .reduce((a, b) => [...a, ...b]); 100 | const footRows = foot.rows.map((row) => renderMyRow(row, 0)); 101 | const prosemirrorCaption = resolveCaption(caption, context); 102 | 103 | const table: ProsemirrorNode<"table"> = { 104 | type: "table" as const, 105 | content: [...headRows, ...bodyRows, ...footRows], 106 | }; 107 | 108 | if (prosemirrorCaption.length > 0) { 109 | return [table, ...prosemirrorCaption]; 110 | } 111 | 112 | return table; 113 | }; 114 | -------------------------------------------------------------------------------- /src/transform/transformers/table/fromProsemirror.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Block, 3 | Caption, 4 | Cell, 5 | ColSpec, 6 | ProsemirrorNode, 7 | Row, 8 | Table, 9 | } from "types"; 10 | import { FromProsemirrorTransformContext } from "transform/types"; 11 | import { createAttr } from "transform/util"; 12 | 13 | const getDefaultColSpec = (): ColSpec => ({ 14 | type: "ColSpec", 15 | alignment: "AlignDefault", 16 | defaultWidth: true, 17 | }); 
18 | 19 | const getDefaultCaption = (): Caption => ({ 20 | type: "Caption", 21 | content: [], 22 | }); 23 | 24 | const getColumnCountFromRow = (row: ProsemirrorNode<"table_row">) => { 25 | return row.content 26 | .map((cell) => cell.attrs?.colspan ?? 1) 27 | .map((attr) => Number(attr)) 28 | .reduce((a, b) => a + b); 29 | }; 30 | 31 | const getColSpecsForTable = ( 32 | header: ProsemirrorNode<"table_row">, 33 | context: FromProsemirrorTransformContext 34 | ) => { 35 | const { prosemirrorDocWidth } = context; 36 | const columnCount = getColumnCountFromRow(header); 37 | const colSpecs: ColSpec[] = new Array(columnCount) 38 | .fill(0) 39 | .map(() => getDefaultColSpec()); 40 | header.content.forEach((cell, index) => { 41 | if ("colwidth" in cell.attrs && cell.attrs.colwidth) { 42 | const colWidth = cell.attrs.colwidth as (number | null)[]; 43 | colWidth.forEach((width, cellIndex) => { 44 | const realColumnIndex = index + cellIndex; 45 | if (typeof width === "number") { 46 | colSpecs[realColumnIndex] = { 47 | type: "ColSpec", 48 | alignment: "AlignDefault", 49 | width: width / prosemirrorDocWidth, 50 | }; 51 | } 52 | }); 53 | } 54 | }); 55 | return colSpecs; 56 | }; 57 | 58 | const transformCell = ( 59 | cell: ProsemirrorNode<"table_cell" | "table_header">, 60 | context: FromProsemirrorTransformContext 61 | ): Cell => { 62 | const { transform } = context; 63 | const { colspan = 1, rowspan = 1 } = cell.attrs; 64 | return { 65 | type: "Cell", 66 | attr: createAttr(), 67 | alignment: "AlignDefault", 68 | rowSpan: Number(rowspan), 69 | colSpan: Number(colspan), 70 | content: transform(cell.content).asArray() as Block[], 71 | }; 72 | }; 73 | 74 | const transformRow = ( 75 | row: ProsemirrorNode<"table_row">, 76 | context: FromProsemirrorTransformContext 77 | ): Row => { 78 | const cells = row.content as undefined | ProsemirrorNode< 79 | "table_cell" | "table_header" 80 | >[]; 81 | return { 82 | type: "Row", 83 | attr: createAttr(), 84 | // Table rows may be devoid of 
content, for example phantom rows that 85 | // are automatically added to satisfy a row where each element has 86 | // rowspan >1 87 | cells: cells?.map((cell) => transformCell(cell, context)) ?? [] , 88 | }; 89 | }; 90 | 91 | export const prosemirrorTableTransformer = ( 92 | table: ProsemirrorNode<"table">, 93 | context: FromProsemirrorTransformContext 94 | ): Table => { 95 | const [header, ...body] = table.content as ProsemirrorNode<"table_row">[]; 96 | return { 97 | type: "Table", 98 | attr: createAttr("id" in table.attrs ? String(table.attrs.id) : ""), 99 | caption: getDefaultCaption(), 100 | colSpecs: getColSpecsForTable(header, context), 101 | head: { 102 | type: "TableHead", 103 | attr: createAttr(), 104 | rows: [transformRow(header, context)], 105 | }, 106 | bodies: [ 107 | { 108 | type: "TableBody", 109 | attr: createAttr(), 110 | rowHeadColumns: 1, 111 | headRows: [], 112 | bodyRows: body.map((row) => transformRow(row, context)), 113 | }, 114 | ], 115 | foot: { 116 | type: "TableFoot", 117 | attr: createAttr(), 118 | rows: [], 119 | }, 120 | }; 121 | }; 122 | 123 | prosemirrorTableTransformer.assertCapturedProsemirrorNodes = [ 124 | "table_row", 125 | "table_cell", 126 | "table_header", 127 | ]; 128 | -------------------------------------------------------------------------------- /src/transform/transformers/table/index.ts: -------------------------------------------------------------------------------- 1 | export * from "./fromPandoc"; 2 | export * from "./fromProsemirror"; 3 | -------------------------------------------------------------------------------- /src/transform/types.ts: -------------------------------------------------------------------------------- 1 | import { 2 | PandocNode, 3 | ProsemirrorElement, 4 | ProsemirrorMark, 5 | ProsemirrorNode, 6 | ProsemirrorSchema, 7 | } from "types"; 8 | import { Expr } from "expression"; 9 | 10 | import { Fluent } from "./fluent"; 11 | import { InferPandocNodeType, InferProsemirrorElementType } from 
"./inference"; 12 | import { RuleSet } from "./ruleset"; 13 | 14 | export type OneOrMany = T | T[]; 15 | 16 | // Function type that allows rules to transform their child nodes and pass appropriate context 17 | // into this sub-transformation 18 | type TransformCallback< 19 | From extends ProsemirrorNode | PandocNode, 20 | To extends ProsemirrorNode | PandocNode, 21 | TransformParentContext extends Record = Record 22 | > = ( 23 | from: OneOrMany, 24 | context?: Partial 25 | ) => Fluent; 26 | 27 | // Options passed into the transform process in both directions 28 | type SharedTransformConfig = { 29 | resources: Record; 30 | prosemirrorDocWidth: number; 31 | }; 32 | 33 | // Items that are available from the transform context in both directions 34 | type SharedTransformContext = { 35 | ruleset: RuleSet; 36 | count: (label: string) => number; 37 | }; 38 | 39 | // Transform config with Pandoc-specific options 40 | export type FromPandocTransformConfig = SharedTransformConfig & { 41 | useSmartQuotes: boolean; 42 | }; 43 | 44 | // Transform config with Prosemirror-specific options 45 | export type FromProsemirrorTransformConfig = SharedTransformConfig; 46 | 47 | // Transform context for Pandoc 48 | export type FromPandocTransformContext = FromPandocTransformConfig & 49 | SharedTransformContext & { 50 | marksMap: Map; 51 | transform: TransformCallback; 52 | }; 53 | 54 | // Transform context for Prosemirror 55 | export type FromProsemirrorTransformContext = FromProsemirrorTransformConfig & 56 | SharedTransformContext & { 57 | transform: TransformCallback; 58 | }; 59 | 60 | export type PandocNodeToProsemirrorNodeTransformer< 61 | From extends OneOrMany = OneOrMany, 62 | To extends ProsemirrorNode = ProsemirrorNode 63 | > = ((node: From, context: FromPandocTransformContext) => OneOrMany) & { 64 | assertCapturedPandocNodes?: string[]; 65 | }; 66 | 67 | export type PandocNodeToProsemirrorMarkTransformer< 68 | From extends OneOrMany = OneOrMany, 69 | To extends ProsemirrorMark = 
ProsemirrorMark 70 | > = ((node: From, context: FromPandocTransformContext) => OneOrMany) & { 71 | assertCapturedPandocNodes?: string[]; 72 | }; 73 | 74 | export type ProsemirrorNodeToPandocNodeTransformer< 75 | From extends OneOrMany = OneOrMany, 76 | To extends PandocNode = PandocNode 77 | > = (( 78 | node: From, 79 | context: FromProsemirrorTransformContext 80 | ) => OneOrMany) & { 81 | assertCapturedProsemirrorNodes?: string[]; 82 | }; 83 | 84 | export type ProsemirrorMarkToPandocNodeTransformer< 85 | From extends ProsemirrorMark = ProsemirrorMark, 86 | To extends PandocNode = PandocNode 87 | > = ( 88 | mark: From, 89 | content: any, 90 | context: FromProsemirrorTransformContext 91 | ) => OneOrMany; 92 | 93 | export type NodeTransformer = 94 | | PandocNodeToProsemirrorNodeTransformer 95 | | ProsemirrorNodeToPandocNodeTransformer; 96 | 97 | export type MarksTransformer = 98 | | PandocNodeToProsemirrorMarkTransformer 99 | | ProsemirrorMarkToPandocNodeTransformer; 100 | 101 | export type AnyTransformer = NodeTransformer | MarksTransformer; 102 | 103 | export type Rule = 104 | | Readonly<{ 105 | transformer: Transformer extends NodeTransformer 106 | ? Transformer 107 | : never; 108 | expression: Expr; 109 | capturedExpressions: Expr[]; 110 | acceptsMultiple: boolean; 111 | isMarksRule: false; 112 | }> 113 | | Readonly<{ 114 | transformer: Transformer extends MarksTransformer 115 | ? 
Transformer 116 | : never; 117 | expression: Expr; 118 | capturedExpressions: Expr[]; 119 | acceptsMultiple: false; 120 | isMarksRule: true; 121 | }>; 122 | 123 | export type BidirectionalTransformer< 124 | PandocType extends PandocNode, 125 | ProsemirrorType extends ProsemirrorElement 126 | > = ( 127 | | { 128 | fromProsemirrorNode: ProsemirrorNodeToPandocNodeTransformer< 129 | Extract, 130 | PandocType 131 | >; 132 | toProsemirrorNode: PandocNodeToProsemirrorNodeTransformer< 133 | PandocType, 134 | Extract 135 | >; 136 | } 137 | | { 138 | fromProsemirrorMark: ProsemirrorMarkToPandocNodeTransformer< 139 | Extract, 140 | PandocType 141 | >; 142 | toProsemirrorMark: PandocNodeToProsemirrorMarkTransformer< 143 | PandocType, 144 | Extract 145 | >; 146 | } 147 | ) & { 148 | assertCapturedProsemirrorNodes?: string[]; 149 | assertCapturedPandocNodes?: string[]; 150 | }; 151 | 152 | export type ParameterizedBidirectionalTransformer< 153 | PandocPattern extends string, 154 | ProsemirrorPattern extends string, 155 | Schema extends ProsemirrorSchema 156 | > = ( 157 | pandocPattern: PandocPattern, 158 | prosemirrorPattern: ProsemirrorPattern 159 | ) => BidirectionalTransformer< 160 | InferPandocNodeType, 161 | InferProsemirrorElementType 162 | >; 163 | -------------------------------------------------------------------------------- /src/transform/util.ts: -------------------------------------------------------------------------------- 1 | import { Attr, Str, Space } from "../types"; 2 | 3 | export const createAttr = ( 4 | identifier: string = "", 5 | classes: string[] = [], 6 | properties: Record = {} 7 | ): Attr => { 8 | return { identifier, classes, properties }; 9 | }; 10 | 11 | export const textFromStrSpace = (nodes: (Str | Space)[]) => { 12 | let text = ""; 13 | for (const entry of nodes) { 14 | if (entry.type === "Str") { 15 | text = text + entry.content; 16 | } else { 17 | text = text + " "; 18 | } 19 | } 20 | return text; 21 | }; 22 | 23 | export const intersperse = ( 
24 | arr: any[], 25 | intersperseFn: (index?: number) => any 26 | ): any[] => 27 | (Array.isArray(arr) ? arr : [arr]).reduce( 28 | (accumulated: any[], next: any, index: number): any[] => { 29 | const added: any[] = [next]; 30 | if (index !== (Array.isArray(arr) ? arr : [arr]).length - 1) { 31 | added.push(intersperseFn(index)); 32 | } 33 | return [...accumulated, ...added]; 34 | }, 35 | [] 36 | ); 37 | 38 | export const textToStrSpace = (text: string): (Str | Space)[] => 39 | intersperse( 40 | text.split(" ").map((word) => ({ type: "Str", content: word })), 41 | () => ({ type: "Space" }) 42 | ).filter((node) => { 43 | if (node.type === "Str" && node.content.length === 0) { 44 | return false; 45 | } 46 | return true; 47 | }); 48 | 49 | export const asArray = (item: T | T[]): T[] => { 50 | return Array.isArray(item) ? item : [item]; 51 | }; 52 | 53 | export const asNode = (item: T | T[]): T => { 54 | return Array.isArray(item) ? item[0] : item; 55 | }; 56 | 57 | export const flatten = (input: any): T[] => { 58 | if (!Array.isArray(input)) { 59 | return [input]; 60 | } 61 | return input.reduce((arr: T[], next: T | T[]) => { 62 | if (Array.isArray(next)) { 63 | return [...arr, ...flatten(next)]; 64 | } 65 | return [...arr, next]; 66 | }, [] as T[]) as T[]; 67 | }; 68 | 69 | export const getQuoteChar = ( 70 | single: boolean, 71 | opening: boolean, 72 | smart: boolean 73 | ) => { 74 | if (smart) { 75 | if (single) { 76 | return opening ? "‘" : "’"; 77 | } else { 78 | return opening ? "“" : "”"; 79 | } 80 | } else { 81 | return single ? 
"'" : '"'; 82 | } 83 | }; 84 | 85 | export const makeCounter = () => { 86 | const countMap: Map = new Map(); 87 | return (type: string) => { 88 | const count = countMap.get(type) || 0; 89 | countMap.set(type, count + 1); 90 | return count; 91 | }; 92 | }; 93 | -------------------------------------------------------------------------------- /src/types.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Definitions for the Pandoc AST 3 | * See https://hackage.haskell.org/package/pandoc-types-1.22/docs/Text-Pandoc-Definition.html 4 | */ 5 | 6 | export { Schema as ProsemirrorSchema } from "prosemirror-model"; 7 | 8 | export type ProsemirrorAttr = 9 | | undefined 10 | | null 11 | | number 12 | | string 13 | | ProsemirrorAttr[]; 14 | 15 | export type ProsemirrorNode = { 16 | __isMark?: false; 17 | type: Type; 18 | content?: ProsemirrorNode[]; 19 | text?: string; 20 | attrs?: Record; 21 | marks?: ProsemirrorMark[]; 22 | }; 23 | 24 | export type ProsemirrorMark = { 25 | __isMark?: true; 26 | type: Type; 27 | attrs?: Record; 28 | }; 29 | 30 | export type ProsemirrorElement = ProsemirrorNode | ProsemirrorMark; 31 | 32 | export type PandocJson = { 33 | "pandoc-api-version": number[]; 34 | meta: { 35 | [key: string]: any; 36 | }; 37 | blocks: { [key: string]: any }[]; 38 | }; 39 | export type Doc = { 40 | type: "Doc"; 41 | blocks: Block[]; 42 | meta: { 43 | [key: string]: MetaValue; 44 | }; 45 | }; 46 | 47 | export type Alignment = 48 | | "AlignLeft" 49 | | "AlignRight" 50 | | "AlignCenter" 51 | | "AlignDefault"; 52 | 53 | export type QuoteType = "SingleQuote" | "DoubleQuote"; 54 | export type MathType = "DisplayMath" | "InlineMath"; 55 | 56 | export type ListNumberStyle = 57 | | "DefaultStyle" 58 | | "Example" 59 | | "Decimal" 60 | | "LowerRoman" 61 | | "UpperRoman" 62 | | "LowerAlpha" 63 | | "UpperAlpha"; 64 | 65 | export type ListNumberDelim = 66 | | "DefaultDelim" 67 | | "Period" 68 | | "OneParen" 69 | | "TwoParens"; 70 | 71 | 
export type ListAttributes = { 72 | startNumber: number; 73 | listNumberStyle: ListNumberStyle; 74 | listNumberDelim: ListNumberDelim; 75 | }; 76 | 77 | export type Format = string; 78 | 79 | export type Attr = { 80 | identifier: string; 81 | classes: string[]; 82 | properties: { [key: string]: string }; 83 | }; 84 | 85 | export type Target = { 86 | url: string; 87 | title: string; 88 | }; 89 | 90 | export type CitationMode = "AuthorInText" | "SuppressAuthor" | "NormalCitation"; 91 | 92 | export type Citation = { 93 | citationId: string; 94 | citationPrefix: Inline[]; 95 | citationSuffix: Inline[]; 96 | citationMode: CitationMode; 97 | citationNoteNum: number; 98 | citationHash: number; 99 | }; 100 | 101 | /* ~~~ Block-level definitions ~~~ */ 102 | 103 | /* Plain text, not a paragraph */ 104 | export type Plain = { 105 | type: "Plain"; 106 | content: Inline[]; 107 | }; 108 | 109 | /* Paragraph */ 110 | export type Para = { 111 | type: "Para"; 112 | content: Inline[]; 113 | }; 114 | 115 | /* Multiple non-breaking lines */ 116 | export type LineBlock = { 117 | type: "LineBlock"; 118 | content: Inline[][]; 119 | }; 120 | 121 | /* Code block (literal) with attributes */ 122 | export type CodeBlock = { 123 | type: "CodeBlock"; 124 | attr: Attr; 125 | content: string; 126 | }; 127 | 128 | /* Raw block */ 129 | export type RawBlock = { 130 | type: "RawBlock"; 131 | format: Format; 132 | content: string; 133 | }; 134 | 135 | /* Block quote (list of blocks) */ 136 | export type BlockQuote = { 137 | type: "BlockQuote"; 138 | content: Block[]; 139 | }; 140 | 141 | /* Ordered list (attributes and a list of items, each a list of blocks) */ 142 | export type OrderedList = { 143 | type: "OrderedList"; 144 | listAttributes: ListAttributes; 145 | content: Block[][]; 146 | }; 147 | 148 | /* Bullet list (list of items, each a list of blocks) */ 149 | export type BulletList = { 150 | type: "BulletList"; 151 | content: Block[][]; 152 | }; 153 | 154 | /* Definition list 155 | Each list 
item is a pair consisting of a term (a list of inlines) 156 | and one or more definitions (each a list of blocks) */ 157 | export type DefinitionList = { 158 | type: "DefinitionList"; 159 | entries: { 160 | term: Inline[]; 161 | definitions: Block[][]; 162 | }[]; 163 | }; 164 | 165 | /* Header - level (integer) and text (inlines) */ 166 | export type Header = { 167 | type: "Header"; 168 | level: number; 169 | attr: Attr; 170 | content: Inline[]; 171 | }; 172 | 173 | /* Horizontal rule */ 174 | export type HorizontalRule = { 175 | type: "HorizontalRule"; 176 | }; 177 | 178 | /* Table stuff */ 179 | export type Caption = { 180 | type: "Caption"; 181 | shortCaption?: Inline[]; 182 | content: Block[]; 183 | }; 184 | 185 | export type ColSpec = { 186 | type: "ColSpec"; 187 | alignment: Alignment; 188 | } & ({ width: number } | { defaultWidth: true }); 189 | 190 | export type Cell = { 191 | type: "Cell"; 192 | attr: Attr; 193 | alignment: Alignment; 194 | rowSpan: number; 195 | colSpan: number; 196 | content: Block[]; 197 | }; 198 | 199 | export type Row = { 200 | type: "Row"; 201 | attr: Attr; 202 | cells: Cell[]; 203 | }; 204 | 205 | export type TableHead = { 206 | type: "TableHead"; 207 | attr: Attr; 208 | rows: Row[]; 209 | }; 210 | 211 | export type TableFoot = { 212 | type: "TableFoot"; 213 | attr: Attr; 214 | rows: Row[]; 215 | }; 216 | 217 | export type TableBody = { 218 | type: "TableBody"; 219 | attr: Attr; 220 | rowHeadColumns: number; 221 | headRows: Row[]; 222 | bodyRows: Row[]; 223 | }; 224 | 225 | export type Table = { 226 | type: "Table"; 227 | attr: Attr; 228 | caption: Caption; 229 | colSpecs: ColSpec[]; 230 | head: TableHead; 231 | bodies: TableBody[]; 232 | foot: TableFoot; 233 | }; 234 | 235 | export type TableRow = { 236 | attr: Attr; 237 | rowHeadColumns: number; 238 | }; 239 | 240 | /* Generic block container with attributes */ 241 | export type Div = { 242 | type: "Div"; 243 | attr: Attr; 244 | content: Block[]; 245 | }; 246 | 247 | /* Nothing */ 
248 | export type Null = { 249 | type: "Null"; 250 | }; 251 | 252 | export type Block = 253 | | Plain 254 | | Para 255 | | LineBlock 256 | | CodeBlock 257 | | RawBlock 258 | | BlockQuote 259 | | OrderedList 260 | | BulletList 261 | | DefinitionList 262 | | Header 263 | | HorizontalRule 264 | | Table 265 | | Div 266 | | Null; 267 | 268 | /* ~~~ Inline-level definitions ~~~ */ 269 | 270 | /* Text (string) */ 271 | export type Str = { 272 | type: "Str"; 273 | content: string; 274 | }; 275 | 276 | /* Emphasized text (list of inlines) */ 277 | export type Emph = { 278 | type: "Emph"; 279 | content: Inline[]; 280 | }; 281 | 282 | /* Underlined text (list of inlines) */ 283 | export type Underline = { 284 | type: "Underline"; 285 | content: Inline[]; 286 | }; 287 | 288 | /* Strongly emphasized text (list of inlines) */ 289 | export type Strong = { 290 | type: "Strong"; 291 | content: Inline[]; 292 | }; 293 | 294 | /* Strikeout text (list of inlines) */ 295 | export type Strikeout = { 296 | type: "Strikeout"; 297 | content: Inline[]; 298 | }; 299 | 300 | /* Superscripted text (list of inlines) */ 301 | export type Superscript = { 302 | type: "Superscript"; 303 | content: Inline[]; 304 | }; 305 | 306 | /* Subscripted text (list of inlines) */ 307 | export type Subscript = { 308 | type: "Subscript"; 309 | content: Inline[]; 310 | }; 311 | 312 | /* Small caps text (list of inlines) */ 313 | export type SmallCaps = { 314 | type: "SmallCaps"; 315 | content: Inline[]; 316 | }; 317 | 318 | /* Quoted text (list of inlines) */ 319 | export type Quoted = { 320 | type: "Quoted"; 321 | quoteType: QuoteType; 322 | content: Inline[]; 323 | }; 324 | 325 | /* Citation (list of inlines) */ 326 | export type Cite = { 327 | type: "Cite"; 328 | citations: Citation[]; 329 | content: Inline[]; 330 | }; 331 | 332 | /* Inline code (literal) */ 333 | export type Code = { 334 | type: "Code"; 335 | attr: Attr; 336 | content: string; 337 | }; 338 | 339 | /* Inter-word space */ 340 | export type Space 
= { 341 | type: "Space"; 342 | }; 343 | 344 | /* Soft line break */ 345 | export type SoftBreak = { 346 | type: "SoftBreak"; 347 | }; 348 | 349 | /* Hard line break */ 350 | export type LineBreak = { 351 | type: "LineBreak"; 352 | }; 353 | 354 | /* TeX math (literal) */ 355 | export type Math = { 356 | type: "Math"; 357 | mathType: MathType; 358 | content: string; 359 | }; 360 | 361 | /* Raw inline */ 362 | export type RawInline = { 363 | type: "RawInline"; 364 | format: Format; 365 | content: string; 366 | }; 367 | 368 | /* Hyperlink: alt text (list of inlines), target */ 369 | export type Link = { 370 | type: "Link"; 371 | attr: Attr; 372 | content: Inline[]; 373 | target: Target; 374 | }; 375 | 376 | /* Image: alt text (list of inlines), target */ 377 | export type Image = { 378 | type: "Image"; 379 | attr: Attr; 380 | content: Inline[]; 381 | target: Target; 382 | }; 383 | 384 | /* Footnote or endnote */ 385 | export type Note = { 386 | type: "Note"; 387 | content: Block[]; 388 | }; 389 | 390 | /* Generic inline container with attributes */ 391 | export type Span = { 392 | type: "Span"; 393 | attr: Attr; 394 | content: Inline[]; 395 | }; 396 | 397 | /* Meta types */ 398 | 399 | export type MetaMap = { 400 | type: "MetaMap"; 401 | values: { [key: string]: MetaValue }; 402 | }; 403 | 404 | export type MetaList = { 405 | type: "MetaList"; 406 | content: MetaValue[]; 407 | }; 408 | 409 | export type MetaBool = { 410 | type: "MetaBool"; 411 | content: boolean; 412 | }; 413 | 414 | export type MetaString = { 415 | type: "MetaString"; 416 | content: string; 417 | }; 418 | export type MetaInlines = { 419 | type: "MetaInlines"; 420 | content: Inline[]; 421 | }; 422 | 423 | export type MetaBlocks = { 424 | type: "MetaBlocks"; 425 | content: Block[]; 426 | }; 427 | 428 | export type MetaValue = 429 | | MetaMap 430 | | MetaList 431 | | MetaBool 432 | | MetaString 433 | | MetaInlines 434 | | MetaBlocks; 435 | 436 | export type SimpleInline = 437 | | Emph 438 | | Underline 
439 | | Strong 440 | | Strikeout 441 | | Superscript 442 | | Subscript 443 | | SmallCaps; 444 | 445 | export type Inline = 446 | | Str 447 | | SimpleInline 448 | | Quoted 449 | | Cite 450 | | Code 451 | | Space 452 | | SoftBreak 453 | | LineBreak 454 | | Math 455 | | RawInline 456 | | Link 457 | | Image 458 | | Note 459 | | Span; 460 | 461 | export type PandocNode = Doc | Block | Inline; 462 | 463 | export const PANDOC_NODE_TYPES = [ 464 | "BlockQuote", 465 | "BulletList", 466 | "Cite", 467 | "Code", 468 | "CodeBlock", 469 | "DefinitionList", 470 | "Doc", 471 | "Div", 472 | "Emph", 473 | "Header", 474 | "HorizontalRule", 475 | "Image", 476 | "LineBlock", 477 | "LineBreak", 478 | "Link", 479 | "Math", 480 | "Note", 481 | "Null", 482 | "OrderedList", 483 | "Para", 484 | "Plain", 485 | "Quoted", 486 | "RawBlock", 487 | "RawInline", 488 | "SmallCaps", 489 | "SoftBreak", 490 | "Space", 491 | "Span", 492 | "Str", 493 | "Strikeout", 494 | "Strong", 495 | "Subscript", 496 | "Superscript", 497 | "Table", 498 | "Underline", 499 | ]; 500 | -------------------------------------------------------------------------------- /src/util.ts: -------------------------------------------------------------------------------- 1 | import { execSync, spawnSync } from "child_process"; 2 | 3 | import { parsePandocJson } from "./parse"; 4 | import { RuleSet } from "./transform/ruleset"; 5 | import { fromPandoc } from "./transform/fromPandoc/fromPandoc"; 6 | import { PandocJson } from "./types"; 7 | 8 | const MAX_BUFFER = 0 * 1024 * 1024; 9 | 10 | export const callPandoc = ( 11 | source: string, 12 | inputFormat: string, 13 | outputFormat: string = "json", 14 | extraArgs: string[] = [] 15 | ) => { 16 | return spawnSync( 17 | "pandoc", 18 | [ 19 | "-f", 20 | inputFormat, 21 | "-t", 22 | outputFormat, 23 | "--quiet", 24 | "--wrap=none", 25 | ...extraArgs, 26 | ], 27 | { input: source, maxBuffer: MAX_BUFFER } 28 | ).stdout.toString(); 29 | }; 30 | 31 | export const callPandocWithFile = ( 32 | 
sourcePath: string, 33 | outputFormat: string = "json", 34 | inputFormat: string = null, 35 | extraArgs: string[] = [] 36 | ) => { 37 | const extraArgsString = extraArgs.join(" "); 38 | const inputFormatString = inputFormat ? `-f ${inputFormat}` : ""; 39 | return execSync( 40 | `pandoc ${sourcePath} ${inputFormatString} -t ${outputFormat} ${extraArgsString}`, 41 | { maxBuffer: MAX_BUFFER } 42 | ).toString(); 43 | }; 44 | 45 | export const loadAndTransformFromPandoc = ( 46 | sourcePath: string, 47 | rules: RuleSet 48 | ) => { 49 | const pandocResult = callPandocWithFile(sourcePath); 50 | let json: PandocJson; 51 | try { 52 | json = JSON.parse(pandocResult); 53 | } catch (err) { 54 | if (pandocResult) { 55 | console.error(`Couldn't parse Pandoc result: ${pandocResult}`); 56 | } else { 57 | console.error(err); 58 | } 59 | } 60 | const pandocAst = parsePandocJson(json); 61 | return fromPandoc(pandocAst, rules).asNode(); 62 | }; 63 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "allowJs": true, 4 | "downlevelIteration": true, 5 | "esModuleInterop": true, 6 | "outDir": "dist", 7 | "lib": ["es6", "dom"], 8 | "baseUrl": "src", 9 | "noErrorTruncation": true, 10 | "paths": { 11 | "transform/": ["transform/index.ts"], 12 | "transform/*": ["transform/*"], 13 | "expression": ["expression/index.ts"], 14 | "expression/*": ["expression/*"], 15 | "example": ["example/index.ts"], 16 | "example/*": ["example/*"], 17 | "types": ["types.ts"], 18 | } 19 | }, 20 | "include": ["src"], 21 | "exclude": ["node_modules"] 22 | } --------------------------------------------------------------------------------