├── .dir-locals.el ├── .gitattributes ├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── bb.edn ├── build.clj ├── deps.edn ├── dev └── nextjournal │ └── markdown │ ├── parser.cljc │ └── render.cljs ├── notebooks ├── demo.docx ├── images.clj ├── pandoc.clj ├── parsing_extensibility.clj ├── reference.md ├── tight_lists.clj └── try.clj ├── out └── sci │ └── index.html ├── package.json ├── resources └── META-INF │ └── nextjournal │ └── markdown │ └── meta.edn ├── shadow-cljs.edn ├── src ├── deps.cljs ├── js │ └── markdown.js └── nextjournal │ ├── markdown.cljc │ └── markdown │ ├── impl.clj │ ├── impl.cljs │ ├── impl │ ├── extensions.clj │ ├── types.clj │ └── types │ │ ├── CustomNode.class │ │ └── CustomNode.java │ ├── transform.cljc │ ├── utils.cljc │ └── utils │ └── emoji.cljc ├── test ├── nextjournal │ ├── markdown │ │ └── multi_threading_test.clj │ └── markdown_test.cljc └── test_runner.clj └── yarn.lock /.dir-locals.el: -------------------------------------------------------------------------------- 1 | ((clojure-mode 2 | (cider-clojure-cli-aliases . ":nextjournal/clerk:test:repl"))) 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | resources/js/* linguist-vendored 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Continuous Delivery 2 | on: push 3 | jobs: 4 | tests: 5 | name: Tests 6 | runs-on: ${{matrix.sys.os}} 7 | strategy: 8 | matrix: 9 | sys: 10 | - { os: macos-latest, shell: bash } 11 | - { os: ubuntu-latest, shell: bash } 12 | - { os: windows-latest, shell: powershell } 13 | defaults: 14 | run: 15 | shell: ${{matrix.sys.shell}} 16 | steps: 17 | - name: 🛎 Checkout 18 | uses: actions/checkout@v2 19 | 20 | - name: 🔧 Install java 21 | uses: actions/setup-java@v1 22 | with: 23 | java-version: '11.0.7' 24 | 25 | - name: 🔧 Install clojure 26 | uses: DeLaGuardo/setup-clojure@master 27 | with: 28 | cli: '1.12.0.1530' 29 | 30 | - name: 🗝 maven cache 31 | uses: actions/cache@v4 32 | with: 33 | path: | 34 | ~/.m2 35 | ~/.gitlibs 36 | ~/.deps.clj 37 | key: ${{ runner.os }}-maven-${{ github.sha }} 38 | restore-keys: | 39 | ${{ runner.os }}-maven- 40 | 41 | - name: 🧪 Run tests 42 | run: clojure -X:test 43 | 44 | cljs-tests: 45 | name: ClojureScript Tests 46 | runs-on: ubuntu-latest 47 | steps: 48 | - name: 🛎 Checkout 49 | uses: actions/checkout@v2 50 | 51 | - name: 🔧 Setup Babashka 52 | uses: turtlequeue/setup-babashka@v1.3.0 53 | with: 54 | babashka-version: 0.7.8 55 | 56 | - name: 🗝 Shadow compiler cache 57 | uses: actions/cache@v4 58 | with: 59 | path: .shadow-cljs 60 | key: ${{ runner.os }}-shadow-cljs-${{ github.sha }} 61 | restore-keys: | 62 | ${{ runner.os }}-shadow-cljs- 63 | 64 | - name: 🧪 Run tests 65 | run: bb test:cljs 66 | 67 | notebooks: 68 | name: Clerk Notebooks Build 69 | runs-on: ubuntu-latest 70 | steps: 71 | - name: 🛎 Checkout 72 | uses: actions/checkout@v2 73 | 74 | - name: 🔧 Install java 75 | uses: actions/setup-java@v1 76 | with: 77 | java-version: '11.0.7' 78 | 79 | - name: 🔧 Install clojure 80 | uses: DeLaGuardo/setup-clojure@master 81 | with: 82 | cli: '1.10.3.943' 83 | 84 | - name: 🔧 Setup Babashka 85 | uses: turtlequeue/setup-babashka@v1.3.0 86 | with: 87 | babashka-version: 0.7.8 88 | 89 | - name: 🔧 Install Pandoc 90 | run: | 91 | 
curl -LO https://github.com/jgm/pandoc/releases/download/2.18/pandoc-2.18-1-amd64.deb 92 | ls -lah 93 | sudo dpkg -i pandoc-2.18-1-amd64.deb 94 | 95 | - name: 🔧 Setup LaTeX 96 | uses: wtfjoke/setup-tectonic@v3.0.4 97 | with: 98 | github-token: ${{ secrets.GITHUB_TOKEN }} 99 | 100 | - name: 🗝 maven cache 101 | uses: actions/cache@v4 102 | with: 103 | path: | 104 | ~/.m2 105 | ~/.gitlibs 106 | key: ${{ runner.os }}-maven-${{ github.sha }} 107 | restore-keys: | 108 | ${{ runner.os }}-maven- 109 | 110 | - name: 🗝 Clerk Cache 111 | uses: actions/cache@v4 112 | with: 113 | path: .clerk 114 | key: ${{ runner.os }}-clerk-cache 115 | 116 | - name: 🗝 Shadow compiler cache 117 | uses: actions/cache@v4 118 | with: 119 | path: .shadow-cljs 120 | key: ${{ runner.os }}-shadow-cljs-${{ github.sha }} 121 | restore-keys: | 122 | ${{ runner.os }}-shadow-cljs- 123 | 124 | - name: 🔐 Google Auth 125 | uses: google-github-actions/auth@v2.1.6 126 | with: 127 | credentials_json: ${{ secrets.GCLOUD_SERVICE_KEY }} 128 | 129 | - name: 🔧 Setup Google Cloud SDK 130 | uses: google-github-actions/setup-gcloud@v0.3.0 131 | 132 | - name: 🏗 Build Clerk Notebooks 133 | run: bb build:notebooks ${{ github.sha }} 134 | 135 | - name: 📠 Copy static build to bucket under SHA 136 | run: | 137 | gsutil cp -r public/build gs://nextjournal-snapshots/markdown/build/${{ github.sha }} 138 | 139 | - name: 📠 Copy static build to GitHub Pages 140 | if: ${{ github.ref == 'refs/heads/main' }} 141 | uses: JamesIves/github-pages-deploy-action@4.1.6 142 | with: 143 | branch: gh-pages # The branch the action should deploy to. 144 | folder: public/build # The folder the action should deploy. 145 | 146 | - name: 📤 Upload Pdf demo notebook 147 | uses: actions/upload-artifact@v4 148 | with: 149 | name: demo.pdf 150 | path: notebooks/demo.pdf 151 | 152 | - name: ✅ Add success status to report with link to snapshot 153 | uses: Sibz/github-status-action@v1 154 | with: 155 | authToken: ${{secrets.GITHUB_TOKEN}} 156 | context: 'Continuous Delivery / Clerk Static App' 157 | description: 'Ready' 158 | state: 'success' 159 | sha: ${{github.event.pull_request.head.sha || github.sha}} 160 | target_url: https://snapshots.nextjournal.com/markdown/build/${{ github.sha }} 161 | 162 | deploy: 163 | needs: [tests, cljs-tests] 164 | runs-on: ubuntu-latest 165 | steps: 166 | - name: 🛎 Checkout 167 | uses: actions/checkout@v3 168 | 169 | - name: 🏷 Get tags 170 | run: git fetch --tags origin 171 | 172 | - name: 🔧 Setup Babashka 173 | uses: turtlequeue/setup-babashka@v1.3.0 174 | with: 175 | babashka-version: 0.8.156 176 | 177 | - name: 🗝 maven cache 178 | uses: actions/cache@v4 179 | with: 180 | path: | 181 | ~/.m2 182 | ~/.gitlibs 183 | key: ${{ runner.os }}-maven-${{ github.sha }} 184 | restore-keys: | 185 | ${{ runner.os }}-maven- 186 | 187 | - name: 🍯 Publish to clojars 188 | env: 189 | CLOJARS_USERNAME: mkvlr 190 | CLOJARS_PASSWORD: ${{ secrets.CLOJARS_PASSWORD_MKVLR }} 191 | run: bb ci:publish 192 | 193 | - name: 🔢 Set lib version 194 | id: jar-version 195 | run: | 196 | JAR_VERSION=$(bb current-version) 197 | echo "##[set-output name=version;]${JAR_VERSION}" 198 | 199 | - name: 📤 Upload JAR 200 | uses: actions/upload-artifact@v4 201 | with: 202 | name: markdown-${{ steps.jar-version.outputs.version }}.jar 203 | path: target/markdown-${{ steps.jar-version.outputs.version }}.jar 204 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 
*.class 2 | *.iml 3 | *.jar 4 | .clerk 5 | .cpcache 6 | .idea 7 | .nrepl-port 8 | classes 9 | node_modules 10 | pom.xml 11 | pom.xml.asc 12 | public 13 | target 14 | .DS_Store 15 | .shadow-cljs 16 | out 17 | notebooks/scratch 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Unreleased 4 | 5 | * Hiccup JVM compatibility for fragments (see [#34](https://github.com/nextjournal/markdown/issues/34)) 6 | * Support HTML blocks and inline HTML (see [#7](https://github.com/nextjournal/markdown/issues/7)) 7 | * Bump commonmark to 0.24.0 8 | * Bump markdown-it to 14.1.0 9 | * Render `:code` according to spec into `
<pre>` and `<code>` block with language class (see [#39](https://github.com/nextjournal/markdown/issues/39))
10 | * No longer depend on `applied-science/js-interop`
11 | * Accept parsed result in `->hiccup` function
12 |
13 | ## 0.6.157
14 |
15 | * Swap out GraalJS ([#28](https://github.com/nextjournal/markdown/issues/28)) in favour of [commonmark-java](https://github.com/commonmark/commonmark-java) on the JVM side.
16 | This makes the library compatible with Java 22 and yields an approximate speedup of 10x. The clojurescript implementation stays the same.
17 | * Comply with commonmark rendering of images by default (see [#18](https://github.com/nextjournal/markdown/issues/18)).
18 |
19 | ## 0.5.148
20 |
21 | * Fixes a bug in the construction of the table of contents ([#19](https://github.com/nextjournal/markdown/issues/19)).
22 |
23 | ## 0.5.146
24 | * Fix graaljs multithreaded access ([#17](https://github.com/nextjournal/markdown/issues/17))
25 |
26 | ## 0.5.144
27 | * Disable parsing hashtags and internal links by default ([#14](https://github.com/nextjournal/markdown/issues/14))
28 | * Allow conditional application of custom tokenizers depending on document state around the text location
29 | * Arity 2 to `nextjournal.markdown/parse` was added to customize parsing options (e.g. custom tokenizers) more conveniently.
30 | * Support hard-breaks
31 | * Fix conversion to hiccup for tables with empty cells ([#13](https://github.com/nextjournal/markdown/issues/13))
32 |
33 | ## 0.4.138
34 | * Uses the official markdown-it/footnote plugin
35 | * Adds optional (post-parse) handling of footnotes as sidenotes
36 |
37 | ## 0.4.135
38 | * node-to-text transformation interprets softbreaks as spaces
39 |
40 | ## 0.4.132
41 | * Extract and assign leading emoji from heading nodes
42 |
43 | ## 0.4.130
44 | * Produce unique ids in attrs for header nodes
45 | * Drop lambdaisland.uri dependency
46 |
47 | ## 0.4.126
48 | * Add `deps.cljs` to classpath
49 |
50 | ## 0.4.123
51 | * downgrade GraalJS to keep Java 8 compatibility
52 |
53 | ## 0.4.116
54 | * Bump data.json
55 |
56 | ## 0.4.112
57 | * Distinguish between tight and loose lists
58 |
59 | ## 0.4.109
60 | * [More work on parsing extensibility](https://snapshots.nextjournal.com/markdown/build/7f5c1e24aeb3842235bc6175aa55dbd9a96d25d1/index.html#/notebooks/parsing_extensibility.clj)
61 | * A new home: https://github.com/nextjournal/markdown
62 |
63 | ## 0.3.69
64 | * Extensible parsing of leaf text nodes
65 |
66 | ## 0.2.44
67 | * Simplified `:toc` structure.
68 |
69 | ## 0.1.37
70 | * First Release.
71 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2022 Nextjournal GmbH.
2 |
3 | Permission to use, copy, modify, and/or distribute this software for any purpose
4 | with or without fee is hereby granted, provided that the above copyright notice
5 | and this permission notice appear in all copies.
6 |
7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
8 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
9 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
11 | OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
12 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
13 | THIS SOFTWARE.
14 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # nextjournal markdown
2 |
3 | [Clojars Project](https://clojars.org/io.github.nextjournal/markdown) · [Rendered notebooks](https://nextjournal.github.io/markdown)
4 |
5 | A cross-platform clojure library for [Markdown](https://en.wikipedia.org/wiki/Markdown) parsing and transformation.
6 |
7 | 🚧 _ALPHA_ status, subject to frequent change. For a richer reading experience [read this readme as a clerk notebook](https://nextjournal.github.io/markdown/README).
8 |
9 | ## Features
10 |
11 | * _Focus on data_: parsing yields an AST ([à la Pandoc](https://nextjournal.github.io/markdown/notebooks/pandoc)) of nested data representing a structured document.
12 | * _Cross Platform_: using [commonmark-java](https://github.com/commonmark/commonmark-java) on the JVM and [markdown-it](https://github.com/markdown-it/markdown-it) for ClojureScript.
13 | * _Configurable [Hiccup](https://github.com/weavejester/hiccup) conversion_.
14 |
15 | ## Try
16 |
17 | [Try it online](https://nextjournal.github.io/markdown/notebooks/try).
18 |
19 | ## Flavor
20 |
21 | We adhere to the [CommonMark Spec](https://spec.commonmark.org/0.30/) and comply with extensions from [GitHub Flavored Markdown](https://github.github.com/gfm). Additionally, we parse $\LaTeX$ formulas (delimited by `$` for inline rendering or `$$` for display mode).
22 |
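For instance, formulas end up as `:formula` (inline) and `:block-formula` (display) nodes in the parsed AST. A minimal sketch, using the `md` alias for `nextjournal.markdown` introduced in the Usage section below:

```clojure
(md/parse "The identity $e^{i\\pi} + 1 = 0$ still holds.")
;; => the resulting :doc contains a paragraph whose content includes
;;    a node like {:type :formula, :text "e^{i\\pi} + 1 = 0"}
```
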
23 | ## Usage
24 |
25 | ```clojure
26 | (ns hello-markdown
27 | (:require [nextjournal.markdown :as md]
28 | [nextjournal.markdown.transform :as md.transform]))
29 | ```
30 |
31 | Parsing markdown into an AST:
32 |
33 | ```clojure
34 | (def data
35 | (md/parse "> et tout autour, la longue cohorte de ses personnages, avec leur histoire, leur passé, leurs légendes:
36 | > 1. Pélage vainqueur d'Alkhamah se faisant couronner à Covadonga
37 | > 2. La cantatrice exilée de Russie suivant Schönberg à Amsterdam
38 | > 3. Le petit chat sourd aux yeux vairons vivant au dernier étage
39 | > 4. ...
40 |
41 | **Georges Perec**, _La Vie mode d'emploi_.
42 |
43 | ---
44 | "))
45 | ```
46 | ;; =>
47 | {:type :doc,
48 | :content [{:type :blockquote,
49 | :content [{:type :paragraph,
50 | :content [{:type :text,
51 | :text "et tout autour, la longue cohorte de ses personnage, avec leur histoire, leur passé, leurs légendes:"}]}
52 | {:type :numbered-list,
53 | :content [{:type :list-item,
54 | :content [{:type :plain,
55 | :content [{:type :text,
56 | :text "Pélage vainqueur d'Alkhamah se faisant couronner à Covadonga"}]}]}
57 | {:type :list-item,
58 | :content [{:type :plain,
59 | :content [{:type :text,
60 | :text "La cantatrice exilée de Russie suivant Schönberg à Amsterdam"}]}]}
61 | {:type :list-item,
62 | :content [{:type :plain,
63 | :content [{:type :text,
64 | :text "Le petit chat sourd aux yeux vairons vivant au dernier étage"}]}]}]}]}
65 | {:type :paragraph,
66 | :content [{:type :strong, :content [{:type :text, :text "Georges Perec"}]}
67 | {:type :text, :text ", "}
68 | {:type :em, :content [{:type :text, :text "La Vie mode d'emploi"}]}
69 | {:type :text, :text "."}]}
70 | {:type :ruler}]}
71 |
72 | and transform that AST into `hiccup` syntax.
73 |
74 | ```clojure
75 | (md.transform/->hiccup data)
76 | ```
77 | ;; =>
78 | [:div
79 | [:blockquote
80 | [:p "et tout autour, la longue cohorte de ses personnage, avec leur histoire, leur passé, leurs légendes:"]
81 | [:ol
82 | [:li [:<> "Pélage vainqueur d'Alkhamah se faisant couronner à Covadonga"]]
83 | [:li [:<> "La cantatrice exilée de Russie suivant Schönberg à Amsterdam"]]
84 | [:li [:<> "Le petit chat sourd aux yeux vairons vivant au dernier étage"]]]]
85 | [:p [:strong "Georges Perec"] ", " [:em "La Vie mode d'emploi"] "."]
86 | [:hr]]
87 |
88 | We've built hiccup transformation in for convenience, but the same approach can be used to target [more formats](https://nextjournal.github.io/markdown/notebooks/pandoc).
89 |
90 | This library is one of the building blocks of [Clerk](https://github.com/nextjournal/clerk) where it is used for rendering _literate fragments_.
91 |
92 | ```clojure
93 | ^{:nextjournal.clerk/viewer 'nextjournal.clerk.viewer/markdown-viewer}
94 | data
95 | ```
96 |
97 | The transformation of markdown node types can be customised like this:
98 |
99 | ```clojure
100 | ^{:nextjournal.clerk/viewer 'nextjournal.clerk.viewer/html-viewer}
101 | (md.transform/->hiccup
102 | (assoc md.transform/default-hiccup-renderers
103 | ;; :doc specify a custom container for the whole doc
104 | :doc (partial md.transform/into-markup [:div.viewer-markdown])
105 | ;; :text is funkier when it's zinc toned
106 | :text (fn [_ctx node] [:span {:style {:color "#71717a"}} (:text node)])
107 | ;; :plain fragments might be nice, but paragraphs help when no reagent is at hand
108 | :plain (partial md.transform/into-markup [:p {:style {:margin-top "-1.2rem"}}])
109 | ;; :ruler gets to be funky, too
110 | :ruler (constantly [:hr {:style {:border "2px dashed #71717a"}}]))
111 | data)
112 | ```
113 |
114 | ## Extensibility
115 |
116 | We added minimal tooling for [extending markdown expressions](https://nextjournal.github.io/markdown/notebooks/parsing_extensibility).
117 |
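As a rough sketch of the idea: a text tokenizer pairs a regex with a handler that turns each match into a node, and is passed to the parser as an option (the two-arity `parse` is mentioned in the CHANGELOG; the option name and argument order below follow the tokenizer maps in `dev/nextjournal/markdown/parser.cljc` and are assumptions, not the authoritative API — see the linked notebook for that).

```clojure
;; a tokenizer map: a regex plus a handler turning each match into a node
(def hashtag-tokenizer
  {:regex #"(^|\B)#[\w-]+"
   :handler (fn [match] {:type :hashtag :text (subs (match 0) 1)})})

;; hypothetical usage via the options arity of parse
(md/parse {:text-tokenizers [hashtag-tokenizer]}
          "Some text with a #hashtag in it")
```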
--------------------------------------------------------------------------------
/bb.edn:
--------------------------------------------------------------------------------
1 | {:min-bb-version "0.7.8"
2 | :tasks
3 | {:requires ([clojure.edn :as edn]
4 | [clojure.string :as str]
5 | [babashka.fs :as fs]
6 | [babashka.process :as p])
7 |
8 | :init (do
9 | (def major 0)
10 | (def minor 6)
11 | (def rev-count-offset 69) ;; previous repo offset
12 | (def meta-inf-file "resources/META-INF/nextjournal/markdown/meta.edn")
13 |
14 | (defn rev-count []
15 | (-> (p/process ["git" "rev-list" "HEAD" "--count"] {:out :string})
16 | p/check :out str/trim Integer/parseInt))
17 |
18 | (defn version [] (format "%d.%d.%d" major minor (inc (+ (rev-count) rev-count-offset))))
19 |
20 | (defn update-changelog []
21 | (->> (str/replace (slurp "CHANGELOG.md")
22 | (re-pattern "## [Uu]nreleased")
23 | (str "## Unreleased\n\n...\n\n"
24 | (format "## %s" (version))))
25 | (spit "CHANGELOG.md")))
26 |
27 | (defn read-version [] (-> (slurp meta-inf-file) edn/read-string :version)))
28 |
29 | yarn-install
30 | {:doc "Installs and updates npm dependencies"
31 | :task (shell "yarn install")}
32 |
33 | test
34 | {:doc "runs tests in the markdown module"
35 | :task (clojure "-X:test")}
36 |
37 | build:notebooks
38 | {:doc "builds a Clerk static with notebooks specified in deps.edn given a specified git SHA"
39 | :task (clojure (str "-X:dev:nextjournal/clerk :git/sha '\"" (or (first *command-line-args*) "SHASHASHA") "\"' :browse? false"))}
40 |
41 | dev
42 | {:doc "Boots and watches shadow browser test"
43 | :depends [yarn-install]
44 | :task (clojure "-M:dev:test:nextjournal/clerk:shadow watch browser-test")}
45 |
46 | cljs:compile:tests
47 | {:doc "compiles tests as node executable"
48 | :depends [yarn-install]
49 | :task (clojure "-M:dev:test:shadow compile test")}
50 |
51 | test:cljs
52 | {:doc "runs cljs tests via node"
53 | :depends [cljs:compile:tests]
54 | :task (shell "yarn node --trace-uncaught out/node-tests.js")}
55 |
56 | link-changelog {:doc "Turns the issue references in the changelog into links"
57 | :task (do (defn tag->issue-link [s]
58 | (clojure.string/replace s (re-pattern "(?<!\\[)#(\\d+)") "[#$1](https://github.com/nextjournal/markdown/issues/$1)"))
59 | (let [f "CHANGELOG.md"]
60 | (spit f (tag->issue-link (slurp f)))))}
61 |
62 | update-meta {:doc "Updates meta.edn with current version (based on commit count currently)."
63 | :task (spit (doto (fs/file meta-inf-file)
64 | (-> fs/parent fs/create-dirs)) {:version (version)})}
65 |
66 | tag {:doc "Tags release and pushes tag to Github."
67 | :task (let [tag (str "v" (read-version))]
68 | (shell "git tag" tag))}
69 |
70 | delete-tag {:doc "Tells git to delete the tag at the current version"
71 | :task (shell (str "git tag -d v" (read-version)))}
72 |
73 | current-version {:doc "Prints the version as written to META-INF during publishing"
74 | :task (print (read-version))}
75 |
76 | publish {:doc "Prepares repo for publishing via CI"
77 | :task (do
78 | (run 'update-meta)
79 | (println "Preparing repo for Release.\n Updated worktree has been committed (e.g. changes to CHANGELOG)" (read-version))
80 | (run 'link-changelog)
81 | (update-changelog)
82 | (shell "git add -u")
83 | (shell (str "git commit -m v" (read-version)))
84 | (run 'tag)
85 | (println "\n\nRun:\n\n" " git push --atomic"
86 | "origin" "main" (str "v" (read-version))
87 | "\n\nto push the release and let CI build it!"))}
88 |
89 | undo:publish {:doc "Reset to state prior to `bb publish`"
90 | :task (do
91 | (run 'delete-tag)
92 | (shell "git reset HEAD~1")
93 | (shell "git co -- resources/META-INF/nextjournal/markdown/meta.edn"))}
94 |
95 | -current-tag (->> (shell {:out :string} "git tag --points-at HEAD")
96 | :out
97 | str/trim
98 | not-empty)
99 |
100 | -current-branch (->> (shell {:out :string} "git branch --show-current")
101 | :out
102 | str/trim)
103 |
104 | jar {:doc "Build jar"
105 | :task (do
106 | (println "Building jar")
107 | (clojure (str "-T:build jar :version '\"" (read-version) "\"'")))}
108 |
109 | install {:doc "Install jar locally"
110 | :task (do
111 | (println "Installing locally")
112 | (clojure (str "-T:build install :version '\"" (read-version) "\"'")))}
113 |
114 | ci:publish {:doc "Publish task which will be run on CI"
115 | :depends [-current-tag -current-branch]
116 | :task (do
117 | (prn :current-tag -current-tag)
118 | (prn :current-branch -current-branch)
119 | (if (and -current-tag (= "main" -current-branch))
120 | (do
121 | (println "Deploying to clojars")
122 | (clojure (str "-T:build deploy :version '\"" (read-version) "\"'")))
123 | ;; still build jar for artifact upload
124 | (run 'jar)))}}}
125 |
--------------------------------------------------------------------------------
/build.clj:
--------------------------------------------------------------------------------
1 | (ns build
2 | (:require [clojure.tools.build.api :as b]
3 | [deps-deploy.deps-deploy :as dd]))
4 |
5 | (def lib 'io.github.nextjournal/markdown)
6 |
7 | (defn scm [version]
8 | {:url "https://github.com/nextjournal/markdown"
9 | :tag (str "v" version)
10 | :connection "scm:git:git://github.com/nextjournal/markdown.git"
11 | :developerConnection "scm:git:ssh://git@github.com/nextjournal/markdown.git"})
12 |
13 | (def class-dir "target/classes")
14 |
15 | (def basis (b/create-basis {:project "deps.edn"}))
16 |
17 | (defn jar-file [version] (format "target/%s-%s.jar" (name lib) version))
18 |
19 | (defn clean [_] (b/delete {:path "target"}))
20 |
21 | (defn jar [{:keys [version]}]
22 | (b/delete {:path "target"})
23 | (println "Producing jar: " (jar-file version))
24 | (b/write-pom {:basis basis
25 | :class-dir class-dir
26 | :lib lib
27 | :scm (scm version)
28 | :src-dirs ["src"]
29 | :version version
30 | :pom-data
31 | [[:licenses
32 | [:license
33 | [:name "ISC License"]
34 | [:url "https://opensource.org/license/isc-license-txt"]]]]})
35 | (b/copy-dir {:src-dirs ["src" "resources"]
36 | :target-dir class-dir
37 | :replace {}})
38 | (b/jar {:class-dir class-dir
39 | :jar-file (jar-file version)}))
40 |
41 | (defn install [{:keys [version] :as opts}]
42 | (jar opts)
43 | (b/install {:basis basis
44 | :lib lib
45 | :version (:version opts)
46 | :jar-file (jar-file version)
47 | :class-dir class-dir}))
48 |
49 | (defn deploy [{:keys [version] :as opts}]
50 | (println "Deploying version" (jar-file version) "to Clojars.")
51 | (jar opts)
52 | (dd/deploy {:installer :remote
53 | :artifact (jar-file version)
54 | :pom-file (b/pom-path {:lib lib :class-dir class-dir})}))
55 |
--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
1 | {:paths ["src" "resources"]
2 | :deps {org.commonmark/commonmark {:mvn/version "0.24.0"}
3 | org.commonmark/commonmark-ext-autolink {:mvn/version "0.24.0"}
4 | org.commonmark/commonmark-ext-footnotes {:mvn/version "0.24.0"}
5 | org.commonmark/commonmark-ext-task-list-items {:mvn/version "0.24.0"}
6 | org.commonmark/commonmark-ext-gfm-tables {:mvn/version "0.24.0"}
7 | org.commonmark/commonmark-ext-gfm-strikethrough {:mvn/version "0.24.0"}}
8 |
9 | :aliases
10 | {:nextjournal/clerk
11 | {:extra-paths ["notebooks" "dev"]
12 | :extra-deps {io.github.nextjournal/clerk {:mvn/version "0.17.1102"
13 | :exclusions [io.github.nextjournal/markdown]}}
14 | :jvm-opts ["-Dclojure.main.report=stderr"
15 | #_"-Dclerk.resource_manifest={\"/js/viewer.js\" \"js/viewer.js\"}"] ;;
16 | :exec-fn nextjournal.clerk/build!
17 | :exec-args {:git/url "https://github.com/nextjournal/markdown"
18 | :paths ["README.md"
19 | "CHANGELOG.md"
20 | "notebooks/try.clj"
21 | "notebooks/images.clj"
22 | "notebooks/pandoc.clj"
23 | "notebooks/parsing_extensibility.clj"
24 | "notebooks/benchmarks.clj"
25 | "notebooks/tight_lists.clj"]}}
26 |
27 | :quiet
28 | {:jvm-opts ["-Dpolyglot.engine.WarnInterpreterOnly=false"]}
29 |
30 | :dev
31 | {:extra-paths ["dev" "notebooks"]
32 | :extra-deps {applied-science/js-interop {:mvn/version "0.3.3"}
33 | org.babashka/http-client {:mvn/version "0.3.11"}
34 | org.clojure/data.json {:mvn/version "2.4.0"}
35 | org.clojure/test.check {:mvn/version "1.1.1"}
36 | io.github.nextjournal/clerk {:git/sha "f4c5488e36c8df11fe352889544e7deb9af73cb7"
37 | :exclusions [io.github.nextjournal/markdown]}
38 | nubank/matcher-combinators {:mvn/version "3.8.3"}
39 | hiccup/hiccup {:mvn/version "2.0.0-RC5"}
40 | org.graalvm.js/js {:mvn/version "21.3.2.1"}}}
41 |
42 | :test
43 | {:extra-paths ["test"]
44 | :jvm-opts ["-Dclojure.main.report=stderr"]
45 | :extra-deps {nubank/matcher-combinators {:mvn/version "3.9.1"}
46 | hiccup/hiccup {:mvn/version "2.0.0-RC5"}}
47 | :exec-fn test-runner/run}
48 |
49 | :shadow
50 | {:main-opts ["-m" "shadow.cljs.devtools.cli"]
51 | :extra-deps {thheller/shadow-cljs {:mvn/version "2.18.0"}}}
52 |
53 | :build
54 | {:ns-default build
55 | :jvm-opts ["-Dclojure.main.report=stderr"]
56 | :deps {io.github.clojure/tools.build {:git/tag "v0.10.3" :git/sha "15ead66"}
57 | io.github.slipset/deps-deploy {:git/sha "b4359c5d67ca002d9ed0c4b41b710d7e5a82e3bf"}}}}}
58 |
--------------------------------------------------------------------------------
/dev/nextjournal/markdown/parser.cljc:
--------------------------------------------------------------------------------
1 | ;; # 🧩 Parsing
2 | ;;
3 | ;; Deals with transforming a sequence of tokens obtained from [markdown-it] into an AST composed of nested _nodes_.
4 | ;;
5 | ;; A _node_ is a clojure map and has no closed specification at the moment. We do follow a few conventions for its keys:
6 | ;;
7 | ;; - `:type` a keyword (:heading, :paragraph, :text, :code etc.) present on all nodes.
8 | ;;
9 | ;; When a node contains other child nodes, it will have a
10 | ;;
11 | ;; - `:content` a collection of nodes representing nested content
12 | ;;
13 | ;; when a node is a textual leaf (as in `:text` or `:formula` nodes) it carries a
14 | ;; - `:text` key with a string value
15 | ;;
16 | ;; Other keys might include e.g.
17 | ;;
18 | ;; - `:info` specific to fenced code blocks
19 | ;; - `:heading-level` specific to `:heading` nodes
20 | ;; - `:attrs` attributes as passed by markdown-it tokens (e.g. `{:style "some style info"}`)
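;;
;; For example (a hedged sketch of the conventions above), a top-level level-1 heading
;; with text "Hello" is represented roughly as:
;;
;;    {:type :heading
;;     :heading-level 1
;;     :attrs {:id "hello"}
;;     :content [{:type :text :text "Hello"}]}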
21 | (ns nextjournal.markdown.parser
22 | (:require [clojure.string :as str]
23 | [clojure.zip :as z]
24 | [nextjournal.markdown.transform :as md.transform]
25 | [nextjournal.markdown.utils.emoji :as emoji]
26 | #?@(:cljs [[applied-science.js-interop :as j]
27 | [cljs.reader :as reader]])))
28 |
29 | ;; clj common accessors
30 | (def get-in* #?(:clj get-in :cljs j/get-in))
31 | (def update* #?(:clj update :cljs j/update!))
32 |
33 | #?(:clj (defn re-groups* [m] (let [g (re-groups m)] (cond-> g (not (vector? g)) vector))))
34 | (defn re-idx-seq
35 | "Takes a regex and a string, returns a seq of triplets comprised of match groups followed by indices delimiting each match."
36 | [re text]
37 | #?(:clj (let [m (re-matcher re text)]
38 | (take-while some? (repeatedly #(when (.find m) [(re-groups* m) (.start m) (.end m)]))))
39 | :cljs (let [rex (js/RegExp. (.-source re) "g")]
40 | (take-while some? (repeatedly #(when-some [m (.exec rex text)] [(vec m) (.-index m) (.-lastIndex rex)]))))))
41 |
42 |
43 | (comment (re-idx-seq #"\{\{([^{]+)\}\}" "foo {{hello}} bar"))
44 | (comment (re-idx-seq #"\{\{[^{]+\}\}" "foo {{hello}} bar"))
45 | ;; region node operations
46 | ;; helpers
47 | (defn inc-last [path] (update path (dec (count path)) inc))
48 | (defn hlevel [{:as _token hn :tag}] (when (string? hn) (some-> (re-matches #"h([\d])" hn) second #?(:clj Integer/parseInt :cljs js/parseInt))))
49 |
50 | (defn split-by-emoji [s]
51 | (let [[match start end] (first (re-idx-seq emoji/regex s))]
52 | (if match
53 | [(subs s start end) (str/trim (subs s end))]
54 | [nil s])))
55 |
56 | #_(split-by-emoji " Stop")
57 | #_(split-by-emoji "🤚🏽 Stop")
58 | #_(split-by-emoji "🤚🏽🤚 Stop")
59 | #_(split-by-emoji "🤚🏽Stop")
60 | #_(split-by-emoji "🤚🏽 Stop")
61 | #_(split-by-emoji "😀 Stop")
62 | #_(split-by-emoji "⚛️ Stop")
63 | #_(split-by-emoji "⚛ Stop")
64 | #_(split-by-emoji "⬇ Stop")
65 | #_(split-by-emoji "Should not 🙁️ Split")
66 |
67 | (defn text->id+emoji [text]
68 | (when (string? text)
69 | (let [[emoji text'] (split-by-emoji (str/trim text))]
70 | (cond-> {:id (apply str (map (comp str/lower-case (fn [c] (case c (\space \_) \- c))) text'))}
71 | emoji (assoc :emoji emoji)))))
72 |
73 | #_(text->id+emoji "Hello There")
74 | #_(text->id+emoji "Hello_There")
75 | #_(text->id+emoji "👩🔬 Quantum Physics")
76 |
77 | ;; `parse-fence-info` ingests nextjournal, GFM, Pandoc and RMarkdown fenced code block info (any text following the leading 3 backticks) and returns a map
78 | ;;
79 | ;; _nextjournal_ / _GFM_
80 | ;;
81 | ;; ```python id=2e3541da-0735-4b7f-a12f-4fb1bfcb6138
82 | ;; python code
83 | ;; ```
84 | ;;
85 | ;; _Pandoc_
86 | ;;
87 | ;; ```{#pandoc-id .language .extra-class key=Val}
88 | ;; code in language
89 | ;; ```
90 | ;;
91 | ;; _Rmd_
92 | ;;
93 | ;; ```{r cars, echo=FALSE}
94 | ;; R code
95 | ;; ```
96 | ;;
97 | ;; See also:
98 | ;; - https://github.github.com/gfm/#info-string
99 | ;; - https://pandoc.org/MANUAL.html#fenced-code-blocks
100 | ;; - https://rstudio.com/wp-content/uploads/2016/03/rmarkdown-cheatsheet-2.0.pdf"
101 |
102 | (defn parse-fence-info [info-str]
103 | (try
104 | ;; NOTE: this fix is backported
105 | ;; from the new implementation 👇
106 | (when (and (string? info-str) (seq info-str))
107 | (let [tokens (-> info-str
108 | str/trim
109 | (str/replace #"[\{\}\,]" "") ;; remove Pandoc/Rmarkdown brackets and commas
110 | (str/replace "." "") ;; remove dots
111 | (str/split #" "))] ;; split by spaces
112 | (reduce
113 | (fn [{:as info-map :keys [language]} token]
114 | (let [[_ k v] (re-matches #"^([^=]+)=([^=]+)$" token)]
115 | (cond
116 | (str/starts-with? token "#") (assoc info-map :id (str/replace token #"^#" "")) ;; pandoc #id
117 | (and k v) (assoc info-map (keyword k) v)
118 | (not language) (assoc info-map :language token) ;; language is the first simple token which is not a pandoc's id
119 | :else (assoc info-map (keyword token) true))))
120 | {}
121 | tokens)))
122 | (catch #?(:clj Throwable :cljs :default) _ {})))
123 |
124 | (comment
125 | (parse-fence-info "python runtime-id=5f77e475-6178-47a3-8437-45c9c34d57ff")
126 | (parse-fence-info "{#some-id .lang foo=nex}")
127 | (parse-fence-info "#id clojure")
128 | (parse-fence-info "clojure #id")
129 | (parse-fence-info "clojure")
130 | (parse-fence-info "{r cars, echo=FALSE}"))
131 |
132 | ;; leaf nodes
133 | (defn text-node [text] {:type :text :text text})
134 | (defn formula [text] {:type :formula :text text})
135 | (defn block-formula [text] {:type :block-formula :text text})
136 | (defn footnote-ref [ref label] (cond-> {:type :footnote-ref :ref ref} label (assoc :label label)))
137 |
138 | ;; node constructors
139 | (defn node
140 | [type content attrs top-level]
141 | (cond-> {:type type :content content}
142 | (seq attrs) (assoc :attrs attrs)
143 | (seq top-level) (merge top-level)))
144 |
145 | (defn empty-text-node? [{text :text t :type}] (and (= :text t) (empty? text)))
146 |
147 | (defn push-node [{:as doc ::keys [path]} node]
148 | (try
149 | (cond-> doc
150 | ;; ⬇ mdit produces empty text tokens at mark boundaries, see edge cases below
151 | (not (empty-text-node? node))
152 | (-> #_doc
153 | (update ::path inc-last)
154 | (update-in (pop path) conj node)))
155 | (catch #?(:clj Exception :cljs js/Error) e
156 | (throw (ex-info (str "nextjournal.markdown cannot add node: " node " at path: " path)
157 | {:doc doc :node node} e)))))
158 |
159 | (def push-nodes (partial reduce push-node))
160 |
161 | (defn open-node
162 | ([doc type] (open-node doc type {}))
163 | ([doc type attrs] (open-node doc type attrs {}))
164 | ([doc type attrs top-level]
165 | (-> doc
166 | (push-node (node type [] attrs top-level))
167 | (update ::path into [:content -1]))))
168 |
169 | ;; after closing a node, document ::path will point at it
170 | (def ppop (comp pop pop))
171 | (defn close-node [doc] (update doc ::path ppop))
172 | (defn update-current [{:as doc path ::path} fn & args] (apply update-in doc path fn args))
173 |
174 | (defn current-parent-node
175 | "Given an open parsing context `doc`, returns the parent of the node which was last parsed into the document."
176 | [{:as doc ::keys [path]}]
177 | (assert path "A path is needed in document context to retrieve the current node: `current-parent-node` cannot be called after `parse`.")
178 | (get-in doc (ppop path)))
179 |
180 | (defn current-ancestor-nodes
181 | "Given an open parsing context `doc`, returns the list of ancestors of the node last parsed into the document, up to but
182 | not including the top document."
183 | [{:as doc ::keys [path]}]
184 | (assert path "A path is needed in document context to retrieve the current node: `current-ancestor-nodes` cannot be called after `parse`.")
185 | (loop [p (ppop path) ancestors []]
186 | (if (seq p)
187 | (recur (ppop p) (conj ancestors (get-in doc p)))
188 | ancestors)))
189 |
190 | ;; TODO: consider rewriting parse in terms of this zipper
191 | (defn ->zip [doc]
192 | (z/zipper (every-pred map? :type) :content
193 | (fn [node cs] (assoc node :content (vec cs)))
194 | doc))
195 |
196 | (defn assign-node-id+emoji [{:as doc ::keys [id->index path] :keys [text->id+emoji-fn]}]
197 | (let [{:keys [id emoji]} (when (ifn? text->id+emoji-fn) (-> doc (get-in path) text->id+emoji-fn))
198 | id-count (when id (get id->index id))]
199 | (cond-> doc
200 | id
201 | (update-in [::id->index id] (fnil inc 0))
202 | (or id emoji)
203 | (update-in path (fn [node]
204 | (cond-> node
205 | id (assoc-in [:attrs :id] (cond-> id id-count (str "-" (inc id-count))))
206 | emoji (assoc :emoji emoji)))))))
207 |
208 | (comment ;; path after call
209 | (-> empty-doc ;; [:content -1]
210 | (open-node :heading) ;; [:content 0 :content -1]
211 | (push-node {:node/type :text :text "foo"}) ;; [:content 0 :content 0]
212 | (push-node {:node/type :text :text "foo"}) ;; [:content 0 :content 1]
213 | close-node ;; [:content 1]
214 |
215 | (open-node :paragraph) ;; [:content 1 :content]
216 | (push-node {:node/type :text :text "hello"})
217 | close-node
218 | (open-node :bullet-list)
219 | ;;
220 | ))
221 | ;; endregion
222 |
223 | ;; region TOC builder:
224 | ;; toc nodes are heading nodes but with `:type` `:toc` and an extra branching along
225 | ;; the key `:children` representing the sub-sections of the node
226 | (defn into-toc [toc {:as toc-item :keys [heading-level]}]
227 | (loop [toc toc l heading-level toc-path [:children]]
228 | ;; `toc-path` is `[:children i₁ :children i₂ ... :children]`
229 | (let [type-path (assoc toc-path (dec (count toc-path)) :type)]
230 | (cond
231 | ;; insert intermediate default empty :content collections for the final update-in (which defaults to maps otherwise)
232 | (not (get-in toc toc-path))
233 | (recur (assoc-in toc toc-path []) l toc-path)
234 |
235 | ;; fill in toc types for non-contiguous jumps like h1 -> h3
236 | (not (get-in toc type-path))
237 | (recur (assoc-in toc type-path :toc) l toc-path)
238 |
239 | (= 1 l)
240 | (update-in toc toc-path (fnil conj []) toc-item)
241 |
242 | :else
243 | (recur toc
244 | (dec l)
245 | (conj toc-path
246 | (max 0 (dec (count (get-in toc toc-path)))) ;; select last child at level if it exists
247 | :children))))))
248 |
249 | (defn add-to-toc [doc {:as h :keys [heading-level]}]
250 | (cond-> doc (pos-int? heading-level) (update :toc into-toc (assoc h :type :toc))))
251 |
252 | (defn set-title-when-missing [{:as doc :keys [title]} heading]
253 | (cond-> doc (nil? title) (assoc :title (md.transform/->text heading))))
254 |
255 | (defn add-title+toc
256 | "Computes and adds a :title and a :toc to the document-like structure `doc` which might have not been constructed by means of `parse`."
257 | [{:as doc :keys [content]}]
258 | (let [rf (fn [doc heading] (-> doc (add-to-toc heading) (set-title-when-missing heading)))
259 | xf (filter (comp #{:heading} :type))]
260 | (reduce (xf rf) (assoc doc :toc {:type :toc}) content)))
261 |
262 | (comment
263 | (-> {:type :toc}
264 | ;;(into-toc {:heading-level 3 :title "Foo"})
265 | ;;(into-toc {:heading-level 2 :title "Section 1"})
266 | (into-toc {:heading-level 1 :title "Title" :type :toc})
267 | (into-toc {:heading-level 4 :title "Section 2" :type :toc})
268 | ;;(into-toc {:heading-level 4 :title "Section 2.1"})
269 | ;;(into-toc {:heading-level 2 :title "Section 3"})
270 | )
271 |
272 | (-> "# Top _Title_
273 |
274 | par
275 |
276 | ### Three
277 |
278 | ## Two
279 |
280 | par
281 | - and a nested
282 | - ### Heading not included
283 |
284 | foo
285 |
286 | ## Two Again
287 |
288 | par
289 |
290 | # One Again
291 |
292 | [[TOC]]
293 |
294 | #### Four
295 |
296 | end"
297 | nextjournal.markdown/parse
298 | :toc
299 | ))
300 | ;; endregion
301 |
302 | ;; region token handlers
303 | (declare apply-tokens)
304 | (defmulti apply-token (fn [_doc token] (:type token)))
305 | (defmethod apply-token :default [doc token]
306 | (prn :apply-token/unknown-type {:token token})
307 | doc)
308 |
309 | ;; blocks
310 | (defmethod apply-token "heading_open" [doc token] (open-node doc :heading {} {:heading-level (hlevel token)}))
311 | (defmethod apply-token "heading_close" [doc {doc-level :level}]
312 | (let [{:as doc ::keys [path]} (close-node doc)
313 | doc' (assign-node-id+emoji doc)
314 | heading (-> doc' (get-in path) (assoc :path path))]
315 | (cond-> doc'
316 | ;; We're only considering top-level headings (e.g. not those contained inside quotes or lists)
317 | (zero? doc-level)
318 | (-> (add-to-toc heading)
319 | (set-title-when-missing heading)))))
320 |
321 | ;; for building the TOC we just care about headings at document top level (not e.g. nested under lists) ⬆
322 |
323 | (defmethod apply-token "paragraph_open" [doc {:as _token :keys [hidden]}] (open-node doc (if hidden :plain :paragraph)))
324 | (defmethod apply-token "paragraph_close" [doc _token] (close-node doc))
325 |
326 | (defmethod apply-token "bullet_list_open" [doc {{:as attrs :keys [has-todos]} :attrs}] (open-node doc (if has-todos :todo-list :bullet-list) attrs))
327 | (defmethod apply-token "bullet_list_close" [doc _token] (close-node doc))
328 |
329 | (defmethod apply-token "ordered_list_open" [doc {:keys [attrs]}] (open-node doc :numbered-list attrs))
330 | (defmethod apply-token "ordered_list_close" [doc _token] (close-node doc))
331 |
332 | (defmethod apply-token "list_item_open" [doc {{:as attrs :keys [todo]} :attrs}] (open-node doc (if todo :todo-item :list-item) attrs))
333 | (defmethod apply-token "list_item_close" [doc _token] (close-node doc))
334 |
335 | (defmethod apply-token "math_block" [doc {text :content}] (push-node doc (block-formula text)))
336 | (defmethod apply-token "math_block_end" [doc _token] doc)
337 |
338 | (defmethod apply-token "hr" [doc _token] (push-node doc {:type :ruler}))
339 |
340 | (defmethod apply-token "blockquote_open" [doc _token] (open-node doc :blockquote))
341 | (defmethod apply-token "blockquote_close" [doc _token] (close-node doc))
342 |
343 | (defmethod apply-token "tocOpen" [doc _token] (open-node doc :toc))
344 | (defmethod apply-token "tocBody" [doc _token] doc) ;; ignore body
345 | (defmethod apply-token "tocClose" [doc _token] (-> doc close-node (update-current dissoc :content)))
346 |
347 | (defmethod apply-token "code_block" [doc {:as _token c :content}]
348 | (-> doc
349 | (open-node :code)
350 | (push-node (text-node c))
351 | close-node))
352 | (defmethod apply-token "fence" [doc {:as _token i :info c :content}]
353 | (-> doc
354 | (open-node :code {} (assoc (parse-fence-info i) :info i))
355 | (push-node (text-node c))
356 | close-node))
357 |
358 | ;; footnotes
359 | (defmethod apply-token "footnote_ref" [{:as doc :keys [footnotes]} token]
360 | (push-node doc (footnote-ref (+ (count footnotes) (get-in* token [:meta :id]))
361 | (get-in* token [:meta :label]))))
362 |
363 | (defmethod apply-token "footnote_anchor" [doc token] doc)
364 |
365 | (defmethod apply-token "footnote_open" [{:as doc ::keys [footnote-offset]} token]
366 | ;; consider an offset in case we're parsing multiple inputs into the same context
367 | (let [ref (+ (get-in* token [:meta :id]) footnote-offset)
368 | label (get-in* token [:meta :label])]
369 | (open-node doc :footnote nil (cond-> {:ref ref} label (assoc :label label)))))
370 |
371 | (defmethod apply-token "footnote_close" [doc token] (close-node doc))
372 |
373 | (defmethod apply-token "footnote_block_open" [{:as doc :keys [footnotes] ::keys [path]} _token]
374 | ;; store footnotes at a top-level `:footnotes` key
375 | (let [footnote-offset (count footnotes)]
376 | (-> doc
377 | (assoc ::path [:footnotes (dec footnote-offset)]
378 | ::footnote-offset footnote-offset
379 | ::path-to-restore path))))
380 |
381 | (defmethod apply-token "footnote_block_close"
382 | ;; restores path for adding new tokens
383 | [{:as doc ::keys [path-to-restore]} _token]
384 | (-> doc
385 | (assoc ::path path-to-restore)
386 | (dissoc ::path-to-restore ::footnote-offset)))
387 |
388 | (defn footnote->sidenote [{:keys [ref label content]}]
389 | ;; this assumes the footnote container is a paragraph, won't work for lists
390 | (node :sidenote (-> content first :content) nil (cond-> {:ref ref} label (assoc :label label))))
391 |
392 | (defn node-with-sidenote-refs [p-node]
393 | (loop [l (->zip p-node) refs []]
394 | (if (z/end? l)
395 | (when (seq refs)
396 | {:node (z/root l) :refs refs})
397 | (let [{:keys [type ref]} (z/node l)]
398 | (if (= :footnote-ref type)
399 | (recur (z/next (z/edit l assoc :type :sidenote-ref)) (conj refs ref))
400 | (recur (z/next l) refs))))))
401 |
402 | (defn insert-sidenote-containers
403 | "Handles footnotes as sidenotes.
404 |
405 | Takes and returns a parsed document. When the document has footnotes, wraps every top-level block which contains footnote references
406 | with a `:sidenote-container` node; into each such node, adds a `:sidenote-column` node containing a `:sidenote` node for each ref found.
407 | Renames type `:footnote-ref` to `:sidenote-ref`.
408 | [{:as doc ::keys [path] :keys [footnotes]}]
409 | (if-not (seq footnotes)
410 | doc
411 | (let [root (->zip doc)]
412 | (loop [loc (z/down root) parent root]
413 | (cond
414 | (nil? loc)
415 | (-> parent z/node (assoc :sidenotes? true))
416 | (contains? #{:plain :paragraph :blockquote :numbered-list :bullet-list :todo-list :heading :table}
417 | (:type (z/node loc)))
418 | (if-some [{:keys [node refs]} (node-with-sidenote-refs (z/node loc))]
419 | (let [new-loc (-> loc (z/replace {:type :sidenote-container :content []})
420 | (z/append-child node)
421 | (z/append-child {:type :sidenote-column
422 | ;; TODO: broken in the old implementation
423 | ;; should be :content (mapv #(footnote->sidenote (get footnotes %)) (distinct refs))}))]
424 | :content (mapv #(footnote->sidenote (get footnotes %)) refs)}))]
425 | (recur (z/right new-loc) (z/up new-loc)))
426 | (recur (z/right loc) parent))
427 | :else
428 | (recur (z/right loc) parent))))))
429 |
430 | (comment
431 | (-> "_hello_ what and foo[^note1] and^[some other note].
432 |
433 | And what.
434 |
435 | [^note1]: the _what_
436 |
437 | * and new text[^endnote] at the end.
438 | * the
439 | * hell^[that warm place]
440 |
441 | [^endnote]: conclusion.
442 | "
443 | nextjournal.markdown/tokenize
444 | parse
445 | #_ flatten-tokens
446 | insert-sidenote-containers)
447 |
448 | (-> empty-doc
449 | (update :text-tokenizers (partial map normalize-tokenizer))
450 | (apply-tokens (nextjournal.markdown/tokenize "what^[the heck]"))
451 | insert-sidenote-containers
452 | (apply-tokens (nextjournal.markdown/tokenize "# Hello"))
453 | insert-sidenote-containers
454 | (apply-tokens (nextjournal.markdown/tokenize "is^[this thing]"))
455 | insert-sidenote-containers))
456 |
457 | ;; tables
458 | ;; table data tokens might have {:style "text-align:right|left"} attrs, maybe better nested node > :attrs > :style ?
459 | (defmethod apply-token "table_open" [doc _token] (open-node doc :table))
460 | (defmethod apply-token "table_close" [doc _token] (close-node doc))
461 | (defmethod apply-token "thead_open" [doc _token] (open-node doc :table-head))
462 | (defmethod apply-token "thead_close" [doc _token] (close-node doc))
463 | (defmethod apply-token "tr_open" [doc _token] (open-node doc :table-row))
464 | (defmethod apply-token "tr_close" [doc _token] (close-node doc))
465 | (defmethod apply-token "th_open" [doc token] (open-node doc :table-header (:attrs token)))
466 | (defmethod apply-token "th_close" [doc _token] (close-node doc))
467 | (defmethod apply-token "tbody_open" [doc _token] (open-node doc :table-body))
468 | (defmethod apply-token "tbody_close" [doc _token] (close-node doc))
469 | (defmethod apply-token "td_open" [doc token] (open-node doc :table-data (:attrs token)))
470 | (defmethod apply-token "td_close" [doc _token] (close-node doc))
471 |
472 | (comment
473 | (->
474 | "
475 | | Syntax | JVM | JavaScript |
476 | |--------|:------------------------:|--------------------------------:|
477 | | foo | Loca _lDate_ ahoiii | goog.date.Date |
478 | | bar | java.time.LocalTime | some [kinky](link/to/something) |
479 | | bag | java.time.LocalDateTime | $\\phi$ |
480 | "
481 | nextjournal.markdown/parse
482 | nextjournal.markdown.transform/->hiccup
483 | ))
484 |
485 | ;; ## Handling of Text Tokens
486 | ;;
487 | ;; normalize-tokenizer :: {:regex, :doc-handler} | {:tokenizer-fn, :handler} -> Tokenizer
488 | ;; Tokenizer :: {:tokenizer-fn :: TokenizerFn, :doc-handler :: DocHandler}
489 | ;;
490 | ;; Match :: Any
491 | ;; Handler :: Match -> Node
492 | ;; IndexedMatch :: (Match, Int, Int)
493 | ;; TokenizerFn :: String -> [IndexedMatch]
494 | ;; DocHandler :: Doc -> {:match :: Match} -> Doc
495 |
496 | (def hashtag-tokenizer
497 | {:regex #"(^|\B)#[\w-]+"
498 | :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
499 | :handler (fn [match] {:type :hashtag :text (subs (match 0) 1)})})
500 |
501 | (def internal-link-tokenizer
502 | {:regex #"\[\[([^\]]+)\]\]"
503 | :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
504 | :handler (fn [match] {:type :internal-link :text (match 1)})})
505 |
506 | (comment
507 | (->> "# Hello #Fishes
508 |
509 | > what about #this
510 |
511 | _this #should be a tag_, but this [_actually #foo shouldnt_](/bar/) is not."
512 | nextjournal.markdown/tokenize
513 | (parse (update empty-doc :text-tokenizers conj hashtag-tokenizer))))
514 |
515 |
516 | (defn normalize-tokenizer
517 | "Normalizes a map of regex and handler into a Tokenizer"
518 | [{:as tokenizer :keys [doc-handler pred handler regex tokenizer-fn]}]
519 | (assert (and (or doc-handler handler) (or regex tokenizer-fn)))
520 | (cond-> tokenizer
521 | (not doc-handler) (assoc :doc-handler (fn [doc {:keys [match]}] (push-node doc (handler match))))
522 | (not tokenizer-fn) (assoc :tokenizer-fn (partial re-idx-seq regex))
523 | (not pred) (assoc :pred (constantly true))))
524 |
525 | (defn tokenize-text-node [{:as tkz :keys [tokenizer-fn pred doc-handler]} doc {:as node :keys [text]}]
526 | ;; TokenizerFn -> HNode -> [HNode]
527 | (assert (and (fn? tokenizer-fn) (fn? doc-handler) (fn? pred) (string? text))
528 | {:text text :tokenizer tkz})
529 | (let [idx-seq (when (pred doc) (tokenizer-fn text))]
530 | (if (seq idx-seq)
531 | (let [text-hnode (fn [s] (assoc (text-node s) :doc-handler push-node))
532 | {:keys [nodes remaining-text]}
533 | (reduce (fn [{:as acc :keys [remaining-text]} [match start end]]
534 | (-> acc
535 | (update :remaining-text subs 0 start)
536 | (cond->
537 | (< end (count remaining-text))
538 | (update :nodes conj (text-hnode (subs remaining-text end))))
539 | (update :nodes conj {:doc-handler doc-handler
540 | :match match :text text
541 | :start start :end end})))
542 | {:remaining-text text :nodes ()}
543 | (reverse idx-seq))]
544 | (cond-> nodes
545 | (seq remaining-text)
546 | (conj (text-hnode remaining-text))))
547 | [node])))
548 |
549 | (defmethod apply-token "text" [{:as doc :keys [text-tokenizers]} {:keys [content]}]
550 | (reduce (fn [doc {:as node :keys [doc-handler]}] (doc-handler doc (dissoc node :doc-handler)))
551 | doc
552 | (reduce (fn [nodes tokenizer]
553 | (mapcat (fn [{:as node :keys [type]}]
554 | (if (= :text type) (tokenize-text-node tokenizer doc node) [node]))
555 | nodes))
556 | [{:type :text :text content :doc-handler push-node}]
557 | text-tokenizers)))
558 |
559 | (comment
560 | (def mustache (normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}" :handler (fn [m] {:type :eval :text (m 1)})}))
561 | (tokenize-text-node mustache {} {:text "{{what}} the {{hellow}}"})
562 | (apply-token (assoc empty-doc :text-tokenizers [mustache])
563 | {:type "text" :content "foo [[bar]] dang #hashy taggy [[what]] #dangy foo [[great]] and {{eval}} me"})
564 |
565 | (parse (assoc empty-doc
566 | :text-tokenizers
567 | [(normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}"
568 | :doc-handler (fn [{:as doc ::keys [path]} {[_ meta] :match}]
569 | (update-in doc (ppop path) assoc :meta meta))})])
570 | (nextjournal.markdown/tokenize "# Title {{id=heading}}
571 | * one
572 | * two")))
573 |
574 | ;; inlines
575 | (defmethod apply-token "inline" [doc {:as _token ts :children}] (apply-tokens doc ts))
576 | (defmethod apply-token "math_inline" [doc {text :content}] (push-node doc (formula text)))
577 | (defmethod apply-token "math_inline_double" [doc {text :content}] (push-node doc (formula text)))
578 |
579 | ;; https://spec.commonmark.org/0.30/#softbreak
580 | (defmethod apply-token "softbreak" [doc _token] (push-node doc {:type :softbreak}))
581 | ;; https://spec.commonmark.org/0.30/#hard-line-break
582 | (defmethod apply-token "hardbreak" [doc _token] (push-node doc {:type :hardbreak}))
583 |
584 | ;; images
585 | (defmethod apply-token "image" [doc {:keys [attrs children]}] (-> doc (open-node :image attrs) (apply-tokens children) close-node))
586 |
587 | ;; marks
588 | (defmethod apply-token "em_open" [doc _token] (open-node doc :em))
589 | (defmethod apply-token "em_close" [doc _token] (close-node doc))
590 | (defmethod apply-token "strong_open" [doc _token] (open-node doc :strong))
591 | (defmethod apply-token "strong_close" [doc _token] (close-node doc))
592 | (defmethod apply-token "s_open" [doc _token] (open-node doc :strikethrough))
593 | (defmethod apply-token "s_close" [doc _token] (close-node doc))
594 | (defmethod apply-token "link_open" [doc token] (open-node doc :link (:attrs token)))
595 | (defmethod apply-token "link_close" [doc _token] (close-node doc))
596 | (defmethod apply-token "code_inline" [doc {text :content}] (-> doc (open-node :monospace) (push-node (text-node text)) close-node))
597 |
598 | ;; html (ignored)
599 | (defmethod apply-token "html_inline" [doc _] doc)
600 | (defmethod apply-token "html_block" [doc _] doc)
601 | ;; endregion
602 |
603 | ;; region data builder api
604 | (defn pairs->kmap [pairs] (into {} (map (juxt (comp keyword first) second)) pairs))
605 | (defn apply-tokens [doc tokens]
606 | (let [mapify-attrs-xf (map (fn [x] (update* x :attrs pairs->kmap)))]
607 | (reduce (mapify-attrs-xf apply-token) doc tokens)))
608 |
609 | (def empty-doc {:type :doc
610 | :content []
611 | ;; Id -> Nat, to disambiguate ids for nodes with the same textual content
612 | ::id->index {}
613 | ;; Node -> {id : String, emoji : String}, dissoc from context to opt out of ids
614 | :text->id+emoji-fn (comp text->id+emoji md.transform/->text)
615 | :toc {:type :toc}
616 | :footnotes []
617 | ::path [:content -1] ;; private
618 | :text-tokenizers []})
619 |
620 | (defn parse
621 | "Takes a doc and a collection of markdown-it tokens, applies tokens to doc. Uses an emtpy doc in arity 1."
622 | ([tokens] (parse empty-doc tokens))
623 | ([doc tokens] (-> doc
624 | (update :text-tokenizers (partial map normalize-tokenizer))
625 | (apply-tokens tokens)
626 | (dissoc ::path
627 | ::id->index
628 | :text-tokenizers
629 | :text->id+emoji-fn))))
630 |
631 | (comment
632 |
633 | (-> "# 🎱 Markdown Data
634 |
635 | some _emphatic_ **strong** [link](https://foo.com)
636 |
637 | ---
638 |
639 | > some ~~nice~~ quote
640 | > for fun
641 |
642 | ## Formulas
643 |
644 | [[TOC]]
645 |
646 | $$\\Pi^2$$
647 |
648 | - [ ] and
649 | - [x] some $\\Phi_{\\alpha}$ latext
650 | - [ ] bullets
651 |
652 | ## Sidenotes
653 |
654 | here [^mynote] to somewhere
655 |
656 | ## Fences
657 |
658 | ```py id=\"aaa-bbb-ccc\"
659 | 1
660 | print(\"this is some python\")
661 | 2
662 | 3
663 | ```
664 |
665 | 
666 |
667 | Hline Section
668 | -------------
669 |
670 | ### but also [[indented code]]
671 |
672 | import os
673 | os.listdir('/')
674 |
675 | or monospace mark [`real`](/foo/bar) fun.
676 |
677 | [^mynote]: Here you _can_ `explain` at lenght
678 | "
679 | nextjournal.markdown/tokenize
680 | parse
681 | ;;seq
682 | ;;(->> (take 10))
683 | ;;(->> (take-last 4))
684 | ))
685 | ;; endregion
686 |
687 | ;; region zoom-in at section
688 | (defn section-at [{:as doc :keys [content]} [_ pos :as path]]
689 | ;; TODO: generalize over path (zoom-in at)
690 | ;; supports only top-level headings atm (as found in TOC)
691 | (let [{:as h section-level :heading-level} (get-in doc path)
692 | in-section? (fn [{l :heading-level}] (or (not l) (< section-level l)))]
693 | (when section-level
694 | {:type :doc
695 | :content (cons h
696 | (->> content
697 | (drop (inc pos))
698 | (take-while in-section?)))})))
699 |
700 | (comment
701 | (some-> "# Title
702 |
703 | ## Section 1
704 |
705 | foo
706 |
707 | - # What is this? (no!)
708 | - maybe
709 |
710 | ### Section 1.2
711 |
712 | ## Section 2
713 |
714 | some par
715 |
716 | ### Section 2.1
717 |
718 | some other par
719 |
720 | ### Section 2.2
721 |
722 | #### Section 2.2.1
723 |
724 | two two one
725 |
726 | #### Section 2.2.2
727 |
728 | two two two
729 |
730 | ## Section 3
731 |
732 | some final par"
733 | nextjournal.markdown/parse
734 | (section-at [:content 9]) ;; ⬅ paths are stored in TOC sections
735 | nextjournal.markdown.transform/->hiccup))
736 | ;; endregion
737 |
738 |
739 | ;; ## 🔧 Debug
740 | ;; A flattened view of tokens for easier inspection
741 | (defn flatten-tokens [tokens]
742 | (into []
743 | (comp
744 | (mapcat (partial tree-seq (comp seq :children) :children))
745 | (map #(select-keys % [:type :content :hidden :level :info :meta])))
746 | tokens))
747 |
--------------------------------------------------------------------------------
/dev/nextjournal/markdown/render.cljs:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown.render
2 | (:require
3 | ["katex" :as katex]
4 | ["@codemirror/language" :refer [defaultHighlightStyle syntaxHighlighting LanguageSupport]]
5 | ["@codemirror/state" :refer [EditorState]]
6 | ["@codemirror/view" :refer [EditorView keymap]]
7 | ["@codemirror/lang-markdown" :as MD :refer [markdown markdownLanguage]]
8 | ["react" :as react]
9 | [nextjournal.markdown :as md]
10 | [nextjournal.clerk.viewer :as v]
11 | [nextjournal.clerk.render.hooks :as hooks]
12 | [nextjournal.markdown.transform :as md.transform]
13 | [nextjournal.clojure-mode :as clojure-mode]
14 | [nextjournal.clerk.render.code :as code]
15 | [clojure.string :as str]
16 | [nextjournal.clerk.render :as render]
17 | [reagent.core :as r]))
18 |
19 | (def theme #js {"&.cm-editor.cm-focused" #js {:outline "none"}
20 | ".cm-activeLine" #js {:background-color "rgb(226 232 240)"}
21 | ".cm-line" #js {:padding "0"
22 | :line-height "1.6"
23 | :font-size "15px"
24 | :font-family "\"Fira Mono\", monospace"}})
25 |
26 | ;; syntax (an LRParser) + support (a set of extensions)
27 | (def clojure-lang (LanguageSupport. (clojure-mode/syntax)
28 | (.. clojure-mode/default-extensions (slice 1))))
29 | (defn on-change-ext [f]
30 | (.. EditorState -transactionExtender
31 | (of (fn [^js tr]
32 | (when (.-docChanged tr) (f (.. tr -state sliceDoc)))
33 | #js {}))))
34 |
35 | (defn eval-string [source]
36 | (when (not-empty (str/trim source))
37 | (try {:result #_:clj-kondo/ignore (load-string source)}
38 | (catch js/Error e
39 | {:error (str (.-message e))}))))
40 |
41 | (defn editor [{:keys [doc lang editable? on-change] :or {editable? true}}]
42 | (let [!editor-el (hooks/use-ref)
43 | extensions (into-array (cond-> [(syntaxHighlighting defaultHighlightStyle)
44 | (.. EditorState -allowMultipleSelections (of editable?))
45 | #_(foldGutter)
46 | (.. EditorView -editable (of editable?))
47 | (.of keymap clojure-mode/complete-keymap)
48 | (.theme EditorView theme)]
49 |
50 | on-change
51 | (conj (on-change-ext on-change))
52 |
53 | (= :clojure lang)
54 | (conj (.-extension clojure-lang))
55 |
56 | (= :markdown lang)
57 | (conj (markdown #js {:base markdownLanguage
58 | :defaultCodeLanguage clojure-lang}))))]
59 | (hooks/use-effect
60 | (fn []
61 | (let [editor-view* (code/make-view (code/make-state doc extensions) @!editor-el)]
62 | #(.destroy editor-view*))) [doc])
63 | [:div {:ref !editor-el}]))
64 |
65 | (defn clojure-editor [{:as opts :keys [doc]}]
66 | (let [!result (hooks/use-state nil)]
67 | (hooks/use-effect (fn [] (reset! !result (eval-string doc))) [doc])
68 | [:div
69 | [:div.p-2.bg-slate-100
70 | [editor (assoc opts :lang :clojure :editable? false)]]
71 | [:div.viewer-result.mt-1.ml-5
72 | (when-some [{:keys [error result]} @!result]
73 | (cond
74 | error [:div.red error]
75 | (react/isValidElement result) result
76 | :else [render/inspect result]))]]))
77 |
78 | (def renderers
79 | (assoc md.transform/default-hiccup-renderers
80 | :code (fn [_ctx node] [clojure-editor {:doc (md.transform/->text node)}])
81 | :todo-item (fn [ctx {:as node :keys [attrs]}]
82 | (md.transform/into-markup [:li [:input {:type "checkbox" :default-checked (:checked attrs)}]] ctx node))
83 | :formula (fn [_ctx node]
84 | [:span {:dangerouslySetInnerHTML {:__html (.renderToString katex (md.transform/->text node))}}])
85 | :block-formula (fn [_ctx node]
86 | [:div {:dangerouslySetInnerHTML {:__html (.renderToString katex (md.transform/->text node) #js {:displayMode true})}}])))
87 |
88 | (defn inspect-expanded [x]
89 | (r/with-let [expanded-at (r/atom {:hover-path [] :prompt-multi-expand? false})]
90 | (render/inspect-presented {:!expanded-at expanded-at}
91 | (v/present x))))
92 |
93 | (defn try-markdown [init-text]
94 | (let [text->state (fn [text]
95 | (let [parsed (md/parse text)]
96 | {:parsed parsed
97 | :hiccup (nextjournal.markdown.transform/->hiccup renderers parsed)}))
98 | !state (hooks/use-state (text->state init-text))]
99 | [:div.grid.grid-cols-2.m-10
100 | [:div.m-2.p-2.text-xl.border-2.overflow-y-scroll.bg-slate-100 {:style {:height "20rem"}}
101 | [editor {:doc init-text :on-change #(reset! !state (text->state %)) :lang :markdown}]]
102 | [:div.m-2.p-2.font-medium.overflow-y-scroll {:style {:height "20rem"}}
103 | [inspect-expanded (:parsed @!state)]]
104 | [:div.m-2.p-2.overflow-x-scroll
105 | [inspect-expanded (:hiccup @!state)]]
106 | [:div.m-2.p-2.bg-slate-50.viewer-markdown
107 | [v/html (:hiccup @!state)]]]))
108 |
--------------------------------------------------------------------------------
/notebooks/demo.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextjournal/markdown/5829ec101331b1702841094f4dc897ee46f0ddcd/notebooks/demo.docx
--------------------------------------------------------------------------------
/notebooks/images.clj:
--------------------------------------------------------------------------------
1 | ;; # 🖼️ Block Level Images
2 | (ns images
3 | {:nextjournal.clerk/visibility {:code :hide :result :show}}
4 | (:require [nextjournal.clerk :as clerk]
5 | [nextjournal.markdown :as md]
6 | [nextjournal.markdown.transform :as md.transform]))
7 |
8 | ;; Unlike [commonmark](https://spec.commonmark.org/0.30/#example-571),
9 | ;; nextjournal.markdown distinguishes between inline images and _block images_: image syntax that spans a whole
10 | ;; line of text produces a direct child of the document and is not wrapped in a paragraph node. Take the following text:
11 |
12 | ^{::clerk/viewer {:var-from-def? true
13 | :transform-fn #(clerk/html [:pre @(::clerk/var-from-def (:nextjournal/value %))])}}
14 | (def text-with-images
15 | "This example shows how we're parsing images, the following is a _block image_
16 |
17 | 
18 |
19 | while this is an inline  image.
20 | ")
21 |
22 | ;; This is parsed as
23 |
24 | (clerk/code
25 | (dissoc (md/parse text-with-images)
26 | :toc :footnotes))
27 |
28 | ;; This allows for a different rendering of images; for instance, we might want to render block images with a caption:
29 |
30 | ^{::clerk/visibility {:code :show} :nextjournal.clerk/viewer 'nextjournal.clerk.viewer/html-viewer}
31 | (md.transform/->hiccup
32 | (assoc md.transform/default-hiccup-renderers
33 | :image (fn [{:as _ctx ::md.transform/keys [parent]} {:as node :keys [attrs]}]
34 | (if (= :doc (:type parent))
35 | [:figure.image
36 | [:img (assoc attrs :alt (md.transform/->text node))]
37 | [:figcaption.text-center.mt-1 (md.transform/->text node)]]
38 | [:img.inline (assoc attrs :alt (md.transform/->text node))])))
39 | (md/parse text-with-images))
40 |
--------------------------------------------------------------------------------
/notebooks/pandoc.clj:
--------------------------------------------------------------------------------
1 | ;; # 🏳️🌈 Pandoc
2 | (ns pandoc
3 | {:nextjournal.clerk/toc :collapsed
4 | :nextjournal.clerk/no-cache true}
5 | (:require [clojure.data.json :as json]
6 | [clojure.java.io :as io]
7 | [clojure.java.shell :as shell]
8 | [clojure.string :as str]
9 | [nextjournal.clerk :as clerk]
10 | [nextjournal.clerk.viewer :as v]
11 | [nextjournal.markdown :as md]
12 | [nextjournal.markdown.utils :as u]
13 | [nextjournal.markdown.transform :as md.transform]))
14 |
15 | ;; From the [docs](https://pandoc.org/MANUAL.html#description):
16 | ;;
17 | ;; > Pandoc has a modular design: it consists of a set of readers, which parse text in a given format and produce a native representation of the document (an abstract syntax tree or AST), and a set of writers, which convert this native representation into a target format. Thus, adding an input or output format requires only adding a reader or writer. Users can also run custom pandoc filters to modify the intermediate AST.
18 | ;;
19 | ;; By transforming our markdown data format to and from [Pandoc](https://pandoc.org)'s internal
20 | ;; [AST](https://hackage.haskell.org/package/pandoc-types-1.22.2/docs/Text-Pandoc-Definition.html), we can achieve conversions
21 | ;; from and to potentially all of their supported formats. In both directions we use Pandoc's [JSON representation](https://pandoc.org/filters.html)
22 | ;; as an intermediate format.
23 | ;;
24 | ;; ## 📤 Export
25 | ;;
26 | ;; This is a list of supported output formats as of Pandoc v2.18 (API version 1.22.2):
27 | ^{::clerk/visibility {:code :hide}}
28 | (clerk/html
29 | [:div.overflow-y-auto.shadow-lg {:style {:height "200px" :width "85%"}}
30 | (into [:ul]
31 | (map (partial vector :li))
32 | (str/split-lines (:out (shell/sh "pandoc" "--list-output-formats"))))])
33 |
34 | ;; Let's define a map of transform functions indexed by (a subset of) our markdown types
35 |
36 | ^{::clerk/visibility {:code :hide :result :hide}}
37 | (declare md->pandoc)
38 | ^{::clerk/visibility {:result :hide}}
39 | (def md-type->transform
40 | {:doc (fn [{:keys [content]}]
41 | {:blocks (into [] (map md->pandoc) content)
42 | :pandoc-api-version [1 22]
43 | :meta {}})
44 |
45 | :heading (fn [{:keys [content heading-level]}] {:t "Header" :c [heading-level ["id" [] []] (map md->pandoc content)]})
46 | :paragraph (fn [{:keys [content]}] {:t "Para" :c (map md->pandoc content)})
47 | :plain (fn [{:keys [content]}] {:t "Plain" :c (map md->pandoc content)})
48 | :code (fn [{:as node :keys [language]}] {:t "CodeBlock" :c [["" [language "code"] []] (md.transform/->text node)]})
49 | :block-formula (fn [{:keys [text]}] {:t "Para" :c [{:t "Math" :c [{:t "DisplayMath"} text]}]})
50 |
51 | :em (fn [{:keys [content]}] {:t "Emph" :c (map md->pandoc content)})
52 | :strong (fn [{:keys [content]}] {:t "Strong" :c (map md->pandoc content)})
53 | :strikethrough (fn [{:keys [content]}] {:t "Strikeout" :c (map md->pandoc content)})
54 | :link (fn [{:keys [attrs content]}] {:t "Link" :c [["" [] []] (map md->pandoc content) [(:href attrs) ""]]})
55 |
56 | :list-item (fn [{:keys [content]}] (map md->pandoc content))
57 | :bullet-list (fn [{:keys [content]}] {:t "BulletList" :c (map md->pandoc content)})
58 |
59 | :text (fn [{:keys [text]}] {:t "Str" :c text})})
60 |
61 | ;; along with a dispatch function
62 | ^{::clerk/visibility {:result :hide}}
63 | (defn md->pandoc
64 | [{:as node :keys [type]}]
65 | (if-some [xf (get md-type->transform type)]
66 | (xf node)
67 | (throw (ex-info (str "Not implemented: '" type "'.") node))))
68 |
69 | ;; and a conversion function.
70 | ^{::clerk/visibility {:result :hide}}
71 | (defn pandoc-> [pandoc-data format]
72 | (let [{:keys [exit out err]} (shell/sh "pandoc" "-f" "json" "-t" format
73 | :in (json/write-str pandoc-data))]
74 | (if (zero? exit) out err)))
75 |
76 | ;; Now take a piece of `markdown-text`
77 | ^{::clerk/visibility {:code :hide}
78 | ::clerk/viewer {:var-from-def? true
79 | :transform-fn #(v/html [:pre @(::clerk/var-from-def (v/->value %))])}}
80 | (def markdown-text "# Hello
81 |
82 | ## Sub _Section_
83 |
84 |
85 | ```python
86 | 1 + 1
87 | ```
88 |
89 | With a block formula:
90 |
91 | $$F(t) = \\int_{t_0}^t \\phi(x)dx$$
92 |
93 | this _is_ a
94 | * ~~boring~~
95 | * **awesome**
96 | * [example](https://some/path)!")
97 |
98 | ;; once we've turned it into Pandoc's JSON format
99 | (def pandoc-data (-> markdown-text md/parse md->pandoc))
100 |
101 | ^{::clerk/visibility {:result :hide}}
102 | (def verbatim (partial clerk/with-viewer {:transform-fn #(v/html [:pre (v/->value %)])}))
103 |
104 | ;; then we can convert it to any supported format. Say **Org Mode**
105 | (-> pandoc-data (pandoc-> "org") verbatim)
106 |
107 | ;; or **reStructuredText**
108 | (-> pandoc-data (pandoc-> "rst") verbatim)
109 |
110 | ;; or even to a **Jupyter Notebook**.
111 | (-> pandoc-data (pandoc-> "ipynb") verbatim)
112 |
113 | ;; If you're in that exotic party mode, you can also go for a PDF
114 | (shell/sh "pandoc" "--pdf-engine=tectonic" "-f" "json" "-t" "pdf" "-o" "notebooks/demo.pdf"
115 | :in (json/write-str pandoc-data))
116 |
117 | ;; ## 📥 Import
118 | ;;
119 | ;; Import works the same way. This is a list of supported input formats:
120 | ^{::clerk/visibility {:code :hide}}
121 | (clerk/html
122 | [:div.overflow-y-auto.shadow-lg {:style {:height "200px" :width "85%"}}
123 | (into [:ul]
124 | (map (partial vector :li))
125 | (str/split-lines (:out (shell/sh "pandoc" "--list-input-formats"))))])
126 |
127 | ^{::clerk/visibility {:result :hide}}
128 | (declare pandoc->md)
129 | ^{::clerk/visibility {:result :hide}}
130 | (defn node+content [type pd-node] {:type type :content (keep pandoc->md (:c pd-node))})
131 | ^{::clerk/visibility {:result :hide}}
132 | (def pandoc-type->transform
133 | {:Space (constantly {:type :text :text " "})
134 | :Str (fn [node] {:type :text :text (:c node)})
135 | :Para (partial node+content :paragraph)
136 | :Plain (partial node+content :plain)
137 | :Header (fn [node]
138 | (let [[level _meta content] (:c node)]
139 | {:type :heading
140 | :heading-level level
141 | :content (keep pandoc->md content)}))
142 |
143 | :Emph (partial node+content :em)
144 | :Strong (partial node+content :strong)
145 | :Strikeout (partial node+content :strikethrough)
146 |    :Underline (partial node+content :em) ;; no underline in markdown, fall back to :em
147 | :Link (fn [node]
148 | (let [[_meta content [href _]] (:c node)]
149 | {:type :link
150 | :attrs {:href href}
151 | :content (keep pandoc->md content)}))
152 |
153 | :BulletList (fn [node]
154 | {:type :bullet-list
155 | :content (map (fn [li]
156 | {:type :list-item
157 | :content (keep pandoc->md li)}) (:c node))})
158 | :OrderedList (fn [node]
159 | {:type :numbered-list
160 | :content (map (fn [li]
161 | {:type :list-item
162 | :content (keep pandoc->md li)}) (second (:c node)))})
163 |
164 | :Math (fn [node] (let [[_meta latex] (:c node)] (u/block-formula latex)))
165 | :Code (fn [node]
166 | (let [[_meta code] (:c node)]
167 | {:type :monospace :content [(u/text-node code)]}))
168 | :CodeBlock (fn [node]
169 | (let [[[_id classes _meta] code] (:c node)]
170 | {:type :code
171 | :content [(u/text-node code)]}))
172 | :SoftBreak (constantly {:type :softbreak})
173 | :RawBlock (constantly nil)
174 | :RawInline (fn [{:keys [c]}]
175 | (cond
176 | (and (vector? c) (= "latex" (first c)))
177 | (u/formula (second c))))})
178 |
179 | ^{::clerk/visibility {:result :hide}}
180 | (defn pandoc->md [{:as node :keys [t pandoc-api-version blocks]}]
181 | (if pandoc-api-version
182 | {:type :doc :content (keep pandoc->md blocks)}
183 | (if-some [xf (when t (get pandoc-type->transform (keyword t)))]
184 | (xf node)
185 | (throw (ex-info (str "Not Implemented '" t "'.") node)))))
186 |
187 | ^{::clerk/visibility {:result :hide}}
188 | (defn pandoc<- [input format]
189 | (-> (shell/sh "pandoc" "-f" format "-t" "json" :in input)
190 | :out (json/read-str :key-fn keyword)))
191 |
192 | ;; Let us test the machinery above against a **Microsoft Word** file, turning it into markdown and natively rendering it with Clerk
193 |
194 | (v/html
195 | [:div.shadow-xl.p-8
196 | (-> (io/file "notebooks/demo.docx")
197 | (pandoc<- "docx")
198 | pandoc->md
199 | v/md)])
200 |
201 | ;; or ingest some **Org Mode**.
202 | (v/html
203 | [:div.overflow-y-auto.shadow-xl {:style {:height "400px"}}
204 | [:div.p-8
205 | (-> (io/input-stream "https://raw.githubusercontent.com/erikriverson/org-mode-R-tutorial/master/org-mode-R-tutorial.org")
206 | (pandoc<- "org")
207 | pandoc->md
208 | (update :content #(take 24 %))
209 | v/md)]])
210 |
211 | ;; We also might want to test that our functions are invertible:
212 | (v/html
213 | [:div
214 | [:div.shadow-xl.p-8
215 | (-> markdown-text
216 | md/parse
217 | md->pandoc
218 | #_#_ ;; we're not property testing Pandoc!
219 | (pandoc-> "org")
220 | (pandoc<- "org")
221 | pandoc->md
222 | v/md)]])
223 |
224 | ;; This brief experiment shows how the Pandoc AST makes for an interesting interchange format, potentially
225 | ;; allowing Clerk to interact with formats other than markdown and Clojure.
226 |
227 | ^{::clerk/visibility {:result :hide :code :hide}}
228 | (comment
229 | (json/read-str
230 | (:out
231 | (shell/sh "pandoc" "-f" "markdown" "-t" "json" :in markdown-text))
232 | :key-fn keyword))
233 |
--------------------------------------------------------------------------------
/notebooks/parsing_extensibility.clj:
--------------------------------------------------------------------------------
1 | ;; # 🏗 Extending Markdown Parsing
2 | (ns parsing-extensibility
3 | {:nextjournal.clerk/toc :collapsed
4 | :nextjournal.clerk/no-cache true}
5 | (:require [nextjournal.clerk :as clerk]
6 | [nextjournal.markdown :as md]
7 | [nextjournal.markdown.utils :as u]
8 | [edamame.core :as edamame]
9 | [clojure.zip :as z]))
10 |
11 | ^{:nextjournal.clerk/visibility {:code :hide :result :hide}}
12 | (def show-text
13 | {:var-from-def? true
14 | :transform-fn (fn [{{::clerk/keys [var-from-def]} :nextjournal/value}] (clerk/html [:pre @var-from-def]))})
15 |
16 | ;; With recent additions to our `nextjournal.markdown.parser` we're allowing for a customizable parsing layer on top of the tokenization provided by `markdown-it` ([n.markdown/tokenize](https://github.com/nextjournal/markdown/blob/ae2a2f0b6d7bdc6231f5d088ee559178b55c97f4/src/nextjournal/markdown.clj#L50-L52)).
17 | ;; We're acting on the text (leaf) tokens, splitting each of those into a collection of [nodes](https://github.com/nextjournal/markdown/blob/ff68536eb15814fe81db7a6d6f11f049895a4282/src/nextjournal/markdown/parser.cljc#L5). We'll explain how that works by means of three examples.
18 | ;;
19 | ;; ## Regex-based tokenization
20 | ;;
21 | ;; A `Tokenizer` is a map with keys `:doc-handler` and `:tokenizer-fn`. For convenience, the function `u/normalize-tokenizer` will fill in the missing keys
22 | ;; starting from a map with a `:regex` and a `:handler`:
23 |
24 | (def internal-link-tokenizer
25 | (u/normalize-tokenizer
26 | {:regex #"\[\[([^\]]+)\]\]"
27 | :handler (fn [match] {:type :internal-link
28 | :text (match 1)})}))
29 |
30 | ((:tokenizer-fn internal-link-tokenizer) "some [[set]] of [[wiki]] link")
31 |
32 | (u/tokenize-text-node internal-link-tokenizer {} {:text "some [[set]] of [[wiki]] link"})
33 |
34 | ;; In order to opt in to the extra tokenization above, we need to configure the document context as follows:
35 | (md/parse* (update u/empty-doc :text-tokenizers conj internal-link-tokenizer)
36 | "some [[set]] of [[wiki]] link")
37 |
38 | ;; We provide an `internal-link-tokenizer` as well as a `hashtag-tokenizer` as part of the `nextjournal.markdown.parser` namespace. By default these are not used during parsing and need to be opted in to as explained above.
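
;; As a sketch along the same lines, a hashtag tokenizer can be built from a regex and a
;; handler (the `:hashtag` node type below is illustrative, not a built-in):
(def hashtag-tokenizer-sketch
  (u/normalize-tokenizer
   {:regex #"#(\w+)"
    :handler (fn [match] {:type :hashtag :text (match 1)})}))

(md/parse* (update u/empty-doc :text-tokenizers conj hashtag-tokenizer-sketch)
           "a text with a #tag in it")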
39 |
40 | ;; ## Read-based tokenization
41 | ;;
42 | ;; Somewhat inspired by the Racket text processor [Pollen](https://docs.racket-lang.org/pollen/pollen-command-syntax.html) we'd like to parse a `text` like this
43 |
44 | ^{::clerk/visibility {:code :hide} ::clerk/viewer show-text}
45 | (def text "At some point in text a losange
46 | will signal ◊(foo \"one\" [[vector]]) we'll want to write
47 | code and ◊not text. Moreover it has not to conflict with
48 | existing [[links]] or #tags")
49 | ;; and _read_ any valid Clojure code coming after the lozenge character (`◊`), which we'll also call a
50 | ;; _losange_, as it sounds much better in French 🇫🇷!
51 | ;;
52 | ;; How to proceed? We might take a hint from `re-seq`.
53 | ^{::clerk/visibility {:code :hide}}
54 | (clerk/html
55 | [:div.viewer-code
56 | (clerk/code
57 | (with-out-str
58 | (clojure.repl/source re-seq)))])
59 |
60 | ;; Now, when a form is read with [Edamame](https://github.com/borkdude/edamame#edamame), it preserves its location metadata. This allows
61 | ;; us to produce an `IndexedMatch` from matching text
62 | (defn match->data+indexes [m text]
63 | (let [start (.start m) end (.end m)
64 | form (edamame/parse-string (subs text end))]
65 | [form start (+ end (dec (:end-col (meta form))))]))
66 | ;; and our modified `re-seq` becomes
67 | (defn losange-tokenizer-fn [text]
68 | (let [m (re-matcher #"◊" text)]
69 | ((fn step []
70 | (when (.find m)
71 | (cons (match->data+indexes m text)
72 | (lazy-seq (step))))))))
73 |
74 | (losange-tokenizer-fn text)
75 | (losange-tokenizer-fn "non matching text")
76 |
77 | (def losange-tokenizer
78 | (u/normalize-tokenizer
79 | {:tokenizer-fn losange-tokenizer-fn
80 | :handler (fn [clj-data] {:type :losange
81 | :data clj-data})}))
82 |
83 | (u/tokenize-text-node losange-tokenizer {} {:text text})
84 |
85 | ;; putting it all together
86 | (md/parse* (update u/empty-doc :text-tokenizers conj losange-tokenizer)
87 | text)
88 |
89 | ;; ## Parsing with Document Handlers
90 | ;;
91 | ;; Using tokenizers with document handlers we can let parsed tokens act upon the whole document tree. Consider
92 | ;; the following textual example (**TODO** _rewrite parsing with a zipper state_):
93 | ^{::clerk/viewer show-text}
94 | (def text-with-meta
95 | "# Example ◊(add-meta {:attrs {:id \"some-id\"} :class \"semantc\"})
96 | In this example we're using the losange tokenizer to modify the
97 | document AST in conjunction with the following functions:
98 | * `add-meta`: looks up the parent node, merges a map in it
99 | and adds a flag to its text.
100 | * `strong`: makes the text ◊(strong much more impactful) indeeed.
101 | ")
102 |
103 | (defn add-meta [doc-loc meta]
104 | (-> doc-loc (z/edit merge meta)
105 | z/down (z/edit update :text str "🚩️")
106 | z/up))
107 |
108 | (defn strong [doc & terms]
109 | (-> doc
110 | (z/append-child {:type :strong}) z/down z/rightmost ;; open-node
111 | (z/insert-child (u/text-node (apply str (interpose " " terms))))
112 | z/up)) ;; close-node
113 |
114 | (def data
115 | (md/parse* (-> u/empty-doc
116 | (update :text-tokenizers conj
117 | (assoc losange-tokenizer
118 | :doc-handler (fn [doc {:keys [match]}]
119 | (apply (eval (first match)) doc (rest match))))))
120 | text-with-meta))
121 |
122 | (clerk/md data)
123 |
124 | ^{::clerk/visibility {:code :hide :result :hide}}
125 | (comment
126 | ;; Tokenizer :: {:tokenizer-fn :: TokenizerFn,
127 | ;; :doc-handler :: DocHandler}
128 | ;; normalize-tokenizer :: {:regex, :doc-handler} |
129 | ;; {:tokenizer-fn, :handler} |
130 | ;; {:regex, :handler} -> Tokenizer
131 | ;;
132 | ;; Match :: Any
133 | ;; Handler :: Match -> Node
134 | ;; IndexedMatch :: (Match, Integer, Integer)
135 | ;; TokenizerFn :: String -> [IndexedMatch]
136 | ;; DocHandler :: Doc -> {:match :: Match} -> Doc
137 |
138 | ;; DocOpts :: {:text-tokenizers [Tokenizer]}
139 | ;; parse : DocOpts -> [Token] -> Doc
140 | ;;
141 | )
142 |
--------------------------------------------------------------------------------
/notebooks/reference.md:
--------------------------------------------------------------------------------
1 | # Referenz
2 |
3 | ## Absätze
4 |
5 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
6 |
7 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
8 |
9 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
10 |
11 | ## Formatierung
12 |
13 | * `**fett**` wird zu **fett**
14 | * `_kursiv_` wird zu _kursiv_
15 | * `~~durchgestrichen~~` wird zu ~~durchgestrichen~~
16 | * `[Linktext](https://nextjournal.com/)` wird zu [Linktext](https://nextjournal.com/)
17 | * Internal links: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. [[wikistyle-link]]. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
18 |
19 | ## Überschriften
20 |
21 | Überschriften beginnen mit `#`. Mehrere aufeinanderfolgende `#`
22 | definieren das Level der Überschrift.
23 |
24 | # Überschrift 1
25 | ## Überschrift 2
26 | ### Überschrift 3
27 | #### Überschrift 4
28 |
29 | ## Listen
30 |
31 | ### Aufzählungen
32 |
33 | Normale Aufzählungen beginnen mit einem `*` und können verschachtelt sein.
34 |
35 | * Kühlschrank
36 | * Butter
37 | * Eier
38 | * Milch
39 | * Vorratsschrank
40 | * Brot
41 | * Backpapier
42 | * Alufolie
43 |
44 | wird zu
45 |
46 | * Kühlschrank
47 | * Butter
48 | * Eier
49 | * Milch
50 | * Vorratsschrank
51 | * Brot
52 | * Backpapier
53 | * Alufolie
54 |
55 | ### Nummerierte Aufzählungen
56 |
57 | Nummerierte Aufzählungen beginnen mit `1.` und können ebenfalls verschachtelt sein.
58 | Verschachtelte Aufzählungen beginnen wieder mit `1.` (anstelle z.B. `1.1.`) und nehmen automatisch die
59 | übergeordneten Indizes mit.
60 |
61 | 1. Grünpflanzen
62 | 1. Charophyten
63 | 2. Chlorophyten
64 | 2. Landpflanzen
65 | 1. Lebermoose
66 | 2. Laubmoose
67 | 3. Hornmoose
68 |
69 | wird zu
70 |
71 | 1. Grünpflanzen
72 | 1. Charophyten
73 | 2. Chlorophyten
74 | 2. Landpflanzen
75 | 1. Lebermoose
76 | 2. Laubmoose
77 | 3. Hornmoose
78 |
79 | ### Todo Listen
80 |
81 | Todo Listen beginnen mit `* [ ]` oder `* [x]` wobei das `x` markiert ob
82 | das Todo erledigt ist. Todo Listen können ebenfalls verschachtelt sein.
83 |
84 | * [ ] Lebensmittel
85 | * [x] Butter
86 | * [ ] Eier
87 | * [ ] Milch
88 | * [x] Werkstatt
89 | * [x] Schrauben Torx M6
90 | * [x] Torx Bitsatz
91 |
92 | wird zu
93 |
94 | * [ ] Lebensmittel
95 | * [x] Butter
96 | * [ ] Eier
97 | * [ ] Milch
98 | * [x] Werkstatt
99 | * [x] Schrauben Torx M6
100 | * [x] Torx Bitsatz
101 |
102 | ## Tabellen
103 |
104 | Doppelpunkte können verwendet werden um den Text in Spalten links,
105 | rechts oder zentriert auszurichten.
106 |
107 |
108 | | Spalte 1 | Spalte 2 | Spalte 3 |
109 | | ------------ |:-------------------:| --------:|
110 | | Spalte 1 ist | links ausgerichtet | 1600 € |
111 | | Spalte 2 ist | zentriert | 12 € |
112 | | Spalte 3 ist | rechts ausgerichtet | 1 € |
113 |
114 | wird zu
115 |
116 | | Spalte 1 | Spalte 2 | Spalte 3 |
117 | | ------------ |:-------------------:| --------:|
118 | | Spalte 1 ist | links ausgerichtet | 1600 € |
119 | | Spalte 2 ist | zentriert | 12 € |
120 | | Spalte 3 ist | rechts ausgerichtet | 1 € |
121 |
122 | ## Bilder
123 |
124 | 
125 |
126 | wird zu
127 |
128 | 
129 |
130 | ## Zitate
131 |
132 | > “The purpose of computation is insight, not numbers.”
133 | >
134 | > ― Richard Hamming
135 |
136 | wird zu
137 |
138 | > “The purpose of computation is insight, not numbers.”
139 | >
140 | > ― Richard Hamming
141 |
142 | ## Trennlinien
143 |
144 | Verschiedene Sektionen können durch Trennlinien verdeutlicht werden.
145 | `---` produziert eine Linie über die volle Breite des Dokuments.
146 |
147 | #### Sektion 1
148 |
149 | Hier ist ein Absatz zur Sektion 1.
150 |
151 | ---
152 |
153 | #### Sektion 2
154 |
155 | Hier ist ein Absatz zur Sektion 2.
156 |
157 | wird zu
158 |
159 | #### Sektion 1
160 |
161 | Hier ist ein Absatz zur Sektion 1.
162 |
163 | ---
164 |
165 | #### Sektion 2
166 |
167 | Hier ist ein Absatz zur Sektion 2.
168 |
169 | ## Inhaltsverzeichnis
170 |
171 | Ein Inhaltsverzeichnis über alle Überschriften kann an jeder beliebigen
172 | Stelle mit `[[toc]]` eingefügt werden.
173 |
174 | [[toc]]
175 |
176 | wird zu
177 |
178 | [[toc]]
179 |
180 |
181 | # [Randomly generated](https://jaspervdj.be/lorem-markdownum/)
182 |
183 | Bracchia seque ossa minus petisse serius
184 | ========================================
185 |
186 | Ver maiores letoque via et obstipuere eburnea
187 | ---------------------------------------------
188 |
189 | Lorem markdownum pede inmensos de est, aut nisi narremur rudente fratri, Aegides
190 | parente, et in. Vulneribus tecta, non et Cipus iamdudum volvere, dives quod
191 | dixit, Titaniacis sociosque.
192 |
193 | - Viros ergo licet licebit coercet
194 | - Nec ait dic ait hasta edita similisque
195 | - Undas postquam
196 | - Vires piasque
197 | - Medias an quem nisi pugnacem haec flammis
198 | - Cum crimine in haud tertia ortus dicar
199 |
200 | Ero mihi velare per
201 | -------------------
202 |
203 | Mensuraque voluptas, venabula restabat de pedicis pectusque Lavinia promissa
204 | patris detrahit maris una iamque, At adunca reddita. Fluctibus digitis et Pelia.
205 | Cur cur, remorata prohibebant tellus anus nihil cum? Cornaque [ora est
206 | cetera](http://www.palladiasest.net/ad) quam amore, simul, et abire in corpore
207 | matris dubitabat frustra Peleus ex nimbos.
208 |
209 | 1. Adspicerent nostro solum et dedit est esse
210 | 2. Parosque et
211 | 3. Sed nomen columbas
212 | 4. Adventare spumas
213 | 5. Hoc penetratque Rhamnusia nodosaque me olim
214 | 6. Sit cum ab per inexorabile densis
215 |
216 | Troezenius utque
217 | ----------------
218 |
219 | Temptat vixque pectore spectacula patulosque ales requirenti ferum laudare
220 | oculorum volucris. Moderamine oculos nec **referebam** vestes nescit pedis,
221 | obsisto et petunt **filius** celebrare accedere et udaeque gestu. Scythiam
222 | capitis. Carpitur infernas moderamine ne alte fregit heu fuisses tamen, neque
223 | foro alium latius secundi; tecumque rapta. Rettuleram satus.
224 |
225 | thumbnailIpadVolume.resources_wins_wheel += data_wan_wizard;
226 | if (memory_acl_threading(port_path, wizard_import_log, pixel + 4)) {
227 | ppiFlatbedFirmware += dmaCompressionOdbc(qwerty_rate_word, system + 3,
228 | propertyTrackballRaster);
229 | }
230 | character_xhtml_protector.pda_syntax = jumperTrackballDenial(
231 | pretestWiredFont, megahertz_programming_imap, 79);
232 |
233 | Ipse ubi in Pirenidas inane, video rore qui fratres unum induitur mittere.
234 | Crinem repetitum mare prius Nilo dum victa **superorum colorem duobus**, ore.
235 | Cum et _corniger_ raptam corpus. In saltu ullo hoc ille _viisque_. Hominum ore
236 | Priamum Pindumve, in verbisque arvum est o **currere**.
237 |
238 | ## Potuit traxit ob sacro me mandere utrumque
239 |
240 | Lorem markdownum habetque pater non scrobibus Turno! Ubi [Dianae], per est
241 | radiis ad construit, annum quas edo purpureo flentes grege tot tanto di
242 | intibaque corpore. *Populis Tamasenum quod* rabiemque et si natus illa decorem
243 | amanti semper tui lacrimans pete: suo per osculaque? **Male populi** sic, sed et
244 | addidit flumine illis sit verba. Ferinae bimembres male.
245 |
246 | Molitur nec tellus, tabula et equos natantia nimios tangere retemptat victore,
247 | mi femina, cumque et. Undas nive **manus**, anguis stimuletur sibi umida putaret
248 | fatorum miratur dolorque Icare. Praemia vidit opposuitque sumpto.
249 |
250 | ## Fulgura optato narratibus sed
251 |
252 | Byblis iuravimus geminis titubantis rumpo recondidit Thybris umbram torruit
253 | praedamque fictus, est. Violabere lapides [audacem] hunc causa remittit erat
254 | quoque volat frondescere.
255 |
256 | ## Possunt Amphione
257 |
258 | Ego maculoso tela nec filia aut Philomela Iliacas. Et magna montis, anguem
259 | corpus extulerat, [nescio fallaces] amem quae ferali pudetque. In nata, magis
260 | moderamine cornix prohibentque ramis magis loco exosus: cum. Domum tecti
261 | agnoscis labaret **occidit rupit**, saxa credo fuerat pavido sorores oblitis;
262 | aegra semel, nostrum, idem. Nec multifori custos, iuro feralia, regemque alumno.
263 |
264 | Neque facta, ignes, erit Non Alemone risus perterrita et illi, in [cepit semper
265 | et] acui sub. Celanda mortalia strictumque quos, munera gener, ancipitesque
266 | victus, imo omnes. Tereu signo omnem, tristi, utile genetrixque hos litibus
267 | litora. E Rhoeti medeatur Lapitharum me dolorem!
268 |
269 | ## Belli iaculatricemque tumulo contigerant passibus
270 |
271 | Aera corpus natus palude. Dumque inque et parat tolerare utile, Cypriae concolor
272 | tempora, quam.
273 |
274 | Solidissima pater antris: eiectum squamigeris veterem. Vatis inde nec senis, est
275 | fuerunt damnosasque labefactus pectore unco, tuos Ammon ubi usu; *bello*.
276 |
277 | ## Utque #crudus profunda [[maritae]] tumulo {{contigerant}} passibus
278 |
279 | Rector perdis aequore mille vel crimenque senior ore velamina orbem ipsa
280 | hostiliter? In unam Lycaeo de ortus viderat inquinat ire coniunx qualia, puer.
281 | Ipse atque qui secabant vestras, Caeneus superbus et cauda siccat. Nullamque
282 | corpus est evicit, *non* vento movi animumque, fundamina.
283 |
284 | Fatebitur quae praesagia opifex, tua repulsam utrimque spiritus austrum, sic et
285 | viribus pinus, Calydonius. Et sordidus pro iugulo laudis corpore. Trucis mutatus
286 | **certatimque simulamina** inpulsum lapides, nostrumque opibus aratri ◊(java.time.Instant/now).
287 |
288 | [Dianae]: http://timeas.net/
289 | [audacem]: http://www.novavela.com/dis.aspx
290 | [cepit semper et]: http://quodimas.net/tactusputantem
291 | [nescio fallaces]: http://aequiformidine.net/famulae-miserere.aspx
292 |
--------------------------------------------------------------------------------
/notebooks/tight_lists.clj:
--------------------------------------------------------------------------------
1 | ;; # Tight Lists
2 | (ns tight-lists
3 | {:nextjournal.clerk/no-cache true}
4 | (:require [clojure.data.json :as json]
5 | [clojure.java.shell :as shell]
6 | [nextjournal.clerk :as clerk]
7 | [nextjournal.clerk.viewer :as v]
8 | [nextjournal.markdown :as md]
9 | [hiccup2.core :as h]
10 | [nextjournal.markdown.transform :as md.transform]))
11 |
12 | ;; Markdown (commonmark) distinguishes between [loose and tight lists](https://spec.commonmark.org/0.30/#loose)
13 | ;;
14 | ;; > A list is loose if any of its constituent list items are separated by blank lines, or
15 | ;; > if any of its constituent list items directly contain two block-level elements with a blank line between them.
16 | ;; > Otherwise a list is tight. (The difference in HTML output is that paragraphs in a loose list are wrapped in `<p>` tags,
17 | ;; > while paragraphs in a tight list are not.)
18 | ;;
19 | ;; ## Pandoc to the Rescue
20 | ;;
21 | ;; To comply with this behaviour [Pandoc uses a `Plain` container type](https://github.com/jgm/pandoc-types/blob/694c383dd674dad97557eb9b97adda17079ebb2c/src/Text/Pandoc/Definition.hs#L275-L278), and I think we should follow their advice
22 |
23 | ^{::clerk/visibility {:result :hide}}
24 | (defn ->pandoc-ast [text]
25 | (clerk/html [:pre
26 | (with-out-str
27 | (clojure.pprint/pprint
28 | (json/read-str
29 | (:out
30 | (shell/sh "pandoc" "-f" "markdown" "-t" "json" :in text))
31 | :key-fn keyword)))]))
32 |
33 | ;; tight
34 | (->pandoc-ast "
35 | - one
36 | - two
37 | ")
38 |
39 | ;; vs loose lists
40 | (->pandoc-ast "
41 | - one
42 |
43 | inner par
44 | - two
45 | ")
46 |
47 | (->pandoc-ast "
48 | - one
49 |
50 | - two
51 | ")
52 |
53 | (->pandoc-ast "
54 | - one
55 |   * tight sub one
56 | - two
57 | ")
58 |
59 | ^{::clerk/visibility {:result :hide}}
60 | (defn example [md-string]
61 | (v/html
62 | [:div.flex-col
63 | [:pre.code md-string]
64 | [:pre.code (with-out-str
65 | (clojure.pprint/pprint
66 | (dissoc (md/parse md-string) :toc :title :footnotes)))]
67 | [:pre.code (with-out-str
68 | (clojure.pprint/pprint
69 | (md/->hiccup md-string)))]
70 | (v/html (md/->hiccup md-string))
71 | ;; TODO: fix in clerk
72 | #_
73 | (v/html (str (h/html (md/->hiccup md-string))))]))
74 |
75 | (clerk/present!
76 | (example "
77 | * this
78 | * is
79 | * tight!"))
80 |
81 | (example "
82 | * this
83 | * is
84 | > very loose
85 |
86 | indeed
87 | * fin")
88 |
89 | (example "* one \\
90 | hardbreak
91 | * two")
92 |
93 | (example "
94 | * one
95 | softbreak
96 | * two")
97 |
98 | ;; https://spec.commonmark.org/0.30/#example-314 (loose list)
99 | (example "- a\n- b\n\n- c")
100 | ;; https://spec.commonmark.org/0.30/#example-319 (tight with loose sublist inside)
101 | (example "- a\n - b\n\n c\n- d\n")
102 | ;; https://spec.commonmark.org/0.30/#example-320 (tight with blockquote inside)
103 | (example "* a\n > b\n >\n* c")
104 |
--------------------------------------------------------------------------------
/notebooks/try.clj:
--------------------------------------------------------------------------------
1 | ;; # ✏️ Nextjournal Markdown Live Demo
2 | (ns try
3 | {:nextjournal.clerk/visibility {:code :hide}}
4 | (:require [nextjournal.clerk :as clerk]))
5 | ;; _Edit markdown text, see parsed AST and transformed hiccup live. Preview how Clerk renders it._
6 | ^{::clerk/width :full
7 | ::clerk/visibility {:code :fold}}
8 | (clerk/with-viewer {:render-fn 'nextjournal.markdown.render/try-markdown
9 | :require-cljs true}
10 | "# 👋 Hello Markdown
11 |
12 | ```clojure id=xxyyzzww
13 | (reduce + [1 2 3])
14 | ```
15 | ## Subsection
16 | - [x] type **some**
17 | - [x] ~~nasty~~
18 | - [ ] _stuff_ here")
19 |
20 | #_(clerk/serve! {:port 8989 :browse true})
21 |
--------------------------------------------------------------------------------
/out/sci/index.html:
--------------------------------------------------------------------------------
4 | Clerk Viewer
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "dependencies": {
3 | "@codemirror/autocomplete": "^6.0.2",
4 | "@codemirror/commands": "^6.0.0",
5 | "@codemirror/lang-markdown": "6.0.0",
6 | "@codemirror/language": "^6.1.0",
7 | "@codemirror/lint": "^6.0.0",
8 | "@codemirror/search": "^6.0.0",
9 | "@codemirror/state": "^6.0.1",
10 | "@codemirror/view": "^6.0.2",
11 | "@lezer/common": "^1.0.0",
12 | "@lezer/generator": "^1.0.0",
13 | "@lezer/highlight": "^1.0.0",
14 | "@lezer/lr": "^1.0.0",
15 | "@lezer/markdown": "^1.0.0",
16 | "@nextjournal/lang-clojure": "1.0.0",
17 | "@nextjournal/lezer-clojure": "1.0.0",
18 | "d3-require": "^1.2.4",
19 | "emoji-regex": "^10.0.0",
20 | "framer-motion": "^6.2.8",
21 | "katex": "^0.12.0",
22 | "lezer-clojure": "1.0.0-rc.0",
23 | "markdown-it": "^14.1.0",
24 | "markdown-it-block-image": "^0.0.3",
25 | "markdown-it-footnote": "^3.0.3",
26 | "markdown-it-texmath": "^1.0.0",
27 | "markdown-it-toc-done-right": "^4.2.0",
28 | "punycode": "2.1.1",
29 | "react": "^18.2.0",
30 | "react-dom": "^18.2.0",
31 | "use-sync-external-store": "1.2.0",
32 | "vh-sticky-table-header": "1.2.1",
33 | "w3c-keyname": "^2.2.4"
34 | },
35 | "devDependencies": {
36 | "esbuild": "^0.12.28",
37 | "shadow-cljs": "^2.19.3"
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/resources/META-INF/nextjournal/markdown/meta.edn:
--------------------------------------------------------------------------------
1 | {:version "0.6.157"}
--------------------------------------------------------------------------------
/shadow-cljs.edn:
--------------------------------------------------------------------------------
1 | {:source-paths ["src" "test"]
2 | :dev-http {8022 "out/test"}
3 | :nrepl {:cider false}
4 | :builds
5 | {:test
6 | {:target :node-test
7 | :output-dir "out"
8 | :output-to "out/node-tests.js"
9 | :closure-defines {shadow.debug true}
10 | :js-options {:js-provider :shadow
11 | :output-feature-set :es8}}
12 |
13 | :browser-test
14 | {:target :browser-test
15 | :test-dir "out/test"
16 | :closure-defines {shadow.debug true}
17 | :js-options {:output-feature-set :es8}}}}
18 |
--------------------------------------------------------------------------------
/src/deps.cljs:
--------------------------------------------------------------------------------
1 | {:npm-deps
2 | {"katex" "^0.12.0"
3 | "markdown-it" "^14.1.0"
4 | "markdown-it-block-image" "^0.0.3"
5 | "markdown-it-footnote" "^3.0.3"
6 | "markdown-it-texmath" "^1.0.0"
7 | "markdown-it-toc-done-right" "^4.2.0"
8 | "punycode" "2.1.1"}}
9 |
--------------------------------------------------------------------------------
/src/js/markdown.js:
--------------------------------------------------------------------------------
1 | let MarkdownIt = require('markdown-it'),
2 | MD = new MarkdownIt({html: true, linkify: true, breaks: false})
3 |
4 | let texmath = require('markdown-it-texmath')
5 | MD.use(texmath, {delimiters: "dollars"})
6 |
7 | let blockImage = require("markdown-it-block-image")
8 | MD.use(blockImage)
9 |
10 | let mdToc = require("markdown-it-toc-done-right")
11 | MD.use(mdToc)
12 |
13 | let footnotes = require("markdown-it-footnote")
14 | MD.use(footnotes)
15 |
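// plugin marking `[ ] `/`[x] ` list items as todos: sets todo/checked attrs on the
// list item and a has-todos attr on the closest containing bullet list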
16 | function todoListPlugin(md, opts) {
17 | const startsWithTodoSequence = (text) => {
18 | return text.startsWith("[ ] ") || text.startsWith("[x] ")
19 | }
20 | const isITodoInlineToken = (tokens, i) => {
21 | return tokens[i].type === 'inline' &&
22 | tokens[i-1].type === 'paragraph_open' &&
23 | tokens[i-2].type === 'list_item_open' &&
24 | startsWithTodoSequence(tokens[i].content)
25 | }
26 | const removeMarkup = (token) => {
27 | let textNode = token.children[0]
28 | textNode.content = textNode.content.slice(4)
29 | }
30 | const closestList = (tokens, index) => {
31 | for (let i = index; i >= 0; i--) {
32 | let token = tokens[i]
33 | if (token.type == 'bullet_list_open') { return token }
34 | }
35 | }
36 | const rule = (state) => {
37 | let tokens = state.tokens
38 | for (let i = 2; i < tokens.length; i++) {
39 | if (isITodoInlineToken(tokens, i)) {
40 | // set attrs on the list item
41 | tokens[i-2].attrSet("todo", true)
42 | tokens[i-2].attrSet("checked", tokens[i].content.startsWith("[x] "))
43 |         // remove the leading "[ ] " / "[x] " marker from the first inline child
44 | removeMarkup(tokens[i])
45 | // set attrs on closest list container
46 | let container = closestList(tokens, i-3)
47 | if (container) { container.attrSet("has-todos", true) }
48 | }
49 | }
50 | }
51 |
52 | md.core.ruler.after('inline', 'todo-list-rule', rule)
53 | }
54 |
55 | MD.use(todoListPlugin)
56 |
57 | function tokenize(text) { return MD.parse(text, {}) }
58 | function tokenizeJSON(text) { return JSON.stringify(MD.parse(text, {})) }
59 |
60 | module.exports = {tokenize, tokenizeJSON}
61 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown.cljc:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown
2 | "Markdown as data"
3 | (:require
4 | [nextjournal.markdown.impl :as impl]
5 | [nextjournal.markdown.utils :as u]
6 | [nextjournal.markdown.transform :as markdown.transform]))
7 |
8 | (def empty-doc u/empty-doc)
9 |
10 | (defn parse*
11 | "Turns a markdown string into an AST of nested clojure data.
12 |    Allows parsing multiple strings into the same document
13 | e.g. `(-> u/empty-doc (parse* text-1) (parse* text-2))`."
14 | ([markdown-text] (parse* empty-doc markdown-text))
15 | ([ctx markdown-text]
16 | (-> ctx
17 | (update :text-tokenizers (partial map u/normalize-tokenizer))
18 | (impl/parse markdown-text))))
19 |
20 | (defn parse
21 | "Turns a markdown string into an AST of nested clojure data.
22 |
23 |   Accepts options:
24 | - `:text-tokenizers` to customize parsing of text in leaf nodes (see https://nextjournal.github.io/markdown/notebooks/parsing_extensibility).
25 | "
26 | ([markdown-text] (parse empty-doc markdown-text))
27 | ([ctx markdown-text]
28 | (-> (parse* ctx markdown-text)
29 | (dissoc :text-tokenizers
30 | :text->id+emoji-fn
31 | ::impl/footnote-offset
32 | ::impl/id->index
33 | ::impl/label->footnote-ref
34 | ::impl/path
35 | ::impl/root))))
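
(comment
  ;; a rough sketch of opting into a custom text tokenizer; the tokenizer map here is
  ;; illustrative (see notebooks/parsing_extensibility for the full story)
  (parse (update empty-doc :text-tokenizers conj
                 {:regex #"\[\[([^\]]+)\]\]"
                  :handler (fn [match] {:type :internal-link :text (match 1)})})
         "some [[wiki]] link"))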
36 |
37 | (comment
38 | (-> u/empty-doc
39 | (parse* "# title
40 | * one
41 | * two
42 | ")
43 | (parse* "new par")
44 | (parse* "new par")))
45 |
46 | (defn ->hiccup
47 | "Turns a markdown string into hiccup."
48 | ([markdown] (->hiccup markdown.transform/default-hiccup-renderers markdown))
49 | ([ctx markdown]
50 | (let [parsed (if (string? markdown)
51 | (parse markdown)
52 | markdown)]
53 | (markdown.transform/->hiccup ctx parsed))))
54 |
55 | (comment
56 | (parse "# 🎱 Hello")
57 |
58 | (parse "# Hello Markdown
59 | - [ ] what
60 | - [ ] [nice](very/nice/thing)
61 | - [x] ~~thing~~
62 | ")
63 |
64 | (-> (nextjournal.markdown.graaljs/parse "[alt](https://this/is/a.link)") :content first :content first)
65 | (-> (parse "[alt](https://this/is/a.link)") :content first :content first)
66 |
67 | (parse "# Hello Markdown
68 | - [ ] what
69 | - [ ] [nice](very/nice/thing)
70 | - [x] ~~thing~~
71 | ")
72 |
73 | (->> (with-out-str
74 | (time (dotimes [_ 100] (parse (slurp "notebooks/reference.md")))))
75 | (re-find #"\d+.\d+")
76 | parse-double
77 | ((fn [d] (/ d 100))))
78 |
79 | (->hiccup "# Hello Markdown
80 |
81 | * What's _going_ on?
82 | ")
83 |
84 | (->hiccup
85 | (assoc markdown.transform/default-hiccup-renderers
86 | :heading (fn [ctx node]
87 | [:h1.some-extra.class
88 | (markdown.transform/into-markup [:span.some-other-class] ctx node)]))
89 | "# Hello Markdown
90 | * What's _going_ on?
91 | ")
92 |
93 | ;; launch shadow cljs repl
94 | (shadow.cljs.devtools.api/repl :browser-test))
95 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/impl.clj:
--------------------------------------------------------------------------------
1 | ;; # 🧩 Parsing
2 | (ns nextjournal.markdown.impl
3 | (:require [clojure.zip :as z]
4 | [nextjournal.markdown.impl.extensions :as extensions]
5 | [nextjournal.markdown.impl.types :as t]
6 | [nextjournal.markdown.utils :as u])
7 | (:import (org.commonmark.ext.autolink AutolinkExtension)
8 | (org.commonmark.ext.footnotes FootnotesExtension FootnoteReference FootnoteDefinition InlineFootnote)
9 | (org.commonmark.ext.gfm.strikethrough Strikethrough StrikethroughExtension)
10 | (org.commonmark.ext.gfm.tables TableBlock TableBody TableRow TableHead TableCell TablesExtension TableCell$Alignment)
11 | (org.commonmark.ext.task.list.items TaskListItemsExtension TaskListItemMarker)
12 | (org.commonmark.node Node AbstractVisitor
13 | Document
14 | BlockQuote
15 | BulletList
16 | OrderedList
17 | Code
18 | FencedCodeBlock
19 | IndentedCodeBlock
20 | Heading
21 | Text
22 | Paragraph
23 | Emphasis
24 | StrongEmphasis
25 | ListBlock
26 | ListItem
27 | Link
28 | LinkReferenceDefinition
29 | ThematicBreak
30 | SoftLineBreak
31 | HardLineBreak
32 | HtmlInline
33 | Image
34 | HtmlBlock)
35 | (org.commonmark.parser Parser)))
36 |
37 | (set! *warn-on-reflection* true)
38 | ;; TODO:
39 | ;; - [x] inline formulas
40 | ;; - [x] block formulas
41 | ;; - [x] tight lists
42 | ;; - [x] task lists
43 | ;; - [x] footnotes
44 | ;; - [ ] strikethroughs ext
45 | ;; - [x] tables
46 | ;; - [x] fenced code info
47 | ;; - [ ] html nodes
48 | ;; - [ ] auto link
49 | ;; - [ ] promote single images as blocks
50 | ;; - [ ] [[TOC]] (although not used in Clerk)
51 |
52 |
53 | (comment
54 | (parse "* this is inline $\\phi$ math
55 | * other "))
56 |
57 | (def ^Parser parser
58 | (.. Parser
59 | builder
60 | (extensions [(extensions/create)
61 | (AutolinkExtension/create)
62 | (TaskListItemsExtension/create)
63 | (TablesExtension/create)
64 | (StrikethroughExtension/create)
65 | (.. (FootnotesExtension/builder)
66 | (inlineFootnotes true)
67 | (build))])
68 | build))
69 |
70 | ;; helpers / ctx
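;; commonmark renders paragraphs inside tight lists without a wrapper; while walking the tree
;; we track tightness in a dynamic var and emit :plain instead of :paragraph nodes there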
71 | (def ^:dynamic *in-tight-list?* false)
72 |
73 | (defn paragraph-type [] (if *in-tight-list?* :plain :paragraph))
74 |
75 | (defn in-tight-list? [node]
76 | (cond
77 | (instance? ListBlock node) (.isTight ^ListBlock node)
78 | (instance? BlockQuote node) false
79 | :else *in-tight-list?*))
80 |
81 | (defmacro with-tight-list [node & body]
82 | `(binding [*in-tight-list?* (in-tight-list? ~node)]
83 | ~@body))
84 |
85 | ;; open/close multimethods dispatching on the concrete commonmark-java node class
86 | (defmulti open-node (fn [_ctx node] (type node)))
87 | (defmulti close-node (fn [_ctx node] (type node)))
88 |
89 | (defmethod close-node :default [ctx _node] (u/update-current-loc ctx z/up))
90 |
91 | (defmethod open-node Document [ctx _node] ctx)
92 | (defmethod close-node Document [ctx _node] ctx)
93 |
94 | (defmethod open-node Paragraph [ctx _node]
95 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type (paragraph-type)}))))
96 |
97 | (defmethod open-node BlockQuote [ctx _node]
98 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :blockquote}))))
99 |
100 | (defmethod open-node Heading [ctx ^Heading node]
101 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :heading
102 | :heading-level (.getLevel node)}))))
103 |
104 | (defmethod close-node Heading [ctx ^Heading _node]
105 | (u/handle-close-heading ctx))
106 |
107 | (defmethod open-node HtmlInline [ctx ^HtmlInline node]
108 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :html-inline
109 | :content [{:type :text
110 | :text (.getLiteral node)}]}))))
111 |
112 | (defmethod open-node HtmlBlock [ctx ^HtmlBlock node]
113 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :html-block
114 | :content [{:type :text
115 | :text (.getLiteral node)}]}))))
116 |
117 | (defmethod open-node BulletList [ctx ^ListBlock _node]
118 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :bullet-list :content [] #_#_:tight? (.isTight node)}))))
119 |
120 | (defmethod open-node OrderedList [ctx _node]
121 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :numbered-list :content []}))))
122 |
123 | (defmethod open-node ListItem [ctx _node]
124 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :list-item :content []}))))
125 |
126 | (defmethod open-node Emphasis [ctx _node]
127 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :em :content []}))))
128 |
129 | (defmethod open-node StrongEmphasis [ctx _node]
130 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :strong :content []}))))
131 |
132 | (defmethod open-node Code [ctx ^Code node]
133 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :monospace
134 | :content [{:type :text
135 | :text (.getLiteral node)}]}))))
136 |
137 | (defmethod open-node Strikethrough [ctx _node]
138 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :strikethrough :content []}))))
139 |
140 | (defmethod open-node Link [ctx ^Link node]
141 | (u/update-current-loc ctx (fn [loc]
142 | (u/zopen-node loc {:type :link
143 | :attrs (cond-> {:href (.getDestination node)}
144 | (.getTitle node)
145 | (assoc :title (.getTitle node)))}))))
146 |
147 | (defmethod open-node IndentedCodeBlock [ctx ^IndentedCodeBlock node]
148 | (u/update-current-loc ctx (fn [loc]
149 | (u/zopen-node loc {:type :code
150 | :content [{:type :text
151 | :text (.getLiteral node)}]}))))
152 |
153 | (defmethod open-node FencedCodeBlock [ctx ^FencedCodeBlock node]
154 | (u/update-current-loc ctx (fn [loc]
155 | (u/zopen-node loc (merge {:type :code
156 | :info (.getInfo node)
157 | :content [{:type :text
158 | :text (.getLiteral node)}]}
159 | (u/parse-fence-info (.getInfo node)))))))
160 |
161 | (defmethod open-node Image [ctx ^Image node]
162 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :image
163 | :attrs {:src (.getDestination node) :title (.getTitle node)}}))))
164 |
165 | (defmethod open-node TableBlock [ctx ^TableBlock _node]
166 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table}))))
167 | (defmethod open-node TableHead [ctx ^TableHead _node]
168 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table-head}))))
169 | (defmethod open-node TableBody [ctx ^TableBody _node]
170 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table-body}))))
171 | (defmethod open-node TableRow [ctx ^TableRow _node]
172 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table-row}))))
173 |
174 | (defn alignment->keyword [enum]
175 | (condp = enum
176 | TableCell$Alignment/LEFT :left
177 | TableCell$Alignment/CENTER :center
178 | TableCell$Alignment/RIGHT :right))
179 |
180 | (defmethod open-node TableCell [ctx ^TableCell node]
181 | (u/update-current-loc ctx (fn [loc]
182 | (let [alignment (some-> (.getAlignment node) alignment->keyword)]
183 | (u/zopen-node loc (cond-> {:type (if (.isHeader node) :table-header :table-data)
184 | :content []}
185 | alignment
186 | (assoc :alignment alignment
187 | ;; TODO: drop/deprecate this, compute in transform
188 | :attrs {:style (str "text-align:" (name alignment))})))))))
189 |
190 | (defmethod open-node FootnoteDefinition [ctx ^FootnoteDefinition node]
191 | (-> ctx
192 | (assoc ::root :footnotes)
193 | (u/update-current-loc (fn [loc]
194 | (-> loc
195 | (z/append-child {:type :footnote
196 | :label (.getLabel node)
197 | :content []}) z/down z/rightmost)))))
198 |
199 | (defmethod close-node FootnoteDefinition [ctx ^FootnoteDefinition _node]
200 | (-> ctx (u/update-current-loc z/up) (assoc ::root :doc)))
201 |
202 | (defmethod open-node InlineFootnote [{:as ctx ::keys [label->footnote-ref]} ^InlineFootnote _node]
203 | (let [label (str "inline-note-" (count label->footnote-ref))
204 | footnote-ref {:type :footnote-ref
205 | :inline? true
206 | :ref (count label->footnote-ref)
207 | :label label}]
208 | (-> ctx
209 | (u/update-current-loc z/append-child footnote-ref)
210 | (update ::label->footnote-ref assoc label footnote-ref)
211 | (assoc ::root :footnotes)
212 | (u/update-current-loc (fn [loc]
213 | (-> loc
214 | (u/zopen-node {:type :footnote :inline? true :label label :content []})
215 | (u/zopen-node {:type :paragraph :content []})))))))
216 |
217 | (defmethod close-node InlineFootnote [ctx ^InlineFootnote _node]
218 | (-> ctx (u/update-current-loc (comp z/up z/up)) (assoc ::root :doc)))
219 |
220 | (defn handle-todo-list [loc ^TaskListItemMarker node]
221 | (-> loc
222 | (z/edit assoc :type :todo-item :attrs {:checked (.isChecked node)})
223 | z/up (z/edit assoc :type :todo-list)
224 | z/down z/rightmost))
225 |
226 | (def ^:private visitChildren-meth
227 |   ;; cached via delay so the reflective method lookup happens only once
228 | (delay (let [meth (.getDeclaredMethod AbstractVisitor "visitChildren" (into-array [Node]))]
229 | (.setAccessible meth true)
230 | meth)))
231 |
232 | (defn node->data [{:as ctx-in :keys [footnotes]} ^Node node]
233 | (assert (:type ctx-in) ":type must be set on initial doc")
234 | (assert (:content ctx-in) ":content must be set on initial doc")
235 | (assert (::root ctx-in) "context needs a ::root")
236 | ;; TODO: unify pre/post parse across impls
237 | (let [!ctx (atom (assoc ctx-in
238 | :doc (u/->zip ctx-in)
239 | :footnotes (u/->zip {:type :footnotes :content (or footnotes [])})))]
240 | (.accept node
241 | (proxy [AbstractVisitor] []
242 | ;; proxy can't overload method by arg type, while gen-class can: https://groups.google.com/g/clojure/c/TVRsy4Gnf70
243 | (visit [^Node node]
244 | (condp instance? node
245 | ;; leaf nodes
246 | LinkReferenceDefinition :ignore
247 | ;;Text (swap! !ctx u/update-current z/append-child {:type :text :text (.getLiteral ^Text node)})
248 | Text (swap! !ctx u/handle-text-token (.getLiteral ^Text node))
249 | ThematicBreak (swap! !ctx u/update-current-loc z/append-child {:type :ruler})
250 | SoftLineBreak (swap! !ctx u/update-current-loc z/append-child {:type :softbreak})
251 | HardLineBreak (swap! !ctx u/update-current-loc z/append-child {:type :hardbreak})
252 | TaskListItemMarker (swap! !ctx u/update-current-loc handle-todo-list node)
253 | nextjournal.markdown.impl.types.CustomNode
254 | (case (t/nodeType node)
255 | :block-formula (swap! !ctx u/update-current-loc z/append-child {:type :block-formula :text (t/getLiteral node)})
256 | :inline-formula (swap! !ctx u/update-current-loc z/append-child {:type :formula :text (t/getLiteral node)})
257 | :toc (swap! !ctx u/update-current-loc z/append-child {:type :toc}))
258 | FootnoteReference (swap! !ctx (fn [{:as ctx ::keys [label->footnote-ref]}]
259 | (let [label (.getLabel ^FootnoteReference node)
260 | footnote-ref (or (get label->footnote-ref label)
261 | {:type :footnote-ref
262 | :ref (count label->footnote-ref)
263 | :label label})]
264 | (-> ctx
265 | (u/update-current-loc z/append-child footnote-ref)
266 | (update ::label->footnote-ref assoc label footnote-ref)))))
267 |
268 | ;; else branch nodes
269 | (if (get-method open-node (class node))
270 | (with-tight-list node
271 | (swap! !ctx open-node node)
272 | (.invoke ^java.lang.reflect.Method @visitChildren-meth this (into-array Object [node]))
273 | (swap! !ctx close-node node))
274 | (prn ::not-implemented node))))))
275 |
276 | (let [{:as ctx-out :keys [doc title toc footnotes] ::keys [label->footnote-ref]} (deref !ctx)]
277 | (-> ctx-out
278 | (dissoc :doc)
279 | (cond->
280 | (and title (not (:title ctx-in)))
281 | (assoc :title title))
282 | (assoc :toc toc
283 | :content (:content (z/root doc))
284 | ::label->footnote-ref label->footnote-ref
285 | :footnotes
286 |                  ;; there will never be references without definitions, but definitions without references may happen
287 | (->> footnotes z/root :content
288 | (keep (fn [{:as footnote :keys [label]}]
289 | (when (contains? label->footnote-ref label)
290 | (assoc footnote :ref (:ref (label->footnote-ref label))))))
291 | (sort-by :ref)
292 | (vec)))))))
293 |
294 | (defn parse
295 | ([md] (parse u/empty-doc md))
296 | ([ctx md] (node->data (update ctx :text-tokenizers (partial map u/normalize-tokenizer))
297 | (.parse parser md))))
298 |
299 | (comment
300 | (import '[org.commonmark.renderer.html HtmlRenderer])
301 | (remove-all-methods open-node)
302 | (remove-all-methods close-node)
303 |
304 | (.render (.build (HtmlRenderer/builder))
305 | (.parse parser "some text^[and a note]"))
306 |
307 | (parse "some text^[and a note]")
308 |
309 | (-> {}
310 | (parse "# Title")
311 | (parse "some para^[with note]")
312 | (parse "some para^[with other note]"))
313 |
314 | (parse "some `marks` inline and inline $formula$ with a [link _with_ em](https://what.tfk)")
315 | (parse (assoc u/empty-doc :text-tokenizers [u/internal-link-tokenizer])
316 | "what a [[link]] is this")
317 | (parse "what the real deal is")
318 | (parse "some
319 |
320 | [[TOC]]
321 |
322 | what")
323 |
324 | (parse "# Ahoi
325 |
326 | > par
327 | > broken
328 |
329 | * a tight **strong** list
330 | * with [a nice link](/to/some 'with a title')
331 | * * with nested
332 |
333 |   * loose list
334 |
335 | - [x] one inline formula $\\phi$ here
336 | - [ ] two
337 |
338 | ---
339 | ")
340 |
341 | ;; footnotes
342 | (parse "_hello_ what and foo[^note1] and
343 |
344 | And what.
345 |
346 | [^note1]: the _what_
347 |
348 | * and new text[^note2] at the end.
349 | * the hell^[crazy _inline_ note with [a](https://a-link.xx) inside]
350 |
351 | [^note2]: conclusion and $\\phi$
352 |
353 | [^note3]: this should just be ignored
354 | ")
355 |
356 | (parse (slurp "../clerk-px23/README.md"))
357 | ;; => :ref 27
358 |
359 | (parse "Knuth's _Literate Programming_[^literateprogramming][^knuth84] emphasized the importance of focusing on human beings as consumers of computer programs. His original implementation involved authoring files that combine source code and documentation, which were then divided into two derived artifacts: source code for the computer and a typeset document in natural language to explain the program.
360 |
361 | [^knuth84]: [Literate Programming](https://doi.org/10.1093/comjnl/27.2.97)
362 | [^literateprogramming]: An extensive archive of related material is maintained [here](http://www.literateprogramming.com).")
363 |
364 | (-> (parse "this might[^reuse] here[^another] and here[^reuse] here
365 |
366 | [^another]: stuff
367 | [^reuse]: define here
368 |
369 | this should be left as is
370 |
371 | another paragraph reusing[^reuse]
372 | ")
373 | md.parser/insert-sidenote-containers))
374 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/impl.cljs:
--------------------------------------------------------------------------------
1 | ;; # 🧩 Parsing
2 | (ns nextjournal.markdown.impl
3 | (:require ["/js/markdown" :as md]
4 | [clojure.zip :as z]
5 | [nextjournal.markdown.utils :as u]))
6 |
7 | (defn hlevel [^js token]
8 | (let [hn (.-tag token)]
9 | (when (string? hn) (some-> (re-matches #"h([\d])" hn) second js/parseInt))))
10 |
11 | ;; leaf nodes
12 | ;; TODO: use from utils
13 | (defn text-node [text] {:type :text :text text})
14 | (defn formula [text] {:type :formula :text text})
15 | (defn block-formula [text] {:type :block-formula :text text})
16 |
17 | ;; node constructors
18 | (defn node
19 | [type content attrs top-level]
20 | (cond-> {:type type :content content}
21 | (seq attrs) (assoc :attrs attrs)
22 | (seq top-level) (merge top-level)))
23 |
24 | (defn empty-text-node? [{text :text t :type}] (and (= :text t) (empty? text)))
25 |
26 | (defn push-node [ctx node]
27 | (cond-> ctx
28 | (not (empty-text-node? node))
29 | (u/update-current-loc z/append-child node)))
30 |
31 | (defn open-node
32 | ([ctx type] (open-node ctx type {}))
33 | ([ctx type attrs] (open-node ctx type attrs {}))
34 | ([ctx type attrs top-level]
35 | (u/update-current-loc ctx u/zopen-node (node type [] attrs top-level))))
36 |
37 | (defn close-node [doc] (u/update-current-loc doc z/up))
38 |
39 | (comment
40 |
41 | (-> u/empty-doc
42 | (assoc :doc (u/->zip {:type :doc})) ;; [:content -1]
43 | (open-node :heading) ;; [:content 0 :content -1]
44 |       (push-node {:type :text :text "foo"}) ;; [:content 0 :content 0]
45 |       (push-node {:type :text :text "foo"}) ;; [:content 0 :content 1]
46 | close-node ;; [:content 1]
47 |
48 | (open-node :paragraph) ;; [:content 1 :content]
49 |       (push-node {:type :text :text "hello"})
50 | close-node
51 | (open-node :bullet-list)
52 | ))
53 | ;; endregion
54 |
55 | ;; region token handlers
56 | (declare apply-tokens)
57 | (defmulti apply-token (fn [_doc ^js token] (.-type token)))
58 | (defmethod apply-token :default [doc token]
59 | (prn :apply-token/unknown-type {:token token})
60 | doc)
61 |
62 | ;; blocks
63 | (defmethod apply-token "heading_open" [doc token] (open-node doc :heading {} {:heading-level (hlevel token)}))
64 | (defmethod apply-token "heading_close" [ctx _]
65 | (u/handle-close-heading ctx))
66 |
67 | ;; for building the TOC we only care about headings at the document's top level (not e.g. those nested under lists) ⬆
68 |
69 | (defmethod apply-token "paragraph_open" [doc ^js token]
70 | ;; no trace of tight vs loose on list nodes
71 | ;; markdown-it passes this info directly to paragraphs via this `hidden` key
72 | (open-node doc (if (.-hidden token) :plain :paragraph)))
73 |
74 | (defmethod apply-token "paragraph_close" [doc _token] (close-node doc))
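
;; A quick sketch of the effect (not evaluated): in a tight list markdown-it
;; marks the item paragraphs as hidden, so they come out as :plain nodes; with
;; blank lines between items they stay :paragraph nodes.
(comment
  (parse "* a\n* b")    ;; items contain :plain nodes
  (parse "* a\n\n* b")) ;; items contain :paragraph nodes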
75 |
76 | (defmethod apply-token "bullet_list_open" [doc ^js token]
77 | (let [attrs (.-attrs token)
78 | has-todos (:has-todos attrs)]
79 | (open-node doc (if has-todos :todo-list :bullet-list) attrs)))
80 |
81 | (defmethod apply-token "bullet_list_close" [doc _token] (close-node doc))
82 |
83 | (defmethod apply-token "ordered_list_open" [doc ^js token] (open-node doc :numbered-list (.-attrs token)))
84 | (defmethod apply-token "ordered_list_close" [doc _token] (close-node doc))
85 |
86 | (defmethod apply-token "list_item_open" [doc ^js token]
87 | (let [attrs (.-attrs token)
88 | todo (:todo attrs)]
89 | (open-node doc (if todo :todo-item :list-item) attrs)))
90 | (defmethod apply-token "list_item_close" [doc _token] (close-node doc))
91 |
92 | (defmethod apply-token "math_block" [doc ^js token] (push-node doc (block-formula (.-content token))))
93 | (defmethod apply-token "math_block_end" [doc _token] doc)
94 |
95 | (defmethod apply-token "hr" [doc _token] (push-node doc {:type :ruler}))
96 |
97 | (defmethod apply-token "blockquote_open" [doc _token] (open-node doc :blockquote))
98 | (defmethod apply-token "blockquote_close" [doc _token] (close-node doc))
99 |
100 | (defmethod apply-token "tocOpen" [doc _token] (open-node doc :toc))
101 | (defmethod apply-token "tocBody" [doc _token] doc) ;; ignore body
102 | (defmethod apply-token "tocClose" [ctx _token]
103 | (-> ctx
104 | (u/update-current-loc
105 | (fn [loc]
106 | (-> loc (z/edit dissoc :content) z/up)))))
107 |
108 | (defmethod apply-token "code_block" [doc ^js token]
109 | (let [c (.-content token)]
110 | (-> doc
111 | (open-node :code)
112 | (push-node (text-node c))
113 | close-node)))
114 |
115 | (defmethod apply-token "fence" [doc ^js token]
116 | (let [c (.-content token)
117 | i (.-info token)]
118 | (-> doc
119 | (open-node :code {} (assoc (u/parse-fence-info i) :info i))
120 | (push-node (text-node c))
121 | close-node)))
122 |
123 | (defn footnote-label [{:as _ctx ::keys [footnote-offset]} token]
124 | ;; TODO: consider initial offset in case we're parsing multiple inputs
125 | (or (.. token -meta -label)
126 |       ;; inline footnotes won't have a label
127 | (str "inline-note-" (+ footnote-offset (.. token -meta -id)))))
128 |
129 | ;; footnotes
130 | (defmethod apply-token "footnote_ref" [{:as ctx ::keys [label->footnote-ref]} token]
131 | (let [label (footnote-label ctx token)
132 | footnote-ref (or (get label->footnote-ref label)
133 | {:type :footnote-ref :inline? (not (.. token -meta -label))
134 | :ref (count label->footnote-ref) ;; was (+ (count footnotes) (j/get-in token [:meta :id])) ???
135 | :label label})]
136 | (-> ctx
137 | (u/update-current-loc z/append-child footnote-ref)
138 | (update ::label->footnote-ref assoc label footnote-ref))))
139 |
140 | (defmethod apply-token "footnote_open" [ctx token]
141 | ;; TODO unify in utils
142 | (let [label (footnote-label ctx token)]
143 | (-> ctx
144 | (u/update-current-loc (fn [loc]
145 | (u/zopen-node loc {:type :footnote
146 | :inline? (not (.. token -meta -label))
147 | :label label}))))))
148 |
149 | ;; inline footnotes^[like this one]
150 | (defmethod apply-token "footnote_close" [ctx _token]
151 | (-> ctx (u/update-current-loc z/up)))
152 |
153 | (defmethod apply-token "footnote_block_open" [ctx _token]
154 |   ;; store footnotes under a top-level `:footnotes` key
155 | (assoc ctx ::root :footnotes))
156 |
157 | (defmethod apply-token "footnote_block_close"
158 |   ;; restores the path for adding new tokens
159 | [ctx _token]
160 | (assoc ctx ::root :doc))
161 |
162 | (defmethod apply-token "footnote_anchor" [doc _token] doc)
163 |
164 | (comment
165 | (-> "some text^[inline note]
166 | "
167 | md/tokenize flatten-tokens
168 | #_ parse
169 | #_ u/insert-sidenote-containers)
170 |
171 | (-> empty-doc
172 | (update :text-tokenizers (partial map u/normalize-tokenizer))
173 | (apply-tokens (nextjournal.markdown/tokenize "what^[the heck]"))
174 | insert-sidenote-columns
175 | (apply-tokens (nextjournal.markdown/tokenize "# Hello"))
176 | insert-sidenote-columns
177 | (apply-tokens (nextjournal.markdown/tokenize "is^[this thing]"))
178 | insert-sidenote-columns))
179 |
180 | ;; tables
181 | ;; table data tokens might have {:style "text-align:right|left"} attrs, maybe better nested as node > :attrs > :style?
182 | (defmethod apply-token "table_open" [doc _token] (open-node doc :table))
183 | (defmethod apply-token "table_close" [doc _token] (close-node doc))
184 | (defmethod apply-token "thead_open" [doc _token] (open-node doc :table-head))
185 | (defmethod apply-token "thead_close" [doc _token] (close-node doc))
186 | (defmethod apply-token "tr_open" [doc _token] (open-node doc :table-row))
187 | (defmethod apply-token "tr_close" [doc _token] (close-node doc))
188 | (defmethod apply-token "th_open" [doc ^js token] (open-node doc :table-header (.-attrs token)))
189 | (defmethod apply-token "th_close" [doc _token] (close-node doc))
190 | (defmethod apply-token "tbody_open" [doc _token] (open-node doc :table-body))
191 | (defmethod apply-token "tbody_close" [doc _token] (close-node doc))
192 | (defmethod apply-token "td_open" [doc ^js token] (open-node doc :table-data (.-attrs token)))
193 | (defmethod apply-token "td_close" [doc _token] (close-node doc))
194 |
195 | (comment
196 | (->
197 | "
198 | | Syntax | JVM | JavaScript |
199 | |--------|:------------------------:|--------------------------------:|
200 | | foo | Loca _lDate_ ahoiii | goog.date.Date |
201 | | bar | java.time.LocalTime | some [kinky](link/to/something) |
202 | | bag | java.time.LocalDateTime | $\\phi$ |
203 | "
204 | nextjournal.markdown/parse
205 | nextjournal.markdown.transform/->hiccup
206 | ))
207 |
208 | (comment
209 | (->> "# Hello #Fishes
210 |
211 | > what about #this
212 |
213 | _this #should be a tag_, but this [_actually #foo shouldnt_](/bar/) is not."
214 | (parse (update empty-doc :text-tokenizers conj (u/normalize-tokenizer u/hashtag-tokenizer)))))
215 |
216 | (defmethod apply-token "text" [ctx ^js token]
217 | (u/handle-text-token ctx (.-content token)))
218 |
219 | (comment
220 | (def mustache (u/normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}" :handler (fn [m] {:type :eval :text (m 1)})}))
221 | (u/tokenize-text-node mustache {} {:text "{{what}} the {{hellow}}"})
222 | (u/handle-text-token (assoc u/empty-doc :text-tokenizers [mustache])
223 | "foo [[bar]] dang #hashy taggy [[what]] #dangy foo [[great]] and {{eval}} me"))
224 |
225 | ;; inlines
226 | (defmethod apply-token "inline" [doc ^js token] (apply-tokens doc (.-children token)))
227 | (defmethod apply-token "math_inline" [doc ^js token] (push-node doc (formula (.-content token))))
228 | (defmethod apply-token "math_inline_double" [doc ^js token] (push-node doc (formula (.-content token))))
229 |
230 | ;; https://spec.commonmark.org/0.30/#softbreak
231 | (defmethod apply-token "softbreak" [doc _token] (push-node doc {:type :softbreak}))
232 | ;; https://spec.commonmark.org/0.30/#hard-line-break
233 | (defmethod apply-token "hardbreak" [doc _token] (push-node doc {:type :hardbreak}))
234 |
235 | ;; images
236 | (defmethod apply-token "image" [doc ^js token]
237 | (let [attrs (.-attrs token)
238 | children (.-children token)]
239 | (-> doc (open-node :image attrs) (apply-tokens children) close-node)))
240 |
241 | ;; marks
242 | (defmethod apply-token "em_open" [doc _token] (open-node doc :em))
243 | (defmethod apply-token "em_close" [doc _token] (close-node doc))
244 | (defmethod apply-token "strong_open" [doc _token] (open-node doc :strong))
245 | (defmethod apply-token "strong_close" [doc _token] (close-node doc))
246 | (defmethod apply-token "s_open" [doc _token] (open-node doc :strikethrough))
247 | (defmethod apply-token "s_close" [doc _token] (close-node doc))
248 | (defmethod apply-token "link_open" [doc ^js token] (open-node doc :link (.-attrs token)))
249 | (defmethod apply-token "link_close" [doc _token] (close-node doc))
250 | (defmethod apply-token "code_inline" [doc ^js token] (-> doc (open-node :monospace) (push-node (text-node (.-content token))) close-node))
251 |
252 | ;; html
253 | (defmethod apply-token "html_inline" [doc token]
254 | (-> doc (u/update-current-loc z/append-child {:type :html-inline :content [(text-node (.-content token))]})))
255 |
256 | (defmethod apply-token "html_block" [doc token]
257 | (-> doc (u/update-current-loc z/append-child {:type :html-block :content [(text-node (.-content token))]})))
258 |
265 |
266 | ;; endregion
267 |
268 | ;; region data builder api
269 | (defn pairs->kmap [pairs] (into {} (map (juxt (comp keyword first) second)) pairs))
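;; e.g. turning markdown-it attr pairs into a keyword map (illustrative values):
#_(pairs->kmap [["href" "/x"] ["title" "t"]]) ;; => {:href "/x", :title "t"}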
270 | (defn apply-tokens [doc tokens]
271 | (let [mapify-attrs-xf (map (fn [x]
272 | (set! x -attrs (pairs->kmap (.-attrs x)))
273 | x))]
274 | (reduce (mapify-attrs-xf apply-token) doc tokens)))
275 |
276 | (defn parse
277 | ([markdown] (parse u/empty-doc markdown))
278 | ([ctx-in markdown]
279 | ;; TODO: unify implementations
280 | (let [{:as ctx-out :keys [doc title toc footnotes] ::keys [label->footnote-ref]}
281 | (-> ctx-in
282 | (assoc ::footnote-offset (count (::label->footnote-ref ctx-in)))
283 | (update :text-tokenizers (partial map u/normalize-tokenizer))
284 | (assoc :doc (u/->zip ctx-in)
285 | :footnotes (u/->zip {:type :footnotes
286 | :content (or (:footnotes ctx-in) [])}))
287 | (apply-tokens (md/tokenize markdown)))]
288 | (-> ctx-out
289 | (dissoc :doc)
290 | (cond->
291 | (and title (not (:title ctx-in)))
292 | (assoc :title title))
293 | (assoc :toc toc
294 | :content (:content (z/root doc))
295 | ::label->footnote-ref label->footnote-ref
296 | :footnotes
297 |                  ;; there will never be references without definitions, but definitions without references may happen
298 | (->> footnotes z/root :content
299 | (keep (fn [{:as footnote :keys [label]}]
300 | (when (contains? label->footnote-ref label)
301 | (assoc footnote :ref (:ref (label->footnote-ref label))))))
302 | (sort-by :ref)
303 | (vec)))))))
304 |
305 | (comment
306 | (-> (parse "text^[a]") ::label->footnote-ref)
307 |
308 | (-> (parse "text^[a]")
309 | (parse "text^[b]")))
310 |
311 | (comment
312 | (defn pr-dbg [x] (js/console.log (js/JSON.parse (js/JSON.stringify x))))
313 | (parse "# 🎱 Hello")
314 | )
315 |
316 | (comment
317 | (some-> "# Title
318 |
319 | ## Section 1
320 |
321 | foo
322 |
323 | - # What is this? (no!)
324 | - maybe
325 |
326 | ### Section 1.2
327 |
328 | ## Section 2
329 |
330 | some par
331 |
332 | ### Section 2.1
333 |
334 | some other par
335 |
336 | ### Section 2.2
337 |
338 | #### Section 2.2.1
339 |
340 | two two one
341 |
342 | #### Section 2.2.2
343 |
344 | two two two
345 |
346 | ## Section 3
347 |
348 | some final par"
349 | nextjournal.markdown/parse
350 | (section-at [:content 9]) ;; ⬅ paths are stored in TOC sections
351 | nextjournal.markdown.transform/->hiccup))
352 | ;; endregion
353 |
354 |
355 | ;; ## 🔧 Debug
356 | ;; A flattened view of the token tree, for easier inspection of tokens
357 | (defn flatten-tokens [tokens]
358 | (into []
359 | (comp
360 | (mapcat (partial tree-seq (comp seq :children) :children))
361 | (map #(select-keys % [:type :content :hidden :level :info :meta])))
362 | tokens))
363 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/impl/extensions.clj:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown.impl.extensions
2 | (:require [clojure.string :as str]
3 | [nextjournal.markdown.impl.types :as t])
4 | (:import (java.util.regex Matcher Pattern)
5 | (org.commonmark.parser Parser$ParserExtension Parser$Builder SourceLine)
6 | (org.commonmark.parser.beta InlineContentParser InlineContentParserFactory ParsedInline InlineParserState)
7 | (org.commonmark.parser.block AbstractBlockParser BlockContinue BlockParserFactory BlockStart ParserState BlockParser)))
8 |
9 | (set! *warn-on-reflection* true)
10 |
11 | (def block-formula-delimiter-regex (re-pattern "^\\$\\$"))
12 | (def block-toc-delimiter-regex (re-pattern "^\\[\\[TOC\\]\\]"))
13 |
14 | (defn delimiter-matcher ^Matcher [^Pattern regex ^ParserState state]
15 | (let [^SourceLine line (.getLine state)
16 | next-non-space (.getNextNonSpaceIndex state)]
17 | (re-matcher regex (subs (.getContent line) next-non-space))))
18 |
19 | (defn block-formula-delimiter-matcher ^Matcher [^ParserState s] (delimiter-matcher block-formula-delimiter-regex s))
20 | (defn block-toc-delimiter-matcher ^Matcher [^ParserState s] (delimiter-matcher block-toc-delimiter-regex s))
21 |
22 | (defn inline-formula-parser []
23 | (proxy [InlineContentParser] []
24 | (tryParse [^InlineParserState parser-state]
25 | (let [scanner (.scanner parser-state)
26 | ;; move past opening $
27 | _ (.next scanner)
28 | open-pos (.position scanner)]
29 | (if (= -1 (.find scanner \$))
30 | (ParsedInline/none)
31 | (let [^String content (.getContent (.getSource scanner open-pos (.position scanner)))]
32 | (.next scanner)
33 | (ParsedInline/of (t/->InlineFormula content) (.position scanner))))))))
34 |
35 | (defn close-block-formula? [state !lines]
36 | ;; we allow 1-liner blocks like A)
37 | ;; text
38 | ;;
39 | ;; $$\\bigoplus$$
40 | ;;
41 | ;; or blocks delimited by $$ B)
42 | ;;
43 | ;; $$
44 | ;; \\bigoplus
45 | ;; $$
46 | (or #_A (when-some [l (last @!lines)] (str/ends-with? (str/trimr l) "$$"))
47 | #_B (some? (re-find (block-formula-delimiter-matcher state)))))
48 |
49 | (defn block-formula-parser ^BlockParser []
50 | (let [block-formula (t/->BlockFormula)
51 | !lines (atom [])]
52 | (proxy [AbstractBlockParser] []
53 | (isContainer [] false)
54 | (canContain [_other] false)
55 | (getBlock [] block-formula)
56 | (addLine [^SourceLine line]
57 | (when-some [l (not-empty (str/trim (.getContent line)))]
58 | (swap! !lines conj l)))
59 | (closeBlock []
60 | (t/setLiteral block-formula (let [formula-body (str/join \newline @!lines)]
61 | (cond-> formula-body
62 | (str/ends-with? formula-body "$$")
63 | (subs 0 (- (count formula-body) 2))))))
64 | (tryContinue [^ParserState state]
65 | (let [non-space (.getNextNonSpaceIndex state)]
66 | (if (close-block-formula? state !lines)
67 | (BlockContinue/finished)
68 | (BlockContinue/atIndex non-space)))))))
69 |
70 | (def block-formula-parser-factory
71 | (proxy [BlockParserFactory] []
72 | (tryStart [^ParserState state _matchedBlockParser]
73 | (if (<= 4 (.getIndent state))
74 | (BlockStart/none)
75 | (let [next-non-space (.getNextNonSpaceIndex state)
76 | m (block-formula-delimiter-matcher state)]
77 | (if (re-find m)
78 | (.atIndex (BlockStart/of (into-array [(block-formula-parser)]))
79 | (+ next-non-space (.end m)))
80 | (BlockStart/none)))))))
81 |
82 | (defn block-toc-parser ^BlockParser []
83 | (let [toc (t/->ToC)]
84 | (proxy [AbstractBlockParser] []
85 | (getBlock [] toc)
86 | ;; close immediately
87 | (tryContinue [^ParserState _state] (BlockContinue/finished)))))
88 |
89 | (def block-toc-parser-factory
90 | (proxy [BlockParserFactory] []
91 | (tryStart [^ParserState state _matchedBlockParser]
92 | (if (<= 4 (.getIndent state))
93 | (BlockStart/none)
94 | (let [next-non-space (.getNextNonSpaceIndex state)
95 | m (block-toc-delimiter-matcher state)]
96 | (if (re-find m)
97 | (.atIndex (BlockStart/of (into-array [(block-toc-parser)]))
98 | (+ next-non-space (.end m)))
99 | (BlockStart/none)))))))
100 |
101 | (defn create []
102 | (proxy [Object Parser$ParserExtension] []
103 | (extend [^Parser$Builder pb]
104 | (.customBlockParserFactory pb block-toc-parser-factory)
105 | (.customBlockParserFactory pb block-formula-parser-factory)
106 | (.customInlineContentParserFactory pb (reify InlineContentParserFactory
107 | (getTriggerCharacters [_] #{\$})
108 | (create [_] (inline-formula-parser)))))))
109 |
110 | (comment
111 | (class (re-matcher #"" ""))
112 |   (nextjournal.markdown.impl/parse "
113 | # Title
114 |
115 | This is an $\\mathit{inline}$ formula
116 |
117 | $$
118 | \\begin{equation}
119 | \\dfrac{1}{128\\pi^{2}}
120 | \\end{equation}
121 | $$
122 |
123 | * a $\\int_a^b\\phi(t)dt$ with discount
124 | * and what"))
125 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/impl/types.clj:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown.impl.types
2 | (:import [nextjournal.markdown.impl.types CustomNode]))
3 |
4 | ;; See also
5 | ;; https://github.com/noties/Markwon/blob/master/markwon-ext-latex/src/main/java/io/noties/markwon/ext/latex/JLatexMathBlockParser.java
6 |
7 | (set! *warn-on-reflection* true)
8 |
9 | (defn ->InlineFormula [lit]
10 | (let [state (atom lit)]
11 | (proxy [org.commonmark.node.CustomNode CustomNode] []
12 | (getLiteral [] @state)
13 | (nodeType [] :inline-formula))))
14 |
15 | (defn ->BlockFormula
16 | ([] (->BlockFormula nil))
17 | ([lit]
18 | (let [state (atom lit)]
19 | (proxy [org.commonmark.node.CustomBlock CustomNode] []
20 | (getLiteral [] @state)
21 | (setLiteral [v] (do (reset! state v)
22 | this))
23 | (nodeType [] :block-formula)))))
24 |
25 | (defn ->ToC []
26 | (proxy [org.commonmark.node.CustomBlock CustomNode] []
27 | (nodeType [] :toc)))
28 |
29 | (defn setLiteral [^CustomNode n lit]
30 | (.setLiteral n lit))
31 |
32 | (defn getLiteral [^CustomNode n]
33 | (.getLiteral n))
34 |
35 | (defn nodeType [^CustomNode n]
36 | (.nodeType n))
37 |
38 | (comment
39 | (def i (->InlineFormula "1+1"))
40 | (instance? nextjournal.markdown.impl.types.CustomNode i)
41 | (let [b (->BlockFormula)]
42 | (-> (setLiteral b "dude")
43 | (getLiteral)))
44 | )
45 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/impl/types/CustomNode.class:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextjournal/markdown/5829ec101331b1702841094f4dc897ee46f0ddcd/src/nextjournal/markdown/impl/types/CustomNode.class
--------------------------------------------------------------------------------
/src/nextjournal/markdown/impl/types/CustomNode.java:
--------------------------------------------------------------------------------
1 | package nextjournal.markdown.impl.types;
2 |
3 | // rebuild with:
4 | // javac -source 8 -target 1.8 src/nextjournal/markdown/impl/types/CustomNode.java
5 |
6 | public interface CustomNode {
7 |
8 | public Object setLiteral(Object v);
9 | public Object getLiteral();
10 | public Object nodeType();
11 |
12 | }
13 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/transform.cljc:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown.transform
2 | "transform markdown data as returned by `nextjournal.markdown/parse` into other formats, currently:
3 | * hiccup")
4 |
5 | ;; helpers
6 | (defn guard [pred val] (when (pred val) val))
7 | (defn ->text [{:as _node :keys [type text content]}]
8 | (or (when (= :softbreak type) " ")
9 | text
10 | (apply str (map ->text content))))
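
;; e.g. (a sketch) ->text flattens nested content and renders softbreaks as spaces:
#_(->text {:type :paragraph :content [{:type :text :text "a"} {:type :softbreak} {:type :text :text "b"}]}) ;; => "a b"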
11 |
12 | (defn hydrate-toc
13 |   "Scans the doc contents and replaces any toc placeholder node with the toc accumulated during parsing."
14 | [{:as doc :keys [toc]}]
15 | (update doc :content (partial into [] (map (fn [{:as node t :type}] (if (= :toc t) toc node))))))
16 |
17 | (defn table-alignment [{:keys [style]}]
18 | (when (string? style)
19 | (let [[_ alignment] (re-matches #"^text-align:(.+)$" style)]
20 | (when alignment {:text-align alignment}))))
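
;; e.g. (illustrative):
#_(table-alignment {:style "text-align:center"}) ;; => {:text-align "center"}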
21 |
22 | (defn heading-markup [{l :heading-level}] [(keyword (str "h" (or l 1)))])
23 |
24 | ;; into-markup
25 | (declare ->hiccup)
26 | (defn into-markup
27 |   "Takes a hiccup vector, a context and a node; puts the node's `:content` into the markup, mapping each child through `->hiccup`."
28 | [mkup ctx {:as node :keys [text content]}]
29 | (cond ;; formula nodes are leaves: have text and no contents
30 | text (conj mkup text)
31 | content (into mkup
32 | (keep (partial ->hiccup (assoc ctx ::parent node)))
33 | content)))
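
;; For instance (a sketch, using the default renderers defined further below):
#_(into-markup [:em] default-hiccup-renderers {:content [{:type :text :text "hi"}]}) ;; => [:em "hi"]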
34 |
35 | (defn toc->hiccup [{:as ctx ::keys [parent]} {:as node :keys [attrs content children]}]
36 | (let [id (:id attrs)
37 | toc-item (cond-> [:div]
38 | (seq content)
39 | (conj [:a {:href (str "#" id) #?@(:cljs [:on-click #(when-some [el (.getElementById js/document id)] (.preventDefault %) (.scrollIntoViewIfNeeded el))])}
40 | (-> node heading-markup (into-markup ctx node))])
41 | (seq children)
42 | (conj (into [:ul] (map (partial ->hiccup (assoc ctx ::parent node))) children)))]
43 | (cond->> toc-item
44 | (= :toc (:type parent))
45 | (conj [:li.toc-item])
46 | (not= :toc (:type parent))
47 | (conj [:div.toc]))))
48 |
49 | (comment
50 | ;; override toc rendering
51 | (-> "# Hello
52 | a paragraph
53 | [[TOC]]
54 | ## Section _nice_ One
55 | ### Section Nested
56 | ## Section **terrible** Idea
57 | "
58 | nextjournal.markdown/parse
59 | ;; :toc
60 | ;; ->hiccup #_
61 | (->> (->hiccup (assoc default-hiccup-renderers
62 | :toc (fn [ctx {:as node :keys [content children heading-level]}]
63 | (cond-> [:div]
64 | (seq content) (conj [:span.title {:data-level heading-level} (:id node)])
65 | (seq children) (conj (into [:ul] (map (partial ->hiccup ctx)) children)))))))))
66 |
67 | (def default-hiccup-renderers
68 | {:doc (partial into-markup [:div])
69 | :heading (fn [ctx {:as node :keys [attrs]}] (-> (heading-markup node) (conj attrs) (into-markup ctx node)))
70 | :paragraph (partial into-markup [:p])
71 | :plain (fn [ctx {:keys [content]}]
72 | (seq (mapv (partial ->hiccup ctx) content)))
73 | :text (fn [_ {:keys [text]}] text)
74 | :hashtag (fn [_ {:keys [text]}] [:a.tag {:href (str "/tags/" text)} (str "#" text)]) ;; TODO: make it configurable
75 | :blockquote (partial into-markup [:blockquote])
76 | :ruler (constantly [:hr])
77 |
78 | ;; by default we always wrap images in paragraph to restore compliance with commonmark
79 | :image (fn [{:as _ctx ::keys [parent]} {:as node :keys [attrs]}]
80 | (let [img-markup [:img (assoc attrs :alt (->text node))]]
81 | (if (= :doc (:type parent))
82 | [:p img-markup]
83 | img-markup)))
84 |
85 | ;; code
86 | :code (fn [_ {:keys [language] :as m}]
87 | [:pre
88 | [(if language
89 | (keyword (str "code.language-" language))
90 | :code)
91 | (-> m :content first :text)]])
92 |
93 | ;; breaks
94 | :softbreak (constantly " ")
95 | :hardbreak (constantly [:br])
96 |
97 | ;; formulas
98 | :formula (partial into-markup [:span.formula])
99 | :block-formula (partial into-markup [:figure.formula])
100 |
101 | ;; lists
102 | :bullet-list (partial into-markup [:ul])
103 | :list-item (partial into-markup [:li])
104 | :todo-list (partial into-markup [:ul.contains-task-list])
105 | :numbered-list (fn [ctx {:as node :keys [attrs]}] (into-markup [:ol attrs] ctx node))
106 |
107 | :todo-item (fn [ctx {:as node :keys [attrs]}]
108 | (into-markup [:li [:input {:type "checkbox" :checked (:checked attrs)}]] ctx node))
109 |
110 | ;; tables
111 | :table (partial into-markup [:table])
112 | :table-head (partial into-markup [:thead])
113 | :table-body (partial into-markup [:tbody])
114 | :table-row (partial into-markup [:tr])
115 | :table-header (fn [ctx {:as node :keys [attrs]}]
116 | (into-markup (let [ta (table-alignment attrs)] (cond-> [:th] ta (conj {:style ta})))
117 | ctx node))
118 | :table-data (fn [ctx {:as node :keys [attrs]}]
119 | (into-markup (let [ta (table-alignment attrs)] (cond-> [:td] ta (conj {:style ta})))
120 | ctx node))
121 |
122 |    ;; footnotes & sidenotes
123 | :sidenote-container (partial into-markup [:div.sidenote-container])
124 | :sidenote-column (partial into-markup [:div.sidenote-column])
125 | :sidenote-ref (fn [_ {:keys [ref label]}] [:sup.sidenote-ref {:data-label label} (str (inc ref))])
126 | :sidenote (fn [ctx {:as node :keys [ref]}]
127 | (into-markup [:span.sidenote [:sup {:style {:margin-right "3px"}} (str (inc ref))]] ctx node))
128 |
129 | :footnote-ref (fn [_ {:keys [ref label]}] [:sup.sidenote-ref {:data-label label} (str (inc ref))])
130 |    ;; NOTE: there's no default footnote placement (see nextjournal.markdown.utils/insert-sidenote-containers)
131 | :footnote (fn [ctx {:as node :keys [ref label]}]
132 | (into-markup [:div.footnote [:span.footnote-label {:data-ref ref} label]] ctx node))
133 |
134 | ;; TOC
135 | :toc toc->hiccup
136 |
137 | ;; marks
138 | :em (partial into-markup [:em])
139 | :strong (partial into-markup [:strong])
140 | :monospace (partial into-markup [:code])
141 | :strikethrough (partial into-markup [:s])
142 | :link (fn [ctx {:as node :keys [attrs]}] (into-markup [:a {:href (:href attrs)}] ctx node))
143 | :internal-link (fn [_ {:keys [attrs text]}] [:a.internal {:href (:href attrs text)} text])
144 |
145 | ;; default convenience fn to wrap extra markup around the default one from within the overriding function
146 | :default (fn [ctx {:as node t :type}] (when-some [d (get default-hiccup-renderers t)] (d ctx node)))
147 | })
148 |
149 | (defn ->hiccup
150 | ([node] (->hiccup default-hiccup-renderers node))
151 | ([ctx {:as node t :type}]
152 | (let [{:as node :keys [type]} (cond-> node (= :doc t) hydrate-toc)]
153 | (if-some [f (guard fn? (get ctx type))]
154 | (f ctx node)
155 | [:span.message.red
156 | [:strong (str "Unknown type: '" type "'.")]
157 | [:code (pr-str node)]]
158 | ))))
159 |
160 | (comment
161 | (-> "# Hello
162 |
163 | a nice paragraph with sidenotes[^my-note]
164 |
165 | [[TOC]]
166 |
167 | ## Section One
168 | A nice $\\phi$ formula [for _real_ **strong** fun](/path/to) soft
169 | break
170 |
171 | - [ ] one **ahoi** list
172 | - two `nice` and ~~three~~
173 | - [x] checked
174 |
175 | > that said who?
176 |
177 | ---
178 |
179 | ## Section Two
180 |
181 | ### Tables
182 |
183 | | Syntax | JVM | JavaScript |
184 | |--------|-------------------------:|:--------------------------------|
185 | | foo | Loca _lDate_ ahoiii | goog.date.Date |
186 | | bar | java.time.LocalTime | some [kinky](link/to/something) |
187 | | bag | java.time.LocalDateTime | $\\phi$ |
188 |
189 | ### Images
190 |
191 | 
192 |
193 | and here as inline  image
194 |
195 | ```clj
196 | (some nice clojure)
197 | ```
198 |
199 | [^my-note]: Here can discuss at length"
200 | nextjournal.markdown/parse
201 | ->hiccup
202 | )
203 |
204 | ;; override defaults
205 | (->> "## Title
206 | par one
207 |
208 | par two"
209 | nextjournal.markdown/parse
210 | (->hiccup (assoc default-hiccup-renderers
211 | :heading (partial into-markup [:h1.at-all-levels])
212 | ;; wrap something around the default
213 | :paragraph (fn [{:as ctx d :default} node] [:div.p-container (d ctx node)]))))
214 | )
215 |
216 | (comment
217 | (require '[hiccup2.core :as h])
218 |
219 | (-> "
220 | * one
221 | * two"
222 | nextjournal.markdown/parse
223 | ->hiccup
224 | h/html str
225 | )
226 |
227 | (-> "
228 | * one
229 |
230 | * two"
231 | nextjournal.markdown/parse
232 | ->hiccup
233 | h/html str
234 | )
235 |
236 | (-> "# foo
237 | - one \\
238 | broken
239 | - two"
240 | nextjournal.markdown/parse
241 | ->hiccup
242 | h/html str
243 | )
244 |
245 | ;; https://spec.commonmark.org/0.30/#example-319
246 | (= (str ""
247 | "- a
b
c
- d
"
248 | "")
249 | (->> "- a\n - b\n\n c\n- d"
250 | nextjournal.markdown/parse
251 | (->hiccup default-hiccup-renderers)
252 | h/html str)))
253 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/utils.cljc:
--------------------------------------------------------------------------------
1 | ;; # Markdown parsing shared utils
2 | (ns nextjournal.markdown.utils
3 | (:require [clojure.string :as str]
4 | [clojure.zip :as z]
5 | [nextjournal.markdown.utils.emoji :as emoji]
6 | [nextjournal.markdown.transform :as md.transform]))
7 |
8 | #?(:clj (defn re-groups* [m] (let [g (re-groups m)] (cond-> g (not (vector? g)) vector))))
9 | (defn re-idx-seq
10 | "Takes a regex and a string, returns a seq of triplets comprised of match groups followed by indices delimiting each match."
11 | [re text]
12 | #?(:clj (let [m (re-matcher re text)]
13 | (take-while some? (repeatedly #(when (.find m) [(re-groups* m) (.start m) (.end m)]))))
14 | :cljs (let [rex (js/RegExp. (.-source re) "g")]
15 | (take-while some? (repeatedly #(when-some [m (.exec rex text)] [(vec m) (.-index m) (.-lastIndex rex)]))))))
16 |
17 | #_ (re-idx-seq #"\{\{([^{]+)\}\}" "foo {{hello}} bar")
18 | #_ (re-idx-seq #"\{\{[^{]+\}\}" "foo {{hello}} bar {{what}} the")
19 |
20 | ;; ## Context and Nodes
21 |
22 | (defn split-by-emoji [s]
23 | (let [[match start end] (first (re-idx-seq emoji/regex s))]
24 | (if match
25 | [(subs s start end) (str/trim (subs s end))]
26 | [nil s])))
27 |
28 | #_(split-by-emoji " Stop")
29 | #_(split-by-emoji "🤚🏽 Stop")
30 | #_(split-by-emoji "🤚🏽🤚 Stop")
31 | #_(split-by-emoji "🤚🏽Stop")
32 | #_(split-by-emoji "🤚🏽 Stop")
33 | #_(split-by-emoji "😀 Stop")
34 | #_(split-by-emoji "⚛️ Stop")
35 | #_(split-by-emoji "⚛ Stop")
36 | #_(split-by-emoji "⬇ Stop")
37 | #_(split-by-emoji "Should not 🙁️ Split")
38 | #_(text->id+emoji "Hello There")
39 | #_(text->id+emoji "Hello_There")
40 | #_(text->id+emoji "👩🔬 Quantum Physics")
41 |
42 | (defn text->id+emoji [text]
43 | (when (string? text)
44 | (let [[emoji text'] (split-by-emoji (str/trim text))]
45 | (cond-> {:id (apply str (map (comp str/lower-case (fn [c] (case c (\space \_) \- c))) text'))}
46 | emoji (assoc :emoji emoji)))))
47 |
48 | ;; TODO: move this to n.markdown ns
49 | (def empty-doc
50 | {:type :doc
51 | :content []
52 | :toc {:type :toc}
53 | :footnotes []
54 | :text-tokenizers []
55 | ;; Node -> {id : String, emoji String}, dissoc from context to opt-out of ids
56 | :text->id+emoji-fn (comp text->id+emoji md.transform/->text)
57 |
58 | ;; private
59 | ;; Id -> Nat, to disambiguate ids for nodes with the same textual content
60 | :nextjournal.markdown.impl/id->index {}
61 | ;; allow to swap between :doc or :footnotes
62 | :nextjournal.markdown.impl/root :doc})
63 |
64 | (defn current-loc [{:as ctx :nextjournal.markdown.impl/keys [root]}] (get ctx root))
65 | (defn update-current-loc [{:as ctx :nextjournal.markdown.impl/keys [root]} f & args]
66 | (assert root (str "Missing root: '" (keys ctx) "'"))
67 | (apply update ctx root f args))
68 |
69 | (defn text-node [s] {:type :text :text s})
70 | (defn formula [text] {:type :formula :text text})
71 | (defn block-formula [text] {:type :block-formula :text text})
72 |
73 | (defn node
74 | [type content attrs top-level]
75 | (cond-> {:type type :content content}
76 | (seq attrs) (assoc :attrs attrs)
77 | (seq top-level) (merge top-level)))
78 |
79 | ;; ## 🤐 Zipper Utils
80 |
81 | (defn ->zip [doc]
82 | (z/zipper (every-pred map? :type) :content
83 | (fn [node cs] (assoc node :content (vec cs)))
84 | doc))
85 | (def zip? (comp some? :zip/children meta))
86 | (defn zdepth [loc] (-> loc second :pnodes count))
87 |
88 | #_(zip? (->zip {:type :doc :content []}))
89 | #_(->zip {:type :doc :content []})
90 | #_(-> {:type :doc :content []} ->zip
91 | (z/append-child {:type :heading})
92 | z/down zdepth)
93 |
94 | (defn zopen-node [loc node]
95 | (-> loc (z/append-child node) z/down z/rightmost))
96 |
97 | (defn zpath
98 |   "Given a document zipper location `loc`, returns a vector corresponding to the path of the node at `loc`,
99 |   suitable for `get-in` from the root. That is `(= (z/node loc) (get-in (z/root loc) (zpath loc)))`."
100 | [loc]
101 | (loop [coords (second loc) idxs ()]
102 | (if-some [idx (when (and coords (:l coords)) (count (:l coords)))]
103 | (recur (:ppath coords) (conj idxs idx))
104 | (vec (when (seq idxs)
105 | (cons :content (interpose :content idxs)))))))
106 |
107 | (comment
108 | (def loc
109 | (-> {:type :doc} ->zip
110 | (z/append-child {:type :paragraph})
111 | (z/append-child {:type :paragraph})
112 | z/down z/rightmost
113 | (z/append-child {:type :text :text "ahoi"})
114 | z/down))
115 | (-> loc z/node)
116 | (-> loc second)
117 | )
118 |
119 | ;; TODO: rewrite in terms of zippers
120 | (def ppop (comp pop pop))
121 | (defn inc-last [path] (update path (dec (count path)) inc))
122 |
123 | ;; ## 🗂️ ToC Handling
124 | ;; region toc:
125 | ;; toc nodes are heading nodes but with `:type` `:toc` and an extra branching
126 | ;; on the key `:children` representing the sub-sections of the document
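;; e.g. parsing "# A\n## B" yields, roughly, a toc of the shape
;; {:type :toc :children [{:type :toc :heading-level 1 ... :children [{:type :toc :heading-level 2 ...}]}]}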
127 |
128 | (defn into-toc [toc {:as toc-item :keys [heading-level]}]
129 | (loop [toc toc l heading-level toc-path [:children]]
130 | ;; `toc-path` is `[:children i₁ :children i₂ ... :children]`
131 | (let [type-path (assoc toc-path (dec (count toc-path)) :type)]
132 | (cond
133 | ;; insert intermediate default empty :content collections for the final update-in (which defaults to maps otherwise)
134 | (not (get-in toc toc-path))
135 | (recur (assoc-in toc toc-path []) l toc-path)
136 |
137 | ;; fill in toc types for non-contiguous jumps like h1 -> h3
138 | (not (get-in toc type-path))
139 | (recur (assoc-in toc type-path :toc) l toc-path)
140 |
141 | (= 1 l)
142 | (update-in toc toc-path (fnil conj []) toc-item)
143 |
144 | :else
145 | (recur toc
146 | (dec l)
147 | (conj toc-path
148 | (max 0 (dec (count (get-in toc toc-path)))) ;; select last child at level if it exists
149 | :children))))))
150 |
151 | (defn add-to-toc [doc {:as h :keys [heading-level]}]
152 | (cond-> doc (pos-int? heading-level) (update :toc into-toc (assoc h :type :toc))))
153 |
154 | (defn set-title-when-missing [{:as doc :keys [title]} heading]
155 | (cond-> doc (nil? title) (assoc :title (md.transform/->text heading))))
156 |
157 | (defn add-title+toc
158 |   "Computes and adds a :title and a :toc to the document-like structure `doc`, which might not have been constructed by means of `parse`."
159 | [{:as doc :keys [content]}]
160 | (let [rf (fn [doc heading] (-> doc (add-to-toc heading) (set-title-when-missing heading)))
161 | xf (filter (comp #{:heading} :type))]
162 | (reduce (xf rf) (assoc doc :toc {:type :toc}) content)))
163 |
164 | (defn handle-close-heading [ctx]
165 | (let [{:keys [text->id+emoji-fn] :nextjournal.markdown.impl/keys [id->index]} ctx
166 | heading-loc (current-loc ctx)
167 | heading (z/node heading-loc)
168 | {:keys [id emoji]} (when (ifn? text->id+emoji-fn)
169 | (text->id+emoji-fn heading))
170 | existing-idx (when id (get id->index id))
171 | heading' (cond-> heading
172 | id (assoc-in [:attrs :id] (cond-> id existing-idx (str "-" (inc existing-idx))))
173 | emoji (assoc :emoji emoji))]
174 | (-> ctx
175 | (update :nextjournal.markdown.impl/id->index update id (fnil inc 0))
176 | (cond-> (= 1 (zdepth heading-loc))
177 | (-> (add-to-toc (assoc heading' :path (zpath heading-loc)))
178 | (set-title-when-missing heading')))
179 | (update-current-loc (fn [loc] (-> loc (z/replace heading') z/up))))))
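
;; e.g. (a sketch) repeated heading texts get disambiguated ids via id->index:
#_(->> (nextjournal.markdown/parse "# Hello\n# Hello") :content (mapv (comp :id :attrs))) ;; => ["hello" "hello-2"]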
180 |
181 | (comment
182 | (-> {:type :toc}
183 | ;;(into-toc {:heading-level 3 :title "Foo"})
184 | ;;(into-toc {:heading-level 2 :title "Section 1"})
185 | (into-toc {:heading-level 1 :title "Title" :type :toc})
186 | (into-toc {:heading-level 4 :title "Section 2" :type :toc})
187 | ;;(into-toc {:heading-level 4 :title "Section 2.1"})
188 | ;;(into-toc {:heading-level 2 :title "Section 3"})
189 | )
190 |
191 | (-> "# Top _Title_
192 |
193 | par
194 |
195 | ### Three
196 |
197 | ## Two
198 |
199 | par
200 | - and a nested
201 | - ### Heading not included
202 |
203 | foo
204 |
205 | ## Two Again
206 |
207 | par
208 |
209 | # One Again
210 |
211 | [[TOC]]
212 |
213 | #### Four
214 |
215 | end"
216 | nextjournal.markdown/parse
217 | :toc
218 | ))
219 | ;; endregion
220 |
221 | ;; ## Parsing Extensibility
222 | ;;
223 | ;; normalize-tokenizer :: {:regex, :doc-handler} | {:tokenizer-fn, :handler} -> Tokenizer
224 | ;; Tokenizer :: {:tokenizer-fn :: TokenizerFn, :doc-handler :: DocHandler}
225 | ;;
226 | ;; Match :: Any
227 | ;; Handler :: Match -> Node
228 | ;; IndexedMatch :: (Match, Int, Int)
229 | ;; TokenizerFn :: String -> [IndexedMatch]
230 | ;; DocHandler :: Doc -> {:match :: Match} -> Doc
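
;; For example (a sketch; :mention is a made-up node type, and the
;; :regex/:handler shorthand is normalized by `normalize-tokenizer` below):
(comment
  (def mention-tokenizer
    (normalize-tokenizer {:regex #"@(\w+)"
                          :handler (fn [m] {:type :mention :text (m 1)})}))
  ;; the derived :tokenizer-fn returns IndexedMatch triplets
  ((:tokenizer-fn mention-tokenizer) "hello @alice and @bob")
  ;; => ([["@alice" "alice"] 6 12] [["@bob" "bob"] 17 21])
  )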
231 |
232 | (defn tokenize-text-node [{:as tkz :keys [tokenizer-fn pred doc-handler]} ctx {:as node :keys [text]}]
233 | ;; TokenizerFn -> HNode -> [HNode]
234 | (assert (and (fn? tokenizer-fn)
235 | (fn? doc-handler)
236 | (fn? pred)
237 | (string? text))
238 | {:text text :tokenizer tkz})
239 | (let [idx-seq (when (pred (current-loc ctx)) (tokenizer-fn text))]
240 | (if (seq idx-seq)
241 | (let [text-hnode (fn [s] (assoc (text-node s) :doc-handler z/append-child))
242 | {:keys [nodes remaining-text]}
243 | (reduce (fn [{:as acc :keys [remaining-text]} [match start end]]
244 | (-> acc
245 | (update :remaining-text subs 0 start)
246 | (cond->
247 | (< end (count remaining-text))
248 | (update :nodes conj (text-hnode (subs remaining-text end))))
249 | (update :nodes conj {:doc-handler doc-handler
250 | :match match :text text
251 | :start start :end end})))
252 | {:remaining-text text :nodes ()}
253 | (reverse idx-seq))]
254 | (cond-> nodes
255 | (seq remaining-text)
256 | (conj (text-hnode remaining-text))))
257 | [node])))
258 |
259 | (defn handle-text-token [{:as ctx :keys [text-tokenizers]} text]
260 | (reduce (fn [ctx {:as node :keys [doc-handler]}] (update-current-loc ctx doc-handler (dissoc node :doc-handler)))
261 | ctx
262 | (reduce (fn [nodes tokenizer]
263 | (mapcat (fn [{:as node :keys [type]}]
264 | (if (= :text type) (tokenize-text-node tokenizer ctx node) [node]))
265 | nodes))
266 | [{:type :text :text text :doc-handler z/append-child}]
267 | text-tokenizers)))
268 |
269 | ;; clj
270 | #_(handle-text-token (->zip {:type :doc :content []}) "some-text")
271 |
272 | ;; tokenizers
273 | (defn normalize-tokenizer
274 | "Normalizes a map of regex and handler into a Tokenizer"
275 | [{:as tokenizer :keys [doc-handler pred handler regex tokenizer-fn]}]
276 | (assert (and (or doc-handler handler) (or regex tokenizer-fn)))
277 | (cond-> tokenizer
278 | (not doc-handler) (assoc :doc-handler (fn [doc {:keys [match]}] (z/append-child doc (handler match))))
279 | (not tokenizer-fn) (assoc :tokenizer-fn (partial re-idx-seq regex))
280 | (not pred) (assoc :pred (constantly true))))
281 |
282 | (defn current-ancestor-nodes [loc]
283 | (loop [loc loc ancestors []]
284 | (let [parent (z/up loc)]
285 | (if (and parent (not= :doc (:type (z/node parent))))
286 | (recur parent (conj ancestors (z/node parent)))
287 | ancestors))))
288 |
289 | (def hashtag-tokenizer
290 | {:regex #"(^|\B)#[\w-]+"
291 | :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
292 | :handler (fn [match] {:type :hashtag :text (subs (match 0) 1)})})
293 |
294 | (def internal-link-tokenizer
295 | {:regex #"\[\[([^\]]+)\]\]"
296 | :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
297 | :handler (fn [match] {:type :internal-link :text (match 1)})})
298 |
299 | #_(normalize-tokenizer internal-link-tokenizer)
300 | #_(normalize-tokenizer hashtag-tokenizer)
301 |
302 | ;; ## 🤺 Fence Info
303 | ;; `parse-fence-info` ingests nextjournal, GFM, Pandoc and RMarkdown fenced code block info (any text following the leading 3 backticks) and returns a map
304 | ;;
305 | ;; _nextjournal_ / _GFM_
306 | ;;
307 | ;; ```python id=2e3541da-0735-4b7f-a12f-4fb1bfcb6138
308 | ;; python code
309 | ;; ```
310 | ;;
311 | ;; _Pandoc_
312 | ;;
313 | ;; ```{#pandoc-id .language .extra-class key=Val}
314 | ;; code in language
315 | ;; ```
316 | ;;
317 | ;; _Rmd_
318 | ;;
319 | ;; ```{r cars, echo=FALSE}
320 | ;; R code
321 | ;; ```
322 | ;;
323 | ;; See also:
324 | ;; - https://github.github.com/gfm/#info-string
325 | ;; - https://pandoc.org/MANUAL.html#fenced-code-blocks
326 | ;; - https://rstudio.com/wp-content/uploads/2016/03/rmarkdown-cheatsheet-2.0.pdf
327 |
328 | (defn parse-fence-info [info-str]
329 | (try
330 | (when (and (string? info-str) (seq info-str))
331 | (let [tokens (-> info-str
332 | str/trim
333 | (str/replace #"[\{\}\,]" "") ;; remove Pandoc/Rmarkdown brackets and commas
334 | (str/replace "." "") ;; remove dots
335 | (str/split #" "))] ;; split by spaces
336 | (reduce
337 | (fn [{:as info-map :keys [language]} token]
338 | (let [[_ k v] (re-matches #"^([^=]+)=([^=]+)$" token)]
339 | (cond
340 | (str/starts-with? token "#") (assoc info-map :id (str/replace token #"^#" "")) ;; pandoc #id
341 | (and k v) (assoc info-map (keyword k) v)
342 | (not language) (assoc info-map :language token) ;; language is the first simple token which is not a pandoc's id
343 | :else (assoc info-map (keyword token) true))))
344 | {}
345 | tokens)))
346 | (catch #?(:clj Throwable :cljs :default) _ {})))
347 |
348 | (comment
349 | (parse-fence-info "python runtime-id=5f77e475-6178-47a3-8437-45c9c34d57ff")
350 | (parse-fence-info "{#some-id .lang foo=nex}")
351 | (parse-fence-info "#id clojure")
352 | (parse-fence-info "clojure #id")
353 | (parse-fence-info "clojure")
354 | (parse-fence-info "{r cars, echo=FALSE}"))
355 |
356 | ;; ## Footnote handling
357 |
358 | (defn node-with-sidenote-refs [p-node]
359 | (loop [l (->zip p-node) refs []]
360 | (if (z/end? l)
361 | (when (seq refs)
362 | {:node (z/root l) :refs refs})
363 | (let [{:keys [type ref]} (z/node l)]
364 | (if (= :footnote-ref type)
365 | (recur (z/next (z/edit l assoc :type :sidenote-ref)) (conj refs ref))
366 | (recur (z/next l) refs))))))
367 |
368 | (defn footnote->sidenote [{:keys [ref label content]}]
369 | ;; this assumes the footnote container is a paragraph, won't work for lists
370 | (node :sidenote (-> content first :content) nil (cond-> {:ref ref} label (assoc :label label))))
371 |
372 | (defn insert-sidenote-containers
373 | "Handles footnotes as sidenotes.
374 |
375 | Takes and returns a parsed document. When the document has footnotes, wraps every top-level block which contains footnote references
376 |   in a `:sidenote-container` node, and into each such container adds a `:sidenote-column` node containing a `:sidenote` node for each ref found.
377 |   Renames type `:footnote-ref` to `:sidenote-ref`."
378 | [{:as doc :keys [footnotes]}]
379 | (if-not (seq footnotes)
380 | doc
381 | (let [root (->zip doc)]
382 | (loop [loc (z/down root) parent root]
383 | (cond
384 | (nil? loc)
385 | (-> parent z/node (assoc :sidenotes? true))
386 | (contains? #{:plain :paragraph :blockquote :numbered-list :bullet-list :todo-list :heading :table}
387 | (:type (z/node loc)))
388 | (if-some [{:keys [node refs]} (node-with-sidenote-refs (z/node loc))]
389 | (let [new-loc (-> loc (z/replace {:type :sidenote-container :content []})
390 | (z/append-child node)
391 | (z/append-child {:type :sidenote-column
392 | ;; TODO: broken in the old implementation
393 | ;; should be :content (mapv #(footnote->sidenote (get footnotes %)) (distinct refs))}))]
394 | :content (mapv #(footnote->sidenote (get footnotes %)) refs)}))]
395 | (recur (z/right new-loc) (z/up new-loc)))
396 | (recur (z/right loc) parent))
397 | :else
398 | (recur (z/right loc) parent))))))
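;; A minimal usage sketch (assuming `nextjournal.markdown/parse` and footnote syntax
;; in the input; the result shape shown is illustrative, not verified output):
(comment
  (require '[nextjournal.markdown :as md])
  (-> (md/parse "some text[^note]\n\n[^note]: and a note")
      insert-sidenote-containers)
  ;; => a doc with :sidenotes? true in which each affected top-level block is wrapped as
  ;;    {:type :sidenote-container
  ;;     :content [<original block> {:type :sidenote-column :content [{:type :sidenote ...}]}]}
  )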
399 |
--------------------------------------------------------------------------------
/src/nextjournal/markdown/utils/emoji.cljc:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown.utils.emoji
2 | "https://github.com/mathiasbynens/emoji-test-regex-pattern
3 | MIT License
4 | Copyright Mathias Bynens ")
5 |
6 | ;; https://raw.githubusercontent.com/mathiasbynens/emoji-test-regex-pattern/f798c38987917b48e26d490590ba4f5481eb6e93/dist/latest/java.txt
7 | (def regex-java
8 | "^(?:[#*0-9]\\x{FE0F}?\\x{20E3}|[\\xA9\\xAE\\x{203C}\\x{2049}\\x{2122}\\x{2139}\\x{2194}-\\x{2199}\\x{21A9}\\x{21AA}\\x{231A}\\x{231B}\\x{2328}\\x{23CF}\\x{23ED}-\\x{23EF}\\x{23F1}\\x{23F2}\\x{23F8}-\\x{23FA}\\x{24C2}\\x{25AA}\\x{25AB}\\x{25B6}\\x{25C0}\\x{25FB}\\x{25FC}\\x{25FE}\\x{2600}-\\x{2604}\\x{260E}\\x{2611}\\x{2614}\\x{2615}\\x{2618}\\x{2620}\\x{2622}\\x{2623}\\x{2626}\\x{262A}\\x{262E}\\x{262F}\\x{2638}-\\x{263A}\\x{2640}\\x{2642}\\x{2648}-\\x{2653}\\x{265F}\\x{2660}\\x{2663}\\x{2665}\\x{2666}\\x{2668}\\x{267B}\\x{267E}\\x{267F}\\x{2692}\\x{2694}-\\x{2697}\\x{2699}\\x{269B}\\x{269C}\\x{26A0}\\x{26A7}\\x{26AA}\\x{26B0}\\x{26B1}\\x{26BD}\\x{26BE}\\x{26C4}\\x{26C8}\\x{26CF}\\x{26D1}\\x{26D3}\\x{26E9}\\x{26F0}-\\x{26F5}\\x{26F7}\\x{26F8}\\x{26FA}\\x{2702}\\x{2708}\\x{2709}\\x{270F}\\x{2712}\\x{2714}\\x{2716}\\x{271D}\\x{2721}\\x{2733}\\x{2734}\\x{2744}\\x{2747}\\x{2757}\\x{2763}\\x{27A1}\\x{2934}\\x{2935}\\x{2B05}-\\x{2B07}\\x{2B1B}\\x{2B1C}\\x{2B55}\\x{3030}\\x{303D}\\x{3297}\\x{3299}\\x{1F004}\\x{1F170}\\x{1F171}\\x{1F17E}\\x{1F17F}\\x{1F202}\\x{1F237}\\x{1F321}\\x{1F324}-\\x{1F32C}\\x{1F336}\\x{1F37D}\\x{1F396}\\x{1F397}\\x{1F399}-\\x{1F39B}\\x{1F39E}\\x{1F39F}\\x{1F3CD}\\x{1F3CE}\\x{1F3D4}-\\x{1F3DF}\\x{1F3F5}\\x{1F3F7}\\x{1F43F}\\x{1F4FD}\\x{1F549}\\x{1F54A}\\x{1F56F}\\x{1F570}\\x{1F573}\\x{1F576}-\\x{1F579}\\x{1F587}\\x{1F58A}-\\x{1F58D}\\x{1F5A5}\\x{1F5A8}\\x{1F5B1}\\x{1F5B2}\\x{1F5BC}\\x{1F5C2}-\\x{1F5C4}\\x{1F5D1}-\\x{1F5D3}\\x{1F5DC}-\\x{1F5DE}\\x{1F5E1}\\x{1F5E3}\\x{1F5E8}\\x{1F5EF}\\x{1F5F3}\\x{1F5FA}\\x{1F6CB}\\x{1F6CD}-\\x{1F6CF}\\x{1F6E0}-\\x{1F6E5}\\x{1F6E9}\\x{1F6F0}\\x{1F6F3}]\\x{FE0F}?|[\\x{261D}\\x{270C}\\x{270D}\\x{1F574}\\x{1F590}][\\x{FE0F}\\x{1F3FB}-\\x{1F3FF}]?|[\\x{26F9}\\x{1F3CB}\\x{1F3CC}\\x{1F575}][\\x{FE0F}\\x{1F3FB}-\\x{1F3FF}]?(?:\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|[\\x{270A}\\x{270B}\\x{1F385}\\x{1F3C2}\\x{1F3C7}\\x{1F442}\\x{1F443}\\x{1F446}-\\x{1F450}\\x{1F466}\\x{1F467}\\x{1F46B}-\\x{1F46D}\\x{1F472}\\x{1F474}-\\x{1F476}\\x{1F478}\\x{1F47C}\\x{1F483}\\x{1F485}\\x{1F48F}\\x{1F491}\\x{1F4AA}\\x{1F57A}\\x{1F595}\\x{1F596}\\x{1F64C}\\x{1F64F}\\x{1F6C0}\\x{1F6CC}\\x{1F90C}\\x{1F90F}\\x{1F918}-\\x{1F91F}\\x{1F930}-\\x{1F934}\\x{1F936}\\x{1F977}\\x{1F9B5}\\x{1F9B6}\\x{1F9BB}\\x{1F9D2}\\x{1F9D3}\\x{1F9D5}\\x{1FAC3}-\\x{1FAC5}\\x{1FAF0}\\x{1FAF2}-\\x{1FAF8}][\\x{1F3FB}-\\x{1F3FF}]?|[\\x{1F3C3}\\x{1F3C4}\\x{1F3CA}\\x{1F46E}\\x{1F470}\\x{1F471}\\x{1F473}\\x{1F477}\\x{1F481}\\x{1F482}\\x{1F486}\\x{1F487}\\x{1F645}-\\x{1F647}\\x{1F64B}\\x{1F64D}\\x{1F64E}\\x{1F6A3}\\x{1F6B4}-\\x{1F6B6}\\x{1F926}\\x{1F935}\\x{1F937}-\\x{1F939}\\x{1F93D}\\x{1F93E}\\x{1F9B8}\\x{1F9B9}\\x{1F9CD}-\\x{1F9CF}\\x{1F9D4}\\x{1F9D6}-\\x{1F9DD}][\\x{1F3FB}-\\x{1F3FF}]?(?:\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|[\\x{1F408}\\x{1F426}](?:\\x{200D}\\x{2B1B})?|[\\x{1F46F}\\x{1F9DE}\\x{1F9DF}](?:\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|[\\x{23E9}-\\x{23EC}\\x{23F0}\\x{23F3}\\x{25FD}\\x{2693}\\x{26A1}\\x{26AB}\\x{26C5}\\x{26CE}\\x{26D4}\\x{26EA}\\x{26FD}\\x{2705}\\x{2728}\\x{274C}\\x{274E}\\x{2753}-\\x{2755}\\x{2795}-\\x{2797}\\x{27B0}\\x{27BF}\\x{2B50}\\x{1F0CF}\\x{1F18E}\\x{1F191}-\\x{1F19A}\\x{1F201}\\x{1F21A}\\x{1F22F}\\x{1F232}-\\x{1F236}\\x{1F238}-\\x{1F23A}\\x{1F250}\\x{1F251}\\x{1F300}-\\x{1F320}\\x{1F32D}-\\x{1F335}\\x{1F337}-\\x{1F37C}\\x{1F37E}-\\x{1F384}\\x{1F386}-\\x{1F393}\\x{1F3A0}-\\x{1F3C1}\\x{1F3C5}\\x{1F3C6}\\x{1F3C8}\\x{1F3C9}\\x{1F3CF}-\\x{1F3D3}\\x{1F3E0}-\\x{1F3F0}\\x{1F3F8}-\\x{1F407}\\x{1F409}-\\x{1F414}\\x{1F416}-\\x{1F425}\\x{1F427}-\\x{1F43A}\\x{1F43C}-\\x{
1F43E}\\x{1F440}\\x{1F444}\\x{1F445}\\x{1F451}-\\x{1F465}\\x{1F46A}\\x{1F479}-\\x{1F47B}\\x{1F47D}-\\x{1F480}\\x{1F484}\\x{1F488}-\\x{1F48E}\\x{1F490}\\x{1F492}-\\x{1F4A9}\\x{1F4AB}-\\x{1F4FC}\\x{1F4FF}-\\x{1F53D}\\x{1F54B}-\\x{1F54E}\\x{1F550}-\\x{1F567}\\x{1F5A4}\\x{1F5FB}-\\x{1F62D}\\x{1F62F}-\\x{1F634}\\x{1F637}-\\x{1F644}\\x{1F648}-\\x{1F64A}\\x{1F680}-\\x{1F6A2}\\x{1F6A4}-\\x{1F6B3}\\x{1F6B7}-\\x{1F6BF}\\x{1F6C1}-\\x{1F6C5}\\x{1F6D0}-\\x{1F6D2}\\x{1F6D5}-\\x{1F6D7}\\x{1F6DC}-\\x{1F6DF}\\x{1F6EB}\\x{1F6EC}\\x{1F6F4}-\\x{1F6FC}\\x{1F7E0}-\\x{1F7EB}\\x{1F7F0}\\x{1F90D}\\x{1F90E}\\x{1F910}-\\x{1F917}\\x{1F920}-\\x{1F925}\\x{1F927}-\\x{1F92F}\\x{1F93A}\\x{1F93F}-\\x{1F945}\\x{1F947}-\\x{1F976}\\x{1F978}-\\x{1F9B4}\\x{1F9B7}\\x{1F9BA}\\x{1F9BC}-\\x{1F9CC}\\x{1F9D0}\\x{1F9E0}-\\x{1F9FF}\\x{1FA70}-\\x{1FA7C}\\x{1FA80}-\\x{1FA88}\\x{1FA90}-\\x{1FABD}\\x{1FABF}-\\x{1FAC2}\\x{1FACE}-\\x{1FADB}\\x{1FAE0}-\\x{1FAE8}]|\\x{2764}\\x{FE0F}?(?:\\x{200D}[\\x{1F525}\\x{1FA79}])?|\\x{1F1E6}[\\x{1F1E8}-\\x{1F1EC}\\x{1F1EE}\\x{1F1F1}\\x{1F1F2}\\x{1F1F4}\\x{1F1F6}-\\x{1F1FA}\\x{1F1FC}\\x{1F1FD}\\x{1F1FF}]|\\x{1F1E7}[\\x{1F1E6}\\x{1F1E7}\\x{1F1E9}-\\x{1F1EF}\\x{1F1F1}-\\x{1F1F4}\\x{1F1F6}-\\x{1F1F9}\\x{1F1FB}\\x{1F1FC}\\x{1F1FE}\\x{1F1FF}]|\\x{1F1E8}[\\x{1F1E6}\\x{1F1E8}\\x{1F1E9}\\x{1F1EB}-\\x{1F1EE}\\x{1F1F0}-\\x{1F1F5}\\x{1F1F7}\\x{1F1FA}-\\x{1F1FF}]|\\x{1F1E9}[\\x{1F1EA}\\x{1F1EC}\\x{1F1EF}\\x{1F1F0}\\x{1F1F2}\\x{1F1F4}\\x{1F1FF}]|\\x{1F1EA}[\\x{1F1E6}\\x{1F1E8}\\x{1F1EA}\\x{1F1EC}\\x{1F1ED}\\x{1F1F7}-\\x{1F1FA}]|\\x{1F1EB}[\\x{1F1EE}-\\x{1F1F0}\\x{1F1F2}\\x{1F1F4}\\x{1F1F7}]|\\x{1F1EC}[\\x{1F1E6}\\x{1F1E7}\\x{1F1E9}-\\x{1F1EE}\\x{1F1F1}-\\x{1F1F3}\\x{1F1F5}-\\x{1F1FA}\\x{1F1FC}\\x{1F1FE}]|\\x{1F1ED}[\\x{1F1F0}\\x{1F1F2}\\x{1F1F3}\\x{1F1F7}\\x{1F1F9}\\x{1F1FA}]|\\x{1F1EE}[\\x{1F1E8}-\\x{1F1EA}\\x{1F1F1}-\\x{1F1F4}\\x{1F1F6}-\\x{1F1F9}]|\\x{1F1EF}[\\x{1F1EA}\\x{1F1F2}\\x{1F1F4}\\x{1F1F5}]|\\x{1F1F0}[\\x{1F1EA}\\x{1F1EC}-\\x{1F1EE}\\x{1F1F2}\\x{1F1F3}\\x{1F1F5}\\x{1F1F7}\\x{1F1FC}\\x{1F1FE}\\x{1F1FF}]|\\x{1F1F1}[\\x{1F1E6}-\\x{1F1E8}\\x{1F1EE}\\x{1F1F0}\\x{1F1F7}-\\x{1F1FB}\\x{1F1FE}]|\\x{1F1F2}[\\x{1F1E6}\\x{1F1E8}-\\x{1F1ED}\\x{1F1F0}-\\x{1F1FF}]|\\x{1F1F3}[\\x{1F1E6}\\x{1F1E8}\\x{1F1EA}-\\x{1F1EC}\\x{1F1EE}\\x{1F1F1}\\x{1F1F4}\\x{1F1F5}\\x{1F1F7}\\x{1F1FA}\\x{1F1FF}]|\\x{1F1F4}\\x{1F1F2}|\\x{1F1F5}[\\x{1F1E6}\\x{1F1EA}-\\x{1F1ED}\\x{1F1F0}-\\x{1F1F3}\\x{1F1F7}-\\x{1F1F9}\\x{1F1FC}\\x{1F1FE}]|\\x{1F1F6}\\x{1F1E6}|\\x{1F1F7}[\\x{1F1EA}\\x{1F1F4}\\x{1F1F8}\\x{1F1FA}\\x{1F1FC}]|\\x{1F1F8}[\\x{1F1E6}-\\x{1F1EA}\\x{1F1EC}-\\x{1F1F4}\\x{1F1F7}-\\x{1F1F9}\\x{1F1FB}\\x{1F1FD}-\\x{1F1FF}]|\\x{1F1F9}[\\x{1F1E6}\\x{1F1E8}\\x{1F1E9}\\x{1F1EB}-\\x{1F1ED}\\x{1F1EF}-\\x{1F1F4}\\x{1F1F7}\\x{1F1F9}\\x{1F1FB}\\x{1F1FC}\\x{1F1FF}]|\\x{1F1FA}[\\x{1F1E6}\\x{1F1EC}\\x{1F1F2}\\x{1F1F3}\\x{1F1F8}\\x{1F1FE}\\x{1F1FF}]|\\x{1F1FB}[\\x{1F1E6}\\x{1F1E8}\\x{1F1EA}\\x{1F1EC}\\x{1F1EE}\\x{1F1F3}\\x{1F1FA}]|\\x{1F1FC}[\\x{1F1EB}\\x{1F1F8}]|\\x{1F1FD}\\x{1F1F0}|\\x{1F1FE}[\\x{1F1EA}\\x{1F1F9}]|\\x{1F1FF}[\\x{1F1E6}\\x{1F1F2}\\x{1F1FC}]|\\x{1F3F3}\\x{FE0F}?(?:\\x{200D}(?:\\x{26A7}\\x{FE0F}?|\\x{1F308}))?|\\x{1F3F4}(?:\\x{200D}\\x{2620}\\x{FE0F}?|\\x{E0067}\\x{E0062}(?:\\x{E0065}\\x{E006E}\\x{E0067}|\\x{E0073}\\x{E0063}\\x{E0074}|\\x{E0077}\\x{E006C}\\x{E0073})\\x{E007F})?|\\x{1F415}(?:\\x{200D}\\x{1F9BA})?|\\x{1F43B}(?:\\x{200D}\\x{2744}\\x{FE0F}?)?|\\x{1F441}\\x{FE0F}?(?:\\x{200D}\\x{1F5E8}\\x{FE0F}?)?|\\x{1F468}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F468}\\x{1F469}]\\x{200D}(?:\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{
1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?)|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}|\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?)|\\x{1F3FB}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FC}-\\x{1F3FF}]))?|\\x{1F3FC}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}]))?|\\x{1F3FD}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}]))?|\\x{1F3FE}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}]))?|\\x{1F3FF}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}-\\x{1F3FE}]))?)?|\\x{1F469}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?[\\x{1F468}\\x{1F469}]|\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?|\\x{1F469}\\x{200D}(?:\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?))|\\x{1F3FB}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FC}-\\x{1F3FF}]))?|\\x{1F3FC}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC
}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}]))?|\\x{1F3FD}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}]))?|\\x{1F3FE}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}]))?|\\x{1F3FF}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}-\\x{1F3FE}]))?)?|\\x{1F62E}(?:\\x{200D}\\x{1F4A8})?|\\x{1F635}(?:\\x{200D}\\x{1F4AB})?|\\x{1F636}(?:\\x{200D}\\x{1F32B}\\x{FE0F}?)?|\\x{1F93C}(?:[\\x{1F3FB}-\\x{1F3FF}]|\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|\\x{1F9D1}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{1F91D}\\x{200D}\\x{1F9D1})|\\x{1F3FB}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FC}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FC}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FD}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FE}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D
}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FF}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FE}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?)?|\\x{1FAF1}(?:\\x{1F3FB}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FC}-\\x{1F3FF}])?|\\x{1F3FC}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}])?|\\x{1F3FD}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}])?|\\x{1F3FE}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}])?|\\x{1F3FF}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}-\\x{1F3FE}])?)?)+")
9 |
10 | ;; https://raw.githubusercontent.com/mathiasbynens/emoji-test-regex-pattern/f798c38987917b48e26d490590ba4f5481eb6e93/dist/latest/javascript.txt
11 | (def regex-js "^(?:[#*0-9]\\uFE0F?\\u20E3|[\\xA9\\xAE\\u203C\\u2049\\u2122\\u2139\\u2194-\\u2199\\u21A9\\u21AA\\u231A\\u231B\\u2328\\u23CF\\u23ED-\\u23EF\\u23F1\\u23F2\\u23F8-\\u23FA\\u24C2\\u25AA\\u25AB\\u25B6\\u25C0\\u25FB\\u25FC\\u25FE\\u2600-\\u2604\\u260E\\u2611\\u2614\\u2615\\u2618\\u2620\\u2622\\u2623\\u2626\\u262A\\u262E\\u262F\\u2638-\\u263A\\u2640\\u2642\\u2648-\\u2653\\u265F\\u2660\\u2663\\u2665\\u2666\\u2668\\u267B\\u267E\\u267F\\u2692\\u2694-\\u2697\\u2699\\u269B\\u269C\\u26A0\\u26A7\\u26AA\\u26B0\\u26B1\\u26BD\\u26BE\\u26C4\\u26C8\\u26CF\\u26D1\\u26D3\\u26E9\\u26F0-\\u26F5\\u26F7\\u26F8\\u26FA\\u2702\\u2708\\u2709\\u270F\\u2712\\u2714\\u2716\\u271D\\u2721\\u2733\\u2734\\u2744\\u2747\\u2757\\u2763\\u27A1\\u2934\\u2935\\u2B05-\\u2B07\\u2B1B\\u2B1C\\u2B55\\u3030\\u303D\\u3297\\u3299]\\uFE0F?|[\\u261D\\u270C\\u270D](?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?|[\\u270A\\u270B](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\u23E9-\\u23EC\\u23F0\\u23F3\\u25FD\\u2693\\u26A1\\u26AB\\u26C5\\u26CE\\u26D4\\u26EA\\u26FD\\u2705\\u2728\\u274C\\u274E\\u2753-\\u2755\\u2795-\\u2797\\u27B0\\u27BF\\u2B50]|\\u26F9(?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|\\u2764\\uFE0F?(?:\\u200D(?:\\uD83D\\uDD25|\\uD83E\\uDE79))?|\\uD83C(?:[\\uDC04\\uDD70\\uDD71\\uDD7E\\uDD7F\\uDE02\\uDE37\\uDF21\\uDF24-\\uDF2C\\uDF36\\uDF7D\\uDF96\\uDF97\\uDF99-\\uDF9B\\uDF9E\\uDF9F\\uDFCD\\uDFCE\\uDFD4-\\uDFDF\\uDFF5\\uDFF7]\\uFE0F?|[\\uDF85\\uDFC2\\uDFC7](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDFC3\\uDFC4\\uDFCA](?:\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDFCB\\uDFCC](?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDCCF\\uDD8E\\uDD91-\\uDD9A\\uDE01\\uDE1A\\uDE2F\\uDE32-\\uDE36\\uDE38-\\uDE3A\\uDE50\\uDE51\\uDF00-\\uDF20\\uDF2D-\\uDF35\\uDF37-\\uDF7C\\uDF7E-\\uDF84\\uDF86-\\uDF93\\uDFA0-\\uDFC1\\uDFC5\\uDFC6\\uDFC8\\uDFC9\\uDFCF-\\uDFD3\\uDFE0-\\uDFF0\\uDFF8-\\uDFFF]|\\uDDE6\\uD83C[\\uDDE8-\\uDDEC\\uDDEE\\uDDF1\\uDDF2\\uDDF4\\uDDF6-\\uDDFA\\uDDFC\\uDDFD\\uDDFF]|\\uDDE7\\uD83C[\\uDDE6\\uDDE7\\uDDE9-\\uDDEF\\uDDF1-\\uDDF4\\uDDF6-\\uDDF9\\uDDFB\\uDDFC\\uDDFE\\uDDFF]|\\uDDE8\\uD83C[\\uDDE6\\uDDE8\\uDDE9\\uDDEB-\\uDDEE\\uDDF0-\\uDDF5\\uDDF7\\uDDFA-\\uDDFF]|\\uDDE9\\uD83C[\\uDDEA\\uDDEC\\uDDEF\\uDDF0\\uDDF2\\uDDF4\\uDDFF]|\\uDDEA\\uD83C[\\uDDE6\\uDDE8\\uDDEA\\uDDEC\\uDDED\\uDDF7-\\uDDFA]|\\uDDEB\\uD83C[\\uDDEE-\\uDDF0\\uDDF2\\uDDF4\\uDDF7]|\\uDDEC\\uD83C[\\uDDE6\\uDDE7\\uDDE9-\\uDDEE\\uDDF1-\\uDDF3\\uDDF5-\\uDDFA\\uDDFC\\uDDFE]|\\uDDED\\uD83C[\\uDDF0\\uDDF2\\uDDF3\\uDDF7\\uDDF9\\uDDFA]|\\uDDEE\\uD83C[\\uDDE8-\\uDDEA\\uDDF1-\\uDDF4\\uDDF6-\\uDDF9]|\\uDDEF\\uD83C[\\uDDEA\\uDDF2\\uDDF4\\uDDF5]|\\uDDF0\\uD83C[\\uDDEA\\uDDEC-\\uDDEE\\uDDF2\\uDDF3\\uDDF5\\uDDF7\\uDDFC\\uDDFE\\uDDFF]|\\uDDF1\\uD83C[\\uDDE6-\\uDDE8\\uDDEE\\uDDF0\\uDDF7-\\uDDFB\\uDDFE]|\\uDDF2\\uD83C[\\uDDE6\\uDDE8-\\uDDED\\uDDF0-\\uDDFF]|\\uDDF3\\uD83C[\\uDDE6\\uDDE8\\uDDEA-\\uDDEC\\uDDEE\\uDDF1\\uDDF4\\uDDF5\\uDDF7\\uDDFA\\uDDFF]|\\uDDF4\\uD83C\\uDDF2|\\uDDF5\\uD83C[\\uDDE6\\uDDEA-\\uDDED\\uDDF0-\\uDDF3\\uDDF7-\\uDDF9\\uDDFC\\uDDFE]|\\uDDF6\\uD83C\\uDDE6|\\uDDF7\\uD83C[\\uDDEA\\uDDF4\\uDDF8\\uDDFA\\uDDFC]|\\uDDF8\\uD83C[\\uDDE6-\\uDDEA\\uDDEC-\\uDDF4\\uDDF7-\\uDDF9\\uDDFB\\uDDFD-\\uDDFF]|\\uDDF9\\uD83C[\\uDDE6\\uDDE8\\uDDE9\\uDDEB-\\uDDED\\uDDEF-\\uDDF4\\uDDF7\\uDDF9\\uDDFB\\uDDFC\\uDDFF]|\\uDDFA\\uD83C[\\uDDE6\\uDDEC\\uDDF2\\uDDF3\\uDDF8\\uDDFE\\uDDFF]|\\uDDFB\\uD83C[\\uDDE6\\uDDE8\\uDDEA\\uDDEC\\uDDEE\\uDDF3\\uDDFA]|\\uDDFC\\uD83C[\\uDDEB\\uDDF8]|\\uDDFD\\uD83C\\uDDF0|\\uDDFE\\uD83C[\\uDDEA\\uDDF9]|\\uDDFF\\u
D83C[\\uDDE6\\uDDF2\\uDDFC]|\\uDFF3\\uFE0F?(?:\\u200D(?:\\u26A7\\uFE0F?|\\uD83C\\uDF08))?|\\uDFF4(?:\\u200D\\u2620\\uFE0F?|\\uDB40\\uDC67\\uDB40\\uDC62\\uDB40(?:\\uDC65\\uDB40\\uDC6E\\uDB40\\uDC67|\\uDC73\\uDB40\\uDC63\\uDB40\\uDC74|\\uDC77\\uDB40\\uDC6C\\uDB40\\uDC73)\\uDB40\\uDC7F)?)|\\uD83D(?:[\\uDC08\\uDC26](?:\\u200D\\u2B1B)?|[\\uDC3F\\uDCFD\\uDD49\\uDD4A\\uDD6F\\uDD70\\uDD73\\uDD76-\\uDD79\\uDD87\\uDD8A-\\uDD8D\\uDDA5\\uDDA8\\uDDB1\\uDDB2\\uDDBC\\uDDC2-\\uDDC4\\uDDD1-\\uDDD3\\uDDDC-\\uDDDE\\uDDE1\\uDDE3\\uDDE8\\uDDEF\\uDDF3\\uDDFA\\uDECB\\uDECD-\\uDECF\\uDEE0-\\uDEE5\\uDEE9\\uDEF0\\uDEF3]\\uFE0F?|[\\uDC42\\uDC43\\uDC46-\\uDC50\\uDC66\\uDC67\\uDC6B-\\uDC6D\\uDC72\\uDC74-\\uDC76\\uDC78\\uDC7C\\uDC83\\uDC85\\uDC8F\\uDC91\\uDCAA\\uDD7A\\uDD95\\uDD96\\uDE4C\\uDE4F\\uDEC0\\uDECC](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDC6E\\uDC70\\uDC71\\uDC73\\uDC77\\uDC81\\uDC82\\uDC86\\uDC87\\uDE45-\\uDE47\\uDE4B\\uDE4D\\uDE4E\\uDEA3\\uDEB4-\\uDEB6](?:\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDD74\\uDD90](?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDC00-\\uDC07\\uDC09-\\uDC14\\uDC16-\\uDC25\\uDC27-\\uDC3A\\uDC3C-\\uDC3E\\uDC40\\uDC44\\uDC45\\uDC51-\\uDC65\\uDC6A\\uDC79-\\uDC7B\\uDC7D-\\uDC80\\uDC84\\uDC88-\\uDC8E\\uDC90\\uDC92-\\uDCA9\\uDCAB-\\uDCFC\\uDCFF-\\uDD3D\\uDD4B-\\uDD4E\\uDD50-\\uDD67\\uDDA4\\uDDFB-\\uDE2D\\uDE2F-\\uDE34\\uDE37-\\uDE44\\uDE48-\\uDE4A\\uDE80-\\uDEA2\\uDEA4-\\uDEB3\\uDEB7-\\uDEBF\\uDEC1-\\uDEC5\\uDED0-\\uDED2\\uDED5-\\uDED7\\uDEDC-\\uDEDF\\uDEEB\\uDEEC\\uDEF4-\\uDEFC\\uDFE0-\\uDFEB\\uDFF0]|\\uDC15(?:\\u200D\\uD83E\\uDDBA)?|\\uDC3B(?:\\u200D\\u2744\\uFE0F?)?|\\uDC41\\uFE0F?(?:\\u200D\\uD83D\\uDDE8\\uFE0F?)?|\\uDC68(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D(?:[\\uDC68\\uDC69]\\u200D\\uD83D(?:\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?)|[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?)|\\uD83E[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD])|\\uD83C(?:\\uDFFB(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFC-\\uDFFF])))?|\\uDFFC(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB\\uDFFD-\\uDFFF])))?|\\uDFFD(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF])))?|\\uDFFE(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD8
3C[\\uDFFB-\\uDFFD\\uDFFF])))?|\\uDFFF(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB-\\uDFFE])))?))?|\\uDC69(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?[\\uDC68\\uDC69]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D(?:[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?|\\uDC69\\u200D\\uD83D(?:\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?))|\\uD83E[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD])|\\uD83C(?:\\uDFFB(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFC-\\uDFFF])))?|\\uDFFC(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB\\uDFFD-\\uDFFF])))?|\\uDFFD(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF])))?|\\uDFFE(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB-\\uDFFD\\uDFFF])))?|\\uDFFF(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB-\\uDFFE])))?))?|\\uDC6F(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|\\uDD75(?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|\\uDE2E(?:\\u200D\\uD83D\\uDCA8)?|\\uDE35(?:\\u200D\\uD83D\\uDCAB)?|\\uDE36(?:\\u200D\\uD83C\\uDF2B\\uFE0F?)?)|\\uD83E(?:[\\uDD0C\\uDD0F\\uDD18-\\uDD1F\\uDD30-\\uDD34\\uDD36\\uDD77\\uDDB5\\uDDB6\\uDDBB\\uDDD2\\uDDD3\\uDDD5\\uDEC3-\\uDEC5\\uDEF0\\uDEF2-\\uDEF8](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDD26\\uDD35\\uDD37-\\uDD39\\uDD3D\\uDD3E\\uDDB8\\uDDB9\\uDDCD-\\uDDCF\\uDDD4\\uDDD6-\\uDDDD](?:\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDDDE\\uDDDF](?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDD0D\\uDD0E\\uDD10-\\uDD17\\uDD20-\\uDD25\\uDD27-\\uDD2F\\uDD3A\\uDD3F-\\uDD
45\\uDD47-\\uDD76\\uDD78-\\uDDB4\\uDDB7\\uDDBA\\uDDBC-\\uDDCC\\uDDD0\\uDDE0-\\uDDFF\\uDE70-\\uDE7C\\uDE80-\\uDE88\\uDE90-\\uDEBD\\uDEBF-\\uDEC2\\uDECE-\\uDEDB\\uDEE0-\\uDEE8]|\\uDD3C(?:\\u200D[\\u2640\\u2642]\\uFE0F?|\\uD83C[\\uDFFB-\\uDFFF])?|\\uDDD1(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1))|\\uD83C(?:\\uDFFB(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFC-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFC(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB\\uDFFD-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFD(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFE(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFD\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFF(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFE]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?))?|\\uDEF1(?:\\uD83C(?:\\uDFFB(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFC-\\uDFFF])?|\\uDFFC(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB\\uDFFD-\\uDFFF])?|\\uDFFD(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF])?|\\uDFFE(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB-\\uDFFD\\uDFFF])?|\\uDFFF(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB-\\uDFFE])?))?))+")
12 |
13 | (def regex #?(:cljs (js/RegExp. regex-js)
14 |               :clj (re-pattern regex-java)))
15 |
16 | (comment
17 | ;; build regex
18 | (str "^(?:"
19 | (slurp "https://raw.githubusercontent.com/mathiasbynens/emoji-test-regex-pattern/f798c38987917b48e26d490590ba4f5481eb6e93/dist/latest/java.txt")
20 | ")+")
21 |
22 | (re-matches regex "🐞")
23 | (re-matches regex "🐞🐞")
24 | (re-matches regex "✋🏿")
25 | (re-matches regex "Not ✋🏿")
26 | (re-matches regex "⚛️")
27 | (re-matches regex "☹️"))
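;; Because the pattern is ^-anchored, `re-find` can be used to pull a leading emoji
;; run off a string; `emoji-prefix` below is only an illustration, not part of this
;; namespace:
(comment
  (defn emoji-prefix [s] (re-find regex s))
  (emoji-prefix "🐞 found a bug") ;; => "🐞"
  (emoji-prefix "no emoji here")  ;; => nil
  )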
28 |
--------------------------------------------------------------------------------
/test/nextjournal/markdown/multi_threading_test.clj:
--------------------------------------------------------------------------------
1 | (ns nextjournal.markdown.multi-threading-test
2 | (:require [clojure.test :as t :refer [deftest testing is]]
3 | [nextjournal.markdown :as md]))
4 |
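;; Parses the same document from two threads at once; a thread-safety problem in the
;; underlying parser would show up as IllegalStateExceptions collected in !exs.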
5 | (deftest multithreading
6 | (let [!exs (atom [])
7 | proc (fn []
8 | (try (md/parse (slurp "notebooks/reference.md"))
9 | (catch IllegalStateException e
10 | (swap! !exs conj e))))
11 | t1 (new Thread proc)
12 | t2 (new Thread proc)]
13 |
14 | (.start t1) (.start t2)
15 | (.join t1) (.join t2)
16 | (is (zero? (count @!exs)))))
17 |
--------------------------------------------------------------------------------
/test/test_runner.clj:
--------------------------------------------------------------------------------
1 | (ns test-runner
2 | (:require [clojure.test]
3 | [nextjournal.markdown-test]
4 | [nextjournal.markdown.multi-threading-test]))
5 |
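;; Runs every nextjournal.markdown test namespace and exits non-zero when any test
;; fails or errors, so a CI invocation reports the failure.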
6 | (defn run [_]
7 | (let [{:keys [fail error]} (clojure.test/run-all-tests #"nextjournal\.markdown.*-test")]
8 | (when (< 0 (+ fail error))
9 | (System/exit 1))))
10 |
11 | #_(clojure.test/run-all-tests #"nextjournal\.markdown.*-test")
12 |
--------------------------------------------------------------------------------