├── .dir-locals.el
├── .gitattributes
├── .github
│   └── workflows
│       └── main.yml
├── .gitignore
├── CHANGELOG.md
├── LICENSE
├── README.md
├── bb.edn
├── build.clj
├── deps.edn
├── dev
│   └── nextjournal
│       └── markdown
│           ├── parser.cljc
│           └── render.cljs
├── notebooks
│   ├── demo.docx
│   ├── images.clj
│   ├── pandoc.clj
│   ├── parsing_extensibility.clj
│   ├── reference.md
│   ├── tight_lists.clj
│   └── try.clj
├── out
│   └── sci
│       └── index.html
├── package.json
├── resources
│   └── META-INF
│       └── nextjournal
│           └── markdown
│               └── meta.edn
├── shadow-cljs.edn
├── src
│   ├── deps.cljs
│   ├── js
│   │   └── markdown.js
│   └── nextjournal
│       ├── markdown.cljc
│       └── markdown
│           ├── impl.clj
│           ├── impl.cljs
│           ├── impl
│           │   ├── extensions.clj
│           │   ├── types.clj
│           │   └── types
│           │       ├── CustomNode.class
│           │       └── CustomNode.java
│           ├── transform.cljc
│           ├── utils.cljc
│           └── utils
│               └── emoji.cljc
├── test
│   ├── nextjournal
│   │   ├── markdown
│   │   │   └── multi_threading_test.clj
│   │   └── markdown_test.cljc
│   └── test_runner.clj
└── yarn.lock

/.dir-locals.el:
--------------------------------------------------------------------------------
1 | ((clojure-mode
2 |   (cider-clojure-cli-aliases . ":nextjournal/clerk:test:repl")))
3 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | resources/js/* linguist-vendored
2 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
  1 | name: Continuous Delivery
  2 | on: push
  3 | jobs:
  4 |   tests:
  5 |     name: Tests
  6 |     runs-on: ${{matrix.sys.os}}
  7 |     strategy:
  8 |       matrix:
  9 |         sys:
 10 |           - { os: macos-latest, shell: bash }
 11 |           - { os: ubuntu-latest, shell: bash }
 12 |           - { os: windows-latest, shell: powershell }
 13 |     defaults:
 14 |       run:
 15 |         shell: ${{matrix.sys.shell}}
 16 |     steps:
 17 |       - name: 🛎 Checkout
 18 |         uses: actions/checkout@v2
 19 | 
 20 |       - name: 🔧 Install java
 21 |         uses: actions/setup-java@v1
 22 |         with:
 23 |           java-version: '11.0.7'
 24 | 
 25 |       - name: 🔧 Install clojure
 26 |         uses: DeLaGuardo/setup-clojure@master
 27 |         with:
 28 |           cli: '1.12.0.1530'
 29 | 
 30 |       - name: 🗝 maven cache
 31 |         uses: actions/cache@v4
 32 |         with:
 33 |           path: |
 34 |             ~/.m2
 35 |             ~/.gitlibs
 36 |             ~/.deps.clj
 37 |           key: ${{ runner.os }}-maven-${{ github.sha }}
 38 |           restore-keys: |
 39 |             ${{ runner.os }}-maven-
 40 | 
 41 |       - name: 🧪 Run tests
 42 |         run: clojure -X:test
 43 | 
 44 |   cljs-tests:
 45 |     name: ClojureScript Tests
 46 |     runs-on: ubuntu-latest
 47 |     steps:
 48 |       - name: 🛎 Checkout
 49 |         uses: actions/checkout@v2
 50 | 
 51 |       - name: 🔧 Setup Babashka
 52 |         uses: turtlequeue/setup-babashka@v1.3.0
 53 |         with:
 54 |           babashka-version: 0.7.8
 55 | 
 56 |       - name: 🗝 Shadow compiler cache
 57 |         uses: actions/cache@v4
 58 |         with:
 59 |           path: .shadow-cljs
 60 |           key: ${{ runner.os }}-shadow-cljs-${{ github.sha }}
 61 |           restore-keys: |
 62 |             ${{ runner.os }}-shadow-cljs-
 63 | 
 64 |       - name: 🧪 Run tests
 65 |         run: bb test:cljs
 66 | 
 67 |   notebooks:
 68 |     name: Clerk Notebooks Build
 69 |     runs-on: ubuntu-latest
 70 |     steps:
 71 |       - name: 🛎 Checkout
 72 |         uses: actions/checkout@v2
 73 | 
 74 |       - name: 🔧 Install java
 75 |         uses: actions/setup-java@v1
 76 |         with:
 77 |           java-version: '11.0.7'
 78 | 
 79 |       - name: 🔧 Install clojure
 80 |         uses: DeLaGuardo/setup-clojure@master
 81 |         with:
 82 |           cli: '1.10.3.943'
 83 | 
 84 |       - name: 🔧 Setup Babashka
 85 |         uses: turtlequeue/setup-babashka@v1.3.0
 86 |         with:
 87 |           babashka-version: 0.7.8
 88 | 
 89 |       - name: 🔧 Install Pandoc
 90 |         run: |
 91 |           curl -LO https://github.com/jgm/pandoc/releases/download/2.18/pandoc-2.18-1-amd64.deb
 92 |           ls -lah
 93 |           sudo dpkg -i pandoc-2.18-1-amd64.deb
 94 | 
 95 |       - name: 🔧 Setup LaTeX
 96 |         uses: wtfjoke/setup-tectonic@v3.0.4
 97 |         with:
 98 |           github-token: ${{ secrets.GITHUB_TOKEN }}
 99 | 
100 |       - name: 🗝 maven cache
101 |         uses: actions/cache@v4
102 |         with:
103 |           path: |
104 |             ~/.m2
105 |             ~/.gitlibs
106 |           key: ${{ runner.os }}-maven-${{ github.sha }}
107 |           restore-keys: |
108 |             ${{ runner.os }}-maven-
109 | 
110 |       - name: 🗝 Clerk Cache
111 |         uses: actions/cache@v4
112 |         with:
113 |           path: .clerk
114 |           key: ${{ runner.os }}-clerk-cache
115 | 
116 |       - name: 🗝 Shadow compiler cache
117 |         uses: actions/cache@v4
118 |         with:
119 |           path: .shadow-cljs
120 |           key: ${{ runner.os }}-shadow-cljs-${{ github.sha }}
121 |           restore-keys: |
122 |             ${{ runner.os }}-shadow-cljs-
123 | 
124 |       - name: 🔐 Google Auth
125 |         uses: google-github-actions/auth@v2.1.6
126 |         with:
127 |           credentials_json: ${{ secrets.GCLOUD_SERVICE_KEY }}
128 | 
129 |       - name: 🔧 Setup Google Cloud SDK
130 |         uses: google-github-actions/setup-gcloud@v0.3.0
131 | 
132 |       - name: 🏗 Build Clerk Notebooks
133 |         run: bb build:notebooks ${{ github.sha }}
134 | 
135 |       - name: 📠 Copy static build to bucket under SHA
136 |         run: |
137 |           gsutil cp -r public/build gs://nextjournal-snapshots/markdown/build/${{ github.sha }}
138 | 
139 |       - name: 📠 Copy static build to GitHub Pages
140 |         if: ${{ github.ref == 'refs/heads/main' }}
141 |         uses: JamesIves/github-pages-deploy-action@4.1.6
142 |         with:
143 |           branch: gh-pages # The branch the action should deploy to.
144 |           folder: public/build # The folder the action should deploy.
145 | 
146 |       - name: 📤 Upload Pdf demo notebook
147 |         uses: actions/upload-artifact@v4
148 |         with:
149 |           name: demo.pdf
150 |           path: notebooks/demo.pdf
151 | 
152 |       - name: ✅ Add success status to report with link to snapshot
153 |         uses: Sibz/github-status-action@v1
154 |         with:
155 |           authToken: ${{secrets.GITHUB_TOKEN}}
156 |           context: 'Continuous Delivery / Clerk Static App'
157 |           description: 'Ready'
158 |           state: 'success'
159 |           sha: ${{github.event.pull_request.head.sha || github.sha}}
160 |           target_url: https://snapshots.nextjournal.com/markdown/build/${{ github.sha }}
161 | 
162 |   deploy:
163 |     needs: [tests, cljs-tests]
164 |     runs-on: ubuntu-latest
165 |     steps:
166 |       - name: 🛎 Checkout
167 |         uses: actions/checkout@v3
168 | 
169 |       - name: 🏷 Get tags
170 |         run: git fetch --tags origin
171 | 
172 |       - name: 🔧 Setup Babashka
173 |         uses: turtlequeue/setup-babashka@v1.3.0
174 |         with:
175 |           babashka-version: 0.8.156
176 | 
177 |       - name: 🗝 maven cache
178 |         uses: actions/cache@v4
179 |         with:
180 |           path: |
181 |             ~/.m2
182 |             ~/.gitlibs
183 |           key: ${{ runner.os }}-maven-${{ github.sha }}
184 |           restore-keys: |
185 |             ${{ runner.os }}-maven-
186 | 
187 |       - name: 🍯 Publish to clojars
188 |         env:
189 |           CLOJARS_USERNAME: mkvlr
190 |           CLOJARS_PASSWORD: ${{ secrets.CLOJARS_PASSWORD_MKVLR }}
191 |         run: bb ci:publish
192 | 
193 |       - name: 🔢 Set lib version
194 |         id: jar-version
195 |         run: |
196 |           JAR_VERSION=$(bb current-version)
197 |           echo "##[set-output name=version;]${JAR_VERSION}"
198 | 
199 |       - name: 📤 Upload JAR
200 |         uses: actions/upload-artifact@v4
201 |         with:
202 |           name: markdown-${{ steps.jar-version.outputs.version }}.jar
203 |           path: target/markdown-${{ steps.jar-version.outputs.version }}.jar
204 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.class
 2 | *.iml
 3 | *.jar
 4 | .clerk
 5 | .cpcache
 6 | .idea
 7 | .nrepl-port
 8 | classes
 9 | node_modules
10 | pom.xml
11 | pom.xml.asc
12 | public
13 | target
14 | .DS_Store
15 | .shadow-cljs
16 | out
17 | notebooks/scratch
18 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## Unreleased
 4 | 
 5 | * Hiccup JVM compatibility for fragments (see [#34](https://github.com/nextjournal/markdown/issues/34))
 6 | * Support HTML blocks and inline HTML (see [#7](https://github.com/nextjournal/markdown/issues/7))
 7 | * Bump commonmark to 0.24.0
 8 | * Bump markdown-it to 14.1.0
 9 | * Render `:code` according to spec into `<pre>` and `<code>` block with language class (see [#39](https://github.com/nextjournal/markdown/issues/39))
10 | * No longer depend on `applied-science/js-interop`
11 | * Accept parsed result in `->hiccup` function
12 | 
13 | ## 0.6.157
14 | 
15 | * Swap out GraalJS ([#28](https://github.com/nextjournal/markdown/issues/28)) in favour of [commonmark-java](https://github.com/commonmark/commonmark-java) on the JVM side.
16 |   This makes the library compatible with Java 22 and yields an approximate speedup of 10x. The clojurescript implementation stays the same.
17 | * Comply with commonmark rendering of images by default (see [#18](https://github.com/nextjournal/markdown/issues/18)).
18 | 
19 | ## 0.5.148
20 | 
21 | * Fixes a bug in the construction of the table of contents ([#19](https://github.com/nextjournal/markdown/issues/19)).
22 | 
23 | ## 0.5.146
24 | * Fix graaljs multithreaded access ([#17](https://github.com/nextjournal/markdown/issues/17))
25 | 
26 | ## 0.5.144
27 | * Disable parsing hashtags and internal links by default ([#14](https://github.com/nextjournal/markdown/issues/14))
28 | * Allow conditional application of custom tokenizers depending on document state around the text location
29 | * Added an arity-2 version of `nextjournal.markdown/parse` to customize parsing options (e.g. custom tokenizers) more conveniently.
30 | * Support hard-breaks
31 | * Fix conversion to hiccup for tables with empty cells ([#13](https://github.com/nextjournal/markdown/issues/13))
32 | 
33 | ## 0.4.138
34 | * Uses the official markdown-it/footnote plugin 
35 | * Adds optional (post-parse) handling of footnotes as sidenotes
36 | 
37 | ## 0.4.135
38 | * node-to-text transformation interprets softbreaks as spaces
39 | 
40 | ## 0.4.132
41 | * Extract and assign leading emoji from heading nodes
42 | 
43 | ## 0.4.130
44 | * Produce unique ids in attrs for header nodes
45 | * Drop lambdaisland.uri dependency
46 | 
47 | ## 0.4.126
48 | * Add `deps.cljs` to classpath
49 | 
50 | ## 0.4.123
51 | * downgrade GraalJS to keep Java 8 compatibility
52 | 
53 | ## 0.4.116
54 | * Bump data.json
55 | 
56 | ## 0.4.112
57 | * Distinguish between tight and loose lists
58 | 
59 | ## 0.4.109
60 | * [More work on parsing extensibility](https://snapshots.nextjournal.com/markdown/build/7f5c1e24aeb3842235bc6175aa55dbd9a96d25d1/index.html#/notebooks/parsing_extensibility.clj)
61 | * A new home: https://github.com/nextjournal/markdown
62 | 
63 | ## 0.3.69
64 | * Extensible parsing of leaf text nodes
65 | 
66 | ## 0.2.44
67 | * Simplified `:toc` structure.
68 | 
69 | ## 0.1.37
70 | * First Release.
71 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2022 Nextjournal GmbH.
 2 | 
 3 | Permission to use, copy, modify, and/or distribute this software for any purpose
 4 | with or without fee is hereby granted, provided that the above copyright notice
 5 | and this permission notice appear in all copies.
 6 | 
 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 8 | REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 9 | FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
10 | INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
11 | OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
12 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
13 | THIS SOFTWARE.
14 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # nextjournal markdown
  2 | 
  3 | [![Clojars Project](https://img.shields.io/clojars/v/io.github.nextjournal/markdown.svg)](https://clojars.org/io.github.nextjournal/markdown) [![Notebooks](https://img.shields.io/static/v1?label=clerk&message=notebooks&color=rgb(155,187,157))](https://nextjournal.github.io/markdown)
  4 | 
  5 | A cross-platform Clojure library for [Markdown](https://en.wikipedia.org/wiki/Markdown) parsing and transformation.
  6 | 
  7 | 🚧 _ALPHA_ status, subject to frequent change. For a richer reading experience [read this readme as a clerk notebook](https://nextjournal.github.io/markdown/README).
  8 | 
  9 | ## Features
 10 | 
 11 | * _Focus on data_: parsing yields an AST ([à la Pandoc](https://nextjournal.github.io/markdown/notebooks/pandoc)) of nested data representing a structured document.
 12 | * _Cross Platform_: using [commonmark-java](https://github.com/commonmark/commonmark-java) on the JVM and [markdown-it](https://github.com/markdown-it/markdown-it) for ClojureScript.
 13 | * _Configurable [Hiccup](https://github.com/weavejester/hiccup) conversion_.
 14 | 
 15 | ## Try
 16 | 
 17 | [Try it online](https://nextjournal.github.io/markdown/notebooks/try).
 18 | 
 19 | ## Flavor
 20 | 
 21 | We adhere to the [CommonMark Spec](https://spec.commonmark.org/0.30/) and support the extensions from [GitHub Flavored Markdown](https://github.github.com/gfm). Additionally, we parse $\LaTeX$ formulas (delimited by `$` for inline rendering or `$$` for display mode).
 22 | 
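For instance (a minimal sketch using the `md` alias required in the Usage section below; output abridged), formulas surface as `:formula` and `:block-formula` nodes:

```clojure
(md/parse "Euler's identity: $e^{i\\pi} + 1 = 0$

$$\\int_a^b f(x)\\,dx$$")
```
    ;; =>
    ;; a :doc node whose :content holds a :paragraph containing
    ;; {:type :formula, :text "e^{i\\pi} + 1 = 0"}
    ;; followed by a top-level {:type :block-formula, :text "\\int_a^b f(x)\\,dx"}
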
 23 | ## Usage
 24 | 
 25 | ```clojure
 26 | (ns hello-markdown
 27 |   (:require [nextjournal.markdown :as md]
 28 |             [nextjournal.markdown.transform :as md.transform]))
 29 | ```
 30 | 
 31 | Parsing markdown into an AST:
 32 | 
 33 | ```clojure
 34 | (def data 
 35 |   (md/parse "> et tout autour, la longue cohorte de ses personnages, avec leur histoire, leur passé, leurs légendes:
 36 | > 1. Pélage vainqueur d'Alkhamah se faisant couronner à Covadonga
 37 | > 2. La cantatrice exilée de Russie suivant Schönberg à Amsterdam
 38 | > 3. Le petit chat sourd aux yeux vairons vivant au dernier étage
 39 | > 4. ...
 40 | 
 41 | **Georges Perec**, _La Vie mode d'emploi_.
 42 | 
 43 | ---
 44 | "))
 45 | ```
 46 |     ;; =>
 47 |     {:type :doc,
 48 |      :content [{:type :blockquote,
 49 |                 :content [{:type :paragraph,
 50 |                            :content [{:type :text,
 51 |                                       :text "et tout autour, la longue cohorte de ses personnages, avec leur histoire, leur passé, leurs légendes:"}]}
 52 |                           {:type :numbered-list,
 53 |                            :content [{:type :list-item,
 54 |                                       :content [{:type :plain,
 55 |                                                  :content [{:type :text,
 56 |                                                             :text "Pélage vainqueur d'Alkhamah se faisant couronner à Covadonga"}]}]}
 57 |                                      {:type :list-item,
 58 |                                       :content [{:type :plain,
 59 |                                                  :content [{:type :text,
 60 |                                                             :text "La cantatrice exilée de Russie suivant Schönberg à Amsterdam"}]}]}
 61 |                                      {:type :list-item,
 62 |                                       :content [{:type :plain,
 63 |                                                  :content [{:type :text,
 64 |                                                             :text "Le petit chat sourd aux yeux vairons vivant au dernier étage"}]}]}]}]}
 65 |                {:type :paragraph,
 66 |                 :content [{:type :strong, :content [{:type :text, :text "Georges Perec"}]}
 67 |                           {:type :text, :text ", "}
 68 |                           {:type :em, :content [{:type :text, :text "La Vie mode d'emploi"}]}
 69 |                           {:type :text, :text "."}]}
 70 |                {:type :ruler}]}
 71 | 
 72 | and transform that AST into `hiccup` syntax.
 73 | 
 74 | ```clojure
 75 | (md.transform/->hiccup data)
 76 | ```
 77 |     ;; =>
 78 |     [:div
 79 |      [:blockquote
 80 |       [:p "et tout autour, la longue cohorte de ses personnages, avec leur histoire, leur passé, leurs légendes:"]
 81 |       [:ol
 82 |        [:li [:<> "Pélage vainqueur d'Alkhamah se faisant couronner à Covadonga"]]
 83 |        [:li [:<> "La cantatrice exilée de Russie suivant Schönberg à Amsterdam"]]
 84 |        [:li [:<> "Le petit chat sourd aux yeux vairons vivant au dernier étage"]]]]
 85 |      [:p [:strong "Georges Perec"] ", " [:em "La Vie mode d'emploi"] "."]
 86 |      [:hr]]
 87 | 
 88 | We've built hiccup transformation in for convenience, but the same approach can be used to target [more formats](https://nextjournal.github.io/markdown/notebooks/pandoc).
 89 | 
 90 | This library is one of the building blocks of [Clerk](https://github.com/nextjournal/clerk) where it is used for rendering _literate fragments_.
 91 | 
 92 | ```clojure
 93 | ^{:nextjournal.clerk/viewer 'nextjournal.clerk.viewer/markdown-viewer}
 94 | data
 95 | ```
 96 | 
 97 | The transformation of markdown node types can be customised like this:
 98 | 
 99 | ```clojure
100 | ^{:nextjournal.clerk/viewer 'nextjournal.clerk.viewer/html-viewer}
101 | (md.transform/->hiccup
102 |  (assoc md.transform/default-hiccup-renderers
103 |         ;; :doc specifies a custom container for the whole doc
104 |         :doc (partial md.transform/into-markup [:div.viewer-markdown])
105 |         ;; :text is funkier when it's zinc toned 
106 |         :text (fn [_ctx node] [:span {:style {:color "#71717a"}} (:text node)])
107 |         ;; :plain fragments might be nice, but paragraphs help when no reagent is at hand
108 |         :plain (partial md.transform/into-markup [:p {:style {:margin-top "-1.2rem"}}])
109 |         ;; :ruler gets to be funky, too
110 |         :ruler (constantly [:hr {:style {:border "2px dashed #71717a"}}]))
111 |  data)
112 | ```
113 | 
114 | ## Extensibility
115 | 
116 | We added minimal tooling for [extending markdown expressions](https://nextjournal.github.io/markdown/notebooks/parsing_extensibility).
117 | 
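A minimal sketch of a custom text tokenizer, reusing the mustache example from `dev/nextjournal/markdown/parser.cljc` in this repo and assuming the arity-2 `md/parse` mentioned in the changelog accepts a `:text-tokenizers` option (the notebook linked above is the authoritative walkthrough):

```clojure
(md/parse {:text-tokenizers [{:regex #"\{\{([^\{]+)\}\}"
                              :handler (fn [m] {:type :eval :text (m 1)})}]}
          "Inline directives like {{this}} become custom nodes.")
```
    ;; =>
    ;; the paragraph's :content interleaves ordinary :text nodes with
    ;; {:type :eval, :text "this"}
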


--------------------------------------------------------------------------------
/bb.edn:
--------------------------------------------------------------------------------
  1 | {:min-bb-version "0.7.8"
  2 |  :tasks
  3 |  {:requires ([clojure.edn :as edn]
  4 |              [clojure.string :as str]
  5 |              [babashka.fs :as fs]
  6 |              [babashka.process :as p])
  7 | 
  8 |   :init (do
  9 |           (def major 0)
 10 |           (def minor 6)
 11 |           (def rev-count-offset 69) ;; previous repo offset
 12 |           (def meta-inf-file "resources/META-INF/nextjournal/markdown/meta.edn")
 13 | 
 14 |           (defn rev-count []
 15 |             (-> (p/process ["git" "rev-list" "HEAD" "--count"] {:out :string})
 16 |               p/check :out str/trim Integer/parseInt))
 17 | 
 18 |           (defn version [] (format "%d.%d.%d" major minor (inc (+ (rev-count) rev-count-offset))))
 19 | 
 20 |           (defn update-changelog []
 21 |             (->> (str/replace (slurp "CHANGELOG.md")
 22 |                    (re-pattern "## [Uu]nreleased")
 23 |                    (str "## Unreleased\n\n...\n\n"
 24 |                      (format "## %s" (version))))
 25 |               (spit "CHANGELOG.md")))
 26 | 
 27 |           (defn read-version [] (-> (slurp meta-inf-file) edn/read-string :version)))
 28 | 
 29 |   yarn-install
 30 |   {:doc "Installs and updates npm dependencies"
 31 |    :task (shell "yarn install")}
 32 | 
 33 |   test
 34 |   {:doc "runs tests in the markdown module"
 35 |    :task (clojure "-X:test")}
 36 | 
 37 |   build:notebooks
 38 |   {:doc "builds a Clerk static with notebooks specified in deps.edn given a specified git SHA"
 39 |    :task (clojure (str "-X:dev:nextjournal/clerk :git/sha '\"" (or (first *command-line-args*) "SHASHASHA") "\"' :browse? false"))}
 40 | 
 41 |   dev
 42 |   {:doc "Boots and watches shadow browser test"
 43 |    :depends [yarn-install]
 44 |    :task (clojure "-M:dev:test:nextjournal/clerk:shadow watch browser-test")}
 45 | 
 46 |   cljs:compile:tests
 47 |   {:doc "compiles tests as node executable"
 48 |    :depends [yarn-install]
 49 |    :task (clojure "-M:dev:test:shadow compile test")}
 50 | 
 51 |   test:cljs
 52 |   {:doc "runs cljs tests via node"
 53 |    :depends [cljs:compile:tests]
 54 |    :task (shell "yarn node --trace-uncaught out/node-tests.js")}
 55 | 
 56 |   link-changelog {:doc "Turns the issue references in the changelog into links"
 57 |                   :task (do (defn tag->issue-link [s]
 58 |                               (clojure.string/replace s (re-pattern "(?issue-link (slurp f)))))}
 61 | 
 62 |   update-meta {:doc "Updates meta.edn with current version (based on commit count currently)."
 63 |                :task (spit (doto (fs/file meta-inf-file)
 64 |                              (-> fs/parent fs/create-dirs)) {:version (version)})}
 65 | 
 66 |   tag {:doc "Tags release and pushes tag to Github."
 67 |        :task (let [tag (str "v" (read-version))]
 68 |                (shell "git tag" tag))}
 69 | 
 70 |   delete-tag {:doc "Tells git to delete the tag at the current version"
 71 |               :task (shell (str "git tag -d v" (read-version)))}
 72 | 
 73 |   current-version {:doc "Prints the version as written to META-INF during publishing"
 74 |                    :task (print (read-version))}
 75 | 
 76 |   publish {:doc "Prepares repo for publishing via CI"
 77 |            :task (do
 78 |                    (run 'update-meta)
 79 |                    (println "Preparing repo for Release.\n Updated worktree has been committed (e.g. changes to CHANGELOG)" (read-version))
 80 |                    (run 'link-changelog)
 81 |                    (update-changelog)
 82 |                    (shell "git add -u")
 83 |                    (shell (str "git commit -m v" (read-version)))
 84 |                    (run 'tag)
 85 |                    (println "\n\nRun:\n\n" "  git push --atomic"
 86 |                      "origin" "main" (str "v" (read-version))
 87 |                      "\n\nto push the release and let CI build it!"))}
 88 | 
 89 |   undo:publish {:doc "Reset to state prior to `bb publish`"
 90 |                 :task (do
 91 |                         (run 'delete-tag)
 92 |                         (shell "git reset HEAD~1")
 93 |                         (shell "git co -- resources/META-INF/nextjournal/markdown/meta.edn"))}
 94 | 
 95 |   -current-tag (->> (shell {:out :string} "git tag --points-at HEAD")
 96 |                  :out
 97 |                  str/trim
 98 |                  not-empty)
 99 | 
100 |   -current-branch (->> (shell {:out :string} "git branch --show-current")
101 |                     :out
102 |                     str/trim)
103 | 
104 |   jar {:doc "Build jar"
105 |        :task (do
106 |                (println "Building jar")
107 |                (clojure (str "-T:build jar :version '\"" (read-version) "\"'")))}
108 | 
109 |   install {:doc "Install jar locally"
110 |            :task (do
111 |                    (println "Installing locally")
112 |                    (clojure (str "-T:build install :version '\"" (read-version) "\"'")))}
113 | 
114 |   ci:publish {:doc "Publish task which will be run on CI"
115 |               :depends [-current-tag -current-branch]
116 |               :task (do
117 |                       (prn :current-tag -current-tag)
118 |                       (prn :current-branch -current-branch)
119 |                       (if (and -current-tag (= "main" -current-branch))
120 |                         (do
121 |                           (println "Deploying to clojars")
122 |                           (clojure (str "-T:build deploy :version '\"" (read-version) "\"'")))
123 |                         ;; still build jar for artifact upload
124 |                         (run 'jar)))}}}
125 | 


--------------------------------------------------------------------------------
/build.clj:
--------------------------------------------------------------------------------
 1 | (ns build
 2 |   (:require [clojure.tools.build.api :as b]
 3 |             [deps-deploy.deps-deploy :as dd]))
 4 | 
 5 | (def lib 'io.github.nextjournal/markdown)
 6 | 
 7 | (defn scm [version]
 8 |   {:url "https://github.com/nextjournal/markdown"
 9 |    :tag (str "v" version)
10 |    :connection "scm:git:git://github.com/nextjournal/markdown.git"
11 |    :developerConnection "scm:git:ssh://git@github.com/nextjournal/markdown.git"})
12 | 
13 | (def class-dir "target/classes")
14 | 
15 | (def basis (b/create-basis {:project "deps.edn"}))
16 | 
17 | (defn jar-file [version] (format "target/%s-%s.jar" (name lib) version))
18 | 
19 | (defn clean [_] (b/delete {:path "target"}))
20 | 
21 | (defn jar [{:keys [version]}]
22 |   (b/delete {:path "target"})
23 |   (println "Producing jar: " (jar-file version))
24 |   (b/write-pom {:basis basis
25 |                 :class-dir class-dir
26 |                 :lib lib
27 |                 :scm (scm version)
28 |                 :src-dirs ["src"]
29 |                 :version version
30 |                 :pom-data
31 |                 [[:licenses
32 |                   [:license
33 |                    [:name "ISC License"]
34 |                    [:url "https://opensource.org/license/isc-license-txt"]]]]})
35 |   (b/copy-dir {:src-dirs ["src" "resources"]
36 |                :target-dir class-dir
37 |                :replace {}})
38 |   (b/jar {:class-dir class-dir
39 |           :jar-file (jar-file version)}))
40 | 
41 | (defn install [{:keys [version] :as opts}]
42 |   (jar opts)
43 |   (b/install {:basis basis
44 |               :lib lib
45 |               :version (:version opts)
46 |               :jar-file (jar-file version)
47 |               :class-dir class-dir}))
48 | 
49 | (defn deploy [{:keys [version] :as opts}]
50 |   (println "Deploying version" (jar-file version) "to Clojars.")
51 |   (jar opts)
52 |   (dd/deploy {:installer :remote
53 |               :artifact (jar-file version)
54 |               :pom-file (b/pom-path {:lib lib :class-dir class-dir})}))
55 | 


--------------------------------------------------------------------------------
/deps.edn:
--------------------------------------------------------------------------------
 1 | {:paths ["src" "resources"]
 2 |  :deps {org.commonmark/commonmark {:mvn/version "0.24.0"}
 3 |         org.commonmark/commonmark-ext-autolink {:mvn/version "0.24.0"}
 4 |         org.commonmark/commonmark-ext-footnotes {:mvn/version "0.24.0"}
 5 |         org.commonmark/commonmark-ext-task-list-items {:mvn/version "0.24.0"}
 6 |         org.commonmark/commonmark-ext-gfm-tables {:mvn/version "0.24.0"}
 7 |         org.commonmark/commonmark-ext-gfm-strikethrough {:mvn/version "0.24.0"}}
 8 | 
 9 |  :aliases
10 |  {:nextjournal/clerk
11 |   {:extra-paths ["notebooks" "dev"]
12 |    :extra-deps {io.github.nextjournal/clerk {:mvn/version "0.17.1102"
13 |                                              :exclusions [io.github.nextjournal/markdown]}}
14 |    :jvm-opts ["-Dclojure.main.report=stderr"
15 |               #_"-Dclerk.resource_manifest={\"/js/viewer.js\" \"js/viewer.js\"}"] ;;
16 |    :exec-fn nextjournal.clerk/build!
17 |    :exec-args {:git/url "https://github.com/nextjournal/markdown"
18 |                :paths ["README.md"
19 |                        "CHANGELOG.md"
20 |                        "notebooks/try.clj"
21 |                        "notebooks/images.clj"
22 |                        "notebooks/pandoc.clj"
23 |                        "notebooks/parsing_extensibility.clj"
24 |                        "notebooks/benchmarks.clj"
25 |                        "notebooks/tight_lists.clj"]}}
26 | 
27 |   :quiet
28 |   {:jvm-opts ["-Dpolyglot.engine.WarnInterpreterOnly=false"]}
29 | 
30 |   :dev
31 |   {:extra-paths ["dev" "notebooks"]
32 |    :extra-deps {applied-science/js-interop {:mvn/version "0.3.3"}
33 |                 org.babashka/http-client {:mvn/version "0.3.11"}
34 |                 org.clojure/data.json {:mvn/version "2.4.0"}
35 |                 org.clojure/test.check {:mvn/version "1.1.1"}
36 |                 io.github.nextjournal/clerk {:git/sha "f4c5488e36c8df11fe352889544e7deb9af73cb7"
37 |                                              :exclusions [io.github.nextjournal/markdown]}
38 |                 nubank/matcher-combinators {:mvn/version "3.8.3"}
39 |                 hiccup/hiccup {:mvn/version "2.0.0-RC5"}
40 |                 org.graalvm.js/js {:mvn/version "21.3.2.1"}}}
41 | 
42 |   :test
43 |   {:extra-paths ["test"]
44 |    :jvm-opts ["-Dclojure.main.report=stderr"]
45 |    :extra-deps {nubank/matcher-combinators {:mvn/version "3.9.1"}
46 |                 hiccup/hiccup {:mvn/version "2.0.0-RC5"}}
47 |    :exec-fn test-runner/run}
48 | 
49 |   :shadow
50 |   {:main-opts ["-m" "shadow.cljs.devtools.cli"]
51 |    :extra-deps {thheller/shadow-cljs {:mvn/version "2.18.0"}}}
52 | 
53 |   :build
54 |   {:ns-default build
55 |    :jvm-opts ["-Dclojure.main.report=stderr"]
56 |    :deps {io.github.clojure/tools.build {:git/tag "v0.10.3" :git/sha "15ead66"}
57 |           io.github.slipset/deps-deploy {:git/sha "b4359c5d67ca002d9ed0c4b41b710d7e5a82e3bf"}}}}}
58 | 


--------------------------------------------------------------------------------
/dev/nextjournal/markdown/parser.cljc:
--------------------------------------------------------------------------------
  1 | ;; # 🧩 Parsing
  2 | ;;
  3 | ;; Deals with transforming a sequence of tokens obtained by [markdown-it] into an AST composed of nested _nodes_.
  4 | ;;
  5 | ;; A _node_ is a clojure map and has no closed specification at the moment. We do follow a few conventions for its keys:
  6 | ;;
  7 | ;; - `:type` a keyword (:heading, :paragraph, :text, :code etc.) present on all nodes.
  8 | ;;
  9 | ;; When a node contains other child nodes, then it will have a
 10 | ;;
 11 | ;; - `:content` a collection of nodes representing nested content
 12 | ;;
 13 | ;; when a node is a textual leaf (as in `:text` or `:formula` nodes) it carries a
 14 | ;; - `:text` key with a string value
 15 | ;;
 16 | ;; Other keys might include e.g.
 17 | ;;
 18 | ;; - `:info` specific to fenced code blocks
 19 | ;; - `:heading-level` specific to `:heading` nodes
 20 | ;; - `:attrs` attributes as passed by markdown-it tokens (e.g `{:style "some style info"}`)
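;;
;; For instance (a sketch following the conventions above), the line `# Intro` parses into a node shaped like
;;
;;    {:type :heading :heading-level 1 :attrs {:id "intro"} :content [{:type :text :text "Intro"}]}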
 21 | (ns nextjournal.markdown.parser
 22 |   (:require [clojure.string :as str]
 23 |             [clojure.zip :as z]
 24 |             [nextjournal.markdown.transform :as md.transform]
 25 |             [nextjournal.markdown.utils.emoji :as emoji]
 26 |             #?@(:cljs [[applied-science.js-interop :as j]
 27 |                        [cljs.reader :as reader]])))
 28 | 
 29 | ;; clj common accessors
 30 | (def get-in* #?(:clj get-in :cljs j/get-in))
 31 | (def update* #?(:clj update :cljs j/update!))
 32 | 
 33 | #?(:clj (defn re-groups* [m] (let [g (re-groups m)] (cond-> g (not (vector? g)) vector))))
 34 | (defn re-idx-seq
 35 |   "Takes a regex and a string, returns a seq of triplets comprised of match groups followed by indices delimiting each match."
 36 |   [re text]
 37 |   #?(:clj (let [m (re-matcher re text)]
 38 |             (take-while some? (repeatedly #(when (.find m) [(re-groups* m) (.start m) (.end m)]))))
 39 |      :cljs (let [rex (js/RegExp. (.-source re) "g")]
 40 |              (take-while some? (repeatedly #(when-some [m (.exec rex text)] [(vec m) (.-index m) (.-lastIndex rex)]))))))
 41 | 
 42 | 
 43 | (comment (re-idx-seq #"\{\{([^{]+)\}\}" "foo {{hello}} bar"))
 44 | (comment (re-idx-seq #"\{\{[^{]+\}\}" "foo {{hello}} bar"))
 45 | ;; region node operations
 46 | ;; helpers
 47 | (defn inc-last [path] (update path (dec (count path)) inc))
 48 | (defn hlevel [{:as _token hn :tag}] (when (string? hn) (some-> (re-matches #"h([\d])" hn) second #?(:clj Integer/parseInt :cljs js/parseInt))))
 49 | 
 50 | (defn split-by-emoji [s]
 51 |   (let [[match start end] (first (re-idx-seq emoji/regex s))]
 52 |     (if match
 53 |       [(subs s start end) (str/trim (subs s end))]
 54 |       [nil s])))
 55 | 
 56 | #_(split-by-emoji " Stop")
 57 | #_(split-by-emoji "🤚🏽 Stop")
 58 | #_(split-by-emoji "🤚🏽🤚 Stop")
 59 | #_(split-by-emoji "🤚🏽Stop")
 60 | #_(split-by-emoji "🤚🏽   Stop")
 61 | #_(split-by-emoji "😀 Stop")
 62 | #_(split-by-emoji "⚛️ Stop")
 63 | #_(split-by-emoji "⚛ Stop")
 64 | #_(split-by-emoji "⬇ Stop")
 65 | #_(split-by-emoji "Should not 🙁️ Split")
 66 | 
 67 | (defn text->id+emoji [text]
 68 |   (when (string? text)
 69 |     (let [[emoji text'] (split-by-emoji (str/trim text))]
 70 |       (cond-> {:id (apply str (map (comp str/lower-case (fn [c] (case c (\space \_) \- c))) text'))}
 71 |         emoji (assoc :emoji emoji)))))
 72 | 
 73 | #_(text->id+emoji "Hello There")
 74 | #_(text->id+emoji "Hello_There")
 75 | #_(text->id+emoji "👩‍🔬 Quantum Physics")
 76 | 
 77 | ;; `parse-fence-info` ingests nextjournal, GFM, Pandoc and RMarkdown fenced code block info (any text following the leading 3 backticks) and returns a map
 78 | ;;
 79 | ;; _nextjournal_ / _GFM_
 80 | ;;
 81 | ;;    ```python id=2e3541da-0735-4b7f-a12f-4fb1bfcb6138
 82 | ;;    python code
 83 | ;;    ```
 84 | ;;
 85 | ;; _Pandoc_
 86 | ;;
 87 | ;;    ```{#pandoc-id .language .extra-class key=Val}
 88 | ;;    code in language
 89 | ;;    ```
 90 | ;;
 91 | ;; _Rmd_
 92 | ;;
 93 | ;;    ```{r cars, echo=FALSE}
 94 | ;;    R code
 95 | ;;    ```
 96 | ;;
 97 | ;; See also:
 98 | ;; - https://github.github.com/gfm/#info-string
 99 | ;; - https://pandoc.org/MANUAL.html#fenced-code-blocks
100 | ;; - https://rstudio.com/wp-content/uploads/2016/03/rmarkdown-cheatsheet-2.0.pdf
101 | 
102 | (defn parse-fence-info [info-str]
103 |   (try
104 |     ;; NOTE: this fix is backported
105 |     ;; from the new implementation 👇
106 |     (when (and (string? info-str) (seq info-str))
107 |       (let [tokens (-> info-str
108 |                        str/trim
109 |                        (str/replace #"[\{\}\,]" "")         ;; remove Pandoc/Rmarkdown brackets and commas
110 |                        (str/replace "." "")                 ;; remove dots
111 |                        (str/split #" "))]                   ;; split by spaces
112 |         (reduce
113 |          (fn [{:as info-map :keys [language]} token]
114 |            (let [[_ k v] (re-matches #"^([^=]+)=([^=]+)$" token)]
115 |              (cond
116 |                (str/starts-with? token "#") (assoc info-map :id (str/replace token #"^#" "")) ;; pandoc #id
117 |                (and k v) (assoc info-map (keyword k) v)
118 |                (not language) (assoc info-map :language token) ;; language is the first simple token which is not a pandoc's id
119 |                :else (assoc info-map (keyword token) true))))
120 |          {}
121 |          tokens)))
122 |     (catch #?(:clj Throwable :cljs :default) _ {})))
123 | 
124 | (comment
125 |   (parse-fence-info "python runtime-id=5f77e475-6178-47a3-8437-45c9c34d57ff")
126 |   (parse-fence-info "{#some-id .lang foo=nex}")
127 |   (parse-fence-info "#id clojure")
128 |   (parse-fence-info "clojure #id")
129 |   (parse-fence-info "clojure")
130 |   (parse-fence-info "{r cars, echo=FALSE}"))
131 | 
132 | ;; leaf nodes
133 | (defn text-node [text] {:type :text :text text})
134 | (defn formula [text] {:type :formula :text text})
135 | (defn block-formula [text] {:type :block-formula :text text})
136 | (defn footnote-ref [ref label] (cond-> {:type :footnote-ref :ref ref} label (assoc :label label)))
137 | 
138 | ;; node constructors
139 | (defn node
140 |   [type content attrs top-level]
141 |   (cond-> {:type type :content content}
142 |     (seq attrs) (assoc :attrs attrs)
143 |     (seq top-level) (merge top-level)))
144 | 
145 | (defn empty-text-node? [{text :text t :type}] (and (= :text t) (empty? text)))
146 | 
147 | (defn push-node [{:as doc ::keys [path]} node]
148 |   (try
149 |     (cond-> doc
150 |       ;; ⬇ mdit produces empty text tokens at mark boundaries, see edge cases below
151 |       (not (empty-text-node? node))
152 |       (-> #_doc
153 |        (update ::path inc-last)
154 |        (update-in (pop path) conj node)))
155 |     (catch #?(:clj Exception :cljs js/Error) e
156 |       (throw (ex-info (str "nextjournal.markdown cannot add node: " node " at path: " path)
157 |                       {:doc doc :node node} e)))))
158 | 
159 | (def push-nodes (partial reduce push-node))
160 | 
161 | (defn open-node
162 |   ([doc type] (open-node doc type {}))
163 |   ([doc type attrs] (open-node doc type attrs {}))
164 |   ([doc type attrs top-level]
165 |    (-> doc
166 |        (push-node (node type [] attrs top-level))
167 |        (update ::path into [:content -1]))))
168 | 
169 | ;; after closing a node, document ::path will point at it
170 | (def ppop (comp pop pop))
171 | (defn close-node [doc] (update doc ::path ppop))
172 | (defn update-current [{:as doc path ::path} fn & args] (apply update-in doc path fn args))
173 | 
174 | (defn current-parent-node
175 |   "Given an open parsing context `doc`, returns the parent of the node which was last parsed into the document."
176 |   [{:as doc ::keys [path]}]
177 |   (assert path "A path is needed in document context to retrieve the current node: `current-parent-node` cannot be called after `parse`.")
178 |   (get-in doc (ppop path)))
179 | 
180 | (defn current-ancestor-nodes
181 |   "Given an open parsing context `doc`, returns the list of ancestors of the node last parsed into the document, up to but
182 |    not including the top document."
183 |   [{:as doc ::keys [path]}]
184 |   (assert path "A path is needed in document context to retrieve the current node: `current-ancestor-nodes` cannot be called after `parse`.")
185 |   (loop [p (ppop path) ancestors []]
186 |     (if (seq p)
187 |       (recur (ppop p) (conj ancestors (get-in doc p)))
188 |       ancestors)))
189 | 
190 | ;; TODO: consider rewriting parse in terms of this zipper
191 | (defn ->zip [doc]
192 |   (z/zipper (every-pred map? :type) :content
193 |             (fn [node cs] (assoc node :content (vec cs)))
194 |             doc))
195 | 
196 | (defn assign-node-id+emoji [{:as doc ::keys [id->index path] :keys [text->id+emoji-fn]}]
197 |   (let [{:keys [id emoji]} (when (ifn? text->id+emoji-fn) (-> doc (get-in path) text->id+emoji-fn))
198 |         id-count (when id (get id->index id))]
199 |     (cond-> doc
200 |       id
201 |       (update-in [::id->index id] (fnil inc 0))
202 |       (or id emoji)
203 |       (update-in path (fn [node]
204 |                         (cond-> node
205 |                           id (assoc-in [:attrs :id] (cond-> id id-count (str "-" (inc id-count))))
206 |                           emoji (assoc :emoji emoji)))))))
207 | 
208 | (comment                                                    ;; path after call
209 |   (-> empty-doc                                             ;; [:content -1]
210 |       (open-node :heading)                                  ;; [:content 0 :content -1]
211 |       (push-node {:node/type :text :text "foo"})            ;; [:content 0 :content 0]
212 |       (push-node {:node/type :text :text "foo"})            ;; [:content 0 :content 1]
213 |       close-node                                            ;; [:content 1]
214 | 
215 |       (open-node :paragraph)                                ;; [:content 1 :content]
216 |       (push-node {:node/type :text :text "hello"})
217 |       close-node
218 |       (open-node :bullet-list)
219 |       ;;
220 |       ))
221 | ;; endregion
222 | 
223 | ;; region TOC builder:
224 | ;; toc nodes are heading nodes but with `:type` `:toc` and an extra branching along
225 | ;; the key `:children` representing the sub-sections of the node
226 | (defn into-toc [toc {:as toc-item :keys [heading-level]}]
227 |   (loop [toc toc l heading-level toc-path [:children]]
228 |     ;; `toc-path` is `[:children i₁ :children i₂ ... :children]`
229 |     (let [type-path (assoc toc-path (dec (count toc-path)) :type)]
230 |       (cond
231 |         ;; insert intermediate default empty :content collections for the final update-in (which defaults to maps otherwise)
232 |         (not (get-in toc toc-path))
233 |         (recur (assoc-in toc toc-path []) l toc-path)
234 | 
235 |         ;; fill in toc types for non-contiguous jumps like h1 -> h3
236 |         (not (get-in toc type-path))
237 |         (recur (assoc-in toc type-path :toc) l toc-path)
238 | 
239 |         (= 1 l)
240 |         (update-in toc toc-path (fnil conj []) toc-item)
241 | 
242 |         :else
243 |         (recur toc
244 |                (dec l)
245 |                (conj toc-path
246 |                      (max 0 (dec (count (get-in toc toc-path)))) ;; select last child at level if it exists
247 |                      :children))))))
248 | 
249 | (defn add-to-toc [doc {:as h :keys [heading-level]}]
250 |   (cond-> doc (pos-int? heading-level) (update :toc into-toc (assoc h :type :toc))))
251 | 
252 | (defn set-title-when-missing [{:as doc :keys [title]} heading]
253 |   (cond-> doc (nil? title) (assoc :title (md.transform/->text heading))))
254 | 
255 | (defn add-title+toc
256 |   "Computes and adds a :title and a :toc to the document-like structure `doc` which might have not been constructed by means of `parse`."
257 |   [{:as doc :keys [content]}]
258 |   (let [rf (fn [doc heading] (-> doc (add-to-toc heading) (set-title-when-missing heading)))
259 |         xf (filter (comp #{:heading} :type))]
260 |     (reduce (xf rf) (assoc doc :toc {:type :toc}) content)))
261 | 
262 | (comment
263 |  (-> {:type :toc}
264 |      ;;(into-toc {:heading-level 3 :title "Foo"})
265 |      ;;(into-toc {:heading-level 2 :title "Section 1"})
266 |      (into-toc {:heading-level 1 :title "Title" :type :toc})
267 |      (into-toc {:heading-level 4 :title "Section 2" :type :toc})
268 |      ;;(into-toc {:heading-level 4 :title "Section 2.1"})
269 |      ;;(into-toc {:heading-level 2 :title "Section 3"})
270 |      )
271 | 
272 |  (-> "# Top _Title_
273 | 
274 | par
275 | 
276 | ### Three
277 | 
278 | ## Two
279 | 
280 | par
281 | - and a nested
282 | - ### Heading not included
283 | 
284 | foo
285 | 
286 | ## Two Again
287 | 
288 | par
289 | 
290 | # One Again
291 | 
292 | [[TOC]]
293 | 
294 | #### Four
295 | 
296 | end"
297 |      nextjournal.markdown/parse
298 |      :toc
299 |      ))
300 | ;; endregion
301 | 
302 | ;; region token handlers
303 | (declare apply-tokens)
304 | (defmulti apply-token (fn [_doc token] (:type token)))
305 | (defmethod apply-token :default [doc token]
306 |   (prn :apply-token/unknown-type {:token token})
307 |   doc)
308 | 
309 | ;; blocks
310 | (defmethod apply-token "heading_open" [doc token] (open-node doc :heading {} {:heading-level (hlevel token)}))
311 | (defmethod apply-token "heading_close" [doc {doc-level :level}]
312 |   (let [{:as doc ::keys [path]} (close-node doc)
313 |         doc' (assign-node-id+emoji doc)
314 |         heading (-> doc' (get-in path) (assoc :path path))]
315 |     (cond-> doc'
316 |       ;; We're only considering top-level headings (e.g. not those contained inside quotes or lists)
317 |       (zero? doc-level)
318 |       (-> (add-to-toc heading)
319 |           (set-title-when-missing heading)))))
320 | 
321 | ;; for building the TOC we just care about headings at document top level (not e.g. nested under lists) ⬆
322 | 
323 | (defmethod apply-token "paragraph_open" [doc {:as _token :keys [hidden]}] (open-node doc (if hidden :plain :paragraph)))
324 | (defmethod apply-token "paragraph_close" [doc _token] (close-node doc))
325 | 
326 | (defmethod apply-token "bullet_list_open" [doc {{:as attrs :keys [has-todos]} :attrs}] (open-node doc (if has-todos :todo-list :bullet-list) attrs))
327 | (defmethod apply-token "bullet_list_close" [doc _token] (close-node doc))
328 | 
329 | (defmethod apply-token "ordered_list_open" [doc {:keys [attrs]}] (open-node doc :numbered-list attrs))
330 | (defmethod apply-token "ordered_list_close" [doc _token] (close-node doc))
331 | 
332 | (defmethod apply-token "list_item_open" [doc {{:as attrs :keys [todo]} :attrs}] (open-node doc (if todo :todo-item :list-item) attrs))
333 | (defmethod apply-token "list_item_close" [doc _token] (close-node doc))
334 | 
335 | (defmethod apply-token "math_block" [doc {text :content}] (push-node doc (block-formula text)))
336 | (defmethod apply-token "math_block_end" [doc _token] doc)
337 | 
338 | (defmethod apply-token "hr" [doc _token] (push-node doc {:type :ruler}))
339 | 
340 | (defmethod apply-token "blockquote_open" [doc _token] (open-node doc :blockquote))
341 | (defmethod apply-token "blockquote_close" [doc _token] (close-node doc))
342 | 
343 | (defmethod apply-token "tocOpen" [doc _token] (open-node doc :toc))
344 | (defmethod apply-token "tocBody" [doc _token] doc) ;; ignore body
345 | (defmethod apply-token "tocClose" [doc _token] (-> doc close-node (update-current dissoc :content)))
346 | 
347 | (defmethod apply-token "code_block" [doc {:as _token c :content}]
348 |   (-> doc
349 |       (open-node :code)
350 |       (push-node (text-node c))
351 |       close-node))
352 | (defmethod apply-token "fence" [doc {:as _token i :info c :content}]
353 |   (-> doc
354 |       (open-node :code {} (assoc (parse-fence-info i) :info i))
355 |       (push-node (text-node c))
356 |       close-node))
357 | 
358 | ;; footnotes
359 | (defmethod apply-token "footnote_ref" [{:as doc :keys [footnotes]} token]
360 |   (push-node doc (footnote-ref (+ (count footnotes) (get-in* token [:meta :id]))
361 |                                (get-in* token [:meta :label]))))
362 | 
363 | (defmethod apply-token "footnote_anchor" [doc token] doc)
364 | 
365 | (defmethod apply-token "footnote_open" [{:as doc ::keys [footnote-offset]} token]
366 |   ;; consider an offset in case we're parsing multiple inputs into the same context
367 |   (let [ref (+ (get-in* token [:meta :id]) footnote-offset)
368 |         label (get-in* token [:meta :label])]
369 |     (open-node doc :footnote nil (cond-> {:ref ref} label (assoc :label label)))))
370 | 
371 | (defmethod apply-token "footnote_close" [doc token] (close-node doc))
372 | 
373 | (defmethod apply-token "footnote_block_open" [{:as doc :keys [footnotes] ::keys [path]} _token]
374 |   ;; store footnotes at a top level `:footnote` key
375 |   (let [footnote-offset (count footnotes)]
376 |     (-> doc
377 |         (assoc ::path [:footnotes (dec footnote-offset)]
378 |                ::footnote-offset footnote-offset
379 |                ::path-to-restore path))))
380 | 
381 | (defmethod apply-token "footnote_block_close"
382 |   ;; restores path for adding new tokens
383 |   [{:as doc ::keys [path-to-restore]} _token]
384 |   (-> doc
385 |       (assoc ::path path-to-restore)
386 |       (dissoc ::path-to-restore ::footnote-offset)))
387 | 
388 | (defn footnote->sidenote [{:keys [ref label content]}]
389 |   ;; this assumes the footnote container is a paragraph, won't work for lists
390 |   (node :sidenote (-> content first :content) nil (cond-> {:ref ref} label (assoc :label label))))
391 | 
392 | (defn node-with-sidenote-refs [p-node]
393 |   (loop [l (->zip p-node) refs []]
394 |     (if (z/end? l)
395 |       (when (seq refs)
396 |         {:node (z/root l) :refs refs})
397 |       (let [{:keys [type ref]} (z/node l)]
398 |         (if (= :footnote-ref type)
399 |           (recur (z/next (z/edit l assoc :type :sidenote-ref)) (conj refs ref))
400 |           (recur (z/next l) refs))))))
401 | 
402 | (defn insert-sidenote-containers
403 |   "Handles footnotes as sidenotes.
404 | 
405 |    Takes and returns a parsed document. When the document has footnotes, wraps every top-level block which contains footnote references
406 |    with a `:sidenote-container` node and, into each such node, adds a `:sidenote-column` node containing a `:sidenote` node for each found ref.
407 |    Renames type `:footnote-ref` to `:sidenote-ref`."
408 |   [{:as doc ::keys [path] :keys [footnotes]}]
409 |   (if-not (seq footnotes)
410 |     doc
411 |     (let [root (->zip doc)]
412 |       (loop [loc (z/down root) parent root]
413 |         (cond
414 |           (nil? loc)
415 |           (-> parent z/node (assoc :sidenotes? true))
416 |           (contains? #{:plain :paragraph :blockquote :numbered-list :bullet-list :todo-list :heading :table}
417 |                      (:type (z/node loc)))
418 |           (if-some [{:keys [node refs]} (node-with-sidenote-refs (z/node loc))]
419 |             (let [new-loc (-> loc (z/replace {:type :sidenote-container :content []})
420 |                               (z/append-child node)
421 |                               (z/append-child {:type :sidenote-column
422 |                                                ;; TODO: broken in the old implementation
423 |                                                ;; should be :content (mapv #(footnote->sidenote (get footnotes %)) (distinct refs))}))]
424 |                                                :content (mapv #(footnote->sidenote (get footnotes %)) refs)}))]
425 |               (recur (z/right new-loc) (z/up new-loc)))
426 |             (recur (z/right loc) parent))
427 |           :else
428 |           (recur (z/right loc) parent))))))
429 | 
430 | (comment
431 |   (-> "_hello_ what and foo[^note1] and^[some other note].
432 | 
433 | And what.
434 | 
435 | [^note1]: the _what_
436 | 
437 | * and new text[^endnote] at the end.
438 | * the
439 |   * hell^[that warm place]
440 | 
441 | [^endnote]: conclusion.
442 | "
443 |       nextjournal.markdown/tokenize
444 |       parse
445 |       #_ flatten-tokens
446 |       insert-sidenote-containers)
447 | 
448 |   (-> empty-doc
449 |       (update :text-tokenizers (partial map normalize-tokenizer))
450 |       (apply-tokens (nextjournal.markdown/tokenize "what^[the heck]"))
451 |       insert-sidenote-columns
452 |       (apply-tokens (nextjournal.markdown/tokenize "# Hello"))
453 |       insert-sidenote-columns
454 |       (apply-tokens (nextjournal.markdown/tokenize "is^[this thing]"))
455 |       insert-sidenote-columns))
456 | 
457 | ;; tables
458 | ;; table data tokens might have {:style "text-align:right|left"} attrs, maybe better nested node > :attrs > :style ?
459 | (defmethod apply-token "table_open" [doc _token] (open-node doc :table))
460 | (defmethod apply-token "table_close" [doc _token] (close-node doc))
461 | (defmethod apply-token "thead_open" [doc _token] (open-node doc :table-head))
462 | (defmethod apply-token "thead_close" [doc _token] (close-node doc))
463 | (defmethod apply-token "tr_open" [doc _token] (open-node doc :table-row))
464 | (defmethod apply-token "tr_close" [doc _token] (close-node doc))
465 | (defmethod apply-token "th_open" [doc token] (open-node doc :table-header (:attrs token)))
466 | (defmethod apply-token "th_close" [doc _token] (close-node doc))
467 | (defmethod apply-token "tbody_open" [doc _token] (open-node doc :table-body))
468 | (defmethod apply-token "tbody_close" [doc _token] (close-node doc))
469 | (defmethod apply-token "td_open" [doc token] (open-node doc :table-data (:attrs token)))
470 | (defmethod apply-token "td_close" [doc _token] (close-node doc))
471 | 
472 | (comment
473 |   (->
474 | "
475 | | Syntax |  JVM                     | JavaScript                      |
476 | |--------|:------------------------:|--------------------------------:|
477 | |   foo  |  Loca _lDate_ ahoiii     | goog.date.Date                  |
478 | |   bar  |  java.time.LocalTime     | some [kinky](link/to/something) |
479 | |   bag  |  java.time.LocalDateTime | $\\phi$                         |
480 | "
481 |     nextjournal.markdown/parse
482 |     nextjournal.markdown.transform/->hiccup
483 |     ))
484 | 
485 | ;; ## Handling of Text Tokens
486 | ;;
487 | ;;    normalize-tokenizer :: {:regex, :doc-handler} | {:tokenizer-fn, :handler} -> Tokenizer
488 | ;;    Tokenizer :: {:tokenizer-fn :: TokenizerFn, :doc-handler :: DocHandler}
489 | ;;
490 | ;;    Match :: Any
491 | ;;    Handler :: Match -> Node
492 | ;;    IndexedMatch :: (Match, Int, Int)
493 | ;;    TokenizerFn :: String -> [IndexedMatch]
494 | ;;    DocHandler :: Doc -> {:match :: Match} -> Doc
495 | 
496 | (def hashtag-tokenizer
497 |   {:regex #"(^|\B)#[\w-]+"
498 |    :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
499 |    :handler (fn [match] {:type :hashtag :text (subs (match 0) 1)})})
500 | 
501 | (def internal-link-tokenizer
502 |   {:regex #"\[\[([^\]]+)\]\]"
503 |    :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
504 |    :handler (fn [match] {:type :internal-link :text (match 1)})})
505 | 
506 | (comment
507 |   (->> "# Hello #Fishes
508 | 
509 | > what about #this
510 | 
511 | _this #should be a tag_, but this [_actually #foo shouldnt_](/bar/) is not."
512 |        nextjournal.markdown/tokenize
513 |        (parse (update empty-doc :text-tokenizers conj hashtag-tokenizer))))
514 | 
515 | 
516 | (defn normalize-tokenizer
517 |   "Normalizes a map of regex and handler into a Tokenizer"
518 |   [{:as tokenizer :keys [doc-handler pred handler regex tokenizer-fn]}]
519 |   (assert (and (or doc-handler handler) (or regex tokenizer-fn)))
520 |   (cond-> tokenizer
521 |     (not doc-handler) (assoc :doc-handler (fn [doc {:keys [match]}] (push-node doc (handler match))))
522 |     (not tokenizer-fn) (assoc :tokenizer-fn (partial re-idx-seq regex))
523 |     (not pred) (assoc :pred (constantly true))))
524 | 
525 | (defn tokenize-text-node [{:as tkz :keys [tokenizer-fn pred doc-handler]} doc {:as node :keys [text]}]
526 |   ;; TokenizerFn -> HNode -> [HNode]
527 |   (assert (and (fn? tokenizer-fn) (fn? doc-handler) (fn? pred) (string? text))
528 |           {:text text :tokenizer tkz})
529 |   (let [idx-seq (when (pred doc) (tokenizer-fn text))]
530 |     (if (seq idx-seq)
531 |       (let [text-hnode (fn [s] (assoc (text-node s) :doc-handler push-node))
532 |             {:keys [nodes remaining-text]}
533 |             (reduce (fn [{:as acc :keys [remaining-text]} [match start end]]
534 |                       (-> acc
535 |                           (update :remaining-text subs 0 start)
536 |                           (cond->
537 |                             (< end (count remaining-text))
538 |                             (update :nodes conj (text-hnode (subs remaining-text end))))
539 |                           (update :nodes conj {:doc-handler doc-handler
540 |                                                :match match :text text
541 |                                                :start start :end end})))
542 |                     {:remaining-text text :nodes ()}
543 |                     (reverse idx-seq))]
544 |         (cond-> nodes
545 |           (seq remaining-text)
546 |           (conj (text-hnode remaining-text))))
547 |       [node])))
548 | 
549 | (defmethod apply-token "text" [{:as doc :keys [text-tokenizers]} {:keys [content]}]
550 |   (reduce (fn [doc {:as node :keys [doc-handler]}] (doc-handler doc (dissoc node :doc-handler)))
551 |           doc
552 |           (reduce (fn [nodes tokenizer]
553 |                     (mapcat (fn [{:as node :keys [type]}]
554 |                               (if (= :text type) (tokenize-text-node tokenizer doc node) [node]))
555 |                             nodes))
556 |                   [{:type :text :text content :doc-handler push-node}]
557 |                   text-tokenizers)))
558 | 
559 | (comment
560 |   (def mustache (normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}" :handler (fn [m] {:type :eval :text (m 1)})}))
561 |   (tokenize-text-node mustache {} {:text "{{what}} the {{hellow}}"})
562 |   (apply-token (assoc empty-doc :text-tokenizers [mustache])
563 |                {:type "text" :content "foo [[bar]] dang #hashy taggy [[what]] #dangy foo [[great]] and {{eval}} me"})
564 | 
565 |   (parse (assoc empty-doc
566 |                 :text-tokenizers
567 |                 [(normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}"
568 |                                        :doc-handler (fn [{:as doc ::keys [path]} {[_ meta] :match}]
569 |                                                       (update-in doc (ppop path) assoc :meta meta))})])
570 |          (nextjournal.markdown/tokenize "# Title {{id=heading}}
571 | * one
572 | * two")))
573 | 
574 | ;; inlines
575 | (defmethod apply-token "inline" [doc {:as _token ts :children}] (apply-tokens doc ts))
576 | (defmethod apply-token "math_inline" [doc {text :content}] (push-node doc (formula text)))
577 | (defmethod apply-token "math_inline_double" [doc {text :content}] (push-node doc (formula text)))
578 | 
579 | ;; https://spec.commonmark.org/0.30/#softbreak
580 | (defmethod apply-token "softbreak" [doc _token] (push-node doc {:type :softbreak}))
581 | ;; https://spec.commonmark.org/0.30/#hard-line-break
582 | (defmethod apply-token "hardbreak" [doc _token] (push-node doc {:type :hardbreak}))
583 | 
584 | ;; images
585 | (defmethod apply-token "image" [doc {:keys [attrs children]}] (-> doc (open-node :image attrs) (apply-tokens children) close-node))
586 | 
587 | ;; marks
588 | (defmethod apply-token "em_open" [doc _token] (open-node doc :em))
589 | (defmethod apply-token "em_close" [doc _token] (close-node doc))
590 | (defmethod apply-token "strong_open" [doc _token] (open-node doc :strong))
591 | (defmethod apply-token "strong_close" [doc _token] (close-node doc))
592 | (defmethod apply-token "s_open" [doc _token] (open-node doc :strikethrough))
593 | (defmethod apply-token "s_close" [doc _token] (close-node doc))
594 | (defmethod apply-token "link_open" [doc token] (open-node doc :link (:attrs token)))
595 | (defmethod apply-token "link_close" [doc _token] (close-node doc))
596 | (defmethod apply-token "code_inline" [doc {text :content}] (-> doc (open-node :monospace) (push-node (text-node text)) close-node))
597 | 
598 | ;; html (ignored)
599 | (defmethod apply-token "html_inline" [doc _] doc)
600 | (defmethod apply-token "html_block" [doc _] doc)
601 | ;; endregion
602 | 
603 | ;; region data builder api
604 | (defn pairs->kmap [pairs] (into {} (map (juxt (comp keyword first) second)) pairs))
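;; e.g. (pairs->kmap [["href" "https://nextjournal.com"] ["title" "NJ"]]) ;; => {:href "https://nextjournal.com", :title "NJ"}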
605 | (defn apply-tokens [doc tokens]
606 |   (let [mapify-attrs-xf (map (fn [x] (update* x :attrs pairs->kmap)))]
607 |     (reduce (mapify-attrs-xf apply-token) doc tokens)))
608 | 
609 | (def empty-doc {:type :doc
610 |                 :content []
611 |                 ;; Id -> Nat, to disambiguate ids for nodes with the same textual content
612 |                 ::id->index {}
613 |                 ;; Node -> {id : String, emoji : String}, dissoc from context to opt out of ids
614 |                 :text->id+emoji-fn (comp text->id+emoji md.transform/->text)
615 |                 :toc {:type :toc}
616 |                 :footnotes []
617 |                 ::path [:content -1] ;; private
618 |                 :text-tokenizers []})
619 | 
620 | (defn parse
621 |   "Takes a doc and a collection of markdown-it tokens, applies tokens to doc. Uses an emtpy doc in arity 1."
622 |   ([tokens] (parse empty-doc tokens))
623 |   ([doc tokens] (-> doc
624 |                     (update :text-tokenizers (partial map normalize-tokenizer))
625 |                     (apply-tokens tokens)
626 |                     (dissoc ::path
627 |                             ::id->index
628 |                             :text-tokenizers
629 |                             :text->id+emoji-fn))))
630 | 
631 | (comment
632 | 
633 |  (-> "# 🎱 Markdown Data
634 | 
635 | some _emphatic_ **strong** [link](https://foo.com)
636 | 
637 | ---
638 | 
639 | > some ~~nice~~ quote
640 | > for fun
641 | 
642 | ## Formulas
643 | 
644 | [[TOC]]
645 | 
646 | $$\\Pi^2$$
647 | 
648 | - [ ]  and
649 | - [x]  some $\\Phi_{\\alpha}$ latex
650 | - [ ]  bullets
651 | 
652 | ## Sidenotes
653 | 
654 | here [^mynote] to somewhere
655 | 
656 | ## Fences
657 | 
658 | ```py id=\"aaa-bbb-ccc\"
659 | 1
660 | print(\"this is some python\")
661 | 2
662 | 3
663 | ```
664 | 
665 | ![Image Text](https://img.icons8.com/officel/16/000000/public.png)
666 | 
667 | Hline Section
668 | -------------
669 | 
670 | ### but also [[indented code]]
671 | 
672 |     import os
673 |     os.listdir('/')
674 | 
675 | or monospace mark [`real`](/foo/bar) fun.
676 | 
677 | [^mynote]: Here you _can_ `explain` at length
678 | "
679 |      nextjournal.markdown/tokenize
680 |      parse
681 |      ;;seq
682 |      ;;(->> (take 10))
683 |      ;;(->> (take-last 4))
684 |      ))
685 | ;; endregion
686 | 
687 | ;; region zoom-in at section
688 | (defn section-at [{:as doc :keys [content]} [_ pos :as path]]
689 |   ;; TODO: generalize over path (zoom-in at)
690 |   ;; supports only top-level headings atm (as found in TOC)
691 |   (let [{:as h section-level :heading-level} (get-in doc path)
692 |         in-section? (fn [{l :heading-level}] (or (not l) (< section-level l)))]
693 |     (when section-level
694 |       {:type :doc
695 |        :content (cons h
696 |                       (->> content
697 |                            (drop (inc pos))
698 |                            (take-while in-section?)))})))
699 | 
700 | (comment
701 |  (some-> "# Title
702 | 
703 | ## Section 1
704 | 
705 | foo
706 | 
707 | - # What is this? (no!)
708 | - maybe
709 | 
710 | ### Section 1.2
711 | 
712 | ## Section 2
713 | 
714 | some par
715 | 
716 | ### Section 2.1
717 | 
718 | some other par
719 | 
720 | ### Section 2.2
721 | 
722 | #### Section 2.2.1
723 | 
724 | two two one
725 | 
726 | #### Section 2.2.2
727 | 
728 | two two two
729 | 
730 | ## Section 3
731 | 
732 | some final par"
733 |     nextjournal.markdown/parse
734 |     (section-at [:content 9])                         ;; ⬅ paths are stored in TOC sections
735 |     nextjournal.markdown.transform/->hiccup))
736 | ;; endregion
737 | 
738 | 
739 | ;; ## 🔧 Debug
740 | ;; A flattened view of the token tree, to make tokens easier to inspect
741 | (defn flatten-tokens [tokens]
742 |   (into []
743 |         (comp
744 |          (mapcat (partial tree-seq (comp seq :children) :children))
745 |          (map #(select-keys % [:type :content :hidden :level :info :meta])))
746 |         tokens))
747 | 
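;; A minimal usage sketch (hypothetical input text, relying on the markdown-it `tokenize` fn referenced above):
(comment
  (->> "# Title\n\nsome *emphatic* text"
       nextjournal.markdown/tokenize
       flatten-tokens
       (map :type)))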


--------------------------------------------------------------------------------
/dev/nextjournal/markdown/render.cljs:
--------------------------------------------------------------------------------
  1 | (ns nextjournal.markdown.render
  2 |   (:require
  3 |    ["katex" :as katex]
  4 |    ["@codemirror/language" :refer [defaultHighlightStyle syntaxHighlighting LanguageSupport]]
  5 |    ["@codemirror/state" :refer [EditorState]]
  6 |    ["@codemirror/view" :refer [EditorView keymap]]
  7 |    ["@codemirror/lang-markdown" :as MD :refer [markdown markdownLanguage]]
  8 |    ["react" :as react]
  9 |    [nextjournal.markdown :as md]
 10 |    [nextjournal.clerk.viewer :as v]
 11 |    [nextjournal.clerk.render.hooks :as hooks]
 12 |    [nextjournal.markdown.transform :as md.transform]
 13 |    [nextjournal.clojure-mode :as clojure-mode]
 14 |    [nextjournal.clerk.render.code :as code]
 15 |    [clojure.string :as str]
 16 |    [nextjournal.clerk.render :as render]
 17 |    [reagent.core :as r]))
 18 | 
 19 | (def theme #js {"&.cm-editor.cm-focused" #js {:outline "none"}
 20 |                 ".cm-activeLine" #js {:background-color "rgb(226 232 240)"}
 21 |                 ".cm-line" #js {:padding "0"
 22 |                                 :line-height "1.6"
 23 |                                 :font-size "15px"
 24 |                                 :font-family "\"Fira Mono\", monospace"}})
 25 | 
 26 | ;; syntax (an LRParser) + support (a set of extensions)
 27 | (def clojure-lang (LanguageSupport. (clojure-mode/syntax)
 28 |                                     (.. clojure-mode/default-extensions (slice 1))))
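;; transaction extender: calls `f` with the full updated doc string whenever the document changes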
 29 | (defn on-change-ext [f]
 30 |   (.. EditorState -transactionExtender
 31 |       (of (fn [^js tr]
 32 |             (when (.-docChanged tr) (f (.. tr -state sliceDoc)))
 33 |             #js {}))))
 34 | 
 35 | (defn eval-string [source]
 36 |   (when (not-empty (str/trim source))
 37 |     (try {:result  #_:clj-kondo/ignore (load-string source)}
 38 |          (catch js/Error e
 39 |            {:error (str (.-message e))}))))
 40 | 
 41 | (defn editor [{:keys [doc lang editable? on-change] :or {editable? true}}]
 42 |   (let [!editor-el (hooks/use-ref)
 43 |         extensions (into-array (cond-> [(syntaxHighlighting defaultHighlightStyle)
 44 |                                         (.. EditorState -allowMultipleSelections (of editable?))
 45 |                                         #_(foldGutter)
 46 |                                         (.. EditorView -editable (of editable?))
 47 |                                         (.of keymap clojure-mode/complete-keymap)
 48 |                                         (.theme EditorView theme)]
 49 | 
 50 |                                  on-change
 51 |                                  (conj (on-change-ext on-change))
 52 | 
 53 |                                  (= :clojure lang)
 54 |                                  (conj (.-extension clojure-lang))
 55 | 
 56 |                                  (= :markdown lang)
 57 |                                  (conj (markdown #js {:base markdownLanguage
 58 |                                                       :defaultCodeLanguage clojure-lang}))))]
 59 |     (hooks/use-effect
 60 |      (fn []
 61 |        (let [editor-view* (code/make-view (code/make-state doc extensions) @!editor-el)]
 62 |          #(.destroy editor-view*))) [doc])
 63 |     [:div {:ref !editor-el}]))
 64 | 
 65 | (defn clojure-editor [{:as opts :keys [doc]}]
 66 |   (let [!result (hooks/use-state nil)]
 67 |     (hooks/use-effect (fn [] (reset! !result (eval-string doc))) [doc])
 68 |     [:div
 69 |      [:div.p-2.bg-slate-100
 70 |       [editor (assoc opts :lang :clojure :editable? false)]]
 71 |      [:div.viewer-result.mt-1.ml-5
 72 |       (when-some [{:keys [error result]} @!result]
 73 |         (cond
 74 |           error [:div.red error]
 75 |           (react/isValidElement result) result
 76 |           :else [render/inspect result]))]]))
 77 | 
 78 | (def renderers
 79 |   (assoc md.transform/default-hiccup-renderers
 80 |          :code (fn [_ctx node] [clojure-editor {:doc (md.transform/->text node)}])
 81 |          :todo-item (fn [ctx {:as node :keys [attrs]}]
 82 |                       (md.transform/into-markup [:li [:input {:type "checkbox" :default-checked (:checked attrs)}]] ctx node))
 83 |          :formula (fn [_ctx node]
 84 |                     [:span {:dangerouslySetInnerHTML {:__html (.renderToString katex (md.transform/->text node))}}])
 85 |          :block-formula (fn [_ctx node]
 86 |                           [:div {:dangerouslySetInnerHTML {:__html (.renderToString katex (md.transform/->text node) #js {:displayMode true})}}])))
 87 | 
 88 | (defn inspect-expanded [x]
 89 |   (r/with-let [expanded-at (r/atom {:hover-path [] :prompt-multi-expand? false})]
 90 |     (render/inspect-presented {:!expanded-at expanded-at}
 91 |                               (v/present x))))
 92 | 
 93 | (defn try-markdown [init-text]
 94 |   (let [text->state (fn [text]
 95 |                       (let [parsed (md/parse text)]
 96 |                         {:parsed parsed
 97 |                          :hiccup (nextjournal.markdown.transform/->hiccup renderers parsed)}))
 98 |         !state (hooks/use-state (text->state init-text))]
 99 |     [:div.grid.grid-cols-2.m-10
100 |      [:div.m-2.p-2.text-xl.border-2.overflow-y-scroll.bg-slate-100 {:style {:height "20rem"}}
101 |       [editor {:doc init-text :on-change #(reset! !state (text->state %)) :lang :markdown}]]
102 |      [:div.m-2.p-2.font-medium.overflow-y-scroll {:style {:height "20rem"}}
103 |       [inspect-expanded (:parsed @!state)]]
104 |      [:div.m-2.p-2.overflow-x-scroll
105 |       [inspect-expanded (:hiccup @!state)]]
106 |      [:div.m-2.p-2.bg-slate-50.viewer-markdown
107 |       [v/html (:hiccup @!state)]]]))
108 | 


--------------------------------------------------------------------------------
/notebooks/demo.docx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nextjournal/markdown/5829ec101331b1702841094f4dc897ee46f0ddcd/notebooks/demo.docx


--------------------------------------------------------------------------------
/notebooks/images.clj:
--------------------------------------------------------------------------------
 1 | ;; # 🖼️ Block Level Images
 2 | (ns images
 3 |   {:nextjournal.clerk/visibility {:code :hide :result :show}}
 4 |   (:require [nextjournal.clerk :as clerk]
 5 |             [nextjournal.markdown :as md]
 6 |             [nextjournal.markdown.transform :as md.transform]))
 7 | 
 8 | ;; Unlike [commonmark](https://spec.commonmark.org/0.30/#example-571),
 9 | ;; nextjournal.markdown distinguishes between inline images and _block images_: image syntax which spans a whole
10 | ;; line of text produces a direct child of the document and is not wrapped in a paragraph node. Take the following text:
11 | 
12 | ^{::clerk/viewer {:var-from-def? true
13 |                   :transform-fn #(clerk/html [:pre @(::clerk/var-from-def (:nextjournal/value %))])}}
14 | (def text-with-images
15 |   "This example shows how we're parsing images, the following is a _block image_
16 | 
17 | ![block level image](https://images.freeimages.com/images/large-previews/773/koldalen-4-1384902.jpg)
18 | 
19 | while this is an inline ![inline](https://github.com/nextjournal/clerk/actions/workflows/main.yml/badge.svg) image.
20 | ")
21 | 
22 | ;; This is parsed as
23 | 
24 | (clerk/code
25 |  (dissoc (md/parse text-with-images)
26 |          :toc :footnotes))
27 | 
28 | ;; This allows for a different rendering of images; for instance, we might want to render block images with a caption:
29 | 
30 | ^{::clerk/visibility {:code :show} :nextjournal.clerk/viewer 'nextjournal.clerk.viewer/html-viewer}
31 | (md.transform/->hiccup
32 |  (assoc md.transform/default-hiccup-renderers
33 |         :image (fn [{:as _ctx ::md.transform/keys [parent]} {:as node :keys [attrs]}]
34 |                  (if (= :doc (:type parent))
35 |                    [:figure.image
36 |                     [:img (assoc attrs :alt (md.transform/->text node))]
37 |                     [:figcaption.text-center.mt-1 (md.transform/->text node)]]
38 |                    [:img.inline (assoc attrs :alt (md.transform/->text node))])))
39 |  (md/parse text-with-images))
40 | 


--------------------------------------------------------------------------------
/notebooks/pandoc.clj:
--------------------------------------------------------------------------------
  1 | ;; # 🏳️‍🌈 Pandoc
  2 | (ns pandoc
  3 |   {:nextjournal.clerk/toc :collapsed
  4 |    :nextjournal.clerk/no-cache true}
  5 |   (:require [clojure.data.json :as json]
  6 |             [clojure.java.io :as io]
  7 |             [clojure.java.shell :as shell]
  8 |             [clojure.string :as str]
  9 |             [nextjournal.clerk :as clerk]
 10 |             [nextjournal.clerk.viewer :as v]
 11 |             [nextjournal.markdown :as md]
 12 |             [nextjournal.markdown.utils :as u]
 13 |             [nextjournal.markdown.transform :as md.transform]))
 14 | 
 15 | ;; From the [docs](https://pandoc.org/MANUAL.html#description):
 16 | ;;
 17 | ;; > Pandoc has a modular design: it consists of a set of readers, which parse text in a given format and produce a native representation of the document (an abstract syntax tree or AST), and a set of writers, which convert this native representation into a target format. Thus, adding an input or output format requires only adding a reader or writer. Users can also run custom pandoc filters to modify the intermediate AST.
 18 | ;;
 19 | ;; By transforming our markdown data format to and from [Pandoc](https://pandoc.org)'s internal
 20 | ;; [AST](https://hackage.haskell.org/package/pandoc-types-1.22.2/docs/Text-Pandoc-Definition.html), we can achieve conversions
 21 | ;; from and to potentially all of their supported formats. In both directions we're using Pandoc's [JSON representation](https://pandoc.org/filters.html)
 22 | ;; as the intermediate format.
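;;
;; For orientation, the keywordized JSON we exchange with Pandoc is roughly shaped like this
;; (a hand-written sketch, not the output of any code below):
(comment
  {:pandoc-api-version [1 22]
   :meta {}
   :blocks [{:t "Para" :c [{:t "Str" :c "Hello"} {:t "Space"} {:t "Emph" :c [{:t "Str" :c "world"}]}]}]})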
 23 | ;;
 24 | ;; ## 📤 Export
 25 | ;;
 26 | ;; This is a list of supported output formats as of Pandoc v2.18 (API version 1.22.2):
 27 | ^{::clerk/visibility {:code :hide}}
 28 | (clerk/html
 29 |  [:div.overflow-y-auto.shadow-lg {:style {:height "200px" :width "85%"}}
 30 |   (into [:ul]
 31 |         (map (partial vector :li))
 32 |         (str/split-lines (:out (shell/sh "pandoc" "--list-output-formats"))))])
 33 | 
 34 | ;; Let's define a map of transform functions indexed by (a subset of) our markdown types
 35 | 
 36 | ^{::clerk/visibility {:code :hide :result :hide}}
 37 | (declare md->pandoc)
 38 | ^{::clerk/visibility {:result :hide}}
 39 | (def md-type->transform
 40 |   {:doc (fn [{:keys [content]}]
 41 |           {:blocks (into [] (map md->pandoc) content)
 42 |            :pandoc-api-version [1 22]
 43 |            :meta {}})
 44 | 
 45 |    :heading (fn [{:keys [content heading-level]}] {:t "Header" :c [heading-level ["id" [] []] (map md->pandoc content)]})
 46 |    :paragraph (fn [{:keys [content]}] {:t "Para" :c (map md->pandoc content)})
 47 |    :plain (fn [{:keys [content]}] {:t "Plain" :c (map md->pandoc content)})
 48 |    :code (fn [{:as node :keys [language]}] {:t "CodeBlock" :c [["" [language "code"] []] (md.transform/->text node)]})
 49 |    :block-formula (fn [{:keys [text]}] {:t "Para" :c [{:t "Math" :c [{:t "DisplayMath"} text]}]})
 50 | 
 51 |    :em (fn [{:keys [content]}] {:t "Emph" :c (map md->pandoc content)})
 52 |    :strong (fn [{:keys [content]}] {:t "Strong" :c (map md->pandoc content)})
 53 |    :strikethrough (fn [{:keys [content]}] {:t "Strikeout" :c (map md->pandoc content)})
 54 |    :link (fn [{:keys [attrs content]}] {:t "Link" :c [["" [] []] (map md->pandoc content) [(:href attrs) ""]]})
 55 | 
 56 |    :list-item (fn [{:keys [content]}] (map md->pandoc content))
 57 |    :bullet-list (fn [{:keys [content]}] {:t "BulletList" :c (map md->pandoc content)})
 58 | 
 59 |    :text (fn [{:keys [text]}] {:t "Str" :c text})})
 60 | 
 61 | ;; along with a dispatch function
 62 | ^{::clerk/visibility {:result :hide}}
 63 | (defn md->pandoc
 64 |   [{:as node :keys [type]}]
 65 |   (if-some [xf (get md-type->transform type)]
 66 |     (xf node)
 67 |     (throw (ex-info (str "Not implemented: '" type "'.") node))))
 68 | 
 69 | ;; and a conversion function.
 70 | ^{::clerk/visibility {:result :hide}}
 71 | (defn pandoc-> [pandoc-data format]
 72 |   (let [{:keys [exit out err]} (shell/sh "pandoc" "-f" "json" "-t" format
 73 |                                          :in (json/write-str pandoc-data))]
 74 |     (if (zero? exit) out err)))
 75 | 
 76 | ;; Now take a piece of `markdown-text`
 77 | ^{::clerk/visibility {:code :hide}
 78 |   ::clerk/viewer {:var-from-def? true
 79 |                   :transform-fn #(v/html [:pre @(::clerk/var-from-def (v/->value %))])}}
 80 | (def markdown-text "# Hello
 81 | 
 82 | ## Sub _Section_
 83 | 
 84 | 
 85 | ```python
 86 | 1 + 1
 87 | ```
 88 | 
 89 | With a block formula:
 90 | 
 91 | $$F(t) = \\int_{t_0}^t \\phi(x)dx$$
 92 | 
 93 | this _is_ a
 94 | * ~~boring~~
 95 | * **awesome**
 96 | * [example](https://some/path)!")
 97 | 
 98 | ;; once we've turned it into Pandoc's JSON format
 99 | (def pandoc-data (-> markdown-text md/parse md->pandoc))
100 | 
101 | ^{::clerk/visibility {:result :hide}}
102 | (def verbatim (partial clerk/with-viewer {:transform-fn #(v/html [:pre (v/->value %)])}))
103 | 
104 | ;; then we can convert it to whatever supported format. Say **Org Mode**
105 | (-> pandoc-data (pandoc-> "org") verbatim)
106 | 
107 | ;; or **reStructuredText**
108 | (-> pandoc-data (pandoc-> "rst") verbatim)
109 | 
110 | ;; or even to a **Jupyter Notebook**.
111 | (-> pandoc-data (pandoc-> "ipynb") verbatim)
112 | 
113 | ;; If you're in that exotic party mode, you can also go for a PDF:
114 | (shell/sh "pandoc" "--pdf-engine=tectonic" "-f" "json" "-t" "pdf" "-o" "notebooks/demo.pdf"
115 |           :in (json/write-str pandoc-data))
116 | 
117 | ;; ## 📥 Import
118 | ;;
119 | ;; Import works much the same way. This is a list of supported input formats:
120 | ^{::clerk/visibility {:code :hide}}
121 | (clerk/html
122 |  [:div.overflow-y-auto.shadow-lg {:style {:height "200px" :width "85%"}}
123 |   (into [:ul]
124 |         (map (partial vector :li))
125 |         (str/split-lines (:out (shell/sh "pandoc" "--list-input-formats"))))])
126 | 
127 | ^{::clerk/visibility {:result :hide}}
128 | (declare pandoc->md)
129 | ^{::clerk/visibility {:result :hide}}
130 | (defn node+content [type pd-node] {:type type :content (keep pandoc->md (:c pd-node))})
131 | ^{::clerk/visibility {:result :hide}}
132 | (def pandoc-type->transform
133 |   {:Space (constantly {:type :text :text " "})
134 |    :Str (fn [node] {:type :text :text (:c node)})
135 |    :Para (partial node+content :paragraph)
136 |    :Plain (partial node+content :plain)
137 |    :Header (fn [node]
138 |              (let [[level _meta content] (:c node)]
139 |                {:type :heading
140 |                 :heading-level level
141 |                 :content (keep pandoc->md content)}))
142 | 
143 |    :Emph (partial node+content :em)
144 |    :Strong (partial node+content :strong)
145 |    :Strikeout (partial node+content :strikethrough)
146 |    :Underline (partial node+content :em)                    ;; markdown has no underline; fall back to :em
147 |    :Link (fn [node]
148 |            (let [[_meta content [href _]] (:c node)]
149 |              {:type :link
150 |               :attrs {:href href}
151 |               :content (keep pandoc->md content)}))
152 | 
153 |    :BulletList (fn [node]
154 |                  {:type :bullet-list
155 |                   :content (map (fn [li]
156 |                                   {:type :list-item
157 |                                    :content (keep pandoc->md li)}) (:c node))})
158 |    :OrderedList (fn [node]
159 |                   {:type :numbered-list
160 |                    :content (map (fn [li]
161 |                                    {:type :list-item
162 |                                     :content (keep pandoc->md li)}) (second (:c node)))})
163 | 
164 |    :Math (fn [node] (let [[_meta latex] (:c node)] (u/block-formula latex)))
165 |    :Code (fn [node]
166 |            (let [[_meta code] (:c node)]
167 |              {:type :monospace :content [(u/text-node code)]}))
168 |    :CodeBlock (fn [node]
169 |                 (let [[[_id classes _meta] code] (:c node)]
170 |                   {:type :code
171 |                    :content [(u/text-node code)]}))
172 |    :SoftBreak (constantly {:type :softbreak})
173 |    :RawBlock (constantly nil)
174 |    :RawInline (fn [{:keys [c]}]
175 |                 (cond
176 |                   (and (vector? c) (= "latex" (first c)))
177 |                   (u/formula (second c))))})
178 | 
179 | ^{::clerk/visibility {:result :hide}}
180 | (defn pandoc->md [{:as node :keys [t pandoc-api-version blocks]}]
181 |   (if pandoc-api-version
182 |     {:type :doc :content (keep pandoc->md blocks)}
183 |     (if-some [xf (when t (get pandoc-type->transform (keyword t)))]
184 |       (xf node)
185 |       (throw (ex-info (str "Not Implemented '" t "'.") node)))))
186 | 
187 | ^{::clerk/visibility {:result :hide}}
188 | (defn pandoc<- [input format]
189 |   (-> (shell/sh "pandoc" "-f" format "-t" "json" :in input)
190 |       :out (json/read-str :key-fn keyword)))
191 | 
192 | ;; Let us test the machinery above against a **Microsoft Word** file, turning it into markdown and natively rendering it with Clerk
193 | 
194 | (v/html
195 |  [:div.shadow-xl.p-8
196 |   (-> (io/file "notebooks/demo.docx")
197 |       (pandoc<- "docx")
198 |       pandoc->md
199 |       v/md)])
200 | 
201 | ;; or ingest some **Org Mode**.
202 | (v/html
203 |  [:div.overflow-y-auto.shadow-xl {:style {:height "400px"}}
204 |   [:div.p-8
205 |    (-> (io/input-stream "https://raw.githubusercontent.com/erikriverson/org-mode-R-tutorial/master/org-mode-R-tutorial.org")
206 |        (pandoc<- "org")
207 |        pandoc->md
208 |        (update :content #(take 24 %))
209 |        v/md)]])
210 | 
211 | ;; We also might want to test that our functions are invertible:
212 | (v/html
213 |  [:div
214 |   [:div.shadow-xl.p-8
215 |    (-> markdown-text
216 |        md/parse
217 |        md->pandoc
218 |        #_#_ ;; we're not property testing Pandoc!
219 |        (pandoc-> "org")
220 |        (pandoc<- "org")
221 |        pandoc->md
222 |        v/md)]])
223 | 
224 | ;; This brief experiment shows how the Pandoc AST makes for an interesting intermediate format through which Clerk could
225 | ;; potentially interact with formats other than markdown and Clojure.
226 | 
227 | ^{::clerk/visibility {:result :hide :code :hide}}
228 | (comment
229 |   (json/read-str
230 |    (:out
231 |     (shell/sh "pandoc" "-f" "markdown" "-t" "json" :in markdown-text))
232 |    :key-fn keyword))
233 | 


--------------------------------------------------------------------------------
/notebooks/parsing_extensibility.clj:
--------------------------------------------------------------------------------
  1 | ;; # 🏗 Extending Markdown Parsing
  2 | (ns parsing-extensibility
  3 |   {:nextjournal.clerk/toc :collapsed
  4 |    :nextjournal.clerk/no-cache true}
  5 |   (:require [nextjournal.clerk :as clerk]
  6 |             [nextjournal.markdown :as md]
  7 |             [nextjournal.markdown.utils :as u]
  8 |             [edamame.core :as edamame]
  9 |             [clojure.zip :as z]))
 10 | 
 11 | ^{:nextjournal.clerk/visibility {:code :hide :result :hide}}
 12 | (def show-text
 13 |   {:var-from-def? true
 14 |    :transform-fn (fn [{{::clerk/keys [var-from-def]} :nextjournal/value}] (clerk/html [:pre @var-from-def]))})
 15 | 
 16 | ;; With recent additions to our `nextjournal.markdown.parser` we're allowing for a customizable parsing layer on top of the tokenization provided by `markdown-it` ([n.markdown/tokenize](https://github.com/nextjournal/markdown/blob/ae2a2f0b6d7bdc6231f5d088ee559178b55c97f4/src/nextjournal/markdown.clj#L50-L52)).
 17 | ;; We're acting on the text (leaf) tokens, splitting each of those into a collection of [nodes](https://github.com/nextjournal/markdown/blob/ff68536eb15814fe81db7a6d6f11f049895a4282/src/nextjournal/markdown/parser.cljc#L5).  We'll explain how that works by means of three examples.
 18 | ;;
 19 | ;; ## Regex-based tokenization
 20 | ;;
 21 | ;; A `Tokenizer` is a map with keys `:doc-handler` and `:tokenizer-fn`. For convenience, the function `u/normalize-tokenizer` will fill in the missing keys
 22 | ;; starting from a map with a `:regex` and a `:handler`:
 23 | 
 24 | (def internal-link-tokenizer
 25 |   (u/normalize-tokenizer
 26 |    {:regex #"\[\[([^\]]+)\]\]"
 27 |     :handler (fn [match] {:type :internal-link
 28 |                           :text (match 1)})}))
 29 | 
 30 | ((:tokenizer-fn internal-link-tokenizer) "some [[set]] of [[wiki]] link")
 31 | 
 32 | (u/tokenize-text-node internal-link-tokenizer {} {:text "some [[set]] of [[wiki]] link"})
 33 | 
 34 | ;; In order to opt in to the extra tokenization above, we need to configure the document context as follows:
 35 | (md/parse* (update u/empty-doc :text-tokenizers conj internal-link-tokenizer)
 36 |            "some [[set]] of [[wiki]] link")
 37 | 
 38 | ;; We provide an `internal-link-tokenizer` as well as a `hashtag-tokenizer` as part of the `nextjournal.markdown.parser` namespace. By default, these are not used during parsing and need to be opted in to, as explained above and sketched below.
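;; For instance, a simplistic hashtag tokenizer can be built and opted in to in the same way
;; (a sketch with a hand-rolled regex, not necessarily identical to the built-in one):
(def hashtag-tokenizer*
  (u/normalize-tokenizer
   {:regex #"#(\w+)"
    :handler (fn [match] {:type :hashtag :text (match 1)})}))

(md/parse* (update u/empty-doc :text-tokenizers conj hashtag-tokenizer*)
           "some text with a #hashtag inside")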
 39 | 
 40 | ;; ## Read-based tokenization
 41 | ;;
 42 | ;; Somewhat inspired by the Racket text processor [Pollen](https://docs.racket-lang.org/pollen/pollen-command-syntax.html) we'd like to parse a `text` like this
 43 | 
 44 | ^{::clerk/visibility {:code :hide} ::clerk/viewer show-text}
 45 | (def text "At some point in text a losange
 46 | will signal ◊(foo \"one\" [[vector]]) we'll want to write
 47 | code and ◊not text. Moreover it has not to conflict with
 48 | existing [[links]] or #tags")
 49 | ;; and _read_ any valid Clojure code coming after the lozenge character (`◊`), which we'll also call a
 50 | ;; _losange_, as in French it does sound much better 🇫🇷!
 51 | ;;
 52 | ;; How to proceed? We might take a hint from `re-seq`.
 53 | ^{::clerk/visibility {:code :hide}}
 54 | (clerk/html
 55 |  [:div.viewer-code
 56 |   (clerk/code
 57 |    (with-out-str
 58 |      (clojure.repl/source re-seq)))])
 59 | 
 60 | ;; Now, when a form is read with [Edamame](https://github.com/borkdude/edamame#edamame), it preserves its location metadata. This allows
 61 | ;; us to produce an `IndexedMatch` from matching text
 62 | (defn match->data+indexes [m text]
 63 |   (let [start (.start m) end (.end m)
 64 |         form (edamame/parse-string (subs text end))]
 65 |     [form start (+ end (dec (:end-col (meta form))))]))
 66 | ;; and our modified `re-seq` becomes
 67 | (defn losange-tokenizer-fn [text]
 68 |   (let [m (re-matcher #"◊" text)]
 69 |     ((fn step []
 70 |        (when (.find m)
 71 |          (cons (match->data+indexes m text)
 72 |                (lazy-seq (step))))))))
 73 | 
 74 | (losange-tokenizer-fn text)
 75 | (losange-tokenizer-fn "non matching text")
 76 | 
 77 | (def losange-tokenizer
 78 |   (u/normalize-tokenizer
 79 |    {:tokenizer-fn losange-tokenizer-fn
 80 |     :handler (fn [clj-data] {:type :losange
 81 |                              :data clj-data})}))
 82 | 
 83 | (u/tokenize-text-node losange-tokenizer {} {:text text})
 84 | 
 85 | ;; putting it all together
 86 | (md/parse* (update u/empty-doc :text-tokenizers conj losange-tokenizer)
 87 |            text)
 88 | 
 89 | ;; ## Parsing with Document Handlers
 90 | ;;
 91 | ;; Using tokenizers with document handlers we can let parsed tokens act upon the whole document tree. Consider
 92 | ;; the following textual example (**TODO** _rewrite parsing with a zipper state_):
 93 | ^{::clerk/viewer show-text}
 94 | (def text-with-meta
 95 |   "# Example ◊(add-meta {:attrs {:id \"some-id\"} :class \"semantc\"})
 96 | In this example we're using the losange tokenizer to modify the
 97 | document AST in conjunction with the following functions:
 98 | * `add-meta`: looks up the parent node, merges a map in it
 99 | and adds a flag to its text.
100 | * `strong`: makes the text ◊(strong much more impactful) indeed.
101 | ")
102 | 
103 | (defn add-meta [doc-loc meta]
104 |   (-> doc-loc (z/edit merge meta)
105 |       z/down (z/edit update :text str "🚩️")
106 |       z/up))
107 | 
108 | (defn strong [doc & terms]
109 |   (-> doc
110 |       (z/append-child {:type :strong}) z/down z/rightmost   ;; open-node
111 |       (z/insert-child (u/text-node (apply str (interpose " " terms))))
112 |       z/up)) ;; close-node
113 | 
114 | (def data
115 |   (md/parse* (-> u/empty-doc
116 |                  (update :text-tokenizers conj
117 |                          (assoc losange-tokenizer
118 |                                 :doc-handler (fn [doc {:keys [match]}]
119 |                                                (apply (eval (first match)) doc (rest match))))))
120 |              text-with-meta))
121 | 
122 | (clerk/md data)
123 | 
124 | ^{::clerk/visibility {:code :hide :result :hide}}
125 | (comment
126 |   ;;    Tokenizer :: {:tokenizer-fn :: TokenizerFn,
127 |   ;;                  :doc-handler :: DocHandler}
128 |   ;;    normalize-tokenizer :: {:regex, :doc-handler} |
129 |   ;;                           {:tokenizer-fn, :handler} |
130 |   ;;                           {:regex, :handler} -> Tokenizer
131 |   ;;
132 |   ;;    Match :: Any
133 |   ;;    Handler :: Match -> Node
134 |   ;;    IndexedMatch :: (Match, Integer, Integer)
135 |   ;;    TokenizerFn :: String -> [IndexedMatch]
136 |   ;;    DocHandler :: Doc -> {:match :: Match} -> Doc
137 | 
138 |   ;;    DocOpts :: {:text-tokenizers [Tokenizer]}
139 |   ;;    parse : DocOpts -> [Token] -> Doc
140 |   ;;
141 |   )
142 | 


--------------------------------------------------------------------------------
/notebooks/reference.md:
--------------------------------------------------------------------------------
  1 | # Reference
  2 | 
  3 | ## Paragraphs
  4 | 
  5 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
  6 | 
  7 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
  8 | 
  9 | Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
 10 | 
 11 | ## Formatting
 12 | 
 13 | * `**bold**` becomes **bold**
 14 | * `_italic_` becomes _italic_
 15 | * `~~strikethrough~~` becomes ~~strikethrough~~
 16 | * `[link text](https://nextjournal.com/)` becomes [link text](https://nextjournal.com/)
 17 | * Internal links: Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. [[wikistyle-link]]. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc. Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Aenean commodo ligula eget dolor. Aenean massa. Cum sociis natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Donec quam felis, ultricies nec, pellentesque eu, pretium quis, sem. Nulla consequat massa quis enim. Donec pede justo, fringilla vel, aliquet nec, vulputate eget, arcu. In enim justo, rhoncus ut, imperdiet a, venenatis vitae, justo. Nullam dictum felis eu pede mollis pretium. Integer tincidunt. Cras dapibus. Vivamus elementum semper nisi. Aenean vulputate eleifend tellus. Aenean leo ligula, porttitor eu, consequat vitae, eleifend ac, enim. Aliquam lorem ante, dapibus in, viverra quis, feugiat a, tellus. Phasellus viverra nulla ut metus varius laoreet. Quisque rutrum. Aenean imperdiet. Etiam ultricies nisi vel augue. Curabitur ullamcorper ultricies nisi. Nam eget dui. Etiam rhoncus. Maecenas tempus, tellus eget condimentum rhoncus, sem quam semper libero, sit amet adipiscing sem neque sed ipsum. Nam quam nunc, blandit vel, luctus pulvinar, hendrerit id, lorem. Maecenas nec odio et ante tincidunt tempus. Donec vitae sapien ut libero venenatis faucibus. Nullam quis ante. Etiam sit amet orci eget eros faucibus tincidunt. Duis leo. Sed fringilla mauris sit amet nibh. Donec sodales sagittis magna. Sed consequat, leo eget bibendum sodales, augue velit cursus nunc.
 18 | 
 19 | ## Headings
 20 | 
 21 | Headings start with `#`. Multiple consecutive `#` characters
 22 | define the level of the heading.
 23 | 
 24 |     # Heading 1
 25 |     ## Heading 2
 26 |     ### Heading 3
 27 |     #### Heading 4
 28 | 
 29 | ## Lists
 30 | 
 31 | ### Bullet Lists
 32 | 
 33 | Normal bullet lists start with a `*` and can be nested.
 34 | 
 35 |     * Refrigerator
 36 |         * Butter
 37 |         * Eggs
 38 |         * Milk
 39 |     * Pantry
 40 |         * Bread
 41 |         * Baking paper
 42 |         * Aluminum foil
 43 | 
 44 | becomes
 45 | 
 46 | * Refrigerator
 47 |     * Butter
 48 |     * Eggs
 49 |     * Milk
 50 | * Pantry
 51 |     * Bread
 52 |     * Baking paper
 53 |     * Aluminum foil
 54 | 
 55 | ### Numbered Lists
 56 | 
 57 | Numbered lists start with `1.` and can likewise be nested.
 58 | Nested lists start again with `1.` (instead of e.g. `1.1.`) and automatically pick up
 59 | the indices of their parents.
 60 | 
 61 |     1. Green plants
 62 |         1. Charophytes
 63 |         2. Chlorophytes
 64 |     2. Land plants
 65 |         1. Liverworts
 66 |         2. Mosses
 67 |         3. Hornworts
 68 | 
 69 | becomes
 70 | 
 71 | 1. Green plants
 72 |    1. Charophytes
 73 |    2. Chlorophytes
 74 | 2. Land plants
 75 |    1. Liverworts
 76 |    2. Mosses
 77 |    3. Hornworts
 78 | 
 79 | ### Todo Lists
 80 | 
 81 | Todo lists start with `* [ ]` or `* [x]`, where the `x` marks whether
 82 | the todo is done. Todo lists can likewise be nested.
 83 | 
 84 |     * [ ] Groceries
 85 |         * [x] Butter
 86 |         * [ ] Eggs
 87 |         * [ ] Milk
 88 |     * [x] Workshop
 89 |         * [x] Torx M6 screws
 90 |         * [x] Torx bit set
 91 | 
 92 | becomes
 93 | 
 94 | * [ ] Groceries
 95 |     * [x] Butter
 96 |     * [ ] Eggs
 97 |     * [ ] Milk
 98 | * [x] Workshop
 99 |     * [x] Torx M6 screws
100 |     * [x] Torx bit set
101 | 
102 | ## Tables
103 | 
104 | Colons can be used to align the text in a column to the left,
105 | to the right, or centered.
106 | 
107 | 
108 |     | Column 1     | Column 2            | Column 3 |
109 |     | ------------ |:-------------------:| --------:|
110 |     | Column 1 is  | left aligned        |   1600 € |
111 |     | Column 2 is  | centered            |     12 € |
112 |     | Column 3 is  | right aligned       |      1 € |
113 | 
114 | becomes
115 | 
116 | | Column 1     | Column 2            | Column 3 |
117 | | ------------ |:-------------------:| --------:|
118 | | Column 1 is  | left aligned        |   1600 € |
119 | | Column 2 is  | centered            |     12 € |
120 | | Column 3 is  | right aligned       |      1 € |
121 | 
122 | ## Images
123 | 
124 |     ![ARS Altmann Waggon](https://www.ars-altmann.de/wp-content/uploads/2017/12/Schiene2.jpg)
125 | 
126 | becomes
127 | 
128 | ![ARS Altmann Waggon](https://www.ars-altmann.de/wp-content/uploads/2017/12/Schiene2.jpg)
129 | 
130 | ## Quotes
131 | 
132 |     > “The purpose of computation is insight, not numbers.”
133 |     >
134 |     > ― Richard Hamming
135 | 
136 | becomes
137 | 
138 | > “The purpose of computation is insight, not numbers.”
139 | >
140 | > ― Richard Hamming
141 | 
142 | ## Horizontal Rules
143 | 
144 | Different sections can be set apart by horizontal rules.
145 | `---` produces a line across the full width of the document.
146 | 
147 |     #### Section 1
148 | 
149 |     Here is a paragraph for section 1.
150 | 
151 |     ---
152 | 
153 |     #### Section 2
154 | 
155 |     Here is a paragraph for section 2.
156 | 
157 | becomes
158 | 
159 | #### Section 1
160 | 
161 | Here is a paragraph for section 1.
162 | 
163 | ---
164 | 
165 | #### Section 2
166 | 
167 | Here is a paragraph for section 2.
168 | 
169 | ## Table of Contents
170 | 
171 | A table of contents covering all headings can be inserted at any
172 | place in the document with `[[toc]]`.
173 | 
174 |     [[toc]]
175 | 
176 | becomes
177 | 
178 | [[toc]]
179 | 
180 | 
181 | # [Randomly generated](https://jaspervdj.be/lorem-markdownum/)
182 | 
183 | Bracchia seque ossa minus petisse serius
184 | ========================================
185 | 
186 | Ver maiores letoque via et obstipuere eburnea
187 | ---------------------------------------------
188 | 
189 | Lorem markdownum pede inmensos de est, aut nisi narremur rudente fratri, Aegides
190 | parente, et in. Vulneribus tecta, non et Cipus iamdudum volvere, dives quod
191 | dixit, Titaniacis sociosque.
192 | 
193 | - Viros ergo licet licebit coercet
194 | - Nec ait dic ait hasta edita similisque
195 | - Undas postquam
196 | - Vires piasque
197 | - Medias an quem nisi pugnacem haec flammis
198 | - Cum crimine in haud tertia ortus dicar
199 | 
200 | Ero mihi velare per
201 | -------------------
202 | 
203 | Mensuraque voluptas, venabula restabat de pedicis pectusque Lavinia promissa
204 | patris detrahit maris una iamque, At adunca reddita. Fluctibus digitis et Pelia.
205 | Cur cur, remorata prohibebant tellus anus nihil cum? Cornaque [ora est
206 | cetera](http://www.palladiasest.net/ad) quam amore, simul, et abire in corpore
207 | matris dubitabat frustra Peleus ex nimbos.
208 | 
209 | 1. Adspicerent nostro solum et dedit est esse
210 | 2. Parosque et
211 | 3. Sed nomen columbas
212 | 4. Adventare spumas
213 | 5. Hoc penetratque Rhamnusia nodosaque me olim
214 | 6. Sit cum ab per inexorabile densis
215 | 
216 | Troezenius utque
217 | ----------------
218 | 
219 | Temptat vixque pectore spectacula patulosque ales requirenti ferum laudare
220 | oculorum volucris. Moderamine oculos nec **referebam** vestes nescit pedis,
221 | obsisto et petunt **filius** celebrare accedere et udaeque gestu. Scythiam
222 | capitis. Carpitur infernas moderamine ne alte fregit heu fuisses tamen, neque
223 | foro alium latius secundi; tecumque rapta. Rettuleram satus.
224 | 
225 |     thumbnailIpadVolume.resources_wins_wheel += data_wan_wizard;
226 |     if (memory_acl_threading(port_path, wizard_import_log, pixel + 4)) {
227 |         ppiFlatbedFirmware += dmaCompressionOdbc(qwerty_rate_word, system + 3,
228 |                 propertyTrackballRaster);
229 |     }
230 |     character_xhtml_protector.pda_syntax = jumperTrackballDenial(
231 |             pretestWiredFont, megahertz_programming_imap, 79);
232 | 
233 | Ipse ubi in Pirenidas inane, video rore qui fratres unum induitur mittere.
234 | Crinem repetitum mare prius Nilo dum victa **superorum colorem duobus**, ore.
235 | Cum et _corniger_ raptam corpus. In saltu ullo hoc ille _viisque_. Hominum ore
236 | Priamum Pindumve, in verbisque arvum est o **currere**.
237 | 
238 | ## Potuit traxit ob sacro me mandere utrumque
239 | 
240 | Lorem markdownum habetque pater non scrobibus Turno! Ubi [Dianae], per est
241 | radiis ad construit, annum quas edo purpureo flentes grege tot tanto di
242 | intibaque corpore. *Populis Tamasenum quod* rabiemque et si natus illa decorem
243 | amanti semper tui lacrimans pete: suo per osculaque? **Male populi** sic, sed et
244 | addidit flumine illis sit verba. Ferinae bimembres male.
245 | 
246 | Molitur nec tellus, tabula et equos natantia nimios tangere retemptat victore,
247 | mi femina, cumque et. Undas nive **manus**, anguis stimuletur sibi umida putaret
248 | fatorum miratur dolorque Icare. Praemia vidit opposuitque sumpto.
249 | 
250 | ## Fulgura optato narratibus sed
251 | 
252 | Byblis iuravimus geminis titubantis rumpo recondidit Thybris umbram torruit
253 | praedamque fictus, est. Violabere lapides [audacem] hunc causa remittit erat
254 | quoque volat frondescere.
255 | 
256 | ## Possunt Amphione
257 | 
258 | Ego maculoso tela nec filia aut Philomela Iliacas. Et magna montis, anguem
259 | corpus extulerat, [nescio fallaces] amem quae ferali pudetque. In nata, magis
260 | moderamine cornix prohibentque ramis magis loco exosus: cum. Domum tecti
261 | agnoscis labaret **occidit rupit**, saxa credo fuerat pavido sorores oblitis;
262 | aegra semel, nostrum, idem. Nec multifori custos, iuro feralia, regemque alumno.
263 | 
264 | Neque facta, ignes, erit Non Alemone risus perterrita et illi, in [cepit semper
265 | et] acui sub. Celanda mortalia strictumque quos, munera gener, ancipitesque
266 | victus, imo omnes. Tereu signo omnem, tristi, utile genetrixque hos litibus
267 | litora. E Rhoeti medeatur Lapitharum me dolorem!
268 | 
269 | ## Belli iaculatricemque tumulo contigerant passibus
270 | 
271 | Aera corpus natus palude. Dumque inque et parat tolerare utile, Cypriae concolor
272 | tempora, quam.
273 | 
274 | Solidissima pater antris: eiectum squamigeris veterem. Vatis inde nec senis, est
275 | fuerunt damnosasque labefactus pectore unco, tuos Ammon ubi usu; *bello*.
276 | 
277 | ## Utque #crudus profunda [[maritae]] tumulo {{contigerant}} passibus
278 | 
279 | Rector perdis aequore mille vel crimenque senior ore velamina orbem ipsa
280 | hostiliter? In unam Lycaeo de ortus viderat inquinat ire coniunx qualia, puer.
281 | Ipse atque qui secabant vestras, Caeneus superbus et cauda siccat. Nullamque
282 | corpus est evicit, *non* vento movi animumque, fundamina.
283 | 
284 | Fatebitur quae praesagia opifex, tua repulsam utrimque spiritus austrum, sic et
285 | viribus pinus, Calydonius. Et sordidus pro iugulo laudis corpore. Trucis mutatus
286 | **certatimque simulamina** inpulsum lapides, nostrumque opibus aratri ◊(java.time.Instant/now).
287 | 
288 | [Dianae]: http://timeas.net/
289 | [audacem]: http://www.novavela.com/dis.aspx
290 | [cepit semper et]: http://quodimas.net/tactusputantem
291 | [nescio fallaces]: http://aequiformidine.net/famulae-miserere.aspx
292 | 


--------------------------------------------------------------------------------
/notebooks/tight_lists.clj:
--------------------------------------------------------------------------------
  1 | ;; # Tight Lists
  2 | (ns tight-lists
  3 |   {:nextjournal.clerk/no-cache true}
  4 |   (:require [clojure.data.json :as json]
  5 |             [clojure.java.shell :as shell]
  6 |             [nextjournal.clerk :as clerk]
  7 |             [nextjournal.clerk.viewer :as v]
  8 |             [nextjournal.markdown :as md]
  9 |             [hiccup2.core :as h]
 10 |             [nextjournal.markdown.transform :as md.transform]))
 11 | 
 12 | ;; Markdown (commonmark) distinguishes between [loose and tight lists](https://spec.commonmark.org/0.30/#loose)
 13 | ;;
 14 | ;; > A list is loose if any of its constituent list items are separated by blank lines, or
 15 | ;; > if any of its constituent list items directly contain two block-level elements with a blank line between them.
 16 | ;; > Otherwise a list is tight. (The difference in HTML output is that paragraphs in a loose list are wrapped in `<p>` tags,
 17 | ;; > while paragraphs in a tight list are not.)
 18 | ;;
 19 | ;; ## Pandoc to the Rescue
 20 | ;;
 21 | ;; To comply with this behaviour [Pandoc uses a `Plain` container type](https://github.com/jgm/pandoc-types/blob/694c383dd674dad97557eb9b97adda17079ebb2c/src/Text/Pandoc/Definition.hs#L275-L278), and I think we should follow their advice
 22 | 
 23 | ^{::clerk/visibility {:result :hide}}
 24 | (defn ->pandoc-ast [text]
 25 |   (clerk/html [:pre
 26 |                (with-out-str
 27 |                  (clojure.pprint/pprint
 28 |                   (json/read-str
 29 |                    (:out
 30 |                     (shell/sh "pandoc" "-f" "markdown" "-t" "json" :in text))
 31 |                    :key-fn keyword)))]))
 32 | 
 33 | ;; tight
 34 | (->pandoc-ast "
 35 | - one
 36 | - two
 37 | ")
 38 | 
 39 | ;; vs loose lists
 40 | (->pandoc-ast "
 41 | - one
 42 | 
 43 | inner par
 44 | - two
 45 | ")
 46 | 
 47 | (->pandoc-ast "
 48 | - one
 49 | 
 50 | - two
 51 | ")
 52 | 
 53 | (->pandoc-ast "
 54 | - one
 55 | * thignt sub one
 56 | - two
 57 | ")
 58 | 
 59 | ^{::clerk/visibility {:result :hide}}
 60 | (defn example [md-string]
 61 |   (v/html
 62 |    [:div.flex-col
 63 |     [:pre.code md-string]
 64 |     [:pre.code (with-out-str
 65 |                  (clojure.pprint/pprint
 66 |                   (dissoc (md/parse md-string) :toc :title :footnotes)))]
 67 |     [:pre.code (with-out-str
 68 |                  (clojure.pprint/pprint
 69 |                   (md/->hiccup md-string)))]
 70 |     (v/html (md/->hiccup md-string))
 71 |     ;; TODO: fix in clerk
 72 |     #_
 73 |     (v/html (str (h/html (md/->hiccup md-string))))]))
 74 | 
 75 | (clerk/present!
 76 |  (example "
 77 | * this
 78 | * is
 79 | * tight!"))
 80 | 
 81 | (example "
 82 | * this
 83 | * is
 84 | > very loose
 85 | 
 86 | indeed
 87 | * fin")
 88 | 
 89 | (example "* one \\
 90 | hardbreak
 91 | * two")
 92 | 
 93 | (example "
 94 | * one
 95 | softbreak
 96 | * two")
 97 | 
 98 | ;; https://spec.commonmark.org/0.30/#example-314 (loose list)
 99 | (example "- a\n- b\n\n- c")
100 | ;; https://spec.commonmark.org/0.30/#example-319 (tight with loose sublist inside)
101 | (example "- a\n - b\n\n c\n- d\n")
102 | ;; https://spec.commonmark.org/0.30/#example-320 (tight with blockquote inside)
103 | (example "* a\n > b\n >\n* c")
104 | 


--------------------------------------------------------------------------------
/notebooks/try.clj:
--------------------------------------------------------------------------------
 1 | ;; # ✏️ Nextjournal Markdown Live Demo
 2 | (ns try
 3 |   {:nextjournal.clerk/visibility {:code :hide}}
 4 |   (:require [nextjournal.clerk :as clerk]))
 5 | ;; _Edit markdown text, see parsed AST and transformed hiccup live. Preview how Clerk renders it._
 6 | ^{::clerk/width :full
 7 |   ::clerk/visibility {:code :fold}}
 8 | (clerk/with-viewer {:render-fn 'nextjournal.markdown.render/try-markdown
 9 |                     :require-cljs true}
10 |   "# 👋 Hello Markdown
11 | 
12 | ```clojure id=xxyyzzww
13 | (reduce + [1 2 3])
14 | ```
15 | ## Subsection
16 | - [x] type **some**
17 | - [x] ~~nasty~~
18 | - [ ] _stuff_ here")
19 | 
20 | #_(clerk/serve! {:port 8989 :browse true})
21 | 


--------------------------------------------------------------------------------
/out/sci/index.html:
--------------------------------------------------------------------------------
(markup not preserved; page title: "Clerk Viewer")


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "dependencies": {
 3 |     "@codemirror/autocomplete": "^6.0.2",
 4 |     "@codemirror/commands": "^6.0.0",
 5 |     "@codemirror/lang-markdown": "6.0.0",
 6 |     "@codemirror/language": "^6.1.0",
 7 |     "@codemirror/lint": "^6.0.0",
 8 |     "@codemirror/search": "^6.0.0",
 9 |     "@codemirror/state": "^6.0.1",
10 |     "@codemirror/view": "^6.0.2",
11 |     "@lezer/common": "^1.0.0",
12 |     "@lezer/generator": "^1.0.0",
13 |     "@lezer/highlight": "^1.0.0",
14 |     "@lezer/lr": "^1.0.0",
15 |     "@lezer/markdown": "^1.0.0",
16 |     "@nextjournal/lang-clojure": "1.0.0",
17 |     "@nextjournal/lezer-clojure": "1.0.0",
18 |     "d3-require": "^1.2.4",
19 |     "emoji-regex": "^10.0.0",
20 |     "framer-motion": "^6.2.8",
21 |     "katex": "^0.12.0",
22 |     "lezer-clojure": "1.0.0-rc.0",
23 |     "markdown-it": "^14.1.0",
24 |     "markdown-it-block-image": "^0.0.3",
25 |     "markdown-it-footnote": "^3.0.3",
26 |     "markdown-it-texmath": "^1.0.0",
27 |     "markdown-it-toc-done-right": "^4.2.0",
28 |     "punycode": "2.1.1",
29 |     "react": "^18.2.0",
30 |     "react-dom": "^18.2.0",
31 |     "use-sync-external-store": "1.2.0",
32 |     "vh-sticky-table-header": "1.2.1",
33 |     "w3c-keyname": "^2.2.4"
34 |   },
35 |   "devDependencies": {
36 |     "esbuild": "^0.12.28",
37 |     "shadow-cljs": "^2.19.3"
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/resources/META-INF/nextjournal/markdown/meta.edn:
--------------------------------------------------------------------------------
1 | {:version "0.6.157"}


--------------------------------------------------------------------------------
/shadow-cljs.edn:
--------------------------------------------------------------------------------
 1 | {:source-paths ["src" "test"]
 2 |  :dev-http {8022 "out/test"}
 3 |  :nrepl {:cider false}
 4 |  :builds
 5 |  {:test
 6 |   {:target :node-test
 7 |    :output-dir "out"
 8 |    :output-to "out/node-tests.js"
 9 |    :closure-defines {shadow.debug true}
10 |    :js-options {:js-provider :shadow
11 |                 :output-feature-set :es8}}
12 | 
13 |   :browser-test
14 |   {:target :browser-test
15 |    :test-dir "out/test"
16 |    :closure-defines {shadow.debug true}
17 |    :js-options {:output-feature-set :es8}}}}
18 | 


--------------------------------------------------------------------------------
/src/deps.cljs:
--------------------------------------------------------------------------------
1 | {:npm-deps
2 |  {"katex" "^0.12.0"
3 |   "markdown-it" "^14.1.0"
4 |   "markdown-it-block-image" "^0.0.3"
5 |   "markdown-it-footnote" "^3.0.3"
6 |   "markdown-it-texmath" "^1.0.0"
7 |   "markdown-it-toc-done-right" "^4.2.0"
8 |   "punycode" "2.1.1"}}
9 | 


--------------------------------------------------------------------------------
/src/js/markdown.js:
--------------------------------------------------------------------------------
 1 | let MarkdownIt = require('markdown-it'),
 2 |     MD = new MarkdownIt({html: true, linkify: true, breaks: false})
 3 | 
 4 | let texmath = require('markdown-it-texmath')
 5 | MD.use(texmath, {delimiters: "dollars"})
 6 | 
 7 | let blockImage = require("markdown-it-block-image")
 8 | MD.use(blockImage)
 9 | 
10 | let mdToc = require("markdown-it-toc-done-right")
11 | MD.use(mdToc)
12 | 
13 | let footnotes = require("markdown-it-footnote")
14 | MD.use(footnotes)
15 | 
16 | function todoListPlugin(md, opts) {
17 |   const startsWithTodoSequence = (text) => {
18 |     return text.startsWith("[ ] ") || text.startsWith("[x] ")
19 |   }
20 | const isITodoInlineToken = (tokens, i) => { 21 | return tokens[i].type === 'inline' && 22 | tokens[i-1].type === 'paragraph_open' && 23 | tokens[i-2].type === 'list_item_open' && 24 | startsWithTodoSequence(tokens[i].content) 25 | } 26 | const removeMarkup = (token) => { 27 | let textNode = token.children[0] 28 | textNode.content = textNode.content.slice(4) 29 | } 30 | const closestList = (tokens, index) => { 31 | for (let i = index; i >= 0; i--) { 32 | let token = tokens[i] 33 | if (token.type == 'bullet_list_open') { return token } 34 | } 35 | } 36 | const rule = (state) => { 37 | let tokens = state.tokens 38 | for (let i = 2; i < tokens.length; i++) { 39 | if (isITodoInlineToken(tokens, i)) { 40 | // set attrs on the list item 41 | tokens[i-2].attrSet("todo", true) 42 | tokens[i-2].attrSet("checked", tokens[i].content.startsWith("[x] ")) 43 | // removes the [-] sequence from the first inline children 44 | removeMarkup(tokens[i]) 45 | // set attrs on closest list container 46 | let container = closestList(tokens, i-3) 47 | if (container) { container.attrSet("has-todos", true) } 48 | } 49 | } 50 | } 51 | 52 | md.core.ruler.after('inline', 'todo-list-rule', rule) 53 | } 54 | 55 | MD.use(todoListPlugin) 56 | 57 | function tokenize(text) { return MD.parse(text, {}) } 58 | function tokenizeJSON(text) { return JSON.stringify(MD.parse(text, {})) } 59 | 60 | module.exports = {tokenize, tokenizeJSON} 61 | -------------------------------------------------------------------------------- /src/nextjournal/markdown.cljc: -------------------------------------------------------------------------------- 1 | (ns nextjournal.markdown 2 | "Markdown as data" 3 | (:require 4 | [nextjournal.markdown.impl :as impl] 5 | [nextjournal.markdown.utils :as u] 6 | [nextjournal.markdown.transform :as markdown.transform])) 7 | 8 | (def empty-doc u/empty-doc) 9 | 10 | (defn parse* 11 | "Turns a markdown string into an AST of nested clojure data. 12 | Allows to parse multiple strings into the same document 13 | e.g. `(-> u/empty-doc (parse* text-1) (parse* text-2))`." 14 | ([markdown-text] (parse* empty-doc markdown-text)) 15 | ([ctx markdown-text] 16 | (-> ctx 17 | (update :text-tokenizers (partial map u/normalize-tokenizer)) 18 | (impl/parse markdown-text)))) 19 | 20 | (defn parse 21 | "Turns a markdown string into an AST of nested clojure data. 22 | 23 | Accept options: 24 | - `:text-tokenizers` to customize parsing of text in leaf nodes (see https://nextjournal.github.io/markdown/notebooks/parsing_extensibility). 25 | " 26 | ([markdown-text] (parse empty-doc markdown-text)) 27 | ([ctx markdown-text] 28 | (-> (parse* ctx markdown-text) 29 | (dissoc :text-tokenizers 30 | :text->id+emoji-fn 31 | ::impl/footnote-offset 32 | ::impl/id->index 33 | ::impl/label->footnote-ref 34 | ::impl/path 35 | ::impl/root)))) 36 | 37 | (comment 38 | (-> u/empty-doc 39 | (parse* "# title 40 | * one 41 | * two 42 | ") 43 | (parse* "new par") 44 | (parse* "new par"))) 45 | 46 | (defn ->hiccup 47 | "Turns a markdown string into hiccup." 48 | ([markdown] (->hiccup markdown.transform/default-hiccup-renderers markdown)) 49 | ([ctx markdown] 50 | (let [parsed (if (string? 
markdown) 51 | (parse markdown) 52 | markdown)] 53 | (markdown.transform/->hiccup ctx parsed)))) 54 | 55 | (comment 56 | (parse "# 🎱 Hello") 57 | 58 | (parse "# Hello Markdown 59 | - [ ] what 60 | - [ ] [nice](very/nice/thing) 61 | - [x] ~~thing~~ 62 | ") 63 | 64 | (-> (nextjournal.markdown.graaljs/parse "[alt](https://this/is/a.link)") :content first :content first) 65 | (-> (parse "[alt](https://this/is/a.link)") :content first :content first) 66 | 67 | (parse "# Hello Markdown 68 | - [ ] what 69 | - [ ] [nice](very/nice/thing) 70 | - [x] ~~thing~~ 71 | ") 72 | 73 | (->> (with-out-str 74 | (time (dotimes [_ 100] (parse (slurp "notebooks/reference.md"))))) 75 | (re-find #"\d+.\d+") 76 | parse-double 77 | ((fn [d] (/ d 100)))) 78 | 79 | (->hiccup "# Hello Markdown 80 | 81 | * What's _going_ on? 82 | ") 83 | 84 | (->hiccup 85 | (assoc markdown.transform/default-hiccup-renderers 86 | :heading (fn [ctx node] 87 | [:h1.some-extra.class 88 | (markdown.transform/into-markup [:span.some-other-class] ctx node)])) 89 | "# Hello Markdown 90 | * What's _going_ on? 91 | ") 92 | 93 | ;; launch shadow cljs repl 94 | (shadow.cljs.devtools.api/repl :browser-test)) 95 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/impl.clj: -------------------------------------------------------------------------------- 1 | ;; # 🧩 Parsing 2 | (ns nextjournal.markdown.impl 3 | (:require [clojure.zip :as z] 4 | [nextjournal.markdown.impl.extensions :as extensions] 5 | [nextjournal.markdown.impl.types :as t] 6 | [nextjournal.markdown.utils :as u]) 7 | (:import (org.commonmark.ext.autolink AutolinkExtension) 8 | (org.commonmark.ext.footnotes FootnotesExtension FootnoteReference FootnoteDefinition InlineFootnote) 9 | (org.commonmark.ext.gfm.strikethrough Strikethrough StrikethroughExtension) 10 | (org.commonmark.ext.gfm.tables TableBlock TableBody TableRow TableHead TableCell TablesExtension TableCell$Alignment) 11 | (org.commonmark.ext.task.list.items TaskListItemsExtension TaskListItemMarker) 12 | (org.commonmark.node Node AbstractVisitor 13 | Document 14 | BlockQuote 15 | BulletList 16 | OrderedList 17 | Code 18 | FencedCodeBlock 19 | IndentedCodeBlock 20 | Heading 21 | Text 22 | Paragraph 23 | Emphasis 24 | StrongEmphasis 25 | ListBlock 26 | ListItem 27 | Link 28 | LinkReferenceDefinition 29 | ThematicBreak 30 | SoftLineBreak 31 | HardLineBreak 32 | HtmlInline 33 | Image 34 | HtmlBlock) 35 | (org.commonmark.parser Parser))) 36 | 37 | (set! *warn-on-reflection* true) 38 | ;; TODO: 39 | ;; - [x] inline formulas 40 | ;; - [x] block formulas 41 | ;; - [x] tight lists 42 | ;; - [x] task lists 43 | ;; - [x] footnotes 44 | ;; - [ ] strikethroughs ext 45 | ;; - [x] tables 46 | ;; - [x] fenced code info 47 | ;; - [ ] html nodes 48 | ;; - [ ] auto link 49 | ;; - [ ] promote single images as blocks 50 | ;; - [ ] [[TOC]] (although not used in Clerk) 51 | 52 | 53 | (comment 54 | (parse "* this is inline $\\phi$ math 55 | * other ")) 56 | 57 | (def ^Parser parser 58 | (.. Parser 59 | builder 60 | (extensions [(extensions/create) 61 | (AutolinkExtension/create) 62 | (TaskListItemsExtension/create) 63 | (TablesExtension/create) 64 | (StrikethroughExtension/create) 65 | (.. (FootnotesExtension/builder) 66 | (inlineFootnotes true) 67 | (build))]) 68 | build)) 69 | 70 | ;; helpers / ctx 71 | (def ^:dynamic *in-tight-list?* false) 72 | 73 | (defn paragraph-type [] (if *in-tight-list?* :plain :paragraph)) 74 | 75 | (defn in-tight-list? [node] 76 | (cond 77 | (instance? 
ListBlock node) (.isTight ^ListBlock node) 78 | (instance? BlockQuote node) false 79 | :else *in-tight-list?*)) 80 | 81 | (defmacro with-tight-list [node & body] 82 | `(binding [*in-tight-list?* (in-tight-list? ~node)] 83 | ~@body)) 84 | 85 | ;; multi stuff 86 | (defmulti open-node (fn [_ctx node] (type node))) 87 | (defmulti close-node (fn [_ctx node] (type node))) 88 | 89 | (defmethod close-node :default [ctx _node] (u/update-current-loc ctx z/up)) 90 | 91 | (defmethod open-node Document [ctx _node] ctx) 92 | (defmethod close-node Document [ctx _node] ctx) 93 | 94 | (defmethod open-node Paragraph [ctx _node] 95 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type (paragraph-type)})))) 96 | 97 | (defmethod open-node BlockQuote [ctx _node] 98 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :blockquote})))) 99 | 100 | (defmethod open-node Heading [ctx ^Heading node] 101 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :heading 102 | :heading-level (.getLevel node)})))) 103 | 104 | (defmethod close-node Heading [ctx ^Heading _node] 105 | (u/handle-close-heading ctx)) 106 | 107 | (defmethod open-node HtmlInline [ctx ^HtmlInline node] 108 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :html-inline 109 | :content [{:type :text 110 | :text (.getLiteral node)}]})))) 111 | 112 | (defmethod open-node HtmlBlock [ctx ^HtmlBlock node] 113 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :html-block 114 | :content [{:type :text 115 | :text (.getLiteral node)}]})))) 116 | 117 | (defmethod open-node BulletList [ctx ^ListBlock _node] 118 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :bullet-list :content [] #_#_:tight? (.isTight node)})))) 119 | 120 | (defmethod open-node OrderedList [ctx _node] 121 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :numbered-list :content []})))) 122 | 123 | (defmethod open-node ListItem [ctx _node] 124 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :list-item :content []})))) 125 | 126 | (defmethod open-node Emphasis [ctx _node] 127 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :em :content []})))) 128 | 129 | (defmethod open-node StrongEmphasis [ctx _node] 130 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :strong :content []})))) 131 | 132 | (defmethod open-node Code [ctx ^Code node] 133 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :monospace 134 | :content [{:type :text 135 | :text (.getLiteral node)}]})))) 136 | 137 | (defmethod open-node Strikethrough [ctx _node] 138 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :strikethrough :content []})))) 139 | 140 | (defmethod open-node Link [ctx ^Link node] 141 | (u/update-current-loc ctx (fn [loc] 142 | (u/zopen-node loc {:type :link 143 | :attrs (cond-> {:href (.getDestination node)} 144 | (.getTitle node) 145 | (assoc :title (.getTitle node)))})))) 146 | 147 | (defmethod open-node IndentedCodeBlock [ctx ^IndentedCodeBlock node] 148 | (u/update-current-loc ctx (fn [loc] 149 | (u/zopen-node loc {:type :code 150 | :content [{:type :text 151 | :text (.getLiteral node)}]})))) 152 | 153 | (defmethod open-node FencedCodeBlock [ctx ^FencedCodeBlock node] 154 | (u/update-current-loc ctx (fn [loc] 155 | (u/zopen-node loc (merge {:type :code 156 | :info (.getInfo node) 157 | :content [{:type :text 158 | :text (.getLiteral node)}]} 159 | (u/parse-fence-info (.getInfo node))))))) 160 | 161 | (defmethod open-node Image [ctx ^Image node] 162 | 
(u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :image 163 | :attrs {:src (.getDestination node) :title (.getTitle node)}})))) 164 | 165 | (defmethod open-node TableBlock [ctx ^TableBlock _node] 166 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table})))) 167 | (defmethod open-node TableHead [ctx ^TableHead _node] 168 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table-head})))) 169 | (defmethod open-node TableBody [ctx ^TableBody _node] 170 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table-body})))) 171 | (defmethod open-node TableRow [ctx ^TableRow _node] 172 | (u/update-current-loc ctx (fn [loc] (u/zopen-node loc {:type :table-row})))) 173 | 174 | (defn alignment->keyword [enum] 175 | (condp = enum 176 | TableCell$Alignment/LEFT :left 177 | TableCell$Alignment/CENTER :center 178 | TableCell$Alignment/RIGHT :right)) 179 | 180 | (defmethod open-node TableCell [ctx ^TableCell node] 181 | (u/update-current-loc ctx (fn [loc] 182 | (let [alignment (some-> (.getAlignment node) alignment->keyword)] 183 | (u/zopen-node loc (cond-> {:type (if (.isHeader node) :table-header :table-data) 184 | :content []} 185 | alignment 186 | (assoc :alignment alignment 187 | ;; TODO: drop/deprecate this, compute in transform 188 | :attrs {:style (str "text-align:" (name alignment))}))))))) 189 | 190 | (defmethod open-node FootnoteDefinition [ctx ^FootnoteDefinition node] 191 | (-> ctx 192 | (assoc ::root :footnotes) 193 | (u/update-current-loc (fn [loc] 194 | (-> loc 195 | (z/append-child {:type :footnote 196 | :label (.getLabel node) 197 | :content []}) z/down z/rightmost))))) 198 | 199 | (defmethod close-node FootnoteDefinition [ctx ^FootnoteDefinition _node] 200 | (-> ctx (u/update-current-loc z/up) (assoc ::root :doc))) 201 | 202 | (defmethod open-node InlineFootnote [{:as ctx ::keys [label->footnote-ref]} ^InlineFootnote _node] 203 | (let [label (str "inline-note-" (count label->footnote-ref)) 204 | footnote-ref {:type :footnote-ref 205 | :inline? true 206 | :ref (count label->footnote-ref) 207 | :label label}] 208 | (-> ctx 209 | (u/update-current-loc z/append-child footnote-ref) 210 | (update ::label->footnote-ref assoc label footnote-ref) 211 | (assoc ::root :footnotes) 212 | (u/update-current-loc (fn [loc] 213 | (-> loc 214 | (u/zopen-node {:type :footnote :inline? 
true :label label :content []}) 215 | (u/zopen-node {:type :paragraph :content []}))))))) 216 | 217 | (defmethod close-node InlineFootnote [ctx ^FootnoteDefinition _node] 218 | (-> ctx (u/update-current-loc (comp z/up z/up)) (assoc ::root :doc))) 219 | 220 | (defn handle-todo-list [loc ^TaskListItemMarker node] 221 | (-> loc 222 | (z/edit assoc :type :todo-item :attrs {:checked (.isChecked node)}) 223 | z/up (z/edit assoc :type :todo-list) 224 | z/down z/rightmost)) 225 | 226 | (def ^:private visitChildren-meth 227 | ;; cached reflection only happens once 228 | (delay (let [meth (.getDeclaredMethod AbstractVisitor "visitChildren" (into-array [Node]))] 229 | (.setAccessible meth true) 230 | meth))) 231 | 232 | (defn node->data [{:as ctx-in :keys [footnotes]} ^Node node] 233 | (assert (:type ctx-in) ":type must be set on initial doc") 234 | (assert (:content ctx-in) ":content must be set on initial doc") 235 | (assert (::root ctx-in) "context needs a ::root") 236 | ;; TODO: unify pre/post parse across impls 237 | (let [!ctx (atom (assoc ctx-in 238 | :doc (u/->zip ctx-in) 239 | :footnotes (u/->zip {:type :footnotes :content (or footnotes [])})))] 240 | (.accept node 241 | (proxy [AbstractVisitor] [] 242 | ;; proxy can't overload method by arg type, while gen-class can: https://groups.google.com/g/clojure/c/TVRsy4Gnf70 243 | (visit [^Node node] 244 | (condp instance? node 245 | ;; leaf nodes 246 | LinkReferenceDefinition :ignore 247 | ;;Text (swap! !ctx u/update-current z/append-child {:type :text :text (.getLiteral ^Text node)}) 248 | Text (swap! !ctx u/handle-text-token (.getLiteral ^Text node)) 249 | ThematicBreak (swap! !ctx u/update-current-loc z/append-child {:type :ruler}) 250 | SoftLineBreak (swap! !ctx u/update-current-loc z/append-child {:type :softbreak}) 251 | HardLineBreak (swap! !ctx u/update-current-loc z/append-child {:type :hardbreak}) 252 | TaskListItemMarker (swap! !ctx u/update-current-loc handle-todo-list node) 253 | nextjournal.markdown.impl.types.CustomNode 254 | (case (t/nodeType node) 255 | :block-formula (swap! !ctx u/update-current-loc z/append-child {:type :block-formula :text (t/getLiteral node)}) 256 | :inline-formula (swap! !ctx u/update-current-loc z/append-child {:type :formula :text (t/getLiteral node)}) 257 | :toc (swap! !ctx u/update-current-loc z/append-child {:type :toc})) 258 | FootnoteReference (swap! !ctx (fn [{:as ctx ::keys [label->footnote-ref]}] 259 | (let [label (.getLabel ^FootnoteReference node) 260 | footnote-ref (or (get label->footnote-ref label) 261 | {:type :footnote-ref 262 | :ref (count label->footnote-ref) 263 | :label label})] 264 | (-> ctx 265 | (u/update-current-loc z/append-child footnote-ref) 266 | (update ::label->footnote-ref assoc label footnote-ref))))) 267 | 268 | ;; else branch nodes 269 | (if (get-method open-node (class node)) 270 | (with-tight-list node 271 | (swap! !ctx open-node node) 272 | (.invoke ^java.lang.reflect.Method @visitChildren-meth this (into-array Object [node])) 273 | (swap! 
!ctx close-node node)) 274 | (prn ::not-implemented node)))))) 275 | 276 | (let [{:as ctx-out :keys [doc title toc footnotes] ::keys [label->footnote-ref]} (deref !ctx)] 277 | (-> ctx-out 278 | (dissoc :doc) 279 | (cond-> 280 | (and title (not (:title ctx-in))) 281 | (assoc :title title)) 282 | (assoc :toc toc 283 | :content (:content (z/root doc)) 284 | ::label->footnote-ref label->footnote-ref 285 | :footnotes 286 | ;; there will never be references without definitions, but the contrary may happen 287 | (->> footnotes z/root :content 288 | (keep (fn [{:as footnote :keys [label]}] 289 | (when (contains? label->footnote-ref label) 290 | (assoc footnote :ref (:ref (label->footnote-ref label)))))) 291 | (sort-by :ref) 292 | (vec))))))) 293 | 294 | (defn parse 295 | ([md] (parse u/empty-doc md)) 296 | ([ctx md] (node->data (update ctx :text-tokenizers (partial map u/normalize-tokenizer)) 297 | (.parse parser md)))) 298 | 299 | (comment 300 | (import '[org.commonmark.renderer.html HtmlRenderer]) 301 | (remove-all-methods open-node) 302 | (remove-all-methods close-node) 303 | 304 | (.render (.build (HtmlRenderer/builder)) 305 | (.parse parser "some text^[and a note]")) 306 | 307 | (parse "some text^[and a note]") 308 | 309 | (-> {} 310 | (parse "# Title") 311 | (parse "some para^[with note]") 312 | (parse "some para^[with other note]")) 313 | 314 | (parse "some `marks` inline and inline $formula$ with a [link _with_ em](https://what.tfk)") 315 | (parse (assoc u/empty-doc :text-tokenizers [u/internal-link-tokenizer]) 316 | "what a [[link]] is this") 317 | (parse "what the real deal is") 318 | (parse "some 319 | 320 | [[TOC]] 321 | 322 | what") 323 | 324 | (parse "# Ahoi 325 | 326 | > par 327 | > broken 328 | 329 | * a tight **strong** list 330 | * with [a nice link](/to/some 'with a title') 331 | * * with nested 332 | 333 | * lose list 334 | 335 | - [x] one inline formula $\\phi$ here 336 | - [ ] two 337 | 338 | --- 339 | ![img](/some/src 'title')") 340 | 341 | ;; footnotes 342 | (parse "_hello_ what and foo[^note1] and 343 | 344 | And what. 345 | 346 | [^note1]: the _what_ 347 | 348 | * and new text[^note2] at the end. 349 | * the hell^[crazy _inline_ note with [a](https://a-link.xx) inside] 350 | 351 | [^note2]: conclusion and $\\phi$ 352 | 353 | [^note3]: this should just be ignored 354 | ") 355 | 356 | (parse (slurp "../clerk-px23/README.md")) 357 | ;; => :ref 27 358 | 359 | (parse "Knuth's _Literate Programming_[^literateprogramming][^knuth84] emphasized the importance of focusing on human beings as consumers of computer programs. His original implementation involved authoring files that combine source code and documentation, which were then divided into two derived artifacts: source code for the computer and a typeset document in natural language to explain the program. 
360 | 361 | [^knuth84]: [Literate Programming](https://doi.org/10.1093/comjnl/27.2.97) 362 | [^literateprogramming]: An extensive archive of related material is maintained [here](http://www.literateprogramming.com).") 363 | 364 | (-> (parse "this might[^reuse] here[^another] and here[^reuse] here 365 | 366 | [^another]: stuff 367 | [^reuse]: define here 368 | 369 | this should be left as is 370 | 371 | another paragraph reusing[^reuse] 372 | ") 373 | md.parser/insert-sidenote-containers)) 374 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/impl.cljs: -------------------------------------------------------------------------------- 1 | ;; # 🧩 Parsing 2 | (ns nextjournal.markdown.impl 3 | (:require ["/js/markdown" :as md] 4 | [clojure.zip :as z] 5 | [nextjournal.markdown.utils :as u])) 6 | 7 | (defn hlevel [^js token] 8 | (let [hn (.-tag token)] 9 | (when (string? hn) (some-> (re-matches #"h([\d])" hn) second js/parseInt)))) 10 | 11 | ;; leaf nodes 12 | ;; TODO: use from utils 13 | (defn text-node [text] {:type :text :text text}) 14 | (defn formula [text] {:type :formula :text text}) 15 | (defn block-formula [text] {:type :block-formula :text text}) 16 | 17 | ;; node constructors 18 | (defn node 19 | [type content attrs top-level] 20 | (cond-> {:type type :content content} 21 | (seq attrs) (assoc :attrs attrs) 22 | (seq top-level) (merge top-level))) 23 | 24 | (defn empty-text-node? [{text :text t :type}] (and (= :text t) (empty? text))) 25 | 26 | (defn push-node [ctx node] 27 | (cond-> ctx 28 | (not (empty-text-node? node)) 29 | (u/update-current-loc z/append-child node))) 30 | 31 | (defn open-node 32 | ([ctx type] (open-node ctx type {})) 33 | ([ctx type attrs] (open-node ctx type attrs {})) 34 | ([ctx type attrs top-level] 35 | (u/update-current-loc ctx u/zopen-node (node type [] attrs top-level)))) 36 | 37 | (defn close-node [doc] (u/update-current-loc doc z/up)) 38 | 39 | (comment 40 | 41 | (-> u/empty-doc 42 | (assoc :doc (u/->zip {:type :doc})) ;; [:content -1] 43 | (open-node :heading) ;; [:content 0 :content -1] 44 | (push-node {:node/type :text :text "foo"}) ;; [:content 0 :content 0] 45 | (push-node {:node/type :text :text "foo"}) ;; [:content 0 :content 1] 46 | close-node ;; [:content 1] 47 | 48 | (open-node :paragraph) ;; [:content 1 :content] 49 | (push-node {:node/type :text :text "hello"}) 50 | close-node 51 | (open-node :bullet-list) 52 | )) 53 | ;; endregion 54 | 55 | ;; region token handlers 56 | (declare apply-tokens) 57 | (defmulti apply-token (fn [_doc ^js token] (.-type token))) 58 | (defmethod apply-token :default [doc token] 59 | (prn :apply-token/unknown-type {:token token}) 60 | doc) 61 | 62 | ;; blocks 63 | (defmethod apply-token "heading_open" [doc token] (open-node doc :heading {} {:heading-level (hlevel token)})) 64 | (defmethod apply-token "heading_close" [ctx _] 65 | (u/handle-close-heading ctx)) 66 | 67 | ;; for building the TOC we just care about headings at document top level (not e.g. 
nested under lists) ⬆ 68 | 69 | (defmethod apply-token "paragraph_open" [doc ^js token] 70 | ;; no trace of tight vs loose on list nodes 71 | ;; markdown-it passes this info directly to paragraphs via this `hidden` key 72 | (open-node doc (if (.-hidden token) :plain :paragraph))) 73 | 74 | (defmethod apply-token "paragraph_close" [doc _token] (close-node doc)) 75 | 76 | (defmethod apply-token "bullet_list_open" [doc ^js token] 77 | (let [attrs (.-attrs token) 78 | has-todos (:has-todos attrs)] 79 | (open-node doc (if has-todos :todo-list :bullet-list) attrs))) 80 | 81 | (defmethod apply-token "bullet_list_close" [doc _token] (close-node doc)) 82 | 83 | (defmethod apply-token "ordered_list_open" [doc ^js token] (open-node doc :numbered-list (.-attrs token))) 84 | (defmethod apply-token "ordered_list_close" [doc _token] (close-node doc)) 85 | 86 | (defmethod apply-token "list_item_open" [doc ^js token] 87 | (let [attrs (.-attrs token) 88 | todo (:todo attrs)] 89 | (open-node doc (if todo :todo-item :list-item) attrs))) 90 | (defmethod apply-token "list_item_close" [doc _token] (close-node doc)) 91 | 92 | (defmethod apply-token "math_block" [doc ^js token] (push-node doc (block-formula (.-content token)))) 93 | (defmethod apply-token "math_block_end" [doc _token] doc) 94 | 95 | (defmethod apply-token "hr" [doc _token] (push-node doc {:type :ruler})) 96 | 97 | (defmethod apply-token "blockquote_open" [doc _token] (open-node doc :blockquote)) 98 | (defmethod apply-token "blockquote_close" [doc _token] (close-node doc)) 99 | 100 | (defmethod apply-token "tocOpen" [doc _token] (open-node doc :toc)) 101 | (defmethod apply-token "tocBody" [doc _token] doc) ;; ignore body 102 | (defmethod apply-token "tocClose" [ctx _token] 103 | (-> ctx 104 | (u/update-current-loc 105 | (fn [loc] 106 | (-> loc (z/edit dissoc :content) z/up))))) 107 | 108 | (defmethod apply-token "code_block" [doc ^js token] 109 | (let [c (.-content token)] 110 | (-> doc 111 | (open-node :code) 112 | (push-node (text-node c)) 113 | close-node))) 114 | 115 | (defmethod apply-token "fence" [doc ^js token] 116 | (let [c (.-content token) 117 | i (.-info token)] 118 | (-> doc 119 | (open-node :code {} (assoc (u/parse-fence-info i) :info i)) 120 | (push-node (text-node c)) 121 | close-node))) 122 | 123 | (defn footnote-label [{:as _ctx ::keys [footnote-offset]} token] 124 | ;; TODO: consider initial offset in case we're parsing multiple inputs 125 | (or (.. token -meta -label) 126 | ;; inline labels won't have a label 127 | (str "inline-note-" (+ footnote-offset (.. token -meta -id))))) 128 | 129 | ;; footnotes 130 | (defmethod apply-token "footnote_ref" [{:as ctx ::keys [label->footnote-ref]} token] 131 | (let [label (footnote-label ctx token) 132 | footnote-ref (or (get label->footnote-ref label) 133 | {:type :footnote-ref :inline? (not (.. token -meta -label)) 134 | :ref (count label->footnote-ref) ;; was (+ (count footnotes) (j/get-in token [:meta :id])) ??? 135 | :label label})] 136 | (-> ctx 137 | (u/update-current-loc z/append-child footnote-ref) 138 | (update ::label->footnote-ref assoc label footnote-ref)))) 139 | 140 | (defmethod apply-token "footnote_open" [ctx token] 141 | ;; TODO unify in utils 142 | (let [label (footnote-label ctx token)] 143 | (-> ctx 144 | (u/update-current-loc (fn [loc] 145 | (u/zopen-node loc {:type :footnote 146 | :inline? (not (.. 
token -meta -label)) 147 | :label label})))))) 148 | 149 | ;; inline footnotes^[like this one] 150 | (defmethod apply-token "footnote_close" [ctx _token] 151 | (-> ctx (u/update-current-loc z/up))) 152 | 153 | (defmethod apply-token "footnote_block_open" [ctx _token] 154 | ;; store footnotes at a top level `:footnote` key 155 | (assoc ctx ::root :footnotes)) 156 | 157 | (defmethod apply-token "footnote_block_close" 158 | ;; restores path for addding new tokens 159 | [ctx _token] 160 | (assoc ctx ::root :doc)) 161 | 162 | (defmethod apply-token "footnote_anchor" [doc _token] doc) 163 | 164 | (comment 165 | (-> "some text^[inline note] 166 | " 167 | md/tokenize flatten-tokens 168 | #_ parse 169 | #_ u/insert-sidenote-containers) 170 | 171 | (-> empty-doc 172 | (update :text-tokenizers (partial map u/normalize-tokenizer)) 173 | (apply-tokens (nextjournal.markdown/tokenize "what^[the heck]")) 174 | insert-sidenote-columns 175 | (apply-tokens (nextjournal.markdown/tokenize "# Hello")) 176 | insert-sidenote-columns 177 | (apply-tokens (nextjournal.markdown/tokenize "is^[this thing]")) 178 | insert-sidenote-columns)) 179 | 180 | ;; tables 181 | ;; table data tokens might have {:style "text-align:right|left"} attrs, maybe better nested node > :attrs > :style ? 182 | (defmethod apply-token "table_open" [doc _token] (open-node doc :table)) 183 | (defmethod apply-token "table_close" [doc _token] (close-node doc)) 184 | (defmethod apply-token "thead_open" [doc _token] (open-node doc :table-head)) 185 | (defmethod apply-token "thead_close" [doc _token] (close-node doc)) 186 | (defmethod apply-token "tr_open" [doc _token] (open-node doc :table-row)) 187 | (defmethod apply-token "tr_close" [doc _token] (close-node doc)) 188 | (defmethod apply-token "th_open" [doc ^js token] (open-node doc :table-header (.-attrs token))) 189 | (defmethod apply-token "th_close" [doc _token] (close-node doc)) 190 | (defmethod apply-token "tbody_open" [doc _token] (open-node doc :table-body)) 191 | (defmethod apply-token "tbody_close" [doc _token] (close-node doc)) 192 | (defmethod apply-token "td_open" [doc ^js token] (open-node doc :table-data (.-attrs token))) 193 | (defmethod apply-token "td_close" [doc _token] (close-node doc)) 194 | 195 | (comment 196 | (-> 197 | " 198 | | Syntax | JVM | JavaScript | 199 | |--------|:------------------------:|--------------------------------:| 200 | | foo | Loca _lDate_ ahoiii | goog.date.Date | 201 | | bar | java.time.LocalTime | some [kinky](link/to/something) | 202 | | bag | java.time.LocalDateTime | $\\phi$ | 203 | " 204 | nextjournal.markdown/parse 205 | nextjournal.markdown.transform/->hiccup 206 | )) 207 | 208 | (comment 209 | (->> "# Hello #Fishes 210 | 211 | > what about #this 212 | 213 | _this #should be a tag_, but this [_actually #foo shouldnt_](/bar/) is not." 
214 | (parse (update empty-doc :text-tokenizers conj (u/normalize-tokenizer u/hashtag-tokenizer))))) 215 | 216 | (defmethod apply-token "text" [ctx ^js token] 217 | (u/handle-text-token ctx (.-content token))) 218 | 219 | (comment 220 | (def mustache (u/normalize-tokenizer {:regex #"\{\{([^\{]+)\}\}" :handler (fn [m] {:type :eval :text (m 1)})})) 221 | (u/tokenize-text-node mustache {} {:text "{{what}} the {{hellow}}"}) 222 | (u/handle-text-token (assoc u/empty-doc :text-tokenizers [mustache]) 223 | "foo [[bar]] dang #hashy taggy [[what]] #dangy foo [[great]] and {{eval}} me")) 224 | 225 | ;; inlines 226 | (defmethod apply-token "inline" [doc ^js token] (apply-tokens doc (.-children token))) 227 | (defmethod apply-token "math_inline" [doc ^js token] (push-node doc (formula (.-content token)))) 228 | (defmethod apply-token "math_inline_double" [doc ^js token] (push-node doc (formula (.-content token)))) 229 | 230 | ;; https://spec.commonmark.org/0.30/#softbreak 231 | (defmethod apply-token "softbreak" [doc _token] (push-node doc {:type :softbreak})) 232 | ;; https://spec.commonmark.org/0.30/#hard-line-break 233 | (defmethod apply-token "hardbreak" [doc _token] (push-node doc {:type :hardbreak})) 234 | 235 | ;; images 236 | (defmethod apply-token "image" [doc ^js token] 237 | (let [attrs (.-attrs token) 238 | children (.-children token)] 239 | (-> doc (open-node :image attrs) (apply-tokens children) close-node))) 240 | 241 | ;; marks 242 | (defmethod apply-token "em_open" [doc _token] (open-node doc :em)) 243 | (defmethod apply-token "em_close" [doc _token] (close-node doc)) 244 | (defmethod apply-token "strong_open" [doc _token] (open-node doc :strong)) 245 | (defmethod apply-token "strong_close" [doc _token] (close-node doc)) 246 | (defmethod apply-token "s_open" [doc _token] (open-node doc :strikethrough)) 247 | (defmethod apply-token "s_close" [doc _token] (close-node doc)) 248 | (defmethod apply-token "link_open" [doc ^js token] (open-node doc :link (.-attrs token))) 249 | (defmethod apply-token "link_close" [doc _token] (close-node doc)) 250 | (defmethod apply-token "code_inline" [doc ^js token] (-> doc (open-node :monospace) (push-node (text-node (.-content token))) close-node)) 251 | 252 | ;; html 253 | (defmethod apply-token "html_inline" [doc token] 254 | (-> doc (u/update-current-loc z/append-child {:type :html-inline :content [(text-node (.-content token))]}))) 255 | 256 | (defmethod apply-token "html_block" [doc token] 257 | (-> doc (u/update-current-loc z/append-child {:type :html-block :content [(text-node (.-content token))]}))) 258 | 259 | ;; html 260 | (defmethod apply-token "html_inline" [doc token] 261 | (-> doc (u/update-current-loc z/append-child {:type :html-inline :content [(text-node (.-content token))]}))) 262 | 263 | (defmethod apply-token "html_block" [doc token] 264 | (-> doc (u/update-current-loc z/append-child {:type :html-block :content [(text-node (.-content token))]}))) 265 | 266 | ;; endregion 267 | 268 | ;; region data builder api 269 | (defn pairs->kmap [pairs] (into {} (map (juxt (comp keyword first) second)) pairs)) 270 | (defn apply-tokens [doc tokens] 271 | (let [mapify-attrs-xf (map (fn [x] 272 | (set! 
x -attrs (pairs->kmap (.-attrs x))) 273 | x))] 274 | (reduce (mapify-attrs-xf apply-token) doc tokens))) 275 | 276 | (defn parse 277 | ([markdown] (parse u/empty-doc markdown)) 278 | ([ctx-in markdown] 279 | ;; TODO: unify implementations 280 | (let [{:as ctx-out :keys [doc title toc footnotes] ::keys [label->footnote-ref]} 281 | (-> ctx-in 282 | (assoc ::footnote-offset (count (::label->footnote-ref ctx-in))) 283 | (update :text-tokenizers (partial map u/normalize-tokenizer)) 284 | (assoc :doc (u/->zip ctx-in) 285 | :footnotes (u/->zip {:type :footnotes 286 | :content (or (:footnotes ctx-in) [])})) 287 | (apply-tokens (md/tokenize markdown)))] 288 | (-> ctx-out 289 | (dissoc :doc) 290 | (cond-> 291 | (and title (not (:title ctx-in))) 292 | (assoc :title title)) 293 | (assoc :toc toc 294 | :content (:content (z/root doc)) 295 | ::label->footnote-ref label->footnote-ref 296 | :footnotes 297 | ;; there will never be references without definitions, but the contrary may happen 298 | (->> footnotes z/root :content 299 | (keep (fn [{:as footnote :keys [label]}] 300 | (when (contains? label->footnote-ref label) 301 | (assoc footnote :ref (:ref (label->footnote-ref label)))))) 302 | (sort-by :ref) 303 | (vec))))))) 304 | 305 | (comment 306 | (-> (parse "text^[a]") ::label->footnote-ref) 307 | 308 | (-> (parse "text^[a]") 309 | (parse "text^[b]"))) 310 | 311 | (comment 312 | (defn pr-dbg [x] (js/console.log (js/JSON.parse (js/JSON.stringify x)))) 313 | (parse "# 🎱 Hello") 314 | ) 315 | 316 | (comment 317 | (some-> "# Title 318 | 319 | ## Section 1 320 | 321 | foo 322 | 323 | - # What is this? (no!) 324 | - maybe 325 | 326 | ### Section 1.2 327 | 328 | ## Section 2 329 | 330 | some par 331 | 332 | ### Section 2.1 333 | 334 | some other par 335 | 336 | ### Section 2.2 337 | 338 | #### Section 2.2.1 339 | 340 | two two one 341 | 342 | #### Section 2.2.2 343 | 344 | two two two 345 | 346 | ## Section 3 347 | 348 | some final par" 349 | nextjournal.markdown/parse 350 | (section-at [:content 9]) ;; ⬅ paths are stored in TOC sections 351 | nextjournal.markdown.transform/->hiccup)) 352 | ;; endregion 353 | 354 | 355 | ;; ## 🔧 Debug 356 | ;; A view on flattened tokens to better inspect tokens 357 | (defn flatten-tokens [tokens] 358 | (into [] 359 | (comp 360 | (mapcat (partial tree-seq (comp seq :children) :children)) 361 | (map #(select-keys % [:type :content :hidden :level :info :meta]))) 362 | tokens)) 363 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/impl/extensions.clj: -------------------------------------------------------------------------------- 1 | (ns nextjournal.markdown.impl.extensions 2 | (:require [clojure.string :as str] 3 | [nextjournal.markdown.impl.types :as t]) 4 | (:import (java.util.regex Matcher Pattern) 5 | (org.commonmark.parser Parser$ParserExtension Parser$Builder SourceLine) 6 | (org.commonmark.parser.beta InlineContentParser InlineContentParserFactory ParsedInline InlineParserState) 7 | (org.commonmark.parser.block AbstractBlockParser BlockContinue BlockParserFactory BlockStart ParserState BlockParser))) 8 | 9 | (set! 
*warn-on-reflection* true) 10 | 11 | (def block-formula-delimiter-regex (re-pattern "^\\$\\$")) 12 | (def block-toc-delimiter-regex (re-pattern "^\\[\\[TOC\\]\\]")) 13 | 14 | (defn delimiter-matcher ^Matcher [^Pattern regex ^ParserState state] 15 | (let [^SourceLine line (.getLine state) 16 | next-non-space (.getNextNonSpaceIndex state)] 17 | (re-matcher regex (subs (.getContent line) next-non-space)))) 18 | 19 | (defn block-formula-delimiter-matcher ^Matcher [^ParserState s] (delimiter-matcher block-formula-delimiter-regex s)) 20 | (defn block-toc-delimiter-matcher ^Matcher [^ParserState s] (delimiter-matcher block-toc-delimiter-regex s)) 21 | 22 | (defn inline-formula-parser [] 23 | (proxy [InlineContentParser] [] 24 | (tryParse [^InlineParserState parser-state] 25 | (let [scanner (.scanner parser-state) 26 | ;; move past opening $ 27 | _ (.next scanner) 28 | open-pos (.position scanner)] 29 | (if (= -1 (.find scanner \$)) 30 | (ParsedInline/none) 31 | (let [^String content (.getContent (.getSource scanner open-pos (.position scanner)))] 32 | (.next scanner) 33 | (ParsedInline/of (t/->InlineFormula content) (.position scanner)))))))) 34 | 35 | (defn close-block-formula? [state !lines] 36 | ;; we allow 1-liner blocks like A) 37 | ;; text 38 | ;; 39 | ;; $$\\bigoplus$$ 40 | ;; 41 | ;; or blocks delimited by $$ B) 42 | ;; 43 | ;; $$ 44 | ;; \\bigoplus 45 | ;; $$ 46 | (or #_A (when-some [l (last @!lines)] (str/ends-with? (str/trimr l) "$$")) 47 | #_B (some? (re-find (block-formula-delimiter-matcher state))))) 48 | 49 | (defn block-formula-parser ^BlockParser [] 50 | (let [block-formula (t/->BlockFormula) 51 | !lines (atom [])] 52 | (proxy [AbstractBlockParser] [] 53 | (isContainer [] false) 54 | (canContain [_other] false) 55 | (getBlock [] block-formula) 56 | (addLine [^SourceLine line] 57 | (when-some [l (not-empty (str/trim (.getContent line)))] 58 | (swap! !lines conj l))) 59 | (closeBlock [] 60 | (t/setLiteral block-formula (let [formula-body (str/join \newline @!lines)] 61 | (cond-> formula-body 62 | (str/ends-with? formula-body "$$") 63 | (subs 0 (- (count formula-body) 2)))))) 64 | (tryContinue [^ParserState state] 65 | (let [non-space (.getNextNonSpaceIndex state)] 66 | (if (close-block-formula? 
state !lines) 67 | (BlockContinue/finished) 68 | (BlockContinue/atIndex non-space))))))) 69 | 70 | (def block-formula-parser-factory 71 | (proxy [BlockParserFactory] [] 72 | (tryStart [^ParserState state _matchedBlockParser] 73 | (if (<= 4 (.getIndent state)) 74 | (BlockStart/none) 75 | (let [next-non-space (.getNextNonSpaceIndex state) 76 | m (block-formula-delimiter-matcher state)] 77 | (if (re-find m) 78 | (.atIndex (BlockStart/of (into-array [(block-formula-parser)])) 79 | (+ next-non-space (.end m))) 80 | (BlockStart/none))))))) 81 | 82 | (defn block-toc-parser ^BlockParser [] 83 | (let [toc (t/->ToC)] 84 | (proxy [AbstractBlockParser] [] 85 | (getBlock [] toc) 86 | ;; close immediately 87 | (tryContinue [^ParserState _state] (BlockContinue/finished))))) 88 | 89 | (def block-toc-parser-factory 90 | (proxy [BlockParserFactory] [] 91 | (tryStart [^ParserState state _matchedBlockParser] 92 | (if (<= 4 (.getIndent state)) 93 | (BlockStart/none) 94 | (let [next-non-space (.getNextNonSpaceIndex state) 95 | m (block-toc-delimiter-matcher state)] 96 | (if (re-find m) 97 | (.atIndex (BlockStart/of (into-array [(block-toc-parser)])) 98 | (+ next-non-space (.end m))) 99 | (BlockStart/none))))))) 100 | 101 | (defn create [] 102 | (proxy [Object Parser$ParserExtension] [] 103 | (extend [^Parser$Builder pb] 104 | (.customBlockParserFactory pb block-toc-parser-factory) 105 | (.customBlockParserFactory pb block-formula-parser-factory) 106 | (.customInlineContentParserFactory pb (reify InlineContentParserFactory 107 | (getTriggerCharacters [_] #{\$}) 108 | (create [_] (inline-formula-parser))))))) 109 | 110 | (comment 111 | (class (re-matcher #"" "")) 112 | (nextjournal.markdown.commonmark/parse " 113 | # Title 114 | 115 | This is an $\\mathit{inline}$ formula 116 | 117 | $$ 118 | \\begin{equation} 119 | \\dfrac{1}{128\\pi^{2}} 120 | \\end{equation} 121 | $$ 122 | 123 | * a $\\int_a^b\\phi(t)dt$ with discount 124 | * and what")) 125 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/impl/types.clj: -------------------------------------------------------------------------------- 1 | (ns nextjournal.markdown.impl.types 2 | (:import [nextjournal.markdown.impl.types CustomNode])) 3 | 4 | ;; See also 5 | ;; https://github.com/noties/Markwon/blob/master/markwon-ext-latex/src/main/java/io/noties/markwon/ext/latex/JLatexMathBlockParser.java 6 | 7 | (set! *warn-on-reflection* true) 8 | 9 | (defn ->InlineFormula [lit] 10 | (let [state (atom lit)] 11 | (proxy [org.commonmark.node.CustomNode CustomNode] [] 12 | (getLiteral [] @state) 13 | (nodeType [] :inline-formula)))) 14 | 15 | (defn ->BlockFormula 16 | ([] (->BlockFormula nil)) 17 | ([lit] 18 | (let [state (atom lit)] 19 | (proxy [org.commonmark.node.CustomBlock CustomNode] [] 20 | (getLiteral [] @state) 21 | (setLiteral [v] (do (reset! state v) 22 | this)) 23 | (nodeType [] :block-formula))))) 24 | 25 | (defn ->ToC [] 26 | (proxy [org.commonmark.node.CustomBlock CustomNode] [] 27 | (nodeType [] :toc))) 28 | 29 | (defn setLiteral [^CustomNode n lit] 30 | (.setLiteral n lit)) 31 | 32 | (defn getLiteral [^CustomNode n] 33 | (.getLiteral n)) 34 | 35 | (defn nodeType [^CustomNode n] 36 | (.nodeType n)) 37 | 38 | (comment 39 | (def i (->InlineFormula "1+1")) 40 | (instance? 
nextjournal.markdown.impl.types.CustomNode i) 41 | (let [b (->BlockFormula)] 42 | (-> (setLiteral b "dude") 43 | (getLiteral))) 44 | ) 45 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/impl/types/CustomNode.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nextjournal/markdown/5829ec101331b1702841094f4dc897ee46f0ddcd/src/nextjournal/markdown/impl/types/CustomNode.class -------------------------------------------------------------------------------- /src/nextjournal/markdown/impl/types/CustomNode.java: -------------------------------------------------------------------------------- 1 | package nextjournal.markdown.impl.types; 2 | 3 | // rebuild with: 4 | // javac -source 8 -target 1.8 src/nextjournal/markdown/impl/types/CustomNode.java 5 | 6 | public interface CustomNode { 7 | 8 | public Object setLiteral(Object v); 9 | public Object getLiteral(); 10 | public Object nodeType(); 11 | 12 | } 13 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/transform.cljc: -------------------------------------------------------------------------------- 1 | (ns nextjournal.markdown.transform 2 | "transform markdown data as returned by `nextjournal.markdown/parse` into other formats, currently: 3 | * hiccup") 4 | 5 | ;; helpers 6 | (defn guard [pred val] (when (pred val) val)) 7 | (defn ->text [{:as _node :keys [type text content]}] 8 | (or (when (= :softbreak type) " ") 9 | text 10 | (apply str (map ->text content)))) 11 | 12 | (defn hydrate-toc 13 | "Scans doc contents and replaces toc node placeholder with the toc node accumulated during parse." 14 | [{:as doc :keys [toc]}] 15 | (update doc :content (partial into [] (map (fn [{:as node t :type}] (if (= :toc t) toc node)))))) 16 | 17 | (defn table-alignment [{:keys [style]}] 18 | (when (string? style) 19 | (let [[_ alignment] (re-matches #"^text-align:(.+)$" style)] 20 | (when alignment {:text-align alignment})))) 21 | 22 | (defn heading-markup [{l :heading-level}] [(keyword (str "h" (or l 1)))]) 23 | 24 | ;; into-markup 25 | (declare ->hiccup) 26 | (defn into-markup 27 | "Takes a hiccup vector, a context and a node, puts node's `:content` into markup mapping through `->hiccup`." 
28 | [mkup ctx {:as node :keys [text content]}] 29 | (cond ;; formula nodes are leaves: have text and no contents 30 | text (conj mkup text) 31 | content (into mkup 32 | (keep (partial ->hiccup (assoc ctx ::parent node))) 33 | content))) 34 | 35 | (defn toc->hiccup [{:as ctx ::keys [parent]} {:as node :keys [attrs content children]}] 36 | (let [id (:id attrs) 37 | toc-item (cond-> [:div] 38 | (seq content) 39 | (conj [:a {:href (str "#" id) #?@(:cljs [:on-click #(when-some [el (.getElementById js/document id)] (.preventDefault %) (.scrollIntoViewIfNeeded el))])} 40 | (-> node heading-markup (into-markup ctx node))]) 41 | (seq children) 42 | (conj (into [:ul] (map (partial ->hiccup (assoc ctx ::parent node))) children)))] 43 | (cond->> toc-item 44 | (= :toc (:type parent)) 45 | (conj [:li.toc-item]) 46 | (not= :toc (:type parent)) 47 | (conj [:div.toc])))) 48 | 49 | (comment 50 | ;; override toc rendering 51 | (-> "# Hello 52 | a paragraph 53 | [[TOC]] 54 | ## Section _nice_ One 55 | ### Section Nested 56 | ## Section **terrible** Idea 57 | " 58 | nextjournal.markdown/parse 59 | ;; :toc 60 | ;; ->hiccup #_ 61 | (->> (->hiccup (assoc default-hiccup-renderers 62 | :toc (fn [ctx {:as node :keys [content children heading-level]}] 63 | (cond-> [:div] 64 | (seq content) (conj [:span.title {:data-level heading-level} (:id node)]) 65 | (seq children) (conj (into [:ul] (map (partial ->hiccup ctx)) children))))))))) 66 | 67 | (def default-hiccup-renderers 68 | {:doc (partial into-markup [:div]) 69 | :heading (fn [ctx {:as node :keys [attrs]}] (-> (heading-markup node) (conj attrs) (into-markup ctx node))) 70 | :paragraph (partial into-markup [:p]) 71 | :plain (fn [ctx {:keys [content]}] 72 | (seq (mapv (partial ->hiccup ctx) content))) 73 | :text (fn [_ {:keys [text]}] text) 74 | :hashtag (fn [_ {:keys [text]}] [:a.tag {:href (str "/tags/" text)} (str "#" text)]) ;; TODO: make it configurable 75 | :blockquote (partial into-markup [:blockquote]) 76 | :ruler (constantly [:hr]) 77 | 78 | ;; by default we always wrap images in paragraph to restore compliance with commonmark 79 | :image (fn [{:as _ctx ::keys [parent]} {:as node :keys [attrs]}] 80 | (let [img-markup [:img (assoc attrs :alt (->text node))]] 81 | (if (= :doc (:type parent)) 82 | [:p img-markup] 83 | img-markup))) 84 | 85 | ;; code 86 | :code (fn [_ {:keys [language] :as m}] 87 | [:pre 88 | [(if language 89 | (keyword (str "code.language-" language)) 90 | :code) 91 | (-> m :content first :text)]]) 92 | 93 | ;; breaks 94 | :softbreak (constantly " ") 95 | :hardbreak (constantly [:br]) 96 | 97 | ;; formulas 98 | :formula (partial into-markup [:span.formula]) 99 | :block-formula (partial into-markup [:figure.formula]) 100 | 101 | ;; lists 102 | :bullet-list (partial into-markup [:ul]) 103 | :list-item (partial into-markup [:li]) 104 | :todo-list (partial into-markup [:ul.contains-task-list]) 105 | :numbered-list (fn [ctx {:as node :keys [attrs]}] (into-markup [:ol attrs] ctx node)) 106 | 107 | :todo-item (fn [ctx {:as node :keys [attrs]}] 108 | (into-markup [:li [:input {:type "checkbox" :checked (:checked attrs)}]] ctx node)) 109 | 110 | ;; tables 111 | :table (partial into-markup [:table]) 112 | :table-head (partial into-markup [:thead]) 113 | :table-body (partial into-markup [:tbody]) 114 | :table-row (partial into-markup [:tr]) 115 | :table-header (fn [ctx {:as node :keys [attrs]}] 116 | (into-markup (let [ta (table-alignment attrs)] (cond-> [:th] ta (conj {:style ta}))) 117 | ctx node)) 118 | :table-data (fn [ctx {:as node :keys [attrs]}] 
119 | (into-markup (let [ta (table-alignment attrs)] (cond-> [:td] ta (conj {:style ta}))) 120 | ctx node)) 121 | 122 | ;; footnotes & sidenodes 123 | :sidenote-container (partial into-markup [:div.sidenote-container]) 124 | :sidenote-column (partial into-markup [:div.sidenote-column]) 125 | :sidenote-ref (fn [_ {:keys [ref label]}] [:sup.sidenote-ref {:data-label label} (str (inc ref))]) 126 | :sidenote (fn [ctx {:as node :keys [ref]}] 127 | (into-markup [:span.sidenote [:sup {:style {:margin-right "3px"}} (str (inc ref))]] ctx node)) 128 | 129 | :footnote-ref (fn [_ {:keys [ref label]}] [:sup.sidenote-ref {:data-label label} (str (inc ref))]) 130 | ;; NOTE: there's no default footnote placement (see n.markdown.parser/insert-sidenotes) 131 | :footnote (fn [ctx {:as node :keys [ref label]}] 132 | (into-markup [:div.footnote [:span.footnote-label {:data-ref ref} label]] ctx node)) 133 | 134 | ;; TOC 135 | :toc toc->hiccup 136 | 137 | ;; marks 138 | :em (partial into-markup [:em]) 139 | :strong (partial into-markup [:strong]) 140 | :monospace (partial into-markup [:code]) 141 | :strikethrough (partial into-markup [:s]) 142 | :link (fn [ctx {:as node :keys [attrs]}] (into-markup [:a {:href (:href attrs)}] ctx node)) 143 | :internal-link (fn [_ {:keys [attrs text]}] [:a.internal {:href (:href attrs text)} text]) 144 | 145 | ;; default convenience fn to wrap extra markup around the default one from within the overriding function 146 | :default (fn [ctx {:as node t :type}] (when-some [d (get default-hiccup-renderers t)] (d ctx node))) 147 | }) 148 | 149 | (defn ->hiccup 150 | ([node] (->hiccup default-hiccup-renderers node)) 151 | ([ctx {:as node t :type}] 152 | (let [{:as node :keys [type]} (cond-> node (= :doc t) hydrate-toc)] 153 | (if-some [f (guard fn? (get ctx type))] 154 | (f ctx node) 155 | [:span.message.red 156 | [:strong (str "Unknown type: '" type "'.")] 157 | [:code (pr-str node)]] 158 | )))) 159 | 160 | (comment 161 | (-> "# Hello 162 | 163 | a nice paragraph with sidenotes[^my-note] 164 | 165 | [[TOC]] 166 | 167 | ## Section One 168 | A nice $\\phi$ formula [for _real_ **strong** fun](/path/to) soft 169 | break 170 | 171 | - [ ] one **ahoi** list 172 | - two `nice` and ~~three~~ 173 | - [x] checked 174 | 175 | > that said who? 
176 | 177 | --- 178 | 179 | ## Section Two 180 | 181 | ### Tables 182 | 183 | | Syntax | JVM | JavaScript | 184 | |--------|-------------------------:|:--------------------------------| 185 | | foo | Loca _lDate_ ahoiii | goog.date.Date | 186 | | bar | java.time.LocalTime | some [kinky](link/to/something) | 187 | | bag | java.time.LocalDateTime | $\\phi$ | 188 | 189 | ### Images 190 | 191 | ![Some **nice** caption](https://www.example.com/images/dinosaur.jpg) 192 | 193 | and here as inline ![alt](foo/bar) image 194 | 195 | ```clj 196 | (some nice clojure) 197 | ``` 198 | 199 | [^my-note]: Here can discuss at length" 200 | nextjournal.markdown/parse 201 | ->hiccup 202 | ) 203 | 204 | ;; override defaults 205 | (->> "## Title 206 | par one 207 | 208 | par two" 209 | nextjournal.markdown/parse 210 | (->hiccup (assoc default-hiccup-renderers 211 | :heading (partial into-markup [:h1.at-all-levels]) 212 | ;; wrap something around the default 213 | :paragraph (fn [{:as ctx d :default} node] [:div.p-container (d ctx node)])))) 214 | ) 215 | 216 | (comment 217 | (require '[hiccup2.core :as h]) 218 | 219 | (-> " 220 | * one 221 | * two" 222 | nextjournal.markdown/parse 223 | ->hiccup 224 | h/html str 225 | ) 226 | 227 | (-> " 228 | * one 229 | 230 | * two" 231 | nextjournal.markdown/parse 232 | ->hiccup 233 | h/html str 234 | ) 235 | 236 | (-> "# foo 237 | - one \\ 238 | broken 239 | - two" 240 | nextjournal.markdown/parse 241 | ->hiccup 242 | h/html str 243 | ) 244 | 245 | ;; https://spec.commonmark.org/0.30/#example-319 246 | (= (str "
" 247 | "
  • a
    • b

      c

  • d
" 248 | "
") 249 | (->> "- a\n - b\n\n c\n- d" 250 | nextjournal.markdown/parse 251 | (->hiccup default-hiccup-renderers) 252 | h/html str))) 253 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/utils.cljc: -------------------------------------------------------------------------------- 1 | ;; # Markdown parsing shared utils 2 | (ns nextjournal.markdown.utils 3 | (:require [clojure.string :as str] 4 | [clojure.zip :as z] 5 | [nextjournal.markdown.utils.emoji :as emoji] 6 | [nextjournal.markdown.transform :as md.transform])) 7 | 8 | #?(:clj (defn re-groups* [m] (let [g (re-groups m)] (cond-> g (not (vector? g)) vector)))) 9 | (defn re-idx-seq 10 | "Takes a regex and a string, returns a seq of triplets comprised of match groups followed by indices delimiting each match." 11 | [re text] 12 | #?(:clj (let [m (re-matcher re text)] 13 | (take-while some? (repeatedly #(when (.find m) [(re-groups* m) (.start m) (.end m)])))) 14 | :cljs (let [rex (js/RegExp. (.-source re) "g")] 15 | (take-while some? (repeatedly #(when-some [m (.exec rex text)] [(vec m) (.-index m) (.-lastIndex rex)])))))) 16 | 17 | #_ (re-idx-seq #"\{\{([^{]+)\}\}" "foo {{hello}} bar") 18 | #_ (re-idx-seq #"\{\{[^{]+\}\}" "foo {{hello}} bar {{what}} the") 19 | 20 | ;; ## Context and Nodes 21 | 22 | (defn split-by-emoji [s] 23 | (let [[match start end] (first (re-idx-seq emoji/regex s))] 24 | (if match 25 | [(subs s start end) (str/trim (subs s end))] 26 | [nil s]))) 27 | 28 | #_(split-by-emoji " Stop") 29 | #_(split-by-emoji "🤚🏽 Stop") 30 | #_(split-by-emoji "🤚🏽🤚 Stop") 31 | #_(split-by-emoji "🤚🏽Stop") 32 | #_(split-by-emoji "🤚🏽 Stop") 33 | #_(split-by-emoji "😀 Stop") 34 | #_(split-by-emoji "⚛️ Stop") 35 | #_(split-by-emoji "⚛ Stop") 36 | #_(split-by-emoji "⬇ Stop") 37 | #_(split-by-emoji "Should not 🙁️ Split") 38 | #_(text->id+emoji "Hello There") 39 | #_(text->id+emoji "Hello_There") 40 | #_(text->id+emoji "👩‍🔬 Quantum Physics") 41 | 42 | (defn text->id+emoji [text] 43 | (when (string? text) 44 | (let [[emoji text'] (split-by-emoji (str/trim text))] 45 | (cond-> {:id (apply str (map (comp str/lower-case (fn [c] (case c (\space \_) \- c))) text'))} 46 | emoji (assoc :emoji emoji))))) 47 | 48 | ;; TODO: move this to n.markdown ns 49 | (def empty-doc 50 | {:type :doc 51 | :content [] 52 | :toc {:type :toc} 53 | :footnotes [] 54 | :text-tokenizers [] 55 | ;; Node -> {id : String, emoji String}, dissoc from context to opt-out of ids 56 | :text->id+emoji-fn (comp text->id+emoji md.transform/->text) 57 | 58 | ;; private 59 | ;; Id -> Nat, to disambiguate ids for nodes with the same textual content 60 | :nextjournal.markdown.impl/id->index {} 61 | ;; allow to swap between :doc or :footnotes 62 | :nextjournal.markdown.impl/root :doc}) 63 | 64 | (defn current-loc [{:as ctx :nextjournal.markdown.impl/keys [root]}] (get ctx root)) 65 | (defn update-current-loc [{:as ctx :nextjournal.markdown.impl/keys [root]} f & args] 66 | (assert root (str "Missing root: '" (keys ctx) "'")) 67 | (apply update ctx root f args)) 68 | 69 | (defn text-node [s] {:type :text :text s}) 70 | (defn formula [text] {:type :formula :text text}) 71 | (defn block-formula [text] {:type :block-formula :text text}) 72 | 73 | (defn node 74 | [type content attrs top-level] 75 | (cond-> {:type type :content content} 76 | (seq attrs) (assoc :attrs attrs) 77 | (seq top-level) (merge top-level))) 78 | 79 | ;; ## 🤐 Zipper Utils 80 | 81 | (defn ->zip [doc] 82 | (z/zipper (every-pred map? 
:type) :content 83 | (fn [node cs] (assoc node :content (vec cs))) 84 | doc)) 85 | (def zip? (comp some? :zip/children meta)) 86 | (defn zdepth [loc] (-> loc second :pnodes count)) 87 | 88 | #_(zip? (->zip {:type :doc :content []})) 89 | #_(->zip {:type :doc :content []}) 90 | #_(-> {:type :doc :content []} ->zip 91 | (z/append-child {:type :heading}) 92 | z/down zdepth) 93 | 94 | (defn zopen-node [loc node] 95 | (-> loc (z/append-child node) z/down z/rightmost)) 96 | 97 | (defn zpath 98 | "Given a document zipper location `loc` returns a vector corresponding to the path of node at `loc` 99 | suitable for get-in from root. That is `(= (z/node loc) (get-in (z/root loc) (zpath loc)`" 100 | [loc] 101 | (loop [coords (second loc) idxs ()] 102 | (if-some [idx (when (and coords (:l coords)) (count (:l coords)))] 103 | (recur (:ppath coords) (conj idxs idx)) 104 | (vec (when (seq idxs) 105 | (cons :content (interpose :content idxs))))))) 106 | 107 | (comment 108 | (def loc 109 | (-> {:type :doc} ->zip 110 | (z/append-child {:type :paragraph}) 111 | (z/append-child {:type :paragraph}) 112 | z/down z/rightmost 113 | (z/append-child {:type :text :text "ahoi"}) 114 | z/down)) 115 | (-> loc z/node) 116 | (-> loc second) 117 | ) 118 | 119 | ;; TODO: rewrite in terms of zippers 120 | (def ppop (comp pop pop)) 121 | (defn inc-last [path] (update path (dec (count path)) inc)) 122 | 123 | ;; ## 🗂️ ToC Handling 124 | ;; region toc: 125 | ;; toc nodes are heading nodes but with `:type` `:toc` and an extra branching 126 | ;; on the key `:children` representing the sub-sections of the document 127 | 128 | (defn into-toc [toc {:as toc-item :keys [heading-level]}] 129 | (loop [toc toc l heading-level toc-path [:children]] 130 | ;; `toc-path` is `[:children i₁ :children i₂ ... :children]` 131 | (let [type-path (assoc toc-path (dec (count toc-path)) :type)] 132 | (cond 133 | ;; insert intermediate default empty :content collections for the final update-in (which defaults to maps otherwise) 134 | (not (get-in toc toc-path)) 135 | (recur (assoc-in toc toc-path []) l toc-path) 136 | 137 | ;; fill in toc types for non-contiguous jumps like h1 -> h3 138 | (not (get-in toc type-path)) 139 | (recur (assoc-in toc type-path :toc) l toc-path) 140 | 141 | (= 1 l) 142 | (update-in toc toc-path (fnil conj []) toc-item) 143 | 144 | :else 145 | (recur toc 146 | (dec l) 147 | (conj toc-path 148 | (max 0 (dec (count (get-in toc toc-path)))) ;; select last child at level if it exists 149 | :children)))))) 150 | 151 | (defn add-to-toc [doc {:as h :keys [heading-level]}] 152 | (cond-> doc (pos-int? heading-level) (update :toc into-toc (assoc h :type :toc)))) 153 | 154 | (defn set-title-when-missing [{:as doc :keys [title]} heading] 155 | (cond-> doc (nil? title) (assoc :title (md.transform/->text heading)))) 156 | 157 | (defn add-title+toc 158 | "Computes and adds a :title and a :toc to the document-like structure `doc` which might have not been constructed by means of `parse`." 159 | [{:as doc :keys [content]}] 160 | (let [rf (fn [doc heading] (-> doc (add-to-toc heading) (set-title-when-missing heading))) 161 | xf (filter (comp #{:heading} :type))] 162 | (reduce (xf rf) (assoc doc :toc {:type :toc}) content))) 163 | 164 | (defn handle-close-heading [ctx] 165 | (let [{:keys [text->id+emoji-fn] :nextjournal.markdown.impl/keys [id->index]} ctx 166 | heading-loc (current-loc ctx) 167 | heading (z/node heading-loc) 168 | {:keys [id emoji]} (when (ifn? 
164 | (defn handle-close-heading [ctx]
165 |   (let [{:keys [text->id+emoji-fn] :nextjournal.markdown.impl/keys [id->index]} ctx
166 |         heading-loc (current-loc ctx)
167 |         heading (z/node heading-loc)
168 |         {:keys [id emoji]} (when (ifn? text->id+emoji-fn)
169 |                              (text->id+emoji-fn heading))
170 |         existing-idx (when id (get id->index id))
171 |         heading' (cond-> heading
172 |                    id (assoc-in [:attrs :id] (cond-> id existing-idx (str "-" (inc existing-idx))))
173 |                    emoji (assoc :emoji emoji))]
174 |     (-> ctx
175 |         (update :nextjournal.markdown.impl/id->index update id (fnil inc 0))
176 |         (cond-> (= 1 (zdepth heading-loc))
177 |           (-> (add-to-toc (assoc heading' :path (zpath heading-loc)))
178 |               (set-title-when-missing heading')))
179 |         (update-current-loc (fn [loc] (-> loc (z/replace heading') z/up))))))
180 | 
181 | (comment
182 |   (-> {:type :toc}
183 |       ;;(into-toc {:heading-level 3 :title "Foo"})
184 |       ;;(into-toc {:heading-level 2 :title "Section 1"})
185 |       (into-toc {:heading-level 1 :title "Title" :type :toc})
186 |       (into-toc {:heading-level 4 :title "Section 2" :type :toc})
187 |       ;;(into-toc {:heading-level 4 :title "Section 2.1"})
188 |       ;;(into-toc {:heading-level 2 :title "Section 3"})
189 |       )
190 | 
191 |   (-> "# Top _Title_
192 | 
193 | par
194 | 
195 | ### Three
196 | 
197 | ## Two
198 | 
199 | par
200 | - and a nested
201 |   - ### Heading not included
202 | 
203 | foo
204 | 
205 | ## Two Again
206 | 
207 | par
208 | 
209 | # One Again
210 | 
211 | [[TOC]]
212 | 
213 | #### Four
214 | 
215 | end"
216 |       nextjournal.markdown/parse
217 |       :toc
218 |       ))
219 | ;; endregion
220 | 
221 | ;; ## Parsing Extensibility
222 | ;;
223 | ;; normalize-tokenizer :: {:regex, :doc-handler} | {:tokenizer-fn, :handler} -> Tokenizer
224 | ;; Tokenizer :: {:tokenizer-fn :: TokenizerFn, :doc-handler :: DocHandler}
225 | ;;
226 | ;; Match :: Any
227 | ;; Handler :: Match -> Node
228 | ;; IndexedMatch :: (Match, Int, Int)
229 | ;; TokenizerFn :: String -> [IndexedMatch]
230 | ;; DocHandler :: Doc -> {:match :: Match} -> Doc
231 | 
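;; For illustration, a hypothetical {:regex, :handler} tokenizer for the `{{...}}` pattern from
;; the re-idx-seq examples above; `normalize-tokenizer` (defined further below) should fill in
;; the remaining Tokenizer keys (:tokenizer-fn, :doc-handler and :pred):
#_(normalize-tokenizer
   {:regex #"\{\{([^{]+)\}\}"
    :handler (fn [match] {:type :var :text (match 1)})})
;; expected to return the map above extended with :tokenizer-fn, :doc-handler and :pred
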
232 | (defn tokenize-text-node [{:as tkz :keys [tokenizer-fn pred doc-handler]} ctx {:as node :keys [text]}]
233 |   ;; Tokenizer -> Ctx -> HNode -> [HNode]
234 |   (assert (and (fn? tokenizer-fn)
235 |                (fn? doc-handler)
236 |                (fn? pred)
237 |                (string? text))
238 |           {:text text :tokenizer tkz})
239 |   (let [idx-seq (when (pred (current-loc ctx)) (tokenizer-fn text))]
240 |     (if (seq idx-seq)
241 |       (let [text-hnode (fn [s] (assoc (text-node s) :doc-handler z/append-child))
242 |             {:keys [nodes remaining-text]}
243 |             (reduce (fn [{:as acc :keys [remaining-text]} [match start end]]
244 |                       (-> acc
245 |                           (update :remaining-text subs 0 start)
246 |                           (cond->
247 |                             (< end (count remaining-text))
248 |                             (update :nodes conj (text-hnode (subs remaining-text end))))
249 |                           (update :nodes conj {:doc-handler doc-handler
250 |                                                :match match :text text
251 |                                                :start start :end end})))
252 |                     {:remaining-text text :nodes ()}
253 |                     (reverse idx-seq))]
254 |         (cond-> nodes
255 |           (seq remaining-text)
256 |           (conj (text-hnode remaining-text))))
257 |       [node])))
258 | 
259 | (defn handle-text-token [{:as ctx :keys [text-tokenizers]} text]
260 |   (reduce (fn [ctx {:as node :keys [doc-handler]}] (update-current-loc ctx doc-handler (dissoc node :doc-handler)))
261 |           ctx
262 |           (reduce (fn [nodes tokenizer]
263 |                     (mapcat (fn [{:as node :keys [type]}]
264 |                               (if (= :text type) (tokenize-text-node tokenizer ctx node) [node]))
265 |                             nodes))
266 |                   [{:type :text :text text :doc-handler z/append-child}]
267 |                   text-tokenizers)))
268 | 
269 | ;; clj
270 | #_(handle-text-token (assoc empty-doc :doc (->zip {:type :doc :content []})) "some-text")
271 | 
272 | ;; tokenizers
273 | (defn normalize-tokenizer
274 |   "Normalizes a map of regex and handler into a Tokenizer"
275 |   [{:as tokenizer :keys [doc-handler pred handler regex tokenizer-fn]}]
276 |   (assert (and (or doc-handler handler) (or regex tokenizer-fn)))
277 |   (cond-> tokenizer
278 |     (not doc-handler) (assoc :doc-handler (fn [doc {:keys [match]}] (z/append-child doc (handler match))))
279 |     (not tokenizer-fn) (assoc :tokenizer-fn (partial re-idx-seq regex))
280 |     (not pred) (assoc :pred (constantly true))))
281 | 
282 | (defn current-ancestor-nodes [loc]
283 |   (loop [loc loc ancestors []]
284 |     (let [parent (z/up loc)]
285 |       (if (and parent (not= :doc (:type (z/node parent))))
286 |         (recur parent (conj ancestors (z/node parent)))
287 |         ancestors))))
288 | 
289 | (def hashtag-tokenizer
290 |   {:regex #"(^|\B)#[\w-]+"
291 |    :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
292 |    :handler (fn [match] {:type :hashtag :text (subs (match 0) 1)})})
293 | 
294 | (def internal-link-tokenizer
295 |   {:regex #"\[\[([^\]]+)\]\]"
296 |    :pred #(every? (complement #{:link}) (map :type (current-ancestor-nodes %)))
297 |    :handler (fn [match] {:type :internal-link :text (match 1)})})
298 | 
299 | #_(normalize-tokenizer internal-link-tokenizer)
300 | #_(normalize-tokenizer hashtag-tokenizer)
301 | 
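;; A rough usage sketch: tokenizing a bare text node against the hashtag tokenizer. The context
;; wiring below (a fresh doc zipper assoc'd under :doc, the root key declared in `empty-doc`) is
;; only a stand-in for what the parser sets up during parsing:
#_(tokenize-text-node
   (normalize-tokenizer hashtag-tokenizer)
   (assoc empty-doc :doc (->zip {:type :doc :content []}))
   {:type :text :text "fix #markdown parsing"})
;; expected to yield a text node for "fix ", an entry carrying the tokenizer's :doc-handler for
;; the "#markdown" match, and a text node for " parsing"
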
302 | ;; ## 🤺 Fence Info
303 | ;; `parse-fence-info` ingests nextjournal, GFM, Pandoc and RMarkdown fenced code block info (any text following the leading 3 backticks) and returns a map
304 | ;;
305 | ;; _nextjournal_ / _GFM_
306 | ;;
307 | ;; ```python id=2e3541da-0735-4b7f-a12f-4fb1bfcb6138
308 | ;; python code
309 | ;; ```
310 | ;;
311 | ;; _Pandoc_
312 | ;;
313 | ;; ```{#pandoc-id .language .extra-class key=Val}
314 | ;; code in language
315 | ;; ```
316 | ;;
317 | ;; _Rmd_
318 | ;;
319 | ;; ```{r cars, echo=FALSE}
320 | ;; R code
321 | ;; ```
322 | ;;
323 | ;; See also:
324 | ;; - https://github.github.com/gfm/#info-string
325 | ;; - https://pandoc.org/MANUAL.html#fenced-code-blocks
326 | ;; - https://rstudio.com/wp-content/uploads/2016/03/rmarkdown-cheatsheet-2.0.pdf
327 | 
328 | (defn parse-fence-info [info-str]
329 |   (try
330 |     (when (and (string? info-str) (seq info-str))
331 |       (let [tokens (-> info-str
332 |                        str/trim
333 |                        (str/replace #"[\{\}\,]" "") ;; remove Pandoc/Rmarkdown brackets and commas
334 |                        (str/replace "." "") ;; remove dots
335 |                        (str/split #" "))] ;; split by spaces
336 |         (reduce
337 |          (fn [{:as info-map :keys [language]} token]
338 |            (let [[_ k v] (re-matches #"^([^=]+)=([^=]+)$" token)]
339 |              (cond
340 |                (str/starts-with? token "#") (assoc info-map :id (str/replace token #"^#" "")) ;; pandoc #id
341 |                (and k v) (assoc info-map (keyword k) v)
342 |                (not language) (assoc info-map :language token) ;; language is the first simple token which is not a Pandoc id
343 |                :else (assoc info-map (keyword token) true))))
344 |          {}
345 |          tokens)))
346 |     (catch #?(:clj Throwable :cljs :default) _ {})))
347 | 
348 | (comment
349 |   (parse-fence-info "python runtime-id=5f77e475-6178-47a3-8437-45c9c34d57ff")
350 |   (parse-fence-info "{#some-id .lang foo=nex}")
351 |   (parse-fence-info "#id clojure")
352 |   (parse-fence-info "clojure #id")
353 |   (parse-fence-info "clojure")
354 |   (parse-fence-info "{r cars, echo=FALSE}"))
355 | 
356 | ;; ## Footnote handling
357 | 
358 | (defn node-with-sidenote-refs [p-node]
359 |   (loop [l (->zip p-node) refs []]
360 |     (if (z/end? l)
361 |       (when (seq refs)
362 |         {:node (z/root l) :refs refs})
363 |       (let [{:keys [type ref]} (z/node l)]
364 |         (if (= :footnote-ref type)
365 |           (recur (z/next (z/edit l assoc :type :sidenote-ref)) (conj refs ref))
366 |           (recur (z/next l) refs))))))
367 | 
368 | (defn footnote->sidenote [{:keys [ref label content]}]
369 |   ;; this assumes the footnote container is a paragraph, won't work for lists
370 |   (node :sidenote (-> content first :content) nil (cond-> {:ref ref} label (assoc :label label))))
371 | 
372 | (defn insert-sidenote-containers
373 |   "Handles footnotes as sidenotes.
374 | 
375 |    Takes and returns a parsed document. When the document has footnotes, wraps every top-level block containing footnote references
376 |    with a `:sidenote-container` node and, into each such node, adds a `:sidenote-column` node containing a `:sidenote` node for each ref found.
377 |    Renames type `:footnote-ref` to `:sidenote-ref`."
378 |   [{:as doc :keys [footnotes]}]
379 |   (if-not (seq footnotes)
380 |     doc
381 |     (let [root (->zip doc)]
382 |       (loop [loc (z/down root) parent root]
383 |         (cond
384 |           (nil? loc)
385 |           (-> parent z/node (assoc :sidenotes? true))
386 |           (contains? 
#{:plain :paragraph :blockquote :numbered-list :bullet-list :todo-list :heading :table} 387 | (:type (z/node loc))) 388 | (if-some [{:keys [node refs]} (node-with-sidenote-refs (z/node loc))] 389 | (let [new-loc (-> loc (z/replace {:type :sidenote-container :content []}) 390 | (z/append-child node) 391 | (z/append-child {:type :sidenote-column 392 | ;; TODO: broken in the old implementation 393 | ;; should be :content (mapv #(footnote->sidenote (get footnotes %)) (distinct refs))}))] 394 | :content (mapv #(footnote->sidenote (get footnotes %)) refs)}))] 395 | (recur (z/right new-loc) (z/up new-loc))) 396 | (recur (z/right loc) parent)) 397 | :else 398 | (recur (z/right loc) parent)))))) 399 | -------------------------------------------------------------------------------- /src/nextjournal/markdown/utils/emoji.cljc: -------------------------------------------------------------------------------- 1 | (ns nextjournal.markdown.utils.emoji 2 | "https://github.com/mathiasbynens/emoji-test-regex-pattern 3 | MIT License 4 | Copyright Mathias Bynens ") 5 | 6 | ;; https://raw.githubusercontent.com/mathiasbynens/emoji-test-regex-pattern/f798c38987917b48e26d490590ba4f5481eb6e93/dist/latest/java.txt 7 | (def regex-java 8 | "^(?:[#*0-9]\\x{FE0F}?\\x{20E3}|[\\xA9\\xAE\\x{203C}\\x{2049}\\x{2122}\\x{2139}\\x{2194}-\\x{2199}\\x{21A9}\\x{21AA}\\x{231A}\\x{231B}\\x{2328}\\x{23CF}\\x{23ED}-\\x{23EF}\\x{23F1}\\x{23F2}\\x{23F8}-\\x{23FA}\\x{24C2}\\x{25AA}\\x{25AB}\\x{25B6}\\x{25C0}\\x{25FB}\\x{25FC}\\x{25FE}\\x{2600}-\\x{2604}\\x{260E}\\x{2611}\\x{2614}\\x{2615}\\x{2618}\\x{2620}\\x{2622}\\x{2623}\\x{2626}\\x{262A}\\x{262E}\\x{262F}\\x{2638}-\\x{263A}\\x{2640}\\x{2642}\\x{2648}-\\x{2653}\\x{265F}\\x{2660}\\x{2663}\\x{2665}\\x{2666}\\x{2668}\\x{267B}\\x{267E}\\x{267F}\\x{2692}\\x{2694}-\\x{2697}\\x{2699}\\x{269B}\\x{269C}\\x{26A0}\\x{26A7}\\x{26AA}\\x{26B0}\\x{26B1}\\x{26BD}\\x{26BE}\\x{26C4}\\x{26C8}\\x{26CF}\\x{26D1}\\x{26D3}\\x{26E9}\\x{26F0}-\\x{26F5}\\x{26F7}\\x{26F8}\\x{26FA}\\x{2702}\\x{2708}\\x{2709}\\x{270F}\\x{2712}\\x{2714}\\x{2716}\\x{271D}\\x{2721}\\x{2733}\\x{2734}\\x{2744}\\x{2747}\\x{2757}\\x{2763}\\x{27A1}\\x{2934}\\x{2935}\\x{2B05}-\\x{2B07}\\x{2B1B}\\x{2B1C}\\x{2B55}\\x{3030}\\x{303D}\\x{3297}\\x{3299}\\x{1F004}\\x{1F170}\\x{1F171}\\x{1F17E}\\x{1F17F}\\x{1F202}\\x{1F237}\\x{1F321}\\x{1F324}-\\x{1F32C}\\x{1F336}\\x{1F37D}\\x{1F396}\\x{1F397}\\x{1F399}-\\x{1F39B}\\x{1F39E}\\x{1F39F}\\x{1F3CD}\\x{1F3CE}\\x{1F3D4}-\\x{1F3DF}\\x{1F3F5}\\x{1F3F7}\\x{1F43F}\\x{1F4FD}\\x{1F549}\\x{1F54A}\\x{1F56F}\\x{1F570}\\x{1F573}\\x{1F576}-\\x{1F579}\\x{1F587}\\x{1F58A}-\\x{1F58D}\\x{1F5A5}\\x{1F5A8}\\x{1F5B1}\\x{1F5B2}\\x{1F5BC}\\x{1F5C2}-\\x{1F5C4}\\x{1F5D1}-\\x{1F5D3}\\x{1F5DC}-\\x{1F5DE}\\x{1F5E1}\\x{1F5E3}\\x{1F5E8}\\x{1F5EF}\\x{1F5F3}\\x{1F5FA}\\x{1F6CB}\\x{1F6CD}-\\x{1F6CF}\\x{1F6E0}-\\x{1F6E5}\\x{1F6E9}\\x{1F6F0}\\x{1F6F3}]\\x{FE0F}?|[\\x{261D}\\x{270C}\\x{270D}\\x{1F574}\\x{1F590}][\\x{FE0F}\\x{1F3FB}-\\x{1F3FF}]?|[\\x{26F9}\\x{1F3CB}\\x{1F3CC}\\x{1F575}][\\x{FE0F}\\x{1F3FB}-\\x{1F3FF}]?(?:\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|[\\x{270A}\\x{270B}\\x{1F385}\\x{1F3C2}\\x{1F3C7}\\x{1F442}\\x{1F443}\\x{1F446}-\\x{1F450}\\x{1F466}\\x{1F467}\\x{1F46B}-\\x{1F46D}\\x{1F472}\\x{1F474}-\\x{1F476}\\x{1F478}\\x{1F47C}\\x{1F483}\\x{1F485}\\x{1F48F}\\x{1F491}\\x{1F4AA}\\x{1F57A}\\x{1F595}\\x{1F596}\\x{1F64C}\\x{1F64F}\\x{1F6C0}\\x{1F6CC}\\x{1F90C}\\x{1F90F}\\x{1F918}-\\x{1F91F}\\x{1F930}-\\x{1F934}\\x{1F936}\\x{1F977}\\x{1F9B5}\\x{1F9B6}\\x{1F9BB}\\x{1F9D2}\\x{1F9D3}\\x{1F9D5}\\x{1FAC3}-\\x{1FAC5}\\x{1FAF0}\\x{1FAF2}
-\\x{1FAF8}][\\x{1F3FB}-\\x{1F3FF}]?|[\\x{1F3C3}\\x{1F3C4}\\x{1F3CA}\\x{1F46E}\\x{1F470}\\x{1F471}\\x{1F473}\\x{1F477}\\x{1F481}\\x{1F482}\\x{1F486}\\x{1F487}\\x{1F645}-\\x{1F647}\\x{1F64B}\\x{1F64D}\\x{1F64E}\\x{1F6A3}\\x{1F6B4}-\\x{1F6B6}\\x{1F926}\\x{1F935}\\x{1F937}-\\x{1F939}\\x{1F93D}\\x{1F93E}\\x{1F9B8}\\x{1F9B9}\\x{1F9CD}-\\x{1F9CF}\\x{1F9D4}\\x{1F9D6}-\\x{1F9DD}][\\x{1F3FB}-\\x{1F3FF}]?(?:\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|[\\x{1F408}\\x{1F426}](?:\\x{200D}\\x{2B1B})?|[\\x{1F46F}\\x{1F9DE}\\x{1F9DF}](?:\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|[\\x{23E9}-\\x{23EC}\\x{23F0}\\x{23F3}\\x{25FD}\\x{2693}\\x{26A1}\\x{26AB}\\x{26C5}\\x{26CE}\\x{26D4}\\x{26EA}\\x{26FD}\\x{2705}\\x{2728}\\x{274C}\\x{274E}\\x{2753}-\\x{2755}\\x{2795}-\\x{2797}\\x{27B0}\\x{27BF}\\x{2B50}\\x{1F0CF}\\x{1F18E}\\x{1F191}-\\x{1F19A}\\x{1F201}\\x{1F21A}\\x{1F22F}\\x{1F232}-\\x{1F236}\\x{1F238}-\\x{1F23A}\\x{1F250}\\x{1F251}\\x{1F300}-\\x{1F320}\\x{1F32D}-\\x{1F335}\\x{1F337}-\\x{1F37C}\\x{1F37E}-\\x{1F384}\\x{1F386}-\\x{1F393}\\x{1F3A0}-\\x{1F3C1}\\x{1F3C5}\\x{1F3C6}\\x{1F3C8}\\x{1F3C9}\\x{1F3CF}-\\x{1F3D3}\\x{1F3E0}-\\x{1F3F0}\\x{1F3F8}-\\x{1F407}\\x{1F409}-\\x{1F414}\\x{1F416}-\\x{1F425}\\x{1F427}-\\x{1F43A}\\x{1F43C}-\\x{1F43E}\\x{1F440}\\x{1F444}\\x{1F445}\\x{1F451}-\\x{1F465}\\x{1F46A}\\x{1F479}-\\x{1F47B}\\x{1F47D}-\\x{1F480}\\x{1F484}\\x{1F488}-\\x{1F48E}\\x{1F490}\\x{1F492}-\\x{1F4A9}\\x{1F4AB}-\\x{1F4FC}\\x{1F4FF}-\\x{1F53D}\\x{1F54B}-\\x{1F54E}\\x{1F550}-\\x{1F567}\\x{1F5A4}\\x{1F5FB}-\\x{1F62D}\\x{1F62F}-\\x{1F634}\\x{1F637}-\\x{1F644}\\x{1F648}-\\x{1F64A}\\x{1F680}-\\x{1F6A2}\\x{1F6A4}-\\x{1F6B3}\\x{1F6B7}-\\x{1F6BF}\\x{1F6C1}-\\x{1F6C5}\\x{1F6D0}-\\x{1F6D2}\\x{1F6D5}-\\x{1F6D7}\\x{1F6DC}-\\x{1F6DF}\\x{1F6EB}\\x{1F6EC}\\x{1F6F4}-\\x{1F6FC}\\x{1F7E0}-\\x{1F7EB}\\x{1F7F0}\\x{1F90D}\\x{1F90E}\\x{1F910}-\\x{1F917}\\x{1F920}-\\x{1F925}\\x{1F927}-\\x{1F92F}\\x{1F93A}\\x{1F93F}-\\x{1F945}\\x{1F947}-\\x{1F976}\\x{1F978}-\\x{1F9B4}\\x{1F9B7}\\x{1F9BA}\\x{1F9BC}-\\x{1F9CC}\\x{1F9D0}\\x{1F9E0}-\\x{1F9FF}\\x{1FA70}-\\x{1FA7C}\\x{1FA80}-\\x{1FA88}\\x{1FA90}-\\x{1FABD}\\x{1FABF}-\\x{1FAC2}\\x{1FACE}-\\x{1FADB}\\x{1FAE0}-\\x{1FAE8}]|\\x{2764}\\x{FE0F}?(?:\\x{200D}[\\x{1F525}\\x{1FA79}])?|\\x{1F1E6}[\\x{1F1E8}-\\x{1F1EC}\\x{1F1EE}\\x{1F1F1}\\x{1F1F2}\\x{1F1F4}\\x{1F1F6}-\\x{1F1FA}\\x{1F1FC}\\x{1F1FD}\\x{1F1FF}]|\\x{1F1E7}[\\x{1F1E6}\\x{1F1E7}\\x{1F1E9}-\\x{1F1EF}\\x{1F1F1}-\\x{1F1F4}\\x{1F1F6}-\\x{1F1F9}\\x{1F1FB}\\x{1F1FC}\\x{1F1FE}\\x{1F1FF}]|\\x{1F1E8}[\\x{1F1E6}\\x{1F1E8}\\x{1F1E9}\\x{1F1EB}-\\x{1F1EE}\\x{1F1F0}-\\x{1F1F5}\\x{1F1F7}\\x{1F1FA}-\\x{1F1FF}]|\\x{1F1E9}[\\x{1F1EA}\\x{1F1EC}\\x{1F1EF}\\x{1F1F0}\\x{1F1F2}\\x{1F1F4}\\x{1F1FF}]|\\x{1F1EA}[\\x{1F1E6}\\x{1F1E8}\\x{1F1EA}\\x{1F1EC}\\x{1F1ED}\\x{1F1F7}-\\x{1F1FA}]|\\x{1F1EB}[\\x{1F1EE}-\\x{1F1F0}\\x{1F1F2}\\x{1F1F4}\\x{1F1F7}]|\\x{1F1EC}[\\x{1F1E6}\\x{1F1E7}\\x{1F1E9}-\\x{1F1EE}\\x{1F1F1}-\\x{1F1F3}\\x{1F1F5}-\\x{1F1FA}\\x{1F1FC}\\x{1F1FE}]|\\x{1F1ED}[\\x{1F1F0}\\x{1F1F2}\\x{1F1F3}\\x{1F1F7}\\x{1F1F9}\\x{1F1FA}]|\\x{1F1EE}[\\x{1F1E8}-\\x{1F1EA}\\x{1F1F1}-\\x{1F1F4}\\x{1F1F6}-\\x{1F1F9}]|\\x{1F1EF}[\\x{1F1EA}\\x{1F1F2}\\x{1F1F4}\\x{1F1F5}]|\\x{1F1F0}[\\x{1F1EA}\\x{1F1EC}-\\x{1F1EE}\\x{1F1F2}\\x{1F1F3}\\x{1F1F5}\\x{1F1F7}\\x{1F1FC}\\x{1F1FE}\\x{1F1FF}]|\\x{1F1F1}[\\x{1F1E6}-\\x{1F1E8}\\x{1F1EE}\\x{1F1F0}\\x{1F1F7}-\\x{1F1FB}\\x{1F1FE}]|\\x{1F1F2}[\\x{1F1E6}\\x{1F1E8}-\\x{1F1ED}\\x{1F1F0}-\\x{1F1FF}]|\\x{1F1F3}[\\x{1F1E6}\\x{1F1E8}\\x{1F1EA}-\\x{1F1EC}\\x{1F1EE}\\x{1F1F1}\\x{1F1F4}\\x{1F1F5}\\x{1F1F7}\\x{1F1FA}\\x{1F1FF}]|\\x{1F1F4}\\x{1F1F2}|\\x{1F1F5}[\\x
{1F1E6}\\x{1F1EA}-\\x{1F1ED}\\x{1F1F0}-\\x{1F1F3}\\x{1F1F7}-\\x{1F1F9}\\x{1F1FC}\\x{1F1FE}]|\\x{1F1F6}\\x{1F1E6}|\\x{1F1F7}[\\x{1F1EA}\\x{1F1F4}\\x{1F1F8}\\x{1F1FA}\\x{1F1FC}]|\\x{1F1F8}[\\x{1F1E6}-\\x{1F1EA}\\x{1F1EC}-\\x{1F1F4}\\x{1F1F7}-\\x{1F1F9}\\x{1F1FB}\\x{1F1FD}-\\x{1F1FF}]|\\x{1F1F9}[\\x{1F1E6}\\x{1F1E8}\\x{1F1E9}\\x{1F1EB}-\\x{1F1ED}\\x{1F1EF}-\\x{1F1F4}\\x{1F1F7}\\x{1F1F9}\\x{1F1FB}\\x{1F1FC}\\x{1F1FF}]|\\x{1F1FA}[\\x{1F1E6}\\x{1F1EC}\\x{1F1F2}\\x{1F1F3}\\x{1F1F8}\\x{1F1FE}\\x{1F1FF}]|\\x{1F1FB}[\\x{1F1E6}\\x{1F1E8}\\x{1F1EA}\\x{1F1EC}\\x{1F1EE}\\x{1F1F3}\\x{1F1FA}]|\\x{1F1FC}[\\x{1F1EB}\\x{1F1F8}]|\\x{1F1FD}\\x{1F1F0}|\\x{1F1FE}[\\x{1F1EA}\\x{1F1F9}]|\\x{1F1FF}[\\x{1F1E6}\\x{1F1F2}\\x{1F1FC}]|\\x{1F3F3}\\x{FE0F}?(?:\\x{200D}(?:\\x{26A7}\\x{FE0F}?|\\x{1F308}))?|\\x{1F3F4}(?:\\x{200D}\\x{2620}\\x{FE0F}?|\\x{E0067}\\x{E0062}(?:\\x{E0065}\\x{E006E}\\x{E0067}|\\x{E0073}\\x{E0063}\\x{E0074}|\\x{E0077}\\x{E006C}\\x{E0073})\\x{E007F})?|\\x{1F415}(?:\\x{200D}\\x{1F9BA})?|\\x{1F43B}(?:\\x{200D}\\x{2744}\\x{FE0F}?)?|\\x{1F441}\\x{FE0F}?(?:\\x{200D}\\x{1F5E8}\\x{FE0F}?)?|\\x{1F468}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F468}\\x{1F469}]\\x{200D}(?:\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?)|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}|\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?)|\\x{1F3FB}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FC}-\\x{1F3FF}]))?|\\x{1F3FC}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}]))?|\\x{1F3FD}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}]))?|\\x{1F3FE}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F468}[\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}]))?|\\x{1F3FF}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F468}[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200
D}\\x{1F468}[\\x{1F3FB}-\\x{1F3FE}]))?)?|\\x{1F469}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?[\\x{1F468}\\x{1F469}]|\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?|\\x{1F469}\\x{200D}(?:\\x{1F466}(?:\\x{200D}\\x{1F466})?|\\x{1F467}(?:\\x{200D}[\\x{1F466}\\x{1F467}])?))|\\x{1F3FB}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FC}-\\x{1F3FF}]))?|\\x{1F3FC}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}]))?|\\x{1F3FD}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}]))?|\\x{1F3FE}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}]))?|\\x{1F3FF}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:[\\x{1F468}\\x{1F469}]|\\x{1F48B}\\x{200D}[\\x{1F468}\\x{1F469}])[\\x{1F3FB}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}[\\x{1F468}\\x{1F469}][\\x{1F3FB}-\\x{1F3FE}]))?)?|\\x{1F62E}(?:\\x{200D}\\x{1F4A8})?|\\x{1F635}(?:\\x{200D}\\x{1F4AB})?|\\x{1F636}(?:\\x{200D}\\x{1F32B}\\x{FE0F}?)?|\\x{1F93C}(?:[\\x{1F3FB}-\\x{1F3FF}]|\\x{200D}[\\x{2640}\\x{2642}]\\x{FE0F}?)?|\\x{1F9D1}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{1F91D}\\x{200D}\\x{1F9D1})|\\x{1F3FB}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\
x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FC}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FC}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FD}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FE}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?|\\x{1F3FF}(?:\\x{200D}(?:[\\x{2695}\\x{2696}\\x{2708}]\\x{FE0F}?|[\\x{1F33E}\\x{1F373}\\x{1F37C}\\x{1F384}\\x{1F393}\\x{1F3A4}\\x{1F3A8}\\x{1F3EB}\\x{1F3ED}\\x{1F4BB}\\x{1F4BC}\\x{1F527}\\x{1F52C}\\x{1F680}\\x{1F692}\\x{1F9AF}-\\x{1F9B3}\\x{1F9BC}\\x{1F9BD}]|\\x{2764}\\x{FE0F}?\\x{200D}(?:\\x{1F48B}\\x{200D})?\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FE}]|\\x{1F91D}\\x{200D}\\x{1F9D1}[\\x{1F3FB}-\\x{1F3FF}]))?)?|\\x{1FAF1}(?:\\x{1F3FB}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FC}-\\x{1F3FF}])?|\\x{1F3FC}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}\\x{1F3FD}-\\x{1F3FF}])?|\\x{1F3FD}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}\\x{1F3FC}\\x{1F3FE}\\x{1F3FF}])?|\\x{1F3FE}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}-\\x{1F3FD}\\x{1F3FF}])?|\\x{1F3FF}(?:\\x{200D}\\x{1FAF2}[\\x{1F3FB}-\\x{1F3FE}])?)?)+") 9 | 10 | ;; https://raw.githubusercontent.com/mathiasbynens/emoji-test-regex-pattern/f798c38987917b48e26d490590ba4f5481eb6e93/dist/latest/javascript.txt 11 | (def regex-js 
"^(?:[#*0-9]\\uFE0F?\\u20E3|[\\xA9\\xAE\\u203C\\u2049\\u2122\\u2139\\u2194-\\u2199\\u21A9\\u21AA\\u231A\\u231B\\u2328\\u23CF\\u23ED-\\u23EF\\u23F1\\u23F2\\u23F8-\\u23FA\\u24C2\\u25AA\\u25AB\\u25B6\\u25C0\\u25FB\\u25FC\\u25FE\\u2600-\\u2604\\u260E\\u2611\\u2614\\u2615\\u2618\\u2620\\u2622\\u2623\\u2626\\u262A\\u262E\\u262F\\u2638-\\u263A\\u2640\\u2642\\u2648-\\u2653\\u265F\\u2660\\u2663\\u2665\\u2666\\u2668\\u267B\\u267E\\u267F\\u2692\\u2694-\\u2697\\u2699\\u269B\\u269C\\u26A0\\u26A7\\u26AA\\u26B0\\u26B1\\u26BD\\u26BE\\u26C4\\u26C8\\u26CF\\u26D1\\u26D3\\u26E9\\u26F0-\\u26F5\\u26F7\\u26F8\\u26FA\\u2702\\u2708\\u2709\\u270F\\u2712\\u2714\\u2716\\u271D\\u2721\\u2733\\u2734\\u2744\\u2747\\u2757\\u2763\\u27A1\\u2934\\u2935\\u2B05-\\u2B07\\u2B1B\\u2B1C\\u2B55\\u3030\\u303D\\u3297\\u3299]\\uFE0F?|[\\u261D\\u270C\\u270D](?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?|[\\u270A\\u270B](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\u23E9-\\u23EC\\u23F0\\u23F3\\u25FD\\u2693\\u26A1\\u26AB\\u26C5\\u26CE\\u26D4\\u26EA\\u26FD\\u2705\\u2728\\u274C\\u274E\\u2753-\\u2755\\u2795-\\u2797\\u27B0\\u27BF\\u2B50]|\\u26F9(?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|\\u2764\\uFE0F?(?:\\u200D(?:\\uD83D\\uDD25|\\uD83E\\uDE79))?|\\uD83C(?:[\\uDC04\\uDD70\\uDD71\\uDD7E\\uDD7F\\uDE02\\uDE37\\uDF21\\uDF24-\\uDF2C\\uDF36\\uDF7D\\uDF96\\uDF97\\uDF99-\\uDF9B\\uDF9E\\uDF9F\\uDFCD\\uDFCE\\uDFD4-\\uDFDF\\uDFF5\\uDFF7]\\uFE0F?|[\\uDF85\\uDFC2\\uDFC7](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDFC3\\uDFC4\\uDFCA](?:\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDFCB\\uDFCC](?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDCCF\\uDD8E\\uDD91-\\uDD9A\\uDE01\\uDE1A\\uDE2F\\uDE32-\\uDE36\\uDE38-\\uDE3A\\uDE50\\uDE51\\uDF00-\\uDF20\\uDF2D-\\uDF35\\uDF37-\\uDF7C\\uDF7E-\\uDF84\\uDF86-\\uDF93\\uDFA0-\\uDFC1\\uDFC5\\uDFC6\\uDFC8\\uDFC9\\uDFCF-\\uDFD3\\uDFE0-\\uDFF0\\uDFF8-\\uDFFF]|\\uDDE6\\uD83C[\\uDDE8-\\uDDEC\\uDDEE\\uDDF1\\uDDF2\\uDDF4\\uDDF6-\\uDDFA\\uDDFC\\uDDFD\\uDDFF]|\\uDDE7\\uD83C[\\uDDE6\\uDDE7\\uDDE9-\\uDDEF\\uDDF1-\\uDDF4\\uDDF6-\\uDDF9\\uDDFB\\uDDFC\\uDDFE\\uDDFF]|\\uDDE8\\uD83C[\\uDDE6\\uDDE8\\uDDE9\\uDDEB-\\uDDEE\\uDDF0-\\uDDF5\\uDDF7\\uDDFA-\\uDDFF]|\\uDDE9\\uD83C[\\uDDEA\\uDDEC\\uDDEF\\uDDF0\\uDDF2\\uDDF4\\uDDFF]|\\uDDEA\\uD83C[\\uDDE6\\uDDE8\\uDDEA\\uDDEC\\uDDED\\uDDF7-\\uDDFA]|\\uDDEB\\uD83C[\\uDDEE-\\uDDF0\\uDDF2\\uDDF4\\uDDF7]|\\uDDEC\\uD83C[\\uDDE6\\uDDE7\\uDDE9-\\uDDEE\\uDDF1-\\uDDF3\\uDDF5-\\uDDFA\\uDDFC\\uDDFE]|\\uDDED\\uD83C[\\uDDF0\\uDDF2\\uDDF3\\uDDF7\\uDDF9\\uDDFA]|\\uDDEE\\uD83C[\\uDDE8-\\uDDEA\\uDDF1-\\uDDF4\\uDDF6-\\uDDF9]|\\uDDEF\\uD83C[\\uDDEA\\uDDF2\\uDDF4\\uDDF5]|\\uDDF0\\uD83C[\\uDDEA\\uDDEC-\\uDDEE\\uDDF2\\uDDF3\\uDDF5\\uDDF7\\uDDFC\\uDDFE\\uDDFF]|\\uDDF1\\uD83C[\\uDDE6-\\uDDE8\\uDDEE\\uDDF0\\uDDF7-\\uDDFB\\uDDFE]|\\uDDF2\\uD83C[\\uDDE6\\uDDE8-\\uDDED\\uDDF0-\\uDDFF]|\\uDDF3\\uD83C[\\uDDE6\\uDDE8\\uDDEA-\\uDDEC\\uDDEE\\uDDF1\\uDDF4\\uDDF5\\uDDF7\\uDDFA\\uDDFF]|\\uDDF4\\uD83C\\uDDF2|\\uDDF5\\uD83C[\\uDDE6\\uDDEA-\\uDDED\\uDDF0-\\uDDF3\\uDDF7-\\uDDF9\\uDDFC\\uDDFE]|\\uDDF6\\uD83C\\uDDE6|\\uDDF7\\uD83C[\\uDDEA\\uDDF4\\uDDF8\\uDDFA\\uDDFC]|\\uDDF8\\uD83C[\\uDDE6-\\uDDEA\\uDDEC-\\uDDF4\\uDDF7-\\uDDF9\\uDDFB\\uDDFD-\\uDDFF]|\\uDDF9\\uD83C[\\uDDE6\\uDDE8\\uDDE9\\uDDEB-\\uDDED\\uDDEF-\\uDDF4\\uDDF7\\uDDF9\\uDDFB\\uDDFC\\uDDFF]|\\uDDFA\\uD83C[\\uDDE6\\uDDEC\\uDDF2\\uDDF3\\uDDF8\\uDDFE\\uDDFF]|\\uDDFB\\uD83C[\\uDDE6\\uDDE8\\uDDEA\\uDDEC\\uDDEE\\uDDF3\\uDDFA]|\\uDDFC\\uD83C[\\uDDEB\\uDDF8]|\\uDDFD\\uD83C\\uDDF0|\\uDDFE\\uD83C[\\uDDEA\\uDDF9]|\\uDDFF\\uD83C[\\uDDE6\\uDDF2
\\uDDFC]|\\uDFF3\\uFE0F?(?:\\u200D(?:\\u26A7\\uFE0F?|\\uD83C\\uDF08))?|\\uDFF4(?:\\u200D\\u2620\\uFE0F?|\\uDB40\\uDC67\\uDB40\\uDC62\\uDB40(?:\\uDC65\\uDB40\\uDC6E\\uDB40\\uDC67|\\uDC73\\uDB40\\uDC63\\uDB40\\uDC74|\\uDC77\\uDB40\\uDC6C\\uDB40\\uDC73)\\uDB40\\uDC7F)?)|\\uD83D(?:[\\uDC08\\uDC26](?:\\u200D\\u2B1B)?|[\\uDC3F\\uDCFD\\uDD49\\uDD4A\\uDD6F\\uDD70\\uDD73\\uDD76-\\uDD79\\uDD87\\uDD8A-\\uDD8D\\uDDA5\\uDDA8\\uDDB1\\uDDB2\\uDDBC\\uDDC2-\\uDDC4\\uDDD1-\\uDDD3\\uDDDC-\\uDDDE\\uDDE1\\uDDE3\\uDDE8\\uDDEF\\uDDF3\\uDDFA\\uDECB\\uDECD-\\uDECF\\uDEE0-\\uDEE5\\uDEE9\\uDEF0\\uDEF3]\\uFE0F?|[\\uDC42\\uDC43\\uDC46-\\uDC50\\uDC66\\uDC67\\uDC6B-\\uDC6D\\uDC72\\uDC74-\\uDC76\\uDC78\\uDC7C\\uDC83\\uDC85\\uDC8F\\uDC91\\uDCAA\\uDD7A\\uDD95\\uDD96\\uDE4C\\uDE4F\\uDEC0\\uDECC](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDC6E\\uDC70\\uDC71\\uDC73\\uDC77\\uDC81\\uDC82\\uDC86\\uDC87\\uDE45-\\uDE47\\uDE4B\\uDE4D\\uDE4E\\uDEA3\\uDEB4-\\uDEB6](?:\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDD74\\uDD90](?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDC00-\\uDC07\\uDC09-\\uDC14\\uDC16-\\uDC25\\uDC27-\\uDC3A\\uDC3C-\\uDC3E\\uDC40\\uDC44\\uDC45\\uDC51-\\uDC65\\uDC6A\\uDC79-\\uDC7B\\uDC7D-\\uDC80\\uDC84\\uDC88-\\uDC8E\\uDC90\\uDC92-\\uDCA9\\uDCAB-\\uDCFC\\uDCFF-\\uDD3D\\uDD4B-\\uDD4E\\uDD50-\\uDD67\\uDDA4\\uDDFB-\\uDE2D\\uDE2F-\\uDE34\\uDE37-\\uDE44\\uDE48-\\uDE4A\\uDE80-\\uDEA2\\uDEA4-\\uDEB3\\uDEB7-\\uDEBF\\uDEC1-\\uDEC5\\uDED0-\\uDED2\\uDED5-\\uDED7\\uDEDC-\\uDEDF\\uDEEB\\uDEEC\\uDEF4-\\uDEFC\\uDFE0-\\uDFEB\\uDFF0]|\\uDC15(?:\\u200D\\uD83E\\uDDBA)?|\\uDC3B(?:\\u200D\\u2744\\uFE0F?)?|\\uDC41\\uFE0F?(?:\\u200D\\uD83D\\uDDE8\\uFE0F?)?|\\uDC68(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D(?:[\\uDC68\\uDC69]\\u200D\\uD83D(?:\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?)|[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?)|\\uD83E[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD])|\\uD83C(?:\\uDFFB(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFC-\\uDFFF])))?|\\uDFFC(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB\\uDFFD-\\uDFFF])))?|\\uDFFD(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF])))?|\\uDFFE(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB-\\uDFFD\
\uDFFF])))?|\\uDFFF(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?\\uDC68\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D\\uDC68\\uD83C[\\uDFFB-\\uDFFE])))?))?|\\uDC69(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:\\uDC8B\\u200D\\uD83D)?[\\uDC68\\uDC69]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D(?:[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?|\\uDC69\\u200D\\uD83D(?:\\uDC66(?:\\u200D\\uD83D\\uDC66)?|\\uDC67(?:\\u200D\\uD83D[\\uDC66\\uDC67])?))|\\uD83E[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD])|\\uD83C(?:\\uDFFB(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFC-\\uDFFF])))?|\\uDFFC(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB\\uDFFD-\\uDFFF])))?|\\uDFFD(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF])))?|\\uDFFE(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB-\\uDFFD\\uDFFF])))?|\\uDFFF(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D\\uD83D(?:[\\uDC68\\uDC69]|\\uDC8B\\u200D\\uD83D[\\uDC68\\uDC69])\\uD83C[\\uDFFB-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83D[\\uDC68\\uDC69]\\uD83C[\\uDFFB-\\uDFFE])))?))?|\\uDC6F(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|\\uDD75(?:\\uFE0F|\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|\\uDE2E(?:\\u200D\\uD83D\\uDCA8)?|\\uDE35(?:\\u200D\\uD83D\\uDCAB)?|\\uDE36(?:\\u200D\\uD83C\\uDF2B\\uFE0F?)?)|\\uD83E(?:[\\uDD0C\\uDD0F\\uDD18-\\uDD1F\\uDD30-\\uDD34\\uDD36\\uDD77\\uDDB5\\uDDB6\\uDDBB\\uDDD2\\uDDD3\\uDDD5\\uDEC3-\\uDEC5\\uDEF0\\uDEF2-\\uDEF8](?:\\uD83C[\\uDFFB-\\uDFFF])?|[\\uDD26\\uDD35\\uDD37-\\uDD39\\uDD3D\\uDD3E\\uDDB8\\uDDB9\\uDDCD-\\uDDCF\\uDDD4\\uDDD6-\\uDDDD](?:\\uD83C[\\uDFFB-\\uDFFF])?(?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDDDE\\uDDDF](?:\\u200D[\\u2640\\u2642]\\uFE0F?)?|[\\uDD0D\\uDD0E\\uDD10-\\uDD17\\uDD20-\\uDD25\\uDD27-\\uDD2F\\uDD3A\\uDD3F-\\uDD45\\uDD47-\\uDD76\\
uDD78-\\uDDB4\\uDDB7\\uDDBA\\uDDBC-\\uDDCC\\uDDD0\\uDDE0-\\uDDFF\\uDE70-\\uDE7C\\uDE80-\\uDE88\\uDE90-\\uDEBD\\uDEBF-\\uDEC2\\uDECE-\\uDEDB\\uDEE0-\\uDEE8]|\\uDD3C(?:\\u200D[\\u2640\\u2642]\\uFE0F?|\\uD83C[\\uDFFB-\\uDFFF])?|\\uDDD1(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1))|\\uD83C(?:\\uDFFB(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFC-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFC(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB\\uDFFD-\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFD(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFE(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFD\\uDFFF]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?|\\uDFFF(?:\\u200D(?:[\\u2695\\u2696\\u2708]\\uFE0F?|\\u2764\\uFE0F?\\u200D(?:\\uD83D\\uDC8B\\u200D)?\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFE]|\\uD83C[\\uDF3E\\uDF73\\uDF7C\\uDF84\\uDF93\\uDFA4\\uDFA8\\uDFEB\\uDFED]|\\uD83D[\\uDCBB\\uDCBC\\uDD27\\uDD2C\\uDE80\\uDE92]|\\uD83E(?:[\\uDDAF-\\uDDB3\\uDDBC\\uDDBD]|\\uDD1D\\u200D\\uD83E\\uDDD1\\uD83C[\\uDFFB-\\uDFFF])))?))?|\\uDEF1(?:\\uD83C(?:\\uDFFB(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFC-\\uDFFF])?|\\uDFFC(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB\\uDFFD-\\uDFFF])?|\\uDFFD(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB\\uDFFC\\uDFFE\\uDFFF])?|\\uDFFE(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB-\\uDFFD\\uDFFF])?|\\uDFFF(?:\\u200D\\uD83E\\uDEF2\\uD83C[\\uDFFB-\\uDFFE])?))?))+") 12 | 13 | (def regex #?(:cljs (js/RegExp. 
regex-js) 14 | :clj (re-pattern regex-java)) ) 15 | 16 | (comment 17 | ;; build regex 18 | (str "^(?:" 19 | (slurp "https://raw.githubusercontent.com/mathiasbynens/emoji-test-regex-pattern/f798c38987917b48e26d490590ba4f5481eb6e93/dist/latest/java.txt") 20 | ")+") 21 | 22 | (re-matches regex "🐞") 23 | (re-matches regex "🐞🐞") 24 | (re-matches regex "✋🏿") 25 | (re-matches regex "Not ✋🏿") 26 | (re-matches regex "⚛️") 27 | (re-matches regex "☹️")) 28 | -------------------------------------------------------------------------------- /test/nextjournal/markdown/multi_threading_test.clj: -------------------------------------------------------------------------------- 1 | (ns nextjournal.markdown.multi-threading-test 2 | (:require [clojure.test :as t :refer [deftest testing is]] 3 | [nextjournal.markdown :as md])) 4 | 5 | (deftest multithreading 6 | (let [!exs (atom []) 7 | proc (fn [] 8 | (try (md/parse (slurp "notebooks/reference.md")) 9 | (catch IllegalStateException e 10 | (swap! !exs conj e)))) 11 | t1 (new Thread proc) 12 | t2 (new Thread proc)] 13 | 14 | (.start t1) (.start t2) 15 | (.join t1) (.join t2) 16 | (is (zero? (count @!exs))))) 17 | -------------------------------------------------------------------------------- /test/test_runner.clj: -------------------------------------------------------------------------------- 1 | (ns test-runner 2 | (:require [clojure.test] 3 | [nextjournal.markdown-test] 4 | [nextjournal.markdown.multi-threading-test])) 5 | 6 | (defn run [_] 7 | (let [{:keys [fail error]} (clojure.test/run-all-tests #"nextjournal\.markdown.*-test")] 8 | (when (< 0 (+ fail error)) 9 | (System/exit 1)))) 10 | 11 | #_(clojure.test/run-all-tests #"nextjournal\.markdown.*-test") 12 | --------------------------------------------------------------------------------