├── .github └── workflows │ ├── ci.yml │ ├── deploy.yml │ └── smoketest-wasm.yml ├── .gitignore ├── .vscode └── launch.json ├── CHANGELOG.md ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.MD ├── RELEASE.md ├── benches ├── lorem_ipsum.txt ├── moar_links.txt ├── my_benchmark.rs ├── r10s_update_message.txt └── testdata.md ├── docs.md ├── message_parser_wasm ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── example.html ├── example.js ├── src │ ├── lib.rs │ ├── manual_typings.ts │ └── utils.rs └── tests │ └── web.rs ├── rust-toolchain ├── scripts └── extract_hashtag_content_ranges.py ├── spec.md ├── src ├── lib.rs ├── main.rs └── parser │ ├── is_emoji.rs │ ├── link_url │ ├── allowed_tlds │ │ ├── country_tlds.rs │ │ └── mod.rs │ ├── ip │ │ ├── ip_literal.rs │ │ ├── ipv4.rs │ │ ├── ipv6.rs │ │ ├── ipvfuture.rs │ │ └── mod.rs │ ├── mod.rs │ ├── parenthesis_counter.rs │ ├── parse_link.rs │ └── punycode_warning.rs │ ├── mod.rs │ ├── parse_from_text │ ├── base_parsers.rs │ ├── desktop_subset.rs │ ├── find_range.rs │ ├── hashtag_content_char_ranges.rs │ ├── markdown_elements.rs │ ├── markdown_elements │ │ └── label_elements.rs │ ├── mod.rs │ └── text_elements.rs │ └── utils.rs └── tests ├── based_on_issue ├── exclamation_mark_at_end_of_link_81.rs ├── fediverse_handle_82.rs └── mod.rs ├── emoji ├── all_desktop_emojis.txt └── mod.rs ├── links.rs ├── test.rs └── text_to_ast ├── desktop_set.rs ├── links.rs ├── markdown.rs ├── mod.rs ├── mod.rs.orig └── text_only.rs /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Rust CI 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | - staging 9 | - trying 10 | 11 | jobs: 12 | fmt: 13 | name: Rustfmt 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - uses: actions-rs/toolchain@v1 18 | with: 19 | profile: minimal 20 | toolchain: 1.77.2 21 | override: true 22 | - run: rustup component add rustfmt 23 | - uses: 
actions-rs/cargo@v1 24 | with: 25 | command: fmt 26 | args: --all -- --check 27 | 28 | run_clippy: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/checkout@v2 32 | - uses: actions-rs/toolchain@v1 33 | with: 34 | toolchain: 1.77.2 35 | components: clippy 36 | override: true 37 | - uses: actions-rs/clippy-check@v1 38 | with: 39 | token: ${{ secrets.GITHUB_TOKEN }} 40 | args: --workspace --tests --examples 41 | 42 | docs: 43 | name: Rust doc comments 44 | runs-on: ubuntu-latest 45 | env: 46 | RUSTDOCFLAGS: -Dwarnings 47 | steps: 48 | - name: Checkout sources 49 | uses: actions/checkout@v2 50 | - name: Install rust stable toolchain 51 | uses: actions-rs/toolchain@v1 52 | with: 53 | toolchain: stable 54 | profile: minimal 55 | components: rust-docs 56 | override: true 57 | - name: Cache rust cargo artifacts 58 | uses: swatinem/rust-cache@v1 59 | - name: Rustdoc 60 | uses: actions-rs/cargo@v1 61 | with: 62 | command: doc 63 | args: --document-private-items --no-deps 64 | 65 | build_and_test: 66 | name: Build and test 67 | strategy: 68 | matrix: 69 | include: 70 | - os: ubuntu-latest 71 | rust: 1.77.2 72 | - os: windows-latest 73 | rust: 1.77.2 74 | runs-on: ${{ matrix.os }} 75 | steps: 76 | - uses: actions/checkout@main 77 | 78 | - name: Install ${{ matrix.rust }} 79 | uses: actions-rs/toolchain@v1 80 | with: 81 | toolchain: ${{ matrix.rust }} 82 | override: true 83 | 84 | - name: Cache cargo registry 85 | uses: actions/cache@v4 86 | with: 87 | path: ~/.cargo/registry 88 | key: ${{ matrix.os }}-${{ matrix.rust }}-cargo-registry-${{ hashFiles('**/Cargo.toml') }} 89 | 90 | - name: Cache cargo index 91 | uses: actions/cache@v4 92 | with: 93 | path: ~/.cargo/git 94 | key: ${{ matrix.os }}-${{ matrix.rust }}-cargo-index-${{ hashFiles('**/Cargo.toml') }} 95 | 96 | - name: Cache cargo build 97 | uses: actions/cache@v4 98 | with: 99 | path: target 100 | key: ${{ matrix.os }}-${{ matrix.rust }}-cargo-build-target-${{ hashFiles('**/Cargo.toml') }} 101 | 102 | - name: 
check 103 | uses: actions-rs/cargo@v1 104 | env: 105 | RUSTFLAGS: -D warnings 106 | with: 107 | command: check 108 | args: --all --bins --examples --tests 109 | 110 | - name: tests 111 | uses: actions-rs/cargo@v1 112 | with: 113 | command: test 114 | args: --all 115 | -------------------------------------------------------------------------------- /.github/workflows/deploy.yml: -------------------------------------------------------------------------------- 1 | name: Build & Deploy wasm Example 2 | on: 3 | push: 4 | branches: 5 | - main 6 | 7 | jobs: 8 | build-and-deploy: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions-rs/toolchain@v1 12 | with: 13 | toolchain: stable 14 | 15 | - uses: jetli/wasm-pack-action@v0.3.0 16 | with: 17 | # Optional version of wasm-pack to install(eg. 'v0.9.1', 'latest') 18 | version: "latest" 19 | 20 | - name: checkout 21 | uses: actions/checkout@v2.3.1 22 | 23 | - name: build 24 | run: | 25 | cd message_parser_wasm 26 | wasm-pack build --target web 27 | mkdir public 28 | mv example.js public/example.js 29 | mv example.html public/index.html 30 | mv pkg public/pkg 31 | rm public/pkg/.gitignore 32 | 33 | - name: deploy 34 | uses: JamesIves/github-pages-deploy-action@4.1.4 35 | with: 36 | branch: gh-pages # The branch the action should deploy to. 37 | folder: message_parser_wasm/public # The folder the action should deploy. 
38 | -------------------------------------------------------------------------------- /.github/workflows/smoketest-wasm.yml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | jobs: 4 | test: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v2 8 | 9 | - name: Install 10 | run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh 11 | 12 | - run: cargo test 13 | - run: wasm-pack test --headless --chrome 14 | - run: wasm-pack test --headless --firefox -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | 3 | # test files 4 | long_file.md 5 | test_file -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | // Use IntelliSense to learn about possible attributes. 3 | // Hover to view descriptions of existing attributes. 
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 5 | "version": "0.2.0", 6 | "configurations": [ 7 | { 8 | "type": "lldb", 9 | "request": "launch", 10 | "name": "Debug unit tests in library 'message_parser_wasm'", 11 | "cargo": { 12 | "args": [ 13 | "test", 14 | "--no-run", 15 | "--lib", 16 | "--package=message_parser_wasm" 17 | ], 18 | "filter": { 19 | "name": "message_parser_wasm", 20 | "kind": "lib" 21 | } 22 | }, 23 | "args": [], 24 | "cwd": "${workspaceFolder}" 25 | }, 26 | { 27 | "type": "lldb", 28 | "request": "launch", 29 | "name": "Debug integration test 'web'", 30 | "cargo": { 31 | "args": [ 32 | "test", 33 | "--no-run", 34 | "--test=web", 35 | "--package=message_parser_wasm" 36 | ], 37 | "filter": { 38 | "name": "web", 39 | "kind": "test" 40 | } 41 | }, 42 | "args": [], 43 | "cwd": "${workspaceFolder}" 44 | }, 45 | { 46 | "type": "lldb", 47 | "request": "launch", 48 | "name": "Debug unit tests in library 'deltachat_message_parser'", 49 | "cargo": { 50 | "args": [ 51 | "test", 52 | "--no-run", 53 | "--lib", 54 | "--package=deltachat_message_parser" 55 | ], 56 | "filter": { 57 | "name": "deltachat_message_parser", 58 | "kind": "lib" 59 | } 60 | }, 61 | "args": [], 62 | "cwd": "${workspaceFolder}" 63 | }, 64 | { 65 | "type": "lldb", 66 | "request": "launch", 67 | "name": "Debug executable 'deltachat_message_parser'", 68 | "cargo": { 69 | "args": [ 70 | "build", 71 | "--bin=deltachat_message_parser", 72 | "--package=deltachat_message_parser" 73 | ], 74 | "filter": { 75 | "name": "deltachat_message_parser", 76 | "kind": "bin" 77 | } 78 | }, 79 | "args": [], 80 | "cwd": "${workspaceFolder}" 81 | }, 82 | { 83 | "type": "lldb", 84 | "request": "launch", 85 | "name": "Debug unit tests in executable 'deltachat_message_parser'", 86 | "cargo": { 87 | "args": [ 88 | "test", 89 | "--no-run", 90 | "--bin=deltachat_message_parser", 91 | "--package=deltachat_message_parser" 92 | ], 93 | "filter": { 94 | "name": 
"deltachat_message_parser", 95 | "kind": "bin" 96 | } 97 | }, 98 | "args": [], 99 | "cwd": "${workspaceFolder}" 100 | } 101 | ] 102 | } -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## Unreleased 4 | 5 | ## 0.14.1 - Allow country TLDs in scheme-less links 6 | 7 | - allow country TLDs in scheme-less links 8 | - fix ts type for LinkDestination 9 | 10 | ## 0.14.0 - Bug fixes and scheme-less links 11 | 12 | - Parse scheme-less links for some TLDs 13 | - Change `LinkDestination` field `scheme` to an `Option` type 14 | - Disallow double quotes in email address, remove repeated trailing '.' at end of email address 15 | - Allow multiple `#` characters in links 16 | 17 | ## 0.13.0 - Bug fixes and punycode api 18 | 19 | - Add functions for punycode 20 | - `parser::punycode_encode_host(host)` 21 | - `parser::punycode_decode_host(host)` 22 | - `parser::is_puny(host)` 23 | - Fixes to link parsing. 
24 | 25 | ## 0.12.0 - Bug fixes 26 | 27 | - fix: parse fediverse addresses as text, so they are not mistaken for email addresses ([issue #82](https://github.com/deltachat/message-parser/issues/82)) 28 | - fix: don't consume/eat the exclamation mark at the end of a link #85 29 | - fix: don't consume/eat various characters at the end of host names #96 30 | 31 | ## 0.11.0 - Bug fixes for Link Parsing 32 | 33 | ### Fixed 34 | - fix: restrict elements that can appear inside a label for a labeled link ([issue #59](https://github.com/deltachat/message-parser/issues/59)) 35 | - fix: Generic schemes were linkified even without content & also a lone http:// was linkified ([issue #71](https://github.com/deltachat/message-parser/issues/71)) 36 | 37 | ## 0.10.0 - Specification compliant detection for internationalized links 38 | 39 | ### Added 40 | - Add new methods for working with emojis (they are standalone helper functions and not part of message parsing): 41 | - `parser::is_emoji::emoji` (rust only) - nom parser that eats one emoji 42 | - `parser::is_emoji::get_first_emoji(text)` - get first emoji if text begins with an emoji 43 | - `parser::is_emoji::count_emojis_if_only_contains_emoji(text)` - counts emojis in texts that contain only emojis 44 | - Parse IRI links (Links that contain non ASCII characters in location part) - link parsing is now RFC3987 and RFC3986 compliant. 
45 | 46 | ### Changed 47 | - upgrade rust toolchain to 1.77.2 48 | - improved example page (added example text) 49 | 50 | ### Fixed 51 | - fix absolute unix paths being detected as bot commands suggestions 52 | - fix parenthesis in target of labeled link 53 | 54 | ## 0.9.0 - Improve BotCommandSuggestion Parsing 55 | 56 | ### Fixed 57 | - fix bot command suggestion with `@`- char was detected as email address 58 | 59 | ## 0.8.0 - Nom 7 and more Generic URI Schemes 60 | 61 | ### Changed 62 | - upgraded nom to 7 63 | - The following generic schemes (schemes that don't end in `://`) get linkified now: 64 | `mailto:`, `news:`, `feed:`, `tel:`, `sms:`, `geo:`, `maps:`, `bitcoin:`, `bitcoincash:`, `eth:`, `ethereum:`, `magnet:` 65 | - added `scheme` property to `LinkDestination` 66 | 67 | ## 0.7.0 - All the Hashtags 68 | 69 | ### Added 70 | 71 | - hashtag parsing per UAX31 72 | 73 | ## 0.6.0 - Fix email in parentheses and wrong IPv6 puny code warning 74 | 75 | ### Fixed 76 | - Fixed problem of IPv6 links being detected as punycode 77 | - fixed: Fixed the bug of brackets being parsed as part of the email address(#34) 78 | 79 | ## 0.5.0 - Delimited email addresses and fixing greedy codeblock 80 | 81 | ### Added 82 | 83 | - support for `` 84 | 85 | ### Fixed 86 | 87 | - fix: code block with emojis removed some chars at the end 88 | 89 | ## 0.4.0 - Fixing Email, Preserve the Dots! 
90 | 91 | ### Changed 92 | 93 | - update rust toolchain to `1.60.0` 94 | - enable more clippy lints to prevent panics 95 | 96 | ### Fixed 97 | 98 | - fix: do not parse last dot for email addresses #19 99 | 100 | ## 0.3.0 - Squashing Link Bugs 101 | 102 | ### Changed 103 | 104 | - `()`, `[]`, `{}`, `<>` brackets are now parsed as part of links (but only if they are opened and then closed again properly) 105 | 106 | ### Fixed 107 | 108 | - count brackets in links to fix #12 109 | - fix links eating trailing `.`,`,`,`;` or `:` #13 110 | 111 | ## 0.2.0 - Initial version 112 | 113 | This version marks the beginning of using the message parser inside of deltachat-desktop. 114 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 3 | name = "deltachat_message_parser" 4 | version = "0.14.1" 5 | edition = "2018" 6 | license = "MPL-2.0" 7 | description = "email, link, hashtag, md and more - parsing for deltachat messages" 8 | repository = "https://github.com/deltachat/message-parser" 9 | keywords = ["markdown", "deltachat-messenger", "deltachat", "dc", "message-parser"] 10 | categories = ["parser-implementations"] 11 | 12 | include = [ 13 | "CHANGELOG.md", 14 | "README.MD", 15 | "LICENSE", 16 | "spec.md", 17 | "src/*", 18 | "Cargo.toml", 19 | "message_parser_wasm/Cargo.toml" 20 | ] 21 | 22 | [dependencies] 23 | nom = "7" 24 | serde = "1.0.126" 25 | serde_derive = "1.0.126" 26 | unic-idna-punycode = "0.9.0" 27 | 28 | [workspace] 29 | members = ["message_parser_wasm"] 30 | 31 | 32 | [dev-dependencies] 33 | criterion = "0.3" 34 | 35 | [[bench]] 36 | name = "my_benchmark" 37 | path="benches/my_benchmark.rs" 38 | harness = false 39 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | The files in this directory and under its subdirectories 2 | are (c) 2021 the Delta Chat project and contributors and released under the 3 | Mozilla Public License Version 2.0, see below for a copy. 4 | 5 | Mozilla Public License Version 2.0 6 | ================================== 7 | 8 | 1. Definitions 9 | -------------- 10 | 11 | 1.1. "Contributor" 12 | means each individual or legal entity that creates, contributes to 13 | the creation of, or owns Covered Software. 14 | 15 | 1.2. "Contributor Version" 16 | means the combination of the Contributions of others (if any) used 17 | by a Contributor and that particular Contributor's Contribution. 18 | 19 | 1.3. "Contribution" 20 | means Covered Software of a particular Contributor. 21 | 22 | 1.4. "Covered Software" 23 | means Source Code Form to which the initial Contributor has attached 24 | the notice in Exhibit A, the Executable Form of such Source Code 25 | Form, and Modifications of such Source Code Form, in each case 26 | including portions thereof. 27 | 28 | 1.5. "Incompatible With Secondary Licenses" 29 | means 30 | 31 | (a) that the initial Contributor has attached the notice described 32 | in Exhibit B to the Covered Software; or 33 | 34 | (b) that the Covered Software was made available under the terms of 35 | version 1.1 or earlier of the License, but not also under the 36 | terms of a Secondary License. 37 | 38 | 1.6. "Executable Form" 39 | means any form of the work other than Source Code Form. 40 | 41 | 1.7. "Larger Work" 42 | means a work that combines Covered Software with other material, in 43 | a separate file or files, that is not Covered Software. 44 | 45 | 1.8. "License" 46 | means this document. 47 | 48 | 1.9. "Licensable" 49 | means having the right to grant, to the maximum extent possible, 50 | whether at the time of the initial grant or subsequently, any and 51 | all of the rights conveyed by this License. 
52 | 53 | 1.10. "Modifications" 54 | means any of the following: 55 | 56 | (a) any file in Source Code Form that results from an addition to, 57 | deletion from, or modification of the contents of Covered 58 | Software; or 59 | 60 | (b) any new file in Source Code Form that contains any Covered 61 | Software. 62 | 63 | 1.11. "Patent Claims" of a Contributor 64 | means any patent claim(s), including without limitation, method, 65 | process, and apparatus claims, in any patent Licensable by such 66 | Contributor that would be infringed, but for the grant of the 67 | License, by the making, using, selling, offering for sale, having 68 | made, import, or transfer of either its Contributions or its 69 | Contributor Version. 70 | 71 | 1.12. "Secondary License" 72 | means either the GNU General Public License, Version 2.0, the GNU 73 | Lesser General Public License, Version 2.1, the GNU Affero General 74 | Public License, Version 3.0, or any later versions of those 75 | licenses. 76 | 77 | 1.13. "Source Code Form" 78 | means the form of the work preferred for making modifications. 79 | 80 | 1.14. "You" (or "Your") 81 | means an individual or a legal entity exercising rights under this 82 | License. For legal entities, "You" includes any entity that 83 | controls, is controlled by, or is under common control with You. For 84 | purposes of this definition, "control" means (a) the power, direct 85 | or indirect, to cause the direction or management of such entity, 86 | whether by contract or otherwise, or (b) ownership of more than 87 | fifty percent (50%) of the outstanding shares or beneficial 88 | ownership of such entity. 89 | 90 | 2. License Grants and Conditions 91 | -------------------------------- 92 | 93 | 2.1. 
Grants 94 | 95 | Each Contributor hereby grants You a world-wide, royalty-free, 96 | non-exclusive license: 97 | 98 | (a) under intellectual property rights (other than patent or trademark) 99 | Licensable by such Contributor to use, reproduce, make available, 100 | modify, display, perform, distribute, and otherwise exploit its 101 | Contributions, either on an unmodified basis, with Modifications, or 102 | as part of a Larger Work; and 103 | 104 | (b) under Patent Claims of such Contributor to make, use, sell, offer 105 | for sale, have made, import, and otherwise transfer either its 106 | Contributions or its Contributor Version. 107 | 108 | 2.2. Effective Date 109 | 110 | The licenses granted in Section 2.1 with respect to any Contribution 111 | become effective for each Contribution on the date the Contributor first 112 | distributes such Contribution. 113 | 114 | 2.3. Limitations on Grant Scope 115 | 116 | The licenses granted in this Section 2 are the only rights granted under 117 | this License. No additional rights or licenses will be implied from the 118 | distribution or licensing of Covered Software under this License. 119 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 120 | Contributor: 121 | 122 | (a) for any code that a Contributor has removed from Covered Software; 123 | or 124 | 125 | (b) for infringements caused by: (i) Your and any other third party's 126 | modifications of Covered Software, or (ii) the combination of its 127 | Contributions with other software (except as part of its Contributor 128 | Version); or 129 | 130 | (c) under Patent Claims infringed by Covered Software in the absence of 131 | its Contributions. 132 | 133 | This License does not grant any rights in the trademarks, service marks, 134 | or logos of any Contributor (except as may be necessary to comply with 135 | the notice requirements in Section 3.4). 136 | 137 | 2.4. 
Subsequent Licenses 138 | 139 | No Contributor makes additional grants as a result of Your choice to 140 | distribute the Covered Software under a subsequent version of this 141 | License (see Section 10.2) or under the terms of a Secondary License (if 142 | permitted under the terms of Section 3.3). 143 | 144 | 2.5. Representation 145 | 146 | Each Contributor represents that the Contributor believes its 147 | Contributions are its original creation(s) or it has sufficient rights 148 | to grant the rights to its Contributions conveyed by this License. 149 | 150 | 2.6. Fair Use 151 | 152 | This License is not intended to limit any rights You have under 153 | applicable copyright doctrines of fair use, fair dealing, or other 154 | equivalents. 155 | 156 | 2.7. Conditions 157 | 158 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 159 | in Section 2.1. 160 | 161 | 3. Responsibilities 162 | ------------------- 163 | 164 | 3.1. Distribution of Source Form 165 | 166 | All distribution of Covered Software in Source Code Form, including any 167 | Modifications that You create or to which You contribute, must be under 168 | the terms of this License. You must inform recipients that the Source 169 | Code Form of the Covered Software is governed by the terms of this 170 | License, and how they can obtain a copy of this License. You may not 171 | attempt to alter or restrict the recipients' rights in the Source Code 172 | Form. 173 | 174 | 3.2. 
Distribution of Executable Form 175 | 176 | If You distribute Covered Software in Executable Form then: 177 | 178 | (a) such Covered Software must also be made available in Source Code 179 | Form, as described in Section 3.1, and You must inform recipients of 180 | the Executable Form how they can obtain a copy of such Source Code 181 | Form by reasonable means in a timely manner, at a charge no more 182 | than the cost of distribution to the recipient; and 183 | 184 | (b) You may distribute such Executable Form under the terms of this 185 | License, or sublicense it under different terms, provided that the 186 | license for the Executable Form does not attempt to limit or alter 187 | the recipients' rights in the Source Code Form under this License. 188 | 189 | 3.3. Distribution of a Larger Work 190 | 191 | You may create and distribute a Larger Work under terms of Your choice, 192 | provided that You also comply with the requirements of this License for 193 | the Covered Software. If the Larger Work is a combination of Covered 194 | Software with a work governed by one or more Secondary Licenses, and the 195 | Covered Software is not Incompatible With Secondary Licenses, this 196 | License permits You to additionally distribute such Covered Software 197 | under the terms of such Secondary License(s), so that the recipient of 198 | the Larger Work may, at their option, further distribute the Covered 199 | Software under the terms of either this License or such Secondary 200 | License(s). 201 | 202 | 3.4. Notices 203 | 204 | You may not remove or alter the substance of any license notices 205 | (including copyright notices, patent notices, disclaimers of warranty, 206 | or limitations of liability) contained within the Source Code Form of 207 | the Covered Software, except that You may alter any license notices to 208 | the extent required to remedy known factual inaccuracies. 209 | 210 | 3.5. 
Application of Additional Terms 211 | 212 | You may choose to offer, and to charge a fee for, warranty, support, 213 | indemnity or liability obligations to one or more recipients of Covered 214 | Software. However, You may do so only on Your own behalf, and not on 215 | behalf of any Contributor. You must make it absolutely clear that any 216 | such warranty, support, indemnity, or liability obligation is offered by 217 | You alone, and You hereby agree to indemnify every Contributor for any 218 | liability incurred by such Contributor as a result of warranty, support, 219 | indemnity or liability terms You offer. You may include additional 220 | disclaimers of warranty and limitations of liability specific to any 221 | jurisdiction. 222 | 223 | 4. Inability to Comply Due to Statute or Regulation 224 | --------------------------------------------------- 225 | 226 | If it is impossible for You to comply with any of the terms of this 227 | License with respect to some or all of the Covered Software due to 228 | statute, judicial order, or regulation then You must: (a) comply with 229 | the terms of this License to the maximum extent possible; and (b) 230 | describe the limitations and the code they affect. Such description must 231 | be placed in a text file included with all distributions of the Covered 232 | Software under this License. Except to the extent prohibited by statute 233 | or regulation, such description must be sufficiently detailed for a 234 | recipient of ordinary skill to be able to understand it. 235 | 236 | 5. Termination 237 | -------------- 238 | 239 | 5.1. The rights granted under this License will terminate automatically 240 | if You fail to comply with any of its terms. 
However, if You become 241 | compliant, then the rights granted under this License from a particular 242 | Contributor are reinstated (a) provisionally, unless and until such 243 | Contributor explicitly and finally terminates Your grants, and (b) on an 244 | ongoing basis, if such Contributor fails to notify You of the 245 | non-compliance by some reasonable means prior to 60 days after You have 246 | come back into compliance. Moreover, Your grants from a particular 247 | Contributor are reinstated on an ongoing basis if such Contributor 248 | notifies You of the non-compliance by some reasonable means, this is the 249 | first time You have received notice of non-compliance with this License 250 | from such Contributor, and You become compliant prior to 30 days after 251 | Your receipt of the notice. 252 | 253 | 5.2. If You initiate litigation against any entity by asserting a patent 254 | infringement claim (excluding declaratory judgment actions, 255 | counter-claims, and cross-claims) alleging that a Contributor Version 256 | directly or indirectly infringes any patent, then the rights granted to 257 | You by any and all Contributors for the Covered Software under Section 258 | 2.1 of this License shall terminate. 259 | 260 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 261 | end user license agreements (excluding distributors and resellers) which 262 | have been validly granted by You or Your distributors under this License 263 | prior to termination shall survive termination. 264 | 265 | ************************************************************************ 266 | * * 267 | * 6. 
Disclaimer of Warranty * 268 | * ------------------------- * 269 | * * 270 | * Covered Software is provided under this License on an "as is" * 271 | * basis, without warranty of any kind, either expressed, implied, or * 272 | * statutory, including, without limitation, warranties that the * 273 | * Covered Software is free of defects, merchantable, fit for a * 274 | * particular purpose or non-infringing. The entire risk as to the * 275 | * quality and performance of the Covered Software is with You. * 276 | * Should any Covered Software prove defective in any respect, You * 277 | * (not any Contributor) assume the cost of any necessary servicing, * 278 | * repair, or correction. This disclaimer of warranty constitutes an * 279 | * essential part of this License. No use of any Covered Software is * 280 | * authorized under this License except under this disclaimer. * 281 | * * 282 | ************************************************************************ 283 | 284 | ************************************************************************ 285 | * * 286 | * 7. Limitation of Liability * 287 | * -------------------------- * 288 | * * 289 | * Under no circumstances and under no legal theory, whether tort * 290 | * (including negligence), contract, or otherwise, shall any * 291 | * Contributor, or anyone who distributes Covered Software as * 292 | * permitted above, be liable to You for any direct, indirect, * 293 | * special, incidental, or consequential damages of any character * 294 | * including, without limitation, damages for lost profits, loss of * 295 | * goodwill, work stoppage, computer failure or malfunction, or any * 296 | * and all other commercial damages or losses, even if such party * 297 | * shall have been informed of the possibility of such damages. 
This * 298 | * limitation of liability shall not apply to liability for death or * 299 | * personal injury resulting from such party's negligence to the * 300 | * extent applicable law prohibits such limitation. Some * 301 | * jurisdictions do not allow the exclusion or limitation of * 302 | * incidental or consequential damages, so this exclusion and * 303 | * limitation may not apply to You. * 304 | * * 305 | ************************************************************************ 306 | 307 | 8. Litigation 308 | ------------- 309 | 310 | Any litigation relating to this License may be brought only in the 311 | courts of a jurisdiction where the defendant maintains its principal 312 | place of business and such litigation shall be governed by laws of that 313 | jurisdiction, without reference to its conflict-of-law provisions. 314 | Nothing in this Section shall prevent a party's ability to bring 315 | cross-claims or counter-claims. 316 | 317 | 9. Miscellaneous 318 | ---------------- 319 | 320 | This License represents the complete agreement concerning the subject 321 | matter hereof. If any provision of this License is held to be 322 | unenforceable, such provision shall be reformed only to the extent 323 | necessary to make it enforceable. Any law or regulation which provides 324 | that the language of a contract shall be construed against the drafter 325 | shall not be used to construe this License against a Contributor. 326 | 327 | 10. Versions of the License 328 | --------------------------- 329 | 330 | 10.1. New Versions 331 | 332 | Mozilla Foundation is the license steward. Except as provided in Section 333 | 10.3, no one other than the license steward has the right to modify or 334 | publish new versions of this License. Each version will be given a 335 | distinguishing version number. 336 | 337 | 10.2. 
Effect of New Versions 338 | 339 | You may distribute the Covered Software under the terms of the version 340 | of the License under which You originally received the Covered Software, 341 | or under the terms of any subsequent version published by the license 342 | steward. 343 | 344 | 10.3. Modified Versions 345 | 346 | If you create software not governed by this License, and you want to 347 | create a new license for such software, you may create and use a 348 | modified version of this License if you rename the license and remove 349 | any references to the name of the license steward (except to note that 350 | such modified license differs from this License). 351 | 352 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 353 | Licenses 354 | 355 | If You choose to distribute Source Code Form that is Incompatible With 356 | Secondary Licenses under the terms of this version of the License, the 357 | notice described in Exhibit B of this License must be attached. 358 | 359 | Exhibit A - Source Code Form License Notice 360 | ------------------------------------------- 361 | 362 | This Source Code Form is subject to the terms of the Mozilla Public 363 | License, v. 2.0. If a copy of the MPL was not distributed with this 364 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 365 | 366 | If it is not possible or desirable to put the notice in a particular 367 | file, then You may include the notice in a location (such as a LICENSE 368 | file in a relevant directory) where a recipient would be likely to look 369 | for such a notice. 370 | 371 | You may add additional accurate notices of copyright ownership. 372 | 373 | Exhibit B - "Incompatible With Secondary Licenses" Notice 374 | --------------------------------------------------------- 375 | 376 | This Source Code Form is "Incompatible With Secondary Licenses", as 377 | defined by the Mozilla Public License, v. 2.0. 
378 | -------------------------------------------------------------------------------- /README.MD: -------------------------------------------------------------------------------- 1 | # DeltaChat Message Parser 2 | 3 | Parsing of Links, Email addresses, simple text formatting (markdown subset), user mentions, hashtags and more in DeltaChat messages. 4 | 5 | The specification can be found in [spec.md](./spec.md). 6 | 7 | WASM Demo: 8 | 9 | ## The idea behind it 10 | 11 | Have the same rich message parsing on all platforms. 12 | 13 | The basic idea is that core can use this library to convert messages to an AST format, 14 | that can then be displayed by the UIs how they see fit, for desktop it will be converted to react elements. 15 | 16 | > Desktop already uses this package (minus the markdown, because it does not make sense to have markdown on only one platform) as wasm module (see `./message_parser_wasm`), later this will probably be integrated into deltachat core. 17 | 18 | ## Coding Principles 19 | 20 | - **many** test cases 21 | - aim to be fast - so also benchmarks to make sure the lib stays fast enough 22 | 23 | ## Recommendations 24 | 25 | If used for message parsing, don't parse messages that are over `10 000` chars in size to ensure performance stays excellent. (the lib could and should support more than that and should aim to be fast enough for it, but on slow devices or transpiled to wasm or asmjs limiting it makes sense to avoid a laggy/frozen interface) 26 | 27 | ### Benchmarking: 28 | 29 | ``` 30 | cargo install cargo-criterion 31 | ``` 32 | 33 | benchmark: 34 | 35 | ``` 36 | cargo criterion 37 | ``` 38 | 39 | docs about benchmarking: https://bheisler.github.io/criterion.rs/book/criterion_rs.html 40 | 41 | ### Changing CPU power settings for consistent results 42 | 43 | These days most CPUs change their performance according to some rules to save power. 44 | To produce consistent benchmark results, CPU performance must not change between benchmarks.
There are various ways to achieve this. 45 | If you've got a laptop, the first step might be connecting the AC adapter to ensure your laptop won't go into power-saving mode and thus change the 46 | CPU frequency. The next step is to set the CPU frequency to a constant value *below the maximum frequency the CPU can handle*, because CPUs usually can't handle the maximum possible frequency on all cores. 47 | 48 | On Linux, you can set the CPU frequency using the `cpupower` utility: 49 | 50 | ```sh 51 | cpupower frequency-set --min 3500 --max 3500 # this to set maximum and minimum to the same value 52 | cpupower frequency-set -f 3500 # set frequency explicitly if the kernel module is available 53 | ``` 54 | 55 | ## References 56 | 57 | - Older discussion on introducing markdown into deltachat: https://github.com/deltachat/interface/pull/20 58 | - Feature request for message markdown in the forum: https://support.delta.chat/t/markdown-support-in-chat/159 59 | 60 | 61 | ## Emoji Helpers 62 | 63 | In addition to message parsing, this crate also contains some useful functions for working with emojis. 64 | 65 | - `parser::is_emoji::emoji` (rust only) - nom parser that eats one emoji 66 | - idea: could potentially be used by core to filter reactions to only emojis 67 | - `parser::is_emoji::get_first_emoji(text)` - get first emoji if text begins with an emoji 68 | - idea: can be used by UI to get the first emoji of a chat name to display it as text avatar 69 | - `parser::is_emoji::count_emojis_if_only_contains_emoji(text)` - counts emojis in texts that contain only emojis 70 | - useful for jumbomoji logic (if you send a small message with just emojis the emojis get displayed larger). 71 | - this function does not fail on too long strings, so to keep good performance check the length beforehand and if it is too long the message would not be big anyway so you don't need to call this function.
72 | 73 | ## Punycode Helpers 74 | 75 | - `parser::punycode_encode_host(host)` - encode a host to punycode encoded string 76 | - `parser::punycode_decode_host(host)` - decode a punycode encoded host to unicode string 77 | - `parser::is_puny(host)` - returns true if host string contains non ASCII characters (alphanumeric + `.` + `-`) - acts as indicator if host string needs to be encoded -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | ## How to make a new release 2 | 3 | 4 | 1. `git pull` - make sure your local checkout is up to date 5 | 2. Ensure changelog entries exist for all relevant changes. 6 | 3. Make a new version in the changelog -> semver + some title that roughly describes the changes 7 | 4. update version in `Cargo.toml` and in `message_parser_wasm/Cargo.toml`. (if you run rust-analyzer then `Cargo.lock` is updated automatically) 8 | 5. make a commit with the changes "prepare " 9 | 6. make a tag for the version 10 | 7. push both commit and tag 11 | 8. run `cargo publish` 12 | 9. run `cd message_parser_wasm && wasm-pack build --scope deltachat --target web && wasm-pack publish --target web` 13 | 10. make a release on github, copy the changelog for that version over to there -------------------------------------------------------------------------------- /benches/lorem_ipsum.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Quisque non est urna. Donec sit amet lacus eu dui egestas semper vel sit amet est. Morbi sed risus nunc. Nam egestas mattis lectus, quis egestas est dictum id. Sed ultrices tristique lectus. Mauris viverra quam erat, at ultrices ligula luctus vel. Suspendisse auctor, mauris in molestie rutrum, arcu augue aliquet quam, nec iaculis magna mi quis velit. 
Duis porttitor nunc ut orci molestie, eu venenatis magna euismod. 4 | 5 | Donec consectetur ipsum ac neque euismod, in aliquam lorem sollicitudin. Cras aliquet pharetra sem, condimentum lacinia erat porta vitae. Integer vel nunc eget nisl blandit finibus quis at eros. Nam id luctus risus, eget pretium orci. Nulla malesuada lectus id eros consequat, vel sagittis augue sagittis. Proin fermentum quis eros ut lacinia. Donec eget dui pulvinar, congue enim in, tempor neque. Vestibulum mattis eu lorem sed consequat. Nunc nec eros id turpis suscipit fermentum ut a metus. Proin feugiat, ante vitae faucibus vulputate, nunc libero efficitur est, sed sodales erat lectus at tellus. 6 | 7 | Curabitur magna sem, efficitur at tempor quis, pharetra blandit velit. Phasellus luctus at eros vitae varius. Suspendisse ut metus quis leo rhoncus aliquet vel sit amet leo. Curabitur iaculis porttitor massa vel euismod. Phasellus purus ipsum, tincidunt ultricies rutrum vel, tempor ac dui. Sed ut ligula eu ipsum tempus mattis. Etiam in commodo massa. Vestibulum metus tellus, mollis mollis fringilla at, consequat eget augue. Sed pulvinar odio consequat vehicula consectetur. Phasellus mi ligula, suscipit at risus a, lacinia rhoncus felis. Proin in leo metus. Ut consectetur sit amet odio vel auctor. Aenean blandit nibh quam, sit amet sollicitudin nibh luctus ut. Donec vestibulum enim a turpis sagittis tincidunt. In egestas et massa eu tristique. 8 | 9 | Nunc luctus euismod blandit. Aliquam vehicula nibh magna, quis convallis orci consequat malesuada. Vestibulum ante ipsum primis in faucibus orci luctus et ultrices posuere cubilia curae; Aliquam in fermentum mauris. Duis nisi dui, bibendum at eleifend ac, placerat ullamcorper arcu. In nunc odio, eleifend sed sapien et, consequat volutpat mauris. Suspendisse accumsan egestas dapibus. Aenean vitae mollis mi. Aenean finibus, nibh sit amet posuere vehicula, ligula elit dictum risus, nec pellentesque ante urna id mauris. 
Nunc id magna ut dolor facilisis egestas nec a eros. 10 | 11 | Sed efficitur libero at ante mattis, quis aliquam dolor mattis. Ut scelerisque sit amet tortor vel scelerisque. Nam rhoncus, orci vel feugiat consequat, odio dui fringilla leo, et luctus enim nunc semper nulla. Nulla sem sapien, aliquam nec quam eget, fringilla sagittis mauris. Morbi ac purus id nibh tempor feugiat vel ultricies ex. Nulla facilisi. Cras quis semper sapien. Mauris lacinia egestas tellus, ornare commodo felis semper nec. Suspendisse dignissim sollicitudin ligula, sed rhoncus enim. Aliquam eget sapien risus. Nullam ligula metus, volutpat ut fringilla nec, auctor id velit. Nulla bibendum tincidunt erat vel suscipit. 12 | 13 | Donec in pharetra dolor. Duis porta ac eros ut sagittis. Donec sit amet augue fringilla, pharetra eros id, aliquam eros. Donec risus mi, feugiat vel mauris aliquet, ultricies dapibus risus. Maecenas eros nisi, volutpat pellentesque dictum fermentum. -------------------------------------------------------------------------------- /benches/moar_links.txt: -------------------------------------------------------------------------------- 1 | Let's add some more links just for testing and benching: 2 | 3 | these are some IPv6 links: 4 | 5 | gopher://[::1]/ 6 | https://[::1]/سلام 7 | https://[2345:0425:2CA1:0000:0000:0567:5673:23b5]/hello_world 8 | https://[2345:425:2CA1:0:0:0567:5673:23b5]/hello_world 9 | 10 | an IPvfuture link: 11 | ftp://mrchickenkiller@[vA.A]/var/log/boot.log 12 | 13 | some normal links: 14 | 15 | https://www.ietf.org/rfc/rfc3987.txt 16 | https://iamb.chat/messages/index.html 17 | https://github.com/deltachat/message-parser/issues/67 18 | https://far.chickenkiller.com 19 | gopher://republic.circumlunar.space 20 | https://far.chickenkiller.com/religion/a-god-who-does-not-care/ 21 | -------------------------------------------------------------------------------- /benches/my_benchmark.rs: 
-------------------------------------------------------------------------------- 1 | use criterion::{black_box, criterion_group, criterion_main, Criterion}; 2 | use deltachat_message_parser::parser::{ 3 | is_emoji::get_first_emoji, parse_desktop_set, parse_markdown_text, parse_only_text, 4 | LinkDestination, 5 | }; 6 | 7 | pub fn criterion_benchmark(c: &mut Criterion) { 8 | let testdata = include_str!("testdata.md"); 9 | let lorem_ipsum_txt = include_str!("lorem_ipsum.txt"); 10 | let r10s_update_message = include_str!("r10s_update_message.txt"); 11 | let links = include_str!("moar_links.txt"); 12 | let emojies = vec![ 13 | "not an emoji", 14 | "\u{3299}\u{3300}", 15 | "🟠", 16 | "", 17 | "🟠y", 18 | "🍏", 19 | "lorem", 20 | "😀", 21 | "\u{2755}", 22 | "\u{2655}", 23 | "\u{2000}", 24 | "\u{25aa}", 25 | "\u{2934}", 26 | "\u{2195}", 27 | "🆎", 28 | "🎓", 29 | "🟰", 30 | "👨‍👩‍👧‍👧", 31 | "👨‍👩‍👧‍👧👨‍👩‍👧‍👧👨‍👩‍👧‍👧👨‍👩‍👧‍👧", 32 | "👸🏾", 33 | ]; 34 | 35 | c.bench_function("only_text_lorem_ipsum.txt", |b| { 36 | b.iter(|| parse_only_text(black_box(lorem_ipsum_txt))) 37 | }); 38 | c.bench_function("desktop_set_lorem_ipsum.txt", |b| { 39 | b.iter(|| parse_desktop_set(black_box(lorem_ipsum_txt))) 40 | }); 41 | c.bench_function("markdown_lorem_ipsum.txt", |b| { 42 | b.iter(|| parse_markdown_text(black_box(lorem_ipsum_txt))) 43 | }); 44 | 45 | c.bench_function("only_text_testdata.md", |b| { 46 | b.iter(|| parse_only_text(black_box(testdata))) 47 | }); 48 | c.bench_function("desktop_set_testdata.md", |b| { 49 | b.iter(|| parse_desktop_set(black_box(testdata))) 50 | }); 51 | c.bench_function("markdown_testdata.md", |b| { 52 | b.iter(|| parse_markdown_text(black_box(testdata))) 53 | }); 54 | 55 | c.bench_function("only_text_r10s_update_message.txt", |b| { 56 | b.iter(|| parse_only_text(black_box(r10s_update_message))) 57 | }); 58 | c.bench_function("desktop_set_r10s_update_message.txt", |b| { 59 | b.iter(|| parse_desktop_set(black_box(r10s_update_message))) 60 | }); 61 | 
c.bench_function("markdown_r10s_update_message.txt", |b| { 62 | b.iter(|| parse_markdown_text(black_box(r10s_update_message))) 63 | }); 64 | 65 | c.bench_function("parse_link_moar_links.txt", |b| { 66 | b.iter(|| LinkDestination::parse(black_box(links))) 67 | }); 68 | c.bench_function("emoji", |b| { 69 | b.iter(|| emojies.iter().map(|s| get_first_emoji(black_box(s)))) 70 | }); 71 | } 72 | 73 | criterion_group!(benches, criterion_benchmark); 74 | criterion_main!(benches); 75 | -------------------------------------------------------------------------------- /benches/r10s_update_message.txt: -------------------------------------------------------------------------------- 1 | and now: here is Delta Chat 1.24.2 - choose your flavour and mind your backups: 2 | - 🍋 https://testrun.org/deltachat-gplay-release-1.24.2.apk (android, overwrites existing gplay installs, should keep data) 3 | - 🍉 https://testrun.org/deltachat-fat-debug-1.24.2.apk (android, can be installed beside gplay) 4 | - 🍏 https://testflight.apple.com/join/uEMc1NxS (ios, update to 1.24.2 may take a day or so) 5 | there are some major changes in this version, most of that are widely discussed here, see https://github.com/deltachat/deltachat-android/blob/main/CHANGELOG.md or all the details. 6 | for testing, most important part seems to that we are forced by google to targeted api30 - and by that, we have to change the way how to read/write external files. this affects backup, keys, attachments, these areas should be tested carefully, eg. we just had one issue with only android10 ... @Jonathan, who thankfully did the api30 thingie wrote down a testing checklist at https://github.com/deltachat/deltachat-android/pull/2087 7 | and now ... 
happy testing :) 8 | -------------------------------------------------------------------------------- /docs.md: -------------------------------------------------------------------------------- 1 | ### How parsing currently roughly works 2 | 3 | We use the nom parser, which basically consists of functions that check the input and when it matches they consume the part of the input that matched. 4 | 5 | So currently, the parsing works like this: 6 | 7 | pseudocode: 8 | 9 | ```rs 10 | let mut remaining_input = input; 11 | let output = Vec 12 | 13 | while !remaining_input.is_empty() { 14 | let res = { 15 | // try the following parsers in this order (order is important with some parsers) 16 | 1. hashtag(input) 17 | 2. bot_command_suggestion(input) 18 | 3. email_address(input) 19 | 4. link(input) 20 | 5. linebreak(input) 21 | last option: consumes all text until [parse_text_element] works again 22 | } 23 | remaining_input = res.remaining_input 24 | output.push(res.element) 25 | } 26 | ``` 27 | 28 | ### Contributing principles: 29 | 30 | The single most important thing for this crate is testing: as long as we cover as many cases in tests as we can, the parser stays working. 31 | 32 | The second principle is speed, we can test that with benchmarks. 33 | 34 | The third priority is binary size, so be careful with huge extra libraries, maybe there is a better way. 35 | 36 | 37 | 38 | ### Release Process 39 | 40 | 0. checkout current main and make sure nobody messes with main while you make the release. 41 | 1. Update changelog 42 | 2. bump versions in `Cargo.toml` and `message_parser_wasm/Cargo.toml` 43 | 3. do a commit to main with message `prepare [version]` 44 | 4. `git push` 45 | 5. `git tag [version]` and `git push --tags` 46 | 6. `cargo publish` 47 | 7. `cd message_parser_wasm/` 48 | 8. `wasm-pack build --scope deltachat --target web` 49 | 9.
`wasm-pack publish --target web` 50 | -------------------------------------------------------------------------------- /message_parser_wasm/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | **/*.rs.bk 3 | Cargo.lock 4 | bin/ 5 | pkg/ 6 | wasm-pack.log 7 | -------------------------------------------------------------------------------- /message_parser_wasm/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "message_parser_wasm" 3 | version = "0.14.1" 4 | edition = "2018" 5 | license = "MPL-2.0" 6 | description = "Parsing of Links, Email adresses, simple text formatting (markdown subset), user mentions, hashtags and more in DeltaChat messages." 7 | repository = "https://github.com/deltachat/message-parser" 8 | 9 | [lib] 10 | crate-type = ["cdylib", "rlib"] 11 | 12 | [features] 13 | default = ["console_error_panic_hook"] 14 | 15 | [dependencies] 16 | wasm-bindgen = {version = "0.2.63"} 17 | 18 | # The `console_error_panic_hook` crate provides better debugging of panics by 19 | # logging them with `console.error`. This is great for development, but requires 20 | # all the `std::fmt` and `std::panicking` infrastructure, so isn't great for 21 | # code size when deploying. 22 | console_error_panic_hook = { version = "0.1.6", optional = true } 23 | 24 | # `wee_alloc` is a tiny allocator for wasm that is only ~1K in code size 25 | # compared to the default allocator's ~10K. It is slower than the default 26 | # allocator, however. 27 | # 28 | # Unfortunately, `wee_alloc` requires nightly Rust when targeting wasm for now. 
29 | wee_alloc = { version = "0.4.5", optional = true } 30 | deltachat_message_parser = { path = "../" } 31 | serde-wasm-bindgen = "0.6.0" 32 | serde = { version = "1.0.188", features = ["derive"] } 33 | 34 | [dev-dependencies] 35 | serde = "1.0.130" 36 | serde_json = "1.0.68" 37 | wasm-bindgen-test = "0.3.13" 38 | 39 | [profile.release] 40 | # Tell `rustc` to optimize for small code size. 41 | opt-level = "s" 42 | -------------------------------------------------------------------------------- /message_parser_wasm/LICENSE: -------------------------------------------------------------------------------- 1 | The files in this directory and under its subdirectories 2 | are (c) 2021 the Delta Chat project and contributors and released under the 3 | Mozilla Public License Version 2.0, see below for a copy. 4 | 5 | Mozilla Public License Version 2.0 6 | ================================== 7 | 8 | 1. Definitions 9 | -------------- 10 | 11 | 1.1. "Contributor" 12 | means each individual or legal entity that creates, contributes to 13 | the creation of, or owns Covered Software. 14 | 15 | 1.2. "Contributor Version" 16 | means the combination of the Contributions of others (if any) used 17 | by a Contributor and that particular Contributor's Contribution. 18 | 19 | 1.3. "Contribution" 20 | means Covered Software of a particular Contributor. 21 | 22 | 1.4. "Covered Software" 23 | means Source Code Form to which the initial Contributor has attached 24 | the notice in Exhibit A, the Executable Form of such Source Code 25 | Form, and Modifications of such Source Code Form, in each case 26 | including portions thereof. 27 | 28 | 1.5. "Incompatible With Secondary Licenses" 29 | means 30 | 31 | (a) that the initial Contributor has attached the notice described 32 | in Exhibit B to the Covered Software; or 33 | 34 | (b) that the Covered Software was made available under the terms of 35 | version 1.1 or earlier of the License, but not also under the 36 | terms of a Secondary License. 
37 | 38 | 1.6. "Executable Form" 39 | means any form of the work other than Source Code Form. 40 | 41 | 1.7. "Larger Work" 42 | means a work that combines Covered Software with other material, in 43 | a separate file or files, that is not Covered Software. 44 | 45 | 1.8. "License" 46 | means this document. 47 | 48 | 1.9. "Licensable" 49 | means having the right to grant, to the maximum extent possible, 50 | whether at the time of the initial grant or subsequently, any and 51 | all of the rights conveyed by this License. 52 | 53 | 1.10. "Modifications" 54 | means any of the following: 55 | 56 | (a) any file in Source Code Form that results from an addition to, 57 | deletion from, or modification of the contents of Covered 58 | Software; or 59 | 60 | (b) any new file in Source Code Form that contains any Covered 61 | Software. 62 | 63 | 1.11. "Patent Claims" of a Contributor 64 | means any patent claim(s), including without limitation, method, 65 | process, and apparatus claims, in any patent Licensable by such 66 | Contributor that would be infringed, but for the grant of the 67 | License, by the making, using, selling, offering for sale, having 68 | made, import, or transfer of either its Contributions or its 69 | Contributor Version. 70 | 71 | 1.12. "Secondary License" 72 | means either the GNU General Public License, Version 2.0, the GNU 73 | Lesser General Public License, Version 2.1, the GNU Affero General 74 | Public License, Version 3.0, or any later versions of those 75 | licenses. 76 | 77 | 1.13. "Source Code Form" 78 | means the form of the work preferred for making modifications. 79 | 80 | 1.14. "You" (or "Your") 81 | means an individual or a legal entity exercising rights under this 82 | License. For legal entities, "You" includes any entity that 83 | controls, is controlled by, or is under common control with You. 
For 84 | purposes of this definition, "control" means (a) the power, direct 85 | or indirect, to cause the direction or management of such entity, 86 | whether by contract or otherwise, or (b) ownership of more than 87 | fifty percent (50%) of the outstanding shares or beneficial 88 | ownership of such entity. 89 | 90 | 2. License Grants and Conditions 91 | -------------------------------- 92 | 93 | 2.1. Grants 94 | 95 | Each Contributor hereby grants You a world-wide, royalty-free, 96 | non-exclusive license: 97 | 98 | (a) under intellectual property rights (other than patent or trademark) 99 | Licensable by such Contributor to use, reproduce, make available, 100 | modify, display, perform, distribute, and otherwise exploit its 101 | Contributions, either on an unmodified basis, with Modifications, or 102 | as part of a Larger Work; and 103 | 104 | (b) under Patent Claims of such Contributor to make, use, sell, offer 105 | for sale, have made, import, and otherwise transfer either its 106 | Contributions or its Contributor Version. 107 | 108 | 2.2. Effective Date 109 | 110 | The licenses granted in Section 2.1 with respect to any Contribution 111 | become effective for each Contribution on the date the Contributor first 112 | distributes such Contribution. 113 | 114 | 2.3. Limitations on Grant Scope 115 | 116 | The licenses granted in this Section 2 are the only rights granted under 117 | this License. No additional rights or licenses will be implied from the 118 | distribution or licensing of Covered Software under this License. 
119 | Notwithstanding Section 2.1(b) above, no patent license is granted by a 120 | Contributor: 121 | 122 | (a) for any code that a Contributor has removed from Covered Software; 123 | or 124 | 125 | (b) for infringements caused by: (i) Your and any other third party's 126 | modifications of Covered Software, or (ii) the combination of its 127 | Contributions with other software (except as part of its Contributor 128 | Version); or 129 | 130 | (c) under Patent Claims infringed by Covered Software in the absence of 131 | its Contributions. 132 | 133 | This License does not grant any rights in the trademarks, service marks, 134 | or logos of any Contributor (except as may be necessary to comply with 135 | the notice requirements in Section 3.4). 136 | 137 | 2.4. Subsequent Licenses 138 | 139 | No Contributor makes additional grants as a result of Your choice to 140 | distribute the Covered Software under a subsequent version of this 141 | License (see Section 10.2) or under the terms of a Secondary License (if 142 | permitted under the terms of Section 3.3). 143 | 144 | 2.5. Representation 145 | 146 | Each Contributor represents that the Contributor believes its 147 | Contributions are its original creation(s) or it has sufficient rights 148 | to grant the rights to its Contributions conveyed by this License. 149 | 150 | 2.6. Fair Use 151 | 152 | This License is not intended to limit any rights You have under 153 | applicable copyright doctrines of fair use, fair dealing, or other 154 | equivalents. 155 | 156 | 2.7. Conditions 157 | 158 | Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted 159 | in Section 2.1. 160 | 161 | 3. Responsibilities 162 | ------------------- 163 | 164 | 3.1. Distribution of Source Form 165 | 166 | All distribution of Covered Software in Source Code Form, including any 167 | Modifications that You create or to which You contribute, must be under 168 | the terms of this License. 
You must inform recipients that the Source 169 | Code Form of the Covered Software is governed by the terms of this 170 | License, and how they can obtain a copy of this License. You may not 171 | attempt to alter or restrict the recipients' rights in the Source Code 172 | Form. 173 | 174 | 3.2. Distribution of Executable Form 175 | 176 | If You distribute Covered Software in Executable Form then: 177 | 178 | (a) such Covered Software must also be made available in Source Code 179 | Form, as described in Section 3.1, and You must inform recipients of 180 | the Executable Form how they can obtain a copy of such Source Code 181 | Form by reasonable means in a timely manner, at a charge no more 182 | than the cost of distribution to the recipient; and 183 | 184 | (b) You may distribute such Executable Form under the terms of this 185 | License, or sublicense it under different terms, provided that the 186 | license for the Executable Form does not attempt to limit or alter 187 | the recipients' rights in the Source Code Form under this License. 188 | 189 | 3.3. Distribution of a Larger Work 190 | 191 | You may create and distribute a Larger Work under terms of Your choice, 192 | provided that You also comply with the requirements of this License for 193 | the Covered Software. If the Larger Work is a combination of Covered 194 | Software with a work governed by one or more Secondary Licenses, and the 195 | Covered Software is not Incompatible With Secondary Licenses, this 196 | License permits You to additionally distribute such Covered Software 197 | under the terms of such Secondary License(s), so that the recipient of 198 | the Larger Work may, at their option, further distribute the Covered 199 | Software under the terms of either this License or such Secondary 200 | License(s). 201 | 202 | 3.4. 
Notices 203 | 204 | You may not remove or alter the substance of any license notices 205 | (including copyright notices, patent notices, disclaimers of warranty, 206 | or limitations of liability) contained within the Source Code Form of 207 | the Covered Software, except that You may alter any license notices to 208 | the extent required to remedy known factual inaccuracies. 209 | 210 | 3.5. Application of Additional Terms 211 | 212 | You may choose to offer, and to charge a fee for, warranty, support, 213 | indemnity or liability obligations to one or more recipients of Covered 214 | Software. However, You may do so only on Your own behalf, and not on 215 | behalf of any Contributor. You must make it absolutely clear that any 216 | such warranty, support, indemnity, or liability obligation is offered by 217 | You alone, and You hereby agree to indemnify every Contributor for any 218 | liability incurred by such Contributor as a result of warranty, support, 219 | indemnity or liability terms You offer. You may include additional 220 | disclaimers of warranty and limitations of liability specific to any 221 | jurisdiction. 222 | 223 | 4. Inability to Comply Due to Statute or Regulation 224 | --------------------------------------------------- 225 | 226 | If it is impossible for You to comply with any of the terms of this 227 | License with respect to some or all of the Covered Software due to 228 | statute, judicial order, or regulation then You must: (a) comply with 229 | the terms of this License to the maximum extent possible; and (b) 230 | describe the limitations and the code they affect. Such description must 231 | be placed in a text file included with all distributions of the Covered 232 | Software under this License. Except to the extent prohibited by statute 233 | or regulation, such description must be sufficiently detailed for a 234 | recipient of ordinary skill to be able to understand it. 235 | 236 | 5. 
Termination 237 | -------------- 238 | 239 | 5.1. The rights granted under this License will terminate automatically 240 | if You fail to comply with any of its terms. However, if You become 241 | compliant, then the rights granted under this License from a particular 242 | Contributor are reinstated (a) provisionally, unless and until such 243 | Contributor explicitly and finally terminates Your grants, and (b) on an 244 | ongoing basis, if such Contributor fails to notify You of the 245 | non-compliance by some reasonable means prior to 60 days after You have 246 | come back into compliance. Moreover, Your grants from a particular 247 | Contributor are reinstated on an ongoing basis if such Contributor 248 | notifies You of the non-compliance by some reasonable means, this is the 249 | first time You have received notice of non-compliance with this License 250 | from such Contributor, and You become compliant prior to 30 days after 251 | Your receipt of the notice. 252 | 253 | 5.2. If You initiate litigation against any entity by asserting a patent 254 | infringement claim (excluding declaratory judgment actions, 255 | counter-claims, and cross-claims) alleging that a Contributor Version 256 | directly or indirectly infringes any patent, then the rights granted to 257 | You by any and all Contributors for the Covered Software under Section 258 | 2.1 of this License shall terminate. 259 | 260 | 5.3. In the event of termination under Sections 5.1 or 5.2 above, all 261 | end user license agreements (excluding distributors and resellers) which 262 | have been validly granted by You or Your distributors under this License 263 | prior to termination shall survive termination. 264 | 265 | ************************************************************************ 266 | * * 267 | * 6. 
Disclaimer of Warranty * 268 | * ------------------------- * 269 | * * 270 | * Covered Software is provided under this License on an "as is" * 271 | * basis, without warranty of any kind, either expressed, implied, or * 272 | * statutory, including, without limitation, warranties that the * 273 | * Covered Software is free of defects, merchantable, fit for a * 274 | * particular purpose or non-infringing. The entire risk as to the * 275 | * quality and performance of the Covered Software is with You. * 276 | * Should any Covered Software prove defective in any respect, You * 277 | * (not any Contributor) assume the cost of any necessary servicing, * 278 | * repair, or correction. This disclaimer of warranty constitutes an * 279 | * essential part of this License. No use of any Covered Software is * 280 | * authorized under this License except under this disclaimer. * 281 | * * 282 | ************************************************************************ 283 | 284 | ************************************************************************ 285 | * * 286 | * 7. Limitation of Liability * 287 | * -------------------------- * 288 | * * 289 | * Under no circumstances and under no legal theory, whether tort * 290 | * (including negligence), contract, or otherwise, shall any * 291 | * Contributor, or anyone who distributes Covered Software as * 292 | * permitted above, be liable to You for any direct, indirect, * 293 | * special, incidental, or consequential damages of any character * 294 | * including, without limitation, damages for lost profits, loss of * 295 | * goodwill, work stoppage, computer failure or malfunction, or any * 296 | * and all other commercial damages or losses, even if such party * 297 | * shall have been informed of the possibility of such damages. 
This * 298 | * limitation of liability shall not apply to liability for death or * 299 | * personal injury resulting from such party's negligence to the * 300 | * extent applicable law prohibits such limitation. Some * 301 | * jurisdictions do not allow the exclusion or limitation of * 302 | * incidental or consequential damages, so this exclusion and * 303 | * limitation may not apply to You. * 304 | * * 305 | ************************************************************************ 306 | 307 | 8. Litigation 308 | ------------- 309 | 310 | Any litigation relating to this License may be brought only in the 311 | courts of a jurisdiction where the defendant maintains its principal 312 | place of business and such litigation shall be governed by laws of that 313 | jurisdiction, without reference to its conflict-of-law provisions. 314 | Nothing in this Section shall prevent a party's ability to bring 315 | cross-claims or counter-claims. 316 | 317 | 9. Miscellaneous 318 | ---------------- 319 | 320 | This License represents the complete agreement concerning the subject 321 | matter hereof. If any provision of this License is held to be 322 | unenforceable, such provision shall be reformed only to the extent 323 | necessary to make it enforceable. Any law or regulation which provides 324 | that the language of a contract shall be construed against the drafter 325 | shall not be used to construe this License against a Contributor. 326 | 327 | 10. Versions of the License 328 | --------------------------- 329 | 330 | 10.1. New Versions 331 | 332 | Mozilla Foundation is the license steward. Except as provided in Section 333 | 10.3, no one other than the license steward has the right to modify or 334 | publish new versions of this License. Each version will be given a 335 | distinguishing version number. 336 | 337 | 10.2. 
Effect of New Versions 338 | 339 | You may distribute the Covered Software under the terms of the version 340 | of the License under which You originally received the Covered Software, 341 | or under the terms of any subsequent version published by the license 342 | steward. 343 | 344 | 10.3. Modified Versions 345 | 346 | If you create software not governed by this License, and you want to 347 | create a new license for such software, you may create and use a 348 | modified version of this License if you rename the license and remove 349 | any references to the name of the license steward (except to note that 350 | such modified license differs from this License). 351 | 352 | 10.4. Distributing Source Code Form that is Incompatible With Secondary 353 | Licenses 354 | 355 | If You choose to distribute Source Code Form that is Incompatible With 356 | Secondary Licenses under the terms of this version of the License, the 357 | notice described in Exhibit B of this License must be attached. 358 | 359 | Exhibit A - Source Code Form License Notice 360 | ------------------------------------------- 361 | 362 | This Source Code Form is subject to the terms of the Mozilla Public 363 | License, v. 2.0. If a copy of the MPL was not distributed with this 364 | file, You can obtain one at http://mozilla.org/MPL/2.0/. 365 | 366 | If it is not possible or desirable to put the notice in a particular 367 | file, then You may include the notice in a location (such as a LICENSE 368 | file in a relevant directory) where a recipient would be likely to look 369 | for such a notice. 370 | 371 | You may add additional accurate notices of copyright ownership. 372 | 373 | Exhibit B - "Incompatible With Secondary Licenses" Notice 374 | --------------------------------------------------------- 375 | 376 | This Source Code Form is "Incompatible With Secondary Licenses", as 377 | defined by the Mozilla Public License, v. 2.0. 
378 | -------------------------------------------------------------------------------- /message_parser_wasm/README.md: -------------------------------------------------------------------------------- 1 | # DeltaChat Message Parser WASM 2 | 3 | Parsing of Links, email addresses, simple text formatting (markdown subset), user mentions, hashtags and more in DeltaChat messages. 4 | 5 | The specification can be found in [spec.md](https://github.com/deltachat/message-parser/blob/main/spec.md). 6 | 7 | The parser is written in rust with the [nom crate](https://github.com/Geal/nom) and compiled to web assembly for this package. 8 | 9 | ## The Idea behind it 10 | 11 | Have the same rich message parsing on all platforms. 12 | 13 | The basic idea is that core can use this library to convert messages to an AST format, 14 | that can then be displayed by the UIs how they see fit, for desktop it will be converted to react elements. 15 | 16 | > Desktop already uses this package (minus the markdown, because it does not make sense to only have markdown on desktop and not also on iOS and android) as wasm module (see `./message_parser_wasm`), later this will probably be integrated into deltachat core. 17 | 18 | Read more about the project on github: https://github.com/deltachat/message-parser 19 | 20 | ## 🚴 Usage 21 | 22 | ```ts 23 | function parse_text(s: string, enable_markdown: boolean): ParsedElement[]; 24 | ``` 25 | 26 | ```js 27 | import init, { parse_text } from "./pkg/message_parser_wasm.js"; 28 | 29 | init().then(() => { 30 | let parsed = parse_text("hello **world**", true) 31 | 32 | let result = parsed.map(element => { 33 | switch (element.t) { 34 | case "Bold": 35 | return `${element.c}` 36 | break; 37 | case "Text": 38 | return element.c 39 | // ...
40 | default: 41 | console.error(`type ${element.t} not known/implemented yet`, element); 42 | return JSON.stringify(element) 43 | } 44 | }).join("") 45 | 46 | console.log(result) // "hello world" 47 | }) 48 | ``` 49 | 50 | > DO **NOT** actually write html with user input like that, this is for demonstration purposes ONLY! 51 | > It leaves you and your users open to **XSS attacks**; the examples below are much better suited for reference or copy+pasting. 52 | 53 | also see [example.js](./example.js) and test it live on 54 | 55 | For usage in react you can look at how we integrated this package in deltachat-desktop: [deltachat-desktop/src/renderer/components/message/MessageMarkdown.tsx](https://github.com/deltachat/deltachat-desktop/blob/7493f898bc3dff06b20565a48e93564f5996b855/src/renderer/components/message/MessageMarkdown.tsx) 56 | 57 | If you want to see it in action in deltachat-desktop, feel free to download it on . 58 | 59 | 60 | ### Emoji Helper functions 61 | 62 | ```js 63 | /** returns first emoji from text if text begins with an emoji */ 64 | export function get_first_emoji(input: string): string | undefined; 65 | /** If string contains only emojis count the emojis otherwise returns null */ 66 | export function count_emojis_if_only_contains_emoji(input: string): number | undefined; 67 | ``` 68 | 69 | ### For Devs 70 | 71 | #### 🛠️ Build with `wasm-pack build` 72 | 73 | ``` 74 | wasm-pack build --scope deltachat --target web 75 | ``` 76 | 77 | #### 🔬 Test in Headless Browsers with `wasm-pack test` 78 | 79 | ``` 80 | wasm-pack test --headless --firefox 81 | ``` 82 | 83 | #### 🎁 Publish to NPM with `wasm-pack publish` 84 | 85 | ``` 86 | wasm-pack publish --target web 87 | ``` 88 | -------------------------------------------------------------------------------- /message_parser_wasm/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 50 |

Message Parser

51 |

Input

52 | 76 | 77 | 82 |

Output

83 |
93 |

Output - AST

94 |

104 | 
105 |     

Emoji Helpers

106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 |
InputFirst EmojiEmoji count if only emojis
123 | 124 |

Punycode

125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 |
InputContains non ascii (is_puny)Encoded to PunycodeDecoded to Unicode
144 | 145 | 146 | 147 | -------------------------------------------------------------------------------- /message_parser_wasm/example.js: -------------------------------------------------------------------------------- 1 | //@ts-check 2 | 3 | import init, { 4 | parse_desktop_set, 5 | parse_text, 6 | get_first_emoji, 7 | count_emojis_if_only_contains_emoji, 8 | is_puny, 9 | punycode_encode_host, 10 | punycode_decode_host, 11 | } from "./pkg/message_parser_wasm.js"; 12 | 13 | /** @typedef {import("./pkg/message_parser_wasm.js").ParsedElement} ParsedElement */ 14 | 15 | /** 16 | * 17 | * @param {Node} base where the elements should get attached to 18 | * @param {ParsedElement[]} elms 19 | * @throws if element type is not known / unimplemented 20 | */ 21 | function renderElements(base, elms) { 22 | for (const elem of elms) { 23 | base.appendChild(renderElement(elem)); 24 | } 25 | } 26 | 27 | /** 28 | * 29 | * @param {ParsedElement} elm 30 | * @returns {Node} 31 | * @throws if element type is not known / unimplemented 32 | */ 33 | function renderElement(elm) { 34 | switch (elm.t) { 35 | case "CodeBlock": 36 | let cb = document.createElement("pre"); 37 | cb.className = "code-block"; 38 | if (elm.c.language) { 39 | let lang_hint = document.createElement("span"); 40 | lang_hint.innerText = elm.c.language; 41 | cb.append(lang_hint); 42 | } 43 | let innerCb = document.createElement("code"); 44 | innerCb.innerText = elm.c.content; 45 | cb.append(innerCb); 46 | return cb; 47 | 48 | case "InlineCode": 49 | let ic = document.createElement("code"); 50 | ic.innerText = elm.c.content; 51 | return ic; 52 | 53 | case "StrikeThrough": 54 | let s = document.createElement("s"); 55 | renderElements(s, elm.c); 56 | return s; 57 | case "Italics": 58 | let i = document.createElement("i"); 59 | renderElements(i, elm.c); 60 | return i; 61 | case "Bold": 62 | let b = document.createElement("b"); 63 | renderElements(b, elm.c); 64 | return b; 65 | 66 | case "Tag": 67 | let tag = 
document.createElement("a"); 68 | tag.innerText = elm.c; 69 | tag.href = "#"; 70 | tag.onclick = () => 71 | alert( 72 | `Clicked on a hastag, this should open search for the text "${elm.c}"` 73 | ); 74 | return tag; 75 | 76 | case "Link": 77 | let link = document.createElement("a"); 78 | link.innerText = elm.c.destination.target; 79 | link.href = elm.c.destination.target; 80 | return link; 81 | 82 | case "LabeledLink": 83 | let labeled_link = document.createElement("a"); 84 | renderElements(labeled_link, elm.c.label); 85 | labeled_link.href = elm.c.destination.target; 86 | return labeled_link; 87 | 88 | case "EmailAddress": 89 | let email = document.createElement("a"); 90 | email.innerText = elm.c; 91 | email.href = "mailto:" + elm.c; 92 | return email; 93 | 94 | case "BotCommandSuggestion": 95 | let bcs = document.createElement("a"); 96 | bcs.innerText = elm.c; 97 | bcs.href = "#"; 98 | bcs.onclick = () => 99 | alert( 100 | `Clicked on a BotCommandSuggestion, this should replace the current draft and if the draft is not empty it should ask whether it should be replaced"${elm.c}"` 101 | ); 102 | return bcs; 103 | 104 | case "Linebreak": 105 | return document.createElement("br"); 106 | 107 | case "Text": 108 | let t = document.createTextNode(elm.c); 109 | return t; 110 | } 111 | console.error(`type ${elm.t} not known/implemented yet`, elm); 112 | let errElement = document.createElement("span"); 113 | errElement.style.color = "red"; 114 | errElement.innerText = JSON.stringify(elm); 115 | return errElement; 116 | } 117 | 118 | init().then(() => { 119 | console.log(parse_text); 120 | /** @type {HTMLTextAreaElement} */ 121 | const input = document.getElementById("input"); 122 | const output = document.getElementById("result"); 123 | const output_ast = document.getElementById("ast"); 124 | /** @type {HTMLSelectElement} */ 125 | const parse_mode = document.getElementById("parse_mode"); 126 | parse_mode.value = localStorage.getItem("lastMode") || "text"; 127 | 128 | let 
running = false; 129 | let should_run_again = false; 130 | 131 | const action = () => { 132 | if (running) { 133 | should_run_again = true; 134 | return; 135 | } 136 | running = true; 137 | 138 | /** @type {'text'|'desktop'|'markdown'} */ 139 | //@ts-ignore 140 | const mode = parse_mode.value; 141 | 142 | /** @type {ParsedElement[]} */ 143 | let parsed = []; 144 | 145 | switch (mode) { 146 | case "desktop": 147 | parsed = parse_desktop_set(input.value); 148 | break; 149 | case "markdown": 150 | parsed = parse_text(input.value, true); 151 | break; 152 | case "text": 153 | default: 154 | parsed = parse_text(input.value, false); 155 | break; 156 | } 157 | 158 | // console.log(parsed); 159 | 160 | output.innerText = ""; 161 | output_ast.innerText = JSON.stringify(parsed, null, 4); 162 | 163 | renderElements(output, parsed); 164 | running = false; 165 | if (should_run_again) { 166 | should_run_again = false; 167 | action(); 168 | } 169 | }; 170 | action(); 171 | 172 | input.onkeyup = action; 173 | parse_mode.onchange = () => { 174 | localStorage.setItem("lastMode", parse_mode.value); 175 | action(); 176 | }; 177 | 178 | // emoji helpers 179 | /** @type {HTMLInputElement} */ 180 | const emoji_input = document.getElementById("emoji-test"); 181 | const emoji_out_first = document.getElementById("emoji-test-first"); 182 | const emoji_out_count = document.getElementById("emoji-test-count"); 183 | const emoji_update = () => { 184 | const text = emoji_input.value; 185 | emoji_out_first.innerText = String(get_first_emoji(text)); 186 | emoji_out_count.innerText = String( 187 | count_emojis_if_only_contains_emoji(text) 188 | ); 189 | setTimeout(emoji_update, 1); 190 | }; 191 | emoji_input.onchange = emoji_input.onkeydown = () => 192 | setTimeout(emoji_update, 1); 193 | 194 | // punycode 195 | /** @type {HTMLInputElement} */ 196 | const punycode_input = document.getElementById("punycode-test"); 197 | const punycode_is_puny = document.getElementById("punycode-test-is_puny"); 198 | 
const punycode_puny = document.getElementById("punycode-test-puny"); 199 | const punycode_uni = document.getElementById("punycode-test-uni"); 200 | const punycode_update = () => { 201 | const input = punycode_input.value; 202 | punycode_is_puny.innerText = String(is_puny(input)); 203 | punycode_puny.innerText = punycode_encode_host(input); 204 | punycode_uni.innerText = punycode_decode_host(input); 205 | }; 206 | punycode_input.onchange = punycode_input.onkeydown = () => 207 | setTimeout(punycode_update, 1); 208 | }); 209 | -------------------------------------------------------------------------------- /message_parser_wasm/src/lib.rs: -------------------------------------------------------------------------------- 1 | mod utils; 2 | 3 | use wasm_bindgen::prelude::*; 4 | 5 | // When the `wee_alloc` feature is enabled, use `wee_alloc` as the global 6 | // allocator. 7 | #[cfg(feature = "wee_alloc")] 8 | #[global_allocator] 9 | static ALLOC: wee_alloc::WeeAlloc = wee_alloc::WeeAlloc::INIT; 10 | 11 | // #[wasm_bindgen] 12 | // extern "C" { 13 | // fn alert(s: &str); 14 | // } 15 | 16 | /// parses text to json AST 17 | #[wasm_bindgen] 18 | pub fn parse_text(s: &str, enable_markdown: bool) -> JsValue { 19 | let ast = match enable_markdown { 20 | true => deltachat_message_parser::parser::parse_markdown_text(s), 21 | false => deltachat_message_parser::parser::parse_only_text(s), 22 | }; 23 | serde_wasm_bindgen::to_value(&ast).expect("Element converts to JsValue") 24 | } 25 | 26 | /// parses text to json AST (text elements and labeled links, to replicate current desktop implementation) 27 | #[wasm_bindgen] 28 | pub fn parse_desktop_set(s: &str) -> JsValue { 29 | serde_wasm_bindgen::to_value(&deltachat_message_parser::parser::parse_desktop_set(s)) 30 | .expect("Element converts to JsValue") 31 | } 32 | 33 | #[wasm_bindgen(typescript_custom_section)] 34 | const TS_APPEND_CONTENT: &'static str = r#" 35 | export type PunycodeWarning = { 36 | original_hostname: string; 37 | 
ascii_hostname: string; 38 | punycode_encoded_url: string; 39 | }; 40 | export type LinkDestination = { 41 | target: string; 42 | hostname: null | string; 43 | punycode: null | PunycodeWarning; 44 | scheme: null | string; 45 | }; 46 | export type ParsedElement = 47 | | { t: "Text"; c: string } 48 | | { t: "Tag"; c: string } 49 | | { t: "Linebreak" } 50 | | { t: "Bold"; c: ParsedElement[] } 51 | | { t: "Italics"; c: ParsedElement[] } 52 | | { t: "StrikeThrough"; c: ParsedElement[] } 53 | | { t: "InlineCode"; c: { content: string } } 54 | | { t: "CodeBlock"; c: { language: null | string; content: string } } 55 | | { t: "EmailAddress"; c: string } 56 | | { t: "BotCommandSuggestion"; c: string } 57 | | { t: "Link"; c: { destination: LinkDestination } } 58 | | { 59 | t: "LabeledLink"; 60 | c: { label: ParsedElement[]; destination: LinkDestination }; 61 | }; 62 | "#; 63 | 64 | /// returns first emoji from text if text begins with an emoji 65 | #[wasm_bindgen] 66 | pub fn get_first_emoji(input: &str) -> Option { 67 | deltachat_message_parser::parser::is_emoji::get_first_emoji(input).map(|s| s.to_owned()) 68 | } 69 | 70 | /// If string contains only emojis count the emojis otherwise retuns null 71 | #[wasm_bindgen] 72 | pub fn count_emojis_if_only_contains_emoji(input: &str) -> Option { 73 | deltachat_message_parser::parser::is_emoji::count_emojis_if_only_contains_emoji(input) 74 | } 75 | 76 | /// encode a host to punycode encoded string 77 | #[wasm_bindgen] 78 | pub fn punycode_encode_host(host: &str) -> String { 79 | deltachat_message_parser::parser::punycode_encode_host(host) 80 | } 81 | 82 | /// Returns host as decoded unicode string 83 | #[wasm_bindgen] 84 | pub fn punycode_decode_host(host: &str) -> String { 85 | deltachat_message_parser::parser::punycode_decode_host(host) 86 | } 87 | 88 | /// Returns true if host string contains non ASCII characters 89 | #[wasm_bindgen] 90 | pub fn is_puny(host: &str) -> bool { 91 | deltachat_message_parser::parser::is_puny(host) 92 
| } 93 | -------------------------------------------------------------------------------- /message_parser_wasm/src/manual_typings.ts: -------------------------------------------------------------------------------- 1 | // needs to be put into TS_APPEND_CONTENT manually, didn't find an easy way to include this file directly 2 | 3 | export type PunycodeWarning = { 4 | original_hostname: string; 5 | ascii_hostname: string; 6 | punycode_encoded_url: string; 7 | }; 8 | export type LinkDestination = { 9 | target: string; 10 | hostname: null | string; 11 | punycode: null | PunycodeWarning; 12 | scheme: null | string; 13 | }; 14 | export type ParsedElement = 15 | | { t: "Text"; c: string } 16 | | { t: "Tag"; c: string } 17 | | { t: "Linebreak" } 18 | | { t: "Bold"; c: ParsedElement[] } 19 | | { t: "Italics"; c: ParsedElement[] } 20 | | { t: "StrikeThrough"; c: ParsedElement[] } 21 | | { t: "InlineCode"; c: { content: string } } 22 | | { t: "CodeBlock"; c: { language: null | string; content: string } } 23 | | { t: "EmailAddress"; c: string } 24 | | { t: "BotCommandSuggestion"; c: string } 25 | | { t: "Link"; c: { destination: LinkDestination } } 26 | | { 27 | t: "LabeledLink"; 28 | c: { label: ParsedElement[]; destination: LinkDestination }; 29 | }; 30 | -------------------------------------------------------------------------------- /message_parser_wasm/src/utils.rs: -------------------------------------------------------------------------------- 1 | #[allow(dead_code)] 2 | pub fn set_panic_hook() { 3 | // When the `console_error_panic_hook` feature is enabled, we can call the 4 | // `set_panic_hook` function at least once during initialization, and then 5 | // we will get better error messages if our code ever panics. 
6 | // 7 | // For more details see 8 | // https://github.com/rustwasm/console_error_panic_hook#readme 9 | #[cfg(feature = "console_error_panic_hook")] 10 | console_error_panic_hook::set_once(); 11 | } 12 | -------------------------------------------------------------------------------- /message_parser_wasm/tests/web.rs: -------------------------------------------------------------------------------- 1 | //! Test suite for the Web and headless browsers. 2 | 3 | #![cfg(target_arch = "wasm32")] 4 | 5 | extern crate wasm_bindgen_test; 6 | use message_parser_wasm::*; 7 | use serde_json::json; 8 | use wasm_bindgen::prelude::*; 9 | use wasm_bindgen_test::*; 10 | 11 | wasm_bindgen_test_configure!(run_in_browser); 12 | 13 | use wasm_bindgen::JsValue; 14 | 15 | #[wasm_bindgen] 16 | extern "C" { 17 | #[wasm_bindgen(js_namespace = JSON)] 18 | fn stringify(s: &JsValue) -> JsValue; 19 | } 20 | 21 | #[wasm_bindgen_test] 22 | fn test_parse() { 23 | assert_eq!( 24 | stringify(&parse_text("**`Block`**", true)), 25 | JsValue::from_str(r#"[{"t":"Bold","c":[{"t":"InlineCode","c":{"content":"Block"}}]}]"#) 26 | ); // this test needs somekind of deep equal because the order of the properties is not fixed 27 | } 28 | -------------------------------------------------------------------------------- /rust-toolchain: -------------------------------------------------------------------------------- 1 | 1.77.2 2 | -------------------------------------------------------------------------------- /scripts/extract_hashtag_content_ranges.py: -------------------------------------------------------------------------------- 1 | # This script is written by Farooq Karimi Zadeh to 2 | # extract hashtag content characters per UAX31. 3 | # A hashtag content character is either from XID_Continue 4 | # also known as XIDC, Emoji character or Extended_Pictographics 5 | # also known as ExtPict. Additionally, '+', '-' and '_' count, too. 
6 | 7 | from xml.etree import ElementTree 8 | from dataclasses import dataclass 9 | from pprint import pprint 10 | 11 | @dataclass 12 | class Range: 13 | start: int 14 | end: int 15 | 16 | def __repr__(self) -> str: 17 | start = hex(self.start) 18 | end = hex(self.end) 19 | return f"{start}..={end}," 20 | 21 | def predicate(c) -> bool: 22 | if c.attrib.get("XIDC") == "Y": 23 | return True 24 | if c.attrib.get("Emoji") == "Y": 25 | return True 26 | if c.attrib.get("ExtPict") == "Y": 27 | return True 28 | return False 29 | 30 | if __name__ == "__main__": 31 | data = "" 32 | with open("ucd.all.flat.xml") as fp: 33 | data = fp.read() 34 | 35 | tree = ElementTree.fromstring(data) 36 | repertoire = list(tree.iter())[2] 37 | accepted_chars = map(lambda c: int(c.get("cp", "0"), 16), filter(predicate, repertoire.iter())) 38 | accepted_chars = sorted(accepted_chars) 39 | ranges: list[Range] = list() 40 | 41 | r: Range | None = None 42 | for c in accepted_chars: 43 | if r is None: 44 | r = Range(c, c) 45 | elif c == (r.end + 1): 46 | r.end = c 47 | elif c <= r.end: 48 | continue 49 | else: 50 | ranges.append(r) 51 | r = Range(c, c) 52 | 53 | if r: 54 | ranges.append(r) 55 | pprint(ranges) 56 | -------------------------------------------------------------------------------- /spec.md: -------------------------------------------------------------------------------- 1 | # Message formatting / parsing of interactive elements and Specification of the format 2 | 3 | > This list is for every formating thing that goes beyond plain-text. 
4 | 5 | ## Modes of the parser 6 | 7 | - Text only 8 | - [Email addresses: `hello@delta.chat`](#email-addresses) 9 | - [Links: `https://delta.chat` and `mailto:hello@delta.chat`](#links) 10 | - [Bot `/commands`](#bot-commands) 11 | - [Hashtags: `#tag`](#hashtag) 12 | - Desktop (made for what desktop now supports, so we can use the wasm version of the message parser as drop-in replacement) 13 | - All from Text (see above) 14 | - [Delimited Email addresses: `<hello@delta.chat>`](#delimited-email-addresses) 15 | - [Delimited Links: `<https://delta.chat>`](#delimited-links) 16 | - [Labeled Links: `[Name](url)`](#labled-links) 17 | - (everything) Markdown 18 | - [_italics_: `*italics*`](#italics) 19 | - [**bold**: `**bold**`](#bold) 20 | - [~~strikethrough~~: `~~strikethrough~~`](#strikethrough) 21 | - [`inline-code`: `` `inline-code` ``](#inline-code) 22 | - [Code Block: ` ``` fence code block ``` `](#code-block) 23 | - [Delimited Email addresses: `<hello@delta.chat>`](#delimited-email-addresses) 24 | - [Delimited Links: `<https://delta.chat>`](#delimited-links) 25 | - [Labeled Links: `[Name](url)`](#labled-links) 26 | 27 | ## Text Enhancements 28 | 29 | Text elements that are displayed as is with no change to the content, just enhanced (made clickable) if necessary. 30 | 31 | 32 | 33 | ### `hello@delta.chat` - Email addresses 34 | 35 | Make email addresses clickable, opens the chat with that contact and creates it if it does not already exist. 36 | 37 | 38 | 39 | ### `https://delta.chat` and `mailto:example@example.com` - Links 40 | 41 | Make URLs clickable. 42 | 43 | - detect all valid hyperlink URLs that have the `://` (protocol://host). 44 | - according to [RFC3986](https://www.rfc-editor.org/rfc/rfc3986) and [RFC3987](https://www.rfc-editor.org/rfc/rfc3987) 45 | - In addition to the spec, explicitly allow multiple `#` characters in the ifragment, leaving it up to the destination to handle correctly.
46 | 47 | - other links like `mailto:` (note there is just a single `:`, no `://`) will get separate parsing that includes a whitelisted protocol name, otherwise there will likely be unexpected behavior: if the user types `hello:world`, it would be recognized as a link. 48 | 49 | - allow simple links without protocol scheme so long as they match the original world wide TLDs or `chat`. 50 | - see [RFC1591](https://www.rfc-editor.org/rfc/rfc1591) for world wide domains 51 | 52 | - `.`,`,`,`;`,`:` should not be parsed as an ending char of an inline-link (this rule is only for standalone/inline links) 53 | 54 | #### Linkified schemes: 55 | 56 | - all Common Internet Scheme links (containing `//` after scheme), 57 | - `mailto:`, `news:`, `feed:` 58 | - `tel:`, `sms:`, `geo:`, `maps:` 59 | - `bitcoin:`, `bitcoincash:`, `eth:`, `ethereum:` 60 | - `magnet:` 61 | 62 | ##### `mailto:email@address.example.com` 63 | 64 | Make mailto links clickable with all parameters: `?subject=Sample%20Subject&body=Sample%20Body` 65 | Should open in delta chat directly. 66 | 67 | ##### Custom Deltachat URI Scheme 68 | 69 | see https://support.delta.chat/t/custom-deltachat-url-scheme/346 70 | Should open in deltachat directly. 71 | 72 | 73 | 74 | ### Bot `/commands` 75 | 76 | On click, the command gets prefilled as the draft, so it can be easily sent. 77 | Also if the draft is not empty it should ask before replacing it. 78 | 79 | ```regex 80 | /(?<=^|\\s)//[a-zA-Z][a-zA-Z@\\d_.-]{0,254}/ 81 | ``` 82 | 83 | 84 | 85 | ### `#tag` 86 | 87 | `/#[\w]+/i` 88 | 89 | > later we want something like `/#[^ \n\r\t#]+/` (`#` then everything (besides `#`) until space/line break/tab) to also allow for chars from other locales and emojis, see https://github.com/deltachat/message-parser/issues/8 for more info 90 | 91 | Basically a clickable search shortcut. On click, it opens the message search prefilled with that tag. 92 | 93 | Inspired by Twitter's and Telegram's #hashtag functionality.
94 | 95 | ### other / internal 96 | 97 | - Text (what remains if nothing else could be detected) 98 | - line breaks 99 | 100 | ## Markdown subset 101 | 102 | The subset of Markdown that Deltachat is going to support, this contains everything that needs to be displayed differently, not only made clickable. 103 | 104 | 105 | 106 | ### `*italics*` and `_italics_` 107 | 108 | No whitespace as first nor as end char: 109 | correct: 110 | 111 | ``` 112 | *italics* test 113 | *italics test* 114 | ``` 115 | 116 | wrong: 117 | 118 | ``` 119 | * italics* test 120 | ``` 121 | 122 | 123 | 124 | ### `**bold**` and `__bold__` 125 | 126 | No whitespace as first nor as end char: see italics examples. 127 | 128 | 129 | 130 | ### `~~strikethrough~~` 131 | 132 | No whitespace as first nor as end char: see italics examples. 133 | 134 | 135 | 136 | ### `` `inline-code` `` 137 | 138 | Useful to send non Markdown text in your message, like source code snippets. 139 | Should get rendered in a monospace font and with a different background. 140 | In contrast to bold, italics and strike through the content of inline-code can contain spaces at beginning and ending. 141 | 142 | 143 | 144 | ### ` ``` fence code block ``` ` 145 | 146 | ``` 147 | Similar to `inline-code` but not inline, and it may support code highlighting. 148 | ``` 149 | 150 | ` ```[lang?] [content]``` ` 151 | A bit modified from the common syntax to allow one-liners. 152 | Also get displayed with a monospace font (a side effect of this is that it allows to display small ASCII art). 153 | The code **highlighting** is **optional** as implementation (time)cost 154 | may not be worth the small gain. 155 | The `language` definition should be parsed separately and omitted in this case. 
156 | 157 | If no language is set in the single line variant, the content must begin with a space: 158 | WRONG: ` ```hello world``` ` (because hello will be taken as language) 159 | RIGHT: ` ``` hello world``` ` 160 | 161 | see https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet#code-and-syntax-highlighting 162 | 163 | Parsing will remove trailing whitespace and linebreaks at the end of the codeblock content. 164 | 165 | 166 | 167 | ### `<hello@delta.chat>` - Delimited Email addresses 168 | 169 | same format as normal email addresses enclosed in `<>`. 170 | 171 | 172 | 173 | ### `<https://delta.chat>` - Delimited Link 174 | 175 | same format as normal Links enclosed in `<>`. 176 | 177 | URL parsing allows all valid URLs, no restrictions on schemes, no whitelist is needed, because the format already specifies that it is a link. 178 | 179 | 180 | 181 | ### Labeled Links: `[Name](url)` links 182 | 183 | When implementing this, make sure to show the user the hidden URL in a confirmation dialog to make scamming harder. 184 | Also show the URL in its Punycode-encoded form to make Punycode attacks useless. 185 | Optionally, a client can implement a system to trust a domain (a "don't ask again for links on this domain" checkbox in the confirmation dialog) 186 | 187 | URL parsing allows all valid URLs, no restrictions on schemes, no whitelist is needed, because the format already specifies that it is a link. 188 | 189 | The label can contain basic markdown elements (bold, italics), but no "complex" linkified elements such as hashtags, links and email addresses. 190 | 191 | - parsers that run for a label: 192 | - (desktop set): none 193 | - (markdown set): bold, italics, underline, code-inline 194 | - parsers that do not run for a label (just returned as part of Text element): 195 | - hashtag, email, link, labeled link, delimited email & link, codeblock, mentions (basically everything clickable) 196 | 197 | ## Ideas For The Future: 198 | 199 | ### `:emoji:` 200 | 201 | - ':' + [A-z0-9_-] + ':' ?
202 | - could also be used for custom DC emojis in the future 203 | 204 | ### Mentions `@username` 205 | 206 | Clickable. (could get replaced with a user hash/email/ID on send/on receive so that it's still valid on name change.) 207 | 208 | On sending/receiving, this is transformed into an internal representation: 209 | 210 | Implementation idea: 211 | 212 | 1. user types @Displayname and at best gets autocompletion while typing the name 213 | 2. on sending, the username is converted to the transmission format (special format that contains the email address as ID) 214 | 3. on receiving/storing the message inside the database, this format is converted again to contain the local contact ID to allow for future email address migration/rotation. 215 | (4.) on forwarding/sharing as chat history, the ID representation needs to be converted from the contact ID format to the transmission format again 216 | 217 | see Discord's mention code for reference/inspiration https://blog.discordapp.com/how-discord-renders-rich-messages-on-the-android-app-67b0e5d56fbe 218 | 219 | ### `$[inline TeX]$` and `$$[TeX displayed in block(new line)]$$` 220 | 221 | For sharing math/physics equations in LaTeX format.
222 | see https://support.delta.chat/t/latex-code-in-deltachat/558 223 | 224 | ## Things that will NOT be supported: 225 | 226 | - Inline HTML 227 | - underline - can be confused with links 228 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny( 2 | //panic prevention lints 3 | clippy::indexing_slicing, 4 | clippy::assertions_on_constants, 5 | clippy::await_holding_refcell_ref, 6 | clippy::diverging_sub_expression, 7 | clippy::expect_used, 8 | clippy::fallible_impl_from, 9 | clippy::get_last_with_len, 10 | clippy::get_unwrap, 11 | clippy::get_unwrap, 12 | clippy::arithmetic_side_effects, 13 | clippy::match_on_vec_items, 14 | clippy::match_wild_err_arm, 15 | clippy::missing_panics_doc, 16 | clippy::panic, 17 | clippy::panic_in_result_fn, 18 | clippy::unwrap_in_result, 19 | clippy::unwrap_used, 20 | clippy::string_slice, 21 | clippy::empty_loop, 22 | clippy::correctness, 23 | clippy::needless_borrow, 24 | clippy::cast_lossless, 25 | clippy::obfuscated_if_else, 26 | clippy::index_refutable_slice, 27 | clippy::panic_in_result_fn, 28 | clippy::unwrap_in_result, 29 | clippy::exit, 30 | clippy::todo, 31 | clippy::expect_used, 32 | clippy::unimplemented, 33 | clippy::manual_strip, 34 | )] 35 | 36 | extern crate nom; 37 | pub mod parser; 38 | 39 | #[macro_use] 40 | extern crate serde_derive; 41 | -------------------------------------------------------------------------------- /src/main.rs: -------------------------------------------------------------------------------- 1 | use std::io::{self, Read, Write}; 2 | 3 | use parser::parse_markdown_text; 4 | #[allow(dead_code)] 5 | mod parser; 6 | extern crate nom; 7 | #[macro_use] 8 | extern crate serde_derive; 9 | 10 | fn main() -> io::Result<()> { 11 | let mut buffer = String::new(); 12 | io::stdin().read_to_string(&mut buffer)?; 13 | 14 | //println!("input: {:?}", buffer); 15 | 16 | let 
output = parse_markdown_text(&buffer); 17 | 18 | io::stdout().write_all(format!("output: {:?}", output).as_bytes())?; 19 | 20 | //println!("output: {:?}", output); 21 | Ok(()) 22 | } 23 | -------------------------------------------------------------------------------- /src/parser/is_emoji.rs: -------------------------------------------------------------------------------- 1 | // thanks to https://medium.com/reactnative/emojis-in-javascript-f693d0eb79fb for figuring the main details and ranges. 2 | 3 | use nom::{ 4 | branch::alt, 5 | bytes::complete::tag, 6 | character::complete::{self, satisfy}, 7 | combinator::{opt, recognize}, 8 | multi::{many1, many_m_n}, 9 | sequence::tuple, 10 | IResult, 11 | }; 12 | 13 | fn variant_selector(c: char) -> bool { 14 | matches!(c, '\u{fe00}'..='\u{fe0f}') 15 | } 16 | 17 | fn zero_width_joiner(c: char) -> bool { 18 | c == '\u{200d}' 19 | } 20 | 21 | fn single_char_emoji_core(c: char) -> bool { 22 | matches!(c, 23 | // Dingbats 24 | | '\u{2700}'..='\u{27bf}' 25 | // miscSymbols 26 | | '\u{2600}'..='\u{26ff}' 27 | // cjkLettersAndMonths 28 | | '\u{3299}' | '\u{3297}' 29 | // cjkSymbolsAndPunctuation 30 | | '\u{303d}' | '\u{3030}' 31 | // enclosedAlphanumerics 32 | | '\u{24c2}' 33 | // generalPunctuation 34 | | '\u{203c}' | '\u{2049}' 35 | // geometricShapes 36 | | '\u{25aa}'..='\u{25ab}' | '\u{25b6}' | '\u{25c0}' | '\u{25fb}'..='\u{25fe}' 37 | // latin1Supplement 38 | | '\u{00a9}' | '\u{00ae}' 39 | // letterLikeSymbols 40 | | '\u{2122}' | '\u{2139}' 41 | // miscSymbolsAndArrows 42 | | '\u{2b05}' | '\u{2b06}' | '\u{2b07}' | '\u{2b1b}' | '\u{2b1c}' | '\u{2b50}' | '\u{2b55}' 43 | // miscTechnical 44 | | '\u{231a}' | '\u{231b}' | '\u{2328}' | '\u{23cf}' | '\u{23e9}'..='\u{23f3}' | '\u{23f8}'..='\u{23fa}' 45 | // supplementalArrows 46 | | '\u{2934}' | '\u{2935}' 47 | // arrows 48 | | '\u{2190}'..='\u{2199}' 49 | // Unicode Block “Enclosed Alphanumeric Supplement” 50 | | '🅰' | '🅱' | '🅾'| '🅿' | '🆎' | '🆑'..='🆚' 51 | // Unicode Block 
“Enclosed Ideographic Supplement” https://www.compart.com/en/unicode/block/U+1F200 52 | | '🈁' | '🈚'| '🈯' | '🈲'..='🈶' | '🈸'..='🈺' | '🉐' | '🉑' 53 | // Unicode Block “Miscellaneous Symbols and Pictographs” https://www.compart.com/en/unicode/block/U+1F300 54 | | '🌀'..='🌡' | '🌤'..='🎓' | '🎖'..='🎗'| '🎙'..='🎛' | '🎞'..='🏰' | '🏳'..='🏵' | '🏷'..='📽' | '📿'..='🔽' 55 | | '🕉'..='🕎' | '🕐'..='🕧'| '🕯' | '🕰' | '🕳'..='🕺' | '🖇' | '🖊'..='🖍' | '🖐' | '🖕' | '🖖' | '🖤' | '🖥' 56 | | '🖨' | '🖱' | '🖲' | '🖼' | '🗂'..='🗄' | '🗑'..='🗓' | '🗜' | '🗞' | '🗡' |'🗣' | '🗨' | '🗯' | '🗳' | '🗺'..='🗿' 57 | // Unicode Block “Emoticons” https://www.compart.com/en/unicode/block/U+1F600 58 | | '😀'..='🙏' 59 | // Unicode Block “Transport and Map Symbols” https://www.compart.com/en/unicode/block/U+1F680 60 | | '🚀'..='🛅' | '🛋'..='🛒' | '🛕'..='🛥' | '🛩' | '🛫'..='🛰' | '🛳'..='🛼' 61 | // Unicode Block “Geometric Shapes Extended” https://www.compart.com/en/unicode/block/U+1F780 62 | | '🟠'..='🟫' 63 | // Unicode Block “Supplemental Symbols and Pictographs” https://www.compart.com/en/unicode/block/U+1F900 64 | | '🤌'..='🤺' | '🤼'..='🥅' | '🥇'..='🧿' 65 | // Unicode Block “Symbols and Pictographs Extended-A” https://www.compart.com/en/unicode/block/U+1FA70 66 | | '🩰'..='🫸' 67 | // other 68 | | '🗝' | '🟰' 69 | ) 70 | } 71 | 72 | fn emoji_core(input: &str) -> IResult<&str, &str> { 73 | alt(( 74 | // region flags 75 | recognize(tuple(( 76 | complete::char('🏴'), 77 | many1(satisfy(|c| matches!(c, '\u{e0061}'..='\u{e007a}'))), 78 | complete::char('\u{e007f}'), 79 | ))), 80 | // Regional -> Flags 81 | recognize(tuple(( 82 | satisfy(|c| matches!(c, '🇦'..='🇿')), 83 | satisfy(|c| matches!(c, '🇦'..='🇿')), 84 | ))), 85 | // standard emoji chars 86 | recognize(satisfy(single_char_emoji_core)), 87 | // keycap 88 | recognize(tuple(( 89 | satisfy(|c| ('\u{0023}'..='\u{0039}').contains(&c)), 90 | opt(complete::char('\u{fe0f}')), 91 | complete::char('\u{20e3}'), 92 | ))), 93 | // mahjongTile 94 | tag("🀄"), 95 | // playingCard 96 | tag("🃏"), 97 | // other 
98 | tag("🈂️"), 99 | tag("🈷️"), 100 | tag("↩️"), 101 | tag("↪️"), 102 | ))(input) 103 | } 104 | 105 | fn emoji_modifier(c: char) -> bool { 106 | matches!(c, '🏻' | '🏼' | '🏽' | '🏾' | '🏿') 107 | } 108 | 109 | const USIZE_MAX_COMPOSITE_LEN: usize = 10; 110 | 111 | macro_rules! emoji_with_variant { 112 | () => { 113 | tuple(( 114 | emoji_core, 115 | opt(satisfy(variant_selector)), 116 | opt(satisfy(emoji_modifier)), 117 | )) 118 | }; 119 | } 120 | 121 | // nom parser that eats one emoji 122 | pub fn emoji(input: &str) -> IResult<&str, &str> { 123 | recognize(tuple(( 124 | emoji_with_variant!(), 125 | many_m_n( 126 | 0, 127 | USIZE_MAX_COMPOSITE_LEN, 128 | tuple((satisfy(zero_width_joiner), emoji_with_variant!())), 129 | ), 130 | )))(input) 131 | } 132 | 133 | /// returns first emoji from text if text begins with an emoji 134 | pub fn get_first_emoji(text: &str) -> Option<&str> { 135 | if let Ok((_, emoji)) = emoji(text) { 136 | Some(emoji) 137 | } else { 138 | None 139 | } 140 | } 141 | 142 | /// If string contains only emojis count the emojis otherwise retuns None 143 | pub fn count_emojis_if_only_contains_emoji(input: &str) -> Option { 144 | let mut remainder = input; 145 | let mut count: u32 = 0; 146 | 147 | while let Ok((new_remainder, _)) = emoji(remainder) { 148 | remainder = new_remainder; 149 | count = count.saturating_add(1); 150 | } 151 | 152 | if !remainder.is_empty() { 153 | // string contains not only emojis 154 | return None; 155 | } 156 | 157 | if count == 0 { 158 | None 159 | } else { 160 | Some(count) 161 | } 162 | } 163 | 164 | #[cfg(test)] 165 | mod emoji_test { 166 | mod emoji_char { 167 | use crate::parser::is_emoji::emoji; 168 | 169 | #[test] 170 | fn some_emojis() { 171 | assert!(emoji("🔥").is_ok()); 172 | } 173 | 174 | #[test] 175 | fn not_emoji() { 176 | // println!("{:?}", emoji("A")); 177 | assert!(emoji("A").is_err()); 178 | } 179 | 180 | #[test] 181 | fn keycap() { 182 | // keycap emojis 183 | assert!(emoji("#️⃣").is_ok()); 184 | } 185 | 186 
| #[test] 187 | fn flag() { 188 | // flag emojis 189 | assert!(emoji("🇦🇨").is_ok()); 190 | } 191 | 192 | #[test] 193 | fn mahjong() { 194 | // mahjongTiles 195 | assert!(emoji("🀄").is_ok()); 196 | } 197 | 198 | #[test] 199 | fn playing_card() { 200 | // playingCard 201 | assert!(emoji("🃏").is_ok()); 202 | } 203 | 204 | #[test] 205 | fn supplemental_arrows() { 206 | // supplementalArrows 207 | assert!(emoji("⤴").is_ok()); 208 | assert!(emoji("⤵").is_ok()); 209 | } 210 | 211 | #[test] 212 | fn test_variant_emoji() { 213 | assert!(emoji("🏋️‍♀️").is_ok()); 214 | assert!(emoji("🤹🏽").is_ok()); 215 | assert!(emoji("🛌🏿").is_ok()); 216 | } 217 | // composite with zero width joiner 218 | #[test] 219 | fn test_composite_emoji() { 220 | assert!(emoji("❤️‍🔥").is_ok()); 221 | assert!(emoji("🐕‍🦺").is_ok()); 222 | assert!(emoji("👩‍👩‍👧").is_ok()); 223 | assert!(emoji("🧑🏿‍🤝‍🧑🏿").is_ok()); 224 | assert!(emoji("👩🏽‍❤️‍👨🏽").is_ok()); 225 | } 226 | } 227 | 228 | mod exported_methods { 229 | use crate::parser::is_emoji::{count_emojis_if_only_contains_emoji, get_first_emoji}; 230 | 231 | #[test] 232 | fn test_get_first_emoji() { 233 | assert_eq!(get_first_emoji("#️⃣ Hashtag"), Some("#️⃣")); 234 | assert_eq!(get_first_emoji("#️⃣Hashtag"), Some("#️⃣")); 235 | assert_eq!(get_first_emoji("#️⃣🃏Hashtag"), Some("#️⃣")); 236 | assert_eq!(get_first_emoji("Hashtag #️⃣"), None); 237 | assert_eq!(get_first_emoji("'#️⃣"), None); 238 | assert_eq!(get_first_emoji("❤️‍🔥Hashtag"), Some("❤️‍🔥")); 239 | assert_eq!(get_first_emoji("👩🏽‍❤️‍👨🏽Hashtag"), Some("👩🏽‍❤️‍👨🏽")); 240 | assert_eq!(get_first_emoji("🇪🇸🚧"), Some("🇪🇸")); 241 | } 242 | 243 | #[test] 244 | fn test_string_contains_only_emojis_and_count() { 245 | assert_eq!(count_emojis_if_only_contains_emoji("#️⃣"), Some(1)); 246 | assert_eq!( 247 | count_emojis_if_only_contains_emoji("👩🏽‍❤️‍👨🏽Hashtag"), 248 | None 249 | ); 250 | assert_eq!(count_emojis_if_only_contains_emoji("❤️‍🔥"), Some(1)); 251 | assert_eq!(count_emojis_if_only_contains_emoji("👩🏽‍❤️‍👨🏽"), 
Some(1)); 252 | assert_eq!( 253 | count_emojis_if_only_contains_emoji("👩🏽‍❤️‍👨🏽👩🏽‍❤️‍👨🏽"), 254 | Some(2) 255 | ); 256 | assert_eq!( 257 | count_emojis_if_only_contains_emoji("👩🏽‍❤️‍👨🏽❤️‍🔥👩🏽‍❤️‍👨🏽"), 258 | Some(3) 259 | ); 260 | // hair color 261 | assert_eq!(count_emojis_if_only_contains_emoji("👨‍🦰"), Some(1)); 262 | assert_eq!(count_emojis_if_only_contains_emoji("👨‍🦳"), Some(1)); 263 | assert_eq!( 264 | count_emojis_if_only_contains_emoji("🇪🇸🚧🚧🚧🚧🚧🚧🚧"), 265 | Some(8) 266 | ); 267 | } 268 | } 269 | } 270 | -------------------------------------------------------------------------------- /src/parser/link_url/allowed_tlds/country_tlds.rs: -------------------------------------------------------------------------------- 1 | // extracted from first table on https://de.wikipedia.org/wiki/Liste_länderspezifischer_Top-Level-Domains 2 | pub const COUNTRY_TLDS: [&str; 254] = [ 3 | "ac", "ad", "ae", "af", "ag", "ai", "al", "am", "ao", "aq", "ar", "as", "at", "au", "aw", "ax", 4 | "az", "ba", "bb", "bd", "be", "bf", "bg", "bh", "bi", "bj", "bl", "bm", "bn", "bo", "bq", "br", 5 | "bs", "bt", "bv", "bw", "by", "bz", "ca", "cc", "cd", "cf", "cg", "ch", "ci", "ck", "cl", "cm", 6 | "cn", "co", "cr", "cu", "cv", "cw", "cx", "cy", "cz", "de", "dj", "dk", "dm", "do", "dz", "ec", 7 | "ee", "eg", "eh", "er", "es", "et", "eu", "fi", "fj", "fk", "fm", "fo", "fr", "ga", "gb", "gd", 8 | "ge", "gf", "gg", "gh", "gi", "gl", "gm", "gn", "gp", "gq", "gr", "gs", "gt", "gu", "gw", "gy", 9 | "hk", "hm", "hn", "hr", "ht", "hu", "id", "ie", "il", "im", "in", "io", "iq", "ir", "is", "it", 10 | "je", "jm", "jo", "jp", "ke", "kg", "kh", "ki", "km", "kn", "kp", "kr", "kw", "ky", "kz", "la", 11 | "lb", "lc", "li", "lk", "lr", "ls", "lt", "lu", "lv", "ly", "ma", "mc", "md", "me", "mf", "mg", 12 | "mh", "mk", "ml", "mm", "mn", "mo", "mp", "mq", "mr", "ms", "mt", "mu", "mv", "mw", "mx", "my", 13 | "mz", "na", "nc", "ne", "nf", "ng", "ni", "nl", "no", "np", "nr", "nu", "nz", "om", "pa", "pe", 14 | "pf", "pg", 
"ph", "pk", "pl", "pm", "pn", "pr", "ps", "pt", "pw", "py", "qa", "re", "ro", "rs", 15 | "ru", "rw", "sa", "sb", "sc", "sd", "se", "sg", "sh", "si", "sj", "sk", "sl", "sm", "sn", "so", 16 | "sr", "ss", "st", "su", "sv", "sx", "sy", "sz", "tc", "td", "tf", "tg", "th", "tj", "tk", "tl", 17 | "tm", "tn", "to", "tp", "tr", "tt", "tv", "tw", "tz", "ua", "ug", "uk", "us", "uy", "uz", "va", 18 | "vc", "ve", "vg", "vi", "vn", "vu", "wf", "ws", "ye", "yt", "za", "zm", "zr", "zw", 19 | ]; 20 | -------------------------------------------------------------------------------- /src/parser/link_url/allowed_tlds/mod.rs: -------------------------------------------------------------------------------- 1 | mod country_tlds; 2 | 3 | const ALLOWED_TOP_LEVEL_DOMAINS: &[&str] = &[ 4 | // originals from RFC920 + net 5 | "com", "org", "net", "edu", "gov", "mil", // for deltachat 6 | "chat", 7 | ]; 8 | 9 | pub fn check_if_tld_is_allowed(tld: &str) -> bool { 10 | if ALLOWED_TOP_LEVEL_DOMAINS.iter().any(|item| *item == tld) { 11 | true 12 | } else { 13 | country_tlds::COUNTRY_TLDS.binary_search(&tld).is_ok() 14 | } 15 | } 16 | 17 | #[cfg(test)] 18 | mod test { 19 | use crate::parser::link_url::allowed_tlds::check_if_tld_is_allowed; 20 | 21 | #[test] 22 | fn test_check_tld() { 23 | assert!(check_if_tld_is_allowed("chat")); 24 | assert!(check_if_tld_is_allowed("com")); 25 | 26 | assert!(check_if_tld_is_allowed("de")); 27 | assert!(check_if_tld_is_allowed("at")); 28 | assert!(check_if_tld_is_allowed("uk")); 29 | assert!(check_if_tld_is_allowed("fr")); 30 | } 31 | 32 | #[test] 33 | fn test_check_tld_not_allowed() { 34 | assert!(!check_if_tld_is_allowed("doesnotexist")); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/parser/link_url/ip/ip_literal.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, character::complete::char, combinator::recognize, sequence::tuple, IResult, 3 | 
}; 4 | 5 | use crate::parser::{ 6 | link_url::ip::{ipv6::ipv6, ipvfuture::ipvfuture}, 7 | parse_from_text::base_parsers::CustomError, 8 | }; 9 | 10 | pub fn ip_literal(input: &str) -> IResult<&str, &str, CustomError<&str>> { 11 | recognize(tuple((char('['), alt((ipv6, ipvfuture)), char(']'))))(input) 12 | } 13 | -------------------------------------------------------------------------------- /src/parser/link_url/ip/ipv4.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | character::complete::{char, u8}, 3 | combinator::recognize, 4 | sequence::tuple, 5 | IResult, 6 | }; 7 | 8 | use crate::parser::parse_from_text::base_parsers::CustomError; 9 | 10 | pub fn ipv4(input: &str) -> IResult<&str, &str, CustomError<&str>> { 11 | let (input, ipv4_) = 12 | recognize(tuple((u8, char('.'), u8, char('.'), u8, char('.'), u8)))(input)?; 13 | Ok((input, ipv4_)) 14 | } 15 | -------------------------------------------------------------------------------- /src/parser/link_url/ip/ipv6.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | branch::alt, 3 | bytes::complete::{tag, take_while_m_n}, 4 | character::complete::char, 5 | combinator::{opt, recognize}, 6 | multi::{count, many_m_n}, 7 | sequence::tuple, 8 | IResult, 9 | }; 10 | 11 | use crate::parser::{parse_from_text::base_parsers::CustomError, utils::is_hex_digit}; 12 | 13 | use super::ipv4::ipv4; 14 | 15 | fn h16(input: &str) -> IResult<&str, &str, CustomError<&str>> { 16 | take_while_m_n(1, 4, is_hex_digit)(input) 17 | } 18 | 19 | // consume or an ipv4 20 | fn ls32(input: &str) -> IResult<&str, &str, CustomError<&str>> { 21 | let result = recognize(tuple((h16, char(':'), h16)))(input); 22 | if result.is_err() { 23 | ipv4(input) 24 | } else { 25 | result 26 | } 27 | } 28 | 29 | fn h16_and_period(input: &str) -> IResult<&str, &str, CustomError<&str>> { 30 | recognize(tuple((h16, char(':'))))(input) 31 | } 32 | 33 | fn 
double_period(input: &str) -> IResult<&str, &str, CustomError<&str>> { 34 | tag("::")(input) 35 | } 36 | 37 | pub fn ipv6(input: &str) -> IResult<&str, &str, CustomError<&str>> { 38 | // an IPv6 is one of these: 39 | alt(( 40 | // <6 h16_and_period> 41 | recognize(tuple((count(h16_and_period, 6), ls32))), 42 | // :: <5 h16_and_period> 43 | recognize(tuple((double_period, many_m_n(5, 5, h16_and_period), ls32))), 44 | // [h16] :: <4 h16_and_period> 45 | recognize(tuple(( 46 | opt(h16), 47 | double_period, 48 | count(h16_and_period, 4), 49 | ls32, 50 | ))), 51 | // [h16_and_period] :: <3*h16_and_period> 52 | recognize(tuple(( 53 | opt(tuple((many_m_n(0, 1, h16_and_period),))), 54 | double_period, 55 | count(h16_and_period, 3), 56 | ls32, 57 | ))), 58 | // [<0 to 2 h16_and_period> ] :: <2*h16_and_period> 59 | recognize(tuple(( 60 | opt(tuple((many_m_n(0, 2, h16_and_period), h16))), 61 | double_period, 62 | count(h16_and_period, 2), 63 | ls32, 64 | ))), 65 | // [<0 to 3 h16_and_period>] :: 66 | recognize(tuple(( 67 | opt(tuple((many_m_n(0, 3, h16_and_period), h16))), 68 | double_period, 69 | ls32, 70 | ))), 71 | // [<0 to 4 h16_and_period>] :: 72 | recognize(tuple(( 73 | opt(tuple((many_m_n(0, 4, h16_and_period), h16))), 74 | double_period, 75 | ls32, 76 | ))), 77 | // [<0 to 5 h16_and_period>] :: 78 | recognize(tuple(( 79 | opt(tuple((many_m_n(0, 5, h16_and_period), h16))), 80 | double_period, 81 | h16, 82 | ))), 83 | // [<0 to 6 h16_and_period>] :: 84 | recognize(tuple(( 85 | opt(tuple((many_m_n(0, 6, h16_and_period), h16))), 86 | double_period, 87 | ))), 88 | ))(input) 89 | } 90 | -------------------------------------------------------------------------------- /src/parser/link_url/ip/ipvfuture.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::take_while_m_n, character::complete::char, combinator::recognize, 3 | sequence::tuple, IResult, 4 | }; 5 | 6 | use crate::parser::{ 7 | 
parse_from_text::base_parsers::CustomError, 8 | utils::{is_hex_digit, is_sub_delim, is_unreserved}, 9 | }; 10 | 11 | fn is_ipvfuture_last(ch: char) -> bool { 12 | is_sub_delim(ch) || is_unreserved(ch) || ch == ':' 13 | } 14 | 15 | pub fn ipvfuture(input: &str) -> IResult<&str, &str, CustomError<&str>> { 16 | recognize(tuple(( 17 | char('v'), 18 | take_while_m_n(1, 1, is_hex_digit), 19 | char('.'), 20 | take_while_m_n(1, 1, is_ipvfuture_last), 21 | )))(input) 22 | } 23 | -------------------------------------------------------------------------------- /src/parser/link_url/ip/mod.rs: -------------------------------------------------------------------------------- 1 | pub(crate) mod ip_literal; 2 | pub(crate) mod ipv4; 3 | mod ipv6; 4 | mod ipvfuture; 5 | -------------------------------------------------------------------------------- /src/parser/link_url/mod.rs: -------------------------------------------------------------------------------- 1 | mod allowed_tlds; 2 | mod ip; 3 | mod parenthesis_counter; 4 | mod parse_link; 5 | pub(crate) mod punycode_warning; 6 | 7 | use nom::{ 8 | error::{ErrorKind, ParseError}, 9 | IResult, Slice, 10 | }; 11 | pub use punycode_warning::PunycodeWarning; 12 | 13 | use crate::parser::{link_url::parse_link::parse_link, parse_from_text::base_parsers::CustomError}; 14 | 15 | /* Parsing / Validation of URLs 16 | * 17 | * - hyperlinks (:// scheme) according to RFC3987 and RFC3988 18 | * - whitelisted scheme (: scheme) according to our own simple thing :) 19 | * 20 | * for hyperlinks it also checks whether the domain contains punycode 21 | * 22 | * There are two kinds of Urls 23 | * - Common Internet Scheme[1] 24 | * - Every other url (like mailto) 25 | * [1] RFC1738(Section 3.1), RFC3987, RFC3988 --Farooq 26 | */ 27 | 28 | #[derive(Debug, PartialEq, Eq, Serialize, Clone)] 29 | pub struct LinkDestination<'a> { 30 | pub target: &'a str, 31 | /// hostname if it was found 32 | pub hostname: Option<&'a str>, 33 | /// contains data for the 
punycode warning if punycode was detected 34 | /// (the host part contains non ascii unicode characters) 35 | pub punycode: Option, 36 | /// scheme 37 | pub scheme: Option<&'a str>, 38 | } 39 | 40 | impl LinkDestination<'_> { 41 | /// parse a link that is not in a delimited link or a labled link, just a part of normal text 42 | /// 43 | /// - for generic schemes (schemes without `://`) this uses a whitelist not reduce false positives 44 | /// - it also ignores the last punctuation sign if it is at the end of the link 45 | pub fn parse(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { 46 | if let Ok((rest, link_destination)) = parse_link(input) { 47 | Ok((rest, link_destination)) 48 | } else { 49 | Err(nom::Err::Error(CustomError::InvalidLink)) 50 | } 51 | } 52 | 53 | // This is for parsing markdown labelled links. 54 | pub fn parse_labelled(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { 55 | let (mut remaining, mut link) = Self::parse(input)?; 56 | if let Some(first) = remaining.chars().next() { 57 | if matches!(first, ';' | '.' 
| ',' | ':' | '!') { 58 | // ^ markdown labelled links can include one of these characters at the end 59 | // and it's therefore part of the link 60 | let point = link.target.len().saturating_add(1); 61 | link.target = input.slice(..point); 62 | remaining = input.slice(point..); 63 | } 64 | } 65 | Ok((remaining, link)) 66 | } 67 | } 68 | 69 | #[derive(Debug, PartialEq, Eq)] 70 | pub enum LinkParseError { 71 | Nom(I, ErrorKind), 72 | } 73 | 74 | impl ParseError for LinkParseError { 75 | fn from_error_kind(input: I, kind: ErrorKind) -> Self { 76 | LinkParseError::Nom(input, kind) 77 | } 78 | 79 | fn append(_: I, _: ErrorKind, other: Self) -> Self { 80 | other 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /src/parser/link_url/parenthesis_counter.rs: -------------------------------------------------------------------------------- 1 | use nom::Slice; 2 | 3 | macro_rules! adjust_balance { 4 | ($a: expr, $b: expr, $c: expr, $d: expr) => { 5 | // for opening ones 6 | { 7 | $a = $a.saturating_add(1); 8 | if $d.slice($c..).find($b).is_none() { 9 | return Some($c); 10 | } 11 | } 12 | }; 13 | ($a: expr, $b: expr) => { 14 | // for closing ones 15 | { 16 | if $a == 0 { 17 | return Some($b); 18 | } else { 19 | $a = $a.saturating_sub(1); 20 | } 21 | } 22 | }; 23 | } 24 | 25 | /// finds unbalanced closing parenthesesis and returns distance to it. 
/// Finds the first unbalanced bracket in `input` and returns its **byte**
/// offset, or `None` if every bracket is balanced.
///
/// A bracket is unbalanced when
/// - it is a closing one (`)`, `}`, `]`, `>`) that was not opened before, or
/// - it is an opening one (`(`, `{`, `[`, `<`) for which no matching closing
///   character exists anywhere after it.
///
/// The four bracket kinds are counted independently of each other, nesting
/// order between different kinds is not checked.
///
/// The offset is a byte offset so that callers can slice `input` with it
/// directly. Counting chars instead gave wrong (or, when used for slicing,
/// non char boundary) positions as soon as `input` contained a multi-byte
/// character in front of the bracket.
pub(crate) fn count_chars_in_complete_parenthesis(input: &str) -> Option<usize> {
    let mut parenthes = 0usize; // ()
    let mut curly_bracket = 0usize; // {}
    let mut bracket = 0usize; // []
    let mut angle = 0usize; // <>

    for (offset, ch) in input.char_indices() {
        // counter of the bracket kind and, for opening brackets, the closing
        // char that has to show up later
        let (balance, expected_closer) = match ch {
            '(' => (&mut parenthes, Some(')')),
            '{' => (&mut curly_bracket, Some('}')),
            '[' => (&mut bracket, Some(']')),
            '<' => (&mut angle, Some('>')),
            ')' => (&mut parenthes, None),
            '}' => (&mut curly_bracket, None),
            ']' => (&mut bracket, None),
            '>' => (&mut angle, None),
            _ => continue,
        };

        match expected_closer {
            Some(closer) => {
                *balance = balance.saturating_add(1);
                // `offset` comes from `char_indices`, so it is a valid boundary
                let tail = input.get(offset..).unwrap_or("");
                if !tail.contains(closer) {
                    return Some(offset);
                }
            }
            None => {
                if *balance == 0 {
                    return Some(offset);
                }
                *balance = balance.saturating_sub(1);
            }
        }
    }
    None
}
/// Decides whether a generic scheme (one that is followed by a lone `:`
/// instead of `://`) gets linkified.
///
/// `scheme` is the bare scheme name without the separator; ASCII case is
/// ignored.
fn is_allowed_generic_scheme(scheme: &str) -> bool {
    const ALLOWED_GENERIC_SCHEMES: &[&str] = &[
        "mailto",
        "news",
        "feed",
        "tel",
        "sms",
        "geo",
        "maps",
        "bitcoin",
        "bitcoincash",
        "eth",
        "ethereum",
        "magnet",
    ];

    ALLOWED_GENERIC_SCHEMES
        .iter()
        .any(|allowed| scheme.eq_ignore_ascii_case(allowed))
}
--Farooq 81 | fn is_scheme(c: char) -> bool { 82 | is_alpha(c) || is_digit(c) || is_other_scheme(c) 83 | } 84 | 85 | fn is_other_scheme(c: char) -> bool { 86 | matches!(c, '+' | '-' | '.') 87 | } 88 | 89 | /** 90 | * allowed chars in host names (except for pct encoded) 91 | */ 92 | fn is_ireg_name_not_pct_encoded(c: char) -> bool { 93 | is_iunreserved(c) 94 | } 95 | 96 | /// Parse host 97 | /// 98 | /// # Description 99 | /// 100 | /// Parse host. Returns the rest, the host string and a boolean indicating 101 | /// if it is IPvFuture or IPv6. 102 | /// 103 | /// A host is either an IP-Literal(IPv6 or vFuture) or an 104 | /// IPv4 or an Ireg name(e.g. far.chickenkiller.com :) 105 | /// 106 | /// # Return value 107 | /// - `(host, true)` if host is IP-Literal 108 | /// - `(host, false)` if it's ipv4 or ireg-name 109 | fn parse_host(input: &str) -> IResult<&str, (&str, bool), CustomError<&str>> { 110 | match ip_literal(input) { 111 | Ok((input, host)) => { 112 | // It got parsed, then it's an IP Literal meaning 113 | // it's either IPv6 or IPvFuture 114 | Ok((input, (host, true))) 115 | } 116 | Err(..) => { 117 | let (input, host) = alt((ipv4, take_while_ireg))(input)?; 118 | Ok((input, (host, false))) 119 | } 120 | } 121 | } 122 | 123 | fn take_while_ireg(input: &str) -> IResult<&str, &str, CustomError<&str>> { 124 | let (input, result) = recognize(many0(alt(( 125 | recognize(many1(take_while_pct_encoded)), 126 | take_while1(is_ireg_name_not_pct_encoded), 127 | ))))(input)?; 128 | 129 | Ok((input, result.trim_end_matches('.'))) 130 | } 131 | 132 | /// Parse the iauthority block 133 | /// # Description 134 | /// An iauthority is... 
135 | /// `[iuserinfo] [:port]` 136 | /// # Return value 137 | /// unconsumed string AND `(iauthority, host, is_ipliteral)` where `ipliteral` is a boolean 138 | fn iauthority(input: &str) -> IResult<&str, (&str, &str, bool), CustomError<&str>> /* (iauthority, host, bool) */ 139 | { 140 | let i = <&str>::clone(&input); 141 | let (input, userinfo) = opt(recognize(tuple((take_while_iuserinfo, char('@')))))(input)?; 142 | let (input, (host, is_ipv6_or_future)) = parse_host(input)?; 143 | let (input, port) = opt(recognize(tuple((char(':'), take_while(is_digit)))))(input)?; 144 | let userinfo = userinfo.unwrap_or(""); 145 | let port = port.unwrap_or(""); 146 | let len = userinfo.len().saturating_add(port.len()); 147 | if let Some(out) = i.get(0..len) { 148 | Ok((input, (out, host, is_ipv6_or_future))) 149 | } else { 150 | Err(nom::Err::Failure(CustomError::NoContent)) 151 | } 152 | } 153 | 154 | /// Consume an iuserinfo 155 | fn take_while_iuserinfo(input: &str) -> IResult<&str, &str, CustomError<&str>> { 156 | alt(( 157 | recognize(many0(take_while_pct_encoded)), 158 | take_while(is_iuserinfo_not_pct_encoded), 159 | ))(input) 160 | } 161 | 162 | fn is_iuserinfo_not_pct_encoded(c: char) -> bool { 163 | is_iunreserved(c) || is_sub_delim(c) || c == ':' 164 | } 165 | 166 | fn is_ipchar_not_pct_encoded(c: char) -> bool { 167 | is_iunreserved(c) || is_sub_delim(c) || matches!(c, ':' | '@') 168 | } 169 | 170 | fn take_while_ipchar(input: &str) -> IResult<&str, &str, CustomError<&str>> { 171 | recognize(many0(alt(( 172 | take_while(is_ipchar_not_pct_encoded), 173 | take_while_pct_encoded, 174 | ))))(input) 175 | } 176 | 177 | fn take_while_ipchar1(input: &str) -> IResult<&str, &str, CustomError<&str>> { 178 | recognize(many1(alt(( 179 | take_while1(is_ipchar_not_pct_encoded), 180 | take_while_pct_encoded, 181 | ))))(input) 182 | } 183 | 184 | const IPRIVATE_RANGES: [RangeInclusive; 3] = 185 | [0xe000..=0xf8ff, 0xf0000..=0xffffd, 0x100000..=0x10fffd]; 186 | 187 | fn 
is_iprivate(c: char) -> bool { 188 | is_in_one_of_ranges(c as u32, &IPRIVATE_RANGES[..]) 189 | } 190 | 191 | fn is_iquery_not_pct_encoded(c: char) -> bool { 192 | is_iprivate(c) || is_ipchar_not_pct_encoded(c) || matches!(c, '/' | '?') 193 | } 194 | 195 | /// Consume an iquery block 196 | fn iquery(input: &str) -> IResult<&str, &str, CustomError<&str>> { 197 | recognize(many0(alt(( 198 | take_while1(is_iquery_not_pct_encoded), 199 | take_while_pct_encoded, 200 | ))))(input) 201 | } 202 | 203 | fn take_while_ifragment(input: &str) -> IResult<&str, &str, CustomError<&str>> { 204 | recognize(many0(alt((take_while_ipchar1, tag("/"), tag("?")))))(input) 205 | } 206 | 207 | /// Consume scheme characters from input and then :// or : 208 | /// 209 | /// # Description 210 | /// This function as it can be seen, consumes exactly one alpha and as many 211 | /// scheme characters as there are. then it gets a slice of input(as cloned to i) 212 | fn scheme_and_separator(input: &str) -> IResult<&str, (&str, &str), CustomError<&str>> { 213 | let _input = <&str>::clone(&input); 214 | let (input, (_first, second)) = 215 | pair(take_while_m_n(1, 1, is_alpha), take_while(is_scheme))(input)?; 216 | // "1" is for the first, its length is always 1 217 | let len = 1usize.saturating_add(second.len()); 218 | if let Some(scheme) = _input.get(0..len) { 219 | // important that we test :// before we test for lone : 220 | let (input, separator) = alt((tag("://"), tag(":")))(input)?; 221 | return Ok((input, (scheme, separator))); 222 | } 223 | Err(nom::Err::Failure(CustomError::NoContent)) 224 | } 225 | 226 | #[test] 227 | fn scheme_with_separator() { 228 | let result = opt(scheme_and_separator)("scheme:host/path"); 229 | assert_eq!(Ok(("host/path", Some(("scheme", ":")))), result); 230 | 231 | let result = opt(scheme_and_separator)("scheme://host/path"); 232 | assert_eq!(Ok(("host/path", Some(("scheme", "://")))), result); 233 | 234 | let result = opt(scheme_and_separator)("no_scheme/host/path"); 
235 | assert_eq!(Ok(("no_scheme/host/path", None)), result); 236 | } 237 | 238 | /// Take as many pct encoded blocks as there are. a block is %XX where X is a hex digit 239 | fn take_while_pct_encoded(input: &str) -> IResult<&str, &str, CustomError<&str>> { 240 | recognize(many1(tuple(( 241 | char('%'), 242 | take_while_m_n(2, 2, is_hex_digit), 243 | ))))(input) 244 | } 245 | 246 | fn ifragment(input: &str) -> IResult<&str, &str, CustomError<&str>> { 247 | recognize(many0(tuple((char('#'), take_while_ifragment))))(input) 248 | } 249 | 250 | fn parse_ipath_abempty(input: &str) -> IResult<&str, &str, CustomError<&str>> { 251 | recognize(many0(tuple((char('/'), opt(take_while_ipchar1)))))(input) 252 | } 253 | 254 | #[test] 255 | fn test_ipath_abempty() { 256 | assert_eq!(parse_ipath_abempty("///foo/bar"), Ok(("", "///foo/bar"))); 257 | } 258 | 259 | fn parse_ipath_absolute(input: &str) -> IResult<&str, &str, CustomError<&str>> { 260 | recognize(tuple(( 261 | char('/'), 262 | opt(tuple(( 263 | take_while_ipchar1, 264 | many0(tuple((char('/'), opt(take_while_ipchar1)))), 265 | ))), 266 | )))(input) 267 | } 268 | 269 | #[test] 270 | fn test_ipath_absolute() { 271 | assert_eq!(parse_ipath_absolute("/foo"), Ok(("", "/foo"))); 272 | assert_eq!(parse_ipath_absolute("/foo/bar"), Ok(("", "/foo/bar"))); 273 | assert_eq!(parse_ipath_absolute("/foo//bar"), Ok(("", "/foo//bar"))); 274 | } 275 | 276 | // IRI links per RFC3987 and RFC3986 277 | fn parse_iri(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { 278 | let input_ = <&str>::clone(&input); 279 | 280 | // A link is [scheme] ['://'] [ipath] [iquery] [ifragment] 281 | let (input, scheme_parts) = opt(scheme_and_separator)(input)?; 282 | let (scheme, separator) = scheme_parts.unwrap_or(("", "")); 283 | 284 | // host is actually part of authority but we need it separately 285 | // see iauthority function description for more information 286 | let (input, (authority, mut host, is_ipv6_or_future)) = 
iauthority(input)?; 287 | 288 | // now with host, if we dont have a scheme we need to check it for TLD 289 | if scheme.is_empty() { 290 | if !host.contains('.') { 291 | return Err(nom::Err::Failure(CustomError::<&str>::InvalidLink)); 292 | } 293 | 294 | let tld = host 295 | .split('.') 296 | .last() 297 | .ok_or(nom::Err::Failure(CustomError::<&str>::InvalidLinkNoTLD))?; 298 | 299 | if !check_if_tld_is_allowed(tld) { 300 | return Err(nom::Err::Failure(CustomError::<&str>::InvalidLink)); 301 | } 302 | } 303 | 304 | let (input, path) = opt(alt(( 305 | parse_ipath_abempty, 306 | parse_ipath_absolute, 307 | recognize(tuple(( 308 | take_while_ipchar, 309 | many0(tuple((char('/'), opt(take_while_ipchar1)))), 310 | ))), // ipath-rootless 311 | )))(input)?; 312 | // ^ parse one of ipath-absolute or ipath-rootless or none 313 | // which in the third case it's down to ipath-empty(see below) 314 | let path = path.unwrap_or(""); // it's ipath-empty 315 | 316 | let (input, query) = opt(recognize(tuple((char('?'), iquery))))(input)?; 317 | let query = query.unwrap_or(""); 318 | 319 | let (_, fragment) = opt(ifragment)(input)?; 320 | let fragment = fragment.unwrap_or(""); 321 | let ihier_len = authority 322 | .len() 323 | .saturating_add(host.len()) 324 | .saturating_add(path.len()); 325 | if ihier_len == 0 { 326 | return Err(nom::Err::Error(CustomError::InvalidLink)); 327 | } 328 | // compute final length of scheme + separator + ihier + path + query + fragment 329 | let mut len = scheme 330 | .len() 331 | .saturating_add(separator.len()) 332 | .saturating_add(ihier_len) 333 | .saturating_add(query.len()) 334 | .saturating_add(fragment.len()); 335 | if let Some(link) = input_.get(0..len) { 336 | if link.ends_with([':', ';', '.', ',', '!']) { 337 | len = len.saturating_sub(1); 338 | if path.is_empty() && query.is_empty() && fragment.is_empty() { 339 | host = input_.slice( 340 | scheme.len().saturating_add(separator.len())..input_.len().saturating_sub(1), 341 | ); 342 | } 343 | } 
344 | len = count_chars_in_complete_parenthesis(link).unwrap_or(len); 345 | let link = input_.slice(0..len); 346 | let input = input_.slice(len..); 347 | 348 | return Ok(( 349 | input, 350 | LinkDestination { 351 | target: link, 352 | hostname: if host.is_empty() { None } else { Some(host) }, 353 | punycode: if is_ipv6_or_future { 354 | None 355 | } else { 356 | get_puny_code_warning(link, host) 357 | }, 358 | scheme: if scheme.is_empty() { 359 | None 360 | } else { 361 | Some(scheme) 362 | }, 363 | }, 364 | )); 365 | } 366 | Err(nom::Err::Failure(CustomError::NoContent)) 367 | } 368 | 369 | // White listed links in this format: scheme:some_char like tel:+989164364485 370 | fn parse_generic(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { 371 | let i = <&str>::clone(&input); 372 | let (input, scheme_parts) = opt(scheme_and_separator)(input)?; 373 | let (scheme, _separator) = scheme_parts.unwrap_or(("", "")); 374 | if !is_allowed_generic_scheme(scheme) { 375 | return Err(nom::Err::Error(CustomError::InvalidLink)); 376 | } 377 | 378 | let (input, rest) = take_while1(is_not_white_space)(input)?; 379 | let len = scheme.len().saturating_add(1).saturating_add(rest.len()); 380 | if let Some(target) = i.get(0..len) { 381 | return Ok(( 382 | input, 383 | LinkDestination { 384 | scheme: Some(scheme), 385 | target, 386 | hostname: None, 387 | punycode: None, 388 | }, 389 | )); 390 | } 391 | Err(nom::Err::Failure(CustomError::NoContent)) 392 | } 393 | 394 | pub(super) fn parse_link(input: &str) -> IResult<&str, LinkDestination, CustomError<&str>> { 395 | alt((parse_generic, parse_iri))(input) 396 | } 397 | -------------------------------------------------------------------------------- /src/parser/link_url/punycode_warning.rs: -------------------------------------------------------------------------------- 1 | // this is to protect against https://en.wikipedia.org/wiki/IDN_homograph_attack 2 | 3 | #[derive(Debug, PartialEq, Eq, Serialize, Clone)] 4 | pub 
/// Returns true if `host` contains at least one character that is not an ASCII
/// letter, an ASCII digit, `.` or `-`.
///
/// Note that this is broader than "contains non-ASCII characters": ASCII
/// characters outside that set (for example `_`) also make this return true.
/// An empty `host` yields false.
pub fn is_puny(host: &str) -> bool {
    host.chars()
        .any(|ch| !ch.is_ascii_alphanumeric() && ch != '.' && ch != '-')
}
#[serde(tag = "t", content = "c")] 16 | pub enum Element<'a> { 17 | /* 18 | All elements that are not markdown, but still parsed. 19 | These elements are parsed from every text, but are not converted to or from html. 20 | */ 21 | Text(&'a str), 22 | /// #hashtag 23 | Tag(&'a str), 24 | /// Represents a linebreak - \n 25 | Linebreak, 26 | Link { 27 | destination: LinkDestination<'a>, 28 | }, 29 | EmailAddress(&'a str), 30 | // Later: 31 | // Mention { 32 | // internal_id: &str 33 | // }, 34 | /// On click, the command gets prefilled as the draft, so it can be easily send. 35 | BotCommandSuggestion(&'a str), 36 | 37 | /* 38 | All markdown elements. 39 | These elements are converted to html when sent out and converted back to the AST format when displaying the message. 40 | */ 41 | Bold(Vec>), 42 | Italics(Vec>), 43 | StrikeThrough(Vec>), 44 | 45 | LabeledLink { 46 | label: Vec>, 47 | destination: LinkDestination<'a>, 48 | }, 49 | InlineCode { 50 | content: &'a str, 51 | }, 52 | CodeBlock { 53 | language: Option<&'a str>, 54 | content: &'a str, 55 | }, 56 | // Later: 57 | // CollonEmoji(&'a str), 58 | // InlineTex(&str), 59 | // BlockTex(&str), 60 | } 61 | 62 | /// parses all kinds of elements, including markdown 63 | pub fn parse_markdown_text(input: &str) -> std::vec::Vec { 64 | parse_from_text::parse_all(input) 65 | } 66 | 67 | /// parses text elements such as links and email addresses, excluding markdown 68 | pub fn parse_only_text(input: &str) -> std::vec::Vec { 69 | parse_from_text::parse_only_text(input) 70 | } 71 | 72 | /// parses text and delimited/labled link elements to replicate current desktop elements 73 | pub fn parse_desktop_set(input: &str) -> std::vec::Vec { 74 | parse_from_text::parse_desktop_set(input) 75 | } 76 | -------------------------------------------------------------------------------- /src/parser/parse_from_text/base_parsers.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 
| // Base utility parsers, used by both text and markdown parsers 4 | use nom::{ 5 | bytes::complete::tag, 6 | error::{ErrorKind, ParseError}, 7 | sequence::delimited, 8 | IResult, 9 | }; 10 | 11 | use crate::parser::utils::is_white_space; 12 | 13 | #[derive(Debug, PartialEq, Eq)] 14 | pub enum CustomError { 15 | NoContent, 16 | InvalidWhiteSpaceFound, 17 | NoElement, 18 | Nom(I, ErrorKind), 19 | InvalidEmail, 20 | InvalidLink, 21 | InvalidLinkNoTLD, 22 | UnexpectedContent, 23 | PrecedingWhitespaceMissing, 24 | OptionIsUnexpectedNone, 25 | UnxepectedError(String), 26 | } 27 | 28 | impl ParseError for CustomError { 29 | fn from_error_kind(input: I, kind: ErrorKind) -> Self { 30 | CustomError::Nom(input, kind) 31 | } 32 | 33 | fn append(_: I, _: ErrorKind, other: Self) -> Self { 34 | other 35 | } 36 | } 37 | 38 | pub trait IntoCustomError { 39 | fn into_result(self) -> Result>>; 40 | } 41 | 42 | impl IntoCustomError for Option { 43 | fn into_result(self: Option) -> Result>> { 44 | match self { 45 | Some(v) => Ok(v), 46 | None => Err(nom::Err::Error(CustomError::OptionIsUnexpectedNone)), 47 | } 48 | } 49 | } 50 | 51 | impl IntoCustomError for Result { 52 | fn into_result(self: Result) -> Result>> { 53 | match self { 54 | Ok(v) => Ok(v), 55 | Err(err) => Err(nom::Err::Error(CustomError::UnxepectedError(format!( 56 | "{:?}", 57 | err 58 | )))), 59 | } 60 | } 61 | } 62 | 63 | /// delimited no whitespace start or end 64 | pub(crate) fn direct_delimited<'a>( 65 | input: &'a str, 66 | tag_str: &str, 67 | ) -> IResult<&'a str, &'a str, CustomError<&'a str>> { 68 | let (input, content): (&str, &str) = delimited( 69 | tag(tag_str), 70 | nom::bytes::complete::is_not(tag_str), 71 | tag(tag_str), 72 | )(input)?; 73 | if content.is_empty() { 74 | return Err(nom::Err::Error(CustomError::NoContent)); 75 | } 76 | if is_white_space(content.chars().next().into_result()?) 77 | || is_white_space(content.chars().last().into_result()?) 
78 | { 79 | return Err(nom::Err::Error(CustomError::InvalidWhiteSpaceFound)); 80 | } 81 | Ok((input, content)) 82 | } 83 | 84 | /* 85 | impl From for Err> { 86 | fn from(_: I, perror: PropertiesError) { 87 | nom::Err(CustomError::ICUError(perror)) 88 | } 89 | } 90 | */ 91 | /* 92 | impl From> for nom::Err> { 93 | fn from(input: I, code: ErrorKind) -> nom::Err> { 94 | nom::Err(CustomError::Nom(input, code) 95 | } 96 | }*/ 97 | -------------------------------------------------------------------------------- /src/parser/parse_from_text/desktop_subset.rs: -------------------------------------------------------------------------------- 1 | //! desktop subset of markdown, becase this way we can already use the punycode detection of this crate 2 | //! and also we can keep delimited and labled links in desktop 3 | use nom::{ 4 | bytes::complete::{is_not, tag, take}, 5 | combinator::{peek, recognize}, 6 | sequence::{delimited, tuple}, 7 | IResult, 8 | }; 9 | 10 | use crate::parser::LinkDestination; 11 | 12 | use super::base_parsers::CustomError; 13 | use super::markdown_elements::{delimited_email_address, delimited_link}; 14 | use super::text_elements::parse_text_element; 15 | use super::Element; 16 | 17 | // [labeled](https://link) 18 | pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&str>> { 19 | let (input, raw_label) = delimited(tag("["), is_not("]"), tag("]"))(input)?; 20 | if raw_label.is_empty() { 21 | return Err(nom::Err::Error(CustomError::NoContent)); 22 | } 23 | 24 | // in desktop set there is no element that can appear inside of a lablel 25 | let label = vec![Element::Text(raw_label)]; 26 | 27 | let (input, (_, destination, _)) = 28 | tuple((tag("("), LinkDestination::parse_labelled, tag(")")))(input)?; 29 | 30 | Ok((input, Element::LabeledLink { label, destination })) 31 | } 32 | 33 | /// consumes all text until [parse_element] works again, this method is only for internal use by [desktopset_text] 34 | /// 35 | /// its output is 
not useable on its own, always combinate this with [nom::combinator::recognize] 36 | fn eat_desktopset_text(input: &str) -> IResult<&str, (), CustomError<&str>> { 37 | let mut remaining = input; 38 | while !remaining.is_empty() { 39 | // take 1, because other parsers didn't work (text is always the last used parser) 40 | let (remainder, taken) = take(1usize)(remaining)?; 41 | remaining = remainder; 42 | // peek if there is an element 43 | if peek(|input| parse_element(input, taken.chars().next()))(remaining).is_ok() { 44 | break; 45 | } 46 | } 47 | Ok((remaining, ())) 48 | } 49 | 50 | /// Consumes text until another parser of [parse_element] works again 51 | /// 52 | /// used as last parser, if the others do not consume the input it consumes the input until another parser works again 53 | /// (uses whitespace seperation to make the parsing faster) 54 | pub(crate) fn desktopset_text(input: &str) -> IResult<&str, Element, CustomError<&str>> { 55 | let (rest, content) = recognize(eat_desktopset_text)(input)?; 56 | Ok((rest, Element::Text(content))) 57 | } 58 | 59 | pub(crate) fn parse_element( 60 | input: &str, 61 | prev_char: Option, 62 | ) -> IResult<&str, Element, CustomError<&str>> { 63 | // the order is important 64 | // generaly more specific parsers that fail/return fast should be in the front 65 | // But keep in mind that the order can also change how and if the parser works as intended 66 | if let Ok((i, elm)) = labeled_link(input) { 67 | Ok((i, elm)) 68 | } else if let Ok((i, elm)) = delimited_email_address(input) { 69 | Ok((i, elm)) 70 | } else if let Ok((i, elm)) = delimited_link(input) { 71 | Ok((i, elm)) 72 | } else { 73 | parse_text_element(input, prev_char) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/parser/parse_from_text/find_range.rs: -------------------------------------------------------------------------------- 1 | 2 | 
-------------------------------------------------------------------------------- /src/parser/parse_from_text/markdown_elements.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::{is_not, tag, take, take_while}, 3 | character::complete::alphanumeric1, 4 | combinator::{opt, peek, recognize}, 5 | sequence::{delimited, tuple}, 6 | IResult, 7 | }; 8 | 9 | use super::{base_parsers::*, parse_all}; 10 | use crate::parser::{ 11 | link_url::LinkDestination, 12 | parse_from_text::{ 13 | base_parsers::direct_delimited, 14 | text_elements::{email_address, parse_text_element}, 15 | Element, 16 | }, 17 | utils::{is_white_space, is_white_space_but_not_linebreak}, 18 | }; 19 | 20 | mod label_elements; 21 | use label_elements::parse_label_elements; 22 | 23 | pub(crate) fn inline_code(input: &str) -> IResult<&str, &str, CustomError<&str>> { 24 | delimited(tag("`"), is_not("`"), tag("`"))(input) 25 | } 26 | 27 | pub(crate) fn code_block(input: &str) -> IResult<&str, Element, CustomError<&str>> { 28 | let (input, content): (&str, &str) = delimited(tag("```"), is_not("```"), tag("```"))(input)?; 29 | 30 | // parse language 31 | let (content, lang) = if is_white_space( 32 | content 33 | .chars() 34 | .next() 35 | .ok_or(nom::Err::Error(CustomError::NoContent))?, 36 | ) { 37 | // no language defined 38 | (content, None) 39 | } else { 40 | // language defined 41 | let (content, lang): (&str, &str) = alphanumeric1(content)?; 42 | (content, Some(lang)) 43 | }; 44 | 45 | // expect whitespace or new line after language or beginning (if no language is defined) 46 | let char_in_question = content 47 | .chars() 48 | .next() 49 | .ok_or(nom::Err::Error(CustomError::NoContent))?; 50 | 51 | // remove starting whitespace and first newline (if there is any). 
52 | let content = if is_white_space_but_not_linebreak(char_in_question) { 53 | // remove whitespaces until newline or non whitespaces 54 | let (content, _) = take_while(is_white_space_but_not_linebreak)(content)?; 55 | // remove new line if there is one 56 | let (content, _) = opt(tag("\n"))(content)?; 57 | content 58 | } else { 59 | // remove new line if there is one 60 | let (content, _) = tag("\n")(content)?; 61 | content 62 | }; 63 | 64 | // remove spaces and newlines at end of content 65 | let mut offset: usize = 0; 66 | let mut c_iter = content.chars().rev(); 67 | while is_white_space( 68 | c_iter 69 | .next() 70 | .ok_or(nom::Err::Error(CustomError::NoContent))?, 71 | ) { 72 | offset = offset.saturating_add(1); 73 | } 74 | Ok(( 75 | input, 76 | Element::CodeBlock { 77 | language: lang, 78 | content: content 79 | .get(0..content.len().saturating_sub(offset)) 80 | .into_result()?, 81 | }, 82 | )) 83 | } 84 | 85 | // 86 | pub(crate) fn delimited_email_address(input: &str) -> IResult<&str, Element, CustomError<&str>> { 87 | let (input, content): (&str, &str) = delimited(tag("<"), is_not(">"), tag(">"))(input)?; 88 | if content.is_empty() { 89 | return Err(nom::Err::Error(CustomError::NoContent)); 90 | } 91 | let (rest, email) = email_address(content)?; 92 | if !rest.is_empty() { 93 | return Err(nom::Err::Error(CustomError::UnexpectedContent)); 94 | } 95 | Ok((input, email)) 96 | } 97 | 98 | // 99 | pub(crate) fn delimited_link(input: &str) -> IResult<&str, Element, CustomError<&str>> { 100 | let (input, (_, destination, _)): (&str, (&str, LinkDestination, &str)) = 101 | tuple((tag("<"), LinkDestination::parse_labelled, tag(">")))(input)?; 102 | Ok((input, Element::Link { destination })) 103 | } 104 | 105 | // [labeled](https://link) 106 | pub(crate) fn labeled_link(input: &str) -> IResult<&str, Element, CustomError<&str>> { 107 | let (input, raw_label): (&str, &str) = delimited(tag("["), is_not("]"), tag("]"))(input)?; 108 | if raw_label.is_empty() { 109 | 
return Err(nom::Err::Error(CustomError::NoContent)); 110 | } 111 | // the list of elements that can appear inside of a label is restricted 112 | // clickable elements make no sense there. 113 | let label = parse_label_elements(raw_label); 114 | 115 | let (input, (_, destination, _)) = 116 | tuple((tag("("), LinkDestination::parse_labelled, tag(")")))(input)?; 117 | 118 | Ok((input, Element::LabeledLink { label, destination })) 119 | } 120 | 121 | pub(crate) fn parse_element( 122 | input: &str, 123 | prev_char: Option, 124 | ) -> IResult<&str, Element, CustomError<&str>> { 125 | // the order is important 126 | // generaly more specific parsers that fail/return fast should be in the front 127 | // But keep in mind that the order can also change how and if the parser works as intended 128 | if let Ok((i, b)) = direct_delimited(input, "**") { 129 | Ok((i, Element::Bold(parse_all(b)))) 130 | } else if let Ok((i, b)) = direct_delimited(input, "__") { 131 | Ok((i, Element::Bold(parse_all(b)))) 132 | } else if let Ok((i, b)) = direct_delimited(input, "_") { 133 | Ok((i, Element::Italics(parse_all(b)))) 134 | } else if let Ok((i, b)) = direct_delimited(input, "*") { 135 | Ok((i, Element::Italics(parse_all(b)))) 136 | } else if let Ok((i, b)) = direct_delimited(input, "~~") { 137 | Ok((i, Element::StrikeThrough(parse_all(b)))) 138 | } else if let Ok((i, elm)) = code_block(input) { 139 | Ok((i, elm)) 140 | } else if let Ok((i, b)) = inline_code(input) { 141 | Ok((i, Element::InlineCode { content: b })) 142 | } else if let Ok((i, elm)) = labeled_link(input) { 143 | Ok((i, elm)) 144 | } else if let Ok((i, elm)) = delimited_email_address(input) { 145 | Ok((i, elm)) 146 | } else if let Ok((i, elm)) = delimited_link(input) { 147 | Ok((i, elm)) 148 | } else { 149 | parse_text_element(input, prev_char) 150 | } 151 | } 152 | 153 | /// consumes all text until [parse_element] works again, this method is only for internal use by [markdown_text] 154 | /// 155 | /// its output is not 
useable on its own, always combinate this with [nom::combinator::recognize] 156 | fn eat_markdown_text(input: &str) -> IResult<&str, (), CustomError<&str>> { 157 | let mut remaining = input; 158 | while !remaining.is_empty() { 159 | // take 1, because other parsers didn't work (text is always the last used parser) 160 | let (remainder, taken) = take(1usize)(remaining)?; 161 | remaining = remainder; 162 | // peek if there is an element 163 | if peek(|input| parse_element(input, taken.chars().next()))(remaining).is_ok() { 164 | break; 165 | } 166 | // take until whitespace 167 | //remaining = take_while(|c| not_blank_space(c))(remaining)?.0; 168 | } 169 | Ok((remaining, ())) 170 | } 171 | 172 | /// Consumes text until another parser of [parse_element] works again 173 | /// 174 | /// used as last parser, if the others do not consume the input it consumes the input until another parser works again 175 | /// (uses whitespace seperation to make the parsing faster) 176 | pub(crate) fn markdown_text(input: &str) -> IResult<&str, Element, CustomError<&str>> { 177 | let (rest, content) = recognize(eat_markdown_text)(input)?; 178 | Ok((rest, Element::Text(content))) 179 | } 180 | -------------------------------------------------------------------------------- /src/parser/parse_from_text/markdown_elements/label_elements.rs: -------------------------------------------------------------------------------- 1 | use nom::{ 2 | bytes::complete::take, 3 | combinator::{peek, recognize}, 4 | IResult, 5 | }; 6 | 7 | use crate::parser::{ 8 | parse_from_text::{ 9 | base_parsers::{direct_delimited, CustomError}, 10 | markdown_elements::inline_code, 11 | }, 12 | Element, 13 | }; 14 | 15 | /// Parsers for label in labelled links and later also labeled hashtags 16 | /// parse elements inside of label in markdown set 17 | pub(crate) fn parse_label_elements(input: &str) -> Vec { 18 | let mut result = Vec::new(); 19 | let mut remaining = input; 20 | // println!("p-{}", input); 21 | while 
!remaining.is_empty() { 22 | // println!("r-{}", remaining); 23 | if let Ok((rest, element)) = parse_markdown_label_element(remaining) { 24 | // println!("e-{:?} - {}", element, remaining); 25 | remaining = rest; 26 | result.push(element); 27 | } else if let Ok((rest, element)) = markdown_label_text(remaining) { 28 | // println!("e-{:?} - {}", element, remaining); 29 | result.push(element); 30 | remaining = rest; 31 | } else { 32 | // println!("e-textDefault-{}", remaining); 33 | result.push(Element::Text(remaining)); 34 | break; 35 | } 36 | } 37 | result 38 | } 39 | 40 | pub(crate) fn parse_markdown_label_element( 41 | input: &str, 42 | ) -> IResult<&str, Element, CustomError<&str>> { 43 | // the order is important 44 | // generaly more specific parsers that fail/return fast should be in the front 45 | // But keep in mind that the order can also change how and if the parser works as intended 46 | if let Ok((i, b)) = direct_delimited(input, "**") { 47 | Ok((i, Element::Bold(parse_label_elements(b)))) 48 | } else if let Ok((i, b)) = direct_delimited(input, "__") { 49 | Ok((i, Element::Bold(parse_label_elements(b)))) 50 | } else if let Ok((i, b)) = direct_delimited(input, "_") { 51 | Ok((i, Element::Italics(parse_label_elements(b)))) 52 | } else if let Ok((i, b)) = direct_delimited(input, "*") { 53 | Ok((i, Element::Italics(parse_label_elements(b)))) 54 | } else if let Ok((i, b)) = direct_delimited(input, "~~") { 55 | Ok((i, Element::StrikeThrough(parse_label_elements(b)))) 56 | } else if let Ok((i, b)) = inline_code(input) { 57 | Ok((i, Element::InlineCode { content: b })) 58 | } else { 59 | Err(nom::Err::Error(CustomError::NoElement)) 60 | } 61 | } 62 | /// consumes all text until [parse_label_elements] works again, this method is only for internal use by [markdown_label_text] 63 | /// 64 | /// its output is not useable on its own, always combinate this with [nom::combinator::recognize] 65 | fn eat_markdown_label_text(input: &str) -> IResult<&str, (), 
CustomError<&str>> { 66 | let mut remaining = input; 67 | while !remaining.is_empty() { 68 | // take 1, because other parsers didn't work (text is always the last used parser) 69 | let (remainder, _taken) = take(1usize)(remaining)?; 70 | remaining = remainder; 71 | // peek if there is an element 72 | if peek(|input| parse_markdown_label_element(input))(remaining).is_ok() { 73 | break; 74 | } 75 | // take until whitespace 76 | //remaining = take_while(|c| not_blank_space(c))(remaining)?.0; 77 | } 78 | Ok((remaining, ())) 79 | } 80 | 81 | /// Consumes text until another parser of [parse_markdown_label_element] works again 82 | /// 83 | /// used as last parser, if the others do not consume the input it consumes the input until another parser works again 84 | /// (uses whitespace seperation to make the parsing faster) 85 | fn markdown_label_text(input: &str) -> IResult<&str, Element, CustomError<&str>> { 86 | let (rest, content) = recognize(eat_markdown_label_text)(input)?; 87 | Ok((rest, Element::Text(content))) 88 | } 89 | -------------------------------------------------------------------------------- /src/parser/parse_from_text/mod.rs: -------------------------------------------------------------------------------- 1 | use super::Element; 2 | 3 | pub(crate) mod base_parsers; 4 | mod desktop_subset; 5 | pub mod find_range; 6 | pub mod hashtag_content_char_ranges; 7 | mod markdown_elements; 8 | mod text_elements; 9 | 10 | /// parses text elements such as links and email addresses, excluding markdown 11 | pub(crate) fn parse_only_text(input: &str) -> std::vec::Vec { 12 | let mut result = Vec::new(); 13 | let mut remaining = input; 14 | // println!("p-{}", input); 15 | while !remaining.is_empty() { 16 | // println!("r-{}", remaining); 17 | if let Ok((rest, element)) = text_elements::parse_text_element(remaining, None) { 18 | // println!("e-{:?} - {}", element, remaining); 19 | remaining = rest; 20 | result.push(element); 21 | } else if let Ok((rest, element)) = 
text_elements::text(remaining) { 22 | // println!("e-{:?} - {}", element, remaining); 23 | result.push(element); 24 | remaining = rest; 25 | } else { 26 | // println!("e-textDefault-{}", remaining); 27 | result.push(Element::Text(remaining)); 28 | break; 29 | } 30 | } 31 | result 32 | } 33 | 34 | /// parses all kinds of elements, including markdown 35 | pub(crate) fn parse_all(input: &str) -> std::vec::Vec { 36 | let mut result = Vec::new(); 37 | let mut remaining = input; 38 | // println!("p-{}", input); 39 | while !remaining.is_empty() { 40 | // println!("r-{}", remaining); 41 | if let Ok((rest, element)) = markdown_elements::parse_element(remaining, None) { 42 | // println!("e-{:?} - {}", element, remaining); 43 | remaining = rest; 44 | result.push(element); 45 | } else if let Ok((rest, element)) = markdown_elements::markdown_text(remaining) { 46 | // println!("e-{:?} - {}", element, remaining); 47 | result.push(element); 48 | remaining = rest; 49 | } else { 50 | // println!("e-textDefault-{}", remaining); 51 | result.push(Element::Text(remaining)); 52 | break; 53 | } 54 | } 55 | result 56 | } 57 | 58 | /// parses delimited and labled links additional to the text elements 59 | pub(crate) fn parse_desktop_set(input: &str) -> std::vec::Vec { 60 | let mut result = Vec::new(); 61 | let mut remaining = input; 62 | // println!("p-{}", input); 63 | while !remaining.is_empty() { 64 | // println!("r-{}", remaining); 65 | if let Ok((rest, element)) = desktop_subset::parse_element(remaining, None) { 66 | // println!("e-{:?} - {}", element, remaining); 67 | remaining = rest; 68 | result.push(element); 69 | } else if let Ok((rest, element)) = desktop_subset::desktopset_text(remaining) { 70 | // println!("e-{:?} - {}", element, remaining); 71 | result.push(element); 72 | remaining = rest; 73 | } else { 74 | // println!("e-textDefault-{}", remaining); 75 | result.push(Element::Text(remaining)); 76 | break; 77 | } 78 | } 79 | result 80 | } 81 | 
-------------------------------------------------------------------------------- /src/parser/parse_from_text/text_elements.rs: -------------------------------------------------------------------------------- 1 | /// nom parsers for text elements 2 | use crate::parser::link_url::LinkDestination; 3 | 4 | use super::hashtag_content_char_ranges::hashtag_content_char; 5 | use super::Element; 6 | use nom::{ 7 | bytes::{ 8 | complete::{tag, take, take_while, take_while1}, 9 | streaming::take_till1, 10 | }, 11 | character::complete::char, 12 | combinator::{peek, recognize, verify}, 13 | sequence::tuple, 14 | AsChar, IResult, Offset, Slice, 15 | }; 16 | 17 | use super::base_parsers::CustomError; 18 | 19 | fn linebreak(input: &str) -> IResult<&str, char, CustomError<&str>> { 20 | char('\n')(input) 21 | } 22 | 23 | fn hashtag(input: &str) -> IResult<&str, Element, CustomError<&str>> { 24 | let (input, content) = recognize(tuple((char('#'), take_while1(hashtag_content_char))))(input)?; 25 | 26 | Ok((input, Element::Tag(content))) 27 | } 28 | 29 | fn not_email_address_part_char(c: char) -> bool { 30 | matches!( 31 | c, 32 | '@' | '\n' 33 | | '\r' 34 | | '\t' 35 | | ' ' 36 | | ':' 37 | | ';' 38 | | '!' 39 | | '?' 
40 | | ',' 41 | | '(' 42 | | ')' 43 | | '{' 44 | | '}' 45 | | '[' 46 | | ']' 47 | | '"' 48 | ) 49 | } 50 | 51 | fn email_address_part_char(c: char) -> bool { 52 | !not_email_address_part_char(c) 53 | } 54 | 55 | /// rough recognition of an email, results gets checked by a real email address parser 56 | fn email_intern(input: &str) -> IResult<&str, (), CustomError<&str>> { 57 | let (input, _) = take_till1(not_email_address_part_char)(input)?; 58 | let (input, _) = tag("@")(input)?; 59 | let (input, _) = take_while1(email_address_part_char)(input)?; 60 | Ok((input, ())) 61 | } 62 | 63 | pub(crate) fn email_address(input: &str) -> IResult<&str, Element, CustomError<&str>> { 64 | // basically 65 | // let (input, content) = recognize(email_intern)(input)?; 66 | // but don't eat the last char if it is a dot. 67 | let i = <&str>::clone(&input); 68 | let i2 = <&str>::clone(&input); 69 | let i3 = <&str>::clone(&input); 70 | let (input, content) = match email_intern(i) { 71 | Ok((mut remaining, _)) => { 72 | let index = i2.offset(remaining); 73 | let mut consumed = i2.slice(..index); 74 | while let Some('.') = consumed.chars().last() { 75 | let index = input.offset(remaining).saturating_sub(1); 76 | consumed = i3.slice(..index); 77 | remaining = input.slice(index..); 78 | } 79 | Ok((remaining, consumed)) 80 | } 81 | Err(e) => Err(e), 82 | }?; 83 | // check if result is valid email 84 | if true { 85 | Ok((input, Element::EmailAddress(content))) 86 | } else { 87 | Err(nom::Err::Error(CustomError::InvalidEmail)) 88 | } 89 | } 90 | 91 | // see https://github.com/deltachat/message-parser/issues/82 92 | pub(crate) fn fediverse_address_as_text(input: &str) -> IResult<&str, Element, CustomError<&str>> { 93 | let (input, consumed) = recognize(tuple((tag("@"), email_address)))(input)?; 94 | Ok((input, Element::Text(consumed))) 95 | } 96 | 97 | fn is_allowed_bot_cmd_suggestion_char(char: char) -> bool { 98 | match char { 99 | '@' | '\\' | '_' | '.' 
| '-' | '/' => true, 100 | _ => char.is_alphanum(), 101 | } 102 | } 103 | 104 | /// Bot command suggestion 105 | fn bot_command_suggestion(input: &str) -> IResult<&str, Element, CustomError<&str>> { 106 | // dc-android's: regex /(?<=^|\\s)/[a-zA-Z][a-zA-Z@\\d_/.-]{0,254}/ 107 | 108 | let (input, content) = recognize(tuple(( 109 | char('/'), 110 | verify(take(1usize), |s: &str| { 111 | s.chars().next().unwrap_or('.').is_alphabetic() 112 | }), 113 | verify(take_while(is_allowed_bot_cmd_suggestion_char), |s: &str| { 114 | s.len() < 256 115 | }), 116 | )))(input)?; 117 | if content.slice(1..).contains('/') { 118 | Ok((input, Element::Text(content))) 119 | } else { 120 | Ok((input, Element::BotCommandSuggestion(content))) 121 | } 122 | } 123 | 124 | pub(crate) fn parse_text_element( 125 | input: &str, 126 | prev_char: Option, 127 | ) -> IResult<&str, Element, CustomError<&str>> { 128 | // the order is important 129 | // generaly more specific parsers that fail/return fast should be in the front 130 | // But keep in mind that the order can also change how and if the parser works as intended 131 | // 132 | // Also as this is the text element parser, 133 | // text elements parsers MUST NOT call the parser for markdown elements internally 134 | 135 | if let Ok((i, elm)) = hashtag(input) { 136 | Ok((i, elm)) 137 | } else if let Ok((i, elm)) = { 138 | if prev_char == Some(' ') || prev_char.is_none() { 139 | bot_command_suggestion(input) 140 | } else { 141 | Err(nom::Err::Error( 142 | CustomError::<&str>::PrecedingWhitespaceMissing, 143 | )) 144 | } 145 | } { 146 | Ok((i, elm)) 147 | } else if let Ok((i, elm)) = fediverse_address_as_text(input) { 148 | Ok((i, elm)) 149 | } else if let Ok((i, elm)) = email_address(input) { 150 | Ok((i, elm)) 151 | } else if let Ok((i, destination)) = LinkDestination::parse(input) { 152 | Ok((i, Element::Link { destination })) 153 | } else if let Ok((i, _)) = linebreak(input) { 154 | Ok((i, Element::Linebreak)) 155 | } else { 156 | 
Err(nom::Err::Error(CustomError::NoElement)) 157 | } 158 | } 159 | 160 | /// consumes all text until [parse_text_element] works again, this method is only for internal use by [text] 161 | /// 162 | /// its output is not useable on its own, always combinate this with [nom::combinator::recognize] 163 | fn eat_text(input: &str) -> IResult<&str, (), CustomError<&str>> { 164 | let mut remaining = input; 165 | while !remaining.is_empty() { 166 | // take 1, because other parsers didn't work (text is always the last used parser) 167 | let (remainder, taken) = take(1usize)(remaining)?; 168 | remaining = remainder; 169 | // peek if there is an element 170 | if peek(|input| parse_text_element(input, taken.chars().next()))(remaining).is_ok() { 171 | break; 172 | } 173 | // take until whitespace 174 | //remaining = take_while(|c| not_blank_space(c))(remaining)?.0; 175 | } 176 | Ok((remaining, ())) 177 | } 178 | 179 | /// Consumes text until another parser of [parse_text_element] works again 180 | /// 181 | /// used as last parser, if the others do not consume the input it consumes the input until another parser works again 182 | /// (uses whitespace seperation to make the parsing faster) 183 | pub(crate) fn text(input: &str) -> IResult<&str, Element, CustomError<&str>> { 184 | let (rest, content) = recognize(eat_text)(input)?; 185 | Ok((rest, Element::Text(content))) 186 | } 187 | -------------------------------------------------------------------------------- /src/parser/utils.rs: -------------------------------------------------------------------------------- 1 | use std::ops::RangeInclusive; 2 | 3 | #[derive(Debug, PartialEq, Eq)] 4 | enum FindRangeResult<'a> { 5 | WasOnRangeStart, 6 | Range(&'a RangeInclusive), 7 | } 8 | 9 | /// Find a range which `code` might be in it. 
10 | /// 11 | /// # Description 12 | /// This function gets a sorted slice of inclusive u32 ranges, performs 13 | /// binary search on them and returns a FindRangeResult enum telling 14 | /// which range the `code` might be in. It returns `FindRangeResult::WasOnRangeStart` 15 | /// if the code was exactly on start of a range. Or a `FindRangeResult::Range(range)` 16 | /// which indicates `code` is in `range` or in no ranges. 17 | /// 18 | /// # Arguments 19 | /// 20 | /// - `code` the u32 to look for a range for. 21 | /// 22 | /// - `ranges` a refernce to a slice of `RangeInclusive` 23 | fn find_range_for_char(code: u32, ranges: &'_ [RangeInclusive]) -> FindRangeResult<'_> { 24 | let index = ranges.binary_search_by_key(&code, |range| *range.start()); 25 | match index { 26 | Ok(_) => FindRangeResult::WasOnRangeStart, 27 | Err(index) => match index { 28 | #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] 29 | 0 => FindRangeResult::Range(&ranges[0]), 30 | // Since `index` can never be 0, `index - 1` will never overflow. Furthermore, the 31 | // maximum value which the binary search function returns is `NUMBER_OF_RANGES`. 32 | // Therefore, `index - 1` will never panic if we index the array with it. 33 | #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] 34 | index => FindRangeResult::Range(&ranges[index - 1]), 35 | }, 36 | } 37 | } 38 | 39 | /// Returns true of `c` is one of the `ranges`, false otherwise. 
40 | /// 41 | /// # Arguments 42 | /// 43 | /// - `c` A number(u32) 44 | /// 45 | /// - `ranges` A sorted slice of ranges to see if `c` is in any one of them 46 | pub fn is_in_one_of_ranges(c: u32, ranges: &[RangeInclusive<u32>]) -> bool { 47 | match find_range_for_char(c, ranges) { 48 | FindRangeResult::WasOnRangeStart => true, 49 | FindRangeResult::Range(range) => range.contains(&c), 50 | } 51 | } 52 | 53 | #[inline(always)] 54 | pub(crate) fn is_alpha(c: char) -> bool { 55 | c.is_alphabetic() 56 | } 57 | 58 | #[inline(always)] 59 | pub(crate) fn is_hex_digit(c: char) -> bool { 60 | c.is_ascii_hexdigit() 61 | } 62 | 63 | #[inline(always)] 64 | pub(crate) fn is_digit(c: char) -> bool { 65 | c.is_ascii_digit() 66 | } 67 | 68 | pub(crate) fn is_sub_delim(c: char) -> bool { 69 | matches!( 70 | c, 71 | '!' | '$' | '&' | '\'' | '(' | ')' | '*' | '+' | ',' | ';' | '=' 72 | ) 73 | } 74 | 75 | pub(crate) fn is_unreserved(c: char) -> bool { 76 | is_alpha(c) || is_digit(c) || is_ireg_special_chars(c) 77 | } 78 | 79 | pub(crate) fn is_ireg_special_chars(c: char) -> bool { 80 | matches!(c, '_' | '.' | '-' | '~') 81 | } 82 | 83 | pub(crate) fn is_white_space(c: char) -> bool { 84 | matches!(c, '\n' | '\r' | '\t' | ' ') 85 | } 86 | 87 | pub(crate) fn is_not_white_space(c: char) -> bool { 88 | !is_white_space(c) 89 | } 90 | 91 | pub(crate) fn is_white_space_but_not_linebreak(c: char) -> bool { 92 | matches!(c, '\t' | ' ') 93 | } 94 | -------------------------------------------------------------------------------- /tests/based_on_issue/exclamation_mark_at_end_of_link_81.rs: -------------------------------------------------------------------------------- 1 | use deltachat_message_parser::parser::Element::*; 2 | use deltachat_message_parser::parser::{parse_desktop_set, parse_markdown_text, parse_only_text}; 3 | 4 | /// don't eat/consume the !
at the end of a link 7 | /// as discussed in https://github.com/deltachat/message-parser/issues/81 8 | 9 | #[test] 10 | fn text_only() { 11 | assert_eq!( 12 | parse_only_text("This is an my site: https://delta.chat!"), 13 | vec![ 14 | Text("This is an my site: "), 15 | Link { 16 | destination: https_link_no_puny("https://delta.chat", "delta.chat",) 17 | }, 18 | Text("!") 19 | ] 20 | ); 21 | assert_eq!( 22 | parse_only_text("This is an my site: https://delta.chat#!test"), 23 | vec![ 24 | Text("This is an my site: "), 25 | Link { 26 | destination: https_link_no_puny("https://delta.chat#!test", "delta.chat",) 27 | } 28 | ] 29 | ); 30 | } 31 | 32 | #[test] 33 | fn desktop_set() { 34 | assert_eq!( 35 | parse_desktop_set("This is an my site: https://delta.chat!"), 36 | vec![ 37 | Text("This is an my site: "), 38 | Link { 39 | destination: https_link_no_puny("https://delta.chat", "delta.chat",) 40 | }, 41 | Text("!") 42 | ] 43 | ); 44 | } 45 | 46 | #[test] 47 | fn desktop_set_negative() { 48 | assert_eq!( 49 | parse_desktop_set("This is an my site: https://delta.chat#!test"), 50 | vec![ 51 | Text("This is an my site: "), 52 | Link { 53 | destination: https_link_no_puny("https://delta.chat#!test", "delta.chat",) 54 | } 55 | ] 56 | ); 57 | } 58 | 59 | #[test] 60 | fn markdown() { 61 | assert_eq!( 62 | parse_markdown_text("This is an my site: https://delta.chat!"), 63 | vec![ 64 | Text("This is an my site: "), 65 | Link { 66 | destination: https_link_no_puny("https://delta.chat", "delta.chat",) 67 | }, 68 | Text("!") 69 | ] 70 | ); 71 | } 72 | #[test] 73 | fn markdown_negative() { 74 | assert_eq!( 75 | parse_markdown_text("This is an my site: https://delta.chat#!test"), 76 | vec![ 77 | Text("This is an my site: "), 78 | Link { 79 | destination: https_link_no_puny("https://delta.chat#!test", "delta.chat",) 80 | } 81 | ] 82 | ); 83 | } 84 | 85 | #[test] 86 | fn still_take_whole_link_in_labled_links() { 87 | assert_eq!( 88 | parse_markdown_text("This is an my
[site](https://delta.chat/!)"), 89 | vec![ 90 | Text("This is an my "), 91 | LabeledLink { 92 | label: vec![Text("site")], 93 | destination: https_link_no_puny("https://delta.chat/!", "delta.chat",) 94 | } 95 | ] 96 | ); 97 | } 98 | -------------------------------------------------------------------------------- /tests/based_on_issue/fediverse_handle_82.rs: -------------------------------------------------------------------------------- 1 | use deltachat_message_parser::parser::Element::*; 2 | use deltachat_message_parser::parser::{parse_desktop_set, parse_markdown_text, parse_only_text}; 3 | 4 | /// don't parse fediverse handles as email addresses. 5 | /// as discussed in https://github.com/deltachat/message-parser/issues/82 6 | 7 | #[test] 8 | fn text_only_fediverse_address_should_be_parsed_as_text() { 9 | assert_eq!( 10 | parse_only_text("you can reach me on @name@domain.tld!"), 11 | vec![ 12 | Text("you can reach me on "), 13 | Text("@name@domain.tld"), 14 | Text("!") 15 | ] 16 | ); 17 | } 18 | 19 | #[test] 20 | fn desktop_set_fediverse_address_should_be_parsed_as_text() { 21 | assert_eq!( 22 | parse_desktop_set("you can reach me on @name@domain.tld!"), 23 | vec![ 24 | Text("you can reach me on "), 25 | Text("@name@domain.tld"), 26 | Text("!") 27 | ] 28 | ); 29 | } 30 | 31 | #[test] 32 | fn markdown_fediverse_address_should_be_parsed_as_text() { 33 | assert_eq!( 34 | parse_markdown_text("you can reach me on @name@domain.tld!"), 35 | vec![ 36 | Text("you can reach me on "), 37 | Text("@name@domain.tld"), 38 | Text("!") 39 | ] 40 | ); 41 | } 42 | -------------------------------------------------------------------------------- /tests/based_on_issue/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod exclamation_mark_at_end_of_link_81; 2 | pub mod fediverse_handle_82; 3 | -------------------------------------------------------------------------------- /tests/emoji/all_desktop_emojis.txt:
-------------------------------------------------------------------------------- 1 | 💯,🔢,😀,😃,😄,😁,😆,😅,🤣,😂,🙂,🙃,🫠,😉,😊,😇,🥰,😍,🤩,😘,😗,☺️,😚,😙,🥲,😋,😛,😜,🤪,😝,🤑,🤗,🤭,🫢,🫣,🤫,🤔,🫡,🤐,🤨,😐,😑,😶,🫥,😶‍🌫️,😏,😒,🙄,😬,😮‍💨,🤥,😌,😔,😪,🤤,😴,😷,🤒,🤕,🤢,🤮,🤧,🥵,🥶,🥴,😵,😵‍💫,🤯,🤠,🥳,🥸,😎,🤓,🧐,😕,🫤,😟,🙁,☹️,😮,😯,😲,😳,🥺,🥹,😦,😧,😨,😰,😥,😢,😭,😱,😖,😣,😞,😓,😩,😫,🥱,😤,😡,😠,🤬,😈,👿,💀,☠️,💩,🤡,👹,👺,👻,👽,👾,🤖,😺,😸,😹,😻,😼,😽,🙀,😿,😾,🙈,🙉,🙊,💋,💌,💘,💝,💖,💗,💓,💞,💕,💟,❣️,💔,❤️‍🔥,❤️‍🩹,❤️,🧡,💛,💚,💙,💜,🤎,🖤,🤍,💢,💥,💫,💦,💨,🕳️,💣,💬,👁️‍🗨️,🗨️,🗯️,💭,💤,👋,👋🏻,👋🏼,👋🏽,👋🏾,👋🏿,🤚,🤚🏻,🤚🏼,🤚🏽,🤚🏾,🤚🏿,🖐️,🖐🏻,🖐🏼,🖐🏽,🖐🏾,🖐🏿,✋,✋🏻,✋🏼,✋🏽,✋🏾,✋🏿,🖖,🖖🏻,🖖🏼,🖖🏽,🖖🏾,🖖🏿,🫱,🫱🏻,🫱🏼,🫱🏽,🫱🏾,🫱🏿,🫲,🫲🏻,🫲🏼,🫲🏽,🫲🏾,🫲🏿,🫳,🫳🏻,🫳🏼,🫳🏽,🫳🏾,🫳🏿,🫴,🫴🏻,🫴🏼,🫴🏽,🫴🏾,🫴🏿,👌,👌🏻,👌🏼,👌🏽,👌🏾,👌🏿,🤌,🤌🏻,🤌🏼,🤌🏽,🤌🏾,🤌🏿,🤏,🤏🏻,🤏🏼,🤏🏽,🤏🏾,🤏🏿,✌️,✌🏻,✌🏼,✌🏽,✌🏾,✌🏿,🤞,🤞🏻,🤞🏼,🤞🏽,🤞🏾,🤞🏿,🫰,🫰🏻,🫰🏼,🫰🏽,🫰🏾,🫰🏿,🤟,🤟🏻,🤟🏼,🤟🏽,🤟🏾,🤟🏿,🤘,🤘🏻,🤘🏼,🤘🏽,🤘🏾,🤘🏿,🤙,🤙🏻,🤙🏼,🤙🏽,🤙🏾,🤙🏿,👈,👈🏻,👈🏼,👈🏽,👈🏾,👈🏿,👉,👉🏻,👉🏼,👉🏽,👉🏾,👉🏿,👆,👆🏻,👆🏼,👆🏽,👆🏾,👆🏿,🖕,🖕🏻,🖕🏼,🖕🏽,🖕🏾,🖕🏿,👇,👇🏻,👇🏼,👇🏽,👇🏾,👇🏿,☝️,☝🏻,☝🏼,☝🏽,☝🏾,☝🏿,🫵,🫵🏻,🫵🏼,🫵🏽,🫵🏾,🫵🏿,👍,👍🏻,👍🏼,👍🏽,👍🏾,👍🏿,👎,👎🏻,👎🏼,👎🏽,👎🏾,👎🏿,✊,✊🏻,✊🏼,✊🏽,✊🏾,✊🏿,👊,👊🏻,👊🏼,👊🏽,👊🏾,👊🏿,🤛,🤛🏻,🤛🏼,🤛🏽,🤛🏾,🤛🏿,🤜,🤜🏻,🤜🏼,🤜🏽,🤜🏾,🤜🏿,👏,👏🏻,👏🏼,👏🏽,👏🏾,👏🏿,🙌,🙌🏻,🙌🏼,🙌🏽,🙌🏾,🙌🏿,🫶,🫶🏻,🫶🏼,🫶🏽,🫶🏾,🫶🏿,👐,👐🏻,👐🏼,👐🏽,👐🏾,👐🏿,🤲,🤲🏻,🤲🏼,🤲🏽,🤲🏾,🤲🏿,🤝,🤝🏻,🤝🏼,🤝🏽,🤝🏾,🤝🏿,🙏,🙏🏻,🙏🏼,🙏🏽,🙏🏾,🙏🏿,✍️,✍🏻,✍🏼,✍🏽,✍🏾,✍🏿,💅,💅🏻,💅🏼,💅🏽,💅🏾,💅🏿,🤳,🤳🏻,🤳🏼,🤳🏽,🤳🏾,🤳🏿,💪,💪🏻,💪🏼,💪🏽,💪🏾,💪🏿,🦾,🦿,🦵,🦵🏻,🦵🏼,🦵🏽,🦵🏾,🦵🏿,🦶,🦶🏻,🦶🏼,🦶🏽,🦶🏾,🦶🏿,👂,👂🏻,👂🏼,👂🏽,👂🏾,👂🏿,🦻,🦻🏻,🦻🏼,🦻🏽,🦻🏾,🦻🏿,👃,👃🏻,👃🏼,👃🏽,👃🏾,👃🏿,🧠,🫀,🫁,🦷,🦴,👀,👁️,👅,👄,🫦,👶,👶🏻,👶🏼,👶🏽,👶🏾,👶🏿,🧒,🧒🏻,🧒🏼,🧒🏽,🧒🏾,🧒🏿,👦,👦🏻,👦🏼,👦🏽,👦🏾,👦🏿,👧,👧🏻,👧🏼,👧🏽,👧🏾,👧🏿,🧑,🧑🏻,🧑🏼,🧑🏽,🧑🏾,🧑🏿,👱,👱🏻,👱🏼,👱🏽,👱🏾,👱🏿,👨,👨🏻,👨🏼,👨🏽,👨🏾,👨🏿,🧔,🧔🏻,🧔🏼,🧔🏽,🧔🏾,🧔🏿,🧔‍♂️,🧔🏻‍♂️,🧔🏼‍♂️,🧔🏽‍♂️,🧔🏾‍♂️,🧔🏿‍♂️,🧔‍♀️,🧔🏻‍♀️,🧔🏼‍♀️,🧔🏽‍♀️,🧔🏾‍♀️,🧔🏿‍♀️,👨‍🦰,👨🏻‍🦰,👨🏼‍🦰,👨🏽‍🦰,👨🏾‍🦰,👨🏿‍🦰,👨‍🦱,👨🏻‍🦱,👨🏼‍🦱,👨🏽‍🦱,👨🏾‍🦱,👨🏿‍🦱,👨‍🦳,👨🏻‍🦳,👨🏼‍🦳,👨🏽‍🦳,👨🏾‍🦳,👨🏿‍🦳,👨‍🦲,👨🏻‍🦲,👨🏼‍🦲,👨🏽‍🦲,👨🏾‍🦲,👨🏿‍🦲,👩,👩🏻,👩🏼,👩🏽,👩🏾,👩🏿,👩‍🦰,👩🏻‍🦰,👩🏼‍🦰,👩🏽‍🦰,👩🏾‍🦰,👩🏿‍🦰,🧑‍🦰,🧑🏻‍🦰,🧑🏼‍🦰,🧑🏽‍🦰,🧑🏾‍🦰,🧑🏿‍🦰,👩‍🦱,👩🏻‍🦱,👩🏼‍🦱,👩🏽‍🦱,👩🏾‍🦱,👩🏿‍🦱,🧑‍🦱,🧑🏻‍🦱,🧑🏼‍🦱,🧑🏽‍🦱,🧑🏾‍🦱,🧑🏿‍🦱,👩‍🦳,👩🏻‍🦳,👩🏼‍🦳,👩🏽‍🦳,👩🏾‍🦳,👩🏿‍🦳,🧑‍🦳,🧑🏻‍🦳,🧑🏼‍🦳,🧑🏽‍🦳,🧑🏾‍🦳,🧑🏿‍🦳,👩‍🦲,👩🏻‍🦲,👩🏼‍🦲,👩🏽‍🦲,👩🏾‍🦲,👩🏿‍🦲,🧑‍🦲,🧑🏻‍🦲,🧑🏼‍🦲,🧑🏽‍🦲,🧑🏾‍🦲,🧑🏿‍🦲,👱‍♀️,👱🏻‍♀️,👱🏼‍♀️,👱🏽‍♀️,👱🏾‍♀️,👱🏿‍♀️,👱‍♂️,👱🏻‍♂️,👱🏼‍♂️,👱🏽‍♂️,👱🏾‍♂️,👱🏿‍♂️,🧓,🧓🏻,🧓🏼,🧓🏽,🧓🏾,🧓🏿,👴,👴🏻,👴🏼,👴🏽,👴🏾,👴🏿,👵,👵🏻,👵🏼,👵🏽,👵🏾,👵🏿,🙍,🙍🏻,🙍🏼,🙍🏽,🙍🏾,🙍🏿,🙍‍♂️,🙍🏻‍♂️,🙍🏼‍♂️,🙍🏽‍♂️,🙍🏾
‍♂️,🙍🏿‍♂️,🙍‍♀️,🙍🏻‍♀️,🙍🏼‍♀️,🙍🏽‍♀️,🙍🏾‍♀️,🙍🏿‍♀️,🙎,🙎🏻,🙎🏼,🙎🏽,🙎🏾,🙎🏿,🙎‍♂️,🙎🏻‍♂️,🙎🏼‍♂️,🙎🏽‍♂️,🙎🏾‍♂️,🙎🏿‍♂️,🙎‍♀️,🙎🏻‍♀️,🙎🏼‍♀️,🙎🏽‍♀️,🙎🏾‍♀️,🙎🏿‍♀️,🙅,🙅🏻,🙅🏼,🙅🏽,🙅🏾,🙅🏿,🙅‍♂️,🙅🏻‍♂️,🙅🏼‍♂️,🙅🏽‍♂️,🙅🏾‍♂️,🙅🏿‍♂️,🙅‍♀️,🙅🏻‍♀️,🙅🏼‍♀️,🙅🏽‍♀️,🙅🏾‍♀️,🙅🏿‍♀️,🙆,🙆🏻,🙆🏼,🙆🏽,🙆🏾,🙆🏿,🙆‍♂️,🙆🏻‍♂️,🙆🏼‍♂️,🙆🏽‍♂️,🙆🏾‍♂️,🙆🏿‍♂️,🙆‍♀️,🙆🏻‍♀️,🙆🏼‍♀️,🙆🏽‍♀️,🙆🏾‍♀️,🙆🏿‍♀️,💁,💁🏻,💁🏼,💁🏽,💁🏾,💁🏿,💁‍♂️,💁🏻‍♂️,💁🏼‍♂️,💁🏽‍♂️,💁🏾‍♂️,💁🏿‍♂️,💁‍♀️,💁🏻‍♀️,💁🏼‍♀️,💁🏽‍♀️,💁🏾‍♀️,💁🏿‍♀️,🙋,🙋🏻,🙋🏼,🙋🏽,🙋🏾,🙋🏿,🙋‍♂️,🙋🏻‍♂️,🙋🏼‍♂️,🙋🏽‍♂️,🙋🏾‍♂️,🙋🏿‍♂️,🙋‍♀️,🙋🏻‍♀️,🙋🏼‍♀️,🙋🏽‍♀️,🙋🏾‍♀️,🙋🏿‍♀️,🧏,🧏🏻,🧏🏼,🧏🏽,🧏🏾,🧏🏿,🧏‍♂️,🧏🏻‍♂️,🧏🏼‍♂️,🧏🏽‍♂️,🧏🏾‍♂️,🧏🏿‍♂️,🧏‍♀️,🧏🏻‍♀️,🧏🏼‍♀️,🧏🏽‍♀️,🧏🏾‍♀️,🧏🏿‍♀️,🙇,🙇🏻,🙇🏼,🙇🏽,🙇🏾,🙇🏿,🙇‍♂️,🙇🏻‍♂️,🙇🏼‍♂️,🙇🏽‍♂️,🙇🏾‍♂️,🙇🏿‍♂️,🙇‍♀️,🙇🏻‍♀️,🙇🏼‍♀️,🙇🏽‍♀️,🙇🏾‍♀️,🙇🏿‍♀️,🤦,🤦🏻,🤦🏼,🤦🏽,🤦🏾,🤦🏿,🤦‍♂️,🤦🏻‍♂️,🤦🏼‍♂️,🤦🏽‍♂️,🤦🏾‍♂️,🤦🏿‍♂️,🤦‍♀️,🤦🏻‍♀️,🤦🏼‍♀️,🤦🏽‍♀️,🤦🏾‍♀️,🤦🏿‍♀️,🤷,🤷🏻,🤷🏼,🤷🏽,🤷🏾,🤷🏿,🤷‍♂️,🤷🏻‍♂️,🤷🏼‍♂️,🤷🏽‍♂️,🤷🏾‍♂️,🤷🏿‍♂️,🤷‍♀️,🤷🏻‍♀️,🤷🏼‍♀️,🤷🏽‍♀️,🤷🏾‍♀️,🤷🏿‍♀️,🧑‍⚕️,🧑🏻‍⚕️,🧑🏼‍⚕️,🧑🏽‍⚕️,🧑🏾‍⚕️,🧑🏿‍⚕️,👨‍⚕️,👨🏻‍⚕️,👨🏼‍⚕️,👨🏽‍⚕️,👨🏾‍⚕️,👨🏿‍⚕️,👩‍⚕️,👩🏻‍⚕️,👩🏼‍⚕️,👩🏽‍⚕️,👩🏾‍⚕️,👩🏿‍⚕️,🧑‍🎓,🧑🏻‍🎓,🧑🏼‍🎓,🧑🏽‍🎓,🧑🏾‍🎓,🧑🏿‍🎓,👨‍🎓,👨🏻‍🎓,👨🏼‍🎓,👨🏽‍🎓,👨🏾‍🎓,👨🏿‍🎓,👩‍🎓,👩🏻‍🎓,👩🏼‍🎓,👩🏽‍🎓,👩🏾‍🎓,👩🏿‍🎓,🧑‍🏫,🧑🏻‍🏫,🧑🏼‍🏫,🧑🏽‍🏫,🧑🏾‍🏫,🧑🏿‍🏫,👨‍🏫,👨🏻‍🏫,👨🏼‍🏫,👨🏽‍🏫,👨🏾‍🏫,👨🏿‍🏫,👩‍🏫,👩🏻‍🏫,👩🏼‍🏫,👩🏽‍🏫,👩🏾‍🏫,👩🏿‍🏫,🧑‍⚖️,🧑🏻‍⚖️,🧑🏼‍⚖️,🧑🏽‍⚖️,🧑🏾‍⚖️,🧑🏿‍⚖️,👨‍⚖️,👨🏻‍⚖️,👨🏼‍⚖️,👨🏽‍⚖️,👨🏾‍⚖️,👨🏿‍⚖️,👩‍⚖️,👩🏻‍⚖️,👩🏼‍⚖️,👩🏽‍⚖️,👩🏾‍⚖️,👩🏿‍⚖️,🧑‍🌾,🧑🏻‍🌾,🧑🏼‍🌾,🧑🏽‍🌾,🧑🏾‍🌾,🧑🏿‍🌾,👨‍🌾,👨🏻‍🌾,👨🏼‍🌾,👨🏽‍🌾,👨🏾‍🌾,👨🏿‍🌾,👩‍🌾,👩🏻‍🌾,👩🏼‍🌾,👩🏽‍🌾,👩🏾‍🌾,👩🏿‍🌾,🧑‍🍳,🧑🏻‍🍳,🧑🏼‍🍳,🧑🏽‍🍳,🧑🏾‍🍳,🧑🏿‍🍳,👨‍🍳,👨🏻‍🍳,👨🏼‍🍳,👨🏽‍🍳,👨🏾‍🍳,👨🏿‍🍳,👩‍🍳,👩🏻‍🍳,👩🏼‍🍳,👩🏽‍🍳,👩🏾‍🍳,👩🏿‍🍳,🧑‍🔧,🧑🏻‍🔧,🧑🏼‍🔧,🧑🏽‍🔧,🧑🏾‍🔧,🧑🏿‍🔧,👨‍🔧,👨🏻‍🔧,👨🏼‍🔧,👨🏽‍🔧,👨🏾‍🔧,👨🏿‍🔧,👩‍🔧,👩🏻‍🔧,👩🏼‍🔧,👩🏽‍🔧,👩🏾‍🔧,👩🏿‍🔧,🧑‍🏭,🧑🏻‍🏭,🧑🏼‍🏭,🧑🏽‍🏭,🧑🏾‍🏭,🧑🏿‍🏭,👨‍🏭,👨🏻‍🏭,👨🏼‍🏭,👨🏽‍🏭,👨🏾‍🏭,👨🏿‍🏭,👩‍🏭,👩🏻‍🏭,👩🏼‍🏭,👩🏽‍🏭,👩🏾‍🏭,👩🏿‍🏭,🧑‍💼,🧑🏻‍💼,🧑🏼‍💼,🧑🏽‍💼,🧑🏾‍💼,🧑🏿‍💼,👨‍💼,👨🏻‍💼,👨🏼‍💼,👨🏽‍💼,👨🏾‍💼,👨🏿‍💼,👩‍💼,👩🏻‍💼,👩🏼‍💼,👩🏽‍💼,👩🏾‍💼,👩🏿‍💼,🧑‍🔬,🧑🏻‍🔬,🧑🏼‍🔬,🧑🏽‍🔬,🧑🏾‍🔬,🧑🏿‍🔬,👨‍🔬,👨🏻‍🔬,👨🏼‍🔬,👨🏽‍🔬,👨🏾‍🔬,👨🏿‍🔬,👩‍🔬,👩🏻‍🔬,👩🏼‍🔬,👩🏽‍🔬,👩🏾‍🔬,👩🏿‍🔬,🧑‍💻,🧑🏻‍💻,🧑🏼‍💻,🧑🏽‍💻,🧑🏾‍💻,🧑🏿‍💻,👨‍💻,👨🏻‍💻,👨🏼‍💻,👨🏽‍💻,👨🏾‍💻,👨🏿‍💻,👩‍💻,👩🏻‍💻,👩🏼‍💻,👩🏽‍💻,👩🏾‍💻,👩🏿‍💻,🧑‍🎤,🧑🏻‍🎤,🧑🏼‍🎤,🧑🏽‍🎤,🧑🏾‍🎤,🧑🏿‍🎤,👨‍🎤,👨🏻‍🎤,👨🏼‍🎤,👨🏽‍🎤,👨🏾‍🎤,👨🏿‍🎤,👩‍🎤,👩🏻‍🎤,👩🏼‍🎤,👩🏽‍🎤,👩🏾‍🎤,👩🏿‍🎤,🧑‍🎨,🧑🏻‍🎨,🧑🏼‍🎨,🧑🏽‍🎨,🧑🏾‍🎨,🧑🏿‍🎨,👨‍🎨,👨🏻‍🎨,👨🏼‍🎨,👨🏽‍🎨,👨🏾‍🎨,👨🏿‍🎨,👩‍🎨,👩🏻‍🎨,👩🏼‍🎨,👩🏽‍🎨,👩🏾‍🎨,👩🏿‍🎨,🧑‍✈️,
🧑🏻‍✈️,🧑🏼‍✈️,🧑🏽‍✈️,🧑🏾‍✈️,🧑🏿‍✈️,👨‍✈️,👨🏻‍✈️,👨🏼‍✈️,👨🏽‍✈️,👨🏾‍✈️,👨🏿‍✈️,👩‍✈️,👩🏻‍✈️,👩🏼‍✈️,👩🏽‍✈️,👩🏾‍✈️,👩🏿‍✈️,🧑‍🚀,🧑🏻‍🚀,🧑🏼‍🚀,🧑🏽‍🚀,🧑🏾‍🚀,🧑🏿‍🚀,👨‍🚀,👨🏻‍🚀,👨🏼‍🚀,👨🏽‍🚀,👨🏾‍🚀,👨🏿‍🚀,👩‍🚀,👩🏻‍🚀,👩🏼‍🚀,👩🏽‍🚀,👩🏾‍🚀,👩🏿‍🚀,🧑‍🚒,🧑🏻‍🚒,🧑🏼‍🚒,🧑🏽‍🚒,🧑🏾‍🚒,🧑🏿‍🚒,👨‍🚒,👨🏻‍🚒,👨🏼‍🚒,👨🏽‍🚒,👨🏾‍🚒,👨🏿‍🚒,👩‍🚒,👩🏻‍🚒,👩🏼‍🚒,👩🏽‍🚒,👩🏾‍🚒,👩🏿‍🚒,👮,👮🏻,👮🏼,👮🏽,👮🏾,👮🏿,👮‍♂️,👮🏻‍♂️,👮🏼‍♂️,👮🏽‍♂️,👮🏾‍♂️,👮🏿‍♂️,👮‍♀️,👮🏻‍♀️,👮🏼‍♀️,👮🏽‍♀️,👮🏾‍♀️,👮🏿‍♀️,🕵️,🕵🏻,🕵🏼,🕵🏽,🕵🏾,🕵🏿,🕵️‍♂️,🕵🏻‍♂️,🕵🏼‍♂️,🕵🏽‍♂️,🕵🏾‍♂️,🕵🏿‍♂️,🕵️‍♀️,🕵🏻‍♀️,🕵🏼‍♀️,🕵🏽‍♀️,🕵🏾‍♀️,🕵🏿‍♀️,💂,💂🏻,💂🏼,💂🏽,💂🏾,💂🏿,💂‍♂️,💂🏻‍♂️,💂🏼‍♂️,💂🏽‍♂️,💂🏾‍♂️,💂🏿‍♂️,💂‍♀️,💂🏻‍♀️,💂🏼‍♀️,💂🏽‍♀️,💂🏾‍♀️,💂🏿‍♀️,🥷,🥷🏻,🥷🏼,🥷🏽,🥷🏾,🥷🏿,👷,👷🏻,👷🏼,👷🏽,👷🏾,👷🏿,👷‍♂️,👷🏻‍♂️,👷🏼‍♂️,👷🏽‍♂️,👷🏾‍♂️,👷🏿‍♂️,👷‍♀️,👷🏻‍♀️,👷🏼‍♀️,👷🏽‍♀️,👷🏾‍♀️,👷🏿‍♀️,🫅,🫅🏻,🫅🏼,🫅🏽,🫅🏾,🫅🏿,🤴,🤴🏻,🤴🏼,🤴🏽,🤴🏾,🤴🏿,👸,👸🏻,👸🏼,👸🏽,👸🏾,👸🏿,👳,👳🏻,👳🏼,👳🏽,👳🏾,👳🏿,👳‍♂️,👳🏻‍♂️,👳🏼‍♂️,👳🏽‍♂️,👳🏾‍♂️,👳🏿‍♂️,👳‍♀️,👳🏻‍♀️,👳🏼‍♀️,👳🏽‍♀️,👳🏾‍♀️,👳🏿‍♀️,👲,👲🏻,👲🏼,👲🏽,👲🏾,👲🏿,🧕,🧕🏻,🧕🏼,🧕🏽,🧕🏾,🧕🏿,🤵,🤵🏻,🤵🏼,🤵🏽,🤵🏾,🤵🏿,🤵‍♂️,🤵🏻‍♂️,🤵🏼‍♂️,🤵🏽‍♂️,🤵🏾‍♂️,🤵🏿‍♂️,🤵‍♀️,🤵🏻‍♀️,🤵🏼‍♀️,🤵🏽‍♀️,🤵🏾‍♀️,🤵🏿‍♀️,👰,👰🏻,👰🏼,👰🏽,👰🏾,👰🏿,👰‍♂️,👰🏻‍♂️,👰🏼‍♂️,👰🏽‍♂️,👰🏾‍♂️,👰🏿‍♂️,👰‍♀️,👰🏻‍♀️,👰🏼‍♀️,👰🏽‍♀️,👰🏾‍♀️,👰🏿‍♀️,🤰,🤰🏻,🤰🏼,🤰🏽,🤰🏾,🤰🏿,🫃,🫃🏻,🫃🏼,🫃🏽,🫃🏾,🫃🏿,🫄,🫄🏻,🫄🏼,🫄🏽,🫄🏾,🫄🏿,🤱,🤱🏻,🤱🏼,🤱🏽,🤱🏾,🤱🏿,👩‍🍼,👩🏻‍🍼,👩🏼‍🍼,👩🏽‍🍼,👩🏾‍🍼,👩🏿‍🍼,👨‍🍼,👨🏻‍🍼,👨🏼‍🍼,👨🏽‍🍼,👨🏾‍🍼,👨🏿‍🍼,🧑‍🍼,🧑🏻‍🍼,🧑🏼‍🍼,🧑🏽‍🍼,🧑🏾‍🍼,🧑🏿‍🍼,👼,👼🏻,👼🏼,👼🏽,👼🏾,👼🏿,🎅,🎅🏻,🎅🏼,🎅🏽,🎅🏾,🎅🏿,🤶,🤶🏻,🤶🏼,🤶🏽,🤶🏾,🤶🏿,🧑‍🎄,🧑🏻‍🎄,🧑🏼‍🎄,🧑🏽‍🎄,🧑🏾‍🎄,🧑🏿‍🎄,🦸,🦸🏻,🦸🏼,🦸🏽,🦸🏾,🦸🏿,🦸‍♂️,🦸🏻‍♂️,🦸🏼‍♂️,🦸🏽‍♂️,🦸🏾‍♂️,🦸🏿‍♂️,🦸‍♀️,🦸🏻‍♀️,🦸🏼‍♀️,🦸🏽‍♀️,🦸🏾‍♀️,🦸🏿‍♀️,🦹,🦹🏻,🦹🏼,🦹🏽,🦹🏾,🦹🏿,🦹‍♂️,🦹🏻‍♂️,🦹🏼‍♂️,🦹🏽‍♂️,🦹🏾‍♂️,🦹🏿‍♂️,🦹‍♀️,🦹🏻‍♀️,🦹🏼‍♀️,🦹🏽‍♀️,🦹🏾‍♀️,🦹🏿‍♀️,🧙,🧙🏻,🧙🏼,🧙🏽,🧙🏾,🧙🏿,🧙‍♂️,🧙🏻‍♂️,🧙🏼‍♂️,🧙🏽‍♂️,🧙🏾‍♂️,🧙🏿‍♂️,🧙‍♀️,🧙🏻‍♀️,🧙🏼‍♀️,🧙🏽‍♀️,🧙🏾‍♀️,🧙🏿‍♀️,🧚,🧚🏻,🧚🏼,🧚🏽,🧚🏾,🧚🏿,🧚‍♂️,🧚🏻‍♂️,🧚🏼‍♂️,🧚🏽‍♂️,🧚🏾‍♂️,🧚🏿‍♂️,🧚‍♀️,🧚🏻‍♀️,🧚🏼‍♀️,🧚🏽‍♀️,🧚🏾‍♀️,🧚🏿‍♀️,🧛,🧛🏻,🧛🏼,🧛🏽,🧛🏾,🧛🏿,🧛‍♂️,🧛🏻‍♂️,🧛🏼‍♂️,🧛🏽‍♂️,🧛🏾‍♂️,🧛🏿‍♂️,🧛‍♀️,🧛🏻‍♀️,🧛🏼‍♀️,🧛🏽‍♀️,🧛🏾‍♀️,🧛🏿‍♀️,🧜,🧜🏻,🧜🏼,🧜🏽,🧜🏾,🧜🏿,🧜‍♂️,🧜🏻‍♂️,🧜🏼‍♂️,🧜🏽‍♂️,🧜🏾‍♂️,🧜🏿‍♂️,🧜‍♀️,🧜🏻‍♀️,🧜🏼‍♀️,🧜🏽‍♀️,🧜🏾‍♀️,🧜🏿‍♀️,🧝,🧝🏻,🧝🏼,🧝🏽,🧝🏾,🧝🏿,🧝‍♂️,🧝🏻‍♂️,🧝🏼‍♂️,🧝🏽‍♂️,🧝🏾‍♂️,🧝🏿‍♂️,🧝‍♀️,🧝🏻‍♀️,🧝🏼‍♀️,🧝🏽‍♀️,🧝🏾‍♀️,🧝🏿‍♀️,🧞,🧞‍♂️,🧞‍♀️,🧟,🧟‍♂️,🧟‍♀️,🧌,💆,💆🏻,💆🏼,💆🏽,💆🏾,💆🏿,💆‍♂️,💆🏻‍♂️,💆🏼‍♂️,💆🏽‍♂️,💆🏾‍♂️,💆🏿‍♂️,💆‍♀️,💆🏻‍♀️,💆🏼‍♀️,💆🏽‍♀️,💆🏾‍♀️,💆🏿‍♀️,💇,💇🏻,💇🏼,💇🏽,💇🏾,💇🏿,💇‍♂️,💇🏻‍♂️,💇🏼‍♂️,💇🏽‍♂️,💇🏾‍♂️,💇🏿‍♂️,💇‍♀
️,💇🏻‍♀️,💇🏼‍♀️,💇🏽‍♀️,💇🏾‍♀️,💇🏿‍♀️,🚶,🚶🏻,🚶🏼,🚶🏽,🚶🏾,🚶🏿,🚶‍♂️,🚶🏻‍♂️,🚶🏼‍♂️,🚶🏽‍♂️,🚶🏾‍♂️,🚶🏿‍♂️,🚶‍♀️,🚶🏻‍♀️,🚶🏼‍♀️,🚶🏽‍♀️,🚶🏾‍♀️,🚶🏿‍♀️,🧍,🧍🏻,🧍🏼,🧍🏽,🧍🏾,🧍🏿,🧍‍♂️,🧍🏻‍♂️,🧍🏼‍♂️,🧍🏽‍♂️,🧍🏾‍♂️,🧍🏿‍♂️,🧍‍♀️,🧍🏻‍♀️,🧍🏼‍♀️,🧍🏽‍♀️,🧍🏾‍♀️,🧍🏿‍♀️,🧎,🧎🏻,🧎🏼,🧎🏽,🧎🏾,🧎🏿,🧎‍♂️,🧎🏻‍♂️,🧎🏼‍♂️,🧎🏽‍♂️,🧎🏾‍♂️,🧎🏿‍♂️,🧎‍♀️,🧎🏻‍♀️,🧎🏼‍♀️,🧎🏽‍♀️,🧎🏾‍♀️,🧎🏿‍♀️,🧑‍🦯,🧑🏻‍🦯,🧑🏼‍🦯,🧑🏽‍🦯,🧑🏾‍🦯,🧑🏿‍🦯,👨‍🦯,👨🏻‍🦯,👨🏼‍🦯,👨🏽‍🦯,👨🏾‍🦯,👨🏿‍🦯,👩‍🦯,👩🏻‍🦯,👩🏼‍🦯,👩🏽‍🦯,👩🏾‍🦯,👩🏿‍🦯,🧑‍🦼,🧑🏻‍🦼,🧑🏼‍🦼,🧑🏽‍🦼,🧑🏾‍🦼,🧑🏿‍🦼,👨‍🦼,👨🏻‍🦼,👨🏼‍🦼,👨🏽‍🦼,👨🏾‍🦼,👨🏿‍🦼,👩‍🦼,👩🏻‍🦼,👩🏼‍🦼,👩🏽‍🦼,👩🏾‍🦼,👩🏿‍🦼,🧑‍🦽,🧑🏻‍🦽,🧑🏼‍🦽,🧑🏽‍🦽,🧑🏾‍🦽,🧑🏿‍🦽,👨‍🦽,👨🏻‍🦽,👨🏼‍🦽,👨🏽‍🦽,👨🏾‍🦽,👨🏿‍🦽,👩‍🦽,👩🏻‍🦽,👩🏼‍🦽,👩🏽‍🦽,👩🏾‍🦽,👩🏿‍🦽,🏃,🏃🏻,🏃🏼,🏃🏽,🏃🏾,🏃🏿,🏃‍♂️,🏃🏻‍♂️,🏃🏼‍♂️,🏃🏽‍♂️,🏃🏾‍♂️,🏃🏿‍♂️,🏃‍♀️,🏃🏻‍♀️,🏃🏼‍♀️,🏃🏽‍♀️,🏃🏾‍♀️,🏃🏿‍♀️,💃,💃🏻,💃🏼,💃🏽,💃🏾,💃🏿,🕺,🕺🏻,🕺🏼,🕺🏽,🕺🏾,🕺🏿,🕴️,🕴🏻,🕴🏼,🕴🏽,🕴🏾,🕴🏿,👯,👯‍♂️,👯‍♀️,🧖,🧖🏻,🧖🏼,🧖🏽,🧖🏾,🧖🏿,🧖‍♂️,🧖🏻‍♂️,🧖🏼‍♂️,🧖🏽‍♂️,🧖🏾‍♂️,🧖🏿‍♂️,🧖‍♀️,🧖🏻‍♀️,🧖🏼‍♀️,🧖🏽‍♀️,🧖🏾‍♀️,🧖🏿‍♀️,🧗,🧗🏻,🧗🏼,🧗🏽,🧗🏾,🧗🏿,🧗‍♂️,🧗🏻‍♂️,🧗🏼‍♂️,🧗🏽‍♂️,🧗🏾‍♂️,🧗🏿‍♂️,🧗‍♀️,🧗🏻‍♀️,🧗🏼‍♀️,🧗🏽‍♀️,🧗🏾‍♀️,🧗🏿‍♀️,🤺,🏇,🏇🏻,🏇🏼,🏇🏽,🏇🏾,🏇🏿,⛷️,🏂,🏂🏻,🏂🏼,🏂🏽,🏂🏾,🏂🏿,🏌️,🏌🏻,🏌🏼,🏌🏽,🏌🏾,🏌🏿,🏌️‍♂️,🏌🏻‍♂️,🏌🏼‍♂️,🏌🏽‍♂️,🏌🏾‍♂️,🏌🏿‍♂️,🏌️‍♀️,🏌🏻‍♀️,🏌🏼‍♀️,🏌🏽‍♀️,🏌🏾‍♀️,🏌🏿‍♀️,🏄,🏄🏻,🏄🏼,🏄🏽,🏄🏾,🏄🏿,🏄‍♂️,🏄🏻‍♂️,🏄🏼‍♂️,🏄🏽‍♂️,🏄🏾‍♂️,🏄🏿‍♂️,🏄‍♀️,🏄🏻‍♀️,🏄🏼‍♀️,🏄🏽‍♀️,🏄🏾‍♀️,🏄🏿‍♀️,🚣,🚣🏻,🚣🏼,🚣🏽,🚣🏾,🚣🏿,🚣‍♂️,🚣🏻‍♂️,🚣🏼‍♂️,🚣🏽‍♂️,🚣🏾‍♂️,🚣🏿‍♂️,🚣‍♀️,🚣🏻‍♀️,🚣🏼‍♀️,🚣🏽‍♀️,🚣🏾‍♀️,🚣🏿‍♀️,🏊,🏊🏻,🏊🏼,🏊🏽,🏊🏾,🏊🏿,🏊‍♂️,🏊🏻‍♂️,🏊🏼‍♂️,🏊🏽‍♂️,🏊🏾‍♂️,🏊🏿‍♂️,🏊‍♀️,🏊🏻‍♀️,🏊🏼‍♀️,🏊🏽‍♀️,🏊🏾‍♀️,🏊🏿‍♀️,⛹️,⛹🏻,⛹🏼,⛹🏽,⛹🏾,⛹🏿,⛹️‍♂️,⛹🏻‍♂️,⛹🏼‍♂️,⛹🏽‍♂️,⛹🏾‍♂️,⛹🏿‍♂️,⛹️‍♀️,⛹🏻‍♀️,⛹🏼‍♀️,⛹🏽‍♀️,⛹🏾‍♀️,⛹🏿‍♀️,🏋️,🏋🏻,🏋🏼,🏋🏽,🏋🏾,🏋🏿,🏋️‍♂️,🏋🏻‍♂️,🏋🏼‍♂️,🏋🏽‍♂️,🏋🏾‍♂️,🏋🏿‍♂️,🏋️‍♀️,🏋🏻‍♀️,🏋🏼‍♀️,🏋🏽‍♀️,🏋🏾‍♀️,🏋🏿‍♀️,🚴,🚴🏻,🚴🏼,🚴🏽,🚴🏾,🚴🏿,🚴‍♂️,🚴🏻‍♂️,🚴🏼‍♂️,🚴🏽‍♂️,🚴🏾‍♂️,🚴🏿‍♂️,🚴‍♀️,🚴🏻‍♀️,🚴🏼‍♀️,🚴🏽‍♀️,🚴🏾‍♀️,🚴🏿‍♀️,🚵,🚵🏻,🚵🏼,🚵🏽,🚵🏾,🚵🏿,🚵‍♂️,🚵🏻‍♂️,🚵🏼‍♂️,🚵🏽‍♂️,🚵🏾‍♂️,🚵🏿‍♂️,🚵‍♀️,🚵🏻‍♀️,🚵🏼‍♀️,🚵🏽‍♀️,🚵🏾‍♀️,🚵🏿‍♀️,🤸,🤸🏻,🤸🏼,🤸🏽,🤸🏾,🤸🏿,🤸‍♂️,🤸🏻‍♂️,🤸🏼‍♂️,🤸🏽‍♂️,🤸🏾‍♂️,🤸🏿‍♂️,🤸‍♀️,🤸🏻‍♀️,🤸🏼‍♀️,🤸🏽‍♀️,🤸🏾‍♀️,🤸🏿‍♀️,🤼,🤼‍♂️,🤼‍♀️,🤽,🤽🏻,🤽🏼,🤽🏽,🤽🏾,🤽🏿,🤽‍♂️,🤽🏻‍♂️,🤽🏼‍♂️,🤽🏽‍♂️,🤽🏾‍♂️,🤽🏿‍♂️,🤽‍♀️,🤽🏻‍♀️,🤽🏼‍♀️,🤽🏽‍♀️,🤽🏾‍♀️,🤽🏿‍♀️,🤾,🤾🏻,🤾🏼,🤾🏽,🤾🏾,🤾🏿,🤾‍♂️,🤾🏻‍♂️,🤾🏼‍♂️,🤾🏽‍♂️,🤾🏾‍♂️,🤾🏿‍♂️,🤾‍♀️,🤾🏻‍♀️,🤾🏼‍♀️,🤾🏽‍♀️,🤾🏾‍♀️,🤾🏿‍♀️,🤹,🤹🏻,🤹🏼,🤹🏽,🤹🏾,🤹🏿,🤹‍♂️,🤹🏻‍♂️,🤹🏼‍♂️,🤹🏽‍♂️,🤹🏾‍♂️,🤹🏿‍♂️,🤹‍♀️,🤹🏻‍♀️,🤹🏼‍♀️,🤹🏽‍♀️,🤹🏾‍♀️,🤹🏿‍♀️,🧘,🧘🏻,🧘🏼,🧘🏽,🧘🏾,🧘🏿,
🧘‍♂️,🧘🏻‍♂️,🧘🏼‍♂️,🧘🏽‍♂️,🧘🏾‍♂️,🧘🏿‍♂️,🧘‍♀️,🧘🏻‍♀️,🧘🏼‍♀️,🧘🏽‍♀️,🧘🏾‍♀️,🧘🏿‍♀️,🛀,🛀🏻,🛀🏼,🛀🏽,🛀🏾,🛀🏿,🛌,🛌🏻,🛌🏼,🛌🏽,🛌🏾,🛌🏿,🧑‍🤝‍🧑,🧑🏻‍🤝‍🧑🏻,🧑🏼‍🤝‍🧑🏼,🧑🏽‍🤝‍🧑🏽,🧑🏾‍🤝‍🧑🏾,🧑🏿‍🤝‍🧑🏿,👭,👭🏻,👭🏼,👭🏽,👭🏾,👭🏿,👫,👫🏻,👫🏼,👫🏽,👫🏾,👫🏿,👬,👬🏻,👬🏼,👬🏽,👬🏾,👬🏿,💏,💏🏻,💏🏼,💏🏽,💏🏾,💏🏿,👩‍❤️‍💋‍👨,👩🏻‍❤️‍💋‍👨🏻,👩🏼‍❤️‍💋‍👨🏼,👩🏽‍❤️‍💋‍👨🏽,👩🏾‍❤️‍💋‍👨🏾,👩🏿‍❤️‍💋‍👨🏿,👨‍❤️‍💋‍👨,👨🏻‍❤️‍💋‍👨🏻,👨🏼‍❤️‍💋‍👨🏼,👨🏽‍❤️‍💋‍👨🏽,👨🏾‍❤️‍💋‍👨🏾,👨🏿‍❤️‍💋‍👨🏿,👩‍❤️‍💋‍👩,👩🏻‍❤️‍💋‍👩🏻,👩🏼‍❤️‍💋‍👩🏼,👩🏽‍❤️‍💋‍👩🏽,👩🏾‍❤️‍💋‍👩🏾,👩🏿‍❤️‍💋‍👩🏿,💑,💑🏻,💑🏼,💑🏽,💑🏾,💑🏿,👩‍❤️‍👨,👩🏻‍❤️‍👨🏻,👩🏼‍❤️‍👨🏼,👩🏽‍❤️‍👨🏽,👩🏾‍❤️‍👨🏾,👩🏿‍❤️‍👨🏿,👨‍❤️‍👨,👨🏻‍❤️‍👨🏻,👨🏼‍❤️‍👨🏼,👨🏽‍❤️‍👨🏽,👨🏾‍❤️‍👨🏾,👨🏿‍❤️‍👨🏿,👩‍❤️‍👩,👩🏻‍❤️‍👩🏻,👩🏼‍❤️‍👩🏼,👩🏽‍❤️‍👩🏽,👩🏾‍❤️‍👩🏾,👩🏿‍❤️‍👩🏿,👪,👨‍👩‍👦,👨‍👩‍👧,👨‍👩‍👧‍👦,👨‍👩‍👦‍👦,👨‍👩‍👧‍👧,👨‍👨‍👦,👨‍👨‍👧,👨‍👨‍👧‍👦,👨‍👨‍👦‍👦,👨‍👨‍👧‍👧,👩‍👩‍👦,👩‍👩‍👧,👩‍👩‍👧‍👦,👩‍👩‍👦‍👦,👩‍👩‍👧‍👧,👨‍👦,👨‍👦‍👦,👨‍👧,👨‍👧‍👦,👨‍👧‍👧,👩‍👦,👩‍👦‍👦,👩‍👧,👩‍👧‍👦,👩‍👧‍👧,🗣️,👤,👥,🫂,👣,🐵,🐒,🦍,🦧,🐶,🐕,🦮,🐕‍🦺,🐩,🐺,🦊,🦝,🐱,🐈,🐈‍⬛,🦁,🐯,🐅,🐆,🐴,🐎,🦄,🦓,🦌,🦬,🐮,🐂,🐃,🐄,🐷,🐖,🐗,🐽,🐏,🐑,🐐,🐪,🐫,🦙,🦒,🐘,🦣,🦏,🦛,🐭,🐁,🐀,🐹,🐰,🐇,🐿️,🦫,🦔,🦇,🐻,🐻‍❄️,🐨,🐼,🦥,🦦,🦨,🦘,🦡,🐾,🦃,🐔,🐓,🐣,🐤,🐥,🐦,🐧,🕊️,🦅,🦆,🦢,🦉,🦤,🪶,🦩,🦚,🦜,🐸,🐊,🐢,🦎,🐍,🐲,🐉,🦕,🦖,🐳,🐋,🐬,🦭,🐟,🐠,🐡,🦈,🐙,🐚,🪸,🐌,🦋,🐛,🐜,🐝,🪲,🐞,🦗,🪳,🕷️,🕸️,🦂,🦟,🪰,🪱,🦠,💐,🌸,💮,🪷,🏵️,🌹,🥀,🌺,🌻,🌼,🌷,🌱,🪴,🌲,🌳,🌴,🌵,🌾,🌿,☘️,🍀,🍁,🍂,🍃,🪹,🪺,🍇,🍈,🍉,🍊,🍋,🍌,🍍,🥭,🍎,🍏,🍐,🍑,🍒,🍓,🫐,🥝,🍅,🫒,🥥,🥑,🍆,🥔,🥕,🌽,🌶️,🫑,🥒,🥬,🥦,🧄,🧅,🍄,🥜,🫘,🌰,🍞,🥐,🥖,🫓,🥨,🥯,🥞,🧇,🧀,🍖,🍗,🥩,🥓,🍔,🍟,🍕,🌭,🥪,🌮,🌯,🫔,🥙,🧆,🥚,🍳,🥘,🍲,🫕,🥣,🥗,🍿,🧈,🧂,🥫,🍱,🍘,🍙,🍚,🍛,🍜,🍝,🍠,🍢,🍣,🍤,🍥,🥮,🍡,🥟,🥠,🥡,🦀,🦞,🦐,🦑,🦪,🍦,🍧,🍨,🍩,🍪,🎂,🍰,🧁,🥧,🍫,🍬,🍭,🍮,🍯,🍼,🥛,☕,🫖,🍵,🍶,🍾,🍷,🍸,🍹,🍺,🍻,🥂,🥃,🫗,🥤,🧋,🧃,🧉,🧊,🥢,🍽️,🍴,🥄,🔪,🫙,🏺,🌍,🌎,🌏,🌐,🗺️,🗾,🧭,🏔️,⛰️,🌋,🗻,🏕️,🏖️,🏜️,🏝️,🏞️,🏟️,🏛️,🏗️,🧱,🪨,🪵,🛖,🏘️,🏚️,🏠,🏡,🏢,🏣,🏤,🏥,🏦,🏨,🏩,🏪,🏫,🏬,🏭,🏯,🏰,💒,🗼,🗽,⛪,🕌,🛕,🕍,⛩️,🕋,⛲,⛺,🌁,🌃,🏙️,🌄,🌅,🌆,🌇,🌉,♨️,🎠,🛝,🎡,🎢,💈,🎪,🚂,🚃,🚄,🚅,🚆,🚇,🚈,🚉,🚊,🚝,🚞,🚋,🚌,🚍,🚎,🚐,🚑,🚒,🚓,🚔,🚕,🚖,🚗,🚘,🚙,🛻,🚚,🚛,🚜,🏎️,🏍️,🛵,🦽,🦼,🛺,🚲,🛴,🛹,🛼,🚏,🛣️,🛤️,🛢️,⛽,🛞,🚨,🚥,🚦,🛑,🚧,⚓,🛟,⛵,🛶,🚤,🛳️,⛴️,🛥️,🚢,✈️,🛩️,🛫,🛬,🪂,💺,🚁,🚟,🚠,🚡,🛰️,🚀,🛸,🛎️,🧳,⌛,⏳,⌚,⏰,⏱️,⏲️,🕰️,🕛,🕧,🕐,🕜,🕑,🕝,🕒,🕞,🕓,🕟,🕔,🕠,🕕,🕡,🕖,🕢,🕗,🕣,🕘,🕤,🕙,🕥,🕚,🕦,🌑,🌒,🌓,🌔,🌕,🌖,🌗,🌘,🌙,🌚,🌛,🌜,🌡️,☀️,🌝,🌞,🪐,⭐,🌟,🌠,🌌,☁️,⛅,⛈️,🌤️,🌥️,🌦️,🌧️,🌨️,🌩️,🌪️,🌫️,🌬️,🌀,🌈,🌂,☂️,☔,⛱️,⚡,❄️,☃️,⛄,☄️,🔥,💧,🌊,🎃,🎄,🎆,🎇,🧨,✨,🎈,🎉,🎊,🎋,🎍,🎎,🎏,🎐,🎑,🧧,🎀,🎁,🎗️,🎟️,🎫,🎖️,🏆,🏅,🥇,🥈,🥉,⚽,⚾,🥎,🏀,🏐,🏈,🏉,🎾,🥏,🎳,🏏,🏑,🏒,🥍,🏓,🏸,🥊,🥋,🥅,⛳,⛸️,🎣,🤿,🎽,🎿,🛷,🥌,🎯,🪀,🪁,🎱,🔮,🪄,🧿,🪬,🎮,🕹️,🎰,🎲,🧩,🧸,🪅,🪩,🪆,♠️,♥️,♦️,♣️,♟️,🃏,🀄,🎴,🎭,🖼️,🎨,🧵,🪡,🧶,🪢,👓,🕶️,🥽,🥼,
🦺,👔,👕,👖,🧣,🧤,🧥,🧦,👗,👘,🥻,🩱,🩲,🩳,👙,👚,👛,👜,👝,🛍️,🎒,🩴,👞,👟,🥾,🥿,👠,👡,🩰,👢,👑,👒,🎩,🎓,🧢,🪖,⛑️,📿,💄,💍,💎,🔇,🔈,🔉,🔊,📢,📣,📯,🔔,🔕,🎼,🎵,🎶,🎙️,🎚️,🎛️,🎤,🎧,📻,🎷,🪗,🎸,🎹,🎺,🎻,🪕,🥁,🪘,📱,📲,☎️,📞,📟,📠,🔋,🪫,🔌,💻,🖥️,🖨️,⌨️,🖱️,🖲️,💽,💾,💿,📀,🧮,🎥,🎞️,📽️,🎬,📺,📷,📸,📹,📼,🔍,🔎,🕯️,💡,🔦,🏮,🪔,📔,📕,📖,📗,📘,📙,📚,📓,📒,📃,📜,📄,📰,🗞️,📑,🔖,🏷️,💰,🪙,💴,💵,💶,💷,💸,💳,🧾,💹,✉️,📧,📨,📩,📤,📥,📦,📫,📪,📬,📭,📮,🗳️,✏️,✒️,🖋️,🖊️,🖌️,🖍️,📝,💼,📁,📂,🗂️,📅,📆,🗒️,🗓️,📇,📈,📉,📊,📋,📌,📍,📎,🖇️,📏,📐,✂️,🗃️,🗄️,🗑️,🔒,🔓,🔏,🔐,🔑,🗝️,🔨,🪓,⛏️,⚒️,🛠️,🗡️,⚔️,🔫,🪃,🏹,🛡️,🪚,🔧,🪛,🔩,⚙️,🗜️,⚖️,🦯,🔗,⛓️,🪝,🧰,🧲,🪜,⚗️,🧪,🧫,🧬,🔬,🔭,📡,💉,🩸,💊,🩹,🩼,🩺,🩻,🚪,🛗,🪞,🪟,🛏️,🛋️,🪑,🚽,🪠,🚿,🛁,🪤,🪒,🧴,🧷,🧹,🧺,🧻,🪣,🧼,🫧,🪥,🧽,🧯,🛒,🚬,⚰️,🪦,⚱️,🗿,🪧,🪪,🏧,🚮,🚰,♿,🚹,🚺,🚻,🚼,🚾,🛂,🛃,🛄,🛅,⚠️,🚸,⛔,🚫,🚳,🚭,🚯,🚱,🚷,📵,🔞,☢️,☣️,⬆️,↗️,➡️,↘️,⬇️,↙️,⬅️,↖️,↕️,↔️,↩️,↪️,⤴️,⤵️,🔃,🔄,🔙,🔚,🔛,🔜,🔝,🛐,⚛️,🕉️,✡️,☸️,☯️,✝️,☦️,☪️,☮️,🕎,🔯,♈,♉,♊,♋,♌,♍,♎,♏,♐,♑,♒,♓,⛎,🔀,🔁,🔂,▶️,⏩,⏭️,⏯️,◀️,⏪,⏮️,🔼,⏫,🔽,⏬,⏸️,⏹️,⏺️,⏏️,🎦,🔅,🔆,📶,📳,📴,♀️,♂️,⚧️,✖️,➕,➖,➗,🟰,♾️,‼️,⁉️,❓,❔,❕,❗,〰️,💱,💲,⚕️,♻️,⚜️,🔱,📛,🔰,⭕,✅,☑️,✔️,❌,❎,➰,➿,〽️,✳️,✴️,❇️,©️,®️,™️,#️⃣,*️⃣,0️⃣,1️⃣,2️⃣,3️⃣,4️⃣,5️⃣,6️⃣,7️⃣,8️⃣,9️⃣,🔟,🔠,🔡,🔣,🔤,🅰️,🆎,🅱️,🆑,🆒,🆓,ℹ️,🆔,Ⓜ️,🆕,🆖,🅾️,🆗,🅿️,🆘,🆙,🆚,🈁,🈂️,🈷️,🈶,🈯,🉐,🈹,🈚,🈲,🉑,🈸,🈴,🈳,㊗️,㊙️,🈺,🈵,🔴,🟠,🟡,🟢,🔵,🟣,🟤,⚫,⚪,🟥,🟧,🟨,🟩,🟦,🟪,🟫,⬛,⬜,◼️,◻️,◾,◽,▪️,▫️,🔶,🔷,🔸,🔹,🔺,🔻,💠,🔘,🔳,🔲,🏁,🚩,🎌,🏴,🏳️,🏳️‍🌈,🏳️‍⚧️,🏴‍☠️,🇦🇨,🇦🇩,🇦🇪,🇦🇫,🇦🇬,🇦🇮,🇦🇱,🇦🇲,🇦🇴,🇦🇶,🇦🇷,🇦🇸,🇦🇹,🇦🇺,🇦🇼,🇦🇽,🇦🇿,🇧🇦,🇧🇧,🇧🇩,🇧🇪,🇧🇫,🇧🇬,🇧🇭,🇧🇮,🇧🇯,🇧🇱,🇧🇲,🇧🇳,🇧🇴,🇧🇶,🇧🇷,🇧🇸,🇧🇹,🇧🇻,🇧🇼,🇧🇾,🇧🇿,🇨🇦,🇨🇨,🇨🇩,🇨🇫,🇨🇬,🇨🇭,🇨🇮,🇨🇰,🇨🇱,🇨🇲,🇨🇳,🇨🇴,🇨🇵,🇨🇷,🇨🇺,🇨🇻,🇨🇼,🇨🇽,🇨🇾,🇨🇿,🇩🇪,🇩🇬,🇩🇯,🇩🇰,🇩🇲,🇩🇴,🇩🇿,🇪🇦,🇪🇨,🇪🇪,🇪🇬,🇪🇭,🇪🇷,🇪🇸,🇪🇹,🇪🇺,🇫🇮,🇫🇯,🇫🇰,🇫🇲,🇫🇴,🇫🇷,🇬🇦,🇬🇧,🇬🇩,🇬🇪,🇬🇫,🇬🇬,🇬🇭,🇬🇮,🇬🇱,🇬🇲,🇬🇳,🇬🇵,🇬🇶,🇬🇷,🇬🇸,🇬🇹,🇬🇺,🇬🇼,🇬🇾,🇭🇰,🇭🇲,🇭🇳,🇭🇷,🇭🇹,🇭🇺,🇮🇨,🇮🇩,🇮🇪,🇮🇱,🇮🇲,🇮🇳,🇮🇴,🇮🇶,🇮🇷,🇮🇸,🇮🇹,🇯🇪,🇯🇲,🇯🇴,🇯🇵,🇰🇪,🇰🇬,🇰🇭,🇰🇮,🇰🇲,🇰🇳,🇰🇵,🇰🇷,🇰🇼,🇰🇾,🇰🇿,🇱🇦,🇱🇧,🇱🇨,🇱🇮,🇱🇰,🇱🇷,🇱🇸,🇱🇹,🇱🇺,🇱🇻,🇱🇾,🇲🇦,🇲🇨,🇲🇩,🇲🇪,🇲🇫,🇲🇬,🇲🇭,🇲🇰,🇲🇱,🇲🇲,🇲🇳,🇲🇴,🇲🇵,🇲🇶,🇲🇷,🇲🇸,🇲🇹,🇲🇺,🇲🇻,🇲🇼,🇲🇽,🇲🇾,🇲🇿,🇳🇦,🇳🇨,🇳🇪,🇳🇫,🇳🇬,🇳🇮,🇳🇱,🇳🇴,🇳🇵,🇳🇷,🇳🇺,🇳🇿,🇴🇲,🇵🇦,🇵🇪,🇵🇫,🇵🇬,🇵🇭,🇵🇰,🇵🇱,🇵🇲,🇵🇳,🇵🇷,🇵🇸,🇵🇹,🇵🇼,🇵🇾,🇶🇦,🇷🇪,🇷🇴,🇷🇸,🇷🇺,🇷🇼,🇸🇦,🇸🇧,🇸🇨,🇸🇩,🇸🇪,🇸🇬,🇸🇭,🇸🇮,🇸🇯,🇸🇰,🇸🇱,🇸🇲,🇸🇳,🇸🇴,🇸🇷,🇸🇸,🇸🇹,🇸🇻,🇸🇽,🇸🇾,🇸🇿,🇹🇦,🇹🇨,🇹🇩,🇹🇫,🇹🇬,🇹🇭,🇹🇯,🇹🇰,🇹🇱,🇹🇲,🇹🇳,🇹🇴,🇹🇷,🇹🇹,🇹🇻,🇹🇼,🇹🇿,🇺🇦,🇺🇬,🇺🇲,🇺🇳,🇺🇸,🇺🇾,🇺🇿,🇻🇦,🇻🇨,🇻🇪,🇻🇬,🇻🇮,🇻🇳,🇻🇺,🇼🇫,🇼🇸,🇽🇰,🇾🇪,🇾🇹,🇿🇦,🇿🇲,🇿🇼,🏴󠁧󠁢󠁥󠁮󠁧󠁿,🏴󠁧󠁢󠁳󠁣󠁴󠁿,🏴󠁧󠁢󠁷󠁬󠁳󠁿 -------------------------------------------------------------------------------- 
/tests/emoji/mod.rs: -------------------------------------------------------------------------------- 1 | use deltachat_message_parser::parser::is_emoji::{ 2 | count_emojis_if_only_contains_emoji, emoji, get_first_emoji, 3 | }; 4 | 5 | #[test] 6 | fn test_all_desktop_emoji_picker_emojis() { 7 | let testcases = include_str!("./all_desktop_emojis.txt").split(','); 8 | let mut total = 0; 9 | let mut failed = vec![]; 10 | for case in testcases { 11 | if emoji(case).is_err() { 12 | failed.push(case); 13 | } 14 | total += 1; 15 | } 16 | let failed_count = failed.len(); 17 | if !failed.is_empty() { 18 | println!("Failed Cases"); 19 | print!("{}", failed.join(", ")); 20 | } 21 | assert_eq!( 22 | failed_count, 0, 23 | "out of {total} cases {failed_count} failed" 24 | ); 25 | } 26 | 27 | #[test] 28 | fn test_all_desktop_emoji_picker_emojis_are_single_emojis() { 29 | let testcases = include_str!("./all_desktop_emojis.txt").split(','); 30 | let mut total = 0; 31 | let mut failed = vec![]; 32 | for case in testcases { 33 | if count_emojis_if_only_contains_emoji(case) != Some(1) { 34 | let result = get_first_emoji(case); 35 | if result != Some(case) { 36 | print!( 37 | "{case}:\n{:?}\n{:?}\n", 38 | result.map(|r| r.chars()), 39 | case.chars() 40 | ); 41 | failed.push(case); 42 | } 43 | } 44 | total += 1; 45 | } 46 | let failed_count = failed.len(); 47 | if !failed.is_empty() { 48 | println!("Failed Cases"); 49 | print!("{}", failed.join(", ")); 50 | } 51 | assert_eq!( 52 | failed_count, 0, 53 | "out of {total} cases {failed_count} failed" 54 | ); 55 | } 56 | -------------------------------------------------------------------------------- /tests/links.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::unwrap_used)] 2 | use deltachat_message_parser::parser::{link_url::PunycodeWarning, LinkDestination}; 3 | 4 | #[test] 5 | fn basic_parsing() { 6 | let test_cases_no_puny = vec![ 7 | "http://delta.chat", 8 | 
"http://delta.chat:8080", 9 | "http://localhost", 10 | "http://127.0.0.0", 11 | "https://[::1]/", 12 | "https://[::1]:9000?hi#o", 13 | "https://delta.chat", 14 | "ftp://delta.chat", 15 | "https://delta.chat/en/help", 16 | "https://delta.chat/en/help?hi=5&e=4", 17 | "https://delta.chat?hi=5&e=4", 18 | "https://delta.chat/en/help?hi=5&e=4#section2.0", 19 | "https://delta#section2.0", 20 | "http://delta.chat:8080?hi=5&e=4#section2.0", 21 | "http://delta.chat:8080#section2.0", 22 | "mailto:delta@example.com", 23 | "mailto:delta@example.com?subject=hi&body=hello%20world", 24 | "mailto:foö@ü.chat", 25 | "ftp://test-test", 26 | "https://www.openmandriva.org/en/news/article/openmandriva-rome-24-07-released", 27 | "https://www.openmandriva.org///en/news/article/openmandriva-rome-24-07-released", 28 | ]; 29 | 30 | let test_cases_with_puny = vec!["https://ü.app#help", "http://münchen.de"]; 31 | 32 | for input in &test_cases_no_puny { 33 | let (rest, link_destination) = LinkDestination::parse(input) 34 | .unwrap_or_else(|_| panic!("Cannot parse link: {}", input)); 35 | 36 | assert_eq!(input, &link_destination.target); 37 | assert_eq!(rest.len(), 0); 38 | assert!(link_destination.punycode.is_none()); 39 | } 40 | 41 | for input in &test_cases_with_puny { 42 | let Ok((rest, link_destination)) = LinkDestination::parse(input) else { 43 | panic!("Parsing {} as link failed", input); 44 | }; 45 | 46 | assert!(link_destination.punycode.is_some()); 47 | assert_eq!(rest.len(), 0); 48 | assert_eq!(input, &link_destination.target); 49 | } 50 | } 51 | 52 | #[test] 53 | fn multiple_hashes() { 54 | assert_eq!( 55 | LinkDestination::parse("https://matrix.to/#/#deltachat:matrix.org") 56 | .unwrap() 57 | .1, 58 | LinkDestination { 59 | hostname: Some("matrix.to"), 60 | target: "https://matrix.to/#/#deltachat:matrix.org", 61 | scheme: Some("https"), 62 | punycode: None, 63 | } 64 | ); 65 | } 66 | 67 | #[test] 68 | fn bare_scheme_no_parse() { 69 | // bare scheme shouldn't be linkified 70 | let 
bare = vec!["tel", "tel:", "bitcoin:", "mailto", "https://", "http://"]; 71 | 72 | for input in bare { 73 | let result = LinkDestination::parse(input); 74 | assert!(result.is_err()); 75 | } 76 | } 77 | 78 | #[test] 79 | fn invalid_domains() { 80 | let test_cases = vec![";?:/hi", "##://thing"]; 81 | 82 | for input in &test_cases { 83 | println!("testing {input}"); 84 | assert!(LinkDestination::parse(input).is_err()); 85 | } 86 | } 87 | 88 | #[test] 89 | fn punycode_detection() { 90 | assert_eq!( 91 | LinkDestination::parse("http://münchen.de").unwrap().1, 92 | LinkDestination { 93 | hostname: Some("münchen.de"), 94 | target: "http://münchen.de", 95 | scheme: Some("http"), 96 | punycode: Some(PunycodeWarning { 97 | original_hostname: "münchen.de".to_owned(), 98 | ascii_hostname: "xn--mnchen-3ya.de".to_owned(), 99 | punycode_encoded_url: "http://xn--mnchen-3ya.de".to_owned(), 100 | }), 101 | } 102 | ); 103 | 104 | assert_eq!( 105 | LinkDestination::parse("http://muenchen.de").unwrap().1, 106 | LinkDestination { 107 | hostname: Some("muenchen.de"), 108 | target: "http://muenchen.de", 109 | scheme: Some("http"), 110 | punycode: None, 111 | } 112 | ); 113 | } 114 | 115 | #[test] 116 | fn common_schemes() { 117 | assert_eq!( 118 | LinkDestination::parse("http://delta.chat").unwrap(), 119 | ( 120 | "", 121 | LinkDestination { 122 | hostname: Some("delta.chat"), 123 | target: "http://delta.chat", 124 | scheme: Some("http"), 125 | punycode: None, 126 | } 127 | ) 128 | ); 129 | assert_eq!( 130 | LinkDestination::parse("https://far.chickenkiller.com").unwrap(), 131 | ( 132 | "", 133 | LinkDestination { 134 | hostname: Some("far.chickenkiller.com"), 135 | target: "https://far.chickenkiller.com", 136 | scheme: Some("https"), 137 | punycode: None, 138 | } 139 | ) 140 | ); 141 | } 142 | #[test] 143 | fn generic_schemes() { 144 | assert_eq!( 145 | LinkDestination::parse("mailto:someone@example.com").unwrap(), 146 | ( 147 | "", 148 | LinkDestination { 149 | hostname: None, 150 | 
scheme: Some("mailto"), 151 | punycode: None, 152 | target: "mailto:someone@example.com" 153 | } 154 | ) 155 | ); 156 | assert_eq!( 157 | LinkDestination::parse("bitcoin:bc1qt3xhfvwmdqvxkk089tllvvtzqs8ts06u3u6qka") 158 | .unwrap() 159 | .1, 160 | LinkDestination { 161 | hostname: None, 162 | scheme: Some("bitcoin"), 163 | target: "bitcoin:bc1qt3xhfvwmdqvxkk089tllvvtzqs8ts06u3u6qka", 164 | punycode: None, 165 | } 166 | ); 167 | assert_eq!( 168 | LinkDestination::parse("geo:37.786971,-122.399677") 169 | .unwrap() 170 | .1, 171 | LinkDestination { 172 | scheme: Some("geo"), 173 | punycode: None, 174 | target: "geo:37.786971,-122.399677", 175 | hostname: None 176 | } 177 | ); 178 | } 179 | 180 | #[test] 181 | fn no_scheme_simple() { 182 | assert_eq!( 183 | LinkDestination::parse("example.com").unwrap(), 184 | ( 185 | "", 186 | LinkDestination { 187 | hostname: Some("example.com"), 188 | scheme: None, 189 | punycode: None, 190 | target: "example.com" 191 | } 192 | ) 193 | ); 194 | } 195 | 196 | #[test] 197 | fn no_scheme_with_chat() { 198 | // exceptional 199 | assert_eq!( 200 | LinkDestination::parse("delta.chat").unwrap(), 201 | ( 202 | "", 203 | LinkDestination { 204 | hostname: Some("delta.chat"), 205 | scheme: None, 206 | punycode: None, 207 | target: "delta.chat" 208 | } 209 | ) 210 | ); 211 | } 212 | 213 | #[test] 214 | fn no_scheme_full_iri_segments() { 215 | // long one with all the path segments 216 | assert_eq!( 217 | LinkDestination::parse("delta.chat/path/with/segments?query=params#fragment").unwrap(), 218 | ( 219 | "", 220 | LinkDestination { 221 | hostname: Some("delta.chat"), 222 | scheme: None, 223 | punycode: None, 224 | target: "delta.chat/path/with/segments?query=params#fragment" 225 | } 226 | ) 227 | ); 228 | } 229 | 230 | #[test] 231 | fn no_scheme_punycode() { 232 | // punycode 233 | assert_eq!( 234 | LinkDestination::parse("münchen.com").unwrap(), 235 | ( 236 | "", 237 | LinkDestination { 238 | hostname: Some("münchen.com"), 239 | scheme: None, 
240 | punycode: Some(PunycodeWarning { 241 | original_hostname: "münchen.com".to_owned(), 242 | ascii_hostname: "xn--mnchen-3ya.com".to_owned(), 243 | punycode_encoded_url: "xn--mnchen-3ya.com".to_owned() 244 | }), 245 | target: "münchen.com" 246 | } 247 | ) 248 | ); 249 | } 250 | 251 | #[test] 252 | fn no_scheme_disallow_zip() { 253 | // Failing case for unsupported TLD 254 | let result = LinkDestination::parse("free_money.zip"); 255 | assert!(result.is_err()); 256 | } 257 | 258 | #[test] 259 | fn no_scheme_disallow_authority() { 260 | // Failing case with user prefix, we dont want this for simple links without scheme 261 | let result = LinkDestination::parse("user@delta.chat"); 262 | assert!(result.is_err()); 263 | } 264 | 265 | #[test] 266 | fn no_scheme_disallow_port() { 267 | // Failing case with port, also not good for simple links without scheme 268 | let result = LinkDestination::parse("delta.chat:8080/api"); 269 | assert!(result.is_ok()); 270 | } 271 | -------------------------------------------------------------------------------- /tests/test.rs: -------------------------------------------------------------------------------- 1 | mod based_on_issue; 2 | mod emoji; 3 | mod links; 4 | mod text_to_ast; 5 | -------------------------------------------------------------------------------- /tests/text_to_ast/desktop_set.rs: -------------------------------------------------------------------------------- 1 | use super::*; 2 | use deltachat_message_parser::parser::{link_url::PunycodeWarning, parse_desktop_set}; 3 | 4 | #[test] 5 | fn do_not_parse_markdown_elements() { 6 | assert_eq!( 7 | parse_desktop_set("**bold** world"), 8 | vec![Text("**bold** world")] 9 | ); 10 | assert_eq!( 11 | parse_desktop_set("**_strange_ hello** world"), 12 | vec![Text("**_strange_ hello** world")] 13 | ); 14 | assert_eq!( 15 | parse_desktop_set("**_strange_ hello** world"), 16 | vec![Text("**_strange_ hello** world")] 17 | ); 18 | assert_eq!( 19 | parse_desktop_set("~~strikethrough~~ 
text ~~ notstrikethrough~~ text"), 20 | vec![Text("~~strikethrough~~ text ~~ notstrikethrough~~ text")] 21 | ); 22 | assert_eq!( 23 | parse_desktop_set("~~strikethrough and **bold**, jo!~~"), 24 | vec![Text("~~strikethrough and **bold**, jo!~~")] 25 | ); 26 | assert_eq!( 27 | parse_desktop_set( 28 | "hi there, you need to `cargo run` it.\nhi there, you need to ` cargo run ` it." 29 | ), 30 | vec![ 31 | Text("hi there, you need to `cargo run` it."), 32 | Linebreak, 33 | Text("hi there, you need to ` cargo run ` it.") 34 | ] 35 | ); 36 | } 37 | 38 | #[test] 39 | fn command_suggestions() { 40 | let input = "/yes\n/move_a5_a6 \n/answer2_gameid or /answer__no"; 41 | assert_eq!( 42 | parse_desktop_set(input), 43 | vec![ 44 | BotCommandSuggestion("/yes"), 45 | Linebreak, 46 | BotCommandSuggestion("/move_a5_a6"), 47 | Text(" "), 48 | Linebreak, 49 | BotCommandSuggestion("/answer2_gameid"), 50 | Text(" or "), 51 | BotCommandSuggestion("/answer__no") 52 | ] 53 | ); 54 | } 55 | 56 | #[test] 57 | fn invalid_command_suggestions() { 58 | let input = "/1\n /hello world"; 59 | assert_eq!( 60 | parse_desktop_set(input), 61 | vec![ 62 | Text("/1"), 63 | Linebreak, 64 | Text(" "), 65 | BotCommandSuggestion("/hello"), 66 | Text(" world") 67 | ] 68 | ); 69 | } 70 | 71 | #[test] 72 | fn invalid_command_suggestions_too_long() { 73 | let input = "/dfshadfshlhjkldfskhjlsdafhkjdkhflkdfalsklhdsfdfadfsadsfuresdffdssdfsdsd\ 74 | fjhkdsfhkhdafskhjdafshkljerwnmsdbcxzgkhjdsaljwieoqruyitohsjbdgfisdyhbjasdkhaegrykasbdhfglhawefdhlj\ 75 | ghbsfznhlkrhszfdhflsdahadjsfhlkjdfaslhkdfsahljdfashjdhjskafkhljdfashjkldafshjadsfjhdasfjkldjkhfsabcnxbkzjadsfhhd"; 76 | assert_eq!( 77 | parse_desktop_set(input), 78 | vec![ 79 | Text("/dfshadfshlhjkldfskhjlsdafhkjdkhflkdfalsklhdsfdfadfsadsfuresdffdssdfsdsd\ 80 | fjhkdsfhkhdafskhjdafshkljerwnmsdbcxzgkhjdsaljwieoqruyitohsjbdgfisdyhbjasdkhaegrykasbdhfglhawefdhlj\ 81 | 
ghbsfznhlkrhszfdhflsdahadjsfhlkjdfaslhkdfsahljdfashjdhjskafkhljdfashjkldafshjadsfjhdasfjkldjkhfsabcnxbkzjadsfhhd") 82 | ] 83 | ); 84 | } 85 | 86 | #[test] 87 | fn invalid_command_suggestions_should_be_text() { 88 | let input = "read/write"; 89 | assert_eq!(parse_desktop_set(input), vec![Text("read/write")]); 90 | } 91 | 92 | #[test] 93 | fn hashtag() { 94 | let input = 95 | "#hashtag\nWhen your new here look for #noob\nIf your already an expert look for #expert"; 96 | assert_eq!( 97 | parse_desktop_set(input), 98 | vec![ 99 | Tag("#hashtag"), 100 | Linebreak, 101 | Text("When your new here look for "), 102 | Tag("#noob"), 103 | Linebreak, 104 | Text("If your already an expert look for "), 105 | Tag("#expert") 106 | ] 107 | ); 108 | } 109 | 110 | #[test] 111 | fn german_umlaut_hashtag() { 112 | let input = "#bücher #Ängste"; 113 | assert_eq!( 114 | parse_desktop_set(input), 115 | vec![Tag("#bücher"), Text(" "), Tag("#Ängste")] 116 | ); 117 | } 118 | 119 | #[test] 120 | fn two_adjacent_hashtags() { 121 | let input = "#1#topic2"; 122 | assert_eq!(parse_desktop_set(input), vec![Tag("#1"), Tag("#topic2")]); 123 | } 124 | 125 | #[test] 126 | fn two_hashtags_seperated_by_linebreak() { 127 | let input = "#1\n#topic2"; 128 | assert_eq!( 129 | parse_desktop_set(input), 130 | vec![Tag("#1"), Linebreak, Tag("#topic2")] 131 | ); 132 | } 133 | 134 | #[test] 135 | fn two_hashtags_seperated_by_tab() { 136 | let input = "#1\t#topic2"; 137 | assert_eq!( 138 | parse_desktop_set(input), 139 | vec![Tag("#1"), Text("\t"), Tag("#topic2")] 140 | ); 141 | } 142 | 143 | #[test] 144 | fn email_address_standalone() { 145 | let test_cases = vec![ 146 | "message.parser@example.com", 147 | "message-parser@delta.chat", 148 | "message+parser@delta.chat", 149 | "parser@127.0.0.0", 150 | "message+parser+67543@delta.chat", 151 | "243432mmdfsa3234@example.com", 152 | "617b5772c6d10feda41fc6e0e43b976c4cc9383d3729310d3dc9e1332f0d9acd@yggmail", // TODO add email test 153 | ]; 154 | 155 | for input in 
test_cases {
        println!("testing {}", &input);
        assert_eq!(parse_desktop_set(input), vec![EmailAddress(input)]);
    }
}

/// An email address embedded in running text is split out as its own
/// `EmailAddress` element; the surrounding text and the line break are kept.
#[test]
fn email_address_example() {
    assert_eq!(
        parse_desktop_set("This is an email address: message.parser@example.com\nMessage me there"),
        vec![
            Text("This is an email address: "),
            EmailAddress("message.parser@example.com"),
            Linebreak,
            Text("Message me there")
        ]
    );
}

/// Stand-alone URLs of several schemes must each parse into exactly one `Link`.
///
/// * `test_cases_no_puny`: the parsed destination must equal the expected
///   destination completely (including `punycode: None`).
/// * `test_cases_with_puny`: only `target`, `scheme` and `hostname` are
///   compared, and the parsed destination must carry a punycode warning.
#[test]
fn link() {
    let test_cases_no_puny = vec![
        (
            "http://delta.chat",
            http_link_no_puny("http://delta.chat", "delta.chat"),
        ),
        (
            "http://delta.chat:8080",
            http_link_no_puny("http://delta.chat:8080", "delta.chat"),
        ),
        (
            "http://localhost",
            http_link_no_puny("http://localhost", "localhost"),
        ),
        (
            "http://127.0.0.1",
            http_link_no_puny("http://127.0.0.1", "127.0.0.1"),
        ),
        (
            "https://delta.chat",
            https_link_no_puny("https://delta.chat", "delta.chat"),
        ),
        (
            "ftp://delta.chat",
            ftp_link_no_puny("ftp://delta.chat", "delta.chat"),
        ),
        (
            "https://delta.chat/en/help",
            https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        ),
        (
            "https://delta.chat?hi=5&e=4",
            https_link_no_puny("https://delta.chat?hi=5&e=4", "delta.chat"),
        ),
        (
            "https://delta.chat/en/help?hi=5&e=4#section2.0",
            https_link_no_puny(
                "https://delta.chat/en/help?hi=5&e=4#section2.0",
                "delta.chat",
            ),
        ),
        (
            "https://delta#section2.0",
            https_link_no_puny("https://delta#section2.0", "delta"),
        ),
        (
            "http://delta.chat:8080?hi=5&e=4#section2.0",
            http_link_no_puny("http://delta.chat:8080?hi=5&e=4#section2.0", "delta.chat"),
        ),
        (
            "http://delta.chat:8080#section2.0",
            http_link_no_puny("http://delta.chat:8080#section2.0", "delta.chat"),
        ),
        (
            "mailto:delta@example.com",
            mailto_link_no_puny("mailto:delta@example.com"),
        ),
        (
            "mailto:delta@example.com?subject=hi&body=hello%20world",
            mailto_link_no_puny("mailto:delta@example.com?subject=hi&body=hello%20world"),
        ),
        (
            "mailto:foö@ü.chat",
            mailto_link_no_puny("mailto:foö@ü.chat"),
        ),
        (
            "https://delta.chat/%C3%BC%C3%A4%C3%B6",
            https_link_no_puny(
                "https://delta.chat/%C3%BC%C3%A4%C3%B6",
                "delta.chat",
            )
        ),
        (
            "https://delta.chat/üäö",
            https_link_no_puny(
                "https://delta.chat/üäö",
                "delta.chat",
            )
        ),
        (
            "https://90eghtesadi.com/Keywords/Index/2031708/%D9%82%D8%B1%D8%A7%D8%B1%D8%AF%D8%A7%D8%AF-%DB%B2%DB%B5-%D8%B3%D8%A7%D9%84%D9%87-%D8%A7%DB%8C%D8%B1%D8%A7%D9%86-%D9%88-%DA%86%DB%8C%D9%86",
            // ^ I guess shame on the Iranian government of the time? --Farooq
            https_link_no_puny(
                "https://90eghtesadi.com/Keywords/Index/2031708/%D9%82%D8%B1%D8%A7%D8%B1%D8%AF%D8%A7%D8%AF-%DB%B2%DB%B5-%D8%B3%D8%A7%D9%84%D9%87-%D8%A7%DB%8C%D8%B1%D8%A7%D9%86-%D9%88-%DA%86%DB%8C%D9%86",
                "90eghtesadi.com",
            )
        ),
        (
            "https://pcworms.ir/صفحه",
            https_link_no_puny(
                "https://pcworms.ir/صفحه",
                "pcworms.ir",
            ),
        ),
        (
            "gopher://republic.circumlunar.space/1/~farooqkz",
            gopher_link_no_puny(
                "gopher://republic.circumlunar.space/1/~farooqkz",
                "republic.circumlunar.space",
            ),
        ),
    ];

    let test_cases_with_puny = [(
        "https://ü.app#help",
        https_link_no_puny("https://ü.app#help", "ü.app"),
    )];

    // The table is consumed here, so the expected destination can be moved
    // into the expected element instead of being cloned.
    for (input, destination) in test_cases_no_puny {
        println!("testing {input}");
        assert_eq!(parse_desktop_set(input), vec![Link { destination }]);
    }

    for (input, expected_destination) in &test_cases_with_puny {
        println!("testing {input}");
        let parsed = parse_desktop_set(input);
        // `first()` instead of `[0]`: an empty parse result is reported through
        // the descriptive panic below rather than an index-out-of-bounds panic.
        match parsed.first() {
            Some(Link { destination }) => {
                assert_eq!(expected_destination.target, destination.target);
                assert_eq!(expected_destination.scheme, destination.scheme);
                assert_eq!(expected_destination.hostname, destination.hostname);
                assert!(destination.punycode.is_some());
            }
            other => {
                panic!("expected a Link as first element for {input:?}, got {other:?}");
            }
        }
    }
}

/// A URL embedded in running text becomes a `Link`; the text before it and
/// the following line are kept as `Text` around a `Linebreak`.
#[test]
fn test_link_example() {
    assert_eq!(
        parse_desktop_set(
            "This is an my site: https://delta.chat/en/help?hi=5&e=4#section2.0\nVisit me there"
        ),
        vec![
            Text("This is an my site: "),
            Link {
                destination: https_link_no_puny(
                    "https://delta.chat/en/help?hi=5&e=4#section2.0",
                    "delta.chat"
                )
            },
            Linebreak,
            Text("Visit me there")
        ]
    );
}

/// An address wrapped in `<…>` is reported as an `EmailAddress` without the
/// angle brackets.
#[test]
fn delimited_email_example() {
    // The delimited address had been lost from the input literal, leaving the
    // expected `EmailAddress` with nothing to match.
    assert_eq!(
        parse_desktop_set("This is an my site: <hello@delta.chat>\nMessage me there"),
        vec![
            Text("This is an my site: "),
            EmailAddress("hello@delta.chat"),
            Linebreak,
            Text("Message me there")
        ]
    );
}

/// Despite its name, this test asserts that labeled links ARE parsed in the
/// desktop set, with the label kept as unparsed plain text (`**bold**` is
/// not turned into a `Bold` element).
#[test]
fn labeled_link_should_not_work() {
    assert_eq!(
        parse_desktop_set("[a link](https://delta.chat/en/help?hi=5&e=4#section2.0)"),
        vec![LabeledLink {
            label: vec![Text("a link")],
            destination: https_link_no_puny(
                "https://delta.chat/en/help?hi=5&e=4#section2.0",
                "delta.chat",
            )
        }]
    );
    assert_eq!(
        parse_desktop_set(
            "[rich content **bold**](https://delta.chat/en/help?hi=5&e=4#section2.0)"
        ),
        vec![LabeledLink {
            label: vec![Text("rich content **bold**")],
            destination: https_link_no_puny(
                "https://delta.chat/en/help?hi=5&e=4#section2.0",
                "delta.chat",
            )
        }]
    );
}

#[test]
fn
labeled_link_example_should_not_work() {
    // Despite the name, the labeled link in the middle of the sentence is
    // expected to be parsed, and the trailing "." stays plain text.
    let expected = vec![
        Text("you can find the details "),
        LabeledLink {
            label: vec![Text("here")],
            destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
        },
        Text("."),
    ];
    assert_eq!(
        parse_desktop_set("you can find the details [here](https://delta.chat/en/help)."),
        expected
    );
}

/// A `,` or `.` directly after a bare URL belongs to the surrounding text,
/// while a `.` inside the path (`/page.hi`) stays part of the link.
#[test]
fn inline_link_do_not_eat_last_char_if_it_is_special() {
    // (input, expected elements)
    let cases = vec![
        (
            "https://delta.chat,",
            vec![
                Link {
                    destination: https_link_no_puny("https://delta.chat", "delta.chat"),
                },
                Text(","),
            ],
        ),
        (
            "https://delta.chat.",
            vec![
                Link {
                    destination: https_link_no_puny("https://delta.chat", "delta.chat"),
                },
                Text("."),
            ],
        ),
        (
            "https://delta.chat/page.hi",
            vec![Link {
                destination: https_link_no_puny("https://delta.chat/page.hi", "delta.chat"),
            }],
        ),
        (
            "So here's the link: https://delta.cat/page.",
            vec![
                Text("So here's the link: "),
                Link {
                    destination: https_link_no_puny("https://delta.cat/page", "delta.cat"),
                },
                Text("."),
            ],
        ),
        (
            "Here's a list of organizations which funded DC: https://delta.chat/en/help#how-are-delta-chat-developments-funded.",
            vec![
                Text("Here's a list of organizations which funded DC: "),
                Link {
                    destination: https_link_no_puny(
                        "https://delta.chat/en/help#how-are-delta-chat-developments-funded",
                        "delta.chat",
                    ),
                },
                Text("."),
            ],
        ),
        (
            "So here's the link: https://delta.cat/page, have fun",
            vec![
                Text("So here's the link: "),
                Link {
                    destination: https_link_no_puny("https://delta.cat/page", "delta.cat"),
                },
                Text(", have fun"),
            ],
        ),
    ];

    for (input, expected) in cases {
        assert_eq!(parse_desktop_set(input), expected);
    }
}

/// A message consisting only of `[label](url)` parses into one `LabeledLink`.
#[test]
fn labeled_link() {
    let expected = vec![LabeledLink {
        label: vec![Text("a link")],
        destination: https_link_no_puny(
            "https://delta.chat/en/help?hi=5&e=4#section2.0",
            "delta.chat",
        ),
    }];
    assert_eq!(
        parse_desktop_set("[a link](https://delta.chat/en/help?hi=5&e=4#section2.0)"),
        expected
    );
}

/// A non-ASCII hostname in a labeled link yields a punycode warning carrying
/// the original hostname, its ASCII form and the punycode-encoded URL.
#[test]
fn labeled_link_with_special_char_in_domain() {
    let warning = PunycodeWarning {
        original_hostname: "münchen.de".to_string(),
        ascii_hostname: "xn--mnchen-3ya.de".to_string(),
        punycode_encoded_url: "https://xn--mnchen-3ya.de".to_string(),
    };
    let expected = vec![LabeledLink {
        label: vec![Text("munich")],
        destination: LinkDestination {
            target: "https://münchen.de",
            hostname: Some("münchen.de"),
            punycode: Some(warning),
            scheme: Some("https"),
        },
    }];
    assert_eq!(parse_desktop_set("[munich](https://münchen.de)"), expected);
}

/// A labeled link whose URL has no path, query or fragment.
#[test]
fn labeled_link_domain_only() {
    let expected = vec![LabeledLink {
        label: vec![Text("a link")],
        destination: https_link_no_puny("https://delta.chat", "delta.chat"),
    }];
    assert_eq!(parse_desktop_set("[a link](https://delta.chat)"), expected);
}

/// The label of a labeled link must NOT be parsed as markdown in the desktop
/// set: the result has to differ from a label containing a `Bold` element.
#[test]
fn labeled_link_no_markdown_in_desktop_set() {
    let markdown_label_result = vec![LabeledLink {
        label: vec![Text("rich content "), Bold(vec![Text("bold")])],
        destination: https_link_no_puny(
            "https://delta.chat/en/help?hi=5&e=4#section2.0",
            "delta.chat",
        ),
    }];
    assert_ne!(
        parse_desktop_set(
            "[rich content **bold**](https://delta.chat/en/help?hi=5&e=4#section2.0)"
        ),
        markdown_label_result
    );
}

#[test]
fn labeled_link_should_not_allow_codeblock() {
    assert_ne!(
        parse_desktop_set("[```\nhello world\n```](https://delta.chat)"),
        vec![
            LabeledLink {
                label: vec![Text("```\nhello world\n```")],
destination: https_link_no_puny("https://delta.chat/en/help", "delta.chat"),
            },
            Text(".")
        ]
    );
}

--------------------------------------------------------------------------------
/tests/text_to_ast/links.rs:
--------------------------------------------------------------------------------
use super::*;
use deltachat_message_parser::parser::{parse_link, LinkDestination};

/// Collects URLs intended for link-parsing checks.
///
/// NOTE(review): the list is built but never iterated or asserted on, so this
/// test currently verifies nothing. `tests/text_to_ast/mod.rs` also declares
/// only `desktop_set`, `markdown` and `text_only`, so this module does not
/// appear to be compiled at all — confirm and either wire it up or remove it.
#[test]
fn link() {
    let test_cases = vec![
        "http://delta.chat",
        "http://delta.chat:8080",
        "http://localhost",
        "http://127.0.0.0",
        "https://delta.chat",
        "ftp://delta.chat",
        "https://delta.chat/en/help",
        "https://delta.chat/en/help?hi=5&e=4",
        "https://delta.chat?hi=5&e=4",
        "https://delta.chat/en/help?hi=5&e=4#section2.0",
        "https://delta#section2.0",
        "http://delta.chat:8080?hi=5&e=4#section2.0",
        "http://delta.chat:8080#section2.0",
        "mailto:delta@example.com",
        "mailto:delta@example.com?subject=hi&body=hello%20world",
        "mailto:foö@ü.chat",
        "https://ü.app#help", // TODO add more urls for testing
    ];

}

--------------------------------------------------------------------------------
/tests/text_to_ast/mod.rs:
--------------------------------------------------------------------------------
use deltachat_message_parser::parser::Element::*;
use deltachat_message_parser::parser::LinkDestination;

/// Shared constructor behind the `*_link_no_puny` helpers: a destination with
/// the given scheme, target and optional hostname and no punycode warning.
fn link_no_puny<'a>(
    scheme: &'a str,
    target: &'a str,
    hostname: Option<&'a str>,
) -> LinkDestination<'a> {
    LinkDestination {
        target,
        hostname,
        scheme: Some(scheme),
        punycode: None,
    }
}

/// Expected destination for a `gopher` link without punycode warning.
pub(crate) fn gopher_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    link_no_puny("gopher", target, Some(hostname))
}

/// Expected destination for an `http` link without punycode warning.
pub(crate) fn http_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    link_no_puny("http", target, Some(hostname))
}

/// Expected destination for an `ftp` link without punycode warning.
pub(crate) fn ftp_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    link_no_puny("ftp", target, Some(hostname))
}

/// Expected destination for an `https` link without punycode warning.
pub(crate) fn https_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    link_no_puny("https", target, Some(hostname))
}

/// Expected destination for a `mailto` link; it carries no hostname.
pub(crate) fn mailto_link_no_puny(target: &str) -> LinkDestination<'_> {
    link_no_puny("mailto", target, None)
}

mod desktop_set;
mod markdown;
mod text_only;

--------------------------------------------------------------------------------
/tests/text_to_ast/mod.rs.orig:
--------------------------------------------------------------------------------
use deltachat_message_parser::parser::Element::*;
use deltachat_message_parser::parser::LinkDestination;

fn gopher_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    LinkDestination {
        target,
        hostname: Some(hostname),
        scheme: "gopher",
        punycode: None,
    }
}

<<<<<<< HEAD
=======
fn internal_link(target: &str) -> LinkDestination<'_> {
    LinkDestination {
        target,
        hostname: None,
        scheme: "",
        punycode: None,
    }
}

>>>>>>> a0203f4363e504cbe5d32a846a9c8770d6442cf7
fn http_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    LinkDestination {
        target,
        hostname: Some(hostname),
        scheme: "http",
        punycode: None,
    }
}

fn ftp_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    LinkDestination {
        target,
        hostname: Some(hostname),
        scheme: "ftp",
        punycode: None,
    }
}

fn https_link_no_puny<'a>(target: &'a str, hostname: &'a str) -> LinkDestination<'a> {
    LinkDestination {
        target,
        hostname: Some(hostname),
        scheme: "https",
        punycode: None,
    }
}

fn mailto_link_no_puny(target: &str) -> LinkDestination<'_> {
    LinkDestination {
        target,
        hostname: None,
        scheme: "mailto",
        punycode: None,
    }
}

mod desktop_set;
mod markdown;
mod text_only;

--------------------------------------------------------------------------------