├── .gitignore
├── .github
├── FUNDING.yml
└── workflows
│ └── ci.yml
├── benchmark
├── implementations
│ ├── current-match
│ │ └── index.mjs
│ ├── current-test
│ │ └── index.mjs
│ └── current-pretest
│ │ └── index.mjs
├── profile.mjs
├── benchmark.mjs
└── samples
│ ├── link_fuzzy.txt
│ ├── email_fuzzy.txt
│ ├── link_normal.txt
│ ├── many.txt
│ └── many_fast.txt
├── .eslintrc.yml
├── support
├── demo_template
│ ├── rollup.config.mjs
│ ├── index.css
│ ├── index.html
│ └── index.mjs
├── build_doc.mjs
├── rollup.config.mjs
├── check.mjs
├── tlds_2char_gen.mjs
└── build_demo.mjs
├── test
├── cjs.js
├── fixtures
│ ├── not_links.txt
│ └── links.txt
└── test.mjs
├── .ndocrc
├── LICENSE
├── package.json
├── CHANGELOG.md
├── lib
└── re.mjs
├── README.md
└── index.mjs
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | coverage/
3 | demo/
4 | doc/
5 | build/
6 | *.log
7 |
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | github: markdown-it
2 | open_collective: markdown-it
3 | tidelift: "npm/linkify-it"
4 |
--------------------------------------------------------------------------------
/benchmark/implementations/current-match/index.mjs:
--------------------------------------------------------------------------------
1 | import linkifyit from '../../../index.mjs'
2 | const linkify = linkifyit()
3 |
4 | linkify.test('')
5 |
/**
 * Benchmark entry point: runs `linkify.match()` on one sample.
 *
 * @param {string} data - sample text to scan
 * @returns {*} whatever `linkify.match()` returns for `data`
 */
export function run (data) {
  const matches = linkify.match(data)
  return matches
}
9 |
--------------------------------------------------------------------------------
/benchmark/implementations/current-test/index.mjs:
--------------------------------------------------------------------------------
1 | import linkifyit from '../../../index.mjs'
2 | const linkify = linkifyit()
3 |
4 | linkify.test('')
5 |
/**
 * Benchmark entry point: runs `linkify.test()` on one sample.
 *
 * @param {string} data - sample text to scan
 * @returns {*} whatever `linkify.test()` returns for `data`
 */
export function run (data) {
  const found = linkify.test(data)
  return found
}
9 |
--------------------------------------------------------------------------------
/benchmark/implementations/current-pretest/index.mjs:
--------------------------------------------------------------------------------
1 | import linkifyit from '../../../index.mjs'
2 | const linkify = linkifyit()
3 |
4 | linkify.test('')
5 |
/**
 * Benchmark entry point: runs `linkify.pretest()` on one sample.
 *
 * @param {string} data - sample text to scan
 * @returns {*} whatever `linkify.pretest()` returns for `data`
 */
export function run (data) {
  const maybeHasLinks = linkify.pretest(data)
  return maybeHasLinks
}
9 |
--------------------------------------------------------------------------------
/.eslintrc.yml:
--------------------------------------------------------------------------------
1 | extends: standard
2 |
3 | overrides:
4 | -
5 | files: [ '*.mjs' ]
6 | parserOptions:
7 | sourceType: module
8 | rules:
9 | no-restricted-globals: [ 2, require, __dirname ]
10 |
11 | ignorePatterns:
12 | - coverage/
13 | - demo/
14 | - build/
15 |
16 | rules:
17 | camelcase: 0
18 |
--------------------------------------------------------------------------------
/support/demo_template/rollup.config.mjs:
--------------------------------------------------------------------------------
1 | import resolve from '@rollup/plugin-node-resolve'
2 |
3 | export default [
4 | {
5 | input: 'support/demo_template/index.mjs',
6 | output: {
7 | file: 'demo/index.js',
8 | format: 'iife',
9 | name: 'demo'
10 | },
11 | plugins: [
12 | resolve()
13 | ]
14 | }
15 | ]
16 |
--------------------------------------------------------------------------------
/test/cjs.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 | /* eslint-env mocha */
3 |
4 | const linkify = require('../')
5 | const assert = require('assert')
6 |
// Smoke test for the CommonJS entry point loaded via `require('../')`.
describe('CJS', () => {
  it('require', () => {
    const instance = linkify()

    // Register a custom root zone, then check that it is recognized
    // while a zone that was not registered is rejected.
    instance.tlds('myroot', true)

    const acceptsCustomRoot = instance.test('google.myroot')
    assert.ok(acceptsCustomRoot)

    const acceptsUnknownRoot = instance.test('google.xyz')
    assert.ok(!acceptsUnknownRoot)
  })
})
17 |
--------------------------------------------------------------------------------
/support/build_doc.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import shell from 'shelljs'
4 |
// Always rebuild the documentation from an empty output directory.
shell.rm('-rf', 'doc')

// Source links in the generated docs point at a fixed revision: the first
// 6 characters of the hash printed by `git show-ref`.
const HASH_LENGTH = 6
const revision = shell.exec('git show-ref --hash HEAD').stdout
const shortHash = revision.slice(0, HASH_LENGTH)

// `{package.repository}`, `{file}` and `{line}` are passed through verbatim
// as part of the `--link-format` value.
const linkFormat = 'https://github.com/{package.repository}/blob/' + shortHash + '/{file}#L{line}'

shell.exec('node node_modules/.bin/ndoc --link-format "' + linkFormat + '"')
12 |
--------------------------------------------------------------------------------
/benchmark/profile.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | /* eslint-disable no-console */
3 |
4 | import { readFileSync } from 'fs'
5 | import linkifyit from '../index.mjs'
6 |
const linkify = linkifyit()

// Force compilation
linkify.test('')

// The sample is resolved relative to this script. A leading "/" in the path
// would make `new URL()` resolve against the filesystem root instead of the
// benchmark directory. An alternative file from `samples/` can be chosen with
// the first CLI argument.
const sampleName = process.argv[2] ?? 'many.txt'
const data = readFileSync(new URL(`./samples/${sampleName}`, import.meta.url), 'utf8')

// Repeat the same match several times so the profiler has enough work to sample.
for (let i = 0; i < 20; i++) {
  console.log(linkify.match(data))
}
17 |
--------------------------------------------------------------------------------
/support/rollup.config.mjs:
--------------------------------------------------------------------------------
1 | import resolve from '@rollup/plugin-node-resolve'
2 | import { createRequire } from 'module'
3 |
4 | const deps = createRequire(import.meta.url)('../package.json').dependencies
5 |
6 | export default [
7 | {
8 | input: 'index.mjs',
9 | output: {
10 | file: 'build/index.cjs.js',
11 | format: 'cjs'
12 | },
13 | external: Object.keys(deps),
14 | plugins: [
15 | resolve()
16 | ]
17 | }
18 | ]
19 |
--------------------------------------------------------------------------------
/support/check.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | //
4 | // Simple CLI helper for quick-check patterns
5 | //
6 |
7 | /* eslint-disable no-console */
8 |
9 | import linkifyit from '../index.mjs'
10 | import { inspect } from 'node:util'
const linkify = linkifyit()

// All CLI arguments are glued back into a single space-separated input string.
const text = process.argv.slice(2).join(' ')

const SEPARATOR = '----------------'

// Prints a shallow dump of the linkifier instance.
const dumpInstance = () => {
  console.log(inspect(linkify, { depth: 0 }))
}

console.log(text)
console.log(linkify.test(text))
console.log(SEPARATOR)
// Instance state right after `test()`
dumpInstance()
console.log(SEPARATOR)
console.log(inspect(linkify.match(text)))
console.log(SEPARATOR)
// Instance state after `match()`
dumpInstance()
23 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on:
4 | push:
5 | pull_request:
6 | schedule:
7 | - cron: '0 0 * * 3'
8 |
9 | jobs:
10 | test:
11 | runs-on: ubuntu-latest
12 |
13 | strategy:
14 | matrix:
15 | node-version: [ '18' ]
16 |
17 | steps:
18 |
19 | - uses: actions/checkout@v4
20 |
21 | - name: Use Node.js ${{ matrix.node-version }}
22 | uses: actions/setup-node@v4
23 | with:
24 | node-version: ${{ matrix.node-version }}
25 |
26 | - run: npm install
27 |
28 | - name: Test
29 | run: npm test
30 |
31 | - name: Upload coverage report to coveralls.io
32 | uses: coverallsapp/github-action@master
33 | with:
34 | github-token: ${{ secrets.GITHUB_TOKEN }}
35 |
--------------------------------------------------------------------------------
/.ndocrc:
--------------------------------------------------------------------------------
1 | #
2 | # Common nodeca config
3 | ################################################################################
4 |
5 | --alias mjs:js
6 | --index "./README.md"
7 | --package "./package.json"
8 | --gh-ribbon "https://github.com/{package.repository}"
9 | --output "doc"
10 | --render "html"
11 | --link-format "https://github.com/{package.repository}/blob/master/{file}#L{line}"
12 | --broken-links "show"
13 |
14 |
15 | #
16 | # Paths with sources
17 | ################################################################################
18 |
19 | index.mjs
20 | lib
21 |
22 |
23 | #
24 | # Project specific configuration
25 | ################################################################################
26 |
27 | --show-all
28 |
--------------------------------------------------------------------------------
/test/fixtures/not_links.txt:
--------------------------------------------------------------------------------
1 | %
2 | % Not links
3 | %
4 | example.invalid
5 | example.invalid/
6 | http://.example.com
7 | http://-example.com
8 | hppt://example.com
9 | example.coma
10 | -example.coma
11 | foo.123
12 | localhost % only with protocol allowed
13 | localhost/
14 | ///localhost % 3 '/' not allowed
15 | ///test.com
16 | //test % Don't allow single level protocol-less domains to avoid false positives
17 |
18 | _http://example.com
19 | _//example.com
20 | _example.com
21 | http://example.com_
22 | @example.com
23 |
24 | node.js and io.js
25 |
26 | http://
27 | http://.
28 | http://..
29 | http://#
30 | http://##
31 | http://?
32 | http://??
33 | google.com:500000 // invalid port
34 | show image.jpg
35 | path:to:file.pm
36 | /path/to/file.pl
37 |
38 | %
39 | % Not IPv4
40 | %
41 | 1.2.3.4.5
42 | 1.2.3
43 | 1.2.3.400
44 | 1000.2.3.4
45 | a1.2.3.4
46 | 1.2.3.4a
47 |
48 | %
49 | % Not email
50 | %
51 | foo@bar % Should be at second level domain & with correct tld
52 | mailto:bar
53 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2015 Vitaly Puzrin.
2 |
3 | Permission is hereby granted, free of charge, to any person
4 | obtaining a copy of this software and associated documentation
5 | files (the "Software"), to deal in the Software without
6 | restriction, including without limitation the rights to use,
7 | copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the
9 | Software is furnished to do so, subject to the following
10 | conditions:
11 |
12 | The above copyright notice and this permission notice shall be
13 | included in all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
--------------------------------------------------------------------------------
/support/tlds_2char_gen.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | // Generates the list of 2-char English TLDs.
4 | //
5 | // The code is dirty, I know, but it is needed only once.
6 | //
7 |
8 | /* eslint-disable no-console */
9 |
10 | import { createRequire } from 'node:module'
11 |
12 | const tldList = createRequire(import.meta.url)('tlds')
13 |
/**
 * Compacts a bracketed character list into regexp range notation.
 *
 * Runs of three or more consecutive characters become `first-last`;
 * shorter runs are copied unchanged, e.g. `[abcdxy]` -> `[a-dxy]`.
 *
 * @param {string} str - character class including the surrounding brackets
 * @returns {string} the same class with long consecutive runs collapsed
 */
function toRanges (str) {
  // Drop the enclosing "[" and "]"
  const letters = str.slice(1, -1)
  const parts = []
  let start = 0

  while (start < letters.length) {
    // Extend the run while each character directly follows the previous one
    let end = start + 1
    while (letters[end] === String.fromCharCode(letters.charCodeAt(end - 1) + 1)) {
      end++
    }

    const runLength = end - start
    parts.push(
      runLength < 3
        ? letters.slice(start, end)
        : letters[start] + '-' + letters[end - 1]
    )

    start = end
  }

  return '[' + parts.join('') + ']'
}
34 |
// Keep only two-letter lowercase latin names, in sorted order
const tlds = tldList.filter(name => /^[a-z]{2}$/.test(name)).sort()

//
// Group by first letter
//

const groups = []

for (const letter of 'abcdefghijklmnopqrstuvwxyz') {
  const list = tlds.filter(name => name[0] === letter)

  if (list.length === 0) { continue }

  if (list.length === 1) {
    // A lone entry is emitted as is, without a character class
    groups.push(list[0])
    continue
  }

  // Several entries share the first letter: "x" + "[" + second letters + "]"
  groups.push(letter + '[' + list.map(n => n[1]).join('') + ']')
}

let result = groups.join('|')

console.log(result)

//
// Compact ranges
//

result = result.replace(/\[[a-z]+\]/g, toRanges)
65 |
66 | // console.log(result);
67 |
--------------------------------------------------------------------------------
/support/demo_template/index.css:
--------------------------------------------------------------------------------
1 | html,
2 | body,
3 | .full-height {
4 | height: 100%;
5 | }
6 |
7 | body {
8 | overflow-x: hidden;
9 | padding-bottom: 160px;
10 | background-color: #fbfbfb;
11 | }
12 |
13 | .source {
14 | width: 100%;
15 | font-family: Menlo, Monaco, Consolas, "Courier New", monospace;
16 | font-size: 13px;
17 | padding: 2px;
18 | }
19 |
20 | .result-html {
21 | padding: 2px 10px;
22 | overflow: auto;
23 | background-color: #fff;
24 | border: 1px solid #ccc;
25 | border-radius: 4px;
26 | }
27 | .result-html img {
28 | max-width: 35%;
29 | }
30 |
31 | .demo-control {
32 | position: absolute;
33 | right: 15px;
34 | top: -17px;
35 | border-radius: 6px 6px 0 0;
36 | font-size: 12px;
37 | background-color: #ddd;
38 | }
39 | .demo-control a {
40 | padding: 0 20px;
41 | }
42 | .demo-control a:first-child {
43 | padding-left: 30px;
44 | }
45 | .demo-control a:last-child {
46 | padding-right: 30px;
47 | }
48 |
49 | .gh-ribbon {
50 | display: block;
51 | position: absolute;
52 | right: -60px;
53 | top: 44px;
54 | transform: rotate(45deg);
55 | width: 230px;
56 | z-index: 10000;
57 | white-space: nowrap;
58 | font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;
59 | background-color: #686868;
60 | box-shadow: 0 0 2px rgba(102,102,102,0.4);
61 | padding: 1px 0;
62 | }
63 | .gh-ribbon a {
64 | text-decoration: none !important;
65 | border: 1px solid #ccc;
66 | color: #fff;
67 | display: block;
68 | font-size: 13px;
69 | font-weight: 700;
70 | outline: medium none;
71 | padding: 4px 50px 2px;
72 | text-align: center;
73 | }
74 |
--------------------------------------------------------------------------------
/support/demo_template/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | linkify-it demo
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | linkify-it demo
18 |
19 | Type text below to see linkified example.
20 | See API Docs for usage details.
21 |
22 |
23 |
24 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "linkify-it",
3 | "version": "5.0.0",
4 | "description": "Links recognition library with FULL unicode support",
5 | "keywords": [
6 | "linkify",
7 | "linkifier",
8 | "autolink",
9 | "autolinker"
10 | ],
11 | "repository": "markdown-it/linkify-it",
12 | "main": "build/index.cjs.js",
13 | "module": "index.mjs",
14 | "exports": {
15 | ".": {
16 | "require": "./build/index.cjs.js",
17 | "import": "./index.mjs"
18 | },
19 | "./*": {
20 | "require": "./*",
21 | "import": "./*"
22 | }
23 | },
24 | "files": [
25 | "index.mjs",
26 | "lib/",
27 | "build/"
28 | ],
29 | "license": "MIT",
30 | "scripts": {
31 | "lint": "eslint .",
32 | "test": "npm run lint && npm run build && c8 --exclude build --exclude test -r text -r html -r lcov mocha",
33 | "demo": "npm run lint && node support/build_demo.mjs",
34 | "doc": "node support/build_doc.mjs",
35 | "build": "rollup -c support/rollup.config.mjs",
36 | "gh-pages": "npm run demo && npm run doc && shx cp -R doc/ demo/ && gh-pages -d demo -f",
37 | "prepublishOnly": "npm run lint && npm run build && npm run gh-pages"
38 | },
39 | "dependencies": {
40 | "uc.micro": "^2.0.0"
41 | },
42 | "devDependencies": {
43 | "@rollup/plugin-node-resolve": "^15.2.3",
44 | "ansi": "^0.3.0",
45 | "benchmark": "^2.1.0",
46 | "c8": "^8.0.1",
47 | "eslint": "^8.54.0",
48 | "eslint-config-standard": "^17.1.0",
49 | "gh-pages": "^6.1.0",
50 | "mdurl": "^2.0.0",
51 | "mocha": "^10.2.0",
52 | "ndoc": "^6.0.0",
53 | "rollup": "^4.6.1",
54 | "shelljs": "^0.8.4",
55 | "shx": "^0.3.2",
56 | "tlds": "^1.166.0"
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/support/build_demo.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import shell from 'shelljs'
4 | import { readFileSync, writeFileSync } from 'fs'
5 |
/**
 * Escapes text so it can be embedded into HTML markup.
 *
 * `&` is replaced first, so the entities produced by the later replacements
 * are not escaped a second time. Single quotes are left untouched.
 *
 * @param {string} input - raw text
 * @returns {string} text with `&`, `<`, `>` and `"` replaced by HTML entities
 */
function escape (input) {
  return input
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
}
14 |
// Start from an empty output directory
shell.rm('-rf', 'demo')
shell.mkdir('demo')

shell.cp('support/demo_template/index.css', 'demo/')

// Read html template and inject escaped sample
const html = readFileSync('support/demo_template/index.html', 'utf8')

// Fixture lines starting with "%" are comments
const isComment = str => /^%.*/.test(str)
// Missing, empty and whitespace-only lines all count as empty
const isEmpty = str => !(str && str.trim())

// Cleanup of the links fixture
const fixtureLines = readFileSync('test/fixtures/links.txt', 'utf8').split(/\r?\n/g)
const kept = []
let pos = 0

while (pos < fixtureLines.length) {
  const line = fixtureLines[pos]
  const next = fixtureLines[pos + 1]
  pos++

  if (isComment(line)) {
    kept.push(line)
    continue
  }

  if (isEmpty(line)) {
    // Keep a blank separator only directly before a comment line
    if (isComment(next)) {
      kept.push('')
    }
    continue
  }

  kept.push(line)

  // A non-empty, non-comment line right after a kept line is dropped
  if (!isComment(next) && !isEmpty(next)) {
    pos++
  }
}

const sampleLinks = kept.join('\n')

const sampleNotLinks = readFileSync('test/fixtures/not_links.txt', 'utf8')

// Both parts, separated by two blank lines
const sample = sampleLinks + '\n\n\n' + sampleNotLinks

// NOTE(review): the search string is empty, so `replace` inserts the sample at
// the very beginning of the template. A placeholder marker was presumably
// expected here — confirm against support/demo_template/index.html.
const output = html.replace('', escape(sample))
writeFileSync('demo/index.html', output)

shell.exec('node_modules/.bin/rollup -c support/demo_template/rollup.config.mjs')
67 |
--------------------------------------------------------------------------------
/support/demo_template/index.mjs:
--------------------------------------------------------------------------------
1 | /* eslint-env browser */
2 | /* global $, _ */
3 |
4 | import linkifyit from '../../index.mjs'
5 | import * as mdurl from 'mdurl'
6 | const linkify = linkifyit({ fuzzyIP: true })
7 | let permalink
8 |
/**
 * Escapes text for insertion into HTML element content.
 *
 * `&` is handled first, so entities produced by the later replacements are
 * not escaped twice. Quotes are not touched.
 *
 * @param {string} str - raw text
 * @returns {string} text with `&`, `<` and `>` replaced by HTML entities
 */
function escape (str) {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
}
12 |
/**
 * Renders `content` into the element(s) matched by `selector`, turning every
 * link reported by `linkify.match()` into an anchor.
 *
 * Text between links is HTML-escaped and its line breaks become `<br>`.
 * When nothing matches, the escaped text is inserted unchanged.
 *
 * @param {string} selector - jQuery selector of the output element
 * @param {string} content - raw source text
 */
function setLinkifiedContent (selector, content) {
  // Escape a plain-text fragment and keep its line breaks visible in HTML
  const toHtml = text => escape(text).replace(/\r?\n/g, '<br>')

  let out = escape(content)
  const matches = linkify.match(content)

  if (matches) {
    const result = []
    let last = 0

    matches.forEach(function (match) {
      // Plain text between the previous link and this one
      if (last < match.index) {
        result.push(toHtml(content.slice(last, match.index)))
      }

      // The URL lands inside a double-quoted attribute, so quotes are
      // escaped in addition to what `escape()` already covers.
      const href = escape(match.url).replace(/"/g, '&quot;')

      result.push('<a href="' + href + '">')
      result.push(escape(match.text))
      result.push('</a>')
      last = match.lastIndex
    })

    // Trailing text after the last link
    if (last < content.length) {
      result.push(toHtml(content.slice(last)))
    }

    out = result.join('')
  }

  $(selector).html(out)
}
39 |
/**
 * Re-renders the linkified preview from the current `.source` value and
 * refreshes the permalink: `#t1=<encoded source>` when there is text,
 * an empty href otherwise.
 */
function updateResult () {
  const source = $('.source').val()

  setLinkifiedContent('.result-html', source)

  permalink.href = source
    ? '#t1=' + mdurl.encode(source, mdurl.encode.componentChars)
    : ''
}
51 |
52 | //
53 | // Init on page load
54 | //
$(() => {
  // Restore content if opened by permalink
  const permalinkPrefix = /^(#t1=)/

  if (location.hash && permalinkPrefix.test(location.hash)) {
    // The first 4 characters are the "#t1=" prefix
    const encoded = location.hash.slice(4)
    $('.source').val(mdurl.decode(encoded, mdurl.decode.componentChars))
  }

  // Activate tooltips
  $('._tip').tooltip({ container: 'body' })

  permalink = document.getElementById('permalink')

  // Setup listeners: refresh the preview on edits, debounced
  const debouncedUpdate = _.debounce(updateResult, 300, { maxWait: 500 })
  $('.source').on('keyup paste cut mouseup', debouncedUpdate)

  // "Clear" control: empty the source, refresh, and suppress the default click action
  const clearSource = event => {
    $('.source').val('')
    updateResult()
    event.preventDefault()
  }
  $('.source-clear').on('click', clearSource)

  // Initial render
  updateResult()
})
77 |
--------------------------------------------------------------------------------
/benchmark/benchmark.mjs:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | /* eslint-disable no-console */
3 |
4 | import { readFileSync, readdirSync } from 'fs'
5 | import util from 'node:util'
6 | import Benchmark from 'benchmark'
7 | import ansi from 'ansi'
8 | const cursor = ansi(process.stdout)
9 |
// Every sub-directory of ./implementations is loaded as one benchmark
// candidate; its `index.mjs` is imported as a module.
const IMPLS = []

const implNames = readdirSync(new URL('./implementations', import.meta.url)).sort()

for (const name of implNames) {
  const moduleUrl = new URL(`./implementations/${name}/index.mjs`, import.meta.url)

  IMPLS.push({ name, code: await import(moduleUrl) })
}
18 |
// One benchmark suite per file in ./samples; every loaded implementation is
// added to each suite.
const SAMPLES = []

// Shared "complete" handler: terminates the progress line
const writeNewline = () => {
  cursor.write('\n')
}

for (const sample of readdirSync(new URL('./samples', import.meta.url)).sort()) {
  const content = {
    string: readFileSync(new URL(`./samples/${sample}`, import.meta.url), 'utf8')
  }

  // NOTE: `length` counts UTF-16 code units, so the "bytes" label is exact
  // only for ASCII samples.
  const title = `(${content.string.length} bytes)`

  const suite = new Benchmark.Suite(title, {
    onStart: () => { console.log('\nSample: %s %s', sample, title) },
    onComplete: writeNewline
  })

  for (const impl of IMPLS) {
    suite.add(impl.name, {
      // Redraw the current progress line in place
      onCycle: event => {
        cursor.horizontalAbsolute()
        cursor.eraseLine()
        cursor.write(' > ' + event.target)
      },
      onComplete: writeNewline,
      fn: () => { impl.code.run(content.string) }
    })
  }

  // The sample name is the file name up to the first dot
  SAMPLES.push({ name: sample.split('.')[0], title, content, suite })
}
53 |
/**
 * Picks the samples whose name matches at least one of the given patterns.
 *
 * @param {RegExp|RegExp[]} patterns - a single pattern or a list of them;
 *   an empty list selects every sample
 * @returns {Array} matching entries of `SAMPLES`, in their original order
 */
function select (patterns) {
  const list = patterns instanceof Array ? patterns : [patterns]

  const matchesAny = name => list.length === 0 || list.some(regexp => regexp.test(name))

  return SAMPLES.filter(sample => matchesAny(sample.name))
}
75 |
/**
 * Prints which samples were selected by `files`, then runs the benchmark
 * suite of each selected sample in order.
 *
 * @param {RegExp[]} files - patterns forwarded to `select()`
 */
function run (files) {
  const selected = select(files)

  if (selected.length === 0) {
    console.log('There isn\'t any sample matches any of these patterns: %s', util.inspect(files))
  } else {
    console.log('Selected samples: (%d of %d)', selected.length, SAMPLES.length)
    for (const { name } of selected) {
      console.log(' > %s', name)
    }
  }

  for (const { suite } of selected) {
    suite.run()
  }
}
92 |
// Every CLI argument becomes a case-insensitive sample-name pattern
const cliPatterns = process.argv.slice(2).map(source => new RegExp(source, 'i'))

run(cliPatterns)
96 |
--------------------------------------------------------------------------------
/benchmark/samples/link_fuzzy.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
2 |
3 | Vivamus sagittis, diam in lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique markdown-it@github.com.
12 |
--------------------------------------------------------------------------------
/benchmark/samples/email_fuzzy.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
2 |
3 | Vivamus sagittis, diam in lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique markdown-it@github.com.
12 |
--------------------------------------------------------------------------------
/benchmark/samples/link_normal.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
2 |
3 | Vivamus sagittis, diam in lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc metus lacus, commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique https://github.com/markdown-it
12 |
--------------------------------------------------------------------------------
/benchmark/samples/many.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
2 |
3 | Vivamus sagittis, diam in github.com/markdown-it/markdown-it lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc metus lacus, http://github.com/markdown-it commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, markdown-it@github.com ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
12 |
--------------------------------------------------------------------------------
/benchmark/samples/many_fast.txt:
--------------------------------------------------------------------------------
1 | Lorem ipsum dolor sit amet, adipiscing elit. Cras imperdiet nec erat ac condimentum. Nulla vel rutrum ligula. Sed hendrerit interdum orci a posuere. Vivamus ut velit aliquet, mollis purus eget, iaculis nisl. Proin posuere malesuada ante. Proin auctor orci eros, ac molestie lorem dictum nec. Vestibulum sit amet erat est. Morbi luctus sed elit ac luctus. Proin blandit, enim vitae egestas posuere, neque elit ultricies dui, vel mattis nibh enim ac lorem. Maecenas molestie nisl sit amet velit dictum lobortis. Aliquam erat volutpat.
2 |
3 | Vivamus sagittis, diam in https://github.com/markdown-it/markdown-it lobortis, sapien arcu mattis erat, vel aliquet sem urna et risus. Ut feugiat sapien vitae mi elementum laoreet. Suspendisse potenti. Aliquam erat nisl, aliquam pretium libero aliquet, sagittis eleifend nunc. In hac habitasse platea dictumst. Integer turpis augue, tincidunt dignissim mauris id, rhoncus dapibus purus. Maecenas et enim odio. Nullam massa metus, varius quis vehicula sed, pharetra mollis erat. In quis viverra velit. Vivamus placerat, est nec hendrerit varius, enim dui hendrerit magna, ut pulvinar nibh lorem vel lacus. Mauris a orci iaculis, hendrerit eros sed, gravida leo. In dictum mauris vel augue varius, ac ullamcorper nisl ornare. In eu posuere velit, ac fermentum arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. Nullam sed malesuada leo, at interdum elit.
4 |
5 | Nullam ut tincidunt nunc metus lacus, github.com/markdown-it commodo eget justo ut, rutrum varius nunc. Sed non rhoncus risus. Morbi sodales gravida pulvinar. Duis malesuada, odio volutpat elementum vulputate, massa magna scelerisque ante, et accumsan tellus nunc in sem. Donec mattis arcu et velit aliquet, non sagittis justo vestibulum. Suspendisse volutpat felis lectus, nec consequat ipsum mattis id. Donec dapibus vehicula facilisis. In tincidunt mi nisi, nec faucibus tortor euismod nec. Suspendisse ante ligula, aliquet vitae libero eu, vulputate dapibus libero. Sed bibendum, sapien at posuere interdum, libero est sollicitudin magna, ac gravida tellus purus eu ipsum. Proin ut quam arcu.
6 |
7 | Suspendisse potenti. Donec ante velit, markdown-it@github.com ornare at augue quis, tristique laoreet sem. Etiam in ipsum elit. Nullam cursus dolor sit amet nulla feugiat tristique. Phasellus ac tellus tincidunt, imperdiet purus eget, ullamcorper ipsum. Cras eu tincidunt sem. Nullam sed dapibus magna. Lorem ipsum dolor sit amet, consectetur adipiscing elit. In id venenatis tortor. In consectetur sollicitudin pharetra. Etiam convallis nisi nunc, et aliquam turpis viverra sit amet. Maecenas faucibus sodales tortor. Suspendisse lobortis mi eu leo viverra volutpat. Pellentesque velit ante, vehicula sodales congue ut, elementum a urna. Cras tempor, ipsum eget luctus rhoncus, arcu ligula fermentum urna, vulputate pharetra enim enim non libero.
8 |
9 | Proin diam quam, elementum in eleifend id, elementum et metus. Cras in justo consequat justo semper ultrices. Sed dignissim lectus a ante mollis, nec vulputate ante molestie. Proin in porta nunc. Etiam pulvinar turpis sed velit porttitor, vel adipiscing velit fringilla. Cras ac tellus vitae purus pharetra tincidunt. Sed cursus aliquet aliquet. Cras eleifend commodo malesuada. In turpis turpis, ullamcorper ut tincidunt a, ullamcorper a nunc. Etiam luctus tellus ac dapibus gravida. Ut nec lacus laoreet neque ullamcorper volutpat.
10 |
11 | Nunc et leo erat. Aenean mattis ultrices lorem, eget adipiscing dolor ultricies eu. In hac habitasse platea dictumst. Vivamus cursus feugiat sapien quis aliquam. Mauris quam libero, porta vel volutpat ut, blandit a purus. Vivamus vestibulum dui vel tortor molestie, sit amet feugiat sem commodo. Nulla facilisi. Sed molestie arcu eget tellus vestibulum tristique.
12 |
13 | https://github.com/markdown-it
14 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | 5.0.0 / 2023-12-01
2 | ------------------
3 |
4 | - Rewrite to ESM.
5 |
6 |
7 | 4.0.1 / 2022-05-02
8 | ------------------
9 |
10 | - Fix `http://` incorrectly returned as a link by `matchAtStart`.
11 |
12 |
13 | 4.0.0 / 2022-04-22
14 | ------------------
15 |
16 | - Add `matchAtStart` method to match full URLs at the start of the string.
17 | - Fixed paired symbols (`()`, `{}`, `""`, etc.) after punctuation.
18 | - `---` option now affects parsing of emails (e.g. `user@example.com---`)
19 |
20 |
21 | 3.0.3 / 2021-10-01
22 | ------------------
23 |
24 | - Fixed #98. Don't count `;` at the end of link (when followed with space).
25 |
26 |
27 | 3.0.2 / 2020-05-20
28 | ------------------
29 |
30 | - Proper fix for #54. Allow multiple `!` in links (but not at the end).
31 |
32 |
33 | 3.0.1 / 2020-05-19
34 | ------------------
35 |
36 | - Reverted #54 fix (allowed multiple `!` in links), and added collision
37 | sample.
38 |
39 |
40 | 3.0.0 / 2020-05-19
41 | ------------------
42 |
43 | - Allow unlimited `.` inside link params, #81. This should not be breaking, but
44 | bumped version for sure.
45 | - Allow `..&` in params, #87.
46 | - Allow multiple `!` in links, #54.
47 | - Deps bump.
48 | - Rewrite build scripts.
49 |
50 |
51 | 2.2.0 / 2019-07-12
52 | ------------------
53 |
54 | - Improved quoted email detect (disable `"` at email start), #72.
55 | - Fix some google links (allow more consecutive `.`), #66.
56 |
57 |
58 | 2.1.0 / 2018-11-27
59 | ------------------
60 |
61 | - Allow `--` (and more dashes) in domain names, #63.
62 |
63 |
64 | 2.0.3 / 2016-12-09
65 | ------------------
66 |
67 | - Process `|` (asian vertical pipe 0xFF5C) as valid text separator.
68 |
69 |
70 | 2.0.2 / 2016-10-15
71 | ------------------
72 |
73 | - Allow dashes in local domains, #43.
74 |
75 |
76 | 2.0.1 / 2016-09-28
77 | ------------------
78 |
79 | - Restrict user:pass@... content - prohibit "()[]" chars in auth, #41.
80 |
81 |
82 | 2.0.0 / 2016-06-22
83 | ------------------
84 |
85 | - `---` no longer terminates link. Use option `{ '---': true }` to return old
86 | behaviour.
87 | - `.onCompile()` hook to modify base regexp constants.
88 | - Allow `foo'-bar` in path
89 |
90 |
91 | 1.2.4 / 2016-06-03
92 | ------------------
93 |
94 | - Consider `<` & `>` as invalid in links.
95 | - Support links in lt/gt braces: `<user@domain.com>`, `<http://example.com>`.
96 |
97 |
98 | 1.2.3 / 2016-05-31
99 | ------------------
100 |
101 | - Allow digits in local domains, #36.
102 | - Restrict user/pass (prohibit [@/] chars) to avoid wrong domain fetch.
103 | - More restrictions for protocol-transparent links. Don't allow single-level
104 | (local) domains, except '//localhost', #19.
105 |
106 |
107 | 1.2.2 / 2016-05-30
108 | ------------------
109 |
110 | - Security fix: due to a problem in the `Any` class regexp from the old `unicode-7.0.0`
111 | package (used in `uc-micro`), a hang happened with astral char patterns like
112 | `😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡 .com` if fuzzy
113 | options used. New installs will use fixed `uc-micro` automatically.
114 | Old installs need to be updated. #36.
115 | - Unicode rules updated to 8.+ version.
116 |
117 |
118 | 1.2.1 / 2016-04-29
119 | ------------------
120 |
121 | - Fix detect email after opening parenthesis: `(my@email.com)`, #32.
122 |
123 |
124 | 1.2.0 / 2015-06-29
125 | ------------------
126 |
127 | - Allow dash at the end of url, thanks to @Mumakil.
128 |
129 |
130 | 1.1.1 / 2015-06-09
131 | ------------------
132 |
133 | - Allow ".." in link paths.
134 |
135 |
136 | 1.1.0 / 2015-04-21
137 | ------------------
138 |
139 | - Added options to control fuzzy links recognition (`fuzzyLink: true`,
140 | `fuzzyEmail: true`, `fuzzyIP: false`).
141 | - Disabled IP-links without schema prefix by default.
142 |
143 |
144 | 1.0.1 / 2015-04-19
145 | ------------------
146 |
147 | - More strict default 2-characters tlds handle in fuzzy links, to avoid
148 | false positives for `node.js`, `io.js` and so on.
149 |
150 |
151 | 1.0.0 / 2015-03-25
152 | ------------------
153 |
154 | - Version bump to 1.0.0 for semver.
155 | - Removed `Cf` class from whitespace & punctuation sets (#10).
156 | - API change. Exported regex names renamed to reflect changes. Update your
157 | custom rules if needed:
158 | - `src_ZPCcCf` -> `src_ZPCc`
159 | - `src_ZCcCf` -> `src_ZCc`
160 |
161 |
162 | 0.1.5 / 2015-03-13
163 | ------------------
164 |
165 | - Fixed special chars handling (line breaks).
166 | - Fixed demo permalink encode/decode.
167 |
168 |
169 | 0.1.4 / 2015-03-12
170 | ------------------
171 |
172 | - Allow `..` and `...` inside of link paths (#9). Useful for github links with
173 | commit ranges.
174 | - Added `.pretest()` method for speed optimizations.
175 | - Autogenerate demo sample from fixtures.
176 |
177 |
178 | 0.1.3 / 2015-03-11
179 | ------------------
180 |
181 | - Maintenance release. Deps update.
182 |
183 |
184 | 0.1.2 / 2015-02-26
185 | ------------------
186 |
187 | - Fixed blockquoted links (some symbols exclusions), thanks to @MayhemYDG.
188 | - Fixed demo permalinks, thanks to @MayhemYDG.
189 |
190 |
191 | 0.1.1 / 2015-02-22
192 | ------------------
193 |
194 | - Moved unicode data to external package.
195 | - Demo permalink improvements.
196 | - Docs update.
197 |
198 |
199 | 0.1.0 / 2015-02-12
200 | ------------------
201 |
202 | - First release.
203 |
--------------------------------------------------------------------------------
/lib/re.mjs:
--------------------------------------------------------------------------------
1 | import { Any, Cc, Z, P } from 'uc.micro'
2 | // Builds and returns `re`: a table of regexp source strings (`src_*`) and templates (`tpl_*`, each containing a `%TLDS%` placeholder to be substituted later).
3 | export default function (opts) {
4 | const re = {}
5 | opts = opts || {}
6 | // Only `opts['---']` is read below; when truthy, `---` terminates a link (see src_host_terminator and src_path).
7 | re.src_Any = Any.source
8 | re.src_Cc = Cc.source
9 | re.src_Z = Z.source
10 | re.src_P = P.source
11 |
12 | // \p{\Z\P\Cc} (white spaces + punctuation + control)
13 | re.src_ZPCc = [re.src_Z, re.src_P, re.src_Cc].join('|')
14 |
15 | // \p{\Z\Cc} (white spaces + control)
16 | re.src_ZCc = [re.src_Z, re.src_Cc].join('|')
17 |
18 | // Experimental. List of chars that are completely prohibited in links,
19 | // because they can separate a link from the rest of the text
20 | const text_separators = '[><\uff5c]'
21 |
22 | // All possible word characters (everything without punctuation, spaces & controls)
23 | // Defined via punctuation & spaces to save space
24 | // Should be something like \p{\L\N\S\M} (\w but without `_`)
25 | re.src_pseudo_letter = '(?:(?!' + text_separators + '|' + re.src_ZPCc + ')' + re.src_Any + ')'
26 | // The same as above but without [0-9]
27 | // var src_pseudo_letter_non_d = '(?:(?![0-9]|' + src_ZPCc + ')' + src_Any + ')';
28 |
29 | re.src_ip4 =
30 | // Four dot-separated groups, each matching a decimal octet up to 255
31 | '(?:(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
32 |
33 | // Prohibit any of "@/[]()" in user/pass to avoid wrong domain fetch.
34 | re.src_auth = '(?:(?:(?!' + re.src_ZCc + '|[@/\\[\\]()]).)+@)?'
35 |
36 | re.src_port =
37 | // Optional `:port`; the alternation caps the number at 65535
38 | '(?::(?:6(?:[0-4]\\d{3}|5(?:[0-4]\\d{2}|5(?:[0-2]\\d|3[0-5])))|[1-5]?\\d{1,4}))?'
39 |
40 | re.src_host_terminator =
41 | // Two lookaheads: host must end at end-of-input, a text separator or a space/punctuation/control char, and the next chars must not be `-`, `_`, `:<digit>`, `.-` or a `.` that continues the text (with opts['---'], only a `-` that does not start `---` is rejected)
42 | '(?=$|' + text_separators + '|' + re.src_ZPCc + ')' +
43 | '(?!' + (opts['---'] ? '-(?!--)|' : '-|') + '_|:\\d|\\.-|\\.(?!$|' + re.src_ZPCc + '))'
44 |
45 | re.src_path =
46 |
47 | '(?:' +
48 | '[/?#]' +
49 | '(?:' +
50 | '(?!' + re.src_ZCc + '|' + text_separators + '|[()[\\]{}.,"\'?!\\-;]).|' +
51 | '\\[(?:(?!' + re.src_ZCc + '|\\]).)*\\]|' +
52 | '\\((?:(?!' + re.src_ZCc + '|[)]).)*\\)|' +
53 | '\\{(?:(?!' + re.src_ZCc + '|[}]).)*\\}|' +
54 | '\\"(?:(?!' + re.src_ZCc + '|["]).)+\\"|' +
55 | "\\'(?:(?!" + re.src_ZCc + "|[']).)+\\'|" +
56 |
57 | // allow `I'm_king` if no pair found
58 | "\\'(?=" + re.src_pseudo_letter + '|[-])|' +
59 |
60 | // google has many dots in "google search" links (#66, #81).
61 | // github has ... in commit range links,
62 | // Restrict to
63 | // - english
64 | // - percent-encoded
65 | // - parts of file path
66 | // - params separator
67 | // until more examples found.
68 | '\\.{2,}[a-zA-Z0-9%/&]|' +
69 |
70 | '\\.(?!' + re.src_ZCc + '|[.]|$)|' +
71 | (opts['---']
72 | ? '\\-(?!--(?:[^-]|$))(?:-*)|' // `---` => long dash, terminate
73 | : '\\-+|'
74 | ) +
75 | // allow `,,,` in paths
76 | ',(?!' + re.src_ZCc + '|$)|' +
77 |
78 | // allow `;` if not followed by space-like char
79 | ';(?!' + re.src_ZCc + '|$)|' +
80 |
81 | // allow `!!!` in paths, but not at the end
82 | '\\!+(?!' + re.src_ZCc + '|[!]|$)|' +
83 |
84 | '\\?(?!' + re.src_ZCc + '|[?]|$)' +
85 | ')+' +
86 | '|\\/' +
87 | ')?'
88 |
89 | // Allow anything in markdown spec, forbid quote (") at the first position
90 | // because emails enclosed in quotes are far more common
91 | re.src_email_name =
92 |
93 | '[\\-;:&=\\+\\$,\\.a-zA-Z0-9_][\\-;:&=\\+\\$,\\"\\.a-zA-Z0-9_]*'
94 |
95 | re.src_xn =
96 | // Encoded (`xn--...`) label: the prefix followed by 1-59 chars of [a-z0-9-]
97 | 'xn--[a-z0-9\\-]{1,59}'
98 |
99 | // More to read about domain names
100 | // http://serverfault.com/questions/638260/
101 |
102 | re.src_domain_root =
103 |
104 | // Allow letters & digits (http://test1)
105 | '(?:' +
106 | re.src_xn +
107 | '|' +
108 | re.src_pseudo_letter + '{1,63}' +
109 | ')'
110 |
111 | re.src_domain =
112 |
113 | '(?:' +
114 | re.src_xn +
115 | '|' +
116 | '(?:' + re.src_pseudo_letter + ')' +
117 | '|' +
118 | '(?:' + re.src_pseudo_letter + '(?:-|' + re.src_pseudo_letter + '){0,61}' + re.src_pseudo_letter + ')' +
119 | ')'
120 |
121 | re.src_host =
122 |
123 | '(?:' +
124 | // Don't need IP check, because digits are already allowed in normal domain names
125 | // src_ip4 +
126 | // '|' +
127 | '(?:(?:(?:' + re.src_domain + ')\\.)*' + re.src_domain/* _root */ + ')' +
128 | ')'
129 |
130 | re.tpl_host_fuzzy =
131 |
132 | '(?:' +
133 | re.src_ip4 +
134 | '|' +
135 | '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))' +
136 | ')'
137 |
138 | re.tpl_host_no_ip_fuzzy =
139 |
140 | '(?:(?:(?:' + re.src_domain + ')\\.)+(?:%TLDS%))'
141 |
142 | re.src_host_strict =
143 |
144 | re.src_host + re.src_host_terminator
145 |
146 | re.tpl_host_fuzzy_strict =
147 |
148 | re.tpl_host_fuzzy + re.src_host_terminator
149 |
150 | re.src_host_port_strict =
151 |
152 | re.src_host + re.src_port + re.src_host_terminator
153 |
154 | re.tpl_host_port_fuzzy_strict =
155 |
156 | re.tpl_host_fuzzy + re.src_port + re.src_host_terminator
157 |
158 | re.tpl_host_port_no_ip_fuzzy_strict =
159 |
160 | re.tpl_host_no_ip_fuzzy + re.src_port + re.src_host_terminator
161 |
162 | //
163 | // Main rules
164 | //
165 |
166 | // Rough test of fuzzy links by host, for quick rejection
167 | re.tpl_host_fuzzy_test =
168 |
169 | 'localhost|www\\.|\\.\\d{1,3}\\.|(?:\\.(?:%TLDS%)(?:' + re.src_ZPCc + '|>|$))'
170 |
171 | re.tpl_email_fuzzy =
172 |
173 | '(^|' + text_separators + '|"|\\(|' + re.src_ZCc + ')' +
174 | '(' + re.src_email_name + '@' + re.tpl_host_fuzzy_strict + ')'
175 |
176 | re.tpl_link_fuzzy =
177 | // Fuzzy link can't be preceded by any of `.:/-_@` or by a non-punctuation char,
178 | // but can follow `>` (markdown blockquote)
179 | '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + re.src_ZPCc + '))' +
180 | '((?![$+<=>^`|\uff5c])' + re.tpl_host_port_fuzzy_strict + re.src_path + ')'
181 |
182 | re.tpl_link_no_ip_fuzzy =
183 | // Fuzzy link can't be preceded by any of `.:/-_@` or by a non-punctuation char,
184 | // but can follow `>` (markdown blockquote)
185 | '(^|(?![.:/\\-_@])(?:[$+<=>^`|\uff5c]|' + re.src_ZPCc + '))' +
186 | '((?![$+<=>^`|\uff5c])' + re.tpl_host_port_no_ip_fuzzy_strict + re.src_path + ')'
187 |
188 | return re
189 | }
190 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | linkify-it
2 | ==========
3 |
4 | [CI](https://github.com/markdown-it/linkify-it/actions/workflows/ci.yml)
5 | [NPM version](https://www.npmjs.org/package/linkify-it)
6 | [Coverage Status](https://coveralls.io/r/markdown-it/linkify-it?branch=master)
7 | [Gitter](https://gitter.im/markdown-it/linkify-it)
8 |
9 | > Links recognition library with FULL unicode support.
10 | > Focused on high quality link patterns detection in plain text.
11 |
12 | __[Demo](http://markdown-it.github.io/linkify-it/)__
13 |
14 | Why it's awesome:
15 |
16 | - Full unicode support, _with astral characters_!
17 | - International domains support.
18 | - Allows rules extension & custom normalizers.
19 |
20 |
21 | Install
22 | -------
23 |
24 | ```bash
25 | npm install linkify-it --save
26 | ```
27 |
28 | Browserification is also supported.
29 |
30 |
31 | Usage examples
32 | --------------
33 |
34 | ##### Example 1
35 |
36 | ```js
37 | import linkifyit from 'linkify-it';
38 | const linkify = linkifyit();
39 |
40 | // Reload full tlds list & add unofficial `.onion` domain.
41 | linkify
42 | .tlds(require('tlds')) // Reload with full tlds list
43 | .tlds('onion', true) // Add unofficial `.onion` domain
44 | .add('git:', 'http:') // Add `git:` protocol as "alias"
45 | .add('ftp:', null) // Disable `ftp:` protocol
46 | .set({ fuzzyIP: true }); // Enable IPs in fuzzy links (without schema)
47 |
48 | console.log(linkify.test('Site github.com!')); // true
49 |
50 | console.log(linkify.match('Site github.com!')); // [ {
51 | // schema: "",
52 | // index: 5,
53 | // lastIndex: 15,
54 | // raw: "github.com",
55 | // text: "github.com",
56 | // url: "http://github.com",
57 | // } ]
58 | ```
59 |
60 | ##### Example 2. Add twitter mentions handler
61 |
62 | ```js
63 | linkify.add('@', {
64 | validate: function (text, pos, self) {
65 | const tail = text.slice(pos);
66 |
67 | if (!self.re.twitter) {
68 | self.re.twitter = new RegExp(
69 | '^([a-zA-Z0-9_]){1,15}(?!_)(?=$|' + self.re.src_ZPCc + ')'
70 | );
71 | }
72 | if (self.re.twitter.test(tail)) {
73 | // Linkifier allows punctuation chars before prefix,
74 | // but we additionally disable `@` ("@@mention" is invalid)
74 | if (pos >= 2 && text[pos - 2] === '@') {
76 | return false;
77 | }
78 | return tail.match(self.re.twitter)[0].length;
79 | }
80 | return 0;
81 | },
82 | normalize: function (match) {
83 | match.url = 'https://twitter.com/' + match.url.replace(/^@/, '');
84 | }
85 | });
86 | ```
87 |
88 |
89 | API
90 | ---
91 |
92 | __[API documentation](http://markdown-it.github.io/linkify-it/doc)__
93 |
94 | ### new LinkifyIt(schemas, options)
95 |
96 | Creates new linkifier instance with optional additional schemas.
97 | Can be called without `new` keyword for convenience.
98 |
99 | By default understands:
100 |
101 | - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
102 | - "fuzzy" links and emails (google.com, foo@bar.com).
103 |
104 | `schemas` is an object, where each key/value describes protocol/rule:
105 |
106 | - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
107 | for example). `linkify-it` makes sure that prefix is not preceded with
108 | alphanumeric char.
109 | - __value__ - rule to check tail after link prefix
110 | - _String_ - just alias to existing rule
111 | - _Object_
112 | - _validate_ - either a `RegExp` (start with `^`, and don't include the
113 | link prefix itself), or a validator function which, given arguments
114 | _text_, _pos_, and _self_, returns the length of a match in _text_
115 | starting at index _pos_. _pos_ is the index right after the link prefix.
116 | _self_ can be used to access the linkify object to cache data.
117 | - _normalize_ - optional function to normalize text & url of matched result
118 | (for example, for twitter mentions).
119 |
120 | `options`:
121 |
122 | - __fuzzyLink__ - recognize URL-s without `http(s)://` head. Default `true`.
123 | - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
124 | like version numbers. Default `false`.
125 | - __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
126 | - __---__ - set `true` to terminate link with `---` (if it's considered as long dash).
127 |
128 |
129 | ### .test(text)
130 |
131 | Searches linkifiable pattern and returns `true` on success or `false` on fail.
132 |
133 |
134 | ### .pretest(text)
135 |
135 | Quick check whether a link MAY exist. Can be used to optimize more expensive
136 | `.test()` calls. Returns `false` if no link can be found, and `true` if a `.test()`
137 | call is needed to know for sure.
139 |
140 |
141 | ### .testSchemaAt(text, name, offset)
142 |
143 | Similar to `.test()` but checks only specific protocol tail exactly at given
144 | position. Returns length of found pattern (0 on fail).
145 |
146 |
147 | ### .match(text)
148 |
149 | Returns `Array` of found link matches or null if nothing found.
150 |
151 | Each match has:
152 |
153 | - __schema__ - link schema, can be empty for fuzzy links, or `//` for
154 | protocol-neutral links.
155 | - __index__ - offset of matched text
155 | - __lastIndex__ - index of next char after match end
157 | - __raw__ - matched text
158 | - __text__ - normalized text
159 | - __url__ - link, generated from matched text
160 |
161 |
162 | ### .matchAtStart(text)
163 |
164 | Checks if a match exists at the start of the string. Returns `Match`
165 | (see docs for `match(text)`) or null if no URL is at the start.
166 | Doesn't work with fuzzy links.
167 |
168 |
169 | ### .tlds(list[, keepOld])
170 |
171 | Load (or merge) new tlds list. Those are needed for fuzzy links (without schema)
172 | to avoid false positives. By default:
173 |
174 | - 2-letter root zones are ok.
175 | - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф are ok.
176 | - encoded (`xn--...`) root zones are ok.
177 |
178 | If that's not enough, you can reload defaults with more detailed zones list.
179 |
180 | ### .add(key, value)
181 |
182 | Add a new schema to the schemas object. As described in the constructor
183 | definition, `key` is a link prefix (`skype:`, for example), and `value`
184 | is a String to alias to another schema, or an Object with `validate` and
185 | optionally `normalize` definitions. To disable an existing rule, use
186 | `.add(key, null)`.
187 |
188 |
189 | ### .set(options)
190 |
190 | Override default options. Omitted properties will not be changed.
192 |
193 |
194 | ## License
195 |
196 | [MIT](https://github.com/markdown-it/linkify-it/blob/master/LICENSE)
197 |
--------------------------------------------------------------------------------
/test/fixtures/links.txt:
--------------------------------------------------------------------------------
1 | %
2 | % Regular links
3 | %
4 | My http://example.com site
5 | http://example.com
6 |
7 | My http://example.com/ site
8 | http://example.com/
9 |
10 | http://example.com/foo_bar/
11 |
12 | http://user:pass@example.com:8080
13 |
14 | http://user@example.com
15 |
16 | http://user@example.com:8080
17 |
18 | http://user:pass@example.com
19 |
20 | [https](https://www.ibm.com)[mailto](mailto:someone@ibm.com) % should not catch as auth (before @ in big link)
21 | https://www.ibm.com
22 |
23 | http://example.com:8080
24 |
25 | http://example.com/?foo=bar
26 |
27 | http://example.com?foo=bar
28 |
29 | http://example.com/#foo=bar
30 |
31 | http://example.com#foo=bar
32 |
33 | http://a.in
34 |
35 | HTTP://GOOGLE.COM
36 |
37 | http://example.invalid % don't restrict root domain when schema exists
38 | http://example.invalid
39 |
40 | http://inrgess2 % Allow local domains to end with digit
41 | http://inrgess2
42 |
43 | http://999 % ..and start with digit, and have digits only
44 | http://999
45 |
46 | http://host-name % local domain with dash
47 | http://host-name
48 |
49 | >>example.com % markdown blockquote
50 | example.com
51 |
52 | >>http://example.com % markdown blockquote
53 | http://example.com
54 |
55 | http://lyricstranslate.com/en/someone-you-നിന്നെ-പോലൊരാള്.html % With control character
56 | http://lyricstranslate.com/en/someone-you-നിന്നെ-പോലൊരാള്.html
57 |
58 | %
59 | % localhost (only with protocol allowed)
60 | %
61 | //localhost
62 |
63 | //test.123
64 |
65 | http://localhost:8000?
66 | http://localhost:8000
67 |
68 |
69 | %
70 | % Other protocols
71 | %
72 | My ssl https://example.com site
73 | https://example.com
74 |
75 | My ftp://example.com site
76 | ftp://example.com
77 |
78 |
79 | %
80 | % Neutral proto
81 | %
82 | My ssl //example.com site
83 | //example.com
84 |
85 | %
86 | % IPs
87 | %
88 | 4.4.4.4
89 |
90 | 192.168.1.1/abc
91 |
92 |
93 | %
94 | % Fuzzy
95 | %
96 | test.example@http://vk.com
97 | http://vk.com
98 |
99 | text:http://example.com/
100 | http://example.com/
101 |
102 | google.com
103 |
104 | google.com: // no port
105 | google.com
106 |
107 | s.l.o.w.io
108 |
109 | a-b.com
110 |
111 | GOOGLE.COM.
112 | GOOGLE.COM
113 |
114 | google.xxx // known tld
115 | google.xxx
116 |
117 |
118 | %
119 | % Correct termination for . , ! ? [] {} () "" ''
120 | %
121 | (Scoped http://example.com/foo_bar)
122 | http://example.com/foo_bar
123 |
124 | http://example.com/foo_bar_(wiki)
125 |
126 | http://foo.com/blah_blah_[other]
127 |
128 | http://foo.com/blah_blah_{I'm_king}
129 |
130 | http://foo.com/blah_blah_I'm_king
131 |
132 | http://www.kmart.com/bestway-10'-x-30inch-steel-pro-frame-pool/p-004W007538417001P
133 |
134 | http://foo.com/blah_blah_"doublequoted"
135 |
136 | http://foo.com/blah_blah_'singlequoted'
137 |
138 | (Scoped like http://example.com/foo_bar)
139 | http://example.com/foo_bar
140 |
141 | [Scoped like http://example.com/foo_bar]
142 | http://example.com/foo_bar
143 |
144 | {Scoped like http://example.com/foo_bar}
145 | http://example.com/foo_bar
146 |
147 | "Quoted like http://example.com/foo_bar"
148 | http://example.com/foo_bar
149 |
150 | 'Quoted like http://example.com/foo_bar'
151 | http://example.com/foo_bar
152 |
153 | [example.com/foo_bar.jpg)]
154 | example.com/foo_bar.jpg
155 |
156 | http://example.com/foo_bar.jpg.
157 | http://example.com/foo_bar.jpg
158 |
159 | http://example.com/foo_bar/.
160 | http://example.com/foo_bar/
161 |
162 | http://example.com/foo_bar,
163 | http://example.com/foo_bar
164 |
165 | http://index-of.es/Android/Professional.Android.2.Application.Development.(Wrox,.2010,.0470565527).pdf
166 |
167 | https://github.com/markdown-it/linkify-it/compare/360b13a733f521a8d4903d3a5e1e46c357e9d3ce...f580766349525150a80a32987bb47c2d592efc33
168 |
169 | https://www.google.com/search?sxsrf=ACYBGNTJFmX-GjNJ8fM-2LCkqyNyxGU1Ng%3A1575534146332&ei=Qr7oXf7rE4rRrgSEgrmoAw&q=clover&oq=clover&gs_l=psy-ab.3..0i67j0l9.2986.3947..4187...0.2..0.281.1366.1j0j5......0....1..gws-wiz.......0i71j35i39j0i131.qWp1nz4IJVA&ved=0ahUKEwj-lP6Iip7mAhWKqIsKHQRBDjUQ4dUDCAs&uact=5
170 |
171 | https://ourworldindata.org/grapher/covid-deaths-days-since-per-million?zoomToSelection=true&time=9..&country=FRA+DEU+ITA+ESP+GBR+USA+CAN
172 |
173 | http://example.com/foo_bar...
174 | http://example.com/foo_bar
175 |
176 | http://172.26.142.48/viewerjs/#../0529/slides.pdf
177 |
178 | http://example.com/foo_bar..
179 | http://example.com/foo_bar
180 |
181 | http://example.com/foo_bar?p=10.
182 | http://example.com/foo_bar?p=10
183 |
184 | https://www.google.ru/maps/@59.9393895,30.3165389,15z?hl=ru
185 |
186 | https://www.google.com/maps/place/New+York,+NY,+USA/@40.702271,-73.9968471,11z/data=!4m2!3m1!1s0x89c24fa5d33f083b:0xc80b8f06e177fe62?hl=en
187 |
188 | https://www.google.com/analytics/web/?hl=ru&pli=1#report/visitors-overview/a26895874w20458057p96934174/
189 |
190 | http://business.timesonline.co.uk/article/0,,9065-2473189,00.html
191 |
192 | https://google.com/mail/u/0/#label/!!!Today/15c9b8193da01e65
193 |
194 | http://example.com/123!
195 | http://example.com/123
196 |
197 | http://example.com/123!!!
198 | http://example.com/123
199 |
200 | http://example.com/foo--bar
201 |
202 | See http://example.com/123; Example link.
203 | http://example.com/123
204 |
205 | http://example.com/123;123
206 |
207 | % some sites have links with trailing dashes
208 | http://www.bloomberg.com/news/articles/2015-06-26/from-deutsche-bank-to-siemens-what-s-troubling-germany-inc-
209 |
210 | http://example.com/foo-with-trailing-dash-dot-.
211 | http://example.com/foo-with-trailing-dash-dot-
212 |
213 |
214 | http://domain.com
215 |
216 | .
217 | http://domain.com
218 |
219 |
220 | http://domain.com/foo
221 |
222 | .
223 | http://domain.com/foo
224 |
225 |
226 | domain.com
227 |
228 | .
229 | domain.com
230 |
231 |
232 | domain.com/foo
233 |
234 |
235 | user@domain.com
236 |
237 | .
238 | user@domain.com
239 |
240 |
241 | mailto:user@domain.com
242 |
243 |
244 | %
245 | % Emails
246 | %
247 |
248 | test."foo".bar@gmail.co.uk!
249 | test."foo".bar@gmail.co.uk
250 |
251 | "test@example.com"
252 | test@example.com
253 |
254 | name@example.com
255 |
256 | >>name@example.com % markdown blockquote
257 | name@example.com
258 |
259 | mailto:name@example.com
260 |
261 | MAILTO:NAME@EXAMPLE.COM
262 |
263 | mailto:foo_bar@example.com
264 |
265 | foo+bar@gmail.com
266 |
267 | 192.168.1.1@gmail.com
268 |
269 | mailto:foo@bar % explicit protocol make it valid
270 | mailto:foo@bar
271 |
272 | (foobar email@example.com)
273 | email@example.com
274 |
275 | (email@example.com foobar)
276 | email@example.com
277 |
278 | (email@example.com)
279 | email@example.com
280 |
281 |
282 | %
283 | % International
284 | %
285 | http://✪df.ws/123
286 |
287 | http://xn--df-oiy.ws/123
288 |
289 | a.ws
290 |
291 | ➡.ws/䨹
292 |
293 | example.com/䨹
294 |
295 | президент.рф
296 |
297 |
298 | % Links below provided by diaspora* guys, to make sure regressions will not happen.
299 | % Those left here for historic reasons.
300 |
301 | http://www.bürgerentscheid-krankenhäuser.de
302 |
303 | http://www.xn--brgerentscheid-krankenhuser-xkc78d.de
304 |
305 | http://bündnis-für-krankenhäuser.de/wp-content/uploads/2011/11/cropped-logohp.jpg
306 |
307 | http://xn--bndnis-fr-krankenhuser-i5b27cha.de/wp-content/uploads/2011/11/cropped-logohp.jpg
308 |
309 | http://ﻡﻮﻘﻋ.ﻭﺯﺍﺭﺓ-ﺍﻼﺘﺻﺍﻼﺗ.ﻢﺻﺭ/
310 |
311 | http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/
312 |
313 | %
314 | % Others...
315 | %
316 | |www.google.com/www.google.com/foo|bar % #46, asian vertical pipes
317 | www.google.com/www.google.com/foo
318 |
319 | |test@google.com|bar
320 | test@google.com
321 |
322 | |http://google.com|bar
323 | http://google.com
324 |
325 | %
326 | % Domains with multiple dashes
327 | %
328 |
329 | https://5b0ee223b312746c1659db3f--thelounge-chat.netlify.com/docs/
330 |
331 | www.a--b.com
332 |
333 | www.c--u.com
334 |
335 | http://a---b.com/
336 |
--------------------------------------------------------------------------------
/test/test.mjs:
--------------------------------------------------------------------------------
1 | /* eslint-env mocha */
2 |
3 | import { readFileSync } from 'fs'
4 | import assert from 'node:assert'
5 | import linkify from '../index.mjs'
6 | import { createRequire } from 'node:module'
7 |
8 | const tlds = createRequire(import.meta.url)('tlds')
9 |
10 | let lines
11 |
describe('links', function () {
  const l = linkify({ fuzzyIP: true })

  l.normalize = function () {} // kill normalizer

  lines = readFileSync(new URL('fixtures/links.txt', import.meta.url), 'utf8').split(/\r?\n/g)

  // Fixture format:
  // - a line starting with `%` is a comment and is treated as empty;
  // - a non-empty line followed by another non-empty line is a sample plus the
  //   url expected to be extracted from it;
  // - a non-empty line followed by an empty one must be extracted as is.
  const stripComment = function (str) { return str.replace(/^%.*/, '') }

  let skipNext = false

  lines.forEach(function (rawLine, idx) {
    if (skipNext) {
      // this line was the expectation of the previous sample
      skipNext = false
      return
    }

    const line = stripComment(rawLine)
    const next = stripComment(lines[idx + 1] || '')

    if (!line.trim()) { return }

    const hasExpectation = Boolean(next.trim())
    const expected = hasExpectation ? next : line

    it('line ' + (idx + 1), function () {
      assert.ok(l.pretest(line), '(pretest failed in `' + line + '`)')
      assert.ok(l.test('\n' + line + '\n'), '(link not found in `\\n' + line + '\\n`)')
      assert.ok(l.test(line), '(link not found in `' + line + '`)')
      assert.strictEqual(l.match(line)[0].url, expected)
    })

    skipNext = hasExpectation
  })
})
51 |
describe('not links', function () {
  const l = linkify()

  l.normalize = function () {} // kill normalizer

  lines = readFileSync(new URL('fixtures/not_links.txt', import.meta.url), 'utf8').split(/\r?\n/g)

  // Every non-empty, non-comment (`%`) fixture line must produce no link.
  lines.forEach(function (rawLine, idx) {
    const line = rawLine.replace(/^%.*/, '')

    if (line.trim() === '') { return }

    it('line ' + (idx + 1), function () {
      const linkFound = l.test(line)
      // collected only to make the failure message useful
      const firstMatch = (l.match(line) || [])[0]

      assert.ok(!linkFound,
        '(should not find link in `' + line + '`, but found `' +
        JSON.stringify(firstMatch) + '`)')
    })
  })
})
71 |
72 | describe('API', function () {
73 | it('extend tlds', function () {
74 | const l = linkify()
75 |
76 | assert.ok(!l.test('google.myroot'))
77 |
78 | l.tlds('myroot', true)
79 |
80 | assert.ok(l.test('google.myroot'))
81 | assert.ok(!l.test('google.xyz'))
82 |
83 | l.tlds(tlds)
84 |
85 | assert.ok(l.test('google.xyz'))
86 | assert.ok(!l.test('google.myroot'))
87 | })
88 |
89 | it('add rule as regexp, with default normalizer', function () {
90 | const l = linkify().add('my:', {
91 | validate: /^\/\/[a-z]+/
92 | })
93 |
94 | const match = l.match('google.com. my:// my://asdf!')
95 |
96 | assert.strictEqual(match[0].text, 'google.com')
97 | assert.strictEqual(match[1].text, 'my://asdf')
98 | })
99 |
100 | it('add rule with normalizer', function () {
101 | const l = linkify().add('my:', {
102 | validate: /^\/\/[a-z]+/,
103 | normalize: function (m) {
104 | m.text = m.text.replace(/^my:\/\//, '').toUpperCase()
105 | m.url = m.url.toUpperCase()
106 | }
107 | })
108 |
109 | const match = l.match('google.com. my:// my://asdf!')
110 |
111 | assert.strictEqual(match[1].text, 'ASDF')
112 | assert.strictEqual(match[1].url, 'MY://ASDF')
113 | })
114 |
115 | it('disable rule', function () {
116 | const l = linkify()
117 |
118 | assert.ok(l.test('http://google.com'))
119 | assert.ok(l.test('foo@bar.com'))
120 | l.add('http:', null)
121 | l.add('mailto:', null)
122 | assert.ok(!l.test('http://google.com'))
123 | assert.ok(!l.test('foo@bar.com'))
124 | })
125 |
126 | it('add bad definition', function () {
127 | let l
128 |
129 | l = linkify()
130 |
131 | assert.throws(function () {
132 | l.add('test:', [])
133 | })
134 |
135 | l = linkify()
136 |
137 | assert.throws(function () {
138 | l.add('test:', { validate: [] })
139 | })
140 |
141 | l = linkify()
142 |
143 | assert.throws(function () {
144 | l.add('test:', {
145 | validate: function () { return false },
146 | normalize: 'bad'
147 | })
148 | })
149 | })
150 |
151 | it('test at position', function () {
152 | const l = linkify()
153 |
154 | assert.ok(l.testSchemaAt('http://google.com', 'http:', 5))
155 | assert.ok(l.testSchemaAt('http://google.com', 'HTTP:', 5))
156 | assert.ok(!l.testSchemaAt('http://google.com', 'http:', 6))
157 |
158 | assert.ok(!l.testSchemaAt('http://google.com', 'bad_schema:', 6))
159 | })
160 |
161 | it('correct cache value', function () {
162 | const l = linkify()
163 |
164 | const match = l.match('.com. http://google.com google.com ftp://google.com')
165 |
166 | assert.strictEqual(match[0].text, 'http://google.com')
167 | assert.strictEqual(match[1].text, 'google.com')
168 | assert.strictEqual(match[2].text, 'ftp://google.com')
169 | })
170 |
171 | it('normalize', function () {
172 | const l = linkify()
173 |
174 | let m = l.match('mailto:foo@bar.com')[0]
175 |
176 | // assert.strictEqual(m.text, 'foo@bar.com');
177 | assert.strictEqual(m.url, 'mailto:foo@bar.com')
178 |
179 | m = l.match('foo@bar.com')[0]
180 |
181 | // assert.strictEqual(m.text, 'foo@bar.com');
182 | assert.strictEqual(m.url, 'mailto:foo@bar.com')
183 | })
184 |
it('test @twitter rule', function () {
  const l = linkify().add('@', {
    // `pos` is the offset in `text` right after the `@` prefix; returns the
    // length of the mention name, or a falsy value when there is none.
    validate: function (text, pos, self) {
      const tail = text.slice(pos)

      if (!self.re.twitter) {
        // compiled once and cached on the linkifier's `re` storage
        self.re.twitter = new RegExp(
          '^([a-zA-Z0-9_]){1,15}(?!_)(?=$|' + self.re.src_ZPCc + ')'
        )
      }
      if (self.re.twitter.test(tail)) {
        // Reject `@@name`. `pos` indexes `text`, not `tail`, so the character
        // before the `@` prefix has to be read from `text`.
        if (pos >= 2 && text[pos - 2] === '@') {
          return false
        }
        return tail.match(self.re.twitter)[0].length
      }
      return 0
    },
    normalize: function (m) {
      m.url = 'https://twitter.com/' + m.url.replace(/^@/, '')
    }
  })

  assert.strictEqual(l.match('hello, @gamajoba_!')[0].text, '@gamajoba_')
  assert.strictEqual(l.match(':@givi')[0].text, '@givi')
  assert.strictEqual(l.match(':@givi')[0].url, 'https://twitter.com/givi')
  assert.ok(!l.test('@@invalid'))
  // the second `@` is a valid prefix position here, so only the guard rejects it
  assert.ok(!l.test('a@@invalid'))
})
213 |
214 | it('set option: fuzzyLink', function () {
215 | const l = linkify({ fuzzyLink: false })
216 |
217 | assert.strictEqual(l.test('google.com.'), false)
218 |
219 | l.set({ fuzzyLink: true })
220 |
221 | assert.strictEqual(l.test('google.com.'), true)
222 | assert.strictEqual(l.match('google.com.')[0].text, 'google.com')
223 | })
224 |
225 | it('set option: fuzzyEmail', function () {
226 | const l = linkify({ fuzzyEmail: false })
227 |
228 | assert.strictEqual(l.test('foo@bar.com.'), false)
229 |
230 | l.set({ fuzzyEmail: true })
231 |
232 | assert.strictEqual(l.test('foo@bar.com.'), true)
233 | assert.strictEqual(l.match('foo@bar.com.')[0].text, 'foo@bar.com')
234 | })
235 |
236 | it('set option: fuzzyIP', function () {
237 | const l = linkify()
238 |
239 | assert.strictEqual(l.test('1.1.1.1.'), false)
240 |
241 | l.set({ fuzzyIP: true })
242 |
243 | assert.strictEqual(l.test('1.1.1.1.'), true)
244 | assert.strictEqual(l.match('1.1.1.1.')[0].text, '1.1.1.1')
245 | })
246 |
247 | it('should not hang in fuzzy mode with sequences of astrals', function () {
248 | const l = linkify()
249 |
250 | l.set({ fuzzyLink: true })
251 |
252 | l.match('😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡😡 .com')
253 | })
254 |
255 | it('should accept `---` if enabled', function () {
256 | let l = linkify()
257 |
258 | assert.strictEqual(l.match('http://e.com/foo---bar')[0].text, 'http://e.com/foo---bar')
259 | assert.strictEqual(l.match('text@example.com---foo'), null)
260 |
261 | l = linkify(null, { '---': true })
262 |
263 | assert.strictEqual(l.match('http://e.com/foo---bar')[0].text, 'http://e.com/foo')
264 | assert.strictEqual(l.match('text@example.com---foo')[0].text, 'text@example.com')
265 | })
266 |
267 | it('should find a match at the start', function () {
268 | const l = linkify()
269 |
270 | l.set({ fuzzyLink: true })
271 |
272 | assert.strictEqual(l.matchAtStart('http://google.com 123').text, 'http://google.com')
273 | assert.ok(!l.matchAtStart('google.com 123'))
274 | assert.ok(!l.matchAtStart(' http://google.com 123'))
275 | })
276 |
277 | it('matchAtStart should not interfere with normal match', function () {
278 | const l = linkify()
279 | let str
280 |
281 | str = 'http://google.com http://google.com'
282 | assert.ok(l.matchAtStart(str))
283 | assert.strictEqual(l.match(str).length, 2)
284 |
285 | str = 'aaa http://google.com http://google.com'
286 | assert.ok(!l.matchAtStart(str))
287 | assert.strictEqual(l.match(str).length, 2)
288 | })
289 |
290 | it('should not match incomplete links', function () {
291 | // regression test for https://github.com/markdown-it/markdown-it/issues/868
292 | const l = linkify()
293 |
294 | assert.ok(!l.matchAtStart('http://'))
295 | assert.ok(!l.matchAtStart('https://'))
296 | })
297 | })
298 |
--------------------------------------------------------------------------------
/index.mjs:
--------------------------------------------------------------------------------
1 | import reFactory from './lib/re.mjs'
2 |
3 | //
4 | // Helpers
5 | //
6 |
// Merge objects
//
// Copies the own enumerable string keys of every truthy source into `obj`
// (later sources win) and returns `obj` itself. Falsy sources are skipped.
//
function assign (obj, ...sources) {
  for (const source of sources) {
    if (!source) { continue }

    for (const key of Object.keys(source)) {
      obj[key] = source[key]
    }
  }

  return obj
}
22 |
// Built-in type tag of a value, e.g. '[object RegExp]'.
function _class (obj) {
  return Object.prototype.toString.call(obj)
}

// Tag-based type predicates.
function isString (obj) {
  const tag = _class(obj)
  return tag === '[object String]'
}

function isObject (obj) {
  const tag = _class(obj)
  return tag === '[object Object]'
}

function isRegExp (obj) {
  const tag = _class(obj)
  return tag === '[object RegExp]'
}

function isFunction (obj) {
  const tag = _class(obj)
  return tag === '[object Function]'
}
28 |
// Backslash-escape every RegExp metacharacter in `str`, so the result can be
// embedded into a pattern source and match `str` literally.
function escapeRE (str) {
  // `$&` stands for the matched metacharacter itself
  return str.replace(/[.?*+^$[\]\\(){}|-]/g, '\\$&')
}
30 |
31 | //
32 |
// Option defaults; the key set is also used to recognize an options object.
const defaultOptions = {
  fuzzyLink: true,
  fuzzyEmail: true,
  fuzzyIP: false
}

// `true` when `obj` has at least one own enumerable key that is a known
// option name. `null` / `undefined` are treated as an empty object.
function isOptionsObj (obj) {
  const hasOwn = Object.prototype.hasOwnProperty

  return Object.keys(obj || {}).some(function (key) {
    return hasOwn.call(defaultOptions, key)
  })
}
45 |
// Built-in rules. Every validator receives the whole text, the offset right
// after the schema prefix and the linkifier instance, and returns the length
// of the link tail found at that offset (0 when there is none).
const defaultSchemas = {
  'http:': {
    validate: function (text, pos, self) {
      const tail = text.slice(pos)

      if (!self.re.http) {
        // compile lazily, because "host"-containing variables can change on tlds update.
        self.re.http = new RegExp(
          '^\\/\\/' + self.re.src_auth + self.re.src_host_port_strict + self.re.src_path, 'i'
        )
      }

      const found = tail.match(self.re.http)

      return found === null ? 0 : found[0].length
    }
  },
  'https:': 'http:',
  'ftp:': 'http:',
  '//': {
    validate: function (text, pos, self) {
      const tail = text.slice(pos)

      if (!self.re.no_http) {
        // compile lazily, because "host"-containing variables can change on tlds update.
        self.re.no_http = new RegExp(
          '^' +
          self.re.src_auth +
          // Don't allow single-level domains, because of false positives like '//test'
          // with code comments
          '(?:localhost|(?:(?:' + self.re.src_domain + ')\\.)+' + self.re.src_domain_root + ')' +
          self.re.src_port +
          self.re.src_host_terminator +
          self.re.src_path,

          'i'
        )
      }

      const found = tail.match(self.re.no_http)

      if (found === null) { return 0 }

      // should not be `://` & `///`, that protects from errors in protocol name
      // (`pos - 3` is the character right before the `//` prefix)
      const before = pos >= 3 ? text[pos - 3] : ''

      if (before === ':' || before === '/') { return 0 }

      return found[0].length
    }
  },
  'mailto:': {
    validate: function (text, pos, self) {
      const tail = text.slice(pos)

      if (!self.re.mailto) {
        self.re.mailto = new RegExp(
          '^' + self.re.src_email_name + '@' + self.re.src_host_strict, 'i'
        )
      }

      const found = tail.match(self.re.mailto)

      return found === null ? 0 : found[0].length
    }
  }
}
110 |
111 | // RE pattern for 2-character tlds (autogenerated by ./support/tlds_2char_gen.js)
112 | /* eslint-disable-next-line max-len */
113 | const tlds_2ch_src_re = 'a[cdefgilmnoqrstuwxz]|b[abdefghijmnorstvwyz]|c[acdfghiklmnoruvwxyz]|d[ejkmoz]|e[cegrstu]|f[ijkmor]|g[abdefghilmnpqrstuwy]|h[kmnrtu]|i[delmnoqrst]|j[emop]|k[eghimnprwyz]|l[abcikrstuvy]|m[acdeghklmnopqrstuvwxyz]|n[acefgilopruz]|om|p[aefghklmnrstwy]|qa|r[eosuw]|s[abcdeghijklmnortuvxyz]|t[cdfghjklmnortvwz]|u[agksyz]|v[aceginu]|w[fs]|y[et]|z[amw]'
114 |
115 | // DON'T try to make PRs with changes. Extend TLDs with LinkifyIt.tlds() instead
116 | const tlds_default = 'biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф'.split('|')
117 |
// Forget the result of the last scan: no match position, no cached text.
function resetScanCache (self) {
  Object.assign(self, { __index__: -1, __text_cache__: '' })
}
122 |
// Wrap a RegExp into a schema validator.
//
// The returned function takes the whole `text` and the offset `pos` right
// after the schema prefix, and returns the length of the first match of `re`
// in the rest of the text (0 when there is none).
function createValidator (re) {
  // `g` / `y` regexps keep scan state in `lastIndex`. Rewind it around every
  // run, so a user-supplied stateful RegExp always scans the tail from its
  // beginning and one call cannot influence the next one.
  const stateful = re.global || re.sticky

  return function (text, pos) {
    if (stateful) { re.lastIndex = 0 }

    const found = re.exec(text.slice(pos))

    if (stateful) { re.lastIndex = 0 }

    return found === null ? 0 : found[0].length
  }
}
133 |
// Default per-schema normalizer: delegates to the instance-level `normalize`
// method (looked up at call time, so it can be overridden on the instance).
function createNormalizer () {
  return (match, self) => {
    self.normalize(match)
  }
}
139 |
// Schemas compiler. Build regexps.
//
// Rebuilds everything derived from the instance settings:
// - `self.re`: base patterns for the current options, plus the TLD-dependent
//   fuzzy regexps and the schema prefix regexps;
// - `self.__compiled__`: one `{ validate, normalize }` record per enabled
//   schema, plus a fake `''` record for guessed (fuzzy) links.
// Throws an Error on an invalid schema definition and resets the scan cache
// when it completes.
//
function compile (self) {
  // Build the base patterns for the current options.
  const re = self.re = reFactory(self.__opts__)

  // Define dynamic patterns
  const tlds = self.__tlds__.slice()

  // user hook, runs before anything is derived from the base patterns
  self.onCompile()

  if (!self.__tlds_replaced__) {
    tlds.push(tlds_2ch_src_re)
  }
  tlds.push(re.src_xn)

  re.src_tlds = tlds.join('|')

  // A replacer function is used on purpose: with a replacement string, `$&`,
  // `$1`... inside the TLD source would be interpreted instead of inserted.
  function untpl (tpl) {
    return tpl.replace('%TLDS%', function () { return re.src_tlds })
  }

  re.email_fuzzy = RegExp(untpl(re.tpl_email_fuzzy), 'i')
  re.link_fuzzy = RegExp(untpl(re.tpl_link_fuzzy), 'i')
  re.link_no_ip_fuzzy = RegExp(untpl(re.tpl_link_no_ip_fuzzy), 'i')
  re.host_fuzzy_test = RegExp(untpl(re.tpl_host_fuzzy_test), 'i')

  //
  // Compile each schema
  //

  const aliases = []

  self.__compiled__ = {} // Reset compiled data

  function schemaError (name, val) {
    throw new Error('(LinkifyIt) Invalid schema "' + name + '": ' + val)
  }

  Object.keys(self.__schemas__).forEach(function (name) {
    const val = self.__schemas__[name]

    // skip disabled methods
    if (val === null) { return }

    const compiled = { validate: null, normalize: null }

    self.__compiled__[name] = compiled

    if (isObject(val)) {
      if (isRegExp(val.validate)) {
        compiled.validate = createValidator(val.validate)
      } else if (isFunction(val.validate)) {
        compiled.validate = val.validate
      } else {
        schemaError(name, val)
      }

      if (isFunction(val.normalize)) {
        compiled.normalize = val.normalize
      } else if (!val.normalize) {
        compiled.normalize = createNormalizer()
      } else {
        schemaError(name, val)
      }

      return
    }

    if (isString(val)) {
      // resolved below, when all real rules are compiled
      aliases.push(name)
      return
    }

    schemaError(name, val)
  })

  //
  // Compile postponed aliases
  //

  aliases.forEach(function (alias) {
    // Follow alias chains (alias -> alias -> rule) via the schema definitions,
    // so the result does not depend on key order. The hop limit makes cyclic
    // aliases terminate.
    let target = self.__schemas__[alias]

    for (let hops = 0; hops < aliases.length && isString(self.__schemas__[target]); hops++) {
      target = self.__schemas__[target]
    }

    const rule = self.__compiled__[target]

    if (!rule || !isFunction(rule.validate)) {
      // Silently fail on missed schemas to avoid errors on disable.
      // The placeholder must go away too: a record without validator would
      // still be added to the schema regexp and crash on the first hit.
      delete self.__compiled__[alias]
      return
    }

    self.__compiled__[alias].validate = rule.validate
    self.__compiled__[alias].normalize = rule.normalize
  })

  //
  // Fake record for guessed links
  //
  self.__compiled__[''] = { validate: null, normalize: createNormalizer() }

  //
  // Build schema condition
  //
  const slist = Object.keys(self.__compiled__)
    .filter(function (name) {
      // Filter disabled & fake schemas
      return name.length > 0 && self.__compiled__[name]
    })
    .map(escapeRE)
    .join('|')

  // With no schema left, an empty alternative would match everywhere and
  // resolve to the fake '' record; `(?!)` never matches.
  const schemaAlt = slist.length > 0 ? slist : '(?!)'

  // (?!_) cause 1.5x slowdown
  self.re.schema_test = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + schemaAlt + ')', 'i')
  self.re.schema_search = RegExp('(^|(?!_)(?:[><\uff5c]|' + re.src_ZPCc + '))(' + schemaAlt + ')', 'ig')
  self.re.schema_at_start = RegExp('^' + self.re.schema_search.source, 'i')

  self.re.pretest = RegExp(
    '(' + self.re.schema_test.source + ')|(' + self.re.host_fuzzy_test.source + ')|@',
    'i'
  )

  //
  // Cleanup
  //

  resetScanCache(self)
}
263 |
/**
 * class Match
 *
 * Match result. Single element of array, returned by [[LinkifyIt#match]].
 * Built from the scan state of the linkifier (`self`); `shift` is added to
 * both positions.
 **/
function Match (self, shift) {
  const from = self.__index__
  const to = self.__last_index__
  const matched = self.__text_cache__.slice(from, to)

  /**
   * Match#schema -> String
   *
   * Prefix (protocol) for matched string, lowercased.
   **/
  this.schema = self.__schema__.toLowerCase()
  /**
   * Match#index -> Number
   *
   * First position of matched string.
   **/
  this.index = from + shift
  /**
   * Match#lastIndex -> Number
   *
   * Next position after matched string.
   **/
  this.lastIndex = to + shift
  /**
   * Match#raw -> String
   *
   * Matched string.
   **/
  this.raw = matched
  /**
   * Match#text -> String
   *
   * Normalized text of matched string.
   **/
  this.text = matched
  /**
   * Match#url -> String
   *
   * Normalized url of matched string.
   **/
  this.url = matched
}
311 |
// Build a Match from the current scan state and run it through the
// normalizer of its schema.
function createMatch (self, shift) {
  const result = new Match(self, shift)
  const rule = self.__compiled__[result.schema]

  rule.normalize(result, self)

  return result
}
319 |
/**
 * class LinkifyIt
 **/

/**
 * new LinkifyIt(schemas, options)
 * - schemas (Object): Optional. Additional schemas to validate (prefix/validator)
 * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
 *
 * Creates new linkifier instance with optional additional schemas.
 * Can be called without `new` keyword for convenience. When `options` is
 * omitted and the first argument contains a known option name, the first
 * argument is used as `options`.
 *
 * By default understands:
 *
 * - `http(s)://...` , `ftp://...`, `mailto:...` & `//...` links
 * - "fuzzy" links and emails (example.com, foo@bar.com).
 *
 * `schemas` is an object, where each key/value describes protocol/rule:
 *
 * - __key__ - link prefix (usually, protocol name with `:` at the end, `skype:`
 *   for example). `linkify-it` makes sure that prefix is not preceded with
 *   alphanumeric char and symbols. Only whitespaces and punctuation allowed.
 * - __value__ - rule to check tail after link prefix
 *   - _String_ - just alias to existing rule
 *   - _Object_
 *     - _validate_ - validator function (should return matched length on success),
 *       or `RegExp`.
 *     - _normalize_ - optional function to normalize text & url of matched result
 *       (for example, for @twitter mentions).
 *
 * `options`:
 *
 * - __fuzzyLink__ - recognize URL-s without `http(s):` prefix. Default `true`.
 * - __fuzzyIP__ - allow IPs in fuzzy links above. Can conflict with some texts
 *   like version numbers. Default `false`.
 * - __fuzzyEmail__ - recognize emails without `mailto:` prefix. Default `true`.
 *
 **/
function LinkifyIt (schemas, options) {
  if (!(this instanceof LinkifyIt)) {
    return new LinkifyIt(schemas, options)
  }

  // Single-argument form: `LinkifyIt(options)`
  if (!options && isOptionsObj(schemas)) {
    options = schemas
    schemas = {}
  }

  this.__opts__ = assign({}, defaultOptions, options)

  // Cache last tested result. Used to skip repeating steps on next `match` call.
  this.__index__ = -1
  this.__last_index__ = -1 // Next scan position
  this.__schema__ = ''
  this.__text_cache__ = ''

  // Rule definitions as given, and their compiled form (filled by `compile`)
  this.__schemas__ = assign({}, defaultSchemas, schemas)
  this.__compiled__ = {}

  this.__tlds__ = tlds_default
  this.__tlds_replaced__ = false

  this.re = {}

  compile(this)
}
388 |
/** chainable
 * LinkifyIt#add(schema, definition)
 * - schema (String): rule name (fixed pattern prefix)
 * - definition (String|RegExp|Object): schema definition
 *
 * Add new rule definition. See constructor description for details.
 * `null` disables the rule.
 *
 * Throws if the definition is invalid. In that case the previous definition
 * of `schema` is restored, so the instance stays usable.
 **/
LinkifyIt.prototype.add = function add (schema, definition) {
  const schemas = this.__schemas__
  const existed = Object.prototype.hasOwnProperty.call(schemas, schema)
  const previous = schemas[schema]

  schemas[schema] = definition

  try {
    compile(this)
  } catch (err) {
    // A failed compile leaves half-built regexps behind: roll the definition
    // back and rebuild from the last good state before reporting the error.
    if (existed) {
      schemas[schema] = previous
    } else {
      delete schemas[schema]
    }
    compile(this)
    throw err
  }

  return this
}
401 |
/** chainable
 * LinkifyIt#set(options)
 * - options (Object): { fuzzyLink|fuzzyEmail|fuzzyIP: true|false }
 *
 * Set recognition options for links without schema. The given keys are
 * merged into the current options; nothing is recompiled here.
 **/
LinkifyIt.prototype.set = function set (options) {
  const merged = assign(this.__opts__, options)

  this.__opts__ = merged

  return this
}
412 |
/**
 * LinkifyIt#test(text) -> Boolean
 *
 * Searches linkifiable pattern and returns `true` on success or `false` on fail.
 * The position of the found link is kept in the scan cache (`__index__`,
 * `__last_index__`, `__schema__`), where [[LinkifyIt#match]] picks it up.
 **/
LinkifyIt.prototype.test = function test (text) {
  // Reset scan cache
  this.__text_cache__ = text
  this.__index__ = -1

  if (!text.length) { return false }

  // try to scan for link with schema - that's the most simple rule
  if (this.re.schema_test.test(text)) {
    const search = this.re.schema_search

    search.lastIndex = 0

    for (let hit = search.exec(text); hit !== null; hit = search.exec(text)) {
      const tailLength = this.testSchemaAt(text, hit[2], search.lastIndex)

      if (tailLength) {
        // hit[1] is the separator before the prefix, hit[2] the prefix itself
        this.__schema__ = hit[2]
        this.__index__ = hit.index + hit[1].length
        this.__last_index__ = hit.index + hit[0].length + tailLength
        break
      }
    }
  }

  if (this.__opts__.fuzzyLink && this.__compiled__['http:']) {
    // guess schemaless links
    const tldPos = text.search(this.re.host_fuzzy_test)

    // if tld is located after found link - no need to check fuzzy pattern
    if (tldPos >= 0 && (this.__index__ < 0 || tldPos < this.__index__)) {
      const linkRe = this.__opts__.fuzzyIP ? this.re.link_fuzzy : this.re.link_no_ip_fuzzy
      const link = text.match(linkRe)

      if (link !== null) {
        const start = link.index + link[1].length

        // take it only when it begins before the link found so far
        if (this.__index__ < 0 || start < this.__index__) {
          this.__schema__ = ''
          this.__index__ = start
          this.__last_index__ = link.index + link[0].length
        }
      }
    }
  }

  if (this.__opts__.fuzzyEmail && this.__compiled__['mailto:']) {
    // guess schemaless emails
    if (text.indexOf('@') >= 0) {
      // We can't skip this check, because this cases are possible:
      // 192.168.1.1@gmail.com, my.in@example.com
      const email = text.match(this.re.email_fuzzy)

      if (email !== null) {
        const start = email.index + email[1].length
        const end = email.index + email[0].length
        const noLinkYet = this.__index__ < 0
        const startsEarlier = start < this.__index__
        const sameStartButLonger = start === this.__index__ && end > this.__last_index__

        if (noLinkYet || startsEarlier || sameStartButLonger) {
          this.__schema__ = 'mailto:'
          this.__index__ = start
          this.__last_index__ = end
        }
      }
    }
  }

  return this.__index__ >= 0
}
483 |
/**
 * LinkifyIt#pretest(text) -> Boolean
 *
 * Very quick check, that can give false positives. Returns true if a link MAY
 * exist. Can be used for speed optimization, when you need to check that
 * a link does NOT exist.
 **/
LinkifyIt.prototype.pretest = function pretest (text) {
  const quickCheck = this.re.pretest

  return quickCheck.test(text)
}
494 |
/**
 * LinkifyIt#testSchemaAt(text, name, position) -> Number
 * - text (String): text to scan
 * - name (String): rule (schema) name, case-insensitive
 * - position (Number): text offset to check from
 *
 * Similar to [[LinkifyIt#test]] but checks only specific protocol tail exactly
 * at given position. Returns length of found pattern (0 on fail).
 **/
LinkifyIt.prototype.testSchemaAt = function testSchemaAt (text, schema, pos) {
  const name = schema.toLowerCase()

  // Own keys only: names like `constructor` must not resolve to members
  // inherited from Object.prototype.
  if (!Object.prototype.hasOwnProperty.call(this.__compiled__, name)) {
    return 0
  }

  const rule = this.__compiled__[name]

  // If not supported schema check requested - terminate. Records without a
  // validator (the fake '' record, an alias of a disabled rule) count as
  // unsupported instead of failing with a TypeError.
  if (!rule || typeof rule.validate !== 'function') {
    return 0
  }

  return rule.validate(text, pos, this)
}
511 |
/**
 * LinkifyIt#match(text) -> Array|null
 *
 * Returns array of found link descriptions or `null` on fail. We strongly
 * recommend to use [[LinkifyIt#test]] first, for best speed.
 *
 * ##### Result match description
 *
 * - __schema__ - link schema, can be empty for fuzzy links, or `//` for
 *   protocol-neutral links.
 * - __index__ - offset of matched text
 * - __lastIndex__ - index of next char after match end
 * - __raw__ - matched text
 * - __text__ - normalized text
 * - __url__ - link, generated from matched text
 **/
LinkifyIt.prototype.match = function match (text) {
  const found = []
  // offset of the not yet scanned part inside the original `text`
  let offset = 0

  // Try to take previous element from cache, if .test() called before
  const cached = this.__index__ >= 0 && this.__text_cache__ === text

  if (cached) {
    found.push(createMatch(this, offset))
    offset = this.__last_index__
  }

  // Cut head if cache was used
  let rest = offset ? text.slice(offset) : text

  // Scan string until end reached; every `test` call reports positions
  // relative to `rest`, `offset` translates them back
  while (this.test(rest)) {
    found.push(createMatch(this, offset))

    rest = rest.slice(this.__last_index__)
    offset += this.__last_index__
  }

  return found.length ? found : null
}
555 |
/**
 * LinkifyIt#matchAtStart(text) -> Match|null
 *
 * Returns fully-formed (not fuzzy) link if it starts at the beginning
 * of the string, and null otherwise.
 **/
LinkifyIt.prototype.matchAtStart = function matchAtStart (text) {
  // Reset scan cache
  this.__text_cache__ = text
  this.__index__ = -1

  if (!text.length) return null

  const m = this.re.schema_at_start.exec(text)
  if (!m) return null

  // The pattern is anchored, but it still accepts one separator char
  // (space, punctuation, `>`...) before the prefix. Such a link begins at
  // index 1, not at the beginning of the string.
  if (m[1].length) return null

  const len = this.testSchemaAt(text, m[2], m[0].length)
  if (!len) return null

  this.__schema__ = m[2]
  this.__index__ = m.index + m[1].length
  this.__last_index__ = m.index + m[0].length + len

  return createMatch(this, 0)
}
581 |
/** chainable
 * LinkifyIt#tlds(list [, keepOld]) -> this
 * - list (Array): list of tlds
 * - keepOld (Boolean): merge with current list if `true` (`false` by default)
 *
 * Load (or merge) new tlds list. Those are used for fuzzy links (without prefix)
 * to avoid false positives. By default this algorithm is used:
 *
 * - hostname with any 2-letter root zones are ok.
 * - biz|com|edu|gov|net|org|pro|web|xxx|aero|asia|coop|info|museum|name|shop|рф
 *   are ok.
 * - encoded (`xn--...`) root zones are ok.
 *
 * If list is replaced, then exact match for 2-chars root zones will be checked.
 **/
LinkifyIt.prototype.tlds = function tlds (list, keepOld) {
  // a single tld may be passed without wrapping it into an array
  const incoming = Array.isArray(list) ? list : [list]

  if (keepOld) {
    // merge: sort, drop adjacent duplicates, then store in reverse order
    const sorted = this.__tlds__.concat(incoming).sort()

    this.__tlds__ = sorted
      .filter((tld, idx) => tld !== sorted[idx - 1])
      .reverse()
  } else {
    // replace: keep a private copy and remember that defaults are gone
    this.__tlds__ = incoming.slice()
    this.__tlds_replaced__ = true
  }

  compile(this)
  return this
}
617 |
/**
 * LinkifyIt#normalize(match)
 *
 * Default normalizer (if schema does not define its own). Updates
 * `match.url` in place: fuzzy links get `http://` prepended, emails without
 * explicit prefix get `mailto:`.
 **/
LinkifyIt.prototype.normalize = function normalize (match) {
  // Do minimal possible changes by default. Need to collect feedback prior
  // to move forward https://github.com/markdown-it/linkify-it/issues/1

  // empty schema - link was guessed without a prefix
  if (!match.schema) {
    match.url = 'http://' + match.url
    return
  }

  if (match.schema !== 'mailto:') { return }

  // prefix is already there, in any letter case
  if (/^mailto:/i.test(match.url)) { return }

  match.url = 'mailto:' + match.url
}
633 |
634 | /**
635 | * LinkifyIt#onCompile()
636 | *
637 | * Override to modify basic RegExp-s.
638 | **/
639 | LinkifyIt.prototype.onCompile = function onCompile () {
640 | }
641 |
642 | export default LinkifyIt
643 |
--------------------------------------------------------------------------------