├── .githooks └── pre-commit ├── .github └── workflows │ └── test.yml ├── .gitignore ├── .mocharc.json ├── README.md ├── package.json ├── src └── no-dead-link.ts ├── test ├── fixtures │ ├── a.md │ └── b.md └── no-dead-link.ts ├── tsconfig.json └── yarn.lock /.githooks/pre-commit: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | npx --no-install lint-staged 3 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | on: [push, pull_request] 3 | jobs: 4 | test: 5 | name: "Test on Node.js ${{ matrix.node-version }} ${{ matrix.os }}" 6 | strategy: 7 | matrix: 8 | os: [ubuntu-latest, windows-latest] 9 | node-version: [ 18, 20, 22 ] 10 | runs-on: ${{ matrix.os }} 11 | steps: 12 | - name: checkout 13 | uses: actions/checkout@v3 14 | - name: setup Node.js ${{ matrix.node-version }} 15 | uses: actions/setup-node@v3 16 | with: 17 | node-version: ${{ matrix.node-version }} 18 | - name: Install 19 | run: yarn install 20 | - uses: nick-fields/retry@v2 21 | with: 22 | timeout_minutes: 10 23 | max_attempts: 3 24 | command: yarn test 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules 3 | lib 4 | *.log 5 | -------------------------------------------------------------------------------- /.mocharc.json: -------------------------------------------------------------------------------- 1 | { 2 | "timeout": 20000 3 | } 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # textlint-rule-no-dead-link 2 | 3 | [![textlint 
rule](https://img.shields.io/badge/textlint-fixable-green.svg?style=social)](https://textlint.github.io/) 4 | [![npm](https://img.shields.io/npm/v/textlint-rule-no-dead-link.svg)](https://www.npmjs.com/package/textlint-rule-no-dead-link) 5 | [![test](https://github.com/textlint-rule/textlint-rule-no-dead-link/actions/workflows/test.yml/badge.svg)](https://github.com/textlint-rule/textlint-rule-no-dead-link/actions/workflows/test.yml) 6 | 7 | [textlint](https://github.com/textlint/textlint) rule 8 | to make sure every link in a document is available. 9 | 10 | The primary target of this rule is Markdown documents, but it also works on plain text documents (See tests). 11 | 12 | ## Installation 13 | 14 | ```shell 15 | npm install textlint-rule-no-dead-link 16 | ``` 17 | 18 | ## Usage 19 | 20 | ```shell 21 | npm install textlint textlint-rule-no-dead-link 22 | textlint --rule textlint-rule-no-dead-link text-to-check.txt 23 | ``` 24 | 25 | ## Features 26 | 27 | ### Dead Link Detection 28 | 29 | Shows an error if a link is dead (i.e. its server returns one of the ["non-ok" responses](https://fetch.spec.whatwg.org/#ok-status)). 30 | 31 | ### Obsolete Link Detection 32 | 33 | [![Fixable](https://img.shields.io/badge/textlint-fixable-green.svg?style=social)](https://textlint.github.io/) 34 | 35 | Shows an error if a link is obsolete or moved to another location (i.e. its server returns one of the ["redirect" responses](https://fetch.spec.whatwg.org/#redirect-status)). 36 | 37 | This error is fixable and textlint will automatically replace the obsolete links with their new ones if you run it with `--fix` option. 38 | 39 | ### Relative Link Resolution 40 | 41 | Sometimes your files contain relative URIs, which don't have domain information in an URI string. 42 | In this case, we have to somehow resolve the relative URIs and convert them into absolute URIs. 43 | 44 | The resolution strategy is as follows: 45 | 46 | 1. 
If `baseURI` is specified, use that path to resolve relative URIs (see the section below for details). 47 | 2. If not, try to get the path of the file being linted and use its parent folder as the base path. 48 | 3. If that's not available (e.g., when you are performing linting from API), report the error `Unable to resolve the relative URI`. 49 | 50 | ## Options 51 | 52 | Please write your configurations in `.textlintrc`. 53 | 54 | The default options are: 55 | 56 | ```json 57 | { 58 | "rules": { 59 | "no-dead-link": { 60 | "checkRelative": true, 61 | "baseURI": null, 62 | "ignore": [], 63 | "dotInIgnore": false, 64 | "ignoreRedirects": false, 65 | "preferGET": [], 66 | "retry": 3, 67 | "userAgent": "textlint-rule-no-dead-link/1.0", 68 | "maxRetryTime": 10, 69 | "maxRetryAfterTime": 10 70 | } 71 | } 72 | } 73 | ``` 74 | 75 | ### checkRelative 76 | 77 | This rule checks the availability of relative URIs by default. 78 | You can turn off the checks by passing `false` to this option. 79 | 80 | ### baseURI 81 | 82 | The base URI to be used for resolving relative URIs. 83 | 84 | Despite its name, you can pass either a URI starting with `http` or `https`, or a file path starting with `/`. 85 | 86 | Examples: 87 | 88 | ```json 89 | "no-dead-link": { 90 | "baseURI": "http://example.com/" 91 | } 92 | ``` 93 | 94 | ```json 95 | "no-dead-link": { 96 | "baseURI": "/Users/textlint/path/to/parent/folder/" 97 | } 98 | ``` 99 | 100 | ### ignore 101 | 102 | An array of URIs or [glob](https://github.com/isaacs/node-glob "glob")s to be ignored. 103 | URIs matching this list are skipped from the availability checks. 104 | 105 | Example: 106 | 107 | ```json 108 | "no-dead-link": { 109 | "ignore": [ 110 | "http://example.com/not-exist/index.html", 111 | "http://example.com/*" // glob format 112 | ] 113 | } 114 | ``` 115 | 116 | ### dotInIgnore 117 | 118 | This rule allows ignore patterns to match filenames starting with a period.
119 | For example, if the `ignore` option contains `"http://example.com/**"` and the `dotInIgnore` option is set to `true`, paths containing filenames that start with `.` (like `"http://example.com/.hidden/index.html"`) will be ignored. 120 | You can disable this behavior by setting `dotInIgnore` to `false`. 121 | 122 | _cf_, 123 | 124 | ### preferGET 125 | 126 | An array of [origins](https://url.spec.whatwg.org/#origin) that lets the rule connect to the origin's URL by `GET` instead of the default `HEAD` request. 127 | 128 | Although the rule will fall back to the `GET` method when a `HEAD` request fails (status code is not in the 2xx range), in order to shorten the time to run your test, you can use this option when you are sure that the target origin always returns 5xx for `HEAD` requests. 129 | 130 | Example: 131 | 132 | ```json 133 | "no-dead-link": { 134 | "preferGET": [ 135 | "http://example.com" 136 | ] 137 | } 138 | ``` 139 | 140 | ### ignoreRedirects 141 | 142 | This rule checks for redirects (3xx status codes) and considers them an error by default. 143 | To ignore redirects during checks, set this value to `true`. 144 | 145 | 153 | ### retry 154 | 155 | This rule checks the url with retry. 156 | The default max retry count is `3`. 157 | 158 | ### userAgent 159 | 160 | Customize `User-Agent` http header. 161 | 162 | ### maxRetryTime 163 | 164 | The maximum number of seconds to wait before a retry. It is related to the `retry` option. 165 | 166 | :memo: It does not affect the wait for the [`Retry-After`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After) header. If you want to limit the waiting seconds for the `Retry-After` header, please use the `maxRetryAfterTime` option. 167 | 168 | Default: `10` 169 | 170 | ### maxRetryAfterTime 171 | 172 | The maximum allowed waiting time, in seconds, for the [`Retry-After`](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After) header value. 173 | 174 | Some websites, like GitHub, return a `Retry-After` header value with `429 too many requests`.
175 | This `maxRetryAfterTime` option is for that `Retry-After`. 176 | 177 | Default: `10` 178 | 179 | ## CI Integration 180 | 181 | Probably, Link Checking take long times. 182 | We recommend to use cron job like GitHub Actions. 183 | 184 | ### textlint + [SARIF output](https://www.npmjs.com/package/@microsoft/eslint-formatter-sarif) + [code scanning](https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/about-code-scanning) 185 | 186 | Preparing: 187 | 188 | ```shell 189 | # Install dependencies 190 | npm install --save-dev textlint @microsoft/eslint-formatter-sarif textlint-rule-no-dead-link 191 | # Create .textlintrc 192 | npx textlint --init 193 | ``` 194 | 195 | Following actions check links and upload the status to [code scanning](https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/about-code-scanning). 196 | 197 | You can see the result at `https://github.com/{owner}/{repo}/security/code-scanning`. 198 | 199 | ```yaml 200 | name: Link Check 201 | on: 202 | workflow_dispatch: 203 | schedule: 204 | - cron: '45 15 * * *' 205 | 206 | permissions: 207 | contents: read 208 | security-events: write 209 | 210 | jobs: 211 | test: 212 | runs-on: ubuntu-latest 213 | name: Link Check 214 | steps: 215 | - uses: actions/checkout@v3 216 | - name: Setup Node.js 217 | uses: actions/setup-node@v3 218 | with: 219 | node-version: 18 220 | - run: npm ci 221 | - run: npx textlint -f @microsoft/eslint-formatter-sarif -o textlint.sarif || exit 0 # workaround https://github.com/textlint/textlint/issues/103 222 | - name: Upload SARIF file 223 | uses: github/codeql-action/upload-sarif@v2 224 | with: 225 | sarif_file: textlint.sarif 226 | category: textlint 227 | ``` 228 | 229 | ## Tests 230 | 231 | ```shell 232 | npm test 233 | ``` 234 | 235 | ## Contribution 236 | 237 | 1. Fork it! 238 | 2. 
Create your feature branch: `git checkout -b my-new-feature` 239 | 3. Commit your changes: `git commit -am 'Add some feature'` 240 | 4. Push to the branch: `git push origin my-new-feature` 241 | 5. Submit a pull request :D 242 | 243 | ## License 244 | 245 | MIT License () 246 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "textlint-rule-no-dead-link", 3 | "version": "5.2.0", 4 | "description": "A textlint rule to check if all links are alive", 5 | "keywords": [ 6 | "rule", 7 | "textlint", 8 | "textlintrule", 9 | "link-checker" 10 | ], 11 | "homepage": "https://github.com/textlint-rule/textlint-rule-no-dead-link", 12 | "bugs": "https://github.com/textlint-rule/textlint-rule-no-dead-link/issues", 13 | "repository": "textlint-rule/textlint-rule-no-dead-link", 14 | "license": "MIT", 15 | "author": "nodaguti", 16 | "main": "lib/no-dead-link.js", 17 | "types": "lib/no-dead-link.d.ts", 18 | "files": [ 19 | "lib", 20 | "src" 21 | ], 22 | "scripts": { 23 | "build": "textlint-scripts build", 24 | "format": "prettier --write \"**/*.{js,jsx,ts,tsx,css}\"", 25 | "prepare": "git config --local core.hooksPath .githooks", 26 | "prepublish": "yarn run --if-present build", 27 | "test": "npm run type-check && textlint-scripts test", 28 | "type-check": "tsc --noEmit", 29 | "watch": "textlint-scripts build --watch" 30 | }, 31 | "lint-staged": { 32 | "*.{js,jsx,ts,tsx,css}": [ 33 | "prettier --write" 34 | ] 35 | }, 36 | "prettier": { 37 | "printWidth": 120, 38 | "singleQuote": false, 39 | "tabWidth": 4, 40 | "trailingComma": "none" 41 | }, 42 | "dependencies": { 43 | "fs-extra": "^8.1.0", 44 | "get-url-origin": "^1.0.1", 45 | "minimatch": "^3.0.4", 46 | "node-fetch": "^2.6.0", 47 | "p-memoize": "^3.1.0", 48 | "p-queue": "^6.2.0", 49 | "textlint-rule-helper": "^2.2.2" 50 | }, 51 | "devDependencies": { 52 | "@textlint/ast-node-types": 
"^12.2.2", 53 | "@textlint/types": "^12.2.2", 54 | "@types/minimatch": "^5.1.2", 55 | "@types/mocha": "^10.0.0", 56 | "@types/node": "^18.11.7", 57 | "@types/node-fetch": "^2.6.2", 58 | "lint-staged": "^13.0.3", 59 | "mocha": "^10.1.0", 60 | "prettier": "^2.7.1", 61 | "textlint": "^12.2.2", 62 | "textlint-scripts": "^12.2.2", 63 | "textlint-tester": "^12.2.2", 64 | "ts-node": "^10.9.1", 65 | "ts-node-test-register": "^10.0.0", 66 | "typescript": "^4.8.4" 67 | }, 68 | "packageManager": "yarn@1.22.15", 69 | "engines": { 70 | "node": ">=4" 71 | }, 72 | "peerDependencies": { 73 | "textlint": ">= 12.2.0" 74 | }, 75 | "peerDependenciesMeta": { 76 | "textlint": { 77 | "optional": true 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/no-dead-link.ts: -------------------------------------------------------------------------------- 1 | import { RuleHelper } from "textlint-rule-helper"; 2 | import fetch, { RequestInit } from "node-fetch"; 3 | import URL from "url"; 4 | import fs from "fs/promises"; 5 | import minimatch from "minimatch"; 6 | import { isAbsolute } from "path"; 7 | import { getURLOrigin } from "get-url-origin"; 8 | import pMemoize from "p-memoize"; 9 | import PQueue from "p-queue"; 10 | import type { TextlintRuleReporter } from "@textlint/types"; 11 | import type { TxtNode } from "@textlint/ast-node-types"; 12 | 13 | export type Options = { 14 | checkRelative: boolean; // {boolean} `false` disables the checks for relative URIs. 15 | baseURI: null | string; // {String|null} a base URI to resolve relative URIs. 16 | ignore: string[]; // {Array} URIs to be skipped from availability checks. 17 | dotInIgnore: boolean; // {boolean} `true` allows ignore patterns to match filenames starting with a period 18 | ignoreRedirects: boolean; // {boolean} `false` ignores redirect status codes. 19 | preferGET: string[]; // {Array} origins to prefer GET over HEAD. 
20 | retry: number; // {number} Max retry count 21 | concurrency: number; // {number} Concurrency count of linting link [Experimental] 22 | interval: number; // The length of time in milliseconds before the interval count resets. Must be finite. [Experimental] 23 | intervalCap: number; // The max number of runs in the given interval of time. [Experimental] 24 | userAgent: string; // {String} a UserAgent, 25 | maxRetryTime: number; // (number) The max of waiting seconds for retry. It is related to `retry` option. It does not affect the wait for `Retry-After` header (see `maxRetryAfterTime`). 26 | maxRetryAfterTime: number; // (number) The max of waiting seconds for `Retry-After` header. 27 | }; 28 | const DEFAULT_OPTIONS: Options = { 29 | checkRelative: true, // {boolean} `false` disables the checks for relative URIs. 30 | baseURI: null, // {String|null} a base URI to resolve relative URIs. 31 | ignore: [], // {Array} URIs to be skipped from availability checks. 32 | dotInIgnore: false, // {boolean} `true` allows ignore patterns to match filenames starting with a period 33 | ignoreRedirects: false, // {boolean} `true` ignores redirect status codes. 34 | preferGET: [], // {Array} origins to prefer GET over HEAD. 35 | retry: 3, // {number} Max retry count 36 | concurrency: 8, // {number} Concurrency count of linting link [Experimental] 37 | interval: 500, // The length of time in milliseconds before the interval count resets. Must be finite. [Experimental] 38 | intervalCap: 8, // The max number of runs in the given interval of time. [Experimental] 39 | userAgent: "textlint-rule-no-dead-link/1.0", // {String} a UserAgent, 40 | maxRetryTime: 10, // (number) The max of waiting seconds for retry. It is related to `retry` option. It does not affect the wait for `Retry-After` header (see `maxRetryAfterTime`). 41 | maxRetryAfterTime: 10 // (number) The max of waiting seconds for `Retry-After` header.
42 | }; 43 | 44 | // Adopted from http://stackoverflow.com/a/3809435/951517 45 | const URI_REGEXP = 46 | /(?:https?:)?\/\/(?:www\.)?[-a-zA-Z0-9@:%._+~#=]{2,256}\.[a-z]{2,6}\b(?:[-a-zA-Z0-9@:%_+.~#?&//=]*)/g; 47 | 48 | /** 49 | * Returns `true` if a given URI is https? url. 50 | * @param {string} uri 51 | * @return {boolean} 52 | */ 53 | function isHttp(uri: string) { 54 | const { protocol } = URL.parse(uri); 55 | return protocol === "http:" || protocol === "https:"; 56 | } 57 | 58 | /** 59 | * Returns `true` if a given URI is relative. 60 | * @param {string} uri 61 | * @return {boolean} 62 | * @see https://github.com/panosoft/is-local-path 63 | */ 64 | function isRelative(uri: string) { 65 | const { host } = URL.parse(uri); 66 | return host === null || host === ""; 67 | } 68 | 69 | /** 70 | * Returns if a given URI indicates a local file. 71 | * @param {string} uri 72 | * @return {boolean} 73 | * @see https://nodejs.org/api/path.html#path_path_isabsolute_path 74 | */ 75 | function isLocal(uri: string) { 76 | if (isAbsolute(uri)) { 77 | return true; 78 | } 79 | return isRelative(uri); 80 | } 81 | 82 | /** 83 | * Return `true` if the `code` is redirect status code. 
84 | * @see https://fetch.spec.whatwg.org/#redirect-status 85 | * @param {number} code 86 | * @returns {boolean} 87 | */ 88 | function isRedirect(code: number) { 89 | return code === 301 || code === 302 || code === 303 || code === 307 || code === 308; 90 | } 91 | 92 | function isIgnored(uri: string, ignore: string[] = [], dotInIgnore: boolean) { 93 | return ignore.some((pattern) => minimatch(uri, pattern, { dot: dotInIgnore })); 94 | } 95 | 96 | /** 97 | * wait for ms and resolve the promise 98 | * @param ms 99 | * @returns {Promise} 100 | */ 101 | function waitTimeMs(ms: number) { 102 | return new Promise((resolve) => { 103 | setTimeout(resolve, ms); 104 | }); 105 | } 106 | 107 | const createFetchWithRuleDefaults = (ruleOptions: Options) => { 108 | return (uri: string, fetchOptions: RequestInit) => { 109 | const { host } = URL.parse(uri); 110 | return fetch(uri, { 111 | ...fetchOptions, 112 | // Disable gzip compression in Node.js 113 | // to avoid the zlib's "unexpected end of file" error 114 | // https://github.com/request/request/issues/2045 115 | compress: false, 116 | // Some website require UserAgent and Accept header 117 | // to avoid ECONNRESET error 118 | // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111 119 | headers: { 120 | "User-Agent": ruleOptions.userAgent, 121 | Accept: "*/*", 122 | // avoid assign null to Host 123 | ...(host 124 | ? 
{ 125 | // Same host for target url 126 | // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/111 127 | Host: host 128 | } 129 | : {}) 130 | } 131 | }); 132 | }; 133 | }; 134 | 135 | type AliveFunctionReturn = { 136 | ok: boolean; 137 | message: string; 138 | redirected?: boolean; 139 | redirectTo?: string | null; 140 | }; 141 | 142 | /** 143 | * Create isAliveURI function with ruleOptions 144 | * @param {object} ruleOptions 145 | * @returns {isAliveURI} 146 | */ 147 | const createCheckAliveURL = (ruleOptions: Options) => { 148 | // Create fetch function for this rule 149 | const fetchWithDefaults = createFetchWithRuleDefaults(ruleOptions); 150 | /** 151 | * Checks if a given URI is alive or not. 152 | * 153 | * Normally, this method following strategy about retry 154 | * 155 | * 1. Head 156 | * 2. Get 157 | * 3. Get 158 | * 159 | * @param {string} uri 160 | * @param {string} method 161 | * @param {number} maxRetryCount 162 | * @param {number} currentRetryCount 163 | * @return {{ ok: boolean, redirect?: string, message: string }} 164 | */ 165 | return async function isAliveURI( 166 | uri: string, 167 | method: string = "HEAD", 168 | maxRetryCount: number = 3, 169 | currentRetryCount: number = 0 170 | ): Promise { 171 | const opts: RequestInit = { 172 | method, 173 | // Use `manual` redirect behaviour to get HTTP redirect status code 174 | // and see what kind of redirect is occurring 175 | redirect: "manual" 176 | }; 177 | try { 178 | const res = await fetchWithDefaults(uri, opts); 179 | // redirected 180 | if (isRedirect(res.status)) { 181 | const redirectedUrl = res.headers.get("Location"); 182 | // Status code is 301 or 302, but Location header is not set 183 | if (redirectedUrl === null) { 184 | return { 185 | ok: false, 186 | redirected: true, 187 | redirectTo: null, 188 | message: `${res.status} ${res.statusText}` 189 | }; 190 | } 191 | const finalRes = await fetchWithDefaults(redirectedUrl, { ...opts, redirect: "follow" }); 192 | const { 
hash } = URL.parse(uri); 193 | return { 194 | ok: finalRes.ok, 195 | redirected: true, 196 | redirectTo: hash !== null ? `${finalRes.url}${hash}` : finalRes.url, 197 | message: `${res.status} ${res.statusText}` 198 | }; 199 | } 200 | // retry if it is not ok when use head request 201 | if (!res.ok && method === "HEAD" && currentRetryCount < maxRetryCount) { 202 | return isAliveURI(uri, "GET", maxRetryCount, currentRetryCount + 1); 203 | } 204 | 205 | // try to fetch again only if the response is not ok and max retry count is not reached (a successful response must not be re-fetched) 206 | if (!res.ok && currentRetryCount < maxRetryCount) { 207 | const retryAfter = res.headers.get("Retry-After"); 208 | // If the response has `Retry-After` header, prefer it 209 | // e.g. `Retry-After: 60` and `maxRetryAfterTime: 90`, wait 60 seconds 210 | if (retryAfter) { 211 | const retryAfterMs = Number(retryAfter) * 1000; 212 | const maxRetryAfterTimeMs = ruleOptions.maxRetryAfterTime * 1000; 213 | if (retryAfterMs <= maxRetryAfterTimeMs) { 214 | await waitTimeMs(retryAfterMs); 215 | } 216 | } else { 217 | // quadratic backoff: 0ms -> 100ms -> 400ms -> 900ms ... 218 | const retryWaitTimeMs = currentRetryCount ** 2 * 100; 219 | const maxRetryTimeMs = ruleOptions.maxRetryTime * 1000; 220 | if (retryWaitTimeMs <= maxRetryTimeMs) { 221 | await waitTimeMs(retryWaitTimeMs); 222 | } 223 | } 224 | return isAliveURI(uri, "GET", maxRetryCount, currentRetryCount + 1); 225 | } 226 | 227 | return { 228 | ok: res.ok, 229 | message: `${res.status} ${res.statusText}` 230 | }; 231 | } catch (ex: any) { 232 | // Retry with `GET` method if the request failed 233 | // as some servers don't accept `HEAD` requests but are OK with `GET` requests.
234 | // https://github.com/textlint-rule/textlint-rule-no-dead-link/pull/86 235 | if (method === "HEAD" && currentRetryCount < maxRetryCount) { 236 | return isAliveURI(uri, "GET", maxRetryCount, currentRetryCount + 1); 237 | } 238 | 239 | return { 240 | ok: false, 241 | message: ex.message 242 | }; 243 | } 244 | }; 245 | }; 246 | 247 | /** 248 | * Check if a given file exists 249 | */ 250 | async function isAliveLocalFile(filePath: string): Promise { 251 | try { 252 | await fs.access(filePath.replace(/[?#].*?$/, "")); 253 | return { 254 | ok: true, 255 | message: "OK" 256 | }; 257 | } catch (ex: any) { 258 | return { 259 | ok: false, 260 | message: ex.message 261 | }; 262 | } 263 | } 264 | 265 | const reporter: TextlintRuleReporter = (context, options) => { 266 | const { Syntax, getSource, report, RuleError, fixer, getFilePath, locator } = context; 267 | const helper = new RuleHelper(context); 268 | const ruleOptions = { ...DEFAULT_OPTIONS, ...options }; 269 | const isAliveURI = createCheckAliveURL(ruleOptions); 270 | // 30sec memorized 271 | const memorizedIsAliveURI = pMemoize(isAliveURI, { 272 | maxAge: 30 * 1000 273 | }); 274 | /** 275 | * Checks a given URI's availability and report if it is dead. 276 | * @param {TextLintNode} node TextLintNode the URI belongs to. 277 | * @param {string} uri a URI string to be linted. 278 | * @param {number} index column number the URI is located at. 279 | * @param {number} maxRetryCount retry count of linting 280 | */ 281 | const lint = async ({ node, uri, index }: { node: TxtNode; uri: string; index: number }, maxRetryCount: number) => { 282 | if (isIgnored(uri, ruleOptions.ignore, ruleOptions.dotInIgnore)) { 283 | return; 284 | } 285 | 286 | if (isRelative(uri)) { 287 | if (!ruleOptions.checkRelative) { 288 | return; 289 | } 290 | 291 | const filePath = getFilePath(); 292 | const base = ruleOptions.baseURI || filePath; 293 | if (!base) { 294 | const message = 295 | "Unable to resolve the relative URI. 
Please check if the base URI is correctly specified."; 296 | 297 | report(node, new RuleError(message, { padding: locator.range([index, index + uri.length]) })); 298 | return; 299 | } 300 | 301 | // eslint-disable-next-line no-param-reassign 302 | uri = URL.resolve(base, uri); 303 | } 304 | 305 | // Ignore non http external link 306 | // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/112 307 | if (!isLocal(uri) && !isHttp(uri)) { 308 | return; 309 | } 310 | 311 | const method = 312 | ruleOptions.preferGET.filter((origin) => getURLOrigin(uri) === getURLOrigin(origin)).length > 0 313 | ? "GET" 314 | : "HEAD"; 315 | 316 | const result = isLocal(uri) 317 | ? await isAliveLocalFile(uri) 318 | : await memorizedIsAliveURI(uri, method, maxRetryCount); 319 | const { ok, redirected, redirectTo, message } = result; 320 | // When ignoreRedirects is true, redirected should be ignored 321 | if (redirected && ruleOptions.ignoreRedirects) { 322 | return; 323 | } 324 | if (!ok) { 325 | const lintMessage = `${uri} is dead. (${message})`; 326 | report(node, new RuleError(lintMessage, { padding: locator.range([index, index + uri.length]) })); 327 | } else if (redirected) { 328 | const lintMessage = `${uri} is redirected to ${redirectTo}. (${message})`; 329 | const fix = redirectTo ? fixer.replaceTextRange([index, index + uri.length], redirectTo) : undefined; 330 | report(node, new RuleError(lintMessage, { fix, padding: locator.range([index, index + uri.length]) })); 331 | } 332 | }; 333 | 334 | /** 335 | * URIs to be checked. 
336 | */ 337 | const URIs: { node: TxtNode; uri: string; index: number }[] = []; 338 | 339 | return { 340 | [Syntax.Str](node) { 341 | if (helper.isChildNode(node, [Syntax.BlockQuote])) { 342 | return; 343 | } 344 | 345 | // prevent double checks 346 | if (helper.isChildNode(node, [Syntax.Link])) { 347 | return; 348 | } 349 | 350 | const text = getSource(node); 351 | 352 | // Use `String#matchAll` instead of `RegExp#exec` to allow us 353 | // perform RegExp matches in an iterative and immutable manner 354 | const matches = text.matchAll(URI_REGEXP); 355 | Array.from(matches).forEach((match) => { 356 | const url = match[0]; 357 | if (url && match.input !== undefined && match.index !== undefined) { 358 | URIs.push({ node, uri: url, index: match.index }); 359 | } 360 | }); 361 | }, 362 | 363 | [Syntax.Link](node) { 364 | if (helper.isChildNode(node, [Syntax.BlockQuote])) { 365 | return; 366 | } 367 | 368 | // Ignore HTML5 place holder link. 369 | // Ex) Placeholder Link 370 | if (typeof node.url === "undefined") { 371 | return; 372 | } 373 | 374 | // [text](http://example.com) 375 | // ^ 376 | const index = Math.max(node.raw.indexOf(node.url), 0); // indexOf returns -1 when the URL is not found in raw; fall back to 0 377 | 378 | URIs.push({ 379 | node, 380 | uri: node.url, 381 | index 382 | }); 383 | }, 384 | 385 | // Reference links is markdown specific 386 | Definition: function (node) { 387 | if (!node.url) { 388 | return; 389 | } 390 | 391 | // Some link text[1] 392 | // 393 | // [1]: https://foo.bar 394 | // ^ 395 | const indexOfUrl = node.raw.indexOf(node.url); 396 | const index = indexOfUrl !== -1 ?
indexOfUrl : 0; 397 | URIs.push({ 398 | node, 399 | uri: node.url, 400 | index 401 | }); 402 | }, 403 | 404 | [Syntax.DocumentExit]() { 405 | const queue = new PQueue({ 406 | concurrency: ruleOptions.concurrency, 407 | intervalCap: ruleOptions.intervalCap, 408 | interval: ruleOptions.interval 409 | }); 410 | const linkTasks = URIs.map((item) => () => lint(item, ruleOptions.retry)); 411 | return queue.addAll(linkTasks); 412 | } 413 | }; 414 | }; 415 | export default { 416 | linter: reporter, 417 | fixer: reporter 418 | }; 419 | -------------------------------------------------------------------------------- /test/fixtures/a.md: -------------------------------------------------------------------------------- 1 | * Good link: [b.md](b.md). 2 | * Good link: [./b.md](./b.md). 3 | * Good link: [b.md#hash](b.md#hash). 4 | * Good link: [b.md?param](b.md?param). 5 | -------------------------------------------------------------------------------- /test/fixtures/b.md: -------------------------------------------------------------------------------- 1 | * Bad link: [../NOTFOUND](../NOTFOUND). 2 | * Bad link: [NOTFOUND](NOTFOUND). 3 | * Bad link: [/NOTFOUND_XXXX](/NOTFOUND_XXXX). 
4 | -------------------------------------------------------------------------------- /test/no-dead-link.ts: -------------------------------------------------------------------------------- 1 | import TextlintTester from "textlint-tester"; 2 | import fs from "fs"; 3 | import path from "path"; 4 | import rule from "../src/no-dead-link"; 5 | 6 | const tester = new TextlintTester(); 7 | 8 | // @ts-expect-error 9 | tester.run("no-dead-link", rule, { 10 | valid: [ 11 | "should ignore non-http url [email address](mailto:mail.example.com) by default", 12 | "should ignore non-http url [ftp](ftp://example.com) by default", 13 | "should ignore non-http url [websockets](ws://example.com) by default", 14 | "should be able to check a link in Markdown: [example](https://example.com/)", 15 | "should be able to check a link in Markdown: [example](https://dev.mysql.com/downloads/mysql/)", 16 | "should be able to check a URL in Markdown: https://example.com/", 17 | "should success with retrying on error: [npm results for textlint](https://www.npmjs.com/search?q=textlint)", 18 | "should treat 200 OK as alive: https://httpstat.us/200", 19 | "should treat 200 OK. It require User-Agent: Navigate to [MySQL distribution](https://dev.mysql.com/downloads/mysql/) to install MySQL `5.7`.", 20 | "should treat 200 OK. 
It require User-Agent: https://datatracker.ietf.org/doc/html/rfc6749", 21 | { 22 | text: "should be able to check a URL in a plain text: https://example.com/", 23 | ext: ".txt" 24 | }, 25 | { 26 | text: "should be able to check multiple URLs in a plain text: https://example.com/, https://httpstat.us/200", 27 | ext: ".txt" 28 | }, 29 | { 30 | text: "should be able to check relative paths when checkRelative is true: ![robot](index.html)", 31 | options: { 32 | baseURI: "https://example.com/" 33 | } 34 | }, 35 | { 36 | text: 'should ignore URLs in the "ignore" option: https://example.com/404.html shouldn\'t be checked.', 37 | options: { 38 | ignore: ["https://example.com/404.html"] 39 | } 40 | }, 41 | { 42 | text: 'should ignore URLs in the "ignore" option that glob formatted: https://example.com/404.html shouldn\'t be checked.', 43 | options: { 44 | ignore: ["https://example.com/*"] 45 | } 46 | }, 47 | { 48 | text: 'should ignore URLs containing . in their path in the "ignore" option that glob formatted if option is enabled: https://example.com/.hidden/404.html shouldn\'t be checked.', 49 | options: { 50 | ignore: ["https://example.com/**"], 51 | dotInIgnore: true 52 | } 53 | }, 54 | { 55 | text: "should ignore relative URIs when `checkRelative` is false: [test](./a.md).", 56 | options: { 57 | checkRelative: false 58 | } 59 | }, 60 | { 61 | text: fs.readFileSync(path.join(__dirname, "fixtures/a.md"), "utf-8"), 62 | options: { 63 | baseURI: path.join(__dirname, "fixtures/") 64 | } 65 | }, 66 | { 67 | inputPath: path.join(__dirname, "fixtures/a.md"), 68 | options: { 69 | baseURI: path.join(__dirname, "fixtures/") 70 | } 71 | }, 72 | { 73 | inputPath: path.join(__dirname, "fixtures/a.md") 74 | }, 75 | { 76 | text: "should success with GET method: [npm results for textlint](https://www.npmjs.com/search?q=textlint)", 77 | options: { 78 | preferGET: ["https://www.npmjs.com"] 79 | } 80 | }, 81 | { 82 | text: "should success with GET method whether the option is specific URL: 
[npm results for textlint](https://www.npmjs.com/search?q=textlint)", 83 | options: { 84 | preferGET: ["https://www.npmjs.com/search?q=textlint-rule"] 85 | } 86 | }, 87 | { 88 | text: "should not treat https://httpstat.us/301 when `ignoreRedirects` is true", 89 | options: { 90 | ignoreRedirects: true 91 | } 92 | }, 93 | { 94 | text: "should preserve hash while ignoring redirect: [BDD](http://mochajs.org/#bdd)", 95 | options: { 96 | ignoreRedirects: true 97 | } 98 | }, 99 | // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/125 100 | { 101 | text: "ignore redirect https://www.consul.io/intro/getting-started/kv.html", 102 | options: { 103 | ignoreRedirects: true 104 | } 105 | }, 106 | // https://github.com/textlint-rule/textlint-rule-no-dead-link/issues/128 107 | { 108 | text: "should treat 200 OK. It requires browser-like User-Agent: https://issues.jenkins.io/browse/JENKINS-59261", 109 | options: { 110 | userAgent: 111 | "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" 112 | } 113 | } 114 | ], 115 | invalid: [ 116 | { 117 | text: "should treat 301 https://httpstat.us/301", 118 | output: "should treat 301 https://httpstat.us/", 119 | errors: [ 120 | { 121 | message: "https://httpstat.us/301 is redirected to https://httpstat.us/. (301 Moved Permanently)", 122 | range: [17, 40] 123 | } 124 | ] 125 | }, 126 | { 127 | text: "should treat 301 [link](https://httpstat.us/301)", 128 | output: "should treat 301 [link](https://httpstat.us/)", 129 | errors: [ 130 | { 131 | message: "https://httpstat.us/301 is redirected to https://httpstat.us/. (301 Moved Permanently)", 132 | range: [24, 47] 133 | } 134 | ] 135 | }, 136 | { 137 | text: "should treat 302 [link](https://httpstat.us/302)", 138 | output: "should treat 302 [link](https://httpstat.us/)", 139 | errors: [ 140 | { 141 | message: "https://httpstat.us/302 is redirected to https://httpstat.us/. 
(302 Found)", 142 | line: 1, 143 | column: 25 144 | } 145 | ] 146 | }, 147 | { 148 | text: "should treat 404 Not Found as dead: https://httpstat.us/404", 149 | errors: [ 150 | { 151 | message: "https://httpstat.us/404 is dead. (404 Not Found)", 152 | line: 1, 153 | column: 37 154 | } 155 | ] 156 | }, 157 | { 158 | text: "should treat 500 Internal Server Error as dead: https://httpstat.us/500", 159 | errors: [ 160 | { 161 | message: "https://httpstat.us/500 is dead. (500 Internal Server Error)", 162 | line: 1, 163 | column: 49 164 | } 165 | ] 166 | }, 167 | { 168 | text: "should locate the exact index of a URL in a plain text: https://httpstat.us/404", 169 | ext: ".txt", 170 | errors: [ 171 | { 172 | message: "https://httpstat.us/404 is dead. (404 Not Found)", 173 | line: 1, 174 | column: 57 175 | } 176 | ] 177 | }, 178 | { 179 | text: "should throw when a relative URI cannot be resolved: [test](./a.md).", 180 | errors: [ 181 | { 182 | message: "Unable to resolve the relative URI. Please check if the base URI is correctly specified.", 183 | line: 1, 184 | column: 61 185 | } 186 | ] 187 | }, 188 | { 189 | inputPath: path.join(__dirname, "fixtures/b.md"), 190 | errors: [ 191 | { 192 | line: 1, 193 | column: 14 194 | }, 195 | { 196 | line: 2, 197 | column: 14 198 | }, 199 | { 200 | line: 3, 201 | column: 14 202 | } 203 | ] 204 | }, 205 | { 206 | text: "should preserve hash while redirecting: [BDD](http://mochajs.org/#bdd)", 207 | output: "should preserve hash while redirecting: [BDD](https://mochajs.org/#bdd)", 208 | errors: [ 209 | { 210 | message: 211 | "http://mochajs.org/#bdd is redirected to https://mochajs.org/#bdd. (301 Moved Permanently)", 212 | index: 46, 213 | line: 1, 214 | column: 47 215 | } 216 | ] 217 | }, 218 | { 219 | text: `Support Reference link[^1] in Markdown. 220 | 221 | [^1] https://httpstat.us/404`, 222 | errors: [ 223 | { 224 | message: "https://httpstat.us/404 is dead. 
(404 Not Found)", 225 | loc: { 226 | start: { 227 | line: 3, 228 | column: 6 229 | }, 230 | end: { 231 | line: 3, 232 | column: 29 233 | } 234 | } 235 | } 236 | ] 237 | } 238 | ] 239 | }); 240 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | /* Basic Options */ 4 | "module": "commonjs", 5 | "moduleResolution": "node", 6 | "esModuleInterop": true, 7 | "newLine": "LF", 8 | "noEmit": true, 9 | "target": "ES2018", 10 | "sourceMap": true, 11 | "declaration": true, 12 | "jsx": "preserve", 13 | "lib": [ 14 | "esnext", 15 | "dom" 16 | ], 17 | /* Strict Type-Checking Options */ 18 | "strict": true, 19 | /* Additional Checks */ 20 | /* Report errors on unused locals. */ 21 | "noUnusedLocals": true, 22 | /* Report errors on unused parameters. */ 23 | "noUnusedParameters": true, 24 | /* Report error when not all code paths in function return a value. */ 25 | "noImplicitReturns": true, 26 | /* Report errors for fallthrough cases in switch statement. */ 27 | "noFallthroughCasesInSwitch": true 28 | }, 29 | "include": [ 30 | "**/*" 31 | ], 32 | "exclude": [ 33 | ".git", 34 | "node_modules" 35 | ] 36 | } 37 | --------------------------------------------------------------------------------