├── .github
├── FUNDING.yml
├── ISSUE_TEMPLATE
│ ├── bug_report.md
│ └── feature_request.md
├── pull_request_template.md
└── workflows
│ ├── npm-publish.yml
│ ├── npm-test.yml
│ └── prettier.yml
├── .gitignore
├── .linkspector.test.yml
├── .prettierrc.json
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── DEV_SETUP.md
├── Dockerfile
├── LICENSE
├── README.md
├── SECURITY.md
├── index.js
├── index.test.js
├── lib
├── batch-check-links.js
├── check-file-links.js
├── extract-asciidoc-links.js
├── extract-markdown-hyperlinks.js
├── get-unique-links.js
├── handle-links-modification.js
├── prepare-file-list.js
├── update-linkstatus-obj.js
└── validate-config.js
├── linkspector.js
├── package-lock.json
├── package.json
├── scripts
└── apparmorfix.sh
├── test
└── fixtures
│ ├── asciidoc
│ └── hyperlinks
│ │ ├── asciidoc-hyperlinks.test.js
│ │ ├── hyperlinksTest.yml
│ │ └── testhyperlinks1.adoc
│ ├── headers.test.js
│ ├── markdown
│ ├── decoded-sections
│ │ ├── .decodedTest.yml
│ │ ├── decoded.md
│ │ ├── decoded2.md
│ │ └── markdown-decoded-sections.test.js
│ ├── duplicates
│ │ ├── duplicate1.md
│ │ ├── duplicateTest.yml
│ │ └── markdown-duplicates.test.js
│ ├── headings
│ │ ├── heading1.md
│ │ ├── headingsTest.yml
│ │ └── markdown-headings.test.js
│ ├── image
│ │ ├── image.md
│ │ ├── imageTest.yml
│ │ └── markdown-image.test.js
│ ├── line-references
│ │ ├── .lineReferencesTest.yml
│ │ ├── line-file.md
│ │ ├── line-reference-test.md
│ │ └── line-references.test.js
│ ├── relative
│ │ ├── .relativeTest.yml
│ │ ├── markdown-relative.test.js
│ │ ├── relative1.md
│ │ └── relative2.md
│ ├── with-html-anchors-id
│ │ ├── .withHtmlAnchorsIdTest.yml
│ │ ├── html-anchor-id.md
│ │ └── markdown-with-html-anchors-id.test.js
│ └── with-html-anchors
│ │ ├── .withHtmlAnchorsTest.yml
│ │ ├── html-anchor.md
│ │ └── markdown-with-html-anchors.test.js
│ ├── patterns
│ ├── patterns.md
│ ├── patterns.test.js
│ └── patternsTest.yml
│ └── redirects
│ ├── config-redirects-false.yml
│ ├── config-redirects-true.yml
│ ├── redirects.md
│ └── redirects.test.js
└── vite.config.ts
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | liberapay: gaurav-nelson
2 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: '[BUG] '
5 | labels: bug
6 | assignees: ''
7 | ---
8 |
9 | **Describe the bug**
10 | A clear and concise description of what the bug is.
11 |
12 | **To Reproduce**
13 | Steps to reproduce the behavior:
14 |
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 |
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 |
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 |
26 | **Additional context**
27 | Add any other context about the problem here.
28 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: '[FEATURE REQ] '
5 | labels: enhancement
6 | assignees: ''
7 | ---
8 |
9 | **Is your feature request related to a problem? Please describe.**
10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
11 |
12 | **Describe the solution you'd like**
13 | A clear and concise description of what you want to happen.
14 |
15 | **Describe alternatives you've considered**
16 | A clear and concise description of any alternative solutions or features you've considered.
17 |
18 | **Additional context**
19 | Add any other context or screenshots about the feature request here.
20 |
--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Description
2 |
3 | Please include a summary of the change and which issue is fixed. Also include relevant motivation and context. List any dependencies that are required for this change. Delete any other sections or text that are not relevant.
4 |
5 | Fixes # (issue number)
6 |
7 | ## Type of Change
8 |
9 | Please delete options that are not relevant.
10 |
11 | - [ ] Bug fix (non-breaking change which fixes an issue)
12 | - [ ] New feature (non-breaking change which adds functionality)
13 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected)
14 | - [ ] This change requires a documentation update
15 |
16 | ## Checklist:
17 |
18 | - [ ] I have performed a self-review of my own code
19 | - [ ] I have commented my code, particularly in hard-to-understand areas
20 | - [ ] I have made corresponding changes to the documentation
21 | - [ ] My changes generate no new warnings
22 | - [ ] I have added tests that prove my fix is effective or that my feature works
23 | - [ ] New and existing unit tests pass locally with my changes
24 |
25 | ## Additional Information
26 |
27 | Include any additional information about the pull request here.
28 |
--------------------------------------------------------------------------------
/.github/workflows/npm-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow publishes the NPM package when a tag is pushed to the repository.
2 |
3 | name: Publish on NPM
4 |
5 | on:
6 | push:
7 | tags:
8 | - '*'
9 |
10 | jobs:
11 | publish-npm:
12 | runs-on: ubuntu-latest
13 | steps:
14 | - uses: actions/checkout@v4
15 | - uses: actions/setup-node@v4
16 | with:
17 | node-version: 20
18 | registry-url: https://registry.npmjs.org/
19 | - run: npm ci
20 | - run: npm publish --access public
21 | env:
22 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
23 |
--------------------------------------------------------------------------------
/.github/workflows/npm-test.yml:
--------------------------------------------------------------------------------
1 | name: Run tests
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | test:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v4
16 | - uses: actions/setup-node@v4
17 | with:
18 | node-version: '20'
19 | - run: scripts/apparmorfix.sh
20 |
--------------------------------------------------------------------------------
/.github/workflows/prettier.yml:
--------------------------------------------------------------------------------
1 | name: Run prettier check
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | prettier:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - uses: actions/checkout@v4
16 | - uses: actions/setup-node@v4
17 | with:
18 | node-version: '20'
19 | - run: npm ci
20 | - run: npx prettier --check .
21 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Logs
2 | logs
3 | *.log
4 | npm-debug.log*
5 | yarn-debug.log*
6 | yarn-error.log*
7 | lerna-debug.log*
8 | .pnpm-debug.log*
9 |
10 | # Diagnostic reports (https://nodejs.org/api/report.html)
11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
12 |
13 | # Runtime data
14 | pids
15 | *.pid
16 | *.seed
17 | *.pid.lock
18 |
19 | # Directory for instrumented libs generated by jscoverage/JSCover
20 | lib-cov
21 |
22 | # Coverage directory used by tools like istanbul
23 | coverage
24 | *.lcov
25 |
26 | # nyc test coverage
27 | .nyc_output
28 |
29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
30 | .grunt
31 |
32 | # Bower dependency directory (https://bower.io/)
33 | bower_components
34 |
35 | # node-waf configuration
36 | .lock-wscript
37 |
38 | # Compiled binary addons (https://nodejs.org/api/addons.html)
39 | build/Release
40 |
41 | # Dependency directories
42 | node_modules/
43 | jspm_packages/
44 |
45 | # Snowpack dependency directory (https://snowpack.dev/)
46 | web_modules/
47 |
48 | # TypeScript cache
49 | *.tsbuildinfo
50 |
51 | # Optional npm cache directory
52 | .npm
53 |
54 | # Optional eslint cache
55 | .eslintcache
56 |
57 | # Optional stylelint cache
58 | .stylelintcache
59 |
60 | # Microbundle cache
61 | .rpt2_cache/
62 | .rts2_cache_cjs/
63 | .rts2_cache_es/
64 | .rts2_cache_umd/
65 |
66 | # Optional REPL history
67 | .node_repl_history
68 |
69 | # Output of 'npm pack'
70 | *.tgz
71 |
72 | # Yarn Integrity file
73 | .yarn-integrity
74 |
75 | # dotenv environment variable files
76 | .env
77 | .env.development.local
78 | .env.test.local
79 | .env.production.local
80 | .env.local
81 |
82 | # parcel-bundler cache (https://parceljs.org/)
83 | .cache
84 | .parcel-cache
85 |
86 | # Next.js build output
87 | .next
88 | out
89 |
90 | # Nuxt.js build / generate output
91 | .nuxt
92 | dist
93 |
94 | # Gatsby files
95 | .cache/
96 | # Uncomment the public line below if your project uses Gatsby and not Next.js
97 | # https://nextjs.org/blog/next-9-1#public-directory-support
98 | # public
99 |
100 | # vuepress build output
101 | .vuepress/dist
102 |
103 | # vuepress v2.x temp and cache directory
104 | .temp
105 | .cache
106 |
107 | # Docusaurus cache and generated files
108 | .docusaurus
109 |
110 | # Serverless directories
111 | .serverless/
112 |
113 | # FuseBox cache
114 | .fusebox/
115 |
116 | # DynamoDB Local files
117 | .dynamodb/
118 |
119 | # TernJS port file
120 | .tern-port
121 |
122 | # Stores VSCode versions used for testing VSCode extensions
123 | .vscode-test
124 |
125 | # yarn v2
126 | .yarn/cache
127 | .yarn/unplugged
128 | .yarn/build-state.yml
129 | .yarn/install-state.gz
130 | .pnp.*
131 |
132 | # Linkspector
133 | .linkspector.yml
134 |
--------------------------------------------------------------------------------
/.linkspector.test.yml:
--------------------------------------------------------------------------------
1 | files:
2 | - README.md
3 | useGitIgnore: true
4 |
--------------------------------------------------------------------------------
/.prettierrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "trailingComma": "es5",
3 | "tabWidth": 2,
4 | "semi": false,
5 | "singleQuote": true
6 | }
7 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | ## Code of conduct
2 |
3 | - We are committed to providing a friendly, safe and welcoming environment for all, regardless of level of experience, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, nationality, or other similar characteristic.
4 | - Please avoid using overtly sexual aliases or other nicknames that might detract from a friendly, safe and welcoming environment for all.
5 | - Please be kind and courteous. There’s no need to be mean or rude.
6 | - Respect that people have differences of opinion and that every design or implementation choice carries a trade-off and numerous costs. There is seldom a right answer.
7 | - Please keep unstructured critique to a minimum. If you have solid ideas you want to experiment with, make a fork and see how it works.
8 | - We will exclude you from interaction if you insult, demean or harass anyone. That is not welcome behavior. We interpret the term “harassment” as including the definition in the [Citizen Code of Conduct](https://github.com/stumpsyn/policies/blob/master/citizen_code_of_conduct.md); if you have any lack of clarity about what might be included in that concept, please read their definition. In particular, we don’t tolerate behavior that excludes people in socially marginalized groups.
9 | - Private harassment is also unacceptable. No matter who you are, if you feel you have been or are being harassed or made uncomfortable by a community member, please contact one of the channel ops or an employee of Oven immediately. Whether you’re a regular contributor or a newcomer, we care about making this community a safe place for you and we’ve got your back.
10 | - Likewise any spamming, trolling, flaming, baiting or other attention-stealing behavior is not welcome.
11 |
12 | This code of conduct is adapted from the [Rust Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct).
13 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing Guidelines
2 |
3 | Thank you for considering contributing to Linkspector! We welcome contributions from the community to help improve and grow this tool. Please take a moment to review these guidelines to ensure a smooth and collaborative contribution process.
4 |
5 | ## Table of Contents
6 |
7 | 1. [Getting Started](#getting-started)
8 | 2. [Code of Conduct](#code-of-conduct)
9 | 3. [How to Contribute](#how-to-contribute)
10 | - [Reporting Issues](#reporting-issues)
11 | - [Submitting Pull Requests](#submitting-pull-requests)
12 | 4. [Development Setup](#development-setup)
13 | 5. [Testing](#testing)
14 | 6. [Commit Messages](#commit-messages)
15 | 7. [License](#license)
16 |
17 | ## Getting Started
18 |
19 | Before you start contributing, please make sure you have:
20 |
21 | - Familiarized yourself with the project's goals and objectives.
22 | - Read and understood the project's license (See [License](#license)).
23 | - Set up a GitHub account if you don't already have one.
24 |
25 | ## Code of Conduct
26 |
27 | Please review and adhere to our [Code of Conduct](CODE_OF_CONDUCT.md) to ensure a respectful and inclusive environment for all contributors and users.
28 |
29 | ## How to Contribute
30 |
31 | ### Reporting Issues
32 |
33 | If you encounter a bug, have a feature request, or have any questions about the project, please open an issue on the [GitHub Issues](https://github.com/UmbrellaDocs/linkspector/issues) page. Be sure to provide as much detail as possible, including the steps to reproduce the issue and any relevant error messages or screenshots.
34 |
35 | ### Submitting Pull Requests
36 |
37 | We welcome contributions in the form of pull requests (PRs). To submit a PR, follow these steps:
38 |
39 | 1. Fork the repository to your own GitHub account.
40 | 2. Create a new branch from the `main` branch for your changes.
41 | 3. Make your changes and commit them with clear and concise messages (see [Commit Messages](#commit-messages)).
42 | 4. Push your branch to your forked repository.
43 | 5. Create a pull request from your branch to the `main` branch of this repository.
44 |
45 | Our maintainers will review your PR as soon as possible and provide feedback if needed. Once your PR is approved, it will be merged into the project.
46 |
47 | ## Development Setup
48 |
49 | To set up a development environment, follow the instructions in the [Development Setup](DEV_SETUP.md) document. This will guide you through the process of installing dependencies and configuring your development environment.
50 |
51 | ## Testing
52 |
53 | Before submitting a pull request, make sure to run the test suite to ensure that your changes do not introduce any regressions. To run the tests, use the following command:
54 |
55 | ```bash
56 | npm test
57 | ```
58 |
59 | ## Commit Messages
60 |
61 | Follow these guidelines for commit messages:
62 |
63 | - Use clear and concise messages that explain the purpose of the commit.
64 | - Start the message with a verb in the present tense (e.g., "Add feature," "Fix bug").
65 | - Reference related issues or pull requests by including their numbers (e.g., "Fixes #123," "Closes #456").
66 |
67 | ## License
68 |
69 | By contributing to this project, you agree that your contributions will be licensed under the project's [LICENSE](LICENSE).
70 |
--------------------------------------------------------------------------------
/DEV_SETUP.md:
--------------------------------------------------------------------------------
1 | # Development Setup
2 |
3 | To contribute to this project, you'll need to set up your development environment. Please follow the instructions below to ensure you have the necessary tools and dependencies installed.
4 |
5 | ## Prerequisites
6 |
7 | Before you begin, make sure you have the following prerequisites installed on your system:
8 |
9 | - **Node.js**: This project requires Node.js, a JavaScript runtime, to build and run. You can download and install Node.js from the official website: [Node.js Download](https://nodejs.org/).
10 |
11 | To check if Node.js is installed, open your terminal and run:
12 |
13 | ```bash
14 | node -v
15 | ```
16 |
17 | You should see the installed Node.js version.
18 |
19 | ## Installation
20 |
21 | After ensuring you have Node.js installed, follow these steps to set up your development environment:
22 |
23 | 1. **Clone the Repository**: Fork and clone this repository to your local machine:
24 |
25 | ```bash
26 | git clone git@github.com:UmbrellaDocs/linkspector.git
27 | ```
28 |
29 | 2. **Change Directory**: Navigate to the project directory:
30 |
31 | ```bash
32 | cd linkspector
33 | ```
34 |
35 | 3. **Install Dependencies**: Use `npm` to install project dependencies:
36 |
37 | ```bash
38 | npm install
39 | ```
40 |
41 | This command will download and install all the required packages specified in the `package.json` file.
42 |
43 | ## Contributing
44 |
45 | You are now set up to contribute to the project! Follow the [Contributing Guidelines](CONTRIBUTING.md) for information on reporting issues, submitting pull requests, and more.
46 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM node:lts-bookworm-slim
2 |
3 | # The base name of the npm package
4 | ARG LINKSPECTOR_NAME=@umbrelladocs/linkspector
5 | # Use the argument below to select version to install, e.g.:
6 | # docker build --build-arg LINKSPECTOR_VERSION=0.2.7 -t umbrelladocs/linkspector .
7 | ARG LINKSPECTOR_VERSION=latest
8 | # Use the argument below to specify the full package name to install,
9 | # empty value installs current directory, e.g.:
10 | # docker build --build-arg LINKSPECTOR_PACKAGE= -t umbrelladocs/linkspector .
11 | ARG LINKSPECTOR_PACKAGE=${LINKSPECTOR_NAME}@${LINKSPECTOR_VERSION}
12 |
13 | # Set default user
14 | ENV USER=node
15 |
16 | # Set installation location for node packages
17 | ENV NPM_GLOBAL=/home/${USER}/.npm-global
18 | ENV PATH=${NPM_GLOBAL}/bin:$PATH
19 |
20 | # Install chromium instead of puppeteer chrome
21 | # as puppeteer does not provide arm64
22 | # https://github.com/puppeteer/puppeteer/issues/7740
23 | ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
24 | ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium.wrapper
25 |
26 | # Install linkspector dependencies
27 | RUN apt-get update \
28 | && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
29 | bash \
30 | ca-certificates \
31 | chromium \
32 | curl \
33 | git \
34 | upower \
35 | && apt-get clean && rm -rf /var/lib/apt/lists/*
36 |
37 | # Create app directory for mounting host files
38 | RUN mkdir /app && chown ${USER}:${USER} /app
39 |
40 | # chromium in order to start either needs dbus https://github.com/puppeteer/puppeteer/issues/11028
41 | # or skip dbus by using --remote-debugging-port=0 (any free port) https://github.com/nodejs/help/issues/3220#issuecomment-1228342313
42 | # Additionally, allow chromium to start without elevated capabilities needed to start the sandbox
43 | # See https://github.com/puppeteer/puppeteer/issues/5505
44 | RUN echo /usr/bin/chromium \
45 | --no-sandbox \
46 | --headless=new \
47 | --disable-gpu \
48 | --enable-chrome-browser-cloud-management \
49 | --remote-debugging-port=0 \
50 | > /usr/bin/chromium.wrapper
51 | RUN chmod ugo+x /usr/bin/chromium.wrapper
52 |
53 | # Install linkspector as node user
54 | USER ${USER}
55 | WORKDIR /home/${USER}
56 | RUN npm config set prefix ${NPM_GLOBAL}
57 | COPY --chown=${USER}:${USER} lib lib
58 | COPY --chown=${USER}:${USER} *.js *.json test ./
59 | # npm ci does not support --global
60 | # https://github.com/npm/cli/issues/7224
61 | RUN if test -z ${LINKSPECTOR_PACKAGE}; then npm ci; fi && npm install --global ${LINKSPECTOR_PACKAGE}
62 |
63 | WORKDIR /app
64 |
65 | # Run sanity checks
66 | RUN npm list --global
67 | RUN linkspector --version
68 | RUN linkspector check
69 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [Run Linkspector with reviewdog on GitHub Marketplace](https://github.com/marketplace/actions/run-linkspector-with-reviewdog)
2 | [@umbrelladocs/linkspector on npm](https://www.npmjs.com/package/@umbrelladocs/linkspector)
3 |
4 |
5 |
6 |
7 |
8 | Uncover broken links in your content.
9 | Linkspector
10 |
11 | Linkspector is a CLI app that checks for dead hyperlinks in files.
12 | It supports multiple markup languages such as Markdown, AsciiDoc (limited - hyperlinks only), and reStructuredText (coming soon).
13 |
14 | With Linkspector, you can easily check all hyperlinks in your files, ensuring that they are not broken and that your readers can access all the relevant content.
15 | The app allows you to quickly and easily identify any broken links, so you can fix them before publishing your content.
16 |
17 | Linkspector is a powerful tool for anyone who creates content using markup languages.
18 |
19 | ## How this is different from existing tools?
20 |
21 | 1. **Enhanced Link Checking with Puppeteer**: It uses [Puppeteer](https://pptr.dev/) to check links in Chrome's headless mode, reducing the number of false positives.
22 | 2. **Addresses limitations and adds user-requested features**: It is built to address the shortcomings in [GitHub Action - Markdown link check](https://github.com/gaurav-nelson/github-action-markdown-link-check) and adds many user-requested features.
23 | 3. **Single repository for seamless collaboration**: All the code it needs to run is in a single repository, making it easier for the community to collaborate.
24 | 4. **Focused on CI/CD use**: Linkspector ([action-linkspector](https://github.com/UmbrellaDocs/action-linkspector)) is purposefully tailored to run in your CI/CD pipelines. This ensures that link checking becomes an integral part of your development workflow.
25 |
26 | ## Installation
27 |
28 | Before you can use Linkspector, you need to install it. You can do this using the following command:
29 |
30 | ```bash
31 | npm install -g @umbrelladocs/linkspector
32 | ```
33 |
34 | This command installs Linkspector globally, allowing you to use it from anywhere in your terminal. If you don't want to install using `npm` you can download the binary from GitHub releases.
35 |
36 | ### GitHub action
37 |
38 | For more details, see [action-linkspector](https://github.com/UmbrellaDocs/action-linkspector)
39 |
40 | ## Checking Hyperlinks
41 |
42 | To check hyperlinks in your markup language files, follow these steps:
43 |
44 | 1. Open your terminal.
45 |
46 | 1. Navigate to the directory containing the files you want to check.
47 |
48 | 1. (**Optional**) Create a [configuration](#configuration) file called `.linkspector.yml`. By default, Linkspector looks for a configuration file named `.linkspector.yml` in the current directory. If you have a custom configuration file or want to specify its path, you can use the `-c` or `--config` option.
49 |
50 | 1. Use the `linkspector check` command to initiate the hyperlink check. For example:
51 |
52 | ```bash
53 | linkspector check
54 | ```
55 |
56 | - To specify a custom configuration file path:
57 |
58 | ```bash
59 | linkspector check -c /path/to/custom-config.yml
60 | ```
61 |
62 | - To output the results in JSON format:
63 |
64 | ```bash
65 | linkspector check -j
66 | ```
67 |
68 | The JSON output follows [rdjson](https://github.com/reviewdog/reviewdog/tree/master/proto/rdf#rdjson) format.
69 |
70 | 1. Linkspector starts checking the hyperlinks in your files based on the configuration provided in the configuration file or using the default configuration. It then displays the results in your terminal.
71 |
72 | 1. After the check is complete, Linkspector provides a summary of the results. If any dead links are found, they are listed in the terminal, along with their status codes and error messages.
73 |
74 | - To display statistics about the checked links, use the `-s` or `--showstat` option:
75 |
76 | ```bash
77 | linkspector check -s
78 | ```
79 |
80 | This command shows a summary table with the number of files checked, total links, hyperlinks, file and header links, and the count of correct and failed links.
81 | Note that this option cannot be used together with the JSON output option (`-j`).
82 |
83 | 1. If no dead links are found, Linkspector displays a success message, indicating that all links are working.
84 |
85 | ## Configuration
86 |
87 | Linkspector uses a configuration file named `.linkspector.yml` to customize its behavior. If this file is not found in the current directory when the program is run, Linkspector displays a message saying "Configuration file not found. Using default configuration." and uses a default configuration.
88 |
89 | ### Default Configuration
90 |
91 | The default configuration is as follows:
92 |
93 | ```yaml
94 | dirs:
95 | - .
96 | useGitIgnore: true
97 | ```
98 |
99 | If you are defining a custom configuration, you must include the `dirs` or `files` section in the configuration file.
100 |
101 | Following are the available configuration options:
102 |
103 | | Option | Description | Required |
104 | | ------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------- |
105 | | [`files`](#files-to-check) | The list of Markdown files to check for broken links. | Yes, if `dirs` is not specified. |
106 | | [`dirs`](#directories-to-search) | The list of directories to search for Markdown files. | Yes, if `files` is not specified. |
107 | | [`excludedFiles`](#excluded-files) | The list of Markdown files to exclude from the link checking process. | No |
108 | | [`excludedDirs`](#excluded-directories) | The list of directories to exclude from the link checking process. | No |
109 | | [`baseUrl`](#base-url) | The base URL to use when checking relative links in Markdown files. | No |
110 | | [`ignorePatterns`](#ignore-patterns) | The list of regular expressions that match URLs to be ignored during link checking. | No |
111 | | [`replacementPatterns`](#replacement-patterns) | The list of regular expressions and replacement strings to modify URLs during link checking. | No |
112 | | [`aliveStatusCodes`](#alive-status-codes) | The list of HTTP status codes that are considered as "alive" links. | No |
113 | | [`useGitIgnore`](#use-gitignore) | Indicates whether to use the rules defined in the `.gitignore` file to exclude files and directories. | No |
114 | | [`modifiedFilesOnly`](#check-modified-files-only) | Indicates whether to check only the files that have been modified in the last git commit. | No |
115 | | [`httpHeaders`](#http-headers) | The list of URLs and their corresponding HTTP headers to be used during link checking. | No |
116 | | [`followRedirects`](#follow-redirects) | Controls how HTTP redirects (e.g., 301, 302) are handled. | No |
117 |
118 | ### Files to Check
119 |
120 | The `files` section specifies the Markdown files that Linkspector should check for broken links. You can add the file paths you want to include in this list. For example:
121 |
122 | ```yaml
123 | files:
124 | - README.md
125 | - file2.md
126 | - file3.md
127 | ```
128 |
129 | ### Directories to Search
130 |
131 | The `dirs` section lists the directories where Linkspector should search for Markdown files. You can specify directories relative to the current working directory. For example:
132 |
133 | ```yaml
134 | dirs:
135 | - ./
136 | - folder2
137 | ```
138 |
139 | ### Excluded Files
140 |
141 | The `excludedFiles` section allows you to specify Markdown files that should be excluded from the link checking process. Add the paths of the files you want to exclude. For example:
142 |
143 | ```yaml
144 | excludedFiles:
145 | - ./check.md
146 | - excluded-file2.md
147 | ```
148 |
149 | ### Excluded Directories
150 |
151 | The `excludedDirs` section lets you specify directories that should be excluded from the link checking process. Provide the paths of the directories you want to exclude. For example:
152 |
153 | ```yaml
154 | excludedDirs:
155 | - ./lib
156 | - excluded-folder2
157 | ```
158 |
159 | ### Base URL
160 |
161 | The `baseUrl` option sets the base URL that will be used when checking relative links in Markdown files. In this example:
162 |
163 | ```yaml
164 | baseUrl: https://example.com
165 | ```
166 |
167 | The base URL is set to `https://example.com`.
168 |
169 | ### Ignore Patterns
170 |
171 | The `ignorePatterns` section allows you to define regular expressions that match URLs to be ignored during the link checking process. For example:
172 |
173 | ```yaml
174 | ignorePatterns:
175 | - pattern: '^https://example.com/skip/.*$'
176 | - pattern: "^(ftp)://[^\\s/$?#]*\\.[^\\s]*$"
177 | ```
178 |
179 | In this example, URLs matching the specified patterns will be skipped during link checking.
180 |
181 | ### Replacement Patterns
182 |
183 | The `replacementPatterns` section lets you define regular expressions and replacement strings to modify URLs during link checking. For example:
184 |
185 | ```yaml
186 | replacementPatterns:
187 | - pattern: "(https?://example.com)/(\\w+)/(\\d+)"
188 | replacement: '$1/id/$3'
189 | - pattern: "\\[([^\\]]+)\\]\\((https?://example.com)/file\\)"
190 | replacement: '$1'
191 | ```
192 |
193 | These patterns and replacements will be applied to URLs found in the Markdown files.
194 |
195 | ### Alive Status Codes
196 |
197 | The `aliveStatusCodes` section allows you to specify a list of HTTP status codes that are considered as "alive" links. In this example:
198 |
199 | ```yaml
200 | aliveStatusCodes:
201 | - 200
202 | - 201
203 | - 204
204 | ```
205 |
206 | Links returning any of these status codes will be considered valid.
207 |
208 | ### Use .gitignore
209 |
210 | The `useGitIgnore` option, when set to `true`, indicates that Linkspector should use the rules defined in the `.gitignore` file to exclude files and directories. For example:
211 |
212 | ```yaml
213 | useGitIgnore: true
214 | ```
215 |
216 | When enabled, the app will respect the `.gitignore` rules during link checking.
217 |
218 | ### Check Modified Files Only
219 |
220 | The `modifiedFilesOnly` option, when set to `true`, indicates that Linkspector should only check the files that have been modified in the last git commit. For example:
221 |
222 | ```yaml
223 | modifiedFilesOnly: true
224 | ```
225 |
226 | When enabled, Linkspector will use `git` to find the list of modified files and only check those files. Please note that this option requires `git` to be installed and available on your system path. If `git` is not installed or not found in the system path, Linkspector will throw an error.
227 |
228 | Also, if none of the files to check were modified in the last git commit, Linkspector skips link checking and exits with a message saying that no modified files were found.
229 |
230 | ### HTTP headers
231 |
232 | The `httpHeaders` option allows you to specify HTTP headers for specific URLs that require authorization. You can use environment variables for secure values.
233 |
234 | 1. Create a `.env` file in the root directory of your project and add the environment variables. For example:
235 |
236 | ```env
237 | AUTH_TOKEN=abcdef123456
238 | ```
239 |
240 | 1. Add the `httpHeaders` section to the configuration file and specify the URLs and headers. For example:
241 |
242 | ```yaml
243 | httpHeaders:
244 | - url:
245 | - https://example1.com
246 | headers:
247 | Foo: Bar
248 | - url:
249 | - https://example2.com
250 | headers:
251 | Authorization: ${AUTH_TOKEN}
252 | Foo: Bar
253 | ```
254 |
255 | ### Follow Redirects
256 |
257 | The `followRedirects` option controls how Linkspector handles HTTP redirects (e.g., status codes 301, 302).
258 |
259 | - **Type:** `boolean`
260 | - **Default:** `true`
261 |
262 | **Behavior:**
263 |
264 | - When `followRedirects: true` (default):
265 | Linkspector will follow HTTP redirects to their final destination. The status of the link will be determined by the status code of this final destination. For example, if `http://example.com/old` redirects to `http://example.com/new` and `/new` returns a 200 OK, the original link `/old` will be reported as 'alive' (200), with a message indicating it was redirected.
266 |
267 | - When `followRedirects: false`:
268 | Linkspector will _not_ follow HTTP redirects. If a link returns a redirect status code (e.g., 301, 302, 307, 308), it will be reported as an 'error'. The reported status code will be the original redirect status code (e.g., 301), and the error message will indicate that the link redirected but `followRedirects` was set to `false`.
269 |
270 | **Example:**
271 |
272 | To disable following redirects:
273 |
274 | ```yaml
275 | followRedirects: false
276 | ```
277 |
278 | ### Sample configuration
279 |
280 | ```yml
281 | files:
282 | - README.md
283 | - file2.md
284 | - file3.md
285 | dirs:
286 | - ./
287 | - folder2
288 | excludedFiles:
289 | - ./check.md
290 | - excluded-file2.md
291 | excludedDirs:
292 | - ./lib
293 | - excluded-folder2
294 | baseUrl: https://example.com
295 | ignorePatterns:
296 | - pattern: '^https://example.com/skip/.*$'
297 | - pattern: "^(ftp)://[^\\s/$?#]*\\.[^\\s]*$"
298 | replacementPatterns:
299 | - pattern: "(https?://example.com)/(\\w+)/(\\d+)"
300 | replacement: '$1/id/$3'
301 | - pattern: "\\[([^\\]]+)\\]\\((https?://example.com)/file\\)"
302 | replacement: '$1'
303 | httpHeaders:
304 | - url:
305 | - https://example1.com
306 | headers:
307 | Authorization: Basic Zm9vOmJhcg==
308 | Foo: Bar
309 | aliveStatusCodes:
310 | - 200
311 | - 201
312 | - 204
313 | useGitIgnore: true
314 | followRedirects: false # Example of including it in a full config
315 | ```
316 |
317 | ## Sample output
318 |
319 | If there are failed links, Linkspector shows the output as comma-separated values and exits with an error.
320 | `File, Link, HTTP status code, Line number, Error message`
321 |
322 | ```
323 | REDISTRIBUTED.md, https://unlicense.org/, null, 186, net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH at https://unlicense.org/]
324 | 💥 Error: Some hyperlinks in the specified files are invalid.
325 | ```
326 |
327 | If there are no errors, linkspector shows the following message:
328 |
329 | ```
330 | ✨ Success: All hyperlinks in the specified files are valid.
331 | ```
332 |
333 | ## Using Linkspector with Docker
334 |
335 | To use Linkspector with Docker, follow these steps:
336 |
337 | 1. Clone the Linkspector repository to your local machine and switch to the cloned directory:
338 | ```bash
339 | git clone git@github.com:UmbrellaDocs/linkspector.git
340 | cd linkspector
341 | ```
342 | 1. Build the docker image locally, while being at the root (`.`) of this project:
343 |
344 | ```bash
345 | docker build --no-cache --pull --build-arg LINKSPECTOR_PACKAGE= -t umbrelladocs/linkspector .
346 | ```
347 |
348 | 1. To perform a check using the default configuration, while being at the root (`$PWD`) of the project to be checked:
349 |
350 | ```bash
351 | docker run --rm -it -v $PWD:/app \
352 | --name linkspector umbrelladocs/linkspector \
353 | bash -c 'linkspector check'
354 | ```
355 |
356 | To specify a custom configuration file path:
357 |
358 | ```bash
359 | docker run --rm -it -v $PWD:/app -v $PWD/custom-config.yml:/path/to/custom-config.yml \
360 | --name linkspector umbrelladocs/linkspector \
361 | bash -c 'linkspector check -c /path/to/custom-config.yml'
362 | ```
363 |
364 | ## Contributing
365 |
366 | If you would like to contribute to Linkspector, please read the [contributing guidelines](/CONTRIBUTING.md).
367 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | We aim to update the supported versions with patches for security vulnerabilities as soon as possible after they are disclosed.
4 | We recommend keeping your installation of Linkspector up to date, as we only support the latest release version with security updates.
5 |
6 | | Version | Supported |
7 | | ------- | ------------------ |
8 | | latest | :white_check_mark: |
9 |
10 | ## Reporting a Vulnerability
11 |
12 | Please report (suspected) security vulnerabilities to **mmfjpjyy@duck.com**.
13 | You will receive a response from us within 48 hours. If the issue is confirmed, we will release a patch as soon as possible depending on the complexity of the issue.
14 |
15 | **Please do not report security vulnerabilities through public GitHub issues.**
16 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 |
3 | import { program } from 'commander'
4 | import kleur from 'kleur'
5 | import ora from 'ora'
6 | import { linkspector } from './linkspector.js'
7 | import { createRequire } from 'module'
8 | const require = createRequire(import.meta.url)
9 | const pkg = require('./package.json')
10 |
11 | program // CLI definition: registers the `check` subcommand and its action handler.
12 | .version(pkg.version)
13 | .description('🔍 Uncover broken links in your content.')
14 | .command('check')
15 | .description('Check hyperlinks based on the configuration file.')
16 | .option('-c, --config ', 'Specify a custom configuration file path') // NOTE(review): the flag string has no value placeholder (e.g. `<path>`), so commander would parse --config as a boolean — likely lost in extraction; confirm against the repository.
17 | .option('-j, --json', 'Output the results in JSON format')
18 | .option('-s, --showstat', 'Display statistics about the links checked')
19 | .action(async (cmd) => { // `cmd` is read below as the parsed options object (cmd.config, cmd.json, cmd.showstat).
20 | // Validate that -j and -s options are not used together
21 | if (cmd.json && cmd.showstat) {
22 | console.error(
23 | kleur.red(
24 | 'Error: The --json and --showstat options cannot be used together.'
25 | )
26 | )
27 | process.exit(1)
28 | }
29 |
30 | const configFile = cmd.config || '.linkspector.yml' // Use custom config file path if provided
31 |
32 | let currentFile = '' // Variable to store the current file name
33 | let results = [] // NOTE(review): shadowed by the `results` object declared inside the try block below; this array is never read.
34 |
35 | // Initialize statistics counters
36 | let stats = {
37 | filesChecked: 0,
38 | totalLinks: 0,
39 | httpLinks: 0,
40 | fileLinks: 0,
41 | emailLinks: 0,
42 | correctLinks: 0,
43 | failedLinks: 0,
44 | }
45 |
46 | const spinner = cmd.json ? null : ora().start() // spinner is null in JSON mode; every later spinner call sits on a non-JSON path.
47 |
48 | try {
49 | let hasErrorLinks = false // Becomes true as soon as one link reports status 'error'; decides the exit code.
50 | // Initialize the results object
51 | let results = { // Container printed in JSON mode; diagnostics only ever receives 'error' links.
52 | source: {
53 | name: 'linkspector',
54 | url: 'https://github.com/UmbrellaDocs/linkspector',
55 | },
56 | severity: 'ERROR',
57 | diagnostics: [],
58 | }
59 |
60 | for await (const { file, result } of linkspector(configFile, cmd)) { // linkspector() is consumed as an async iterable yielding one { file, result } per file.
61 | // Update the current file name
62 | currentFile = file
63 | if (!cmd.json) {
64 | spinner.text = `Checking ${currentFile}...`
65 | }
66 |
67 | // Increment file count for statistics
68 | stats.filesChecked++
69 |
70 | for (const linkStatusObj of result) {
71 | // Count total links
72 | stats.totalLinks++
73 |
74 | // Count links by type
75 | if (linkStatusObj.link && linkStatusObj.link.match(/^https?:\/\//)) {
76 | stats.httpLinks++
77 | } else if (
78 | linkStatusObj.link &&
79 | linkStatusObj.link.startsWith('mailto:')
80 | ) {
81 | stats.emailLinks++
82 | } else if (
83 | linkStatusObj.link &&
84 | (linkStatusObj.link.startsWith('#') ||
85 | linkStatusObj.link.includes('.md') ||
86 | linkStatusObj.link.includes('#'))
87 | ) {
88 | stats.fileLinks++
89 | } else if (linkStatusObj.link) { // NOTE(review): this branch and the previous one both increment fileLinks, so the '#'/'.md' test does not change the count.
90 | // Count any remaining links as file links
91 | stats.fileLinks++
92 | }
93 |
94 | // Count correct vs failed links - Updated to handle skipped links
95 | if (linkStatusObj.status === 'error') {
96 | stats.failedLinks++
97 | if (cmd.json) {
98 | results.diagnostics.push({ // One diagnostic entry per failed link.
99 | message: `Cannot reach ${linkStatusObj.link} Status: ${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ''}`,
100 | location: {
101 | path: currentFile,
102 | range: {
103 | start: {
104 | line: linkStatusObj.line_number, // Start line comes from line_number; the end line below comes from position.end.line.
105 | column: linkStatusObj.position.start.column, // NOTE(review): assumes position is always set on error results — confirm.
106 | },
107 | end: {
108 | line: linkStatusObj.position.end.line,
109 | column: linkStatusObj.position.end.column,
110 | },
111 | },
112 | },
113 | severity: linkStatusObj.status.toUpperCase(), // Always 'ERROR' here, since this branch only runs for status 'error'.
114 | })
115 | } else {
116 | // If json is false, print the results in the console
117 | spinner.stop() // Pause the spinner while the failure line is printed; it is restarted right after.
118 | console.log(
119 | kleur.red(
120 | `${currentFile}:${linkStatusObj.line_number}:${linkStatusObj.position.start.column}: 🚫 ${linkStatusObj.link} Status:${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ' Cannot reach link'}`
121 | )
122 | )
123 | spinner.start(`Checking ${currentFile}...`)
124 | }
125 | hasErrorLinks = true
126 | } else if (
127 | linkStatusObj.status === 'alive' ||
128 | linkStatusObj.status === 'assumed alive'
129 | ) {
130 | stats.correctLinks++
131 | } else if (linkStatusObj.status === 'skipped') { // Intentionally empty: skipped links count as neither working nor failed.
132 | // Skipped links don't count towards failed links
133 | } else {
134 | // Count other status as failed
135 | stats.failedLinks++ // Note: this raises the failed count without setting hasErrorLinks, so the exit code is unaffected.
136 | }
137 | }
138 | }
139 |
140 | if (cmd.json) {
141 | // If there are no links with a status of "error", print a blank object
142 | if (results.diagnostics.length === 0) {
143 | console.log('{}')
144 | } else {
145 | console.log(JSON.stringify(results, null, 2))
146 | }
147 | }
148 |
149 | // Display statistics if --showstat option is used
150 | if (cmd.showstat) {
151 | spinner.stop() // Safe: --showstat and --json are mutually exclusive (checked at the top), so spinner is non-null here.
152 | console.log('\n' + kleur.bold('💀📊 Linkspector check stats'))
153 | console.log('┌───────────────────────────────┬────────┐')
154 | console.log(
155 | `│ 🟰 ${kleur.bold('Total files checked')} │ ${kleur.cyan(padNumber(stats.filesChecked))} │`
156 | )
157 | console.log('├───────────────────────────────┼────────┤')
158 | console.log(
159 | `│ 🔗 ${kleur.bold('Total links checked')} │ ${kleur.cyan(padNumber(stats.totalLinks))} │`
160 | )
161 | console.log('├───────────────────────────────┼────────┤')
162 | console.log(
163 | `│ 🌐 ${kleur.bold('Hyperlinks')} │ ${kleur.cyan(padNumber(stats.httpLinks))} │`
164 | )
165 | console.log('├───────────────────────────────┼────────┤')
166 | console.log(
167 | `│ 📁 ${kleur.bold('File and header links')} │ ${kleur.cyan(padNumber(stats.fileLinks))} │`
168 | )
169 | console.log('├───────────────────────────────┼────────┤')
170 | console.log(
171 | `│ ✉️ ${kleur.bold('Email links (Skipped)')} │ ${kleur.cyan(padNumber(stats.emailLinks))} │`
172 | )
173 | console.log('├───────────────────────────────┼────────┤')
174 | console.log(
175 | `│ ✅ ${kleur.bold('Working links')} │ ${kleur.green(padNumber(stats.correctLinks))} │`
176 | )
177 | console.log('├───────────────────────────────┼────────┤')
178 | console.log(
179 | `│ 🚫 ${kleur.bold('Failed links')} │ ${kleur.red(padNumber(stats.failedLinks))} │`
180 | )
181 | console.log('└───────────────────────────────┴────────┘')
182 | console.log('')
183 | }
184 |
185 | if (!hasErrorLinks) { // Exit 0 when no link reported 'error', exit 1 otherwise.
186 | if (!cmd.json && !cmd.showstat) {
187 | spinner.stop()
188 | console.log(
189 | kleur.green(
190 | '✨ Success: All hyperlinks in the specified files are valid.'
191 | )
192 | )
193 | }
194 | process.exit(0)
195 | } else {
196 | if (!cmd.json && !cmd.showstat) {
197 | spinner.stop()
198 | console.error(
199 | kleur.red(
200 | '💥 Error: Some hyperlinks in the specified files are invalid.'
201 | )
202 | )
203 | } else if (cmd.showstat) { // The spinner was already stopped in the stats block above, so only the message is printed.
204 | console.error(
205 | kleur.red(
206 | '💥 Error: Some hyperlinks in the specified files are invalid.'
207 | )
208 | )
209 | }
210 | process.exit(1)
211 | }
212 | } catch (error) { // Any error thrown while iterating or reporting ends the run with exit code 1.
213 | if (spinner) spinner.stop()
214 | console.error(kleur.red(`💥 Main error: ${error.message}`))
215 | process.exit(1)
216 | }
217 |
218 | // Helper function to pad numbers for consistent table formatting
219 | function padNumber(num) { // Function declarations are hoisted, so the stats block above can call this.
220 | return num.toString().padStart(6, ' ') // Left-pads the decimal string with spaces to a width of 6.
221 | }
222 | })
223 |
224 | // Parse the command line arguments
225 | program.parse(process.argv)
226 |
--------------------------------------------------------------------------------
/index.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
4 | let cmd = { // Options object passed to linkspector() by the first test; only `json` is set.
5 | json: true,
6 | }
7 |
8 | test('linkspector should check top-level relative links in Markdown file', async () => { // Runs linkspector against ./.linkspector.test.yml and expects 23 results with no 'error' status.
9 | let hasErrorLinks = false
10 | let currentFile = '' // Variable to store the current file name
11 | let results = [] // Array to store the results if json is true
12 |
13 | for await (const { file, result } of linkspector(
14 | './.linkspector.test.yml',
15 | cmd
16 | )) {
17 | currentFile = file
18 | for (const linkStatusObj of result) {
19 | if (cmd.json) { // Always true in this test because the module-level cmd sets json: true.
20 | results.push({ // Flattened copy of each link result, tagged with the file it came from.
21 | file: currentFile,
22 | link: linkStatusObj.link,
23 | status_code: linkStatusObj.status_code,
24 | line_number: linkStatusObj.line_number,
25 | position: linkStatusObj.position,
26 | status: linkStatusObj.status,
27 | error_message: linkStatusObj.error_message,
28 | })
29 | }
30 | if (linkStatusObj.status === 'error') {
31 | hasErrorLinks = true
32 | }
33 | }
34 | }
35 |
36 | expect(hasErrorLinks).toBe(false)
37 | expect(results.length).toBe(23) // Expected total number of link results across all files covered by the test config.
38 | })
39 |
40 | test('linkspector should track statistics correctly when stats option is enabled', async () => { // Recomputes link counters from linkspector()'s results and checks them for consistency.
41 | let cmd = { // Local options object; shadows the module-level cmd for this test.
42 | showstat: true,
43 | }
44 |
45 | // Initialize statistics counters
46 | let stats = {
47 | filesChecked: 0,
48 | totalLinks: 0,
49 | httpLinks: 0,
50 | fileLinks: 0,
51 | correctLinks: 0,
52 | failedLinks: 0,
53 | }
54 |
55 | for await (const { file, result } of linkspector(
56 | './.linkspector.test.yml',
57 | cmd
58 | )) {
59 | // Increment file count for statistics
60 | stats.filesChecked++
61 |
62 | for (const linkStatusObj of result) {
63 | // Count total links
64 | stats.totalLinks++
65 |
66 | // Count HTTP vs File links
67 | if (linkStatusObj.link.match(/^https?:\/\//)) {
68 | stats.httpLinks++
69 | } else if (
70 | !linkStatusObj.link.startsWith('#') && // Links starting with '#' or 'mailto:' are counted in neither httpLinks nor fileLinks.
71 | !linkStatusObj.link.startsWith('mailto:')
72 | ) {
73 | stats.fileLinks++
74 | }
75 |
76 | // Count correct vs failed links
77 | if (linkStatusObj.status === 'error') {
78 | stats.failedLinks++
79 | } else if (
80 | linkStatusObj.status === 'alive' ||
81 | linkStatusObj.status === 'assumed alive'
82 | ) {
83 | stats.correctLinks++
84 | }
85 | }
86 | }
87 |
88 | // Verify statistics are being tracked correctly
89 | expect(stats.filesChecked).toBeGreaterThan(0)
90 | expect(stats.totalLinks).toBe(23)
91 | expect(stats.totalLinks).toBe( // NOTE(review): the right-hand side simplifies to totalLinks, so this assertion is an identity and cannot fail.
92 | stats.httpLinks +
93 | stats.fileLinks +
94 | (stats.totalLinks - stats.httpLinks - stats.fileLinks)
95 | )
96 | expect(stats.totalLinks).toBe(stats.correctLinks + stats.failedLinks) // Holds only when every link is 'error', 'alive' or 'assumed alive'; any other status is counted in neither bucket.
97 | expect(stats.correctLinks).toBeGreaterThanOrEqual(0)
98 | expect(stats.failedLinks).toBe(0)
99 | })
100 |
--------------------------------------------------------------------------------
/lib/batch-check-links.js:
--------------------------------------------------------------------------------
1 | import puppeteer from 'puppeteer'
2 | import url from 'url'
3 | import { checkFileExistence } from './check-file-links.js'
4 |
5 | function isUrl(s) { // Returns true when `s` is accepted by `new url.URL(s)`, false otherwise.
6 | try {
7 | new url.URL(s) // Constructed only for validation; the constructor throws on input it cannot parse, and the instance is discarded.
8 | return true
9 | } catch (err) {
10 | return false // Any constructor failure is reported as "not a URL" instead of being propagated.
11 | }
12 | }
13 |
14 | function createLinkStatus(link, status, statusCode, errorMessage = null) { // Builds the plain result record for one link node from the node and its check outcome.
15 | return {
16 | link: link.url, // The node's url value, copied unchanged.
17 | status,
18 | status_code: statusCode,
19 | line_number: link.position ? link.position.start.line : null, // null when the node carries no position.
20 | position: link.position, // Passed through as-is, so it is undefined/null when the node has no position.
21 | error_message: errorMessage, // null unless the caller supplies a message.
22 | }
23 | }
24 |
25 | async function processLink( // Navigates `page` to link.url and classifies the outcome; returns a createLinkStatus() record.
26 | link,
27 | page,
28 | aliveStatusCodes,
29 | httpHeaders,
30 | followRedirects
31 | ) {
32 | let status = null
33 | let statusCode = null
34 | let errorMessage = null
35 |
36 | try {
37 | if (isUrl(link.url)) { // Non-URL links skip navigation entirely, leaving status and statusCode as null.
38 | const headers = // Headers of the first httpHeaders entry with a url pattern that is a substring of link.url; {} when none match.
39 | httpHeaders.find((header) =>
40 | header.url.some((urlPattern) => link.url.includes(urlPattern))
41 | )?.headers || {}
42 |
43 | const response = await page.goto(link.url, { // NOTE(review): Puppeteer's documented goto() options do not appear to include `headers`; confirm these are actually sent (page.setExtraHTTPHeaders may be needed).
44 | waitUntil: 'load', // Puppeteer follows redirects by default.
45 | headers,
46 | })
47 | statusCode = response.status() // NOTE(review): if goto() resolves to null, this throws and is reported through the catch below as a generic error — confirm that is intended.
48 | const redirectChain = response.request().redirectChain()
49 |
50 | if (!followRedirects && redirectChain.length > 0) {
51 | // If followRedirects is false and there was a redirect
52 | status = 'error'
53 | const originalStatusCode = redirectChain[0].response().status() // Status of the first hop in the redirect chain.
54 | errorMessage = `Link redirected (from ${redirectChain[0].url()} status: ${originalStatusCode} to ${response.url()}), but followRedirects is set to false.`
55 | // We might want to use the original redirect status code if available and makes sense
56 | statusCode = originalStatusCode !== 0 ? originalStatusCode : statusCode // Keeps the final response's status only when the first hop reported 0.
57 | } else if (aliveStatusCodes && aliveStatusCodes.includes(statusCode)) { // Checked before response.ok(), so a listed code yields 'assumed alive' even if it is also a success code.
58 | status = 'assumed alive'
59 | } else {
60 | status = response.ok() ? 'alive' : 'error'
61 | }
62 | }
63 | } catch (error) { // Every failure inside the try block is converted into an 'error' result rather than rethrown.
64 | status = 'error'
65 | errorMessage = error.message
66 | }
67 |
68 | return createLinkStatus(link, status, statusCode, errorMessage)
69 | }
70 |
71 | async function checkHyperlinks(nodes, options = {}, filePath) {
72 | const {
73 | batchSize = 100,
74 | retryCount = 3,
75 | aliveStatusCodes,
76 | httpHeaders = [],
77 | followRedirects = true, // Default to true if not provided
78 | } = options
79 | const linkStatusList = []
80 | const tempArray = []
81 |
82 | const filteredNodes = nodes.filter(
83 | (node) =>
84 | node.type === 'link' ||
85 | node.type === 'definition' ||
86 | node.type === 'image'
87 | )
88 |
89 | // First pass to check the links with default fetch
90 | for (let link of filteredNodes) {
91 | try {
92 | if (isUrl(link.url)) {
93 | const fetchOptions = {
94 | method: 'HEAD',
95 | redirect: followRedirects ? 'follow' : 'manual',
96 | }
97 | const response = await fetch(link.url, fetchOptions)
98 | const statusCode = response.status
99 | let message = null
100 |
101 | // Handle manual redirect: if followRedirects is false and a redirect occurs
102 | if (
103 | !followRedirects &&
104 | (response.type === 'opaqueredirect' ||
105 | [301, 302, 307, 308].includes(statusCode))
106 | ) {
107 | const redirectedTo = response.headers.get('location')
108 | const errorMessage = `Link redirected${redirectedTo ? ' to ' + redirectedTo : ''}, but followRedirects is set to false.`
109 | const linkStatus = createLinkStatus(
110 | link,
111 | 'error',
112 | statusCode === 0 && response.type === 'opaqueredirect'
113 | ? 302
114 | : statusCode, // Use 302 for opaque, else actual
115 | errorMessage
116 | )
117 | linkStatusList.push(linkStatus)
118 | continue
119 | }
120 |
121 | if (response.ok) {
122 | message = response.redirected ? `redirected to ${response.url}` : null
123 | const linkStatus = createLinkStatus(
124 | link,
125 | 'alive',
126 | statusCode,
127 | message
128 | )
129 | linkStatusList.push(linkStatus)
130 | continue
131 | } else if (aliveStatusCodes && aliveStatusCodes.includes(statusCode)) {
132 | const linkStatus = createLinkStatus(link, 'assumed alive', statusCode)
133 | linkStatusList.push(linkStatus)
134 | continue
135 | } else {
136 | // If not ok, and not an explicit redirect handled above, or not in aliveStatusCodes
137 | tempArray.push(link)
138 | }
139 | } else {
140 | const fileStatus = checkFileExistence(link, filePath)
141 | const linkStatus = createLinkStatus(
142 | link,
143 | fileStatus.status,
144 | fileStatus.statusCode,
145 | fileStatus.errorMessage
146 | )
147 | linkStatusList.push(linkStatus)
148 | }
149 | } catch (error) {
150 | if (isUrl(link.url)) {
151 | tempArray.push(link)
152 | } else {
153 | const fileStatus = checkFileExistence(link, filePath)
154 | const linkStatus = createLinkStatus(
155 | link,
156 | fileStatus.status,
157 | fileStatus.statusCode,
158 | fileStatus.errorMessage
159 | )
160 | linkStatusList.push(linkStatus)
161 | }
162 | }
163 | }
164 |
165 | // Second pass to check the failed links with puppeteer
166 | if (tempArray.length > 0) {
167 | const browser = await puppeteer.launch({
168 | headless: 'new',
169 | args: ['--disable-features=DialMediaRouteProvider'],
170 | })
171 | for (let i = 0; i < tempArray.length; i += batchSize) {
172 | const batch = tempArray.slice(i, i + batchSize)
173 | const promises = batch.map(async (link) => {
174 | const page = await browser.newPage()
175 | await page.setUserAgent(
176 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36'
177 | )
178 |
179 | await page.setRequestInterception(true)
180 | page.on('request', (request) => {
181 | if (request.isInterceptResolutionHandled()) return
182 | const resourceType = request.resourceType()
183 | if (
184 | resourceType === 'font' ||
185 | resourceType === 'image' ||
186 | resourceType === 'media' ||
187 | resourceType === 'script' ||
188 | resourceType === 'stylesheet' ||
189 | resourceType === 'other' ||
190 | resourceType === 'websocket'
191 | ) {
192 | request.abort()
193 | } else {
194 | request.continue()
195 | }
196 | })
197 |
198 | let retryCountLocal = 0
199 | let linkStatus
200 |
201 | while (retryCountLocal < retryCount) {
202 | try {
203 | linkStatus = await processLink(
204 | link,
205 | page,
206 | aliveStatusCodes,
207 | httpHeaders,
208 | followRedirects // Pass followRedirects here
209 | )
210 | break
211 | } catch (error) {
212 | retryCountLocal++
213 | }
214 | }
215 |
216 | await page.close()
217 | linkStatusList.push(linkStatus)
218 | })
219 |
220 | await Promise.all(promises)
221 | }
222 | await browser.close()
223 | }
224 | return linkStatusList
225 | }
226 |
227 | export { checkHyperlinks }
228 |
--------------------------------------------------------------------------------
/lib/check-file-links.js:
--------------------------------------------------------------------------------
1 | import fs from 'fs'
2 | import path from 'path'
3 | import { unified } from 'unified'
4 | import remarkParse from 'remark-parse'
5 | import remarkGfm from 'remark-gfm'
6 | import { visit } from 'unist-util-visit'
7 | import GithubSlugger from 'github-slugger'
8 |
// Anchor ids found in each Markdown file, keyed by file path. Filled lazily so
// a file that is the target of many fragment links is parsed only once.
const fileCache = new Map()

// Matches the `name` or `id` attribute of an `<a>` tag; capture group 2 holds
// the attribute value.
const HTML_ANCHOR_PATTERN =
  /<a\s(?:[^>]*?\s)?(name|id)\s*=\s*["']([^"']+)["']/gi

/**
 * Collects every anchor id a fragment link may point at inside a Markdown file:
 * explicit heading anchors (`<a name="...">` or `{#id}` as the first heading
 * child), GitHub-style heading slugs, and `name`/`id` attributes of HTML `<a>`
 * tags anywhere in the document.
 *
 * @param {string} filePath - Path of the Markdown file to inspect.
 * @returns {Set<string>} The anchor ids found in the file.
 */
function collectAnchorIds(filePath) {
  const cached = fileCache.get(filePath)
  if (cached) {
    return cached
  }

  const tree = unified()
    .use(remarkParse)
    .use(remarkGfm)
    .parse(fs.readFileSync(filePath, 'utf8'))

  // One slugger per file: it numbers repeated headings (`title`, `title-1`, ...).
  const slugger = new GithubSlugger()
  const anchorIds = new Set()

  visit(tree, ['heading', 'html'], (node) => {
    if (node.type === 'heading') {
      // An empty heading has no children, hence the optional chaining.
      const firstChild = node.children[0]
      let explicitId
      if (firstChild?.type === 'html') {
        explicitId = firstChild.value.match(/name="(.+?)"/)?.[1]
      } else if (firstChild?.value?.includes('{#')) {
        explicitId = firstChild.value.match(/{#(.+?)}/)?.[1]
      }
      // Fall back to the slug whenever no explicit id could be extracted, so
      // the heading never registers `undefined`.
      anchorIds.add(explicitId ?? slugger.slug(getText(node)))
    } else {
      for (const anchorMatch of node.value.matchAll(HTML_ANCHOR_PATTERN)) {
        anchorIds.add(anchorMatch[2])
      }
    }
  })

  fileCache.set(filePath, anchorIds)
  return anchorIds
}

/**
 * Checks that the local file a link points to exists and, when the link has a
 * fragment, that the fragment resolves inside that file.
 *
 * The fragment is either a line reference (`L20` or `L23-L50`), validated
 * against the number of lines in the file, or an anchor id, validated against
 * the anchors collected from the parsed Markdown.
 *
 * Unexpected errors are logged and leave the result at its initial
 * "alive"/200 value.
 *
 * @param {Object} link - The link object; only `link.url` is read.
 * @param {string} file - Path of the file that contains the link.
 * @returns {{statusCode: string, status: string, errorMessage: string}}
 *   `'200'`/`'alive'` with an empty message on success, `'404'`/`'error'` with
 *   an explanatory message otherwise.
 */
function checkFileExistence(link, file) {
  let statusCode = '200'
  let status = 'alive'
  let errorMessage = ''

  try {
    // Only the text between the first and second '#' is treated as fragment.
    const [urlWithoutSection = '', sectionId = null] = link.url.split('#')

    // '/x' is resolved from the working directory, an empty path or the
    // current file's own name means "this file", anything else is relative to
    // the directory of the current file.
    let filePath
    if (urlWithoutSection.startsWith('/')) {
      filePath = path.join(process.cwd(), urlWithoutSection)
    } else if (
      urlWithoutSection === '' ||
      urlWithoutSection === path.basename(file)
    ) {
      filePath = file
    } else {
      filePath = path.resolve(path.dirname(file), urlWithoutSection)
    }

    if (!fs.existsSync(filePath)) {
      statusCode = '404'
      status = 'error'
      errorMessage = `Cannot find: ${link.url}`
    } else if (sectionId) {
      const lineReferenceMatch = sectionId.match(/^L(\d+)(?:-L(\d+))?$/)

      if (lineReferenceMatch) {
        const totalLineCount = fs
          .readFileSync(filePath, 'utf8')
          .split('\n').length
        const startLine = Number.parseInt(lineReferenceMatch[1], 10)
        const endLine = lineReferenceMatch[2]
          ? Number.parseInt(lineReferenceMatch[2], 10)
          : startLine

        if (endLine > totalLineCount) {
          statusCode = '404'
          status = 'error'
          errorMessage = `Cannot find Line ${endLine} in file: ${filePath}. File has ${totalLineCount} lines.`
        }
      } else {
        // A malformed percent-escape makes decodeURIComponent throw; compare
        // the raw fragment in that case instead of aborting the whole check.
        let decodedSectionId = sectionId
        try {
          decodedSectionId = decodeURIComponent(sectionId)
        } catch {
          decodedSectionId = sectionId
        }

        if (!collectAnchorIds(filePath).has(decodedSectionId)) {
          statusCode = '404'
          status = 'error'
          errorMessage = `Cannot find section: #${sectionId} in file: ${filePath}.`
        }
      }
    }
  } catch (err) {
    console.error(`Error in checking if file ${link.url} exist! ${err}`)
  }

  return { statusCode, status, errorMessage }
}
117 |
/**
 * Get the plain-text content of a node.
 *
 * `text` and `inlineCode` nodes contribute their `value`, `image` nodes their
 * `alt` text, and nodes with `children` the concatenation of their children's
 * text. Every other node contributes an empty string.
 *
 * @param {Object} node - The node object.
 * @returns {string} The text content of the node; always a string, even for
 *   an image without alt text.
 */
function getText(node) {
  if (node.type === 'image') {
    // `alt` can be null or missing; keep the string contract.
    return node.alt ?? ''
  }

  if (node.type === 'text' || node.type === 'inlineCode') {
    return node.value ?? ''
  }

  if (Array.isArray(node.children)) {
    return node.children.map(getText).join('')
  }

  return ''
}
138 |
139 | export { checkFileExistence }
140 |
--------------------------------------------------------------------------------
/lib/extract-asciidoc-links.js:
--------------------------------------------------------------------------------
1 | import fs from 'fs'
2 | import readline from 'readline'
3 | import { doReplacements } from './handle-links-modification.js'
4 |
/**
 * Builds the mdast-style node used to report one reference found on a line.
 *
 * @param {string} type - 'link', 'internal-ref' or 'external-ref'.
 * @param {string} url - Target stored on the node.
 * @param {number} lineNumber - 1-based number of the line being scanned.
 * @param {number} matchIndex - Index of the raw match within the line.
 * @param {number} prefixLength - Length of the markup in front of the target
 *   (for example 2 for `<<`), added to `matchIndex` to get the start column.
 * @param {number} textLength - Length of the target as written on the line.
 * @returns {Object} Node with `type`, `title`, `url`, `children`, `position`.
 */
function createAsciiDocNode(
  type,
  url,
  lineNumber,
  matchIndex,
  prefixLength,
  textLength
) {
  const startColumn = matchIndex + prefixLength
  return {
    type,
    title: null,
    url,
    children: [],
    position: {
      start: { line: lineNumber, column: startColumn, offset: matchIndex },
      end: {
        line: lineNumber,
        column: startColumn + textLength,
        offset: matchIndex + textLength,
      },
    },
  }
}

/**
 * Scans an AsciiDoc file line by line and collects the hyperlinks, anchors and
 * cross references it contains.
 *
 * Lines inside `////` comment blocks and lines starting with `//` are skipped.
 * Every remaining line is first scanned for plain URLs; after that, the first
 * construct that matches decides how the line is handled (bibliography
 * anchors, `[[anchor]]`, `[#id]`, `anchor:`, `<<xref>>`, `xref:`, `link:`,
 * bare URLs) and the remaining constructs are not looked at for that line.
 *
 * @param {string} filePath - Path of the AsciiDoc file to read.
 * @param {Object} [options] - Passed unchanged to `doReplacements`.
 * @returns {Promise<Array>} Resolves with the link nodes, followed by the
 *   internal and the external references, after `doReplacements` has been
 *   applied. Rejects when the file cannot be read.
 */
function extractAsciiDocLinks(filePath, options) {
  return new Promise((resolve, reject) => {
    const links = []
    const internalRefs = new Map()
    const externalRefs = new Map()

    let insideCommentBlock = false
    let lineNumber = 0

    const urlRegex =
      /(?:https?|ftp|irc|mailto):\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g

    // A link is only listed once per (url, line, start column).
    const addLink = (node) => {
      const alreadyListed = links.some(
        (existing) =>
          existing.url === node.url &&
          existing.position.start.line === node.position.start.line &&
          existing.position.start.column === node.position.start.column
      )
      if (!alreadyListed) {
        links.push(node)
      }
    }

    // References are keyed by target, so a later occurrence replaces an
    // earlier one.
    const addInternalRef = (name, matchIndex, prefixLength) => {
      internalRefs.set(
        name,
        createAsciiDocNode(
          'internal-ref',
          name,
          lineNumber,
          matchIndex,
          prefixLength,
          name.length
        )
      )
    }

    const addExternalRef = (url, matchIndex, prefixLength, textLength) => {
      externalRefs.set(
        url,
        createAsciiDocNode(
          'external-ref',
          url,
          lineNumber,
          matchIndex,
          prefixLength,
          textLength
        )
      )
    }

    const pointsToDocument = (reference) =>
      /(\.adoc)|(\.asciidoc)|(\.asc)|(#)/.test(reference)

    // Makes sure the document extension is followed by exactly one '#'.
    const withFragmentSeparator = (reference) =>
      reference.replace(
        /(\.adoc|\.asciidoc|\.asc)(#)?/,
        (_, extension) => extension + '#'
      )

    const handleLine = (line) => {
      lineNumber++

      // A `////` line opens or closes a comment block; the delimiter line
      // itself is dropped by one of the two checks below.
      if (line.startsWith('////')) {
        insideCommentBlock = !insideCommentBlock
      }
      if (insideCommentBlock || line.startsWith('//')) {
        return
      }

      // Plain URLs anywhere on the line.
      for (const match of line.matchAll(urlRegex)) {
        addLink(
          createAsciiDocNode(
            'link',
            match[0],
            lineNumber,
            match.index,
            0,
            match[0].length
          )
        )
      }

      // Bibliography anchors: `* [[[id]]]`. This must run before the
      // `[[id]]` check, which also matches these lines and would otherwise
      // record the anchor with a stray leading '['.
      if (/^\s*[*-]\s+\[\[\[[^\]]+\]\]\]/.test(line)) {
        for (const raw of line.match(/\[\[\[[^\]]+\]\]\]/g)) {
          const name = raw.slice(3, -3).replace(/,.*/g, '')
          addInternalRef(name, line.indexOf(raw), 3)
        }
        return
      }

      // Block anchors: `[[id]]` or `[[id,label]]`.
      const blockAnchors = line.match(/\[\[[^\]]+\]\]/g)
      if (blockAnchors) {
        for (const raw of blockAnchors) {
          const name = raw.slice(2, -2).replace(/,.*/g, '')
          addInternalRef(name, line.indexOf(raw), 2)
        }
        return
      }

      // Shorthand ids: `[#id]`.
      const shorthandIds = line.match(/\[#[^\]]+\]/g)
      if (shorthandIds) {
        for (const raw of shorthandIds) {
          addInternalRef(raw.slice(2, -1), line.indexOf(raw), 2)
        }
        return
      }

      // Anchor macros: `anchor:id[label]`. Only the id in front of the
      // bracketed label is the anchor name.
      const anchorMacros = line.match(/(anchor:[^\[]+)\[[^\]]*\]/g)
      if (anchorMacros) {
        for (const raw of anchorMacros) {
          const name = raw.slice('anchor:'.length, raw.indexOf('['))
          addInternalRef(name, line.indexOf(raw), 7)
        }
        return
      }

      // Cross references: `<<target>>` or `<<target,label>>`.
      const crossReferences = line.match(/<<[^\>]+>>/g)
      if (crossReferences) {
        for (const raw of crossReferences) {
          const target = raw.slice(2, -2).replace(/,.*/g, '')
          const matchIndex = line.indexOf(raw)
          if (!target.startsWith('#') && pointsToDocument(target)) {
            addExternalRef(
              withFragmentSeparator(target),
              matchIndex,
              2,
              target.length
            )
          } else {
            addInternalRef(target, matchIndex, 2)
          }
        }
        return
      }

      // Xref macros: `xref:target[label]`.
      const xrefMacros = line.match(/xref:[^\[]+\[[^\]]*\]/g)
      if (xrefMacros) {
        for (const raw of xrefMacros) {
          const target = raw.slice('xref:'.length, raw.indexOf('['))
          const matchIndex = line.indexOf(raw)
          if (pointsToDocument(target)) {
            addExternalRef(
              withFragmentSeparator(target),
              matchIndex,
              5,
              target.length
            )
          } else {
            addInternalRef(target, matchIndex, 5)
          }
        }
        return
      }

      // Link macros: `link:target[label]`.
      const linkMacros = line.match(/link:[^\[]+\[[^\]]*\]/g)
      if (linkMacros) {
        for (const raw of linkMacros) {
          const target = raw.slice('link:'.length, raw.indexOf('['))
          const matchIndex = line.indexOf(raw)
          // NOTE(review): for http(s) this pattern requires `https://://`, so
          // such targets always take the external-ref branch below; they are
          // still collected as links by the plain-URL scan above. Left
          // unchanged because accepting them here would change which URLs are
          // checked - confirm the intent before fixing.
          if (/^(https?:\/\/|ftp|irc|mailto):\/\//.test(target)) {
            if (target.startsWith('http')) {
              addLink(
                createAsciiDocNode(
                  'link',
                  target,
                  lineNumber,
                  matchIndex,
                  5,
                  target.length
                )
              )
            } else {
              addExternalRef(target, matchIndex, 5, target.length)
            }
          } else {
            // Drop the fragment of HTML targets: `page.html#sec` -> `page.html`.
            addExternalRef(
              target.replace(/(\.html?5?)#.*/, '$1'),
              matchIndex,
              5,
              target.length
            )
          }
        }
        return
      }

      // Bare URLs, including `file:` URLs the plain-URL scan does not cover.
      if (
        /(?:^|<|[\s>\(\)\[\];])((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/.test(
          line
        )
      ) {
        const bareUrls = line.match(
          /((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g
        )
        for (const raw of bareUrls) {
          addLink(
            createAsciiDocNode(
              'link',
              raw,
              lineNumber,
              line.indexOf(raw),
              0,
              raw.length
            )
          )
        }
      }
    }

    const input = fs.createReadStream(filePath)
    const rl = readline.createInterface({ input, crlfDelay: Infinity })

    // Without this listener a missing or unreadable file would leave the
    // promise unsettled.
    input.on('error', (err) => {
      reject(err)
      rl.close()
    })

    rl.on('line', handleLine)
    rl.on('close', () => {
      try {
        const result = [
          ...links,
          ...internalRefs.values(),
          ...externalRefs.values(),
        ]
        resolve(doReplacements(result, options))
      } catch (err) {
        reject(err)
      }
    })
  })
}
445 |
446 | export { extractAsciiDocLinks }
447 |
--------------------------------------------------------------------------------
/lib/extract-markdown-hyperlinks.js:
--------------------------------------------------------------------------------
1 | import { unified } from 'unified'
2 | import remarkParse from 'remark-parse'
3 | import remarkGfm from 'remark-gfm'
4 | import { visit } from 'unist-util-visit'
5 | import { doReplacements } from './handle-links-modification.js'
6 |
/**
 * Extracts the link-bearing nodes from a Markdown string.
 *
 * The text is parsed with remark (including the GFM extension) and every
 * `link`, `definition` and `image` node of the resulting tree is collected in
 * the order `visit` reports it. The collected nodes are then handed to
 * `doReplacements`, which applies the ignore patterns, replacement patterns
 * and base URL found in `options`.
 *
 * See https://github.com/syntax-tree/mdast for the node types.
 *
 * @param {string} markdownText - The Markdown source to scan.
 * @param {Object} [options] - Settings forwarded to `doReplacements`
 *   (`ignorePatterns`, `replacementPatterns`, `baseUrl`).
 * @returns {Array} Whatever `doReplacements` returns for the collected nodes.
 */
function extractMarkdownHyperlinks(markdownText, options) {
  const markdownParser = unified().use(remarkParse).use(remarkGfm)
  const syntaxTree = markdownParser.parse(markdownText)

  const collectedNodes = []
  const wantedTypes = ['link', 'definition', 'image']
  visit(syntaxTree, wantedTypes, (node) => {
    // Keep a block body: the visitor's return value steers the traversal.
    collectedNodes.push(node)
  })

  return doReplacements(collectedNodes, options)
}
30 |
31 | export { extractMarkdownHyperlinks }
32 |
--------------------------------------------------------------------------------
/lib/get-unique-links.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | export { getUniqueLinks }
4 |
/**
 * Returns the first node seen for each distinct URL.
 *
 * Only `link`, `definition` and `image` nodes with a non-empty `url` are
 * considered, and `mailto:` URLs are left out.
 *
 * @param {Iterable<Object>} astNodes - Nodes to de-duplicate.
 * @returns {Array<Object>} The retained nodes, in their original order.
 */
function getUniqueLinks(astNodes) {
  const seenUrls = new Set()
  const uniqueNodes = []

  for (const candidate of astNodes) {
    const { type, url } = candidate
    const isLinkLike =
      type === 'link' || type === 'definition' || type === 'image'

    if (!isLinkLike || !url) continue
    if (seenUrls.has(url) || url.startsWith('mailto:')) continue

    seenUrls.add(url)
    uniqueNodes.push(candidate)
  }

  return uniqueNodes
}
24 |
--------------------------------------------------------------------------------
/lib/handle-links-modification.js:
--------------------------------------------------------------------------------
/**
 * Filters the given nodes and rewrites their URLs according to the options.
 *
 * For every node, in order: the node is dropped when its URL matches one of
 * the ignore patterns; otherwise the base URL is prefixed to a URL starting
 * with '/', each replacement pattern is applied in turn, and the resulting URL
 * is written back to `node.url`.
 *
 * Patterns are compiled once up front. A pattern is skipped (with a warning)
 * when it contains one of a few nested-quantifier shapes, is longer than 100
 * characters, or is not a valid regular expression.
 *
 * @param {Array} nodes - The nodes to be processed; kept nodes are mutated.
 * @param {Object} [opts={}] - The options for modifying the URLs.
 * @param {Array} [opts.ignorePatterns=[]] - Objects whose `pattern` selects URLs to drop.
 * @param {Array} [opts.replacementPatterns=[]] - Objects with `pattern` and `replacement`.
 * @param {string} [opts.baseUrl] - The base URL to be prefixed to URLs that start with '/'.
 *
 * @returns {Array} The nodes that were not ignored.
 */

function doReplacements(nodes, opts = {}) {
  const { ignorePatterns = [], replacementPatterns = [], baseUrl } = opts

  // Turns a pattern string into a RegExp, or null when it is rejected.
  const compilePattern = (pattern) => {
    try {
      const looksCatastrophic =
        pattern.includes('(.*)*') ||
        pattern.includes('(.+)+') ||
        pattern.match(/\([^)]+\)\+\+/) ||
        pattern.match(/\(\[.*?\]\+\)\+/) ||
        pattern.match(/\(a\+\)\+/)

      if (looksCatastrophic) {
        console.warn(`Potentially unsafe regex pattern detected: ${pattern}`)
        return null
      }

      if (pattern.length > 100) {
        console.warn(
          `Pattern exceeds maximum safe length: ${pattern.substring(0, 50)}...`
        )
        return null
      }

      return new RegExp(pattern)
    } catch (e) {
      console.warn(`Invalid regex pattern: ${pattern}. Error: ${e.message}`)
      return null
    }
  }

  const ignoreRegexes = []
  for (const { pattern } of ignorePatterns) {
    const compiled = compilePattern(pattern)
    if (compiled) {
      ignoreRegexes.push(compiled)
    }
  }

  const replacementRegexes = []
  for (const { pattern, replacement } of replacementPatterns) {
    const compiled = compilePattern(pattern)
    if (compiled) {
      replacementRegexes.push({ regex: compiled, replacement })
    }
  }

  const matchesIgnorePattern = (candidateUrl) =>
    ignoreRegexes.some((regex) => {
      try {
        return regex.test(candidateUrl)
      } catch (e) {
        console.warn(`Error testing URL against pattern: ${e.message}`)
        return false
      }
    })

  return nodes.filter((node) => {
    const originalUrl = node.url

    if (matchesIgnorePattern(originalUrl)) {
      return false
    }

    let rewrittenUrl =
      baseUrl && originalUrl.startsWith('/')
        ? baseUrl + originalUrl
        : originalUrl

    for (const { regex, replacement } of replacementRegexes) {
      try {
        const candidate = rewrittenUrl.replace(regex, replacement)

        // A result that is both huge and more than three times the input is
        // discarded and the previous URL kept.
        const isSuspicious =
          candidate.length > rewrittenUrl.length * 3 && candidate.length > 2000
        if (isSuspicious) {
          console.warn(`Suspicious replacement result detected. Reverting.`)
        } else {
          rewrittenUrl = candidate
        }
      } catch (e) {
        console.warn(`Error replacing URL: ${e.message}`)
      }
    }

    node.url = rewrittenUrl
    return true
  })
}
102 |
103 | export { doReplacements }
104 |
--------------------------------------------------------------------------------
/lib/prepare-file-list.js:
--------------------------------------------------------------------------------
1 | import { readFileSync, existsSync } from 'fs'
2 | import { resolve, relative } from 'path'
3 | import { glob } from 'glob'
4 | import path from 'path'
5 | import ignore from 'ignore'
6 |
/**
 * Strips a single leading './' from a file path.
 * Paths that do not start with './' are returned unchanged.
 * @param {string} filePath - The file path to normalize.
 * @returns {string} The path without its leading './'.
 */
function normalizeFilePath(filePath) {
  const currentDirPrefix = './'
  return filePath.startsWith(currentDirPrefix)
    ? filePath.slice(currentDirPrefix.length)
    : filePath
}
18 |
/**
 * Applies the rules of the `.gitignore` file in the current working directory
 * to a list of file names.
 * When no `.gitignore` exists there, the input array is returned as is.
 * @param {string[]} filenames - An array of filenames to filter.
 * @returns {string[]} The filenames that the rules do not ignore.
 */
function filterFiles(filenames) {
  const gitignorePath = '.gitignore'

  if (existsSync(gitignorePath)) {
    const rules = readFileSync(gitignorePath, 'utf8')
    const matcher = ignore()
    matcher.add(rules)
    return matcher.filter(filenames)
  }

  return filenames
}
44 |
// Returns a new array holding each distinct value of `array` once, in order
// of first appearance. The input array is not modified.
function removeDuplicates(array) {
  return Array.from(new Set(array))
}
56 |
/**
 * Prepares the list of files to check, based on the configuration options.
 *
 * Files listed in `config.files` are taken when they exist and carry one of
 * the configured extensions; directories listed in `config.dirs` are searched
 * recursively with glob. The combined list is de-duplicated, optionally
 * filtered through `.gitignore` (`config.useGitIgnore === true`) and finally
 * reduced by `config.excludedFiles` and `config.excludedDirs`.
 *
 * @param {Object} config - The parsed YML config object.
 * @param {string[]} [config.files] - Individual files to include.
 * @param {string[]} [config.dirs] - Directories to search recursively.
 * @param {string[]} [config.excludedFiles] - Files to leave out.
 * @param {string[]} [config.excludedDirs] - Directories to leave out.
 * @param {string[]} [config.fileExtensions] - Extensions to accept (default: `['md']`).
 * @param {boolean} [config.useGitIgnore] - Whether to apply `.gitignore`.
 * @returns {string[]} Absolute file paths; an empty array when preparation fails.
 */
function prepareFilesList(config) {
  try {
    const trimAll = (values) =>
      values ? values.map((value) => value.trim()) : []

    const files = []
    const specifiedFiles = trimAll(config.files)
    const dirs = trimAll(config.dirs)
    const excludedFiles = trimAll(config.excludedFiles).map((file) =>
      normalizeFilePath(file)
    )
    // Trailing slashes are removed so that 'docs/' becomes the pattern
    // 'docs/**' below instead of 'docs//**'.
    const excludedDirs = trimAll(config.excludedDirs).map((dir) =>
      normalizeFilePath(dir).replace(/\/+$/, '')
    )

    // Set a default file extension to "md" if not defined
    const fileExtensions = config.fileExtensions || ['md']

    // Check if specified files exist and add them to the list
    for (const file of specifiedFiles) {
      const filePath = resolve(process.cwd(), file)

      if (!existsSync(filePath)) {
        console.warn(
          `ℹ️ The file "${file}" specified in the config does not exist.`
        )
        continue
      }

      if (files.includes(filePath)) {
        console.warn(
          `ℹ️ The file "${file}" specified in the config is already included.`
        )
        continue
      }

      // Extension without the leading dot
      const fileExtension = path.extname(filePath).substring(1)
      if (fileExtensions.includes(fileExtension)) {
        files.push(filePath)
      } else {
        console.warn(
          `ℹ️ The file "${file}" specified in the config does not have the correct extension. Use "fileExtensions" to configure the extensions.`
        )
      }
    }

    // Search all specified dirs recursively using glob
    const fileExtensionsGlob =
      fileExtensions.length > 1
        ? `{${fileExtensions.join(',')}}`
        : fileExtensions[0]

    for (const dir of dirs) {
      // Use the current working directory if dir is '.' or './'
      const directory =
        dir === '.' || dir === './' ? process.cwd() + '/' : dir

      if (existsSync(directory)) {
        files.push(
          ...glob.sync(
            path.posix.join(directory, '**', `*.${fileExtensionsGlob}`)
          )
        )
      } else {
        console.error(
          `ℹ️ The directory "${directory}" specified in the config does not exist.`
        )
      }
    }

    // Make the file paths relative to the current working directory
    let relativeFiles = removeDuplicates(
      files.map((file) => relative(process.cwd(), file))
    )

    // Use filterFiles function to filter the files based on .gitignore
    if (config.useGitIgnore === true) {
      relativeFiles = filterFiles(relativeFiles)
    }

    // Use the ignore module to filter out excluded files and directories specified in YAML
    const ig = ignore()
    ig.add(excludedFiles)
    ig.add(excludedDirs.map((dir) => dir + '/**')) // Include subdirectories of excludedDirs

    // The exclusion patterns are relative to the working directory, so a path
    // that leaves it cannot match them. Such paths are kept without asking the
    // matcher (assumed to reject them - TODO confirm against the installed
    // `ignore` version), which would otherwise abort the whole list.
    const isInsideCwd = (file) =>
      file !== '' &&
      !path.isAbsolute(file) &&
      file.split(/[\\/]/)[0] !== '..'

    relativeFiles = relativeFiles.filter(
      (file) => !isInsideCwd(file) || !ig.ignores(file)
    )

    // Convert back to absolute paths
    return relativeFiles.map((file) => resolve(process.cwd(), file))
  } catch (err) {
    // Handle any other errors that may occur
    console.error("Error: Couldn't prepare the list of files. ", err)
    return []
  }
}
163 |
164 | export { prepareFilesList }
165 |
--------------------------------------------------------------------------------
/lib/update-linkstatus-obj.js:
--------------------------------------------------------------------------------
'use strict'

/**
 * Updates the link status object with the given AST nodes and existing link status.
 *
 * Every AST node whose URL already has a status entry but sits at a different
 * position gets its own copy of that entry (so duplicates of a checked link are
 * reported too). Nodes without a matching status entry are added with empty
 * status fields, except `mailto:` links, which are marked as skipped.
 *
 * @param {Array} astNodes - The AST nodes to update the link status with.
 * Each node is an object with properties `url`, `position`, `title`, and `children`.
 *
 * @param {Array} linkStatus - The existing link status to update.
 * Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`.
 * Entries that lack a `position` are given a default one (line 1, column 1) in place.
 *
 * @returns {Array} The updated link status. Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`.
 * The returned array is sorted by line number and start column in ascending order;
 * entries without a position are sorted as if they started at line 1, column 1.
 */
function updateLinkStatusObj(astNodes, linkStatus) {
  // Fresh fallback position for anything that carries no position information.
  const defaultPosition = () => ({
    start: { line: 1, column: 1 },
    end: { line: 1, column: 1 },
  })

  const updatedLinkStatus = [...linkStatus]
  astNodes.forEach((node) => {
    const existingLink = linkStatus.find((link) => link.link === node.url)
    if (existingLink) {
      if (!existingLink.position) {
        console.error(
          `ERROR: Markdown formatting error around link: ${existingLink.link}. Please check the file containing this link.`
        )
        existingLink.position = defaultPosition()
      }

      const existingPosition = existingLink.position
      const nodePosition = node.position || defaultPosition()

      // Only add another entry when this node is a different occurrence of
      // the already-checked link.
      if (
        existingPosition.start.line !== nodePosition.start.line ||
        existingPosition.start.column !== nodePosition.start.column ||
        existingPosition.end.line !== nodePosition.end.line ||
        existingPosition.end.column !== nodePosition.end.column
      ) {
        updatedLinkStatus.push({
          ...existingLink,
          line_number: nodePosition.start.line,
          position: nodePosition,
        })
      }
    } else {
      let status = null
      let statusCode = null
      let errorMessage = null

      // Special handling for mailto links
      if (node.url && node.url.startsWith('mailto:')) {
        status = 'skipped'
        statusCode = 200
        errorMessage = 'Email links are not checked'
      }

      updatedLinkStatus.push({
        link: node.url,
        status: status,
        status_code: statusCode,
        line_number: node.position ? node.position.start.line : null,
        position: node.position,
        error_message: errorMessage,
        title: node.title,
        children: node.children,
      })
    }
  })

  // Entries added above may have no position (node.position was missing), so
  // the comparator must not dereference it blindly.
  const startOf = (entry) =>
    (entry.position && entry.position.start) || defaultPosition().start

  updatedLinkStatus.sort((a, b) => {
    const aStart = startOf(a)
    const bStart = startOf(b)
    if (aStart.line === bStart.line) {
      return aStart.column - bStart.column
    }
    return aStart.line - bStart.line
  })
  return updatedLinkStatus
}
80 |
81 | export { updateLinkStatusObj }
82 |
--------------------------------------------------------------------------------
/lib/validate-config.js:
--------------------------------------------------------------------------------
1 | import Joi from 'joi'
2 |
/**
 * Error raised when a configuration object does not satisfy the schema.
 * The message is the given prefix followed by every detail message,
 * comma-separated; the raw detail list is kept on `details`.
 */
class ValidationError extends Error {
  constructor(message, details) {
    const reasons = details.map(({ message: reason }) => reason)
    super(`${message}: ${reasons.join(', ')}`)
    this.details = details
    this.name = 'ValidationError'
  }
}
10 |
/**
 * Validates a parsed configuration object against the linkspector schema.
 *
 * At least one of `files` or `dirs` must be present. Any failure is logged to
 * stderr and then rethrown, so this function never resolves to `false`.
 *
 * @param {object} config - Parsed (YAML) configuration object.
 * @returns {Promise<boolean>} A promise that resolves to `true` when the configuration is valid.
 * @throws {ValidationError} When the configuration does not match the schema.
 */
async function validateConfig(config) {
  try {
    // Define the schema for validation
    const schema = Joi.object({
      files: Joi.array().items(Joi.string()),
      dirs: Joi.array().items(Joi.string()),
      excludedFiles: Joi.array().items(Joi.string()),
      excludedDirs: Joi.array().items(Joi.string()),
      fileExtensions: Joi.array().items(Joi.string()),
      baseUrl: Joi.string(),
      httpHeaders: Joi.array().items(
        Joi.object({
          url: Joi.array().items(Joi.string().uri()).required(),
          headers: Joi.object().pattern(Joi.string(), Joi.string()).required(),
        })
      ),
      aliveStatusCodes: Joi.array().items(Joi.number()),
      ignorePatterns: Joi.array().items(
        Joi.object({
          pattern: Joi.string().required(),
        })
      ),
      replacementPatterns: Joi.array().items(
        Joi.object({
          pattern: Joi.string().required(),
          replacement: Joi.string().required(),
        })
      ),
      //outputFormat: Joi.string(),
      //outputVerbosity: Joi.number().integer().min(1).max(5),
      //showErrorsOnly: Joi.boolean(),
      useGitIgnore: Joi.boolean(),
      modifiedFilesOnly: Joi.boolean(),
      followRedirects: Joi.boolean().default(true),
    }).or('files', 'dirs')

    // Validate the config against the schema
    const { error } = schema.validate(config)
    if (error) {
      throw new ValidationError('Incorrect configuration', error.details)
    }
    return true
  } catch (err) {
    // Pick the log prefix by error kind; every path rethrows the same error.
    if (err instanceof ValidationError) {
      console.error('Validation Error: ', err.message)
    } else if (err.message.includes('ENOENT: no such file or directory')) {
      console.error('Error reading file:', err.message)
    } else {
      console.error('Other Validation Error: ', err.message)
    }
    throw err
  }
}
72 |
73 | export { validateConfig }
74 |
--------------------------------------------------------------------------------
/linkspector.js:
--------------------------------------------------------------------------------
1 | import { execSync } from 'child_process'
2 | import { readFileSync } from 'fs'
3 | import path from 'path'
4 | import yaml from 'js-yaml'
5 | import dotenv from 'dotenv'
6 | import { validateConfig } from './lib/validate-config.js'
7 | import { prepareFilesList } from './lib/prepare-file-list.js'
8 | import { extractMarkdownHyperlinks } from './lib/extract-markdown-hyperlinks.js'
9 | import { extractAsciiDocLinks } from './lib/extract-asciidoc-links.js'
10 | import { getUniqueLinks } from './lib/get-unique-links.js'
11 | import { checkHyperlinks } from './lib/batch-check-links.js'
12 | import { updateLinkStatusObj } from './lib/update-linkstatus-obj.js'
13 |
14 | // Load environment variables from .env file
15 | dotenv.config()
16 |
/**
 * Replaces `${NAME}` placeholders anywhere in the configuration with the value
 * of the environment variable `NAME` (or an empty string when it is unset).
 *
 * The substitution is done on the JSON text of the configuration, so each
 * value is JSON-escaped first; otherwise a value containing a quote, backslash
 * or newline would corrupt the JSON and make parsing fail.
 *
 * @param {object} config - Configuration object; it is not modified.
 * @returns {object} A new configuration object with placeholders replaced.
 */
function replaceEnvVariables(config) {
  const configString = JSON.stringify(config)
  const replacedConfigString = configString.replace(
    /\$\{(\w+)\}/g,
    // slice(1, -1) drops the surrounding quotes JSON.stringify adds, leaving
    // only the escaped characters to splice into the existing string literal.
    (_, name) => JSON.stringify(process.env[name] || '').slice(1, -1)
  )
  return JSON.parse(replacedConfigString)
}
26 |
/**
 * Reports whether the `git` executable can be run.
 *
 * @returns {boolean} `true` when `git --version` runs without error, otherwise `false`.
 */
function isGitInstalled() {
  let available = true
  try {
    // Output is discarded; only success or failure of the command matters.
    execSync('git --version', { stdio: 'ignore' })
  } catch (probeError) {
    available = false
  }
  return available
}
36 |
/**
 * Checks the links of every file selected by the configuration and yields one
 * result object per file.
 *
 * When `configFile` does not exist, a default configuration (`dirs: ['.']`,
 * `useGitIgnore: true`) is used. When the configuration fails validation the
 * process exits with code 1.
 *
 * @param {string} configFile - Path to the YAML configuration file.
 * @param {object} [cmd] - Command options. Only `cmd.json` is read here: when
 *   truthy, informational console messages are suppressed or printed as JSON.
 * @yields {{file: string, result: Array}} The file path relative to the current
 *   working directory and the link status entries for that file.
 * @throws {Error} When the configuration file is empty, yields no usable YAML
 *   object, or cannot be read for a reason other than not existing. The
 *   original error is attached as `cause`.
 */
export async function* linkspector(configFile, cmd) {
  // `cmd` may be omitted by programmatic callers; treat that as non-JSON output.
  const jsonOutput = Boolean(cmd && cmd.json)

  //Use default configuration if no config file is specified
  let config = {}
  const defaultConfig = {
    dirs: ['.'],
    useGitIgnore: true,
  }

  try {
    const configContent = readFileSync(configFile, 'utf8')

    // Check if the YAML content is empty
    if (!configContent.trim()) {
      throw new Error('The configuration file is empty.')
    }

    // Parse the YAML content
    config = yaml.load(configContent)

    // Check if the parsed YAML object is missing or lacks properties.
    // (A file containing only comments parses to undefined, not null.)
    if (
      config === null ||
      config === undefined ||
      Object.keys(config).length === 0
    ) {
      throw new Error('Failed to parse the YAML content.')
    }

    // Replace environment variables in the configuration
    config = replaceEnvVariables(config)

    try {
      const isValid = await validateConfig(config)
      if (!isValid) {
        console.error('Validation failed!')
        process.exit(1)
      }
    } catch (error) {
      console.error(`💥 Error: Please check your configuration file.`)
      process.exit(1)
    }
  } catch (err) {
    if (err.code === 'ENOENT') {
      if (!jsonOutput) {
        console.log(
          'Configuration file not found. Using default configuration.'
        )
      }
      config = defaultConfig
    } else {
      // Same message as before; the original error stays reachable via `cause`.
      throw new Error(err, { cause: err })
    }
  }

  // Prepare the list of files to check
  let filesToCheck = prepareFilesList(config)

  // Convert all paths in filesToCheck to relative paths
  filesToCheck = filesToCheck.map((file) => path.relative(process.cwd(), file))

  // Check if only modified files should be checked
  if (config.modifiedFilesOnly) {
    // Check if git is installed
    if (!isGitInstalled()) {
      console.error(
        'Error: Git is not installed or not found in the system path.'
      )
      process.exit(1)
    }

    // Get the list of modified files from the last git commit
    const modifiedFiles = execSync('git diff --name-only HEAD HEAD~1', {
      encoding: 'utf8',
    }).split('\n')

    // Filter out files that are not in the list of files to check or do not have the correct extension
    const modifiedFilesToCheck = modifiedFiles.filter((file) => {
      const fileExtension = path.extname(file).substring(1).toLowerCase()
      return (
        filesToCheck.includes(file) &&
        (config.fileExtensions || ['md']).includes(fileExtension)
      )
    })

    // If no modified files are in the list of files to check, exit with a message
    if (modifiedFilesToCheck.length === 0) {
      if (jsonOutput) {
        console.log('{}')
      } else {
        console.log(
          'No modified files to check, skipping checking. To enable checking all files set modifiedFilesOnly: false and rerun the check.'
        )
      }
      process.exit(0)
    }

    // Otherwise, only check the modified files
    filesToCheck = modifiedFilesToCheck
  }

  // Process each file
  for (const file of filesToCheck) {
    const relativeFilePath = path.relative(process.cwd(), file)

    // Get the file extension without the leading dot and convert to lowercase
    const fileExtension = path.extname(file).substring(1).toLowerCase()

    let astNodes

    // AsciiDoc extraction is used only when the extension is an AsciiDoc one
    // AND it is listed in config.fileExtensions; everything else is parsed
    // as Markdown.
    if (
      ['asciidoc', 'adoc', 'asc'].includes(fileExtension) &&
      config.fileExtensions &&
      config.fileExtensions.includes(fileExtension)
    ) {
      astNodes = await extractAsciiDocLinks(file, config)
    } else {
      const fileContent = readFileSync(file, 'utf8')
      astNodes = extractMarkdownHyperlinks(fileContent, config)
    }

    // Get unique hyperlinks
    const uniqueLinks = getUniqueLinks(astNodes)

    // Check the status of hyperlinks
    const linkStatus = await checkHyperlinks(uniqueLinks, config, file)

    // Update linkStatusObjects with information about removed links
    const updatedLinkStatus = updateLinkStatusObj(astNodes, linkStatus)

    // Yield an object with the relative file path and its result
    yield {
      file: relativeFilePath,
      result: updatedLinkStatus,
    }
  }
}
170 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "@umbrelladocs/linkspector",
3 | "version": "0.4.5",
4 | "description": "Uncover broken links in your content.",
5 | "type": "module",
6 | "main": "linkspector.js",
7 | "repository": {
8 | "type": "git",
9 | "url": "git+https://github.com/UmbrellaDocs/linkspector.git"
10 | },
11 | "keywords": [
12 | "linkspector",
13 | "links analysis",
14 | "url inspection",
15 | "link checker",
16 | "link validation",
17 | "link verification",
18 | "link testing",
19 | "link testing tool",
20 | "link testing library",
21 | "link testing utility",
22 | "link testing package",
23 | "markdown link testing",
24 | "markdown link testing tool",
25 | "markdown link check",
26 | "markdown link checker",
27 | "markdown link validation",
28 | "markdown link verification",
29 | "asciidoc link testing",
30 | "asciidoc link testing tool",
31 | "asciidoc link check",
32 | "asciidoc link checker",
33 | "asciidoc link validation",
34 | "asciidoc link verification"
35 | ],
36 | "bin": {
37 | "linkspector": "./index.js"
38 | },
39 | "scripts": {
40 | "prettier:check": "npx prettier --check .",
41 | "prettier:format": "npx prettier --write .",
42 | "test": "vitest"
43 | },
44 | "author": "Gaurav Nelson",
45 | "license": "Apache-2.0",
46 | "bugs": {
47 | "url": "https://github.com/UmbrellaDocs/linkspector/issues"
48 | },
49 | "homepage": "https://github.com/UmbrellaDocs/linkspector#readme",
50 | "dependencies": {
51 | "commander": "^14.0.0",
52 | "dotenv": "^16.5.0",
53 | "github-slugger": "^2.0.0",
54 | "glob": "^11.0.2",
55 | "ignore": "^7.0.4",
56 | "joi": "^17.13.3",
57 | "js-yaml": "^4.1.0",
58 | "kleur": "^4.1.5",
59 | "ora": "^8.2.0",
60 | "puppeteer": "^24.9.0",
61 | "remark-gfm": "^4.0.1",
62 | "remark-parse": "^11.0.0",
63 | "unified": "^11.0.5",
64 | "unist-util-visit": "^5.0.0"
65 | },
66 | "devDependencies": {
67 | "vitest": "^3.1.4"
68 | }
69 | }
70 |
--------------------------------------------------------------------------------
/scripts/apparmorfix.sh:
--------------------------------------------------------------------------------
#!/bin/sh
# Prepares the machine for the test run and then runs it:
#   1. on release 24.04, turns off the AppArmor restriction on unprivileged
#      user namespaces (see the Chromium document linked below),
#   2. installs the NPM packages,
#   3. runs the test suite.
# Each step is wrapped in ::group:: / ::endgroup:: log markers.
set -e

# Print the opening marker of a log group; $1 is the group title.
begin_group() {
  printf '%s\n' "::group::🔗💀 $1"
}

# Print the closing marker of the current log group.
end_group() {
  printf '%s\n' '::endgroup::'
}

begin_group 'Setting up Chrome Linux Sandbox'
# Based on the instructions found here: https://chromium.googlesource.com/chromium/src/+/main/docs/security/apparmor-userns-restrictions.md
if test "$(lsb_release -rs)" = "24.04"; then
  printf '0\n' | sudo tee /proc/sys/kernel/apparmor_restrict_unprivileged_userns
  printf '%s\n' 'Done'
fi
end_group

begin_group 'Installing NPM packages'
npm ci
end_group

begin_group 'Running tests'
npm run test
end_group
19 |
--------------------------------------------------------------------------------
/test/fixtures/asciidoc/hyperlinks/asciidoc-hyperlinks.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options passed to linkspector; the result rows below are only collected
// while `json` is truthy.
const cmd = { json: true }

test('linkspector should check hyperlinks in AsciiDoc files', async () => {
  const configPath = './test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml'
  const results = [] // One flattened row per reported link
  let hasErrorLinks = false

  for await (const { file, result } of linkspector(configPath, cmd)) {
    result.forEach((entry) => {
      if (cmd.json) {
        const {
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        } = entry
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      hasErrorLinks = hasErrorLinks || entry.status === 'error'
    })
  }

  // The fixture repeats one broken and one working link on two lines.
  expect(hasErrorLinks).toBe(true)
  expect(results.length).toBe(4)
  expect(results.map(({ status }) => status)).toEqual([
    'error',
    'alive',
    'error',
    'alive',
  ])
})
43 |
--------------------------------------------------------------------------------
/test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/asciidoc/hyperlinks/
3 | fileExtensions:
4 | - adoc
5 | useGitIgnore: true
6 |
--------------------------------------------------------------------------------
/test/fixtures/asciidoc/hyperlinks/testhyperlinks1.adoc:
--------------------------------------------------------------------------------
1 | Got to (http://www.yttftfftx.com) or [Google](http://www.google.com).
2 |
3 | Got to (http://www.yttftfftx.com) or [Google](http://www.google.com).
4 |
--------------------------------------------------------------------------------
/test/fixtures/headers.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test, vi, beforeEach } from 'vitest'
2 | import { checkHyperlinks } from '../../lib/batch-check-links.js'
3 |
/**
 * Test-local environment variable substitution: replaces `${NAME}`
 * placeholders in `obj` with `process.env.NAME` (empty string when unset).
 *
 * Values are JSON-escaped before being spliced into the JSON text, so a value
 * containing a quote, backslash or newline cannot break the parse step.
 *
 * @param {object} obj - Object to process; it is not modified.
 * @returns {object} A new object with the placeholders replaced.
 */
function replaceEnvVariables(obj) {
  const objString = JSON.stringify(obj)
  const replacedObjString = objString.replace(
    /\$\{(\w+)\}/g,
    // slice(1, -1) strips the quotes added by JSON.stringify, keeping the escapes.
    (_, name) => JSON.stringify(process.env[name] || '').slice(1, -1)
  )
  return JSON.parse(replacedObjString)
}
13 |
// Mock puppeteer so no real browser is started. The fake module exposes
// launch() -> browser { newPage, close } and newPage() -> page with the
// methods stubbed below. Only goto() does real work: it records the headers
// it was called with and returns a response-like object reporting 200 / ok.
vi.mock('puppeteer', () => {
  return {
    default: {
      launch: vi.fn().mockImplementation(() => {
        return {
          newPage: vi.fn().mockImplementation(() => {
            return {
              setUserAgent: vi.fn(),
              setRequestInterception: vi.fn(),
              on: vi.fn(),
              goto: vi.fn().mockImplementation((url, options) => {
                // Track which headers were passed
                // (capturedHeaders is declared further down; it is only
                // touched when goto() is actually called by a test.)
                capturedHeaders = options.headers || {}

                return {
                  status: vi.fn().mockReturnValue(200),
                  ok: vi.fn().mockReturnValue(true),
                }
              }),
              close: vi.fn(),
            }
          }),
          close: vi.fn(),
        }
      }),
    },
  }
})

// Variable to capture headers passed to page.goto
// (written by the mocked goto() above, read by the assertions in each test)
let capturedHeaders = {}

beforeEach(() => {
  // Reset captured headers before each test so one test cannot see the
  // headers recorded by a previous one
  capturedHeaders = {}

  // Reset mocks
  vi.clearAllMocks()
})
54 |
test('applies correct HTTP headers based on URL patterns', async () => {
  // Headers configured for https://example1.com, which the link below matches
  const expectedHeaders = {
    Authorization: 'Bearer token123',
    'X-Custom-Header': 'CustomValue',
  }

  const linkNodes = [
    {
      type: 'link',
      url: 'https://example1.com/test',
      position: { start: { line: 1, column: 1 }, end: { line: 1, column: 30 } },
    },
  ]

  // The config gets its own copy so the expectation is compared against an
  // independent object.
  const config = {
    httpHeaders: [
      { url: ['https://example1.com'], headers: { ...expectedHeaders } },
    ],
  }

  await checkHyperlinks(linkNodes, config, '/path/to/file')

  // The mocked page.goto must have received exactly the configured headers
  expect(capturedHeaders).toEqual(expectedHeaders)
})
84 |
test('applies no headers when URL does not match patterns', async () => {
  // A link on a domain that none of the configured URL prefixes cover
  const linkNodes = [
    {
      type: 'link',
      url: 'https://different-domain.com/test',
      position: { start: { line: 1, column: 1 }, end: { line: 1, column: 30 } },
    },
  ]

  const headerRule = {
    url: ['https://example1.com', 'https://example2.com'],
    headers: {
      Authorization: 'Bearer token123',
      'X-Custom-Header': 'CustomValue',
    },
  }
  const config = { httpHeaders: [headerRule] }

  await checkHyperlinks(linkNodes, config, '/path/to/file')

  // Nothing should have been passed to the mocked page.goto
  expect(capturedHeaders).toEqual({})
})
111 |
test('supports environment variable substitution in headers', async () => {
  // Mock process.env
  const originalEnv = process.env
  process.env = {
    ...originalEnv,
    AUTH_TOKEN: 'supersecrettoken',
  }

  try {
    // Prepare test data
    const nodes = [
      {
        type: 'link',
        url: 'https://example3.com/api',
        position: {
          start: { line: 1, column: 1 },
          end: { line: 1, column: 30 },
        },
      },
    ]

    let httpHeaders = [
      {
        url: ['https://example3.com'],
        headers: {
          Authorization: 'Bearer ${AUTH_TOKEN}',
          'X-API-Key': 'fixed-value',
        },
      },
    ]

    // Process environment variables in headers similar to what linkspector.js does
    httpHeaders = replaceEnvVariables(httpHeaders)

    // Run the function
    await checkHyperlinks(nodes, { httpHeaders }, '/path/to/file')

    // Verify the headers with environment variable substitution
    expect(capturedHeaders).toEqual({
      Authorization: 'Bearer supersecrettoken',
      'X-API-Key': 'fixed-value',
    })
  } finally {
    // Restore original env even when an assertion above fails, so the
    // replaced environment cannot leak into later tests
    process.env = originalEnv
  }
})
154 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/decoded-sections/.decodedTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/decoded-sections
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/decoded-sections/decoded.md:
--------------------------------------------------------------------------------
1 | A távolsági - lő- és hajítófegyverekkel - végzett harc során a védekező fél nem saját Védő Értékével vesz részt a harcban, ugyanolyan “céltárgynak” minősül, mint egy szalmabábú, vagy egy agyaggalamb. Ugyanakkor a célpont mozgásának jellege (lásd “Mozgás módosító" fejezetet) és a távolság erőteljesen befolyásolják a találat esélyeit. Lásd még: [Szándékos kitérés lövés elől](decoded.md#sz%C3%A1nd%C3%A9kos-kit%C3%A9r%C3%A9s-l%C3%B6v%C3%A9s-el%C5%91l) fejezetet. A támadó a távolsági harcban a **Célzó Értékét** használja, melynek megállapítása több tényezőtől függ.
2 |
3 | [Bevezető](#0-bevezet%C5%91-jelz%C5%91k)
4 |
5 | Also a link to check [the README Documentation Section](#-documentation)
6 |
7 | ## 📖 Documentation
8 |
9 | This is the documentation section. It is a simple markdown file with some links and references to other sections.
10 |
11 | ### Szándékos kitérés lövés elől
12 |
13 | Ha valaki látja és van ideje felkészülni a rá leadott lövésre/hajításra, valamint rendelkezik elegendő hellyel a kitérésre és bejelenti, hogy megpróbálja elkerülni végzetét, akkor Gyorsaságpróbát kell dobnia, melynek nehézsége függ a lövést leadó személy távolságától, valamint az általa használt fegyvertől.
14 |
15 | | Gyorsaságpróba célszám | Dobófegyverek,
Mágikus lövedék I. | Íjak,
Mágikus lövedék II. | Nyílpuskák,
Mágikus lövedék III. |
16 | | :--------------------: | -------------------------------------: | -----------------------------: | ------------------------------------: |
17 | | 8 | 1m - 3m | 0m - 5m | 0m - 10m |
18 | | 7 | 4m - 6m | 6m - 10m | 11m - 20m |
19 | | 6 | 7m - 9m | 11m - 15m | 21m - 30m |
20 | | 5 | 10m - 12m | 16m - 20m | 31m - 40m |
21 | | 4 | Testközelben | | |
22 |
23 | ### 0. [Bevezető, jelzők](decoded2.md)
24 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/decoded-sections/decoded2.md:
--------------------------------------------------------------------------------
1 | # Decoded 2
2 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/decoded-sections/markdown-decoded-sections.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options passed to linkspector; the result rows below are only collected
// while `json` is truthy.
const cmd = { json: true }

test('linkspector should check HTML encoded section links', async () => {
  const configPath =
    './test/fixtures/markdown/decoded-sections/.decodedTest.yml'
  const results = [] // One flattened row per reported link
  let hasErrorLinks = false

  for await (const { file, result } of linkspector(configPath, cmd)) {
    result.forEach((entry) => {
      if (cmd.json) {
        const {
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        } = entry
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      hasErrorLinks = hasErrorLinks || entry.status === 'error'
    })
  }

  // All four links in the fixture are expected to resolve
  expect(hasErrorLinks).toBe(false)
  expect(results.length).toBe(4)
  expect(results.map(({ status }) => status)).toEqual([
    'alive',
    'alive',
    'alive',
    'alive',
  ])
})
43 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/duplicates/duplicate1.md:
--------------------------------------------------------------------------------
1 | This is a link to [Google](https://www.google.com).
2 |
3 | this is a link to [Yahoo](https://www.yahoo434234esdsadasd.com).
4 |
5 | This is a link to [Google](https://www.google.com) again.
6 |
7 | this is a link to [Yahoo](https://www.yahoo434234esdsadasd.com) again.
8 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/duplicates/duplicateTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/duplicates
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/duplicates/markdown-duplicates.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options passed to linkspector; the result rows below are only collected
// while `json` is truthy.
const cmd = { json: true }

test('linkspector should add back the removed duplicates when returning the results', async () => {
  const configPath = './test/fixtures/markdown/duplicates/duplicateTest.yml'
  const results = [] // One flattened row per reported link
  let hasErrorLinks = false

  for await (const { file, result } of linkspector(configPath, cmd)) {
    result.forEach((entry) => {
      if (cmd.json) {
        const {
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        } = entry
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      hasErrorLinks = hasErrorLinks || entry.status === 'error'
    })
  }

  // The fixture uses each of its two links twice; every occurrence must be
  // reported, in document order.
  expect(hasErrorLinks).toBe(true)
  expect(results.length).toBe(4)
  expect(results.map(({ status }) => status)).toEqual([
    'alive',
    'error',
    'alive',
    'error',
  ])
})
43 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/headings/heading1.md:
--------------------------------------------------------------------------------
1 | # Headings test
2 |
3 | This file is a test for the headings.
4 |
5 | ## Heading with **bold text**
6 |
7 | Paragraph with **bold text**. Link to heading with bold text: [Heading with **bold text**](#heading-with-bold-text).
8 |
9 | ## Heading with emoji 🎉
10 |
11 | Paragraph with emoji 🎉. Link to heading with emoji: [Heading with emoji](#heading-with-emoji-)
12 |
13 | ## Heading with _italic text_
14 |
15 | Paragraph with _italic text_. Link to heading with italic text: [Heading with _italic text_](#heading-with-italic-text).
16 |
17 | ## Heading with `code`
18 |
19 | Paragraph with `code`. Link to heading with code: [Heading with `code`](#heading-with-code).
20 |
21 | ## Heading with [link](#headings-test)
22 |
23 | Paragraph with link to heading with link: [Heading with link](#heading-with-link).
24 |
25 | ## Heading with 
26 |
27 | Paragraph with link to heading with image: [Heading with image](#heading-with-image).
28 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/headings/headingsTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/headings
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/headings/markdown-headings.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options passed to linkspector; the result rows below are only collected
// while `json` is truthy.
const cmd = { json: true }

test('linkspector should check links to headings with inline bold, italic, code, link, and image elements', async () => {
  const configPath = './test/fixtures/markdown/headings/headingsTest.yml'
  const results = [] // One flattened row per reported link
  let hasErrorLinks = false

  for await (const { file, result } of linkspector(configPath, cmd)) {
    result.forEach((entry) => {
      if (cmd.json) {
        const {
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        } = entry
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      hasErrorLinks = hasErrorLinks || entry.status === 'error'
    })
  }

  // Every heading link in the fixture should resolve
  expect(hasErrorLinks).toBe(false)
  expect(results.length).toBe(8)
})
39 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/image/image.md:
--------------------------------------------------------------------------------
1 | # Check image links
2 |
3 | Working link:
4 | 
5 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/image/imageTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/image
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/image/markdown-image.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options passed to linkspector; the result rows below are only collected
// while `json` is truthy.
const cmd = { json: true }

test('linkspector should check image links in Markdown file', async () => {
  const configPath = './test/fixtures/markdown/image/imageTest.yml'
  const results = [] // One flattened row per reported link
  let hasErrorLinks = false

  for await (const { file, result } of linkspector(configPath, cmd)) {
    result.forEach((entry) => {
      if (cmd.json) {
        const {
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        } = entry
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      hasErrorLinks = hasErrorLinks || entry.status === 'error'
    })
  }

  // The fixture contains a single, working image link
  expect(hasErrorLinks).toBe(false)
  expect(results.length).toBe(1)

  const [onlyResult] = results
  expect(onlyResult.link).toBe(
    'https://commons.wikimedia.org/wiki/Main_Page#/media/File:Praia_do_Ribeiro_do_Cavalo2.jpg'
  )
  expect(onlyResult.status).toBe('alive')
})
43 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/line-references/.lineReferencesTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/line-references
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/line-references/line-file.md:
--------------------------------------------------------------------------------
1 | # Target File for Line References
2 |
3 | This is a simple file with 22 lines.
4 | Line 3 should be accessible via #L3
5 | Line 4 is here.
6 | Line 5 is the first line in a range.
7 | Line 6 is in the middle of a range.
8 | Line 7 is also in the middle.
9 | Line 8 is the last line in a range.
10 | Line 9 is after the range.
11 | Line 10 is getting closer to the end.
12 | Line 11 continues.
13 | Line 12 is almost there.
14 | Line 13 is near the end.
15 | Line 14 is the penultimate line.
16 | Line 15 is the last line.
17 |
18 | ## L454
19 |
20 | This is a section with a name that looks like a line reference but the generated id uses lowercase 'l'.
21 | Since we're using lowercase 'l', this won't be interpreted as a line reference pattern.
22 | This section will be found by the normal section lookup mechanism.
23 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/line-references/line-reference-test.md:
--------------------------------------------------------------------------------
1 | # Line Reference Test
2 |
3 | This Markdown file tests GitHub-style line references.
4 |
5 | ## Valid Line References
6 |
7 | - [Link to Line 3](line-file.md#L3) - This should work
8 | - [Link to Line Range 5-8](line-file.md#L5-L8) - This should work too
9 |
10 | ## Invalid Line References
11 |
12 | - [Link to Line 25](line-file.md#L25) - This should fail (file only has 22 lines)
13 | - [Link to Line Range 4-30](line-file.md#L4-L30) - This should fail (range exceeds file line count)
14 |
15 | ## Section with L-Prefix
16 |
17 | - [Link to L454 Section](line-file.md#l454) - This should work (L454 is a section name, not a line reference)
18 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/line-references/line-references.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options handed to linkspector; `json` also gates result collection below.
const cmd = {
  json: true,
}

test('linkspector should correctly handle GitHub-style line reference links', async () => {
  const results = []
  let hasErrorLinks = false

  for await (const { file, result } of linkspector(
    './test/fixtures/markdown/line-references/.lineReferencesTest.yml',
    cmd
  )) {
    for (const linkStatusObj of result) {
      const {
        link,
        status_code,
        line_number,
        position,
        status,
        error_message,
      } = linkStatusObj

      if (cmd.json) {
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      if (status === 'error') {
        hasErrorLinks = true
      }
    }
  }

  // Expected outcome for each link, in the order the results are reported.
  // `errorContains` is only present for links that must be reported as broken.
  const expectedOutcomes = [
    // Single line within the file's line count
    { link: 'line-file.md#L3', status: 'alive' },
    // Line range within the file's line count
    { link: 'line-file.md#L5-L8', status: 'alive' },
    // Line number beyond the file length
    {
      link: 'line-file.md#L25',
      status: 'error',
      errorContains: 'Cannot find Line 25',
    },
    // Range whose end is beyond the file length
    {
      link: 'line-file.md#L4-L30',
      status: 'error',
      errorContains: 'Cannot find Line 30',
    },
    // Lowercase `l454` is a section name, not a line reference
    { link: 'line-file.md#l454', status: 'alive' },
  ]

  // There should be 5 links total (3 valid, 2 invalid)
  expect(results.length).toBe(5)

  expectedOutcomes.forEach(({ link, status, errorContains }, index) => {
    const actual = results[index]
    expect(actual.link).toBe(link)
    expect(actual.status).toBe(status)
    if (errorContains !== undefined) {
      expect(actual.error_message).toContain(errorContains)
    }
  })

  // Two links are broken, so the run as a whole must have flagged errors.
  expect(hasErrorLinks).toBe(true)
})
64 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/relative/.relativeTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/relative
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/relative/markdown-relative.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options handed to linkspector; `json` also gates result collection below.
const cmd = {
  json: true,
}

test('linkspector should check relative links in Markdown file', async () => {
  const results = [] // One flattened record per checked link (only when cmd.json is set)
  let hasErrorLinks = false

  const report = linkspector(
    './test/fixtures/markdown/relative/.relativeTest.yml',
    cmd
  )

  for await (const { file, result } of report) {
    for (const entry of result) {
      if (cmd.json) {
        const {
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        } = entry
        results.push({
          file,
          link,
          status_code,
          line_number,
          position,
          status,
          error_message,
        })
      }
      if (entry.status === 'error') {
        hasErrorLinks = true
      }
    }
  }

  // Status expected at each result index: six working links, then two broken.
  const expectedStatuses = [
    'alive',
    'alive',
    'alive',
    'alive',
    'alive',
    'alive',
    'error',
    'error',
  ]

  expect(hasErrorLinks).toBe(true)
  expect(results.length).toBe(8)
  expectedStatuses.forEach((expectedStatus, index) => {
    expect(results[index].status).toBe(expectedStatus)
  })
})
47 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/relative/relative1.md:
--------------------------------------------------------------------------------
1 | # Relative 1 Heading Level One
2 |
3 | This is a paragraph in the first file in a first level heading.
4 |
5 | [Link to Relative 1 Heading Level Two](#relative-1-heading-level-two)
6 |
7 | ## Relative 1 Heading Level Two
8 |
9 | This is a paragraph in the first file in a second level heading.
10 |
11 | [Link to Relative 2 Heading Level Two](relative2.md#custom-id-level-two)
12 |
13 | ### Relative 1 Heading Level Three
14 |
15 | This is a paragraph in the first file in a third level heading.
16 |
17 | [Link to Relative 2 Heading Level Three](relative2.md#custom-id-level-three)
18 |
19 | #### Relative 1 Heading Level Four
20 |
21 | This is a paragraph in the first file in a fourth level heading.
22 |
23 | [Link to Relative 3 Broken link](relative3.md#relative-3-heading-level-one)
24 |
25 | ##### Relative 1 Heading Level Five
26 |
27 | [Link to broken section](#broken-section)
28 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/relative/relative2.md:
--------------------------------------------------------------------------------
1 | # Relative 2 Heading Level One
2 |
3 | This is a paragraph in the first file in a first level heading.
4 |
5 | [Link to Relative 1 Heading Level One](relative1.md#relative-1-heading-level-one)
6 |
7 | ## Relative 2 Heading Level Two <a name="custom-id-level-two"></a>
8 |
9 | This is a paragraph in the first file in a second level heading.
10 |
11 | [Link to Relative 1 Heading Level Two](relative1.md#relative-1-heading-level-two)
12 |
13 | ### Relative 2 Heading Level Three {#custom-id-level-three}
14 |
15 | This is a paragraph in the first file in a third level heading.
16 |
17 | [Link to Relative 1 Heading Level Three](relative1.md#relative-1-heading-level-three)
18 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/with-html-anchors-id/.withHtmlAnchorsIdTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/with-html-anchors-id
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/with-html-anchors-id/html-anchor-id.md:
--------------------------------------------------------------------------------
1 | # This is heading 1
2 |
3 | This is a paragraph in the first file in a first level heading.
4 |
5 | <a id="custom-id-with-id"></a>Anchor with `id`
6 |
7 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla vel mauris sit amet ipsum venenatis placerat.
8 |
9 | Link to anchor with `id` [Link to custom id with id](#custom-id-with-id).
10 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/with-html-anchors-id/markdown-with-html-anchors-id.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options handed to linkspector; `json` also gates result collection below.
const cmd = {
  json: true,
}

test('linkspector should check HTML encoded section links using ID attribute', async () => {
  const results = []
  let hasErrorLinks = false

  // Builds the flattened record stored for one link of `file`.
  const toRecord = (file, linkStatusObj) => ({
    file,
    link: linkStatusObj.link,
    status_code: linkStatusObj.status_code,
    line_number: linkStatusObj.line_number,
    position: linkStatusObj.position,
    status: linkStatusObj.status,
    error_message: linkStatusObj.error_message,
  })

  const report = linkspector(
    './test/fixtures/markdown/with-html-anchors-id/.withHtmlAnchorsIdTest.yml',
    cmd
  )

  for await (const { file, result } of report) {
    for (const linkStatusObj of result) {
      if (cmd.json) {
        results.push(toRecord(file, linkStatusObj))
      }
      if (linkStatusObj.status === 'error') {
        hasErrorLinks = true
      }
    }
  }

  // The fixture has exactly one link, and it must resolve.
  expect(hasErrorLinks).toBe(false)
  expect(results.length).toBe(1)
  expect(results[0].status).toBe('alive')
})
41 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/with-html-anchors/.withHtmlAnchorsTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/markdown/with-html-anchors
3 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/with-html-anchors/html-anchor.md:
--------------------------------------------------------------------------------
1 | # This is heading 1
2 |
3 | This is a paragraph in the first file in a first level heading.
4 |
5 | <a name="custom-id-level-one"></a>Anchor with `a`
6 |
7 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla vel mauris sit amet ipsum venenatis placerat.
8 |
9 | Link to anchor with `a` [Link to custom id level one](#custom-id-level-one).
10 |
--------------------------------------------------------------------------------
/test/fixtures/markdown/with-html-anchors/markdown-with-html-anchors.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options handed to linkspector; `json` also gates result collection below.
const cmd = {
  json: true,
}

test('linkspector should check HTML encoded section links and include anchor names', async () => {
  const results = []
  let hasErrorLinks = false

  // Builds the flattened record stored for one link of `file`.
  const toRecord = (file, linkStatusObj) => ({
    file,
    link: linkStatusObj.link,
    status_code: linkStatusObj.status_code,
    line_number: linkStatusObj.line_number,
    position: linkStatusObj.position,
    status: linkStatusObj.status,
    error_message: linkStatusObj.error_message,
  })

  const report = linkspector(
    './test/fixtures/markdown/with-html-anchors/.withHtmlAnchorsTest.yml',
    cmd
  )

  for await (const { file, result } of report) {
    for (const linkStatusObj of result) {
      if (cmd.json) {
        results.push(toRecord(file, linkStatusObj))
      }
      if (linkStatusObj.status === 'error') {
        hasErrorLinks = true
      }
    }
  }

  // The fixture has exactly one link, and it must resolve.
  expect(hasErrorLinks).toBe(false)
  expect(results.length).toBe(1)
  expect(results[0].status).toBe('alive')
})
41 |
--------------------------------------------------------------------------------
/test/fixtures/patterns/patterns.md:
--------------------------------------------------------------------------------
1 | # Test Patterns
2 |
3 | ## Links that should be ignored
4 |
5 | - [Ignored Link 1](https://ignored-domain.example.com/page1)
6 | - [Ignored Link 2](https://ignored-domain.example.com/page2)
7 | - [Ignored Link 3](https://another-ignored.example.com/test)
8 |
9 | ## Links that should be replaced
10 |
11 | - [Replace Example 1](https://example.com/old/path1)
12 | - [Replace Example 2](https://example.com/old/path2)
13 | - [Replace Example 3](https://replace-domain.example.com/path3)
14 |
15 | ## Normal links that should be checked
16 |
17 | - [Google](https://www.google.com)
18 | - [GitHub](https://github.com)
19 |
--------------------------------------------------------------------------------
/test/fixtures/patterns/patterns.test.js:
--------------------------------------------------------------------------------
1 | import { expect, test } from 'vitest'
2 | import { linkspector } from './linkspector.js'
3 |
// Options handed to linkspector.
const cmd = {
  json: true,
}

test('linkspector should correctly apply ignorePatterns and replacementPatterns', async () => {
  const results = []

  const report = linkspector('./test/fixtures/patterns/patternsTest.yml', cmd)
  for await (const { file, result } of report) {
    for (const {
      link,
      status_code,
      line_number,
      position,
      status,
      error_message,
    } of result) {
      results.push({
        file,
        link,
        status_code,
        line_number,
        position,
        status,
        error_message,
      })
    }
  }

  // Looks up the collected record for an exact link URL (undefined if absent).
  const findByLink = (url) => results.find((r) => r.link === url)

  // 1. Links matched by ignorePatterns must not be reported at all
  const ignoredLinks = [
    'https://ignored-domain.example.com/page1',
    'https://ignored-domain.example.com/page2',
    'https://another-ignored.example.com/test',
  ]
  for (const url of ignoredLinks) {
    expect(findByLink(url)).toBeUndefined()
  }

  // 2. Links rewritten by replacementPatterns are reported under the new URL
  const replacedLinks = [
    'https://example.com/new/path1',
    'https://example.com/new/path2',
    'https://new-domain.example.com/path3',
  ]
  for (const url of replacedLinks) {
    expect(findByLink(url)).toBeDefined()
  }

  // 3. The pre-replacement URLs must no longer appear in the results
  const originalLinks = [
    'https://example.com/old/path1',
    'https://example.com/old/path2',
    'https://replace-domain.example.com/path3',
  ]
  for (const url of originalLinks) {
    expect(findByLink(url)).toBeUndefined()
  }

  // 4. Links untouched by any pattern are still checked
  const normalLinks = ['https://www.google.com', 'https://github.com']
  for (const url of normalLinks) {
    expect(findByLink(url)).toBeDefined()
  }

  // Total number of links should be 5 (2 normal + 3 replaced)
  expect(results.length).toBe(5)
}, 10000)
72 |
--------------------------------------------------------------------------------
/test/fixtures/patterns/patternsTest.yml:
--------------------------------------------------------------------------------
1 | dirs:
2 | - ./test/fixtures/patterns
3 | ignorePatterns:
4 | - pattern: '^https://ignored-domain.example.com/.*$'
5 | - pattern: '^https://another-ignored.example.com/.*$'
6 | replacementPatterns:
7 | - pattern: 'https://example.com/old/(.*)'
8 | replacement: 'https://example.com/new/$1'
9 | - pattern: 'https://replace-domain.example.com/(.*)'
10 | replacement: 'https://new-domain.example.com/$1'
11 |
--------------------------------------------------------------------------------
/test/fixtures/redirects/config-redirects-false.yml:
--------------------------------------------------------------------------------
1 | files:
2 | - test/fixtures/redirects/redirects.md
3 | followRedirects: false
4 |
--------------------------------------------------------------------------------
/test/fixtures/redirects/config-redirects-true.yml:
--------------------------------------------------------------------------------
1 | files:
2 | - test/fixtures/redirects/redirects.md
3 | # followRedirects is true by default, so we can omit it or set it explicitly.
4 | # For clarity in testing, we can omit it to test the default behavior.
5 |
--------------------------------------------------------------------------------
/test/fixtures/redirects/redirects.md:
--------------------------------------------------------------------------------
1 | # Redirect Test Links
2 |
3 | This file contains links for testing the followRedirects feature.
4 |
5 | ## Scenario 1 & 2: Redirecting Link
6 |
7 | [Permanent Redirect (301)](http://localhost:3000/redirect-permanent)
8 | [Temporary Redirect (302)](http://localhost:3000/redirect-temporary)
9 |
10 | ## Scenario 3 & 4: Non-Redirecting Link
11 |
12 | [Direct OK (200)](http://localhost:3000/ok)
13 |
14 | ## Scenario 5: Error Link
15 |
16 | [Not Found (404)](http://localhost:3000/not-found)
17 |
18 | ## Scenario: Redirect to external allowed
19 |
20 | [External Redirect (301)](http://localhost:3000/redirect-external)
21 | [Final External Destination](https://example.com)
22 |
23 | ## Scenario: Redirect loop
24 |
25 | [Redirect Loop](http://localhost:3000/redirect-loop1)
26 |
--------------------------------------------------------------------------------
/test/fixtures/redirects/redirects.test.js:
--------------------------------------------------------------------------------
1 | import { describe, it, expect, beforeAll, afterAll } from 'vitest'
2 | import { linkspector } from '../../../linkspector.js' // Import from root linkspector.js
3 | import path from 'path'
4 | import http from 'http'
5 | import { fileURLToPath } from 'url'
6 |
// ES modules have no built-in __filename/__dirname, so both are derived
// from this module's URL.
const __filename = fileURLToPath(import.meta.url)
const __dirname = path.dirname(__filename)

// The linkspector config fixtures sit in the same directory as this test file.
const fixturesDir = path.join(__dirname)
const [configFileFollowFalse, configFileFollowTrue] = [
  'config-redirects-false.yml',
  'config-redirects-true.yml',
].map((configName) => path.join(fixturesDir, configName))
17 |
// HTTP server instance; assigned when the suite starts and closed when it ends.
let server

const PORT = 3000
const HOST = 'localhost'

/**
 * Request handler for the local HTTP server used by the redirect tests.
 *
 * Routes are matched on the exact value of `req.url`:
 * - `/redirect-permanent` -> 301 to `/final-destination`
 * - `/redirect-temporary` -> 302 to `/final-destination`
 * - `/final-destination`, `/ok` -> 200 with a plain-text body
 * - `/redirect-external` -> 301 to https://example.com
 * - `/redirect-loop1` <-> `/redirect-loop2` -> 302 to each other
 * - anything else -> 404 with a plain-text body
 *
 * @param {{ url: string }} req - Incoming request; only `url` is read.
 * @param {{ writeHead: Function, end: Function }} res - Response to write to.
 */
const serverHandler = (req, res) => {
  const origin = `http://${HOST}:${PORT}`

  // Sends a body-less redirect response pointing at `location`.
  const redirect = (statusCode, location) => {
    res.writeHead(statusCode, { Location: location })
    res.end()
  }

  // Sends a plain-text response with the given status and body.
  const plainText = (statusCode, body) => {
    res.writeHead(statusCode, { 'Content-Type': 'text/plain' })
    res.end(body)
  }

  switch (req.url) {
    case '/redirect-permanent':
      redirect(301, `${origin}/final-destination`)
      break
    case '/redirect-temporary':
      redirect(302, `${origin}/final-destination`)
      break
    case '/final-destination':
      plainText(200, 'Final Destination Reached')
      break
    case '/ok':
      plainText(200, 'OK')
      break
    case '/redirect-external':
      redirect(301, 'https://example.com')
      break
    case '/redirect-loop1':
      redirect(302, `${origin}/redirect-loop2`)
      break
    case '/redirect-loop2':
      redirect(302, `${origin}/redirect-loop1`)
      break
    default:
      plainText(404, 'Not Found')
  }
}
50 |
describe('followRedirects feature', () => {
  /**
   * Runs linkspector with `configFile` and returns the result entry for the
   * local test-server route `route` (for example '/ok').
   *
   * The entry is asserted to exist so that a link missing from the results
   * fails with a readable expectation error instead of a TypeError raised by
   * reading a property of `undefined`.
   *
   * @param {string} configFile - Path of the linkspector config to run.
   * @param {string} route - Path on the local test server, with leading slash.
   * @param {object} [cmd={}] - Options object passed through to linkspector.
   * @returns {Promise<object>} The result entry whose `link` matches the route.
   */
  const checkRoute = async (configFile, route, cmd = {}) => {
    const collectedResults = []
    for await (const item of linkspector(configFile, cmd)) {
      collectedResults.push(...item.result)
    }
    const linkResult = collectedResults.find(
      (r) => r.link === `http://${HOST}:${PORT}${route}`
    )
    expect(linkResult).toBeDefined()
    return linkResult
  }

  beforeAll(async () => {
    server = http.createServer(serverHandler)
    // Reject on 'error' (e.g. the port is already in use) so a failed startup
    // reports its real cause instead of surfacing as a hook timeout.
    await new Promise((resolve, reject) => {
      server.once('error', reject)
      server.listen(PORT, HOST, () => {
        server.off('error', reject)
        resolve()
      })
    })
  })

  afterAll(async () => {
    // Nothing to shut down if the server never started listening.
    if (!server || !server.listening) return
    await new Promise((resolve) => server.close(resolve))
  })

  // Scenario 1: followRedirects: true (default) - Permanent Redirect (301)
  it('should report a permanent redirecting link as alive (200) when followRedirects is true (default)', async () => {
    const redirectLink = await checkRoute(
      configFileFollowTrue,
      '/redirect-permanent'
    )
    expect(redirectLink.status).toBe('alive')
    expect(redirectLink.status_code).toBe(200) // Final destination
    expect(redirectLink.error_message).toContain('redirected to')
  }, 10000)

  // Scenario 1 (bis): followRedirects: true (default) - Temporary Redirect (302)
  it('should report a temporary redirecting link as alive (200) when followRedirects is true (default)', async () => {
    const redirectLink = await checkRoute(
      configFileFollowTrue,
      '/redirect-temporary'
    )
    expect(redirectLink.status).toBe('alive')
    expect(redirectLink.status_code).toBe(200) // Final destination
    expect(redirectLink.error_message).toContain('redirected to')
  }, 10000)

  // Scenario 2: followRedirects: false - Permanent Redirect (301)
  it('should report a permanent redirecting link as error (301) when followRedirects is false', async () => {
    const redirectLink = await checkRoute(
      configFileFollowFalse,
      '/redirect-permanent'
    )
    expect(redirectLink.status).toBe('error')
    expect(redirectLink.status_code).toBe(301)
    expect(redirectLink.error_message).toMatch(
      /redirected.*followRedirects is set to false/i
    )
  })

  // Scenario 2 (bis): followRedirects: false - Temporary Redirect (302)
  it('should report a temporary redirecting link as error (302) when followRedirects is false', async () => {
    const redirectLink = await checkRoute(
      configFileFollowFalse,
      '/redirect-temporary'
    )
    expect(redirectLink.status).toBe('error')
    expect(redirectLink.status_code).toBe(302)
    expect(redirectLink.error_message).toMatch(
      /redirected.*followRedirects is set to false/i
    )
  })

  // Scenario 3: Non-redirecting link with followRedirects: false
  it('should report a non-redirecting link as alive (200) when followRedirects is false', async () => {
    const okLink = await checkRoute(configFileFollowFalse, '/ok')
    expect(okLink.status).toBe('alive')
    expect(okLink.status_code).toBe(200)
  })

  // Scenario 4: Non-redirecting link with followRedirects: true (default)
  it('should report a non-redirecting link as alive (200) when followRedirects is true (default)', async () => {
    const okLink = await checkRoute(configFileFollowTrue, '/ok')
    expect(okLink.status).toBe('alive')
    expect(okLink.status_code).toBe(200)
  })

  // Scenario 5: Link that results in an actual error (404) with followRedirects: false
  it('should report a 404 link as error (404) when followRedirects is false', async () => {
    const notFoundLink = await checkRoute(configFileFollowFalse, '/not-found')
    expect(notFoundLink.status).toBe('error')
    expect(notFoundLink.status_code).toBe(404)
  })

  // Scenario: Link that results in an actual error (404) with followRedirects: true (default)
  it('should report a 404 link as error (404) when followRedirects is true (default)', async () => {
    const notFoundLink = await checkRoute(configFileFollowTrue, '/not-found')
    expect(notFoundLink.status).toBe('error')
    expect(notFoundLink.status_code).toBe(404)
  })

  // Scenario: External redirect allowed when followRedirects is true
  it('should report an external redirecting link as alive (200 from example.com) when followRedirects is true', async () => {
    const externalRedirectLink = await checkRoute(
      configFileFollowTrue,
      '/redirect-external'
    )
    expect(externalRedirectLink.status).toBe('alive')
    // The status code of the external destination is deliberately not asserted:
    // it depends on how example.com answers and would make the test flaky.
    expect(externalRedirectLink.error_message).toContain(
      'redirected to https://example.com'
    )
  }, 10000)

  // Scenario: External redirect disallowed when followRedirects is false
  it('should report an external redirecting link as error (301) when followRedirects is false', async () => {
    const externalRedirectLink = await checkRoute(
      configFileFollowFalse,
      '/redirect-external'
    )
    expect(externalRedirectLink.status).toBe('error')
    expect(externalRedirectLink.status_code).toBe(301)
    expect(externalRedirectLink.error_message).toMatch(
      /redirected to https:\/\/example.com, but followRedirects is set to false/i
    )
  })

  // Scenario: Redirect loop when followRedirects is true
  it('should report a redirect loop as error when followRedirects is true', async () => {
    // Only 200 is passed as an alive status code for this run.
    const loopLink = await checkRoute(configFileFollowTrue, '/redirect-loop1', {
      aliveStatusCodes: [200],
    })
    expect(loopLink.status).toBe('error')
    // The exact message is not pinned; it may vary with how the redirect loop
    // is detected (e.g. "net::ERR_TOO_MANY_REDIRECTS" or similar).
    expect(loopLink.error_message).toBeDefined()
  }, 20000) // Longer timeout: resolving the loop may involve retries or timeouts

  // Scenario: Redirect loop when followRedirects is false
  it('should report a redirect loop as error (first redirect status) when followRedirects is false', async () => {
    const loopLink = await checkRoute(configFileFollowFalse, '/redirect-loop1')
    expect(loopLink.status).toBe('error')
    expect(loopLink.status_code).toBe(302) // The first redirect in the loop
    expect(loopLink.error_message).toMatch(
      /redirected.*followRedirects is set to false/i
    )
  })
})
254 |
--------------------------------------------------------------------------------
/vite.config.ts:
--------------------------------------------------------------------------------
1 | // vite.config.ts - Vitest configuration
2 | import { defineConfig } from 'vitest/config'
3 |
// Global per-test timeout: 10 seconds, expressed in milliseconds.
const TEST_TIMEOUT_MS = 10 * 1000

export default defineConfig({
  test: {
    testTimeout: TEST_TIMEOUT_MS,
  },
})
10 |
--------------------------------------------------------------------------------