├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ ├── npm-publish.yml │ ├── npm-test.yml │ └── prettier.yml ├── .gitignore ├── .linkspector.test.yml ├── .prettierrc.json ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DEV_SETUP.md ├── Dockerfile ├── LICENSE ├── README.md ├── SECURITY.md ├── index.js ├── index.test.js ├── lib ├── batch-check-links.js ├── check-file-links.js ├── extract-asciidoc-links.js ├── extract-markdown-hyperlinks.js ├── get-unique-links.js ├── handle-links-modification.js ├── prepare-file-list.js ├── update-linkstatus-obj.js └── validate-config.js ├── linkspector.js ├── package-lock.json ├── package.json ├── scripts └── apparmorfix.sh ├── test └── fixtures │ ├── asciidoc │ └── hyperlinks │ │ ├── asciidoc-hyperlinks.test.js │ │ ├── hyperlinksTest.yml │ │ └── testhyperlinks1.adoc │ ├── headers.test.js │ ├── markdown │ ├── decoded-sections │ │ ├── .decodedTest.yml │ │ ├── decoded.md │ │ ├── decoded2.md │ │ └── markdown-decoded-sections.test.js │ ├── duplicates │ │ ├── duplicate1.md │ │ ├── duplicateTest.yml │ │ └── markdown-duplicates.test.js │ ├── headings │ │ ├── heading1.md │ │ ├── headingsTest.yml │ │ └── markdown-headings.test.js │ ├── image │ │ ├── image.md │ │ ├── imageTest.yml │ │ └── markdown-image.test.js │ ├── line-references │ │ ├── .lineReferencesTest.yml │ │ ├── line-file.md │ │ ├── line-reference-test.md │ │ └── line-references.test.js │ ├── relative │ │ ├── .relativeTest.yml │ │ ├── markdown-relative.test.js │ │ ├── relative1.md │ │ └── relative2.md │ ├── with-html-anchors-id │ │ ├── .withHtmlAnchorsIdTest.yml │ │ ├── html-anchor-id.md │ │ └── markdown-with-html-anchors-id.test.js │ └── with-html-anchors │ │ ├── .withHtmlAnchorsTest.yml │ │ ├── html-anchor.md │ │ └── markdown-with-html-anchors.test.js │ ├── patterns │ ├── patterns.md │ ├── patterns.test.js │ └── patternsTest.yml │ └── redirects │ ├── config-redirects-false.yml │ ├── 
config-redirects-true.yml │ ├── redirects.md │ └── redirects.test.js └── vite.config.ts /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | liberapay: gaurav-nelson 2 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '[BUG] ' 5 | labels: bug 6 | assignees: '' 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '[FEATURE REQ] ' 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. 14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here. 
20 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | Please include a summary of the change and which issue is fixed. Also include relevant motivation and context. List any dependencies that are required for this change. Delete any other sections or text that are not relevant. 4 | 5 | Fixes # (issue number) 6 | 7 | ## Type of Change 8 | 9 | Please delete options that are not relevant. 10 | 11 | - [ ] Bug fix (non-breaking change which fixes an issue) 12 | - [ ] New feature (non-breaking change which adds functionality) 13 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 14 | - [ ] This change requires a documentation update 15 | 16 | ## Checklist: 17 | 18 | - [ ] I have performed a self-review of my own code 19 | - [ ] I have commented my code, particularly in hard-to-understand areas 20 | - [ ] I have made corresponding changes to the documentation 21 | - [ ] My changes generate no new warnings 22 | - [ ] I have added tests that prove my fix is effective or that my feature works 23 | - [ ] New and existing unit tests pass locally with my changes 24 | 25 | ## Additional Information 26 | 27 | Include any additional information about the pull request here. 28 | -------------------------------------------------------------------------------- /.github/workflows/npm-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow publishes the NPM package when a tag is pushed to the repository. 
2 | 3 | name: Publish on NPM 4 | 5 | on: 6 | push: 7 | tags: 8 | - '*' 9 | 10 | jobs: 11 | publish-npm: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v4 15 | - uses: actions/setup-node@v4 16 | with: 17 | node-version: 20 18 | registry-url: https://registry.npmjs.org/ 19 | - run: npm ci 20 | - run: npm publish --access public 21 | env: 22 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 23 | -------------------------------------------------------------------------------- /.github/workflows/npm-test.yml: -------------------------------------------------------------------------------- 1 | name: Run tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | test: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-node@v4 17 | with: 18 | node-version: '20' 19 | - run: scripts/apparmorfix.sh 20 | -------------------------------------------------------------------------------- /.github/workflows/prettier.yml: -------------------------------------------------------------------------------- 1 | name: Run prettier check 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | prettier: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - uses: actions/setup-node@v4 17 | with: 18 | node-version: '20' 19 | - run: npm ci 20 | - run: npx prettier --check . 
21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | lerna-debug.log* 8 | .pnpm-debug.log* 9 | 10 | # Diagnostic reports (https://nodejs.org/api/report.html) 11 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 12 | 13 | # Runtime data 14 | pids 15 | *.pid 16 | *.seed 17 | *.pid.lock 18 | 19 | # Directory for instrumented libs generated by jscoverage/JSCover 20 | lib-cov 21 | 22 | # Coverage directory used by tools like istanbul 23 | coverage 24 | *.lcov 25 | 26 | # nyc test coverage 27 | .nyc_output 28 | 29 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 30 | .grunt 31 | 32 | # Bower dependency directory (https://bower.io/) 33 | bower_components 34 | 35 | # node-waf configuration 36 | .lock-wscript 37 | 38 | # Compiled binary addons (https://nodejs.org/api/addons.html) 39 | build/Release 40 | 41 | # Dependency directories 42 | node_modules/ 43 | jspm_packages/ 44 | 45 | # Snowpack dependency directory (https://snowpack.dev/) 46 | web_modules/ 47 | 48 | # TypeScript cache 49 | *.tsbuildinfo 50 | 51 | # Optional npm cache directory 52 | .npm 53 | 54 | # Optional eslint cache 55 | .eslintcache 56 | 57 | # Optional stylelint cache 58 | .stylelintcache 59 | 60 | # Microbundle cache 61 | .rpt2_cache/ 62 | .rts2_cache_cjs/ 63 | .rts2_cache_es/ 64 | .rts2_cache_umd/ 65 | 66 | # Optional REPL history 67 | .node_repl_history 68 | 69 | # Output of 'npm pack' 70 | *.tgz 71 | 72 | # Yarn Integrity file 73 | .yarn-integrity 74 | 75 | # dotenv environment variable files 76 | .env 77 | .env.development.local 78 | .env.test.local 79 | .env.production.local 80 | .env.local 81 | 82 | # parcel-bundler cache (https://parceljs.org/) 83 | .cache 84 | .parcel-cache 85 | 86 | # Next.js build output 87 | .next 88 | out 89 | 90 | # Nuxt.js 
build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | # Linkspector 133 | .linkspector.yml 134 | -------------------------------------------------------------------------------- /.linkspector.test.yml: -------------------------------------------------------------------------------- 1 | files: 2 | - README.md 3 | useGitIgnore: true 4 | -------------------------------------------------------------------------------- /.prettierrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "es5", 3 | "tabWidth": 2, 4 | "semi": false, 5 | "singleQuote": true 6 | } 7 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of conduct 2 | 3 | - We are committed to providing a friendly, safe and welcoming environment for all, regardless of level of experience, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, nationality, or other similar characteristic. 
4 | - Please avoid using overtly sexual aliases or other nicknames that might detract from a friendly, safe and welcoming environment for all. 5 | - Please be kind and courteous. There’s no need to be mean or rude. 6 | - Respect that people have differences of opinion and that every design or implementation choice carries a trade-off and numerous costs. There is seldom a right answer. 7 | - Please keep unstructured critique to a minimum. If you have solid ideas you want to experiment with, make a fork and see how it works. 8 | - We will exclude you from interaction if you insult, demean or harass anyone. That is not welcome behavior. We interpret the term “harassment” as including the definition in the [Citizen Code of Conduct](https://github.com/stumpsyn/policies/blob/master/citizen_code_of_conduct.md); if you have any lack of clarity about what might be included in that concept, please read their definition. In particular, we don’t tolerate behavior that excludes people in socially marginalized groups. 9 | - Private harassment is also unacceptable. No matter who you are, if you feel you have been or are being harassed or made uncomfortable by a community member, please contact one of the channel ops or an employee of Oven immediately. Whether you’re a regular contributor or a newcomer, we care about making this community a safe place for you and we’ve got your back. 10 | - Likewise any spamming, trolling, flaming, baiting or other attention-stealing behavior is not welcome. 11 | 12 | This code of conduct is adapted from the [Rust Code of Conduct](https://www.rust-lang.org/policies/code-of-conduct). 13 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for considering contributing to Linkspector! We welcome contributions from the community to help improve and grow this tool. 
Please take a moment to review these guidelines to ensure a smooth and collaborative contribution process. 4 | 5 | ## Table of Contents 6 | 7 | 1. [Getting Started](#getting-started) 8 | 2. [Code of Conduct](#code-of-conduct) 9 | 3. [How to Contribute](#how-to-contribute) 10 | - [Reporting Issues](#reporting-issues) 11 | - [Submitting Pull Requests](#submitting-pull-requests) 12 | 4. [Development Setup](#development-setup) 13 | 5. [Testing](#testing) 14 | 6. [Commit Messages](#commit-messages) 15 | 7. [License](#license) 16 | 17 | ## Getting Started 18 | 19 | Before you start contributing, please make sure you have: 20 | 21 | - Familiarized yourself with the project's goals and objectives. 22 | - Read and understood the project's license (See [License](#license)). 23 | - Set up a GitHub account if you don't already have one. 24 | 25 | ## Code of Conduct 26 | 27 | Please review and adhere to our [Code of Conduct](CODE_OF_CONDUCT.md) to ensure a respectful and inclusive environment for all contributors and users. 28 | 29 | ## How to Contribute 30 | 31 | ### Reporting Issues 32 | 33 | If you encounter a bug, have a feature request, or have any questions about the project, please open an issue on the [GitHub Issues](https://github.com/UmbrellaDocs/linkspector/issues) page. Be sure to provide as much detail as possible, including the steps to reproduce the issue and any relevant error messages or screenshots. 34 | 35 | ### Submitting Pull Requests 36 | 37 | We welcome contributions in the form of pull requests (PRs). To submit a PR, follow these steps: 38 | 39 | 1. Fork the repository to your own GitHub account. 40 | 2. Create a new branch from the `main` branch for your changes. 41 | 3. Make your changes and commit them with clear and concise messages (see [Commit Messages](#commit-messages)). 42 | 4. Push your branch to your forked repository. 43 | 5. Create a pull request from your branch to the `main` branch of this repository. 
44 | 45 | Our maintainers will review your PR as soon as possible and provide feedback if needed. Once your PR is approved, it will be merged into the project. 46 | 47 | ## Development Setup 48 | 49 | To set up a development environment, follow the instructions in the [Development Setup](DEV_SETUP.md) document. This will guide you through the process of installing dependencies and configuring your development environment. 50 | 51 | ## Testing 52 | 53 | Before submitting a pull request, make sure to run the test suite to ensure that your changes do not introduce any regressions. To run the tests, use the following command: 54 | 55 | ```bash 56 | npm test 57 | ``` 58 | 59 | ## Commit Messages 60 | 61 | Follow these guidelines for commit messages: 62 | 63 | - Use clear and concise messages that explain the purpose of the commit. 64 | - Start the message with a verb in the present tense (e.g., "Add feature," "Fix bug"). 65 | - Reference related issues or pull requests by including their numbers (e.g., "Fixes #123," "Closes #456"). 66 | 67 | ## License 68 | 69 | By contributing to this project, you agree that your contributions will be licensed under the project's [LICENSE](LICENSE). 70 | -------------------------------------------------------------------------------- /DEV_SETUP.md: -------------------------------------------------------------------------------- 1 | # Development Setup 2 | 3 | To contribute to this project, you'll need to set up your development environment. Please follow the instructions below to ensure you have the necessary tools and dependencies installed. 4 | 5 | ## Prerequisites 6 | 7 | Before you begin, make sure you have the following prerequisites installed on your system: 8 | 9 | - **Node.js**: This project requires Node.js, a JavaScript runtime, to build and run. You can download and install Node.js from the official website: [Node.js Download](https://nodejs.org/). 
10 | 11 | To check if Node.js is installed, open your terminal and run: 12 | 13 | ```bash 14 | node -v 15 | ``` 16 | 17 | You should see the installed Node.js version. 18 | 19 | ## Installation 20 | 21 | After ensuring you have Node.js installed, follow these steps to set up your development environment: 22 | 23 | 1. **Clone the Repository**: Fork and clone this repository to your local machine: 24 | 25 | ```bash 26 | git clone git@github.com:UmbrellaDocs/linkspector.git 27 | ``` 28 | 29 | 2. **Change Directory**: Navigate to the project directory: 30 | 31 | ```bash 32 | cd linkspector 33 | ``` 34 | 35 | 3. **Install Dependencies**: Use `npm` to install project dependencies: 36 | 37 | ```bash 38 | npm install 39 | ``` 40 | 41 | This command will download and install all the required packages specified in the `package.json` file. 42 | 43 | ## Contributing 44 | 45 | You are now set up to contribute to the project! Follow the [Contributing Guidelines](CONTRIBUTING.md) for information on reporting issues, submitting pull requests, and more. 46 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:lts-bookworm-slim 2 | 3 | # The base name of the npm package 4 | ARG LINKSPECTOR_NAME=@umbrelladocs/linkspector 5 | # Use the argument below to select version to install, e.g.: 6 | # docker build --build-arg LINKSPECTOR_VERSION=0.2.7 -t umbrelladocs/linkspector . 7 | ARG LINKSPECTOR_VERSION=latest 8 | # Use the argument below to specify full package name to install, 9 | # empty value installs current directory, e.g.: 10 | # docker build --build-arg LINKSPECTOR_PACKAGE= -t umbrelladocs/linkspector . 
11 | ARG LINKSPECTOR_PACKAGE=${LINKSPECTOR_NAME}@${LINKSPECTOR_VERSION} 12 | 13 | # Set default user 14 | ENV USER=node 15 | 16 | # Set installation location for node packages 17 | ENV NPM_GLOBAL=/home/${USER}/.npm-global 18 | ENV PATH=${NPM_GLOBAL}/bin:$PATH 19 | 20 | # Install chromium instead of puppeteer chrome 21 | # as puppeteer does not provide arm64 22 | # https://github.com/puppeteer/puppeteer/issues/7740 23 | ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true 24 | ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium.wrapper 25 | 26 | # Install linkspector dependencies 27 | RUN apt-get update \ 28 | && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ 29 | bash \ 30 | ca-certificates \ 31 | chromium \ 32 | curl \ 33 | git \ 34 | upower \ 35 | && apt-get clean && rm -rf /var/lib/apt/lists/* 36 | 37 | # Create app directory for mounting host files 38 | RUN mkdir /app && chown ${USER}:${USER} /app 39 | 40 | # chromium in order to start either needs dbus https://github.com/puppeteer/puppeteer/issues/11028 41 | # or skip dbus by using --remote-debugging-port=0 (any free port) https://github.com/nodejs/help/issues/3220#issuecomment-1228342313 42 | # Additionally, allow chromium to start without elevated capabilities needed to start the sandbox 43 | # See https://github.com/puppeteer/puppeteer/issues/5505 44 | RUN echo /usr/bin/chromium \ 45 | --no-sandbox \ 46 | --headless=new \ 47 | --disable-gpu \ 48 | --enable-chrome-browser-cloud-management \ 49 | --remote-debugging-port=0 \ 50 | > /usr/bin/chromium.wrapper 51 | RUN chmod ugo+x /usr/bin/chromium.wrapper 52 | 53 | # Install linkspector as node user 54 | USER ${USER} 55 | WORKDIR /home/${USER} 56 | RUN npm config set prefix ${NPM_GLOBAL} 57 | COPY --chown=${USER}:${USER} lib lib 58 | COPY --chown=${USER}:${USER} *.js *.json test ./ 59 | # npm ci does not support --global 60 | # https://github.com/npm/cli/issues/7224 61 | RUN if test -z ${LINKSPECTOR_PACKAGE}; then npm ci; fi && npm install --global 
${LINKSPECTOR_PACKAGE} 62 | 63 | WORKDIR /app 64 | 65 | # Run sanity checks 66 | RUN npm list --global 67 | RUN linkspector --version 68 | RUN linkspector check 69 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![GitHub Marketplace](https://img.shields.io/badge/GitHub%20Marketplace-action%20linkspector-brightgreen?style=for-the-badge)](https://github.com/marketplace/actions/run-linkspector-with-reviewdog) 2 | [![NPM](https://img.shields.io/npm/v/@umbrelladocs/linkspector?style=for-the-badge)](https://www.npmjs.com/package/@umbrelladocs/linkspector) 3 | Donate using Liberapay 4 | 5 |

6 | Logo 7 |

8 |

Uncover broken links in your content.

9 |

Linkspector

10 | 11 | Linkspector is a CLI app that checks for dead hyperlinks in files. 12 | It supports multiple markup languages such as Markdown, AsciiDoc (limited - hyperlinks only), and reStructuredText (coming soon). 13 | 14 | With Linkspector, you can easily check all hyperlinks in your files, ensuring that they are not broken and that your readers can access all the relevant content. 15 | The app allows you to quickly and easily identify any broken links, so you can fix them before publishing your content. 16 | 17 | Linkspector is a powerful tool for anyone who creates content using markup languages. 18 | 19 | ## How this is different from existing tools? 20 | 21 | 1. **Enhanced Link Checking with Puppeteer**: It uses [Puppeteer](https://pptr.dev/) to check links in Chrome's headless mode, reducing the number of false positives. 22 | 2. **Addresses limitations and adds user-requested features**: It is built to address the shortcomings in [GitHub Action - Markdown link check](https://github.com/gaurav-nelson/github-action-markdown-link-check) and adds many user-requested features. 23 | 3. **Single repository for seamless collaboration**: All the code it needs to run is in a single repository, making it easier for the community to collaborate. 24 | 4. **Focused for CI/CD use**: Linkspector ([action-linkspector](https://github.com/UmbrellaDocs/action-linkspector)) is purposefully tailored to run in your CI/CD pipelines. This ensures that link checking becomes an integral part of your development workflow. 25 | 26 | ## Installation 27 | 28 | Before you can use Linkspector, you need to install it. You can do this using the following command: 29 | 30 | ```bash 31 | npm install -g @umbrelladocs/linkspector 32 | ``` 33 | 34 | This command installs Linkspector globally, allowing you to use it from anywhere in your terminal. If you don't want to install using `npm`, you can download the binary from GitHub releases.
35 | 36 | ### GitHub action 37 | 38 | For more details, see [action-linkspector](https://github.com/UmbrellaDocs/action-linkspector) 39 | 40 | ## Checking Hyperlinks 41 | 42 | To check hyperlinks in your markup language files, follow these steps: 43 | 44 | 1. Open your terminal. 45 | 46 | 1. Navigate to the directory containing the files you want to check. 47 | 48 | 1. (**Optional**) Create a [configuration](#configuration) file called `.linkspector.yml`. By default, Linkspector looks for a configuration file named `.linkspector.yml` in the current directory. If you have a custom configuration file or want to specify its path, you can use the `-c` or `--config` option. 49 | 50 | 1. Use the `linkspector check` command to initiate the hyperlink check. For example: 51 | 52 | ```bash 53 | linkspector check 54 | ``` 55 | 56 | - To specify a custom configuration file path: 57 | 58 | ```bash 59 | linkspector check -c /path/to/custom-config.yml 60 | ``` 61 | 62 | - To output the results in JSON format: 63 | 64 | ```bash 65 | linkspector check -j 66 | ``` 67 | 68 | The JSON output follows [rdjson](https://github.com/reviewdog/reviewdog/tree/master/proto/rdf#rdjson) format. 69 | 70 | 1. Linkspector starts checking the hyperlinks in your files based on the configuration provided in the configuration file or using the default configuration. It then displays the results in your terminal. 71 | 72 | 1. After the check is complete, Linkspector provides a summary of the results. If any dead links are found, they are listed in the terminal, along with their status codes and error messages. 73 | 74 | - To display statistics about the checked links, use the `-s` or `--showstat` option: 75 | 76 | ```bash 77 | linkspector check -s 78 | ``` 79 | 80 | This command shows a summary table with the number of files checked, total links, hyperlinks, file and header links, and the count of correct and failed links. 
81 | Note that this option cannot be used together with the JSON output option (`-j`). 82 | 83 | 1. If no dead links are found, Linkspector displays a success message, indicating that all links are working. 84 | 85 | ## Configuration 86 | 87 | Linkspector uses a configuration file named `.linkspector.yml` to customize its behavior. If this file is not found in the current directory when the program is run, Linkspector displays a message saying "Configuration file not found. Using default configuration." and uses a default configuration. 88 | 89 | ### Default Configuration 90 | 91 | The default configuration is as follows: 92 | 93 | ```yaml 94 | dirs: 95 | - . 96 | useGitIgnore: true 97 | ``` 98 | 99 | If you are defining a custom configuration, you must include the `dirs` or `files` section in the configuration file. 100 | 101 | Following are the available configuration options: 102 | 103 | | Option | Description | Required | 104 | | ------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | --------------------------------- | 105 | | [`files`](#files-to-check) | The list of Markdown files to check for broken links. | Yes, if `dirs` is not specified. | 106 | | [`dirs`](#directories-to-search) | The list of directories to search for Markdown files. | Yes, if `files` is not specified. | 107 | | [`excludedFiles`](#excluded-files) | The list of Markdown files to exclude from the link checking process. | No | 108 | | [`excludedDirs`](#excluded-directories) | The list of directories to exclude from the link checking process. | No | 109 | | [`baseUrl`](#base-url) | The base URL to use when checking relative links in Markdown files. | No | 110 | | [`ignorePatterns`](#ignore-patterns) | The list of regular expressions that match URLs to be ignored during link checking. 
| No | 111 | | [`replacementPatterns`](#replacement-patterns) | The list of regular expressions and replacement strings to modify URLs during link checking. | No | 112 | | [`aliveStatusCodes`](#alive-status-codes) | The list of HTTP status codes that are considered as "alive" links. | No | 113 | | [`useGitIgnore`](#use-gitignore) | Indicates whether to use the rules defined in the `.gitignore` file to exclude files and directories. | No | 114 | | [`modifiedFilesOnly`](#check-modified-files-only) | Indicates whether to check only the files that have been modified in the last git commit. | No | 115 | | [`httpHeaders`](#http-headers) | The list of URLs and their corresponding HTTP headers to be used during link checking. | No | 116 | | [`followRedirects`](#follow-redirects) | Controls how HTTP redirects (e.g., 301, 302) are handled. | No | 117 | 118 | ### Files to Check 119 | 120 | The `files` section specifies the Markdown files that Linkspector should check for broken links. You can add the file paths you want to include in this list. For example: 121 | 122 | ```yaml 123 | files: 124 | - README.md 125 | - file2.md 126 | - file3.md 127 | ``` 128 | 129 | ### Directories to Search 130 | 131 | The `dirs` section lists the directories where Linkspector should search for Markdown files. You can specify directories relative to the current working directory. For example: 132 | 133 | ```yaml 134 | dirs: 135 | - ./ 136 | - folder2 137 | ``` 138 | 139 | ### Excluded Files 140 | 141 | The `excludedFiles` section allows you to specify Markdown files that should be excluded from the link checking process. Add the paths of the files you want to exclude. For example: 142 | 143 | ```yaml 144 | excludedFiles: 145 | - ./check.md 146 | - excluded-file2.md 147 | ``` 148 | 149 | ### Excluded Directories 150 | 151 | The `excludedDirs` section lets you specify directories that should be excluded from the link checking process. Provide the paths of the directories you want to exclude. 
For example: 152 | 153 | ```yaml 154 | excludedDirs: 155 | - ./lib 156 | - excluded-folder2 157 | ``` 158 | 159 | ### Base URL 160 | 161 | The `baseUrl` option sets the base URL that will be used when checking relative links in Markdown files. In this example: 162 | 163 | ```yaml 164 | baseUrl: https://example.com 165 | ``` 166 | 167 | The base URL is set to `https://example.com`. 168 | 169 | ### Ignore Patterns 170 | 171 | The `ignorePatterns` section allows you to define regular expressions that match URLs to be ignored during the link checking process. For example: 172 | 173 | ```yaml 174 | ignorePatterns: 175 | - pattern: '^https://example.com/skip/.*$' 176 | - pattern: "^(ftp)://[^\\s/$?#]*\\.[^\\s]*$" 177 | ``` 178 | 179 | In this example, URLs matching the specified patterns will be skipped during link checking. 180 | 181 | ### Replacement Patterns 182 | 183 | The `replacementPatterns` section lets you define regular expressions and replacement strings to modify URLs during link checking. For example: 184 | 185 | ```yaml 186 | replacementPatterns: 187 | - pattern: "(https?://example.com)/(\\w+)/(\\d+)" 188 | replacement: '$1/id/$3' 189 | - pattern: "\\[([^\\]]+)\\]\\((https?://example.com)/file\\)" 190 | replacement: '$1' 191 | ``` 192 | 193 | These patterns and replacements will be applied to URLs found in the Markdown files. 194 | 195 | ### Alive Status Codes 196 | 197 | The `aliveStatusCodes` section allows you to specify a list of HTTP status codes that are considered as "alive" links. In this example: 198 | 199 | ```yaml 200 | aliveStatusCodes: 201 | - 200 202 | - 201 203 | - 204 204 | ``` 205 | 206 | Links returning any of these status codes will be considered valid. 207 | 208 | ### Use .gitignore 209 | 210 | The `useGitIgnore` option, when set to `true`, indicates that Linkspector should use the rules defined in the `.gitignore` file to exclude files and directories. 
For example: 211 | 212 | ```yaml 213 | useGitIgnore: true 214 | ``` 215 | 216 | When enabled, the app will respect the `.gitignore` rules during link checking. 217 | 218 | ### Check Modified Files Only 219 | 220 | The `modifiedFilesOnly` option, when set to `true`, indicates that Linkspector should only check the files that have been modified in the last git commit. For example: 221 | 222 | ```yaml 223 | modifiedFilesOnly: true 224 | ``` 225 | 226 | When enabled, Linkspector will use `git` to find the list of modified files and only check those files. Please note that this option requires `git` to be installed and available on your system path. If `git` is not installed or not found in the system path, Linkspector will throw an error. 227 | 228 | Also, if none of the files to check have been modified, Linkspector skips link checking and exits with a message indicating that no modified files were found. 229 | 230 | ### HTTP headers 231 | 232 | The `httpHeaders` option allows you to specify HTTP headers for specific URLs that require authorization. You can use environment variables for secure values. 233 | 234 | 1. Create a `.env` file in the root directory of your project and add the environment variables. For example: 235 | 236 | ```env 237 | AUTH_TOKEN=abcdef123456 238 | ``` 239 | 240 | 1. Add the `httpHeaders` section to the configuration file and specify the URLs and headers. For example: 241 | 242 | ```yaml 243 | httpHeaders: 244 | - url: 245 | - https://example1.com 246 | headers: 247 | Foo: Bar 248 | - url: 249 | - https://example2.com 250 | headers: 251 | Authorization: ${AUTH_TOKEN} 252 | Foo: Bar 253 | ``` 254 | 255 | ### Follow Redirects 256 | 257 | The `followRedirects` option controls how Linkspector handles HTTP redirects (e.g., status codes 301, 302).
258 | 259 | - **Type:** `boolean` 260 | - **Default:** `true` 261 | 262 | **Behavior:** 263 | 264 | - When `followRedirects: true` (default): 265 | Linkspector will follow HTTP redirects to their final destination. The status of the link will be determined by the status code of this final destination. For example, if `http://example.com/old` redirects to `http://example.com/new` and `/new` returns a 200 OK, the original link `/old` will be reported as 'alive' (200), with a message indicating it was redirected. 266 | 267 | - When `followRedirects: false`: 268 | Linkspector will _not_ follow HTTP redirects. If a link returns a redirect status code (e.g., 301, 302, 307, 308), it will be reported as an 'error'. The reported status code will be the original redirect status code (e.g., 301), and the error message will indicate that the link redirected but `followRedirects` was set to `false`. 269 | 270 | **Example:** 271 | 272 | To disable following redirects: 273 | 274 | ```yaml 275 | followRedirects: false 276 | ``` 277 | 278 | ### Sample configuration 279 | 280 | ```yml 281 | files: 282 | - README.md 283 | - file2.md 284 | - file3.md 285 | dirs: 286 | - ./ 287 | - folder2 288 | excludedFiles: 289 | - ./check.md 290 | - excluded-file2.md 291 | excludedDirs: 292 | - ./lib 293 | - excluded-folder2 294 | baseUrl: https://example.com 295 | ignorePatterns: 296 | - pattern: '^https://example.com/skip/.*$' 297 | - pattern: "^(ftp)://[^\\s/$?#]*\\.[^\\s]*$" 298 | replacementPatterns: 299 | - pattern: "(https?://example.com)/(\\w+)/(\\d+)" 300 | replacement: '$1/id/$3' 301 | - pattern: "\\[([^\\]]+)\\]\\((https?://example.com)/file\\)" 302 | replacement: '$1' 303 | httpHeaders: 304 | - url: 305 | - https://example1.com 306 | headers: 307 | Authorization: Basic Zm9vOmJhcg== 308 | Foo: Bar 309 | aliveStatusCodes: 310 | - 200 311 | - 201 312 | - 204 313 | useGitIgnore: true 314 | followRedirects: false # Example of including it in a full config 315 | ``` 316 | 317 | ## Sample 
output 318 | 319 | If there are failed links, Linkspector shows the output as comma-separated values and exits with an error. 320 | `File, HTTP status code, Line number, Error message` 321 | 322 | ``` 323 | REDISTRIBUTED.md, https://unlicense.org/, null, 186, net::ERR_SSL_VERSION_OR_CIPHER_MISMATCH at https://unlicense.org/] 324 | 💥 Error: Some hyperlinks in the specified files are invalid. 325 | ``` 326 | 327 | If there are no errors, Linkspector shows the following message: 328 | 329 | ``` 330 | ✨ Success: All hyperlinks in the specified files are valid. 331 | ``` 332 | 333 | ## Using Linkspector with Docker 334 | 335 | To use Linkspector with Docker, follow these steps: 336 | 337 | 1. Clone the Linkspector repository to your local machine and switch to the cloned directory: 338 | ```bash 339 | git clone git@github.com:UmbrellaDocs/linkspector.git 340 | cd linkspector 341 | ``` 342 | 1. Build the Docker image locally, while being at the root (`.`) of this project: 343 | 344 | ```bash 345 | docker build --no-cache --pull --build-arg LINKSPECTOR_PACKAGE= -t umbrelladocs/linkspector . 346 | ``` 347 | 348 | 1. To perform a check using the default configuration, while being at the root (`$PWD`) of the project to be checked: 349 | 350 | ```bash 351 | docker run --rm -it -v $PWD:/app \ 352 | --name linkspector umbrelladocs/linkspector \ 353 | bash -c 'linkspector check' 354 | ``` 355 | 356 | To specify a custom configuration file path: 357 | 358 | ```bash 359 | docker run --rm -it -v $PWD:/app -v $PWD/custom-config.yml:/path/to/custom-config.yml \ 360 | --name linkspector umbrelladocs/linkspector \ 361 | bash -c 'linkspector check -c /path/to/custom-config.yml' 362 | ``` 363 | 364 | ## Contributing 365 | 366 | If you would like to contribute to Linkspector, please read the [contributing guidelines](/CONTRIBUTING.md).
367 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | We aim to update the supported versions with patches for security vulnerabilities as soon as possible after they are disclosed. 4 | We recommend keeping your installation of Linkspector up to date, as we only support the latest release version with security updates. 5 | 6 | | Version | Supported | 7 | | ------- | ------------------ | 8 | | latest | :white_check_mark: | 9 | 10 | ## Reporting a Vulnerability 11 | 12 | Please report (suspected) security vulnerabilities to **mmfjpjyy@duck.com**. 13 | You will receive a response from us within 48 hours. If the issue is confirmed, we will release a patch as soon as possible depending on the complexity of the issue. 14 | 15 | **Please do not report security vulnerabilities through public GitHub issues.** 16 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | import { program } from 'commander' 4 | import kleur from 'kleur' 5 | import ora from 'ora' 6 | import { linkspector } from './linkspector.js' 7 | import { createRequire } from 'module' 8 | const require = createRequire(import.meta.url) 9 | const pkg = require('./package.json') 10 | 11 | program 12 | .version(pkg.version) 13 | .description('🔍 Uncover broken links in your content.') 14 | .command('check') 15 | .description('Check hyperlinks based on the configuration file.') 16 | .option('-c, --config ', 'Specify a custom configuration file path') 17 | .option('-j, --json', 'Output the results in JSON format') 18 | .option('-s, --showstat', 'Display statistics about the links checked') 19 | .action(async (cmd) => { 20 | // Validate that -j and -s options are not used together 21 | if (cmd.json && 
cmd.showstat) { 22 | console.error( 23 | kleur.red( 24 | 'Error: The --json and --showstat options cannot be used together.' 25 | ) 26 | ) 27 | process.exit(1) 28 | } 29 | 30 | const configFile = cmd.config || '.linkspector.yml' // Use custom config file path if provided 31 | 32 | let currentFile = '' // Variable to store the current file name 33 | let results = [] // Array to store the results if json is true 34 | 35 | // Initialize statistics counters 36 | let stats = { 37 | filesChecked: 0, 38 | totalLinks: 0, 39 | httpLinks: 0, 40 | fileLinks: 0, 41 | emailLinks: 0, 42 | correctLinks: 0, 43 | failedLinks: 0, 44 | } 45 | 46 | const spinner = cmd.json ? null : ora().start() 47 | 48 | try { 49 | let hasErrorLinks = false 50 | // Initialize the results object 51 | let results = { 52 | source: { 53 | name: 'linkspector', 54 | url: 'https://github.com/UmbrellaDocs/linkspector', 55 | }, 56 | severity: 'ERROR', 57 | diagnostics: [], 58 | } 59 | 60 | for await (const { file, result } of linkspector(configFile, cmd)) { 61 | // Update the current file name 62 | currentFile = file 63 | if (!cmd.json) { 64 | spinner.text = `Checking ${currentFile}...` 65 | } 66 | 67 | // Increment file count for statistics 68 | stats.filesChecked++ 69 | 70 | for (const linkStatusObj of result) { 71 | // Count total links 72 | stats.totalLinks++ 73 | 74 | // Count links by type 75 | if (linkStatusObj.link && linkStatusObj.link.match(/^https?:\/\//)) { 76 | stats.httpLinks++ 77 | } else if ( 78 | linkStatusObj.link && 79 | linkStatusObj.link.startsWith('mailto:') 80 | ) { 81 | stats.emailLinks++ 82 | } else if ( 83 | linkStatusObj.link && 84 | (linkStatusObj.link.startsWith('#') || 85 | linkStatusObj.link.includes('.md') || 86 | linkStatusObj.link.includes('#')) 87 | ) { 88 | stats.fileLinks++ 89 | } else if (linkStatusObj.link) { 90 | // Count any remaining links as file links 91 | stats.fileLinks++ 92 | } 93 | 94 | // Count correct vs failed links - Updated to handle skipped links 95 | if 
(linkStatusObj.status === 'error') { 96 | stats.failedLinks++ 97 | if (cmd.json) { 98 | results.diagnostics.push({ 99 | message: `Cannot reach ${linkStatusObj.link} Status: ${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ''}`, 100 | location: { 101 | path: currentFile, 102 | range: { 103 | start: { 104 | line: linkStatusObj.line_number, 105 | column: linkStatusObj.position.start.column, 106 | }, 107 | end: { 108 | line: linkStatusObj.position.end.line, 109 | column: linkStatusObj.position.end.column, 110 | }, 111 | }, 112 | }, 113 | severity: linkStatusObj.status.toUpperCase(), 114 | }) 115 | } else { 116 | // If json is false, print the results in the console 117 | spinner.stop() 118 | console.log( 119 | kleur.red( 120 | `${currentFile}:${linkStatusObj.line_number}:${linkStatusObj.position.start.column}: 🚫 ${linkStatusObj.link} Status:${linkStatusObj.status_code}${linkStatusObj.error_message ? ` ${linkStatusObj.error_message}` : ' Cannot reach link'}` 121 | ) 122 | ) 123 | spinner.start(`Checking ${currentFile}...`) 124 | } 125 | hasErrorLinks = true 126 | } else if ( 127 | linkStatusObj.status === 'alive' || 128 | linkStatusObj.status === 'assumed alive' 129 | ) { 130 | stats.correctLinks++ 131 | } else if (linkStatusObj.status === 'skipped') { 132 | // Skipped links don't count towards failed links 133 | } else { 134 | // Count other status as failed 135 | stats.failedLinks++ 136 | } 137 | } 138 | } 139 | 140 | if (cmd.json) { 141 | // If there are no links with a status of "error", print a blank object 142 | if (results.diagnostics.length === 0) { 143 | console.log('{}') 144 | } else { 145 | console.log(JSON.stringify(results, null, 2)) 146 | } 147 | } 148 | 149 | // Display statistics if --showstat option is used 150 | if (cmd.showstat) { 151 | spinner.stop() 152 | console.log('\n' + kleur.bold('💀📊 Linkspector check stats')) 153 | console.log('┌───────────────────────────────┬────────┐') 154 | console.log( 155 | 
`│ 🟰 ${kleur.bold('Total files checked')} │ ${kleur.cyan(padNumber(stats.filesChecked))} │` 156 | ) 157 | console.log('├───────────────────────────────┼────────┤') 158 | console.log( 159 | `│ 🔗 ${kleur.bold('Total links checked')} │ ${kleur.cyan(padNumber(stats.totalLinks))} │` 160 | ) 161 | console.log('├───────────────────────────────┼────────┤') 162 | console.log( 163 | `│ 🌐 ${kleur.bold('Hyperlinks')} │ ${kleur.cyan(padNumber(stats.httpLinks))} │` 164 | ) 165 | console.log('├───────────────────────────────┼────────┤') 166 | console.log( 167 | `│ 📁 ${kleur.bold('File and header links')} │ ${kleur.cyan(padNumber(stats.fileLinks))} │` 168 | ) 169 | console.log('├───────────────────────────────┼────────┤') 170 | console.log( 171 | `│ ✉️ ${kleur.bold('Email links (Skipped)')} │ ${kleur.cyan(padNumber(stats.emailLinks))} │` 172 | ) 173 | console.log('├───────────────────────────────┼────────┤') 174 | console.log( 175 | `│ ✅ ${kleur.bold('Working links')} │ ${kleur.green(padNumber(stats.correctLinks))} │` 176 | ) 177 | console.log('├───────────────────────────────┼────────┤') 178 | console.log( 179 | `│ 🚫 ${kleur.bold('Failed links')} │ ${kleur.red(padNumber(stats.failedLinks))} │` 180 | ) 181 | console.log('└───────────────────────────────┴────────┘') 182 | console.log('') 183 | } 184 | 185 | if (!hasErrorLinks) { 186 | if (!cmd.json && !cmd.showstat) { 187 | spinner.stop() 188 | console.log( 189 | kleur.green( 190 | '✨ Success: All hyperlinks in the specified files are valid.' 191 | ) 192 | ) 193 | } 194 | process.exit(0) 195 | } else { 196 | if (!cmd.json && !cmd.showstat) { 197 | spinner.stop() 198 | console.error( 199 | kleur.red( 200 | '💥 Error: Some hyperlinks in the specified files are invalid.' 201 | ) 202 | ) 203 | } else if (cmd.showstat) { 204 | console.error( 205 | kleur.red( 206 | '💥 Error: Some hyperlinks in the specified files are invalid.' 
207 | ) 208 | ) 209 | } 210 | process.exit(1) 211 | } 212 | } catch (error) { 213 | if (spinner) spinner.stop() 214 | console.error(kleur.red(`💥 Main error: ${error.message}`)) 215 | process.exit(1) 216 | } 217 | 218 | // Helper function to pad numbers for consistent table formatting 219 | function padNumber(num) { 220 | return num.toString().padStart(6, ' ') 221 | } 222 | }) 223 | 224 | // Parse the command line arguments 225 | program.parse(process.argv) 226 | -------------------------------------------------------------------------------- /index.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check top-level relative links in Markdown file', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './.linkspector.test.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(false) 37 | expect(results.length).toBe(23) 38 | }) 39 | 40 | test('linkspector should track statistics correctly when stats option is enabled', async () => { 41 | let cmd = { 42 | showstat: true, 43 | } 44 | 45 | // Initialize statistics counters 46 | let stats = { 47 | filesChecked: 0, 48 | totalLinks: 0, 49 | httpLinks: 
0, 50 | fileLinks: 0, 51 | correctLinks: 0, 52 | failedLinks: 0, 53 | } 54 | 55 | for await (const { file, result } of linkspector( 56 | './.linkspector.test.yml', 57 | cmd 58 | )) { 59 | // Increment file count for statistics 60 | stats.filesChecked++ 61 | 62 | for (const linkStatusObj of result) { 63 | // Count total links 64 | stats.totalLinks++ 65 | 66 | // Count HTTP vs File links 67 | if (linkStatusObj.link.match(/^https?:\/\//)) { 68 | stats.httpLinks++ 69 | } else if ( 70 | !linkStatusObj.link.startsWith('#') && 71 | !linkStatusObj.link.startsWith('mailto:') 72 | ) { 73 | stats.fileLinks++ 74 | } 75 | 76 | // Count correct vs failed links 77 | if (linkStatusObj.status === 'error') { 78 | stats.failedLinks++ 79 | } else if ( 80 | linkStatusObj.status === 'alive' || 81 | linkStatusObj.status === 'assumed alive' 82 | ) { 83 | stats.correctLinks++ 84 | } 85 | } 86 | } 87 | 88 | // Verify statistics are being tracked correctly 89 | expect(stats.filesChecked).toBeGreaterThan(0) 90 | expect(stats.totalLinks).toBe(23) 91 | expect(stats.totalLinks).toBe( 92 | stats.httpLinks + 93 | stats.fileLinks + 94 | (stats.totalLinks - stats.httpLinks - stats.fileLinks) 95 | ) 96 | expect(stats.totalLinks).toBe(stats.correctLinks + stats.failedLinks) 97 | expect(stats.correctLinks).toBeGreaterThanOrEqual(0) 98 | expect(stats.failedLinks).toBe(0) 99 | }) 100 | -------------------------------------------------------------------------------- /lib/batch-check-links.js: -------------------------------------------------------------------------------- 1 | import puppeteer from 'puppeteer' 2 | import url from 'url' 3 | import { checkFileExistence } from './check-file-links.js' 4 | 5 | function isUrl(s) { 6 | try { 7 | new url.URL(s) 8 | return true 9 | } catch (err) { 10 | return false 11 | } 12 | } 13 | 14 | function createLinkStatus(link, status, statusCode, errorMessage = null) { 15 | return { 16 | link: link.url, 17 | status, 18 | status_code: statusCode, 19 | line_number: 
link.position ? link.position.start.line : null, 20 | position: link.position, 21 | error_message: errorMessage, 22 | } 23 | } 24 | 25 | async function processLink( 26 | link, 27 | page, 28 | aliveStatusCodes, 29 | httpHeaders, 30 | followRedirects 31 | ) { 32 | let status = null 33 | let statusCode = null 34 | let errorMessage = null 35 | 36 | try { 37 | if (isUrl(link.url)) { 38 | const headers = 39 | httpHeaders.find((header) => 40 | header.url.some((urlPattern) => link.url.includes(urlPattern)) 41 | )?.headers || {} 42 | 43 | const response = await page.goto(link.url, { 44 | waitUntil: 'load', // Puppeteer follows redirects by default. 45 | headers, 46 | }) 47 | statusCode = response.status() 48 | const redirectChain = response.request().redirectChain() 49 | 50 | if (!followRedirects && redirectChain.length > 0) { 51 | // If followRedirects is false and there was a redirect 52 | status = 'error' 53 | const originalStatusCode = redirectChain[0].response().status() 54 | errorMessage = `Link redirected (from ${redirectChain[0].url()} status: ${originalStatusCode} to ${response.url()}), but followRedirects is set to false.` 55 | // We might want to use the original redirect status code if available and makes sense 56 | statusCode = originalStatusCode !== 0 ? originalStatusCode : statusCode 57 | } else if (aliveStatusCodes && aliveStatusCodes.includes(statusCode)) { 58 | status = 'assumed alive' 59 | } else { 60 | status = response.ok() ? 
/**
 * Checks a single link in a fresh browser page.
 *
 * The page is always closed, even when `processLink` keeps failing. When every
 * attempt throws (or `retryCount` is 0) an explicit error status is returned
 * instead of `undefined`.
 *
 * @param {Object} browser - Browser handle returned by `puppeteer.launch`.
 * @param {Object} link - The AST node being checked.
 * @param {Object} settings - `retryCount`, `aliveStatusCodes`, `httpHeaders`, `followRedirects`.
 * @returns {Promise<Object>} The link status for this link.
 */
async function checkLinkInBrowser(browser, link, settings) {
  const { retryCount, aliveStatusCodes, httpHeaders, followRedirects } =
    settings
  const page = await browser.newPage()
  let lastError = null

  try {
    await page.setUserAgent(
      'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36'
    )

    // Only the document itself matters for a liveness check, so every
    // secondary resource is aborted.
    await page.setRequestInterception(true)
    page.on('request', (request) => {
      if (request.isInterceptResolutionHandled()) return
      const blockedTypes = [
        'font',
        'image',
        'media',
        'script',
        'stylesheet',
        'other',
        'websocket',
      ]
      if (blockedTypes.includes(request.resourceType())) {
        request.abort()
      } else {
        request.continue()
      }
    })

    for (let attempt = 0; attempt < retryCount; attempt++) {
      try {
        return await processLink(
          link,
          page,
          aliveStatusCodes,
          httpHeaders,
          followRedirects
        )
      } catch (error) {
        lastError = error
      }
    }
  } finally {
    await page.close()
  }

  // NOTE(review): assumes createLinkStatus accepts a null status code for
  // "no response at all" - confirm against its definition.
  return createLinkStatus(
    link,
    'error',
    null,
    `Failed to check link after ${retryCount} attempt(s)${lastError ? `: ${lastError.message}` : ''}`
  )
}

/**
 * Checks every link, definition and image node and reports its status.
 *
 * URL targets are first probed with a plain `fetch` HEAD request. URLs whose
 * probe throws, or answers with a status that is neither ok nor listed in
 * `aliveStatusCodes`, are re-checked in a headless browser through
 * `processLink`, `batchSize` links at a time. Everything that is not a URL is
 * resolved with `checkFileExistence`.
 *
 * @param {Array<Object>} nodes - AST nodes; only `link`, `definition` and `image` nodes are checked.
 * @param {Object} [options={}] - Check settings.
 * @param {number} [options.batchSize=100] - Links processed concurrently in the browser pass.
 * @param {number} [options.retryCount=3] - Maximum browser attempts per link.
 * @param {number[]} [options.aliveStatusCodes] - Extra status codes reported as 'assumed alive'.
 * @param {Array<Object>} [options.httpHeaders=[]] - Header rules forwarded to `processLink`.
 * @param {boolean} [options.followRedirects=true] - When false, a redirect is reported as an error.
 * @param {string} filePath - File the nodes came from; used to resolve local links.
 * @returns {Promise<Array<Object>>} One status object (built by `createLinkStatus`) per checked node.
 */
async function checkHyperlinks(nodes, options = {}, filePath) {
  const {
    batchSize = 100,
    retryCount = 3,
    aliveStatusCodes,
    httpHeaders = [],
    followRedirects = true, // Default to true if not provided
  } = options
  const linkStatusList = []
  const browserQueue = []

  const checkableNodes = nodes.filter(
    (node) =>
      node.type === 'link' ||
      node.type === 'definition' ||
      node.type === 'image'
  )

  // First pass: HEAD request for URLs, file-system lookup for everything else
  for (const link of checkableNodes) {
    if (!isUrl(link.url)) {
      const fileStatus = checkFileExistence(link, filePath)
      linkStatusList.push(
        createLinkStatus(
          link,
          fileStatus.status,
          fileStatus.statusCode,
          fileStatus.errorMessage
        )
      )
      continue
    }

    try {
      const response = await fetch(link.url, {
        method: 'HEAD',
        redirect: followRedirects ? 'follow' : 'manual',
      })
      const statusCode = response.status
      const isRedirect =
        response.type === 'opaqueredirect' ||
        [301, 302, 307, 308].includes(statusCode)

      if (!followRedirects && isRedirect) {
        // Manual redirect mode: a redirect is an error by configuration
        const redirectedTo = response.headers.get('location')
        const errorMessage = `Link redirected${redirectedTo ? ' to ' + redirectedTo : ''}, but followRedirects is set to false.`
        linkStatusList.push(
          createLinkStatus(
            link,
            'error',
            // An opaque redirect hides its status; report it as 302
            statusCode === 0 && response.type === 'opaqueredirect'
              ? 302
              : statusCode,
            errorMessage
          )
        )
      } else if (response.ok) {
        const message = response.redirected
          ? `redirected to ${response.url}`
          : null
        linkStatusList.push(
          createLinkStatus(link, 'alive', statusCode, message)
        )
      } else if (aliveStatusCodes && aliveStatusCodes.includes(statusCode)) {
        linkStatusList.push(
          createLinkStatus(link, 'assumed alive', statusCode)
        )
      } else {
        // Some servers reject HEAD or non-browser clients; retry in a browser
        browserQueue.push(link)
      }
    } catch (error) {
      browserQueue.push(link)
    }
  }

  if (browserQueue.length === 0) {
    return linkStatusList
  }

  // Second pass: re-check the failed links with puppeteer
  // A non-positive batch size would never advance the loop below.
  const step = Number.isInteger(batchSize) && batchSize > 0 ? batchSize : 100
  const browser = await puppeteer.launch({
    headless: 'new',
    args: ['--disable-features=DialMediaRouteProvider'],
  })
  try {
    for (let i = 0; i < browserQueue.length; i += step) {
      const batch = browserQueue.slice(i, i + step)
      const batchStatuses = await Promise.all(
        batch.map((link) =>
          checkLinkInBrowser(browser, link, {
            retryCount,
            aliveStatusCodes,
            httpHeaders,
            followRedirects,
          })
        )
      )
      linkStatusList.push(...batchStatuses)
    }
  } finally {
    // Never leave a headless browser process behind, even on failure
    await browser.close()
  }
  return linkStatusList
}
// Matches an HTML anchor carrying a `name` or `id` attribute; group 2 is the
// attribute value. Attributes such as `data-id` are deliberately not matched.
const HTML_ANCHOR_PATTERN =
  /<a\s+(?:[^>]*?\s)?(name|id)\s*=\s*["']([^"']+)["']/i

/**
 * Collects every anchor a `#section` link may point at inside a Markdown file:
 * heading slugs, explicit `{#custom-id}` heading ids and HTML `<a name|id>` anchors.
 *
 * @param {string} filePath - Absolute path of the file (used as the parse-cache key).
 * @param {string} mdContent - The file's content.
 * @returns {Set<string>} The anchor ids found in the file.
 */
function collectSectionIds(filePath, mdContent) {
  // Use the cache if the file has been parsed before
  let tree = fileCache[filePath]
  if (!tree) {
    tree = unified().use(remarkParse).use(remarkGfm).parse(mdContent)
    fileCache[filePath] = tree
  }

  // A fresh slugger per call keeps duplicate-heading counters file-local
  const slugger = new GithubSlugger()
  const sectionIds = new Set()

  visit(tree, ['heading', 'html'], (node) => {
    if (node.type === 'heading') {
      // An empty heading ("#") has no children at all
      const firstChild = node.children[0]
      let headingId
      if (firstChild?.type === 'html') {
        headingId = firstChild.value.match(/name="(.+?)"/)?.[1]
      } else if (firstChild?.value?.includes('{#')) {
        headingId = firstChild.value.match(/{#(.+?)}/)?.[1]
      } else {
        headingId = slugger.slug(getText(node))
      }
      sectionIds.add(headingId)
    } else {
      // Match both name and id attributes in HTML anchors
      const anchorMatch = node.value.match(HTML_ANCHOR_PATTERN)
      if (anchorMatch) {
        sectionIds.add(anchorMatch[2])
      }
    }
  })

  return sectionIds
}

/**
 * Checks if a file and a section within the file exist.
 *
 * The part of the URL before `#` selects the file: a leading `/` is resolved
 * against the current working directory, an empty value or the current file's
 * own name means the current file, anything else is resolved relative to the
 * current file's directory. The part after `#` is either a line reference
 * (`L20`, `L23-L50`) or a section anchor.
 *
 * Unexpected failures (for example an unreadable target) are logged and the
 * link is left as 'alive'; this best-effort behaviour is kept on purpose.
 *
 * @param {Object} link - The link object; only `link.url` is read.
 * @param {string} file - The current file path.
 * @returns {{statusCode: string, status: string, errorMessage: string}}
 *   '200'/'alive' when the target exists, otherwise '404'/'error' with a message.
 */
function checkFileExistence(link, file) {
  let statusCode = '200'
  let status = 'alive'
  let errorMessage = ''

  const markMissing = (message) => {
    statusCode = '404'
    status = 'error'
    errorMessage = message
  }

  try {
    // Split the URL into the file part and the section part
    const [urlWithoutSection = '', sectionId = null] = link.url.split('#')

    let filePath
    if (urlWithoutSection.startsWith('/')) {
      filePath = path.join(process.cwd(), urlWithoutSection)
    } else if (
      urlWithoutSection === '' ||
      urlWithoutSection === path.basename(file)
    ) {
      filePath = file
    } else {
      filePath = path.resolve(path.dirname(file), urlWithoutSection)
    }

    if (!fs.existsSync(filePath)) {
      markMissing(`Cannot find: ${link.url}`)
    } else if (sectionId) {
      const mdContent = fs.readFileSync(filePath, 'utf8')

      // Line reference, e.g. L20 or L23-L50
      const lineReferenceMatch = sectionId.match(/^L(\d+)(?:-L(\d+))?$/)

      if (lineReferenceMatch) {
        const totalLineCount = mdContent.split('\n').length
        const startLine = parseInt(lineReferenceMatch[1], 10)
        const endLine = lineReferenceMatch[2]
          ? parseInt(lineReferenceMatch[2], 10)
          : startLine

        // Validate both ends so reversed ranges (L50-L2) and L0 are caught too
        const lowestLine = Math.min(startLine, endLine)
        const highestLine = Math.max(startLine, endLine)
        if (lowestLine < 1) {
          markMissing(
            `Cannot find Line ${lowestLine} in file: ${filePath}. File has ${totalLineCount} lines.`
          )
        } else if (highestLine > totalLineCount) {
          markMissing(
            `Cannot find Line ${highestLine} in file: ${filePath}. File has ${totalLineCount} lines.`
          )
        }
      } else {
        // A malformed escape such as "#100%" must not abort the check
        let decodedSectionId
        try {
          decodedSectionId = decodeURIComponent(sectionId)
        } catch {
          decodedSectionId = sectionId
        }

        if (!collectSectionIds(filePath, mdContent).has(decodedSectionId)) {
          markMissing(
            `Cannot find section: #${sectionId} in file: ${filePath}.`
          )
        }
      }
    }
  } catch (err) {
    console.error(`Error in checking if file ${link.url} exist! ${err}`)
  }

  return { statusCode, status, errorMessage }
}

/**
 * Get the text content of a node.
 *
 * Text and inline code contribute their value, images their alt text (an
 * image without alt text contributes nothing); any other node contributes
 * the concatenated text of its children.
 *
 * @param {Object} node - The node object.
 * @returns {string} The text content of the node.
 */
function getText(node) {
  if (node.type === 'image') {
    return node.alt ?? ''
  }
  if (node.type === 'text' || node.type === 'inlineCode') {
    return node.value
  }
  if (Array.isArray(node.children)) {
    return node.children.map(getText).join('')
  }
  return ''
}
123 | */ 124 | if ( 125 | node.type === 'text' || 126 | node.type === 'inlineCode' || 127 | node.type === 'image' 128 | ) { 129 | return node.type === 'image' ? node.alt : node.value 130 | } 131 | 132 | if (Array.isArray(node.children)) { 133 | return node.children.map(getText).join('') 134 | } 135 | 136 | return '' 137 | } 138 | 139 | export { checkFileExistence } 140 | -------------------------------------------------------------------------------- /lib/extract-asciidoc-links.js: -------------------------------------------------------------------------------- 1 | import fs from 'fs' 2 | import readline from 'readline' 3 | import { doReplacements } from './handle-links-modification.js' 4 | 5 | function extractAsciiDocLinks(filePath, options) { 6 | return new Promise((resolve) => { 7 | const links = [] 8 | const internalRefs = new Map() 9 | const externalRefs = new Map() 10 | const externalURLs = new Map() 11 | 12 | let insideCommentBlock = false 13 | 14 | const rl = readline.createInterface({ 15 | input: fs.createReadStream(filePath), 16 | crlfDelay: Infinity, 17 | }) 18 | 19 | let lineNumber = 0 20 | 21 | const urlRegex = 22 | /(?:https?|ftp|irc|mailto):\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,4}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/g 23 | 24 | rl.on('line', (line) => { 25 | lineNumber++ 26 | // Ignore comment blocks 27 | if (line.startsWith('////')) { 28 | insideCommentBlock = !insideCommentBlock 29 | } 30 | if (insideCommentBlock) { 31 | return 32 | } 33 | // Ignore single-line comments 34 | if (line.startsWith('//')) { 35 | return 36 | } 37 | // Extract external hyperlinks 38 | let match 39 | while ((match = urlRegex.exec(line)) !== null) { 40 | const url = match[0].replace(/^link:/, '') // Remove 'link:' prefix if present 41 | const position = { 42 | start: { 43 | line: lineNumber, 44 | column: match.index, 45 | offset: match.index, 46 | }, 47 | end: { 48 | line: lineNumber, 49 | column: match.index + match[0].length, 50 | offset: match.index + 
match[0].length, 51 | }, 52 | } 53 | const linkNode = { 54 | type: 'link', 55 | title: null, 56 | url, 57 | children: [], 58 | position, 59 | } 60 | const existingLink = links.find( 61 | (link) => 62 | link.url === linkNode.url && 63 | link.position.start.line === linkNode.position.start.line && 64 | link.position.start.column === linkNode.position.start.column 65 | ) 66 | if (!existingLink) { 67 | links.push(linkNode) // Add link to the array only if it's not already there 68 | } 69 | continue 70 | } 71 | // Extract internal and external references 72 | if (line.match(/\[\[[^\]]+\]\]/g)) { 73 | let extractLink = line.match(/\[\[[^\]]+\]\]/g) 74 | for (let i = 0; i < extractLink.length; i++) { 75 | let newAnchor = extractLink[i] 76 | newAnchor = newAnchor.replace('[[', '') 77 | newAnchor = newAnchor.replace(']]', '') 78 | newAnchor = newAnchor.replace(/,.*/g, '') // take into account ',' 79 | const matchIndex = line.indexOf(extractLink[i]) // Get the index of the match 80 | const startColumn = matchIndex + 2 // Add 2 to account for the [[ characters 81 | const endColumn = startColumn + newAnchor.length 82 | const startPosition = { 83 | line: lineNumber, 84 | column: startColumn, 85 | offset: matchIndex, 86 | } 87 | const endPosition = { 88 | line: lineNumber, 89 | column: endColumn, 90 | offset: matchIndex + newAnchor.length, 91 | } 92 | const position = { 93 | start: startPosition, 94 | end: endPosition, 95 | } 96 | const linkNode = { 97 | type: 'internal-ref', 98 | title: null, 99 | url: newAnchor, 100 | children: [], 101 | position, 102 | } 103 | internalRefs.set(newAnchor, linkNode) 104 | } 105 | return 106 | } 107 | if (line.match(/^[\s]*[\*\-][\s]+\[\[\[[^\]]+\]\]\]/g)) { 108 | let extractLink = line.match(/\[\[\[[^\]]+\]\]\]/g) 109 | for (let i = 0; i < extractLink.length; i++) { 110 | let newAnchor = extractLink[i] 111 | newAnchor = newAnchor.replace('[[[', '') 112 | newAnchor = newAnchor.replace(']]]', '') 113 | newAnchor = newAnchor.replace(/,.*/g, '') // 
take into account ',' 114 | const matchIndex = line.indexOf(extractLink[i]) // Get the index of the match 115 | const startColumn = matchIndex + 4 // Add 4 to account for the [*-] and [[[ characters 116 | const endColumn = startColumn + newAnchor.length 117 | const startPosition = { 118 | line: lineNumber, 119 | column: startColumn, 120 | offset: matchIndex, 121 | } 122 | const endPosition = { 123 | line: lineNumber, 124 | column: endColumn, 125 | offset: matchIndex + newAnchor.length, 126 | } 127 | const position = { 128 | start: startPosition, 129 | end: endPosition, 130 | } 131 | const linkNode = { 132 | type: 'internal-ref', 133 | title: null, 134 | url: newAnchor, 135 | children: [], 136 | position, 137 | } 138 | internalRefs.set(newAnchor, linkNode) 139 | } 140 | return 141 | } 142 | if (line.match(/\[#[^\]]+\]/g)) { 143 | const extractLink = line.match(/\[#[^\]]+\]/g) 144 | extractLink.forEach((link) => { 145 | const newAnchor = link.replace(/^\[#|]$/g, '') 146 | const matchIndex = line.indexOf(link) // Get the index of the match 147 | const startColumn = matchIndex + 2 // Add 2 to account for the [# characters 148 | const endColumn = startColumn + newAnchor.length 149 | const startPosition = { 150 | line: lineNumber, 151 | column: startColumn, 152 | offset: matchIndex, 153 | } 154 | const endPosition = { 155 | line: lineNumber, 156 | column: endColumn, 157 | offset: matchIndex + newAnchor.length, 158 | } 159 | const position = { 160 | start: startPosition, 161 | end: endPosition, 162 | } 163 | const linkNode = { 164 | type: 'internal-ref', 165 | title: null, 166 | url: newAnchor, 167 | children: [], 168 | position, 169 | } 170 | internalRefs.set(newAnchor, linkNode) 171 | }) 172 | return 173 | } 174 | if (line.match(/(anchor:[^\[]+)\[[^\]]*\]/g)) { 175 | let extractLink = line.match(/(anchor:[^\[]+)\[[^\]]*\]/g) 176 | extractLink.forEach((link) => { 177 | let newAnchor = link.replace(/^anchor:|\[/g, '') 178 | 179 | const matchIndex = line.indexOf(link) // 
Get the index of the match 180 | const startColumn = matchIndex + 7 // Add 7 to account for the "anchor:" characters 181 | const endColumn = startColumn + newAnchor.length 182 | const startPosition = { 183 | line: lineNumber, 184 | column: startColumn, 185 | offset: matchIndex, 186 | } 187 | const endPosition = { 188 | line: lineNumber, 189 | column: endColumn, 190 | offset: matchIndex + newAnchor.length, 191 | } 192 | const position = { 193 | start: startPosition, 194 | end: endPosition, 195 | } 196 | const linkNode = { 197 | type: 'internal-ref', 198 | title: null, 199 | url: newAnchor, 200 | children: [], 201 | position, 202 | } 203 | internalRefs.set(newAnchor, linkNode) 204 | }) 205 | return 206 | } 207 | if (line.match(/<<[^\>]+>>/g)) { 208 | let extractLink = line.match(/<<[^\>]+>>/g) 209 | for (let i = 0; i < extractLink.length; i++) { 210 | let newReference = extractLink[i] 211 | newReference = newReference.replace('<<', '') 212 | newReference = newReference.replace('>>', '') 213 | newReference = newReference.replace(/,.*/g, '') // take into account <> 214 | const matchIndex = line.indexOf(extractLink[i]) // Get the index of the match 215 | const startColumn = matchIndex + 2 // Add 2 to account for the << characters 216 | const endColumn = startColumn + newReference.length 217 | const startPosition = { 218 | line: lineNumber, 219 | column: startColumn, 220 | offset: matchIndex, 221 | } 222 | const endPosition = { 223 | line: lineNumber, 224 | column: endColumn, 225 | offset: matchIndex + newReference.length, 226 | } 227 | if (newReference.startsWith('#')) { 228 | const position = { 229 | start: startPosition, 230 | end: endPosition, 231 | } 232 | const linkNode = { 233 | type: 'internal-ref', 234 | title: null, 235 | url: newReference, 236 | children: [], 237 | position, 238 | } 239 | internalRefs.set(newReference, linkNode) 240 | } else if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) { 241 | newReference = newReference.replace( 242 | 
/(\.adoc|\.asciidoc|\.asc)(#)?/, 243 | function (_, extension) { 244 | return extension + '#' 245 | } 246 | ) 247 | const position = { 248 | start: startPosition, 249 | end: endPosition, 250 | } 251 | const linkNode = { 252 | type: 'external-ref', 253 | title: null, 254 | url: newReference, 255 | children: [], 256 | position, 257 | } 258 | externalRefs.set(newReference, linkNode) 259 | } else { 260 | const position = { 261 | start: startPosition, 262 | end: endPosition, 263 | } 264 | const linkNode = { 265 | type: 'internal-ref', 266 | title: null, 267 | url: newReference, 268 | children: [], 269 | position, 270 | } 271 | internalRefs.set(newReference, linkNode) 272 | } 273 | } 274 | return 275 | } 276 | if (line.match(/xref:[^\[]+\[[^\]]*\]/g)) { 277 | let extractLink = line.match(/xref:[^\[]+\[[^\]]*\]/g) 278 | extractLink.forEach((link) => { 279 | let newReference = link.replace(/^xref:|\[/g, '') 280 | const matchIndex = line.indexOf(link) // Get the index of the match 281 | const startColumn = matchIndex + 5 // Add 5 to account for the "xref:" characters 282 | const endColumn = startColumn + newReference.length 283 | const startPosition = { 284 | line: lineNumber, 285 | column: startColumn, 286 | offset: matchIndex, 287 | } 288 | const endPosition = { 289 | line: lineNumber, 290 | column: endColumn, 291 | offset: matchIndex + newReference.length, 292 | } 293 | if (newReference.match(/(\.adoc)|(\.asciidoc)|(\.asc)|(#)/g)) { 294 | newReference = newReference.replace( 295 | /(\.adoc|\.asciidoc|\.asc)(#)?/, 296 | (_, extension) => extension + '#' 297 | ) 298 | const position = { 299 | start: startPosition, 300 | end: endPosition, 301 | } 302 | const linkNode = { 303 | type: 'external-ref', 304 | title: null, 305 | url: newReference, 306 | children: [], 307 | position, 308 | } 309 | externalRefs.set(newReference, linkNode) 310 | } else { 311 | const position = { 312 | start: startPosition, 313 | end: endPosition, 314 | } 315 | const linkNode = { 316 | type: 
'internal-ref', 317 | title: null, 318 | url: newReference, 319 | children: [], 320 | position, 321 | } 322 | internalRefs.set(newReference, linkNode) 323 | } 324 | }) 325 | return 326 | } 327 | if (line.match(/link:[^\[]+\[[^\]]*\]/g)) { 328 | let extractLink = line.match(/link:[^\[]+\[[^\]]*\]/g) 329 | extractLink.forEach((link) => { 330 | let newReference = link.replace(/^link:|\[/g, '') 331 | const matchIndex = line.indexOf(link) // Get the index of the match 332 | const startColumn = matchIndex + 5 // Add 5 to account for the "link:" characters 333 | const endColumn = startColumn + newReference.length 334 | const startPosition = { 335 | line: lineNumber, 336 | column: startColumn, 337 | offset: matchIndex, 338 | } 339 | const endPosition = { 340 | line: lineNumber, 341 | column: endColumn, 342 | offset: matchIndex + newReference.length, 343 | } 344 | if (newReference.match(/^(https?:\/\/|ftp|irc|mailto):\/\//g)) { 345 | const position = { 346 | start: startPosition, 347 | end: endPosition, 348 | } 349 | const linkNode = { 350 | type: newReference.startsWith('http') ? 
'link' : 'external-ref', 351 | title: null, 352 | url: newReference, 353 | children: [], 354 | position, 355 | } 356 | if (linkNode.type === 'link') { 357 | const existingLink = links.find( 358 | (link) => 359 | link.url === linkNode.url && 360 | link.position.start.line === linkNode.position.start.line && 361 | link.position.start.column === linkNode.position.start.column 362 | ) 363 | if (!existingLink) { 364 | links.push(linkNode) // Add link to the array only if it's not already there 365 | } 366 | } else { 367 | externalRefs.set(newReference, linkNode) 368 | } 369 | } else { 370 | newReference = newReference.replace(/(\.html?5?)#.*/, '$1') 371 | const position = { 372 | start: startPosition, 373 | end: endPosition, 374 | } 375 | const linkNode = { 376 | type: 'external-ref', 377 | title: null, 378 | url: newReference, 379 | children: [], 380 | position, 381 | } 382 | externalRefs.set(newReference, linkNode) 383 | } 384 | }) 385 | return 386 | } 387 | if ( 388 | line.match( 389 | /(?:^|<|[\s>\(\)\[\];])((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g 390 | ) 391 | ) { 392 | let extractLink = line.match( 393 | /((https?|file|ftp|irc):\/\/[^\s\[\]<]*[^\s.,\[\]<\)])/g 394 | ) 395 | for (let i = 0; i < extractLink.length; i++) { 396 | let newReference = extractLink[i] 397 | const matchIndex = line.indexOf(extractLink[i]) // Get the index of the match 398 | const startColumn = matchIndex 399 | const endColumn = startColumn + newReference.length 400 | const startPosition = { 401 | line: lineNumber, 402 | column: startColumn, 403 | offset: matchIndex, 404 | } 405 | const endPosition = { 406 | line: lineNumber, 407 | column: endColumn, 408 | offset: matchIndex + newReference.length, 409 | } 410 | const position = { 411 | start: startPosition, 412 | end: endPosition, 413 | } 414 | const linkNode = { 415 | type: 'link', 416 | title: null, 417 | url: newReference, 418 | children: [], 419 | position, 420 | } 421 | const existingLink = links.find( 422 | (link) => 423 
| link.url === linkNode.url && 424 | link.position.start.line === linkNode.position.start.line && 425 | link.position.start.column === linkNode.position.start.column 426 | ) 427 | if (!existingLink) { 428 | links.push(linkNode) // Add link to the array only if it's not already there 429 | } 430 | } 431 | return 432 | } 433 | }) 434 | rl.on('close', () => { 435 | const result = [ 436 | ...links.values(), 437 | ...internalRefs.values(), 438 | ...externalRefs.values(), 439 | ...externalURLs.values(), 440 | ] 441 | resolve(doReplacements(result, options)) 442 | }) 443 | }) 444 | } 445 | 446 | export { extractAsciiDocLinks } 447 | -------------------------------------------------------------------------------- /lib/extract-markdown-hyperlinks.js: -------------------------------------------------------------------------------- 1 | import { unified } from 'unified' 2 | import remarkParse from 'remark-parse' 3 | import remarkGfm from 'remark-gfm' 4 | import { visit } from 'unist-util-visit' 5 | import { doReplacements } from './handle-links-modification.js' 6 | 7 | // 8 | // Function: extractMarkdownHyperlinks 9 | // Description: Extracts all links from a markdown string 10 | // Arguments: 11 | // markdownText - The markdown string to extract links from 12 | // options (optional) - An object specifying additional settings 13 | // - ignorePatterns (optional) - An array of objects holding regular expressions to skip link checking 14 | // - replacementPatterns (optional) - An array of objects holding regular expressions for link replacements 15 | // - baseUrl (optional) - A string specifying the base URL to prefix to URLs that start with '/' 16 | // Returns: 17 | // An array of MDAST nodes that represent headings, links, link references, definitions, and image references 18 | // See https://github.com/syntax-tree/mdast for more information on the types of MDAST nodes 19 | // 20 | 21 | function extractMarkdownHyperlinks(markdownText, options) { 22 | const tree = 
/**
 * Extracts all link-bearing nodes from a markdown string.
 *
 * @param {string} markdownText - The markdown string to extract links from.
 * @param {Object} [options] - Settings forwarded to `doReplacements`
 *   (`ignorePatterns`, `replacementPatterns`, `baseUrl`).
 * @returns {Array<Object>} The `link`, `definition` and `image` MDAST nodes of
 *   the document, as returned by `doReplacements`.
 *   See https://github.com/syntax-tree/mdast for the node types.
 */
function extractMarkdownHyperlinks(markdownText, options) {
  const tree = unified().use(remarkParse).use(remarkGfm).parse(markdownText)

  const links = []
  visit(tree, ['link', 'definition', 'image'], (node) => {
    links.push(node)
  })
  return doReplacements(links, options)
}

/**
 * Returns the first node for every distinct URL that needs checking.
 *
 * Only `link`, `definition` and `image` nodes with a non-empty string URL are
 * considered. `mailto:` URLs are left out; the scheme is compared
 * case-insensitively because URI schemes are case-insensitive.
 *
 * @param {Array<Object>} astNodes - Nodes to de-duplicate.
 * @returns {Array<Object>} The retained nodes, in their original order.
 */
function getUniqueLinks(astNodes) {
  const checkedTypes = ['link', 'definition', 'image']
  const seenUrls = new Set()
  const uniqueNodes = []

  for (const node of astNodes ?? []) {
    if (!node || !checkedTypes.includes(node.type)) continue

    const { url } = node
    if (typeof url !== 'string' || url === '') continue
    if (/^mailto:/i.test(url) || seenUrls.has(url)) continue

    seenUrls.add(url)
    uniqueNodes.push(node)
  }
  return uniqueNodes
}
/**
 * Compiles a user-supplied pattern, refusing the ones that look dangerous.
 *
 * This is a heuristic screen, not a guarantee: a few well-known
 * catastrophic-backtracking shapes and patterns longer than 100 characters
 * are rejected. No timeout is applied while matching.
 *
 * @param {string} pattern - Regular expression source.
 * @returns {RegExp|null} The compiled expression, or null when the pattern is
 *   rejected or invalid (a warning is logged in both cases).
 */
function compileSafePattern(pattern) {
  try {
    if (typeof pattern !== 'string') {
      throw new TypeError('pattern must be a string')
    }

    const looksUnsafe =
      pattern.includes('(.*)*') ||
      pattern.includes('(.+)+') ||
      /\([^)]+\)\+\+/.test(pattern) ||
      /\(\[.*?\]\+\)\+/.test(pattern) ||
      /\(a\+\)\+/.test(pattern)
    if (looksUnsafe) {
      console.warn(`Potentially unsafe regex pattern detected: ${pattern}`)
      return null
    }

    if (pattern.length > 100) {
      console.warn(
        `Pattern exceeds maximum safe length: ${pattern.substring(0, 50)}...`
      )
      return null
    }

    return new RegExp(pattern)
  } catch (e) {
    console.warn(`Invalid regex pattern: ${pattern}. Error: ${e.message}`)
    return null
  }
}

/**
 * Modifies the URLs of the given nodes based on the provided options.
 *
 * Nodes whose URL matches an ignore pattern are dropped. For the remaining
 * nodes, URLs starting with '/' are prefixed with `baseUrl`, then every
 * replacement pattern is applied in order. `node.url` is updated in place.
 * Nodes without a string URL are returned untouched.
 *
 * @param {Array} nodes - The nodes to be modified.
 * @param {Object} [opts={}] - The options for modifying the URLs.
 * @param {Array} [opts.ignorePatterns=[]] - `{ pattern }` objects; matching URLs are excluded.
 * @param {Array} [opts.replacementPatterns=[]] - `{ pattern, replacement }` objects.
 * @param {string} [opts.baseUrl] - The base URL to be prefixed to URLs that start with '/'.
 *
 * @returns {Array} The retained (and modified) nodes.
 */
function doReplacements(nodes, opts = {}) {
  const {
    ignorePatterns = [],
    replacementPatterns = [],
    baseUrl,
  } = opts ?? {}

  const ignoreRegexes = ignorePatterns
    .map(({ pattern }) => compileSafePattern(pattern))
    .filter(Boolean)

  const replacementRules = replacementPatterns
    .map(({ pattern, replacement }) => {
      const regex = compileSafePattern(pattern)
      return regex ? { regex, replacement } : null
    })
    .filter(Boolean)

  // "https://host/" + "/path" must not become "https://host//path"
  const urlPrefix =
    typeof baseUrl === 'string' ? baseUrl.replace(/\/+$/, '') : baseUrl

  return nodes.filter((node) => {
    let { url } = node

    // Nothing to match or rewrite; keep the node as it is
    if (typeof url !== 'string') {
      return true
    }

    // Skip link checking if it matches any ignore pattern
    const isIgnored = ignoreRegexes.some((regex) => {
      try {
        return regex.test(url)
      } catch (e) {
        console.warn(`Error testing URL against pattern: ${e.message}`)
        return false
      }
    })
    if (isIgnored) {
      return false
    }

    // Prefix the base URL to URLs that start with '/'
    if (urlPrefix && url.startsWith('/')) {
      url = urlPrefix + url
    }

    for (const { regex, replacement } of replacementRules) {
      try {
        const previousUrl = url
        url = url.replace(regex, replacement)

        // If replacement leads to an extremely long string, revert
        if (url.length > previousUrl.length * 3 && url.length > 2000) {
          console.warn(`Suspicious replacement result detected. Reverting.`)
          url = previousUrl
        }
      } catch (e) {
        console.warn(`Error replacing URL: ${e.message}`)
      }
    }

    node.url = url
    return true
  })
}
/**
 * Normalize a file path by removing a leading './' if present.
 * @param {string} filePath - The file path to normalize.
 * @returns {string} The normalized file path.
 */
function normalizeFilePath(filePath) {
  return filePath.startsWith('./') ? filePath.substring(2) : filePath
}

/**
 * Tells whether a cwd-relative path points outside the working directory.
 * Such paths cannot match cwd-relative ignore rules, and the `ignore`
 * package rejects them with an exception.
 * @param {string} relativePath - A path produced by `path.relative(cwd, file)`.
 * @returns {boolean} True when the path must bypass ignore matching.
 */
function isOutsideWorkingDir(relativePath) {
  return (
    relativePath === '' ||
    relativePath === '..' ||
    relativePath.startsWith('../') ||
    relativePath.startsWith('..\\') ||
    path.isAbsolute(relativePath)
  )
}

/**
 * Filters the list of files with the rules of the `.gitignore` file found in
 * the current working directory. Without such a file the list is returned
 * unchanged.
 * @param {string[]} filenames - Paths relative to the current working directory.
 * @returns {string[]} The filenames that are not ignored.
 */
function filterFiles(filenames) {
  if (!existsSync('.gitignore')) {
    return filenames
  }

  const ig = ignore()
  ig.add(readFileSync('.gitignore', 'utf8'))

  return filenames.filter(
    (name) => isOutsideWorkingDir(name) || !ig.ignores(name)
  )
}

/**
 * Removes duplicates from an array of strings, keeping first occurrences.
 * @param {string[]} array - The values to de-duplicate.
 * @returns {string[]} The unique values in their original order.
 */
function removeDuplicates(array) {
  return [...new Set(array)]
}

/**
 * Prepares a list of files based on the configuration options.
 *
 * Explicitly listed `files` are taken when they exist and carry one of the
 * configured extensions; every directory in `dirs` is searched recursively.
 * The result is de-duplicated, optionally filtered through `.gitignore`
 * (`useGitIgnore: true`) and through `excludedFiles` / `excludedDirs`.
 *
 * @param {Object} config - The parsed YML config object.
 * @returns {string[]} An array of absolute file paths; an empty array when
 *   the list cannot be prepared (the error is logged).
 */
function prepareFilesList(config) {
  try {
    const trimmed = (entries) =>
      entries ? entries.map((entry) => entry.trim()) : []

    const specifiedFiles = trimmed(config.files)
    const dirs = trimmed(config.dirs)
    const excludedFiles = trimmed(config.excludedFiles).map(normalizeFilePath)
    // "docs/" must behave like "docs": a trailing slash would otherwise
    // produce the pattern "docs//**".
    const excludedDirs = trimmed(config.excludedDirs).map((dir) =>
      normalizeFilePath(dir).replace(/\/+$/, '')
    )

    // Default to "md" when no extension is configured
    const fileExtensions =
      Array.isArray(config.fileExtensions) && config.fileExtensions.length > 0
        ? config.fileExtensions
        : ['md']

    const files = []

    // Check if specified files exist and add them to the list
    for (const file of specifiedFiles) {
      const filePath = resolve(process.cwd(), file)

      if (!existsSync(filePath)) {
        console.warn(
          `ℹ️ The file "${file}" specified in the config does not exist.`
        )
      } else if (files.includes(filePath)) {
        console.warn(
          `ℹ️ The file "${file}" specified in the config is already included.`
        )
      } else if (fileExtensions.includes(path.extname(filePath).substring(1))) {
        files.push(filePath)
      } else {
        console.warn(
          `ℹ️ The file "${file}" specified in the config does not have the correct extension. Use "fileExtensions" to configure the extensions.`
        )
      }
    }

    // Search all specified dirs recursively using glob
    const extensionGlob =
      fileExtensions.length > 1
        ? `{${fileExtensions.join(',')}}`
        : fileExtensions[0]
    for (const dir of dirs) {
      const directory =
        dir === '.' || dir === './' ? process.cwd() + '/' : dir

      if (existsSync(directory)) {
        files.push(
          ...glob.sync(path.posix.join(directory, '**', `*.${extensionGlob}`))
        )
      } else {
        console.error(
          `ℹ️ The directory "${directory}" specified in the config does not exist.`
        )
      }
    }

    // Ignore rules are expressed relative to the current working directory
    let relativeFiles = removeDuplicates(
      files.map((file) => relative(process.cwd(), file))
    )

    if (config.useGitIgnore === true) {
      relativeFiles = filterFiles(relativeFiles)
    }

    const ig = ignore()
    ig.add(excludedFiles)
    ig.add(excludedDirs.map((dir) => dir + '/**')) // Include subdirectories of excludedDirs

    relativeFiles = relativeFiles.filter(
      (file) => isOutsideWorkingDir(file) || !ig.ignores(file)
    )

    return relativeFiles.map((file) => resolve(process.cwd(), file)) // Convert back to absolute paths
  } catch (err) {
    console.error("Error: Couldn't prepare the list of files. ", err)
    return []
  }
}
/**
 * Updates the link status object with the given AST nodes and existing link status.
 *
 * For a node whose URL already has a status, a copy of that status is added
 * when the node sits at a different position (the same URL used again
 * elsewhere). For a node without a status a new entry is created; `mailto:`
 * links are marked 'skipped', everything else gets a null status.
 *
 * Side effect: an existing status without a position is logged and given the
 * fallback position line 1, column 1.
 *
 * @param {Array} astNodes - The AST nodes to update the link status with.
 * Each node is an object with properties `url`, `position`, `title`, and `children`.
 *
 * @param {Array} linkStatus - The existing link status to update.
 * Each status is an object with properties `link`, `status`, `status_code`, `line_number`, `position`, `error_message`, `title`, and `children`.
 *
 * @returns {Array} The updated link status (a new array), sorted by line
 * number and start column in ascending order. Entries without a position sort
 * as line 1, column 1.
 */
function updateLinkStatusObj(astNodes, linkStatus) {
  const fallbackPosition = () => ({
    start: { line: 1, column: 1 },
    end: { line: 1, column: 1 },
  })

  const updatedLinkStatus = [...linkStatus]

  // First status per URL, so each node is resolved without rescanning the list
  const firstStatusByUrl = new Map()
  for (const entry of linkStatus) {
    if (!firstStatusByUrl.has(entry.link)) {
      firstStatusByUrl.set(entry.link, entry)
    }
  }

  for (const node of astNodes) {
    const existingLink = firstStatusByUrl.get(node.url)

    if (existingLink) {
      if (!existingLink.position) {
        console.error(
          `ERROR: Markdown formatting error around link: ${existingLink.link}. Please check the file containing this link.`
        )
        existingLink.position = fallbackPosition()
      }

      const knownPosition = existingLink.position
      const nodePosition = node.position || fallbackPosition()
      const samePlace =
        knownPosition.start.line === nodePosition.start.line &&
        knownPosition.start.column === nodePosition.start.column &&
        knownPosition.end.line === nodePosition.end.line &&
        knownPosition.end.column === nodePosition.end.column

      if (!samePlace) {
        updatedLinkStatus.push({
          ...existingLink,
          line_number: nodePosition.start.line,
          position: nodePosition,
        })
      }
      continue
    }

    // Email links are reported but never checked (scheme is case-insensitive)
    const isMailto =
      typeof node.url === 'string' && /^mailto:/i.test(node.url)

    updatedLinkStatus.push({
      link: node.url,
      status: isMailto ? 'skipped' : null,
      status_code: isMailto ? 200 : null,
      line_number: node.position ? node.position.start.line : null,
      position: node.position,
      error_message: isMailto ? 'Email links are not checked' : null,
      title: node.title,
      children: node.children,
    })
  }

  // New entries may lack a position; sort them with the fallback instead of
  // dereferencing undefined.
  const startOf = (entry) => entry.position?.start ?? { line: 1, column: 1 }
  updatedLinkStatus.sort((a, b) => {
    const first = startOf(a)
    const second = startOf(b)
    return first.line === second.line
      ? first.column - second.column
      : first.line - second.line
  })
  return updatedLinkStatus
}

/**
 * Error thrown for an invalid configuration.
 * The message is the given text followed by the joined detail messages;
 * the raw details stay available on `details`.
 */
class ValidationError extends Error {
  /**
   * @param {string} message - Summary of the failure.
   * @param {Array<{message: string}>} [details=[]] - Individual validation problems.
   */
  constructor(message, details = []) {
    super(`${message}: ${details.map((d) => d.message).join(', ')}`)
    this.name = 'ValidationError'
    this.details = details
  }
}
/**
 * Validates a parsed linkspector configuration object against the schema.
 *
 * At least one of `files` or `dirs` must be present. Any failure is logged to
 * stderr and then rethrown, so the returned promise never resolves to `false`.
 *
 * @param {object} config - The configuration object parsed from YAML.
 * @returns {Promise<boolean>} Resolves to `true` when the configuration is valid.
 * @throws {ValidationError} When the configuration does not match the schema.
 */
async function validateConfig(config) {
  try {
    // Define the schema for validation
    const schema = Joi.object({
      files: Joi.array().items(Joi.string()),
      dirs: Joi.array().items(Joi.string()),
      excludedFiles: Joi.array().items(Joi.string()),
      excludedDirs: Joi.array().items(Joi.string()),
      fileExtensions: Joi.array().items(Joi.string()),
      baseUrl: Joi.string(),
      httpHeaders: Joi.array().items(
        Joi.object({
          url: Joi.array().items(Joi.string().uri()).required(),
          headers: Joi.object().pattern(Joi.string(), Joi.string()).required(),
        })
      ),
      aliveStatusCodes: Joi.array().items(Joi.number()),
      ignorePatterns: Joi.array().items(
        Joi.object({
          pattern: Joi.string().required(),
        })
      ),
      replacementPatterns: Joi.array().items(
        Joi.object({
          pattern: Joi.string().required(),
          replacement: Joi.string().required(),
        })
      ),
      useGitIgnore: Joi.boolean(),
      modifiedFilesOnly: Joi.boolean(),
      followRedirects: Joi.boolean().default(true),
    }).or('files', 'dirs')

    // Validate the config against the schema
    const { error } = schema.validate(config)
    if (error) {
      throw new ValidationError('Incorrect configuration', error.details)
    }
    return true
  } catch (err) {
    // Tolerate throwables that are not Error instances when building the log line.
    const message = err instanceof Error ? err.message : String(err)

    if (err instanceof ValidationError) {
      console.error('Validation Error: ', message)
    } else if (message.includes('ENOENT: no such file or directory')) {
      console.error('Error reading file:', message)
    } else {
      console.error('Other Validation Error: ', message)
    }

    // Every failure is reported to the caller; there is no "return false" path.
    throw err
  }
}

export { validateConfig }
/**
 * Replaces `${NAME}` placeholders anywhere in the configuration with the value
 * of the environment variable `NAME`.
 *
 * The substitution is done on the JSON text of the configuration, so every
 * inserted value is JSON-escaped first; otherwise a value containing a quote,
 * a backslash or a line break would produce invalid JSON. Unset or empty
 * variables are replaced with an empty string.
 *
 * @param {*} config - Any JSON-serializable configuration value.
 * @returns {*} A copy of the configuration with the placeholders substituted.
 */
function replaceEnvVariables(config) {
  const configString = JSON.stringify(config)
  const replacedConfigString = configString.replace(
    /\$\{(\w+)\}/g,
    (_, name) => {
      const value = process.env[name] || ''
      // JSON.stringify yields a quoted, escaped string literal; drop the
      // surrounding quotes because the placeholder already sits inside one.
      return JSON.stringify(value).slice(1, -1)
    }
  )
  return JSON.parse(replacedConfigString)
}

/**
 * Checks whether the `git` executable can be run.
 *
 * @returns {boolean} `true` when `git --version` runs successfully, otherwise `false`.
 */
function isGitInstalled() {
  try {
    execSync('git --version', { stdio: 'ignore' })
    return true
  } catch (error) {
    // Any failure to run the command is treated as "git is unavailable".
    return false
  }
}

/**
 * Checks the links of every file selected by the configuration.
 *
 * Reads and validates the YAML configuration (falling back to a default
 * configuration when the file does not exist), prepares the list of files,
 * optionally narrows it to the files changed in the last git commit, and then
 * yields one result per file. The process exits with code 1 when the
 * configuration is invalid or git is required but unavailable, and with code 0
 * when `modifiedFilesOnly` is set and no matching file was modified.
 *
 * @param {string} configFile - Path to the YAML configuration file.
 * @param {object} cmd - Command options; `cmd.json` switches to JSON-friendly output.
 * @yields {{file: string, result: Array}} The relative file path and its link status entries.
 * @throws {Error} When the configuration file is empty, cannot be parsed, or cannot be read
 * for a reason other than not existing; the original error is available as `cause`.
 */
export async function* linkspector(configFile, cmd) {
  //Use default configuration if no config file is specified
  let config = {}
  const defaultConfig = {
    dirs: ['.'],
    useGitIgnore: true,
  }

  try {
    const configContent = readFileSync(configFile, 'utf8')

    // Check if the YAML content is empty
    if (!configContent.trim()) {
      throw new Error('The configuration file is empty.')
    }

    // Parse the YAML content
    config = yaml.load(configContent)

    // The parser can produce no document at all (null or undefined), for
    // example when the file only contains comments.
    if (config == null || Object.keys(config).length === 0) {
      throw new Error('Failed to parse the YAML content.')
    }

    // Replace environment variables in the configuration
    config = replaceEnvVariables(config)

    try {
      const isValid = await validateConfig(config)
      if (!isValid) {
        console.error('Validation failed!')
        process.exit(1)
      }
    } catch (error) {
      console.error(`💥 Error: Please check your configuration file.`)
      process.exit(1)
    }
  } catch (err) {
    if (err.code === 'ENOENT') {
      if (!cmd.json) {
        console.log(
          'Configuration file not found. Using default configuration.'
        )
      }
      config = defaultConfig
    } else {
      // Same message as before, but keep the original error (and its stack)
      // reachable for debugging.
      throw new Error(String(err), { cause: err })
    }
  }

  // Prepare the list of files to check
  let filesToCheck = prepareFilesList(config)

  // Convert all paths in filesToCheck to relative paths
  filesToCheck = filesToCheck.map((file) => path.relative(process.cwd(), file))

  // Check if only modified files should be checked
  if (config.modifiedFilesOnly) {
    // Check if git is installed
    if (!isGitInstalled()) {
      console.error(
        'Error: Git is not installed or not found in the system path.'
      )
      process.exit(1)
    }

    // Get the list of modified files from the last git commit
    const modifiedFiles = execSync('git diff --name-only HEAD HEAD~1', {
      encoding: 'utf8',
    }).split('\n')

    // Filter out files that are not in the list of files to check or do not have the correct extension
    const modifiedFilesToCheck = modifiedFiles.filter((file) => {
      const fileExtension = path.extname(file).substring(1).toLowerCase()
      return (
        filesToCheck.includes(file) &&
        (config.fileExtensions || ['md']).includes(fileExtension)
      )
    })

    // If no modified files are in the list of files to check, exit with a message
    if (modifiedFilesToCheck.length === 0) {
      if (cmd.json) {
        console.log('{}')
      } else {
        console.log(
          'No modified files to check, skipping checking. To enable checking all files set modifiedFilesOnly: false and rerun the check.'
        )
      }
      process.exit(0)
    }

    // Otherwise, only check the modified files
    filesToCheck = modifiedFilesToCheck
  }

  // Process each file
  for (const file of filesToCheck) {
    const relativeFilePath = path.relative(process.cwd(), file)

    // File extension without the leading dot, in lowercase
    const fileExtension = path.extname(file).substring(1).toLowerCase()

    let astNodes

    // AsciiDoc extraction is only used when the extension is also enabled in
    // the configuration; everything else is parsed as Markdown.
    if (
      ['asciidoc', 'adoc', 'asc'].includes(fileExtension) &&
      config.fileExtensions &&
      config.fileExtensions.includes(fileExtension)
    ) {
      astNodes = await extractAsciiDocLinks(file, config)
    } else {
      const fileContent = readFileSync(file, 'utf8')
      astNodes = extractMarkdownHyperlinks(fileContent, config)
    }

    // Get unique hyperlinks
    const uniqueLinks = getUniqueLinks(astNodes)

    // Check the status of hyperlinks
    const linkStatus = await checkHyperlinks(uniqueLinks, config, file)

    // Add the de-duplicated occurrences back so every node has a result
    const updatedLinkStatus = updateLinkStatusObj(astNodes, linkStatus)

    // Yield an object with the relative file path and its result
    yield {
      file: relativeFilePath,
      result: updatedLinkStatus,
    }
  }
}
"https://github.com/UmbrellaDocs/linkspector#readme", 50 | "dependencies": { 51 | "commander": "^14.0.0", 52 | "dotenv": "^16.5.0", 53 | "github-slugger": "^2.0.0", 54 | "glob": "^11.0.2", 55 | "ignore": "^7.0.4", 56 | "joi": "^17.13.3", 57 | "js-yaml": "^4.1.0", 58 | "kleur": "^4.1.5", 59 | "ora": "^8.2.0", 60 | "puppeteer": "^24.9.0", 61 | "remark-gfm": "^4.0.1", 62 | "remark-parse": "^11.0.0", 63 | "unified": "^11.0.5", 64 | "unist-util-visit": "^5.0.0" 65 | }, 66 | "devDependencies": { 67 | "vitest": "^3.1.4" 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /scripts/apparmorfix.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | echo '::group::🔗💀 Setting up Chrome Linux Sandbox' 5 | # Based on the instructions found here: https://chromium.googlesource.com/chromium/src/+/main/docs/security/apparmor-userns-restrictions.md 6 | if [ "$(lsb_release -rs)" = "24.04" ]; then 7 | echo 0 | sudo tee /proc/sys/kernel/apparmor_restrict_unprivileged_userns 8 | echo 'Done' 9 | fi 10 | echo '::endgroup::' 11 | 12 | echo '::group::🔗💀 Installing NPM packages' 13 | npm ci 14 | echo '::endgroup::' 15 | 16 | echo '::group::🔗💀 Running tests' 17 | npm run test 18 | echo '::endgroup::' 19 | -------------------------------------------------------------------------------- /test/fixtures/asciidoc/hyperlinks/asciidoc-hyperlinks.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check hyperlinks in AsciiDoc files', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | 
'./test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(true) 37 | expect(results.length).toBe(4) 38 | expect(results[0].status).toBe('error') 39 | expect(results[1].status).toBe('alive') 40 | expect(results[2].status).toBe('error') 41 | expect(results[3].status).toBe('alive') 42 | }) 43 | -------------------------------------------------------------------------------- /test/fixtures/asciidoc/hyperlinks/hyperlinksTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/asciidoc/hyperlinks/ 3 | fileExtensions: 4 | - adoc 5 | useGitIgnore: true 6 | -------------------------------------------------------------------------------- /test/fixtures/asciidoc/hyperlinks/testhyperlinks1.adoc: -------------------------------------------------------------------------------- 1 | Got to (http://www.yttftfftx.com) or [Google](http://www.google.com). 2 | 3 | Got to (http://www.yttftfftx.com) or [Google](http://www.google.com). 
4 | -------------------------------------------------------------------------------- /test/fixtures/headers.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test, vi, beforeEach } from 'vitest' 2 | import { checkHyperlinks } from '../../lib/batch-check-links.js' 3 | 4 | // Add the environment variable substitution function that matches what's in linkspector.js 5 | function replaceEnvVariables(obj) { 6 | const objString = JSON.stringify(obj) 7 | const replacedObjString = objString.replace( 8 | /\$\{(\w+)\}/g, 9 | (_, name) => process.env[name] || '' 10 | ) 11 | return JSON.parse(replacedObjString) 12 | } 13 | 14 | // Mock puppeteer 15 | vi.mock('puppeteer', () => { 16 | return { 17 | default: { 18 | launch: vi.fn().mockImplementation(() => { 19 | return { 20 | newPage: vi.fn().mockImplementation(() => { 21 | return { 22 | setUserAgent: vi.fn(), 23 | setRequestInterception: vi.fn(), 24 | on: vi.fn(), 25 | goto: vi.fn().mockImplementation((url, options) => { 26 | // Track which headers were passed 27 | capturedHeaders = options.headers || {} 28 | 29 | return { 30 | status: vi.fn().mockReturnValue(200), 31 | ok: vi.fn().mockReturnValue(true), 32 | } 33 | }), 34 | close: vi.fn(), 35 | } 36 | }), 37 | close: vi.fn(), 38 | } 39 | }), 40 | }, 41 | } 42 | }) 43 | 44 | // Variable to capture headers passed to page.goto 45 | let capturedHeaders = {} 46 | 47 | beforeEach(() => { 48 | // Reset captured headers before each test 49 | capturedHeaders = {} 50 | 51 | // Reset mocks 52 | vi.clearAllMocks() 53 | }) 54 | 55 | test('applies correct HTTP headers based on URL patterns', async () => { 56 | // Prepare test data 57 | const nodes = [ 58 | { 59 | type: 'link', 60 | url: 'https://example1.com/test', 61 | position: { start: { line: 1, column: 1 }, end: { line: 1, column: 30 } }, 62 | }, 63 | ] 64 | 65 | const httpHeaders = [ 66 | { 67 | url: ['https://example1.com'], 68 | headers: { 69 | Authorization: 'Bearer 
token123', 70 | 'X-Custom-Header': 'CustomValue', 71 | }, 72 | }, 73 | ] 74 | 75 | // Run the function 76 | await checkHyperlinks(nodes, { httpHeaders }, '/path/to/file') 77 | 78 | // Verify the correct headers were applied 79 | expect(capturedHeaders).toEqual({ 80 | Authorization: 'Bearer token123', 81 | 'X-Custom-Header': 'CustomValue', 82 | }) 83 | }) 84 | 85 | test('applies no headers when URL does not match patterns', async () => { 86 | // Prepare test data 87 | const nodes = [ 88 | { 89 | type: 'link', 90 | url: 'https://different-domain.com/test', 91 | position: { start: { line: 1, column: 1 }, end: { line: 1, column: 30 } }, 92 | }, 93 | ] 94 | 95 | const httpHeaders = [ 96 | { 97 | url: ['https://example1.com', 'https://example2.com'], 98 | headers: { 99 | Authorization: 'Bearer token123', 100 | 'X-Custom-Header': 'CustomValue', 101 | }, 102 | }, 103 | ] 104 | 105 | // Run the function 106 | await checkHyperlinks(nodes, { httpHeaders }, '/path/to/file') 107 | 108 | // Verify no headers were applied for non-matching URL 109 | expect(capturedHeaders).toEqual({}) 110 | }) 111 | 112 | test('supports environment variable substitution in headers', async () => { 113 | // Mock process.env 114 | const originalEnv = process.env 115 | process.env = { 116 | ...originalEnv, 117 | AUTH_TOKEN: 'supersecrettoken', 118 | } 119 | 120 | // Prepare test data 121 | const nodes = [ 122 | { 123 | type: 'link', 124 | url: 'https://example3.com/api', 125 | position: { start: { line: 1, column: 1 }, end: { line: 1, column: 30 } }, 126 | }, 127 | ] 128 | 129 | let httpHeaders = [ 130 | { 131 | url: ['https://example3.com'], 132 | headers: { 133 | Authorization: 'Bearer ${AUTH_TOKEN}', 134 | 'X-API-Key': 'fixed-value', 135 | }, 136 | }, 137 | ] 138 | 139 | // Process environment variables in headers similar to what linkspector.js does 140 | httpHeaders = replaceEnvVariables(httpHeaders) 141 | 142 | // Run the function 143 | await checkHyperlinks(nodes, { httpHeaders }, 
'/path/to/file') 144 | 145 | // Verify the headers with environment variable substitution 146 | expect(capturedHeaders).toEqual({ 147 | Authorization: 'Bearer supersecrettoken', 148 | 'X-API-Key': 'fixed-value', 149 | }) 150 | 151 | // Restore original env 152 | process.env = originalEnv 153 | }) 154 | -------------------------------------------------------------------------------- /test/fixtures/markdown/decoded-sections/.decodedTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/decoded-sections 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/decoded-sections/decoded.md: -------------------------------------------------------------------------------- 1 | A távolsági - lő- és hajítófegyverekkel - végzett harc során a védekező fél nem saját Védő Értékével vesz részt a harcban, ugyanolyan “céltárgynak” minősül, mint egy szalmabábú, vagy egy agyaggalamb. Ugyanakkor a célpont mozgásának jellege (lásd “Mozgás módosító" fejezetet) és a távolság erőteljesen befolyásolják a találat esélyeit. Lásd még: [Szándékos kitérés lövés elől](decoded.md#sz%C3%A1nd%C3%A9kos-kit%C3%A9r%C3%A9s-l%C3%B6v%C3%A9s-el%C5%91l) fejezetet. A támadó a távolsági harcban a **Célzó Értékét** használja, melynek megállapítása több tényezőtől függ. 2 | 3 | [Bevezető](#0-bevezet%C5%91-jelz%C5%91k) 4 | 5 | Also a link to check [the README Documentation Section](#-documentation) 6 | 7 | ## 📖 Documentation 8 | 9 | This is the documentation section. It is a simple markdown file with some links and references to other sections. 
10 | 11 | ### Szándékos kitérés lövés elől 12 | 13 | Ha valaki látja és van ideje felkészülni a rá leadott lövésre/hajításra, valamint rendelkezik elegendő hellyel a kitérésre és bejelenti, hogy megpróbálja elkerülni végzetét, akkor Gyorsaságpróbát kell dobnia, melynek nehézsége függ a lövést leadó személy távolságától, valamint az általa használt fegyvertől. 14 | 15 | | Gyorsaságpróba célszám | Dobófegyverek,
Mágikus lövedék I. | Íjak,
Mágikus lövedék II. | Nyílpuskák,
Mágikus lövedék III. | 16 | | :--------------------: | -------------------------------------: | -----------------------------: | ------------------------------------: | 17 | | 8 | 1m - 3m | 0m - 5m | 0m - 10m | 18 | | 7 | 4m - 6m | 6m - 10m | 11m - 20m | 19 | | 6 | 7m - 9m | 11m - 15m | 21m - 30m | 20 | | 5 | 10m - 12m | 16m - 20m | 31m - 40m | 21 | | 4 | Testközelben | | | 22 | 23 | ### 0. [Bevezető, jelzők](decoded2.md) 24 | -------------------------------------------------------------------------------- /test/fixtures/markdown/decoded-sections/decoded2.md: -------------------------------------------------------------------------------- 1 | # Decoded 2 2 | -------------------------------------------------------------------------------- /test/fixtures/markdown/decoded-sections/markdown-decoded-sections.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check HTML encoded section links', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/decoded-sections/.decodedTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(false) 37 | expect(results.length).toBe(4) 
38 | expect(results[0].status).toBe('alive') 39 | expect(results[1].status).toBe('alive') 40 | expect(results[2].status).toBe('alive') 41 | expect(results[3].status).toBe('alive') 42 | }) 43 | -------------------------------------------------------------------------------- /test/fixtures/markdown/duplicates/duplicate1.md: -------------------------------------------------------------------------------- 1 | This is a link to [Google](https://www.google.com). 2 | 3 | this is a link to [Yahoo](https://www.yahoo434234esdsadasd.com). 4 | 5 | This is a link to [Google](https://www.google.com) again. 6 | 7 | this is a link to [Yahoo](https://www.yahoo434234esdsadasd.com) again. 8 | -------------------------------------------------------------------------------- /test/fixtures/markdown/duplicates/duplicateTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/duplicates 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/duplicates/markdown-duplicates.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should add back the removed duplicates when returning the results', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/duplicates/duplicateTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: 
linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(true) 37 | expect(results.length).toBe(4) 38 | expect(results[0].status).toBe('alive') 39 | expect(results[1].status).toBe('error') 40 | expect(results[2].status).toBe('alive') 41 | expect(results[3].status).toBe('error') 42 | }) 43 | -------------------------------------------------------------------------------- /test/fixtures/markdown/headings/heading1.md: -------------------------------------------------------------------------------- 1 | # Headings test 2 | 3 | This file is a test for the headings. 4 | 5 | ## Heading with **bold text** 6 | 7 | Paragraph with **bold text**. Link to heading with bold text: [Heading with **bold text**](#heading-with-bold-text). 8 | 9 | ## Heading with emoji 🎉 10 | 11 | Paragraph with emoji 🎉. Link to heading with emoji: [Heading with emoji](#heading-with-emoji-) 12 | 13 | ## Heading with _italic text_ 14 | 15 | Paragraph with _italic text_. Link to heading with italic text: [Heading with _italic text_](#heading-with-italic-text). 16 | 17 | ## Heading with `code` 18 | 19 | Paragraph with `code`. Link to heading with code: [Heading with `code`](#heading-with-code). 20 | 21 | ## Heading with [link](#headings-test) 22 | 23 | Paragraph with link to heading with link: [Heading with link](#heading-with-link). 24 | 25 | ## Heading with ![image](https://cdn.iconscout.com/icon/free/png-512/free-google-160-189824.png?f=webp&w=20) 26 | 27 | Paragraph with link to heading with image: [Heading with image](#heading-with-image). 
28 | -------------------------------------------------------------------------------- /test/fixtures/markdown/headings/headingsTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/headings 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/headings/markdown-headings.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check links to headings with inline bold, italic, code, link, and image elements', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/headings/headingsTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(false) 37 | expect(results.length).toBe(8) 38 | }) 39 | -------------------------------------------------------------------------------- /test/fixtures/markdown/image/image.md: -------------------------------------------------------------------------------- 1 | # Check image links 2 | 3 | Working link: 4 | 
![Praia_do_Ribeiro_do_Cavalo2](https://commons.wikimedia.org/wiki/Main_Page#/media/File:Praia_do_Ribeiro_do_Cavalo2.jpg) 5 | -------------------------------------------------------------------------------- /test/fixtures/markdown/image/imageTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/image 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/image/markdown-image.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check image links in Markdown file', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/image/imageTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(false) 37 | expect(results.length).toBe(1) 38 | expect(results[0].link).toBe( 39 | 'https://commons.wikimedia.org/wiki/Main_Page#/media/File:Praia_do_Ribeiro_do_Cavalo2.jpg' 40 | ) 41 | expect(results[0].status).toBe('alive') 42 | }) 43 | -------------------------------------------------------------------------------- 
/test/fixtures/markdown/line-references/.lineReferencesTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/line-references 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/line-references/line-file.md: -------------------------------------------------------------------------------- 1 | # Target File for Line References 2 | 3 | This is a simple file with exactly 15 lines. 4 | Line 3 should be accessible via #L3 5 | Line 4 is here. 6 | Line 5 is the first line in a range. 7 | Line 6 is in the middle of a range. 8 | Line 7 is also in the middle. 9 | Line 8 is the last line in a range. 10 | Line 9 is after the range. 11 | Line 10 is getting closer to the end. 12 | Line 11 continues. 13 | Line 12 is almost there. 14 | Line 13 is near the end. 15 | Line 14 is the penultimate line. 16 | Line 15 is the last line. 17 | 18 | ## L454 19 | 20 | This is a section with a name that looks like a line reference but the generated id uses lowercase 'l'. 21 | Since we're using lowercase 'l', this won't be interpreted as a line reference pattern. 22 | This section will be found by the normal section lookup mechanism. 23 | -------------------------------------------------------------------------------- /test/fixtures/markdown/line-references/line-reference-test.md: -------------------------------------------------------------------------------- 1 | # Line Reference Test 2 | 3 | This Markdown file tests GitHub-style line references. 
4 | 5 | ## Valid Line References 6 | 7 | - [Link to Line 3](line-file.md#L3) - This should work 8 | - [Link to Line Range 5-8](line-file.md#L5-L8) - This should work too 9 | 10 | ## Invalid Line References 11 | 12 | - [Link to Line 25](line-file.md#L25) - This should fail (file only has 15 lines) 13 | - [Link to Line Range 4-30](line-file.md#L4-L30) - This should fail (range exceeds file line count) 14 | 15 | ## Section with L-Prefix 16 | 17 | - [Link to L454 Section](line-file.md#l454) - This should work (L454 is a section name, not a line reference) 18 | -------------------------------------------------------------------------------- /test/fixtures/markdown/line-references/line-references.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should correctly handle GitHub-style line reference links', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' 11 | let results = [] 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/line-references/.lineReferencesTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | // There should be 5 links total (3 valid, 2 invalid) 37 | expect(results.length).toBe(5) 38 | 39 | // The first link should be valid (within file line count) 40 | expect(results[0].link).toBe('line-file.md#L3') 41 | 
expect(results[0].status).toBe('alive') 42 | 43 | // The second link should be valid (within range) 44 | expect(results[1].link).toBe('line-file.md#L5-L8') 45 | expect(results[1].status).toBe('alive') 46 | 47 | // The third link should be invalid (line number beyond file length) 48 | expect(results[2].link).toBe('line-file.md#L25') 49 | expect(results[2].status).toBe('error') 50 | expect(results[2].error_message).toContain('Cannot find Line 25') 51 | 52 | // The fourth link should be invalid (range beyond file length) 53 | expect(results[3].link).toBe('line-file.md#L4-L30') 54 | expect(results[3].status).toBe('error') 55 | expect(results[3].error_message).toContain('Cannot find Line 30') 56 | 57 | // The fifth link should be valid (points to a lowercase l454 section name) 58 | expect(results[4].link).toBe('line-file.md#l454') 59 | expect(results[4].status).toBe('alive') 60 | 61 | // Overall status should indicate errors 62 | expect(hasErrorLinks).toBe(true) 63 | }) 64 | -------------------------------------------------------------------------------- /test/fixtures/markdown/relative/.relativeTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/relative 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/relative/markdown-relative.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check relative links in Markdown file', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/relative/.relativeTest.yml', 15 | cmd 16 | 
)) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | expect(hasErrorLinks).toBe(true) 37 | expect(results.length).toBe(8) 38 | expect(results[0].status).toBe('alive') 39 | expect(results[1].status).toBe('alive') 40 | expect(results[2].status).toBe('alive') 41 | expect(results[3].status).toBe('alive') 42 | expect(results[4].status).toBe('alive') 43 | expect(results[5].status).toBe('alive') 44 | expect(results[6].status).toBe('error') 45 | expect(results[7].status).toBe('error') 46 | }) 47 | -------------------------------------------------------------------------------- /test/fixtures/markdown/relative/relative1.md: -------------------------------------------------------------------------------- 1 | # Relative 1 Heading Level One 2 | 3 | This is a paragraph in the first file in a first level heading. 4 | 5 | [Link to Relative 1 Heading Level Two](#relative-1-heading-level-two) 6 | 7 | ## Relative 1 Heading Level Two 8 | 9 | This is a paragraph in the first file in a second level heading. 10 | 11 | [Link to Relative 2 Heading Level Two](relative2.md#custom-id-level-two) 12 | 13 | ### Relative 1 Heading Level Three 14 | 15 | This is a paragraph in the first file in a third level heading. 16 | 17 | [Link to Relative 2 Heading Level Three](relative2.md#custom-id-level-three) 18 | 19 | #### Relative 1 Heading Level Four 20 | 21 | This is a paragraph in the first file in a fourth level heading. 
22 | 23 | [Link to Relative 3 Broken link](relative3.md#relative-3-heading-level-one) 24 | 25 | ##### Relative 1 Heading Level Five 26 | 27 | [Link to broken section](#broken-section) 28 | -------------------------------------------------------------------------------- /test/fixtures/markdown/relative/relative2.md: -------------------------------------------------------------------------------- 1 | # Relative 2 Heading Level One 2 | 3 | This is a paragraph in the second file in a first level heading. 4 | 5 | [Link to Relative 1 Heading Level One](relative1.md#relative-1-heading-level-one) 6 | 7 | ## Relative 2 Heading Level Two 8 | 9 | This is a paragraph in the second file in a second level heading. 10 | 11 | [Link to Relative 1 Heading Level Two](relative1.md#relative-1-heading-level-two) 12 | 13 | ### Relative 2 Heading Level Three {#custom-id-level-three} 14 | 15 | This is a paragraph in the second file in a third level heading. 16 | 17 | [Link to Relative 1 Heading Level Three](relative1.md#relative-1-heading-level-three) 18 | -------------------------------------------------------------------------------- /test/fixtures/markdown/with-html-anchors-id/.withHtmlAnchorsIdTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/with-html-anchors-id 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/with-html-anchors-id/html-anchor-id.md: -------------------------------------------------------------------------------- 1 | # This is heading 1 2 | 3 | This is a paragraph in the first file in a first level heading. 4 | 5 | Anchor with `id` 6 | 7 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla vel mauris sit amet ipsum venenatis placerat. 8 | 9 | Link to anchor with `id` [Link to custom id with id](#custom-id-with-id). 
10 | -------------------------------------------------------------------------------- /test/fixtures/markdown/with-html-anchors-id/markdown-with-html-anchors-id.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check HTML encoded section links using ID attribute', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/with-html-anchors-id/.withHtmlAnchorsIdTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | // Test expectations for link checks 37 | expect(hasErrorLinks).toBe(false) 38 | expect(results.length).toBe(1) 39 | expect(results[0].status).toBe('alive') 40 | }) 41 | -------------------------------------------------------------------------------- /test/fixtures/markdown/with-html-anchors/.withHtmlAnchorsTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/markdown/with-html-anchors 3 | -------------------------------------------------------------------------------- /test/fixtures/markdown/with-html-anchors/html-anchor.md: -------------------------------------------------------------------------------- 1 | # 
This is heading 1 2 | 3 | This is a paragraph in the first file in a first level heading. 4 | 5 | Anchor with `a` 6 | 7 | Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nulla vel mauris sit amet ipsum venenatis placerat. 8 | 9 | Link to anchor with `a` [Link to custom id level one](#custom-id-level-one). 10 | -------------------------------------------------------------------------------- /test/fixtures/markdown/with-html-anchors/markdown-with-html-anchors.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should check HTML encoded section links and include anchor names', async () => { 9 | let hasErrorLinks = false 10 | let currentFile = '' // Variable to store the current file name 11 | let results = [] // Array to store the results if json is true 12 | 13 | for await (const { file, result } of linkspector( 14 | './test/fixtures/markdown/with-html-anchors/.withHtmlAnchorsTest.yml', 15 | cmd 16 | )) { 17 | currentFile = file 18 | for (const linkStatusObj of result) { 19 | if (cmd.json) { 20 | results.push({ 21 | file: currentFile, 22 | link: linkStatusObj.link, 23 | status_code: linkStatusObj.status_code, 24 | line_number: linkStatusObj.line_number, 25 | position: linkStatusObj.position, 26 | status: linkStatusObj.status, 27 | error_message: linkStatusObj.error_message, 28 | }) 29 | } 30 | if (linkStatusObj.status === 'error') { 31 | hasErrorLinks = true 32 | } 33 | } 34 | } 35 | 36 | // Test expectations for link checks 37 | expect(hasErrorLinks).toBe(false) 38 | expect(results.length).toBe(1) 39 | expect(results[0].status).toBe('alive') 40 | }) 41 | -------------------------------------------------------------------------------- /test/fixtures/patterns/patterns.md: -------------------------------------------------------------------------------- 1 | # Test 
Patterns 2 | 3 | ## Links that should be ignored 4 | 5 | - [Ignored Link 1](https://ignored-domain.example.com/page1) 6 | - [Ignored Link 2](https://ignored-domain.example.com/page2) 7 | - [Ignored Link 3](https://another-ignored.example.com/test) 8 | 9 | ## Links that should be replaced 10 | 11 | - [Replace Example 1](https://example.com/old/path1) 12 | - [Replace Example 2](https://example.com/old/path2) 13 | - [Replace Example 3](https://replace-domain.example.com/path3) 14 | 15 | ## Normal links that should be checked 16 | 17 | - [Google](https://www.google.com) 18 | - [GitHub](https://github.com) 19 | -------------------------------------------------------------------------------- /test/fixtures/patterns/patterns.test.js: -------------------------------------------------------------------------------- 1 | import { expect, test } from 'vitest' 2 | import { linkspector } from './linkspector.js' 3 | 4 | let cmd = { 5 | json: true, 6 | } 7 | 8 | test('linkspector should correctly apply ignorePatterns and replacementPatterns', async () => { 9 | let currentFile = '' 10 | let results = [] 11 | 12 | for await (const { file, result } of linkspector( 13 | './test/fixtures/patterns/patternsTest.yml', 14 | cmd 15 | )) { 16 | currentFile = file 17 | for (const linkStatusObj of result) { 18 | results.push({ 19 | file: currentFile, 20 | link: linkStatusObj.link, 21 | status_code: linkStatusObj.status_code, 22 | line_number: linkStatusObj.line_number, 23 | position: linkStatusObj.position, 24 | status: linkStatusObj.status, 25 | error_message: linkStatusObj.error_message, 26 | }) 27 | } 28 | } 29 | 30 | // Test expectations for pattern checks 31 | 32 | // 1. 
Check that ignored links are not in the results 33 | const ignoredLinks = [ 34 | 'https://ignored-domain.example.com/page1', 35 | 'https://ignored-domain.example.com/page2', 36 | 'https://another-ignored.example.com/test', 37 | ] 38 | 39 | ignoredLinks.forEach((link) => { 40 | expect(results.find((r) => r.link === link)).toBeUndefined() 41 | }) 42 | 43 | // 2. Check that replacement patterns were applied 44 | expect( 45 | results.find((r) => r.link === 'https://example.com/new/path1') 46 | ).toBeDefined() 47 | expect( 48 | results.find((r) => r.link === 'https://example.com/new/path2') 49 | ).toBeDefined() 50 | expect( 51 | results.find((r) => r.link === 'https://new-domain.example.com/path3') 52 | ).toBeDefined() 53 | 54 | // 3. Confirm original links (before replacement) are not in the results 55 | expect( 56 | results.find((r) => r.link === 'https://example.com/old/path1') 57 | ).toBeUndefined() 58 | expect( 59 | results.find((r) => r.link === 'https://example.com/old/path2') 60 | ).toBeUndefined() 61 | expect( 62 | results.find((r) => r.link === 'https://replace-domain.example.com/path3') 63 | ).toBeUndefined() 64 | 65 | // 4. 
Check that normal links are still being checked 66 | expect(results.find((r) => r.link === 'https://www.google.com')).toBeDefined() 67 | expect(results.find((r) => r.link === 'https://github.com')).toBeDefined() 68 | 69 | // Total number of links should be 5 (2 normal + 3 replaced) 70 | expect(results.length).toBe(5) 71 | }, 10000) 72 | -------------------------------------------------------------------------------- /test/fixtures/patterns/patternsTest.yml: -------------------------------------------------------------------------------- 1 | dirs: 2 | - ./test/fixtures/patterns 3 | ignorePatterns: 4 | - pattern: '^https://ignored-domain.example.com/.*$' 5 | - pattern: '^https://another-ignored.example.com/.*$' 6 | replacementPatterns: 7 | - pattern: 'https://example.com/old/(.*)' 8 | replacement: 'https://example.com/new/$1' 9 | - pattern: 'https://replace-domain.example.com/(.*)' 10 | replacement: 'https://new-domain.example.com/$1' 11 | -------------------------------------------------------------------------------- /test/fixtures/redirects/config-redirects-false.yml: -------------------------------------------------------------------------------- 1 | files: 2 | - test/fixtures/redirects/redirects.md 3 | followRedirects: false 4 | -------------------------------------------------------------------------------- /test/fixtures/redirects/config-redirects-true.yml: -------------------------------------------------------------------------------- 1 | files: 2 | - test/fixtures/redirects/redirects.md 3 | # followRedirects is true by default, so it could also be set explicitly here. 4 | # It is deliberately omitted so that these tests exercise the default behavior. 5 | -------------------------------------------------------------------------------- /test/fixtures/redirects/redirects.md: -------------------------------------------------------------------------------- 1 | # Redirect Test Links 2 | 3 | This file contains links for testing the followRedirects feature. 
4 | 5 | ## Scenario 1 & 2: Redirecting Link 6 | 7 | [Permanent Redirect (301)](http://localhost:3000/redirect-permanent) 8 | [Temporary Redirect (302)](http://localhost:3000/redirect-temporary) 9 | 10 | ## Scenario 3 & 4: Non-Redirecting Link 11 | 12 | [Direct OK (200)](http://localhost:3000/ok) 13 | 14 | ## Scenario 5: Error Link 15 | 16 | [Not Found (404)](http://localhost:3000/not-found) 17 | 18 | ## Scenario: Redirect to external allowed 19 | 20 | [External Redirect (301)](http://localhost:3000/redirect-external) 21 | [Final External Destination](https://example.com) 22 | 23 | ## Scenario: Redirect loop 24 | 25 | [Redirect Loop](http://localhost:3000/redirect-loop1) 26 | -------------------------------------------------------------------------------- /test/fixtures/redirects/redirects.test.js: -------------------------------------------------------------------------------- 1 | import { describe, it, expect, beforeAll, afterAll } from 'vitest' 2 | import { linkspector } from '../../../linkspector.js' // Import from root linkspector.js 3 | import path from 'path' 4 | import http from 'http' 5 | import { fileURLToPath } from 'url' 6 | 7 | const __filename = fileURLToPath(import.meta.url) 8 | const __dirname = path.dirname(__filename) 9 | 10 | const fixturesDir = path.join(__dirname) 11 | // const markdownFile = path.join(fixturesDir, 'redirects.md') // No longer directly passed 12 | const configFileFollowFalse = path.join( 13 | fixturesDir, 14 | 'config-redirects-false.yml' 15 | ) 16 | const configFileFollowTrue = path.join(fixturesDir, 'config-redirects-true.yml') 17 | 18 | let server 19 | 20 | const PORT = 3000 21 | const HOST = 'localhost' 22 | 23 | const serverHandler = (req, res) => { 24 | if (req.url === '/redirect-permanent') { 25 | res.writeHead(301, { Location: `http://${HOST}:${PORT}/final-destination` }) 26 | res.end() 27 | } else if (req.url === '/redirect-temporary') { 28 | res.writeHead(302, { Location: `http://${HOST}:${PORT}/final-destination` }) 
29 | res.end() 30 | } else if (req.url === '/final-destination') { 31 | res.writeHead(200, { 'Content-Type': 'text/plain' }) 32 | res.end('Final Destination Reached') 33 | } else if (req.url === '/ok') { 34 | res.writeHead(200, { 'Content-Type': 'text/plain' }) 35 | res.end('OK') 36 | } else if (req.url === '/redirect-external') { 37 | res.writeHead(301, { Location: 'https://example.com' }) 38 | res.end() 39 | } else if (req.url === '/redirect-loop1') { 40 | res.writeHead(302, { Location: `http://${HOST}:${PORT}/redirect-loop2` }) 41 | res.end() 42 | } else if (req.url === '/redirect-loop2') { 43 | res.writeHead(302, { Location: `http://${HOST}:${PORT}/redirect-loop1` }) 44 | res.end() 45 | } else { 46 | res.writeHead(404, { 'Content-Type': 'text/plain' }) 47 | res.end('Not Found') 48 | } 49 | } 50 | 51 | describe('followRedirects feature', () => { 52 | beforeAll(async () => { 53 | server = http.createServer(serverHandler) 54 | await new Promise((resolve) => server.listen(PORT, HOST, resolve)) 55 | }) 56 | 57 | afterAll(async () => { 58 | await new Promise((resolve) => server.close(resolve)) 59 | }) 60 | 61 | // Scenario 1: followRedirects: true (default) - Permanent Redirect (301) 62 | it('should report a permanent redirecting link as alive (200) when followRedirects is true (default)', async () => { 63 | const resultsAsync = linkspector(configFileFollowTrue, {}) 64 | const collectedResults = [] 65 | for await (const item of resultsAsync) { 66 | collectedResults.push(...item.result) 67 | } 68 | const redirectLink = collectedResults.find( 69 | (r) => r.link === `http://${HOST}:${PORT}/redirect-permanent` 70 | ) 71 | expect(redirectLink.status).toBe('alive') 72 | expect(redirectLink.status_code).toBe(200) // Final destination 73 | expect(redirectLink.error_message).toContain('redirected to') 74 | }, 10000) // Increased timeout to 10 seconds 75 | 76 | // Scenario 1 (bis): followRedirects: true (default) - Temporary Redirect (302) 77 | it('should report a temporary 
redirecting link as alive (200) when followRedirects is true (default)', async () => { 78 | const resultsAsync = linkspector(configFileFollowTrue, {}) 79 | const collectedResults = [] 80 | for await (const item of resultsAsync) { 81 | collectedResults.push(...item.result) 82 | } 83 | const redirectLink = collectedResults.find( 84 | (r) => r.link === `http://${HOST}:${PORT}/redirect-temporary` 85 | ) 86 | expect(redirectLink.status).toBe('alive') 87 | expect(redirectLink.status_code).toBe(200) // Final destination 88 | expect(redirectLink.error_message).toContain('redirected to') 89 | }, 10000) // Increased timeout 90 | 91 | // Scenario 2: followRedirects: false - Permanent Redirect (301) 92 | it('should report a permanent redirecting link as error (301) when followRedirects is false', async () => { 93 | const resultsAsync = linkspector(configFileFollowFalse, {}) 94 | const collectedResults = [] 95 | for await (const item of resultsAsync) { 96 | collectedResults.push(...item.result) 97 | } 98 | const redirectLink = collectedResults.find( 99 | (r) => r.link === `http://${HOST}:${PORT}/redirect-permanent` 100 | ) 101 | expect(redirectLink.status).toBe('error') 102 | expect(redirectLink.status_code).toBe(301) 103 | expect(redirectLink.error_message).toMatch( 104 | /redirected.*followRedirects is set to false/i 105 | ) 106 | }) 107 | 108 | // Scenario 2 (bis): followRedirects: false - Temporary Redirect (302) 109 | it('should report a temporary redirecting link as error (302) when followRedirects is false', async () => { 110 | const resultsAsync = linkspector(configFileFollowFalse, {}) 111 | const collectedResults = [] 112 | for await (const item of resultsAsync) { 113 | collectedResults.push(...item.result) 114 | } 115 | const redirectLink = collectedResults.find( 116 | (r) => r.link === `http://${HOST}:${PORT}/redirect-temporary` 117 | ) 118 | expect(redirectLink.status).toBe('error') 119 | expect(redirectLink.status_code).toBe(302) 120 | 
expect(redirectLink.error_message).toMatch( 121 | /redirected.*followRedirects is set to false/i 122 | ) 123 | }) 124 | 125 | // Scenario 3: Non-redirecting link with followRedirects: false 126 | it('should report a non-redirecting link as alive (200) when followRedirects is false', async () => { 127 | const resultsAsync = linkspector(configFileFollowFalse, {}) 128 | const collectedResults = [] 129 | for await (const item of resultsAsync) { 130 | collectedResults.push(...item.result) 131 | } 132 | const okLink = collectedResults.find( 133 | (r) => r.link === `http://${HOST}:${PORT}/ok` 134 | ) 135 | expect(okLink.status).toBe('alive') 136 | expect(okLink.status_code).toBe(200) 137 | }) 138 | 139 | // Scenario 4: Non-redirecting link with followRedirects: true (default) 140 | it('should report a non-redirecting link as alive (200) when followRedirects is true (default)', async () => { 141 | const resultsAsync = linkspector(configFileFollowTrue, {}) 142 | const collectedResults = [] 143 | for await (const item of resultsAsync) { 144 | collectedResults.push(...item.result) 145 | } 146 | const okLink = collectedResults.find( 147 | (r) => r.link === `http://${HOST}:${PORT}/ok` 148 | ) 149 | expect(okLink.status).toBe('alive') 150 | expect(okLink.status_code).toBe(200) 151 | }) 152 | 153 | // Scenario 5: Link that results in an actual error (404) with followRedirects: false 154 | it('should report a 404 link as error (404) when followRedirects is false', async () => { 155 | const resultsAsync = linkspector(configFileFollowFalse, {}) 156 | const collectedResults = [] 157 | for await (const item of resultsAsync) { 158 | collectedResults.push(...item.result) 159 | } 160 | const notFoundLink = collectedResults.find( 161 | (r) => r.link === `http://${HOST}:${PORT}/not-found` 162 | ) 163 | expect(notFoundLink.status).toBe('error') 164 | expect(notFoundLink.status_code).toBe(404) 165 | }) 166 | 167 | // Scenario: Link that results in an actual error (404) with followRedirects: 
true (default) 168 | it('should report a 404 link as error (404) when followRedirects is true (default)', async () => { 169 | const resultsAsync = linkspector(configFileFollowTrue, {}) 170 | const collectedResults = [] 171 | for await (const item of resultsAsync) { 172 | collectedResults.push(...item.result) 173 | } 174 | const notFoundLink = collectedResults.find( 175 | (r) => r.link === `http://${HOST}:${PORT}/not-found` 176 | ) 177 | expect(notFoundLink.status).toBe('error') 178 | expect(notFoundLink.status_code).toBe(404) 179 | }) 180 | 181 | // Scenario: External redirect allowed when followRedirects is true 182 | it('should report an external redirecting link as alive (200 from example.com) when followRedirects is true', async () => { 183 | const resultsAsync = linkspector(configFileFollowTrue, {}) 184 | const collectedResults = [] 185 | for await (const item of resultsAsync) { 186 | collectedResults.push(...item.result) 187 | } 188 | const externalRedirectLink = collectedResults.find( 189 | (r) => r.link === `http://${HOST}:${PORT}/redirect-external` 190 | ) 191 | expect(externalRedirectLink.status).toBe('alive') 192 | // Note: status code might be from the final destination (example.com) if HEAD request works, 193 | // or could be tricky if example.com blocks HEAD. Puppeteer fallback should handle it. 194 | // For now, checking for 'alive' is the primary goal. 
195 | // expect(externalRedirectLink.status_code).toBe(200) // This can be flaky with external sites 196 | expect(externalRedirectLink.error_message).toContain( 197 | 'redirected to https://example.com' 198 | ) 199 | }, 10000) // Increased timeout 200 | 201 | // Scenario: External redirect disallowed when followRedirects is false 202 | it('should report an external redirecting link as error (301) when followRedirects is false', async () => { 203 | const resultsAsync = linkspector(configFileFollowFalse, {}) 204 | const collectedResults = [] 205 | for await (const item of resultsAsync) { 206 | collectedResults.push(...item.result) 207 | } 208 | const externalRedirectLink = collectedResults.find( 209 | (r) => r.link === `http://${HOST}:${PORT}/redirect-external` 210 | ) 211 | expect(externalRedirectLink.status).toBe('error') 212 | expect(externalRedirectLink.status_code).toBe(301) 213 | expect(externalRedirectLink.error_message).toMatch( 214 | /redirected to https:\/\/example.com, but followRedirects is set to false/i 215 | ) 216 | }) 217 | 218 | // Scenario: Redirect loop when followRedirects is true (Puppeteer should eventually error out) 219 | it('should report a redirect loop as error when followRedirects is true', async () => { 220 | // This test might take a bit longer due to Puppeteer's retries for loops or timeouts 221 | const resultsAsync = linkspector(configFileFollowTrue, { 222 | aliveStatusCodes: [200], 223 | }) // Ensure only 200 is "assumed alive" 224 | const collectedResults = [] 225 | for await (const item of resultsAsync) { 226 | collectedResults.push(...item.result) 227 | } 228 | const loopLink = collectedResults.find( 229 | (r) => r.link === `http://${HOST}:${PORT}/redirect-loop1` 230 | ) 231 | expect(loopLink.status).toBe('error') 232 | // The error message might vary depending on how Puppeteer handles max redirects 233 | // e.g., "net::ERR_TOO_MANY_REDIRECTS" or similar 234 | expect(loopLink.error_message).toBeDefined() 235 | }, 20000) // Timeout 
already increased, keeping it 236 | 237 | // Scenario: Redirect loop when followRedirects is false 238 | it('should report a redirect loop as error (first redirect status) when followRedirects is false', async () => { 239 | const resultsAsync = linkspector(configFileFollowFalse, {}) 240 | const collectedResults = [] 241 | for await (const item of resultsAsync) { 242 | collectedResults.push(...item.result) 243 | } 244 | const loopLink = collectedResults.find( 245 | (r) => r.link === `http://${HOST}:${PORT}/redirect-loop1` 246 | ) 247 | expect(loopLink.status).toBe('error') 248 | expect(loopLink.status_code).toBe(302) // The first redirect in the loop 249 | expect(loopLink.error_message).toMatch( 250 | /redirected.*followRedirects is set to false/i 251 | ) 252 | }) 253 | }) 254 | -------------------------------------------------------------------------------- /vite.config.ts: -------------------------------------------------------------------------------- 1 | // Vitest configuration (vite.config.ts) 2 | import { defineConfig } from 'vitest/config' 3 | 4 | export default defineConfig({ 5 | test: { 6 | // global per-test timeout in milliseconds (10 seconds) 7 | testTimeout: 10000, 8 | }, 9 | }) 10 | --------------------------------------------------------------------------------