├── .editorconfig ├── .gitattributes ├── .github ├── dependabot.yml └── workflows │ ├── main.yml │ └── pull_request.yml ├── .gitignore ├── .npmrc ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── example.js ├── package.json ├── src └── index.js └── test ├── helpers.js ├── index.js ├── snapshots ├── index.js.md ├── index.js.snap ├── tags.js.md └── tags.js.snap ├── tags.js └── whitelist.js /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | indent_style = space 7 | indent_size = 2 8 | end_of_line = lf 9 | charset = utf-8 10 | trim_trailing_whitespace = true 11 | insert_final_newline = true 12 | max_line_length = 100 13 | indent_brace_style = 1TBS 14 | spaces_around_operators = true 15 | quote_type = auto 16 | 17 | [package.json] 18 | indent_style = space 19 | indent_size = 2 20 | 21 | [*.md] 22 | trim_trailing_whitespace = false 23 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: npm 4 | directory: '/' 5 | schedule: 6 | interval: daily 7 | - package-ecosystem: 'github-actions' 8 | directory: '/' 9 | schedule: 10 | # Check for updates to GitHub Actions every weekday 11 | interval: 'daily' 12 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | contributors: 10 | if: "${{ github.event.head_commit.message != 'build: contributors' }}" 11 | 
runs-on: ubuntu-latest 12 | steps: 13 | - name: Checkout 14 | uses: actions/checkout@v4 15 | with: 16 | fetch-depth: 0 17 | token: ${{ secrets.GITHUB_TOKEN }} 18 | - name: Setup Node.js 19 | uses: actions/setup-node@v4 20 | with: 21 | node-version: lts/* 22 | - name: Contributors 23 | run: | 24 | git config --global user.email ${{ secrets.GIT_EMAIL }} 25 | git config --global user.name ${{ secrets.GIT_USERNAME }} 26 | npm run contributors 27 | - name: Push changes 28 | run: | 29 | git push origin ${{ github.head_ref }} 30 | 31 | release: 32 | if: | 33 | !startsWith(github.event.head_commit.message, 'chore(release):') && 34 | !startsWith(github.event.head_commit.message, 'docs:') && 35 | !startsWith(github.event.head_commit.message, 'ci:') 36 | needs: [contributors] 37 | runs-on: ubuntu-latest 38 | steps: 39 | - name: Checkout 40 | uses: actions/checkout@v4 41 | with: 42 | token: ${{ secrets.GITHUB_TOKEN }} 43 | - name: Setup Node.js 44 | uses: actions/setup-node@v4 45 | with: 46 | node-version: lts/* 47 | - name: Setup PNPM 48 | uses: pnpm/action-setup@v4 49 | with: 50 | version: latest 51 | run_install: true 52 | - name: Test 53 | run: npm test 54 | - name: Report 55 | run: npx c8 report --reporter=text-lcov > coverage/lcov.info 56 | - name: Coverage 57 | uses: coverallsapp/github-action@main 58 | with: 59 | github-token: ${{ secrets.GITHUB_TOKEN }} 60 | - name: Release 61 | env: 62 | GH_TOKEN: ${{ secrets.GH_TOKEN }} 63 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }} 64 | run: | 65 | git config --global user.email ${{ secrets.GIT_EMAIL }} 66 | git config --global user.name ${{ secrets.GIT_USERNAME }} 67 | git pull origin master 68 | npm run release 69 | -------------------------------------------------------------------------------- /.github/workflows/pull_request.yml: -------------------------------------------------------------------------------- 1 | name: pull_request 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | pull_request: 8 | branches: 9 | - master 10 | 11 | 
jobs: 12 | test: 13 | if: github.ref != 'refs/heads/master' 14 | runs-on: ubuntu-latest 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v4 18 | with: 19 | token: ${{ secrets.GITHUB_TOKEN }} 20 | - name: Setup Node.js 21 | uses: actions/setup-node@v4 22 | with: 23 | node-version: lts/* 24 | - name: Setup PNPM 25 | uses: pnpm/action-setup@v4 26 | with: 27 | version: latest 28 | run_install: true 29 | - name: Test 30 | run: npm test 31 | - name: Report 32 | run: npx c8 report --reporter=text-lcov > coverage/lcov.info 33 | - name: Coverage 34 | uses: coverallsapp/github-action@main 35 | with: 36 | github-token: ${{ secrets.GITHUB_TOKEN }} 37 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ############################ 2 | # npm 3 | ############################ 4 | node_modules 5 | npm-debug.log 6 | .node_history 7 | yarn.lock 8 | package-lock.json 9 | 10 | ############################ 11 | # tmp, editor & OS files 12 | ############################ 13 | .tmp 14 | *.swo 15 | *.swp 16 | *.swn 17 | *.swm 18 | .DS_Store 19 | *# 20 | *~ 21 | .idea 22 | *sublime* 23 | nbproject 24 | 25 | ############################ 26 | # Tests 27 | ############################ 28 | testApp 29 | coverage 30 | .nyc_output 31 | 32 | ############################ 33 | # Other 34 | ############################ 35 | .envrc 36 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | unsafe-perm=true 2 | save-prefix=~ 3 | save=false 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | 3 | node_js: 4 | - lts/* 5 | - node 6 | 7 | after_success: npm run coverage 8 | 9 | stages: 10 | - Test 
11 | if: branch = master AND commit_message !~ /(docs|release|no-release)/ 12 | - name: Release 13 | if: branch = master AND commit_message !~ /(docs|release|no-release)/ 14 | 15 | jobs: 16 | include: 17 | - stage: Release 18 | node_js: lts/* 19 | install: npm install --no-package-lock 20 | before_deploy: 21 | - git config user.email ${GITHUB_EMAIL:-"travis@travis-ci.org"} 22 | - git config user.name ${GITHUB_USER:-"Travis CI"} 23 | - git remote set-url origin https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git 24 | - git checkout master 25 | deploy: 26 | skip_cleanup: true 27 | provider: script 28 | script: npm run release 29 | on: 30 | branch: master -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines. 
4 | 5 | ### 2.4.65 (2025-04-20) 6 | 7 | ### 2.4.64 (2025-04-20) 8 | 9 | ### 2.4.63 (2025-03-19) 10 | 11 | ### 2.4.62 (2025-01-13) 12 | 13 | ### 2.4.61 (2025-01-13) 14 | 15 | 16 | ### Bug Fixes 17 | 18 | * sort ([200c8c3](https://github.com/Kikobeats/html-urls/commit/200c8c350c899dc567bf30a7ca6d6861d668bdb3)) 19 | 20 | ### 2.4.60 (2024-08-12) 21 | 22 | ### 2.4.59 (2024-05-08) 23 | 24 | ### 2.4.58 (2024-02-26) 25 | 26 | ### 2.4.57 (2024-02-26) 27 | 28 | 29 | ### Bug Fixes 30 | 31 | * test glob ([0cab6bc](https://github.com/Kikobeats/html-urls/commit/0cab6bc9f9bb12eff4f77b45ff4075ae84a5da43)) 32 | 33 | ### 2.4.56 (2024-02-12) 34 | 35 | ### 2.4.55 (2024-01-01) 36 | 37 | ### 2.4.54 (2023-12-21) 38 | 39 | ### 2.4.53 (2023-12-07) 40 | 41 | ### 2.4.52 (2023-11-24) 42 | 43 | ### 2.4.51 (2023-11-11) 44 | 45 | ### 2.4.50 (2023-10-24) 46 | 47 | ### 2.4.49 (2023-09-19) 48 | 49 | ### 2.4.48 (2023-09-05) 50 | 51 | ### 2.4.47 (2023-08-17) 52 | 53 | ### 2.4.46 (2023-08-10) 54 | 55 | ### 2.4.45 (2023-06-03) 56 | 57 | ### 2.4.44 (2023-05-01) 58 | 59 | ### 2.4.43 (2023-05-01) 60 | 61 | ### 2.4.42 (2023-03-30) 62 | 63 | ### 2.4.41 (2023-01-04) 64 | 65 | ### 2.4.40 (2022-11-22) 66 | 67 | ### 2.4.39 (2022-09-26) 68 | 69 | ### 2.4.38 (2022-08-15) 70 | 71 | ### 2.4.37 (2022-05-29) 72 | 73 | ### 2.4.36 (2022-05-17) 74 | 75 | ### 2.4.35 (2022-04-28) 76 | 77 | 78 | ### Bug Fixes 79 | 80 | * omit test helpers ([db10c7c](https://github.com/Kikobeats/html-urls/commit/db10c7ca14efa5e457a932bc4ef742354f8e7ab7)) 81 | 82 | ### 2.4.34 (2022-04-04) 83 | 84 | ### 2.4.33 (2022-03-28) 85 | 86 | ### 2.4.32 (2022-03-14) 87 | 88 | ### 2.4.31 (2022-03-14) 89 | 90 | ### 2.4.30 (2022-03-02) 91 | 92 | ### 2.4.29 (2022-02-25) 93 | 94 | ### 2.4.28 (2022-02-25) 95 | 96 | ### 2.4.27 (2021-10-25) 97 | 98 | ### 2.4.26 (2021-08-03) 99 | 100 | ### 2.4.25 (2021-08-03) 101 | 102 | ### 2.4.24 (2021-07-26) 103 | 104 | ### [2.4.23](https://github.com/Kikobeats/html-urls/compare/v2.4.22...v2.4.23) (2021-05-31) 105 | 106 | 
### [2.4.22](https://github.com/Kikobeats/html-urls/compare/v2.4.21...v2.4.22) (2021-03-10) 107 | 108 | ### [2.4.21](https://github.com/Kikobeats/html-urls/compare/v2.4.20...v2.4.21) (2021-03-02) 109 | 110 | ### [2.4.20](https://github.com/Kikobeats/html-urls/compare/v2.4.19...v2.4.20) (2021-02-08) 111 | 112 | ### [2.4.19](https://github.com/Kikobeats/html-urls/compare/v2.4.18...v2.4.19) (2020-12-23) 113 | 114 | ### [2.4.18](https://github.com/Kikobeats/html-urls/compare/v2.4.17...v2.4.18) (2020-12-17) 115 | 116 | ### [2.4.17](https://github.com/Kikobeats/html-urls/compare/v2.4.16...v2.4.17) (2020-12-04) 117 | 118 | ### [2.4.16](https://github.com/Kikobeats/html-urls/compare/v2.4.15...v2.4.16) (2020-11-10) 119 | 120 | ### [2.4.15](https://github.com/Kikobeats/html-urls/compare/v2.4.14...v2.4.15) (2020-10-12) 121 | 122 | ### [2.4.14](https://github.com/Kikobeats/html-urls/compare/v2.4.13...v2.4.14) (2020-08-11) 123 | 124 | ### [2.4.13](https://github.com/Kikobeats/html-urls/compare/v2.4.12...v2.4.13) (2020-07-29) 125 | 126 | ### [2.4.12](https://github.com/Kikobeats/html-urls/compare/v2.4.11...v2.4.12) (2020-07-09) 127 | 128 | ### [2.4.11](https://github.com/Kikobeats/html-urls/compare/v2.4.10...v2.4.11) (2020-04-27) 129 | 130 | ### [2.4.10](https://github.com/Kikobeats/html-urls/compare/v2.4.9...v2.4.10) (2020-03-05) 131 | 132 | ### [2.4.9](https://github.com/Kikobeats/html-urls/compare/v2.4.8...v2.4.9) (2020-02-14) 133 | 134 | ### [2.4.8](https://github.com/Kikobeats/html-urls/compare/v2.4.7...v2.4.8) (2020-02-06) 135 | 136 | ### [2.4.7](https://github.com/Kikobeats/html-urls/compare/v2.4.6...v2.4.7) (2020-02-04) 137 | 138 | ### [2.4.6](https://github.com/Kikobeats/html-urls/compare/v2.4.5...v2.4.6) (2020-02-04) 139 | 140 | ### [2.4.5](https://github.com/Kikobeats/html-urls/compare/v2.4.4...v2.4.5) (2020-01-16) 141 | 142 | ### [2.4.4](https://github.com/Kikobeats/html-urls/compare/v2.4.3...v2.4.4) (2020-01-08) 143 | 144 | ### 
[2.4.3](https://github.com/Kikobeats/html-urls/compare/v2.4.2...v2.4.3) (2019-11-19) 145 | 146 | ### [2.4.2](https://github.com/Kikobeats/html-urls/compare/v2.4.1...v2.4.2) (2019-11-11) 147 | 148 | ### [2.4.1](https://github.com/Kikobeats/html-urls/compare/v2.4.0...v2.4.1) (2019-09-25) 149 | 150 | 151 | ### Bug Fixes 152 | 153 | * test ([5e56f83](https://github.com/Kikobeats/html-urls/commit/5e56f83)) 154 | 155 | ## [2.4.0](https://github.com/Kikobeats/html-urls/compare/v2.3.16...v2.4.0) (2019-09-24) 156 | 157 | 158 | ### Features 159 | 160 | * add uri support ([a629c99](https://github.com/Kikobeats/html-urls/commit/a629c99)) 161 | 162 | ### [2.3.16](https://github.com/Kikobeats/html-urls/compare/v2.3.15...v2.3.16) (2019-09-24) 163 | 164 | ### [2.3.15](https://github.com/Kikobeats/html-urls/compare/v2.3.14...v2.3.15) (2019-09-13) 165 | 166 | 167 | ### Bug Fixes 168 | 169 | * params sort ([cd6e1e8](https://github.com/Kikobeats/html-urls/commit/cd6e1e8)) 170 | 171 | ### [2.3.14](https://github.com/Kikobeats/html-urls/compare/v2.3.13...v2.3.14) (2019-07-11) 172 | 173 | 174 | 175 | ### [2.3.13](https://github.com/Kikobeats/html-urls/compare/v2.3.12...v2.3.13) (2019-07-04) 176 | 177 | 178 | 179 | ### [2.3.12](https://github.com/Kikobeats/html-urls/compare/v2.3.11...v2.3.12) (2019-06-20) 180 | 181 | 182 | ### Bug Fixes 183 | 184 | * **package:** update @metascraper/helpers to version 5.5.0 ([d34e750](https://github.com/Kikobeats/html-urls/commit/d34e750)) 185 | 186 | 187 | 188 | ### [2.3.11](https://github.com/Kikobeats/html-urls/compare/v2.3.10...v2.3.11) (2019-06-19) 189 | 190 | 191 | ### Build System 192 | 193 | * update meta ([039f381](https://github.com/Kikobeats/html-urls/commit/039f381)) 194 | * update travis ([c742644](https://github.com/Kikobeats/html-urls/commit/c742644)) 195 | 196 | 197 | 198 | ### [2.3.10](https://github.com/Kikobeats/html-urls/compare/v2.3.9...v2.3.10) (2019-05-31) 199 | 200 | 201 | ### Bug Fixes 202 | 203 | * **package:** update 
@metascraper/helpers to version 5.4.0 ([17e3c0d](https://github.com/Kikobeats/html-urls/commit/17e3c0d)) 204 | 205 | 206 | 207 | ### [2.3.9](https://github.com/Kikobeats/html-urls/compare/v2.3.8...v2.3.9) (2019-05-20) 208 | 209 | 210 | ### Build System 211 | 212 | * change git-authors-cli position ([ded4a00](https://github.com/Kikobeats/html-urls/commit/ded4a00)) 213 | 214 | 215 | 216 | ### [2.3.8](https://github.com/Kikobeats/html-urls/compare/v2.3.7...v2.3.8) (2019-05-20) 217 | 218 | 219 | ### Bug Fixes 220 | 221 | * **package:** update is-url-http to version 1.2.0 ([386be98](https://github.com/Kikobeats/html-urls/commit/386be98)) 222 | 223 | 224 | 225 | ### [2.3.7](https://github.com/Kikobeats/html-urls/compare/v2.3.6...v2.3.7) (2019-05-15) 226 | 227 | 228 | ### Bug Fixes 229 | 230 | * **package:** update @metascraper/helpers to version 5.3.0 ([2fdb7da](https://github.com/Kikobeats/html-urls/commit/2fdb7da)) 231 | 232 | 233 | 234 | ## [2.3.6](https://github.com/Kikobeats/html-urls/compare/v2.3.5...v2.3.6) (2019-05-05) 235 | 236 | 237 | ### Bug Fixes 238 | 239 | * **package:** update @metascraper/helpers to version 5.2.0 ([32f40a5](https://github.com/Kikobeats/html-urls/commit/32f40a5)) 240 | 241 | 242 | 243 | ## [2.3.5](https://github.com/Kikobeats/html-urls/compare/v2.3.4...v2.3.5) (2019-04-22) 244 | 245 | 246 | ### Bug Fixes 247 | 248 | * **package:** update matcher to version 2.0.0 ([a51f3f5](https://github.com/Kikobeats/html-urls/commit/a51f3f5)) 249 | 250 | 251 | 252 | 253 | ## [2.3.4](https://github.com/Kikobeats/html-urls/compare/v2.3.3...v2.3.4) (2019-04-03) 254 | 255 | 256 | ### Bug Fixes 257 | 258 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 5.1.0 ([42ca806](https://github.com/Kikobeats/html-urls/commit/42ca806)) 259 | 260 | 261 | 262 | 263 | ## 2.3.3 (2019-03-17) 264 | 265 | 266 | ### Bug Fixes 267 | 268 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 5.0.0 
([28d133d](https://github.com/Kikobeats/html-urls/commit/28d133d)) 269 | 270 | 271 | 272 | 273 | ## 2.3.2 (2019-03-16) 274 | 275 | 276 | ### Bug Fixes 277 | 278 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.10.2 ([d3ecff4](https://github.com/Kikobeats/html-urls/commit/d3ecff4)) 279 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.8.3 ([2435919](https://github.com/Kikobeats/html-urls/commit/2435919)) 280 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.9.0 ([c6973ff](https://github.com/Kikobeats/html-urls/commit/c6973ff)) 281 | * **package:** update is-url-http to version 1.1.1 ([6685359](https://github.com/Kikobeats/html-urls/commit/6685359)) 282 | 283 | 284 | 285 | 286 | ## 2.3.1 (2018-11-16) 287 | 288 | 289 | ### Bug Fixes 290 | 291 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.5.5 ([9af4a4f](https://github.com/Kikobeats/html-urls/commit/9af4a4f)) 292 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.7.0 ([967577e](https://github.com/Kikobeats/html-urls/commit/967577e)) 293 | 294 | 295 | 296 | 297 | # 2.3.0 (2018-09-17) 298 | 299 | 300 | 301 | 302 | ## 2.2.1 (2018-09-16) 303 | 304 | 305 | 306 | 307 | # 2.2.0 (2018-09-16) 308 | 309 | 310 | 311 | 312 | ## 2.1.3 (2018-09-05) 313 | 314 | 315 | ### Bug Fixes 316 | 317 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.1.0 ([7be224c](https://github.com/Kikobeats/html-urls/commit/7be224c)) 318 | 319 | 320 | 321 | 322 | ## 2.1.2 (2018-08-25) 323 | 324 | 325 | ### Bug Fixes 326 | 327 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.12.1 ([c3a83cf](https://github.com/Kikobeats/html-urls/commit/c3a83cf)) 328 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.0.0 
([1afba08](https://github.com/Kikobeats/html-urls/commit/1afba08)) 329 | 330 | 331 | 332 | 333 | ## 2.1.1 (2018-07-23) 334 | 335 | 336 | 337 | 338 | # 2.1.0 (2018-07-06) 339 | 340 | 341 | 342 | 343 | ## 2.0.3 (2018-07-05) 344 | 345 | 346 | 347 | 348 | ## 2.0.2 (2018-07-05) 349 | 350 | 351 | 352 | 353 | ## 2.0.1 (2018-07-04) 354 | 355 | 356 | 357 | 358 | # 2.0.0 (2018-06-26) 359 | 360 | 361 | 362 | 363 | ## 1.0.9 (2018-06-21) 364 | 365 | 366 | 367 | 368 | ## 1.0.8 (2018-05-30) 369 | 370 | 371 | 372 | 373 | ## 1.0.7 (2018-05-29) 374 | 375 | 376 | 377 | 378 | ## 1.0.6 (2018-05-29) 379 | 380 | 381 | ### Bug Fixes 382 | 383 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.11.4 ([7ae9fd7](https://github.com/Kikobeats/html-urls/commit/7ae9fd7)) 384 | 385 | 386 | 387 | 388 | ## 1.0.5 (2018-04-29) 389 | 390 | 391 | ### Bug Fixes 392 | 393 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.10.7 ([82afd42](https://github.com/Kikobeats/html-urls/commit/82afd42)) 394 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.8.0 ([71bdece](https://github.com/Kikobeats/html-urls/commit/71bdece)) 395 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.9.2 ([fd920f7](https://github.com/Kikobeats/html-urls/commit/fd920f7)) 396 | * **package:** update matcher to version 1.1.0 ([04d1be8](https://github.com/Kikobeats/html-urls/commit/04d1be8)) 397 | 398 | 399 | 400 | 401 | ## 1.0.4 (2018-02-01) 402 | 403 | 404 | 405 | 406 | ## 1.0.3 (2018-01-27) 407 | 408 | 409 | 410 | 411 | ## 1.0.2 (2018-01-27) 412 | 413 | 414 | 415 | 416 | ## 1.0.1 (2018-01-26) 417 | 418 | 419 | ### Bug Fixes 420 | 421 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.5.0 ([6e91e1f](https://github.com/Kikobeats/html-urls/commit/6e91e1f)) 422 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers 
to version 3.6.0 ([6ea3c71](https://github.com/Kikobeats/html-urls/commit/6ea3c71)) 423 | 424 | 425 | 426 | 427 | # 1.0.0 (2018-01-16) 428 | 429 | 430 | 431 | 432 | ## 2.3.2 (2019-03-16) 433 | 434 | * fix(package): update @metascraper/helpers to version 4.10.2 ([d3ecff4](https://github.com/Kikobeats/html-urls/commit/d3ecff4)) 435 | * fix(package): update @metascraper/helpers to version 4.8.3 ([2435919](https://github.com/Kikobeats/html-urls/commit/2435919)) 436 | * fix(package): update @metascraper/helpers to version 4.9.0 ([c6973ff](https://github.com/Kikobeats/html-urls/commit/c6973ff)) 437 | * fix(package): update is-url-http to version 1.1.1 ([6685359](https://github.com/Kikobeats/html-urls/commit/6685359)) 438 | * Update README.md ([e2efa84](https://github.com/Kikobeats/html-urls/commit/e2efa84)) 439 | * Update README.md ([f0cc8fa](https://github.com/Kikobeats/html-urls/commit/f0cc8fa)) 440 | * Update README.md ([b4b2a74](https://github.com/Kikobeats/html-urls/commit/b4b2a74)) 441 | 442 | 443 | 444 | 445 | ## 2.3.1 (2018-11-16) 446 | 447 | * fix(package): update @metascraper/helpers to version 4.5.5 ([9af4a4f](https://github.com/Kikobeats/html-urls/commit/9af4a4f)) 448 | * fix(package): update @metascraper/helpers to version 4.7.0 ([967577e](https://github.com/Kikobeats/html-urls/commit/967577e)) 449 | 450 | 451 | 452 | 453 | # 2.3.0 (2018-09-17) 454 | 455 | * Add is-url-http dependency ([0ca6a28](https://github.com/Kikobeats/html-urls/commit/0ca6a28)) 456 | 457 | 458 | 459 | 460 | ## 2.2.1 (2018-09-16) 461 | 462 | * Refactor ([f5e23de](https://github.com/Kikobeats/html-urls/commit/f5e23de)) 463 | 464 | 465 | 466 | 467 | # 2.2.0 (2018-09-16) 468 | 469 | * Fix linter ([80263a2](https://github.com/Kikobeats/html-urls/commit/80263a2)) 470 | * Ignore callto URLs ([d38f561](https://github.com/Kikobeats/html-urls/commit/d38f561)) 471 | * Update package.json ([08957ee](https://github.com/Kikobeats/html-urls/commit/08957ee)) 472 | 473 | 474 | 475 | 476 | ## 2.1.3 
(2018-09-05) 477 | 478 | * Update API ([537e923](https://github.com/Kikobeats/html-urls/commit/537e923)) 479 | * fix(package): update @metascraper/helpers to version 4.1.0 ([7be224c](https://github.com/Kikobeats/html-urls/commit/7be224c)) 480 | 481 | 482 | 483 | 484 | ## 2.1.2 (2018-08-25) 485 | 486 | * Ignore mailto urls ([fcc6607](https://github.com/Kikobeats/html-urls/commit/fcc6607)) 487 | * Update package.json ([8410fb2](https://github.com/Kikobeats/html-urls/commit/8410fb2)) 488 | * fix(package): update @metascraper/helpers to version 3.12.1 ([c3a83cf](https://github.com/Kikobeats/html-urls/commit/c3a83cf)) 489 | * fix(package): update @metascraper/helpers to version 4.0.0 ([1afba08](https://github.com/Kikobeats/html-urls/commit/1afba08)) 490 | 491 | 492 | 493 | 494 | ## 2.1.1 (2018-07-23) 495 | 496 | * Update tests ([7115b20](https://github.com/Kikobeats/html-urls/commit/7115b20)) 497 | * Use last version ([2bbb066](https://github.com/Kikobeats/html-urls/commit/2bbb066)) 498 | 499 | 500 | 501 | 502 | # 2.1.0 (2018-07-06) 503 | 504 | * Add removeDuplicates option ([09282e8](https://github.com/Kikobeats/html-urls/commit/09282e8)) 505 | 506 | 507 | 508 | 509 | ## 2.0.3 (2018-07-05) 510 | 511 | * Remove duplicate urls from different tags ([862aaef](https://github.com/Kikobeats/html-urls/commit/862aaef)) 512 | * Update tests snaphots ([c8f46a8](https://github.com/Kikobeats/html-urls/commit/c8f46a8)) 513 | 514 | 515 | 516 | 517 | ## 2.0.2 (2018-07-05) 518 | 519 | * Generate tests based on selectors supported ([d2a1993](https://github.com/Kikobeats/html-urls/commit/d2a1993)) 520 | * Ignore invalid URLs ([08f8529](https://github.com/Kikobeats/html-urls/commit/08f8529)) 521 | 522 | 523 | 524 | 525 | ## 2.0.1 (2018-07-04) 526 | 527 | * Use Set instead of Array ([64e63d1](https://github.com/Kikobeats/html-urls/commit/64e63d1)) 528 | 529 | 530 | 531 | 532 | # 2.0.0 (2018-06-26) 533 | 534 | * Rename normalizeUrl → normalizedUrl 
([ce0300c](https://github.com/Kikobeats/html-urls/commit/ce0300c)) 535 | 536 | 537 | 538 | 539 | ## 1.0.9 (2018-06-21) 540 | 541 | * Expose tags and cheerio opts ([9c27283](https://github.com/Kikobeats/html-urls/commit/9c27283)) 542 | 543 | 544 | 545 | 546 | ## 1.0.8 (2018-05-30) 547 | 548 | * Refactor ([7da0187](https://github.com/Kikobeats/html-urls/commit/7da0187)) 549 | 550 | 551 | 552 | 553 | ## 1.0.7 (2018-05-29) 554 | 555 | * Update deps ([bb89028](https://github.com/Kikobeats/html-urls/commit/bb89028)) 556 | 557 | 558 | 559 | 560 | ## 1.0.6 (2018-05-29) 561 | 562 | * Update ([a352247](https://github.com/Kikobeats/html-urls/commit/a352247)) 563 | * Update README.md ([b04bf62](https://github.com/Kikobeats/html-urls/commit/b04bf62)) 564 | * fix(package): update @metascraper/helpers to version 3.11.4 ([7ae9fd7](https://github.com/Kikobeats/html-urls/commit/7ae9fd7)) 565 | 566 | 567 | 568 | 569 | ## 1.0.5 (2018-04-29) 570 | 571 | * Little Refactor ([ae4271b](https://github.com/Kikobeats/html-urls/commit/ae4271b)) 572 | * Update fixtures ([d734a0f](https://github.com/Kikobeats/html-urls/commit/d734a0f)) 573 | * fix(package): update @metascraper/helpers to version 3.10.7 ([82afd42](https://github.com/Kikobeats/html-urls/commit/82afd42)) 574 | * fix(package): update @metascraper/helpers to version 3.8.0 ([71bdece](https://github.com/Kikobeats/html-urls/commit/71bdece)) 575 | * fix(package): update @metascraper/helpers to version 3.9.2 ([fd920f7](https://github.com/Kikobeats/html-urls/commit/fd920f7)) 576 | * fix(package): update matcher to version 1.1.0 ([04d1be8](https://github.com/Kikobeats/html-urls/commit/04d1be8)) 577 | * chore(package): update chalk to version 2.4.0 ([7b9f93e](https://github.com/Kikobeats/html-urls/commit/7b9f93e)) 578 | * chore(package): update got to version 8.1.0 ([eb8e729](https://github.com/Kikobeats/html-urls/commit/eb8e729)) 579 | * chore(package): update got to version 8.2.0 
([ac10519](https://github.com/Kikobeats/html-urls/commit/ac10519)) 580 | * chore(package): update got to version 8.3.0 ([28f053b](https://github.com/Kikobeats/html-urls/commit/28f053b)) 581 | 582 | 583 | 584 | 585 | ## 1.0.4 (2018-02-01) 586 | 587 | * Add html helper for testing ([e3238ea](https://github.com/Kikobeats/html-urls/commit/e3238ea)) 588 | * Improve whitelist support ([1c773f1](https://github.com/Kikobeats/html-urls/commit/1c773f1)) 589 | 590 | 591 | 592 | 593 | ## 1.0.3 (2018-01-27) 594 | 595 | * Tweaks ([f7d4865](https://github.com/Kikobeats/html-urls/commit/f7d4865)) 596 | 597 | 598 | 599 | 600 | ## 1.0.2 (2018-01-27) 601 | 602 | * Rename blacklist → whitelist ([0891b36](https://github.com/Kikobeats/html-urls/commit/0891b36)) 603 | 604 | 605 | 606 | 607 | ## 1.0.1 (2018-01-26) 608 | 609 | * Rename ([0927b08](https://github.com/Kikobeats/html-urls/commit/0927b08)) 610 | * Rename interface ([df7e07f](https://github.com/Kikobeats/html-urls/commit/df7e07f)) 611 | * Update README.md ([d70a3f1](https://github.com/Kikobeats/html-urls/commit/d70a3f1)) 612 | * fix(package): update @metascraper/helpers to version 3.5.0 ([6e91e1f](https://github.com/Kikobeats/html-urls/commit/6e91e1f)) 613 | * fix(package): update @metascraper/helpers to version 3.6.0 ([6ea3c71](https://github.com/Kikobeats/html-urls/commit/6ea3c71)) 614 | 615 | 616 | 617 | 618 | # 1.0.0 (2018-01-16) 619 | 620 | * First commit ([6fac1f4](https://github.com/Kikobeats/html-urls/commit/6fac1f4)) 621 | * Update README.md ([7507a54](https://github.com/Kikobeats/html-urls/commit/7507a54)) 622 | * docs(readme): add Greenkeeper badge ([d67bfe3](https://github.com/Kikobeats/html-urls/commit/d67bfe3)) 623 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright © 2018 Kiko Beats (kikobeats.com) 4 | 5 | Permission is hereby granted, free of charge, 
to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # html-urls 2 | 3 | ![Last version](https://img.shields.io/github/tag/Kikobeats/html-urls.svg?style=flat-square) 4 | [![Coverage Status](https://img.shields.io/coveralls/Kikobeats/html-urls.svg?style=flat-square)](https://coveralls.io/github/Kikobeats/html-urls) 5 | [![NPM Status](https://img.shields.io/npm/dm/html-urls.svg?style=flat-square)](https://www.npmjs.org/package/html-urls) 6 | 7 | > Get all URLs from a HTML markup. It's based on [W3C link checker](https://github.com/w3c/node-linkchecker). 
8 | 9 | ## Install 10 | 11 | ```bash 12 | $ npm install html-urls --save 13 | ``` 14 | 15 | ## Usage 16 | 17 | ```js 18 | const got = require('got') 19 | const htmlUrls = require('html-urls') 20 | 21 | ;(async () => { 22 | const url = process.argv[2] 23 | if (!url) throw new TypeError('Need to provide an url as first argument.') 24 | const { body: html } = await got(url) 25 | const links = htmlUrls({ html, url }) 26 | 27 | links.forEach(({ url }) => console.log(url)) 28 | 29 | // => [ 30 | // 'https://microlink.io/component---src-layouts-index-js-86b5f94dfa48cb04ae41.js', 31 | // 'https://microlink.io/component---src-pages-index-js-a302027ab59365471b7d.js', 32 | // 'https://microlink.io/path---index-709b6cf5b986a710cc3a.js', 33 | // 'https://microlink.io/app-8b4269e1fadd08e6ea1e.js', 34 | // 'https://microlink.io/commons-8b286eac293678e1c98c.js', 35 | // 'https://microlink.io', 36 | // ... 37 | // ] 38 | })() 39 | ``` 40 | 41 | It returns the following structure for every value detected in the HTML markup: 42 | 43 | ##### value 44 | Type: `string` 45 | 46 | The original value. 47 | 48 | ##### url 49 | Type: `string|undefined` 50 | 51 | The normalized URL, if the value can be considered an URL. 52 | 53 | ##### uri 54 | Type: `string|undefined` 55 | 56 | The normalized value as URI. 57 | 58 |
59 | 60 | See [example.js](/example.js) for more! 61 | 62 | ## API 63 | 64 | ### htmlUrls([options]) 65 | 66 | #### options 67 | 68 | ##### html 69 | 70 | Type: `string`
71 | Default: `''` 72 | 73 | The HTML markup. 74 | 75 | ##### url 76 | 77 | Type: `string`
78 | Default: `''` 79 | 80 | The URL associated with the HTML markup. 81 | 82 | It is used to resolve relative links that can be present in the HTML markup. 83 | 84 | ##### whitelist 85 | 86 | Type: `array`
87 | Default: `[]` 88 | 89 | A list of links to be excluded from the final output. It supports regex patterns. 90 | 91 | See [matcher](https://github.com/sindresorhus/matcher#matcher) to learn more. 92 | 93 | ##### removeDuplicates 94 | 95 | Type: `boolean`
96 | Default: `true` 97 | 98 | Remove duplicated links detected over all the HTML tags. 99 | 100 | ## Related 101 | 102 | - [xml-urls](https://github.com/Kikobeats/xml-urls) – Get all urls from a Feed/Atom/RSS/Sitemap xml markup. 103 | - [css-urls](https://github.com/Kikobeats/css-urls) – Get all URLs referenced from stylesheet files. 104 | 105 | ## License 106 | 107 | **html-urls** © [Kiko Beats](https://kikobeats.com), released under the [MIT](https://github.com/Kikobeats/html-urls/blob/master/LICENSE) License.
108 | Authored and maintained by Kiko Beats with help from [contributors](https://github.com/Kikobeats/html-urls/contributors). 109 | 110 | > [kikobeats.com](https://kikobeats.com) · GitHub [@Kiko Beats](https://github.com/Kikobeats) · X [@Kikobeats](https://x.com/Kikobeats) 111 | -------------------------------------------------------------------------------- /example.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const getLinks = require('.') 4 | const got = require('got') 5 | ;(async () => { 6 | const url = process.argv[2] 7 | if (!url) throw new TypeError('Need to provide an url as first argument.') 8 | const { body: html } = await got(url) 9 | const links = getLinks({ html, url }) 10 | links.forEach(link => console.log(link.normalizedUrl)) 11 | })() 12 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "html-urls", 3 | "description": "Get all links from a HTML markup", 4 | "homepage": "https://github.com/Kikobeats/html-urls", 5 | "version": "2.4.65", 6 | "main": "src/index.js", 7 | "author": { 8 | "email": "josefrancisco.verdu@gmail.com", 9 | "name": "Kiko Beats", 10 | "url": "https://kikobeats.com" 11 | }, 12 | "contributors": [], 13 | "repository": { 14 | "type": "git", 15 | "url": "git+https://github.com/Kikobeats/html-urls.git" 16 | }, 17 | "bugs": { 18 | "url": "https://github.com/Kikobeats/html-urls/issues" 19 | }, 20 | "keywords": [ 21 | "href", 22 | "hrefs", 23 | "html", 24 | "link", 25 | "links", 26 | "src", 27 | "url", 28 | "urls" 29 | ], 30 | "dependencies": { 31 | "@metascraper/helpers": "~5.46.1", 32 | "cheerio": "~1.0.0", 33 | "is-uri": "~1.2.6", 34 | "is-url-http": "~2.3.9", 35 | "lodash": "~4.17.21", 36 | "matcher": "~4.0.0" 37 | }, 38 | "devDependencies": { 39 | "@commitlint/cli": "latest", 40 | "@commitlint/config-conventional": 
"latest", 41 | "@ksmithut/prettier-standard": "latest", 42 | "ava": "latest", 43 | "c8": "latest", 44 | "ci-publish": "latest", 45 | "finepack": "latest", 46 | "git-authors-cli": "latest", 47 | "github-generate-release": "latest", 48 | "nano-staged": "latest", 49 | "simple-git-hooks": "latest", 50 | "standard": "latest", 51 | "standard-version": "latest" 52 | }, 53 | "engines": { 54 | "node": ">= 6" 55 | }, 56 | "files": [ 57 | "src" 58 | ], 59 | "scripts": { 60 | "clean": "rm -rf node_modules", 61 | "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true", 62 | "lint": "standard", 63 | "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)", 64 | "pretest": "npm run lint", 65 | "pretty": "prettier-standard index.js {core,test,bin}/**/*.js --single-quote", 66 | "release": "standard-version -a", 67 | "release:github": "github-generate-release", 68 | "release:tags": "git push --follow-tags origin HEAD:master", 69 | "test": "c8 ava" 70 | }, 71 | "license": "MIT", 72 | "ava": { 73 | "files": [ 74 | "test/**/*.js", 75 | "!test/helpers.js" 76 | ] 77 | }, 78 | "commitlint": { 79 | "extends": [ 80 | "@commitlint/config-conventional" 81 | ], 82 | "rules": { 83 | "body-max-line-length": [ 84 | 0 85 | ] 86 | } 87 | }, 88 | "nano-staged": { 89 | "*.js": [ 90 | "prettier-standard", 91 | "standard --fix" 92 | ], 93 | "package.json": [ 94 | "finepack" 95 | ] 96 | }, 97 | "simple-git-hooks": { 98 | "commit-msg": "npx commitlint --edit", 99 | "pre-commit": "npx nano-staged" 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const { uniqBy, concat, isEmpty, reduce, get, findIndex } = require('lodash') 4 | const { normalizeUrl } = require('@metascraper/helpers') 5 | const 
isHttpUrl = require('is-url-http') 6 | const cheerio = require('cheerio') 7 | const matcher = require('matcher') 8 | const isUri = require('is-uri') 9 | 10 | const UID = 'uri' 11 | 12 | /** 13 | * Originally picked from https://github.com/rehypejs/rehype-minify/blob/main/packages/html-url-attributes/index.js 14 | */ 15 | const TAGS = { 16 | action: ['form'], 17 | cite: ['blockquote', 'del', 'ins', 'q'], 18 | data: ['object'], 19 | formaction: ['button', 'input'], 20 | href: ['a', 'area', 'base', 'link'], 21 | icon: ['menuitem'], 22 | manifest: ['html'], 23 | ping: ['a', 'area'], 24 | poster: ['video'], 25 | src: ['audio', 'embed', 'iframe', 'img', 'input', 'script', 'source', 'track', 'video'] 26 | } 27 | 28 | const reduceSelector = (collection, fn, acc = []) => { 29 | collection.each(function () { 30 | acc = fn(acc, this) 31 | }) 32 | return acc 33 | } 34 | 35 | const includes = (collection, fn) => findIndex(collection, fn) !== -1 36 | 37 | const getLink = ({ url, el, attribute }) => { 38 | const attr = get(el, `attribs.${attribute}`, '') 39 | if (isEmpty(attr)) return undefined 40 | const absoluteUrl = url ? normalizeUrl(url, attr) : normalizeUrl(attr) 41 | return { 42 | value: attr, 43 | url: isHttpUrl(absoluteUrl) ? absoluteUrl : undefined, 44 | uri: isUri(absoluteUrl) ? absoluteUrl : undefined 45 | } 46 | } 47 | 48 | const createGetLinksByAttribute = ({ removeDuplicates }) => { 49 | const has = removeDuplicates 50 | ? (acc, uid) => includes(acc, item => get(item, UID) === uid) 51 | : () => false 52 | 53 | return ({ selector, attribute, url, whitelist }) => 54 | reduceSelector( 55 | selector, 56 | (acc, el) => { 57 | const link = getLink({ url, el, attribute }) 58 | const uid = get(link, UID) 59 | if (isEmpty(link)) return acc 60 | const isAlreadyAdded = has(acc, uid) 61 | if (isAlreadyAdded) return acc 62 | const match = !isEmpty(whitelist) && matcher([uid], concat(whitelist)) 63 | return isEmpty(match) ? 
concat(acc, link) : acc 64 | }, 65 | [] 66 | ) 67 | } 68 | 69 | const createAdd = ({ removeDuplicates }) => 70 | removeDuplicates 71 | ? (acc, links) => uniqBy(concat(acc, links), UID) 72 | : (acc, links) => concat(acc, links) 73 | 74 | module.exports = ({ 75 | html = '', 76 | url = '', 77 | whitelist = false, 78 | removeDuplicates = true, 79 | cheerioOpts = {} 80 | } = {}) => { 81 | const $ = cheerio.load(html, cheerioOpts) 82 | 83 | const add = createAdd({ removeDuplicates }) 84 | const getLinksByAttribute = createGetLinksByAttribute({ removeDuplicates }) 85 | 86 | return reduce( 87 | TAGS, 88 | (acc, htmlTags, attribute) => { 89 | const links = getLinksByAttribute({ 90 | selector: $(htmlTags.join(',')), 91 | attribute, 92 | url, 93 | whitelist 94 | }) 95 | return add(acc, links) 96 | }, 97 | [] 98 | ) 99 | } 100 | 101 | module.exports.TAGS = TAGS 102 | -------------------------------------------------------------------------------- /test/helpers.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const generateHtml = ({ links = [], urls = [] }) => ` 4 | 5 | 6 | 7 | 8 | 9 | 10 | hello world 11 | ${links.map(link => ``).join('\n')} 12 | 13 | 14 | ${urls.map(url => ``).join('\n')} 15 | 16 | 17 | ` 18 | 19 | module.exports = { generateHtml } 20 | -------------------------------------------------------------------------------- /test/index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const test = require('ava') 4 | 5 | const getLinks = require('..') 6 | 7 | const { generateHtml } = require('./helpers') 8 | 9 | test('empty html generate empty output', t => { 10 | t.deepEqual(getLinks(), []) 11 | t.deepEqual(getLinks(''), []) 12 | }) 13 | 14 | test('get links from a semantic markup', t => { 15 | const html = generateHtml({ 16 | urls: ['https://google.com', 'https://facebook.com', 'mailto://kiko@example.com'] 17 | }) 18 | 19 | t.snapshot(getLinks({ 
html })) 20 | }) 21 | 22 | test('remove duplicate urls from same tag', t => { 23 | const html = generateHtml({ 24 | urls: ['https://google.com', 'https://google.com', 'https://facebook.com'] 25 | }) 26 | t.snapshot(getLinks({ html })) 27 | }) 28 | 29 | test('remove duplicate urls from different tags', t => { 30 | const html = generateHtml({ 31 | urls: ['https://google.com'], 32 | links: ['https://google.com'] 33 | }) 34 | t.snapshot(getLinks({ html })) 35 | }) 36 | 37 | test('non remove duplicate urls from same tag', t => { 38 | const html = generateHtml({ 39 | urls: ['https://google.com', 'https://google.com', 'https://facebook.com'] 40 | }) 41 | t.snapshot(getLinks({ html, removeDuplicates: false })) 42 | }) 43 | 44 | test('non remove duplicate urls from different tags', t => { 45 | const html = generateHtml({ 46 | urls: ['https://google.com'], 47 | links: ['https://google.com'] 48 | }) 49 | t.snapshot(getLinks({ html, removeDuplicates: false })) 50 | }) 51 | 52 | test('normalize trailing slash', t => { 53 | const html = generateHtml({ 54 | urls: ['https://google.com/', 'https://google.com', 'https://facebook.com'] 55 | }) 56 | 57 | t.snapshot(getLinks({ html })) 58 | }) 59 | 60 | test('normalize wwww', t => { 61 | const html = generateHtml({ 62 | urls: ['https://www.google.com', 'https://google.com', 'https://facebook.com'] 63 | }) 64 | 65 | t.snapshot(getLinks({ html })) 66 | }) 67 | 68 | test('normalize query string parameters', t => { 69 | const html = generateHtml({ 70 | urls: [ 71 | 'https://google.com?hello=world&foo=bar', 72 | 'https://google.com?foo=bar&hello=world', 73 | 'https://facebook.com' 74 | ] 75 | }) 76 | 77 | t.snapshot(getLinks({ html })) 78 | }) 79 | 80 | test('ignore invalid URLs', t => { 81 | const html = generateHtml({ 82 | urls: ['http://'] 83 | }) 84 | 85 | t.snapshot(getLinks({ html })) 86 | }) 87 | -------------------------------------------------------------------------------- /test/snapshots/index.js.md: 
-------------------------------------------------------------------------------- 1 | # Snapshot report for `test/index.js` 2 | 3 | The actual snapshot is saved in `index.js.snap`. 4 | 5 | Generated by [AVA](https://avajs.dev). 6 | 7 | ## get links from a semantic markup 8 | 9 | > Snapshot 1 10 | 11 | [ 12 | { 13 | uri: 'https://google.com/', 14 | url: 'https://google.com/', 15 | value: 'https://google.com', 16 | }, 17 | { 18 | uri: 'https://facebook.com/', 19 | url: 'https://facebook.com/', 20 | value: 'https://facebook.com', 21 | }, 22 | { 23 | uri: 'mailto://example.com', 24 | url: undefined, 25 | value: 'mailto://kiko@example.com', 26 | }, 27 | ] 28 | 29 | ## remove duplicate urls from same tag 30 | 31 | > Snapshot 1 32 | 33 | [ 34 | { 35 | uri: 'https://google.com/', 36 | url: 'https://google.com/', 37 | value: 'https://google.com', 38 | }, 39 | { 40 | uri: 'https://facebook.com/', 41 | url: 'https://facebook.com/', 42 | value: 'https://facebook.com', 43 | }, 44 | ] 45 | 46 | ## remove duplicate urls from different tags 47 | 48 | > Snapshot 1 49 | 50 | [ 51 | { 52 | uri: 'https://google.com/', 53 | url: 'https://google.com/', 54 | value: 'https://google.com', 55 | }, 56 | ] 57 | 58 | ## non remove duplicate urls from same tag 59 | 60 | > Snapshot 1 61 | 62 | [ 63 | { 64 | uri: 'https://google.com/', 65 | url: 'https://google.com/', 66 | value: 'https://google.com', 67 | }, 68 | { 69 | uri: 'https://google.com/', 70 | url: 'https://google.com/', 71 | value: 'https://google.com', 72 | }, 73 | { 74 | uri: 'https://facebook.com/', 75 | url: 'https://facebook.com/', 76 | value: 'https://facebook.com', 77 | }, 78 | ] 79 | 80 | ## non remove duplicate urls from different tags 81 | 82 | > Snapshot 1 83 | 84 | [ 85 | { 86 | uri: 'https://google.com/', 87 | url: 'https://google.com/', 88 | value: 'https://google.com', 89 | }, 90 | { 91 | uri: 'https://google.com/', 92 | url: 'https://google.com/', 93 | value: 'https://google.com', 94 | }, 95 | ] 96 | 97 | ## normalize 
trailing slash 98 | 99 | > Snapshot 1 100 | 101 | [ 102 | { 103 | uri: 'https://google.com/', 104 | url: 'https://google.com/', 105 | value: 'https://google.com/', 106 | }, 107 | { 108 | uri: 'https://facebook.com/', 109 | url: 'https://facebook.com/', 110 | value: 'https://facebook.com', 111 | }, 112 | ] 113 | 114 | ## normalize wwww 115 | 116 | > Snapshot 1 117 | 118 | [ 119 | { 120 | uri: 'https://www.google.com/', 121 | url: 'https://www.google.com/', 122 | value: 'https://www.google.com', 123 | }, 124 | { 125 | uri: 'https://google.com/', 126 | url: 'https://google.com/', 127 | value: 'https://google.com', 128 | }, 129 | { 130 | uri: 'https://facebook.com/', 131 | url: 'https://facebook.com/', 132 | value: 'https://facebook.com', 133 | }, 134 | ] 135 | 136 | ## normalize query string parameters 137 | 138 | > Snapshot 1 139 | 140 | [ 141 | { 142 | uri: 'https://google.com/?hello=world&foo=bar', 143 | url: 'https://google.com/?hello=world&foo=bar', 144 | value: 'https://google.com?hello=world&foo=bar', 145 | }, 146 | { 147 | uri: 'https://google.com/?foo=bar&hello=world', 148 | url: 'https://google.com/?foo=bar&hello=world', 149 | value: 'https://google.com?foo=bar&hello=world', 150 | }, 151 | { 152 | uri: 'https://facebook.com/', 153 | url: 'https://facebook.com/', 154 | value: 'https://facebook.com', 155 | }, 156 | ] 157 | 158 | ## ignore invalid URLs 159 | 160 | > Snapshot 1 161 | 162 | [ 163 | { 164 | uri: undefined, 165 | url: undefined, 166 | value: 'http://', 167 | }, 168 | ] 169 | -------------------------------------------------------------------------------- /test/snapshots/index.js.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kikobeats/html-urls/03692f85e49a0e402123c6161db4035f7bed1d41/test/snapshots/index.js.snap -------------------------------------------------------------------------------- /test/snapshots/tags.js.md: 
-------------------------------------------------------------------------------- 1 | # Snapshot report for `test/tags.js` 2 | 3 | The actual snapshot is saved in `tags.js.snap`. 4 | 5 | Generated by [AVA](https://avajs.dev). 6 | 7 | ## form (action) 8 | 9 | > Snapshot 1 10 | 11 | [ 12 | { 13 | uri: 'https://example.com/', 14 | url: 'https://example.com/', 15 | value: 'https://example.com', 16 | }, 17 | ] 18 | 19 | ## blockquote (cite) 20 | 21 | > Snapshot 1 22 | 23 | [ 24 | { 25 | uri: 'https://example.com/', 26 | url: 'https://example.com/', 27 | value: 'https://example.com', 28 | }, 29 | ] 30 | 31 | ## del (cite) 32 | 33 | > Snapshot 1 34 | 35 | [ 36 | { 37 | uri: 'https://example.com/', 38 | url: 'https://example.com/', 39 | value: 'https://example.com', 40 | }, 41 | ] 42 | 43 | ## ins (cite) 44 | 45 | > Snapshot 1 46 | 47 | [ 48 | { 49 | uri: 'https://example.com/', 50 | url: 'https://example.com/', 51 | value: 'https://example.com', 52 | }, 53 | ] 54 | 55 | ## q (cite) 56 | 57 | > Snapshot 1 58 | 59 | [ 60 | { 61 | uri: 'https://example.com/', 62 | url: 'https://example.com/', 63 | value: 'https://example.com', 64 | }, 65 | ] 66 | 67 | ## object (data) 68 | 69 | > Snapshot 1 70 | 71 | [ 72 | { 73 | uri: 'https://example.com/', 74 | url: 'https://example.com/', 75 | value: 'https://example.com', 76 | }, 77 | ] 78 | 79 | ## button (formaction) 80 | 81 | > Snapshot 1 82 | 83 | [ 84 | { 85 | uri: 'https://example.com/', 86 | url: 'https://example.com/', 87 | value: 'https://example.com', 88 | }, 89 | ] 90 | 91 | ## input (formaction) 92 | 93 | > Snapshot 1 94 | 95 | [ 96 | { 97 | uri: 'https://example.com/', 98 | url: 'https://example.com/', 99 | value: 'https://example.com', 100 | }, 101 | ] 102 | 103 | ## a (href) 104 | 105 | > Snapshot 1 106 | 107 | [ 108 | { 109 | uri: 'https://example.com/', 110 | url: 'https://example.com/', 111 | value: 'https://example.com', 112 | }, 113 | ] 114 | 115 | ## area (href) 116 | 117 | > Snapshot 1 118 | 119 | [ 120 | { 121 | 
uri: 'https://example.com/', 122 | url: 'https://example.com/', 123 | value: 'https://example.com', 124 | }, 125 | ] 126 | 127 | ## base (href) 128 | 129 | > Snapshot 1 130 | 131 | [ 132 | { 133 | uri: 'https://example.com/', 134 | url: 'https://example.com/', 135 | value: 'https://example.com', 136 | }, 137 | ] 138 | 139 | ## link (href) 140 | 141 | > Snapshot 1 142 | 143 | [ 144 | { 145 | uri: 'https://example.com/', 146 | url: 'https://example.com/', 147 | value: 'https://example.com', 148 | }, 149 | ] 150 | 151 | ## menuitem (icon) 152 | 153 | > Snapshot 1 154 | 155 | [ 156 | { 157 | uri: 'https://example.com/', 158 | url: 'https://example.com/', 159 | value: 'https://example.com', 160 | }, 161 | ] 162 | 163 | ## html (manifest) 164 | 165 | > Snapshot 1 166 | 167 | [ 168 | { 169 | uri: 'https://example.com/', 170 | url: 'https://example.com/', 171 | value: 'https://example.com', 172 | }, 173 | ] 174 | 175 | ## a (ping) 176 | 177 | > Snapshot 1 178 | 179 | [ 180 | { 181 | uri: 'https://example.com/', 182 | url: 'https://example.com/', 183 | value: 'https://example.com', 184 | }, 185 | ] 186 | 187 | ## area (ping) 188 | 189 | > Snapshot 1 190 | 191 | [ 192 | { 193 | uri: 'https://example.com/', 194 | url: 'https://example.com/', 195 | value: 'https://example.com', 196 | }, 197 | ] 198 | 199 | ## video (poster) 200 | 201 | > Snapshot 1 202 | 203 | [ 204 | { 205 | uri: 'https://example.com/', 206 | url: 'https://example.com/', 207 | value: 'https://example.com', 208 | }, 209 | ] 210 | 211 | ## audio (src) 212 | 213 | > Snapshot 1 214 | 215 | [ 216 | { 217 | uri: 'https://example.com/', 218 | url: 'https://example.com/', 219 | value: 'https://example.com', 220 | }, 221 | ] 222 | 223 | ## embed (src) 224 | 225 | > Snapshot 1 226 | 227 | [ 228 | { 229 | uri: 'https://example.com/', 230 | url: 'https://example.com/', 231 | value: 'https://example.com', 232 | }, 233 | ] 234 | 235 | ## iframe (src) 236 | 237 | > Snapshot 1 238 | 239 | [ 240 | { 241 | uri: 
'https://example.com/', 242 | url: 'https://example.com/', 243 | value: 'https://example.com', 244 | }, 245 | ] 246 | 247 | ## img (src) 248 | 249 | > Snapshot 1 250 | 251 | [ 252 | { 253 | uri: 'https://example.com/', 254 | url: 'https://example.com/', 255 | value: 'https://example.com', 256 | }, 257 | ] 258 | 259 | ## input (src) 260 | 261 | > Snapshot 1 262 | 263 | [ 264 | { 265 | uri: 'https://example.com/', 266 | url: 'https://example.com/', 267 | value: 'https://example.com', 268 | }, 269 | ] 270 | 271 | ## script (src) 272 | 273 | > Snapshot 1 274 | 275 | [ 276 | { 277 | uri: 'https://example.com/', 278 | url: 'https://example.com/', 279 | value: 'https://example.com', 280 | }, 281 | ] 282 | 283 | ## source (src) 284 | 285 | > Snapshot 1 286 | 287 | [ 288 | { 289 | uri: 'https://example.com/', 290 | url: 'https://example.com/', 291 | value: 'https://example.com', 292 | }, 293 | ] 294 | 295 | ## track (src) 296 | 297 | > Snapshot 1 298 | 299 | [ 300 | { 301 | uri: 'https://example.com/', 302 | url: 'https://example.com/', 303 | value: 'https://example.com', 304 | }, 305 | ] 306 | 307 | ## video (src) 308 | 309 | > Snapshot 1 310 | 311 | [ 312 | { 313 | uri: 'https://example.com/', 314 | url: 'https://example.com/', 315 | value: 'https://example.com', 316 | }, 317 | ] 318 | -------------------------------------------------------------------------------- /test/snapshots/tags.js.snap: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kikobeats/html-urls/03692f85e49a0e402123c6161db4035f7bed1d41/test/snapshots/tags.js.snap -------------------------------------------------------------------------------- /test/tags.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const { forEach } = require('lodash') 4 | const test = require('ava') 5 | 6 | const getLinks = require('..') 7 | 8 | const { TAGS } = getLinks 9 | 10 | forEach(TAGS, (tags, 
attributeName) => { 11 | forEach(tags, tag => { 12 | test(`${tag} (${attributeName})`, t => { 13 | const html = ` 14 | 15 | 16 | 17 | 18 | 19 | 20 | hello world 21 | <${tag} ${attributeName}="https://example.com"> 22 | 23 | 24 | 25 | 26 | ` 27 | const url = 'https://example.com' 28 | t.snapshot(getLinks({ html, url })) 29 | }) 30 | }) 31 | }) 32 | -------------------------------------------------------------------------------- /test/whitelist.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const test = require('ava') 4 | 5 | const getLinks = require('..') 6 | 7 | const { generateHtml } = require('./helpers') 8 | 9 | test('exclude exact match from whitelist', t => { 10 | const urls = [ 11 | 'https://indiehackers.com/images/favicons/favicon', 12 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173', 13 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css', 14 | 'https://indiehackers.com/feed.xml', 15 | 'https://indiehackers.com' 16 | ] 17 | 18 | const html = generateHtml({ urls }) 19 | const whitelist = ['https://indiehackers.com/'] 20 | const htmlUrls = getLinks({ html, whitelist }).map(({ uri }) => uri) 21 | 22 | t.deepEqual(htmlUrls, [ 23 | 'https://indiehackers.com/images/favicons/favicon', 24 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173', 25 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css', 26 | 'https://indiehackers.com/feed.xml' 27 | ]) 28 | }) 29 | 30 | test('exclude pattern from whitelist', t => { 31 | const urls = [ 32 | 'https://indiehackers.com/images/favicons/favicon', 33 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173', 34 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css', 35 | 'https://indiehackers.com/feed.xml', 36 | 'https://indiehackers.com' 37 | ] 38 | 39 | const html = generateHtml({ urls }) 
40 | const whitelist = ['https://indiehackers.com*'] 41 | const htmlUrls = getLinks({ html, whitelist }).map(({ uri }) => uri) 42 | 43 | t.deepEqual(htmlUrls, [ 44 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173' 45 | ]) 46 | }) 47 | 48 | test('exclude multiple pattern from whitelist', t => { 49 | const urls = [ 50 | 'https://indiehackers.com/images/favicons/favicon', 51 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173', 52 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css', 53 | 'https://indiehackers.com/feed.xml', 54 | 'https://indiehackers.com' 55 | ] 56 | 57 | const html = generateHtml({ urls }) 58 | const whitelist = ['https://indiehackers.com*', 'https://www.indiehackers.com*'] 59 | const htmlUrls = getLinks({ html, whitelist }).map(({ uri }) => uri) 60 | 61 | t.deepEqual(htmlUrls, []) 62 | }) 63 | --------------------------------------------------------------------------------