├── .editorconfig
├── .gitattributes
├── .github
├── dependabot.yml
└── workflows
│ ├── main.yml
│ └── pull_request.yml
├── .gitignore
├── .npmrc
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── example.js
├── package.json
├── src
└── index.js
└── test
├── helpers.js
├── index.js
├── snapshots
├── index.js.md
├── index.js.snap
├── tags.js.md
└── tags.js.snap
├── tags.js
└── whitelist.js
/.editorconfig:
--------------------------------------------------------------------------------
1 | # http://editorconfig.org
2 |
3 | root = true
4 |
5 | [*]
6 | indent_style = space
7 | indent_size = 2
8 | end_of_line = lf
9 | charset = utf-8
10 | trim_trailing_whitespace = true
11 | insert_final_newline = true
12 | max_line_length = 100
13 | indent_brace_style = 1TBS
14 | spaces_around_operators = true
15 | quote_type = auto
16 |
17 | [package.json]
18 | indent_style = space
19 | indent_size = 2
20 |
21 | [*.md]
22 | trim_trailing_whitespace = false
23 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: npm  # dependency updates for the npm manifest at the repository root
4 |     directory: '/'
5 |     schedule:
6 |       interval: daily
7 |   - package-ecosystem: 'github-actions'  # updates for the actions referenced by the workflows
8 |     directory: '/'
9 |     schedule:
10 |       # Check for updates to GitHub Actions every weekday
11 |       interval: 'daily'
12 |
--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
1 | name: main
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 |
8 | jobs:
9 |   contributors:  # Refreshes the contributors list in package.json and pushes the resulting commit
10 |     if: "${{ github.event.head_commit.message != 'build: contributors' }}"
11 |     runs-on: ubuntu-latest
12 |     steps:
13 |       - name: Checkout
14 |         uses: actions/checkout@v4
15 |         with:
16 |           fetch-depth: 0  # full history
17 |           token: ${{ secrets.GITHUB_TOKEN }}
18 |       - name: Setup Node.js
19 |         uses: actions/setup-node@v4
20 |         with:
21 |           node-version: lts/*
22 |       - name: Contributors  # identity values are quoted so names containing spaces stay one argument
23 |         run: |
24 |           git config --global user.email "${{ secrets.GIT_EMAIL }}"
25 |           git config --global user.name "${{ secrets.GIT_USERNAME }}"
26 |           npm run contributors
27 |       - name: Push changes  # head_ref is empty on push events, so the pushed branch is named via ref_name
28 |         run: |
29 |           git push origin ${{ github.ref_name }}
30 |
31 |   release:  # Tests, uploads coverage and runs the release script once `contributors` has finished
32 |     if: |
33 |       !startsWith(github.event.head_commit.message, 'chore(release):') &&
34 |       !startsWith(github.event.head_commit.message, 'docs:') &&
35 |       !startsWith(github.event.head_commit.message, 'ci:')
36 |     needs: [contributors]
37 |     runs-on: ubuntu-latest
38 |     steps:
39 |       - name: Checkout
40 |         uses: actions/checkout@v4
41 |         with:
42 |           token: ${{ secrets.GITHUB_TOKEN }}
43 |       - name: Setup Node.js
44 |         uses: actions/setup-node@v4
45 |         with:
46 |           node-version: lts/*
47 |       - name: Setup PNPM
48 |         uses: pnpm/action-setup@v4
49 |         with:
50 |           version: latest
51 |           run_install: true
52 |       - name: Test
53 |         run: npm test
54 |       - name: Report  # writes the lcov report file
55 |         run: npx c8 report --reporter=text-lcov > coverage/lcov.info
56 |       - name: Coverage
57 |         uses: coverallsapp/github-action@main
58 |         with:
59 |           github-token: ${{ secrets.GITHUB_TOKEN }}
60 |       - name: Release  # identity values are quoted so names containing spaces stay one argument
61 |         env:
62 |           GH_TOKEN: ${{ secrets.GH_TOKEN }}
63 |           NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
64 |         run: |
65 |           git config --global user.email "${{ secrets.GIT_EMAIL }}"
66 |           git config --global user.name "${{ secrets.GIT_USERNAME }}"
67 |           git pull origin master
68 |           npm run release
69 |
--------------------------------------------------------------------------------
/.github/workflows/pull_request.yml:
--------------------------------------------------------------------------------
1 | name: pull_request
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | pull_request:
8 | branches:
9 | - master
10 |
11 | jobs:
12 |   test:  # Runs the test script and uploads the coverage report
13 |     if: github.ref != 'refs/heads/master'  # skipped when the run's ref is the master branch
14 |     runs-on: ubuntu-latest
15 |     steps:
16 |       - name: Checkout
17 |         uses: actions/checkout@v4
18 |         with:
19 |           token: ${{ secrets.GITHUB_TOKEN }}
20 |       - name: Setup Node.js
21 |         uses: actions/setup-node@v4
22 |         with:
23 |           node-version: lts/*
24 |       - name: Setup PNPM  # presumably also installs dependencies via run_install; confirm in the action docs
25 |         uses: pnpm/action-setup@v4
26 |         with:
27 |           version: latest
28 |           run_install: true
29 |       - name: Test  # the `pretest` script runs the linter first, then `test` runs `c8 ava`
30 |         run: npm test
31 |       - name: Report  # writes the lcov report file
32 |         run: npx c8 report --reporter=text-lcov > coverage/lcov.info
33 |       - name: Coverage
34 |         uses: coverallsapp/github-action@main
35 |         with:
36 |           github-token: ${{ secrets.GITHUB_TOKEN }}
37 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ############################
2 | # npm
3 | ############################
4 | node_modules
5 | npm-debug.log
6 | .node_history
7 | yarn.lock
8 | package-lock.json
9 |
10 | ############################
11 | # tmp, editor & OS files
12 | ############################
13 | .tmp
14 | *.swo
15 | *.swp
16 | *.swn
17 | *.swm
18 | .DS_Store
19 | *#
20 | *~
21 | .idea
22 | *sublime*
23 | nbproject
24 |
25 | ############################
26 | # Tests
27 | ############################
28 | testApp
29 | coverage
30 | .nyc_output
31 |
32 | ############################
33 | # Other
34 | ############################
35 | .envrc
36 |
--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | unsafe-perm=true
2 | save-prefix=~
3 | save=false
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: node_js
2 |
3 | node_js:
4 |   - lts/*
5 |   - node
6 |
7 | # NOTE(review): no `coverage` script is visible in package.json's scripts; confirm this still works.
8 | after_success: npm run coverage
9 |
10 | # Each stage is a mapping with `name` and `if`, so the condition attaches to the stage.
11 | # Both stages are limited to master and skipped when the commit message matches
12 | # docs, release or no-release.
13 | stages:
14 |   - name: Test
15 |     if: branch = master AND commit_message !~ /(docs|release|no-release)/
16 |   - name: Release
17 |     if: branch = master AND commit_message !~ /(docs|release|no-release)/
18 |
19 | jobs:
20 |   include:
21 |     # Release job: configures git, then deploys by running the `release` npm script.
22 |     - stage: Release
23 |       node_js: lts/*
24 |       install: npm install --no-package-lock
25 |       before_deploy:
26 |         - git config user.email ${GITHUB_EMAIL:-"travis@travis-ci.org"}
27 |         - git config user.name ${GITHUB_USER:-"Travis CI"}
28 |         - git remote set-url origin https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git
29 |         - git checkout master
30 |       deploy:
31 |         skip_cleanup: true
32 |         provider: script
33 |         script: npm run release
34 |         on:
35 |           branch: master
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file. See [standard-version](https://github.com/conventional-changelog/standard-version) for commit guidelines.
4 |
5 | ### 2.4.65 (2025-04-20)
6 |
7 | ### 2.4.64 (2025-04-20)
8 |
9 | ### 2.4.63 (2025-03-19)
10 |
11 | ### 2.4.62 (2025-01-13)
12 |
13 | ### 2.4.61 (2025-01-13)
14 |
15 |
16 | ### Bug Fixes
17 |
18 | * sort ([200c8c3](https://github.com/Kikobeats/html-urls/commit/200c8c350c899dc567bf30a7ca6d6861d668bdb3))
19 |
20 | ### 2.4.60 (2024-08-12)
21 |
22 | ### 2.4.59 (2024-05-08)
23 |
24 | ### 2.4.58 (2024-02-26)
25 |
26 | ### 2.4.57 (2024-02-26)
27 |
28 |
29 | ### Bug Fixes
30 |
31 | * test glob ([0cab6bc](https://github.com/Kikobeats/html-urls/commit/0cab6bc9f9bb12eff4f77b45ff4075ae84a5da43))
32 |
33 | ### 2.4.56 (2024-02-12)
34 |
35 | ### 2.4.55 (2024-01-01)
36 |
37 | ### 2.4.54 (2023-12-21)
38 |
39 | ### 2.4.53 (2023-12-07)
40 |
41 | ### 2.4.52 (2023-11-24)
42 |
43 | ### 2.4.51 (2023-11-11)
44 |
45 | ### 2.4.50 (2023-10-24)
46 |
47 | ### 2.4.49 (2023-09-19)
48 |
49 | ### 2.4.48 (2023-09-05)
50 |
51 | ### 2.4.47 (2023-08-17)
52 |
53 | ### 2.4.46 (2023-08-10)
54 |
55 | ### 2.4.45 (2023-06-03)
56 |
57 | ### 2.4.44 (2023-05-01)
58 |
59 | ### 2.4.43 (2023-05-01)
60 |
61 | ### 2.4.42 (2023-03-30)
62 |
63 | ### 2.4.41 (2023-01-04)
64 |
65 | ### 2.4.40 (2022-11-22)
66 |
67 | ### 2.4.39 (2022-09-26)
68 |
69 | ### 2.4.38 (2022-08-15)
70 |
71 | ### 2.4.37 (2022-05-29)
72 |
73 | ### 2.4.36 (2022-05-17)
74 |
75 | ### 2.4.35 (2022-04-28)
76 |
77 |
78 | ### Bug Fixes
79 |
80 | * omit test helpers ([db10c7c](https://github.com/Kikobeats/html-urls/commit/db10c7ca14efa5e457a932bc4ef742354f8e7ab7))
81 |
82 | ### 2.4.34 (2022-04-04)
83 |
84 | ### 2.4.33 (2022-03-28)
85 |
86 | ### 2.4.32 (2022-03-14)
87 |
88 | ### 2.4.31 (2022-03-14)
89 |
90 | ### 2.4.30 (2022-03-02)
91 |
92 | ### 2.4.29 (2022-02-25)
93 |
94 | ### 2.4.28 (2022-02-25)
95 |
96 | ### 2.4.27 (2021-10-25)
97 |
98 | ### 2.4.26 (2021-08-03)
99 |
100 | ### 2.4.25 (2021-08-03)
101 |
102 | ### 2.4.24 (2021-07-26)
103 |
104 | ### [2.4.23](https://github.com/Kikobeats/html-urls/compare/v2.4.22...v2.4.23) (2021-05-31)
105 |
106 | ### [2.4.22](https://github.com/Kikobeats/html-urls/compare/v2.4.21...v2.4.22) (2021-03-10)
107 |
108 | ### [2.4.21](https://github.com/Kikobeats/html-urls/compare/v2.4.20...v2.4.21) (2021-03-02)
109 |
110 | ### [2.4.20](https://github.com/Kikobeats/html-urls/compare/v2.4.19...v2.4.20) (2021-02-08)
111 |
112 | ### [2.4.19](https://github.com/Kikobeats/html-urls/compare/v2.4.18...v2.4.19) (2020-12-23)
113 |
114 | ### [2.4.18](https://github.com/Kikobeats/html-urls/compare/v2.4.17...v2.4.18) (2020-12-17)
115 |
116 | ### [2.4.17](https://github.com/Kikobeats/html-urls/compare/v2.4.16...v2.4.17) (2020-12-04)
117 |
118 | ### [2.4.16](https://github.com/Kikobeats/html-urls/compare/v2.4.15...v2.4.16) (2020-11-10)
119 |
120 | ### [2.4.15](https://github.com/Kikobeats/html-urls/compare/v2.4.14...v2.4.15) (2020-10-12)
121 |
122 | ### [2.4.14](https://github.com/Kikobeats/html-urls/compare/v2.4.13...v2.4.14) (2020-08-11)
123 |
124 | ### [2.4.13](https://github.com/Kikobeats/html-urls/compare/v2.4.12...v2.4.13) (2020-07-29)
125 |
126 | ### [2.4.12](https://github.com/Kikobeats/html-urls/compare/v2.4.11...v2.4.12) (2020-07-09)
127 |
128 | ### [2.4.11](https://github.com/Kikobeats/html-urls/compare/v2.4.10...v2.4.11) (2020-04-27)
129 |
130 | ### [2.4.10](https://github.com/Kikobeats/html-urls/compare/v2.4.9...v2.4.10) (2020-03-05)
131 |
132 | ### [2.4.9](https://github.com/Kikobeats/html-urls/compare/v2.4.8...v2.4.9) (2020-02-14)
133 |
134 | ### [2.4.8](https://github.com/Kikobeats/html-urls/compare/v2.4.7...v2.4.8) (2020-02-06)
135 |
136 | ### [2.4.7](https://github.com/Kikobeats/html-urls/compare/v2.4.6...v2.4.7) (2020-02-04)
137 |
138 | ### [2.4.6](https://github.com/Kikobeats/html-urls/compare/v2.4.5...v2.4.6) (2020-02-04)
139 |
140 | ### [2.4.5](https://github.com/Kikobeats/html-urls/compare/v2.4.4...v2.4.5) (2020-01-16)
141 |
142 | ### [2.4.4](https://github.com/Kikobeats/html-urls/compare/v2.4.3...v2.4.4) (2020-01-08)
143 |
144 | ### [2.4.3](https://github.com/Kikobeats/html-urls/compare/v2.4.2...v2.4.3) (2019-11-19)
145 |
146 | ### [2.4.2](https://github.com/Kikobeats/html-urls/compare/v2.4.1...v2.4.2) (2019-11-11)
147 |
148 | ### [2.4.1](https://github.com/Kikobeats/html-urls/compare/v2.4.0...v2.4.1) (2019-09-25)
149 |
150 |
151 | ### Bug Fixes
152 |
153 | * test ([5e56f83](https://github.com/Kikobeats/html-urls/commit/5e56f83))
154 |
155 | ## [2.4.0](https://github.com/Kikobeats/html-urls/compare/v2.3.16...v2.4.0) (2019-09-24)
156 |
157 |
158 | ### Features
159 |
160 | * add uri support ([a629c99](https://github.com/Kikobeats/html-urls/commit/a629c99))
161 |
162 | ### [2.3.16](https://github.com/Kikobeats/html-urls/compare/v2.3.15...v2.3.16) (2019-09-24)
163 |
164 | ### [2.3.15](https://github.com/Kikobeats/html-urls/compare/v2.3.14...v2.3.15) (2019-09-13)
165 |
166 |
167 | ### Bug Fixes
168 |
169 | * params sort ([cd6e1e8](https://github.com/Kikobeats/html-urls/commit/cd6e1e8))
170 |
171 | ### [2.3.14](https://github.com/Kikobeats/html-urls/compare/v2.3.13...v2.3.14) (2019-07-11)
172 |
173 |
174 |
175 | ### [2.3.13](https://github.com/Kikobeats/html-urls/compare/v2.3.12...v2.3.13) (2019-07-04)
176 |
177 |
178 |
179 | ### [2.3.12](https://github.com/Kikobeats/html-urls/compare/v2.3.11...v2.3.12) (2019-06-20)
180 |
181 |
182 | ### Bug Fixes
183 |
184 | * **package:** update @metascraper/helpers to version 5.5.0 ([d34e750](https://github.com/Kikobeats/html-urls/commit/d34e750))
185 |
186 |
187 |
188 | ### [2.3.11](https://github.com/Kikobeats/html-urls/compare/v2.3.10...v2.3.11) (2019-06-19)
189 |
190 |
191 | ### Build System
192 |
193 | * update meta ([039f381](https://github.com/Kikobeats/html-urls/commit/039f381))
194 | * update travis ([c742644](https://github.com/Kikobeats/html-urls/commit/c742644))
195 |
196 |
197 |
198 | ### [2.3.10](https://github.com/Kikobeats/html-urls/compare/v2.3.9...v2.3.10) (2019-05-31)
199 |
200 |
201 | ### Bug Fixes
202 |
203 | * **package:** update @metascraper/helpers to version 5.4.0 ([17e3c0d](https://github.com/Kikobeats/html-urls/commit/17e3c0d))
204 |
205 |
206 |
207 | ### [2.3.9](https://github.com/Kikobeats/html-urls/compare/v2.3.8...v2.3.9) (2019-05-20)
208 |
209 |
210 | ### Build System
211 |
212 | * change git-authors-cli position ([ded4a00](https://github.com/Kikobeats/html-urls/commit/ded4a00))
213 |
214 |
215 |
216 | ### [2.3.8](https://github.com/Kikobeats/html-urls/compare/v2.3.7...v2.3.8) (2019-05-20)
217 |
218 |
219 | ### Bug Fixes
220 |
221 | * **package:** update is-url-http to version 1.2.0 ([386be98](https://github.com/Kikobeats/html-urls/commit/386be98))
222 |
223 |
224 |
225 | ### [2.3.7](https://github.com/Kikobeats/html-urls/compare/v2.3.6...v2.3.7) (2019-05-15)
226 |
227 |
228 | ### Bug Fixes
229 |
230 | * **package:** update @metascraper/helpers to version 5.3.0 ([2fdb7da](https://github.com/Kikobeats/html-urls/commit/2fdb7da))
231 |
232 |
233 |
234 | ## [2.3.6](https://github.com/Kikobeats/html-urls/compare/v2.3.5...v2.3.6) (2019-05-05)
235 |
236 |
237 | ### Bug Fixes
238 |
239 | * **package:** update @metascraper/helpers to version 5.2.0 ([32f40a5](https://github.com/Kikobeats/html-urls/commit/32f40a5))
240 |
241 |
242 |
243 | ## [2.3.5](https://github.com/Kikobeats/html-urls/compare/v2.3.4...v2.3.5) (2019-04-22)
244 |
245 |
246 | ### Bug Fixes
247 |
248 | * **package:** update matcher to version 2.0.0 ([a51f3f5](https://github.com/Kikobeats/html-urls/commit/a51f3f5))
249 |
250 |
251 |
252 |
253 | ## [2.3.4](https://github.com/Kikobeats/html-urls/compare/v2.3.3...v2.3.4) (2019-04-03)
254 |
255 |
256 | ### Bug Fixes
257 |
258 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 5.1.0 ([42ca806](https://github.com/Kikobeats/html-urls/commit/42ca806))
259 |
260 |
261 |
262 |
263 | ## 2.3.3 (2019-03-17)
264 |
265 |
266 | ### Bug Fixes
267 |
268 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 5.0.0 ([28d133d](https://github.com/Kikobeats/html-urls/commit/28d133d))
269 |
270 |
271 |
272 |
273 | ## 2.3.2 (2019-03-16)
274 |
275 |
276 | ### Bug Fixes
277 |
278 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.10.2 ([d3ecff4](https://github.com/Kikobeats/html-urls/commit/d3ecff4))
279 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.8.3 ([2435919](https://github.com/Kikobeats/html-urls/commit/2435919))
280 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.9.0 ([c6973ff](https://github.com/Kikobeats/html-urls/commit/c6973ff))
281 | * **package:** update is-url-http to version 1.1.1 ([6685359](https://github.com/Kikobeats/html-urls/commit/6685359))
282 |
283 |
284 |
285 |
286 | ## 2.3.1 (2018-11-16)
287 |
288 |
289 | ### Bug Fixes
290 |
291 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.5.5 ([9af4a4f](https://github.com/Kikobeats/html-urls/commit/9af4a4f))
292 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.7.0 ([967577e](https://github.com/Kikobeats/html-urls/commit/967577e))
293 |
294 |
295 |
296 |
297 | # 2.3.0 (2018-09-17)
298 |
299 |
300 |
301 |
302 | ## 2.2.1 (2018-09-16)
303 |
304 |
305 |
306 |
307 | # 2.2.0 (2018-09-16)
308 |
309 |
310 |
311 |
312 | ## 2.1.3 (2018-09-05)
313 |
314 |
315 | ### Bug Fixes
316 |
317 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.1.0 ([7be224c](https://github.com/Kikobeats/html-urls/commit/7be224c))
318 |
319 |
320 |
321 |
322 | ## 2.1.2 (2018-08-25)
323 |
324 |
325 | ### Bug Fixes
326 |
327 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.12.1 ([c3a83cf](https://github.com/Kikobeats/html-urls/commit/c3a83cf))
328 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 4.0.0 ([1afba08](https://github.com/Kikobeats/html-urls/commit/1afba08))
329 |
330 |
331 |
332 |
333 | ## 2.1.1 (2018-07-23)
334 |
335 |
336 |
337 |
338 | # 2.1.0 (2018-07-06)
339 |
340 |
341 |
342 |
343 | ## 2.0.3 (2018-07-05)
344 |
345 |
346 |
347 |
348 | ## 2.0.2 (2018-07-05)
349 |
350 |
351 |
352 |
353 | ## 2.0.1 (2018-07-04)
354 |
355 |
356 |
357 |
358 | # 2.0.0 (2018-06-26)
359 |
360 |
361 |
362 |
363 | ## 1.0.9 (2018-06-21)
364 |
365 |
366 |
367 |
368 | ## 1.0.8 (2018-05-30)
369 |
370 |
371 |
372 |
373 | ## 1.0.7 (2018-05-29)
374 |
375 |
376 |
377 |
378 | ## 1.0.6 (2018-05-29)
379 |
380 |
381 | ### Bug Fixes
382 |
383 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.11.4 ([7ae9fd7](https://github.com/Kikobeats/html-urls/commit/7ae9fd7))
384 |
385 |
386 |
387 |
388 | ## 1.0.5 (2018-04-29)
389 |
390 |
391 | ### Bug Fixes
392 |
393 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.10.7 ([82afd42](https://github.com/Kikobeats/html-urls/commit/82afd42))
394 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.8.0 ([71bdece](https://github.com/Kikobeats/html-urls/commit/71bdece))
395 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.9.2 ([fd920f7](https://github.com/Kikobeats/html-urls/commit/fd920f7))
396 | * **package:** update matcher to version 1.1.0 ([04d1be8](https://github.com/Kikobeats/html-urls/commit/04d1be8))
397 |
398 |
399 |
400 |
401 | ## 1.0.4 (2018-02-01)
402 |
403 |
404 |
405 |
406 | ## 1.0.3 (2018-01-27)
407 |
408 |
409 |
410 |
411 | ## 1.0.2 (2018-01-27)
412 |
413 |
414 |
415 |
416 | ## 1.0.1 (2018-01-26)
417 |
418 |
419 | ### Bug Fixes
420 |
421 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.5.0 ([6e91e1f](https://github.com/Kikobeats/html-urls/commit/6e91e1f))
422 | * **package:** update [@metascraper](https://github.com/metascraper)/helpers to version 3.6.0 ([6ea3c71](https://github.com/Kikobeats/html-urls/commit/6ea3c71))
423 |
424 |
425 |
426 |
427 | # 1.0.0 (2018-01-16)
428 |
429 |
430 |
431 |
432 | ## 2.3.2 (2019-03-16)
433 |
434 | * fix(package): update @metascraper/helpers to version 4.10.2 ([d3ecff4](https://github.com/Kikobeats/html-urls/commit/d3ecff4))
435 | * fix(package): update @metascraper/helpers to version 4.8.3 ([2435919](https://github.com/Kikobeats/html-urls/commit/2435919))
436 | * fix(package): update @metascraper/helpers to version 4.9.0 ([c6973ff](https://github.com/Kikobeats/html-urls/commit/c6973ff))
437 | * fix(package): update is-url-http to version 1.1.1 ([6685359](https://github.com/Kikobeats/html-urls/commit/6685359))
438 | * Update README.md ([e2efa84](https://github.com/Kikobeats/html-urls/commit/e2efa84))
439 | * Update README.md ([f0cc8fa](https://github.com/Kikobeats/html-urls/commit/f0cc8fa))
440 | * Update README.md ([b4b2a74](https://github.com/Kikobeats/html-urls/commit/b4b2a74))
441 |
442 |
443 |
444 |
445 | ## 2.3.1 (2018-11-16)
446 |
447 | * fix(package): update @metascraper/helpers to version 4.5.5 ([9af4a4f](https://github.com/Kikobeats/html-urls/commit/9af4a4f))
448 | * fix(package): update @metascraper/helpers to version 4.7.0 ([967577e](https://github.com/Kikobeats/html-urls/commit/967577e))
449 |
450 |
451 |
452 |
453 | # 2.3.0 (2018-09-17)
454 |
455 | * Add is-url-http dependency ([0ca6a28](https://github.com/Kikobeats/html-urls/commit/0ca6a28))
456 |
457 |
458 |
459 |
460 | ## 2.2.1 (2018-09-16)
461 |
462 | * Refactor ([f5e23de](https://github.com/Kikobeats/html-urls/commit/f5e23de))
463 |
464 |
465 |
466 |
467 | # 2.2.0 (2018-09-16)
468 |
469 | * Fix linter ([80263a2](https://github.com/Kikobeats/html-urls/commit/80263a2))
470 | * Ignore callto URLs ([d38f561](https://github.com/Kikobeats/html-urls/commit/d38f561))
471 | * Update package.json ([08957ee](https://github.com/Kikobeats/html-urls/commit/08957ee))
472 |
473 |
474 |
475 |
476 | ## 2.1.3 (2018-09-05)
477 |
478 | * Update API ([537e923](https://github.com/Kikobeats/html-urls/commit/537e923))
479 | * fix(package): update @metascraper/helpers to version 4.1.0 ([7be224c](https://github.com/Kikobeats/html-urls/commit/7be224c))
480 |
481 |
482 |
483 |
484 | ## 2.1.2 (2018-08-25)
485 |
486 | * Ignore mailto urls ([fcc6607](https://github.com/Kikobeats/html-urls/commit/fcc6607))
487 | * Update package.json ([8410fb2](https://github.com/Kikobeats/html-urls/commit/8410fb2))
488 | * fix(package): update @metascraper/helpers to version 3.12.1 ([c3a83cf](https://github.com/Kikobeats/html-urls/commit/c3a83cf))
489 | * fix(package): update @metascraper/helpers to version 4.0.0 ([1afba08](https://github.com/Kikobeats/html-urls/commit/1afba08))
490 |
491 |
492 |
493 |
494 | ## 2.1.1 (2018-07-23)
495 |
496 | * Update tests ([7115b20](https://github.com/Kikobeats/html-urls/commit/7115b20))
497 | * Use last version ([2bbb066](https://github.com/Kikobeats/html-urls/commit/2bbb066))
498 |
499 |
500 |
501 |
502 | # 2.1.0 (2018-07-06)
503 |
504 | * Add removeDuplicates option ([09282e8](https://github.com/Kikobeats/html-urls/commit/09282e8))
505 |
506 |
507 |
508 |
509 | ## 2.0.3 (2018-07-05)
510 |
511 | * Remove duplicate urls from different tags ([862aaef](https://github.com/Kikobeats/html-urls/commit/862aaef))
512 | * Update tests snaphots ([c8f46a8](https://github.com/Kikobeats/html-urls/commit/c8f46a8))
513 |
514 |
515 |
516 |
517 | ## 2.0.2 (2018-07-05)
518 |
519 | * Generate tests based on selectors supported ([d2a1993](https://github.com/Kikobeats/html-urls/commit/d2a1993))
520 | * Ignore invalid URLs ([08f8529](https://github.com/Kikobeats/html-urls/commit/08f8529))
521 |
522 |
523 |
524 |
525 | ## 2.0.1 (2018-07-04)
526 |
527 | * Use Set instead of Array ([64e63d1](https://github.com/Kikobeats/html-urls/commit/64e63d1))
528 |
529 |
530 |
531 |
532 | # 2.0.0 (2018-06-26)
533 |
534 | * Rename normalizeUrl → normalizedUrl ([ce0300c](https://github.com/Kikobeats/html-urls/commit/ce0300c))
535 |
536 |
537 |
538 |
539 | ## 1.0.9 (2018-06-21)
540 |
541 | * Expose tags and cheerio opts ([9c27283](https://github.com/Kikobeats/html-urls/commit/9c27283))
542 |
543 |
544 |
545 |
546 | ## 1.0.8 (2018-05-30)
547 |
548 | * Refactor ([7da0187](https://github.com/Kikobeats/html-urls/commit/7da0187))
549 |
550 |
551 |
552 |
553 | ## 1.0.7 (2018-05-29)
554 |
555 | * Update deps ([bb89028](https://github.com/Kikobeats/html-urls/commit/bb89028))
556 |
557 |
558 |
559 |
560 | ## 1.0.6 (2018-05-29)
561 |
562 | * Update ([a352247](https://github.com/Kikobeats/html-urls/commit/a352247))
563 | * Update README.md ([b04bf62](https://github.com/Kikobeats/html-urls/commit/b04bf62))
564 | * fix(package): update @metascraper/helpers to version 3.11.4 ([7ae9fd7](https://github.com/Kikobeats/html-urls/commit/7ae9fd7))
565 |
566 |
567 |
568 |
569 | ## 1.0.5 (2018-04-29)
570 |
571 | * Little Refactor ([ae4271b](https://github.com/Kikobeats/html-urls/commit/ae4271b))
572 | * Update fixtures ([d734a0f](https://github.com/Kikobeats/html-urls/commit/d734a0f))
573 | * fix(package): update @metascraper/helpers to version 3.10.7 ([82afd42](https://github.com/Kikobeats/html-urls/commit/82afd42))
574 | * fix(package): update @metascraper/helpers to version 3.8.0 ([71bdece](https://github.com/Kikobeats/html-urls/commit/71bdece))
575 | * fix(package): update @metascraper/helpers to version 3.9.2 ([fd920f7](https://github.com/Kikobeats/html-urls/commit/fd920f7))
576 | * fix(package): update matcher to version 1.1.0 ([04d1be8](https://github.com/Kikobeats/html-urls/commit/04d1be8))
577 | * chore(package): update chalk to version 2.4.0 ([7b9f93e](https://github.com/Kikobeats/html-urls/commit/7b9f93e))
578 | * chore(package): update got to version 8.1.0 ([eb8e729](https://github.com/Kikobeats/html-urls/commit/eb8e729))
579 | * chore(package): update got to version 8.2.0 ([ac10519](https://github.com/Kikobeats/html-urls/commit/ac10519))
580 | * chore(package): update got to version 8.3.0 ([28f053b](https://github.com/Kikobeats/html-urls/commit/28f053b))
581 |
582 |
583 |
584 |
585 | ## 1.0.4 (2018-02-01)
586 |
587 | * Add html helper for testing ([e3238ea](https://github.com/Kikobeats/html-urls/commit/e3238ea))
588 | * Improve whitelist support ([1c773f1](https://github.com/Kikobeats/html-urls/commit/1c773f1))
589 |
590 |
591 |
592 |
593 | ## 1.0.3 (2018-01-27)
594 |
595 | * Tweaks ([f7d4865](https://github.com/Kikobeats/html-urls/commit/f7d4865))
596 |
597 |
598 |
599 |
600 | ## 1.0.2 (2018-01-27)
601 |
602 | * Rename blacklist → whitelist ([0891b36](https://github.com/Kikobeats/html-urls/commit/0891b36))
603 |
604 |
605 |
606 |
607 | ## 1.0.1 (2018-01-26)
608 |
609 | * Rename ([0927b08](https://github.com/Kikobeats/html-urls/commit/0927b08))
610 | * Rename interface ([df7e07f](https://github.com/Kikobeats/html-urls/commit/df7e07f))
611 | * Update README.md ([d70a3f1](https://github.com/Kikobeats/html-urls/commit/d70a3f1))
612 | * fix(package): update @metascraper/helpers to version 3.5.0 ([6e91e1f](https://github.com/Kikobeats/html-urls/commit/6e91e1f))
613 | * fix(package): update @metascraper/helpers to version 3.6.0 ([6ea3c71](https://github.com/Kikobeats/html-urls/commit/6ea3c71))
614 |
615 |
616 |
617 |
618 | # 1.0.0 (2018-01-16)
619 |
620 | * First commit ([6fac1f4](https://github.com/Kikobeats/html-urls/commit/6fac1f4))
621 | * Update README.md ([7507a54](https://github.com/Kikobeats/html-urls/commit/7507a54))
622 | * docs(readme): add Greenkeeper badge ([d67bfe3](https://github.com/Kikobeats/html-urls/commit/d67bfe3))
623 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright © 2018 Kiko Beats (kikobeats.com)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # html-urls
2 |
3 | 
4 | [](https://coveralls.io/github/Kikobeats/html-urls)
5 | [](https://www.npmjs.org/package/html-urls)
6 |
7 | > Get all URLs from a HTML markup. It's based on [W3C link checker](https://github.com/w3c/node-linkchecker).
8 |
9 | ## Install
10 |
11 | ```bash
12 | $ npm install html-urls --save
13 | ```
14 |
15 | ## Usage
16 |
17 | ```js
18 | const got = require('got')
19 | const htmlUrls = require('html-urls')
20 |
21 | ;(async () => {
22 | const url = process.argv[2]
23 | if (!url) throw new TypeError('Need to provide an url as first argument.')
24 | const { body: html } = await got(url)
25 | const links = htmlUrls({ html, url })
26 |
27 | links.forEach(({ url }) => console.log(url))
28 |
29 | // => [
30 | // 'https://microlink.io/component---src-layouts-index-js-86b5f94dfa48cb04ae41.js',
31 | // 'https://microlink.io/component---src-pages-index-js-a302027ab59365471b7d.js',
32 | // 'https://microlink.io/path---index-709b6cf5b986a710cc3a.js',
33 | // 'https://microlink.io/app-8b4269e1fadd08e6ea1e.js',
34 | // 'https://microlink.io/commons-8b286eac293678e1c98c.js',
35 | // 'https://microlink.io',
36 | // ...
37 | // ]
38 | })()
39 | ```
40 |
41 | It returns the following structure for every value detected in the HTML markup:
42 |
43 | ##### value
44 | Type: ``
45 |
46 | The original value.
47 |
48 | ##### url
49 | Type: ``
50 |
51 | The normalized URL, if the value can be considered an URL.
52 |
53 | ##### uri
54 | Type: ``
55 |
56 | The normalized value as URI.
57 |
58 |
59 |
60 | See [example.js](/example.js) for more!
61 |
62 | ## API
63 |
64 | ### htmlUrls([options])
65 |
66 | #### options
67 |
68 | ##### html
69 |
70 | Type: `string`
71 | Default: `''`
72 |
73 | The HTML markup.
74 |
75 | ##### url
76 |
77 | Type: `string`
78 | Default: `''`
79 |
80 | The URL associated with the HTML markup.
81 |
82 | It is used to resolve relative links that may be present in the HTML markup.
83 |
84 | ##### whitelist
85 |
86 | Type: `array`
87 | Default: `[]`
88 |
89 | A list of links to be excluded from the final output. It supports regex patterns.
90 |
91 | See [matcher](https://github.com/sindresorhus/matcher#matcher) to learn more.
92 |
93 | ##### removeDuplicates
94 |
95 | Type: `boolean`
96 | Default: `true`
97 |
98 | Remove duplicated links detected over all the HTML tags.
99 |
100 | ## Related
101 |
102 | - [xml-urls](https://github.com/Kikobeats/xml-urls) – Get all urls from a Feed/Atom/RSS/Sitemap xml markup.
103 | - [css-urls](https://github.com/Kikobeats/css-urls) – Get all URLs referenced from stylesheet files.
104 |
105 | ## License
106 |
107 | **html-urls** © [Kiko Beats](https://kikobeats.com), released under the [MIT](https://github.com/Kikobeats/html-urls/blob/master/LICENSE) License.
108 | Authored and maintained by Kiko Beats with help from [contributors](https://github.com/Kikobeats/html-urls/contributors).
109 |
110 | > [kikobeats.com](https://kikobeats.com) · GitHub [@Kiko Beats](https://github.com/Kikobeats) · X [@Kikobeats](https://x.com/Kikobeats)
111 |
--------------------------------------------------------------------------------
/example.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | const getLinks = require('.')
4 | const got = require('got')
5 | ;(async () => {
6 | const url = process.argv[2]
7 | if (!url) throw new TypeError('Need to provide an url as first argument.')
8 | const { body: html } = await got(url)
9 | const links = getLinks({ html, url })
10 | links.forEach(link => console.log(link.normalizedUrl))
11 | })()
12 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "html-urls",
3 | "description": "Get all links from a HTML markup",
4 | "homepage": "https://github.com/Kikobeats/html-urls",
5 | "version": "2.4.65",
6 | "main": "src/index.js",
7 | "author": {
8 | "email": "josefrancisco.verdu@gmail.com",
9 | "name": "Kiko Beats",
10 | "url": "https://kikobeats.com"
11 | },
12 | "contributors": [],
13 | "repository": {
14 | "type": "git",
15 | "url": "git+https://github.com/Kikobeats/html-urls.git"
16 | },
17 | "bugs": {
18 | "url": "https://github.com/Kikobeats/html-urls/issues"
19 | },
20 | "keywords": [
21 | "href",
22 | "hrefs",
23 | "html",
24 | "link",
25 | "links",
26 | "src",
27 | "url",
28 | "urls"
29 | ],
30 | "dependencies": {
31 | "@metascraper/helpers": "~5.46.1",
32 | "cheerio": "~1.0.0",
33 | "is-uri": "~1.2.6",
34 | "is-url-http": "~2.3.9",
35 | "lodash": "~4.17.21",
36 | "matcher": "~4.0.0"
37 | },
38 | "devDependencies": {
39 | "@commitlint/cli": "latest",
40 | "@commitlint/config-conventional": "latest",
41 | "@ksmithut/prettier-standard": "latest",
42 | "ava": "latest",
43 | "c8": "latest",
44 | "ci-publish": "latest",
45 | "finepack": "latest",
46 | "git-authors-cli": "latest",
47 | "github-generate-release": "latest",
48 | "nano-staged": "latest",
49 | "simple-git-hooks": "latest",
50 | "standard": "latest",
51 | "standard-version": "latest"
52 | },
53 | "engines": {
54 | "node": ">= 6"
55 | },
56 | "files": [
57 | "src"
58 | ],
59 | "scripts": {
60 | "clean": "rm -rf node_modules",
61 | "contributors": "(npx git-authors-cli && npx finepack && git add package.json && git commit -m 'build: contributors' --no-verify) || true",
62 | "lint": "standard",
63 | "postrelease": "npm run release:tags && npm run release:github && (ci-publish || npm publish --access=public)",
64 | "pretest": "npm run lint",
65 | "pretty": "prettier-standard index.js {core,test,bin}/**/*.js --single-quote",
66 | "release": "standard-version -a",
67 | "release:github": "github-generate-release",
68 | "release:tags": "git push --follow-tags origin HEAD:master",
69 | "test": "c8 ava"
70 | },
71 | "license": "MIT",
72 | "ava": {
73 | "files": [
74 | "test/**/*.js",
75 | "!test/helpers.js"
76 | ]
77 | },
78 | "commitlint": {
79 | "extends": [
80 | "@commitlint/config-conventional"
81 | ],
82 | "rules": {
83 | "body-max-line-length": [
84 | 0
85 | ]
86 | }
87 | },
88 | "nano-staged": {
89 | "*.js": [
90 | "prettier-standard",
91 | "standard --fix"
92 | ],
93 | "package.json": [
94 | "finepack"
95 | ]
96 | },
97 | "simple-git-hooks": {
98 | "commit-msg": "npx commitlint --edit",
99 | "pre-commit": "npx nano-staged"
100 | }
101 | }
102 |
--------------------------------------------------------------------------------
/src/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | const { uniqBy, concat, isEmpty, reduce, get, findIndex } = require('lodash')
4 | const { normalizeUrl } = require('@metascraper/helpers')
5 | const isHttpUrl = require('is-url-http')
6 | const cheerio = require('cheerio')
7 | const matcher = require('matcher')
8 | const isUri = require('is-uri')
9 |
10 | const UID = 'uri'
11 |
12 | /** Maps each URL-carrying attribute to the tags it is looked up on (see `$(htmlTags.join(','))`).
13 | * Originally picked from https://github.com/rehypejs/rehype-minify/blob/main/packages/html-url-attributes/index.js
14 | */
15 | const TAGS = {
16 | action: ['form'],
17 | cite: ['blockquote', 'del', 'ins', 'q'],
18 | data: ['object'],
19 | formaction: ['button', 'input'],
20 | href: ['a', 'area', 'base', 'link'],
21 | icon: ['menuitem'],
22 | manifest: ['html'],
23 | ping: ['a', 'area'],
24 | poster: ['video'],
25 | src: ['audio', 'embed', 'iframe', 'img', 'input', 'script', 'source', 'track', 'video']
26 | }
27 |
28 | const reduceSelector = (collection, fn, acc = []) => {
29 | collection.each(function () {
30 | acc = fn(acc, this)
31 | })
32 | return acc
33 | }
34 |
35 | const includes = (collection, fn) => findIndex(collection, fn) !== -1
36 |
37 | const getLink = ({ url, el, attribute }) => {
38 | const attr = get(el, `attribs.${attribute}`, '')
39 | if (isEmpty(attr)) return undefined
40 | const absoluteUrl = url ? normalizeUrl(url, attr) : normalizeUrl(attr)
41 | return {
42 | value: attr,
43 | url: isHttpUrl(absoluteUrl) ? absoluteUrl : undefined,
44 | uri: isUri(absoluteUrl) ? absoluteUrl : undefined
45 | }
46 | }
47 |
48 | const createGetLinksByAttribute = ({ removeDuplicates }) => {
49 | const has = removeDuplicates
50 | ? (acc, uid) => includes(acc, item => get(item, UID) === uid)
51 | : () => false
52 |
53 | return ({ selector, attribute, url, whitelist }) =>
54 | reduceSelector(
55 | selector,
56 | (acc, el) => {
57 | const link = getLink({ url, el, attribute })
58 | const uid = get(link, UID)
59 | if (isEmpty(link)) return acc
60 | const isAlreadyAdded = has(acc, uid)
61 | if (isAlreadyAdded) return acc
62 | const match = !isEmpty(whitelist) && matcher([uid], concat(whitelist))
63 | return isEmpty(match) ? concat(acc, link) : acc
64 | },
65 | []
66 | )
67 | }
68 |
69 | const createAdd = ({ removeDuplicates }) =>
70 | removeDuplicates
71 | ? (acc, links) => uniqBy(concat(acc, links), UID)
72 | : (acc, links) => concat(acc, links)
73 |
74 | module.exports = ({
75 | html = '',
76 | url = '',
77 | whitelist = false,
78 | removeDuplicates = true,
79 | cheerioOpts = {}
80 | } = {}) => {
81 | const $ = cheerio.load(html, cheerioOpts)
82 |
83 | const add = createAdd({ removeDuplicates })
84 | const getLinksByAttribute = createGetLinksByAttribute({ removeDuplicates })
85 |
86 | return reduce(
87 | TAGS,
88 | (acc, htmlTags, attribute) => {
89 | const links = getLinksByAttribute({
90 | selector: $(htmlTags.join(',')),
91 | attribute,
92 | url,
93 | whitelist
94 | })
95 | return add(acc, links)
96 | },
97 | []
98 | )
99 | }
100 |
101 | module.exports.TAGS = TAGS
102 |
--------------------------------------------------------------------------------
/test/helpers.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | const generateHtml = ({ links = [], urls = [] }) => `
4 |
5 |
6 |
7 |
8 |
9 |
10 | hello world
11 | ${links.map(link => ``).join('\n')}
12 |
13 |
14 | ${urls.map(url => ``).join('\n')}
15 |
16 |
17 | `
18 |
19 | module.exports = { generateHtml }
20 |
--------------------------------------------------------------------------------
/test/index.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | const test = require('ava')
4 |
5 | const getLinks = require('..')
6 |
7 | const { generateHtml } = require('./helpers')
8 |
9 | test('empty html generate empty output', t => {
10 | t.deepEqual(getLinks(), [])
11 | t.deepEqual(getLinks(''), []) // a string argument destructures to the option defaults
12 | })
13 |
14 | test('get links from a semantic markup', t => { // the mailto entry is snapshotted with url: undefined
15 | const html = generateHtml({
16 | urls: ['https://google.com', 'https://facebook.com', 'mailto://kiko@example.com']
17 | })
18 |
19 | t.snapshot(getLinks({ html }))
20 | })
21 |
22 | test('remove duplicate urls from same tag', t => {
23 | const html = generateHtml({
24 | urls: ['https://google.com', 'https://google.com', 'https://facebook.com']
25 | })
26 | t.snapshot(getLinks({ html }))
27 | })
28 |
29 | test('remove duplicate urls from different tags', t => {
30 | const html = generateHtml({
31 | urls: ['https://google.com'],
32 | links: ['https://google.com']
33 | })
34 | t.snapshot(getLinks({ html }))
35 | })
36 |
37 | test('non remove duplicate urls from same tag', t => {
38 | const html = generateHtml({
39 | urls: ['https://google.com', 'https://google.com', 'https://facebook.com']
40 | })
41 | t.snapshot(getLinks({ html, removeDuplicates: false }))
42 | })
43 |
44 | test('non remove duplicate urls from different tags', t => {
45 | const html = generateHtml({
46 | urls: ['https://google.com'],
47 | links: ['https://google.com']
48 | })
49 | t.snapshot(getLinks({ html, removeDuplicates: false }))
50 | })
51 |
52 | test('normalize trailing slash', t => { // snapshot: both google entries collapse into one link
53 | const html = generateHtml({
54 | urls: ['https://google.com/', 'https://google.com', 'https://facebook.com']
55 | })
56 |
57 | t.snapshot(getLinks({ html }))
58 | })
59 |
60 | test('normalize wwww', t => { // snapshot keeps the www and bare hosts as separate links
61 | const html = generateHtml({
62 | urls: ['https://www.google.com', 'https://google.com', 'https://facebook.com']
63 | })
64 |
65 | t.snapshot(getLinks({ html }))
66 | })
67 |
68 | test('normalize query string parameters', t => { // snapshot keeps both parameter orders as separate links
69 | const html = generateHtml({
70 | urls: [
71 | 'https://google.com?hello=world&foo=bar',
72 | 'https://google.com?foo=bar&hello=world',
73 | 'https://facebook.com'
74 | ]
75 | })
76 |
77 | t.snapshot(getLinks({ html }))
78 | })
79 |
80 | test('ignore invalid URLs', t => { // snapshot still lists 'http://', with url and uri undefined
81 | const html = generateHtml({
82 | urls: ['http://']
83 | })
84 |
85 | t.snapshot(getLinks({ html }))
86 | })
87 |
--------------------------------------------------------------------------------
/test/snapshots/index.js.md:
--------------------------------------------------------------------------------
1 | # Snapshot report for `test/index.js`
2 |
3 | The actual snapshot is saved in `index.js.snap`.
4 |
5 | Generated by [AVA](https://avajs.dev).
6 |
7 | ## get links from a semantic markup
8 |
9 | > Snapshot 1
10 |
11 | [
12 | {
13 | uri: 'https://google.com/',
14 | url: 'https://google.com/',
15 | value: 'https://google.com',
16 | },
17 | {
18 | uri: 'https://facebook.com/',
19 | url: 'https://facebook.com/',
20 | value: 'https://facebook.com',
21 | },
22 | {
23 | uri: 'mailto://example.com',
24 | url: undefined,
25 | value: 'mailto://kiko@example.com',
26 | },
27 | ]
28 |
29 | ## remove duplicate urls from same tag
30 |
31 | > Snapshot 1
32 |
33 | [
34 | {
35 | uri: 'https://google.com/',
36 | url: 'https://google.com/',
37 | value: 'https://google.com',
38 | },
39 | {
40 | uri: 'https://facebook.com/',
41 | url: 'https://facebook.com/',
42 | value: 'https://facebook.com',
43 | },
44 | ]
45 |
46 | ## remove duplicate urls from different tags
47 |
48 | > Snapshot 1
49 |
50 | [
51 | {
52 | uri: 'https://google.com/',
53 | url: 'https://google.com/',
54 | value: 'https://google.com',
55 | },
56 | ]
57 |
58 | ## non remove duplicate urls from same tag
59 |
60 | > Snapshot 1
61 |
62 | [
63 | {
64 | uri: 'https://google.com/',
65 | url: 'https://google.com/',
66 | value: 'https://google.com',
67 | },
68 | {
69 | uri: 'https://google.com/',
70 | url: 'https://google.com/',
71 | value: 'https://google.com',
72 | },
73 | {
74 | uri: 'https://facebook.com/',
75 | url: 'https://facebook.com/',
76 | value: 'https://facebook.com',
77 | },
78 | ]
79 |
80 | ## non remove duplicate urls from different tags
81 |
82 | > Snapshot 1
83 |
84 | [
85 | {
86 | uri: 'https://google.com/',
87 | url: 'https://google.com/',
88 | value: 'https://google.com',
89 | },
90 | {
91 | uri: 'https://google.com/',
92 | url: 'https://google.com/',
93 | value: 'https://google.com',
94 | },
95 | ]
96 |
97 | ## normalize trailing slash
98 |
99 | > Snapshot 1
100 |
101 | [
102 | {
103 | uri: 'https://google.com/',
104 | url: 'https://google.com/',
105 | value: 'https://google.com/',
106 | },
107 | {
108 | uri: 'https://facebook.com/',
109 | url: 'https://facebook.com/',
110 | value: 'https://facebook.com',
111 | },
112 | ]
113 |
114 | ## normalize wwww
115 |
116 | > Snapshot 1
117 |
118 | [
119 | {
120 | uri: 'https://www.google.com/',
121 | url: 'https://www.google.com/',
122 | value: 'https://www.google.com',
123 | },
124 | {
125 | uri: 'https://google.com/',
126 | url: 'https://google.com/',
127 | value: 'https://google.com',
128 | },
129 | {
130 | uri: 'https://facebook.com/',
131 | url: 'https://facebook.com/',
132 | value: 'https://facebook.com',
133 | },
134 | ]
135 |
136 | ## normalize query string parameters
137 |
138 | > Snapshot 1
139 |
140 | [
141 | {
142 | uri: 'https://google.com/?hello=world&foo=bar',
143 | url: 'https://google.com/?hello=world&foo=bar',
144 | value: 'https://google.com?hello=world&foo=bar',
145 | },
146 | {
147 | uri: 'https://google.com/?foo=bar&hello=world',
148 | url: 'https://google.com/?foo=bar&hello=world',
149 | value: 'https://google.com?foo=bar&hello=world',
150 | },
151 | {
152 | uri: 'https://facebook.com/',
153 | url: 'https://facebook.com/',
154 | value: 'https://facebook.com',
155 | },
156 | ]
157 |
158 | ## ignore invalid URLs
159 |
160 | > Snapshot 1
161 |
162 | [
163 | {
164 | uri: undefined,
165 | url: undefined,
166 | value: 'http://',
167 | },
168 | ]
169 |
--------------------------------------------------------------------------------
/test/snapshots/index.js.snap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kikobeats/html-urls/03692f85e49a0e402123c6161db4035f7bed1d41/test/snapshots/index.js.snap
--------------------------------------------------------------------------------
/test/snapshots/tags.js.md:
--------------------------------------------------------------------------------
1 | # Snapshot report for `test/tags.js`
2 |
3 | The actual snapshot is saved in `tags.js.snap`.
4 |
5 | Generated by [AVA](https://avajs.dev).
6 |
7 | ## form (action)
8 |
9 | > Snapshot 1
10 |
11 | [
12 | {
13 | uri: 'https://example.com/',
14 | url: 'https://example.com/',
15 | value: 'https://example.com',
16 | },
17 | ]
18 |
19 | ## blockquote (cite)
20 |
21 | > Snapshot 1
22 |
23 | [
24 | {
25 | uri: 'https://example.com/',
26 | url: 'https://example.com/',
27 | value: 'https://example.com',
28 | },
29 | ]
30 |
31 | ## del (cite)
32 |
33 | > Snapshot 1
34 |
35 | [
36 | {
37 | uri: 'https://example.com/',
38 | url: 'https://example.com/',
39 | value: 'https://example.com',
40 | },
41 | ]
42 |
43 | ## ins (cite)
44 |
45 | > Snapshot 1
46 |
47 | [
48 | {
49 | uri: 'https://example.com/',
50 | url: 'https://example.com/',
51 | value: 'https://example.com',
52 | },
53 | ]
54 |
55 | ## q (cite)
56 |
57 | > Snapshot 1
58 |
59 | [
60 | {
61 | uri: 'https://example.com/',
62 | url: 'https://example.com/',
63 | value: 'https://example.com',
64 | },
65 | ]
66 |
67 | ## object (data)
68 |
69 | > Snapshot 1
70 |
71 | [
72 | {
73 | uri: 'https://example.com/',
74 | url: 'https://example.com/',
75 | value: 'https://example.com',
76 | },
77 | ]
78 |
79 | ## button (formaction)
80 |
81 | > Snapshot 1
82 |
83 | [
84 | {
85 | uri: 'https://example.com/',
86 | url: 'https://example.com/',
87 | value: 'https://example.com',
88 | },
89 | ]
90 |
91 | ## input (formaction)
92 |
93 | > Snapshot 1
94 |
95 | [
96 | {
97 | uri: 'https://example.com/',
98 | url: 'https://example.com/',
99 | value: 'https://example.com',
100 | },
101 | ]
102 |
103 | ## a (href)
104 |
105 | > Snapshot 1
106 |
107 | [
108 | {
109 | uri: 'https://example.com/',
110 | url: 'https://example.com/',
111 | value: 'https://example.com',
112 | },
113 | ]
114 |
115 | ## area (href)
116 |
117 | > Snapshot 1
118 |
119 | [
120 | {
121 | uri: 'https://example.com/',
122 | url: 'https://example.com/',
123 | value: 'https://example.com',
124 | },
125 | ]
126 |
127 | ## base (href)
128 |
129 | > Snapshot 1
130 |
131 | [
132 | {
133 | uri: 'https://example.com/',
134 | url: 'https://example.com/',
135 | value: 'https://example.com',
136 | },
137 | ]
138 |
139 | ## link (href)
140 |
141 | > Snapshot 1
142 |
143 | [
144 | {
145 | uri: 'https://example.com/',
146 | url: 'https://example.com/',
147 | value: 'https://example.com',
148 | },
149 | ]
150 |
151 | ## menuitem (icon)
152 |
153 | > Snapshot 1
154 |
155 | [
156 | {
157 | uri: 'https://example.com/',
158 | url: 'https://example.com/',
159 | value: 'https://example.com',
160 | },
161 | ]
162 |
163 | ## html (manifest)
164 |
165 | > Snapshot 1
166 |
167 | [
168 | {
169 | uri: 'https://example.com/',
170 | url: 'https://example.com/',
171 | value: 'https://example.com',
172 | },
173 | ]
174 |
175 | ## a (ping)
176 |
177 | > Snapshot 1
178 |
179 | [
180 | {
181 | uri: 'https://example.com/',
182 | url: 'https://example.com/',
183 | value: 'https://example.com',
184 | },
185 | ]
186 |
187 | ## area (ping)
188 |
189 | > Snapshot 1
190 |
191 | [
192 | {
193 | uri: 'https://example.com/',
194 | url: 'https://example.com/',
195 | value: 'https://example.com',
196 | },
197 | ]
198 |
199 | ## video (poster)
200 |
201 | > Snapshot 1
202 |
203 | [
204 | {
205 | uri: 'https://example.com/',
206 | url: 'https://example.com/',
207 | value: 'https://example.com',
208 | },
209 | ]
210 |
211 | ## audio (src)
212 |
213 | > Snapshot 1
214 |
215 | [
216 | {
217 | uri: 'https://example.com/',
218 | url: 'https://example.com/',
219 | value: 'https://example.com',
220 | },
221 | ]
222 |
223 | ## embed (src)
224 |
225 | > Snapshot 1
226 |
227 | [
228 | {
229 | uri: 'https://example.com/',
230 | url: 'https://example.com/',
231 | value: 'https://example.com',
232 | },
233 | ]
234 |
235 | ## iframe (src)
236 |
237 | > Snapshot 1
238 |
239 | [
240 | {
241 | uri: 'https://example.com/',
242 | url: 'https://example.com/',
243 | value: 'https://example.com',
244 | },
245 | ]
246 |
247 | ## img (src)
248 |
249 | > Snapshot 1
250 |
251 | [
252 | {
253 | uri: 'https://example.com/',
254 | url: 'https://example.com/',
255 | value: 'https://example.com',
256 | },
257 | ]
258 |
259 | ## input (src)
260 |
261 | > Snapshot 1
262 |
263 | [
264 | {
265 | uri: 'https://example.com/',
266 | url: 'https://example.com/',
267 | value: 'https://example.com',
268 | },
269 | ]
270 |
271 | ## script (src)
272 |
273 | > Snapshot 1
274 |
275 | [
276 | {
277 | uri: 'https://example.com/',
278 | url: 'https://example.com/',
279 | value: 'https://example.com',
280 | },
281 | ]
282 |
283 | ## source (src)
284 |
285 | > Snapshot 1
286 |
287 | [
288 | {
289 | uri: 'https://example.com/',
290 | url: 'https://example.com/',
291 | value: 'https://example.com',
292 | },
293 | ]
294 |
295 | ## track (src)
296 |
297 | > Snapshot 1
298 |
299 | [
300 | {
301 | uri: 'https://example.com/',
302 | url: 'https://example.com/',
303 | value: 'https://example.com',
304 | },
305 | ]
306 |
307 | ## video (src)
308 |
309 | > Snapshot 1
310 |
311 | [
312 | {
313 | uri: 'https://example.com/',
314 | url: 'https://example.com/',
315 | value: 'https://example.com',
316 | },
317 | ]
318 |
--------------------------------------------------------------------------------
/test/snapshots/tags.js.snap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kikobeats/html-urls/03692f85e49a0e402123c6161db4035f7bed1d41/test/snapshots/tags.js.snap
--------------------------------------------------------------------------------
/test/tags.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | const { forEach } = require('lodash')
4 | const test = require('ava')
5 |
6 | const getLinks = require('..')
7 |
8 | const { TAGS } = getLinks
9 |
10 | forEach(TAGS, (tags, attributeName) => {
11 | forEach(tags, tag => {
12 | test(`${tag} (${attributeName})`, t => {
13 | const html = `
14 |
15 |
16 |
17 |
18 |
19 |
20 | hello world
21 | <${tag} ${attributeName}="https://example.com">
22 |
23 |
24 |
25 |
26 | `
27 | const url = 'https://example.com'
28 | t.snapshot(getLinks({ html, url }))
29 | })
30 | })
31 | })
32 |
--------------------------------------------------------------------------------
/test/whitelist.js:
--------------------------------------------------------------------------------
1 | 'use strict'
2 |
3 | const test = require('ava')
4 |
5 | const getLinks = require('..')
6 |
7 | const { generateHtml } = require('./helpers')
8 |
9 | test('exclude exact match from whitelist', t => { // whitelisted uris are removed from the output
10 | const urls = [
11 | 'https://indiehackers.com/images/favicons/favicon',
12 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173',
13 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css',
14 | 'https://indiehackers.com/feed.xml',
15 | 'https://indiehackers.com'
16 | ]
17 |
18 | const html = generateHtml({ urls })
19 | const whitelist = ['https://indiehackers.com/'] // compared with the normalized uri, hence the trailing slash
20 | const htmlUrls = getLinks({ html, whitelist }).map(({ uri }) => uri)
21 |
22 | t.deepEqual(htmlUrls, [
23 | 'https://indiehackers.com/images/favicons/favicon',
24 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173',
25 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css',
26 | 'https://indiehackers.com/feed.xml'
27 | ])
28 | })
29 |
30 | test('exclude pattern from whitelist', t => {
31 | const urls = [
32 | 'https://indiehackers.com/images/favicons/favicon',
33 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173',
34 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css',
35 | 'https://indiehackers.com/feed.xml',
36 | 'https://indiehackers.com'
37 | ]
38 |
39 | const html = generateHtml({ urls })
40 | const whitelist = ['https://indiehackers.com*'] // `*` wildcard: drops every uri starting with this prefix
41 | const htmlUrls = getLinks({ html, whitelist }).map(({ uri }) => uri)
42 |
43 | t.deepEqual(htmlUrls, [
44 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173'
45 | ])
46 | })
47 |
48 | test('exclude multiple pattern from whitelist', t => {
49 | const urls = [
50 | 'https://indiehackers.com/images/favicons/favicon',
51 | 'https://www.indiehackers.com/forum/introduce-yourself-january-2018-411d4f5173',
52 | 'https://indiehackers.com/assets/indie-hackers-12c4cfc88599dcf564ce2d9f226133.css',
53 | 'https://indiehackers.com/feed.xml',
54 | 'https://indiehackers.com'
55 | ]
56 |
57 | const html = generateHtml({ urls })
58 | const whitelist = ['https://indiehackers.com*', 'https://www.indiehackers.com*']
59 | const htmlUrls = getLinks({ html, whitelist }).map(({ uri }) => uri)
60 |
61 | t.deepEqual(htmlUrls, [])
62 | })
63 |
--------------------------------------------------------------------------------