├── .eslintrc.json ├── .gitattributes ├── .github ├── FUNDING.yml ├── dependabot.yml ├── issue_template.md └── workflows │ ├── benchmark.yml │ ├── ci.yml │ ├── codeql.yml │ ├── dependabot-automerge.yml │ ├── lint.yml │ ├── site.yml │ └── sponsors.yml ├── .gitignore ├── .husky ├── .gitignore └── pre-commit ├── CONTRIBUTING.md ├── LICENSE ├── Readme.md ├── SECURITY.md ├── benchmark ├── benchmark.ts └── documents │ └── jquery.html ├── package-lock.json ├── package.json ├── scripts └── fetch-sponsors.mts ├── src ├── __fixtures__ │ └── fixtures.ts ├── __tests__ │ ├── deprecated.spec.ts │ └── xml.spec.ts ├── api │ ├── attributes.spec.ts │ ├── attributes.ts │ ├── css.spec.ts │ ├── css.ts │ ├── extract.spec.ts │ ├── extract.ts │ ├── forms.spec.ts │ ├── forms.ts │ ├── manipulation.spec.ts │ ├── manipulation.ts │ ├── traversing.spec.ts │ └── traversing.ts ├── cheerio.spec.ts ├── cheerio.ts ├── index-browser.mts ├── index.spec.ts ├── index.ts ├── load-parse.ts ├── load.spec.ts ├── load.ts ├── options.ts ├── parse.spec.ts ├── parse.ts ├── parsers │ └── parse5-adapter.ts ├── slim.ts ├── static.spec.ts ├── static.ts ├── types.ts ├── utils.spec.ts └── utils.ts ├── tsconfig.json ├── tsconfig.typedoc.json ├── vitest.config.ts └── website ├── .eslintrc.json ├── README.md ├── babel.config.js ├── blog ├── 2023-02-13-new-website.md ├── 2024-08-07-version-1.md └── authors.yml ├── crowdin.yml ├── docs ├── advanced │ ├── _category_.json │ ├── configuring-cheerio.md │ ├── extending-cheerio.md │ └── extract.md ├── basics │ ├── _category_.json │ ├── loading.md │ ├── manipulation.md │ ├── selecting.md │ └── traversing.md └── intro.md ├── docusaurus.config.js ├── package-lock.json ├── package.json ├── sponsors.json ├── src ├── components │ ├── HomepageFeatures.tsx │ ├── HomepageSponsors.module.css │ ├── HomepageSponsors.tsx │ └── HomepageTweets.tsx ├── css │ └── custom.css ├── pages │ ├── attribution.mdx │ └── index.tsx └── theme │ └── ReactLiveScope │ └── index.tsx ├── static ├── fonts │ ├── 
inter.woff │ └── rubik.woff └── img │ ├── 1F496.svg │ ├── 1F57A.svg │ ├── 26A1.svg │ ├── favicon.ico │ ├── orange-c-animated.svg │ └── orange-c.svg └── tsconfig.json /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["jsdoc"], 3 | "extends": [ 4 | "eslint:recommended", 5 | "plugin:jsdoc/recommended", 6 | "plugin:n/recommended", 7 | "plugin:unicorn/recommended", 8 | "prettier" 9 | ], 10 | "env": { "node": true }, 11 | "rules": { 12 | // Ensures array methods like .map() and .forEach() have return statements 13 | "array-callback-return": [ 14 | 2, 15 | { 16 | "allowImplicit": true 17 | } 18 | ], 19 | // Disallows `if` statements as the only statement in an `else` block 20 | "no-lonely-if": 2, 21 | "no-proto": 2, 22 | "eqeqeq": [2, "smart"], 23 | "no-caller": 2, 24 | // Encourages dot notation instead of brackets for property access 25 | "dot-notation": 2, 26 | "no-var": 2, 27 | "prefer-const": 2, 28 | "prefer-arrow-callback": [2, { "allowNamedFunctions": true }], 29 | "arrow-body-style": [2, "as-needed"], 30 | "object-shorthand": 2, 31 | "prefer-template": 2, 32 | "one-var": [2, "never"], 33 | "prefer-destructuring": [2, { "object": true }], 34 | // Ensures comments start with a capital letter 35 | "capitalized-comments": 2, 36 | // Enforces a consistent style for multiline comments 37 | "multiline-comment-style": [2, "starred-block"], 38 | "spaced-comment": 2, 39 | "yoda": [2, "never"], 40 | // Requires curly braces for multi-line control statements 41 | "curly": [2, "multi-line"], 42 | 43 | "no-else-return": [ 44 | 2, 45 | { 46 | "allowElseIf": false 47 | } 48 | ], 49 | "no-unused-expressions": 2, 50 | "no-useless-call": 2, 51 | "no-use-before-define": [2, "nofunc"], 52 | "no-constant-binary-expression": 2, 53 | "no-void": 2, 54 | 55 | "jsdoc/require-jsdoc": 0, 56 | "jsdoc/check-tag-names": 2, 57 | "jsdoc/tag-lines": [2, "any", { "startLines": 1 }], 58 | 
"jsdoc/require-description-complete-sentence": 2, 59 | "jsdoc/require-hyphen-before-param-description": 2, 60 | "jsdoc/require-param-description": 2, 61 | "jsdoc/require-param-name": 2, 62 | "jsdoc/require-param-type": 0, 63 | "jsdoc/require-returns-type": 0, 64 | "jsdoc/no-types": 2, 65 | "jsdoc/valid-types": 2, 66 | 67 | "n/file-extension-in-import": [2, "always"], 68 | "n/no-missing-import": 0, 69 | 70 | "unicorn/no-null": 0, 71 | "unicorn/prevent-abbreviations": 0, 72 | "unicorn/prefer-code-point": 0, 73 | "unicorn/no-for-loop": 0, 74 | "unicorn/no-array-callback-reference": 0, 75 | "unicorn/prefer-spread": 0, 76 | "unicorn/no-useless-undefined": 0, 77 | "unicorn/no-array-reduce": 0, 78 | "unicorn/prefer-array-find": 0, 79 | "unicorn/prefer-module": 0, 80 | "unicorn/prefer-at": 0, 81 | "unicorn/prefer-string-replace-all": 0, 82 | "unicorn/prefer-switch": [2, { "emptyDefaultCase": "do-nothing-comment" }] 83 | }, 84 | "settings": { 85 | "jsdoc": { 86 | "mode": "typescript", 87 | "tagNamePreference": { 88 | "category": "category" 89 | } 90 | } 91 | }, 92 | "overrides": [ 93 | { 94 | "files": "*.ts", 95 | "extends": [ 96 | "plugin:@typescript-eslint/eslint-recommended", 97 | "plugin:@typescript-eslint/recommended-type-checked", 98 | "plugin:@typescript-eslint/stylistic-type-checked", 99 | "prettier" 100 | ], 101 | "parserOptions": { 102 | "sourceType": "module", 103 | "project": "./tsconfig.json" 104 | }, 105 | "rules": { 106 | "dot-notation": 0, 107 | "curly": [2, "multi-line"], 108 | "n/no-unsupported-features/es-syntax": 0, 109 | 110 | "jsdoc/require-returns-check": 0, // Broken with overloaded fns 111 | 112 | "@typescript-eslint/prefer-for-of": 0, 113 | "@typescript-eslint/member-ordering": 0, 114 | "@typescript-eslint/explicit-function-return-type": 0, 115 | "@typescript-eslint/no-unused-vars": 0, 116 | "no-use-before-define": 0, 117 | "@typescript-eslint/no-use-before-define": [2, { "functions": false }], 118 | 
"@typescript-eslint/consistent-type-definitions": [2, "interface"], 119 | "@typescript-eslint/prefer-function-type": 2, 120 | "@typescript-eslint/no-unnecessary-type-arguments": 2, 121 | "@typescript-eslint/prefer-string-starts-ends-with": 2, 122 | "@typescript-eslint/prefer-readonly": 2, 123 | "@typescript-eslint/prefer-includes": 2, 124 | "@typescript-eslint/switch-exhaustiveness-check": 2, 125 | "@typescript-eslint/prefer-nullish-coalescing": 2, 126 | "@typescript-eslint/no-non-null-assertion": 1, 127 | "@typescript-eslint/consistent-type-imports": 2, 128 | 129 | "@typescript-eslint/no-explicit-any": 1 // TODO 130 | } 131 | }, 132 | { 133 | "files": "*.spec.ts", 134 | "extends": "plugin:vitest/legacy-recommended", 135 | "rules": { 136 | "n/no-unpublished-import": 0, 137 | "@typescript-eslint/no-explicit-any": 0 138 | } 139 | } 140 | ] 141 | } 142 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Enforce Unix newlines 2 | * text=auto eol=lf 3 | 4 | benchmark/documents/* binary 5 | benchmark/jquery*.js binary 6 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: [cheeriojs, fb55] 2 | open_collective: cheerio 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: npm 4 | directory: '/' 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | versioning-strategy: increase 9 | - package-ecosystem: npm 10 | directory: '/website' 11 | schedule: 12 | interval: daily 13 | open-pull-requests-limit: 4 14 | versioning-strategy: increase 15 | - package-ecosystem: 'github-actions' 16 | directory: 
'/' 17 | schedule: 18 | interval: daily 19 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | 10 | -------------------------------------------------------------------------------- /.github/workflows/benchmark.yml: -------------------------------------------------------------------------------- 1 | name: Benchmark 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'dependabot/**' 7 | pull_request: 8 | 9 | env: 10 | FORCE_COLOR: 2 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | benchmark: 17 | runs-on: ubuntu-latest 18 | if: 19 | "!contains(github.event.commits[0].message, '[bench skip]') && 20 | !contains(github.event.commits[0].message, '[skip bench]')" 21 | 22 | steps: 23 | - name: Clone repository 24 | uses: actions/checkout@v4 25 | 26 | - name: Set up Node.js 27 | uses: actions/setup-node@v4.4.0 28 | with: 29 | node-version: lts/* 30 | cache: 'npm' 31 | 32 | - name: Install npm dependencies 33 | run: npm ci 34 | 35 | - name: Run benchmarks 36 | run: npm run benchmark 37 | env: 38 | BENCHMARK: true 39 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'dependabot/**' 7 | pull_request: 8 | 9 | env: 10 | FORCE_COLOR: 2 11 | NODE_COV: lts/* # The Node.js version to run coveralls on 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | run: 18 | permissions: 19 | checks: write # for coverallsapp/github-action to create new checks 20 | contents: read # for actions/checkout to fetch code 21 | name: Node ${{ matrix.node }} 22 | runs-on: ubuntu-latest 23 | 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | node: 28 | - 18 29 | - 20 30 | - 22 31 | - lts/* 32 | 33 | steps: 34 | - name: Clone repository 35 
| uses: actions/checkout@v4 36 | 37 | - name: Set up Node.js 38 | uses: actions/setup-node@v4.4.0 39 | with: 40 | node-version: '${{ matrix.node }}' 41 | cache: 'npm' 42 | 43 | - name: Install npm dependencies 44 | run: npm ci 45 | 46 | - name: Run tests 47 | run: npm run test:vi 48 | if: matrix.node != env.NODE_COV 49 | 50 | - name: Run tests with coverage 51 | run: npm run test:vi -- --coverage 52 | if: matrix.node == env.NODE_COV 53 | 54 | - name: Run Coveralls 55 | uses: coverallsapp/github-action@v2.3.6 56 | if: matrix.node == env.NODE_COV 57 | continue-on-error: true 58 | with: 59 | github-token: '${{ secrets.GITHUB_TOKEN }}' 60 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | name: 'CodeQL' 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - '!dependabot/**' 8 | pull_request: 9 | # The branches below must be a subset of the branches above 10 | branches: 11 | - main 12 | - '!dependabot/**' 13 | schedule: 14 | - cron: '0 0 * * 0' 15 | 16 | jobs: 17 | analyze: 18 | name: Analyze 19 | runs-on: ubuntu-latest 20 | permissions: 21 | actions: read 22 | contents: read 23 | security-events: write 24 | 25 | steps: 26 | - name: Checkout repository 27 | uses: actions/checkout@v4 28 | 29 | - name: Initialize CodeQL 30 | uses: github/codeql-action/init@v3 31 | with: 32 | languages: 'javascript' 33 | 34 | - name: Perform CodeQL Analysis 35 | uses: github/codeql-action/analyze@v3 36 | -------------------------------------------------------------------------------- /.github/workflows/dependabot-automerge.yml: -------------------------------------------------------------------------------- 1 | # Based on https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request 2 | name: Dependabot auto-merge 3 | on: 
pull_request_target 4 | 5 | permissions: 6 | pull-requests: write 7 | contents: write 8 | 9 | jobs: 10 | dependabot: 11 | runs-on: ubuntu-latest 12 | if: ${{ github.actor == 'dependabot[bot]' }} 13 | steps: 14 | - name: Dependabot metadata 15 | id: metadata 16 | uses: dependabot/fetch-metadata@v2.4.0 17 | with: 18 | github-token: '${{ secrets.GITHUB_TOKEN }}' 19 | - name: Enable auto-merge for Dependabot PRs 20 | # Automatically merge semver-patch and semver-minor PRs 21 | if: 22 | "${{ steps.metadata.outputs.update-type == 23 | 'version-update:semver-minor' || steps.metadata.outputs.update-type == 24 | 'version-update:semver-patch' }}" 25 | run: gh pr merge --auto --squash "$PR_URL" 26 | env: 27 | PR_URL: ${{github.event.pull_request.html_url}} 28 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 29 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches-ignore: 6 | - 'dependabot/**' 7 | pull_request: 8 | 9 | env: 10 | FORCE_COLOR: 2 11 | 12 | permissions: 13 | contents: read 14 | 15 | jobs: 16 | lint: 17 | runs-on: ubuntu-latest 18 | 19 | steps: 20 | - name: Clone repository 21 | uses: actions/checkout@v4 22 | 23 | - name: Set up Node.js 24 | uses: actions/setup-node@v4.4.0 25 | with: 26 | node-version: lts/* 27 | cache: 'npm' 28 | 29 | - name: Install npm dependencies 30 | run: npm ci 31 | 32 | - name: Run lint 33 | run: npm run lint 34 | -------------------------------------------------------------------------------- /.github/workflows/site.yml: -------------------------------------------------------------------------------- 1 | name: Deploy website to GitHub Pages 2 | 3 | # Based on https://raw.githubusercontent.com/actions/starter-workflows 4 | 5 | on: 6 | # Runs on pushes targeting the main branch 7 | push: 8 | branches: [main] 9 | 10 | # Allows you to run this workflow manually 
from the Actions tab 11 | workflow_dispatch: 12 | 13 | env: 14 | FORCE_COLOR: 2 15 | 16 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 17 | permissions: 18 | contents: read 19 | pages: write 20 | id-token: write 21 | 22 | # Allow one concurrent deployment 23 | concurrency: 24 | group: 'pages' 25 | cancel-in-progress: true 26 | 27 | jobs: 28 | # Build job 29 | build: 30 | runs-on: ubuntu-latest 31 | steps: 32 | - name: Checkout 33 | uses: actions/checkout@v4 34 | - name: Setup Node 35 | uses: actions/setup-node@v4.4.0 36 | with: 37 | # Use current Node LTS version 38 | node-version: lts/* 39 | cache: 'npm' 40 | - name: Setup Pages 41 | id: pages 42 | uses: actions/configure-pages@v5 43 | - name: Install dependencies 44 | run: npm ci 45 | - name: Build 46 | run: npm run build 47 | - name: Install website dependencies 48 | working-directory: website 49 | run: npm ci 50 | - name: Sync Crowdin 51 | working-directory: website 52 | run: npm run crowdin:sync 53 | continue-on-error: true 54 | env: 55 | CROWDIN_PERSONAL_TOKEN: ${{ secrets.CROWDIN_PERSONAL_TOKEN }} 56 | - name: Build docs 57 | working-directory: website 58 | run: npm run build 59 | - name: Upload artifact 60 | uses: actions/upload-pages-artifact@v3 61 | with: 62 | path: ./website/build 63 | 64 | # Deployment job 65 | deploy: 66 | environment: 67 | name: github-pages 68 | url: ${{ steps.deployment.outputs.page_url }} 69 | runs-on: ubuntu-latest 70 | needs: build 71 | if: ${{github.repository == 'cheeriojs/cheerio'}} 72 | steps: 73 | - name: Deploy to GitHub Pages 74 | id: deployment 75 | uses: actions/deploy-pages@v4 76 | -------------------------------------------------------------------------------- /.github/workflows/sponsors.yml: -------------------------------------------------------------------------------- 1 | name: Update Sponsors 2 | 3 | on: 4 | schedule: 5 | # Run once a day, at 4pm 6 | - cron: '0 16 * * *' 7 | # Allow manual trigger 8 | workflow_dispatch: 9 | 10 | env: 
11 | FORCE_COLOR: 2 12 | 13 | permissions: 14 | contents: read 15 | 16 | jobs: 17 | fetch: 18 | permissions: 19 | contents: write # for peter-evans/create-pull-request to create branch 20 | pull-requests: write # for peter-evans/create-pull-request to create a PR 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - name: Clone repository 25 | uses: actions/checkout@v4 26 | 27 | - name: Set up Node.js 28 | uses: actions/setup-node@v4.4.0 29 | with: 30 | node-version: lts/* 31 | cache: 'npm' 32 | 33 | - name: Install npm dependencies 34 | run: npm ci 35 | 36 | - name: Update the README 37 | run: npm run update-sponsors 38 | env: 39 | CHEERIO_SPONSORS_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 40 | IMGIX_TOKEN: ${{ secrets.IMGIX_TOKEN }} 41 | 42 | - name: Create Pull Request 43 | uses: peter-evans/create-pull-request@v7 44 | continue-on-error: true 45 | with: 46 | commit-message: 'docs(readme): Update Sponsors' 47 | title: Update Sponsors 48 | branch: docs/sponsors 49 | delete-branch: true 50 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | npm-debug.log 3 | .DS_Store 4 | .docusaurus 5 | .cache-loader 6 | /coverage 7 | /.tshy 8 | /.tshy-build 9 | /dist 10 | /website/docs/api 11 | /website/build 12 | -------------------------------------------------------------------------------- /.husky/.gitignore: -------------------------------------------------------------------------------- 1 | _ 2 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | lint-staged -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Cheerio 2 | 3 | Thanks for your interest in 
contributing to the project! Here's a rundown of how 4 | we'd like to work with you: 5 | 6 | 1. File an issue on GitHub describing the contribution you'd like to make. This 7 | will help us to get you started on the right foot. 8 | 2. Fork the project, and make your changes in a new branch based off of the 9 | `main` branch: 10 | 1. Follow the project's code style (see below) 11 | 2. Add enough unit tests to "prove" that your patch is correct 12 | 3. Update the project documentation as needed (see below) 13 | 4. Describe your approach with as much detail as necessary in the git 14 | commit message 15 | 3. Open a pull request, and reference the initial issue in the pull request 16 | message. 17 | 18 | # Documentation 19 | 20 | Any API change should be reflected in the project's README.md file. Reuse 21 | [jQuery's documentation](https://api.jquery.com) wherever possible, but take 22 | care to note aspects that make Cheerio distinct. 23 | 24 | # Code Style 25 | 26 | Please make sure commit hooks are run, which will enforce the code style. 27 | 28 | When implementing private functionality that isn't part of the jQuery API, 29 | please opt for: 30 | 31 | - _Static methods_: If the functionality does not require a reference to a 32 | Cheerio instance, simply define a named function within the module where it is 33 | needed. 
34 | - _Instance methods_: If the functionality requires a reference to a Cheerio 35 | instance, informally define the method as "private" using the following 36 | conventions: 37 | - Define the method as a function on the Cheerio prototype 38 | - Prefix the method name with an underscore (`_`) character 39 | - Include `@api private` in the code comment that documents the method 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 The Cheerio contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 |

cheerio

2 | 3 |
The fast, flexible, and elegant library for parsing and manipulating HTML and XML.
4 | 5 |
6 | 7 | Build Status 8 | 9 | 10 | Coverage 11 | 12 | 13 | OpenCollective backers 14 | 15 | 16 | OpenCollective sponsors 17 | 18 |
19 | 20 |
21 | 22 | [中文文档 (Chinese Readme)](https://github.com/cheeriojs/cheerio/wiki/Chinese-README) 23 | 24 | ```js 25 | import * as cheerio from 'cheerio'; 26 | const $ = cheerio.load('

Hello world

'); 27 | 28 | $('h2.title').text('Hello there!'); 29 | $('h2').addClass('welcome'); 30 | 31 | $.html(); 32 | //=>

Hello there!

33 | ``` 34 | 35 | ## Installation 36 | 37 | Install Cheerio using a package manager like npm, yarn, or bun. 38 | 39 | ```bash 40 | npm install cheerio 41 | # or 42 | bun add cheerio 43 | ``` 44 | 45 | ## Features 46 | 47 | **❤ Proven syntax:** Cheerio implements a subset of core jQuery. Cheerio 48 | removes all the DOM inconsistencies and browser cruft from the jQuery library, 49 | revealing its truly gorgeous API. 50 | 51 | **ϟ Blazingly fast:** Cheerio works with a very simple, consistent DOM 52 | model. As a result parsing, manipulating, and rendering are incredibly 53 | efficient. 54 | 55 | **❁ Incredibly flexible:** Cheerio wraps around 56 | [parse5](https://github.com/inikulin/parse5) for parsing HTML and can optionally 57 | use the forgiving [htmlparser2](https://github.com/fb55/htmlparser2/). Cheerio 58 | can parse nearly any HTML or XML document. Cheerio works in both browser and 59 | server environments. 60 | 61 | ## API 62 | 63 | ### Loading 64 | 65 | First you need to load in the HTML. This step in jQuery is implicit, since 66 | jQuery operates on the one, baked-in DOM. With Cheerio, we need to pass in the 67 | HTML document. 68 | 69 | ```js 70 | // ESM or TypeScript: 71 | import * as cheerio from 'cheerio'; 72 | 73 | // In other environments: 74 | const cheerio = require('cheerio'); 75 | 76 | const $ = cheerio.load(''); 77 | 78 | $.html(); 79 | //=> 80 | ``` 81 | 82 | ### Selectors 83 | 84 | Once you've loaded the HTML, you can use jQuery-style selectors to find elements 85 | within the document. 86 | 87 | #### \$( selector, [context], [root] ) 88 | 89 | `selector` searches within the `context` scope which searches within the `root` 90 | scope. `selector` and `context` can be a string expression, DOM Element, array 91 | of DOM elements, or cheerio object. `root`, if provided, is typically the HTML 92 | document string. 93 | 94 | This selector method is the starting point for traversing and manipulating the 95 | document. 
Like in jQuery, it's the primary method for selecting elements in the 96 | document. 97 | 98 | ```js 99 | $('.apple', '#fruits').text(); 100 | //=> Apple 101 | 102 | $('ul .pear').attr('class'); 103 | //=> pear 104 | 105 | $('li[class=orange]').html(); 106 | //=> Orange 107 | ``` 108 | 109 | ### Rendering 110 | 111 | When you're ready to render the document, you can call the `html` method on the 112 | "root" selection: 113 | 114 | ```js 115 | $.root().html(); 116 | //=> 117 | // 118 | // 119 | // 124 | // 125 | // 126 | ``` 127 | 128 | If you want to render the 129 | [`outerHTML`](https://developer.mozilla.org/en-US/docs/Web/API/Element/outerHTML) 130 | of a selection, you can use the `outerHTML` prop: 131 | 132 | ```js 133 | $('.pear').prop('outerHTML'); 134 | //=>
  • Pear
  • 135 | ``` 136 | 137 | You may also render the text content of a Cheerio object using the `text` 138 | method: 139 | 140 | ```js 141 | const $ = cheerio.load('This is content.'); 142 | $('body').text(); 143 | //=> This is content. 144 | ``` 145 | 146 | ### The "DOM Node" object 147 | 148 | Cheerio collections are made up of objects that bear some resemblance to 149 | [browser-based DOM nodes](https://developer.mozilla.org/en-US/docs/Web/API/Node). 150 | You can expect them to define the following properties: 151 | 152 | - `tagName` 153 | - `parentNode` 154 | - `previousSibling` 155 | - `nextSibling` 156 | - `nodeValue` 157 | - `firstChild` 158 | - `childNodes` 159 | - `lastChild` 160 | 161 | ## Screencasts 162 | 163 | [https://vimeo.com/31950192](https://vimeo.com/31950192) 164 | 165 | > This video tutorial is a follow-up to Nettut's "How to Scrape Web Pages with 166 | > Node.js and jQuery", using cheerio instead of JSDOM + jQuery. This video shows 167 | > how easy it is to use cheerio and how much faster cheerio is than JSDOM + 168 | > jQuery. 169 | 170 | ## Cheerio in the real world 171 | 172 | Are you using cheerio in production? Add it to the 173 | [wiki](https://github.com/cheeriojs/cheerio/wiki/Cheerio-in-Production)! 174 | 175 | ## Sponsors 176 | 177 | Does your company use Cheerio in production? Please consider 178 | [sponsoring this project](https://github.com/cheeriojs/cheerio?sponsor=1)! Your 179 | help will allow maintainers to dedicate more time and resources to its 180 | development and support. 
181 | 182 | **Headlining Sponsors** 183 | 184 | 185 | 186 | 187 | Tidelift 188 | 189 | 190 | Github 191 | 192 | 193 | AirBnB 194 | 195 | 196 | 197 | 198 | **Other Sponsors** 199 | 200 | 201 | 202 | 203 | Ігрові автомати 204 | 205 | 206 | OnlineCasinosSpelen 207 | 208 | 209 | CasinoZonderRegistratie.net 210 | 211 | 212 | Nieuwe-Casinos.net 213 | 214 | 215 | 216 | 217 | ## Backers 218 | 219 | [Become a backer](https://github.com/cheeriojs/cheerio?sponsor=1) to show your 220 | support for Cheerio and help us maintain and improve this open source project. 221 | 222 | 223 | 224 | 225 | Vasy Kafidoff 226 | 227 | 228 | Espen Klem 229 | 230 | 231 | 232 | 233 | ## License 234 | 235 | MIT 236 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Only the latest release will receive security updates. 6 | 7 | ## Reporting a Vulnerability 8 | 9 | To report a security vulnerability, please use the 10 | [Tidelift security contact](https://tidelift.com/security). Tidelift will 11 | coordinate the fix and disclosure. 
12 | -------------------------------------------------------------------------------- /benchmark/benchmark.ts: -------------------------------------------------------------------------------- 1 | import fs from 'node:fs/promises'; 2 | import { Script } from 'node:vm'; 3 | import { Bench } from 'tinybench'; 4 | import type { Element } from 'domhandler'; 5 | import type { Cheerio } from '../src/cheerio.js'; 6 | import type { CheerioAPI } from '../src/load.js'; 7 | import { JSDOM } from 'jsdom'; 8 | import { load } from '../src/load-parse.js'; 9 | 10 | const documentDir = new URL('documents/', import.meta.url); 11 | const jQuerySrc = await fs.readFile( 12 | new URL('../node_modules/jquery/dist/jquery.slim.js', import.meta.url), 13 | 'utf8', 14 | ); 15 | const jQueryScript = new Script(jQuerySrc); 16 | const filterIndex = process.argv.indexOf('--filter') + 1; /* `indexOf` yields -1 when `--filter` is absent, so `filterIndex` is 0 in that case; only a positive index points at a filter value. */ 17 | const benchmarkFilter = filterIndex > 0 ? process.argv[filterIndex] : ''; 18 | 19 | const cheerioOnly = process.argv.includes('--cheerio-only'); 20 | 21 | type SuiteOptions = T extends void 22 | ? 
{ 23 | test(this: void, $: CheerioAPI): void; 24 | setup?: (this: void, $: CheerioAPI) => T; 25 | } 26 | : { 27 | test(this: void, $: CheerioAPI, data: T): void; 28 | setup(this: void, $: CheerioAPI): T; 29 | }; 30 | 31 | async function benchmark( 32 | name: string, 33 | fileName: string, 34 | options: SuiteOptions, 35 | ): Promise { 36 | if (!name.includes(benchmarkFilter)) { 37 | return; 38 | } 39 | const markup = await fs.readFile(new URL(fileName, documentDir), 'utf8'); 40 | 41 | console.log(`Test: ${name} (file: ${fileName})`); 42 | 43 | const bench = new Bench(); 44 | const { test, setup } = options; 45 | 46 | // Add Cheerio test 47 | const $ = load(markup); 48 | const setupData = setup?.($) as T; 49 | 50 | bench.add('cheerio', () => { 51 | test($, setupData); 52 | }); 53 | 54 | // Add JSDOM test 55 | if (!cheerioOnly) { 56 | const dom = new JSDOM(markup, { runScripts: 'outside-only' }); 57 | 58 | jQueryScript.runInContext(dom.getInternalVMContext()); 59 | 60 | const setupData = setup?.(dom.window['$'] as CheerioAPI) as T; 61 | 62 | bench.add('jsdom', () => test(dom.window['$'] as CheerioAPI, setupData)); 63 | } 64 | 65 | await bench.run(); 66 | 67 | console.table(bench.table()); 68 | } 69 | 70 | await benchmark('Select all', 'jquery.html', { 71 | test: ($) => $('*').length, 72 | }); 73 | await benchmark('Select some', 'jquery.html', { 74 | test: ($) => $('li').length, 75 | }); 76 | 77 | /* 78 | * Manipulation Tests 79 | */ 80 | const DIVS_MARKUP = '
    '.repeat(50); 81 | await benchmark>('manipulation - append', 'jquery.html', { 82 | setup: ($) => $('body'), 83 | test: (_, $body) => $body.append(DIVS_MARKUP), 84 | }); 85 | 86 | // JSDOM used to run out of memory on these tests 87 | await benchmark>( 88 | 'manipulation - prepend - highmem', 89 | 'jquery.html', 90 | { 91 | setup: ($) => $('body'), 92 | test: (_, $body) => $body.prepend(DIVS_MARKUP), 93 | }, 94 | ); 95 | await benchmark>( 96 | 'manipulation - after - highmem', 97 | 'jquery.html', 98 | { 99 | setup: ($) => $('body'), 100 | test: (_, $body) => $body.after(DIVS_MARKUP), 101 | }, 102 | ); 103 | await benchmark>( 104 | 'manipulation - before - highmem', 105 | 'jquery.html', 106 | { 107 | setup: ($) => $('body'), 108 | test: (_, $body) => $body.before(DIVS_MARKUP), 109 | }, 110 | ); 111 | 112 | await benchmark>('manipulation - remove', 'jquery.html', { 113 | setup: ($) => $('body'), 114 | test($, $lis) { 115 | const child = $('
    '); 116 | $lis.append(child); 117 | child.remove(); 118 | }, 119 | }); 120 | 121 | await benchmark('manipulation - replaceWith', 'jquery.html', { 122 | setup($) { 123 | $('body').append('
    '); 124 | }, 125 | test($) { 126 | $('#foo').replaceWith('
    '); 127 | }, 128 | }); 129 | 130 | await benchmark>('manipulation - empty', 'jquery.html', { 131 | setup: ($) => $('li'), 132 | test(_, $lis) { 133 | $lis.empty(); 134 | }, 135 | }); 136 | await benchmark>('manipulation - html', 'jquery.html', { 137 | setup: ($) => $('li'), 138 | test(_, $lis) { 139 | $lis.html(); 140 | $lis.html('foo'); 141 | }, 142 | }); 143 | await benchmark>('manipulation - html render', 'jquery.html', { 144 | setup: ($) => $('body'), 145 | test(_, $lis) { 146 | $lis.html(); 147 | }, 148 | }); 149 | 150 | const HTML_INDEPENDENT_MARKUP = 151 | '
    bat
    baz
    '.repeat(6); 152 | await benchmark('manipulation - html independent', 'jquery.html', { 153 | test: ($) => $(HTML_INDEPENDENT_MARKUP).html(), 154 | }); 155 | await benchmark>('manipulation - text', 'jquery.html', { 156 | setup: ($) => $('li'), 157 | test(_, $lis) { 158 | $lis.text(); 159 | $lis.text('foo'); 160 | }, 161 | }); 162 | 163 | /* 164 | * Traversing Tests 165 | */ 166 | await benchmark>('traversing - Find', 'jquery.html', { 167 | setup: ($) => $('li'), 168 | test: (_, $lis) => $lis.find('li').length, 169 | }); 170 | await benchmark>('traversing - Parent', 'jquery.html', { 171 | setup: ($) => $('li'), 172 | test: (_, $lis) => $lis.parent('div').length, 173 | }); 174 | await benchmark>('traversing - Parents', 'jquery.html', { 175 | setup: ($) => $('li'), 176 | test: (_, $lis) => $lis.parents('div').length, 177 | }); 178 | await benchmark>('traversing - Closest', 'jquery.html', { 179 | setup: ($) => $('li'), 180 | test: (_, $lis) => $lis.closest('div').length, 181 | }); 182 | await benchmark>('traversing - next', 'jquery.html', { 183 | setup: ($) => $('li'), 184 | test: (_, $lis) => $lis.next().length, 185 | }); 186 | await benchmark>('traversing - nextAll', 'jquery.html', { 187 | setup: ($) => $('li'), 188 | test: (_, $lis) => $lis.nextAll('li').length, 189 | }); 190 | await benchmark>('traversing - nextUntil', 'jquery.html', { 191 | setup: ($) => $('li'), 192 | test: (_, $lis) => $lis.nextUntil('li').length, 193 | }); 194 | await benchmark>('traversing - prev', 'jquery.html', { 195 | setup: ($) => $('li'), 196 | test: (_, $lis) => $lis.prev().length, 197 | }); 198 | await benchmark>('traversing - prevAll', 'jquery.html', { 199 | setup: ($) => $('li'), 200 | test: (_, $lis) => $lis.prevAll('li').length, 201 | }); 202 | await benchmark>('traversing - prevUntil', 'jquery.html', { 203 | setup: ($) => $('li'), 204 | test: (_, $lis) => $lis.prevUntil('li').length, 205 | }); 206 | await benchmark>('traversing - siblings', 'jquery.html', { 207 | setup: ($) => 
$('li'), 208 | test: (_, $lis) => $lis.siblings('li').length, 209 | }); 210 | await benchmark>('traversing - Children', 'jquery.html', { 211 | setup: ($) => $('li'), 212 | test: (_, $lis) => $lis.children('a').length, 213 | }); 214 | await benchmark>('traversing - Filter', 'jquery.html', { 215 | setup: ($) => $('li'), 216 | test: (_, $lis) => $lis.filter('li').length, 217 | }); 218 | await benchmark>('traversing - First', 'jquery.html', { 219 | setup: ($) => $('li'), 220 | test: (_, $lis) => $lis.first().first().length, 221 | }); 222 | await benchmark>('traversing - Last', 'jquery.html', { 223 | setup: ($) => $('li'), 224 | test: (_, $lis) => $lis.last().last().length, 225 | }); 226 | await benchmark>('traversing - Eq', 'jquery.html', { 227 | setup: ($) => $('li'), 228 | test: (_, $lis) => $lis.eq(0).eq(0).length, 229 | }); 230 | 231 | /* 232 | * Attributes Tests 233 | */ 234 | await benchmark>('attributes - Attributes', 'jquery.html', { 235 | setup: ($) => $('li'), 236 | test(_, $lis) { 237 | $lis.attr('foo', 'bar'); 238 | $lis.attr('foo'); 239 | $lis.removeAttr('foo'); 240 | }, 241 | }); 242 | await benchmark>( 243 | 'attributes - Single Attribute', 244 | 'jquery.html', 245 | { 246 | setup: ($) => $('body'), 247 | test(_, $lis) { 248 | $lis.attr('foo', 'bar'); 249 | $lis.attr('foo'); 250 | $lis.removeAttr('foo'); 251 | }, 252 | }, 253 | ); 254 | await benchmark>('attributes - Data', 'jquery.html', { 255 | setup: ($) => $('li'), 256 | test(_, $lis) { 257 | $lis.data('foo', 'bar'); 258 | $lis.data('foo'); 259 | }, 260 | }); 261 | await benchmark>('attributes - Val', 'jquery.html', { 262 | setup: ($) => $('select,input,textarea,option'), 263 | test($, $lis) { 264 | $lis.each(function () { 265 | $(this).val(); 266 | $(this).val('foo'); 267 | }); 268 | }, 269 | }); 270 | 271 | await benchmark>('attributes - Has class', 'jquery.html', { 272 | setup: ($) => $('li'), 273 | test: (_, $lis) => $lis.hasClass('foo'), 274 | }); 275 | await benchmark>('attributes - Toggle 
class', 'jquery.html', { 276 | setup: ($) => $('li'), 277 | test: (_, $lis) => $lis.toggleClass('foo'), 278 | }); 279 | await benchmark>( 280 | 'attributes - Add Remove class', 281 | 'jquery.html', 282 | { 283 | setup: ($) => $('li'), 284 | test(_, $lis) { 285 | $lis.addClass('foo'); 286 | $lis.removeClass('foo'); 287 | }, 288 | }, 289 | ); 290 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cheerio", 3 | "version": "1.0.0", 4 | "description": "The fast, flexible & elegant library for parsing and manipulating HTML and XML.", 5 | "keywords": [ 6 | "htmlparser", 7 | "jquery", 8 | "selector", 9 | "scraper", 10 | "parser", 11 | "dom", 12 | "xml", 13 | "html" 14 | ], 15 | "homepage": "https://cheerio.js.org/", 16 | "bugs": { 17 | "url": "https://github.com/cheeriojs/cheerio/issues" 18 | }, 19 | "repository": { 20 | "type": "git", 21 | "url": "git://github.com/cheeriojs/cheerio.git" 22 | }, 23 | "funding": "https://github.com/cheeriojs/cheerio?sponsor=1", 24 | "license": "MIT", 25 | "author": "Matt Mueller ", 26 | "maintainers": [ 27 | "Felix Boehm " 28 | ], 29 | "type": "module", 30 | "exports": { 31 | ".": { 32 | "browser": { 33 | "types": "./dist/browser/index.d.ts", 34 | "default": "./dist/browser/index.js" 35 | }, 36 | "import": { 37 | "types": "./dist/esm/index.d.ts", 38 | "default": "./dist/esm/index.js" 39 | }, 40 | "require": { 41 | "types": "./dist/commonjs/index.d.ts", 42 | "default": "./dist/commonjs/index.js" 43 | } 44 | }, 45 | "./slim": { 46 | "browser": { 47 | "types": "./dist/browser/slim.d.ts", 48 | "default": "./dist/browser/slim.js" 49 | }, 50 | "import": { 51 | "types": "./dist/esm/slim.d.ts", 52 | "default": "./dist/esm/slim.js" 53 | }, 54 | "require": { 55 | "types": "./dist/commonjs/slim.d.ts", 56 | "default": "./dist/commonjs/slim.js" 57 | } 58 | }, 59 | "./utils": { 60 | "browser": { 61 | "types": 
"./dist/browser/utils.d.ts", 62 | "default": "./dist/browser/utils.js" 63 | }, 64 | "import": { 65 | "types": "./dist/esm/utils.d.ts", 66 | "default": "./dist/esm/utils.js" 67 | }, 68 | "require": { 69 | "types": "./dist/commonjs/utils.d.ts", 70 | "default": "./dist/commonjs/utils.js" 71 | } 72 | }, 73 | "./package.json": "./package.json" 74 | }, 75 | "main": "./dist/commonjs/index.js", 76 | "module": "./dist/esm/index.js", 77 | "browser": "./dist/browser/index.js", 78 | "types": "./dist/commonjs/index.d.ts", 79 | "files": [ 80 | "dist", 81 | "src" 82 | ], 83 | "scripts": { 84 | "benchmark": "node --import=tsx benchmark/benchmark.ts", 85 | "build": "tshy", 86 | "format": "npm run format:es && npm run format:prettier", 87 | "format:es": "npm run lint:es -- --fix", 88 | "format:prettier": "npm run format:prettier:raw -- --write", 89 | "format:prettier:raw": "prettier \"**/*.{{m,c,}{j,t}s{x,},md{x,},json,y{a,}ml}\" --ignore-path .gitignore", 90 | "lint": "npm run lint:es && npm run lint:prettier && npm run lint:ts", 91 | "lint:es": "eslint --report-unused-disable-directives --ignore-path .gitignore .", 92 | "lint:prettier": "npm run format:prettier:raw -- --check", 93 | "lint:ts": "tsc --noEmit", 94 | "prepare": "husky", 95 | "prepublishOnly": "npm run build", 96 | "test": "npm run lint && npm run test:vi", 97 | "test:vi": "vitest run", 98 | "update-sponsors": "tsx scripts/fetch-sponsors.mts" 99 | }, 100 | "lint-staged": { 101 | "*.js": [ 102 | "prettier --write", 103 | "npm run lint:es -- --fix" 104 | ], 105 | "*.{json,md,ts,yml}": [ 106 | "prettier --write" 107 | ] 108 | }, 109 | "prettier": { 110 | "plugins": [ 111 | "./node_modules/prettier-plugin-jsdoc/dist/index.js" 112 | ], 113 | "proseWrap": "always", 114 | "singleQuote": true, 115 | "tabWidth": 2, 116 | "tsdoc": true 117 | }, 118 | "dependencies": { 119 | "cheerio-select": "^2.1.0", 120 | "dom-serializer": "^2.0.0", 121 | "domhandler": "^5.0.3", 122 | "domutils": "^3.2.2", 123 | "encoding-sniffer": "^0.2.0", 
124 | "htmlparser2": "^10.0.0", 125 | "parse5": "^7.3.0", 126 | "parse5-htmlparser2-tree-adapter": "^7.1.0", 127 | "parse5-parser-stream": "^7.1.2", 128 | "undici": "^7.10.0", 129 | "whatwg-mimetype": "^4.0.0" 130 | }, 131 | "devDependencies": { 132 | "@imgix/js-core": "^3.8.0", 133 | "@octokit/graphql": "^8.2.2", 134 | "@types/jsdom": "^21.1.7", 135 | "@types/node": "^22.15.29", 136 | "@types/whatwg-mimetype": "^3.0.2", 137 | "@typescript-eslint/eslint-plugin": "^8.33.0", 138 | "@typescript-eslint/parser": "^8.32.1", 139 | "@vitest/coverage-v8": "^3.1.4", 140 | "eslint": "^8.57.0", 141 | "eslint-config-prettier": "^10.1.5", 142 | "eslint-plugin-jsdoc": "^50.7.1", 143 | "eslint-plugin-n": "^17.18.0", 144 | "eslint-plugin-unicorn": "^56.0.1", 145 | "eslint-plugin-vitest": "^0.5.4", 146 | "husky": "^9.1.7", 147 | "jquery": "^3.7.1", 148 | "jsdom": "^26.1.0", 149 | "lint-staged": "^15.5.1", 150 | "prettier": "^3.5.3", 151 | "prettier-plugin-jsdoc": "^1.3.2", 152 | "tinybench": "^4.0.1", 153 | "tshy": "^3.0.2", 154 | "tsx": "^4.19.4", 155 | "typescript": "^5.8.3", 156 | "vitest": "^3.1.4" 157 | }, 158 | "engines": { 159 | "node": ">=18.17" 160 | }, 161 | "tshy": { 162 | "esmDialects": [ 163 | "browser" 164 | ], 165 | "exports": { 166 | ".": "./src/index.ts", 167 | "./slim": "./src/slim.ts", 168 | "./utils": "./src/utils.ts", 169 | "./package.json": "./package.json" 170 | }, 171 | "exclude": [ 172 | "**/*.spec.ts", 173 | "**/__fixtures__/*", 174 | "**/__tests__/*", 175 | "**/__snapshots__/*" 176 | ] 177 | } 178 | } 179 | -------------------------------------------------------------------------------- /src/__fixtures__/fixtures.ts: -------------------------------------------------------------------------------- 1 | import type { CheerioAPI } from '../load.js'; 2 | import { load } from '../load-parse.js'; 3 | 4 | /** A Cheerio instance with no content. */ 5 | export const cheerio: CheerioAPI = load([]); 6 | 7 | export const fruits: string = [ 8 | '
      ', 9 | '
    • Apple
    • ', 10 | '
    • Orange
    • ', 11 | '
    • Pear
    • ', 12 | '
    ', 13 | ].join(''); 14 | 15 | export const vegetables: string = [ 16 | '
      ', 17 | '
    • Carrot
    • ', 18 | '
    • Sweetcorn
    • ', 19 | '
    ', 20 | ].join(''); 21 | 22 | export const divcontainers: string = [ 23 | '
    ', 24 | '
    First
    ', 25 | '
    Second
    ', 26 | '
    ', 27 | '
    ', 28 | '
    Third
    ', 29 | '
    Fourth
    ', 30 | '
    ', 31 | '
    ', 32 | '
    \n\n

    \n\n
    ', 33 | '
    ', 34 | ].join(''); 35 | 36 | export const chocolates: string = [ 37 | '
      ', 38 | '
    • Linth
    • ', 39 | '
    • Frey
    • ', 40 | '
    • Cailler
    • ', 41 | '
    ', 42 | ].join(''); 43 | 44 | export const drinks: string = [ 45 | '
      ', 46 | '
    • Beer
    • ', 47 | '
    • Juice
    • ', 48 | '
    • Milk
    • ', 49 | '
    • Water
    • ', 50 | '
    • Cider
    • ', 51 | '
    ', 52 | ].join(''); 53 | 54 | export const food: string = [ 55 | '
      ', 56 | fruits, 57 | vegetables, 58 | '
    ', 59 | ].join(''); 60 | 61 | export const eleven = ` 62 | 63 | 64 |
      65 |
    • One
    • 66 |
    • Two
    • 67 |
    • Three
    • 68 |
    • Four
    • 69 |
    70 | 71 |
      72 |
    • Five
    • 73 |
    • Six
    • 74 |
    • Seven
    • 75 |
    76 | 77 |
      78 |
    • Eight
    • 79 |
    • Nine
    • 80 |
    • Ten
    • 81 |
    • Eleven
    • 82 |
    83 | 84 | 85 | `; 86 | 87 | export const unwrapspans: string = [ 88 | '', 93 | ].join(''); 94 | 95 | export const inputs: string = [ 96 | '', 97 | '', 98 | '', 99 | '', 100 | '', 101 | '', 102 | '', 103 | '', 104 | '', 105 | '', 106 | '', 107 | '', 108 | ].join(''); 109 | 110 | export const text: string = [ 111 | '

    Apples, oranges and pears.

    ', 112 | '

    Carrots and

    ', 113 | ].join(''); 114 | 115 | export const forms: string = [ 116 | '
    ', 117 | '
    ', 118 | '
    ', 119 | '
    ', 120 | '
    ', 121 | '
    ', 122 | '
    ', 123 | '
    ', 124 | '
    ', 125 | ].join(''); 126 | 127 | export const noscript: string = [ 128 | '', 129 | '', 133 | '

    Rocks!

    ', 134 | '', 135 | ].join(''); 136 | 137 | export const script: string = [ 138 | '
    ', 139 | 'A', 140 | '', 143 | 'B', 144 | '
    ', 145 | ].join(''); 146 | 147 | export const mixedText = '1TEXT2'; 148 | -------------------------------------------------------------------------------- /src/__tests__/xml.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { load } from '../index.js'; 3 | import type { CheerioOptions } from '../options.js'; 4 | 5 | function xml(str: string, options?: CheerioOptions) { 6 | options = { xml: true, ...options }; 7 | const $ = load(str, options); 8 | return $.xml(); 9 | } 10 | 11 | function dom(str: string, options?: CheerioOptions) { 12 | const $ = load('', options); 13 | return $(str).html(); 14 | } 15 | 16 | describe('render', () => { 17 | describe('(xml)', () => { 18 | it('should render tags correctly', () => { 19 | const str = 20 | ''; 21 | expect(xml(str)).toBe( 22 | '', 23 | ); 24 | }); 25 | 26 | it('should render tags (RSS) correctly', () => { 27 | const str = 'http://www.github.com/'; 28 | expect(xml(str)).toBe('http://www.github.com/'); 29 | }); 30 | 31 | it('should escape entities', () => { 32 | const str = ''; 33 | expect(xml(str)).toBe(str); 34 | }); 35 | 36 | it('should render HTML as XML', () => { 37 | const $ = load('', null, false); 38 | expect($.xml()).toBe(''); 39 | }); 40 | }); 41 | 42 | describe('(dom)', () => { 43 | it('should not keep camelCase for new nodes', () => { 44 | const str = 'hello'; 45 | expect(dom(str, { xml: false })).toBe( 46 | 'hello', 47 | ); 48 | }); 49 | 50 | it('should keep camelCase for new nodes', () => { 51 | const str = 'hello'; 52 | expect(dom(str, { xml: true })).toBe( 53 | 'hello', 54 | ); 55 | }); 56 | 57 | it('should maintain the parsing options of distinct contexts independently', () => { 58 | const str = 'hello'; 59 | const $ = load('', { xml: false }); 60 | 61 | expect($(str).html()).toBe( 62 | 'hello', 63 | ); 64 | }); 65 | }); 66 | }); 67 | 
-------------------------------------------------------------------------------- /src/api/css.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect, beforeEach } from 'vitest'; 2 | import { load, type Cheerio } from '../index.js'; 3 | import type { Element } from 'domhandler'; 4 | import { cheerio, mixedText } from '../__fixtures__/fixtures.js'; 5 | 6 | describe('$(...)', () => { 7 | describe('.css', () => { 8 | it('(prop): should return a css property value', () => { 9 | const el = cheerio('
  • '); 10 | expect(el.css('hai')).toBe('there'); 11 | }); 12 | 13 | it('([prop1, prop2]): should return the specified property values as an object', () => { 14 | const el = cheerio( 15 | '
  • ', 16 | ); 17 | expect(el.css(['margin', 'color'])).toStrictEqual({ 18 | margin: '1px', 19 | color: 'blue', 20 | }); 21 | }); 22 | 23 | it('(prop, val): should set a css property', () => { 24 | const el = cheerio('
  • '); 25 | el.css('color', 'red'); 26 | expect(el.attr('style')).toBe('margin: 0; color: red;'); 27 | expect(el.eq(1).attr('style')).toBe('color: red;'); 28 | }); 29 | 30 | it('(prop, val) : should skip text nodes', () => { 31 | const $text = load(mixedText); 32 | const $body = $text($text('body')[0].children); 33 | 34 | $body.css('test', 'value'); 35 | 36 | expect($text('body').html()).toBe( 37 | '1TEXT2', 38 | ); 39 | }); 40 | 41 | it('(prop, ""): should unset a css property', () => { 42 | const el = cheerio('
  • '); 43 | el.css('padding', ''); 44 | expect(el.attr('style')).toBe('margin: 0;'); 45 | }); 46 | 47 | it('(any, val): should ignore unsupported prop types', () => { 48 | const el = cheerio('
  • '); 49 | el.css(123 as never, 'test'); 50 | expect(el.attr('style')).toBe('padding: 1px;'); 51 | }); 52 | 53 | it('(prop): should not mangle embedded urls', () => { 54 | const el = cheerio( 55 | '
  • ', 56 | ); 57 | expect(el.css('background-image')).toBe( 58 | 'url(http://example.com/img.png)', 59 | ); 60 | }); 61 | 62 | it('(prop): should ignore blank properties', () => { 63 | const el = cheerio('
  • '); 64 | expect(el.css()).toStrictEqual({ color: '#aaa' }); 65 | }); 66 | 67 | it('(prop): should ignore blank values', () => { 68 | const el = cheerio('
  • '); 69 | expect(el.css()).toStrictEqual({ position: 'absolute' }); 70 | }); 71 | 72 | it('(prop): should return undefined for unmatched elements', () => { 73 | const $ = load('
  • '); 74 | expect($('ul').css('background-image')).toBeUndefined(); 75 | }); 76 | 77 | it('(prop): should return undefined for unmatched styles', () => { 78 | const el = cheerio('
  • '); 79 | expect(el.css('margin')).toBeUndefined(); 80 | }); 81 | 82 | describe('(prop, function):', () => { 83 | let $el: Cheerio; 84 | beforeEach(() => { 85 | const $ = load( 86 | '
    ', 87 | ); 88 | $el = $('div'); 89 | }); 90 | 91 | it('should iterate over the selection', () => { 92 | let count = 0; 93 | $el.css('margin', function (idx, value) { 94 | expect(idx).toBe(count); 95 | expect(value).toBe(`${count}px`); 96 | expect(this).toBe($el[count]); 97 | count++; 98 | return undefined; 99 | }); 100 | expect(count).toBe(3); 101 | }); 102 | 103 | it('should set each attribute independently', () => { 104 | const values = ['4px', '', undefined]; 105 | $el.css('margin', (idx) => values[idx]); 106 | expect($el.eq(0).attr('style')).toBe('margin: 4px;'); 107 | expect($el.eq(1).attr('style')).toBe(''); 108 | expect($el.eq(2).attr('style')).toBe('margin: 2px;'); 109 | }); 110 | }); 111 | 112 | it('(obj): should set each key and val', () => { 113 | const el = cheerio('
  • '); 114 | el.css({ foo: 0 } as never); 115 | expect(el.eq(0).attr('style')).toBe('padding: 0; foo: 0;'); 116 | expect(el.eq(1).attr('style')).toBe('foo: 0;'); 117 | }); 118 | 119 | describe('parser', () => { 120 | it('should allow any whitespace between declarations', () => { 121 | const el = cheerio('
  • '); 122 | expect(el.css(['one', 'two', 'five'])).toStrictEqual({ 123 | one: '0', 124 | two: '1', 125 | }); 126 | }); 127 | 128 | it('should add malformed values to previous field (#1134)', () => { 129 | const el = cheerio( 130 | '', 131 | ); 132 | expect(el.css('background-image')).toStrictEqual( 133 | 'url(data:image/png;base64,iVBORw0KGgo)', 134 | ); 135 | }); 136 | }); 137 | }); 138 | }); 139 | -------------------------------------------------------------------------------- /src/api/css.ts: -------------------------------------------------------------------------------- 1 | import { domEach } from '../utils.js'; 2 | import { isTag, type Element, type AnyNode } from 'domhandler'; 3 | import type { Cheerio } from '../cheerio.js'; 4 | 5 | /** 6 | * Get the value of a style property for the first element in the set of matched 7 | * elements. 8 | * 9 | * @category CSS 10 | * @param names - Optionally the names of the properties of interest. 11 | * @returns A map of all of the style properties. 12 | * @see {@link https://api.jquery.com/css/} 13 | */ 14 | export function css( 15 | this: Cheerio, 16 | names?: string[], 17 | ): Record | undefined; 18 | /** 19 | * Get the value of a style property for the first element in the set of matched 20 | * elements. 21 | * 22 | * @category CSS 23 | * @param name - The name of the property. 24 | * @returns The property value for the given name. 25 | * @see {@link https://api.jquery.com/css/} 26 | */ 27 | export function css( 28 | this: Cheerio, 29 | name: string, 30 | ): string | undefined; 31 | /** 32 | * Set one CSS property for every matched element. 33 | * 34 | * @category CSS 35 | * @param prop - The name of the property. 36 | * @param val - The new value. 37 | * @returns The instance itself. 
38 | * @see {@link https://api.jquery.com/css/} 39 | */ 40 | export function css( 41 | this: Cheerio, 42 | prop: string, 43 | val: 44 | | string 45 | | ((this: Element, i: number, style: string) => string | undefined), 46 | ): Cheerio; 47 | /** 48 | * Set multiple CSS properties for every matched element. 49 | * 50 | * @category CSS 51 | * @param map - A map of property names and values. 52 | * @returns The instance itself. 53 | * @see {@link https://api.jquery.com/css/} 54 | */ 55 | export function css( 56 | this: Cheerio, 57 | map: Record, 58 | ): Cheerio; 59 | /** 60 | * Set multiple CSS properties for every matched element. 61 | * 62 | * @category CSS 63 | * @param prop - The names of the properties. 64 | * @param val - The new values. 65 | * @returns The instance itself. 66 | * @see {@link https://api.jquery.com/css/} 67 | */ 68 | export function css( 69 | this: Cheerio, 70 | prop?: string | string[] | Record, 71 | val?: 72 | | string 73 | | ((this: Element, i: number, style: string) => string | undefined), 74 | ): Cheerio | Record | string | undefined { 75 | if ( 76 | (prop != null && val != null) || 77 | // When `prop` is a "plain" object 78 | (typeof prop === 'object' && !Array.isArray(prop)) 79 | ) { 80 | return domEach(this, (el, i) => { 81 | if (isTag(el)) { 82 | // `prop` can't be an array here anymore. 83 | setCss(el, prop as string, val, i); 84 | } 85 | }); 86 | } 87 | 88 | if (this.length === 0) { 89 | return undefined; 90 | } 91 | 92 | return getCss(this[0], prop as string); 93 | } 94 | 95 | /** 96 | * Set styles of all elements. 97 | * 98 | * @private 99 | * @param el - Element to set style of. 100 | * @param prop - Name of property. 101 | * @param value - Value to set property to. 102 | * @param idx - Optional index within the selection. 
103 | */ 104 | function setCss( 105 | el: Element, 106 | prop: string | Record, 107 | value: 108 | | string 109 | | ((this: Element, i: number, style: string) => string | undefined) 110 | | undefined, 111 | idx: number, 112 | ) { 113 | if (typeof prop === 'string') { 114 | const styles = getCss(el); 115 | 116 | const val = 117 | typeof value === 'function' ? value.call(el, idx, styles[prop]) : value; 118 | 119 | if (val === '') { 120 | delete styles[prop]; 121 | } else if (val != null) { 122 | styles[prop] = val; 123 | } 124 | 125 | el.attribs['style'] = stringify(styles); 126 | } else if (typeof prop === 'object') { 127 | const keys = Object.keys(prop); 128 | for (let i = 0; i < keys.length; i++) { 129 | const k = keys[i]; 130 | setCss(el, k, prop[k], i); 131 | } 132 | } 133 | } 134 | 135 | /** 136 | * Get the parsed styles of the first element. 137 | * 138 | * @private 139 | * @category CSS 140 | * @param el - Element to get styles from. 141 | * @param props - Optionally the names of the properties of interest. 142 | * @returns The parsed styles. 143 | */ 144 | function getCss(el: AnyNode, props?: string[]): Record; 145 | /** 146 | * Get a property from the parsed styles of the first element. 147 | * 148 | * @private 149 | * @category CSS 150 | * @param el - Element to get styles from. 151 | * @param prop - Name of the prop. 152 | * @returns The value of the property. 
153 | */ 154 | function getCss(el: AnyNode, prop: string): string | undefined; 155 | function getCss( 156 | el: AnyNode, 157 | prop?: string | string[], 158 | ): Record | string | undefined { 159 | if (!el || !isTag(el)) return; 160 | 161 | const styles = parse(el.attribs['style']); 162 | if (typeof prop === 'string') { 163 | return styles[prop]; 164 | } 165 | if (Array.isArray(prop)) { 166 | const newStyles: Record = {}; 167 | for (const item of prop) { 168 | if (styles[item] != null) { 169 | newStyles[item] = styles[item]; 170 | } 171 | } 172 | return newStyles; 173 | } 174 | return styles; 175 | } 176 | 177 | /** 178 | * Stringify `obj` to styles. 179 | * 180 | * @private 181 | * @category CSS 182 | * @param obj - Object to stringify. 183 | * @returns The serialized styles. 184 | */ 185 | function stringify(obj: Record): string { 186 | return Object.keys(obj).reduce( 187 | (str, prop) => `${str}${str ? ' ' : ''}${prop}: ${obj[prop]};`, 188 | '', 189 | ); 190 | } 191 | 192 | /** 193 | * Parse `styles`. 194 | * 195 | * @private 196 | * @category CSS 197 | * @param styles - Styles to be parsed. 198 | * @returns The parsed styles. 
199 | */ 200 | function parse(styles: string): Record { 201 | styles = (styles || '').trim(); 202 | 203 | if (!styles) return {}; 204 | 205 | const obj: Record = {}; 206 | 207 | let key: string | undefined; 208 | 209 | for (const str of styles.split(';')) { 210 | const n = str.indexOf(':'); 211 | // If there is no :, or if it is the first/last character, add to the previous item's value 212 | if (n < 1 || n === str.length - 1) { 213 | const trimmed = str.trimEnd(); 214 | if (trimmed.length > 0 && key !== undefined) { 215 | obj[key] += `;${trimmed}`; 216 | } 217 | } else { 218 | key = str.slice(0, n).trim(); 219 | obj[key] = str.slice(n + 1).trim(); 220 | } 221 | } 222 | 223 | return obj; 224 | } 225 | -------------------------------------------------------------------------------- /src/api/extract.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect, expectTypeOf } from 'vitest'; 2 | import * as fixtures from '../__fixtures__/fixtures.js'; 3 | import { load } from '../load-parse.js'; 4 | 5 | interface RedSelObject { 6 | red: string | undefined; 7 | sel: string | undefined; 8 | } 9 | 10 | interface RedSelMultipleObject { 11 | red: string[]; 12 | sel: string[]; 13 | } 14 | 15 | describe('$.extract', () => { 16 | it('should return an empty object when no selectors are provided', () => { 17 | const $ = load(fixtures.eleven); 18 | const $root = $.root(); 19 | 20 | expectTypeOf($root.extract({})).toEqualTypeOf>(); 21 | const emptyExtract = $root.extract({}); 22 | expect(emptyExtract).toStrictEqual({}); 23 | }); 24 | 25 | it('should return undefined for selectors that do not match any elements', () => { 26 | const $ = load(fixtures.eleven); 27 | const $root = $.root(); 28 | 29 | expectTypeOf($root.extract({ foo: 'bar' })).toEqualTypeOf<{ 30 | foo: string | undefined; 31 | }>(); 32 | const simpleExtract = $root.extract({ foo: 'bar' }); 33 | expect(simpleExtract).toStrictEqual({ foo: undefined }); 34 | 
}); 35 | 36 | it('should extract values for existing selectors', () => { 37 | const $ = load(fixtures.eleven); 38 | const $root = $.root(); 39 | 40 | expectTypeOf($root.extract({ red: '.red' })).toEqualTypeOf<{ 41 | red: string | undefined; 42 | }>(); 43 | expect($root.extract({ red: '.red' })).toStrictEqual({ red: 'Four' }); 44 | 45 | expectTypeOf( 46 | $root.extract({ red: '.red', sel: '.sel' }), 47 | ).toEqualTypeOf(); 48 | expect($root.extract({ red: '.red', sel: '.sel' })).toStrictEqual({ 49 | red: 'Four', 50 | sel: 'Three', 51 | }); 52 | }); 53 | 54 | it('should extract values using descriptor objects', () => { 55 | const $ = load(fixtures.eleven); 56 | const $root = $.root(); 57 | 58 | expectTypeOf( 59 | $root.extract({ 60 | red: { selector: '.red' }, 61 | sel: { selector: '.sel' }, 62 | }), 63 | ).toEqualTypeOf(); 64 | expect( 65 | $root.extract({ 66 | red: { selector: '.red' }, 67 | sel: { selector: '.sel' }, 68 | }), 69 | ).toStrictEqual({ red: 'Four', sel: 'Three' }); 70 | }); 71 | 72 | it('should extract multiple values for selectors', () => { 73 | const $ = load(fixtures.eleven); 74 | const $root = $.root(); 75 | 76 | expectTypeOf( 77 | $root.extract({ 78 | red: ['.red'], 79 | sel: ['.sel'], 80 | }), 81 | ).toEqualTypeOf<{ red: string[]; sel: string[] }>(); 82 | const multipleExtract = $root.extract({ 83 | red: ['.red'], 84 | sel: ['.sel'], 85 | }); 86 | expectTypeOf(multipleExtract).toEqualTypeOf(); 87 | expect(multipleExtract).toStrictEqual({ 88 | red: ['Four', 'Five', 'Nine'], 89 | sel: ['Three', 'Nine', 'Eleven'], 90 | }); 91 | }); 92 | 93 | it('should extract custom properties specified by the user', () => { 94 | const $ = load(fixtures.eleven); 95 | const $root = $.root(); 96 | 97 | expectTypeOf( 98 | $root.extract({ 99 | red: { selector: '.red', value: 'outerHTML' }, 100 | sel: { selector: '.sel', value: 'tagName' }, 101 | }), 102 | ).toEqualTypeOf(); 103 | expect( 104 | $root.extract({ 105 | red: { selector: '.red', value: 'outerHTML' }, 106 | 
sel: { selector: '.sel', value: 'tagName' }, 107 | }), 108 | ).toStrictEqual({ red: '
  • Four
  • ', sel: 'LI' }); 109 | }); 110 | 111 | it('should extract multiple custom properties for selectors', () => { 112 | const $ = load(fixtures.eleven); 113 | const $root = $.root(); 114 | 115 | expectTypeOf( 116 | $root.extract({ 117 | red: [{ selector: '.red', value: 'outerHTML' }], 118 | }), 119 | ).toEqualTypeOf<{ red: string[] }>(); 120 | expect( 121 | $root.extract({ 122 | red: [{ selector: '.red', value: 'outerHTML' }], 123 | }), 124 | ).toStrictEqual({ 125 | red: [ 126 | '
  • Four
  • ', 127 | '
  • Five
  • ', 128 | '
  • Nine
  • ', 129 | ], 130 | }); 131 | }); 132 | 133 | it('should extract values using custom extraction functions', () => { 134 | const $ = load(fixtures.eleven); 135 | const $root = $.root(); 136 | 137 | expectTypeOf( 138 | $root.extract({ 139 | red: { 140 | selector: '.red', 141 | value: (el, key) => `${key}=${$(el).text()}`, 142 | }, 143 | }), 144 | ).toEqualTypeOf<{ red: string | undefined }>(); 145 | expect( 146 | $root.extract({ 147 | red: { 148 | selector: '.red', 149 | value: (el, key) => `${key}=${$(el).text()}`, 150 | }, 151 | }), 152 | ).toStrictEqual({ red: 'red=Four' }); 153 | }); 154 | 155 | it('should correctly type check custom extraction functions returning non-string values', () => { 156 | const $ = load(fixtures.eleven); 157 | const $root = $.root(); 158 | 159 | expectTypeOf( 160 | $root.extract({ 161 | red: { 162 | selector: '.red', 163 | value: (el) => $(el).text().length, 164 | }, 165 | }), 166 | ).toEqualTypeOf<{ red: number | undefined }>(); 167 | expect( 168 | $root.extract({ 169 | red: { 170 | selector: '.red', 171 | value: (el) => $(el).text().length, 172 | }, 173 | }), 174 | ).toStrictEqual({ red: 4 }); 175 | }); 176 | 177 | it('should extract multiple values using custom extraction functions', () => { 178 | const $ = load(fixtures.eleven); 179 | const $root = $.root(); 180 | 181 | expectTypeOf( 182 | $root.extract({ 183 | red: [ 184 | { 185 | selector: '.red', 186 | value: (el, key) => `${key}=${$(el).text()}`, 187 | }, 188 | ], 189 | }), 190 | ).toEqualTypeOf<{ red: string[] }>(); 191 | expect( 192 | $root.extract({ 193 | red: [ 194 | { 195 | selector: '.red', 196 | value: (el, key) => `${key}=${$(el).text()}`, 197 | }, 198 | ], 199 | }), 200 | ).toStrictEqual({ red: ['red=Four', 'red=Five', 'red=Nine'] }); 201 | }); 202 | 203 | it('should extract nested objects based on selectors', () => { 204 | const $ = load(fixtures.eleven); 205 | const $root = $.root(); 206 | 207 | expectTypeOf( 208 | $root.extract({ 209 | section: { 210 | selector: 
'ul:nth(1)', 211 | value: { 212 | red: '.red', 213 | sel: '.blue', 214 | }, 215 | }, 216 | }), 217 | ).toEqualTypeOf<{ 218 | section: { red: string | undefined; sel: string | undefined } | undefined; 219 | }>(); 220 | const subExtractObject = $root.extract({ 221 | section: { 222 | selector: 'ul:nth(1)', 223 | value: { 224 | red: '.red', 225 | sel: '.blue', 226 | }, 227 | }, 228 | }); 229 | expectTypeOf(subExtractObject).toEqualTypeOf<{ 230 | section: RedSelObject | undefined; 231 | }>(); 232 | expect(subExtractObject).toStrictEqual({ 233 | section: { 234 | red: 'Five', 235 | sel: 'Seven', 236 | }, 237 | }); 238 | }); 239 | 240 | it('should correctly type check nested objects returning non-string values', () => { 241 | const $ = load(fixtures.eleven); 242 | const $root = $.root(); 243 | 244 | expectTypeOf( 245 | $root.extract({ 246 | section: { 247 | selector: 'ul:nth(1)', 248 | value: { 249 | red: { 250 | selector: '.red', 251 | value: (el) => $(el).text().length, 252 | }, 253 | }, 254 | }, 255 | }), 256 | ).toEqualTypeOf<{ 257 | section: { red: number | undefined } | undefined; 258 | }>(); 259 | expect( 260 | $root.extract({ 261 | section: { 262 | selector: 'ul:nth(1)', 263 | value: { 264 | red: { 265 | selector: '.red', 266 | value: (el) => $(el).text().length, 267 | }, 268 | }, 269 | }, 270 | }), 271 | ).toStrictEqual({ 272 | section: { 273 | red: 4, 274 | }, 275 | }); 276 | }); 277 | 278 | it('should handle missing href properties without errors (#4239)', () => { 279 | const $ = load(fixtures.eleven); 280 | expect<{ links: string[] }>( 281 | $.extract({ links: [{ selector: 'li', value: 'href' }] }), 282 | ).toStrictEqual({ links: [] }); 283 | }); 284 | }); 285 | -------------------------------------------------------------------------------- /src/api/extract.ts: -------------------------------------------------------------------------------- 1 | import type { AnyNode, Element } from 'domhandler'; 2 | import type { Cheerio } from '../cheerio.js'; 3 | import type 
{ prop } from './attributes.js'; 4 | 5 | type ExtractDescriptorFn = ( 6 | el: Element, 7 | key: string, 8 | // TODO: This could be typed with ExtractedMap 9 | obj: Record, 10 | ) => unknown; 11 | 12 | interface ExtractDescriptor { 13 | selector: string; 14 | value?: string | ExtractDescriptorFn | ExtractMap; 15 | } 16 | 17 | type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor]; 18 | 19 | export type ExtractMap = Record; 20 | 21 | type ExtractedValue = V extends [ 22 | string | ExtractDescriptor, 23 | ] 24 | ? NonNullable>[] 25 | : V extends string 26 | ? string | undefined 27 | : V extends ExtractDescriptor 28 | ? V['value'] extends infer U 29 | ? U extends ExtractMap 30 | ? ExtractedMap | undefined 31 | : U extends ExtractDescriptorFn 32 | ? ReturnType | undefined 33 | : ReturnType | undefined 34 | : never 35 | : never; 36 | 37 | export type ExtractedMap = { 38 | [key in keyof M]: ExtractedValue; 39 | }; 40 | 41 | function getExtractDescr( 42 | descr: string | ExtractDescriptor, 43 | ): Required { 44 | if (typeof descr === 'string') { 45 | return { selector: descr, value: 'textContent' }; 46 | } 47 | 48 | return { 49 | selector: descr.selector, 50 | value: descr.value ?? 'textContent', 51 | }; 52 | } 53 | 54 | /** 55 | * Extract multiple values from a document, and store them in an object. 56 | * 57 | * @param map - An object containing key-value pairs. The keys are the names of 58 | * the properties to be created on the object, and the values are the 59 | * selectors to be used to extract the values. 60 | * @returns An object containing the extracted values. 61 | */ 62 | export function extract( 63 | this: Cheerio, 64 | map: M, 65 | ): ExtractedMap { 66 | const ret: Record = {}; 67 | 68 | for (const key in map) { 69 | const descr = map[key]; 70 | const isArray = Array.isArray(descr); 71 | 72 | const { selector, value } = getExtractDescr(isArray ? 
descr[0] : descr); 73 | 74 | const fn: ExtractDescriptorFn = 75 | typeof value === 'function' 76 | ? value 77 | : typeof value === 'string' 78 | ? (el: Element) => this._make(el).prop(value) 79 | : (el: Element) => this._make(el).extract(value); 80 | 81 | if (isArray) { 82 | ret[key] = this._findBySelector(selector, Number.POSITIVE_INFINITY) 83 | .map((_, el) => fn(el, key, ret)) 84 | .get(); 85 | } else { 86 | const $ = this._findBySelector(selector, 1); 87 | ret[key] = $.length > 0 ? fn($[0], key, ret) : undefined; 88 | } 89 | } 90 | 91 | return ret as ExtractedMap; 92 | } 93 | -------------------------------------------------------------------------------- /src/api/forms.spec.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect, beforeEach } from 'vitest'; 2 | import { type CheerioAPI } from '../index.js'; 3 | import { cheerio, forms } from '../__fixtures__/fixtures.js'; 4 | 5 | describe('$(...)', () => { 6 | let $: CheerioAPI; 7 | 8 | beforeEach(() => { 9 | $ = cheerio.load(forms); 10 | }); 11 | 12 | describe('.serializeArray', () => { 13 | it('() : should get form controls', () => { 14 | expect($('form#simple').serializeArray()).toStrictEqual([ 15 | { 16 | name: 'fruit', 17 | value: 'Apple', 18 | }, 19 | ]); 20 | }); 21 | 22 | it('() : should get nested form controls', () => { 23 | expect($('form#nested').serializeArray()).toHaveLength(2); 24 | const data = $('form#nested').serializeArray(); 25 | data.sort((a, b) => (a.value > b.value ? 
1 : -1)); 26 | expect(data).toStrictEqual([ 27 | { 28 | name: 'fruit', 29 | value: 'Apple', 30 | }, 31 | { 32 | name: 'vegetable', 33 | value: 'Carrot', 34 | }, 35 | ]); 36 | }); 37 | 38 | it('() : should not get disabled form controls', () => { 39 | expect($('form#disabled').serializeArray()).toStrictEqual([]); 40 | }); 41 | 42 | it('() : should not get form controls with the wrong type', () => { 43 | expect($('form#submit').serializeArray()).toStrictEqual([ 44 | { 45 | name: 'fruit', 46 | value: 'Apple', 47 | }, 48 | ]); 49 | }); 50 | 51 | it('() : should get selected options', () => { 52 | expect($('form#select').serializeArray()).toStrictEqual([ 53 | { 54 | name: 'fruit', 55 | value: 'Orange', 56 | }, 57 | ]); 58 | }); 59 | 60 | it('() : should not get unnamed form controls', () => { 61 | expect($('form#unnamed').serializeArray()).toStrictEqual([ 62 | { 63 | name: 'fruit', 64 | value: 'Apple', 65 | }, 66 | ]); 67 | }); 68 | 69 | it('() : should get multiple selected options', () => { 70 | expect($('form#multiple').serializeArray()).toHaveLength(2); 71 | const data = $('form#multiple').serializeArray(); 72 | data.sort((a, b) => (a.value > b.value ? 1 : -1)); 73 | expect(data).toStrictEqual([ 74 | { 75 | name: 'fruit', 76 | value: 'Apple', 77 | }, 78 | { 79 | name: 'fruit', 80 | value: 'Orange', 81 | }, 82 | ]); 83 | }); 84 | 85 | it('() : should get individually selected elements', () => { 86 | const data = $('form#nested input').serializeArray(); 87 | data.sort((a, b) => (a.value > b.value ? 
1 : -1)); 88 | expect(data).toStrictEqual([ 89 | { 90 | name: 'fruit', 91 | value: 'Apple', 92 | }, 93 | { 94 | name: 'vegetable', 95 | value: 'Carrot', 96 | }, 97 | ]); 98 | }); 99 | 100 | it('() : should standardize line breaks', () => { 101 | expect($('form#textarea').serializeArray()).toStrictEqual([ 102 | { 103 | name: 'fruits', 104 | value: 'Apple\r\nOrange', 105 | }, 106 | ]); 107 | }); 108 | 109 | it("() : shouldn't serialize the empty string", () => { 110 | expect($('').serializeArray()).toStrictEqual([]); 111 | expect( 112 | $('').serializeArray(), 113 | ).toStrictEqual([]); 114 | expect( 115 | $('').serializeArray(), 116 | ).toStrictEqual([ 117 | { 118 | name: 'fruit', 119 | value: 'pineapple', 120 | }, 121 | ]); 122 | }); 123 | 124 | it('() : should serialize inputs without value attributes', () => { 125 | expect($('').serializeArray()).toStrictEqual([ 126 | { 127 | name: 'fruit', 128 | value: '', 129 | }, 130 | ]); 131 | }); 132 | }); 133 | 134 | describe('.serialize', () => { 135 | it('() : should get form controls', () => { 136 | expect($('form#simple').serialize()).toBe('fruit=Apple'); 137 | }); 138 | 139 | it('() : should get nested form controls', () => { 140 | expect($('form#nested').serialize()).toBe('fruit=Apple&vegetable=Carrot'); 141 | }); 142 | 143 | it('() : should not get disabled form controls', () => { 144 | expect($('form#disabled').serialize()).toBe(''); 145 | }); 146 | 147 | it('() : should get multiple selected options', () => { 148 | expect($('form#multiple').serialize()).toBe('fruit=Apple&fruit=Orange'); 149 | }); 150 | 151 | it("() : should encode spaces as +'s", () => { 152 | expect($('form#spaces').serialize()).toBe('fruit=Blood+orange'); 153 | }); 154 | }); 155 | }); 156 | -------------------------------------------------------------------------------- /src/api/forms.ts: -------------------------------------------------------------------------------- 1 | import { isTag, type AnyNode } from 'domhandler'; 2 | import type { 
Cheerio } from '../cheerio.js'; 3 | 4 | /* 5 | * https://github.com/jquery/jquery/blob/2.1.3/src/manipulation/var/rcheckableType.js 6 | * https://github.com/jquery/jquery/blob/2.1.3/src/serialize.js 7 | */ 8 | const submittableSelector = 'input,select,textarea,keygen'; 9 | const r20 = /%20/g; 10 | const rCRLF = /\r?\n/g; 11 | 12 | /** 13 | * Encode a set of form elements as a string for submission. 14 | * 15 | * @category Forms 16 | * @example 17 | * 18 | * ```js 19 | * $('
    ').serialize(); 20 | * //=> 'foo=bar' 21 | * ``` 22 | * 23 | * @returns The serialized form. 24 | * @see {@link https://api.jquery.com/serialize/} 25 | */ 26 | export function serialize(this: Cheerio): string { 27 | // Convert form elements into name/value objects 28 | const arr = this.serializeArray(); 29 | 30 | // Serialize each element into a key/value string 31 | const retArr = arr.map( 32 | (data) => 33 | `${encodeURIComponent(data.name)}=${encodeURIComponent(data.value)}`, 34 | ); 35 | 36 | // Return the resulting serialization 37 | return retArr.join('&').replace(r20, '+'); 38 | } 39 | 40 | /** 41 | * Encode a set of form elements as an array of names and values. 42 | * 43 | * @category Forms 44 | * @example 45 | * 46 | * ```js 47 | * $('
    ').serializeArray(); 48 | * //=> [ { name: 'foo', value: 'bar' } ] 49 | * ``` 50 | * 51 | * @returns The serialized form. 52 | * @see {@link https://api.jquery.com/serializeArray/} 53 | */ 54 | export function serializeArray( 55 | this: Cheerio, 56 | ): { 57 | name: string; 58 | value: string; 59 | }[] { 60 | // Resolve all form elements from either forms or collections of form elements 61 | return this.map((_, elem) => { 62 | const $elem = this._make(elem); 63 | if (isTag(elem) && elem.name === 'form') { 64 | return $elem.find(submittableSelector).toArray(); 65 | } 66 | return $elem.filter(submittableSelector).toArray(); 67 | }) 68 | .filter( 69 | // Verify elements have a name (`attr.name`) and are not disabled (`:enabled`) 70 | '[name!=""]:enabled' + 71 | // And cannot be clicked (`[type=submit]`) or are used in `x-www-form-urlencoded` (`[type=file]`) 72 | ':not(:submit, :button, :image, :reset, :file)' + 73 | // And are either checked/don't have a checkable state 74 | ':matches([checked], :not(:checkbox, :radio))', 75 | // Convert each of the elements to its value(s) 76 | ) 77 | .map< 78 | AnyNode, 79 | { 80 | name: string; 81 | value: string; 82 | } 83 | >((_, elem) => { 84 | const $elem = this._make(elem); 85 | const name = $elem.attr('name')!; // We have filtered for elements with a name before. 86 | // If there is no value set (e.g. `undefined`, `null`), then default value to empty 87 | const value = $elem.val() ?? ''; 88 | 89 | // If we have an array of values (e.g. `