├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .vscode ├── extensions.json └── settings.json ├── LICENSE ├── README.md ├── cheerio.d.ts └── mod.ts /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | *.bat text eol=crlf 3 | *.cmd text eol=crlf 4 | *.ps1 text eol=crlf 5 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: ci 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | name: ${{ matrix.kind }} ${{ matrix.os }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | matrix: 11 | os: [macOS-latest, ubuntu-latest, windows-latest] 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Setup Deno 16 | uses: denoland/setup-deno@main 17 | 18 | - name: Format 19 | run: deno fmt --check 20 | 21 | - name: Release 22 | uses: softprops/action-gh-release@v1 23 | if: | 24 | matrix.os == 'ubuntu-latest' && 25 | startsWith(github.repository, 'justjavac') && 26 | startsWith(github.ref, 'refs/tags/') 27 | env: 28 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 29 | with: 30 | draft: true 31 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | .cache 3 | .DS_Store 4 | *bak 5 | .history 6 | .temp/** 7 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": ["denoland.vscode-deno"] 3 | } 4 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "deno.enable": true, 3 | "deno.unstable": true, 4 
| "deno.lint": false 5 | } 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) justjavac. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # deno_cheerio 2 | 3 | [![Build Status](https://github.com/justjavac/deno_cheerio/workflows/ci/badge.svg?branch=master)](https://github.com/justjavac/deno_cheerio/actions) 4 | 5 | > 记录一次在 Deno 中使用 cheerio 库的过程。 6 | 7 | 如何在 Deno 中使用 [cheerio](https://github.com/cheeriojs/cheerio)。 8 | 9 | cheerio 是一个非常流行的 npm 包,为服务器特别定制的,快速、灵活、实施的 jQuery 核心实现。可以说 cheerio 就是一个 Node.js 10 | 版的 jQuery。 11 | 12 | 那么我们在 Deno 中如何使用这个库呢? 
13 | 14 | ## 使用 15 | 16 | 如果直接在 Deno 中使用源码,像这样: 17 | 18 | ```ts 19 | import * as Cheerio from "https://raw.githubusercontent.com/cheeriojs/cheerio/v1.0.0/lib/cheerio.js"; 20 | ``` 21 | 22 | 会报错: 23 | 24 | ```plain 25 | error: Uncaught ReferenceError: require is not defined 26 | var parse = require('./parse'), 27 | ^ 28 | at https://raw.githubusercontent.com/cheeriojs/cheerio/v1.0.0/lib/cheerio.js:6:13 29 | ``` 30 | 31 | 因为 Deno 并不支持 commonjs 规范,只支持 esm。 32 | 33 | 因此我们必须借助 jspm.io(或其他类似服务)来将 commonjs 转换为兼容的 esm 格式。 34 | 35 | 我们可以这样: 36 | 37 | ```ts 38 | import cheerio from "https://dev.jspm.io/npm:cheerio/index.js"; 39 | 40 | const $ = cheerio.load('<h2 class="title">Hello world</h2>'); 41 | 42 | $("h2.title").text("Hello Deno!"); 43 | $("h2").addClass("deno"); 44 | 45 | console.log($.html()); 46 | ``` 47 | 48 | 我们试着运行一下: 49 | 50 | ```shell 51 | deno run mod.ts 52 | ``` 53 | 54 | 成功输出了 `<h2 class="title deno">Hello Deno!</h2>`。 55 | 56 | ## 添加 TypeScript 支持 57 | 58 | 好在 @types 仓库提供了 cheerio 的类型定义文件,我们在 mod.ts 顶部增加一行: 59 | 60 | ```diff 61 | +// @deno-types="https://dev.jspm.io/@types/cheerio/index.d.ts" 62 | import cheerio from "https://dev.jspm.io/cheerio/index.js"; 63 | ``` 64 | 65 | 运行一下,又报错了: 66 | 67 | ```plain 68 | error: relative import path "node" not prefixed with / or ./ or ../ Imported 69 | from "https://dev.jspm.io/npm:@types/cheerio@0.22.21/index.d.ts" 70 | ``` 71 | 72 | 看来这个 d.ts 文件和 deno 不兼容,把这个文件下载到本地,新建 cheerio.d.ts 改造一下。 73 | 74 | 问题出在第 14 行,`/// <reference types="node" />` 与 Deno 不兼容,于是删掉这一行: 75 | 76 | ```diff 77 | -/// <reference types="node" /> 78 | - 79 | ``` 80 | 81 | 再次运行,又报错: 82 | 83 | ```plain 84 | error: TS2580 [ERROR]: Cannot find name 'Buffer'. Do you need to install type definitions for node? Try `npm i @types/node`. 85 | load(html: string | Buffer, options?: CheerioOptionsInterface): CheerioStatic; 86 | ~~~~~~ 87 | at https://cdn.jsdelivr.net/gh/justjavac/deno_cheerio/cheerio.d.ts:310:23 88 | ``` 89 | 90 | `Buffer` 是 nodejs 的类型,所以报错了。 91 | 92 | 其实 Deno 也有 `Buffer`,我们需要使用 `Deno.Buffer` 来引用,考虑到 Deno 的 `Buffer` 和 Node.js 93 | 的并不兼容,于是直接删掉这个类型。 94 | 95 | (补充 2021-04-19,Deno 1.9 已经弃用了 `Deno.Buffer`,在 2.0 会将其移除) 96 | 97 | ```diff 98 | - load(html: string | Buffer, options?: CheerioOptionsInterface): CheerioStatic; 99 | + load(html: string, options?: CheerioOptionsInterface): CheerioStatic; 100 | ``` 101 | 102 | 再次运行,终于得到了我们想要的结果: 103 | 104 | ```plain 105 |

<h2 class="title deno">Hello Deno!</h2>

106 | ``` 107 | 108 | ## 例子 109 | 110 | ```bash 111 | deno run https://cdn.jsdelivr.net/gh/justjavac/deno_cheerio/mod.ts 112 | ``` 113 | 114 | ## License 115 | 116 | [deno_cheerio](https://github.com/justjavac/deno_cheerio) is released under the 117 | MIT License. See the bundled [LICENSE](./LICENSE) file for details. 118 | -------------------------------------------------------------------------------- /cheerio.d.ts: -------------------------------------------------------------------------------- 1 | // Type definitions for Cheerio v0.22.0 2 | // Project: https://github.com/cheeriojs/cheerio 3 | // Definitions by: Bret Little 4 | // VILIC VANE 5 | // Wayne Maurer 6 | // Umar Nizamani 7 | // LiJinyao 8 | // Chennakrishna 9 | // AzSiAz 10 | // Ryo Ota 11 | // Hiroki Osame 12 | // Definitions: https://github.com/DefinitelyTyped/DefinitelyTyped 13 | 14 | declare namespace cheerio { 15 | type AttrFunction = ( 16 | el: CheerioElement, 17 | i: number, 18 | currentValue: string, 19 | ) => any; 20 | 21 | interface Cheerio { 22 | // Document References 23 | // Cheerio https://github.com/cheeriojs/cheerio 24 | // JQuery http://api.jquery.com 25 | 26 | [index: number]: CheerioElement; 27 | cheerio: string; 28 | length: number; 29 | 30 | // Attributes 31 | 32 | attr(): { [attr: string]: string }; 33 | attr(name: string): string | undefined; 34 | attr(name: string, value: AttrFunction): Cheerio; 35 | // `value` *can* be `any` here but: 36 | // 1. That makes type-checking the function-type useless 37 | // 2. It's converted to a string anyways 38 | attr(name: string, value: string): Cheerio; 39 | // The map's values *can* be `any` but they'll all be cast to strings 40 | // regardless. 
41 | attr(map: { [key: string]: any }): Cheerio; 42 | 43 | data(): any; 44 | data(name: string): any; 45 | data(name: string, value: any): any; 46 | 47 | val(): string; 48 | val(value: string): Cheerio; 49 | 50 | removeAttr(name: string): Cheerio; 51 | 52 | has(selector: string): Cheerio; 53 | has(element: CheerioElement): Cheerio; 54 | 55 | hasClass(className: string): boolean; 56 | addClass(classNames: string): Cheerio; 57 | 58 | removeClass(): Cheerio; 59 | removeClass(className: string): Cheerio; 60 | removeClass(func: (index: number, className: string) => string): Cheerio; 61 | 62 | toggleClass(className: string): Cheerio; 63 | toggleClass(className: string, toggleSwitch: boolean): Cheerio; 64 | toggleClass(toggleSwitch?: boolean): Cheerio; 65 | toggleClass( 66 | func: (index: number, className: string, toggleSwitch: boolean) => string, 67 | toggleSwitch?: boolean, 68 | ): Cheerio; 69 | 70 | is(selector: string): boolean; 71 | is(element: CheerioElement): boolean; 72 | is(element: CheerioElement[]): boolean; 73 | is(selection: Cheerio): boolean; 74 | is(func: (index: number, element: CheerioElement) => boolean): boolean; 75 | 76 | // Form 77 | serialize(): string; 78 | serializeArray(): { name: string; value: string }[]; 79 | 80 | // Traversing 81 | 82 | find(selector: string): Cheerio; 83 | find(element: Cheerio): Cheerio; 84 | 85 | parent(selector?: string): Cheerio; 86 | parents(selector?: string): Cheerio; 87 | parentsUntil(selector?: string, filter?: string): Cheerio; 88 | parentsUntil(element: CheerioElement, filter?: string): Cheerio; 89 | parentsUntil(element: Cheerio, filter?: string): Cheerio; 90 | 91 | prop(name: string): any; 92 | prop(name: string, value: any): Cheerio; 93 | 94 | closest(): Cheerio; 95 | closest(selector: string): Cheerio; 96 | 97 | next(selector?: string): Cheerio; 98 | nextAll(): Cheerio; 99 | nextAll(selector: string): Cheerio; 100 | 101 | nextUntil(selector?: string, filter?: string): Cheerio; 102 | nextUntil(element: 
CheerioElement, filter?: string): Cheerio; 103 | nextUntil(element: Cheerio, filter?: string): Cheerio; 104 | 105 | prev(selector?: string): Cheerio; 106 | prevAll(): Cheerio; 107 | prevAll(selector: string): Cheerio; 108 | 109 | prevUntil(selector?: string, filter?: string): Cheerio; 110 | prevUntil(element: CheerioElement, filter?: string): Cheerio; 111 | prevUntil(element: Cheerio, filter?: string): Cheerio; 112 | 113 | slice(start: number, end?: number): Cheerio; 114 | 115 | siblings(selector?: string): Cheerio; 116 | 117 | children(selector?: string): Cheerio; 118 | 119 | contents(): Cheerio; 120 | 121 | each(func: (index: number, element: CheerioElement) => any): Cheerio; 122 | map(func: (index: number, element: CheerioElement) => any): Cheerio; 123 | 124 | filter(selector: string): Cheerio; 125 | filter(selection: Cheerio): Cheerio; 126 | filter(element: CheerioElement): Cheerio; 127 | filter(elements: CheerioElement[]): Cheerio; 128 | filter(func: (index: number, element: CheerioElement) => boolean): Cheerio; 129 | 130 | not(selector: string): Cheerio; 131 | not(selection: Cheerio): Cheerio; 132 | not(element: CheerioElement): Cheerio; 133 | not(func: (index: number, element: CheerioElement) => boolean): Cheerio; 134 | 135 | first(): Cheerio; 136 | last(): Cheerio; 137 | 138 | eq(index: number): Cheerio; 139 | 140 | get(): any[]; 141 | get(index: number): any; 142 | 143 | index(): number; 144 | index(selector: string): number; 145 | index(selection: Cheerio): number; 146 | 147 | end(): Cheerio; 148 | 149 | add(selectorOrHtml: string): Cheerio; 150 | add(selector: string, context: Document): Cheerio; 151 | add(element: CheerioElement): Cheerio; 152 | add(elements: CheerioElement[]): Cheerio; 153 | add(selection: Cheerio): Cheerio; 154 | 155 | addBack(): Cheerio; 156 | addBack(filter: string): Cheerio; 157 | 158 | // Manipulation 159 | appendTo(target: Cheerio): Cheerio; 160 | prependTo(target: Cheerio): Cheerio; 161 | 162 | append(content: string, 
...contents: any[]): Cheerio; 163 | append(content: Document, ...contents: any[]): Cheerio; 164 | append(content: Document[], ...contents: any[]): Cheerio; 165 | append(content: Cheerio, ...contents: any[]): Cheerio; 166 | 167 | prepend(content: string, ...contents: any[]): Cheerio; 168 | prepend(content: Document, ...contents: any[]): Cheerio; 169 | prepend(content: Document[], ...contents: any[]): Cheerio; 170 | prepend(content: Cheerio, ...contents: any[]): Cheerio; 171 | 172 | after(content: string, ...contents: any[]): Cheerio; 173 | after(content: Document, ...contents: any[]): Cheerio; 174 | after(content: Document[], ...contents: any[]): Cheerio; 175 | after(content: Cheerio, ...contents: any[]): Cheerio; 176 | 177 | insertAfter(content: string): Cheerio; 178 | insertAfter(content: Document): Cheerio; 179 | insertAfter(content: Cheerio): Cheerio; 180 | 181 | before(content: string, ...contents: any[]): Cheerio; 182 | before(content: Document, ...contents: any[]): Cheerio; 183 | before(content: Document[], ...contents: any[]): Cheerio; 184 | before(content: Cheerio, ...contents: any[]): Cheerio; 185 | 186 | insertBefore(content: string): Cheerio; 187 | insertBefore(content: Document): Cheerio; 188 | insertBefore(content: Cheerio): Cheerio; 189 | 190 | remove(selector?: string): Cheerio; 191 | 192 | replaceWith(content: string): Cheerio; 193 | replaceWith(content: CheerioElement): Cheerio; 194 | replaceWith(content: CheerioElement[]): Cheerio; 195 | replaceWith(content: Cheerio): Cheerio; 196 | replaceWith(content: () => Cheerio): Cheerio; 197 | 198 | empty(): Cheerio; 199 | 200 | html(): string | null; 201 | html(html: string): Cheerio; 202 | 203 | text(): string; 204 | text(text: string): Cheerio; 205 | 206 | wrap(content: string): Cheerio; 207 | wrap(content: Document): Cheerio; 208 | wrap(content: Cheerio): Cheerio; 209 | 210 | css(propertyName: string): string; 211 | css(propertyNames: string[]): string[]; 212 | css(propertyName: string, value: string): 
Cheerio; 213 | css(propertyName: string, value: number): Cheerio; 214 | css( 215 | propertyName: string, 216 | func: (index: number, value: string) => string, 217 | ): Cheerio; 218 | css( 219 | propertyName: string, 220 | func: (index: number, value: string) => number, 221 | ): Cheerio; 222 | css(properties: Object): Cheerio; 223 | 224 | // Rendering 225 | 226 | // Miscellaneous 227 | 228 | clone(): Cheerio; 229 | 230 | // Not Documented 231 | 232 | toArray(): CheerioElement[]; 233 | } 234 | 235 | interface CheerioOptionsInterface { 236 | // Document References 237 | // Cheerio https://github.com/cheeriojs/cheerio 238 | // HTMLParser2 https://github.com/fb55/htmlparser2/wiki/Parser-options 239 | // DomHandler https://github.com/fb55/DomHandler 240 | 241 | xmlMode?: boolean; 242 | decodeEntities?: boolean; 243 | lowerCaseTags?: boolean; 244 | lowerCaseAttributeNames?: boolean; 245 | recognizeCDATA?: boolean; 246 | recognizeSelfClosing?: boolean; 247 | normalizeWhitespace?: boolean; 248 | withStartIndices?: boolean; 249 | withEndIndices?: boolean; 250 | ignoreWhitespace?: boolean; 251 | _useHtmlParser2?: boolean; 252 | } 253 | 254 | interface CheerioSelector { 255 | (selector: string): Cheerio; 256 | (selector: string, context: string): Cheerio; 257 | (selector: string, context: CheerioElement): Cheerio; 258 | (selector: string, context: CheerioElement[]): Cheerio; 259 | (selector: string, context: Cheerio): Cheerio; 260 | (selector: string, context: string, root: string): Cheerio; 261 | (selector: string, context: CheerioElement, root: string): Cheerio; 262 | (selector: string, context: CheerioElement[], root: string): Cheerio; 263 | (selector: string, context: Cheerio, root: string): Cheerio; 264 | (selector: any): Cheerio; 265 | } 266 | 267 | interface CheerioStatic extends CheerioSelector { 268 | // Document References 269 | // Cheerio https://github.com/cheeriojs/cheerio 270 | // JQuery http://api.jquery.com 271 | root(): Cheerio; 272 | contains(container: 
CheerioElement, contained: CheerioElement): boolean; 273 | parseHTML( 274 | data: string, 275 | context?: Document, 276 | keepScripts?: boolean, 277 | ): Document[]; 278 | 279 | html(options?: CheerioOptionsInterface): string; 280 | html( 281 | dom: string | Cheerio | CheerioElement, 282 | options?: CheerioOptionsInterface, 283 | ): string; 284 | 285 | xml(dom?: string | Cheerio | CheerioElement): string; 286 | } 287 | 288 | interface CheerioElement { 289 | // Document References 290 | // Node Console 291 | tagName: string; 292 | type: string; 293 | name: string; 294 | attribs: { [attr: string]: string }; 295 | children: CheerioElement[]; 296 | childNodes: CheerioElement[]; 297 | lastChild: CheerioElement; 298 | firstChild: CheerioElement; 299 | next: CheerioElement; 300 | nextSibling: CheerioElement; 301 | prev: CheerioElement; 302 | previousSibling: CheerioElement; 303 | parent: CheerioElement; 304 | parentNode: CheerioElement; 305 | nodeValue: string; 306 | data?: string; 307 | startIndex?: number; 308 | } 309 | 310 | interface CheerioAPI extends CheerioSelector, CheerioStatic { 311 | load(html: string, options?: CheerioOptionsInterface): CheerioStatic; 312 | load( 313 | element: CheerioElement, 314 | options?: CheerioOptionsInterface, 315 | ): CheerioStatic; 316 | } 317 | 318 | interface Document {} 319 | } 320 | 321 | declare const cheerio: cheerio.CheerioAPI; 322 | export default cheerio; 323 | -------------------------------------------------------------------------------- /mod.ts: -------------------------------------------------------------------------------- 1 | // @deno-types="https://cdn.jsdelivr.net/gh/justjavac/deno_cheerio/cheerio.d.ts" 2 | import cheerio from "https://dev.jspm.io/cheerio/index.js"; 3 | 4 | const $ = cheerio.load('<h2 class="title">Hello world</h2>'); 5 | 6 | $("h2.title").text("Hello Deno!"); 7 | $("h2").addClass("deno"); 8 | 9 | console.log($.html()); 10 | 11 | export interface HtmlImage { 12 | readonly src?: string; 13 | readonly alt?: string; 14 | readonly width?: number | string; 15 | readonly height?: number | string; 16 | readonly imageElem: cheerio.CheerioElement; 17 | } 18 | --------------------------------------------------------------------------------