├── .npmrc ├── funding.yml ├── .prettierignore ├── .gitignore ├── index.js ├── .editorconfig ├── lib ├── util │ ├── format-basic.js │ ├── to-decimal.js │ ├── to-hexadecimal.js │ ├── to-named.js │ └── format-smart.js ├── constant │ └── dangerous.js ├── index.js └── core.js ├── tsconfig.json ├── .github └── workflows │ └── main.yml ├── license ├── package.json ├── readme.md └── test.js /.npmrc: -------------------------------------------------------------------------------- 1 | package-lock=false 2 | -------------------------------------------------------------------------------- /funding.yml: -------------------------------------------------------------------------------- 1 | github: wooorm 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | *.md 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | coverage/ 2 | node_modules/ 3 | *.d.ts 4 | *.log 5 | .DS_Store 6 | yarn.lock 7 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef {import('./lib/index.js').LightOptions} LightOptions 3 | * @typedef {import('./lib/index.js').Options} Options 4 | */ 5 | 6 | export * from './lib/index.js' 7 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 2 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | -------------------------------------------------------------------------------- /lib/util/format-basic.js: 
-------------------------------------------------------------------------------- 1 | /** 2 | * The smallest way to encode a character. 3 | * 4 | * @param {number} code 5 | * @returns {string} 6 | */ 7 | export function formatBasic(code) { 8 | return '&#x' + code.toString(16).toUpperCase() + ';' 9 | } 10 | -------------------------------------------------------------------------------- /lib/constant/dangerous.js: -------------------------------------------------------------------------------- 1 | /** 2 | * List of legacy (that don’t need a trailing `;`) named references which could, 3 | * depending on what follows them, turn into a different meaning 4 | * 5 | * @type {Array} 6 | */ 7 | export const dangerous = [ 8 | 'cent', 9 | 'copy', 10 | 'divide', 11 | 'gt', 12 | 'lt', 13 | 'not', 14 | 'para', 15 | 'times' 16 | ] 17 | -------------------------------------------------------------------------------- /lib/util/to-decimal.js: -------------------------------------------------------------------------------- 1 | const decimalRegex = /\d/ 2 | 3 | /** 4 | * Configurable ways to encode characters as decimal references. 5 | * 6 | * @param {number} code 7 | * @param {number} next 8 | * @param {boolean|undefined} omit 9 | * @returns {string} 10 | */ 11 | export function toDecimal(code, next, omit) { 12 | const value = '&#' + String(code) 13 | return omit && next && !decimalRegex.test(String.fromCharCode(next)) 14 | ? 
value 15 | : value + ';' 16 | } 17 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["**/**.js"], 3 | "exclude": ["coverage", "node_modules"], 4 | "compilerOptions": { 5 | "checkJs": true, 6 | "declaration": true, 7 | "emitDeclarationOnly": true, 8 | "exactOptionalPropertyTypes": true, 9 | "forceConsistentCasingInFileNames": true, 10 | "lib": ["es2020"], 11 | "module": "node16", 12 | "newLine": "lf", 13 | "skipLibCheck": true, 14 | "strict": true, 15 | "target": "es2020" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /lib/util/to-hexadecimal.js: -------------------------------------------------------------------------------- 1 | const hexadecimalRegex = /[\dA-Fa-f]/ 2 | 3 | /** 4 | * Configurable ways to encode characters as hexadecimal references. 5 | * 6 | * @param {number} code 7 | * @param {number} next 8 | * @param {boolean|undefined} omit 9 | * @returns {string} 10 | */ 11 | export function toHexadecimal(code, next, omit) { 12 | const value = '&#x' + code.toString(16).toUpperCase() 13 | return omit && next && !hexadecimalRegex.test(String.fromCharCode(next)) 14 | ? 
value 15 | : value + ';' 16 | } 17 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: main 2 | on: 3 | - pull_request 4 | - push 5 | jobs: 6 | main: 7 | name: ${{matrix.node}} 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - uses: actions/setup-node@v3 12 | with: 13 | node-version: ${{matrix.node}} 14 | - run: npm install 15 | - run: npm test 16 | - uses: codecov/codecov-action@v3 17 | strategy: 18 | matrix: 19 | node: 20 | - lts/hydrogen 21 | - node 22 | -------------------------------------------------------------------------------- /lib/index.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef {import('./core.js').CoreOptions & import('./util/format-smart.js').FormatSmartOptions} Options 3 | * @typedef {import('./core.js').CoreOptions} LightOptions 4 | */ 5 | 6 | import {core} from './core.js' 7 | import {formatSmart} from './util/format-smart.js' 8 | import {formatBasic} from './util/format-basic.js' 9 | 10 | /** 11 | * Encode special characters in `value`. 12 | * 13 | * @param {string} value 14 | * Value to encode. 15 | * @param {Options} [options] 16 | * Configuration. 17 | * @returns {string} 18 | * Encoded value. 19 | */ 20 | export function stringifyEntities(value, options) { 21 | return core(value, Object.assign({format: formatSmart}, options)) 22 | } 23 | 24 | /** 25 | * Encode special characters in `value` as hexadecimals. 26 | * 27 | * @param {string} value 28 | * Value to encode. 29 | * @param {LightOptions} [options] 30 | * Configuration. 31 | * @returns {string} 32 | * Encoded value. 
33 | */ 34 | export function stringifyEntitiesLight(value, options) { 35 | return core(value, Object.assign({format: formatBasic}, options)) 36 | } 37 | -------------------------------------------------------------------------------- /license: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2015 Titus Wormer 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | 'Software'), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /lib/util/to-named.js: -------------------------------------------------------------------------------- 1 | import {characterEntitiesLegacy} from 'character-entities-legacy' 2 | import {characterEntitiesHtml4} from 'character-entities-html4' 3 | import {dangerous} from '../constant/dangerous.js' 4 | 5 | const own = {}.hasOwnProperty 6 | 7 | /** 8 | * `characterEntitiesHtml4` but inverted. 
9 | * 10 | * @type {Record} 11 | */ 12 | const characters = {} 13 | 14 | /** @type {string} */ 15 | let key 16 | 17 | for (key in characterEntitiesHtml4) { 18 | if (own.call(characterEntitiesHtml4, key)) { 19 | characters[characterEntitiesHtml4[key]] = key 20 | } 21 | } 22 | 23 | const notAlphanumericRegex = /[^\dA-Za-z]/ 24 | 25 | /** 26 | * Configurable ways to encode characters as named references. 27 | * 28 | * @param {number} code 29 | * @param {number} next 30 | * @param {boolean|undefined} omit 31 | * @param {boolean|undefined} attribute 32 | * @returns {string} 33 | */ 34 | export function toNamed(code, next, omit, attribute) { 35 | const character = String.fromCharCode(code) 36 | 37 | if (own.call(characters, character)) { 38 | const name = characters[character] 39 | const value = '&' + name 40 | 41 | if ( 42 | omit && 43 | characterEntitiesLegacy.includes(name) && 44 | !dangerous.includes(name) && 45 | (!attribute || 46 | (next && 47 | next !== 61 /* `=` */ && 48 | notAlphanumericRegex.test(String.fromCharCode(next)))) 49 | ) { 50 | return value 51 | } 52 | 53 | return value + ';' 54 | } 55 | 56 | return '' 57 | } 58 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "stringify-entities", 3 | "version": "4.0.4", 4 | "description": "Serialize (encode) HTML character references", 5 | "license": "MIT", 6 | "keywords": [ 7 | "stringify", 8 | "encode", 9 | "escape", 10 | "html", 11 | "character", 12 | "reference", 13 | "entity", 14 | "entities" 15 | ], 16 | "repository": "wooorm/stringify-entities", 17 | "bugs": "https://github.com/wooorm/stringify-entities/issues", 18 | "funding": { 19 | "type": "github", 20 | "url": "https://github.com/sponsors/wooorm" 21 | }, 22 | "author": "Titus Wormer (https://wooorm.com)", 23 | "contributors": [ 24 | "Titus Wormer (https://wooorm.com)" 25 | ], 26 | "sideEffects": false, 27 | 
"type": "module", 28 | "main": "index.js", 29 | "types": "index.d.ts", 30 | "files": [ 31 | "lib/", 32 | "index.d.ts", 33 | "index.js" 34 | ], 35 | "dependencies": { 36 | "character-entities-html4": "^2.0.0", 37 | "character-entities-legacy": "^3.0.0" 38 | }, 39 | "devDependencies": { 40 | "@types/node": "^20.0.0", 41 | "c8": "^9.0.0", 42 | "character-entities": "^2.0.0", 43 | "prettier": "^3.0.0", 44 | "remark-cli": "^11.0.0", 45 | "remark-preset-wooorm": "^9.0.0", 46 | "type-coverage": "^2.0.0", 47 | "typescript": "^5.0.0", 48 | "xo": "^0.58.0" 49 | }, 50 | "scripts": { 51 | "prepack": "npm run build && npm run format", 52 | "generate": "node --conditions development build.js", 53 | "build": "tsc --build --clean && tsc --build && type-coverage", 54 | "format": "remark . -qfo && prettier . -w --log-level warn && xo --fix", 55 | "test-api": "node --conditions development test.js", 56 | "test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api", 57 | "test": "npm run generate && npm run build && npm run format && npm run test-coverage" 58 | }, 59 | "prettier": { 60 | "tabWidth": 2, 61 | "useTabs": false, 62 | "singleQuote": true, 63 | "bracketSpacing": false, 64 | "semi": false, 65 | "trailingComma": "none" 66 | }, 67 | "xo": { 68 | "prettier": true, 69 | "rules": { 70 | "unicorn/prefer-code-point": "off", 71 | "unicorn/prefer-string-replace-all": "off", 72 | "unicorn/numeric-separators-style": "off" 73 | } 74 | }, 75 | "remarkConfig": { 76 | "plugins": [ 77 | "preset-wooorm" 78 | ] 79 | }, 80 | "typeCoverage": { 81 | "atLeast": 100, 82 | "detail": true, 83 | "strict": true, 84 | "ignoreCatch": true 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /lib/util/format-smart.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef FormatSmartOptions 3 | * @property {boolean} [useNamedReferences=false] 4 | * Prefer named character references (`&`) where 
possible. 5 | * @property {boolean} [useShortestReferences=false] 6 | * Prefer the shortest possible reference, if that results in less bytes. 7 | * **Note**: `useNamedReferences` can be omitted when using `useShortestReferences`. 8 | * @property {boolean} [omitOptionalSemicolons=false] 9 | * Whether to omit semicolons when possible. 10 | * **Note**: This creates what HTML calls “parse errors” but is otherwise still valid HTML — don’t use this except when building a minifier. 11 | * Omitting semicolons is possible for certain named and numeric references in some cases. 12 | * @property {boolean} [attribute=false] 13 | * Create character references which don’t fail in attributes. 14 | * **Note**: `attribute` only applies when operating dangerously with 15 | * `omitOptionalSemicolons: true`. 16 | */ 17 | 18 | import {toHexadecimal} from './to-hexadecimal.js' 19 | import {toDecimal} from './to-decimal.js' 20 | import {toNamed} from './to-named.js' 21 | 22 | /** 23 | * Configurable ways to encode a character yielding pretty or small results. 24 | * 25 | * @param {number} code 26 | * @param {number} next 27 | * @param {FormatSmartOptions} options 28 | * @returns {string} 29 | */ 30 | export function formatSmart(code, next, options) { 31 | let numeric = toHexadecimal(code, next, options.omitOptionalSemicolons) 32 | /** @type {string|undefined} */ 33 | let named 34 | 35 | if (options.useNamedReferences || options.useShortestReferences) { 36 | named = toNamed( 37 | code, 38 | next, 39 | options.omitOptionalSemicolons, 40 | options.attribute 41 | ) 42 | } 43 | 44 | // Use the shortest numeric reference when requested. 
45 | // A simple algorithm would use decimal for all code points under 100, as 46 | // those are shorter than hexadecimal: 47 | // 48 | // * `&#99;` vs `&#x63;` (decimal shorter) 49 | // * `&#100;` vs `&#x64;` (equal) 50 | // 51 | // However, because we take `next` into consideration when `omit` is used, 52 | // and it would be possible that decimals are shorter on bigger values as 53 | // well if `next` is hexadecimal but not decimal, we instead compare both. 54 | if ( 55 | (options.useShortestReferences || !named) && 56 | options.useShortestReferences 57 | ) { 58 | const decimal = toDecimal(code, next, options.omitOptionalSemicolons) 59 | 60 | if (decimal.length < numeric.length) { 61 | numeric = decimal 62 | } 63 | } 64 | 65 | return named && 66 | (!options.useShortestReferences || named.length < numeric.length) 67 | ? named 68 | : numeric 69 | } 70 | -------------------------------------------------------------------------------- /lib/core.js: -------------------------------------------------------------------------------- 1 | /** 2 | * @typedef CoreOptions 3 | * @property {ReadonlyArray<string>} [subset=[]] 4 | * Whether to only escape the given subset of characters. 5 | * @property {boolean} [escapeOnly=false] 6 | * Whether to only escape possibly dangerous characters. 7 | * Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``. 8 | * 9 | * @typedef FormatOptions 10 | * @property {(code: number, next: number, options: CoreWithFormatOptions) => string} format 11 | * Format strategy. 
12 | * 13 | * @typedef {CoreOptions & FormatOptions & import('./util/format-smart.js').FormatSmartOptions} CoreWithFormatOptions 14 | */ 15 | 16 | const defaultSubsetRegex = /["&'<>`]/g 17 | const surrogatePairsRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g 18 | const controlCharactersRegex = 19 | // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape 20 | /[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g 21 | const regexEscapeRegex = /[|\\{}()[\]^$+*?.]/g 22 | 23 | /** @type {WeakMap<ReadonlyArray<string>, RegExp>} */ 24 | const subsetToRegexCache = new WeakMap() 25 | 26 | /** 27 | * Encode certain characters in `value`. 28 | * 29 | * @param {string} value 30 | * @param {CoreWithFormatOptions} options 31 | * @returns {string} 32 | */ 33 | export function core(value, options) { 34 | value = value.replace( 35 | options.subset 36 | ? charactersToExpressionCached(options.subset) 37 | : defaultSubsetRegex, 38 | basic 39 | ) 40 | 41 | if (options.subset || options.escapeOnly) { 42 | return value 43 | } 44 | 45 | return ( 46 | value 47 | // Surrogate pairs. 48 | .replace(surrogatePairsRegex, surrogate) 49 | // BMP control characters (C0 except for LF, CR, SP; DEL; and some more 50 | // non-ASCII ones). 51 | .replace(controlCharactersRegex, basic) 52 | ) 53 | 54 | /** 55 | * @param {string} pair 56 | * @param {number} index 57 | * @param {string} all 58 | */ 59 | function surrogate(pair, index, all) { 60 | return options.format( 61 | (pair.charCodeAt(0) - 0xd800) * 0x400 + 62 | pair.charCodeAt(1) - 63 | 0xdc00 + 64 | 0x10000, 65 | all.charCodeAt(index + 2), 66 | options 67 | ) 68 | } 69 | 70 | /** 71 | * @param {string} character 72 | * @param {number} index 73 | * @param {string} all 74 | */ 75 | function basic(character, index, all) { 76 | return options.format( 77 | character.charCodeAt(0), 78 | all.charCodeAt(index + 1), 79 | options 80 | ) 81 | } 82 | } 83 | 84 | /** 85 | * A wrapper function that caches the result of `charactersToExpression` with a WeakMap. 
86 | * This can improve performance when tooling calls `charactersToExpression` repeatedly 87 | * with the same subset. 88 | * 89 | * @param {ReadonlyArray} subset 90 | * @returns {RegExp} 91 | */ 92 | function charactersToExpressionCached(subset) { 93 | let cached = subsetToRegexCache.get(subset) 94 | 95 | if (!cached) { 96 | cached = charactersToExpression(subset) 97 | subsetToRegexCache.set(subset, cached) 98 | } 99 | 100 | return cached 101 | } 102 | 103 | /** 104 | * @param {ReadonlyArray} subset 105 | * @returns {RegExp} 106 | */ 107 | function charactersToExpression(subset) { 108 | /** @type {Array} */ 109 | const groups = [] 110 | let index = -1 111 | 112 | while (++index < subset.length) { 113 | groups.push(subset[index].replace(regexEscapeRegex, '\\$&')) 114 | } 115 | 116 | return new RegExp('(?:' + groups.join('|') + ')', 'g') 117 | } 118 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # stringify-entities 2 | 3 | [![Build Status][build-badge]][build] 4 | [![Coverage Status][coverage-badge]][coverage] 5 | [![Downloads][downloads-badge]][downloads] 6 | [![Size][size-badge]][size] 7 | 8 | Serialize (encode) HTML character references. 9 | 10 | ## Contents 11 | 12 | * [What is this?](#what-is-this) 13 | * [When should I use this?](#when-should-i-use-this) 14 | * [Install](#install) 15 | * [Use](#use) 16 | * [API](#api) 17 | * [`stringifyEntities(value[, options])`](#stringifyentitiesvalue-options) 18 | * [Algorithm](#algorithm) 19 | * [Types](#types) 20 | * [Compatibility](#compatibility) 21 | * [Security](#security) 22 | * [Related](#related) 23 | * [Contribute](#contribute) 24 | * [License](#license) 25 | 26 | ## What is this? 27 | 28 | This is a small and powerful encoder of HTML character references (often called 29 | entities). 
30 | This one has either all the options you need for a minifier/formatter, or a 31 | tiny size when using `stringifyEntitiesLight`. 32 | 33 | ## When should I use this? 34 | 35 | You can use this for spec-compliant encoding of character references. 36 | It’s small and fast enough to do that well. 37 | You can also use this when making an HTML formatter or minifier, because there 38 | are different ways to produce pretty or tiny output. 39 | This package is reliable: ``'`'`` characters are encoded to ensure no scripts 40 | run in Internet Explorer 6 to 8. 41 | Additionally, only named references recognized by HTML 4 are encoded, meaning 42 | the infamous `&apos;` (which people think is a [virus][]) won’t show up. 43 | 44 | ## Install 45 | 46 | This package is [ESM only][esm]. 47 | In Node.js (version 14.14+, 16.0+), install with [npm][]: 48 | 49 | ```sh 50 | npm install stringify-entities 51 | ``` 52 | 53 | In Deno with [`esm.sh`][esmsh]: 54 | 55 | ```js 56 | import {stringifyEntities} from 'https://esm.sh/stringify-entities@4' 57 | ``` 58 | 59 | In browsers with [`esm.sh`][esmsh]: 60 | 61 | ```html 62 | <script type="module"> 63 | import {stringifyEntities} from 'https://esm.sh/stringify-entities@4?bundle' 64 | </script> 65 | ``` 66 | 67 | ## Use 68 | 69 | ```js 70 | import {stringifyEntities} from 'stringify-entities' 71 | 72 | stringifyEntities('alpha © bravo ≠ charlie 𝌆 delta') 73 | // => 'alpha &#xA9; bravo &#x2260; charlie &#x1D306; delta' 74 | 75 | stringifyEntities('alpha © bravo ≠ charlie 𝌆 delta', {useNamedReferences: true}) 76 | // => 'alpha &copy; bravo &ne; charlie &#x1D306; delta' 77 | ``` 78 | 79 | ## API 80 | 81 | This package exports the identifiers `stringifyEntities` and 82 | `stringifyEntitiesLight`. 83 | There is no default export. 84 | 85 | ### `stringifyEntities(value[, options])` 86 | 87 | Encode special characters in `value`. 88 | 89 | ##### Core options 90 | 91 | ###### `options.escapeOnly` 92 | 93 | Whether to only escape possibly dangerous characters (`boolean`, default: 94 | `false`). 95 | Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``. 
96 | 97 | ###### `options.subset` 98 | 99 | Whether to only escape the given subset of characters (`Array<string>`). 100 | Note that only BMP characters are supported here (so no emoji). 101 | 102 | ##### Formatting options 103 | 104 | If you do not care about the following options, use `stringifyEntitiesLight`, 105 | which always outputs hexadecimal character references. 106 | 107 | ###### `options.useNamedReferences` 108 | 109 | Prefer named character references (`&amp;`) where possible (`boolean?`, default: 110 | `false`). 111 | 112 | ###### `options.useShortestReferences` 113 | 114 | Prefer the shortest possible reference, if that results in fewer bytes 115 | (`boolean?`, default: `false`). 116 | 117 | > ⚠️ **Note**: `useNamedReferences` can be omitted when using 118 | > `useShortestReferences`. 119 | 120 | ###### `options.omitOptionalSemicolons` 121 | 122 | Whether to omit semicolons when possible (`boolean?`, default: `false`). 123 | 124 | > ⚠️ **Note**: This creates what HTML calls “parse errors” but is otherwise 125 | > still valid HTML — don’t use this except when building a minifier. 126 | > Omitting semicolons is possible for certain named and numeric references in 127 | > some cases. 128 | 129 | ###### `options.attribute` 130 | 131 | Create character references which don’t fail in attributes (`boolean?`, default: 132 | `false`). 133 | 134 | > ⚠️ **Note**: `attribute` only applies when operating dangerously with 135 | > `omitOptionalSemicolons: true`. 136 | 137 | #### Returns 138 | 139 | Encoded value (`string`). 140 | 141 | ## Algorithm 142 | 143 | By default, all dangerous, non-ASCII, and non-printable ASCII characters are 144 | encoded. 145 | A [subset][] of characters can be given to encode just those characters. 146 | Alternatively, pass [`escapeOnly`][escapeonly] to escape just the dangerous 147 | characters (`"`, `'`, `<`, `>`, `&`, `` ` ``). 148 | By default, hexadecimal character references are used. 
149 | Pass [`useNamedReferences`][named] to use named character references when 150 | possible, or [`useShortestReferences`][short] to use whichever is shortest: 151 | decimal, hexadecimal, or named. 152 | There is also a `stringifyEntitiesLight` export, which works just like 153 | `stringifyEntities` but without the formatting options: it’s much smaller but 154 | always outputs hexadecimal character references. 155 | 156 | ## Types 157 | 158 | This package is fully typed with [TypeScript][]. 159 | It exports the additional types `Options` and `LightOptions`. 160 | 161 | ## Compatibility 162 | 163 | This package is at least compatible with all maintained versions of Node.js. 164 | As of now, that is Node.js 14.14+ and 16.0+. 165 | It also works in Deno and modern browsers. 166 | 167 | ## Security 168 | 169 | This package is safe. 170 | 171 | ## Related 172 | 173 | * [`parse-entities`](https://github.com/wooorm/parse-entities) 174 | — parse (decode) HTML character references 175 | * [`wooorm/character-entities`](https://github.com/wooorm/character-entities) 176 | — info on character references 177 | * [`wooorm/character-entities-html4`](https://github.com/wooorm/character-entities-html4) 178 | — info on HTML 4 character references 179 | * [`wooorm/character-entities-legacy`](https://github.com/wooorm/character-entities-legacy) 180 | — info on legacy character references 181 | * [`wooorm/character-reference-invalid`](https://github.com/wooorm/character-reference-invalid) 182 | — info on invalid numeric character references 183 | 184 | ## Contribute 185 | 186 | Yes please! 187 | See [How to Contribute to Open Source][contribute]. 
188 | 189 | ## License 190 | 191 | [MIT][license] © [Titus Wormer][author] 192 | 193 | 194 | 195 | [build-badge]: https://github.com/wooorm/stringify-entities/workflows/main/badge.svg 196 | 197 | [build]: https://github.com/wooorm/stringify-entities/actions 198 | 199 | [coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/stringify-entities.svg 200 | 201 | [coverage]: https://codecov.io/github/wooorm/stringify-entities 202 | 203 | [downloads-badge]: https://img.shields.io/npm/dm/stringify-entities.svg 204 | 205 | [downloads]: https://www.npmjs.com/package/stringify-entities 206 | 207 | [size-badge]: https://img.shields.io/bundlephobia/minzip/stringify-entities.svg 208 | 209 | [size]: https://bundlephobia.com/result?p=stringify-entities 210 | 211 | [npm]: https://docs.npmjs.com/cli/install 212 | 213 | [esmsh]: https://esm.sh 214 | 215 | [license]: license 216 | 217 | [author]: https://wooorm.com 218 | 219 | [esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c 220 | 221 | [typescript]: https://www.typescriptlang.org 222 | 223 | [contribute]: https://opensource.guide/how-to-contribute/ 224 | 225 | [virus]: https://www.telegraph.co.uk/technology/advice/10516839/Why-do-some-apostrophes-get-replaced-with-andapos.html 226 | 227 | [subset]: #optionssubset 228 | 229 | [escapeonly]: #optionsescapeonly 230 | 231 | [named]: #optionsusenamedreferences 232 | 233 | [short]: #optionsuseshortestreferences 234 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | import assert from 'node:assert/strict' 2 | import test from 'node:test' 3 | import {stringifyEntities, stringifyEntitiesLight} from './index.js' 4 | 5 | test('stringifyEntities(value[, options])', function () { 6 | assert.equal( 7 | stringifyEntities('foo\u00A9bar\uD834\uDF06baz\u2603qux'), 8 | 'foo©bar𝌆baz☃qux', 9 | 'should encode non-ASCII characters by default' 10 
| ) 11 | 12 | assert.equal( 13 | stringifyEntities('foo\u00A9bar\uD834\uDF06baz\u2603qux', { 14 | useNamedReferences: true 15 | }), 16 | 'foo©bar𝌆baz☃qux', 17 | 'should encode w/ named character references if possible w/ `useNamedReferences`' 18 | ) 19 | 20 | assert.equal( 21 | stringifyEntities('alpha © bravo ≠ charlie 𝌆 delta " echo', { 22 | useShortestReferences: true 23 | }), 24 | 'alpha © bravo ≠ charlie 𝌆 delta " echo', 25 | 'should encode w/ shortest character references if `useShortestReferences`' 26 | ) 27 | 28 | assert.equal( 29 | stringifyEntities('" "0 "a "z µ µ0 µa µz', { 30 | useShortestReferences: true, 31 | omitOptionalSemicolons: true 32 | }), 33 | '" "0 "a "z µ µ0 µa µz', 34 | 'should encode w/ shortest numeric reference based on `next` w/ `omitOptionalSemicolons`' 35 | ) 36 | 37 | assert.equal( 38 | stringifyEntities('\'"<>&'), 39 | ''"<>&', 40 | 'should encode dangerous characters as hexadecimal character references by default' 41 | ) 42 | 43 | assert.equal( 44 | stringifyEntities('\'"<>&', {subset: ['&']}), 45 | '\'"<>&', 46 | 'should encode a `subset`' 47 | ) 48 | 49 | assert.equal( 50 | stringifyEntities('a[b]c', {subset: ['[', ']']}), 51 | 'a[b]c', 52 | 'should encode special regex characters in `subset`' 53 | ) 54 | 55 | assert.equal( 56 | stringifyEntities('\'"<>&', {subset: ['&'], useNamedReferences: true}), 57 | '\'"<>&', 58 | 'should encode a `subset` w/ `useNamedReferences`' 59 | ) 60 | 61 | assert.equal( 62 | stringifyEntities('&such', {omitOptionalSemicolons: true}), 63 | '&such', 64 | 'should omit semicolons w/ `omitOptionalSemicolons`' 65 | ) 66 | 67 | assert.equal( 68 | stringifyEntities('&such', { 69 | useNamedReferences: true, 70 | omitOptionalSemicolons: true 71 | }), 72 | '&such', 73 | 'should omit semicolons w/ `omitOptionalSemicolons` and `useNamedReferences`' 74 | ) 75 | 76 | assert.equal( 77 | stringifyEntities('&bada55', {omitOptionalSemicolons: true}), 78 | '&bada55', 79 | 'should not omit semicolons when numeric and the 
next is hexadecimal' 80 | ) 81 | 82 | assert.equal( 83 | stringifyEntities('& such', { 84 | attribute: true, 85 | useNamedReferences: true, 86 | omitOptionalSemicolons: true 87 | }), 88 | '& such', 89 | 'should omit semicolons (named in attribute)' 90 | ) 91 | 92 | assert.equal( 93 | stringifyEntities('&such', { 94 | attribute: true, 95 | useNamedReferences: true, 96 | omitOptionalSemicolons: true 97 | }), 98 | '&such', 99 | 'should not omit semicolons when named in attribute and the next character is alphanumeric' 100 | ) 101 | 102 | assert.equal( 103 | stringifyEntities('&=such', { 104 | attribute: true, 105 | useNamedReferences: true, 106 | omitOptionalSemicolons: true 107 | }), 108 | '&=such', 109 | 'should not omit semicolons when named in attribute and the next character is `=`' 110 | ) 111 | 112 | assert.equal( 113 | stringifyEntities('¬it;', { 114 | useNamedReferences: true, 115 | omitOptionalSemicolons: true 116 | }), 117 | '¬it;', 118 | 'should not omit semicolons when conflicting' 119 | ) 120 | 121 | assert.equal( 122 | stringifyEntities('&', { 123 | useNamedReferences: true, 124 | omitOptionalSemicolons: true 125 | }), 126 | '&amp', 127 | 'should omit semicolons when named, not in an attribute, and the next character is alphanumeric' 128 | ) 129 | 130 | assert.equal( 131 | stringifyEntities('&=', { 132 | useNamedReferences: true, 133 | omitOptionalSemicolons: true 134 | }), 135 | '&=', 136 | 'should omit semicolons when named, not in an attribute, and the next character is `=`' 137 | ) 138 | 139 | assert.equal( 140 | stringifyEntities('foo\uD800bar'), 141 | 'foo�bar', 142 | 'should support a lone high surrogate (lowest)' 143 | ) 144 | 145 | assert.equal( 146 | stringifyEntities('foo\uDBFFbar'), 147 | 'foo�bar', 148 | 'should support a lone high surrogate (highest)' 149 | ) 150 | 151 | assert.equal( 152 | stringifyEntities('\uD800bar'), 153 | '�bar', 154 | 'should support a lone high surrogate at the start of a string (lowest)' 155 | ) 156 | 157 | 
assert.equal( 158 | stringifyEntities('\uDBFFbar'), 159 | '�bar', 160 | 'should support a lone high surrogate at the start of a string (highest)' 161 | ) 162 | 163 | assert.equal( 164 | stringifyEntities('foo\uD800'), 165 | 'foo�', 166 | 'should support a lone high surrogate at the end of a string (lowest)' 167 | ) 168 | 169 | assert.equal( 170 | stringifyEntities('foo\uDBFF'), 171 | 'foo�', 172 | 'should support a lone high surrogate at the end of a string (highest)' 173 | ) 174 | 175 | assert.equal( 176 | stringifyEntities('foo\uDC00bar'), 177 | 'foo�bar', 178 | 'should support a lone low surrogate (lowest)' 179 | ) 180 | 181 | assert.equal( 182 | stringifyEntities('foo\uDFFFbar'), 183 | 'foo�bar', 184 | 'should support a lone low surrogate (highest)' 185 | ) 186 | 187 | assert.equal( 188 | stringifyEntities('\uDC00bar'), 189 | '�bar', 190 | 'should support a lone low surrogate at the start of a string (lowest)' 191 | ) 192 | 193 | assert.equal( 194 | stringifyEntities('\uDFFFbar'), 195 | '�bar', 196 | 'should support a lone low surrogate at the start of a string (highest)' 197 | ) 198 | 199 | assert.equal( 200 | stringifyEntities('foo\uDC00'), 201 | 'foo�', 202 | 'should support a lone low surrogate at the end of a string (lowest)' 203 | ) 204 | 205 | assert.equal( 206 | stringifyEntities( 207 | 
'\0\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\u000B\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u007F\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF\uFFFE\uFFFF\uD83F\uDFFE\uD83F\uDFFF\uD87F\uDFFE\uD87F\uDFFF\uD8BF\uDFFE\uD8BF\uDFFF\uD8FF\uDFFE\uD8FF\uDFFF\uD93F\uDFFE\uD93F\uDFFF\uD97F\uDFFE\uD97F\uDFFF\uD9BF\uDFFE\uD9BF\uDFFF\uD9FF\uDFFE\uD9FF\uDFFF\uDA3F\uDFFE\uDA3F\uDFFF\uDA7F\uDFFE\uDA7F\uDFFF\uDABF\uDFFE\uDABF\uDFFF\uDAFF\uDFFE\uDAFF\uDFFF\uDB3F\uDFFE\uDB3F\uDFFF\uDB7F\uDFFE\uDB7F\uDFFF\uDBBF\uDFFE\uDBBF\uDFFF\uDBFF\uDFFE\uDBFF\uDFFF' 208 | ), 209 | '\0 \u0080\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008E\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009E\u009F﷐﷑﷒﷓﷔﷕﷖﷗﷘﷙﷚﷛﷜﷝﷞﷟﷠﷡﷢﷣﷤﷥﷦﷧﷨﷩﷪﷫﷬﷭﷮﷯￾￿🿾🿿𯿾𯿿𿿾𿿿񏿾񏿿񟿾񟿿񯿾񯿿񿿾񿿿򏿾򏿿򟿾򟿿򯿾򯿿򿿾򿿿󏿾󏿿󟿾󟿿󯿾󯿿󿿾󿿿􏿾􏿿', 210 | 'should encodes disallowed code points in input, except those whose character references would refer to another code point' 211 | ) 212 | 213 | assert.equal( 214 | stringifyEntities('\0\u0089'), 215 | '\0\u0089', 216 | 'should not encode invalid code points whose character references would refer to another code point' 217 | ) 218 | }) 219 | 220 | test('stringifyEntitiesLight(value[, options])', function () { 221 | assert.equal( 222 | stringifyEntitiesLight('foo\u00A9bar\uD834\uDF06baz\u2603qux'), 223 | 'foo©bar𝌆baz☃qux', 224 | 'should encode in light mode' 225 | ) 226 | 227 | assert.equal( 228 | stringifyEntitiesLight('\'"<>&', {subset: ['&']}), 229 | '\'"<>&', 230 | 'should support a `subset`' 231 | ) 232 | }) 233 | 
--------------------------------------------------------------------------------