├── .npmrc
├── funding.yml
├── .prettierignore
├── .gitignore
├── index.js
├── .editorconfig
├── lib
    ├── util
    │   ├── format-basic.js
    │   ├── to-decimal.js
    │   ├── to-hexadecimal.js
    │   ├── to-named.js
    │   └── format-smart.js
    ├── constant
    │   └── dangerous.js
    ├── index.js
    └── core.js
├── tsconfig.json
├── .github
    └── workflows
    │   └── main.yml
├── license
├── package.json
├── readme.md
└── test.js


/.npmrc:
--------------------------------------------------------------------------------
1 | package-lock=false
2 | 


--------------------------------------------------------------------------------
/funding.yml:
--------------------------------------------------------------------------------
1 | github: wooorm
2 | 


--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | coverage/
2 | *.md
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | coverage/
2 | node_modules/
3 | *.d.ts
4 | *.log
5 | .DS_Store
6 | yarn.lock
7 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | /**
2 |  * @typedef {import('./lib/index.js').LightOptions} LightOptions
3 |  * @typedef {import('./lib/index.js').Options} Options
4 |  */
5 | 
6 | export * from './lib/index.js'
7 | 


--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
 1 | root = true
 2 | 
 3 | [*]
 4 | indent_style = space
 5 | indent_size = 2
 6 | end_of_line = lf
 7 | charset = utf-8
 8 | trim_trailing_whitespace = true
 9 | insert_final_newline = true
10 | 


--------------------------------------------------------------------------------
/lib/util/format-basic.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * The smallest way to encode a character.
 3 |  *
 4 |  * @param {number} code
 5 |  * @returns {string}
 6 |  */
 7 | export function formatBasic(code) {
 8 |   return '&#x' + code.toString(16).toUpperCase() + ';'
 9 | }
10 | 


--------------------------------------------------------------------------------
/lib/constant/dangerous.js:
--------------------------------------------------------------------------------
/**
 * Names of legacy named references (those that don’t need a trailing `;`)
 * which could, depending on what follows them, turn into a different meaning.
 *
 * `toNamed` never drops the semicolon after one of these names, even when
 * semicolons may otherwise be omitted.
 *
 * @type {Array<string>}
 */
export const dangerous = [
  'cent',
  'copy',
  'divide',
  'gt',
  'lt',
  'not',
  'para',
  'times'
]
17 | 


--------------------------------------------------------------------------------
/lib/util/to-decimal.js:
--------------------------------------------------------------------------------
 1 | const decimalRegex = /\d/
 2 | 
 3 | /**
 4 |  * Configurable ways to encode characters as decimal references.
 5 |  *
 6 |  * @param {number} code
 7 |  * @param {number} next
 8 |  * @param {boolean|undefined} omit
 9 |  * @returns {string}
10 |  */
11 | export function toDecimal(code, next, omit) {
12 |   const value = '&#' + String(code)
13 |   return omit && next && !decimalRegex.test(String.fromCharCode(next))
14 |     ? value
15 |     : value + ';'
16 | }
17 | 


--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "include": ["**/**.js"],
 3 |   "exclude": ["coverage", "node_modules"],
 4 |   "compilerOptions": {
 5 |     "checkJs": true,
 6 |     "declaration": true,
 7 |     "emitDeclarationOnly": true,
 8 |     "exactOptionalPropertyTypes": true,
 9 |     "forceConsistentCasingInFileNames": true,
10 |     "lib": ["es2020"],
11 |     "module": "node16",
12 |     "newLine": "lf",
13 |     "skipLibCheck": true,
14 |     "strict": true,
15 |     "target": "es2020"
16 |   }
17 | }
18 | 


--------------------------------------------------------------------------------
/lib/util/to-hexadecimal.js:
--------------------------------------------------------------------------------
 1 | const hexadecimalRegex = /[\dA-Fa-f]/
 2 | 
 3 | /**
 4 |  * Configurable ways to encode characters as hexadecimal references.
 5 |  *
 6 |  * @param {number} code
 7 |  * @param {number} next
 8 |  * @param {boolean|undefined} omit
 9 |  * @returns {string}
10 |  */
11 | export function toHexadecimal(code, next, omit) {
12 |   const value = '&#x' + code.toString(16).toUpperCase()
13 |   return omit && next && !hexadecimalRegex.test(String.fromCharCode(next))
14 |     ? value
15 |     : value + ';'
16 | }
17 | 


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: main
 2 | on:
 3 |   - pull_request
 4 |   - push
 5 | jobs:
 6 |   main:
 7 |     name: ${{matrix.node}}
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - uses: actions/checkout@v3
11 |       - uses: actions/setup-node@v3
12 |         with:
13 |           node-version: ${{matrix.node}}
14 |       - run: npm install
15 |       - run: npm test
16 |       - uses: codecov/codecov-action@v3
17 |     strategy:
18 |       matrix:
19 |         node:
20 |           - lts/hydrogen
21 |           - node
22 | 


--------------------------------------------------------------------------------
/lib/index.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @typedef {import('./core.js').CoreOptions & import('./util/format-smart.js').FormatSmartOptions} Options
 3 |  * @typedef {import('./core.js').CoreOptions} LightOptions
 4 |  */
 5 | 
 6 | import {core} from './core.js'
 7 | import {formatSmart} from './util/format-smart.js'
 8 | import {formatBasic} from './util/format-basic.js'
 9 | 
/**
 * Encode special characters in `value`.
 *
 * References are formatted with `formatSmart` unless `options` carries its
 * own `format` field, as fields on `options` take precedence.
 *
 * @param {string} value
 *   Value to encode.
 * @param {Options} [options]
 *   Configuration.
 * @returns {string}
 *   Encoded value.
 */
export function stringifyEntities(value, options) {
  return core(value, {format: formatSmart, ...options})
}
23 | 
/**
 * Encode special characters in `value` as hexadecimals.
 *
 * References are formatted with `formatBasic` unless `options` carries its
 * own `format` field, as fields on `options` take precedence.
 *
 * @param {string} value
 *   Value to encode.
 * @param {LightOptions} [options]
 *   Configuration.
 * @returns {string}
 *   Encoded value.
 */
export function stringifyEntitiesLight(value, options) {
  return core(value, {format: formatBasic, ...options})
}
37 | 


--------------------------------------------------------------------------------
/license:
--------------------------------------------------------------------------------
 1 | (The MIT License)
 2 | 
 3 | Copyright (c) 2015 Titus Wormer <mailto:tituswormer@gmail.com>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | 'Software'), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/lib/util/to-named.js:
--------------------------------------------------------------------------------
 1 | import {characterEntitiesLegacy} from 'character-entities-legacy'
 2 | import {characterEntitiesHtml4} from 'character-entities-html4'
 3 | import {dangerous} from '../constant/dangerous.js'
 4 | 
 5 | const own = {}.hasOwnProperty
 6 | 
 7 | /**
 8 |  * `characterEntitiesHtml4` but inverted.
 9 |  *
10 |  * @type {Record<string, string>}
11 |  */
12 | const characters = {}
13 | 
14 | /** @type {string} */
15 | let key
16 | 
17 | for (key in characterEntitiesHtml4) {
18 |   if (own.call(characterEntitiesHtml4, key)) {
19 |     characters[characterEntitiesHtml4[key]] = key
20 |   }
21 | }
22 | 
23 | const notAlphanumericRegex = /[^\dA-Za-z]/
24 | 
/**
 * Encode a character as a named character reference, when `characters` has a
 * name for it.
 *
 * @param {number} code
 *   Code point of the character to encode.
 * @param {number} next
 *   Code of the character that follows; a falsy value (such as `NaN`) means
 *   no following character is known.
 * @param {boolean|undefined} omit
 *   Whether the closing semicolon may be dropped when possible.
 * @param {boolean|undefined} attribute
 *   Whether to apply the stricter rules for attributes: the semicolon is then
 *   only dropped in front of a known character that is neither `=` nor an
 *   ASCII letter or digit.
 * @returns {string}
 *   Named reference (such as `&amp;` or `&amp`), or an empty string when the
 *   character has no known name.
 */
export function toNamed(code, next, omit, attribute) {
  // `String.fromCharCode` keeps only the low 16 bits of its argument, so an
  // astral code point such as U+100A9 would be looked up as U+00A9 (`©`) and
  // wrongly get that character’s name.
  // Code points above U+FFFF therefore go through `String.fromCodePoint`;
  // all other input is converted exactly as before.
  const character =
    code > 0xffff ? String.fromCodePoint(code) : String.fromCharCode(code)

  if (!own.call(characters, character)) {
    return ''
  }

  const name = characters[character]
  const value = '&' + name

  // The semicolon is only optional for legacy names, and never for names
  // that could be read differently depending on what follows them.
  if (
    omit &&
    characterEntitiesLegacy.includes(name) &&
    !dangerous.includes(name) &&
    (!attribute ||
      (next &&
        next !== 61 /* `=` */ &&
        notAlphanumericRegex.test(String.fromCharCode(next))))
  ) {
    return value
  }

  return value + ';'
}
58 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "stringify-entities",
 3 |   "version": "4.0.4",
 4 |   "description": "Serialize (encode) HTML character references",
 5 |   "license": "MIT",
 6 |   "keywords": [
 7 |     "stringify",
 8 |     "encode",
 9 |     "escape",
10 |     "html",
11 |     "character",
12 |     "reference",
13 |     "entity",
14 |     "entities"
15 |   ],
16 |   "repository": "wooorm/stringify-entities",
17 |   "bugs": "https://github.com/wooorm/stringify-entities/issues",
18 |   "funding": {
19 |     "type": "github",
20 |     "url": "https://github.com/sponsors/wooorm"
21 |   },
22 |   "author": "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)",
23 |   "contributors": [
24 |     "Titus Wormer <tituswormer@gmail.com> (https://wooorm.com)"
25 |   ],
26 |   "sideEffects": false,
27 |   "type": "module",
28 |   "main": "index.js",
29 |   "types": "index.d.ts",
30 |   "files": [
31 |     "lib/",
32 |     "index.d.ts",
33 |     "index.js"
34 |   ],
35 |   "dependencies": {
36 |     "character-entities-html4": "^2.0.0",
37 |     "character-entities-legacy": "^3.0.0"
38 |   },
39 |   "devDependencies": {
40 |     "@types/node": "^20.0.0",
41 |     "c8": "^9.0.0",
42 |     "character-entities": "^2.0.0",
43 |     "prettier": "^3.0.0",
44 |     "remark-cli": "^11.0.0",
45 |     "remark-preset-wooorm": "^9.0.0",
46 |     "type-coverage": "^2.0.0",
47 |     "typescript": "^5.0.0",
48 |     "xo": "^0.58.0"
49 |   },
50 |   "scripts": {
51 |     "prepack": "npm run build && npm run format",
52 |     "generate": "node --conditions development build.js",
53 |     "build": "tsc --build --clean && tsc --build && type-coverage",
54 |     "format": "remark . -qfo && prettier . -w --log-level warn && xo --fix",
55 |     "test-api": "node --conditions development test.js",
56 |     "test-coverage": "c8 --check-coverage --100 --reporter lcov npm run test-api",
57 |     "test": "npm run generate && npm run build && npm run format && npm run test-coverage"
58 |   },
59 |   "prettier": {
60 |     "tabWidth": 2,
61 |     "useTabs": false,
62 |     "singleQuote": true,
63 |     "bracketSpacing": false,
64 |     "semi": false,
65 |     "trailingComma": "none"
66 |   },
67 |   "xo": {
68 |     "prettier": true,
69 |     "rules": {
70 |       "unicorn/prefer-code-point": "off",
71 |       "unicorn/prefer-string-replace-all": "off",
72 |       "unicorn/numeric-separators-style": "off"
73 |     }
74 |   },
75 |   "remarkConfig": {
76 |     "plugins": [
77 |       "preset-wooorm"
78 |     ]
79 |   },
80 |   "typeCoverage": {
81 |     "atLeast": 100,
82 |     "detail": true,
83 |     "strict": true,
84 |     "ignoreCatch": true
85 |   }
86 | }
87 | 


--------------------------------------------------------------------------------
/lib/util/format-smart.js:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * @typedef FormatSmartOptions
 3 |  * @property {boolean} [useNamedReferences=false]
 4 |  *   Prefer named character references (`&amp;`) where possible.
 5 |  * @property {boolean} [useShortestReferences=false]
 6 |  *   Prefer the shortest possible reference, if that results in fewer bytes.
 7 |  *   **Note**: `useNamedReferences` can be omitted when using `useShortestReferences`.
 8 |  * @property {boolean} [omitOptionalSemicolons=false]
 9 |  *   Whether to omit semicolons when possible.
10 |  *   **Note**: This creates what HTML calls “parse errors” but is otherwise still valid HTML — don’t use this except when building a minifier.
11 |  *   Omitting semicolons is possible for certain named and numeric references in some cases.
12 |  * @property {boolean} [attribute=false]
13 |  *   Create character references which don’t fail in attributes.
14 |  *   **Note**: `attribute` only applies when operating dangerously with
15 |  *   `omitOptionalSemicolons: true`.
16 |  */
17 | 
18 | import {toHexadecimal} from './to-hexadecimal.js'
19 | import {toDecimal} from './to-decimal.js'
20 | import {toNamed} from './to-named.js'
21 | 
/**
 * Configurable ways to encode a character yielding pretty or small results.
 *
 * A hexadecimal reference is the baseline.
 * With `useNamedReferences`, a named reference is used whenever one exists.
 * With `useShortestReferences`, the shortest of decimal, hexadecimal, and
 * named is used, where a named reference only wins when it is strictly
 * shorter than the numeric one and hexadecimal wins a tie with decimal.
 *
 * @param {number} code
 *   Code point of the character to encode.
 * @param {number} next
 *   Code of the character that follows.
 * @param {FormatSmartOptions} options
 *   Configuration.
 * @returns {string}
 *   Character reference.
 */
export function formatSmart(code, next, options) {
  const {
    attribute,
    omitOptionalSemicolons,
    useNamedReferences,
    useShortestReferences
  } = options
  let numeric = toHexadecimal(code, next, omitOptionalSemicolons)
  /** @type {string|undefined} */
  let named

  if (useNamedReferences || useShortestReferences) {
    // Yields an empty string when there is no name, which counts as “no
    // named reference” below.
    named = toNamed(code, next, omitOptionalSemicolons, attribute)
  }

  // Use the shortest numeric reference when requested.
  // A simple algorithm would use decimal for all code points under 100, as
  // those are shorter than hexadecimal:
  //
  // * `&#99;` vs `&#x63;` (decimal shorter)
  // * `&#100;` vs `&#x64;` (equal)
  //
  // However, because we take `next` into consideration when `omit` is used,
  // and it would be possible that decimals are shorter on bigger values as
  // well if `next` is hexadecimal but not decimal, we instead compare both.
  if (useShortestReferences) {
    const decimal = toDecimal(code, next, omitOptionalSemicolons)

    if (decimal.length < numeric.length) {
      numeric = decimal
    }
  }

  return named && (!useShortestReferences || named.length < numeric.length)
    ? named
    : numeric
}
70 | 


--------------------------------------------------------------------------------
/lib/core.js:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * @typedef CoreOptions
  3 |  * @property {ReadonlyArray<string>} [subset=[]]
  4 |  *   Whether to only escape the given subset of characters.
  5 |  * @property {boolean} [escapeOnly=false]
  6 |  *   Whether to only escape possibly dangerous characters.
  7 |  *   Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``.
  8 |  *
  9 |  * @typedef FormatOptions
 10 |  * @property {(code: number, next: number, options: CoreWithFormatOptions) => string} format
 11 |  *   Format strategy.
 12 |  *
 13 |  * @typedef {CoreOptions & FormatOptions & import('./util/format-smart.js').FormatSmartOptions} CoreWithFormatOptions
 14 |  */
 15 | 
// Characters escaped when no `subset` is given: `"`, `&`, `'`, `<`, `>`, and
// `` ` ``.
const defaultSubsetRegex = /["&'<>`]/g
// A high surrogate immediately followed by a low surrogate.
const surrogatePairsRegex = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g
// U+0001 through U+0009 (tab), vertical tab, form feed, U+000E through
// U+001F, U+007F, U+0081, U+008D, U+008F, U+0090, U+009D, and everything from
// U+00A0 up to and including U+FFFF.
const controlCharactersRegex =
  // eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
  /[\x01-\t\v\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g
// Characters that have a special meaning in the source of a regular
// expression and so need a backslash in front of them.
const regexEscapeRegex = /[|\\{}()[\]^$+*?.]/g

/**
 * Compiled expressions for `subset` arrays, keyed by the array itself.
 *
 * @type {WeakMap<ReadonlyArray<string>, RegExp>}
 */
const subsetToRegexCache = new WeakMap()
 25 | 
 26 | /**
 27 |  * Encode certain characters in `value`.
 28 |  *
 29 |  * @param {string} value
 30 |  * @param {CoreWithFormatOptions} options
 31 |  * @returns {string}
 32 |  */
 33 | export function core(value, options) {
 34 |   value = value.replace(
 35 |     options.subset
 36 |       ? charactersToExpressionCached(options.subset)
 37 |       : defaultSubsetRegex,
 38 |     basic
 39 |   )
 40 | 
 41 |   if (options.subset || options.escapeOnly) {
 42 |     return value
 43 |   }
 44 | 
 45 |   return (
 46 |     value
 47 |       // Surrogate pairs.
 48 |       .replace(surrogatePairsRegex, surrogate)
 49 |       // BMP control characters (C0 except for LF, CR, SP; DEL; and some more
 50 |       // non-ASCII ones).
 51 |       .replace(controlCharactersRegex, basic)
 52 |   )
 53 | 
 54 |   /**
 55 |    * @param {string} pair
 56 |    * @param {number} index
 57 |    * @param {string} all
 58 |    */
 59 |   function surrogate(pair, index, all) {
 60 |     return options.format(
 61 |       (pair.charCodeAt(0) - 0xd800) * 0x400 +
 62 |         pair.charCodeAt(1) -
 63 |         0xdc00 +
 64 |         0x10000,
 65 |       all.charCodeAt(index + 2),
 66 |       options
 67 |     )
 68 |   }
 69 | 
 70 |   /**
 71 |    * @param {string} character
 72 |    * @param {number} index
 73 |    * @param {string} all
 74 |    */
 75 |   function basic(character, index, all) {
 76 |     return options.format(
 77 |       character.charCodeAt(0),
 78 |       all.charCodeAt(index + 1),
 79 |       options
 80 |     )
 81 |   }
 82 | }
 83 | 
 84 | /**
 85 |  * A wrapper function that caches the result of `charactersToExpression` with a WeakMap.
 86 |  * This can improve performance when tooling calls `charactersToExpression` repeatedly
 87 |  * with the same subset.
 88 |  *
 89 |  * @param {ReadonlyArray<string>} subset
 90 |  * @returns {RegExp}
 91 |  */
 92 | function charactersToExpressionCached(subset) {
 93 |   let cached = subsetToRegexCache.get(subset)
 94 | 
 95 |   if (!cached) {
 96 |     cached = charactersToExpression(subset)
 97 |     subsetToRegexCache.set(subset, cached)
 98 |   }
 99 | 
100 |   return cached
101 | }
102 | 
/**
 * Compile a global expression that matches any of the strings in `subset`.
 *
 * Empty strings are ignored, and an expression that never matches is returned
 * when nothing is left.
 * Otherwise the empty alternative would match the empty string at every
 * position of the input, and a reference for a non-existent character would
 * be inserted between all characters.
 *
 * @param {ReadonlyArray<string>} subset
 *   Characters to match.
 * @returns {RegExp}
 *   Expression with the `g` flag.
 */
function charactersToExpression(subset) {
  /** @type {Array<string>} */
  const groups = []

  for (const character of subset) {
    if (character) {
      // Escape regex syntax so that the character is matched literally.
      groups.push(character.replace(regexEscapeRegex, '\\$&'))
    }
  }

  // `(?!)` is an empty negative lookahead: it fails at every position.
  const source = groups.length > 0 ? '(?:' + groups.join('|') + ')' : '(?!)'

  return new RegExp(source, 'g')
}
118 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
  1 | # stringify-entities
  2 | 
  3 | [![Build Status][build-badge]][build]
  4 | [![Coverage Status][coverage-badge]][coverage]
  5 | [![Downloads][downloads-badge]][downloads]
  6 | [![Size][size-badge]][size]
  7 | 
  8 | Serialize (encode) HTML character references.
  9 | 
 10 | ## Contents
 11 | 
 12 | *   [What is this?](#what-is-this)
 13 | *   [When should I use this?](#when-should-i-use-this)
 14 | *   [Install](#install)
 15 | *   [Use](#use)
 16 | *   [API](#api)
 17 |     *   [`stringifyEntities(value[, options])`](#stringifyentitiesvalue-options)
 18 | *   [Algorithm](#algorithm)
 19 | *   [Types](#types)
 20 | *   [Compatibility](#compatibility)
 21 | *   [Security](#security)
 22 | *   [Related](#related)
 23 | *   [Contribute](#contribute)
 24 | *   [License](#license)
 25 | 
 26 | ## What is this?
 27 | 
 28 | This is a small and powerful encoder of HTML character references (often called
 29 | entities).
 30 | This one has either all the options you need for a minifier/formatter, or a
 31 | tiny size when using `stringifyEntitiesLight`.
 32 | 
 33 | ## When should I use this?
 34 | 
 35 | You can use this for spec-compliant encoding of character references.
 36 | It’s small and fast enough to do that well.
 37 | You can also use this when making an HTML formatter or minifier, because there
 38 | are different ways to produce pretty or tiny output.
 39 | This package is reliable: ``'`'`` characters are encoded to ensure no scripts
 40 | run in Internet Explorer 6 to 8.
 41 | Additionally, only named references recognized by HTML 4 are encoded, meaning
 42 | the infamous `&apos;` (which people think is a [virus][]) won’t show up.
 43 | 
 44 | ## Install
 45 | 
 46 | This package is [ESM only][esm].
 47 | In Node.js (version 14.14+, 16.0+), install with [npm][]:
 48 | 
 49 | ```sh
 50 | npm install stringify-entities
 51 | ```
 52 | 
 53 | In Deno with [`esm.sh`][esmsh]:
 54 | 
 55 | ```js
 56 | import {stringifyEntities} from 'https://esm.sh/stringify-entities@4'
 57 | ```
 58 | 
 59 | In browsers with [`esm.sh`][esmsh]:
 60 | 
 61 | ```html
 62 | <script type="module">
 63 |   import {stringifyEntities} from 'https://esm.sh/stringify-entities@4?bundle'
 64 | </script>
 65 | ```
 66 | 
 67 | ## Use
 68 | 
 69 | ```js
 70 | import {stringifyEntities} from 'stringify-entities'
 71 | 
 72 | stringifyEntities('alpha © bravo ≠ charlie 𝌆 delta')
 73 | // => 'alpha &#xA9; bravo &#x2260; charlie &#x1D306; delta'
 74 | 
 75 | stringifyEntities('alpha © bravo ≠ charlie 𝌆 delta', {useNamedReferences: true})
 76 | // => 'alpha &copy; bravo &ne; charlie &#x1D306; delta'
 77 | ```
 78 | 
 79 | ## API
 80 | 
 81 | This package exports the identifiers `stringifyEntities` and
 82 | `stringifyEntitiesLight`.
 83 | There is no default export.
 84 | 
 85 | ### `stringifyEntities(value[, options])`
 86 | 
 87 | Encode special characters in `value`.
 88 | 
 89 | ##### Core options
 90 | 
 91 | ###### `options.escapeOnly`
 92 | 
 93 | Whether to only escape possibly dangerous characters (`boolean`, default:
 94 | `false`).
 95 | Those characters are `"`, `&`, `'`, `<`, `>`, and `` ` ``.
 96 | 
 97 | ###### `options.subset`
 98 | 
 99 | Whether to only escape the given subset of characters (`Array<string>`).
100 | Note that only BMP characters are supported here (so no emoji).
101 | 
102 | ##### Formatting options
103 | 
104 | If you do not care about the following options, use `stringifyEntitiesLight`,
105 | which always outputs hexadecimal character references.
106 | 
107 | ###### `options.useNamedReferences`
108 | 
109 | Prefer named character references (`&amp;`) where possible (`boolean?`, default:
110 | `false`).
111 | 
112 | ###### `options.useShortestReferences`
113 | 
114 | Prefer the shortest possible reference, if that results in fewer bytes
115 | (`boolean?`, default: `false`).
116 | 
117 | > ⚠️ **Note**: `useNamedReferences` can be omitted when using
118 | > `useShortestReferences`.
119 | 
120 | ###### `options.omitOptionalSemicolons`
121 | 
122 | Whether to omit semicolons when possible (`boolean?`, default: `false`).
123 | 
124 | > ⚠️ **Note**: This creates what HTML calls “parse errors” but is otherwise
125 | > still valid HTML — don’t use this except when building a minifier.
126 | > Omitting semicolons is possible for certain named and numeric references in
127 | > some cases.
128 | 
129 | ###### `options.attribute`
130 | 
131 | Create character references which don’t fail in attributes (`boolean?`, default:
132 | `false`).
133 | 
134 | > ⚠️ **Note**: `attribute` only applies when operating dangerously with
135 | > `omitOptionalSemicolons: true`.
136 | 
137 | #### Returns
138 | 
139 | Encoded value (`string`).
140 | 
141 | ## Algorithm
142 | 
143 | By default, all dangerous, non-ASCII, and non-printable ASCII characters are
144 | encoded.
145 | A [subset][] of characters can be given to encode just those characters.
146 | Alternatively, pass [`escapeOnly`][escapeonly] to escape just the dangerous
147 | characters (`"`, `'`, `<`, `>`, `&`, `` ` ``).
148 | By default, hexadecimal character references are used.
149 | Pass [`useNamedReferences`][named] to use named character references when
150 | possible, or [`useShortestReferences`][short] to use whichever is shortest:
151 | decimal, hexadecimal, or named.
152 | There is also a `stringifyEntitiesLight` export, which works just like
153 | `stringifyEntities` but without the formatting options: it’s much smaller but
154 | always outputs hexadecimal character references.
155 | 
156 | ## Types
157 | 
158 | This package is fully typed with [TypeScript][].
159 | It exports the additional types `Options` and `LightOptions`.
160 | 
161 | ## Compatibility
162 | 
163 | This package is at least compatible with all maintained versions of Node.js.
164 | As of now, that is Node.js 14.14+ and 16.0+.
165 | It also works in Deno and modern browsers.
166 | 
167 | ## Security
168 | 
169 | This package is safe.
170 | 
171 | ## Related
172 | 
173 | *   [`parse-entities`](https://github.com/wooorm/parse-entities)
174 |     — parse (decode) HTML character references
175 | *   [`wooorm/character-entities`](https://github.com/wooorm/character-entities)
176 |     — info on character references
177 | *   [`wooorm/character-entities-html4`](https://github.com/wooorm/character-entities-html4)
178 |     — info on HTML 4 character references
179 | *   [`wooorm/character-entities-legacy`](https://github.com/wooorm/character-entities-legacy)
180 |     — info on legacy character references
181 | *   [`wooorm/character-reference-invalid`](https://github.com/wooorm/character-reference-invalid)
182 |     — info on invalid numeric character references
183 | 
184 | ## Contribute
185 | 
186 | Yes please!
187 | See [How to Contribute to Open Source][contribute].
188 | 
189 | ## License
190 | 
191 | [MIT][license] © [Titus Wormer][author]
192 | 
193 | <!-- Definitions -->
194 | 
195 | [build-badge]: https://github.com/wooorm/stringify-entities/workflows/main/badge.svg
196 | 
197 | [build]: https://github.com/wooorm/stringify-entities/actions
198 | 
199 | [coverage-badge]: https://img.shields.io/codecov/c/github/wooorm/stringify-entities.svg
200 | 
201 | [coverage]: https://codecov.io/github/wooorm/stringify-entities
202 | 
203 | [downloads-badge]: https://img.shields.io/npm/dm/stringify-entities.svg
204 | 
205 | [downloads]: https://www.npmjs.com/package/stringify-entities
206 | 
207 | [size-badge]: https://img.shields.io/bundlephobia/minzip/stringify-entities.svg
208 | 
209 | [size]: https://bundlephobia.com/result?p=stringify-entities
210 | 
211 | [npm]: https://docs.npmjs.com/cli/install
212 | 
213 | [esmsh]: https://esm.sh
214 | 
215 | [license]: license
216 | 
217 | [author]: https://wooorm.com
218 | 
219 | [esm]: https://gist.github.com/sindresorhus/a39789f98801d908bbc7ff3ecc99d99c
220 | 
221 | [typescript]: https://www.typescriptlang.org
222 | 
223 | [contribute]: https://opensource.guide/how-to-contribute/
224 | 
225 | [virus]: https://www.telegraph.co.uk/technology/advice/10516839/Why-do-some-apostrophes-get-replaced-with-andapos.html
226 | 
227 | [subset]: #optionssubset
228 | 
229 | [escapeonly]: #optionsescapeonly
230 | 
231 | [named]: #optionsusenamedreferences
232 | 
233 | [short]: #optionsuseshortestreferences
234 | 


--------------------------------------------------------------------------------
/test.js:
--------------------------------------------------------------------------------
  1 | import assert from 'node:assert/strict'
  2 | import test from 'node:test'
  3 | import {stringifyEntities, stringifyEntitiesLight} from './index.js'
  4 | 
  5 | test('stringifyEntities(value[, options])', function () {
  6 |   assert.equal(
  7 |     stringifyEntities('foo\u00A9bar\uD834\uDF06baz\u2603qux'),
  8 |     'foo&#xA9;bar&#x1D306;baz&#x2603;qux',
  9 |     'should encode non-ASCII characters by default'
 10 |   )
 11 | 
 12 |   assert.equal(
 13 |     stringifyEntities('foo\u00A9bar\uD834\uDF06baz\u2603qux', {
 14 |       useNamedReferences: true
 15 |     }),
 16 |     'foo&copy;bar&#x1D306;baz&#x2603;qux',
 17 |     'should encode w/ named character references if possible w/ `useNamedReferences`'
 18 |   )
 19 | 
 20 |   assert.equal(
 21 |     stringifyEntities('alpha © bravo ≠ charlie 𝌆 delta " echo', {
 22 |       useShortestReferences: true
 23 |     }),
 24 |     'alpha &#xA9; bravo &ne; charlie &#x1D306; delta &#34; echo',
 25 |     'should encode w/ shortest character references if `useShortestReferences`'
 26 |   )
 27 | 
 28 |   assert.equal(
 29 |     stringifyEntities('" "0 "a "z µ µ0 µa µz', {
 30 |       useShortestReferences: true,
 31 |       omitOptionalSemicolons: true
 32 |     }),
 33 |     '&#34 &#34;0 &#34a &#34z &#xB5 &#xB5;0 &#181a &#xB5z',
 34 |     'should encode w/ shortest numeric reference based on `next` w/ `omitOptionalSemicolons`'
 35 |   )
 36 | 
 37 |   assert.equal(
 38 |     stringifyEntities('\'"<>&'),
 39 |     '&#x27;&#x22;&#x3C;&#x3E;&#x26;',
 40 |     'should encode dangerous characters as hexadecimal character references by default'
 41 |   )
 42 | 
 43 |   assert.equal(
 44 |     stringifyEntities('\'"<>&', {subset: ['&']}),
 45 |     '\'"<>&#x26;',
 46 |     'should encode a `subset`'
 47 |   )
 48 | 
 49 |   assert.equal(
 50 |     stringifyEntities('a[b]c', {subset: ['[', ']']}),
 51 |     'a&#x5B;b&#x5D;c',
 52 |     'should encode special regex characters in `subset`'
 53 |   )
 54 | 
 55 |   assert.equal(
 56 |     stringifyEntities('\'"<>&', {subset: ['&'], useNamedReferences: true}),
 57 |     '\'"<>&amp;',
 58 |     'should encode a `subset` w/ `useNamedReferences`'
 59 |   )
 60 | 
 61 |   assert.equal(
 62 |     stringifyEntities('&such', {omitOptionalSemicolons: true}),
 63 |     '&#x26such',
 64 |     'should omit semicolons w/ `omitOptionalSemicolons`'
 65 |   )
 66 | 
 67 |   assert.equal(
 68 |     stringifyEntities('&such', {
 69 |       useNamedReferences: true,
 70 |       omitOptionalSemicolons: true
 71 |     }),
 72 |     '&ampsuch',
 73 |     'should omit semicolons w/ `omitOptionalSemicolons` and `useNamedReferences`'
 74 |   )
 75 | 
 76 |   assert.equal(
 77 |     stringifyEntities('&bada55', {omitOptionalSemicolons: true}),
 78 |     '&#x26;bada55',
 79 |     'should not omit semicolons when numeric and the next is hexadecimal'
 80 |   )
 81 | 
 82 |   assert.equal(
 83 |     stringifyEntities('& such', {
 84 |       attribute: true,
 85 |       useNamedReferences: true,
 86 |       omitOptionalSemicolons: true
 87 |     }),
 88 |     '&amp such',
 89 |     'should omit semicolons (named in attribute)'
 90 |   )
 91 | 
 92 |   assert.equal(
 93 |     stringifyEntities('&such', {
 94 |       attribute: true,
 95 |       useNamedReferences: true,
 96 |       omitOptionalSemicolons: true
 97 |     }),
 98 |     '&amp;such',
 99 |     'should not omit semicolons when named in attribute and the next character is alphanumeric'
100 |   )
101 | 
102 |   assert.equal(
103 |     stringifyEntities('&=such', {
104 |       attribute: true,
105 |       useNamedReferences: true,
106 |       omitOptionalSemicolons: true
107 |     }),
108 |     '&amp;=such',
109 |     'should not omit semicolons when named in attribute and the next character is `=`'
110 |   )
111 | 
112 |   assert.equal(
113 |     stringifyEntities('¬it;', {
114 |       useNamedReferences: true,
115 |       omitOptionalSemicolons: true
116 |     }),
117 |     '&not;it;',
118 |     'should not omit semicolons when conflicting'
119 |   )
120 | 
121 |   assert.equal(
122 |     stringifyEntities('&amp', {
123 |       useNamedReferences: true,
124 |       omitOptionalSemicolons: true
125 |     }),
126 |     '&ampamp',
127 |     'should omit semicolons when named, not in an attribute, and the next character is alphanumeric'
128 |   )
129 | 
130 |   assert.equal(
131 |     stringifyEntities('&=', {
132 |       useNamedReferences: true,
133 |       omitOptionalSemicolons: true
134 |     }),
135 |     '&amp=',
136 |     'should omit semicolons when named, not in an attribute, and the next character is `=`'
137 |   )
138 | 
139 |   assert.equal(
140 |     stringifyEntities('foo\uD800bar'),
141 |     'foo&#xD800;bar',
142 |     'should support a lone high surrogate (lowest)'
143 |   )
144 | 
145 |   assert.equal(
146 |     stringifyEntities('foo\uDBFFbar'),
147 |     'foo&#xDBFF;bar',
148 |     'should support a lone high surrogate (highest)'
149 |   )
150 | 
151 |   assert.equal(
152 |     stringifyEntities('\uD800bar'),
153 |     '&#xD800;bar',
154 |     'should support a lone high surrogate at the start of a string (lowest)'
155 |   )
156 | 
157 |   assert.equal(
158 |     stringifyEntities('\uDBFFbar'),
159 |     '&#xDBFF;bar',
160 |     'should support a lone high surrogate at the start of a string (highest)'
161 |   )
162 | 
163 |   assert.equal(
164 |     stringifyEntities('foo\uD800'),
165 |     'foo&#xD800;',
166 |     'should support a lone high surrogate at the end of a string (lowest)'
167 |   )
168 | 
169 |   assert.equal(
170 |     stringifyEntities('foo\uDBFF'),
171 |     'foo&#xDBFF;',
172 |     'should support a lone high surrogate at the end of a string (highest)'
173 |   )
174 | 
175 |   assert.equal(
176 |     stringifyEntities('foo\uDC00bar'),
177 |     'foo&#xDC00;bar',
178 |     'should support a lone low surrogate (lowest)'
179 |   )
180 | 
181 |   assert.equal(
182 |     stringifyEntities('foo\uDFFFbar'),
183 |     'foo&#xDFFF;bar',
184 |     'should support a lone low surrogate (highest)'
185 |   )
186 | 
187 |   assert.equal(
188 |     stringifyEntities('\uDC00bar'),
189 |     '&#xDC00;bar',
190 |     'should support a lone low surrogate at the start of a string (lowest)'
191 |   )
192 | 
193 |   assert.equal(
194 |     stringifyEntities('\uDFFFbar'),
195 |     '&#xDFFF;bar',
196 |     'should support a lone low surrogate at the start of a string (highest)'
197 |   )
198 | 
199 |   assert.equal(
200 |     stringifyEntities('foo\uDC00'),
201 |     'foo&#xDC00;',
202 |     'should support a lone low surrogate at the end of a string (lowest)'
203 |   )
204 | 
205 |   assert.equal(
206 |     stringifyEntities(
207 |       '\0\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\u000B\u000E\u000F\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001A\u001B\u001C\u001D\u001E\u001F\u007F\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\uFDD0\uFDD1\uFDD2\uFDD3\uFDD4\uFDD5\uFDD6\uFDD7\uFDD8\uFDD9\uFDDA\uFDDB\uFDDC\uFDDD\uFDDE\uFDDF\uFDE0\uFDE1\uFDE2\uFDE3\uFDE4\uFDE5\uFDE6\uFDE7\uFDE8\uFDE9\uFDEA\uFDEB\uFDEC\uFDED\uFDEE\uFDEF\uFFFE\uFFFF\uD83F\uDFFE\uD83F\uDFFF\uD87F\uDFFE\uD87F\uDFFF\uD8BF\uDFFE\uD8BF\uDFFF\uD8FF\uDFFE\uD8FF\uDFFF\uD93F\uDFFE\uD93F\uDFFF\uD97F\uDFFE\uD97F\uDFFF\uD9BF\uDFFE\uD9BF\uDFFF\uD9FF\uDFFE\uD9FF\uDFFF\uDA3F\uDFFE\uDA3F\uDFFF\uDA7F\uDFFE\uDA7F\uDFFF\uDABF\uDFFE\uDABF\uDFFF\uDAFF\uDFFE\uDAFF\uDFFF\uDB3F\uDFFE\uDB3F\uDFFF\uDB7F\uDFFE\uDB7F\uDFFF\uDBBF\uDFFE\uDBBF\uDFFF\uDBFF\uDFFE\uDBFF\uDFFF'
208 |     ),
209 |     '\0&#x5;&#x6;&#x7;&#x8;&#xB;&#xE;&#xF;&#x10;&#x11;&#x12;&#x13;&#x14;&#x15;&#x16;&#x17;&#x18;&#x19;&#x1A;&#x1B;&#x1C;&#x1D;&#x1E;&#x1F;&#x7F;\u0080&#x81;\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C&#x8D;\u008E&#x8F;&#x90;\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C&#x9D;\u009E\u009F&#xFDD0;&#xFDD1;&#xFDD2;&#xFDD3;&#xFDD4;&#xFDD5;&#xFDD6;&#xFDD7;&#xFDD8;&#xFDD9;&#xFDDA;&#xFDDB;&#xFDDC;&#xFDDD;&#xFDDE;&#xFDDF;&#xFDE0;&#xFDE1;&#xFDE2;&#xFDE3;&#xFDE4;&#xFDE5;&#xFDE6;&#xFDE7;&#xFDE8;&#xFDE9;&#xFDEA;&#xFDEB;&#xFDEC;&#xFDED;&#xFDEE;&#xFDEF;&#xFFFE;&#xFFFF;&#x1FFFE;&#x1FFFF;&#x2FFFE;&#x2FFFF;&#x3FFFE;&#x3FFFF;&#x4FFFE;&#x4FFFF;&#x5FFFE;&#x5FFFF;&#x6FFFE;&#x6FFFF;&#x7FFFE;&#x7FFFF;&#x8FFFE;&#x8FFFF;&#x9FFFE;&#x9FFFF;&#xAFFFE;&#xAFFFF;&#xBFFFE;&#xBFFFF;&#xCFFFE;&#xCFFFF;&#xDFFFE;&#xDFFFF;&#xEFFFE;&#xEFFFF;&#xFFFFE;&#xFFFFF;&#x10FFFE;&#x10FFFF;',
210 |     'should encodes disallowed code points in input, except those whose character references would refer to another code point'
211 |   )
212 | 
213 |   assert.equal(
214 |     stringifyEntities('\0\u0089'),
215 |     '\0\u0089',
216 |     'should not encode invalid code points whose character references would refer to another code point'
217 |   )
218 | })
219 | 
220 | test('stringifyEntitiesLight(value[, options])', function () {
221 |   assert.equal(
222 |     stringifyEntitiesLight('foo\u00A9bar\uD834\uDF06baz\u2603qux'),
223 |     'foo&#xA9;bar&#x1D306;baz&#x2603;qux',
224 |     'should encode in light mode'
225 |   )
226 | 
227 |   assert.equal(
228 |     stringifyEntitiesLight('\'"<>&', {subset: ['&']}),
229 |     '\'"<>&#x26;',
230 |     'should support a `subset`'
231 |   )
232 | })
233 | 


--------------------------------------------------------------------------------