├── jsdoc.json ├── eslint.config.js ├── .gitignore ├── .github └── workflows │ ├── test.yml │ └── release.yml ├── CITATION.cff ├── data ├── chatino.json └── blns.json ├── LICENSE.md ├── package.json ├── transliterate.test.js ├── transliterate.js └── README.md /jsdoc.json: -------------------------------------------------------------------------------- 1 | { 2 | "opts": { 3 | "destination": "./docs/", 4 | "readme": "./README.md", 5 | "recurse": true 6 | }, 7 | "plugins": [ 8 | "plugins/markdown" 9 | ], 10 | "source": { 11 | "include": ["transliterate.js"] 12 | }, 13 | "templates": { 14 | "theme": "cosmo" 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /eslint.config.js: -------------------------------------------------------------------------------- 1 | import chaiPlugin from 'eslint-plugin-chai-friendly' 2 | import config from '@digitallinguistics/eslint-config' 3 | 4 | export default [ 5 | ...config, 6 | { 7 | plugins: { 'chai-friendly': chaiPlugin }, 8 | rules: { 9 | 'chai-friendly/no-unused-expressions': `error`, 10 | indent: [`error`, 2, { MemberExpression: 0 }], 11 | 'no-unused-expressions': `off`, 12 | }, 13 | }, 14 | ] 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | 8 | # Dependency directories 9 | node_modules/ 10 | jspm_packages/ 11 | 12 | # Optional npm cache directory 13 | .npm 14 | 15 | # Optional eslint cache 16 | .eslintcache 17 | 18 | # Optional REPL history 19 | .node_repl_history 20 | 21 | # Output of 'npm pack' 22 | *.tgz 23 | 24 | # Yarn Integrity file 25 | .yarn-integrity 26 | 27 | # dotenv environment variables file 28 | .env 29 | 30 | # Project 31 | docs 32 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: 7 | - main 8 | workflow_dispatch: 9 | 10 | jobs: 11 | run-tests: 12 | name: Run tests 13 | runs-on: ubuntu-latest 14 | steps: 15 | 16 | - name: Check out repo 17 | uses: actions/checkout@v4 18 | 19 | - name: Set up Node for npm 20 | uses: actions/setup-node@v4 21 | with: 22 | node-version: 20.x 23 | registry-url: https://registry.npmjs.org/ 24 | 25 | - name: Install dependencies 26 | run: npm ci 27 | 28 | - name: Run tests 29 | run: npm test 30 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | authors: 2 | - family-names: Hieber 3 | given-names: Daniel W. 4 | orcid: https://orcid.org/0000-0002-1411-3773 5 | website: https://github.com/dwhieb 6 | cff-version: 1.2.0 7 | doi: 10.5281/zenodo.2550468 8 | keywords: 9 | - digital linguistics 10 | - digital humanities 11 | - linguistics 12 | - orthography 13 | - transliteration 14 | - writing 15 | license: MIT 16 | message: If you use this software, please cite it using these metadata. 17 | repository-code: https://github.com/digitallinguistics/transliterate 18 | title: DLx Transliterator 19 | url: https://github.com/digitallinguistics/transliterate -------------------------------------------------------------------------------- /data/chatino.json: -------------------------------------------------------------------------------- 1 | { 2 | " -": "", 3 | "=": "", 4 | "-": "", 5 | "7": "'", 6 | "a": "a", 7 | "a!": "á", 8 | "a!&": "ä́", 9 | "a&": "ä", 10 | "a_": "a̱", 11 | "a_&": "ä̱", 12 | "b": "b", 13 | "ch": "ch", 14 | "e": "e", 15 | "e!": "é", 16 | "e!&": "ë́", 17 | "e&": "ë", 18 | "e_": "e̱", 19 | "e_&": "ë̱", 20 | "i": "i", 21 | "i!": "í", 22 | "i!&": "ḯ", 23 | "i&": "ï", 24 | "i_": "i̱", 25 | "i_&": "ï̱", 26 | "j": "j", 27 | "k": "k", 28 | "kw": "ku", 29 | "ky": "ky", 30 | "l": "l", 31 | "ly": "ly", 32 | "m": "m", 33 | "n": "n", 34 | "ny": "ny", 35 | "o": "o", 36 | "o!": "ó", 37 | "o!&": "ö́", 38 | "o&": "ö", 39 | "o_": "o̱", 40 | "o_&": "ö̱", 41 | "p": "p", 42 | "r": "r", 43 | "s": "s", 44 | "sh": "sh", 45 | "t": "t", 46 | "ts": "ts", 47 | "ty": "ty", 48 | "u": "u", 49 | "u!": "ú", 50 | "u!&": "ǘ", 51 | "u&": "ü", 52 | "u_": "u̱", 53 | "u_&": "ü̱", 54 | "w": "u", 55 | "y": "y" 56 | } 57 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Daniel W. Hieber 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@digitallinguistics/transliterate", 3 | "version": "0.4.1", 4 | "description": "A small JavaScript library for transliterating and/or sanitizing strings", 5 | "keywords": [ 6 | "transliteration", 7 | "Digital Linguistics", 8 | "DLx", 9 | "linguistics", 10 | "digital humanities" 11 | ], 12 | "homepage": "https://github.com/digitallinguistics/transliterate", 13 | "bugs": "https://github.com/digitallinguistics/transliterate/issues", 14 | "license": "MIT", 15 | "author": { 16 | "name": "Daniel W. Hieber", 17 | "url": "https://danielhieber.com", 18 | "email": "dwhieb@gmail.com" 19 | }, 20 | "type": "module", 21 | "repository": { 22 | "type": "git", 23 | "url": "git+https://github.com/digitallinguistics/transliterate.git" 24 | }, 25 | "main": "transliterate.js", 26 | "scripts": { 27 | "docs": "jsdoc -c jsdoc.json", 28 | "test": "node --test" 29 | }, 30 | "publishConfig": { 31 | "access": "public" 32 | }, 33 | "engines": { 34 | "node": ">=20.x" 35 | }, 36 | "devDependencies": { 37 | "@digitallinguistics/eslint-config": "^0.5.2", 38 | "@eslint/js": "^9.3.0", 39 | "@stylistic/eslint-plugin-js": "^2.1.0", 40 | "chai": "^5.1.1", 41 | "eslint": "^9.3.0", 42 | "eslint-plugin-chai-friendly": "^0.8.0", 43 | "jsdoc": "^4.0.3" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: release 2 | 3 | on: 4 | release: 5 | types: 6 | - published 7 | workflow_dispatch: 8 | 9 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 10 | permissions: 11 | contents: read 12 | pages: write 13 | id-token: write 14 | 15 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued. 16 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 17 | concurrency: 18 | group: "pages" 19 | cancel-in-progress: false 20 | 21 | jobs: 22 | 23 | publish: 24 | name: Publish to npm 25 | runs-on: ubuntu-latest 26 | steps: 27 | 28 | - name: Check out repo 29 | uses: actions/checkout@v4 30 | 31 | - name: Set up Node for npm 32 | uses: actions/setup-node@v4 33 | with: 34 | node-version: 20.x 35 | registry-url: https://registry.npmjs.org/ 36 | 37 | - name: Install dependencies 38 | run: npm ci 39 | 40 | - name: Run tests 41 | run: npm test 42 | 43 | - name: Publish to npm 44 | run: npm publish 45 | env: 46 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 47 | 48 | deploy: 49 | name: Deploy documentation to GitHub Pages 50 | environment: 51 | name: github-pages 52 | url: ${{ steps.deployment.outputs.page_url }} 53 | runs-on: ubuntu-latest 54 | steps: 55 | 56 | - name: Check out repo 57 | uses: actions/checkout@v4 58 | 59 | - name: Set up Node for npm 60 | uses: actions/setup-node@v4 61 | with: 62 | node-version: 20.x 63 | 64 | - name: Install dependencies 65 | run: npm ci 66 | 67 | - name: Run tests 68 | run: npm test 69 | 70 | - name: Build docs 71 | run: npm run docs 72 | 73 | - name: Set up pages 74 | uses: actions/configure-pages@v5 75 | 76 | - name: Upload artifact 77 | uses: actions/upload-pages-artifact@v3 78 | with: 79 | path: './docs' 80 | 81 | - name: Deploy to GitHub Pages 82 | id: deployment 83 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /transliterate.test.js: -------------------------------------------------------------------------------- 1 | import { expect } from 'chai' 2 | import path from 'node:path' 3 | import { readFile } from 'node:fs/promises' 4 | import { transliterate } from './transliterate.js' 5 | 6 | import { describe, it } from 'node:test' 7 | 8 | const dataDir = path.resolve(import.meta.dirname, `./data`) 9 | 10 | const blnsJSON = await readFile(path.join(dataDir, `blns.json`)) 11 | const blns = JSON.parse(blnsJSON) 12 | 13 | const chatinoJSON = await readFile(path.join(dataDir, `chatino.json`)) 14 | const chatino = JSON.parse(chatinoJSON) 15 | 16 | describe(`transliterate`, function() { 17 | 18 | it(`accepts empty strings`, function() { 19 | 20 | const substitutions = { t: `d` } 21 | const output = transliterate(``, substitutions) 22 | 23 | expect(output).to.be.empty 24 | 25 | }) 26 | 27 | it(`accepts Maps`, function() { 28 | 29 | const substitutions = new Map([ 30 | [`a`, `aa`], 31 | [`bc`, `cc`], 32 | [`b`, `bb`], 33 | ]) 34 | 35 | const input = `abbc` 36 | const correctOutput = `aabbcc` 37 | const actualOutput = transliterate(input, substitutions) 38 | 39 | expect(actualOutput).to.be.equal(correctOutput) 40 | 41 | }) 42 | 43 | it(`handles bleeding problems`, function() { 44 | 45 | const substitutions = { 46 | s: `z`, 47 | ts: `c`, 48 | } 49 | 50 | const input = `atsa` 51 | const correctOutput = `aca` 52 | const actualOutput = transliterate(input, substitutions) 53 | 54 | expect(actualOutput).to.equal(correctOutput) 55 | 56 | }) 57 | 58 | it(`handles feeding problems`, function() { 59 | 60 | const substitutions = { 61 | d: `θ`, 62 | t: `d`, 63 | } 64 | 65 | const input = `atada` 66 | const correctOutput = `adaθa` 67 | const actualOutput = transliterate(input, substitutions) 68 | 69 | expect(actualOutput).to.equal(correctOutput) 70 | 71 | }) 72 | 73 | it(`handles naughty strings`, function() { 74 | 75 | const substitutions = { ʃ: `s` } 76 | 77 | for (const str of blns) { 78 | expect(transliterate(str, substitutions)).to.equal(str) 79 | } 80 | 81 | }) 82 | 83 | it(`handles numbers as inputs`, function() { 84 | 85 | const substitutions = { 86 | 0: `a`, 87 | 1: `b`, 88 | 2: `c`, 89 | 3: `d`, 90 | 4: `e`, 91 | 5: `f`, 92 | 6: `g`, 93 | 7: `h`, 94 | 8: `i`, 95 | 9: `j`, 96 | } 97 | 98 | const input = `0123456789` 99 | const correctOutput = `abcdefghij` 100 | const actualOutput = transliterate(input, substitutions) 101 | 102 | expect(actualOutput).to.equal(correctOutput) 103 | 104 | }) 105 | 106 | it(`handles regular expression special characters`, function() { 107 | 108 | const substitutions = { 109 | '*': `·`, 110 | 'a*': `a·`, 111 | } 112 | 113 | const input = `*ata*` 114 | const correctOutput = `·ata·` 115 | const actualOutput = transliterate(input, substitutions) 116 | 117 | expect(actualOutput).to.equal(correctOutput) 118 | 119 | }) 120 | 121 | it(`retains line breaks`, function() { 122 | 123 | const substitutions = {} 124 | 125 | const input = `Hello world, 126 | This is some multi-line input.\nThis is also multi-line.` 127 | 128 | const output = transliterate(input, substitutions) 129 | 130 | expect(output).to.equal(input) 131 | 132 | }) 133 | 134 | it(`transliterates Chatino`, function() { 135 | 136 | const input = `ji_& xiku_na!7a laa7 nka7nelo!7o_ na! nkata_a!` 137 | const correctOutput = `jï̱ xiku̱ná'a laa' nka'neló'o̱ ná nkata̱á` 138 | const actualOutput = transliterate(input, chatino) 139 | 140 | expect(actualOutput).to.equal(correctOutput) 141 | 142 | }) 143 | 144 | }) 145 | -------------------------------------------------------------------------------- /transliterate.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable 2 | no-constructor-return, 3 | no-use-before-define, 4 | */ 5 | 6 | /** 7 | * Escapes RegExp characters in a string. 8 | * @param {String} input 9 | * @returns {String} 10 | */ 11 | function escapeRegExp(input) { 12 | return input.replace(/[.*+?^${}()|[\]\\]/gu, `\\$&`) 13 | } 14 | 15 | /** 16 | * Gets a random code point from the Unicode geometric shapes block 17 | * @private 18 | * @return {String} Returns the random Unicode character 19 | */ 20 | function getRandomCodePoint() { 21 | const blockBegin = 9632 22 | const blockLength = 95 23 | return String.fromCodePoint(Math.floor(Math.random() * blockLength) + blockBegin) 24 | } 25 | 26 | /** 27 | * An alias for the {@link transliterate} method 28 | * @see transliterate 29 | */ 30 | function sanitize(string, substitutions) { 31 | return transliterate(string, substitutions) 32 | } 33 | 34 | /** 35 | * An alias for the {@link Transliterator} class 36 | * @see Transliterator 37 | */ 38 | class Sanitizer { 39 | constructor(substitutions) { 40 | this.substitutions = substitutions 41 | return string => sanitize(string, substitutions) 42 | } 43 | } 44 | 45 | /** 46 | * Makes a series of substitutions on a string. Can be used to convert a string from one writing system to another (a process known as "transliteration") or to remove unwanted characters or sequences of characters from a string (a process known as "sanitization"). 47 | * @param {String} [string=``] The string to transliterate or sanitize. 48 | * @param {Object} [substitutions = new Map] A hash of substitutions to make on the string. Each key in this object should be a string of characters you want to replace, and the value for that key should be the new string of characters to replace it with. For example, setting `"s": "z"` will replace all `s` characters with `z`. To sanitize a string, provide each unwanted character or sequence of characters as as a key, and set the value of that key to an empty string. For example, setting `"ts": ""` in this object will remove all sequences of `ts` from the string (but leave individual instances of `t` and `s` that do not appear in sequence). 49 | * @return {String} Returns a new string with all substitutions made. 50 | * @example {@lang javascript} 51 | * const substitutions = { 52 | * tʼ: `d`, 53 | * ts: `c`, 54 | * }; 55 | * 56 | * const input = `tsatʼ`; 57 | * const output = transliterate(input, substitutions); 58 | * console.log(output); // --> "cad" 59 | */ 60 | function transliterate(string = ``, subs = new Map) { 61 | 62 | // Type Checking 63 | 64 | if (typeof string !== `string`) { 65 | throw new TypeError(`The first argument passed to the transliterate function must be a string.`) 66 | } 67 | 68 | if (!(subs instanceof Map || typeof subs === `object`)) { 69 | throw new TypeError(`The substitutions object must be a Map or Object.`) 70 | } 71 | 72 | if (!(subs instanceof Map)) { 73 | subs = new Map(Object.entries(subs)) 74 | } 75 | 76 | const values = Array.from(subs.values()) 77 | 78 | if (!values.every(val => typeof val === `string`)) { 79 | throw new TypeError(`Replacements must all be strings.`) 80 | } 81 | 82 | // Variables 83 | 84 | const temps = new Map // Track of any temporary placeholders 85 | let str = string // The string to manipulate 86 | 87 | // Transliteration Steps 88 | 89 | // Sort the substitutions by length of the input (avoids partial replacements) 90 | subs = new Map(Array.from(subs.entries()).sort(([a], [b]) => b.length - a.length)) 91 | 92 | // Make each substitution on the string, using temporary placeholders if needed 93 | for (const [input, replacement] of subs) { 94 | 95 | // Add the escaped substitution to the set of substitutions to make 96 | subs.set(input, replacement) 97 | 98 | // Check for feeding problems, and create temporary placeholder substitutions if found 99 | if (subs.get(replacement)) { 100 | 101 | // Get a random temporary placeholder to substitute 102 | let temp = getRandomCodePoint() 103 | 104 | // Make sure you haven't already used that placeholder, and generate a new one if so 105 | while (temps.has(temp)) temp = getRandomCodePoint() 106 | 107 | // Add the placeholder to the set of temporary substitutions 108 | temps.set(temp, replacement) 109 | 110 | // Update the list of substitutions to use the temporary placeholder 111 | subs.set(input, temp) 112 | 113 | } 114 | 115 | // Escape regexp special characters in the input 116 | const escapedInput = escapeRegExp(input) 117 | 118 | // Make the substitution on the string, using the temporary placeholder if present 119 | const regexp = new RegExp(escapedInput, `gu`) 120 | str = str.replace(regexp, subs.get(input)) 121 | 122 | } 123 | 124 | // Replace the temporary placeholders with their original values 125 | for (const [temp, replacement] of temps) { 126 | const regexp = new RegExp(temp, `gu`) 127 | str = str.replace(regexp, replacement) 128 | } 129 | 130 | // Return the transliterated string 131 | return str 132 | 133 | } 134 | 135 | /** 136 | * A Transliterator class that saves a set of transliteration rules for repeated use. 137 | * @prop {Object} substitutions The set of substitution rules for this Transliterator. You can update the substitution rules used by this Transliterator at any time by modifying this object. See the {@link transliterate} method for documentation on how this substitutions object should be formatted. 138 | * @example {@lang javascript} 139 | * const substitutions = { 140 | * tʼ: `d`, 141 | * ts: `c`, 142 | * }; 143 | * 144 | * const transliterate = new Transliterator(substitutions); 145 | * const input = `tsatʼ`; 146 | * const output = transliterate(input); 147 | * console.log(output); // --> "cad" 148 | */ 149 | class Transliterator { 150 | /** 151 | * Create a new Transliterator 152 | * @param {Object} substitutions The set of substitution rules that this Transliterator should use. See the {@link transliterate} method for documentation on how this substitutions object should be formatted. 153 | * @return {Function} Returns a transliterate function that accepts a string and makes the substitutions provided in the `transliterate` argument. 154 | */ 155 | constructor(substitutions) { 156 | this.substitutions = substitutions 157 | return string => transliterate(string, this.substitutions) 158 | } 159 | } 160 | 161 | // Exports 162 | 163 | export { 164 | sanitize, 165 | Sanitizer, 166 | transliterate, 167 | Transliterator, 168 | } 169 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Transliterate 2 | 3 | A small JavaScript library for transliterating and/or sanitizing strings. Tested against a variety of edge cases and unusual inputs. 4 | 5 | ![GitHub Release](https://img.shields.io/github/v/release/digitallinguistics/transliterate) 6 | [![GitHub issues](https://img.shields.io/github/issues/digitallinguistics/transliterate)][issues] 7 | [![DOI](https://zenodo.org/badge/167235084.svg)][Zenodo] 8 | [![GitHub license](https://img.shields.io/github/license/digitallinguistics/transliterate)][license] 9 | [![GitHub stars](https://img.shields.io/github/stars/digitallinguistics/transliterate?style=social)][stargazers] 10 | 11 | ## Overview 12 | 13 | This library is useful for linguists and data analysts working with language data. It can be used to convert a string from one writing system to another (a process known as **transliteration**), or to remove unwanted characters or sequences of characters from a string (a process known as **sanitization**). This library handles common problems that arise during transliteration and sanitization, including [bleeding][bleeding] and [feeding][feeding] issues. 14 | 15 | - [Get answers to questions here.][discussions] 16 | - [Report a problem here.][new-issue] 17 | - [Request a change or feature here.][new-issue] 18 | - [View the complete API for this library here.][API] 19 | 20 | ## Citation & Attribution 21 | 22 | This library is maintained by [Daniel W. Hieber][me]. You can cite this library with its DOI using the following model: 23 | 24 | > Hieber, Daniel W. 2019. digitallinguistics/transliterate. DOI: [10.5281/zenodo.2550468](https://doi.org/10.5281/zenodo.2550468). 25 | 26 | Each version of this library is archived on this project's [Zenodo page][Zenodo]. 27 | 28 | ## Installation 29 | 30 | Install with **npm** or **yarn**: 31 | 32 | ```sh 33 | npm install @digitallinguistics/transliterate # npm 34 | yarn add @digitallinguistics/transliterate # yarn 35 | ``` 36 | 37 | ## Importing the Library 38 | 39 | In the browser, include the library in your HTML (adjust the `src` to point to the location of the `transliterate.js` file in your project): 40 | 41 | ```html 42 | 43 | ``` 44 | 45 | In Node, simply import the library: 46 | 47 | ```js 48 | import { transliterate } from '@digitallinguistics/transliterate'; 49 | ``` 50 | 51 | ## Basic Usage 52 | 53 | The `transliterate` library exports an object with four methods: 54 | 55 | - `transliterate` 56 | - `Transliterator` 57 | - `sanitize` 58 | - `Sanitizer` 59 | 60 | The `sanitize` and `Sanitizer` exports are essentially just aliases for `transliterate` and `Transliterator` respectively. 61 | 62 | To transliterate a string, use the `transliterate` method: 63 | 64 | ```js 65 | // Import the "transliterate" method from the library 66 | import { transliterate } from '@digitallinguistics/transliterate'; 67 | 68 | // The list of substitutions to make 69 | const substitutions = { 70 | p: `b`, 71 | t: `d`, 72 | k: `g`, 73 | }; 74 | 75 | // The string to transliterate 76 | const input = `patak`; 77 | 78 | // Transliterate the string 79 | const output = transliterate(input, substitutions); 80 | 81 | console.log(output); // --> "badag" 82 | ``` 83 | 84 | To save a set of transliteration rules for reuse on more than one string, use the `Transliterator` class: 85 | 86 | ```js 87 | // Import the Transliterator class 88 | import { Transliterator } from '@digitallinguistics/transliterate'; 89 | 90 | // The list of substitutions to use for transliteration 91 | const substitutions = { 92 | p: `b`, 93 | t: `d`, 94 | k: `g`, 95 | }; 96 | 97 | // Create a transliterate function that always 98 | // applies the same substitutions 99 | const transliterate = new Transliterator(substitutions); 100 | 101 | // The string to transliterate 102 | const input = `patak`; 103 | 104 | // Transliterate the string 105 | const output = transliterate(input); 106 | 107 | console.log(output); // --> "badag" 108 | ``` 109 | 110 | **[View the entire API for this library here.][API]** 111 | 112 | ## Working with Substitution Rules 113 | 114 | The transliterate library already handles several tricky cases on your behalf. For example, say you have the following substitution rules, and want to use them on the string `abc`: 115 | 116 | Input | Output 117 | :----:|:-----: 118 | a | b 119 | b | c 120 | 121 | In this case, you probably intend the output to be `bcc`. But if you apply the `a → b` rule before the `b → c` rule, you get the output `ccc`. This is called a [feeding problem][feeding]. The transliterate library automatically avoids feeding problems, so that you get the expected result `bcc` rather than `ccc`. 122 | 123 | Now say that you want to apply the following rules to the string `abacad`. 124 | 125 | Input | Output 126 | :----:|:-----: 127 | a | b 128 | ac | d 129 | 130 | You probably intend the output to be `abdbd`. But if you apply the `a → b` rule before the `ac → d` rule, you get the output `bbbcbd`. This is called a [bleeding problem][bleeding]. The transliterate library automatically avoids bleeding problems as well, so that you get the expected result `abdbd` rather than `bbbcbd`. 131 | 132 | Here are some things to remember about how the transliterate library applies substitutions: 133 | 134 | - Longer substitutions are always made first. If you have substitution rules for both `ch` and `c`, the library will first substitute all instances of `ch` with its replacement, followed by all instances of `c`. 135 | 136 | - If two substitution inputs are the same length, the substitutions will be applied in the order they were passed to the library. For example, if you have the rules `ab → d` and `bc → e`, in that order, the `ab → d` substitutions will be applied first. 137 | 138 | Sometimes the way you want to transliterate a character or sequence of characters will depend on context. For example, you might want `a` to sometimes become `b`, and other times become `c`. In this case you have several options: 139 | 140 | - **Update the original text** to indicate the difference. For example, you might change all the `a`s that you want to become `c`s to `ɑ` or maybe `ac` or `aa` or `\a`, or whatever makes sense for your project. 141 | 142 | - **Update the substitution rules** to take more context into account. For example, if `a` becomes `b` before `c` and becomes `d` elsewhere, you could write your rules like this: 143 | 144 | Input | Output 145 | :----:|:-----: 146 | ab | c 147 | a | d 148 | 149 | - **Update both the original text and the subsitution rules.** For example, you could update the original text to indicate syllable boundaries, and then update your substitution rules to use those boundaries. For instance, the sequence `abc` could be syllabified as `a.bc` or `ab.c`. After updating the original text with syllable boundaries, you could change your rules to target syllable-initial vs. syllable-final `b`; for example: `.b → d` (syllable-initial) and `b. → e` (syllable-final). 150 | 151 | [API]: https://developer.digitallinguistics.io/transliterate 152 | [bleeding]: https://en.wikipedia.org/wiki/Bleeding_order 153 | [discussions]: https://github.com/orgs/digitallinguistics/discussions?discussions_q=is%3Aopen+label%3A%22%F0%9F%94%84+Transliterate%22 154 | [feeding]: https://en.wikipedia.org/wiki/Feeding_order 155 | [issues]: https://github.com/digitallinguistics/transliterate/issues 156 | [license]: https://github.com/digitallinguistics/transliterate/blob/master/LICENSE.md 157 | [new-issue]: https://github.com/digitallinguistics/transliterate/issues/new 158 | [me]: https://github.com/dwhieb 159 | [stargazers]: https://github.com/digitallinguistics/transliterate/stargazers 160 | [Zenodo]: https://doi.org/10.5281/zenodo.2550468 161 | -------------------------------------------------------------------------------- /data/blns.json: -------------------------------------------------------------------------------- 1 | [ 2 | "", 3 | "undefined", 4 | "undef", 5 | "null", 6 | "NULL", 7 | "(null)", 8 | "nil", 9 | "NIL", 10 | "true", 11 | "false", 12 | "True", 13 | "False", 14 | "TRUE", 15 | "FALSE", 16 | "None", 17 | "hasOwnProperty", 18 | "\\", 19 | "\\\\", 20 | "0", 21 | "1", 22 | "1.00", 23 | "$1.00", 24 | "1/2", 25 | "1E2", 26 | "1E02", 27 | "1E+02", 28 | "-1", 29 | "-1.00", 30 | "-$1.00", 31 | "-1/2", 32 | "-1E2", 33 | "-1E02", 34 | "-1E+02", 35 | "1/0", 36 | "0/0", 37 | "-2147483648/-1", 38 | "-9223372036854775808/-1", 39 | "-0", 40 | "-0.0", 41 | "+0", 42 | "+0.0", 43 | "0.00", 44 | "0..0", 45 | ".", 46 | "0.0.0", 47 | "0,00", 48 | "0,,0", 49 | ",", 50 | "0,0,0", 51 | "0.0/0", 52 | "1.0/0.0", 53 | "0.0/0.0", 54 | "1,0/0,0", 55 | "0,0/0,0", 56 | "--1", 57 | "-", 58 | "-.", 59 | "-,", 60 | "999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999", 61 | "NaN", 62 | "Infinity", 63 | "-Infinity", 64 | "INF", 65 | "1#INF", 66 | "-1#IND", 67 | "1#QNAN", 68 | "1#SNAN", 69 | "1#IND", 70 | "0x0", 71 | "0xffffffff", 72 | "0xffffffffffffffff", 73 | "0xabad1dea", 74 | "123456789012345678901234567890123456789", 75 | "1,000.00", 76 | "1 000.00", 77 | "1'000.00", 78 | "1,000,000.00", 79 | "1 000 000.00", 80 | "1'000'000.00", 81 | "1.000,00", 82 | "1 000,00", 83 | "1'000,00", 84 | "1.000.000,00", 85 | "1 000 000,00", 86 | "1'000'000,00", 87 | "01000", 88 | "08", 89 | "09", 90 | "2.2250738585072011e-308", 91 | ",./;'[]\\-=", 92 | "<>?:\"{}|_+", 93 | "!@#$%^&*()`~", 94 | "\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f", 95 | "€‚ƒ„†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ", 96 | "\t\u000b\f …             ​

   ", 97 | "­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪‫‬‭‮⁠⁡⁢⁣⁤⁦⁧⁨⁩𑂽𛲠𛲡𛲢𛲣𝅳𝅴𝅵𝅶𝅷𝅸𝅹𝅺󠀁󠀠󠀡󠀢󠀣󠀤󠀥󠀦󠀧󠀨󠀩󠀪󠀫󠀬󠀭󠀮󠀯󠀰󠀱󠀲󠀳󠀴󠀵󠀶󠀷󠀸󠀹󠀺󠀻󠀼󠀽󠀾󠀿󠁀󠁁󠁂󠁃󠁄󠁅󠁆󠁇󠁈󠁉󠁊󠁋󠁌󠁍󠁎󠁏󠁐󠁑󠁒󠁓󠁔󠁕󠁖󠁗󠁘󠁙󠁚󠁛󠁜󠁝󠁞󠁟󠁠󠁡󠁢󠁣󠁤󠁥󠁦󠁧󠁨󠁩󠁪󠁫󠁬󠁭󠁮󠁯󠁰󠁱󠁲󠁳󠁴󠁵󠁶󠁷󠁸󠁹󠁺󠁻󠁼󠁽󠁾󠁿", 98 | "", 99 | "￾", 100 | "Ω≈ç√∫˜µ≤≥÷", 101 | "åß∂ƒ©˙∆˚¬…æ", 102 | "œ∑´®†¥¨ˆøπ“‘", 103 | "¡™£¢∞§¶•ªº–≠", 104 | "¸˛Ç◊ı˜Â¯˘¿", 105 | "ÅÍÎÏ˝ÓÔÒÚÆ☃", 106 | "Œ„´‰ˇÁ¨ˆØ∏”’", 107 | "`⁄€‹›fifl‡°·‚—±", 108 | "⅛⅜⅝⅞", 109 | "ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя", 110 | "٠١٢٣٤٥٦٧٨٩", 111 | "⁰⁴⁵", 112 | "₀₁₂", 113 | "⁰⁴⁵₀₁₂", 114 | "ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็", 115 | "'", 116 | "\"", 117 | "''", 118 | "\"\"", 119 | "'\"'", 120 | "\"''''\"'\"", 121 | "\"'\"'\"''''\"", 122 | "", 123 | "", 124 | "", 125 | "", 126 | "田中さんにあげて下さい", 127 | "パーティーへ行かないか", 128 | "和製漢語", 129 | "部落格", 130 | "사회과학원 어학연구소", 131 | "찦차를 타고 온 펲시맨과 쑛다리 똠방각하", 132 | "社會科學院語學研究所", 133 | "울란바토르", 134 | "𠜎𠜱𠝹𠱓𠱸𠲖𠳏", 135 | "表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀", 136 | "Ⱥ", 137 | "Ⱦ", 138 | "ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ", 139 | "(。◕ ∀ ◕。)", 140 | "`ィ(´∀`∩", 141 | "__ロ(,_,*)", 142 | "・( ̄∀ ̄)・:*:", 143 | "゚・✿ヾ╲(。◕‿◕。)╱✿・゚", 144 | ",。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’", 145 | "(╯°□°)╯︵ ┻━┻)", 146 | "(ノಥ益ಥ)ノ ┻━┻", 147 | "┬─┬ノ( º _ ºノ)", 148 | "( ͡° ͜ʖ ͡°)", 149 | "😍", 150 | "👩🏽", 151 | "👾 🙇 💁 🙅 🙆 🙋 🙎 🙍", 152 | "🐵 🙈 🙉 🙊", 153 | "❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙", 154 | "✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿", 155 | "🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧", 156 | "0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟", 157 | "🇺🇸🇷🇺🇸 🇦🇫🇦🇲🇸", 158 | "🇺🇸🇷🇺🇸🇦🇫🇦🇲", 159 | "🇺🇸🇷🇺🇸🇦", 160 | "123", 161 | "١٢٣", 162 | "ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو.", 163 | "בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ", 164 | "הָיְתָהtestالصفحات التّحول", 165 | "﷽", 166 | "ﷺ", 167 | "مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، ", 168 | "‪‪test‪", 169 | "‫test‫", 170 | "
test
", 171 | "test⁠test‫", 172 | "⁦test⁧", 173 | "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", 174 | "̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰", 175 | "̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟", 176 | "̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕", 177 | "Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮", 178 | "˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", 179 | "00˙Ɩ$-", 180 | "The quick brown fox jumps over the lazy dog", 181 | "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", 182 | "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", 183 | "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", 184 | "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", 185 | "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", 186 | "𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐", 187 | "⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢", 188 | "", 189 | "<script>alert('123');</script>", 190 | "", 191 | "", 192 | "\">", 193 | "'>", 194 | ">", 195 | "", 196 | "< / script >< script >alert(123)< / script >", 197 | " onfocus=JaVaSCript:alert(123) autofocus", 198 | "\" onfocus=JaVaSCript:alert(123) autofocus", 199 | "' onfocus=JaVaSCript:alert(123) autofocus", 200 | "<script>alert(123)</script>", 201 | "ript>alert(123)ript>", 202 | "-->", 203 | "\";alert(123);t=\"", 204 | "';alert(123);t='", 205 | "JavaSCript:alert(123)", 206 | ";alert(123);", 207 | "src=JaVaSCript:prompt(132)", 208 | "\"><\\x3Cscript>javascript:alert(1)", 221 | "'`\"><\\x00script>javascript:alert(1)", 222 | "ABC
DEF", 223 | "ABC
DEF", 224 | "ABC
DEF", 225 | "ABC
DEF", 226 | "ABC
DEF", 227 | "ABC
DEF", 228 | "ABC
DEF", 229 | "ABC
DEF", 230 | "ABC
DEF", 231 | "ABC
DEF", 232 | "ABC
DEF", 233 | "ABC
DEF", 234 | "ABC
DEF", 235 | "ABC
DEF", 236 | "ABC
DEF", 237 | "ABC
DEF", 238 | "ABC
DEF", 239 | "ABC
DEF", 240 | "ABC
DEF", 241 | "ABC
DEF", 242 | "ABC
DEF", 243 | "ABC
DEF", 244 | "ABC
DEF", 245 | "ABC
DEF", 246 | "ABC
DEF", 247 | "ABC
DEF", 248 | "ABC
DEF", 249 | "test", 250 | "test", 251 | "test", 252 | "test", 253 | "test", 254 | "test", 255 | "test", 256 | "test", 257 | "test", 258 | "test", 259 | "test", 260 | "test", 261 | "test", 262 | "test", 263 | "test", 264 | "test", 265 | "test", 266 | "test", 267 | "test", 268 | "test", 269 | "test", 270 | "test", 271 | "test", 272 | "test", 273 | "test", 274 | "test", 275 | "test", 276 | "test", 277 | "test", 278 | "test", 279 | "test", 280 | "test", 281 | "test", 282 | "test", 283 | "test", 284 | "test", 285 | "test", 286 | "test", 287 | "test", 288 | "test", 289 | "test", 290 | "test", 291 | "test", 292 | "test", 293 | "test", 294 | "test", 295 | "test", 296 | "test", 297 | "test", 298 | "test", 299 | "test", 300 | "test", 301 | "test", 302 | "test", 303 | "test", 304 | "test", 305 | "test", 306 | "`\"'>", 307 | "`\"'>", 308 | "`\"'>", 309 | "`\"'>", 310 | "`\"'>", 311 | "`\"'>", 312 | "`\"'>", 313 | "`\"'>", 314 | "`\"'>", 315 | "`\"'>", 316 | "\"`'>", 317 | "\"`'>", 318 | "\"`'>", 319 | "\"`'>", 320 | "\"`'>", 321 | "\"`'>", 322 | "\"`'>", 323 | "\"`'>", 324 | "\"`'>", 325 | "\"`'>", 326 | "\"`'>", 327 | "\"`'>", 328 | "\"`'>", 329 | "\"`'>", 330 | "\"`'>", 331 | "\"`'>", 332 | "\"`'>", 333 | "\"`'>", 334 | "\"`'>", 335 | "\"`'>", 336 | "\"`'>", 337 | "\"`'>", 338 | "\"`'>", 339 | "\"`'>", 340 | "\"`'>", 341 | "\"`'>", 342 | "\"`'>", 343 | "\"`'>", 344 | "\"`'>", 345 | "\"`'>", 346 | "\"`'>", 347 | "\"`'>", 348 | "\"`'>", 349 | "\"`'>", 350 | "\"`'>", 351 | "\"`'>", 352 | "\"`'>", 353 | "", 354 | "", 355 | "", 356 | "", 357 | "", 358 | "", 359 | "", 360 | "", 361 | "", 362 | "", 363 | "", 364 | "", 365 | "", 366 | "", 367 | "", 368 | "", 369 | "", 370 | "", 371 | "", 372 | "", 373 | "", 374 | "", 375 | "", 376 | "", 377 | "", 378 | "", 379 | "", 380 | "", 381 | "", 382 | "", 383 | "", 384 | "", 385 | "", 386 | "", 387 | "XXX", 388 | "javascript:alert(1)\"` `>", 389 | "", 390 | "", 391 | "<a href=http://foo.bar/#x=`y></a><img alt=\"`><img src=x:x onerror=javascript:alert(1)></a>\">", 392 | "<!--[if]><script>javascript:alert(1)</script -->", 393 | "<!--[if<img src=x onerror=javascript:alert(1)//]> -->", 394 | "<script src=\"/\\%(jscript)s\"></script>", 395 | "<script src=\"\\\\%(jscript)s\"></script>", 396 | "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">", 397 | "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>", 398 | "<IMG SRC=# onmouseover=\"alert('xxs')\">", 399 | "<IMG SRC= onmouseover=\"alert('xxs')\">", 400 | "<IMG onmouseover=\"alert('xxs')\">", 401 | "<IMG SRC=javascript:alert('XSS')>", 402 | "<IMG SRC=javascript:alert('XSS')>", 403 | "<IMG SRC=javascript:alert('XSS')>", 404 | "<IMG SRC=\"jav ascript:alert('XSS');\">", 405 | "<IMG SRC=\"jav ascript:alert('XSS');\">", 406 | "<IMG SRC=\"jav ascript:alert('XSS');\">", 407 | "<IMG SRC=\"jav ascript:alert('XSS');\">", 408 | "perl -e 'print \"<IMG SRC=java\\0script:alert(\\\"XSS\\\")>\";' > out", 409 | "<IMG SRC=\"  javascript:alert('XSS');\">", 410 | "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", 411 | "<BODY onload!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>", 412 | "<SCRIPT/SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>", 413 | "<<SCRIPT>alert(\"XSS\");//<</SCRIPT>", 414 | "<SCRIPT SRC=http://ha.ckers.org/xss.js?< B >", 415 | "<SCRIPT SRC=//ha.ckers.org/.j>", 416 | "<IMG SRC=\"javascript:alert('XSS')\"", 417 | "<iframe src=http://ha.ckers.org/scriptlet.html <", 418 | "\\\";alert('XSS');//", 419 | "<u oncopy=alert()> Copy me</u>", 420 | "<i onwheel=alert(1)> Scroll over me </i>", 421 | "<plaintext>", 422 | "http://a/%%30%30", 423 | "</textarea><script>alert(123)</script>", 424 | "1;DROP TABLE users", 425 | "1'; DROP TABLE users-- 1", 426 | "' OR 1=1 -- 1", 427 | "' OR '1'='1", 428 | " ", 429 | "%", 430 | "_", 431 | "-", 432 | "--", 433 | "--version", 434 | "--help", 435 | "$USER", 436 | "/dev/null; touch /tmp/blns.fail ; echo", 437 | "`touch /tmp/blns.fail`", 438 | "$(touch /tmp/blns.fail)", 439 | "@{[system \"touch /tmp/blns.fail\"]}", 440 | "eval(\"puts 'hello world'\")", 441 | "System(\"ls -al /\")", 442 | "`ls -al /`", 443 | "Kernel.exec(\"ls -al /\")", 444 | "Kernel.exit(1)", 445 | "%x('ls -al /')", 446 | "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?><!DOCTYPE foo [ <!ELEMENT foo ANY ><!ENTITY xxe SYSTEM \"file:///etc/passwd\" >]><foo>&xxe;</foo>", 447 | "$HOME", 448 | "$ENV{'HOME'}", 449 | "%d", 450 | "%s%s%s%s%s", 451 | "{0}", 452 | "%*.*s", 453 | "%@", 454 | "%n", 455 | "File:///", 456 | "../../../../../../../../../../../etc/passwd%00", 457 | "../../../../../../../../../../../etc/hosts", 458 | "() { 0; }; touch /tmp/blns.shellshock1.fail;", 459 | "() { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; }", 460 | "<<< %s(un='%s') = %u", 461 | "+++ATH0", 462 | "CON", 463 | "PRN", 464 | "AUX", 465 | "CLOCK$", 466 | "NUL", 467 | "A:", 468 | "ZZ:", 469 | "COM1", 470 | "LPT1", 471 | "LPT2", 472 | "LPT3", 473 | "COM2", 474 | "COM3", 475 | "COM4", 476 | "DCC SEND STARTKEYLOGGER 0 0 0", 477 | "Scunthorpe General Hospital", 478 | "Penistone Community Church", 479 | "Lightwater Country Park", 480 | "Jimmy Clitheroe", 481 | "Horniman Museum", 482 | "shitake mushrooms", 483 | "RomansInSussex.co.uk", 484 | "http://www.cum.qc.ca/", 485 | "Craig Cockburn, Software Specialist", 486 | "Linda Callahan", 487 | "Dr. Herman I. Libshitz", 488 | "magna cum laude", 489 | "Super Bowl XXX", 490 | "medieval erection of parapets", 491 | "evaluate", 492 | "mocha", 493 | "expression", 494 | "Arsenal canal", 495 | "classic", 496 | "Tyson Gay", 497 | "Dick Van Dyke", 498 | "basement", 499 | "If you're reading this, you've been in a coma for almost 20 years now. We're trying a new technique. We don't know where this message will end up in your dream, but we hope it works. Please wake up, we miss you.", 500 | "Roses are \u001b[0;31mred\u001b[0m, violets are \u001b[0;34mblue. Hope you enjoy terminal hue", 501 | "But now...\u001b[20Cfor my greatest trick...\u001b[8m", 502 | "The quic\b\b\b\b\b\bk brown fo\u0007\u0007\u0007\u0007\u0007\u0007\u0007\u0007\u0007\u0007\u0007x... [Beeeep]", 503 | "Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗", 504 | "🏳0🌈️", 505 | "జ్ఞ‌ా" 506 | ] --------------------------------------------------------------------------------