├── .gitmodules ├── .npmignore ├── .travis.yml ├── AUTHORS ├── LICENSE.MIT.txt ├── LICENSE.WTFPL.txt ├── README.md ├── browser.js ├── index.js ├── lib └── truncate.js ├── package.json └── test.js /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vendor/big-list-of-naughty-strings"] 2 | path = vendor/big-list-of-naughty-strings 3 | url = https://github.com/minimaxir/big-list-of-naughty-strings.git 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | vendor/ 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - "0.10" 4 | - "0.12" 5 | - "1" 6 | - "2" 7 | - "3" 8 | - "4" 9 | - "5" 10 | - "node" 11 | before_install: 12 | - npm install -g npm 13 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Carl Xiong 2 | Parsha Pourkhomami 3 | -------------------------------------------------------------------------------- /LICENSE.MIT.txt: -------------------------------------------------------------------------------- 1 | Copyright 2023 Carl Xiong & Parsha Pourkhomami 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or 
substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /LICENSE.WTFPL.txt: -------------------------------------------------------------------------------- 1 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 2 | Version 2, December 2004 3 | 4 | Copyright (C) 2004 Sam Hocevar 5 | 6 | Everyone is permitted to copy and distribute verbatim or modified 7 | copies of this license document, and changing it is allowed as long 8 | as the name is changed. 9 | 10 | DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 11 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 12 | 13 | 0. You just DO WHAT THE FUCK YOU WANT TO. 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # truncate-utf8-bytes [![build status](https://secure.travis-ci.org/parshap/truncate-utf8-bytes.svg?branch=master)](http://travis-ci.org/parshap/truncate-utf8-bytes) 2 | 3 | Truncate a string to the given length in bytes. Correctly handles 4 | multi-byte characters and surrogate pairs. 5 | 6 | A browser implementation that doesn't use `Buffer.byteLength` is 7 | provided to minimize build size. 
8 | 9 | ## Example 10 | 11 | ```js 12 | var truncate = require("truncate-utf8-bytes") 13 | var str = "a☃" // a = 1 byte, ☃ = 3 bytes 14 | console.log(truncate(str, 2)) 15 | // -> "a" 16 | ``` 17 | 18 | ## API 19 | 20 | ### `var truncate = require("truncate-utf8-bytes")` 21 | 22 | *When using browserify or webpack*, this automatically resolves to an 23 | implementation that does not use `Buffer.byteLength`. 24 | 25 | ### `truncate(string, length)` 26 | 27 | Returns `string` truncated to at most `length` bytes in length. 28 | -------------------------------------------------------------------------------- /browser.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var truncate = require("./lib/truncate"); 4 | var getLength = require("utf8-byte-length/browser"); 5 | module.exports = truncate.bind(null, getLength); 6 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var truncate = require("./lib/truncate"); 4 | var getLength = Buffer.byteLength.bind(Buffer); 5 | module.exports = truncate.bind(null, getLength); 6 | -------------------------------------------------------------------------------- /lib/truncate.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | function isHighSurrogate(codePoint) { 4 | return codePoint >= 0xd800 && codePoint <= 0xdbff; 5 | } 6 | 7 | function isLowSurrogate(codePoint) { 8 | return codePoint >= 0xdc00 && codePoint <= 0xdfff; 9 | } 10 | 11 | // Truncate string by size in bytes 12 | module.exports = function truncate(getLength, string, byteLength) { 13 | if (typeof string !== "string") { 14 | throw new Error("Input must be string"); 15 | } 16 | 17 | var charLength = string.length; 18 | var curByteLength = 0; 19 | var codePoint; 20 | var segment; 21 | 22 | for (var i = 0; i < 
charLength; i += 1) { 23 | codePoint = string.charCodeAt(i); 24 | segment = string[i]; 25 | 26 | if (isHighSurrogate(codePoint) && isLowSurrogate(string.charCodeAt(i + 1))) { 27 | i += 1; 28 | segment += string[i]; 29 | } 30 | 31 | curByteLength += getLength(segment); 32 | 33 | if (curByteLength === byteLength) { 34 | return string.slice(0, i + 1); 35 | } 36 | else if (curByteLength > byteLength) { 37 | return string.slice(0, i - segment.length + 1); 38 | } 39 | } 40 | 41 | return string; 42 | }; 43 | 44 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "truncate-utf8-bytes", 3 | "version": "1.0.2", 4 | "description": "Truncate string to given length in bytes", 5 | "main": "index.js", 6 | "browser": "browser.js", 7 | "scripts": { 8 | "test": "tape test.js" 9 | }, 10 | "repository": { 11 | "type": "git", 12 | "url": "git+https://github.com/parshap/truncate-utf8-bytes.git" 13 | }, 14 | "keywords": [ 15 | "truncate", 16 | "utf8" 17 | ], 18 | "author": "Carl Xiong ", 19 | "license": "(WTFPL OR MIT)", 20 | "bugs": { 21 | "url": "https://github.com/parshap/truncate-utf8-bytes/issues" 22 | }, 23 | "homepage": "https://github.com/parshap/truncate-utf8-bytes#readme", 24 | "devDependencies": { 25 | "tape": "^4.2.2" 26 | }, 27 | "dependencies": { 28 | "utf8-byte-length": "^1.0.1" 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | var test = require("tape"); 4 | var truncate = require("./"); 5 | var browserTruncate = require("./browser"); 6 | 7 | function isHighSurrogate(codePoint) { 8 | return codePoint >= 0xd800 && codePoint <= 0xdbff; 9 | } 10 | 11 | function repeat(string, times) { 12 | return new Array(times + 1).join(string); 13 | } 14 | 15 | function 
// Run the same suite against both entry points: the default export and the
// browser build.
[truncate, browserTruncate].forEach(function(truncate) {
  // Fixed expectations around a 255-byte limit. The trailing surrogate pair
  // occupies 2 UTF-16 code units and 4 bytes, so it is kept only when all
  // 4 bytes fit and is otherwise dropped as a whole.
  test("strings", function(t) {
    assertLengths(t, truncate("a☃", 2), 1, 1);
    assertLengths(t, truncate(repeat("a", 250) + '\uD800\uDC00', 255), 252, 254);
    assertLengths(t, truncate(repeat("a", 251) + '\uD800\uDC00', 255), 253, 255);
    assertLengths(t, truncate(repeat("a", 252) + '\uD800\uDC00', 255), 252, 252);
    assertLengths(t, truncate(repeat("a", 253) + '\uD800\uDC00', 255), 253, 253);
    assertLengths(t, truncate(repeat("a", 254) + '\uD800\uDC00', 255), 254, 254);
    assertLengths(t, truncate(repeat("a", 255) + '\uD800\uDC00', 255), 255, 255);
    t.end();
  });

  // Truncate various strings
  [].concat(
    [
      repeat("a", 300),
      repeat("a", 252) + '\uD800\uDC00',
      repeat("a", 251) + '\uD800\uDC00',
      repeat("a", 253) + '\uD800\uDC00',
    ],
    blns
  ).forEach(function(str) {
    test(JSON.stringify(str), function(t) {
      var i = 0;
      t.equals(truncate(str, 0), "");
      // Grow the byte limit one byte at a time until the whole string fits.
      while (true) {
        var truncated = truncate(str, i);
        t.ok(Buffer.byteLength(truncated) <= i);
        // isHighSurrogate compares numerically, so it must receive the code
        // unit, not the character: a one-character string coerces to NaN and
        // would make this assertion pass unconditionally. For an empty result
        // charCodeAt(-1) is NaN, which is correctly "not a high surrogate".
        t.ok( ! isHighSurrogate(truncated.charCodeAt(truncated.length - 1)));
        if (truncated === str) {
          break;
        }
        i += 1;
      }
      t.end();
    });
  });
});