├── .gitignore ├── .travis.yml ├── index.js ├── test.js ├── LICENSE ├── package.json └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '5' 4 | - '4' 5 | - '0.12' 6 | - '0.10' 7 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | var isInteger = require('is-integer') 4 | var slice = require('unicode-substring') 5 | 6 | module.exports = function (str, len) { 7 | if (typeof str !== 'string') throw new Error('Expected first argument to be a string') 8 | if (!isInteger(len) || len < 0) throw new Error('Expected second argument be an integer greater than or equal to 0') 9 | 10 | var origLen = len 11 | while (Buffer.byteLength(str) > origLen) { 12 | str = slice(str, 0, len--) 13 | } 14 | 15 | return str 16 | } 17 | -------------------------------------------------------------------------------- /test.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | var test = require('tape') 4 | var trunc = require('./') 5 | 6 | test('normal string', function (t) { 7 | var s = 'foobar' 8 | t.equal(trunc(s, 0), '') 9 | t.equal(trunc(s, 3), 'foo') 10 | t.equal(trunc(s, 6), 'foobar') 11 | t.equal(trunc(s, 9), 'foobar') 12 | t.end() 13 | }) 14 | 15 | test('multibyte string', function (t) { 16 | var s = 'foo🎉bar' 17 | t.equal(trunc(s, 3), 'foo') 18 | t.equal(trunc(s, 4), 'foo') 19 | t.equal(trunc(s, 5), 'foo') 20 | t.equal(trunc(s, 6), 'foo') 21 | t.equal(trunc(s, 7), 'foo🎉') 22 | t.equal(trunc(s, 8), 'foo🎉b') 23 | t.end() 24 | }) 25 | 26 | test('invalid values', function (t) { 27 | t.throws(function () { trunc() }) 28 | t.throws(function () { trunc(1, 0) }) 29 | t.throws(function () { trunc('') }) 30 | t.throws(function () { trunc('', '') }) 31 | t.throws(function () { trunc('', NaN) }) 32 | t.throws(function () { trunc('', false) }) 33 | t.throws(function () { trunc('', -1) }) 34 | t.throws(function () { trunc('', Infinity) }) 35 | t.end() 36 | }) 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Thomas Watson Steen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "unicode-byte-truncate", 3 | "version": "1.0.0", 4 | "description": "Unicode aware string truncation that given a max byte size will truncate the string to or just below that size", 5 | "main": "index.js", 6 | "dependencies": { 7 | "is-integer": "^1.0.6", 8 | "unicode-substring": "^0.1.0" 9 | }, 10 | "devDependencies": { 11 | "standard": "^6.0.4", 12 | "tape": "^4.4.0" 13 | }, 14 | "scripts": { 15 | "test": "standard && tape test.js" 16 | }, 17 | "repository": { 18 | "type": "git", 19 | "url": "git+https://github.com/watson/unicode-byte-truncate.git" 20 | }, 21 | "keywords": [ 22 | "slice", 23 | "substring", 24 | "substr", 25 | "trunc", 26 | "truncate", 27 | "trim", 28 | "unicode", 29 | "multibyte", 30 | "multi-byte", 31 | "surrogate", 32 | "pair", 33 | "pairs", 34 | "max", 35 | "byte", 36 | "bytes", 37 | "characters", 38 | "chars" 39 | ], 40 | "author": "Thomas Watson Steen (https://twitter.com/wa7son)", 41 | "license": "MIT", 42 | "bugs": { 43 | "url": "https://github.com/watson/unicode-byte-truncate/issues" 44 | }, 45 | "homepage": "https://github.com/watson/unicode-byte-truncate#readme", 46 | "coordinates": [ 47 | 55.6666217, 48 | 12.5798077 49 | ] 50 | } 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # unicode-byte-truncate 2 | 3 | Truncate a string to a given byte size by removing bytes from the right 4 | while making sure not to slice in the middle of a multi-byte unicode 5 | character. 6 | 7 | [![Build status](https://travis-ci.org/watson/unicode-byte-truncate.svg?branch=master)](https://travis-ci.org/watson/unicode-byte-truncate) 8 | [![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat)](https://github.com/feross/standard) 9 | 10 | ## Installation 11 | 12 | ``` 13 | npm install unicode-byte-truncate --save 14 | ``` 15 | 16 | ## Usage 17 | 18 | ```js 19 | var trunc = require('unicode-byte-truncate') 20 | 21 | var str = 'foo🎉bar' // 10 byte string - byte 4 to 7 is a single character 22 | 23 | console.log(trunc(str, 4)) // `foo` == 0x666F6F (3 bytes) 24 | console.log(trunc(str, 5)) // `foo` == 0x666F6F (3 bytes) 25 | console.log(trunc(str, 6)) // `foo` == 0x666F6F (3 bytes) 26 | console.log(trunc(str, 7)) // `foo🎉` == 0x666F6FF09F8E89 (7 bytes) 27 | ``` 28 | 29 | ## API 30 | 31 | The unicode-byte-truncate module exposes a single `trunc` function. 32 | 33 | ```js 34 | result = trunc(string, maxBytes) 35 | ``` 36 | 37 | Given a `string` and a `maxBytes` integer greater than or equal to zero, 38 | the `trunc` function will slice characters off the end of the string to 39 | ensure that it doesn't contain more bytes than specified by the 40 | `maxBytes` argument. 41 | 42 | The truncated string will be returned as the `result`. 43 | 44 | The `trunc` function is multi-byte unicode aware and will never cut up 45 | surrogate pairs. This means that the `result` _may_ contain fewer bytes 46 | than specified by the `maxBytes` argument. 47 | 48 | ## License 49 | 50 | MIT 51 | --------------------------------------------------------------------------------