├── .gitignore ├── .npmignore ├── .travis.yml ├── CHANGELOG.md ├── LICENSE ├── README.md ├── package.json ├── scripts └── bench.coffee ├── src ├── emoji_char.coffee └── emoji_data.coffee ├── test ├── emoji_char.spec.coffee ├── emoji_data.spec.coffee └── mocha.opts └── vendor └── emoji-data ├── README.md └── emoji.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | lib 3 | doc 4 | -------------------------------------------------------------------------------- /.npmignore: -------------------------------------------------------------------------------- 1 | src 2 | test 3 | scripts 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: node_js 3 | node_js: 4 | - "0.10" 5 | - "0.12" 6 | - iojs 7 | 8 | cache: 9 | directories: 10 | - node_modules 11 | 12 | matrix: 13 | allow_failures: 14 | - node_js: iojs 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## 0.2.0 (8 September 2014) 4 | 5 | * Initial release. 6 | * Does everything the Ruby version does, super fast! (V8 optimizations lead to 7 | most benchmarks being roughly ~50% faster on NodeJS 0.10.31 vs MRI 2.1.2) 8 | * Should be API compatible with the most recent release of the Ruby version of 9 | the library. Setting the version number accordingly. Minor releases will be 10 | versioned independently, but will make an attempt to keep major version 11 | numbers consistent to feature set across different platforms. 
12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright 2015 Matthew Rothenberg 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # emoji-data-js 2 | 3 | NodeJS library providing low level operations for dealing with Emoji 4 | glyphs in the Unicode standard. :cool: 5 | 6 | EmojiData.js is like a swiss-army knife for dealing with Emoji encoding issues. 7 | If all you need to do is translate `:poop:` into :poop:, then there are plenty 8 | of other libs out there that will probably do what you want. 
But once you are 9 | dealing with Emoji as a fundamental part of your application, and you start to 10 | realize the nightmare of [doublebyte encoding][doublebyte] or 11 | [variants][variant], then this library may be your new best friend. 12 | :raised_hands: 13 | 14 | EmojiData.js is written by the same author as the Ruby [emoji_data.rb][rb] gem, 15 | which is used in production by [Emojitracker.com][emojitracker] to parse well 16 | over 100M+ emoji glyphs daily. This version was written to provide all the same 17 | functionality while taking advantage of the crazy speed of the V8 runtime 18 | environment. :dizzy: 19 | 20 | [![Build Status](https://travis-ci.org/mroth/emoji-data-js.svg?branch=master)](https://travis-ci.org/mroth/emoji-data-js) 21 | 22 | [doublebyte]: http://www.quora.com/Why-does-using-emoji-reduce-my-SMS-character-limit-to-70 23 | [variant]: http://www.unicode.org/L2/L2011/11438-emoji-var.pdf 24 | [rb]: https://github.com/mroth/emoji_data.rb 25 | [emojitracker]: http://www.emojitracker.com 26 | 27 | ## Installation 28 | 29 | npm install emoji-data 30 | 31 | ## Usage Examples 32 | 33 | ```js 34 | > var EmojiData = require('emoji-data'); 35 | 36 | > EmojiData.from_unified('2665'); 37 | { name: 'BLACK HEART SUIT', 38 | unified: '2665', 39 | variations: [ '2665-FE0F' ], 40 | docomo: 'E68D', 41 | au: 'EAA5', 42 | softbank: 'E20C', 43 | google: 'FEB1A', 44 | short_name: 'hearts', 45 | short_names: [ 'hearts' ], 46 | text: null, 47 | apple_img: true, 48 | hangouts_img: true, 49 | twitter_img: true } 50 | 51 | > EmojiData.all().length 52 | 845 53 | 54 | > EmojiData.all_with_variants().length 55 | 107 56 | 57 | > EmojiData.find_by_short_name("moon").length 58 | 13 59 | 60 | > EmojiData.find_by_name("tree").map( 61 | function(c) { return [c.unified, c.render(), c.name]; } 62 | ); 63 | [ [ '1F332', '🌲', 'EVERGREEN TREE' ], 64 | [ '1F333', '🌳', 'DECIDUOUS TREE' ], 65 | [ '1F334', '🌴', 'PALM TREE' ], 66 | [ '1F384', '🎄', 'CHRISTMAS TREE' ], 67 | [ '1F38B', 
'🎋', 'TANABATA TREE' ] ] 68 | 69 | > EmojiData.scan("I ♥ when marketers talk about the ☁. #blessed").forEach( 70 | function(ec) { console.log("Found some " + ec.short_name + "!"); } 71 | ); 72 | Found some hearts! 73 | Found some cloud! 74 | ``` 75 | 76 | ## API Documentation 77 | 78 | http://coffeedoc.info/github/mroth/emoji-data-js/master/ 79 | 80 | ## Contributing 81 | 82 | Please be sure to run `npm test` and help keep test coverage at :100:. 83 | 84 | There is a full benchmark suite available via `npm run-script bench`. Please 85 | test before and after your changes to ensure you have not caused a performance 86 | regression. 87 | 88 | ## License 89 | 90 | [The MIT License (MIT)](LICENSE) 91 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "emoji-data", 3 | "version": "0.2.0", 4 | "description": "Emoji encoding swiss army knife for NodeJS", 5 | "keywords": [ 6 | "emoji", 7 | "unicode" 8 | ], 9 | "homepage": "https://github.com/mroth/emoji-data-js", 10 | "bugs": "https://github.com/mroth/emoji-data-js/issues", 11 | "author": "Matthew Rothenberg (http://github.com/mroth)", 12 | "repository": { 13 | "type": "git", 14 | "url": "https://github.com/mroth/emoji-data-js.git" 15 | }, 16 | "main": "./lib/emoji_data.js", 17 | "scripts": { 18 | "test": "mocha test/*.spec.coffee", 19 | "pretest": "npm run-script compile", 20 | "prepublish": "npm run-script compile", 21 | "compile": "coffee --bare --output ./lib --compile ./src/*.coffee", 22 | "bench": "coffee ./scripts/bench.coffee" 23 | }, 24 | "dependencies": { 25 | "underscore.string": "^2.3.3" 26 | }, 27 | "devDependencies": { 28 | "benchmark": "^1.0.0", 29 | "chai": "^1.9.1", 30 | "codo": "^2.0.9", 31 | "coffee-errors": "^0.8.5", 32 | "coffee-script": "^1.7.1", 33 | "lodash": "^2.4.1", 34 | "microtime": "^1.0.1", 35 | "mocha": "^1.18.2", 36 | "sinon": "^1.9.1", 37 | 
# Micro-benchmark harness for the compiled emoji-data library.
#
# Three Benchmark.js suites are assembled (Scanner, EmojiData, EmojiChar),
# each is run in turn, and every case is printed sorted from the highest
# ops/sec to the lowest.
Benchmark = require('benchmark')
EmojiData = require('../lib/emoji_data.js')
EmojiChar = EmojiData.EmojiChar
_str = require('underscore.string')

# Sample strings fed to the scanner cases.
[s0, s1, s2, s3] = [
  "I liek to eat cake oh so very much cake eating is nice!! #cake #food"
  "🚀"
  "flying on my 🚀 to visit the 👾 people."
  "first a \u0023\uFE0F\u20E3 then a 🚀"
]

# EmojiChar fixtures used by the instance-method cases.
invader   = new EmojiChar({unified: '1F47E'})
usflag    = new EmojiChar({unified: '1F1FA-1F1F8'})
hourglass = new EmojiChar({unified: '231B', variations: ['231B-FE0F']})
cloud     = new EmojiChar({unified: '2601', variations: ['2601-FE0F']})

# Builds a named suite from an ordered list of [label, fn] pairs.
buildSuite = (title, cases) ->
  bench = new Benchmark.Suite(title)
  bench.add(label, fn) for [label, fn] in cases
  bench

suites = [
  buildSuite "Scanner", [
    ["scan(s0)", -> EmojiData.scan(s0)]
    ["scan(s1)", -> EmojiData.scan(s1)]
    ["scan(s2)", -> EmojiData.scan(s2)]
    ["scan(s3)", -> EmojiData.scan(s3)]
  ]

  buildSuite "EmojiData", [
    ["all",                        -> EmojiData.all()]
    ["all_doublebyte",             -> EmojiData.all_doublebyte()]
    ["all_with_variants",          -> EmojiData.all_with_variants()]
    ["from_unified",               -> EmojiData.from_unified("1F680")]
    ["chars",                      -> EmojiData.chars()]
    ["codepoints",                 -> EmojiData.codepoints()]
    ["find_by_name - many",        -> EmojiData.find_by_name("tree")]
    ["find_by_name - none",        -> EmojiData.find_by_name("zzzz")]
    ["find_by_short_name - many",  -> EmojiData.find_by_short_name("MOON")]
    ["find_by_short_name - none",  -> EmojiData.find_by_short_name("zzzz")]
    ["char_to_unified - single",   -> EmojiData.char_to_unified("🚀")]
    ["char_to_unified - double",   -> EmojiData.char_to_unified("\u2601\uFE0F")]
    ["unified_to_char - single",   -> EmojiData.unified_to_char("1F47E")]
    ["unified_to_char - double",   -> EmojiData.unified_to_char("2764-fe0f")]
    ["unified_to_char - triple",   -> EmojiData.unified_to_char("0030-FE0F-20E3")]
  ]

  buildSuite "EmojiChar", [
    ["render - single",  -> invader.render()]
    ["render - double",  -> usflag.render()]
    ["render - variant", -> cloud.render({variant_encoding: true})]
    ["chars",            -> cloud.chars()]
    ["is_doublebyte",    -> invader.is_doublebyte()]
    ["has_variants",     -> invader.has_variants()]
    ["variant",          -> invader.variant()]
  ]
]

# Converts an ops/sec figure into microseconds per operation.
toMicros = (hz) -> 1000000 / hz

# Formats one finished benchmark as "<suite>.<case>  <count>  <µs/op>".
formatResult = (suitename, r) ->
  label = "#{suitename}.#{r.name}"
  _str.sprintf("%-45s %10u %.2f µs/op", label, r.count, toMicros(r.hz))

# Run the suites one after another; within a suite, list fastest cases first.
for pending in suites
  finished = pending.run()
  finished.sort (a, b) -> b.hz - a.hz
  console.log ""
  Benchmark.forEach finished, (r) ->
    console.log formatResult(finished.name, r)
punycode = require('punycode')

# EmojiChar represents a single Emoji character and its associated metadata.
#
# ## Properties
#
# * `name` - The standardized name used in the Unicode specification to
#   represent this emoji character.
# * `unified` - The primary unified codepoint ID for the emoji.
# * `variations` - A list of all variant codepoints that may also represent this
#   emoji.
# * `short_name` - The canonical "short name" or keyword used in many systems to
#   refer to this emoji. Often surrounded by `:colons:` in systems like GitHub
#   & Campfire.
# * `short_names` - A full list of possible keywords for the emoji.
# * `text` - An alternate textual representation of the emoji, for example a
#   smiley face emoji may be represented with an ASCII alternative. Most emoji
#   do not have a text alternative. This is typically used when building an
#   automatic translation from typed emoticons.
#
# It also contains a few helper functions to deal with this data type.
#
class EmojiChar

  # Copies every own key/value pair of the blob onto the new instance.
  #
  # @param blob [Object] the JSON blob entry from emoji-data
  # @return [EmojiChar]
  constructor: (blob) ->
    # `own` keeps inherited enumerable keys of the blob off the instance.
    @[k] = v for own k, v of blob
    # The source file doesn't include a blank variations field when none
    # exist; normalise to an empty list so callers can always iterate it.
    @variations = [] unless @variations?

  # Is the `EmojiChar` represented by a doublebyte codepoint in Unicode?
  #
  # A unified ID containing a dash (e.g. `1F1FA-1F1F8`) is treated as
  # doublebyte.
  #
  # @return [Boolean]
  is_doublebyte: ->
    @unified.indexOf('-') isnt -1

  # Does the `EmojiChar` have an alternate Unicode variant encoding?
  #
  # @return [Boolean] true when the EmojiChar has at least one variant encoding
  has_variants: ->
    @variations.length > 0

  # Returns the most likely variant-encoding codepoint ID for an `EmojiChar`.
  #
  # For now we only know of one possible variant encoding for certain
  # characters, but there could be others in the future.
  #
  # This is typically used to force Emoji rendering for characters that could
  # be represented in standard font glyphs on certain operating systems.
  #
  # The resulting encoded string will be two codepoints, or three codepoints
  # for doublebyte Emoji characters.
  #
  # @return [String, null]
  #   The most likely variant-encoding codepoint ID (the first entry of
  #   `variations`). If there is no variant-encoding for a character, returns
  #   null.
  variant: ->
    if @has_variants() then @variations[0] else null

  # Renders a UCS-2 string representation of the glyph for writing to screen.
  #
  # If you want to specify whether or not to use variant encoding, pass an
  # options hash such as:
  #
  #     foo.render({variant_encoding: false})
  #
  # By default this will use the variant encoding if it exists. The default
  # also applies when an options hash is passed that does not mention
  # `variant_encoding` at all (e.g. `render({})`).
  #
  # @param options [Object] the encoding options
  # @option options [Boolean] variant_encoding true if you want to render with
  #   variant encoding (default: true).
  #
  # @return [String] the emoji character rendered to a UCS-2 string
  render: (options = {}) ->
    # Resolve the default per key: only a missing/null value falls back to
    # true, an explicit `false` is respected.
    variant_encoding = options.variant_encoding ? true
    target = if variant_encoding and @has_variants() then @variant() else @unified
    EmojiChar._unified_to_char(target)

  # Returns a list of all possible UCS-2 string renderings of an `EmojiChar`.
  #
  # E.g., normal, with variant selectors, etc. This is useful if you want to
  # have all possible values to match against when searching for the emoji in
  # a string representation.
  #
  # @return [Array<String>] the plain rendering first, followed by one
  #   rendering per entry in `variations`
  chars: ->
    (EmojiChar._unified_to_char(id) for id in [@unified].concat(@variations))

  # @see #render()
  # @return [String]
  toString: -> @render()

  # Convert a unified codepoint ID to the UCS-2 string representation.
  #
  # The ID is split on dashes and each part is parsed as a hexadecimal
  # codepoint before being encoded.
  #
  # @param [String] uid the unified codepoint ID for an emoji
  # @return [String] UCS-2 string representation of the emoji glyph
  # @private
  @_unified_to_char: (uid) ->
    cps = (parseInt(cp, 16) for cp in uid.split('-'))
    punycode.ucs2.encode(cps)


module.exports = EmojiChar
EmojiChar = require('./emoji_char')
punycode = require('punycode')
_str = require('underscore.string')

# EmojiData exposes static lookup, conversion and scanning helpers over the
# vendored emoji-data JSON. All data structures below are built once, when the
# module is first loaded.
#
# Static methods refer to their siblings through `EmojiData.` rather than `@`
# so they keep working when passed around detached, e.g.
# `strings.map(EmojiData.scan)`.
class EmojiData
  EMOJI_MAP = require('../vendor/emoji-data/emoji.json')
  EMOJI_CHARS = (new EmojiChar(char_blob) for char_blob in EMOJI_MAP)

  # Hashmap for fast precached lookups in `.from_unified`; both the primary
  # unified ID and every variation ID point at the same EmojiChar.
  # Created without a prototype so that arbitrary user-supplied keys can never
  # resolve to `Object.prototype` members.
  EMOJICHAR_UNIFIED_MAP = Object.create(null)
  for ec in EMOJI_CHARS
    EMOJICHAR_UNIFIED_MAP[ec.unified] = ec
    EMOJICHAR_UNIFIED_MAP[variant] = ec for variant in ec.variations

  # Precomputed hashmap for fast precached lookups in `.from_short_name`.
  # Prototype-less for the same reason as above: a plain `{}` would answer
  # `from_short_name('constructor')` with the `Object` function.
  EMOJICHAR_KEYWORD_MAP = Object.create(null)
  for ec in EMOJI_CHARS
    EMOJICHAR_KEYWORD_MAP[keyword] = ec for keyword in ec.short_names


  # Returns a list of all known Emoji characters as `EmojiChar` objects.
  #
  # @return [Array<EmojiChar>] a list of all known `EmojiChar`.
  @all: ->
    EMOJI_CHARS

  # Returns a list of all `EmojiChar` that are represented with doublebyte
  # encoding.
  #
  # @return [Array<EmojiChar>] a list of all doublebyte `EmojiChar`.
  @all_doublebyte: ->
    ( ec for ec in EMOJI_CHARS when ec.is_doublebyte() )

  # Returns a list of all `EmojiChar` that have at least one variant encoding.
  #
  # @return [Array<EmojiChar>] a list of all `EmojiChar` with variant encoding.
  @all_with_variants: ->
    ( ec for ec in EMOJI_CHARS when ec.has_variants() )

  # Returns a list of all known Emoji characters rendered as UCS-2 strings.
  #
  # By default only the plain (non-variant) rendering of each character is
  # returned. However, if you pass an option hash with
  # `include_variants: true` then the variant rendering of every character
  # that has one is appended, meaning that:
  #
  # 1. You will have "duplicate" emojis in your list.
  # 2. This list is now suitable for exhaustively matching against in a search.
  #
  # @option options [Boolean] :include_variants whether or not to include all
  #   possible encoding variants in the list
  #
  # @return [Array<String>] all Emoji characters rendered as UCS-2 strings
  @chars: (options = {include_variants: false}) ->
    norms = (ec.render({variant_encoding: false}) for ec in EMOJI_CHARS)
    return norms unless options.include_variants
    # only pay for the variant renderings when they were asked for
    extra = (ec.render({variant_encoding: true}) for ec in EmojiData.all_with_variants())
    norms.concat(extra)

  # Returns a list of all known codepoints representing Emoji characters.
  #
  # @option options [Boolean] :include_variants whether or not to include all
  #   possible encoding variants in the list
  # @return [Array<String>] all codepoints represented as unified ID strings
  @codepoints: (options = {include_variants: false}) ->
    norms = (ec.unified for ec in EMOJI_CHARS)
    return norms unless options.include_variants
    extra = (ec.variant() for ec in EmojiData.all_with_variants())
    norms.concat(extra)

  # Convert a native UCS-2 string glyph to its unified codepoint ID.
  #
  # This is a conversion operation, not a match, so it may produce unexpected
  # results with different types of values. Each codepoint is rendered as
  # upper-case hex, left-padded with zeroes to at least four digits, and the
  # parts are joined with dashes.
  #
  # @param char [String] a single rendered emoji glyph encoded as a UCS-2 string
  # @return [String] the unified ID
  #
  # @example
  #   > EmojiData.char_to_unified("👾");
  #   '1F47E'
  @char_to_unified: (char) ->
    cps = punycode.ucs2.decode(char)
    hexes = ( _str.rjust( cp.toString(16), 4, "0") for cp in cps )
    hexes.join("-").toUpperCase()

  # Convert a unified codepoint ID directly to its UCS-2 string representation.
  #
  # @param uid [String] the unified codepoint ID for an emoji
  # @return [String] UCS-2 string rendering of the emoji character
  #
  # @example
  #   > EmojiData.unified_to_char("1F47E");
  #   '👾'
  @unified_to_char: (uid) ->
    EmojiChar._unified_to_char(uid)

  # Finds any `EmojiChar` that contains given string in its official name.
  #
  # The input is upper-cased before matching.
  #
  # @param name [String]
  # @return [Array<EmojiChar>] every match, or [] when there is none
  @find_by_name: (name) ->
    target = name.toUpperCase()
    (ec for ec in EMOJI_CHARS when ec.name.indexOf(target) != -1)

  # Find all `EmojiChar` that match string in any of their associated short
  # name keywords.
  #
  # The input is lower-cased before matching; a substring match on any one
  # keyword is enough.
  #
  # @param short_name [String]
  # @return [Array<EmojiChar>] every match, or [] when there is none
  @find_by_short_name: (short_name) ->
    target = short_name.toLowerCase()
    (ec for ec in EMOJI_CHARS when ec.short_names.some(
      (sn)->sn.indexOf(target) != -1
    ))

  # Finds a specific `EmojiChar` based on one of its short name keywords.
  #
  # Must be exact match (after lower-casing the input).
  #
  # @param short_name [String]
  # @return [EmojiChar, undefined] undefined when no emoji has that keyword
  @from_short_name: (short_name) ->
    EMOJICHAR_KEYWORD_MAP[short_name.toLowerCase()]

  # Finds a specific `EmojiChar` based on its unified codepoint ID.
  #
  # The ID is upper-cased first; variant IDs resolve to the same `EmojiChar`
  # as the primary ID.
  #
  # @param uid [String] the unified codepoint ID for an emoji
  # @return [EmojiChar, undefined] undefined when the ID is unknown
  @from_unified: (uid) ->
    EMOJICHAR_UNIFIED_MAP[uid.toUpperCase()]

  # Escapes RegExp metacharacters so a glyph is always matched literally.
  # Without this, a glyph that starts with a metacharacter (for example a
  # future `*` keycap sequence) would make the RegExp constructor throw when
  # the module is loaded.
  escape_regexp = (glyph) ->
    glyph.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

  # The RegExp matcher we use to do .scan() efficiently.
  # Needs to be defined after `@chars`, so it cannot sit at the top of the
  # class body.
  FBS_PATTERN = (
    escape_regexp(glyph) for glyph in EmojiData.chars({include_variants: true})
  ).join("|")
  FBS_REGEXP = new RegExp("(?:#{FBS_PATTERN})", "g")

  # Scans a string for all encoded emoji characters contained within.
  #
  # @param str [String] the target string to search
  # @return [Array<EmojiChar>] all emoji characters contained within the target
  #   string, in the order they appeared (duplicates included).
  @scan: (str) ->
    # `String#match` with a global RegExp resets lastIndex and returns every
    # match at once (or null when there is none). `String(str)` keeps the
    # coercion that `RegExp#exec` used to perform on non-string input.
    matches = String(str).match(FBS_REGEXP) or []

    # map matched chars to EmojiChar objects
    (EmojiData.from_unified( EmojiData.char_to_unified(glyph) ) for glyph in matches)


module.exports = EmojiData
module.exports.EmojiChar = EmojiChar
# Specs for the EmojiChar class (constructor plus instance helpers).
require 'coffee-errors'

chai = require 'chai'
sinon = require 'sinon'
# using compiled JavaScript file here to be sure module works
EmojiChar = require '../lib/emoji_char.js'

expect = chai.expect
# Install the `should` style here as well: every assertion below relies on it,
# and without this call the file only passes when another spec that calls
# `chai.should()` happens to be loaded in the same mocha process.
chai.should()
chai.use require 'sinon-chai'


describe "EmojiChar", ->

  describe ".new", ->
    before ->
      # a verbatim emoji.json entry, including fields the class has no
      # special knowledge of (image, sheet_x, ...)
      poop_json = '{"name":"PILE OF POO","unified":"1F4A9","variations":[],"docomo":"","au":"E4F5","softbank":"E05A","google":"FE4F4","image":"1f4a9.png","sheet_x":11,"sheet_y":19,"short_name":"hankey","short_names":["hankey","poop","shit"],"text":null}'
      @poop = new EmojiChar(JSON.parse poop_json)

    it "should create instance getters for all key-values in emoji.json, with blanks as nil", ->
      @poop.name.should.equal 'PILE OF POO'
      @poop.unified.should.equal '1F4A9'
      @poop.variations.should.deep.equal []
      @poop.docomo.should.equal ''
      @poop.au.should.equal 'E4F5'
      @poop.softbank.should.equal 'E05A'
      @poop.google.should.equal 'FE4F4'
      @poop.image.should.equal '1f4a9.png'
      @poop.sheet_x.should.equal 11
      @poop.sheet_y.should.equal 19
      @poop.short_name.should.equal 'hankey'
      @poop.short_names.should.deep.equal ["hankey","poop","shit"]
      # null has no `.should`, so this one has to use the expect style
      expect(@poop.text).to.be.null


  context "instance methods", ->
    before ->
      # single codepoint, doublebyte, and two chars with a variant encoding
      @invader   = new EmojiChar({unified: '1F47E'})
      @usflag    = new EmojiChar({unified: '1F1FA-1F1F8'})
      @hourglass = new EmojiChar({unified: '231B', variations: ['231B-FE0F']})
      @cloud     = new EmojiChar({unified: '2601', variations: ['2601-FE0F']})

    describe "#toString", ->
      it "should return the unicode glyph as string as default to_s", ->
        @invader.toString().should.equal @invader.render()

    describe "#render", ->
      it "should render as happy shiny unicode", ->
        @invader.render().should.equal "👾"

      it "should render as happy shiny unicode for doublebyte chars too", ->
        @usflag.render().should.equal "🇺🇸"

      it "should have a flag to output forced emoji variant char encoding if requested", ->
        @cloud.render( {variant_encoding: false}).should.equal "\u2601"
        @cloud.render( {variant_encoding:  true}).should.equal "\u2601\uFE0F"

      it "should fall back to normal encoding if no variant exists, even when requested", ->
        @invader.render( {variant_encoding: false}).should.equal "👾"
        @invader.render( {variant_encoding:  true}).should.equal "👾"

      it "should default to variant encoding for chars with a variant present", ->
        @cloud.render().should.equal "\u2601\uFE0F"
        @hourglass.render().should.equal "\u231B\uFE0F"


    describe "#chars", ->
      it "should return an array of all possible string render variations", ->
        @invader.chars().should.deep.equal ["👾"]
        @cloud.chars().should.deep.equal ["\u2601","\u2601\uFE0F"]

    describe "#is_doublebyte", ->
      it "should indicate when a character is doublebyte based on the unified ID", ->
        @usflag.is_doublebyte().should.be.true
        @invader.is_doublebyte().should.be.false

    describe "#has_variants", ->
      it "should indicate when a character has an alternate variant encoding", ->
        @hourglass.has_variants().should.be.true
        @usflag.has_variants().should.be.false

    describe "#variant", ->
      it "should return the most likely variant encoding ID representation for the char", ->
        @hourglass.variant().should.equal '231B-FE0F'

      it "should return null if no variant encoding for the char exists", ->
        expect(@usflag.variant()).to.be.null
# Specs for the static EmojiData API. The expected counts and names asserted
# below are pinned to the emoji.json that the compiled module loads.
require 'coffee-errors'

chai = require 'chai'
sinon = require 'sinon'
_ = require 'lodash'

# using compiled JavaScript file here to be sure module works
EmojiData = require '../lib/emoji_data.js'

expect = chai.expect
# installs the `.should` assertion style used throughout this file
chai.should()

chai.use require 'sinon-chai'


describe 'EmojiData', ->
  describe ".all", ->
    it "should return an array of all 845 known emoji chars", ->
      EmojiData.all().length.should.equal 845

    it "should return all EmojiChar objects", ->
      result.should.be.an.instanceof(EmojiData.EmojiChar) for result in EmojiData.all()


  describe ".all_doublebyte", ->
    it "should return an array of all 21 known emoji chars with doublebyte encoding", ->
      results = EmojiData.all_doublebyte()
      results.length.should.equal 21
      result.should.be.an.instanceof(EmojiData.EmojiChar) for result in results


  describe ".all_with_variants", ->
    it "should return an array of all 107 known emoji chars with variant encodings", ->
      results = EmojiData.all_with_variants()
      results.length.should.equal 107
      result.should.be.an.instanceof(EmojiData.EmojiChar) for result in results


  describe ".chars", ->
    it "should return an array of all chars in unicode string format", ->
      char.should.be.a('String') for char in EmojiData.chars()

    it "should by default return one entry per known EmojiChar", ->
      EmojiData.chars().length.should.equal EmojiData.all().length

    it "should include variants in list when options {include_variants: true}", ->
      # one plain rendering per char plus one extra per char that has a variant
      results = EmojiData.chars({include_variants: true})
      numChars    = EmojiData.all().length
      numVariants = EmojiData.all_with_variants().length
      results.length.should.equal numChars + numVariants

    it "should not have any duplicates in list when variants are included", ->
      results = EmojiData.chars({include_variants: true})
      results.length.should.equal _.uniq(results).length

  describe ".codepoints", ->
    it "should return an array of all known codepoints in dashed string representation", ->
      results = EmojiData.codepoints()
      results.length.should.equal 845
      for result in results
        result.should.be.a 'string'
        # upper-case hex digits and dashes only, 4 to 11 characters long
        result.should.match /^[0-9A-F\-]{4,11}$/

    it "should include variants in list when options {include_variants: true}", ->
      numChars    = EmojiData.all().length
      numVariants = EmojiData.all_with_variants().length
      results = EmojiData.codepoints({include_variants: true})
      results.length.should.equal (numChars + numVariants)
      for result in results
        result.should.be.a 'string'
        # variant IDs carry an extra part, hence the longer upper bound
        result.should.match /^[0-9A-F\-]{4,16}$/


  describe ".scan", ->
    before ->
      # scan each fixture string once and share the results across the cases
      @exact_results   = EmojiData.scan("🚀")
      @multi_results   = EmojiData.scan("flying on my 🚀 to visit the 👾 people.")
      @variant_results = EmojiData.scan("\u0023\uFE0F\u20E3")
      @variant_multi   = EmojiData.scan("first a \u0023\uFE0F\u20E3 then a 🚀")

    it "should find the proper EmojiChar object from a single string char", ->
      @exact_results.should.be.a 'array'
      @exact_results.length.should.equal 1
      @exact_results[0].should.be.an.instanceof EmojiData.EmojiChar
      @exact_results[0].name.should.equal 'ROCKET'

    it "should find the proper EmojiChar object from a variant encoded char", ->
      @variant_results.length.should.equal 1
      @variant_results[0].name.should.equal 'HASH KEY'

    it "should match multiple chars from within a string", ->
      @multi_results.should.be.a 'array'
      @multi_results.length.should.equal 2
      @multi_results[0].should.be.an.instanceof EmojiData.EmojiChar
      @multi_results[1].should.be.an.instanceof EmojiData.EmojiChar

    it "should return multiple matches in the proper order", ->
      @multi_results[0].name.should.equal 'ROCKET'
      @multi_results[1].name.should.equal 'ALIEN MONSTER'

    it "should return multiple matches in the proper order for variant encodings", ->
      @variant_multi[0].name.should.equal 'HASH KEY'
      @variant_multi[1].name.should.equal 'ROCKET'

    it "should return multiple matches including duplicates", ->
      results = EmojiData.scan("flying my 🚀 to visit the 👾 people who have their own 🚀 too.")
      results.should.be.a 'array'
      results.length.should.equal 3

    it "returns [] if nothing is found", ->
      EmojiData.scan("i like turtles").should.deep.equal []


  describe ".from_unified", ->
    it "should find the proper EmojiChar object", ->
      results = EmojiData.from_unified('1F680')
      results.should.be.an.instanceof(EmojiData.EmojiChar)
      results.name.should.equal 'ROCKET'

    it "should normalise capitalization for hex values", ->
      EmojiData.from_unified('1f680').should.deep.equal EmojiData.from_unified('1F680')

    it "should find via variant encoding ID format as well", ->
      results = EmojiData.from_unified('2764-fe0f')
      results.should.be.an.instanceof(EmojiData.EmojiChar)
      results.name.should.equal 'HEAVY BLACK HEART'

    it "should return undefined when there is no match", ->
      # undefined has no `.should`, so the expect style is required here
      expect(EmojiData.from_unified('tacoz')).to.be.undefined


  describe ".find_by_name", ->
    it "returns an array of results, upcasing input if needed", ->
      EmojiData.find_by_name('tree').should.be.a 'array'
      EmojiData.find_by_name('tree').length.should.equal 5

    it "returns [] if nothing is found", ->
      EmojiData.find_by_name('sdlkfjlskdfj').should.deep.equal []


  describe ".find_by_short_name", ->
    it "returns an array of results, downcasing input if needed", ->
      EmojiData.find_by_short_name('MOON').should.be.a 'array'
      EmojiData.find_by_short_name('MOON').length.should.equal 13

    it "returns [] if nothing is found", ->
      EmojiData.find_by_short_name('sdlkfjlskdfj').should.deep.equal []


  describe ".from_short_name", ->
    it "returns exact matches on a short name", ->
      results = EmojiData.from_short_name('scream')
      results.should.be.an.instanceof(EmojiData.EmojiChar)
      results.name.should.equal 'FACE SCREAMING IN FEAR'

    it "handles lowercasing input if required", ->
      EmojiData.from_short_name('SCREAM').should.equal EmojiData.from_short_name('scream')

    it "works on secondary keywords", ->
      # every keyword of an emoji must resolve to the very same object
      primary = EmojiData.from_short_name('hankey')
      EmojiData.from_short_name('poop').should.equal primary
      EmojiData.from_short_name('shit').should.equal primary

    it "returns undefined if nothing matches", ->
      expect(EmojiData.from_short_name('taco')).to.be.undefined


  describe ".char_to_unified", ->
    it "converts normal emoji to unified codepoint", ->
      EmojiData.char_to_unified("👾").should.equal '1F47E'
      EmojiData.char_to_unified("🚀").should.equal '1F680'

    it "converts double-byte emoji to proper codepoint", ->
      EmojiData.char_to_unified("🇺🇸").should.equal '1F1FA-1F1F8'

    it "in doublebyte, adds padding to hex codes that are <4 chars", ->
      # U+0023 must come out as '0023', not '23'
      EmojiData.char_to_unified("#⃣").should.equal '0023-20E3'

    it "converts variant encoded emoji to variant unified codepoint", ->
      EmojiData.char_to_unified("\u2601\uFE0F").should.equal '2601-FE0F'


  describe ".unified_to_char", ->
    it "converts normal unified codepoints to unicode strings", ->
      EmojiData.unified_to_char('1F47E').should.equal "👾"
      EmojiData.unified_to_char('1F680').should.equal "🚀"

    it "converts doublebyte unified codepoints to unicode strings", ->
      EmojiData.unified_to_char('1F1FA-1F1F8').should.equal "🇺🇸"
      EmojiData.unified_to_char('0023-20E3').should.equal "#⃣"

    it "converts variant unified codepoints to unicode strings", ->
      # lower-case hex input is accepted as well
      EmojiData.unified_to_char('2764-fe0f').should.equal "\u2764\uFE0F"

    it "converts variant+doublebyte chars (triplets!) to unicode strings", ->
      EmojiData.unified_to_char('0030-FE0F-20E3').should.equal "\u0030\uFE0F\u20E3"
to unicode strings", -> 196 | EmojiData.unified_to_char('0030-FE0F-20E3').should.equal "\u0030\uFE0F\u20E3" 197 | -------------------------------------------------------------------------------- /test/mocha.opts: -------------------------------------------------------------------------------- 1 | --compilers coffee:coffee-script/register 2 | --recursive 3 | --reporter spec 4 | --ui bdd 5 | --timeout 20000 -------------------------------------------------------------------------------- /vendor/emoji-data/README.md: -------------------------------------------------------------------------------- 1 | Manually vendored from iamcal/emoji-data 2 | 3 | (Because git submodules cause more problems than they are worth for a 4 | single file.) 5 | 6 | Most recent vendoring from revision: 7 | iamcal/emoji-data@6cb685cd1e 8 | --------------------------------------------------------------------------------