├── CHANGELOG.md ├── README.md ├── resources └── emojis.json └── src └── com └── vdurmont └── emoji ├── Emoji.kt ├── EmojiLoader.kt ├── EmojiManager.kt ├── EmojiParser.kt ├── EmojiTrie.kt ├── Fitzpatrick.kt └── Gender.kt /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## v6.0i 4 | 5 | - Ivan Ivanov forked main repository 6 | - Added class `Gender` 7 | - Added understanding of complex emojies: `{basic_emoji}{skin_color}?{gender}?` 8 | - Added support of bugged VK.COM web-version emojis, where ending char `\uFE0F` is absent 9 | - `EmojiParser.UnicodeCandidate` renamed to `EmojiParser.EmojiResult`. And added new fields 10 | - `EmojiParser` algorithm improvement. `getEmojiEndPos` is replaced with `getNextEmoji`
11 | - `EmojiTrie.getBestEmoji` introduced to replace slow `EmojiTrie.getEmojiEndPos` method 12 | - Memory usage fixes by optimizing generation of new String objects. 13 | - As a result speed improvement reaches up to 5x times comparing to v.5.1.1 14 | 15 | ## v5.1.1 16 | 17 | - Bugfix: respect fitzpatrick modifier when extracting emojis (thanks @sullis) 18 | 19 | ## v5.1.0 20 | 21 | - Many performance improvements to the parsing of the emojis (thanks @freva) 22 | - Add a `containsEmoji` function (thanks @freva!) 23 | 24 | ## v5.0.0 25 | 26 | - Fix the HTML to Unicode parser to always parse the longer emojis (thanks @freva) 27 | - Add alias for "pumpkin" (thanks @sullis) 28 | - Add a lot of missing flag emojis (thanks @ankitkariryaa) 29 | - Support for all emojis from Unicode 11.0 30 | - Support for all emojis from Unicode 10.0 31 | - Add a `EmojiParser.replaceAllEmojis` function (thanks @cbedoy) 32 | 33 | ## v4.0.0 34 | 35 | - Add "source code" strings to emoji json database 36 | - Fix some missing/out-of-order code points (thanks @BillyGalbreath) 37 | - Upgrade `org.json:json` dependency (thanks @PhotonQyv) 38 | - Update README with new emojis (thanks @jtobard) 39 | 40 | ## v3.3.0 41 | 42 | - Add `family_man_woman_girl_boy` emoji (thanks @freva) 43 | - Fix `EmojiManager.isEmoji` to support fitzpatrick modifiers (thanks @freva) 44 | - Fixed several emojis that had the wrong `support_fitzpatrick` flag (thanks @Tapchicoma) 45 | - Add some tests to avoid duplicate aliases 46 | - Fixed several duplicated aliases in the emoji DB 47 | 48 | ## v3.2.0 49 | 50 | - Fixed Poland flag (thanks @Sheigutn) 51 | - Improvements to the smile emojis (thanks @miquelbeltran) 52 | - Add a bunch of emojis from Apple iOS 10.2 release 53 | - Fix some missing fitzpatrick modifiers 54 | - Add an `EmojiManager.isOnlyEmojis()` method 55 | 56 | ## v3.1.3 57 | 58 | - Removed all variance selectors from the JSON database. Thanks @roberterdin ! 
59 | 60 | ## v3.1.2 61 | 62 | - Additions and updates to the emoji database (victory hand now supports fitzpatrick, adds Saint Vincent Grenadines' flag, add the regional indicator symbols). Thanks @lologist ! 63 | - Force the database to be loaded in UTF-8. 64 | - Enable the extension of the `EmojiParser` class. 65 | 66 | ## v3.1.1 67 | 68 | - Add the ability to provide a custom `EmojiTransformer` that will enable developers to add their custom emoji replacement methods. Thanks @freva ! 69 | 70 | ## v3.1.0 71 | 72 | - Add fitzpatrick support for 👃 ("nose") and 👂 ("ear") 73 | - Fix duplicated "sunglasses" alias 74 | - Performance improvements (using a Trie structure) 75 | - Parsing support for multiple emojis (such as "family_man_woman_boy") 76 | - Fix `EmojiManager.getAll()` that returned some duplicates 77 | - Use a BufferedReader to load the database 78 | 79 | ## v3.0.0 80 | 81 | Update the emoji database to support the additions of iOS 9.1 82 | 83 | ## v2.2.1 84 | 85 | Fix the `htmlDec` and `htmlHex` codes for the multiple emojis (such as `family (man, man, girl, boy)`) 86 | 87 | ## v2.2.0 88 | 89 | Rollback dependency org.json:json to 20140107 to keep the compatibility with Java 6 & 7 90 | 91 | ## v2.1.0 92 | 93 | - Add methods: 94 | - `EmojiParser#removeAllEmojis(String)` 95 | - `EmojiParser#removeAllEmojisExcept(String, Collection)` 96 | - `EmojiParser#removeEmojis(String, Collection)` 97 | - Upgrade dependency org.json:json 98 | 99 | ## v2.0.1 100 | 101 | Bug fix on the :-1: emoji 102 | 103 | ## v2.0.0 104 | 105 | - Update of the emoji database 106 | - Add 14 new family emojis (man_man_boy, woman_woman_girl, etc.) 
107 | - Add 4 new couple emojis 108 | - Add the "vulcan_salute" and "middle_finger" emojis 109 | - Add 198 flags 110 | - Addition of the support for the diversity emojis (Fitzpatrick modifiers) 111 | - Removal of the deprecated methods `Emoji#getHtml` and `EmojiParser#parseToHtml` 112 | - Improvements in the javadoc 113 | 114 | ## v1.1.1 115 | 116 | Closing the stream used to read the emoji database in `EmojiManager.java` 117 | 118 | ## v1.1.0 119 | 120 | - Update of the emoji database 121 | - Adding support for HTML hexadecimal: 122 | - `Emoji#getHtmlHexadecimal` 123 | - `EmojiParser#parseToHtmlHexadecimal` 124 | - The old HTML support is now HTML decimal: 125 | - Deprecating `Emoji#getHtml` (replaced by `Emoji#getHtmlDecimal`) 126 | - Deprecating `EmojiParser#parseToHtml` (replaced by `EmojiParser#parseToHtmlDecimal`) 127 | 128 | ## v1.0.1 129 | 130 | Bug fix on the :+1: emoji 131 | 132 | ## v1.0.0 133 | 134 | First release. 135 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # iris:emoji-kotlin 2 | 3 | [![License Info](http://img.shields.io/badge/license-The%20MIT%20License-brightgreen.svg)](https://github.com/vdurmont/emoji-java/blob/master/LICENSE.md) 4 | 5 | _The missing emoji library for Kotlin._ 6 | 7 | **[iris:emoji-kotlin](https://github.com/iris2iris/iris-emoji-kotlin)** is a lightweight Kotlin library that helps you use Emojis in your java applications. 8 | 9 | It is copy of my Java repo **[iris:emoji-java](https://github.com/iris2iris/iris-emoji-java)**. 10 | 11 | And it in its turn was forked from **[emoji-java](https://github.com/vdurmont/emoji-java)** 12 | 13 | #### Reasons I forked it: 14 | 15 | - ❗️ The most important reason I forked it was I have found how to improve its __speed up to 5x__ times! 
16 | - My repo supports complex emojis of combination `{basic_emoji}{skin_color}?{gender}?` 17 | - It supports of bugged VK.COM web-version emojis, where ending char `\uFE0F` is absent 18 | 19 | And of course, extend useful methods. 20 | 21 | ## How to get it? 22 | 23 | ##### Via Direct Download: 24 | 25 | - Use [releases](https://github.com/iris2iris/iris-emoji-kotlin/releases) tab to download the jar directly. 26 | - Download JSON-java dependency from http://mvnrepository.com/artifact/org.json/json. 27 | 28 | ## How to use it? 29 | 30 | ### EmojiManager 31 | 32 | The `EmojiManager` provides several static methods to search through the emojis database: 33 | 34 | - `getForTag` returns all the emojis for a given tag 35 | - `getForAlias` returns the emoji for an alias 36 | - `getAll` returns all the emojis 37 | - `isEmoji` checks if a string is an emoji 38 | - `containsEmoji` checks if a string contains any emoji 39 | 40 | You can also query the metadata: 41 | 42 | - `getAllTags` returns the available tags 43 | 44 | Or get everything: 45 | 46 | - `getAll` returns all the emojis 47 | 48 | ### Emoji model 49 | 50 | An `Emoji` is a POJO (plain old java object), which provides the following methods: 51 | 52 | - `getUnicode` returns the unicode representation of the emoji 53 | - `getUnicode(Fitzpatrick)` returns the unicode representation of the emoji with the provided Fitzpatrick modifier. If the emoji doesn't support the Fitzpatrick modifiers, this method will throw an `UnsupportedOperationException`. If the provided Fitzpatrick is null, this method will return the unicode of the emoji. 
54 | - `getDescription` returns the (optional) description of the emoji 55 | - `getAliases` returns a list of aliases for this emoji 56 | - `getTags` returns a list of tags for this emoji 57 | - `getHtmlDecimal` returns an html decimal representation of the emoji 58 | - `getHtmlHexadecimal` returns an html hexadecimal representation of the emoji 59 | - `supportsFitzpatrick` returns true if the emoji supports the Fitzpatrick modifiers, else false 60 | 61 | ### Fitzpatrick modifiers 62 | 63 | Some emojis now support the use of Fitzpatrick modifiers that give the choice between 5 shades of skin tones: 64 | 65 | | Modifier | Type | 66 | | :------: | -------- | 67 | | 🏻 | type_1_2 | 68 | | 🏼 | type_3 | 69 | | 🏽 | type_4 | 70 | | 🏾 | type_5 | 71 | | 🏿 | type_6 | 72 | 73 | We defined the format of the aliases including a Fitzpatrick modifier as: 74 | 75 | ``` 76 | :ALIAS|TYPE: 77 | ``` 78 | 79 | A few examples: 80 | 81 | ``` 82 | :boy|type_1_2: 83 | :swimmer|type_4: 84 | :santa|type_6: 85 | ``` 86 | 87 | ### EmojiParser 88 | 89 | #### To unicode 90 | 91 | To replace all the aliases and the html representations found in a string by their unicode, use `EmojiParser#parseToUnicode(String)`. 92 | 93 | For example: 94 | 95 | ```kotlin 96 | val str = "An :grinning:awesome :smiley:string &#128516;with a few :wink:emojis!" 97 | val result = EmojiParser.parseToUnicode(str) 98 | println(result) 99 | // Prints: 100 | // "An 😀awesome 😃string 😄with a few 😉emojis!" 101 | ``` 102 | 103 | #### To aliases 104 | 105 | To replace all the emoji's unicodes found in a string by their aliases, use `EmojiParser#parseToAliases(String)`. 106 | 107 | For example: 108 | 109 | ```kotlin 110 | val str = "An 😀awesome 😃string with a few 😉emojis!" 111 | val result = EmojiParser.parseToAliases(str) 112 | println(result) 113 | // Prints: 114 | // "An :grinning:awesome :smiley:string with a few :wink:emojis!" 
115 | ``` 116 | 117 | By default, the aliases will parse and include any Fitzpatrick modifier that would be provided. If you want to remove or ignore the Fitzpatrick modifiers, use `EmojiParser#parseToAliases(String, FitzpatrickAction)`. Examples: 118 | 119 | ```kotlin 120 | val str = "Here is a boy: \uD83D\uDC66\uD83C\uDFFF!" 121 | println(EmojiParser.parseToAliases(str)) 122 | println(EmojiParser.parseToAliases(str, FitzpatrickAction.PARSE)) 123 | // Prints twice: "Here is a boy: :boy|type_6:!" 124 | println(EmojiParser.parseToAliases(str, FitzpatrickAction.REMOVE)) 125 | // Prints: "Here is a boy: :boy:!" 126 | println(EmojiParser.parseToAliases(str, FitzpatrickAction.IGNORE)) 127 | // Prints: "Here is a boy: :boy:🏿!" 128 | ``` 129 | 130 | #### To html 131 | 132 | To replace all the emoji's unicodes found in a string by their html representation, use `EmojiParser#parseToHtmlDecimal(String)` or `EmojiParser#parseToHtmlHexadecimal(String)`. 133 | 134 | For example: 135 | 136 | ```kotlin 137 | val str = "An 😀awesome 😃string with a few 😉emojis!" 138 | 139 | val resultDecimal = EmojiParser.parseToHtmlDecimal(str) 140 | println(resultDecimal) 141 | // Prints: 142 | // "An &#128512;awesome &#128515;string with a few &#128521;emojis!" 143 | 144 | val resultHexadecimal = EmojiParser.parseToHtmlHexadecimal(str) 145 | println(resultHexadecimal) 146 | // Prints: 147 | // "An &#x1f600;awesome &#x1f603;string with a few &#x1f609;emojis!" 148 | ``` 149 | 150 | By default, any Fitzpatrick modifier will be removed. If you want to ignore the Fitzpatrick modifiers, use `EmojiParser#parseToHtmlDecimal(String, FitzpatrickAction)`. Examples: 151 | 152 | ```kotlin 153 | val str = "Here is a boy: \uD83D\uDC66\uD83C\uDFFF!" 154 | println(EmojiParser.parseToHtmlDecimal(str)) 155 | println(EmojiParser.parseToHtmlDecimal(str, FitzpatrickAction.PARSE)) 156 | println(EmojiParser.parseToHtmlDecimal(str, FitzpatrickAction.REMOVE)) 157 | // Print 3 times: "Here is a boy: &#128102;!" 
158 | println(EmojiParser.parseToHtmlDecimal(str, FitzpatrickAction.IGNORE)) 159 | // Prints: "Here is a boy: &#128102;🏿!" 160 | ``` 161 | 162 | The same applies for the methods `EmojiParser#parseToHtmlHexadecimal(String)` and `EmojiParser#parseToHtmlHexadecimal(String, FitzpatrickAction)`. 163 | 164 | #### Remove emojis 165 | 166 | You can easily remove emojis from a string using one of the following methods: 167 | 168 | - `EmojiParser#removeAllEmojis(String)`: removes all the emojis from the String 169 | - `EmojiParser#removeAllEmojisExcept(String, Collection)`: removes all the emojis from the String, except the ones in the Collection 170 | - `EmojiParser#removeEmojis(String, Collection)`: removes the emojis in the Collection from the String 171 | 172 | For example: 173 | 174 | ```kotlin 175 | val str = "An 😀awesome 😃string with a few 😉emojis!" 176 | val collection = ArrayList<Emoji>() 177 | collection.add(EmojiManager.getForAlias("wink")) // This is 😉 178 | 179 | println(EmojiParser.removeAllEmojis(str)) 180 | println(EmojiParser.removeAllEmojisExcept(str, collection)) 181 | println(EmojiParser.removeEmojis(str, collection)) 182 | 183 | // Prints: 184 | // "An awesome string with a few emojis!" 185 | // "An awesome string with a few 😉emojis!" 186 | // "An 😀awesome 😃string with a few emojis!" 187 | ``` 188 | 189 | #### Extract Emojis from a string 190 | 191 | You can search a string of mixed emoji/non-emoji characters and have all of the emoji characters returned as a Collection. 192 | 193 | - `EmojiParser#extractEmojis(String)`: returns all emojis as a Collection. This will include duplicates if emojis are present more than once. 194 | 195 | ## Credits 196 | 197 | **iris-emoji-java** is based on [github/vdurmont/emoji-java](https://github.com/vdurmont/emoji-java). 198 | 199 | And in its turn **emoji-java** originally used the data provided by the [github/gemoji project](https://github.com/github/gemoji). It is still based on it but has evolved since. 
200 | 201 | ## Available Emojis 202 | 203 | See a table of the available emojis and their aliases [HERE](./EMOJIS.md). 204 | -------------------------------------------------------------------------------- /src/com/vdurmont/emoji/Emoji.kt: -------------------------------------------------------------------------------- 1 | package com.vdurmont.emoji 2 | 3 | import java.nio.charset.StandardCharsets 4 | 5 | /** 6 | * This class represents an emoji.

7 | *

8 | * This object is immutable so it can be used safely in a multithreaded context. 9 | * 10 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]

11 | * Creator: Vincent DURMONT [vdurmont@gmail.com] 12 | */
class Emoji(
    /** Human-readable description of the emoji. */
    val description: String,
    /** Whether a Fitzpatrick (skin tone) modifier may be appended to this emoji. */
    val supportsFitzpatrick: Boolean,
    /** Aliases of the emoji. The list is stored as given; it is neither copied nor wrapped. */
    val aliases: List<String>,
    /** Tags associated with the emoji. The list is stored as given; it is neither copied nor wrapped. */
    val tags: List<String>,
    // UTF-8 encoded bytes of the emoji; decoded once into [unicode].
    vararg bytes: Byte
) {

    /** Unicode representation of the emoji, decoded from the UTF-8 constructor bytes. */
    val unicode: String = String(bytes, StandardCharsets.UTF_8)

    /** HTML decimal representation: one `&#N;` entity per code point of [unicode]. */
    val htmlDec: String

    /** HTML hexadecimal representation: one `&#xN;` entity (lowercase hex) per code point of [unicode]. */
    val htmlHex: String

    init {
        val decimal = StringBuilder()
        val hexadecimal = StringBuilder()
        var offset = 0
        while (offset < unicode.length) {
            val codePoint = unicode.codePointAt(offset)
            // Plain appends instead of String.format: "%d" is formatted with the default
            // locale and may produce non-ASCII digits, which would corrupt the entity.
            decimal.append("&#").append(codePoint).append(';')
            hexadecimal.append("&#x").append(Integer.toHexString(codePoint)).append(';')
            // Advance by one or two chars depending on whether the code point is supplementary.
            offset += Character.charCount(codePoint)
        }
        htmlDec = decimal.toString()
        htmlHex = hexadecimal.toString()
    }

    /**
     * Returns whether the emoji supports the Fitzpatrick modifiers or not.
     *
     * @return true if the emoji supports the Fitzpatrick modifiers
     */
    fun supportsFitzpatrick(): Boolean = supportsFitzpatrick

    /**
     * Returns the unicode representation of the emoji followed by the provided
     * Fitzpatrick modifier. If the modifier is null, the result is [unicode].
     *
     * Note that the support check runs first: an emoji that does not support
     * Fitzpatrick modifiers throws even when [fitzpatrick] is null.
     *
     * @param fitzpatrick the fitzpatrick modifier or null
     * @return the unicode representation
     * @throws UnsupportedOperationException if the emoji doesn't support the
     * Fitzpatrick modifiers
     */
    fun getUnicode(fitzpatrick: Fitzpatrick?): String {
        if (!supportsFitzpatrick) {
            throw UnsupportedOperationException(
                "Cannot get the unicode with a fitzpatrick modifier, " +
                    "the emoji doesn't support fitzpatrick."
            )
        }
        return if (fitzpatrick == null) unicode else unicode + fitzpatrick.unicode
    }

    /**
     * Returns the HTML decimal representation of the emoji.
     *
     * @return the HTML decimal representation
     */
    fun getHtmlDecimal(): String = htmlDec

    /**
     * Returns the HTML hexadecimal representation of the emoji.
     *
     * @return the HTML hexadecimal representation
     */
    fun getHtmlHexadecimal(): String = htmlHex

    /** Two emojis are equal when their [unicode] strings are equal. */
    override fun equals(other: Any?): Boolean = other is Emoji && other.unicode == unicode

    /** Hash code derived from [unicode] only, consistent with [equals]. */
    override fun hashCode(): Int = unicode.hashCode()

    /**
     * Returns the String representation of the Emoji object.
     *
     * Example:
     * `Emoji{
     * description='smiling face with open mouth and smiling eyes',
     * supportsFitzpatrick=false,
     * aliases=[smile],
     * tags=[happy, joy, pleased],
     * unicode='😄',
     * htmlDec='&#128516;',
     * htmlHex='&#x1f604;'
     * }`
     * (shown wrapped here; the actual string is a single line)
     *
     * @return the string representation
     */
    override fun toString(): String =
        "Emoji{" +
            "description='$description'" +
            ", supportsFitzpatrick=$supportsFitzpatrick" +
            ", aliases=$aliases" +
            ", tags=$tags" +
            ", unicode='$unicode'" +
            ", htmlDec='$htmlDec'" +
            ", htmlHex='$htmlHex'" +
            "}"
}
-------------------------------------------------------------------------------- /src/com/vdurmont/emoji/EmojiLoader.kt: -------------------------------------------------------------------------------- 1 | package com.vdurmont.emoji 2 | 3 | import org.json.JSONArray 4 | import org.json.JSONObject 5 | import java.io.* 6 | import java.nio.charset.StandardCharsets 7 | import java.util.* 8 | 9 | /** 10 | * Loads the emojis from a JSON database. 11 | * 12 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]

13 | * Creator: Vincent DURMONT [vdurmont@gmail.com] 14 | */
object EmojiLoader {
    /**
     * Loads a JSONArray of emojis from an InputStream, parses it and returns the
     * associated list of [Emoji]s. Entries without an `emoji` key are skipped.
     * The stream is fully consumed and closed.
     *
     * @param stream the stream of the JSONArray
     *
     * @return the list of [Emoji]s, in the order of the JSON array
     * @throws IOException if an error occurs while reading the stream
     */
    @Throws(IOException::class)
    fun loadEmojis(stream: InputStream): MutableList<Emoji> {
        val emojisJSON = JSONArray(inputStreamToString(stream))
        val emojis: MutableList<Emoji> = ArrayList(emojisJSON.length())
        for (i in 0 until emojisJSON.length()) {
            buildEmojiFromJSON(emojisJSON.getJSONObject(i))?.let { emojis.add(it) }
        }
        return emojis
    }

    /**
     * Reads the whole stream as UTF-8 text.
     *
     * `use` closes the stream whether or not reading succeeds, so a failed read
     * no longer leaves it open.
     */
    @Throws(IOException::class)
    private fun inputStreamToString(stream: InputStream): String =
        stream.use { String(it.readBytes(), StandardCharsets.UTF_8) }

    /**
     * Builds an [Emoji] from one entry of the JSON database.
     *
     * Keys read: `emoji` (required, otherwise null is returned), `description`
     * (optional, defaults to an empty string), `supports_fitzpatrick` (optional,
     * defaults to false), `aliases` and `tags` (both read with `getJSONArray`).
     *
     * @param json the JSON object describing a single emoji
     * @return the emoji, or null when the entry has no `emoji` key
     */
    @Throws(UnsupportedEncodingException::class)
    internal fun buildEmojiFromJSON(
        json: JSONObject
    ): Emoji? {
        if (!json.has("emoji")) {
            return null
        }
        val bytes = json.getString("emoji").toByteArray(StandardCharsets.UTF_8)
        // The description is optional: fall back to "" instead of failing the whole load
        // with a NullPointerException on the first entry that has none.
        val description = if (json.has("description")) json.getString("description") else ""
        val supportsFitzpatrick =
            json.has("supports_fitzpatrick") && json.getBoolean("supports_fitzpatrick")
        val aliases = jsonArrayToStringList(json.getJSONArray("aliases"))
        val tags = jsonArrayToStringList(json.getJSONArray("tags"))
        return Emoji(description, supportsFitzpatrick, aliases, tags, *bytes)
    }

    /** Copies every element of [array] into a list using `getString`, preserving order. */
    private fun jsonArrayToStringList(array: JSONArray): List<String> =
        List(array.length()) { index -> array.getString(index) }
}
-------------------------------------------------------------------------------- /src/com/vdurmont/emoji/EmojiManager.kt: -------------------------------------------------------------------------------- 1 | package com.vdurmont.emoji 2 | 3 | import com.vdurmont.emoji.EmojiLoader.loadEmojis 4 | import com.vdurmont.emoji.EmojiTrie.Matches 5 | import java.io.IOException 6 | 7 | /** 8 | * Holds the loaded emojis and provides search functions. 9 | * 10 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]

11 | * Creator: Vincent DURMONT [vdurmont@gmail.com] 12 | */
object EmojiManager {
    private const val PATH = "/emojis.json"
    private val EMOJIS_BY_ALIAS: MutableMap<String, Emoji> = HashMap()
    private val EMOJIS_BY_TAG: MutableMap<String, MutableSet<Emoji>> = HashMap()
    lateinit var ALL_EMOJIS: List<Emoji>
    lateinit var EMOJI_TRIE: EmojiTrie

    /**
     * Returns all the [Emoji]s for a given tag.
     *
     * @param tag the tag
     *
     * @return the associated [Emoji]s, null if the tag
     * is unknown (or null)
     */
    fun getForTag(tag: String?): Set<Emoji>? = tag?.let { EMOJIS_BY_TAG[it] }

    /**
     * Returns the [Emoji] for a given alias. One leading and one trailing ':'
     * are ignored, so both `smile` and `:smile:` are accepted.
     *
     * @param alias the alias
     *
     * @return the associated [Emoji], null if the alias
     * is unknown, null or empty
     */
    fun getForAlias(alias: String?): Emoji? =
        if (alias.isNullOrEmpty()) null else EMOJIS_BY_ALIAS[trimAlias(alias)]

    /**
     * Strips at most one leading and one trailing ':' from [alias].
     *
     * Implemented with removePrefix/removeSuffix so that a lone ":" yields ""
     * instead of an out-of-range substring (start index 1, end index 0).
     */
    private fun trimAlias(alias: String): String = alias.removePrefix(":").removeSuffix(":")

    /**
     * Returns the [Emoji] for a given unicode.
     *
     * @param unicode the unicode
     *
     * @return the associated [Emoji], null if the
     * unicode is unknown (or null)
     */
    fun getByUnicode(unicode: String?): Emoji? {
        if (unicode == null) {
            return null
        }
        return EmojiParser.getEmojiInPosition(unicode.toCharArray(), 0)?.emoji
    }

    /**
     * Returns all the [Emoji]s
     *
     * @return all the [Emoji]s
     */
    val all: Collection<Emoji>?
        get() = ALL_EMOJIS

    /**
     * Tests if a given String is an emoji.
     *
     * @param string the string to test
     * @return true if the string is an emoji's unicode, false else
     */
    fun isEmoji(string: String?): Boolean {
        if (string == null) return false
        val chars = string.toCharArray()
        val result = EmojiParser.getEmojiInPosition(chars, 0)
        // The match has to start at index 0 and end exactly at the end of the string.
        return result != null && result.emojiStartIndex == 0 && result.endIndex == chars.size
    }

    /**
     * Tests if a given String contains an emoji.
     *
     * @param string the string to test
     * @return true if the string contains an emoji's unicode, false otherwise
     */
    fun containsEmoji(string: String?): Boolean =
        string != null && EmojiParser.getNextEmoji(string.toCharArray(), 0) != null

    /**
     * Tests if a given String only contains emojis.
     *
     * @param string the string to test
     * @return true if removing all emojis from the string leaves it empty, false else
     */
    fun isOnlyEmojis(string: String?): Boolean =
        string != null && EmojiParser.removeAllEmojis(string).isEmpty()

    /**
     * Checks if sequence of chars contain an emoji.
     *
     * @param sequence Sequence of char that may contain emoji in full or
     * partially.
     * @return <li>
     * Matches.EXACTLY if char sequence in its entirety is an emoji
     * </li>
     * <li>
     * Matches.POSSIBLY if char sequence matches prefix of an emoji
     * </li>
     * <li>
     * Matches.IMPOSSIBLE if char sequence matches no emoji or prefix of an
     * emoji
     * </li>
     */
    fun isEmoji(sequence: CharArray): Matches = EMOJI_TRIE.isEmoji(sequence)

    /**
     * Returns all the tags in the database
     *
     * @return the tags
     */
    val allTags: Collection<String>
        get() = EMOJIS_BY_TAG.keys

    // Loads the database from the classpath resource and builds the tag index, the alias
    // index and the trie. ALL_EMOJIS is the loaded list sorted by descending unicode length.
    init {
        try {
            val stream = EmojiLoader::class.java.getResourceAsStream(PATH)
                ?: throw IOException("Emoji database not found on the classpath: $PATH")
            // `use` closes the stream even when loading fails.
            val emojis = stream.use { loadEmojis(it) }

            for (emoji in emojis) {
                for (tag in emoji.tags) {
                    EMOJIS_BY_TAG.getOrPut(tag) { HashSet() }.add(emoji)
                }
                for (alias in emoji.aliases) {
                    // A later emoji with the same alias overwrites the earlier one.
                    EMOJIS_BY_ALIAS[alias] = emoji
                }
            }
            // The trie is built before the list is re-ordered, as in the original sequence.
            EMOJI_TRIE = EmojiTrie(emojis)
            emojis.sortByDescending { it.unicode.length }
            ALL_EMOJIS = emojis
        } catch (e: IOException) {
            throw RuntimeException(e)
        }
    }
}
-------------------------------------------------------------------------------- /src/com/vdurmont/emoji/EmojiParser.kt: -------------------------------------------------------------------------------- 1 | package com.vdurmont.emoji; 2 | 3 | import com.vdurmont.emoji.EmojiManager.isEmoji 4 | import com.vdurmont.emoji.EmojiParser.FitzpatrickAction.* 5 | import com.vdurmont.emoji.EmojiParser.extractEmojiStrings 6 | import java.util.* 7 | 8 | /** 9 | * Provides methods to parse strings with emojis. 10 | * 11 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]
12 | * Creator: Vincent DURMONT [vdurmont@gmail.com] 13 | */ 14 | object EmojiParser { 15 | 16 | /** 17 | * See {@link #parseToAliases(String, FitzpatrickAction)} with the action 18 | * "PARSE" 19 | * 20 | * @param input the string to parse 21 | * @return the string with the emojis replaced by their alias. 22 | */ 23 | fun parseToAliases(input: String): String { 24 | return parseToAliases(input, PARSE); 25 | } 26 | 27 | /** 28 | * Replaces the emoji's unicode occurrences by one of their alias 29 | * (between 2 ':').
30 | * Example: 😄 will be replaced by :smile:
31 | *
32 | * When a fitzpatrick modifier is present with a PARSE action, a "|" will be 33 | * appendend to the alias, with the fitzpatrick type.
34 | * Example: 👦🏿 will be replaced by 35 | * :boy|type_6:
36 | * The fitzpatrick types are: type_1_2, type_3, type_4, type_5, type_6
37 | *
38 | * When a fitzpatrick modifier is present with a REMOVE action, the modifier 39 | * will be deleted.
40 | * Example: 👦🏿 will be replaced by :boy:
41 | *
42 | * When a fitzpatrick modifier is present with a IGNORE action, the modifier 43 | * will be ignored.
44 | * Example: 👦🏿 will be replaced by :boy:🏿
45 | * 46 | * @param input the string to parse 47 | * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers 48 | * @return the string with the emojis replaced by their alias. 49 | */ 50 | fun parseToAliases(input: String, fitzpatrickAction: FitzpatrickAction): String { 51 | val emojiTransformer = object : EmojiTransformer { 52 | override fun transform(emoji: EmojiResult): String { 53 | when (fitzpatrickAction) { 54 | REMOVE -> 55 | return ":" + emoji.emoji.aliases.get(0) + ":"; 56 | IGNORE -> 57 | return ":" + emoji.emoji.aliases.get(0) + ":" + emoji.fitzpatrickUnicode; 58 | 59 | /*FitzpatrickAction.PARSE*/ else -> 60 | if (emoji.hasFitzpatrick()) { 61 | return ":" + emoji.emoji.aliases.get(0) + "|" + emoji.fitzpatrickType + ":"; 62 | } else { 63 | return ":" + emoji.emoji.aliases.get(0) + ":"; 64 | } 65 | } 66 | } 67 | }; 68 | 69 | return parseFromUnicode(input, emojiTransformer); 70 | } 71 | 72 | /** 73 | * Replace all emojis with character 74 | * 75 | * @param str the string to process 76 | * @param replacementString replacement the string that will replace all the emojis 77 | * @return the string with replaced character 78 | */ 79 | fun replaceAllEmojis(str: String, replacementString: String): String { 80 | val emojiTransformer: EmojiTransformer = object : EmojiTransformer { 81 | override fun transform(emoji: EmojiResult): String { 82 | return replacementString; 83 | } 84 | }; 85 | 86 | return parseFromUnicode(str, emojiTransformer); 87 | } 88 | 89 | 90 | /** 91 | * Replaces the emoji's aliases (between 2 ':') occurrences and the html 92 | * representations by their unicode.
93 | * Examples:
94 | * :smile: will be replaced by 😄
95 | * &#128516; will be replaced by 😄
* :boy|type_6: will be replaced by 👦🏿
     *
     * @param input the string to parse
     * @return the string with the aliases and html representations replaced by
     * their unicode.
     */
    fun parseToUnicode(input: String): String? {
        val out = StringBuilder(input.length)
        var pos = 0
        while (pos < input.length) {
            // Try a ":alias:" first, then an "&#...;" entity, at the current position.
            val candidate = getAliasAt(input, pos) ?: getHtmlEncodedEmojiAt(input, pos)
            if (candidate == null) {
                out.append(input[pos])
            } else {
                out.append(candidate.emoji.unicode)
                candidate.fitzpatrick?.let { out.append(it.unicode) }
                // endIndex is the closing ':' / ';'; the increment below steps past it.
                pos = candidate.endIndex
            }
            pos++
        }
        return out.toString()
    }

    /**
     * Looks for a `:alias:` or `:alias|fitzpatrick_type:` token that begins exactly at [start].
     *
     * @return the matched candidate, or null when no known alias starts at [start]
     */
    private fun getAliasAt(input: String, start: Int): AliasCandidate? {
        // The opening ':' plus at least one more character must be available.
        if (start + 2 > input.length) return null
        if (input[start] != ':') return null

        // Searching from start + 2 forces the alias to be at least one character long.
        val closing = input.indexOf(':', start + 2)
        if (closing == -1) return null

        val bar = input.indexOf('|', start + 2)
        val hasModifier = bar != -1 && bar < closing
        val nameEnd = if (hasModifier) bar else closing

        val emoji = EmojiManager.getForAlias(input.substring(start, nameEnd)) ?: return null
        if (!hasModifier) return AliasCandidate(emoji, null, start, closing)

        // A modifier was written, but this emoji cannot carry one.
        if (!emoji.supportsFitzpatrick()) return null
        val modifier = Fitzpatrick.fitzpatrickFromType(input.substring(bar + 1, closing))
        return AliasCandidate(emoji, modifier, start, closing)
    }

    /**
     * Finds the HTML
* encoded emoji in the given string starting at the given point, null otherwise
     */
    private fun getHtmlEncodedEmojiAt(input: String, start: Int): AliasCandidate? {
        // Fewer than four characters remain, or the text does not open with "&#".
        if (input.length < start + 4 || input[start] != '&' || input[start + 1] != '#') return null
        var longestEmoji: Emoji? = null
        var longestCodePointEnd = -1
        // Scratch buffer sized to the longest emoji the trie knows about.
        val chars = CharArray(EmojiManager.EMOJI_TRIE.maxDepth)
        var charsIndex = 0
        var codePointStart = start
        do {
            // Searching from +3 forces at least one character between "&#" and ';'.
            val codePointEnd = input.indexOf(';', codePointStart + 3)
            if (codePointEnd == -1) break
            charsIndex += try {
                val radix = if (input[codePointStart + 2] == 'x') 16 else 10
                // radix / 16 is 1 for hex and 0 for decimal, i.e. it skips the 'x' marker.
                val codePoint = input.substring(codePointStart + 2 + radix / 16, codePointEnd).toInt(radix)
                Character.toChars(codePoint, chars, charsIndex)
            } catch (e: IllegalArgumentException) {
                // Not a number, or not a valid Unicode code point.
                break
            } catch (e: IndexOutOfBoundsException) {
                // A surrogate pair did not fit in the single slot left in the buffer;
                // nothing longer can match, so keep the best result found so far.
                break
            }
            val foundEmoji = EmojiManager.EMOJI_TRIE.getEmoji(chars, 0, charsIndex)
            if (foundEmoji != null) {
                longestEmoji = foundEmoji
                longestCodePointEnd = codePointEnd
            }
            codePointStart = codePointEnd + 1
        } while (input.length > codePointStart + 4 &&
            input[codePointStart] == '&' &&
            input[codePointStart + 1] == '#' &&
            charsIndex < chars.size &&
            !EmojiManager.EMOJI_TRIE.isEmoji(chars, 0, charsIndex).impossibleMatch()
        )
        return if (longestEmoji == null) null else AliasCandidate(longestEmoji, null, start, longestCodePointEnd)
    }

    /**
     * Shorthand for [parseToHtmlDecimal] with the `PARSE` action.
     *
     * @param input the string to parse
     * @return the string with the emojis replaced by their html decimal
     * representation.
     */
    fun parseToHtmlDecimal(input: String): String? {
        return parseToHtmlDecimal(input, PARSE)
    }

    /**
     * Replaces the emoji's unicode occurrences by their html representation.
192 | * Example: 😄 will be replaced by &#128516;
193 | *
194 | * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the 195 | * modifier will be deleted from the string.
196 | * Example: 👦🏿 will be replaced by 197 | * &#128102;
198 | *
199 | * When a fitzpatrick modifier is present with an IGNORE action, the modifier 200 | * will be ignored and will remain in the string.
* Example: 👦🏿 will be replaced by
     * &#128102;🏿
     *
     * @param input the string to parse
     * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers
     * @return the string with the emojis replaced by their html decimal
     * representation.
     */
    fun parseToHtmlDecimal(input: String, fitzpatrickAction: FitzpatrickAction): String {
        val toDecimal = object : EmojiTransformer {
            override fun transform(emoji: EmojiResult): String {
                val html = emoji.emoji.getHtmlDecimal()
                return when (fitzpatrickAction) {
                    // The modifier stays in the output, right after the entity.
                    IGNORE -> html + emoji.fitzpatrickUnicode
                    PARSE, REMOVE -> html
                }
            }
        }
        return parseFromUnicode(input, toDecimal)
    }

    /**
     * Shorthand for [parseToHtmlHexadecimal] with the `PARSE` action.
     *
     * @param input the string to parse
     * @return the string with the emojis replaced by their html hex
     * representation.
     */
    fun parseToHtmlHexadecimal(input: String): String = parseToHtmlHexadecimal(input, PARSE)

    /**
     * Replaces the emoji's unicode occurrences by their html hex
     * representation.
238 | * Example: 👦 will be replaced by &#x1f466;
239 | *
240 | * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the 241 | * modifier will be deleted.
242 | * Example: 👦🏿 will be replaced by 243 | * &#x1f466;
244 | *
245 | * When a fitzpatrick modifier is present with an IGNORE action, the modifier 246 | * will be ignored and will remain in the string.
* Example: 👦🏿 will be replaced by
     * &#x1f466;🏿
     *
     * @param input the string to parse
     * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers
     * @return the string with the emojis replaced by their html hex
     * representation.
     */
    fun parseToHtmlHexadecimal(input: String, fitzpatrickAction: FitzpatrickAction): String {
        val toHex = object : EmojiTransformer {
            override fun transform(emoji: EmojiResult): String {
                val html = emoji.emoji.getHtmlHexadecimal()
                return when (fitzpatrickAction) {
                    PARSE, REMOVE -> html
                    // The modifier stays in the output, right after the entity.
                    IGNORE -> html + emoji.fitzpatrickUnicode
                }
            }
        }
        return parseFromUnicode(input, toHex)
    }

    /**
     * Removes all emojis from a String
     *
     * @param str the string to process
     * @return the string without any emoji
     */
    fun removeAllEmojis(str: String): String {
        val eraser = object : EmojiTransformer {
            override fun transform(emoji: EmojiResult): String = ""
        }
        return parseFromUnicode(str, eraser)
    }


    /**
     * Removes a set of emojis from a String
     *
     * Emojis that are not in [emojisToRemove] are written back exactly as they
     * appeared in [str], including any skin-tone modifier, gender suffix and
     * trailing variation selector that was matched with them.
     *
     * @param str the string to process
     * @param emojisToRemove the emojis to remove from this string
     * @return the string without the emojis that were removed
     */
    fun removeEmojis(str: String, emojisToRemove: Collection<Emoji>): String {
        val filter = object : EmojiTransformer {
            override fun transform(emoji: EmojiResult): String {
                if (emojisToRemove.contains(emoji.emoji)) return ""
                // toString() is the matched source slice, so nothing the match covered is lost.
                return emoji.toString()
            }
        }
        return parseFromUnicode(str, filter)
    }

    /**
     * Removes all the emojis in a String except a
* provided set
     *
     * Emojis listed in [emojisToKeep] are written back exactly as they appeared
     * in [str], including any skin-tone modifier, gender suffix and trailing
     * variation selector that was matched with them.
     *
     * @param str the string to process
     * @param emojisToKeep the emojis to keep in this string
     * @return the string without the emojis that were removed
     */
    fun removeAllEmojisExcept(str: String, emojisToKeep: Collection<Emoji>): String {
        val filter = object : EmojiTransformer {
            override fun transform(emoji: EmojiResult): String {
                // toString() is the matched source slice, so nothing the match covered is lost.
                return if (emojisToKeep.contains(emoji.emoji)) emoji.toString() else ""
            }
        }
        return parseFromUnicode(str, filter)
    }


    /**
     * Detects all unicode emojis in input string and replaces them with the
     * return value of transformer.transform()
     *
     * @param input the string to process
     * @param transformer emoji transformer to apply to each emoji
     * @return input string with all emojis transformed
     */
    fun parseFromUnicode(input: String, transformer: EmojiTransformer): String {
        val out = StringBuilder(input.length)
        var copiedUpTo = 0
        for (match in getEmojies(input)) {
            // Plain text between the previous match and this one is copied verbatim.
            out.append(input, copiedUpTo, match.emojiStartIndex)
            out.append(transformer.transform(match))
            copiedUpTo = match.endIndex
        }
        out.append(input, copiedUpTo, input.length)
        return out.toString()
    }

    /*fun extractEmojiStrings(input: String?): List? {
        return extractEmojiStrings(input, 0)
    }*/

    /**
     * Returns the source text of each emoji found in [input].
     *
     * @param input the string to scan
     * @param limit forwarded to [extractEmojis]; 0 means no limit
     * @return one string per detected emoji, in order of appearance
     */
    fun extractEmojiStrings(input: String, limit: Int = 0): List<String>? {
        return extractEmojis(input, limit).map { it.toString() }
    }

    /*fun extractEmojis(input: String): List?
{
        return getEmojies(input, 0)
    }*/

    /**
     * Returns the emojis found in [input]; a thin alias for [getEmojies].
     *
     * @param input the string to scan
     * @param limit maximum number of emojis to return; 0 means no limit
     */
    fun extractEmojis(input: String, limit: Int = 0): List<EmojiResult> {
        return getEmojies(input, limit)
    }

    /**
     * Generates the list of [EmojiResult]s found in the input string, one per
     * unicode emoji, in order of appearance. Each result records where the
     * emoji starts ([EmojiResult.emojiStartIndex]) and where the whole match
     * ends ([EmojiResult.endIndex]); the match includes a following
     * Fitzpatrick modifier, gender suffix and variation selector when present.
     *
     * @param input String to find all unicode emojis in
     * @param limit maximum number of results; 0 means no limit, and any
     * non-zero value below 1 stops after the first match
     * @return List of [EmojiResult] for each unicode emoji in text
     */
    fun getEmojies(input: String, limit: Int): List<EmojiResult> {
        val chars = input.toCharArray()
        val found = ArrayList<EmojiResult>()
        var remaining = limit
        var position = 0
        while (true) {
            val match = getNextEmoji(chars, position) ?: break
            found.add(match)
            if (remaining != 0) {
                remaining--
                if (remaining <= 0) break
            }
            // Resume scanning right after everything this match consumed.
            position = match.endIndex
        }
        return found
    }

    /**
     * Returns every emoji found in [input] (no limit).
     */
    fun getEmojies(input: String): List<EmojiResult> {
        return getEmojies(input, 0)
    }

    /**
     * Finds the first [EmojiResult] at or after a given starting index
     *
     * @param chars char array to search
     * @param start starting index for search
     * @return the next [EmojiResult], or null if none is found from start onwards
     */
    fun getNextEmoji(chars: CharArray, start: Int): EmojiResult? {
        for (i in start until chars.size) {
            getEmojiInPosition(chars, i)?.let { return it }
        }
        return null
    }

    /**
     * Matches an emoji that begins exactly at [start].
     *
     * For emojis that support Fitzpatrick modifiers the match is extended over
     * a following modifier and a following ZWJ + gender sign. For every emoji a
     * directly following `\uFE0F` is consumed as well.
     *
     * @param chars char array to inspect
     * @param start index at which the emoji must begin
     * @return the match, or null when no known emoji begins at [start]
     */
    fun getEmojiInPosition(chars: CharArray, start: Int): EmojiResult? {
        val emoji = getBestBaseEmoji(chars, start) ?: return null

        var endPos = start + emoji.unicode.length
        var fitzpatrick: Fitzpatrick? = null
        var gender: Gender? = null
        if (emoji.supportsFitzpatrick) {
            fitzpatrick = Fitzpatrick.find(chars, endPos)
            // A modifier is one surrogate pair, i.e. two chars.
            if (fitzpatrick != null) endPos += 2
            val genderMatch = findGender(chars, endPos)
            if (genderMatch != null) {
                endPos = genderMatch.endPos + 1
                gender = genderMatch.gender
            }
        }

        if (endPos < chars.size && chars[endPos] == '\uFE0F') endPos++
        return EmojiResult(emoji, fitzpatrick, gender, chars, start, endPos)
    }

    /**
     * Looks for a zero-width joiner at [startPos] followed by a gender sign.
     *
     * @return the gender and the index of the gender sign char, or null
     */
    private fun findGender(chars: CharArray, startPos: Int): GenderMatch? {
        if (startPos >= chars.size || chars[startPos] != '\u200D') return null
        val signPos = startPos + 1
        val gender = Gender.find(chars, signPos) ?: return null
        return GenderMatch(gender, signPos)
    }

    /** A gender sign found by [findGender]; [endPos] is the index of the sign char. */
    private class GenderMatch(val gender: Gender?, val endPos: Int)


    /**
     * Returns the emoji that starts at index startPos of the text, as decided
     * by `EmojiManager.EMOJI_TRIE.getBestEmoji`, or null if none is found.
     *
     * @param text the current text where we are looking for an emoji
     * @param startPos the position in the text where the emoji must start
     * @return the emoji starting at startPos, or null if not found
     */
    fun getBestBaseEmoji(text: CharArray, startPos: Int): Emoji? {
        return EmojiManager.EMOJI_TRIE.getBestEmoji(text, startPos)
    }


    /**
     * One emoji occurrence inside [source].
     *
     * @property emoji the base emoji that was matched
     * @property fitzpatrick the skin-tone modifier that followed it, if any
     * @property gender the gender sign that followed it, if any
     * @property source the char array the match was found in
     * @property emojiStartIndex index of the first char of the match
     * @property endIndex index just past the last char of the match
     */
    class EmojiResult(
        val emoji: Emoji,
        val fitzpatrick: Fitzpatrick?,
        val gender: Gender?,
        val source: CharArray,
        val emojiStartIndex: Int,
        val endIndex: Int
    ) {

        fun hasFitzpatrick(): Boolean = fitzpatrick != null

        /**
         * Lower-case name of the modifier (`type_1_2` … `type_6`), as used in
         * the `:alias|type:` notation, or an empty string when there is none.
         */
        val fitzpatrickType: String
            get() = fitzpatrick?.name?.toLowerCase(java.util.Locale.ROOT) ?: ""

        /** Unicode of the modifier, or an empty string when there is none. */
        val fitzpatrickUnicode: String
            get() = fitzpatrick?.unicode ?: ""

        /** Index just past the base emoji, before any modifier. */
        val emojiEndIndex: Int
            get() = emojiStartIndex + emoji.unicode.length

        /** Index just past the modifier (two chars) when one is present. */
        val fitzpatrickEndIndex: Int
            get() = emojiEndIndex + if (fitzpatrick != null) 2 else 0

        // Lazily built copy of source[emojiStartIndex, endIndex).
        private var sub: String? = null

        /** Returns the matched slice of [source]; the string is built once and cached. */
        override fun toString(): String {
            return sub ?: String(source, emojiStartIndex, endIndex - emojiStartIndex).also { sub = it }
        }

    }


    /**
     * An alias or html entity found in a string; [endIndex] is the index of
     * its closing delimiter.
     */
    private class AliasCandidate(
        val emoji: Emoji,
        val fitzpatrick: Fitzpatrick?,
        val startIndex: Int,
        val endIndex: Int
    )

    /**
     * Enum used to indicate what should be done when a Fitzpatrick modifier is
     * found.
*/
    enum class FitzpatrickAction {
        /** Tries to match the Fitzpatrick modifier with the previous emoji. */
        PARSE,

        /** Removes the Fitzpatrick modifier from the string. */
        REMOVE,

        /** Ignores the Fitzpatrick modifier (it will stay in the string). */
        IGNORE
    }

    /** Callback that turns one detected emoji into its replacement text. */
    interface EmojiTransformer {
        fun transform(emoji: EmojiResult): String
    }


}

/**
 * Manual smoke run: prints the emoji strings extracted from a sample text,
 * then the result of `isEmoji` for a single sample followed by a space.
 */
fun main() {
    val sample =
        "\uD83D\uDC68\u200D\uD83D\uDCBB\uD83E\uDDB9\uD83C\uDFFE\uD83E\uDDD1\uD83C\uDFFD\u200D\uD83D\uDD2C\uD83E\uDDD1\uD83C\uDFFB\u200D\uD83C\uDF73\uD83D\uDC70\uD83C\uDFFE\uD83E\uDDDB\uD83C\uDFFD\u200D♂️\uD83E\uDD31\uD83C\uDFFF\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFEB\uD83E\uDDD1\uD83C\uDFFB\u200D\uD83C\uDF73\uD83E\uDDD1\uD83C\uDFFB\u200D\uD83C\uDF73\uD83D\uDC73\uD83C\uDFFB\u200D♂️"
    println(extractEmojiStrings(sample))
    println(isEmoji("\uD83E\uDDDB\uD83C\uDFFD\u200D♂️ "))
}
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/EmojiTrie.kt:
--------------------------------------------------------------------------------
package com.vdurmont.emoji

/**
 *
 * @author Improver: Ivan Ivanov [https://vk.com/irisism]

* Creator: Vincent DURMONT [vdurmont@gmail.com]
 */
class EmojiTrie(emojis: Collection<Emoji>) {
    private val root = Node()

    /** Length, in chars, of the longest emoji stored in the trie. */
    val maxDepth: Int

    /**
     * Checks if sequence of chars contain an emoji.
     *
     * @param sequence Sequence of char that may contain emoji in full or
     * partially.
     * @return
     * - [Matches.EXACTLY] if char sequence in its entirety is an emoji
     * - [Matches.POSSIBLY] if char sequence matches prefix of an emoji
     * - [Matches.IMPOSSIBLE] if char sequence matches no emoji or prefix of an
     *   emoji
     */
    fun isEmoji(sequence: CharArray): Matches {
        return isEmoji(sequence, 0, sequence.size)
    }

    /**
     * Checks if the sequence of chars within the given bound indices contain an emoji.
     *
     * @param start index of the first char to consider
     * @param end index just past the last char to consider
     * @throws ArrayIndexOutOfBoundsException if the bounds do not fit [sequence]
     */
    fun isEmoji(sequence: CharArray, start: Int, end: Int): Matches {
        checkBounds(sequence, start, end)
        var node = root
        for (i in start until end) {
            node = node.getChild(sequence[i]) ?: return Matches.IMPOSSIBLE
        }
        return if (node.isEndOfEmoji) Matches.EXACTLY else Matches.POSSIBLY
    }

    /**
     * Finds the longest emoji that begins exactly at [start].
     *
     * The walk remembers the last complete emoji it passed, so running into a
     * longer but unfinished sequence (for example an emoji followed by a
     * zero-width joiner and an unrelated char) still yields the shorter,
     * complete emoji instead of nothing.
     *
     * @param sequence chars to inspect
     * @param start index at which the emoji must begin
     * @return the longest complete emoji starting at [start], or null
     * @throws ArrayIndexOutOfBoundsException if [start] is negative
     */
    fun getBestEmoji(sequence: CharArray, start: Int): Emoji? {
        if (start < 0) {
            throw ArrayIndexOutOfBoundsException("start $start, length ${sequence.size}")
        }
        var node = root
        var best: Emoji? = node.emoji
        for (i in start until sequence.size) {
            node = node.getChild(sequence[i]) ?: break
            node.emoji?.let { best = it }
        }
        return best
    }

    /**
     * Finds Emoji instance from emoji unicode
     *
     * @param unicode unicode of emoji to get
     * @return Emoji instance if unicode matches an emoji, null otherwise.
     */
    fun getEmoji(unicode: String): Emoji? {
        return getEmoji(unicode.toCharArray(), 0, unicode.length)
    }

    /**
     * Finds the emoji spelled exactly by `sequence[start, end)`.
     *
     * @param start index of the first char to consider
     * @param end index just past the last char to consider
     * @return the emoji, or null when that range is not a complete emoji
     * @throws ArrayIndexOutOfBoundsException if the bounds do not fit [sequence]
     */
    fun getEmoji(sequence: CharArray, start: Int, end: Int): Emoji? {
        checkBounds(sequence, start, end)
        var node = root
        // Walk from start (not from 0) so that the requested range is honoured.
        for (i in start until end) {
            node = node.getChild(sequence[i]) ?: return null
        }
        return node.emoji
    }

    /** Rejects ranges that are negative, inverted or reach past the array. */
    private fun checkBounds(sequence: CharArray, start: Int, end: Int) {
        if (start < 0 || start > end || end > sequence.size) {
            throw ArrayIndexOutOfBoundsException("start $start, end $end, length ${sequence.size}")
        }
    }

    enum class Matches {
        EXACTLY, POSSIBLY, IMPOSSIBLE;

        fun exactMatch(): Boolean = this == EXACTLY

        fun impossibleMatch(): Boolean = this == IMPOSSIBLE
    }

    /** One trie node; [emoji] is set when the path from the root spells a complete emoji. */
    private class Node {
        private val children: MutableMap<Char, Node> = HashMap()
        var emoji: Emoji? = null

        val isEndOfEmoji: Boolean
            get() = emoji != null

        fun hasChild(child: Char): Boolean = children.containsKey(child)

        fun addChild(child: Char) {
            children[child] = Node()
        }

        fun getChild(child: Char): Node? = children[child]

        /** Returns the child for [child], creating it first when it is missing. */
        fun getOrAddChild(child: Char): Node = children.getOrPut(child) { Node() }
    }

    init {
        var deepest = 0
        for (emoji in emojis) {
            val chars = emoji.unicode.toCharArray()
            deepest = maxOf(deepest, chars.size)
            var node = root
            for (c in chars) {
                node = node.getOrAddChild(c)
            }
            node.emoji = emoji
        }
        maxDepth = deepest
    }
}
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/Fitzpatrick.kt:
--------------------------------------------------------------------------------
package com.vdurmont.emoji

/**
 * Enum that represents the Fitzpatrick modifiers supported by the emojis.
 * @author Improver: Ivan Ivanov [https://vk.com/irisism]

* Creator: Vincent DURMONT [vdurmont@gmail.com]
 */
enum class Fitzpatrick(
    /**
     * The unicode representation of the Fitzpatrick modifier
     */
    val unicode: String
) {
    /**
     * Fitzpatrick modifier of type 1/2 (pale white/white)
     */
    TYPE_1_2("\uD83C\uDFFB"),

    /**
     * Fitzpatrick modifier of type 3 (cream white)
     */
    TYPE_3("\uD83C\uDFFC"),

    /**
     * Fitzpatrick modifier of type 4 (moderate brown)
     */
    TYPE_4("\uD83C\uDFFD"),

    /**
     * Fitzpatrick modifier of type 5 (dark brown)
     */
    TYPE_5("\uD83C\uDFFE"),

    /**
     * Fitzpatrick modifier of type 6 (black)
     */
    TYPE_6("\uD83C\uDFFF");

    companion object {
        /** Returns the modifier whose [unicode] equals the argument, or null. */
        fun fitzpatrickFromUnicode(unicode: String): Fitzpatrick? {
            return values().firstOrNull { it.unicode == unicode }
        }

        /** Returns the modifier named [type] (case-insensitive, e.g. `type_3`), or null. */
        fun fitzpatrickFromType(type: String): Fitzpatrick? {
            return try {
                valueOf(type.toUpperCase())
            } catch (e: IllegalArgumentException) {
                null
            }
        }

        /**
         * Reads the modifier whose surrogate pair starts at [start] in [chars].
         *
         * @return the modifier, or null when the two chars at [start] are not
         * a Fitzpatrick modifier or do not both lie inside [chars]
         */
        fun find(chars: CharArray, start: Int): Fitzpatrick? {
            // Both halves of the surrogate pair must be inside the array.
            if (start < 0 || start + 1 >= chars.size) return null
            if (chars[start] != '\uD83C') return null
            return when (chars[start + 1]) {
                '\uDFFB' -> TYPE_1_2
                '\uDFFC' -> TYPE_3
                '\uDFFD' -> TYPE_4
                '\uDFFE' -> TYPE_5
                '\uDFFF' -> TYPE_6
                else -> null
            }
        }
    }

}
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/Gender.kt:
--------------------------------------------------------------------------------
package com.vdurmont.emoji

/**
 * Created 07.07.2020
 * @author Improver: Ivan Ivanov [https://vk.com/irisism]
 */
enum class Gender(val unicode: String) {
    MALE("♂️"), FEMALE("♀️");

    companion object {
        /** Returns the gender whose [unicode] equals the argument, or null. */
        fun genderFromUnicode(unicode: String): Gender? {
            return values().firstOrNull { it.unicode == unicode }
        }

        /** Returns the gender named [type] (case-insensitive), or null. */
        fun genderFromType(type: String): Gender? {
            return try {
                valueOf(type.toUpperCase())
            } catch (e: IllegalArgumentException) {
                null
            }
        }

        /**
         * Reads the gender sign at [startPos] in [chars].
         *
         * @return the gender, or null when [startPos] is outside [chars] or
         * the char there is not a gender sign
         */
        fun find(chars: CharArray, startPos: Int): Gender? {
            if (startPos !in chars.indices) return null
            return when (chars[startPos]) {
                '♂' -> MALE
                '♀' -> FEMALE
                else -> null
            }
        }
    }

}
--------------------------------------------------------------------------------