├── CHANGELOG.md
├── README.md
├── resources
│   └── emojis.json
└── src
    └── com
        └── vdurmont
            └── emoji
                ├── Emoji.kt
                ├── EmojiLoader.kt
                ├── EmojiManager.kt
                ├── EmojiParser.kt
                ├── EmojiTrie.kt
                ├── Fitzpatrick.kt
                └── Gender.kt
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## v6.0i
4 |
5 | - Ivan Ivanov forked main repository
6 | - Added class `Gender`
7 | - Added understanding of complex emojis: `{basic_emoji}{skin_color}?{gender}?`
8 | - Added support for the buggy VK.COM web-version emojis, where the trailing `\uFE0F` char is absent
9 | - `EmojiParser.UnicodeCandidate` renamed to `EmojiParser.EmojiResult`, and new fields were added to it
10 | - `EmojiParser` algorithm improvement. `getEmojiEndPos` is replaced with `getNextEmoji`
11 | - `EmojiTrie.getBestEmoji` introduced to replace slow `EmojiTrie.getEmojiEndPos` method
12 | - Memory usage fixes by optimizing generation of new String objects.
13 | - As a result, parsing is up to 5x faster compared to v5.1.1
14 |
15 | ## v5.1.1
16 |
17 | - Bugfix: respect fitzpatrick modifier when extracting emojis (thanks @sullis)
18 |
19 | ## v5.1.0
20 |
21 | - Many performance improvements to the parsing of the emojis (thanks @freva)
22 | - Add a `containsEmoji` function (thanks @freva!)
23 |
24 | ## v5.0.0
25 |
26 | - Fix the HTML to Unicode parser to always parse the longer emojis (thanks @freva)
27 | - Add alias for "pumpkin" (thanks @sullis)
28 | - Add a lot of missing flag emojis (thanks @ankitkariryaa)
29 | - Support for all emojis from Unicode 11.0
30 | - Support for all emojis from Unicode 10.0
31 | - Add a `EmojiParser.replaceAllEmojis` function (thanks @cbedoy)
32 |
33 | ## v4.0.0
34 |
35 | - Add "source code" strings to emoji json database
36 | - Fix some missing/out-of-order code points (thanks @BillyGalbreath)
37 | - Upgrade `org.json:json` dependency (thanks @PhotonQyv)
38 | - Update README with new emojis (thanks @jtobard)
39 |
40 | ## v3.3.0
41 |
42 | - Add `family_man_woman_girl_boy` emoji (thanks @freva)
43 | - Fix `EmojiManager.isEmoji` to support fitzpatrick modifiers (thanks @freva)
44 | - Fixed several emojis that had the wrong `support_fitzpatrick` flag (thanks @Tapchicoma)
45 | - Add some tests to avoid duplicate aliases
46 | - Fixed several duplicated aliases in the emoji DB
47 |
48 | ## v3.2.0
49 |
50 | - Fixed Poland flag (thanks @Sheigutn)
51 | - Improvements to the smile emojis (thanks @miquelbeltran)
52 | - Add a bunch of emojis from Apple iOS 10.2 release
53 | - Fix some missing fitzpatrick modifiers
54 | - Add an `EmojiManager.isOnlyEmojis()` method
55 |
56 | ## v3.1.3
57 |
58 | - Removed all variation selectors from the JSON database. Thanks @roberterdin !
59 |
60 | ## v3.1.2
61 |
62 | - Additions and updates to the emoji database (victory hand now supports fitzpatrick, adds Saint Vincent Grenadines' flag, add the regional indicator symbols). Thanks @lologist !
63 | - Force the database to be loaded in UTF-8.
64 | - Enable the extension of the `EmojiParser` class.
65 |
66 | ## v3.1.1
67 |
68 | - Add the ability to provide a custom `EmojiTransformer` that will enable developers to add their custom emoji replacement methods. Thanks @freva !
69 |
70 | ## v3.1.0
71 |
72 | - Add fitzpatrick support for 👃 ("nose") and 👂 ("ear")
73 | - Fix duplicated "sunglasses" alias
74 | - Performance improvements (using a Trie structure)
75 | - Parsing support for multiple emojis (such as "family_man_woman_boy")
76 | - Fix `EmojiManager.getAll()` that returned some duplicates
77 | - Use a BufferedReader to load the database
78 |
79 | ## v3.0.0
80 |
81 | Update the emoji database to support the additions of iOS 9.1
82 |
83 | ## v2.2.1
84 |
85 | Fix the `htmlDec` and `htmlHex` codes for the multiple emojis (such as `family (man, man, girl, boy)`)
86 |
87 | ## v2.2.0
88 |
89 | Rollback dependency org.json:json to 20140107 to keep the compatibility with Java 6 & 7
90 |
91 | ## v2.1.0
92 |
93 | - Add methods:
94 | - `EmojiParser#removeAllEmojis(String)`
95 | - `EmojiParser#removeAllEmojisExcept(String, Collection)`
96 | - `EmojiParser#removeEmojis(String, Collection)`
97 | - Upgrade dependency org.json:json
98 |
99 | ## v2.0.1
100 |
101 | Bug fix on the :-1: emoji
102 |
103 | ## v2.0.0
104 |
105 | - Update of the emoji database
106 | - Add 14 new family emojis (man_man_boy, woman_woman_girl, etc.)
107 | - Add 4 new couple emojis
108 | - Add the "vulcan_salute" and "middle_finger" emojis
109 | - Add 198 flags
110 | - Addition of the support for the diversity emojis (Fitzpatrick modifiers)
111 | - Removal of the deprecated methods `Emoji#getHtml` and `EmojiParser#parseToHtml`
112 | - Improvements in the javadoc
113 |
114 | ## v1.1.1
115 |
116 | Closing the stream used to read the emoji database in `EmojiManager.java`
117 |
118 | ## v1.1.0
119 |
120 | - Update of the emoji database
121 | - Adding support for HTML hexadecimal:
122 | - `Emoji#getHtmlHexadecimal`
123 | - `EmojiParser#parseToHtmlHexadecimal`
124 | - The old HTML support is now HTML decimal:
125 | - Deprecating `Emoji#getHtml` (replaced by `Emoji#getHtmlDecimal`)
126 | - Deprecating `EmojiParser#parseToHtml` (replaced by `EmojiParser#parseToHtmlDecimal`)
127 |
128 | ## v1.0.1
129 |
130 | Bug fix on the :+1: emoji
131 |
132 | ## v1.0.0
133 |
134 | First release.
135 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # iris:emoji-kotlin
2 |
3 | [License](https://github.com/vdurmont/emoji-java/blob/master/LICENSE.md)
4 |
5 | _The missing emoji library for Kotlin._
6 |
7 | **[iris:emoji-kotlin](https://github.com/iris2iris/iris-emoji-kotlin)** is a lightweight Kotlin library that helps you use Emojis in your java applications.
8 |
9 | It is a copy of my Java repo **[iris:emoji-java](https://github.com/iris2iris/iris-emoji-java)**.
10 |
11 | That repo, in turn, was forked from **[emoji-java](https://github.com/vdurmont/emoji-java)**.
12 |
13 | #### Reasons I forked it:
14 |
15 | - ❗️ The most important reason I forked it is that I found a way to make it __up to 5x faster__!
16 | - My repo supports complex emojis of combination `{basic_emoji}{skin_color}?{gender}?`
17 | - It supports the buggy VK.COM web-version emojis, where the trailing `\uFE0F` char is absent
18 |
19 | And, of course, it adds some useful methods.
20 |
21 | ## How to get it?
22 |
23 | ##### Via Direct Download:
24 |
25 | - Use [releases](https://github.com/iris2iris/iris-emoji-kotlin/releases) tab to download the jar directly.
26 | - Download JSON-java dependency from http://mvnrepository.com/artifact/org.json/json.
27 |
28 | ## How to use it?
29 |
30 | ### EmojiManager
31 |
32 | The `EmojiManager` provides several static methods to search through the emojis database:
33 |
34 | - `getForTag` returns all the emojis for a given tag
35 | - `getForAlias` returns the emoji for an alias
36 | - `getAll` returns all the emojis
37 | - `isEmoji` checks if a string is an emoji
38 | - `containsEmoji` checks if a string contains any emoji
39 |
40 | You can also query the metadata:
41 |
42 | - `getAllTags` returns the available tags
43 |
44 | Or get everything:
45 |
46 | - `getAll` returns all the emojis
47 |
48 | ### Emoji model
49 |
50 | An `Emoji` is a POJO (plain old java object), which provides the following methods:
51 |
52 | - `getUnicode` returns the unicode representation of the emoji
53 | - `getUnicode(Fitzpatrick)` returns the unicode representation of the emoji with the provided Fitzpatrick modifier. If the emoji doesn't support the Fitzpatrick modifiers, this method will throw an `UnsupportedOperationException`. If the provided Fitzpatrick is null, this method will return the unicode of the emoji.
54 | - `getDescription` returns the (optional) description of the emoji
55 | - `getAliases` returns a list of aliases for this emoji
56 | - `getTags` returns a list of tags for this emoji
57 | - `getHtmlDecimal` returns an html decimal representation of the emoji
58 | - `getHtmlHexadecimal` returns an html hexadecimal representation of the emoji
59 | - `supportsFitzpatrick` returns true if the emoji supports the Fitzpatrick modifiers, else false
60 |
61 | ### Fitzpatrick modifiers
62 |
63 | Some emojis now support the use of Fitzpatrick modifiers that gives the choice between 5 shades of skin tones:
64 |
65 | | Modifier | Type |
66 | | :------: | -------- |
67 | | 🏻 | type_1_2 |
68 | | 🏼 | type_3 |
69 | | 🏽 | type_4 |
70 | | 🏾 | type_5 |
71 | | 🏿 | type_6 |
72 |
73 | We defined the format of the aliases including a Fitzpatrick modifier as:
74 |
75 | ```
76 | :ALIAS|TYPE:
77 | ```
78 |
79 | A few examples:
80 |
81 | ```
82 | :boy|type_1_2:
83 | :swimmer|type_4:
84 | :santa|type_6:
85 | ```
86 |
87 | ### EmojiParser
88 |
89 | #### To unicode
90 |
91 | To replace all the aliases and the html representations found in a string by their unicode, use `EmojiParser#parseToUnicode(String)`.
92 |
93 | For example:
94 |
95 | ```kotlin
96 | val str = "An :grinning:awesome :smiley:string 😄with a few :wink:emojis!"
97 | val result = EmojiParser.parseToUnicode(str)
98 | println(result)
99 | // Prints:
100 | // "An 😀awesome 😃string 😄with a few 😉emojis!"
101 | ```
102 |
103 | #### To aliases
104 |
105 | To replace all the emoji's unicodes found in a string by their aliases, use `EmojiParser#parseToAliases(String)`.
106 |
107 | For example:
108 |
109 | ```kotlin
110 | val str = "An 😀awesome 😃string with a few 😉emojis!"
111 | val result = EmojiParser.parseToAliases(str)
112 | println(result)
113 | // Prints:
114 | // "An :grinning:awesome :smiley:string with a few :wink:emojis!"
115 | ```
116 |
117 | By default, the aliases will parse and include any Fitzpatrick modifier that would be provided. If you want to remove or ignore the Fitzpatrick modifiers, use `EmojiParser#parseToAliases(String, FitzpatrickAction)`. Examples:
118 |
119 | ```kotlin
120 | val str = "Here is a boy: \uD83D\uDC66\uD83C\uDFFF!"
121 | println(EmojiParser.parseToAliases(str))
122 | println(EmojiParser.parseToAliases(str, FitzpatrickAction.PARSE))
123 | // Prints twice: "Here is a boy: :boy|type_6:!"
124 | println(EmojiParser.parseToAliases(str, FitzpatrickAction.REMOVE))
125 | // Prints: "Here is a boy: :boy:!"
126 | println(EmojiParser.parseToAliases(str, FitzpatrickAction.IGNORE))
127 | // Prints: "Here is a boy: :boy:🏿!"
128 | ```
129 |
130 | #### To html
131 |
132 | To replace all the emoji's unicodes found in a string by their html representation, use `EmojiParser#parseToHtmlDecimal(String)` or `EmojiParser#parseToHtmlHexadecimal(String)`.
133 |
134 | For example:
135 |
136 | ```kotlin
137 | val str = "An 😀awesome 😃string with a few 😉emojis!"
138 |
139 | val resultDecimal = EmojiParser.parseToHtmlDecimal(str)
140 | println(resultDecimal)
141 | // Prints:
142 | // "An &#128512;awesome &#128515;string with a few &#128521;emojis!"
143 |
144 | val resultHexadecimal = EmojiParser.parseToHtmlHexadecimal(str)
145 | println(resultHexadecimal)
146 | // Prints:
147 | // "An &#x1f600;awesome &#x1f603;string with a few &#x1f609;emojis!"
148 | ```
149 |
150 | By default, any Fitzpatrick modifier will be removed. If you want to ignore the Fitzpatrick modifiers, use `EmojiParser#parseToHtmlDecimal(String, FitzpatrickAction)`. Examples:
151 |
152 | ```kotlin
153 | val str = "Here is a boy: \uD83D\uDC66\uD83C\uDFFF!"
154 | println(EmojiParser.parseToHtmlDecimal(str))
155 | println(EmojiParser.parseToHtmlDecimal(str, FitzpatrickAction.PARSE))
156 | println(EmojiParser.parseToHtmlDecimal(str, FitzpatrickAction.REMOVE))
157 | // Prints 3 times: "Here is a boy: &#128102;!"
158 | println(EmojiParser.parseToHtmlDecimal(str, FitzpatrickAction.IGNORE))
159 | // Prints: "Here is a boy: &#128102;🏿!"
160 | ```
161 |
162 | The same applies for the methods `EmojiParser#parseToHtmlHexadecimal(String)` and `EmojiParser#parseToHtmlHexadecimal(String, FitzpatrickAction)`.
163 |
164 | #### Remove emojis
165 |
166 | You can easily remove emojis from a string using one of the following methods:
167 |
168 | - `EmojiParser#removeAllEmojis(String)`: removes all the emojis from the String
169 | - `EmojiParser#removeAllEmojisExcept(String, Collection)`: removes all the emojis from the String, except the ones in the Collection
170 | - `EmojiParser#removeEmojis(String, Collection)`: removes the emojis in the Collection from the String
171 |
172 | For example:
173 |
174 | ```kotlin
175 | val str = "An 😀awesome 😃string with a few 😉emojis!"
176 | val collection = ArrayList<Emoji>()
177 | collection.add(EmojiManager.getForAlias("wink")) // This is 😉
178 |
179 | println(EmojiParser.removeAllEmojis(str))
180 | println(EmojiParser.removeAllEmojisExcept(str, collection))
181 | println(EmojiParser.removeEmojis(str, collection))
182 |
183 | // Prints:
184 | // "An awesome string with a few emojis!"
185 | // "An awesome string with a few 😉emojis!"
186 | // "An 😀awesome 😃string with a few emojis!"
187 | ```
188 |
189 | #### Extract Emojis from a string
190 |
191 | You can search a string of mixed emoji/non-emoji characters and have all of the emoji characters returned as a Collection.
192 |
193 | - `EmojiParser#extractEmojis(String)`: returns all emojis as a Collection. This will include duplicates if emojis are present more than once.
194 |
195 | ## Credits
196 |
197 | **iris-emoji-java** is based on [github/vdurmont/emoji-java](https://github.com/vdurmont/emoji-java).
198 |
199 | And in its turn **emoji-java** originally used the data provided by the [github/gemoji project](https://github.com/github/gemoji). It is still based on it but has evolved since.
200 |
201 | ## Available Emojis
202 |
203 | See a table of the available emojis and their aliases [HERE](./EMOJIS.md).
204 |
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/Emoji.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji
2 |
3 | import java.nio.charset.StandardCharsets
4 |
5 | /**
6 | * This class represents an emoji.
7 | *
8 | * This object is immutable so it can be used safely in a multithreaded context.
9 | *
10 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]
11 | * Creator: Vincent DURMONT [vdurmont@gmail.com]
12 | */
13 | class Emoji(
14 |     /** Human-readable description of the emoji. */
15 |     val description: String,
16 |     /** Whether a Fitzpatrick (skin tone) modifier can be appended to this emoji. */
17 |     val supportsFitzpatrick: Boolean,
18 |     /** Aliases of the emoji, exposed through the read-only [List] interface. */
19 |     val aliases: List<String>,
20 |     /** Tags associated with the emoji, exposed through the read-only [List] interface. */
21 |     val tags: List<String>,
22 |     /** UTF-8 encoded bytes of the emoji; decoded once into [unicode]. */
23 |     vararg bytes: Byte
24 | ) {
25 |
26 |     /** The emoji as a string, decoded from the UTF-8 bytes given to the constructor. */
27 |     val unicode: String = String(bytes, StandardCharsets.UTF_8)
28 |
29 |     /** HTML decimal character references (`&#NNN;`), one per code point of [unicode]. */
30 |     val htmlDec: String
31 |
32 |     /** HTML hexadecimal character references (`&#xHHH;`), one per code point of [unicode]. */
33 |     val htmlHex: String
34 |
35 |     init {
36 |         val decimal = StringBuilder()
37 |         val hexadecimal = StringBuilder()
38 |         var offset = 0
39 |         // Advance by code point rather than by char: a code point may span two chars.
40 |         while (offset < unicode.length) {
41 |             val codePoint = unicode.codePointAt(offset)
42 |             // Appending the Int directly is locale-independent, unlike String.format.
43 |             decimal.append("&#").append(codePoint).append(';')
44 |             hexadecimal.append("&#x").append(Integer.toHexString(codePoint)).append(';')
45 |             offset += Character.charCount(codePoint)
46 |         }
47 |         htmlDec = decimal.toString()
48 |         htmlHex = hexadecimal.toString()
49 |     }
50 |
51 |     /**
52 |      * Returns whether the emoji supports the Fitzpatrick modifiers or not.
53 |      *
54 |      * @return true if the emoji supports the Fitzpatrick modifiers
55 |      */
56 |     fun supportsFitzpatrick(): Boolean = supportsFitzpatrick
57 |
58 |     /**
59 |      * Returns the unicode representation of the emoji followed by the provided
60 |      * Fitzpatrick modifier.
61 |      *
62 |      * The support check runs first, so an emoji without Fitzpatrick support
63 |      * throws even when [fitzpatrick] is null.
64 |      *
65 |      * @param fitzpatrick the fitzpatrick modifier, or null to get the plain [unicode]
66 |      * @return the unicode representation
67 |      * @throws UnsupportedOperationException if the emoji doesn't support the
68 |      * Fitzpatrick modifiers
69 |      */
70 |     fun getUnicode(fitzpatrick: Fitzpatrick?): String {
71 |         if (!supportsFitzpatrick) {
72 |             throw UnsupportedOperationException(
73 |                 "Cannot get the unicode with a fitzpatrick modifier, " +
74 |                     "the emoji doesn't support fitzpatrick."
75 |             )
76 |         }
77 |         return if (fitzpatrick == null) unicode else unicode + fitzpatrick.unicode
78 |     }
79 |
80 |     /**
81 |      * Returns the HTML decimal representation of the emoji.
82 |      *
83 |      * @return the HTML decimal representation, same value as [htmlDec]
84 |      */
85 |     fun getHtmlDecimal(): String = htmlDec
86 |
87 |     /**
88 |      * Returns the HTML hexadecimal representation of the emoji.
89 |      *
90 |      * @return the HTML hexadecimal representation, same value as [htmlHex]
91 |      */
92 |     fun getHtmlHexadecimal(): String = htmlHex
93 |
94 |     /** Two emojis are equal when their [unicode] strings are equal. */
95 |     override fun equals(other: Any?): Boolean = other is Emoji && other.unicode == unicode
96 |
97 |     /** Hash of [unicode], consistent with [equals]. */
98 |     override fun hashCode(): Int = unicode.hashCode()
99 |
100 |     /**
101 |      * Returns the String representation of the Emoji object.
102 |      *
103 |      * Example (the real output is a single line, wrapped here for readability):
104 |      * ```
105 |      * Emoji{
106 |      *   description='smiling face with open mouth and smiling eyes',
107 |      *   supportsFitzpatrick=false,
108 |      *   aliases=[smile],
109 |      *   tags=[happy, joy, pleased],
110 |      *   unicode='😄',
111 |      *   htmlDec='&#128516;',
112 |      *   htmlHex='&#x1f604;'
113 |      * }
114 |      * ```
115 |      *
116 |      * @return the string representation
117 |      */
118 |     override fun toString(): String =
119 |         "Emoji{" +
120 |             "description='$description'" +
121 |             ", supportsFitzpatrick=$supportsFitzpatrick" +
122 |             ", aliases=$aliases" +
123 |             ", tags=$tags" +
124 |             ", unicode='$unicode'" +
125 |             ", htmlDec='$htmlDec'" +
126 |             ", htmlHex='$htmlHex'" +
127 |             "}"
128 | }
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/EmojiLoader.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji
2 |
3 | import org.json.JSONArray
4 | import org.json.JSONObject
5 | import java.io.*
6 | import java.nio.charset.StandardCharsets
7 | import java.util.*
8 |
9 | /**
10 | * Loads the emojis from a JSON database.
11 | *
12 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]
13 | * Creator: Vincent DURMONT [vdurmont@gmail.com]
14 | */
15 | object EmojiLoader {
16 |     /**
17 |      * Loads a JSONArray of emojis from an InputStream, parses it and returns the
18 |      * associated list of [Emoji]s. The stream is read to its end and then closed.
19 |      *
20 |      * @param stream the stream of the JSONArray
21 |      *
22 |      * @return the list of [Emoji]s, in the order of the JSON array
23 |      * @throws IOException if an error occurs while reading the stream
24 |      */
25 |     @Throws(IOException::class)
26 |     fun loadEmojis(stream: InputStream): MutableList<Emoji> {
27 |         val emojisJSON = JSONArray(inputStreamToString(stream))
28 |         val emojis: MutableList<Emoji> = ArrayList(emojisJSON.length())
29 |         for (i in 0 until emojisJSON.length()) {
30 |             // Entries without an "emoji" key yield null and are skipped.
31 |             buildEmojiFromJSON(emojisJSON.getJSONObject(i))?.let { emojis.add(it) }
32 |         }
33 |         return emojis
34 |     }
35 |
36 |     /**
37 |      * Reads [stream] to its end, decodes the bytes as UTF-8 and closes the stream.
38 |      *
39 |      * @param stream the stream to drain; it is closed on return, even if reading fails
40 |      * @return the decoded content
41 |      * @throws IOException if the stream cannot be read
42 |      */
43 |     @Throws(IOException::class)
44 |     private fun inputStreamToString(stream: InputStream): String {
45 |         // `use` closes the stream whether or not reading succeeds, so a failed
46 |         // read cannot leak the underlying resource.
47 |         return stream.use { String(it.readBytes(), StandardCharsets.UTF_8) }
48 |     }
49 |
50 |     // The block comment below is disabled Java code kept for reference only; it is
51 |     // not compiled. It sketches how an emoji pattern containing the placeholders
52 |     // "{person}", "{skin}" and "{gender}" could be expanded into several concrete
53 |     // emojis whose aliases are prefixed with the matching gender / skin name.
54 |
55 |
56 | /*
57 | private static final String[][] gender1 = {
58 | {"adult", "\uD83E\uDDD1"}
59 | , {"male", "\uD83D\uDC68"}
60 | , {"female", "\uD83D\uDC69"}
61 | };
62 | private static final String[][] gender2 = {
63 | {"male", "\u200D♂️"}
64 | , {"female", "\u200D♀️"}
65 | };
66 |
67 | private static final String[][] skins = {
68 | {"white", "\uD83C\uDFFB"}
69 | , {"cream white", "\uD83C\uDFFC"}
70 | , {"moderate brown", "\uD83C\uDFFD"}
71 | , {"dark brown", "\uD83C\uDFFE"}
72 | , {"black", "\uD83C\uDFFF"}
73 | };
74 |
75 | protected static List buildEmojiesFromJSON(JSONObject json) throws UnsupportedEncodingException {
76 | if (!json.has("emoji")) {
77 | return null;
78 | }
79 |
80 | String pattern = json.getString("emoji");
81 | List aliases = jsonArrayToStringList(json.getJSONArray("aliases"));
82 | EmojiPrepare[] emojies;
83 | if (pattern.indexOf('{') != -1) {
84 | boolean hasGender1 = pattern.contains("{person}");
85 | boolean hasGender2 = pattern.contains("{gender}");
86 | boolean hasSkin = pattern.contains("{skin}");
87 | var patterns = new LinkedList();
88 | patterns.add(new EmojiPrepare(pattern, aliases));
89 |
90 | if (hasSkin) {
91 | var tmp = new LinkedList();
92 | for (EmojiPrepare i : patterns) {
93 | tmp.add(new EmojiPrepare(i.pattern.replace("{skin}", ""), aliases));
94 | for (String[] g : skins) {
95 | var aa = new LinkedList();
96 | for (String a : i.aliases)
97 | aa.add(g[0] + ' ' + a);
98 | var newPattern = i.pattern.replace("{skin}", g[1]);
99 | tmp.add(new EmojiPrepare(newPattern, aa));
100 | }
101 | }
102 | patterns = tmp;
103 | }
104 |
105 | if (hasGender1) {
106 | var tmp = new LinkedList();
107 | for (EmojiPrepare i : patterns)
108 | for (String[] g : gender1) {
109 | var aa = new LinkedList();
110 | for (String a : i.aliases)
111 | aa.add(g[0] + ' ' + a);
112 | var newPattern = i.pattern.replace("{person}", g[1]);
113 | tmp.add(new EmojiPrepare(newPattern, aa));
114 | }
115 | patterns = tmp;
116 | }
117 |
118 | if (hasGender2) {
119 | var tmp = new LinkedList();
120 | for (EmojiPrepare i : patterns)
121 | for (String[] g : gender2) {
122 | tmp.add(new EmojiPrepare(i.pattern.replace("{gender}", ""), aliases));
123 | var aa = new LinkedList();
124 | for (String a : i.aliases)
125 | aa.add(g[0] + ' ' + a);
126 | var newPattern = i.pattern.replace("{gender}", g[1]);
127 | tmp.add(new EmojiPrepare(newPattern, aa));
128 | }
129 | patterns = tmp;
130 | }
131 |
132 |
133 |
134 | emojies = patterns.toArray(new EmojiPrepare[0]);
135 |
136 | } else
137 | emojies = new EmojiPrepare[] {new EmojiPrepare(pattern, aliases)};
138 | String description = null;
139 | if (json.has("description")) {
140 | description = json.getString("description");
141 | }
142 | boolean supportsFitzpatrick = false;
143 | if (json.has("supports_fitzpatrick")) {
144 | supportsFitzpatrick = json.getBoolean("supports_fitzpatrick");
145 | }
146 |
147 | List tags = jsonArrayToStringList(json.getJSONArray("tags"));
148 |
149 | ArrayList res = new ArrayList<>();
150 | for (EmojiPrepare emoji : emojies) {
151 | byte[] bytes = emoji.pattern.getBytes(StandardCharsets.UTF_8);
152 | res.add(new Emoji(description, supportsFitzpatrick, emoji.aliases, tags, bytes));
153 | }
154 | return res;
155 | //return new Emoji(description, supportsFitzpatrick, aliases, tags, bytes);
156 | }
157 |
158 | private static final class EmojiPrepare {
159 | List aliases;
160 | String pattern;
161 |
162 | public EmojiPrepare(String patter, List aliases) {
163 | this.aliases = aliases;
164 | this.pattern = patter;
165 | }
166 | }*/
167 |
168 |     /**
169 |      * Builds an [Emoji] from one entry of the JSON database.
170 |      *
171 |      * Reads the keys "emoji", "description", "supports_fitzpatrick", "aliases"
172 |      * and "tags". "description" and "supports_fitzpatrick" are optional and
173 |      * default to an empty string and `false`; "aliases" and "tags" are read
174 |      * unconditionally.
175 |      *
176 |      * @param json the JSON object describing one emoji
177 |      * @return the emoji, or null when [json] has no "emoji" key
178 |      */
179 |     @Throws(UnsupportedEncodingException::class)
180 |     internal fun buildEmojiFromJSON(
181 |         json: JSONObject
182 |     ): Emoji? {
183 |         if (!json.has("emoji")) {
184 |             return null
185 |         }
186 |         val bytes = json.getString("emoji").toByteArray(StandardCharsets.UTF_8)
187 |         // The description is optional: use an empty string when it is absent
188 |         // rather than failing with a NullPointerException on a forced `!!`.
189 |         val description = if (json.has("description")) json.getString("description") else ""
190 |         val supportsFitzpatrick =
191 |             json.has("supports_fitzpatrick") && json.getBoolean("supports_fitzpatrick")
192 |         val aliases = jsonArrayToStringList(json.getJSONArray("aliases"))
193 |         val tags = jsonArrayToStringList(json.getJSONArray("tags"))
194 |         return Emoji(description, supportsFitzpatrick, aliases, tags, *bytes)
195 |     }
196 |
197 |     /**
198 |      * Copies every element of [array], read as a string, into a new list.
199 |      *
200 |      * @param array the JSON array to convert
201 |      * @return the elements of [array] in their original order
202 |      */
203 |     private fun jsonArrayToStringList(array: JSONArray): List<String> =
204 |         (0 until array.length()).map { index -> array.getString(index) }
205 | }
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/EmojiManager.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji
2 |
3 | import com.vdurmont.emoji.EmojiLoader.loadEmojis
4 | import com.vdurmont.emoji.EmojiTrie.Matches
5 | import java.io.IOException
6 |
7 | /**
8 | * Holds the loaded emojis and provides search functions.
9 | *
10 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]
11 | * Creator: Vincent DURMONT [vdurmont@gmail.com]
12 | */
13 | object EmojiManager {
14 |     /** Classpath location of the JSON emoji database. */
15 |     private const val PATH = "/emojis.json"
16 |
17 |     /** Alias to emoji; when two emojis declare the same alias, the one loaded last wins. */
18 |     private val EMOJIS_BY_ALIAS: MutableMap<String, Emoji> = HashMap()
19 |
20 |     /** Tag to the set of emojis carrying that tag. */
21 |     private val EMOJIS_BY_TAG: MutableMap<String, MutableSet<Emoji>> = HashMap()
22 |
23 |     /** Every loaded emoji, ordered from the longest [Emoji.unicode] to the shortest. */
24 |     lateinit var ALL_EMOJIS: List<Emoji>
25 |
26 |     /** Trie built from the loaded emojis; used by [isEmoji] for char sequences. */
27 |     lateinit var EMOJI_TRIE: EmojiTrie
28 |
29 |     /**
30 |      * Returns all the [Emoji]s for a given tag.
31 |      *
32 |      * @param tag the tag
33 |      *
34 |      * @return the associated [Emoji]s, null if the tag is null or unknown
35 |      */
36 |     fun getForTag(tag: String?): Set<Emoji>? = tag?.let { EMOJIS_BY_TAG[it] }
37 |
38 |     /**
39 |      * Returns the [Emoji] for a given alias.
40 |      *
41 |      * @param alias the alias, with or without its surrounding ':'
42 |      *
43 |      * @return the associated [Emoji], null if the alias is null, empty or unknown
44 |      */
45 |     fun getForAlias(alias: String?): Emoji? =
46 |         if (alias.isNullOrEmpty()) null else EMOJIS_BY_ALIAS[trimAlias(alias)]
47 |
48 |     /**
49 |      * Strips at most one leading and one trailing ':' from [alias].
50 |      *
51 |      * A lone ":" yields an empty string; index-based trimming would ask for
52 |      * substring(1, 0) on that input and throw.
53 |      */
54 |     private fun trimAlias(alias: String): String = alias.removePrefix(":").removeSuffix(":")
55 |
56 |     /**
57 |      * Returns the [Emoji] for a given unicode.
58 |      *
59 |      * @param unicode the unicode
60 |      *
61 |      * @return the [Emoji] found by [EmojiParser.getEmojiInPosition] at index 0
62 |      * of [unicode], null if [unicode] is null or nothing is found there
63 |      */
64 |     fun getByUnicode(unicode: String?): Emoji? {
65 |         if (unicode == null) return null
66 |         return EmojiParser.getEmojiInPosition(unicode.toCharArray(), 0)?.emoji
67 |     }
68 |
69 |     /**
70 |      * Returns all the [Emoji]s
71 |      *
72 |      * @return all the [Emoji]s, i.e. [ALL_EMOJIS]
73 |      */
74 |     val all: Collection<Emoji>?
75 |         get() = ALL_EMOJIS
76 |
77 |     /**
78 |      * Tests if a given String is an emoji.
79 |      *
80 |      * @param string the string to test
81 |      * @return true if an emoji starts at index 0 and ends exactly at the end of
82 |      * the string, false otherwise (including for null)
83 |      */
84 |     fun isEmoji(string: String?): Boolean {
85 |         if (string == null) return false
86 |         val chars = string.toCharArray()
87 |         val result = EmojiParser.getEmojiInPosition(chars, 0) ?: return false
88 |         return result.emojiStartIndex == 0 && result.endIndex == chars.size
89 |     }
90 |
91 |     /**
92 |      * Tests if a given String contains an emoji.
93 |      *
94 |      * @param string the string to test
95 |      * @return true if the string contains an emoji's unicode, false otherwise
96 |      */
97 |     fun containsEmoji(string: String?): Boolean =
98 |         string != null && EmojiParser.getNextEmoji(string.toCharArray(), 0) != null
99 |
100 |     /**
101 |      * Tests if a given String only contains emojis.
102 |      *
103 |      * @param string the string to test
104 |      * @return true if nothing is left of the string once
105 |      * [EmojiParser.removeAllEmojis] has been applied to it, false otherwise
106 |      */
107 |     fun isOnlyEmojis(string: String?): Boolean =
108 |         string != null && EmojiParser.removeAllEmojis(string).isEmpty()
109 |
110 |     /**
111 |      * Checks if sequence of chars contain an emoji.
112 |      *
113 |      * @param sequence Sequence of char that may contain emoji in full or
114 |      * partially.
115 |      * @return
116 |      * - Matches.EXACTLY if char sequence in its entirety is an emoji
117 |      * - Matches.POSSIBLY if char sequence matches prefix of an emoji
118 |      * - Matches.IMPOSSIBLE if char sequence matches no emoji or prefix of an
119 |      *   emoji
120 |      */
121 |     fun isEmoji(sequence: CharArray): Matches = EMOJI_TRIE.isEmoji(sequence)
122 |
123 |     /**
124 |      * Returns all the tags in the database
125 |      *
126 |      * @return the tags
127 |      */
128 |     val allTags: Collection<String>
129 |         get() = EMOJIS_BY_TAG.keys
130 |
131 |     init {
132 |         try {
133 |             // Report a missing database with its path instead of an opaque null failure.
134 |             val stream = EmojiLoader::class.java.getResourceAsStream(PATH)
135 |                 ?: throw IOException("Emoji database not found on the classpath: $PATH")
136 |             // `use` closes the stream even when loading fails.
137 |             val emojis = stream.use { loadEmojis(it) }
138 |
139 |             for (emoji in emojis) {
140 |                 for (tag in emoji.tags) {
141 |                     EMOJIS_BY_TAG.getOrPut(tag) { HashSet() }.add(emoji)
142 |                 }
143 |                 for (alias in emoji.aliases) {
144 |                     EMOJIS_BY_ALIAS[alias] = emoji
145 |                 }
146 |             }
147 |             // The trie is built from the list in database order, before it is sorted.
148 |             EMOJI_TRIE = EmojiTrie(emojis)
149 |             // Longest unicode string first.
150 |             emojis.sortByDescending { it.unicode.length }
151 |             ALL_EMOJIS = emojis
152 |         } catch (e: IOException) {
153 |             throw RuntimeException(e)
154 |         }
155 |     }
156 | }
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/EmojiParser.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji;
2 |
3 | import com.vdurmont.emoji.EmojiManager.isEmoji
4 | import com.vdurmont.emoji.EmojiParser.FitzpatrickAction.*
5 | import com.vdurmont.emoji.EmojiParser.extractEmojiStrings
6 | import java.util.*
7 |
8 | /**
9 | * Provides methods to parse strings with emojis.
10 | *
11 | * @author Improver: Ivan Ivanov [https://vk.com/irisism]
12 | * Creator: Vincent DURMONT [vdurmont@gmail.com]
13 | */
14 | object EmojiParser {
15 |
16 | /**
17 | * See [parseToAliases] (the overload taking a `FitzpatrickAction`) with the action
18 | * "PARSE"
19 | *
20 | * @param input the string to parse
21 | * @return the string with the emojis replaced by their alias.
22 | */
23 | fun parseToAliases(input: String): String =
24 |     // Same as the two-argument overload, with the action fixed to PARSE.
25 |     parseToAliases(input, PARSE)
26 |
27 | /**
28 | * Replaces the emoji's unicode occurrences by one of their alias
29 | * (between 2 ':').
30 | * Example: `😄` will be replaced by `:smile:`
31 | *
32 | * When a fitzpatrick modifier is present with a PARSE action, a "|" will be
33 | * appended to the alias, with the fitzpatrick type.
34 | * Example: `👦🏿` will be replaced by
35 | * `:boy|type_6:`
36 | * The fitzpatrick types are: type_1_2, type_3, type_4, type_5, type_6
37 | *
38 | * When a fitzpatrick modifier is present with a REMOVE action, the modifier
39 | * will be deleted.
40 | * Example: `👦🏿` will be replaced by `:boy:`
41 | *
42 | * When a fitzpatrick modifier is present with a IGNORE action, the modifier
43 | * will be ignored.
44 | * Example: `👦🏿` will be replaced by `:boy:🏿`
45 | *
46 | * @param input the string to parse
47 | * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers
48 | * @return the string with the emojis replaced by their alias.
49 | */
50 | fun parseToAliases(input: String, fitzpatrickAction: FitzpatrickAction): String {
51 |     val aliasWriter = object : EmojiTransformer {
52 |         override fun transform(emoji: EmojiResult): String {
53 |             val alias = emoji.emoji.aliases[0] // only the first alias is ever emitted
54 |             return when (fitzpatrickAction) {
55 |                 // REMOVE: emit the bare alias.
56 |                 REMOVE -> ":$alias:"
57 |                 // IGNORE: bare alias, then whatever fitzpatrickUnicode holds.
58 |                 IGNORE -> ":$alias:" + emoji.fitzpatrickUnicode
59 |                 // PARSE (and any other action): encode the modifier as "|type".
60 |                 else ->
61 |                     if (emoji.hasFitzpatrick()) {
62 |                         ":$alias|" + emoji.fitzpatrickType + ":"
63 |                     } else {
64 |                         ":$alias:"
65 |                     }
66 |             }
67 |         }
68 |     }
69 |     return parseFromUnicode(input, aliasWriter)
70 | }
71 |
72 | /**
73 | * Replace all emojis with character
74 | *
75 | * @param str the string to process
76 | * @param replacementString replacement the string that will replace all the emojis
77 | * @return the string with replaced character
78 | */
79 | fun replaceAllEmojis(str: String, replacementString: String): String {
80 | val emojiTransformer: EmojiTransformer = object : EmojiTransformer {
81 | override fun transform(emoji: EmojiResult): String {
82 | return replacementString;
83 | }
84 | };
85 |
86 | return parseFromUnicode(str, emojiTransformer);
87 | }
88 |
89 |
90 | /**
91 | * Replaces the emoji's aliases (between 2 ':') occurrences and the html
92 | * representations by their unicode.
93 | * Examples:
94 | * :smile:
will be replaced by 😄
95 | * 😄
will be replaced by 😄
96 | * :boy|type_6:
will be replaced by 👦🏿
97 | *
98 | * @param input the string to parse
99 | * @return the string with the aliases and html representations replaced by
100 | * their unicode.
101 | */
102 | fun parseToUnicode(input: String): String? {
103 | val sb = StringBuilder(input.length)
104 | var last = 0
105 | while (last < input.length) {
106 | var alias: AliasCandidate? = getAliasAt(input, last)
107 | if (alias == null) {
108 | alias = getHtmlEncodedEmojiAt(input, last)
109 | }
110 | if (alias != null) {
111 | sb.append(alias.emoji.unicode)
112 | last = alias.endIndex
113 | if (alias.fitzpatrick != null) {
114 | sb.append(alias.fitzpatrick!!.unicode)
115 | }
116 | } else {
117 | sb.append(input[last])
118 | }
119 | last++
120 | }
121 | return sb.toString()
122 | }
123 |
124 | /**
125 | * Finds the alias in the given string starting at the given point, null otherwise
126 | */
127 | private fun getAliasAt(input: String, start: Int): AliasCandidate? {
128 | if (input.length < start + 2 || input[start] != ':') return null // Aliases start with :
129 | val aliasEnd = input.indexOf(':', start + 2) // Alias must be at least 1 char in length
130 | if (aliasEnd == -1) return null // No alias end found
131 | val fitzpatrickStart = input.indexOf('|', start + 2)
132 | if (fitzpatrickStart != -1 && fitzpatrickStart < aliasEnd) {
133 | val emoji = EmojiManager.getForAlias(input.substring(start, fitzpatrickStart)) ?: return null
134 | // Not a valid alias
135 | if (!emoji.supportsFitzpatrick()) return null // Fitzpatrick was specified, but the emoji does not support it
136 | val fitzpatrick =
137 | Fitzpatrick.fitzpatrickFromType(input.substring(fitzpatrickStart + 1, aliasEnd))
138 | return AliasCandidate(emoji, fitzpatrick, start, aliasEnd)
139 | }
140 | val emoji = EmojiManager.getForAlias(input.substring(start, aliasEnd)) ?: return null
141 | // Not a valid alias
142 | return AliasCandidate(emoji, null, start, aliasEnd)
143 | }
144 |
145 | /**
146 | * Finds the HTML encoded emoji in the given string starting at the given point, null otherwise
147 | */
148 | private fun getHtmlEncodedEmojiAt(input: String, start: Int): AliasCandidate? {
149 | if (input.length < start + 4 || input[start] != '&' || input[start + 1] != '#') return null
150 | var longestEmoji: Emoji? = null
151 | var longestCodePointEnd = -1
152 | val chars = CharArray(EmojiManager.EMOJI_TRIE.maxDepth)
153 | var charsIndex = 0
154 | var codePointStart = start
155 | do {
156 | val codePointEnd =
157 | input.indexOf(';', codePointStart + 3) // Code point must be at least 1 char in length
158 | if (codePointEnd == -1) break
159 | charsIndex += try {
160 | val radix = if (input[codePointStart + 2] == 'x') 16 else 10
161 | val codePoint = input.substring(codePointStart + 2 + radix / 16, codePointEnd).toInt(radix)
162 | Character.toChars(codePoint, chars, charsIndex)
163 | } catch (e: IllegalArgumentException) {
164 | break
165 | }
166 | val foundEmoji = EmojiManager.EMOJI_TRIE.getEmoji(chars, 0, charsIndex)
167 | if (foundEmoji != null) {
168 | longestEmoji = foundEmoji
169 | longestCodePointEnd = codePointEnd
170 | }
171 | codePointStart = codePointEnd + 1
172 | } while (input.length > codePointStart + 4 && input[codePointStart] == '&' && input[codePointStart + 1] == '#' && charsIndex < chars.size &&
173 | !EmojiManager.EMOJI_TRIE.isEmoji(chars, 0, charsIndex).impossibleMatch()
174 | )
175 | return if (longestEmoji == null) null else AliasCandidate(longestEmoji, null, start, longestCodePointEnd)
176 | }
177 |
178 | /**
179 | * See [.parseToHtmlDecimal] with the action
180 | * "PARSE"
181 | *
182 | * @param input the string to parse
183 | * @return the string with the emojis replaced by their html decimal
184 | * representation.
185 | */
186 | fun parseToHtmlDecimal(input: String): String? {
187 | return parseToHtmlDecimal(input, PARSE)
188 | }
189 |
190 | /**
191 | * Replaces the emoji's unicode occurrences by their html representation.
192 | * Example: 😄
will be replaced by 😄
193 | *
194 | * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the
195 | * modifier will be deleted from the string.
196 | * Example: 👦🏿
will be replaced by
197 | * 👦
198 | *
199 | * When a fitzpatrick modifier is present with a IGNORE action, the modifier
200 | * will be ignored and will remain in the string.
201 | * Example: 👦🏿
will be replaced by
202 | * 👦🏿
203 | *
204 | * @param input the string to parse
205 | * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers
206 | * @return the string with the emojis replaced by their html decimal
207 | * representation.
208 | */
209 | fun parseToHtmlDecimal(input: String, fitzpatrickAction: FitzpatrickAction): String {
210 | val emojiTransformer = object : EmojiTransformer {
211 | override fun transform(emoji: EmojiResult): String {
212 | return when (fitzpatrickAction) {
213 | PARSE, REMOVE -> emoji.emoji.getHtmlDecimal();
214 | IGNORE -> emoji.emoji.getHtmlDecimal() +
215 | emoji.fitzpatrickUnicode;
216 | };
217 | }
218 | };
219 |
220 | return parseFromUnicode(input, emojiTransformer);
221 | }
222 |
223 | /**
224 | * See {@link #parseToHtmlHexadecimal(String, FitzpatrickAction)} with the
225 | * action "PARSE"
226 | *
227 | * @param input the string to parse
228 | * @return the string with the emojis replaced by their html hex
229 | * representation.
230 | */
231 | fun parseToHtmlHexadecimal(input: String): String {
232 | return parseToHtmlHexadecimal(input, PARSE);
233 | }
234 |
235 | /**
236 | * Replaces the emoji's unicode occurrences by their html hex
237 | * representation.
238 | * Example: 👦
will be replaced by 👦
239 | *
240 | * When a fitzpatrick modifier is present with a PARSE or REMOVE action, the
241 | * modifier will be deleted.
242 | * Example: 👦🏿
will be replaced by
243 | * 👦
244 | *
245 | * When a fitzpatrick modifier is present with a IGNORE action, the modifier
246 | * will be ignored and will remain in the string.
247 | * Example: 👦🏿
will be replaced by
248 | * 👦🏿
249 | *
250 | * @param input the string to parse
251 | * @param fitzpatrickAction the action to apply for the fitzpatrick modifiers
252 | * @return the string with the emojis replaced by their html hex
253 | * representation.
254 | */
255 | fun parseToHtmlHexadecimal(input: String,fitzpatrickAction: FitzpatrickAction): String {
256 | val emojiTransformer = object : EmojiTransformer {
257 | override fun transform(unicodeCandidate: EmojiResult): String {
258 | return when (fitzpatrickAction) {
259 | PARSE, REMOVE -> unicodeCandidate.emoji.getHtmlHexadecimal();
260 | IGNORE -> unicodeCandidate.emoji.getHtmlHexadecimal() +
261 | unicodeCandidate.fitzpatrickUnicode;
262 | };
263 | }
264 | };
265 |
266 | return parseFromUnicode(input, emojiTransformer);
267 | }
268 |
269 | /**
270 | * Removes all emojis from a String
271 | *
272 | * @param str the string to process
273 | * @return the string without any emoji
274 | */
275 | fun removeAllEmojis(str: String): String {
276 | val emojiTransformer = object : EmojiTransformer {
277 | override fun transform(emoji: EmojiResult): String {
278 | return "";
279 | }
280 | };
281 |
282 | return parseFromUnicode(str, emojiTransformer);
283 | }
284 |
285 |
286 | /**
287 | * Removes a set of emojis from a String
288 | *
289 | * @param str the string to process
290 | * @param emojisToRemove the emojis to remove from this string
291 | * @return the string without the emojis that were removed
292 | */
293 | fun removeEmojis(str: String, emojisToRemove: Collection):String {
294 | val emojiTransformer = object : EmojiTransformer {
295 | override fun transform(emoji: EmojiResult): String {
296 | if (!emojisToRemove.contains(emoji.emoji)) {
297 | return emoji.emoji.unicode +
298 | emoji.fitzpatrickUnicode;
299 | }
300 | return "";
301 | }
302 | };
303 |
304 | return parseFromUnicode(str, emojiTransformer);
305 | }
306 |
307 | /**
308 | * Removes all the emojis in a String except a provided set
309 | *
310 | * @param str the string to process
311 | * @param emojisToKeep the emojis to keep in this string
312 | * @return the string without the emojis that were removed
313 | */
314 | fun removeAllEmojisExcept(str: String, emojisToKeep: Collection): String {
315 | val emojiTransformer = object : EmojiTransformer {
316 | override fun transform(emoji: EmojiResult): String {
317 | if (emojisToKeep.contains(emoji.emoji)) {
318 | return emoji.emoji.unicode +
319 | emoji.fitzpatrickUnicode;
320 | }
321 | return "";
322 | }
323 | };
324 |
325 | return parseFromUnicode(str, emojiTransformer);
326 | }
327 |
328 |
329 | /**
330 | * Detects all unicode emojis in input string and replaces them with the
331 | * return value of transformer.transform()
332 | *
333 | * @param input the string to process
334 | * @param transformer emoji transformer to apply to each emoji
335 | * @return input string with all emojis transformed
336 | */
337 | fun parseFromUnicode(input: String, transformer: EmojiTransformer): String {
338 | var prev = 0;
339 | val sb = StringBuilder(input.length);
340 | val replacements = getEmojies(input);
341 | for (candidate in replacements) {
342 | sb.append(input, prev, candidate.emojiStartIndex);
343 |
344 | sb.append(transformer.transform(candidate));
345 | prev = candidate.endIndex;
346 | }
347 |
348 | return sb.append(input.substring(prev)).toString();
349 | }
350 |
351 | /*fun extractEmojiStrings(input: String?): List? {
352 | return extractEmojiStrings(input, 0)
353 | }*/
354 |
355 | fun extractEmojiStrings(input: String, limit: Int = 0): List? {
356 | val items = extractEmojis(input, limit)
357 | val result: MutableList = ArrayList(items.size)
358 | for (i in items) {
359 | result.add(i.toString())
360 | }
361 | return result
362 | }
363 |
364 | /*fun extractEmojis(input: String): List? {
365 | return getEmojies(input, 0)
366 | }*/
367 |
368 | fun extractEmojis(input: String, limit: Int = 0): List {
369 | return getEmojies(input, limit)
370 | }
371 |
372 | /**
373 | * Generates a list UnicodeCandidates found in input string. A
374 | * UnicodeCandidate is created for every unicode emoticon found in input
375 | * string, additionally if Fitzpatrick modifier follows the emoji, it is
376 | * included in UnicodeCandidate. Finally, it contains start and end index of
377 | * unicode emoji itself (WITHOUT Fitzpatrick modifier whether it is there or
378 | * not!).
379 | *
380 | * @param input String to find all unicode emojis in
381 | * @return List of UnicodeCandidates for each unicode emote in text
382 | */
383 | fun getEmojies(input: String, limit: Int): List {
384 | var limit = limit
385 | val inputCharArray = input.toCharArray()
386 | val candidates: MutableList = ArrayList()
387 | var next: EmojiResult?
388 | var i = 0
389 | while (getNextEmoji(inputCharArray, i).also { next = it } != null) {
390 | next!!
391 | candidates.add(next!!)
392 | if (limit != 0) {
393 | limit--
394 | if (limit <= 0) break
395 | }
396 | i = next!!.endIndex
397 | }
398 | return candidates
399 | }
400 |
401 | fun getEmojies(input: String): List {
402 | return getEmojies(input, 0)
403 | }
404 |
405 | /**
406 | * Finds the next UnicodeCandidate after a given starting index
407 | *
408 | * @param chars char array to find UnicodeCandidate in
409 | * @param start starting index for search
410 | * @return the next UnicodeCandidate or null if no UnicodeCandidate is found after start index
411 | */
412 | fun getNextEmoji(chars: CharArray, start: Int): EmojiResult? {
413 | for (i in start until chars.size) {
414 | val emoji = getEmojiInPosition(chars, i);
415 | if (emoji != null)
416 | return emoji;
417 | }
418 |
419 | return null;
420 | }
421 |
422 | fun getEmojiInPosition(chars: CharArray, start: Int): EmojiResult? {
423 | val emoji = getBestBaseEmoji(chars, start);
424 | if (emoji == null)
425 | return null;
426 |
427 | var fitzpatrick: Fitzpatrick? = null;
428 | var gender: Gender? = null;
429 | var endPos = start + emoji.unicode.length;
430 | if (emoji.supportsFitzpatrick) {
431 | fitzpatrick = Fitzpatrick.find(chars, endPos);
432 | if (fitzpatrick != null) {
433 | endPos += 2;
434 | }
435 | val gg = findGender(chars, endPos);
436 | if (gg != null) {
437 | endPos = gg.endPos + 1;
438 | gender = gg.gender;
439 | }
440 | }
441 |
442 | if (chars.size > endPos) {
443 | val ch = chars[endPos];
444 | if (ch == '\uFE0F')
445 | endPos++;
446 | }
447 | return EmojiResult(emoji, fitzpatrick, gender, chars, start, endPos);
448 | }
449 |
450 | private fun findGender(chars: CharArray, startPos: Int): GenderMatch? {
451 | val len = chars.size;
452 | if (len <= startPos)
453 | return null;
454 | var pos = startPos;
455 | val ch = chars[pos];
456 | if (ch != '\u200D')
457 | return null;
458 | pos++;
459 | val gender = Gender.find(chars, pos) ?: return null;
460 | return GenderMatch(gender, pos);
461 | }
462 |
463 | private class GenderMatch(val gender: Gender?, val endPos: Int)
464 |
465 |
466 | /**
467 | * Returns end index of a unicode emoji if it is found in text starting at
468 | * index startPos, -1 if not found.
469 | * This returns the longest matching emoji, for example, in
470 | * "\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC66"
471 | * it will find alias:family_man_woman_boy, NOT alias:man
472 | *
473 | * @param text the current text where we are looking for an emoji
474 | * @param startPos the position in the text where we should start looking for
475 | * an emoji end
476 | * @return the end index of the unicode emoji starting at startPos. -1 if not
477 | * found
478 | */
479 | fun getBestBaseEmoji(text: CharArray, startPos: Int): Emoji? {
480 | return EmojiManager.EMOJI_TRIE.getBestEmoji(text, startPos);
481 | }
482 |
483 |
484 | class EmojiResult(
485 | val emoji: Emoji,
486 | val fitzpatrick: Fitzpatrick?,
487 | val gender: Gender?,
488 | val source: CharArray,
489 | val emojiStartIndex: Int,
490 | val endIndex: Int
491 | ) {
492 |
493 | fun hasFitzpatrick(): Boolean {
494 | return fitzpatrick != null
495 | }
496 |
497 | val fitzpatrickType: String
498 | get() = if (hasFitzpatrick()) fitzpatrick!!.name else ""
499 |
500 | val fitzpatrickUnicode: String
501 | get() = if (hasFitzpatrick()) fitzpatrick!!.unicode else ""
502 |
503 | val emojiEndIndex: Int
504 | get() = emojiStartIndex + emoji.unicode.length
505 |
506 | val fitzpatrickEndIndex: Int
507 | get() = emojiEndIndex + if (fitzpatrick != null) 2 else 0
508 |
509 | private var sub: String? = null
510 |
511 | override fun toString(): String {
512 | if (sub != null) return sub!!
513 | val len = endIndex - emojiStartIndex
514 | val sub = CharArray(len)
515 | System.arraycopy(source, emojiStartIndex, sub, 0, len)
516 | this.sub = String(sub)
517 | return this.sub!!
518 | }
519 |
520 | }
521 |
522 |
523 | private class AliasCandidate (
524 | val emoji: Emoji,
525 | val fitzpatrick: Fitzpatrick?,
526 | val startIndex: Int,
527 | val endIndex: Int
528 | )
529 |
530 | /**
531 | * Enum used to indicate what should be done when a Fitzpatrick modifier is
532 | * found.
533 | */
534 | enum class FitzpatrickAction {
535 | /**
536 | * Tries to match the Fitzpatrick modifier with the previous emoji
537 | */
538 | PARSE,
539 |
540 | /**
541 | * Removes the Fitzpatrick modifier from the string
542 | */
543 | REMOVE,
544 |
545 | /**
546 | * Ignores the Fitzpatrick modifier (it will stay in the string)
547 | */
548 | IGNORE
549 | }
550 |
551 | interface EmojiTransformer {
552 | fun transform(emoji: EmojiResult): String
553 | }
554 |
555 |
556 | }
557 |
/**
 * Manual smoke test: prints the emojis extracted from a sample string made of
 * composite emojis, then the result of `isEmoji` for a single composite emoji
 * followed by a space.
 */
fun main() {
    val sample =
        "\uD83D\uDC68\u200D\uD83D\uDCBB\uD83E\uDDB9\uD83C\uDFFE\uD83E\uDDD1\uD83C\uDFFD\u200D\uD83D\uDD2C\uD83E\uDDD1\uD83C\uDFFB\u200D\uD83C\uDF73\uD83D\uDC70\uD83C\uDFFE\uD83E\uDDDB\uD83C\uDFFD\u200D♂️\uD83E\uDD31\uD83C\uDFFF\uD83D\uDC68\uD83C\uDFFC\u200D\uD83C\uDFEB\uD83E\uDDD1\uD83C\uDFFB\u200D\uD83C\uDF73\uD83E\uDDD1\uD83C\uDFFB\u200D\uD83C\uDF73\uD83D\uDC73\uD83C\uDFFB\u200D♂️"
    println(extractEmojiStrings(sample))
    println(isEmoji("\uD83E\uDDDB\uD83C\uDFFD\u200D♂️ "))
}
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/EmojiTrie.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji
2 |
/**
 * Prefix tree over the UTF-16 chars of the unicode representation of a set of
 * emojis, used to recognise emojis inside char sequences.
 *
 * @param emojis the emojis to index; each one is stored under its `unicode` chars
 * @author Improver: Ivan Ivanov [https://vk.com/irisism]
 * Creator: Vincent DURMONT [vdurmont@gmail.com]
 */
class EmojiTrie(emojis: Collection<Emoji>) {
    private val root = Node()

    /** Length, in chars, of the longest indexed emoji (0 when there is none). */
    val maxDepth: Int

    init {
        var deepest = 0
        for (emoji in emojis) {
            val chars = emoji.unicode.toCharArray()
            deepest = maxOf(deepest, chars.size)
            var node = root
            for (c in chars) {
                node = node.getOrAddChild(c)
            }
            node.emoji = emoji
        }
        maxDepth = deepest
    }

    /**
     * Checks if sequence of chars contain an emoji.
     *
     * @param sequence Sequence of char that may contain emoji in full or
     * partially.
     * @return
     * - [Matches.EXACTLY] if char sequence in its entirety is an emoji
     * - [Matches.POSSIBLY] if char sequence matches prefix of an emoji
     * - [Matches.IMPOSSIBLE] if char sequence matches no emoji or prefix of an emoji
     */
    fun isEmoji(sequence: CharArray): Matches {
        return isEmoji(sequence, 0, sequence.size)
    }

    /**
     * Checks if the chars in `sequence[start, end)` are an emoji or a prefix of one.
     *
     * @param sequence the chars to inspect
     * @param start index of the first char to consider (inclusive)
     * @param end index past the last char to consider (exclusive)
     * @return see [isEmoji]
     * @throws ArrayIndexOutOfBoundsException if the bounds do not fit the sequence
     */
    fun isEmoji(sequence: CharArray, start: Int, end: Int): Matches {
        if (start < 0 || start > end || end > sequence.size) {
            throw ArrayIndexOutOfBoundsException("start $start, end $end, length ${sequence.size}")
        }
        var node = root
        for (i in start until end) {
            node = node.getChild(sequence[i]) ?: return Matches.IMPOSSIBLE
        }
        return if (node.isEndOfEmoji) Matches.EXACTLY else Matches.POSSIBLY
    }

    /**
     * Finds the longest emoji that begins exactly at [start].
     *
     * The walk remembers the last complete emoji it passed, so a shorter emoji is
     * still returned when the following chars only look like the beginning of a
     * longer one.
     *
     * @param sequence the chars to inspect
     * @param start index at which the emoji must begin
     * @return the longest emoji beginning at [start], or null if there is none
     * @throws ArrayIndexOutOfBoundsException if [start] is negative
     */
    fun getBestEmoji(sequence: CharArray, start: Int): Emoji? {
        if (start < 0) {
            throw ArrayIndexOutOfBoundsException("start $start, length ${sequence.size}")
        }
        var best: Emoji? = null
        var node = root
        for (i in start until sequence.size) {
            node = node.getChild(sequence[i]) ?: break
            node.emoji?.let { best = it }
        }
        return best
    }

    /**
     * Finds Emoji instance from emoji unicode
     *
     * @param unicode unicode of emoji to get
     * @return Emoji instance if unicode matches an emoji, null otherwise.
     */
    fun getEmoji(unicode: String): Emoji? {
        return getEmoji(unicode.toCharArray(), 0, unicode.length)
    }

    /**
     * Finds the emoji whose unicode is exactly the chars in `sequence[start, end)`.
     *
     * @param sequence the chars to inspect
     * @param start index of the first char to consider (inclusive)
     * @param end index past the last char to consider (exclusive)
     * @return the matching emoji, null when the range is not exactly an emoji
     * @throws ArrayIndexOutOfBoundsException if the bounds do not fit the sequence
     */
    fun getEmoji(sequence: CharArray, start: Int, end: Int): Emoji? {
        if (start < 0 || start > end || end > sequence.size) {
            throw ArrayIndexOutOfBoundsException("start $start, end $end, length ${sequence.size}")
        }
        var node = root
        for (i in start until end) {
            node = node.getChild(sequence[i]) ?: return null
        }
        return node.emoji
    }

    /** Outcome of matching a char sequence against the indexed emojis. */
    enum class Matches {
        EXACTLY, POSSIBLY, IMPOSSIBLE;

        fun exactMatch(): Boolean {
            return this == EXACTLY
        }

        fun impossibleMatch(): Boolean {
            return this == IMPOSSIBLE
        }
    }

    /** A trie node; [emoji] is set when the path from the root spells a complete emoji. */
    private class Node {
        private val children: MutableMap<Char, Node> = HashMap()
        var emoji: Emoji? = null

        val isEndOfEmoji: Boolean
            get() = emoji != null

        fun getChild(child: Char): Node? = children[child]

        /** Returns the child for [child], creating it first when missing. */
        fun getOrAddChild(child: Char): Node = children.getOrPut(child) { Node() }
    }
}
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/Fitzpatrick.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji
2 |
/**
 * Enum that represents the Fitzpatrick modifiers supported by the emojis.
 * @author Improver: Ivan Ivanov [https://vk.com/irisism]
 * Creator: Vincent DURMONT [vdurmont@gmail.com]
 */
enum class Fitzpatrick(
    /**
     * The unicode representation of the Fitzpatrick modifier
     */
    val unicode: String
) {
    /**
     * Fitzpatrick modifier of type 1/2 (pale white/white)
     */
    TYPE_1_2("\uD83C\uDFFB"),

    /**
     * Fitzpatrick modifier of type 3 (cream white)
     */
    TYPE_3("\uD83C\uDFFC"),

    /**
     * Fitzpatrick modifier of type 4 (moderate brown)
     */
    TYPE_4("\uD83C\uDFFD"),

    /**
     * Fitzpatrick modifier of type 5 (dark brown)
     */
    TYPE_5("\uD83C\uDFFE"),

    /**
     * Fitzpatrick modifier of type 6 (black)
     */
    TYPE_6("\uD83C\uDFFF");

    companion object {
        /**
         * Returns the modifier whose unicode representation equals [unicode],
         * or null when there is none.
         */
        fun fitzpatrickFromUnicode(unicode: String): Fitzpatrick? {
            return values().firstOrNull { it.unicode == unicode }
        }

        /**
         * Returns the modifier whose name equals [type], ignoring case
         * (e.g. "type_1_2" or "TYPE_6"), or null when there is none.
         */
        fun fitzpatrickFromType(type: String): Fitzpatrick? {
            // Case-insensitive name comparison: independent of the default locale
            // and does not rely on an exception for the "unknown type" case.
            return values().firstOrNull { it.name.equals(type, ignoreCase = true) }
        }

        /**
         * Reads the Fitzpatrick modifier stored at `chars[start]` and `chars[start + 1]`.
         *
         * @param chars the chars to inspect
         * @param start index of the high surrogate of the expected modifier
         * @return the modifier found, or null when the two chars at [start] are not
         * a modifier or when fewer than two chars remain
         */
        fun find(chars: CharArray, start: Int): Fitzpatrick? {
            // A modifier is a surrogate pair, so both start and start + 1 must exist.
            if (chars.size < start + 2) return null
            if (chars[start] != '\uD83C') return null
            return when (chars[start + 1]) {
                '\uDFFB' -> TYPE_1_2
                '\uDFFC' -> TYPE_3
                '\uDFFD' -> TYPE_4
                '\uDFFE' -> TYPE_5
                '\uDFFF' -> TYPE_6
                else -> null
            }
        }
    }

}
--------------------------------------------------------------------------------
/src/com/vdurmont/emoji/Gender.kt:
--------------------------------------------------------------------------------
1 | package com.vdurmont.emoji
2 |
/**
 * Gender signs that can be appended to an emoji.
 *
 * Created 07.07.2020
 * @author Improver: Ivan Ivanov [https://vk.com/irisism]
 */
enum class Gender(val unicode: String) {
    MALE("♂️"), FEMALE("♀️");

    companion object {
        /**
         * Returns the gender whose unicode representation equals [unicode],
         * or null when there is none.
         */
        fun genderFromUnicode(unicode: String): Gender? {
            return values().firstOrNull { it.unicode == unicode }
        }

        /**
         * Returns the gender whose name equals [type], ignoring case
         * (e.g. "male" or "FEMALE"), or null when there is none.
         */
        fun genderFromType(type: String): Gender? {
            // Case-insensitive name comparison: independent of the default locale
            // and does not rely on an exception for the "unknown type" case.
            return values().firstOrNull { it.name.equals(type, ignoreCase = true) }
        }

        /**
         * Reads the gender sign stored at `chars[startPos]`.
         *
         * @param chars the chars to inspect
         * @param startPos index of the expected gender sign
         * @return the gender found, or null when the char at [startPos] is not a
         * gender sign or [startPos] is past the end of [chars]
         */
        fun find(chars: CharArray, startPos: Int): Gender? {
            if (startPos >= chars.size) return null
            return when (chars[startPos]) {
                '\u2642' -> MALE // male sign
                '\u2640' -> FEMALE // female sign
                else -> null
            }
        }
    }

}
--------------------------------------------------------------------------------