');
36 | div.attr('id', 'g' + i);
37 | div.click(click_handler);
38 | cell.append(div);
39 | row.append(cell);
40 | grid_elements.push(div);
41 | }
42 | }
43 |
/**
 * Choose the text used to show a code point in the grid and the input fields.
 *
 * Candidates are tried lazily, in order: pointToDiagonal(cp), then
 * pointToSubstitute(data, cp), then pointToString(cp). The first of the two
 * leading candidates that is neither null nor undefined wins; otherwise the
 * result of pointToString is returned as-is.
 */
function cp_display(cp) {
    var shown = pointToDiagonal(cp);
    if (shown == null) {
        shown = pointToSubstitute(data, cp);
    }
    if (shown == null) {
        shown = pointToString(cp);
    }
    return shown;
}
59 |
/**
 * Redraw every grid cell for the page starting at grid_base.
 *
 * Cell number i shows code point grid_base + i. Each of the four marker
 * classes is switched on or off according to its like_* predicate.
 */
function update_grid() {
    // [class name, predicate] pairs, evaluated in this order for every cell.
    var markers = [
        ["like_emoji", like_emoji],
        ["like_C0", like_C0],
        ["like_C1", like_C1],
        ["like_space", like_space],
    ];
    grid_elements.forEach(function(cell, offset) {
        var cp = grid_base + offset;
        cell.text(cp_display(cp));
        markers.forEach(function(marker) {
            // Boolean(): toggleClass only honours its state argument when it is a real boolean.
            cell.toggleClass(marker[0], Boolean(marker[1](cp)));
        });
    });
}
78 |
/**
 * Refresh the detail panel for current_cp.
 *
 * Always updates the "U+" label, the big preview field and the marker classes
 * on #big and #goto_char. The document title and the #data_* fields are only
 * filled in once data_ready is set.
 */
function update_info() {
    var cp = current_cp;
    var previews = $("#big, #goto_char");
    // [class name, predicate] pairs, evaluated in this order.
    var markers = [
        ["like_emoji", like_emoji],
        ["like_C0", like_C0],
        ["like_C1", like_C1],
        ["like_space", like_space],
    ];

    $('#cp').text(pointToYouPlus(cp));
    $('#big').val(cp_display(cp));
    markers.forEach(function(marker) {
        // Boolean(): toggleClass only honours its state argument when it is a real boolean.
        previews.toggleClass(marker[0], Boolean(marker[1](cp)));
    });

    if (data_ready) {
        document.title = pointToYouPlus(cp) + ' ' + get_data(cp, 'name');
        // One #data_<prop> element per key of data_defaults.
        for (var prop in data_defaults) {
            $('#data_' + prop).text(get_data(cp, prop));
        }
    }
}
101 |
/**
 * Navigate to a code point by writing its hexadecimal form into the URL
 * fragment.
 */
function set_hash(cp) {
    var fragment = toHexadecimal(cp);
    location.hash = fragment;
}
105 |
/**
 * Navigate to the code point obtained from `text` via stringToPoint.
 *
 * Does nothing for empty text. When `field` is supplied, that field is first
 * rewritten with the display form of the code point and (re)selected.
 */
function set_hash_text(text, field) {
    if (text.length == 0) {
        return;
    }

    var cp = stringToPoint(text);
    var has_field = field !== undefined;
    if (has_field) {
        $(field).val(cp_display(cp));
        yield_then_select(field);
    }
    set_hash(cp);
}
117 |
/**
 * Select the contents of `field` on a zero-delay timer, unless the field is
 * flagged as "composing" (the flag is maintained by the composition event
 * handlers).
 */
function yield_then_select(field) {
    var composing = $(field).data("composing");
    if (composing) {
        return;
    }
    setTimeout(function() {
        $(field).select();
    }, 0);
}
125 |
/**
 * Point the URL fragment at `cp` using location.replace rather than
 * assigning location.hash.
 */
function replace(cp) {
    var target = "#" + toHexadecimal(cp);
    location.replace(target);
}
129 |
/**
 * React to a change of the URL fragment.
 *
 * The fragment is parsed as a hexadecimal code point. An unparsable or
 * out-of-range fragment is replaced by the current code point (or 0 when
 * there is none yet). A fragment that differs from toHexadecimal(cp) is
 * replaced by that canonical spelling. Otherwise the code point becomes
 * current: the grid page is switched if needed, the goto fields and the
 * selected cell are updated, and the info panel is refreshed.
 */
function hashchange_handler() {
    var fragment = location.hash.slice(1);
    var cp = parseInt(fragment, 16);
    var in_range = !isNaN(cp) && cp >= 0 && cp <= 0x10ffff;

    if (!in_range) {
        return replace(current_cp == undefined ? 0 : current_cp);
    }
    if (fragment != toHexadecimal(cp)) {
        return replace(cp);
    }

    current_cp = cp;

    // The grid shows 256 code points per page.
    var offset = cp % 256;
    var new_grid_base = cp - offset;
    if (new_grid_base != grid_base) {
        grid_base = new_grid_base;
        update_grid();
    }

    $('#goto_hex').val(toHexadecimal(cp));
    $('#goto_dec').val(cp);
    $('#goto_char').val(cp_display(cp));
    $('#grid td').removeClass('selected');
    grid_elements[offset].parent().addClass('selected');
    update_info();
}
152 |
/**
 * Click handler for a grid cell: navigate to the code point the cell shows.
 *
 * `this` is the clicked element, whose id is "g" followed by the cell's
 * decimal offset within the current page.
 */
function click_handler() {
    // Explicit radix: the offset is always decimal. slice() replaces the deprecated substr().
    var offset = parseInt(this.id.slice(1), 10);
    set_hash(grid_base + offset);
}
157 |
/**
 * Switch the loading screen to its "loading files" state and start fetching
 * the character data. Once the fetch resolves, the result is stored in
 * `data`, `data_ready` is set, the loading screen is swapped for the UI and
 * the grid and info panel are redrawn.
 */
function load_data() {
    $('#loading_noscript').hide();
    $('#loading_files').show();

    var pending = fetchAllData();
    pending.then(function(result) {
        data = result;
        data_ready = true;
        $('#loading').hide();
        $('#ui').show();
        update_grid();
        update_info();
    });
}
170 |
/**
 * Look up one displayable property of a code point.
 *
 * @param cp   the code point
 * @param prop property key; "name", "gc", "block", "age" and "uhman" are supported
 * @returns the stored value, or the default from data_defaults[prop] (called
 *          with cp when the default is a function) if the data is not loaded,
 *          does not contain `prop`, or has no value for this code point
 * @throws Error when `prop` is present in the data but is not one of the
 *         supported keys
 */
function get_data(cp, prop) {
    // Shared fallback, previously duplicated in both branches.
    function fallback() {
        var substitute = data_defaults[prop];
        return typeof substitute == "function" ? substitute(cp) : substitute;
    }

    if (!data || !(prop in data)) {
        return fallback();
    }

    var result;
    switch (prop) {
        case "name":
            result = getOldName(data, cp);
            break;
        case "gc":
        case "block":
        case "age":
        case "uhman":
            result = getString(data, prop, cp);
            break;
        default:
            // Same exception type as before, now with a message naming the property.
            throw new Error('get_data: unsupported property "' + prop + '"');
    }

    return result != null ? result : fallback();
}
199 |
/**
 * Find the clipboard object for a cut/copy/paste event.
 *
 * Follows the chain of `originalEvent` wrappers until an object carrying a
 * `clipboardData` property is found. If the chain ends without one, falls
 * back to `window.clipboardData`, and finally to null.
 */
function get_clipboard(event) {
    var candidate = event;
    for (;;) {
        if ("clipboardData" in candidate) {
            return candidate.clipboardData;
        }
        if (!("originalEvent" in candidate)) {
            break;
        }
        candidate = candidate.originalEvent;
    }
    return "clipboardData" in window ? window.clipboardData : null;
}
212 |
/**
 * Whether `cp` gets the "like_emoji" marker: the result of
 * isEmojiPresentation for the loaded data, or false while no data is loaded.
 */
function like_emoji(cp) {
    if (!data) {
        return false;
    }
    return isEmojiPresentation(data, cp);
}
216 |
/**
 * Whether `cp` gets the "like_space" marker: the result of isSpaceSeparator
 * for the loaded data, or false while no data is loaded.
 */
function like_space(cp) {
    if (!data) {
        return false;
    }
    return isSpaceSeparator(data, cp);
}
220 |
/** Whether `cp` lies below U+0020. */
function is_C0(cp) {
    var first_after_C0 = 0x0020;
    return cp < first_after_C0;
}
224 |
/**
 * Whether `cp` gets the "like_C0" marker: anything is_C0 accepts, plus
 * U+007F, U+2061..U+2064 and U+E0020..U+E007E.
 */
function like_C0(cp) {
    var singles = [0x007F, 0x2061, 0x2062, 0x2063, 0x2064];
    return is_C0(cp)
        || singles.some(function(single) { return cp == single; })
        || (cp >= 0xE0020 && cp < 0xE007F);
}
234 |
/** Whether `cp` lies in U+0080..U+009F. */
function is_C1(cp) {
    if (cp < 0x0080) {
        return false;
    }
    return cp < 0x00A0;
}
238 |
/**
 * Whether `cp` gets the "like_C1" marker: anything is_C1 accepts, plus the
 * individual code points and half-open ranges listed below.
 */
function like_C1(cp) {
    var singles = [0x00AD, 0x034F, 0x061C];
    // [first, one-past-last] pairs.
    var ranges = [
        [0x180B, 0x180F],
        [0x200B, 0x2010],
        [0x2028, 0x202F],
        [0x2060, 0x2061],
        [0x2066, 0x2070],
        [0x3164, 0x3165],
        [0xFFA0, 0xFFA1],
        [0xFFF9, 0xFFFD],
        [0xFE00, 0xFE10],
        [0xFEFF, 0xFF00],
        [0xE0001, 0xE0002],
        [0xE007F, 0xE0080],
        [0xE0100, 0xE01F0],
    ];
    return is_C1(cp)
        || singles.some(function(single) { return cp == single; })
        || ranges.some(function(range) { return cp >= range[0] && cp < range[1]; });
}
258 |
// Startup: build the grid, honour the fragment already in the URL, then start loading data.
init_grid();
hashchange_handler();
load_data();

$(window).on('hashchange', hashchange_handler);

// Keyboard navigation. Keys pressed together with any modifier are ignored.
$(window).keydown(function(e) {
    var has_modifier = e.metaKey || e.shiftKey || e.ctrlKey || e.altKey;
    if (has_modifier) {
        return;
    }

    // keyCode -> distance to move from the current code point.
    var relative_moves = {
        33: -256, // page up
        34: 256,  // page down
        37: -1,   // left arrow
        38: -16,  // up arrow
        39: 1,    // right arrow
        40: 16,   // down arrow
    };
    var key = e.keyCode;

    if (key === 35) {
        // end: last cell of the page, or U+10FFFF when already there
        var last_on_page = grid_base + 255;
        set_hash(current_cp == last_on_page ? 0x10ffff : last_on_page);
    } else if (key === 36) {
        // home: first cell of the page, or U+0000 when already there
        set_hash(current_cp == grid_base ? 0 : grid_base);
    } else if (typeof key === "number" && relative_moves.hasOwnProperty(key)) {
        set_hash(current_cp + relative_moves[key]);
    }
});

// Key presses inside inputs must not reach the navigation handler above.
$('input').keydown(function(e) {
    e.stopPropagation();
});
// Tab switching: the id of the panel to show is the clicked link's id with
// its first four characters removed.
$('#ui_tabs a').click(function(e) {
    var panel_id = this.id.substr(4);
    var tabs = $('#ui_tabs a');

    $('#ui_content > div').hide();
    $('#' + panel_id).show();
    tabs.removeClass('selected');
    $(this).addClass('selected');
    e.preventDefault();
});

$('#ucd_version').text(ucd_version);
// Live name search: runs on change/keydown/paste/input/submit events of the
// search form and of the "han" checkbox, and lists up to 50 matching code points.
// NOTE(review): for an empty query the handler returns before sr.empty() and
// e.preventDefault(), so earlier results stay visible and the event keeps its
// default action — confirm this is intended.
305 | $('#search_form, #search_han').on('change keydown paste input submit', function(e) {
306 | var q = $('#search_query').val().toUpperCase();
307 | var sr = $('#search_results');
308 | if (!q.length)
309 | return;
310 | sr.empty();
311 | var han = $("#search_han").is(":checked");
// Scan code points in ascending order; n counts matches and stops the scan at 50.
312 | for (var n = 0, i = 0; n < 50 && i < 0x110000; i++) {
// Unless the checkbox is ticked, skip code points for which kDefinitionExists is truthy.
313 | if (!han && kDefinitionExists(data, i))
314 | continue;
315 | var name = getOldName(data, i);
316 | if (name == null)
317 | continue;
// Case-insensitive substring match (q was upper-cased above).
318 | if (name.toUpperCase().indexOf(q) > -1) {
319 | n++;
// NOTE(review): the string passed to $() below is split across two lines and
// appears to have lost its element markup — restore it from the upstream file.
320 | sr.append($("
")
321 | .text(pointToYouPlus(i) + "\u2001" + name)
322 | .click(set_hash.bind(null, i)));
323 | }
324 | }
325 | e.preventDefault();
326 | });
// "Go to" fields: each navigates to the number typed into it, parsed with the
// field's radix. An empty field is ignored.
[
    ['#goto_hex', 16],
    ['#goto_dec', 10],
].forEach(function(entry) {
    var selector = entry[0];
    var radix = entry[1];
    $(selector).on('change keydown paste input', function() {
        if (this.value.length == 0) {
            return;
        }
        set_hash(parseInt(this.value, radix));
    });
});
337 |
// Character fields (#big and #goto_char).
//
// cut/copy put the real character for current_cp on the clipboard and paste
// navigates to the pasted text, both bypassing the browser default. Typing
// navigates via the input event, and the composing flag (read by
// yield_then_select) is maintained from the composition events.
$("#big, #goto_char")
    .on("cut copy", function(event) {
        var clipboard = get_clipboard(event);
        // get_clipboard returns null when no clipboard object is reachable;
        // in that case leave the browser's default behaviour alone instead
        // of throwing on the null.
        if (clipboard == null) {
            return;
        }
        event.preventDefault();
        clipboard.setData("text", pointToString(current_cp));
    })
    .on("paste", function(event) {
        var clipboard = get_clipboard(event);
        // Same null guard as above: without a clipboard object the default
        // paste proceeds and the "input" handler below picks up the new value.
        if (clipboard == null) {
            return;
        }
        event.preventDefault();
        set_hash_text(clipboard.getData("text"), this);
    })
    .on("compositionstart", function() {
        $(this).data("composing", true);
    })
    .on("compositionend", function() {
        $(this).data("composing", false);
        yield_then_select(this);
    })
    .on("input", function(event) {
        set_hash_text(this.value, this);
    })
    .on("focus", function(event) {
        $(this).select();
    });
362 |
--------------------------------------------------------------------------------
/src/search.worker.ts:
--------------------------------------------------------------------------------
1 | import "core-js/stable";
2 | import "regenerator-runtime/runtime";
3 |
4 | import {
5 | Data,
6 | findSequenceIndex,
7 | getAliasCount,
8 | getAliasType,
9 | getAliasValue,
10 | getNameExceptNr2,
11 | getNextClusterBreak,
12 | getSequenceNames,
13 | getSequencePoints,
14 | getString,
15 | hasAnyAlias,
16 | hasAnyNameExceptNr2,
17 | hasAnyUhdef,
18 | } from "./data";
19 | import { pointToString, stringToPoint, stringToPoints } from "./encoding";
20 | import { toHexadecimal, toDecimal } from "./formatting";
21 | import { KeyedSearchResult, SearchResult } from "./search";
22 |
23 | // https://github.com/webpack-contrib/worker-loader/issues/94#issuecomment-449861198
24 | export default {} as typeof Worker & { new (): Worker };
25 |
26 | declare function postMessage(message: any): void;
27 |
28 | let cache: Data | null = null;
29 |
/**
 * Yield one "hex" result when the whole query is the hexadecimal spelling of
 * a code point below 0x110000; yield nothing otherwise.
 */
function* searchByHexadecimal(query: string): Generator<KeyedSearchResult> {
  const point = parseInt(query, 16);

  // parseInt gives NaN for input it cannot parse; negative values are rejected too.
  if (Number.isNaN(point) || point < 0) {
    return;
  }

  // parseInt stops at the first invalid character, so only accept queries whose
  // length matches the formatted form of the parsed value.
  if (toHexadecimal(point).length != query.length) {
    return;
  }

  if (point >= 0x110000) {
    return;
  }

  yield { key: `hex/${point}`, points: [point], reason: "hex", score: 0 };
}
47 |
/**
 * Yield one "dec" result when the whole query is the decimal spelling of a
 * code point below 0x110000; yield nothing otherwise.
 */
function* searchByDecimal(query: string): Generator<KeyedSearchResult> {
  const point = parseInt(query, 10);

  // parseInt gives NaN for input it cannot parse; negative values are rejected too.
  if (Number.isNaN(point) || point < 0) {
    return;
  }

  // parseInt stops at the first invalid character, so only accept queries whose
  // length matches the formatted form of the parsed value.
  if (toDecimal(point).length != query.length) {
    return;
  }

  if (point >= 0x110000) {
    return;
  }

  yield { key: `dec/${point}`, points: [point], reason: "dec", score: 0 };
}
65 |
/**
 * Break the start of the query into its individual code points.
 *
 * Walks the cluster breaks reported by getNextClusterBreak and yields one
 * "breakdown" result per code point of each cluster, stopping once
 * `graphemes` clusters have been emitted or no further break is reported.
 *
 * @param data      loaded character data, passed through to getNextClusterBreak
 * @param query     the raw search text
 * @param graphemes maximum number of clusters to break down
 */
function* searchByBreakdown(
  data: Data,
  query: string,
  graphemes: number,
): Generator<KeyedSearchResult> {
  let context = getNextClusterBreak(data, query);
  if (context == null) return;

  let graphemeCount = 0;
  let pointCount = 0;
  // NOTE(review): initialised from startPointIndex but used below as a slice()
  // offset and later updated from startUnitIndex — presumably both are 0 for
  // the first break; confirm against getNextClusterBreak.
  let i = context.startPointIndex;
  while ((context = getNextClusterBreak(data, query, context)) != null) {
    const cluster = query.slice(i, context.startUnitIndex);

    // Iterating a string walks it code point by code point.
    for (const pointish of cluster) {
      const point = stringToPoint(pointish);
      if (point == null) continue;

      yield {
        // pointCount keeps keys unique when the same code point repeats.
        key: `breakdown/${pointCount++}/${point}`,
        points: [point],
        reason: "breakdown",
        score: 0,
      };
    }

    i = context.startUnitIndex;
    if (++graphemeCount >= graphemes) {
      return;
    }
  }
}
97 |
/**
 * Yield one "sequenceValue" result when the query, taken as a list of code
 * points, is a sequence known to findSequenceIndex.
 *
 * Empty queries, and queries no longer than the string form of their first
 * code point (i.e. a single code point), yield nothing.
 */
function* searchBySequenceValue(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  if (query.length == 0) return;

  // NOTE(review): the non-null assertion assumes stringToPoint never returns
  // null for a non-empty string — confirm against ./encoding.
  const firstPointLength = pointToString(stringToPoint(query)!).length;
  if (query.length == firstPointLength) return;

  const points = stringToPoints(query);
  const sequenceIndex = findSequenceIndex(data, points);
  if (sequenceIndex == null) return;

  yield {
    key: `sequenceValue/${points.join("+")}`,
    points,
    reason: "sequenceValue",
    sequenceIndex,
    score: 0,
  };
}
117 |
/**
 * Yield a "sequenceName" result for every sequence name that contains the
 * query, compared case-insensitively via toUpperCase. An empty query yields
 * nothing. Each result carries the score and offset computed by scoreMatch.
 */
function* searchBySequenceName(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  if (query.length == 0) return;
  const upper = query.toUpperCase();

  for (let i = 0; i < data.info.sequenceCount; i++) {
    const sequenceNames = getSequenceNames(data, i);
    if (sequenceNames == null) continue;

    for (const [j, sequenceName] of sequenceNames.entries()) {
      const search = sequenceName.toUpperCase();
      if (!search.includes(upper)) continue;

      const points = getSequencePoints(data, i)!;
      const [score, offset] = scoreMatch(search, upper);
      yield {
        // Keyed by the points only, so several matching names of one
        // sequence share a key.
        key: `sequenceName/${points.join("+")}`,
        points,
        reason: "sequenceName",
        sequenceIndex: i,
        sequenceNameIndex: j,
        score,
        offset,
      };
    }
  }
}
147 |
/**
 * Yield a "name" result for every code point whose name (as returned by
 * getNameExceptNr2) contains the query, compared case-insensitively via
 * toUpperCase.
 *
 * The code space is scanned in pages of 0x100 code points; pages for which
 * hasAnyNameExceptNr2 is false are skipped whole. User Timing marks are taken
 * at the first and last page of every 0x100-page group and turned into 17
 * "sBN <n>" measures after the scan.
 */
function* searchByName(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  const upper = query.toUpperCase();

  for (let page = 0; page < 0x1100; page++) {
    const group = Math.floor(page / 0x100);
    if (page % 0x100 == 0) {
      performance.mark(`sBN ${group} <`);
    }
    // Taken before the group's last page is scanned.
    if (page % 0x100 == 0xff) {
      performance.mark(`sBN ${group} >`);
    }
    if (!hasAnyNameExceptNr2(data, page)) continue;

    const pageEnd = (page + 1) * 0x100;
    for (let point = page * 0x100; point < pageEnd; point++) {
      const name = getNameExceptNr2(data, point);
      if (name == null) continue;

      const search = name.toUpperCase();
      if (!search.includes(upper)) continue;

      const [score, offset] = scoreMatch(search, upper);
      yield {
        // Shares the "nameish/" key space with alias results for the same point.
        key: `nameish/${point}`,
        points: [point],
        reason: "name",
        score,
        offset,
      };
    }
  }

  for (let i = 0; i < 17; i++) {
    performance.measure(`sBN ${i}`, `sBN ${i} <`, `sBN ${i} >`);
  }
}
182 |
/**
 * Yield an "alias" result for every name alias that contains the query,
 * compared case-insensitively via toUpperCase.
 *
 * The code space is scanned in pages of 0x100 code points; pages for which
 * hasAnyAlias is false are skipped whole. User Timing marks are taken at the
 * first and last page of every 0x100-page group and turned into 17
 * "sBNA <n>" measures after the scan.
 */
function* searchByNameAlias(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  const upper = query.toUpperCase();
  // Running index into the alias tables. It advances once per alias visited,
  // so it relies on skipped pages contributing no aliases.
  let aliasIndex = 0;

  for (let page = 0; page < 0x1100; page++) {
    const group = Math.floor(page / 0x100);
    if (page % 0x100 == 0) {
      performance.mark(`sBNA ${group} <`);
    }
    // Taken before the group's last page is scanned.
    if (page % 0x100 == 0xff) {
      performance.mark(`sBNA ${group} >`);
    }
    if (!hasAnyAlias(data, page)) continue;

    const pageEnd = (page + 1) * 0x100;
    for (let point = page * 0x100; point < pageEnd; point++) {
      const aliasCount = getAliasCount(data, point);
      for (let i = 0; i < aliasCount; i++, aliasIndex++) {
        const name = getAliasValue(data, aliasIndex)!;
        const type = getAliasType(data, aliasIndex)!;

        const search = name.toUpperCase();
        if (!search.includes(upper)) continue;

        const [score, offset] = scoreMatch(search, upper);
        yield {
          // Shares the "nameish/" key space with name results for the same point.
          key: `nameish/${point}`,
          points: [point],
          reason: "alias",
          aliasIndex,
          aliasType: type,
          score,
          offset,
        };
      }
    }
  }

  for (let i = 0; i < 17; i++) {
    performance.measure(`sBNA ${i}`, `sBNA ${i} <`, `sBNA ${i} >`);
  }
}
223 |
/**
 * Yield a "uhdef" result for every code point whose "uhdef" string contains
 * the query, compared case-insensitively via toUpperCase.
 *
 * The code space is scanned in pages of 0x100 code points; pages for which
 * hasAnyUhdef is false are skipped whole. User Timing marks are taken at the
 * first and last page of every 0x100-page group and turned into 17
 * "sBU <n>" measures after the scan.
 */
function* searchByUhdef(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  const upper = query.toUpperCase();

  for (let page = 0; page < 0x1100; page++) {
    const group = Math.floor(page / 0x100);
    if (page % 0x100 == 0) {
      performance.mark(`sBU ${group} <`);
    }
    // Taken before the group's last page is scanned.
    if (page % 0x100 == 0xff) {
      performance.mark(`sBU ${group} >`);
    }
    if (!hasAnyUhdef(data, page)) continue;

    const pageEnd = (page + 1) * 0x100;
    for (let point = page * 0x100; point < pageEnd; point++) {
      const uhdef = getString(data, "uhdef", point);
      if (uhdef == null) continue;

      const search = uhdef.toUpperCase();
      if (!search.includes(upper)) continue;

      const [score, offset] = scoreMatch(search, upper);
      yield {
        key: `uhdef/${point}`,
        points: [point],
        reason: "uhdef",
        score,
        offset,
      };
    }
  }

  for (let i = 0; i < 17; i++) {
    performance.measure(`sBU ${i}`, `sBU ${i} <`, `sBU ${i} >`);
  }
}
258 |
/**
 * Rate how well `needle` matches inside `haystack`.
 *
 * Four kinds of match are each counted at most once:
 *   1 — the needle is followed by a space or the end of the haystack
 *   2 — the needle is preceded by a space or the start of the haystack
 *   4 — both at once, for a single occurrence
 *   8 — the needle is the entire haystack
 *
 * @returns [score, offset]: the sum of the awarded points and the offset of
 *          the last match kind awarded. With no award the offset is that of
 *          the first plain occurrence (indexOf, -1 if absent).
 */
function scoreMatch(haystack: string, needle: string): [number, number] {
  const tailOffset = haystack.length - needle.length;
  let total = 0;
  let bestOffset = haystack.indexOf(needle);

  const award = (points: number, at: number): void => {
    total += points;
    bestOffset = at;
  };

  // 1 point: boundary after the needle.
  if (haystack.endsWith(needle)) {
    award(1, tailOffset);
  } else {
    const at = haystack.indexOf(`${needle} `);
    if (at != -1) award(1, at);
  }

  // 2 points: boundary before the needle.
  if (haystack.startsWith(needle)) {
    award(2, 0);
  } else {
    const at = haystack.indexOf(` ${needle}`);
    if (at != -1) award(2, at + 1);
  }

  // 4 points: boundaries on both sides of one occurrence.
  if (haystack.startsWith(`${needle} `)) {
    award(4, 0);
  } else if (haystack.endsWith(` ${needle}`)) {
    award(4, tailOffset);
  } else {
    const at = haystack.indexOf(` ${needle} `);
    if (at != -1) award(4, at + 1);
  }

  // 8 points: exact match.
  if (haystack == needle) {
    award(8, 0);
  }

  return [total, bestOffset];
}
293 |
/**
 * Sort results in place by score (highest first), breaking ties with
 * comparePoints (ascending). Returns the same array.
 */
function sortByScore(results: KeyedSearchResult[]): KeyedSearchResult[] {
  const byScoreThenPoints = (
    p: KeyedSearchResult,
    q: KeyedSearchResult,
  ): number => {
    const scoreDelta = q.score - p.score;
    return scoreDelta ? scoreDelta : comparePoints(p, q);
  };

  return results.sort(byScoreThenPoints);
}
298 |
/**
 * Keep the best-scoring result for each key.
 *
 * The input is sorted in place by comparePoints (ascending) and then by score
 * (highest first). A result is then dropped when its key equals the key of
 * the result immediately before it in that order.
 */
function dedupResults(results: KeyedSearchResult[]): KeyedSearchResult[] {
  results.sort((p, q) => {
    const pointDelta = comparePoints(p, q);
    return pointDelta ? pointDelta : q.score - p.score;
  });

  const unique: KeyedSearchResult[] = [];
  let previousKey: KeyedSearchResult["key"] | undefined = undefined;
  for (const result of results) {
    if (result.key != previousKey) {
      unique.push(result);
    }
    previousKey = result.key;
  }
  return unique;
}
305 |
/**
 * Order two results by their `points` arrays: element by element, and when
 * one array is a prefix of the other the shorter one sorts first.
 *
 * @returns a negative number, zero or a positive number, as for Array#sort
 */
function comparePoints(p: SearchResult, q: SearchResult): number {
  const left = p.points;
  const right = q.points;
  const shared = Math.min(left.length, right.length);

  let i = 0;
  while (i < shared && left[i] == right[i]) {
    i++;
  }

  return i < shared ? left[i] - right[i] : left.length - right.length;
}
311 |
// Worker entry point: run every search strategy for the incoming query and
// post the combined list back. A message may omit `data`, in which case the
// data from the previous message is reused.
addEventListener("message", (event) => {
  const { data = cache, query } = event.data;

  const numeric = [...searchByHexadecimal(query), ...searchByDecimal(query)];
  const sequence = [...searchBySequenceValue(data, query)];
  // three graphemes allows checking for invisible characters between two visible characters
  const breakdown = [...searchByBreakdown(data, query, 3)];
  // Names, aliases and sequence names are deduplicated by key, then ranked together.
  const named = sortByScore(
    dedupResults([
      ...searchByName(data, query),
      ...searchByNameAlias(data, query),
      ...searchBySequenceName(data, query),
    ]),
  );
  const defined = sortByScore([...searchByUhdef(data, query)]);

  const result: KeyedSearchResult[] = [
    ...numeric,
    ...sequence,
    ...breakdown,
    ...named,
    ...defined,
  ];

  cache = data;
  postMessage(result);
});
332 |
--------------------------------------------------------------------------------
/data/Blocks.txt:
--------------------------------------------------------------------------------
1 | # Blocks-16.0.0.txt
2 | # Date: 2024-02-02
3 | # © 2024 Unicode®, Inc.
4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
5 | # For terms of use and license, see https://www.unicode.org/terms_of_use.html
6 | #
7 | # Unicode Character Database
8 | # For documentation, see https://www.unicode.org/reports/tr44/
9 | #
10 | # Format:
11 | # Start Code..End Code; Block Name
12 |
13 | # ================================================
14 |
15 | # Note: When comparing block names, casing, whitespace, hyphens,
16 | # and underbars are ignored.
17 | # For example, "Latin Extended-A" and "latin extended a" are equivalent.
18 | # For more information on the comparison of property values,
19 | # see UAX #44: https://www.unicode.org/reports/tr44/
20 | #
21 | # All block ranges start with a value where (cp MOD 16) = 0,
22 | # and end with a value where (cp MOD 16) = 15. In other words,
23 | # the last hexadecimal digit of the start of range is ...0
24 | # and the last hexadecimal digit of the end of range is ...F.
25 | # This constraint on block ranges guarantees that allocations
26 | # are done in terms of whole columns, and that code chart display
27 | # never involves splitting columns in the charts.
28 | #
29 | # All code points not explicitly listed for Block
30 | # have the value No_Block.
31 |
32 | # Property: Block
33 | #
34 | # @missing: 0000..10FFFF; No_Block
35 |
36 | 0000..007F; Basic Latin
37 | 0080..00FF; Latin-1 Supplement
38 | 0100..017F; Latin Extended-A
39 | 0180..024F; Latin Extended-B
40 | 0250..02AF; IPA Extensions
41 | 02B0..02FF; Spacing Modifier Letters
42 | 0300..036F; Combining Diacritical Marks
43 | 0370..03FF; Greek and Coptic
44 | 0400..04FF; Cyrillic
45 | 0500..052F; Cyrillic Supplement
46 | 0530..058F; Armenian
47 | 0590..05FF; Hebrew
48 | 0600..06FF; Arabic
49 | 0700..074F; Syriac
50 | 0750..077F; Arabic Supplement
51 | 0780..07BF; Thaana
52 | 07C0..07FF; NKo
53 | 0800..083F; Samaritan
54 | 0840..085F; Mandaic
55 | 0860..086F; Syriac Supplement
56 | 0870..089F; Arabic Extended-B
57 | 08A0..08FF; Arabic Extended-A
58 | 0900..097F; Devanagari
59 | 0980..09FF; Bengali
60 | 0A00..0A7F; Gurmukhi
61 | 0A80..0AFF; Gujarati
62 | 0B00..0B7F; Oriya
63 | 0B80..0BFF; Tamil
64 | 0C00..0C7F; Telugu
65 | 0C80..0CFF; Kannada
66 | 0D00..0D7F; Malayalam
67 | 0D80..0DFF; Sinhala
68 | 0E00..0E7F; Thai
69 | 0E80..0EFF; Lao
70 | 0F00..0FFF; Tibetan
71 | 1000..109F; Myanmar
72 | 10A0..10FF; Georgian
73 | 1100..11FF; Hangul Jamo
74 | 1200..137F; Ethiopic
75 | 1380..139F; Ethiopic Supplement
76 | 13A0..13FF; Cherokee
77 | 1400..167F; Unified Canadian Aboriginal Syllabics
78 | 1680..169F; Ogham
79 | 16A0..16FF; Runic
80 | 1700..171F; Tagalog
81 | 1720..173F; Hanunoo
82 | 1740..175F; Buhid
83 | 1760..177F; Tagbanwa
84 | 1780..17FF; Khmer
85 | 1800..18AF; Mongolian
86 | 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
87 | 1900..194F; Limbu
88 | 1950..197F; Tai Le
89 | 1980..19DF; New Tai Lue
90 | 19E0..19FF; Khmer Symbols
91 | 1A00..1A1F; Buginese
92 | 1A20..1AAF; Tai Tham
93 | 1AB0..1AFF; Combining Diacritical Marks Extended
94 | 1B00..1B7F; Balinese
95 | 1B80..1BBF; Sundanese
96 | 1BC0..1BFF; Batak
97 | 1C00..1C4F; Lepcha
98 | 1C50..1C7F; Ol Chiki
99 | 1C80..1C8F; Cyrillic Extended-C
100 | 1C90..1CBF; Georgian Extended
101 | 1CC0..1CCF; Sundanese Supplement
102 | 1CD0..1CFF; Vedic Extensions
103 | 1D00..1D7F; Phonetic Extensions
104 | 1D80..1DBF; Phonetic Extensions Supplement
105 | 1DC0..1DFF; Combining Diacritical Marks Supplement
106 | 1E00..1EFF; Latin Extended Additional
107 | 1F00..1FFF; Greek Extended
108 | 2000..206F; General Punctuation
109 | 2070..209F; Superscripts and Subscripts
110 | 20A0..20CF; Currency Symbols
111 | 20D0..20FF; Combining Diacritical Marks for Symbols
112 | 2100..214F; Letterlike Symbols
113 | 2150..218F; Number Forms
114 | 2190..21FF; Arrows
115 | 2200..22FF; Mathematical Operators
116 | 2300..23FF; Miscellaneous Technical
117 | 2400..243F; Control Pictures
118 | 2440..245F; Optical Character Recognition
119 | 2460..24FF; Enclosed Alphanumerics
120 | 2500..257F; Box Drawing
121 | 2580..259F; Block Elements
122 | 25A0..25FF; Geometric Shapes
123 | 2600..26FF; Miscellaneous Symbols
124 | 2700..27BF; Dingbats
125 | 27C0..27EF; Miscellaneous Mathematical Symbols-A
126 | 27F0..27FF; Supplemental Arrows-A
127 | 2800..28FF; Braille Patterns
128 | 2900..297F; Supplemental Arrows-B
129 | 2980..29FF; Miscellaneous Mathematical Symbols-B
130 | 2A00..2AFF; Supplemental Mathematical Operators
131 | 2B00..2BFF; Miscellaneous Symbols and Arrows
132 | 2C00..2C5F; Glagolitic
133 | 2C60..2C7F; Latin Extended-C
134 | 2C80..2CFF; Coptic
135 | 2D00..2D2F; Georgian Supplement
136 | 2D30..2D7F; Tifinagh
137 | 2D80..2DDF; Ethiopic Extended
138 | 2DE0..2DFF; Cyrillic Extended-A
139 | 2E00..2E7F; Supplemental Punctuation
140 | 2E80..2EFF; CJK Radicals Supplement
141 | 2F00..2FDF; Kangxi Radicals
142 | 2FF0..2FFF; Ideographic Description Characters
143 | 3000..303F; CJK Symbols and Punctuation
144 | 3040..309F; Hiragana
145 | 30A0..30FF; Katakana
146 | 3100..312F; Bopomofo
147 | 3130..318F; Hangul Compatibility Jamo
148 | 3190..319F; Kanbun
149 | 31A0..31BF; Bopomofo Extended
150 | 31C0..31EF; CJK Strokes
151 | 31F0..31FF; Katakana Phonetic Extensions
152 | 3200..32FF; Enclosed CJK Letters and Months
153 | 3300..33FF; CJK Compatibility
154 | 3400..4DBF; CJK Unified Ideographs Extension A
155 | 4DC0..4DFF; Yijing Hexagram Symbols
156 | 4E00..9FFF; CJK Unified Ideographs
157 | A000..A48F; Yi Syllables
158 | A490..A4CF; Yi Radicals
159 | A4D0..A4FF; Lisu
160 | A500..A63F; Vai
161 | A640..A69F; Cyrillic Extended-B
162 | A6A0..A6FF; Bamum
163 | A700..A71F; Modifier Tone Letters
164 | A720..A7FF; Latin Extended-D
165 | A800..A82F; Syloti Nagri
166 | A830..A83F; Common Indic Number Forms
167 | A840..A87F; Phags-pa
168 | A880..A8DF; Saurashtra
169 | A8E0..A8FF; Devanagari Extended
170 | A900..A92F; Kayah Li
171 | A930..A95F; Rejang
172 | A960..A97F; Hangul Jamo Extended-A
173 | A980..A9DF; Javanese
174 | A9E0..A9FF; Myanmar Extended-B
175 | AA00..AA5F; Cham
176 | AA60..AA7F; Myanmar Extended-A
177 | AA80..AADF; Tai Viet
178 | AAE0..AAFF; Meetei Mayek Extensions
179 | AB00..AB2F; Ethiopic Extended-A
180 | AB30..AB6F; Latin Extended-E
181 | AB70..ABBF; Cherokee Supplement
182 | ABC0..ABFF; Meetei Mayek
183 | AC00..D7AF; Hangul Syllables
184 | D7B0..D7FF; Hangul Jamo Extended-B
185 | D800..DB7F; High Surrogates
186 | DB80..DBFF; High Private Use Surrogates
187 | DC00..DFFF; Low Surrogates
188 | E000..F8FF; Private Use Area
189 | F900..FAFF; CJK Compatibility Ideographs
190 | FB00..FB4F; Alphabetic Presentation Forms
191 | FB50..FDFF; Arabic Presentation Forms-A
192 | FE00..FE0F; Variation Selectors
193 | FE10..FE1F; Vertical Forms
194 | FE20..FE2F; Combining Half Marks
195 | FE30..FE4F; CJK Compatibility Forms
196 | FE50..FE6F; Small Form Variants
197 | FE70..FEFF; Arabic Presentation Forms-B
198 | FF00..FFEF; Halfwidth and Fullwidth Forms
199 | FFF0..FFFF; Specials
200 | 10000..1007F; Linear B Syllabary
201 | 10080..100FF; Linear B Ideograms
202 | 10100..1013F; Aegean Numbers
203 | 10140..1018F; Ancient Greek Numbers
204 | 10190..101CF; Ancient Symbols
205 | 101D0..101FF; Phaistos Disc
206 | 10280..1029F; Lycian
207 | 102A0..102DF; Carian
208 | 102E0..102FF; Coptic Epact Numbers
209 | 10300..1032F; Old Italic
210 | 10330..1034F; Gothic
211 | 10350..1037F; Old Permic
212 | 10380..1039F; Ugaritic
213 | 103A0..103DF; Old Persian
214 | 10400..1044F; Deseret
215 | 10450..1047F; Shavian
216 | 10480..104AF; Osmanya
217 | 104B0..104FF; Osage
218 | 10500..1052F; Elbasan
219 | 10530..1056F; Caucasian Albanian
220 | 10570..105BF; Vithkuqi
221 | 105C0..105FF; Todhri
222 | 10600..1077F; Linear A
223 | 10780..107BF; Latin Extended-F
224 | 10800..1083F; Cypriot Syllabary
225 | 10840..1085F; Imperial Aramaic
226 | 10860..1087F; Palmyrene
227 | 10880..108AF; Nabataean
228 | 108E0..108FF; Hatran
229 | 10900..1091F; Phoenician
230 | 10920..1093F; Lydian
231 | 10980..1099F; Meroitic Hieroglyphs
232 | 109A0..109FF; Meroitic Cursive
233 | 10A00..10A5F; Kharoshthi
234 | 10A60..10A7F; Old South Arabian
235 | 10A80..10A9F; Old North Arabian
236 | 10AC0..10AFF; Manichaean
237 | 10B00..10B3F; Avestan
238 | 10B40..10B5F; Inscriptional Parthian
239 | 10B60..10B7F; Inscriptional Pahlavi
240 | 10B80..10BAF; Psalter Pahlavi
241 | 10C00..10C4F; Old Turkic
242 | 10C80..10CFF; Old Hungarian
243 | 10D00..10D3F; Hanifi Rohingya
244 | 10D40..10D8F; Garay
245 | 10E60..10E7F; Rumi Numeral Symbols
246 | 10E80..10EBF; Yezidi
247 | 10EC0..10EFF; Arabic Extended-C
248 | 10F00..10F2F; Old Sogdian
249 | 10F30..10F6F; Sogdian
250 | 10F70..10FAF; Old Uyghur
251 | 10FB0..10FDF; Chorasmian
252 | 10FE0..10FFF; Elymaic
253 | 11000..1107F; Brahmi
254 | 11080..110CF; Kaithi
255 | 110D0..110FF; Sora Sompeng
256 | 11100..1114F; Chakma
257 | 11150..1117F; Mahajani
258 | 11180..111DF; Sharada
259 | 111E0..111FF; Sinhala Archaic Numbers
260 | 11200..1124F; Khojki
261 | 11280..112AF; Multani
262 | 112B0..112FF; Khudawadi
263 | 11300..1137F; Grantha
264 | 11380..113FF; Tulu-Tigalari
265 | 11400..1147F; Newa
266 | 11480..114DF; Tirhuta
267 | 11580..115FF; Siddham
268 | 11600..1165F; Modi
269 | 11660..1167F; Mongolian Supplement
270 | 11680..116CF; Takri
271 | 116D0..116FF; Myanmar Extended-C
272 | 11700..1174F; Ahom
273 | 11800..1184F; Dogra
274 | 118A0..118FF; Warang Citi
275 | 11900..1195F; Dives Akuru
276 | 119A0..119FF; Nandinagari
277 | 11A00..11A4F; Zanabazar Square
278 | 11A50..11AAF; Soyombo
279 | 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
280 | 11AC0..11AFF; Pau Cin Hau
281 | 11B00..11B5F; Devanagari Extended-A
282 | 11BC0..11BFF; Sunuwar
283 | 11C00..11C6F; Bhaiksuki
284 | 11C70..11CBF; Marchen
285 | 11D00..11D5F; Masaram Gondi
286 | 11D60..11DAF; Gunjala Gondi
287 | 11EE0..11EFF; Makasar
288 | 11F00..11F5F; Kawi
289 | 11FB0..11FBF; Lisu Supplement
290 | 11FC0..11FFF; Tamil Supplement
291 | 12000..123FF; Cuneiform
292 | 12400..1247F; Cuneiform Numbers and Punctuation
293 | 12480..1254F; Early Dynastic Cuneiform
294 | 12F90..12FFF; Cypro-Minoan
295 | 13000..1342F; Egyptian Hieroglyphs
296 | 13430..1345F; Egyptian Hieroglyph Format Controls
297 | 13460..143FF; Egyptian Hieroglyphs Extended-A
298 | 14400..1467F; Anatolian Hieroglyphs
299 | 16100..1613F; Gurung Khema
300 | 16800..16A3F; Bamum Supplement
301 | 16A40..16A6F; Mro
302 | 16A70..16ACF; Tangsa
303 | 16AD0..16AFF; Bassa Vah
304 | 16B00..16B8F; Pahawh Hmong
305 | 16D40..16D7F; Kirat Rai
306 | 16E40..16E9F; Medefaidrin
307 | 16F00..16F9F; Miao
308 | 16FE0..16FFF; Ideographic Symbols and Punctuation
309 | 17000..187FF; Tangut
310 | 18800..18AFF; Tangut Components
311 | 18B00..18CFF; Khitan Small Script
312 | 18D00..18D7F; Tangut Supplement
313 | 1AFF0..1AFFF; Kana Extended-B
314 | 1B000..1B0FF; Kana Supplement
315 | 1B100..1B12F; Kana Extended-A
316 | 1B130..1B16F; Small Kana Extension
317 | 1B170..1B2FF; Nushu
318 | 1BC00..1BC9F; Duployan
319 | 1BCA0..1BCAF; Shorthand Format Controls
320 | 1CC00..1CEBF; Symbols for Legacy Computing Supplement
321 | 1CF00..1CFCF; Znamenny Musical Notation
322 | 1D000..1D0FF; Byzantine Musical Symbols
323 | 1D100..1D1FF; Musical Symbols
324 | 1D200..1D24F; Ancient Greek Musical Notation
325 | 1D2C0..1D2DF; Kaktovik Numerals
326 | 1D2E0..1D2FF; Mayan Numerals
327 | 1D300..1D35F; Tai Xuan Jing Symbols
328 | 1D360..1D37F; Counting Rod Numerals
329 | 1D400..1D7FF; Mathematical Alphanumeric Symbols
330 | 1D800..1DAAF; Sutton SignWriting
331 | 1DF00..1DFFF; Latin Extended-G
332 | 1E000..1E02F; Glagolitic Supplement
333 | 1E030..1E08F; Cyrillic Extended-D
334 | 1E100..1E14F; Nyiakeng Puachue Hmong
335 | 1E290..1E2BF; Toto
336 | 1E2C0..1E2FF; Wancho
337 | 1E4D0..1E4FF; Nag Mundari
338 | 1E5D0..1E5FF; Ol Onal
339 | 1E7E0..1E7FF; Ethiopic Extended-B
340 | 1E800..1E8DF; Mende Kikakui
341 | 1E900..1E95F; Adlam
342 | 1EC70..1ECBF; Indic Siyaq Numbers
343 | 1ED00..1ED4F; Ottoman Siyaq Numbers
344 | 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
345 | 1F000..1F02F; Mahjong Tiles
346 | 1F030..1F09F; Domino Tiles
347 | 1F0A0..1F0FF; Playing Cards
348 | 1F100..1F1FF; Enclosed Alphanumeric Supplement
349 | 1F200..1F2FF; Enclosed Ideographic Supplement
350 | 1F300..1F5FF; Miscellaneous Symbols and Pictographs
351 | 1F600..1F64F; Emoticons
352 | 1F650..1F67F; Ornamental Dingbats
353 | 1F680..1F6FF; Transport and Map Symbols
354 | 1F700..1F77F; Alchemical Symbols
355 | 1F780..1F7FF; Geometric Shapes Extended
356 | 1F800..1F8FF; Supplemental Arrows-C
357 | 1F900..1F9FF; Supplemental Symbols and Pictographs
358 | 1FA00..1FA6F; Chess Symbols
359 | 1FA70..1FAFF; Symbols and Pictographs Extended-A
360 | 1FB00..1FBFF; Symbols for Legacy Computing
361 | 20000..2A6DF; CJK Unified Ideographs Extension B
362 | 2A700..2B73F; CJK Unified Ideographs Extension C
363 | 2B740..2B81F; CJK Unified Ideographs Extension D
364 | 2B820..2CEAF; CJK Unified Ideographs Extension E
365 | 2CEB0..2EBEF; CJK Unified Ideographs Extension F
366 | 2EBF0..2EE5F; CJK Unified Ideographs Extension I
367 | 2F800..2FA1F; CJK Compatibility Ideographs Supplement
368 | 30000..3134F; CJK Unified Ideographs Extension G
369 | 31350..323AF; CJK Unified Ideographs Extension H
370 | E0000..E007F; Tags
371 | E0100..E01EF; Variation Selectors Supplement
372 | F0000..FFFFF; Supplementary Private Use Area-A
373 | 100000..10FFFF; Supplementary Private Use Area-B
374 |
375 | # EOF
376 |
--------------------------------------------------------------------------------
/data/Cargo.lock:
--------------------------------------------------------------------------------
1 | # This file is automatically @generated by Cargo.
2 | # It is not intended for manual editing.
3 | version = 4
4 |
5 | [[package]]
6 | name = "addr2line"
7 | version = "0.21.0"
8 | source = "registry+https://github.com/rust-lang/crates.io-index"
9 | checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb"
10 | dependencies = [
11 | "gimli",
12 | ]
13 |
14 | [[package]]
15 | name = "adler"
16 | version = "1.0.2"
17 | source = "registry+https://github.com/rust-lang/crates.io-index"
18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
19 |
20 | [[package]]
21 | name = "aho-corasick"
22 | version = "1.1.3"
23 | source = "registry+https://github.com/rust-lang/crates.io-index"
24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
25 | dependencies = [
26 | "memchr",
27 | ]
28 |
29 | [[package]]
30 | name = "backtrace"
31 | version = "0.3.71"
32 | source = "registry+https://github.com/rust-lang/crates.io-index"
33 | checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d"
34 | dependencies = [
35 | "addr2line",
36 | "cc",
37 | "cfg-if",
38 | "libc",
39 | "miniz_oxide",
40 | "object",
41 | "rustc-demangle",
42 | ]
43 |
44 | [[package]]
45 | name = "bon"
46 | version = "3.4.0"
47 | source = "registry+https://github.com/rust-lang/crates.io-index"
48 | checksum = "8a8a41e51fda5f7d87152d00f50d08ce24bf5cee8a962facf7f2526a66f8a5fa"
49 | dependencies = [
50 | "bon-macros",
51 | "rustversion",
52 | ]
53 |
54 | [[package]]
55 | name = "bon-macros"
56 | version = "3.4.0"
57 | source = "registry+https://github.com/rust-lang/crates.io-index"
58 | checksum = "6b592add4016ac26ca340298fed5cc2524abe8bacae78ebca3780286da588304"
59 | dependencies = [
60 | "darling",
61 | "ident_case",
62 | "prettyplease",
63 | "proc-macro2",
64 | "quote",
65 | "rustversion",
66 | "syn",
67 | ]
68 |
69 | [[package]]
70 | name = "byteorder"
71 | version = "1.5.0"
72 | source = "registry+https://github.com/rust-lang/crates.io-index"
73 | checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
74 |
75 | [[package]]
76 | name = "cc"
77 | version = "1.2.16"
78 | source = "registry+https://github.com/rust-lang/crates.io-index"
79 | checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
80 | dependencies = [
81 | "shlex",
82 | ]
83 |
84 | [[package]]
85 | name = "cfg-if"
86 | version = "1.0.0"
87 | source = "registry+https://github.com/rust-lang/crates.io-index"
88 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
89 |
90 | [[package]]
91 | name = "color-eyre"
92 | version = "0.6.3"
93 | source = "registry+https://github.com/rust-lang/crates.io-index"
94 | checksum = "55146f5e46f237f7423d74111267d4597b59b0dad0ffaf7303bce9945d843ad5"
95 | dependencies = [
96 | "backtrace",
97 | "color-spantrace",
98 | "eyre",
99 | "indenter",
100 | "once_cell",
101 | "owo-colors",
102 | "tracing-error",
103 | ]
104 |
105 | [[package]]
106 | name = "color-spantrace"
107 | version = "0.2.1"
108 | source = "registry+https://github.com/rust-lang/crates.io-index"
109 | checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2"
110 | dependencies = [
111 | "once_cell",
112 | "owo-colors",
113 | "tracing-core",
114 | "tracing-error",
115 | ]
116 |
117 | [[package]]
118 | name = "darling"
119 | version = "0.20.10"
120 | source = "registry+https://github.com/rust-lang/crates.io-index"
121 | checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
122 | dependencies = [
123 | "darling_core",
124 | "darling_macro",
125 | ]
126 |
127 | [[package]]
128 | name = "darling_core"
129 | version = "0.20.10"
130 | source = "registry+https://github.com/rust-lang/crates.io-index"
131 | checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
132 | dependencies = [
133 | "fnv",
134 | "ident_case",
135 | "proc-macro2",
136 | "quote",
137 | "strsim",
138 | "syn",
139 | ]
140 |
141 | [[package]]
142 | name = "darling_macro"
143 | version = "0.20.10"
144 | source = "registry+https://github.com/rust-lang/crates.io-index"
145 | checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
146 | dependencies = [
147 | "darling_core",
148 | "quote",
149 | "syn",
150 | ]
151 |
152 | [[package]]
153 | name = "data"
154 | version = "0.0.0"
155 | dependencies = [
156 | "bon",
157 | "byteorder",
158 | "color-eyre",
159 | "enumflags2",
160 | "nom",
161 | "regex",
162 | "serde",
163 | "serde_json",
164 | ]
165 |
166 | [[package]]
167 | name = "enumflags2"
168 | version = "0.7.11"
169 | source = "registry+https://github.com/rust-lang/crates.io-index"
170 | checksum = "ba2f4b465f5318854c6f8dd686ede6c0a9dc67d4b1ac241cf0eb51521a309147"
171 | dependencies = [
172 | "enumflags2_derive",
173 | ]
174 |
175 | [[package]]
176 | name = "enumflags2_derive"
177 | version = "0.7.11"
178 | source = "registry+https://github.com/rust-lang/crates.io-index"
179 | checksum = "fc4caf64a58d7a6d65ab00639b046ff54399a39f5f2554728895ace4b297cd79"
180 | dependencies = [
181 | "proc-macro2",
182 | "quote",
183 | "syn",
184 | ]
185 |
186 | [[package]]
187 | name = "eyre"
188 | version = "0.6.12"
189 | source = "registry+https://github.com/rust-lang/crates.io-index"
190 | checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec"
191 | dependencies = [
192 | "indenter",
193 | "once_cell",
194 | ]
195 |
196 | [[package]]
197 | name = "fnv"
198 | version = "1.0.7"
199 | source = "registry+https://github.com/rust-lang/crates.io-index"
200 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
201 |
202 | [[package]]
203 | name = "gimli"
204 | version = "0.28.1"
205 | source = "registry+https://github.com/rust-lang/crates.io-index"
206 | checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253"
207 |
208 | [[package]]
209 | name = "ident_case"
210 | version = "1.0.1"
211 | source = "registry+https://github.com/rust-lang/crates.io-index"
212 | checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
213 |
214 | [[package]]
215 | name = "indenter"
216 | version = "0.3.3"
217 | source = "registry+https://github.com/rust-lang/crates.io-index"
218 | checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
219 |
220 | [[package]]
221 | name = "itoa"
222 | version = "1.0.4"
223 | source = "registry+https://github.com/rust-lang/crates.io-index"
224 | checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
225 |
226 | [[package]]
227 | name = "lazy_static"
228 | version = "1.5.0"
229 | source = "registry+https://github.com/rust-lang/crates.io-index"
230 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
231 |
232 | [[package]]
233 | name = "libc"
234 | version = "0.2.171"
235 | source = "registry+https://github.com/rust-lang/crates.io-index"
236 | checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6"
237 |
238 | [[package]]
239 | name = "memchr"
240 | version = "2.7.4"
241 | source = "registry+https://github.com/rust-lang/crates.io-index"
242 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
243 |
244 | [[package]]
245 | name = "minimal-lexical"
246 | version = "0.2.1"
247 | source = "registry+https://github.com/rust-lang/crates.io-index"
248 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
249 |
250 | [[package]]
251 | name = "miniz_oxide"
252 | version = "0.7.4"
253 | source = "registry+https://github.com/rust-lang/crates.io-index"
254 | checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08"
255 | dependencies = [
256 | "adler",
257 | ]
258 |
259 | [[package]]
260 | name = "nom"
261 | version = "7.1.3"
262 | source = "registry+https://github.com/rust-lang/crates.io-index"
263 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
264 | dependencies = [
265 | "memchr",
266 | "minimal-lexical",
267 | ]
268 |
269 | [[package]]
270 | name = "object"
271 | version = "0.32.2"
272 | source = "registry+https://github.com/rust-lang/crates.io-index"
273 | checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441"
274 | dependencies = [
275 | "memchr",
276 | ]
277 |
278 | [[package]]
279 | name = "once_cell"
280 | version = "1.21.1"
281 | source = "registry+https://github.com/rust-lang/crates.io-index"
282 | checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc"
283 |
284 | [[package]]
285 | name = "owo-colors"
286 | version = "3.5.0"
287 | source = "registry+https://github.com/rust-lang/crates.io-index"
288 | checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f"
289 |
290 | [[package]]
291 | name = "pin-project-lite"
292 | version = "0.2.16"
293 | source = "registry+https://github.com/rust-lang/crates.io-index"
294 | checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b"
295 |
296 | [[package]]
297 | name = "prettyplease"
298 | version = "0.2.31"
299 | source = "registry+https://github.com/rust-lang/crates.io-index"
300 | checksum = "5316f57387668042f561aae71480de936257848f9c43ce528e311d89a07cadeb"
301 | dependencies = [
302 | "proc-macro2",
303 | "syn",
304 | ]
305 |
306 | [[package]]
307 | name = "proc-macro2"
308 | version = "1.0.94"
309 | source = "registry+https://github.com/rust-lang/crates.io-index"
310 | checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84"
311 | dependencies = [
312 | "unicode-ident",
313 | ]
314 |
315 | [[package]]
316 | name = "quote"
317 | version = "1.0.40"
318 | source = "registry+https://github.com/rust-lang/crates.io-index"
319 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
320 | dependencies = [
321 | "proc-macro2",
322 | ]
323 |
324 | [[package]]
325 | name = "regex"
326 | version = "1.11.1"
327 | source = "registry+https://github.com/rust-lang/crates.io-index"
328 | checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191"
329 | dependencies = [
330 | "aho-corasick",
331 | "memchr",
332 | "regex-automata",
333 | "regex-syntax",
334 | ]
335 |
336 | [[package]]
337 | name = "regex-automata"
338 | version = "0.4.9"
339 | source = "registry+https://github.com/rust-lang/crates.io-index"
340 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
341 | dependencies = [
342 | "aho-corasick",
343 | "memchr",
344 | "regex-syntax",
345 | ]
346 |
347 | [[package]]
348 | name = "regex-syntax"
349 | version = "0.8.5"
350 | source = "registry+https://github.com/rust-lang/crates.io-index"
351 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
352 |
353 | [[package]]
354 | name = "rustc-demangle"
355 | version = "0.1.24"
356 | source = "registry+https://github.com/rust-lang/crates.io-index"
357 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
358 |
359 | [[package]]
360 | name = "rustversion"
361 | version = "1.0.20"
362 | source = "registry+https://github.com/rust-lang/crates.io-index"
363 | checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2"
364 |
365 | [[package]]
366 | name = "ryu"
367 | version = "1.0.0"
368 | source = "registry+https://github.com/rust-lang/crates.io-index"
369 | checksum = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
370 |
371 | [[package]]
372 | name = "serde"
373 | version = "1.0.219"
374 | source = "registry+https://github.com/rust-lang/crates.io-index"
375 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
376 | dependencies = [
377 | "serde_derive",
378 | ]
379 |
380 | [[package]]
381 | name = "serde_derive"
382 | version = "1.0.219"
383 | source = "registry+https://github.com/rust-lang/crates.io-index"
384 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
385 | dependencies = [
386 | "proc-macro2",
387 | "quote",
388 | "syn",
389 | ]
390 |
391 | [[package]]
392 | name = "serde_json"
393 | version = "1.0.140"
394 | source = "registry+https://github.com/rust-lang/crates.io-index"
395 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
396 | dependencies = [
397 | "itoa",
398 | "memchr",
399 | "ryu",
400 | "serde",
401 | ]
402 |
403 | [[package]]
404 | name = "sharded-slab"
405 | version = "0.1.7"
406 | source = "registry+https://github.com/rust-lang/crates.io-index"
407 | checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
408 | dependencies = [
409 | "lazy_static",
410 | ]
411 |
412 | [[package]]
413 | name = "shlex"
414 | version = "1.3.0"
415 | source = "registry+https://github.com/rust-lang/crates.io-index"
416 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
417 |
418 | [[package]]
419 | name = "strsim"
420 | version = "0.11.1"
421 | source = "registry+https://github.com/rust-lang/crates.io-index"
422 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
423 |
424 | [[package]]
425 | name = "syn"
426 | version = "2.0.100"
427 | source = "registry+https://github.com/rust-lang/crates.io-index"
428 | checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0"
429 | dependencies = [
430 | "proc-macro2",
431 | "quote",
432 | "unicode-ident",
433 | ]
434 |
435 | [[package]]
436 | name = "thread_local"
437 | version = "1.1.8"
438 | source = "registry+https://github.com/rust-lang/crates.io-index"
439 | checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
440 | dependencies = [
441 | "cfg-if",
442 | "once_cell",
443 | ]
444 |
445 | [[package]]
446 | name = "tracing"
447 | version = "0.1.41"
448 | source = "registry+https://github.com/rust-lang/crates.io-index"
449 | checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0"
450 | dependencies = [
451 | "pin-project-lite",
452 | "tracing-core",
453 | ]
454 |
455 | [[package]]
456 | name = "tracing-core"
457 | version = "0.1.33"
458 | source = "registry+https://github.com/rust-lang/crates.io-index"
459 | checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
460 | dependencies = [
461 | "once_cell",
462 | "valuable",
463 | ]
464 |
465 | [[package]]
466 | name = "tracing-error"
467 | version = "0.2.1"
468 | source = "registry+https://github.com/rust-lang/crates.io-index"
469 | checksum = "8b1581020d7a273442f5b45074a6a57d5757ad0a47dac0e9f0bd57b81936f3db"
470 | dependencies = [
471 | "tracing",
472 | "tracing-subscriber",
473 | ]
474 |
475 | [[package]]
476 | name = "tracing-subscriber"
477 | version = "0.3.19"
478 | source = "registry+https://github.com/rust-lang/crates.io-index"
479 | checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008"
480 | dependencies = [
481 | "sharded-slab",
482 | "thread_local",
483 | "tracing-core",
484 | ]
485 |
486 | [[package]]
487 | name = "unicode-ident"
488 | version = "1.0.5"
489 | source = "registry+https://github.com/rust-lang/crates.io-index"
490 | checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"
491 |
492 | [[package]]
493 | name = "valuable"
494 | version = "0.1.1"
495 | source = "registry+https://github.com/rust-lang/crates.io-index"
496 | checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65"
497 |
--------------------------------------------------------------------------------
/src/data.ts:
--------------------------------------------------------------------------------
1 | import { EGCBREAK } from "../data/egcbreak";
2 |
3 | import { pointToYouPlus } from "./formatting";
4 | import { pointToString, stringToPoint, stringToPoints } from "./encoding";
5 |
/**
 * Names of the per-point tables whose values are string-pool entries and
 * that may be queried through getString. Every member is also a key of Data.
 */
export type StringField =
  "dnrp" | "gc" | "block" | "age" | "hjsn" | "uhdef" | "uhman";
14 |
/**
 * The loaded data tables. Every table except `string` is a raw DataView
 * that the accessor functions in this module decode on demand.
 */
export interface Data {
  /** Counts used to locate the variable-length part of the sequence tables. */
  info: DataInfo;

  /** String pool; other tables refer to its entries by index. */
  string: string[];

  // Flag tables: `bits` and `ebits` hold one byte per point (getFlag and
  // getEmojiFlag), `pagebits` holds one byte per page (getPageFlag).
  bits: DataView;
  ebits: DataView;
  pagebits: DataView;

  // Per-point tables read through getSparse, plus the flat alias tables
  // `aliass` and `aliast`, which are indexed by alias index instead.
  name: DataView;
  aliasc: DataView;
  aliasi: DataView;
  aliass: DataView;
  aliast: DataView;
  dnrp: DataView;
  gb: DataView;
  gc: DataView;
  block: DataView;
  age: DataView;
  hlvt: DataView;
  hjsn: DataView;
  uhdef: DataView;
  uhman: DataView;

  // Sequence tables: buckets (`seqb`), points (`seqp`) and names (`seqn`).
  seqb: DataView;
  seqp: DataView;
  seqn: DataView;
}
42 |
/** Counts that accompany the sequence tables. */
export interface DataInfo {
  /** Not read anywhere in this module; presumably the record count of `seqb`. */
  sequenceBucketCount: number;
  /** Number of 3-byte header records at the start of `seqp` and `seqn`. */
  sequenceCount: number;
}
47 |
/**
 * Kind of a name alias, as stored one byte per alias in `Data.aliast`.
 * The numeric values are part of the data format and must not be renumbered.
 */
export enum AliasType {
  Correction = 0,
  Control = 1,
  Alternate = 2,
  Figment = 3,
  Abbreviation = 4,
  Unicode1 = 5,
  Cldr = 6,
}
57 |
/**
 * A contiguous range of sequence indices, as returned by findSequenceBucket:
 * the sequences `start` .. `start + len - 1`.
 */
export interface SequenceBucket {
  /** Index of the first sequence in the bucket. */
  start: number;
  /** Number of sequences in the bucket. */
  len: number;
}
62 |
/**
 * Grapheme break class of a point, as stored one byte per point in `Data.gb`.
 * The numeric values are part of the data format; getNextClusterBreak uses 0
 * for points that have no entry.
 */
export enum GraphemeBreak {
  Cr = 1,
  Lf = 2,
  Control = 3,
  Extend = 4,
  Zwj = 5,
  RegionalIndicator = 6,
  Prepend = 7,
  SpacingMark = 8,
  HangulL = 9,
  HangulV = 10,
  HangulT = 11,
  HangulLV = 12,
  HangulLVT = 13,
}
78 |
// Keys of T whose property type is assignable to U.
// https://stackoverflow.com/q/51419176
type KeyOfType<T, U> = { [P in keyof T]: T[P] extends U ? P : never }[keyof T];

// Describes how one fixed-width member of a table is read: the DataView
// getter to call and the number of bytes each member occupies.
type SparseMemberType = {
  method: KeyOfType<DataView, (byteOffset: number) => number>;
  len: number;
};
const Uint8: SparseMemberType = { method: "getUint8", len: 1 };
const Uint16: SparseMemberType = { method: "getUint16", len: 2 };
87 |
/**
 * Reads the value stored for `point` in a paged per-point table.
 *
 * The table starts with a page table of big-endian uint16 entries, one per
 * 256-point page, followed at byte 8704 by the pages themselves. 8704 equals
 * 2 bytes x 4352 pages, i.e. enough entries for points up to U+10FFFF. An
 * entry of 0xffff means the page is absent; any other entry is the page's
 * position among the stored pages.
 *
 * @param ty how a member is read (DataView getter name and byte width)
 * @param field the table to read from
 * @param def value to return when the point's page is absent
 * @param point code point to look up
 * @returns the stored number, or `def` when the page is absent
 */
function getSparse<T>(
  ty: SparseMemberType,
  field: DataView,
  def: T,
  point: number,
): number | T {
  const PAGE_SIZE = 256;
  const PAGE_TABLE_BYTES = 8704;
  const ABSENT_PAGE = 0xffff;

  const pageIndex = Math.floor(point / PAGE_SIZE);
  const pageSlot = field.getUint16(pageIndex * 2);
  if (pageSlot == ABSENT_PAGE) return def;

  const memberIndex = pageSlot * PAGE_SIZE + (point % PAGE_SIZE);
  return field[ty.method](PAGE_TABLE_BYTES + memberIndex * ty.len);
}
100 |
/**
 * Tests bit `shift` of the flag byte stored for `point` in `data.bits`.
 * Points without a stored byte are treated as having all flags clear.
 */
function getFlag(data: Data, shift: number, point: number): boolean {
  const flags = getSparse(Uint8, data.bits, 0, point);
  return ((flags >> shift) & 1) !== 0;
}
104 |
/**
 * Tests bit `shift` of the flag byte stored for `point` in `data.ebits`.
 * Points without a stored byte are treated as having all flags clear.
 */
function getEmojiFlag(data: Data, shift: number, point: number): boolean {
  const flags = getSparse(Uint8, data.ebits, 0, point);
  return ((flags >> shift) & 1) !== 0;
}
108 |
/**
 * Tests bit `shift` of the byte at index `page` in `data.pagebits`.
 * Unlike the per-point flag tables, this table is read directly.
 */
function getPageFlag(data: Data, shift: number, page: number): boolean {
  const flags = data.pagebits.getUint8(page);
  return ((flags >> shift) & 1) !== 0;
}
112 |
/**
 * Looks up the string stored for `point` in one of the StringField tables.
 *
 * The name table is deliberately not reachable from here: callers that
 * need a name should pick the name semantics that fit their context and
 * use (or add) a higher-level function built on getString0.
 *
 * @returns the string, or null when the point has no value for `field`
 */
export function getString(
  data: Data,
  field: StringField,
  point: number,
): string | null {
  const value = getString0(data, field, point);
  return value;
}
127 |
/**
 * Reads the string-pool index stored for `point` in the table named
 * `field` (including the raw name table) and resolves it to a string.
 *
 * @returns the string, or null when no index is stored or it is out of range
 */
function getString0(
  data: Data,
  field: "name" | StringField,
  point: number,
): string | null {
  const table = data[field];
  const stringIndex = getSparse(Uint16, table, 0xffff, point);
  return getStringByIndex(data, stringIndex);
}
136 |
/**
 * Resolves a string-pool index to its string.
 *
 * @returns null for the 0xffff "no string" marker and for indices at or
 *   beyond the end of the pool
 */
function getStringByIndex(data: Data, index: number): string | null {
  const missing = index == 0xffff || index >= data.string.length;
  return missing ? null : data.string[index];
}
144 |
/**
 * Returns the Name property of `point`, whether the name is enumerated in
 * the data or derived by rule.
 *
 * This is the name shown in the details panel and in search results.
 *
 * For points flagged NR1 the result is the "dnrp" prefix followed by the
 * Hangul syllable name; for points flagged NR2 it is the "dnrp" prefix
 * followed by the formatted code point. Otherwise the stored name is used.
 *
 * Note that charming currently overrides the stored name while generating
 * the data files, using the last formal name alias of type figment,
 * control or correction (if any).
 */
export function getNameProperty(data: Data, point: number): string | null {
  if (hasDerivedNameNr1(data, point)) {
    const prefix = getString(data, "dnrp", point);
    const syllable = getHangulSyllableName(data, point);
    return `${prefix}${syllable}`;
  }

  if (hasDerivedNameNr2(data, point)) {
    const prefix = getString(data, "dnrp", point);
    const suffix = pointToYouPlus(point, "");
    return `${prefix}${suffix}`;
  }

  return getString0(data, "name", point);
}
166 |
/**
 * Returns the Name property of `point` unless the name is derivable by
 * rule NR1 or NR2, in which case null is returned (even if
 * UnicodeData.txt also states the name explicitly).
 */
export function getNonDerivedName(data: Data, point: number): string | null {
  const derivable =
    hasDerivedNameNr1(data, point) || hasDerivedNameNr2(data, point);
  return derivable ? null : getNameProperty(data, point);
}
179 |
/**
 * Returns the Name property of `point` unless the name is derivable by
 * rule NR2, in which case null is returned (even if UnicodeData.txt also
 * states the name explicitly).
 *
 * The search algorithm uses this name.
 */
export function getNameExceptNr2(data: Data, point: number): string | null {
  return hasDerivedNameNr2(data, point) ? null : getNameProperty(data, point);
}
194 |
/**
 * Returns the character name as old-charming reported it.
 *
 * Old-charming replaced the character name with the Unihan kDefinition
 * when one exists, so that CJK ideographs can be found by definition when
 * #search_han is checked. Otherwise the stored name is returned.
 */
export function getOldName(data: Data, point: number): string | null {
  // FIXME figment/control/correction
  const definition = getString(data, "uhdef", point);
  if (definition != null) return definition;
  return getString0(data, "name", point);
}
206 |
/**
 * Builds the syllable part of a Hangul syllable name from the short names
 * ("hjsn") of its jamo.
 *
 * The `hlvt` entry for the point packs a presence bit (bit 15) and three
 * 5-bit indices for the leading consonant, vowel and trailing consonant.
 * A trailing index of 0 means there is no trailing consonant.
 *
 * @returns the concatenated short names, or null when the presence bit is
 *   clear or the point has no `hlvt` entry
 */
export function getHangulSyllableName(
  data: Data,
  point: number,
): string | null {
  // 3.12 Conjoining Jamo Behavior
  const L_BASE = 0x1100;
  const V_BASE = 0x1161;
  const T_BASE = 0x11a7;
  const INDEX_MASK = 0b11111;

  const packed = getSparse(Uint16, data.hlvt, 0, point);
  const present = (packed >> 15) & 0b1;
  if (present != 1) return null;

  const leading = (packed >> 10) & INDEX_MASK;
  const vowel = (packed >> 5) & INDEX_MASK;
  const trailing = packed & INDEX_MASK;

  const leadingName = getString(data, "hjsn", L_BASE + leading);
  const vowelName = getString(data, "hjsn", V_BASE + vowel);
  const trailingName =
    trailing > 0 ? getString(data, "hjsn", T_BASE + trailing) : "";
  return `${leadingName}${vowelName}${trailingName}`;
}
233 |
/** Returns the value stored for `point` in `data.aliasc`, or 0 when none is stored. */
export function getAliasCount(data: Data, point: number): number {
  const count = getSparse(Uint8, data.aliasc, 0, point);
  return count;
}
237 |
/**
 * Returns the value stored for `point` in `data.aliasi`, or null when none
 * is stored. By its name this is the alias index of the point's first alias.
 */
export function getAliasBaseIndex(data: Data, point: number): number | null {
  const baseIndex = getSparse(Uint16, data.aliasi, null, point);
  return baseIndex;
}
241 |
/**
 * Returns the text of the alias at `aliasIndex`: `data.aliass` holds one
 * uint16 string-pool index per alias.
 */
export function getAliasValue(data: Data, aliasIndex: number): string | null {
  const stringIndex = data.aliass[Uint16.method](aliasIndex * Uint16.len);
  return getStringByIndex(data, stringIndex);
}
247 |
/**
 * Returns the type of the alias at `aliasIndex`: `data.aliast` holds one
 * byte per alias.
 */
export function getAliasType(data: Data, aliasIndex: number): AliasType | null {
  const byteOffset = aliasIndex * Uint8.len;
  return data.aliast[Uint8.method](byteOffset);
}
253 |
/**
 * Binary-searches `data.seqb` for the bucket keyed by the first two points
 * of a sequence.
 *
 * Each record is 11 bytes: uint32 first point, uint32 second point, uint16
 * start and uint8 len. The search assumes records are sorted by first
 * point, then by second point.
 *
 * @returns the bucket's start and len, or null when no record matches.
 *   An empty table also yields null; only whole records are searched, so a
 *   trailing partial record is ignored.
 */
export function findSequenceBucket(
  data: Data,
  firstPoint: number,
  secondPoint: number,
): SequenceBucket | null {
  const RECORD_LEN = 11;

  // Flooring keeps the bounds integral: with a fractional upper bound the
  // "one candidate left" test below might never become true.
  const count = Math.floor(data.seqb.byteLength / RECORD_LEN);
  if (count == 0) return null;

  // Invariant: a matching record, if any, lies in [lo, hi).
  let lo = 0;
  let hi = count;
  for (;;) {
    const mid = lo + Math.floor((hi - lo) / 2);
    const base = mid * RECORD_LEN;
    const first = data.seqb.getUint32(base + 0);
    const second = data.seqb.getUint32(base + 4);

    // Lexicographic comparison of (first, second) with the search key.
    const order =
      first != firstPoint ? first - firstPoint : second - secondPoint;
    if (order == 0) {
      const start = data.seqb.getUint16(base + 8);
      const len = data.seqb.getUint8(base + 10);
      return { start, len };
    }

    // The only remaining candidate did not match.
    if (hi - lo == 1) return null;

    if (order < 0) lo = mid;
    else hi = mid;
  }
}
285 |
/**
 * Finds the index of the sequence whose points equal `points` exactly.
 *
 * The first two points select a bucket; the sequences in that bucket are
 * then compared one by one.
 *
 * @returns the sequence index, or null when `points` has fewer than two
 *   elements, no bucket matches, or no sequence in the bucket is equal
 */
export function findSequenceIndex(data: Data, points: number[]): number | null {
  if (points.length < 2) return null;

  const [first, second] = points;
  const bucket = findSequenceBucket(data, first, second);
  if (bucket == null) return null;

  const end = bucket.start + bucket.len;
  for (let index = bucket.start; index < end; index++) {
    const candidate = getSequencePoints(data, index)!;
    if (candidate.length != points.length) continue;
    if (candidate.every((p, k) => p == points[k])) return index;
  }

  return null;
}
300 |
/**
 * Returns the code points of the sequence at `sequenceIndex`.
 *
 * `data.seqp` starts with one 3-byte record per sequence (uint16 start,
 * uint8 len), followed by a flat array of uint32 points that `start`
 * indexes into.
 */
export function getSequencePoints(
  data: Data,
  sequenceIndex: number,
): number[] | null {
  const record = sequenceIndex * 3;
  const first = data.seqp.getUint16(record + 0);
  const count = data.seqp.getUint8(record + 2);
  const pointsBase = data.info.sequenceCount * 3;
  return Array.from({ length: count }, (_, k) =>
    data.seqp.getUint32(pointsBase + (first + k) * 4),
  );
}
313 |
/**
 * Returns every name of the sequence at `sequenceIndex`.
 *
 * `data.seqn` starts with one 3-byte record per sequence (uint16 start,
 * uint8 len), followed by a flat array of uint16 string-pool indices that
 * `start` indexes into.
 */
export function getSequenceNames(
  data: Data,
  sequenceIndex: number,
): string[] | null {
  const record = sequenceIndex * 3;
  const first = data.seqn.getUint16(record + 0);
  const count = data.seqn.getUint8(record + 2);
  const namesBase = data.info.sequenceCount * 3;
  return Array.from(
    { length: count },
    (_, k) =>
      getStringByIndex(data, data.seqn.getUint16(namesBase + (first + k) * 2))!,
  );
}
326 |
/**
 * Returns one name of the sequence at `sequenceIndex`.
 *
 * `data.seqn` starts with one 3-byte record per sequence (uint16 start,
 * uint8 len), followed by a flat array of uint16 string-pool indices that
 * `start` indexes into.
 *
 * @param sequenceNameIndex zero-based position among the sequence's names
 * @returns the name, or null when `sequenceNameIndex` is outside
 *   0 .. len - 1 or the string-pool index does not resolve
 */
export function getSequenceNameByIndices(
  data: Data,
  sequenceIndex: number,
  sequenceNameIndex: number,
): string | null {
  const record = sequenceIndex * 3;
  const start = data.seqn.getUint16(record + 0);
  // The length is a single byte, as in getSequenceNames. Reading it as a
  // uint16 would fold in the first byte of the following record and make
  // the bounds check below ineffective.
  const len = data.seqn.getUint8(record + 2);
  if (sequenceNameIndex < 0 || sequenceNameIndex >= len) return null;

  const base = data.info.sequenceCount * 3;
  return getStringByIndex(
    data,
    data.seqn.getUint16(base + (start + sequenceNameIndex) * 2),
  );
}
342 |
/**
 * Returns the grapheme break class stored for `point` in `data.gb`, or
 * null when the point has no stored class.
 */
export function getGraphemeBreak(
  data: Data,
  point: number,
): GraphemeBreak | null {
  const breakClass = getSparse(Uint8, data.gb, null, point);
  return breakClass;
}
349 |
/** Tests per-point flag bit 0 (by its name: the point has a Unihan kDefinition). */
export function kDefinitionExists(data: Data, point: number): boolean {
  const FLAG_BIT = 0;
  return getFlag(data, FLAG_BIT, point);
}
353 |
/** Tests per-point flag bit 2 (by its name: the point is a space separator). */
export function isSpaceSeparator(data: Data, point: number): boolean {
  const FLAG_BIT = 2;
  return getFlag(data, FLAG_BIT, point);
}
357 |
/** Tests per-point flag bit 3 (by its name: the point is a mark of any kind). */
export function isAnyMark(data: Data, point: number): boolean {
  const FLAG_BIT = 3;
  return getFlag(data, FLAG_BIT, point);
}
361 |
/**
 * Tests per-point flag bit 4. getNameProperty builds the name of such
 * points from the "dnrp" prefix and the Hangul syllable name.
 */
export function hasDerivedNameNr1(data: Data, point: number): boolean {
  const FLAG_BIT = 4;
  return getFlag(data, FLAG_BIT, point);
}
365 |
/**
 * Tests per-point flag bit 5. getNameProperty builds the name of such
 * points from the "dnrp" prefix and the formatted code point.
 */
export function hasDerivedNameNr2(data: Data, point: number): boolean {
  const FLAG_BIT = 5;
  return getFlag(data, FLAG_BIT, point);
}
369 |
/**
 * Tests per-page flag bit 0 (by its name: some point on the page has a
 * name other than one derived by NR2).
 */
export function hasAnyNameExceptNr2(data: Data, page: number): boolean {
  const FLAG_BIT = 0;
  return getPageFlag(data, FLAG_BIT, page);
}
373 |
/** Tests per-page flag bit 1 (by its name: some point on the page has a "uhdef" value). */
export function hasAnyUhdef(data: Data, page: number): boolean {
  const FLAG_BIT = 1;
  return getPageFlag(data, FLAG_BIT, page);
}
377 |
/** Tests per-page flag bit 2 (by its name: some point on the page has an alias). */
export function hasAnyAlias(data: Data, page: number): boolean {
  const FLAG_BIT = 2;
  return getPageFlag(data, FLAG_BIT, page);
}
381 |
/** Tests per-point emoji flag bit 0 (by its name: the Emoji property). */
export function isEmoji(data: Data, point: number): boolean {
  const FLAG_BIT = 0;
  return getEmojiFlag(data, FLAG_BIT, point);
}
385 |
/** Tests per-point emoji flag bit 1 (by its name: the Extended_Pictographic property). */
export function isExtendedPictographic(data: Data, point: number): boolean {
  const FLAG_BIT = 1;
  return getEmojiFlag(data, FLAG_BIT, point);
}
389 |
/** Tests per-point emoji flag bit 2 (by its name: the Emoji_Component property). */
export function isEmojiComponent(data: Data, point: number): boolean {
  const FLAG_BIT = 2;
  return getEmojiFlag(data, FLAG_BIT, point);
}
393 |
/** Tests per-point emoji flag bit 3 (by its name: the Emoji_Presentation property). */
export function isEmojiPresentation(data: Data, point: number): boolean {
  const FLAG_BIT = 3;
  return getEmojiFlag(data, FLAG_BIT, point);
}
397 |
/** Tests per-point emoji flag bit 4 (by its name: the Emoji_Modifier property). */
export function isEmojiModifier(data: Data, point: number): boolean {
  const FLAG_BIT = 4;
  return getEmojiFlag(data, FLAG_BIT, point);
}
401 |
/** Tests per-point emoji flag bit 5 (by its name: the Emoji_Modifier_Base property). */
export function isEmojiModifierBase(data: Data, point: number): boolean {
  const FLAG_BIT = 5;
  return getEmojiFlag(data, FLAG_BIT, point);
}
405 |
/**
 * Iteration state for getNextClusterBreak: the position of the most
 * recently reported break, plus the precomputed break classes.
 */
interface ClusterBreaker {
  /** Break position as a UTF-16 code unit index into the string. */
  startUnitIndex: number;
  /** The same position as a code point index, which is also an index into `kind`. */
  startPointIndex: number;
  /**
   * One UTF-16 unit per code point of the string: the grapheme break class
   * (0 when none) with bit 7 set for Extended_Pictographic points.
   */
  kind: string;
}
411 |
/**
 * Steps through the cluster breaks of `string`, one break per call.
 *
 * Call without `context` to obtain the break at the start of the string,
 * then pass the returned object back in to obtain each following break.
 * The context object is updated in place and returned again.
 *
 * @returns the break position, or null when the string is empty (first
 *   call) or the previous break was already at the end of the string
 */
export function getNextClusterBreak(
  data: Data,
  string: string,
  context: ClusterBreaker | null = null,
): ClusterBreaker | null {
  if (context != null) {
    if (context.startUnitIndex == string.length) return null;

    // Match one cluster in the class string, starting at the previous
    // break; afterwards lastIndex is the next break as a point index.
    EGCBREAK.lastIndex = context.startPointIndex;
    EGCBREAK.exec(context.kind);
    const nextPointIndex = EGCBREAK.lastIndex;

    // Convert the points consumed into UTF-16 units: astral points take two.
    let unitIndex = context.startUnitIndex;
    for (let p = context.startPointIndex; p < nextPointIndex; p++) {
      unitIndex += string.codePointAt(unitIndex)! > 0xffff ? 2 : 1;
    }
    context.startUnitIndex = unitIndex;
    context.startPointIndex = nextPointIndex;

    return context;
  }

  if (string.length == 0) return null;

  // Encode every code point as a single unit so that indices into `kind`
  // are code point indices.
  const kind = Array.from(string, (pointish) => {
    const point = stringToPoint(pointish)!;
    const gb = getGraphemeBreak(data, point) ?? 0;
    const exp = Number(isExtendedPictographic(data, point));
    return String.fromCharCode((exp << 7) | gb);
  }).join("");

  // GB1: sot / Any
  return { startUnitIndex: 0, startPointIndex: 0, kind };
}
448 |
449 | export function getEmojiPresentationRuns(data: Data, string: string): number[] {
450 | const points = stringToPoints(string);
451 | const result = [0];
452 | let emojiRun = false;
453 | for (let i = 0, j = 0; j < string.length /* nothing */; ) {
454 | const n = consumeEmojiSeq(i);
455 | // console.log(`gEPR j=${j} i=${i} n=${n} point=${pointToYouPlus(points[i])} emojiRun=${emojiRun}`);
456 | if ((n != null) != emojiRun) {
457 | emojiRun = !emojiRun;
458 | result.push(j);
459 | }
460 | for (let k = 0; k < (n ?? 1); i++, k++) j += points[i] > 0xffff ? 2 : 1;
461 | }
462 | return result;
463 |
464 | function consumeEmojiSeq(i: number): number | null {
465 | return consumeStandaloneSeq(i) ?? consumeSeqSeq(i);
466 | }
467 |
468 | function consumeStandaloneSeq(i: number): number | null {
469 | return consumeKeycapSeq(i) ?? consumeFlagSeq(i);
470 | }
471 |
472 | function consumeSeqSeq(i: number): number | null {
473 | const n = consumeTagBaseOrZwjElement(i);
474 | if (n == null) return null;
475 | return consumeZwjSeq(i, n) ?? consumeTagSeq(i, n) ?? n;
476 | }
477 |
478 | function consumeTagBaseOrZwjElement(i: number): number | null {
479 | const point = lookahead(i, 0, (x) => x);
480 | if (point == null) return null;
481 | if (isEmoji(data, point))
482 | if (isEmojiPresentation(data, point))
483 | if (lookahead(i, 1, (x) => x == 0xfe0e)) return null;
484 | else return 1;
485 | else if (lookahead(i, 1, (x) => x == 0xfe0f)) return 2;
486 | else if (isEmojiModifierBase(data, point))
487 | if (lookahead(i, 1, (x) => isEmojiModifier(data, x))) return 2;
488 | else return null;
489 | return null;
490 | }
491 |
492 | function consumeZwjSeq(i: number, n: number): number | null {
493 |   // Extends an n-entry element at points[i] with one or more (U+200D ZWJ + element) pairs.
494 |   // Returns the total length in entries of points, or null when not even one pair follows.
495 |   const isZwj = (x: number) => x == 0x200d;
496 |   // Length after appending one ZWJ + element pair to a match of length len, or null if either part is missing.
497 |   const extend = (len: number): number | null => {
498 |     const afterZwj = add(len, consume(i, len, isZwj));
499 |     if (afterZwj == null) return null;
500 |     return add(afterZwj, consumeTagBaseOrZwjElement(i + afterZwj));
501 |   };
502 |   let total = extend(n);
503 |   if (total == null) return null;
504 |   for (let next = extend(total); next != null; next = extend(total)) total = next;
505 |   return total;
506 | }
507 |
508 | function consumeTagSeq(i: number, n: number): number | null {
509 |   let end = n; // offset just past the base; advanced over tag characters U+E0020..U+E007E
510 |   while (lookahead(i, end, (x) => x >= 0xe0020 && x <= 0xe007e)) end += 1;
511 |   return lookahead(i, end, (x) => x == 0xe007f) ? end + 1 : null; // must close with U+E007F, else no match
512 | }
513 |
514 | function consumeKeycapSeq(i: number): number | null {
515 |   // Keycap shape: one of 0-9, # or *, then U+FE0F, then U+20E3; a match always spans three entries of points.
516 |   const isKeycapBase = (x: number) => /[0-9#*]/.test(pointToString(x));
517 |   const matched = lookahead(i, 0, isKeycapBase) && lookahead(i, 1, (x) => x == 0xfe0f) && lookahead(i, 2, (x) => x == 0x20e3);
518 |   return matched ? 3 : null;
519 | }
520 |
521 | function consumeFlagSeq(i: number): number | null {
522 |   // A flag match is two consecutive entries of points whose grapheme-break value is RegionalIndicator.
523 |   const RI = GraphemeBreak.RegionalIndicator;
524 |   const isRegionalIndicator = (x: number) => getGraphemeBreak(data, x) == RI;
525 |   return lookahead(i, 0, isRegionalIndicator) && lookahead(i, 1, isRegionalIndicator) ? 2 : null;
526 | }
527 |
528 | function add(result: number, add: number | null): number | null { // Null-propagating sum: a null (or undefined) second operand yields null instead of a number.
529 | return add != null ? result + add : null; // the parameter named add shadows this function inside its own body
530 | }
531 |
532 | function consume(i: number, n: number, pred: (_: number) => boolean): number | null {
533 |   // One-step matcher built on lookahead: yields 1 when the code point at
534 |   // points[i + n] satisfies pred, and null when it does not or when that
535 |   // index lies past the end of points. The 1 is the count that add() sums.
536 |   const oneIfMatch = (x: number) => (pred(x) == true ? 1 : null);
537 |   return lookahead(i, n, oneIfMatch);
538 | }
539 |
540 | function lookahead<T>(i: number, n: number, fun: (_: number) => T): T | null { // <T> declares the callback's result type; without it T is an unresolved name
541 |   return i + n < points.length ? fun(points[i + n]) : null; // fun applied to points[i + n]; null when that index is past the end of points
542 | }
543 | }
544 |
--------------------------------------------------------------------------------
/data/NameAliases.txt:
--------------------------------------------------------------------------------
1 | # NameAliases-16.0.0.txt
2 | # Date: 2024-04-24
3 | # © 2024 Unicode®, Inc.
4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
5 | # For terms of use and license, see https://www.unicode.org/terms_of_use.html
6 | #
7 | # Unicode Character Database
8 | # For documentation, see https://www.unicode.org/reports/tr44/
9 | #
10 | # This file is a normative contributory data file in the
11 | # Unicode Character Database.
12 | #
13 | # This file defines the formal name aliases for Unicode characters.
14 | #
15 | # For informative aliases, see NamesList.txt
16 | #
17 | # The formal name aliases are divided into five types, each with a distinct label.
18 | #
19 | # Type Labels:
20 | #
21 | # 1. correction
22 | # Corrections for serious problems in the character names
23 | # 2. control
24 | # ISO 6429 names for C0 and C1 control functions, and other
25 | # commonly occurring names for control codes
26 | # 3. alternate
27 | # A few widely used alternate names for format characters
28 | # 4. figment
29 | # Several documented labels for C1 control code points which
30 | # were never actually approved in any standard
31 | # 5. abbreviation
32 | # Commonly occurring abbreviations (or acronyms) for control codes,
33 | # format characters, spaces, and variation selectors
34 | #
35 | # The formal name aliases are part of the Unicode character namespace, which
36 | # includes the character names and the names of named character sequences.
37 | # The inclusion of ISO 6429 names and other commonly occurring names and
38 | # abbreviations for control codes and format characters as formal name aliases
39 | # is to help avoid name collisions between Unicode character names and the
40 | # labels which commonly appear in text and/or in implementations such as regex, for
41 | # control codes (which for historical reasons have no Unicode character name)
42 | # or for format characters.
43 | #
44 | # For documentation, see NamesList.html and https://www.unicode.org/reports/tr44/
45 | #
46 | # FORMAT
47 | #
48 | # Each line has three fields, as described here:
49 | #
50 | # First field: Code point
51 | # Second field: Alias
52 | # Third field: Type
53 | #
54 | # The type labels used are defined above. As for property values, comparisons
55 | # of type labels should ignore case.
56 | #
57 | # The type labels can be mapped to other strings for display, if desired.
58 | #
59 | # In case multiple aliases are assigned, additional aliases
60 | # are provided on separate lines. Parsers of this data file should
61 | # take note that the same code point can (and does) occur more than once.
62 | #
63 | # Note that currently the only instances of multiple aliases of the same
64 | # type for a single code point are either of type "control" or "abbreviation".
65 | # An alias of type "abbreviation" can, in principle, be added for any code
66 | # point, although currently aliases of type "correction" do not have
67 | # any additional aliases of type "abbreviation". Such relationships
68 | # are not enforced by stability policies.
69 | #
70 | #-----------------------------------------------------------------
71 |
72 | 0000;NULL;control
73 | 0000;NUL;abbreviation
74 | 0001;START OF HEADING;control
75 | 0001;SOH;abbreviation
76 | 0002;START OF TEXT;control
77 | 0002;STX;abbreviation
78 | 0003;END OF TEXT;control
79 | 0003;ETX;abbreviation
80 | 0004;END OF TRANSMISSION;control
81 | 0004;EOT;abbreviation
82 | 0005;ENQUIRY;control
83 | 0005;ENQ;abbreviation
84 | 0006;ACKNOWLEDGE;control
85 | 0006;ACK;abbreviation
86 |
87 | # Note that no formal name alias for the ISO 6429 "BELL" is
88 | # provided for U+0007, because of the existing name collision
89 | # with U+1F514 BELL.
90 |
91 | 0007;ALERT;control
92 | 0007;BEL;abbreviation
93 |
94 | 0008;BACKSPACE;control
95 | 0008;BS;abbreviation
96 | 0009;CHARACTER TABULATION;control
97 | 0009;HORIZONTAL TABULATION;control
98 | 0009;HT;abbreviation
99 | 0009;TAB;abbreviation
100 | 000A;LINE FEED;control
101 | 000A;NEW LINE;control
102 | 000A;END OF LINE;control
103 | 000A;LF;abbreviation
104 | 000A;NL;abbreviation
105 | 000A;EOL;abbreviation
106 | 000B;LINE TABULATION;control
107 | 000B;VERTICAL TABULATION;control
108 | 000B;VT;abbreviation
109 | 000C;FORM FEED;control
110 | 000C;FF;abbreviation
111 | 000D;CARRIAGE RETURN;control
112 | 000D;CR;abbreviation
113 | 000E;SHIFT OUT;control
114 | 000E;LOCKING-SHIFT ONE;control
115 | 000E;SO;abbreviation
116 | 000F;SHIFT IN;control
117 | 000F;LOCKING-SHIFT ZERO;control
118 | 000F;SI;abbreviation
119 | 0010;DATA LINK ESCAPE;control
120 | 0010;DLE;abbreviation
121 | 0011;DEVICE CONTROL ONE;control
122 | 0011;DC1;abbreviation
123 | 0012;DEVICE CONTROL TWO;control
124 | 0012;DC2;abbreviation
125 | 0013;DEVICE CONTROL THREE;control
126 | 0013;DC3;abbreviation
127 | 0014;DEVICE CONTROL FOUR;control
128 | 0014;DC4;abbreviation
129 | 0015;NEGATIVE ACKNOWLEDGE;control
130 | 0015;NAK;abbreviation
131 | 0016;SYNCHRONOUS IDLE;control
132 | 0016;SYN;abbreviation
133 | 0017;END OF TRANSMISSION BLOCK;control
134 | 0017;ETB;abbreviation
135 | 0018;CANCEL;control
136 | 0018;CAN;abbreviation
137 | 0019;END OF MEDIUM;control
138 | 0019;EOM;abbreviation
139 | 0019;EM;abbreviation
140 | 001A;SUBSTITUTE;control
141 | 001A;SUB;abbreviation
142 | 001B;ESCAPE;control
143 | 001B;ESC;abbreviation
144 | 001C;INFORMATION SEPARATOR FOUR;control
145 | 001C;FILE SEPARATOR;control
146 | 001C;FS;abbreviation
147 | 001D;INFORMATION SEPARATOR THREE;control
148 | 001D;GROUP SEPARATOR;control
149 | 001D;GS;abbreviation
150 | 001E;INFORMATION SEPARATOR TWO;control
151 | 001E;RECORD SEPARATOR;control
152 | 001E;RS;abbreviation
153 | 001F;INFORMATION SEPARATOR ONE;control
154 | 001F;UNIT SEPARATOR;control
155 | 001F;US;abbreviation
156 | 0020;SP;abbreviation
157 | 007F;DELETE;control
158 | 007F;DEL;abbreviation
159 |
160 | # PADDING CHARACTER and HIGH OCTET PRESET represent
161 | # architectural concepts initially proposed for early
162 | # drafts of ISO/IEC 10646-1. They were never actually
163 | # approved or standardized: hence their designation
164 | # here as the "figment" type. Formal name aliases
165 | # (and corresponding abbreviations) for these code
166 | # points are included here because these names leaked
167 | # out from the draft documents and were published in
168 | # at least one RFC whose names for code points were
169 | # implemented in Perl regex expressions.
170 |
171 | 0080;PADDING CHARACTER;figment
172 | 0080;PAD;abbreviation
173 | 0081;HIGH OCTET PRESET;figment
174 | 0081;HOP;abbreviation
175 |
176 | 0082;BREAK PERMITTED HERE;control
177 | 0082;BPH;abbreviation
178 | 0083;NO BREAK HERE;control
179 | 0083;NBH;abbreviation
180 | 0084;INDEX;control
181 | 0084;IND;abbreviation
182 | 0085;NEXT LINE;control
183 | 0085;NEL;abbreviation
184 | 0086;START OF SELECTED AREA;control
185 | 0086;SSA;abbreviation
186 | 0087;END OF SELECTED AREA;control
187 | 0087;ESA;abbreviation
188 | 0088;CHARACTER TABULATION SET;control
189 | 0088;HORIZONTAL TABULATION SET;control
190 | 0088;HTS;abbreviation
191 | 0089;CHARACTER TABULATION WITH JUSTIFICATION;control
192 | 0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control
193 | 0089;HTJ;abbreviation
194 | 008A;LINE TABULATION SET;control
195 | 008A;VERTICAL TABULATION SET;control
196 | 008A;VTS;abbreviation
197 | 008B;PARTIAL LINE FORWARD;control
198 | 008B;PARTIAL LINE DOWN;control
199 | 008B;PLD;abbreviation
200 | 008C;PARTIAL LINE BACKWARD;control
201 | 008C;PARTIAL LINE UP;control
202 | 008C;PLU;abbreviation
203 | 008D;REVERSE LINE FEED;control
204 | 008D;REVERSE INDEX;control
205 | 008D;RI;abbreviation
206 | 008E;SINGLE SHIFT TWO;control
207 | 008E;SINGLE-SHIFT-2;control
208 | 008E;SS2;abbreviation
209 | 008F;SINGLE SHIFT THREE;control
210 | 008F;SINGLE-SHIFT-3;control
211 | 008F;SS3;abbreviation
212 | 0090;DEVICE CONTROL STRING;control
213 | 0090;DCS;abbreviation
214 | 0091;PRIVATE USE ONE;control
215 | 0091;PRIVATE USE-1;control
216 | 0091;PU1;abbreviation
217 | 0092;PRIVATE USE TWO;control
218 | 0092;PRIVATE USE-2;control
219 | 0092;PU2;abbreviation
220 | 0093;SET TRANSMIT STATE;control
221 | 0093;STS;abbreviation
222 | 0094;CANCEL CHARACTER;control
223 | 0094;CCH;abbreviation
224 | 0095;MESSAGE WAITING;control
225 | 0095;MW;abbreviation
226 | 0096;START OF GUARDED AREA;control
227 | 0096;START OF PROTECTED AREA;control
228 | 0096;SPA;abbreviation
229 | 0097;END OF GUARDED AREA;control
230 | 0097;END OF PROTECTED AREA;control
231 | 0097;EPA;abbreviation
232 | 0098;START OF STRING;control
233 | 0098;SOS;abbreviation
234 |
235 | # SINGLE GRAPHIC CHARACTER INTRODUCER is another
236 | # architectural concept from early drafts of ISO/IEC 10646-1
237 | # which was never approved and standardized.
238 |
239 | 0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment
240 | 0099;SGC;abbreviation
241 |
242 | 009A;SINGLE CHARACTER INTRODUCER;control
243 | 009A;SCI;abbreviation
244 | 009B;CONTROL SEQUENCE INTRODUCER;control
245 | 009B;CSI;abbreviation
246 | 009C;STRING TERMINATOR;control
247 | 009C;ST;abbreviation
248 | 009D;OPERATING SYSTEM COMMAND;control
249 | 009D;OSC;abbreviation
250 | 009E;PRIVACY MESSAGE;control
251 | 009E;PM;abbreviation
252 | 009F;APPLICATION PROGRAM COMMAND;control
253 | 009F;APC;abbreviation
254 | 00A0;NBSP;abbreviation
255 | 00AD;SHY;abbreviation
256 | 01A2;LATIN CAPITAL LETTER GHA;correction
257 | 01A3;LATIN SMALL LETTER GHA;correction
258 | 034F;CGJ;abbreviation
259 | 0616;ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE;correction
260 | 061C;ALM;abbreviation
261 | 0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction
262 | 0CDE;KANNADA LETTER LLLA;correction
263 | 0E9D;LAO LETTER FO FON;correction
264 | 0E9F;LAO LETTER FO FAY;correction
265 | 0EA3;LAO LETTER RO;correction
266 | 0EA5;LAO LETTER LO;correction
267 | 0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction
268 | 11EC;HANGUL JONGSEONG YESIEUNG-KIYEOK;correction
269 | 11ED;HANGUL JONGSEONG YESIEUNG-SSANGKIYEOK;correction
270 | 11EE;HANGUL JONGSEONG SSANGYESIEUNG;correction
271 | 11EF;HANGUL JONGSEONG YESIEUNG-KHIEUKH;correction
272 | 180B;FVS1;abbreviation
273 | 180C;FVS2;abbreviation
274 | 180D;FVS3;abbreviation
275 | 180E;MVS;abbreviation
276 | 180F;FVS4;abbreviation
277 | 1BBD;SUNDANESE LETTER ARCHAIC I;correction
278 | 200B;ZWSP;abbreviation
279 | 200C;ZWNJ;abbreviation
280 | 200D;ZWJ;abbreviation
281 | 200E;LRM;abbreviation
282 | 200F;RLM;abbreviation
283 | 202A;LRE;abbreviation
284 | 202B;RLE;abbreviation
285 | 202C;PDF;abbreviation
286 | 202D;LRO;abbreviation
287 | 202E;RLO;abbreviation
288 | 202F;NNBSP;abbreviation
289 | 205F;MMSP;abbreviation
290 | 2060;WJ;abbreviation
291 | 2066;LRI;abbreviation
292 | 2067;RLI;abbreviation
293 | 2068;FSI;abbreviation
294 | 2069;PDI;abbreviation
295 | 2118;WEIERSTRASS ELLIPTIC FUNCTION;correction
296 | 2448;MICR ON US SYMBOL;correction
297 | 2449;MICR DASH SYMBOL;correction
298 | 2B7A;LEFTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction
299 | 2B7C;RIGHTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction
300 | A015;YI SYLLABLE ITERATION MARK;correction
301 | AA6E;MYANMAR LETTER KHAMTI LLA;correction
302 | FE00;VS1;abbreviation
303 | FE01;VS2;abbreviation
304 | FE02;VS3;abbreviation
305 | FE03;VS4;abbreviation
306 | FE04;VS5;abbreviation
307 | FE05;VS6;abbreviation
308 | FE06;VS7;abbreviation
309 | FE07;VS8;abbreviation
310 | FE08;VS9;abbreviation
311 | FE09;VS10;abbreviation
312 | FE0A;VS11;abbreviation
313 | FE0B;VS12;abbreviation
314 | FE0C;VS13;abbreviation
315 | FE0D;VS14;abbreviation
316 | FE0E;VS15;abbreviation
317 | FE0F;VS16;abbreviation
318 | FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction
319 | FEFF;BYTE ORDER MARK;alternate
320 | FEFF;BOM;abbreviation
321 | FEFF;ZWNBSP;abbreviation
322 | 122D4;CUNEIFORM SIGN NU11 TENU;correction
323 | 122D5;CUNEIFORM SIGN NU11 OVER NU11 BUR OVER BUR;correction
324 | 12327;CUNEIFORM SIGN KALAM;correction
325 | 1680B;BAMUM LETTER PHASE-A MAEMGBIEE;correction
326 | 16E56;MEDEFAIDRIN CAPITAL LETTER H;correction
327 | 16E57;MEDEFAIDRIN CAPITAL LETTER NG;correction
328 | 16E76;MEDEFAIDRIN SMALL LETTER H;correction
329 | 16E77;MEDEFAIDRIN SMALL LETTER NG;correction
330 | 1B001;HENTAIGANA LETTER E-1;correction
331 | 1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction
332 | 1E899;MENDE KIKAKUI SYLLABLE M172 MBO;correction
333 | 1E89A;MENDE KIKAKUI SYLLABLE M174 MBOO;correction
334 | E0100;VS17;abbreviation
335 | E0101;VS18;abbreviation
336 | E0102;VS19;abbreviation
337 | E0103;VS20;abbreviation
338 | E0104;VS21;abbreviation
339 | E0105;VS22;abbreviation
340 | E0106;VS23;abbreviation
341 | E0107;VS24;abbreviation
342 | E0108;VS25;abbreviation
343 | E0109;VS26;abbreviation
344 | E010A;VS27;abbreviation
345 | E010B;VS28;abbreviation
346 | E010C;VS29;abbreviation
347 | E010D;VS30;abbreviation
348 | E010E;VS31;abbreviation
349 | E010F;VS32;abbreviation
350 | E0110;VS33;abbreviation
351 | E0111;VS34;abbreviation
352 | E0112;VS35;abbreviation
353 | E0113;VS36;abbreviation
354 | E0114;VS37;abbreviation
355 | E0115;VS38;abbreviation
356 | E0116;VS39;abbreviation
357 | E0117;VS40;abbreviation
358 | E0118;VS41;abbreviation
359 | E0119;VS42;abbreviation
360 | E011A;VS43;abbreviation
361 | E011B;VS44;abbreviation
362 | E011C;VS45;abbreviation
363 | E011D;VS46;abbreviation
364 | E011E;VS47;abbreviation
365 | E011F;VS48;abbreviation
366 | E0120;VS49;abbreviation
367 | E0121;VS50;abbreviation
368 | E0122;VS51;abbreviation
369 | E0123;VS52;abbreviation
370 | E0124;VS53;abbreviation
371 | E0125;VS54;abbreviation
372 | E0126;VS55;abbreviation
373 | E0127;VS56;abbreviation
374 | E0128;VS57;abbreviation
375 | E0129;VS58;abbreviation
376 | E012A;VS59;abbreviation
377 | E012B;VS60;abbreviation
378 | E012C;VS61;abbreviation
379 | E012D;VS62;abbreviation
380 | E012E;VS63;abbreviation
381 | E012F;VS64;abbreviation
382 | E0130;VS65;abbreviation
383 | E0131;VS66;abbreviation
384 | E0132;VS67;abbreviation
385 | E0133;VS68;abbreviation
386 | E0134;VS69;abbreviation
387 | E0135;VS70;abbreviation
388 | E0136;VS71;abbreviation
389 | E0137;VS72;abbreviation
390 | E0138;VS73;abbreviation
391 | E0139;VS74;abbreviation
392 | E013A;VS75;abbreviation
393 | E013B;VS76;abbreviation
394 | E013C;VS77;abbreviation
395 | E013D;VS78;abbreviation
396 | E013E;VS79;abbreviation
397 | E013F;VS80;abbreviation
398 | E0140;VS81;abbreviation
399 | E0141;VS82;abbreviation
400 | E0142;VS83;abbreviation
401 | E0143;VS84;abbreviation
402 | E0144;VS85;abbreviation
403 | E0145;VS86;abbreviation
404 | E0146;VS87;abbreviation
405 | E0147;VS88;abbreviation
406 | E0148;VS89;abbreviation
407 | E0149;VS90;abbreviation
408 | E014A;VS91;abbreviation
409 | E014B;VS92;abbreviation
410 | E014C;VS93;abbreviation
411 | E014D;VS94;abbreviation
412 | E014E;VS95;abbreviation
413 | E014F;VS96;abbreviation
414 | E0150;VS97;abbreviation
415 | E0151;VS98;abbreviation
416 | E0152;VS99;abbreviation
417 | E0153;VS100;abbreviation
418 | E0154;VS101;abbreviation
419 | E0155;VS102;abbreviation
420 | E0156;VS103;abbreviation
421 | E0157;VS104;abbreviation
422 | E0158;VS105;abbreviation
423 | E0159;VS106;abbreviation
424 | E015A;VS107;abbreviation
425 | E015B;VS108;abbreviation
426 | E015C;VS109;abbreviation
427 | E015D;VS110;abbreviation
428 | E015E;VS111;abbreviation
429 | E015F;VS112;abbreviation
430 | E0160;VS113;abbreviation
431 | E0161;VS114;abbreviation
432 | E0162;VS115;abbreviation
433 | E0163;VS116;abbreviation
434 | E0164;VS117;abbreviation
435 | E0165;VS118;abbreviation
436 | E0166;VS119;abbreviation
437 | E0167;VS120;abbreviation
438 | E0168;VS121;abbreviation
439 | E0169;VS122;abbreviation
440 | E016A;VS123;abbreviation
441 | E016B;VS124;abbreviation
442 | E016C;VS125;abbreviation
443 | E016D;VS126;abbreviation
444 | E016E;VS127;abbreviation
445 | E016F;VS128;abbreviation
446 | E0170;VS129;abbreviation
447 | E0171;VS130;abbreviation
448 | E0172;VS131;abbreviation
449 | E0173;VS132;abbreviation
450 | E0174;VS133;abbreviation
451 | E0175;VS134;abbreviation
452 | E0176;VS135;abbreviation
453 | E0177;VS136;abbreviation
454 | E0178;VS137;abbreviation
455 | E0179;VS138;abbreviation
456 | E017A;VS139;abbreviation
457 | E017B;VS140;abbreviation
458 | E017C;VS141;abbreviation
459 | E017D;VS142;abbreviation
460 | E017E;VS143;abbreviation
461 | E017F;VS144;abbreviation
462 | E0180;VS145;abbreviation
463 | E0181;VS146;abbreviation
464 | E0182;VS147;abbreviation
465 | E0183;VS148;abbreviation
466 | E0184;VS149;abbreviation
467 | E0185;VS150;abbreviation
468 | E0186;VS151;abbreviation
469 | E0187;VS152;abbreviation
470 | E0188;VS153;abbreviation
471 | E0189;VS154;abbreviation
472 | E018A;VS155;abbreviation
473 | E018B;VS156;abbreviation
474 | E018C;VS157;abbreviation
475 | E018D;VS158;abbreviation
476 | E018E;VS159;abbreviation
477 | E018F;VS160;abbreviation
478 | E0190;VS161;abbreviation
479 | E0191;VS162;abbreviation
480 | E0192;VS163;abbreviation
481 | E0193;VS164;abbreviation
482 | E0194;VS165;abbreviation
483 | E0195;VS166;abbreviation
484 | E0196;VS167;abbreviation
485 | E0197;VS168;abbreviation
486 | E0198;VS169;abbreviation
487 | E0199;VS170;abbreviation
488 | E019A;VS171;abbreviation
489 | E019B;VS172;abbreviation
490 | E019C;VS173;abbreviation
491 | E019D;VS174;abbreviation
492 | E019E;VS175;abbreviation
493 | E019F;VS176;abbreviation
494 | E01A0;VS177;abbreviation
495 | E01A1;VS178;abbreviation
496 | E01A2;VS179;abbreviation
497 | E01A3;VS180;abbreviation
498 | E01A4;VS181;abbreviation
499 | E01A5;VS182;abbreviation
500 | E01A6;VS183;abbreviation
501 | E01A7;VS184;abbreviation
502 | E01A8;VS185;abbreviation
503 | E01A9;VS186;abbreviation
504 | E01AA;VS187;abbreviation
505 | E01AB;VS188;abbreviation
506 | E01AC;VS189;abbreviation
507 | E01AD;VS190;abbreviation
508 | E01AE;VS191;abbreviation
509 | E01AF;VS192;abbreviation
510 | E01B0;VS193;abbreviation
511 | E01B1;VS194;abbreviation
512 | E01B2;VS195;abbreviation
513 | E01B3;VS196;abbreviation
514 | E01B4;VS197;abbreviation
515 | E01B5;VS198;abbreviation
516 | E01B6;VS199;abbreviation
517 | E01B7;VS200;abbreviation
518 | E01B8;VS201;abbreviation
519 | E01B9;VS202;abbreviation
520 | E01BA;VS203;abbreviation
521 | E01BB;VS204;abbreviation
522 | E01BC;VS205;abbreviation
523 | E01BD;VS206;abbreviation
524 | E01BE;VS207;abbreviation
525 | E01BF;VS208;abbreviation
526 | E01C0;VS209;abbreviation
527 | E01C1;VS210;abbreviation
528 | E01C2;VS211;abbreviation
529 | E01C3;VS212;abbreviation
530 | E01C4;VS213;abbreviation
531 | E01C5;VS214;abbreviation
532 | E01C6;VS215;abbreviation
533 | E01C7;VS216;abbreviation
534 | E01C8;VS217;abbreviation
535 | E01C9;VS218;abbreviation
536 | E01CA;VS219;abbreviation
537 | E01CB;VS220;abbreviation
538 | E01CC;VS221;abbreviation
539 | E01CD;VS222;abbreviation
540 | E01CE;VS223;abbreviation
541 | E01CF;VS224;abbreviation
542 | E01D0;VS225;abbreviation
543 | E01D1;VS226;abbreviation
544 | E01D2;VS227;abbreviation
545 | E01D3;VS228;abbreviation
546 | E01D4;VS229;abbreviation
547 | E01D5;VS230;abbreviation
548 | E01D6;VS231;abbreviation
549 | E01D7;VS232;abbreviation
550 | E01D8;VS233;abbreviation
551 | E01D9;VS234;abbreviation
552 | E01DA;VS235;abbreviation
553 | E01DB;VS236;abbreviation
554 | E01DC;VS237;abbreviation
555 | E01DD;VS238;abbreviation
556 | E01DE;VS239;abbreviation
557 | E01DF;VS240;abbreviation
558 | E01E0;VS241;abbreviation
559 | E01E1;VS242;abbreviation
560 | E01E2;VS243;abbreviation
561 | E01E3;VS244;abbreviation
562 | E01E4;VS245;abbreviation
563 | E01E5;VS246;abbreviation
564 | E01E6;VS247;abbreviation
565 | E01E7;VS248;abbreviation
566 | E01E8;VS249;abbreviation
567 | E01E9;VS250;abbreviation
568 | E01EA;VS251;abbreviation
569 | E01EB;VS252;abbreviation
570 | E01EC;VS253;abbreviation
571 | E01ED;VS254;abbreviation
572 | E01EE;VS255;abbreviation
573 | E01EF;VS256;abbreviation
574 |
575 | # EOF
576 |
--------------------------------------------------------------------------------