├── .envrc ├── .python-version ├── .gitattributes ├── __mocks__ └── file.js ├── .gitignore ├── data ├── rust-toolchain.toml ├── .gitignore ├── src │ ├── page.rs │ ├── gbp.rs │ ├── block.rs │ ├── age.rs │ ├── gc.rs │ ├── jamo.rs │ ├── hst.rs │ ├── ns.rs │ ├── na.rs │ ├── captures.rs │ ├── range.rs │ ├── parse.rs │ ├── ur.rs │ ├── sequence.rs │ ├── et.rs │ ├── ed.rs │ ├── pool.rs │ ├── dynamic.rs │ ├── ud.rs │ ├── details.rs │ └── uax29.rs ├── Cargo.toml ├── update.sh ├── Jamo.txt ├── Blocks.txt ├── Cargo.lock └── NameAliases.txt ├── src ├── old.d.ts ├── LastResort-6.0d1e3.ttf ├── AdobeBlank-1.045.otf.woff ├── perf.html ├── default.test.ts ├── new.html ├── default.ts ├── custom.d.ts ├── state.test.ts ├── encoding.ts ├── state.ts ├── encoding.test.ts ├── Display.test.tsx ├── search.ts ├── formatting.ts ├── perf.ts ├── fetch.ts ├── testing.ts ├── old.html ├── formatting.test.ts ├── old.css ├── data.test.ts ├── Display.tsx ├── new.sass ├── old.js ├── search.worker.ts └── data.ts ├── dist ├── .gitignore ├── scratch │ └── edge-points │ │ ├── LastResort-6.0d1e3.ttf │ │ ├── NixOS19.03-Firefox67.0.png │ │ ├── Windows1809-Edge18.17763.png │ │ ├── Windows1809-Firefox67.0.png │ │ ├── NixOS19.03-Chrome75.0.3770.90.png │ │ ├── Windows1809-Chrome75.0.3770.100.png │ │ └── index.html └── nginx.sh ├── prettier.config.js ├── ci └── build.sh ├── helper ├── .gitignore ├── MaterialSymbolsOutlined.woff2 └── nanoemoji.sh ├── .prettierignore ├── jest.config.js ├── pyproject.toml ├── .github └── workflows │ └── size.yml ├── babel.config.js ├── flake.nix ├── tsconfig.json ├── LICENSE.txt ├── shell.nix ├── README.opacus.txt ├── README.md ├── flake.lock ├── package.json ├── Makefile └── webpack.config.js /.envrc: -------------------------------------------------------------------------------- 1 | use nix 2 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.13 
2 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | -------------------------------------------------------------------------------- /__mocks__/file.js: -------------------------------------------------------------------------------- 1 | export default null; 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /coverage/ 3 | -------------------------------------------------------------------------------- /data/rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "1.85" 3 | -------------------------------------------------------------------------------- /src/old.d.ts: -------------------------------------------------------------------------------- 1 | export default function update_info(): void; 2 | -------------------------------------------------------------------------------- /dist/.gitignore: -------------------------------------------------------------------------------- 1 | * 2 | !/scratch 3 | !.gitignore 4 | !nginx.sh 5 | -------------------------------------------------------------------------------- /prettier.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | trailingComma: "all", 3 | }; 4 | -------------------------------------------------------------------------------- /dist/scratch/edge-points/LastResort-6.0d1e3.ttf: -------------------------------------------------------------------------------- 1 | ../../../src/LastResort-6.0d1e3.ttf -------------------------------------------------------------------------------- /data/.gitignore: -------------------------------------------------------------------------------- 1 | 
/target 2 | **/*.rs.bk 3 | /egcbreak.ts 4 | /data.info.json 5 | /data.*.bin 6 | -------------------------------------------------------------------------------- /src/LastResort-6.0d1e3.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/src/LastResort-6.0d1e3.ttf -------------------------------------------------------------------------------- /src/AdobeBlank-1.045.otf.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/src/AdobeBlank-1.045.otf.woff -------------------------------------------------------------------------------- /ci/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | npm install 4 | make init 5 | make init-nixos 6 | make assets 7 | npm run build 8 | -------------------------------------------------------------------------------- /helper/.gitignore: -------------------------------------------------------------------------------- 1 | /.venv/ 2 | /build/ 3 | /dist/ 4 | /fa-*.woff2 5 | /twemoji-*/ 6 | /twemoji-*.tar.gz 7 | /Symbola-* 8 | -------------------------------------------------------------------------------- /helper/MaterialSymbolsOutlined.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/helper/MaterialSymbolsOutlined.woff2 -------------------------------------------------------------------------------- /src/perf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | -------------------------------------------------------------------------------- /dist/scratch/edge-points/NixOS19.03-Firefox67.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/dist/scratch/edge-points/NixOS19.03-Firefox67.0.png -------------------------------------------------------------------------------- /dist/scratch/edge-points/Windows1809-Edge18.17763.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/dist/scratch/edge-points/Windows1809-Edge18.17763.png -------------------------------------------------------------------------------- /dist/scratch/edge-points/Windows1809-Firefox67.0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/dist/scratch/edge-points/Windows1809-Firefox67.0.png -------------------------------------------------------------------------------- /dist/scratch/edge-points/NixOS19.03-Chrome75.0.3770.90.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/dist/scratch/edge-points/NixOS19.03-Chrome75.0.3770.90.png -------------------------------------------------------------------------------- /dist/scratch/edge-points/Windows1809-Chrome75.0.3770.100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/delan/charming/HEAD/dist/scratch/edge-points/Windows1809-Chrome75.0.3770.100.png -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | /package.json 2 | /package-lock.json 3 | /dist/ 4 | 5 | /src/old.js 6 | /src/old.css 7 | /src/jquery-1.8.3.min.js 8 | /data/ 9 | /helper/ 10 | /minilzo-js/ 11 | 
-------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | moduleNameMapper: { 3 | "[.](sass|woff|ttf|bin)$": "/__mocks__/file.js", 4 | }, 5 | testPathIgnorePatterns: ["/helper/"], 6 | }; 7 | -------------------------------------------------------------------------------- /data/src/page.rs: -------------------------------------------------------------------------------- 1 | #[repr(u8)] 2 | #[derive(Debug, Clone, Copy)] 3 | pub(crate) enum PageBits { 4 | HasAnyNameExceptNr2 = 1 << 0, 5 | HasAnyUhdef = 1 << 1, 6 | HasAnyAlias = 1 << 2, 7 | } 8 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "charming" 3 | version = "0.1.0" 4 | description = "Unicode character browser" 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "nanoemoji==0.15.1", 9 | "Brotli==1.0.9", 10 | ] 11 | -------------------------------------------------------------------------------- /src/default.test.ts: -------------------------------------------------------------------------------- 1 | import { nullToDefault } from "./default"; 2 | 3 | test("nullToDefault returns or when value is null", () => 4 | void expect(nullToDefault(null, 42)).toBe(42)); 5 | 6 | test("nullToDefault returns value when value is not null", () => 7 | void expect(nullToDefault(13, 42)).toBe(13)); 8 | -------------------------------------------------------------------------------- /data/src/gbp.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::details::Details; 5 | use crate::range::range_handler; 6 | 7 | pub(crate) fn gbp_handler(sink: &mut [Details], captures: Captures) -> eyre::Result<()> { 8 | 
range_handler(|r, x| r.gb = Some(x.parse().unwrap()), sink, captures) 9 | } 10 | -------------------------------------------------------------------------------- /helper/nanoemoji.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eu 3 | 4 | cd helper 5 | 6 | . .venv/bin/activate 7 | 8 | set -x 9 | # exec nanoemoji --helpfull 10 | # twemoji-13.1.1/assets/svg/1f496.svg 11 | [ -f build/Font.ttf ] \ 12 | || find $1 -name '*.svg' -print0 \ 13 | | >&2 xargs -0xs 262144 -n 999999999 nanoemoji --color_format cff_colr_0 14 | -------------------------------------------------------------------------------- /src/new.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | charming 5 | 6 | 7 | 8 |
9 | -------------------------------------------------------------------------------- /data/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "data" 3 | version = "0.0.0" 4 | authors = ["Delan Azabani "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | regex = "1.11.1" 9 | serde = { version = "1.0.219", features = ["rc", "derive"] } 10 | serde_json = "1.0.140" 11 | byteorder = "1.5.0" 12 | nom = "7.1.3" 13 | color-eyre = "0.6.3" 14 | bon = "3.4.0" 15 | enumflags2 = "0.7.11" 16 | -------------------------------------------------------------------------------- /data/src/block.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::details::Details; 5 | use crate::pool::Popularity; 6 | use crate::range::range_handler; 7 | 8 | pub(crate) fn block_handler( 9 | popularity: &mut Popularity, 10 | sink: &mut [Details], 11 | captures: Captures, 12 | ) -> eyre::Result<()> { 13 | range_handler(|r, x| r.block = Some(popularity.vote(x)), sink, captures) 14 | } 15 | -------------------------------------------------------------------------------- /src/default.ts: -------------------------------------------------------------------------------- 1 | export function nullToDefault(value: boolean | null, or: boolean): boolean; 2 | export function nullToDefault(value: number | null, or: number): number; 3 | export function nullToDefault(value: string | null, or: string): string; 4 | export function nullToDefault(value: symbol | null, or: symbol): symbol; 5 | export function nullToDefault(value: T | null, or: T): T { 6 | if (value == null) { 7 | return or; 8 | } 9 | 10 | return value; 11 | } 12 | -------------------------------------------------------------------------------- /data/src/age.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 
4 | use crate::details::Details; 5 | use crate::pool::Popularity; 6 | use crate::range::range_handler; 7 | 8 | pub(crate) fn age_handler( 9 | popularity: &mut Popularity, 10 | sink: &mut [Details], 11 | captures: Captures, 12 | ) -> eyre::Result<()> { 13 | range_handler( 14 | |r, x| r.age = Some(popularity.vote(&format!("Unicode {}", x))), 15 | sink, 16 | captures, 17 | ) 18 | } 19 | -------------------------------------------------------------------------------- /.github/workflows/size.yml: -------------------------------------------------------------------------------- 1 | name: "output size" 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | 7 | jobs: 8 | build: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/checkout@v4 12 | - uses: samueldr/lix-gha-installer-action@v1 13 | - run: nix develop -c ci/build.sh 14 | - name: network size 15 | run: ls dist/*/*.{br,woff2} | sort -t/ -sk3,3 | xargs du --apparent-size -ch 16 | - name: estimated memory size (data only) 17 | run: ls dist/*/*.bin | sort -t/ -sk3,3 | xargs du --apparent-size -ch 18 | -------------------------------------------------------------------------------- /data/src/gc.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use color_eyre::eyre; 4 | use regex::Captures; 5 | 6 | use crate::captures::CapturesExt; 7 | 8 | pub(crate) fn gc_handler( 9 | sink: &mut HashMap, 10 | captures: Captures, 11 | ) -> eyre::Result<()> { 12 | let key = captures.try_name("key")?; 13 | let value = captures.try_name("value")?; 14 | let value = value.replace('_', " "); 15 | let value = format!("{} ({})", value, key); 16 | 17 | sink.insert(key.to_owned(), value); 18 | 19 | Ok(()) 20 | } 21 | -------------------------------------------------------------------------------- /data/src/jamo.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use 
crate::captures::CapturesExt; 5 | use crate::details::Details; 6 | use crate::pool::Popularity; 7 | 8 | pub(crate) fn jamo_handler( 9 | popularity: &mut Popularity, 10 | sink: &mut [Details], 11 | captures: Captures, 12 | ) -> eyre::Result<()> { 13 | let point = usize::from_str_radix(captures.try_name("point")?, 16)?; 14 | let value = captures.try_name("value")?; 15 | 16 | sink[point].hjsn = Some(popularity.vote(value)); 17 | 18 | Ok(()) 19 | } 20 | -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = (api) => { 2 | const envPresetOptions = api.env("test") 3 | ? { targets: { node: "current" } } 4 | : { 5 | useBuiltIns: "entry", 6 | corejs: 3, 7 | include: ["@babel/plugin-proposal-optional-chaining"], 8 | }; 9 | 10 | const presets = [ 11 | ["@babel/preset-env", envPresetOptions], 12 | "@babel/preset-typescript", 13 | "@babel/preset-react", 14 | ]; 15 | 16 | const parserOpts = { 17 | strictMode: true, 18 | }; 19 | 20 | return { presets, parserOpts }; 21 | }; 22 | -------------------------------------------------------------------------------- /data/src/hst.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::details::{Details, HangulSyllableType}; 5 | use crate::range::range_handler; 6 | 7 | pub(crate) fn hst_handler(sink: &mut [Details], captures: Captures) -> eyre::Result<()> { 8 | range_handler( 9 | |r, x| { 10 | r.hst = match x { 11 | "LV" => Some(HangulSyllableType::Lv), 12 | "LVT" => Some(HangulSyllableType::Lvt), 13 | _ => None, 14 | } 15 | }, 16 | sink, 17 | captures, 18 | ) 19 | } 20 | -------------------------------------------------------------------------------- /flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Unicode character browser"; 3 | 
4 | inputs = { 5 | nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable"; 6 | flake-utils.url = "github:numtide/flake-utils"; 7 | flake-compat.url = "github:edolstra/flake-compat"; 8 | }; 9 | 10 | outputs = 11 | { 12 | nixpkgs, 13 | flake-utils, 14 | ... 15 | }: 16 | flake-utils.lib.eachDefaultSystem ( 17 | system: 18 | let 19 | pkgs = nixpkgs.legacyPackages.${system}; 20 | in 21 | { 22 | devShell = import ./shell.nix { inherit pkgs; }; 23 | } 24 | ); 25 | } 26 | -------------------------------------------------------------------------------- /data/src/ns.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::captures::CapturesExt; 5 | use crate::pool::Popularity; 6 | use crate::sequence::Sequences; 7 | 8 | pub(crate) fn ns_handler( 9 | popularity: &mut Popularity, 10 | sequences: &mut Sequences, 11 | captures: Captures, 12 | ) -> eyre::Result<()> { 13 | let points = captures 14 | .try_name("points")? 
15 | .split(" ") 16 | .map(|x| usize::from_str_radix(x, 16)) 17 | .collect::, _>>()?; 18 | let name = captures.try_name("name")?; 19 | 20 | sequences.insert(&points, popularity.vote(name)); 21 | 22 | Ok(()) 23 | } 24 | -------------------------------------------------------------------------------- /data/src/na.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::captures::CapturesExt; 5 | use crate::details::{Alias, Details}; 6 | use crate::pool::Popularity; 7 | 8 | pub(crate) fn na_handler( 9 | popularity: &mut Popularity, 10 | sink: &mut [Details], 11 | captures: Captures, 12 | ) -> eyre::Result<()> { 13 | let point = usize::from_str_radix(captures.try_name("point")?, 16)?; 14 | let alias = captures.try_name("alias")?; 15 | let r#type = captures.try_name("type")?; 16 | 17 | sink[point].alias.push(Alias { 18 | inner: popularity.vote(alias), 19 | r#type: r#type.parse()?, 20 | }); 21 | 22 | Ok(()) 23 | } 24 | -------------------------------------------------------------------------------- /data/src/captures.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre::{self, OptionExt}; 2 | use regex::Captures; 3 | 4 | pub(crate) trait CapturesExt { 5 | fn try_name(&self, name: &str) -> eyre::Result<&str>; 6 | fn name_or<'a>(&'a self, name: &str, default: &'a str) -> &'a str; 7 | } 8 | 9 | impl CapturesExt for Captures<'_> { 10 | fn try_name(&self, name: &str) -> eyre::Result<&str> { 11 | self.name(name) 12 | .ok_or_eyre("capture group doesn’t exist or didn’t participate") 13 | .map(|x| x.as_str()) 14 | } 15 | 16 | fn name_or<'a>(&'a self, name: &str, default: &'a str) -> &'a str { 17 | self.try_name(name).unwrap_or(default) 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/custom.d.ts: 
-------------------------------------------------------------------------------- 1 | declare module "*.bin" { 2 | const path: any; 3 | export default path; 4 | } 5 | 6 | // TODO upstream into DefinitelyTyped/DefinitelyTyped 7 | declare module "react-virtualized-auto-sizer" { 8 | import { Component, ReactNode } from "react"; 9 | 10 | export interface Size { 11 | width: number; 12 | height: number; 13 | } 14 | 15 | export interface AutoSizerProps { 16 | children: (size: Size) => ReactNode; 17 | defaultWidth?: number; 18 | defaultHeight?: number; 19 | } 20 | 21 | export default class extends Component {} 22 | } 23 | 24 | // see DefinePlugin in webpack.config.js 25 | declare const __COMMIT_HASH__: string; 26 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "strict": true, 4 | "alwaysStrict": true, 5 | "noImplicitAny": true, 6 | "noImplicitThis": true, 7 | "strictNullChecks": true, 8 | "strictFunctionTypes": true, 9 | "strictPropertyInitialization": true, 10 | "noFallthroughCasesInSwitch": true, 11 | "noImplicitReturns": true, 12 | "noUnusedParameters": true, 13 | "noUnusedLocals": true, 14 | 15 | "noEmit": true, 16 | "target": "ES2018", 17 | "module": "ES2015", 18 | "moduleResolution": "Node", 19 | "resolveJsonModule": true, 20 | "esModuleInterop": true, 21 | "jsx": "preserve" 22 | }, 23 | "exclude": ["helper/"] 24 | } 25 | -------------------------------------------------------------------------------- /data/src/range.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::captures::CapturesExt; 5 | use crate::details::Details; 6 | 7 | pub(crate) fn range_handler( 8 | mut setter: S, 9 | sink: &mut [Details], 10 | captures: Captures, 11 | ) -> eyre::Result<()> { 12 | let first = captures.try_name("first")?; 
13 | let last = captures.name_or("last", first); 14 | 15 | let start = usize::from_str_radix(first, 16)?; 16 | let len = usize::from_str_radix(last, 16)? - start + 1; 17 | 18 | for item in sink.iter_mut().skip(start).take(len) { 19 | setter(item, captures.try_name("value")?); 20 | } 21 | 22 | Ok(()) 23 | } 24 | -------------------------------------------------------------------------------- /data/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # usage: ( cd data; ./update.sh ) 3 | set -eu 4 | 5 | unicode_ver=16.0.0 6 | emoji_ver=16.0 7 | 8 | for i in \ 9 | Blocks.txt \ 10 | DerivedAge.txt \ 11 | HangulSyllableType.txt \ 12 | Jamo.txt \ 13 | NameAliases.txt \ 14 | NamedSequences.txt \ 15 | PropertyValueAliases.txt \ 16 | UnicodeData.txt \ 17 | auxiliary/GraphemeBreakProperty.txt \ 18 | auxiliary/GraphemeBreakTest.txt \ 19 | emoji/emoji-data.txt \ 20 | Unihan.zip \ 21 | ; do 22 | echo $i 23 | curl -f\#O https://www.unicode.org/Public/${unicode_ver}/ucd/$i 24 | done 25 | echo 'emoji-test.txt (this may take a while)' 26 | curl -f\#O https://www.unicode.org/Public/emoji/${emoji_ver}/emoji-test.txt 27 | unzip -o Unihan.zip Unihan_Readings.txt 28 | rm Unihan.zip 29 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012, Delan Azabani 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /data/src/parse.rs: -------------------------------------------------------------------------------- 1 | use std::fs::File; 2 | use std::io::{BufRead, BufReader}; 3 | 4 | use color_eyre::eyre; 5 | use regex::{Captures, Regex}; 6 | 7 | pub(crate) fn parse eyre::Result<()>>( 8 | sink: &mut R, 9 | mut handler: H, 10 | path: &str, 11 | label: impl Into>, 12 | pattern: &str, 13 | ) -> eyre::Result<()> { 14 | if let Some(label) = label.into() { 15 | println!("Processing {} ({}) ...", path, label); 16 | } else { 17 | println!("Processing {} ...", path); 18 | } 19 | 20 | let source = BufReader::new(File::open(path)?); 21 | let pattern = Regex::new(pattern)?; 22 | 23 | for line in source.lines() { 24 | if let Some(captures) = pattern.captures(&line?) { 25 | handler(sink, captures)?; 26 | } 27 | } 28 | 29 | Ok(()) 30 | } 31 | -------------------------------------------------------------------------------- /shell.nix: -------------------------------------------------------------------------------- 1 | { pkgs ? 
import <nixpkgs> {} }: 2 | pkgs.mkShell { 3 | shellHook = '' 4 | # helper > requirements.txt > nanoemoji + Brotli 5 | # https://discourse.nixos.org/t/x/5522/2 6 | export LD_LIBRARY_PATH=${pkgs.lib.makeLibraryPath [pkgs.stdenv.cc.cc]} 7 | ''; 8 | 9 | buildInputs = [ 10 | # Makefile 11 | pkgs.patchelf 12 | pkgs.curl 13 | pkgs.cacert # for curl https 14 | pkgs.poppler_utils # pdfdetach(1) 15 | 16 | # package.json 17 | # pkgs.nodejs-10_x is marked as insecure 18 | # pkgs.nodejs-12_x is marked as insecure 19 | # pkgs.nodejs-14_x is ye olde (and marked as EOL) 20 | pkgs.nodejs_20 21 | 22 | # webpack.config.js > DefinePlugin 23 | pkgs.git 24 | 25 | # data > Cargo.toml 26 | pkgs.cargo # pkgs.rust_1_58.packages.stable.cargo 27 | 28 | # helper > requirements.txt 29 | pkgs.python313 30 | pkgs.uv 31 | ]; 32 | } 33 | -------------------------------------------------------------------------------- /data/src/ur.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::captures::CapturesExt; 5 | use crate::details::{Bits, Details}; 6 | use crate::pool::Popularity; 7 | 8 | pub(crate) fn ur_handler( 9 | popularity: &mut Popularity, 10 | sink: &mut [Details], 11 | captures: Captures, 12 | ) -> eyre::Result<()> { 13 | let point = usize::from_str_radix(captures.try_name("point")?, 16)?; 14 | let key = captures.try_name("key")?; 15 | let value = captures.try_name("value")?; 16 | 17 | match key { 18 | "kDefinition" => { 19 | assert!(sink[point].name.is_none()); 20 | sink[point].uhdef = Some(popularity.vote(value)); 21 | sink[point].bits |= Bits::KdefinitionExists; 22 | } 23 | "kMandarin" => { 24 | sink[point].uhman = Some(popularity.vote(value)); 25 | } 26 | _ => {} 27 | } 28 | 29 | Ok(()) 30 | } 31 | -------------------------------------------------------------------------------- /dist/nginx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # 
Generates an nginx.conf snippet that rewrites requests for 4 | # charming’s files to their Brotli-compressed siblings, based on the 5 | # current set of webpack bundles being served. 6 | # 7 | # I wrote this because I can’t be bothered compiling an ngx_brotli 8 | # module for charming.daz.cat right now, but the approach I’m using 9 | # breaks clients that don’t have Accept-Encoding: br. 10 | # 11 | # Consider using ngx_brotli with “brotli_static on;” someday. 12 | 13 | cat << end 14 | location ~ [.]br\$ { 15 | more_set_headers 'Content-Encoding: br'; 16 | expires 24h; 17 | } 18 | location / { 19 | $( 20 | ls */*.br \ 21 | | sed 's/^/\//' \ 22 | | sed 's/[.]br$//' \ 23 | | sed 's/[.]/[&]/g' \ 24 | | sed 's/^/ rewrite ^(/' \ 25 | | sed 's/$/)$ $1.br last;/' 26 | ) 27 | 28 | expires 24h; 29 | 30 | location ~ [.]html\$ { 31 | expires off; 32 | } 33 | } 34 | end 35 | -------------------------------------------------------------------------------- /README.opacus.txt: -------------------------------------------------------------------------------- 1 | To deploy charming to opacus.daz.cat, first do the following locally: 2 | 3 | 1. $ npm i 4 | 2. $ git clean -fx dist 5 | 3. $ npm run build 6 | 4. $ rsync -aL --info=progress2 --no-i-r dist/ opacus:/var/www/htdocs/www.azabani.com/labs/charming 7 | 8 | Then do this on opacus in /var/www/htdocs/www.azabani.com: 9 | 10 | 5. $ ( cd labs/charming; ./nginx.sh | doas tee /etc/nginx/.site/charming.conf ) 11 | 6. $ doas rcctl reload nginx 12 | 7. $ make BUNDLE=bundleNN; git -C _staging diff --cached --stat 13 | 8. $ make deploy 14 | 15 | After at least 72 hours (3 × expires), delete any webpack bundle 16 | directories (labs/charming/[0-9a-f]{20}) that still have an older 17 | mtime than the current deployment, then repeat steps 5 through 8. 
18 | 19 | Cleaning dist in step 2 ensures that the bundle hashes still in use 20 | get fresh mtimes, so that we don’t accidentally delete any bundles 21 | that are still in use but were introduced in an older deployment. 22 | -------------------------------------------------------------------------------- /data/src/sequence.rs: -------------------------------------------------------------------------------- 1 | use std::{collections::BTreeMap, rc::Rc}; 2 | 3 | #[derive(Debug, Default)] 4 | pub struct Sequences { 5 | pub buckets: BTreeMap<SequenceKey, Vec<Sequence>>, 6 | } 7 | 8 | pub type SequenceKey = (usize, usize); 9 | 10 | #[derive(Debug)] 11 | pub struct Sequence { 12 | pub points: Vec<usize>, 13 | pub names: Vec<Rc<str>>, 14 | } 15 | 16 | impl Sequences { 17 | pub fn insert(&mut self, points: &[usize], name: Rc<str>) { 18 | let bucket = self.buckets.entry(key(points)).or_default(); 19 | 20 | for sequence in bucket.iter_mut() { 21 | if sequence.points == points { 22 | sequence.names.push(name); 23 | return; 24 | } 25 | } 26 | 27 | bucket.push(Sequence { 28 | points: points.to_owned(), 29 | names: vec![name], 30 | }) 31 | } 32 | } 33 | 34 | pub fn key(points: &[usize]) -> SequenceKey { 35 | (points[0], points[1]) 36 | } 37 | -------------------------------------------------------------------------------- /data/src/et.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::captures::CapturesExt; 5 | use crate::details::{Alias, AliasType, Details}; 6 | use crate::pool::Popularity; 7 | use crate::sequence::Sequences; 8 | 9 | pub(crate) fn et_handler( 10 | popularity: &mut Popularity, 11 | sink: &mut [Details], 12 | sequences: &mut Sequences, 13 | captures: Captures, 14 | ) -> eyre::Result<()> { 15 | let points = captures 16 | .try_name("points")? 
17 | .split(" ") 18 | .map(|x| usize::from_str_radix(x, 16)) 19 | .collect::, _>>()?; 20 | let name = captures.try_name("name")?; 21 | 22 | if points.len() > 1 { 23 | // eprintln!("{} {}", captures.name_ok("points")?, captures.name_ok("name")?); 24 | sequences.insert(&points, popularity.vote(name)); 25 | return Ok(()); 26 | } 27 | 28 | sink[points[0]].alias.push(Alias { 29 | inner: popularity.vote(name), 30 | r#type: AliasType::Cldr, 31 | }); 32 | 33 | Ok(()) 34 | } 35 | -------------------------------------------------------------------------------- /data/src/ed.rs: -------------------------------------------------------------------------------- 1 | use color_eyre::eyre; 2 | use regex::Captures; 3 | 4 | use crate::captures::CapturesExt; 5 | use crate::details::{Details, EmojiBits}; 6 | 7 | pub(crate) fn ed_handler(sink: &mut [Details], captures: Captures) -> eyre::Result<()> { 8 | let first = captures.try_name("first")?; 9 | let last = captures.name_or("last", first); 10 | let property = captures.try_name("property")?; 11 | 12 | let start = usize::from_str_radix(first, 16)?; 13 | let len = usize::from_str_radix(last, 16)? - start + 1; 14 | 15 | for item in sink.iter_mut().skip(start).take(len) { 16 | item.ebits |= match property { 17 | "Emoji" => EmojiBits::Emoji, 18 | "Extended_Pictographic" => EmojiBits::ExtendedPictographic, 19 | "Emoji_Component" => EmojiBits::EmojiComponent, 20 | "Emoji_Presentation" => EmojiBits::EmojiPresentation, 21 | "Emoji_Modifier" => EmojiBits::EmojiModifier, 22 | "Emoji_Modifier_Base" => EmojiBits::EmojiModifierBase, 23 | x => panic!("unexpected property: {}", x), 24 | }; 25 | } 26 | 27 | Ok(()) 28 | } 29 | -------------------------------------------------------------------------------- /dist/scratch/edge-points/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 8 |

9 |
46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Welcome! You’ll need: 2 | 3 | - Nix and one of the following 4 | - nix-shell [--pure] 5 | - direnv allow 6 | - or your own environment with 7 | - make(1) + git(1) + curl(1) + pdfdetach(1) 8 | - Rust 1.58+ 9 | - Python 3.8+ and uv (for make init and make assets) 10 | - Node.js 10+ (for make assets) 11 | 12 | # fetch dependencies 13 | git submodule update --init --recursive 14 | 15 | # install dependencies 16 | npm i 17 | make [init-clean] init 18 | make init-nixos # if using NixOS 19 | 20 | # build data 21 | make data 22 | 23 | # build assets 24 | make assets 25 | 26 | # build client 27 | npm run build 28 | 29 | # start dev server 30 | npm run start 31 | 32 | # start and open 33 | npm run open 34 | 35 | # format code 36 | npm run prettier:write 37 | 38 | # check TypeScript 39 | npm run check 40 | 41 | # run unit tests 42 | npm run test 43 | 44 | # do all three 45 | npm run dwim 46 | 47 | # test coverage 48 | npm run test:coverage 49 | 50 | # how to update unicode 51 | 52 | 1. bump unicode and emoji versions in data/update.sh 53 | 2. run the update script: `( cd data; ./update.sh )` 54 | 3. update generate_egcbreak() in data/src/uax29.rs 55 | 4. 
update NAME_RULES in data/src/dynamic.rs 56 | 57 | example commit: 58 | -------------------------------------------------------------------------------- /src/state.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * @jest-environment jsdom 3 | */ 4 | 5 | import { toFragment, getHashPoints, fixHashPoints } from "./state"; 6 | 7 | test("toFragment returns correct value", () => 8 | void expect(toFragment([0x1f496])).toBe("#1F496")); 9 | 10 | test("getHashPoints returns correct point", () => { 11 | expect(getHashPoints("#1F496")).toEqual([0x1f496]); 12 | expect(getHashPoints("#1f496")).toEqual([0x1f496]); 13 | }); 14 | 15 | test("getHashPoints returns or when hash is undefined", () => 16 | void expect(getHashPoints(undefined, null)).toBe(null)); 17 | 18 | test("getHashPoints returns or when hash is empty", () => 19 | void expect(getHashPoints("", null)).toBe(null)); 20 | 21 | test("getHashPoints returns or when hash is invalid", () => 22 | void expect(getHashPoints("#G", null)).toBe(null)); 23 | 24 | test("getHashPoints returns or when hash has trailing rubbish", () => 25 | void expect(getHashPoints("#FG", null)).toBe(null)); 26 | 27 | test("getHashPoints returns or when point ≥ 0x110000", () => 28 | void expect(getHashPoints("#110000", null)).toBe(null)); 29 | 30 | test("fixHashPoints calls History#replaceState iff it needs fixing", () => { 31 | const replaceState = jest.spyOn(history, "replaceState"); 32 | location.hash = "#f"; 33 | 34 | replaceState.mockClear(); 35 | fixHashPoints(location.hash, [0xf]); 36 | expect(replaceState).toHaveBeenCalledWith(null, "", "#F"); 37 | 38 | replaceState.mockClear(); 39 | fixHashPoints(location.hash, [0xf]); 40 | expect(replaceState).not.toHaveBeenCalled(); 41 | }); 42 | -------------------------------------------------------------------------------- /data/src/pool.rs: -------------------------------------------------------------------------------- 1 | use 
std::collections::{HashMap, HashSet}; 2 | use std::rc::Rc; 3 | 4 | #[derive(Debug, Default)] 5 | pub(crate) struct Popularity { 6 | inner: HashSet>, 7 | } 8 | 9 | #[derive(Debug, Default)] 10 | pub(crate) struct Pool { 11 | inner: HashMap, usize>, 12 | } 13 | 14 | impl Popularity { 15 | pub fn vote(&mut self, string: &str) -> Rc { 16 | if let Some(result) = self.inner.get(string) { 17 | return result.clone(); 18 | } 19 | 20 | let result: Rc = string.to_owned().into(); 21 | 22 | self.inner.insert(result.clone()); 23 | 24 | result 25 | } 26 | 27 | pub fn report(mut self) -> Vec> { 28 | let mut result: Vec<_> = self 29 | .inner 30 | .drain() 31 | .filter(|x| Rc::strong_count(x) > 1) 32 | .collect(); 33 | 34 | result.sort_by(|p, q| { 35 | Rc::strong_count(p) 36 | .cmp(&Rc::strong_count(q)) 37 | .reverse() 38 | .then_with(|| p.cmp(q)) 39 | }); 40 | 41 | result 42 | } 43 | } 44 | 45 | impl Pool { 46 | pub fn r#use(&self, string: &str) -> usize { 47 | self.inner[string] 48 | } 49 | } 50 | 51 | impl From<&Vec>> for Pool { 52 | fn from(report: &Vec>) -> Self { 53 | let mut result = Self::default(); 54 | 55 | for string in report { 56 | result.inner.insert(string.clone(), result.inner.len()); 57 | } 58 | 59 | result 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/encoding.ts: -------------------------------------------------------------------------------- 1 | export function pointToString(point: number): string { 2 | if (isSurrogate(point)) { 3 | return "\uFFFD"; 4 | } 5 | 6 | return String.fromCodePoint(point); 7 | } 8 | 9 | export function stringToPoint(string: string): number | null { 10 | const result = string.codePointAt(0); 11 | 12 | if (result == undefined) { 13 | return null; 14 | } 15 | 16 | return result; 17 | } 18 | 19 | export function stringToUnits16(string: string): Array { 20 | // string.split("") is not to be confused with [...string] 21 | return string.split("").map(stringToUnit16); 22 | } 23 | 24 | 
export function stringToUnits8(string: string): Array { 25 | // WTF-8 code units packed in UTF-16 code units 26 | const octets = unescape(encodeURIComponent(string)); 27 | 28 | // string.split("") is not to be confused with [...string] 29 | return octets.split("").map(stringToUnit16); 30 | } 31 | 32 | export function stringToUnit16(string: string): number { 33 | // https://jsperf.com/charcodeat-vs-charcodeat0 34 | return string.charCodeAt(0); 35 | } 36 | 37 | export function isSurrogate(point: number): boolean { 38 | return (point & 0xfffff800) == 0xd800; 39 | } 40 | 41 | export function pointLengthUnits16(point: number): 1 | 2 { 42 | if (point > 0x10ffff || isSurrogate(point)) throw new RangeError(); 43 | return point > 0xffff ? 2 : 1; 44 | } 45 | 46 | export function pointsToString(points: number[]): string { 47 | return points.map((x) => pointToString(x)).join(""); 48 | } 49 | 50 | export function stringToPoints(string: string): number[] { 51 | return [...string].map((x) => stringToPoint(x)!); 52 | } 53 | -------------------------------------------------------------------------------- /src/state.ts: -------------------------------------------------------------------------------- 1 | import React from "react"; 2 | 3 | import { Data } from "./data"; 4 | import { toHexadecimal } from "./formatting"; 5 | 6 | export function toFragment(points: number[]): string { 7 | return `#${points.map((x) => toHexadecimal(x)).join("-")}`; 8 | } 9 | 10 | export function getHashPoints( 11 | hash: string | undefined, 12 | or?: undefined, 13 | ): number[] | undefined; 14 | export function getHashPoints(hash: string | undefined, or: D): number[] | D; 15 | export function getHashPoints(hash: string | undefined, or: any) { 16 | if (hash == "" || typeof hash == "undefined") { 17 | return or; 18 | } 19 | 20 | const parts = hash.slice(1).split("-"); 21 | const point = parts.map((x) => parseInt(x, 16)); 22 | 23 | if (point.some((x) => x != x)) { 24 | return or; 25 | } 26 | 27 | if 
(toFragment(point).length != hash.length) { 28 | return or; 29 | } 30 | 31 | if (point.some((x) => x >= 0x110000)) { 32 | return or; 33 | } 34 | 35 | return point; 36 | } 37 | 38 | export function fixHashPoints(hash: string, points: number[]): void { 39 | const expected = toFragment(points); 40 | const actual = hash; 41 | 42 | if (actual != expected) { 43 | history.replaceState(null, "", expected); 44 | } 45 | } 46 | 47 | export function isSequence(points: number[]): boolean { 48 | return points.length > 1; 49 | } 50 | 51 | export function ifSequence( 52 | points: number[], 53 | yes: (_: number[]) => T, 54 | no: (_: number) => T, 55 | ): T { 56 | if (isSequence(points)) return yes(points); 57 | else return no(points[0]); 58 | } 59 | 60 | export const DataContext = React.createContext(null); 61 | export const PointContext = React.createContext([0]); // FIXME 62 | -------------------------------------------------------------------------------- /src/encoding.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | pointToString, 3 | stringToPoint, 4 | stringToUnits16, 5 | stringToUnits8, 6 | isSurrogate, 7 | } from "./encoding"; 8 | 9 | test("pointToString returns correct string for BMP point", () => { 10 | expect(pointToString(0x8fea)).toBe("迪"); 11 | expect(pointToString(0x5170)).toBe("兰"); 12 | }); 13 | 14 | test("pointToString returns correct string for astral point", () => 15 | void expect(pointToString(0x1f496)).toBe("💖")); 16 | 17 | test("pointToString returns U+FFFD when point is surrogate", () => 18 | void expect(pointToString(0xd800)).toBe("�")); 19 | 20 | test("stringToPoint returns correct point for BMP string", () => 21 | void expect(stringToPoint("⏿")).toBe(0x23ff)); 22 | 23 | test("stringToPoint returns correct point for astral string", () => 24 | void expect(stringToPoint("💖")).toBe(0x1f496)); 25 | 26 | test("stringToPoint returns null for empty string", () => 27 | void 
expect(stringToPoint("")).toBe(null)); 28 | 29 | test("stringToUnits16 returns correct value for BMP string", () => 30 | void expect(stringToUnits16("⏿")).toEqual([0x23ff])); 31 | 32 | test("stringToUnits16 returns correct value for astral string", () => 33 | void expect(stringToUnits16("💖")).toEqual([0xd83d, 0xdc96])); 34 | 35 | test("stringToUnits8 returns correct value for BMP string", () => 36 | void expect(stringToUnits8("⏿")).toEqual([0xe2, 0x8f, 0xbf])); 37 | 38 | test("stringToUnits8 returns correct value for astral string", () => 39 | void expect(stringToUnits8("💖")).toEqual([0xf0, 0x9f, 0x92, 0x96])); 40 | 41 | test("isSurrogate returns correct value", () => { 42 | expect(isSurrogate(0xd7ff)).toEqual(false); 43 | expect(isSurrogate(0xd800)).toEqual(true); 44 | expect(isSurrogate(0xdbff)).toEqual(true); 45 | expect(isSurrogate(0xdc00)).toEqual(true); 46 | expect(isSurrogate(0xdfff)).toEqual(true); 47 | expect(isSurrogate(0xe000)).toEqual(false); 48 | }); 49 | -------------------------------------------------------------------------------- /src/Display.test.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | pointToSyntheticTofu, 3 | pointToDiagonal, 4 | pointToSubstitute, 5 | } from "./Display"; 6 | import { getData } from "./testing"; 7 | 8 | test("pointToSyntheticTofu returns correct value", () => { 9 | expect(pointToSyntheticTofu(0xd7ff)).toBe(null); 10 | expect(pointToSyntheticTofu(0xd800)).toBe("D800"); 11 | expect(pointToSyntheticTofu(0xdfff)).toBe("DFFF"); 12 | expect(pointToSyntheticTofu(0xe000)).toBe(null); 13 | expect(pointToSyntheticTofu(0xfdcf)).toBe(null); 14 | expect(pointToSyntheticTofu(0xfdd0)).toBe("FDD0"); 15 | expect(pointToSyntheticTofu(0xfdef)).toBe("FDEF"); 16 | expect(pointToSyntheticTofu(0xfdf0)).toBe(null); 17 | }); 18 | 19 | test("pointToDiagonal returns correct value", () => { 20 | expect(pointToDiagonal(0x23ff)).toBe(null); 21 | expect(pointToDiagonal(0xfeff)).toBe("BOM"); 22 | 
expect(pointToDiagonal(0x180b)).toBe("FVS1"); 23 | expect(pointToDiagonal(0x180c)).toBe("FVS2"); 24 | expect(pointToDiagonal(0x180d)).toBe("FVS3"); 25 | expect(pointToDiagonal(0xfdef)).toBe(null); 26 | expect(pointToDiagonal(0xfe00)).toBe("VS1"); 27 | expect(pointToDiagonal(0xfe0f)).toBe("VS16"); 28 | expect(pointToDiagonal(0xfe10)).toBe(null); 29 | expect(pointToDiagonal(0xe00ff)).toBe(null); 30 | expect(pointToDiagonal(0xe0100)).toBe("VS17"); 31 | expect(pointToDiagonal(0xe01ef)).toBe("VS256"); 32 | expect(pointToDiagonal(0xe01f0)).toBe(null); 33 | }); 34 | 35 | test("pointToSubstitute returns correct value", () => { 36 | const data = getData(); 37 | expect(pointToSubstitute(data, 0x007f)).toBe("␡"); 38 | expect(pointToSubstitute(data, 0x0000)).toBe("␀"); 39 | expect(pointToSubstitute(data, 0x001f)).toBe("␟"); 40 | expect(pointToSubstitute(null, 0x0020)).toBe(null); 41 | expect(pointToSubstitute(null, 0xe001f)).toBe(null); 42 | expect(pointToSubstitute(data, 0xe0020)).toBe("␠ₜ"); 43 | expect(pointToSubstitute(data, 0xe0021)).toBe("!ₜ"); 44 | expect(pointToSubstitute(data, 0xe007e)).toBe("~ₜ"); 45 | expect(pointToSubstitute(null, 0xe007f)).toBe(null); 46 | }); 47 | -------------------------------------------------------------------------------- /src/search.ts: -------------------------------------------------------------------------------- 1 | import { AliasType, Data } from "./data"; 2 | import SearchWorker from "./search.worker"; 3 | 4 | export type SearchResult = BaseSearchResult & 5 | ( 6 | | SequenceValueSearchResult 7 | | SequenceNameSearchResult 8 | | NameishSearchResult 9 | | AliasSearchResult 10 | | OtherSearchResult 11 | ); 12 | 13 | interface BaseSearchResult { 14 | points: number[]; 15 | score: number; 16 | } 17 | 18 | interface SequenceValueSearchResult { 19 | reason: "sequenceValue"; 20 | sequenceIndex: number; 21 | } 22 | 23 | interface SequenceNameSearchResult { 24 | reason: "sequenceName"; 25 | offset: number; 26 | sequenceIndex: number; 27 | 
sequenceNameIndex: number; 28 | } 29 | 30 | interface NameishSearchResult { 31 | reason: "name" | "uhdef"; 32 | offset: number; 33 | } 34 | 35 | interface AliasSearchResult { 36 | reason: "alias"; 37 | offset: number; 38 | aliasIndex: number; 39 | aliasType: AliasType; 40 | } 41 | 42 | interface OtherSearchResult { 43 | reason: "hex" | "dec" | "breakdown"; 44 | } 45 | 46 | interface SearchResultKey { 47 | key: string; 48 | } 49 | 50 | export type KeyedSearchResult = SearchResult & SearchResultKey; 51 | 52 | let worker = new SearchWorker(); 53 | let listener: ((event: MessageEvent) => void) | null = null; 54 | let cache: Data | null = null; 55 | 56 | export function search(data: Data, query: string) { 57 | if (listener != null) { 58 | console.warn("search: need to terminate SearchWorker"); 59 | 60 | worker.removeEventListener("message", listener); 61 | worker.terminate(); 62 | 63 | worker = new SearchWorker(); 64 | listener = null; 65 | cache = null; 66 | } 67 | 68 | return new Promise((resolve) => { 69 | listener = (event: MessageEvent) => { 70 | worker.removeEventListener("message", listener!); 71 | listener = null; 72 | cache = data; 73 | resolve(event); 74 | }; 75 | 76 | worker.addEventListener("message", listener); 77 | 78 | if (cache == data) { 79 | worker.postMessage({ query }); 80 | } else { 81 | worker.postMessage({ data, query }); 82 | } 83 | }); 84 | } 85 | -------------------------------------------------------------------------------- /flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-compat": { 4 | "locked": { 5 | "lastModified": 1733328505, 6 | "narHash": "sha256-NeCCThCEP3eCl2l/+27kNNK7QrwZB1IJCrXfrbv5oqU=", 7 | "owner": "edolstra", 8 | "repo": "flake-compat", 9 | "rev": "ff81ac966bb2cae68946d5ed5fc4994f96d0ffec", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "edolstra", 14 | "repo": "flake-compat", 15 | "type": "github" 16 | } 17 | }, 18 | "flake-utils": { 19 
| "inputs": { 20 | "systems": "systems" 21 | }, 22 | "locked": { 23 | "lastModified": 1731533236, 24 | "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=", 25 | "owner": "numtide", 26 | "repo": "flake-utils", 27 | "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b", 28 | "type": "github" 29 | }, 30 | "original": { 31 | "owner": "numtide", 32 | "repo": "flake-utils", 33 | "type": "github" 34 | } 35 | }, 36 | "nixpkgs": { 37 | "locked": { 38 | "lastModified": 1741865919, 39 | "narHash": "sha256-4thdbnP6dlbdq+qZWTsm4ffAwoS8Tiq1YResB+RP6WE=", 40 | "owner": "nixos", 41 | "repo": "nixpkgs", 42 | "rev": "573c650e8a14b2faa0041645ab18aed7e60f0c9a", 43 | "type": "github" 44 | }, 45 | "original": { 46 | "owner": "nixos", 47 | "ref": "nixpkgs-unstable", 48 | "repo": "nixpkgs", 49 | "type": "github" 50 | } 51 | }, 52 | "root": { 53 | "inputs": { 54 | "flake-compat": "flake-compat", 55 | "flake-utils": "flake-utils", 56 | "nixpkgs": "nixpkgs" 57 | } 58 | }, 59 | "systems": { 60 | "locked": { 61 | "lastModified": 1681028828, 62 | "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", 63 | "owner": "nix-systems", 64 | "repo": "default", 65 | "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", 66 | "type": "github" 67 | }, 68 | "original": { 69 | "owner": "nix-systems", 70 | "repo": "default", 71 | "type": "github" 72 | } 73 | } 74 | }, 75 | "root": "root", 76 | "version": 7 77 | } 78 | -------------------------------------------------------------------------------- /src/formatting.ts: -------------------------------------------------------------------------------- 1 | import { 2 | stringToUnits16, 3 | stringToUnits8, 4 | pointToString, 5 | isSurrogate, 6 | } from "./encoding"; 7 | 8 | export function toHexadecimal(value: number, length = 0): string { 9 | return value.toString(16).toUpperCase().padStart(length, "0"); 10 | } 11 | 12 | export function toDecimal(value: number, length = 0): string { 13 | return value.toString(10).padStart(length, "0"); 14 | } 15 | 
16 | export function pointToYouPlus(point: number, prefix = "U+"): string { 17 | return `${prefix}${toHexadecimal(point, 4)}`; 18 | } 19 | 20 | export function pointToTofu(point: number): string { 21 | if (0x10000 <= point) { 22 | return toHexadecimal(point, 6); 23 | } 24 | 25 | return toHexadecimal(point, 4); 26 | } 27 | 28 | export function pointToString16(point: number): string | null { 29 | if (isSurrogate(point)) { 30 | return null; 31 | } 32 | 33 | return stringToUnits16(pointToString(point)) 34 | .map((x) => toHexadecimal(x, 4)) 35 | .join(" "); 36 | } 37 | 38 | export function pointToString8(point: number): string | null { 39 | if (isSurrogate(point)) { 40 | return null; 41 | } 42 | 43 | return stringToUnits8(pointToString(point)) 44 | .map((x) => toHexadecimal(x, 2)) 45 | .join(" "); 46 | } 47 | 48 | export function pointToEntity10(point: number): string | null { 49 | // HTML § 12.1.4 50 | if ( 51 | (point >= 0x0000 && point < 0x0009) || 52 | (point >= 0x000b && point < 0x000c) || 53 | (point >= 0x000d && point < 0x0020) || 54 | (point >= 0x007f && point < 0x00a0) || 55 | (point >= 0xfdd0 && point < 0xfdf0) || 56 | (point & 0xfffe) == 0xfffe 57 | ) { 58 | return null; 59 | } 60 | 61 | return `&#${point};`; 62 | } 63 | 64 | export function pointsToYouPlus(points: number[]): string { 65 | return `U+${joinSequence(points, " ", (x) => pointToYouPlus(x, ""))}`; 66 | } 67 | 68 | export function pointsToYouPlusEllipsis(points: number[]): string { 69 | if (points.length < 2) return pointToYouPlus(points[0]); 70 | return `${pointToYouPlus(points[0])}…`; 71 | } 72 | 73 | export function joinSequence( 74 | points: number[], 75 | sep: string, 76 | fun: (_: number) => string | null, 77 | ): string | null { 78 | let ok = true; 79 | const result = points.map((x) => fun(x) ?? ((ok = false), null)); 80 | return ok ? 
result.join(sep) : null; 81 | } 82 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "charming", 3 | "version": "0.0.0", 4 | "description": "", 5 | "private": true, 6 | "scripts": { 7 | "build": "webpack --mode production", 8 | "start": "webpack-dev-server --mode development", 9 | "open": "webpack-dev-server --mode development --open", 10 | "prettier": "prettier '**/*.{json,css,js,ts,jsx,tsx}'", 11 | "prettier:write": "npm run prettier -- --write", 12 | "check": "tsc", 13 | "test": "jest", 14 | "test:coverage": "npm run test -- --coverage", 15 | "dwim": "npm run prettier:write && npm run check && npm run test" 16 | }, 17 | "repository": { 18 | "type": "git", 19 | "url": "git+https://github.com/delan/charming.git" 20 | }, 21 | "author": "", 22 | "license": "ISC", 23 | "bugs": { 24 | "url": "https://github.com/delan/charming/issues" 25 | }, 26 | "homepage": "https://github.com/delan/charming#readme", 27 | "devDependencies": { 28 | "@babel/core": "^7.23.7", 29 | "@babel/preset-env": "^7.23.8", 30 | "@babel/preset-react": "^7.23.3", 31 | "@babel/preset-typescript": "^7.23.3", 32 | "@types/jest": "^29.5.11", 33 | "@types/react": "^18.2.47", 34 | "@types/react-dom": "^18.2.18", 35 | "@types/react-window": "^1.8.8", 36 | "babel-jest": "^29.7.0", 37 | "babel-loader": "^9.1.3", 38 | "clipboard-polyfill": "^4.0.2", 39 | "compression-webpack-plugin": "^10.0.0", 40 | "core-js": "^3.35.0", 41 | "css-loader": "^6.9.0", 42 | "file-loader": "^6.2.0", 43 | "glyphhanger": "^5.0.0", 44 | "grapheme-iterator": "^1.15.0", 45 | "grapheme-splitter": "^1.0.4", 46 | "html-webpack-plugin": "^5.6.0", 47 | "jest": "^29.7.0", 48 | "jest-environment-jsdom": "^29.7.0", 49 | "prettier": "^3.1.1", 50 | "react": "^18.2.0", 51 | "react-dom": "^18.2.0", 52 | "react-use": "^17.4.2", 53 | "react-virtualized-auto-sizer": "^1.0.20", 54 | "react-window": 
"^1.8.10", 55 | "regenerator-runtime": "^0.14.1", 56 | "sass": "^1.69.7", 57 | "sass-loader": "^13.3.3", 58 | "style-loader": "^3.3.4", 59 | "typescript": "^5.3.3", 60 | "webpack": "^5.89.0", 61 | "webpack-cli": "^5.1.4", 62 | "webpack-dev-server": "^4.15.1", 63 | "worker-loader": "^3.0.8" 64 | }, 65 | "overrides": { 66 | "@types/node": "20.11.0" 67 | }, 68 | "//": [ 69 | "override @types/node@12.0.7 to fix TS2315 in @types/ws" 70 | ] 71 | } 72 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .POSIX: 2 | 3 | TWEMOJI = 15.1.0 4 | TWEMOJI_REF = refs/tags/v$(TWEMOJI) 5 | TWEMOJI_DIR = $(TWEMOJI) 6 | SYMBOLA = 14.00 7 | 8 | data: 9 | cd data && cargo run 10 | 11 | data-clean: 12 | cd data && rm -f data.string.json data.*.bin 13 | 14 | assets: helper/dist/twemoji-$(TWEMOJI).woff2 helper/dist/Symbola-$(SYMBOLA).woff2 helper/dist/MaterialSymbolsOutlined.woff2 15 | 16 | assets-clean: 17 | cd helper && rm -Rf build dist twemoji-$(TWEMOJI) twemoji-$(TWEMOJI_DIR) 18 | 19 | init: helper/.venv pyproject.toml uv.lock 20 | . helper/.venv/bin/activate && uv pip install -r pyproject.toml 21 | 22 | init-clean: 23 | rm -Rf helper/.venv 24 | 25 | init-nixos: init 26 | # FIXME this can’t possibly be the best solution 27 | patchelf --set-interpreter $$(cat $$NIX_CC/nix-support/dynamic-linker) helper/.venv/bin/ninja 28 | helper/.venv/bin/ninja --version 29 | 30 | helper/.venv: 31 | uv venv -- '$@' 32 | 33 | helper/dist/twemoji-$(TWEMOJI).woff2: helper/twemoji-$(TWEMOJI) 34 | cd data && cargo run -- $(TWEMOJI) 35 | helper/nanoemoji.sh twemoji-$(TWEMOJI)/assets/svg 36 | . helper/.venv/bin/activate && >&2 npx glyphhanger --formats=woff2 --subset=helper/build/Font.ttf 37 | mkdir -p helper/dist 38 | mv helper/build/Font-subset.woff2 $@ 39 | 40 | helper/dist/Symbola-$(SYMBOLA).woff2: helper/Symbola-$(SYMBOLA).otf 41 | . 
helper/.venv/bin/activate && >&2 npx glyphhanger --formats=woff2 --subset=$? 42 | mkdir -p helper/dist 43 | mv helper/Symbola-$(SYMBOLA)-subset.woff2 $@ 44 | 45 | helper/dist/MaterialSymbolsOutlined.woff2: helper/MaterialSymbolsOutlined.woff2 46 | . helper/.venv/bin/activate && >&2 npx glyphhanger --formats=woff2 --subset='$?' --whitelist='' 47 | mkdir -p helper/dist 48 | mv helper/MaterialSymbolsOutlined-subset.woff2 $@ 49 | 50 | helper/twemoji-$(TWEMOJI): helper/twemoji-$(TWEMOJI).tar.gz 51 | cd helper && tar xzf twemoji-$(TWEMOJI).tar.gz 52 | if [ $(TWEMOJI_DIR) != $(TWEMOJI) ]; then cd helper && mv twemoji-$(TWEMOJI_DIR) twemoji-$(TWEMOJI); fi 53 | 54 | helper/twemoji-$(TWEMOJI).tar.gz: 55 | curl -Lo $@ https://github.com/jdecked/twemoji/archive/$(TWEMOJI_REF).tar.gz 56 | 57 | helper/Symbola-$(SYMBOLA).otf: helper/Symbola-$(SYMBOLA).pdf 58 | pdfdetach -savefile Symbola.otf -o $@ helper/Symbola-$(SYMBOLA).pdf 59 | 60 | helper/Symbola-$(SYMBOLA).pdf: 61 | curl -Lo $@ https://web.archive.org/web/20240107144224/https://dn-works.com/wp-content/uploads/2021/UFAS121921/Symbola.pdf 62 | 63 | .PHONY: data data-clean assets assets-clean init init-clean init-nixos 64 | -------------------------------------------------------------------------------- /src/perf.ts: -------------------------------------------------------------------------------- 1 | import "core-js/stable"; 2 | import "regenerator-runtime/runtime"; 3 | 4 | import GraphemeSplitter from "grapheme-splitter"; 5 | import GraphemeIterator from "grapheme-iterator"; 6 | import { Data, getNextClusterBreak } from "./data"; 7 | import { fetchAllData } from "./fetch"; 8 | 9 | const splitter = new GraphemeSplitter(); 10 | const textarea = document.querySelector("textarea")!; 11 | const button = document.querySelector("button")!; 12 | 13 | fetchAllData().then((data) => { 14 | button.disabled = false; 15 | button.addEventListener("click", () => { 16 | perfTest(data, textarea.value); 17 | }); 18 | }); 19 | 20 | function perfTest(data: 
/** 32-bit unsigned multiplication: both operands and the result are reduced modulo 2^32. */
function mul(x: number, y: number): number {
  const product = Math.imul(x >>> 0, y >>> 0);
  return product >>> 0;
}

/**
 * One FNV-1a round: mixes the low byte of `x` into `h` and multiplies by the
 * 32-bit FNV prime. `h` defaults to the 32-bit FNV offset basis.
 */
function fnv1a(x: number, h: number = 0x811c9dc5): number {
  const mixed = h ^ (x & 0xff);
  return mul(mixed, 0x01000193);
}

/** Hashes one UTF-16 code unit: the high byte first, then the low byte. */
function hashChar(x: number, h: number): number {
  const afterHighByte = fnv1a(x >> 8, h);
  return fnv1a(x, afterHighByte);
}

/**
 * Folds a string into the running hash `h`: first the low byte of its length,
 * then every UTF-16 code unit in order.
 */
function hashString(x: string, h: number): number {
  let state = fnv1a(x.length, h);
  let index = 0;
  while (index < x.length) {
    state = hashChar(x.charCodeAt(index), state);
    index += 1;
  }
  return state;
}
// Prefix strings shared by several rows of the table below.
const CJK_UNIFIED: &str = "CJK UNIFIED IDEOGRAPH-";
const CJK_COMPATIBILITY: &str = "CJK COMPATIBILITY IDEOGRAPH-";
const TANGUT: &str = "TANGUT IDEOGRAPH-";

// Table 4-8. Name Derivation Rule Prefix Strings
// https://www.unicode.org/versions/Unicode16.0.0/core-spec/chapter-4/#G2082
//
// Each row is (first code point, last code point, derivation rule, name prefix).
#[rustfmt::skip]
pub(crate) const NAME_RULES: [(usize, usize, NameRule, &str); 19] = [
    (0xAC00,  0xD7A3,  NameRule::NR1, "HANGUL SYLLABLE "),
    (0x3400,  0x4DBF,  NameRule::NR2, CJK_UNIFIED),
    (0x4E00,  0x9FFF,  NameRule::NR2, CJK_UNIFIED),
    (0x20000, 0x2A6DF, NameRule::NR2, CJK_UNIFIED),
    (0x2A700, 0x2B739, NameRule::NR2, CJK_UNIFIED),
    (0x2B740, 0x2B81D, NameRule::NR2, CJK_UNIFIED),
    (0x2B820, 0x2CEA1, NameRule::NR2, CJK_UNIFIED),
    (0x2CEB0, 0x2EBE0, NameRule::NR2, CJK_UNIFIED),
    (0x2EBF0, 0x2EE5D, NameRule::NR2, CJK_UNIFIED),
    (0x30000, 0x3134A, NameRule::NR2, CJK_UNIFIED),
    (0x31350, 0x323AF, NameRule::NR2, CJK_UNIFIED),
    (0x13460, 0x143FA, NameRule::NR2, "EGYPTIAN HIEROGLYPH-"),
    (0x17000, 0x187F7, NameRule::NR2, TANGUT),
    (0x18D00, 0x18D08, NameRule::NR2, TANGUT),
    (0x18B00, 0x18CD5, NameRule::NR2, "KHITAN SMALL SCRIPT CHARACTER-"),
    (0x1B170, 0x1B2FB, NameRule::NR2, "NUSHU CHARACTER-"),
    (0xF900,  0xFA6D,  NameRule::NR2, CJK_COMPATIBILITY),
    (0xFA70,  0xFAD9,  NameRule::NR2, CJK_COMPATIBILITY),
    (0x2F800, 0x2FA1D, NameRule::NR2, CJK_COMPATIBILITY),
];

/// Which name derivation rule a `NAME_RULES` row uses.
pub(crate) enum NameRule {
    /// Used by the Hangul syllable row.
    NR1,
    /// Used by every other row.
    NR2,
}
L_COUNT: usize = 19; 59 | const V_COUNT: usize = 21; 60 | const T_COUNT: usize = 28; 61 | const N_COUNT: usize = V_COUNT * T_COUNT; 62 | // const S_COUNT: usize = L_COUNT * N_COUNT; 63 | 64 | let s = point; 65 | let hst = data[s].hst; 66 | 67 | match hst { 68 | Some(HangulSyllableType::Lv | HangulSyllableType::Lvt) => { 69 | let s_index = s - S_BASE; 70 | let l_index = s_index / N_COUNT; 71 | let v_index = (s_index % N_COUNT) / T_COUNT; 72 | let t_index = s_index % T_COUNT; 73 | assert!(hst == Some(HangulSyllableType::Lv) || t_index > 0); 74 | 75 | Some((l_index, v_index, t_index)) 76 | } 77 | None => None, 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/fetch.ts: -------------------------------------------------------------------------------- 1 | import info from "../data/data.info.json"; 2 | import string from "../data/data.string.bin"; 3 | import bits from "../data/data.bits.bin"; 4 | import ebits from "../data/data.ebits.bin"; 5 | import pagebits from "../data/data.pagebits.bin"; 6 | import name from "../data/data.name.bin"; 7 | import aliasc from "../data/data.aliasc.bin"; 8 | import aliasi from "../data/data.aliasi.bin"; 9 | import aliass from "../data/data.aliass.bin"; 10 | import aliast from "../data/data.aliast.bin"; 11 | import dnrp from "../data/data.dnrp.bin"; 12 | import gb from "../data/data.gb.bin"; 13 | import gc from "../data/data.gc.bin"; 14 | import block from "../data/data.block.bin"; 15 | import age from "../data/data.age.bin"; 16 | import hlvt from "../data/data.hlvt.bin"; 17 | import hjsn from "../data/data.hjsn.bin"; 18 | import uhdef from "../data/data.uhdef.bin"; 19 | import uhman from "../data/data.uhman.bin"; 20 | import seqb from "../data/data.seqb.bin"; 21 | import seqp from "../data/data.seqp.bin"; 22 | import seqn from "../data/data.seqn.bin"; 23 | 24 | import { Data } from "./data"; 25 | 26 | export function fetchAllData(): Promise { 27 | return fetchData( 28 | string, 29 | 
/**
 * Fetches the string table (JSON) and all binary tables in parallel and
 * assembles them, together with the statically imported `info`, into a `Data`.
 *
 * `paths` must be given in the order bits, ebits, pagebits, name, aliasc,
 * aliasi, aliass, aliast, dnrp, gb, gc, block, age, hlvt, hjsn, uhdef, uhman,
 * seqb, seqp, seqn, because the results are destructured positionally.
 *
 * Rejects if any request fails or returns a non-2xx status.
 */
async function fetchData(
  stringPath: string,
  ...paths: string[]
): Promise<Data> {
  const [
    string,
    [
      bits,
      ebits,
      pagebits,
      name,
      aliasc,
      aliasi,
      aliass,
      aliast,
      dnrp,
      gb,
      gc,
      block,
      age,
      hlvt,
      hjsn,
      uhdef,
      uhman,
      seqb,
      seqp,
      seqn,
    ],
  ] = await Promise.all([
    fetchJson(stringPath),
    Promise.all(paths.map((path) => fetchDataView(path))),
  ]);

  return {
    info,
    string,
    bits,
    ebits,
    pagebits,
    name,
    aliasc,
    aliasi,
    aliass,
    aliast,
    dnrp,
    gb,
    gc,
    block,
    age,
    hlvt,
    hjsn,
    uhdef,
    uhman,
    seqb,
    seqp,
    seqn,
  };
}

/**
 * fetch() only rejects on network failure; an HTTP error such as 404 still
 * resolves. Reject here so an error page is never parsed as data.
 */
async function fetchOk(path: string): Promise<Response> {
  const response = await fetch(path);
  if (!response.ok) {
    throw new Error(
      `failed to fetch ${path}: ${response.status} ${response.statusText}`,
    );
  }
  return response;
}

/** Fetches `path` and parses the body as JSON (used for the string table). */
async function fetchJson(path: string): Promise<Data["string"]> {
  const response = await fetchOk(path);
  return response.json();
}

/** Fetches `path` and wraps the whole response body in a DataView. */
async function fetchDataView(path: string): Promise<DataView> {
  const response = await fetchOk(path);
  const buffer = await response.arrayBuffer();
  return new DataView(buffer);
}
/// Expands the `<…, First>` / `<…, Last>` ranges collected from UnicodeData
/// into a map from every code point in a range to that range's first code point.
///
/// `ranges` maps a range name to `(first, last)`, where `last` is `None` until
/// the matching `Last` line has been seen.
///
/// # Panics
///
/// Panics if any range is still missing its `Last` entry, naming that range.
pub(crate) fn process_ud_ranges(
    ranges: HashMap<String, (usize, Option<usize>)>,
) -> HashMap<usize, usize> {
    // Check every range before building anything, so an unterminated range is
    // reported no matter where it falls in the map's iteration order.
    if let Some(name) = ranges
        .iter()
        .find_map(|(name, (_first, last))| last.is_none().then_some(name))
    {
        panic!("missing Last in UnicodeData for range {name:?}");
    }

    let mut result = HashMap::default();

    for &(first, last) in ranges.values() {
        let last = last.expect("checked above");
        result.extend((first..=last).map(|point| (point, first)));
    }

    result
}
/**
 * Builds a fixture DataView made of two parts:
 *
 * 1. 0x1100 big-endian uint16 entries forming an identity table (entry i holds i);
 * 2. `len` further bytes, zero-initialised, which `fun` fills in. `fun` receives
 *    the view and the byte offset where that second part starts.
 */
function makeSparseWithDonkeyVote(
  len: number,
  fun: (_: DataView, start: number) => void,
): DataView {
  const entryCount = 0x1100;
  const tableBytes = entryCount * 2;
  const view = new DataView(new ArrayBuffer(tableBytes + len));

  let entry = 0;
  while (entry < entryCount) {
    view.setUint16(entry * 2, entry);
    entry += 1;
  }

  fun(view, tableBytes);
  return view;
}
path: path.resolve(__dirname, "dist"), 18 | }, 19 | resolve: { 20 | extensions: [".js", ".ts", ".jsx", ".tsx"], 21 | }, 22 | module: { 23 | rules: [ 24 | { 25 | test: /[.]sass$/, 26 | exclude: /[/]node_modules[/]/, 27 | use: [ 28 | "style-loader", 29 | "css-loader", 30 | { 31 | loader: "sass-loader", 32 | options: { 33 | implementation: require("sass"), 34 | }, 35 | }, 36 | ], 37 | }, 38 | { 39 | test: /[.]worker[.](js|ts)x?$/, 40 | exclude: /[/]node_modules[/]/, 41 | use: [ 42 | { 43 | loader: "worker-loader", 44 | options: { 45 | inline: "no-fallback", 46 | chunkFilename: "[hash:20]/[name].js", 47 | }, 48 | }, 49 | ], 50 | }, 51 | { 52 | test: /[.](js|ts)x?$/, 53 | exclude: /[/]node_modules[/]/, 54 | loader: "babel-loader", 55 | }, 56 | { 57 | test: /[.](woff2|woff|ttf|otf)$/, 58 | exclude: /[/]node_modules[/]/, 59 | type: "asset/resource", 60 | }, 61 | { 62 | test: /[.]bin$/, 63 | exclude: /[/]node_modules[/]/, 64 | type: "asset/resource", 65 | }, 66 | ], 67 | }, 68 | plugins: [ 69 | new DefinePlugin({ 70 | __COMMIT_HASH__: JSON.stringify( 71 | `${execSync("git rev-parse HEAD")}`.trim(), 72 | ), 73 | }), 74 | new HtmlPlugin({ 75 | filename: "old.html", 76 | template: "src/old.html", 77 | chunks: ["old"], 78 | }), 79 | new HtmlPlugin({ 80 | filename: "index.html", 81 | template: "src/new.html", 82 | chunks: ["new"], 83 | }), 84 | new HtmlPlugin({ 85 | filename: "perf.html", 86 | template: "src/perf.html", 87 | chunks: ["perf"], 88 | }), 89 | ], 90 | ignoreWarnings: [ 91 | // TEMP: old versions of frontend dependencies trigger deprecation warnings in sass 92 | // TODO: remove once frontend dependencies are upgraded 93 | { module: /node_modules/ }, 94 | ], 95 | }; 96 | 97 | module.exports = (env, argv) => { 98 | switch (argv.mode) { 99 | case "production": 100 | config.devtool = "source-map"; 101 | config.plugins.push( 102 | new CompressionPlugin({ 103 | filename: "[file].br[query]", 104 | algorithm: "brotliCompress", 105 | test: 
/[.](js|eot|svg|ttf|woff|woff2|otf|bin)$/, 106 | }), 107 | ); 108 | break; 109 | case "development": 110 | config.devtool = "eval-cheap-module-source-map"; 111 | break; 112 | default: 113 | // breaks webpack-dev-server 114 | // config.plugins.push(new webpack.debug.ProfilingPlugin()); 115 | break; 116 | } 117 | 118 | return config; 119 | }; 120 | -------------------------------------------------------------------------------- /data/Jamo.txt: -------------------------------------------------------------------------------- 1 | # Jamo-16.0.0.txt 2 | # Date: 2024-02-02 3 | # © 2024 Unicode®, Inc. 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 5 | # For terms of use and license, see https://www.unicode.org/terms_of_use.html 6 | # 7 | # Unicode Character Database 8 | # For documentation, see https://www.unicode.org/reports/tr44/ 9 | # 10 | # This file defines the Jamo_Short_Name property. 11 | # 12 | # See Section 3.12 of The Unicode Standard, Version 15.0 13 | # for more information. 14 | # 15 | # Each line contains two fields, separated by a semicolon. 16 | # 17 | # The first field gives the code point, in 4-digit hexadecimal 18 | # form, of a conjoining jamo character that participates in the 19 | # algorithmic determination of Hangul syllable character names. 20 | # The second field gives the Jamo_Short_Name as a one-, two-, 21 | # or three-character ASCII string (or in one case, for U+110B, 22 | # the null string). 
23 | # 24 | # ############################################################# 25 | 26 | 1100; G # HANGUL CHOSEONG KIYEOK 27 | 1101; GG # HANGUL CHOSEONG SSANGKIYEOK 28 | 1102; N # HANGUL CHOSEONG NIEUN 29 | 1103; D # HANGUL CHOSEONG TIKEUT 30 | 1104; DD # HANGUL CHOSEONG SSANGTIKEUT 31 | 1105; R # HANGUL CHOSEONG RIEUL 32 | 1106; M # HANGUL CHOSEONG MIEUM 33 | 1107; B # HANGUL CHOSEONG PIEUP 34 | 1108; BB # HANGUL CHOSEONG SSANGPIEUP 35 | 1109; S # HANGUL CHOSEONG SIOS 36 | 110A; SS # HANGUL CHOSEONG SSANGSIOS 37 | 110B; # HANGUL CHOSEONG IEUNG 38 | 110C; J # HANGUL CHOSEONG CIEUC 39 | 110D; JJ # HANGUL CHOSEONG SSANGCIEUC 40 | 110E; C # HANGUL CHOSEONG CHIEUCH 41 | 110F; K # HANGUL CHOSEONG KHIEUKH 42 | 1110; T # HANGUL CHOSEONG THIEUTH 43 | 1111; P # HANGUL CHOSEONG PHIEUPH 44 | 1112; H # HANGUL CHOSEONG HIEUH 45 | 1161; A # HANGUL JUNGSEONG A 46 | 1162; AE # HANGUL JUNGSEONG AE 47 | 1163; YA # HANGUL JUNGSEONG YA 48 | 1164; YAE # HANGUL JUNGSEONG YAE 49 | 1165; EO # HANGUL JUNGSEONG EO 50 | 1166; E # HANGUL JUNGSEONG E 51 | 1167; YEO # HANGUL JUNGSEONG YEO 52 | 1168; YE # HANGUL JUNGSEONG YE 53 | 1169; O # HANGUL JUNGSEONG O 54 | 116A; WA # HANGUL JUNGSEONG WA 55 | 116B; WAE # HANGUL JUNGSEONG WAE 56 | 116C; OE # HANGUL JUNGSEONG OE 57 | 116D; YO # HANGUL JUNGSEONG YO 58 | 116E; U # HANGUL JUNGSEONG U 59 | 116F; WEO # HANGUL JUNGSEONG WEO 60 | 1170; WE # HANGUL JUNGSEONG WE 61 | 1171; WI # HANGUL JUNGSEONG WI 62 | 1172; YU # HANGUL JUNGSEONG YU 63 | 1173; EU # HANGUL JUNGSEONG EU 64 | 1174; YI # HANGUL JUNGSEONG YI 65 | 1175; I # HANGUL JUNGSEONG I 66 | 11A8; G # HANGUL JONGSEONG KIYEOK 67 | 11A9; GG # HANGUL JONGSEONG SSANGKIYEOK 68 | 11AA; GS # HANGUL JONGSEONG KIYEOK-SIOS 69 | 11AB; N # HANGUL JONGSEONG NIEUN 70 | 11AC; NJ # HANGUL JONGSEONG NIEUN-CIEUC 71 | 11AD; NH # HANGUL JONGSEONG NIEUN-HIEUH 72 | 11AE; D # HANGUL JONGSEONG TIKEUT 73 | 11AF; L # HANGUL JONGSEONG RIEUL 74 | 11B0; LG # HANGUL JONGSEONG RIEUL-KIYEOK 75 | 11B1; LM # HANGUL JONGSEONG 
RIEUL-MIEUM 76 | 11B2; LB # HANGUL JONGSEONG RIEUL-PIEUP 77 | 11B3; LS # HANGUL JONGSEONG RIEUL-SIOS 78 | 11B4; LT # HANGUL JONGSEONG RIEUL-THIEUTH 79 | 11B5; LP # HANGUL JONGSEONG RIEUL-PHIEUPH 80 | 11B6; LH # HANGUL JONGSEONG RIEUL-HIEUH 81 | 11B7; M # HANGUL JONGSEONG MIEUM 82 | 11B8; B # HANGUL JONGSEONG PIEUP 83 | 11B9; BS # HANGUL JONGSEONG PIEUP-SIOS 84 | 11BA; S # HANGUL JONGSEONG SIOS 85 | 11BB; SS # HANGUL JONGSEONG SSANGSIOS 86 | 11BC; NG # HANGUL JONGSEONG IEUNG 87 | 11BD; J # HANGUL JONGSEONG CIEUC 88 | 11BE; C # HANGUL JONGSEONG CHIEUCH 89 | 11BF; K # HANGUL JONGSEONG KHIEUKH 90 | 11C0; T # HANGUL JONGSEONG THIEUTH 91 | 11C1; P # HANGUL JONGSEONG PHIEUPH 92 | 11C2; H # HANGUL JONGSEONG HIEUH 93 | 94 | # EOF 95 | -------------------------------------------------------------------------------- /data/src/details.rs: -------------------------------------------------------------------------------- 1 | use enumflags2::{bitflags, BitFlags}; 2 | use std::{rc::Rc, str::FromStr}; 3 | 4 | use bon::Builder; 5 | use color_eyre::eyre::{self, bail}; 6 | 7 | #[derive(Debug, Default, Clone, PartialEq, Builder)] 8 | #[builder(on(Rc, into))] 9 | pub(crate) struct Details { 10 | #[builder(default)] 11 | pub bits: BitFlags, 12 | #[builder(default)] 13 | pub ebits: BitFlags, 14 | pub name: Option>, 15 | #[builder(with = |alias: &'static[(&str, AliasType)]| { alias.iter().map(|(x, t)| Alias::r#static(x, *t)).collect() })] 16 | #[builder(default)] 17 | pub alias: Vec, 18 | pub dnrp: Option>, 19 | pub gb: Option, 20 | pub gc: Option>, 21 | pub block: Option>, 22 | pub age: Option>, 23 | pub hst: Option, 24 | pub hjsn: Option>, 25 | pub hlvt: Option<(usize, usize, usize)>, 26 | pub uhdef: Option>, 27 | pub uhman: Option>, 28 | } 29 | 30 | #[derive(Debug, Clone, PartialEq)] 31 | pub(crate) struct Alias { 32 | pub inner: Rc, 33 | pub r#type: AliasType, 34 | } 35 | 36 | #[derive(Debug, Clone, Copy, PartialEq)] 37 | #[repr(u8)] 38 | pub(crate) enum AliasType { 39 | Correction = 0, 
40 | Control = 1, 41 | Alternate = 2, 42 | Figment = 3, 43 | Abbreviation = 4, 44 | Unicode1 = 5, 45 | Cldr = 6, 46 | } 47 | 48 | #[derive(Debug, Clone, Copy, PartialEq)] 49 | #[repr(u8)] 50 | pub(crate) enum GraphemeBreak { 51 | Cr = 1, 52 | Lf = 2, 53 | Control = 3, 54 | Extend = 4, 55 | Zwj = 5, 56 | RegionalIndicator = 6, 57 | Prepend = 7, 58 | SpacingMark = 8, 59 | HangulL = 9, 60 | HangulV = 10, 61 | HangulT = 11, 62 | HangulLV = 12, 63 | HangulLVT = 13, 64 | } 65 | 66 | #[repr(u8)] 67 | #[derive(Debug, Clone, Copy)] 68 | #[bitflags] 69 | pub(crate) enum Bits { 70 | KdefinitionExists = 1 << 0, 71 | IsSpaceSeparator = 1 << 2, 72 | IsAnyMark = 1 << 3, 73 | DerivedNameNr1 = 1 << 4, 74 | DerivedNameNr2 = 1 << 5, 75 | } 76 | 77 | #[repr(u8)] 78 | #[derive(Debug, Clone, Copy)] 79 | #[bitflags] 80 | pub(crate) enum EmojiBits { 81 | Emoji = 1 << 0, 82 | ExtendedPictographic = 1 << 1, 83 | EmojiComponent = 1 << 2, 84 | EmojiPresentation = 1 << 3, 85 | EmojiModifier = 1 << 4, 86 | EmojiModifierBase = 1 << 5, 87 | } 88 | 89 | #[derive(Debug, Clone, Copy, PartialEq)] 90 | pub(crate) enum HangulSyllableType { 91 | Lv, 92 | Lvt, 93 | } 94 | 95 | impl Alias { 96 | pub(crate) fn r#static(inner: &'static str, r#type: AliasType) -> Self { 97 | Alias { 98 | inner: inner.into(), 99 | r#type, 100 | } 101 | } 102 | } 103 | 104 | impl FromStr for AliasType { 105 | type Err = eyre::Report; 106 | 107 | fn from_str(s: &str) -> Result { 108 | Ok(match s { 109 | "correction" => Self::Correction, 110 | "control" => Self::Control, 111 | "alternate" => Self::Alternate, 112 | "figment" => Self::Figment, 113 | "abbreviation" => Self::Abbreviation, 114 | _ => bail!("unknown Name_Alias type: {s}"), 115 | }) 116 | } 117 | } 118 | 119 | impl FromStr for GraphemeBreak { 120 | type Err = eyre::Report; 121 | 122 | fn from_str(s: &str) -> Result { 123 | Ok(match s { 124 | "CR" => Self::Cr, 125 | "LF" => Self::Lf, 126 | "Control" => Self::Control, 127 | "Extend" => Self::Extend, 128 | "ZWJ" => 
Self::Zwj, 129 | "Regional_Indicator" => Self::RegionalIndicator, 130 | "Prepend" => Self::Prepend, 131 | "SpacingMark" => Self::SpacingMark, 132 | "L" => Self::HangulL, 133 | "V" => Self::HangulV, 134 | "T" => Self::HangulT, 135 | "LV" => Self::HangulLV, 136 | "LVT" => Self::HangulLVT, 137 | _ => bail!("unknown Grapheme_Cluster_Break value: {s}"), 138 | }) 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/old.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | charming 9 | 10 | 11 | 12 |
13 |
14 |
15 |
16 | 17 |
18 |
19 | 20 | please enable JavaScript 21 | 22 | 23 | loading data 24 | 25 |
26 |
27 |
28 |
29 |
30 | 34 | 35 |
UTF-16 36 | 37 |
38 |
UTF-8 39 | 40 |
41 |
Decimal HTML entity 42 | 43 |
44 |
Block 45 | 46 |
47 |
Introduced in 48 | 49 |
50 |
General category 51 | 52 |
53 |
Unihan Mandarin 54 | 55 |
56 |
57 | 68 |
69 | 70 | 71 | 74 | 77 |
Hexadecimal 72 | 73 |
Decimal 75 | 76 |
Character 78 | 79 |
80 |
81 |
82 |

This is a Unicode character map. It supports 83 | all of the codepoints and planes that are 84 | currently available as of Unicode 85 | (version). 86 |

In addition to details about encodings, 87 | blocks, ages, and categories, the 88 | names of characters are augmented 89 | by definitions from the Unihan 90 | database, where appropriate. 91 |

Source 92 | is available under the ISC licence. 93 |

Keyboard shortcuts 94 |

95 | Home: go to first visible character
96 | Home Home: go to U+0000
97 | End: go to last visible character
98 | End End: go to U+10FFFF
99 | Page Up: show the previous page
100 | Page Down: show the next page
101 | Arrow keys: navigate the character map 102 |

Credits 103 |

Unicode Character Database: Unicode 104 | Consortium
105 | Symbola font: George Douros
106 | Twemoji font: Brad Erickson and others 107 |
lzo1x library: Alistair Braidwood 108 |

Browser compatibility 109 |

Chrome 23: perfect
110 | Firefox 17: perfect
111 | IE 10: perfect
112 | Opera 12: some UI problems
113 | Safari 5: only BMP characters 114 | 115 |

116 |
117 | 124 |
125 |
126 | 127 | 128 | 129 | -------------------------------------------------------------------------------- /src/formatting.test.ts: -------------------------------------------------------------------------------- 1 | import { 2 | toHexadecimal, 3 | pointToYouPlus, 4 | pointToTofu, 5 | pointToString16, 6 | pointToEntity10, 7 | pointToString8, 8 | } from "./formatting"; 9 | 10 | test("toHexadecimal returns correct value", () => { 11 | expect(toHexadecimal(0x0)).toBe("0"); 12 | expect(toHexadecimal(0xf)).toBe("F"); 13 | }); 14 | 15 | test("toHexadecimal pads return value to length", () => { 16 | expect(toHexadecimal(0xf, 2)).toBe("0F"); 17 | expect(toHexadecimal(0xfff, 2)).toBe("FFF"); 18 | }); 19 | 20 | test("pointToYouPlus returns correct value", () => { 21 | expect(pointToYouPlus(0x0000)).toBe("U+0000"); 22 | expect(pointToYouPlus(0x10000)).toBe("U+10000"); 23 | expect(pointToYouPlus(0x100000)).toBe("U+100000"); 24 | }); 25 | 26 | test("pointToTofu returns correct value", () => { 27 | expect(pointToTofu(0x0000)).toBe("0000"); 28 | expect(pointToTofu(0x10000)).toBe("010000"); 29 | }); 30 | 31 | test("pointToString16 returns correct value for BMP point", () => 32 | void expect(pointToString16(0x23ff)).toBe("23FF")); 33 | 34 | test("pointToString16 returns correct value for astral point", () => 35 | void expect(pointToString16(0x1f496)).toBe("D83D DC96")); 36 | 37 | test("pointToString16 returns null when point is surrogate", () => 38 | void expect(pointToString16(0xd800)).toBe(null)); 39 | 40 | test("pointToString8 returns correct value for BMP string", () => 41 | void expect(pointToString8(0x23ff)).toEqual("E2 8F BF")); 42 | 43 | test("pointToString8 returns correct value for astral string", () => 44 | void expect(pointToString8(0x1f496)).toEqual("F0 9F 92 96")); 45 | 46 | test("pointToString8 returns null when point is surrogate", () => 47 | void expect(pointToString8(0xd800)).toBe(null)); 48 | 49 | test("pointToEntity10 returns correct value", () => { 50 | 
// The expected values are decimal numeric character references, written out
// literally ("&#N;") rather than as the characters they decode to.
test("pointToEntity10 returns correct value", () => {
  expect(pointToEntity10(0x0009)).toBe("&#9;");
  expect(pointToEntity10(0x000a)).toBe("&#10;");
  expect(pointToEntity10(0x000c)).toBe("&#12;");
  expect(pointToEntity10(0x0020)).toBe("&#32;");
  expect(pointToEntity10(0x007e)).toBe("&#126;");
  expect(pointToEntity10(0x00a0)).toBe("&#160;");
  expect(pointToEntity10(0xfdcf)).toBe("&#64975;");
  expect(pointToEntity10(0xfdf0)).toBe("&#65008;");
  expect(pointToEntity10(0xfffd)).toBe("&#65533;");
  expect(pointToEntity10(0x10000)).toBe("&#65536;");
  expect(pointToEntity10(0x1fffd)).toBe("&#131069;");
  expect(pointToEntity10(0x20000)).toBe("&#131072;");
  expect(pointToEntity10(0xffffd)).toBe("&#1048573;");
  expect(pointToEntity10(0x100000)).toBe("&#1048576;");
  expect(pointToEntity10(0x10fffd)).toBe("&#1114109;");
});

// Points covered: U+0000, C0 controls other than tab/LF/FF, U+007F, C1
// controls, U+FDD0..U+FDEF, and the last two code points of several planes.
test("pointToEntity10 returns null when point has no HTML entity", () => {
  expect(pointToEntity10(0x0000)).toBe(null);
  expect(pointToEntity10(0x0001)).toBe(null);
  expect(pointToEntity10(0x0002)).toBe(null);
  expect(pointToEntity10(0x0003)).toBe(null);
  expect(pointToEntity10(0x0004)).toBe(null);
  expect(pointToEntity10(0x0005)).toBe(null);
  expect(pointToEntity10(0x0006)).toBe(null);
  expect(pointToEntity10(0x0007)).toBe(null);
  expect(pointToEntity10(0x0008)).toBe(null);
  expect(pointToEntity10(0x000b)).toBe(null);
  expect(pointToEntity10(0x000d)).toBe(null);
  expect(pointToEntity10(0x000e)).toBe(null);
  expect(pointToEntity10(0x000f)).toBe(null);
  expect(pointToEntity10(0x0010)).toBe(null);
  expect(pointToEntity10(0x001f)).toBe(null);
  expect(pointToEntity10(0x007f)).toBe(null);
  expect(pointToEntity10(0x0080)).toBe(null);
  expect(pointToEntity10(0x008f)).toBe(null);
  expect(pointToEntity10(0x0090)).toBe(null);
  expect(pointToEntity10(0x009f)).toBe(null);
  expect(pointToEntity10(0xfdd0)).toBe(null);
  expect(pointToEntity10(0xfddf)).toBe(null);
  expect(pointToEntity10(0xfde0)).toBe(null);
  expect(pointToEntity10(0xfdef)).toBe(null);
  expect(pointToEntity10(0xfffe)).toBe(null);
  expect(pointToEntity10(0xffff)).toBe(null);
  expect(pointToEntity10(0x1fffe)).toBe(null);
  expect(pointToEntity10(0x1ffff)).toBe(null);
  expect(pointToEntity10(0xffffe)).toBe(null);
  expect(pointToEntity10(0xfffff)).toBe(null);
  expect(pointToEntity10(0x10fffe)).toBe(null);
  expect(pointToEntity10(0x10ffff)).toBe(null);
});
/* Cells with class like_C1 are drawn rotated and at half scale, in monospace.
   Each :not(#specificity) adds one ID's worth of specificity so this rule
   outranks the plain "#grid div" rules; it only excludes an element whose id
   is "specificity" (presumably none exists; confirm in old.html). */
#grid div.like_C1:not(#specificity):not(#specificity) {
    transform: rotate(30deg) scale(0.5);
    width: 3em;
    position: absolute;
    left: -0.75em;
    font-family: monospace, monospace;
}
/* Shared single-line box for grid cells, the code point label and the big preview. */
#grid div, #cp, #big {
    height: 1.5em;
    line-height: 1.5em;
    text-align: center;
    overflow: hidden;
}
#grid div {
    width: 1.5em;
    cursor: default;
}
/* The selected cell and the top of the info panel share one highlight colour. */
#grid td.selected, #infotop {
    background: #c0c0c0;
}
/* Info panel to the right of the grid; the left border is omitted because the
   grid cells already draw one. */
#info {
    float: left;
    border: 1px solid #c0c0c0;
    border-left: none;
    width: 24em;
    height: 591px; /* 16 * 36 + 15 */
}
#infotop {
    height: 222px; /* 6 * 36 + 6 */
}
#cp {
    margin: 0 auto;
    width: 12em;
}
/* Large preview of the current character, stripped of default control chrome. */
#big {
    display: block;
    margin: 0 auto;
    font-size: 8em;
    width: 2.75em;
    padding: 0;
    background: none;
    border: none;
    outline: none;
}
/* Loading placeholder; line-height equals height so the text is vertically centred. */
#loading {
    height: 369px; /* 10 * 36 + 9 */
    line-height: 369px;
    text-align: center;
    background: #e0e0e0;
}
/* Hidden by default; presumably shown by script (confirm in old.js). */
#loading_files {
    display: none;
}
/* Hidden by default; presumably shown by script once data has loaded (confirm in old.js). */
#ui {
    display: none;
}
#ui_content {
    height: 332px;
    overflow-y: scroll;
}
#ui_content > div {
    margin: 1em;
}
/* Only the #data pane is visible by default; the other panes start hidden. */
#ui_content > div:not(#data) {
    display: none;
}
#about {
    font-size: 12px;
}
#ui_tabs {
    background: #c0c0c0;
}
display: inline-block; 139 | color: black; 140 | text-decoration: none; 141 | padding: 10.5px; 142 | line-height: 1; 143 | } 144 | #ui_tabs a.selected { 145 | background: #808080; 146 | color: white; 147 | } 148 | #data_name { 149 | font-weight: bold; 150 | } 151 | #data_table_first { 152 | margin-top: 1em; 153 | border-top: 1px solid #c0c0c0; 154 | } 155 | #data table { 156 | width: 100%; 157 | border-bottom: 1px solid #c0c0c0; 158 | } 159 | #data th, #data td { 160 | padding: 0.25em; 161 | } 162 | #data td { 163 | text-align: right; 164 | } 165 | #search_form { 166 | margin-bottom: 1em; 167 | } 168 | #search_query { 169 | width: 100%; 170 | margin-bottom: 0.5em; 171 | } 172 | #search_results div { 173 | overflow: hidden; 174 | white-space: nowrap; 175 | text-overflow: ellipsis; 176 | cursor: default; 177 | } 178 | #search_results div:hover { 179 | background: #c0c0c0; 180 | } 181 | #goto table { 182 | width: 100%; 183 | } 184 | #goto input { 185 | font-size: 2em; 186 | width: 6em; 187 | } 188 | #goto_hex, #goto_dec { 189 | font-family: monospace; 190 | } 191 | @media (max-width: 1020px) { 192 | #grid { 193 | display: none; 194 | } 195 | #info { 196 | border-left: 1px solid #c0c0c0; 197 | } 198 | } 199 | -------------------------------------------------------------------------------- /src/data.test.ts: -------------------------------------------------------------------------------- 1 | import { readFileSync } from "fs"; 2 | 3 | import { 4 | getString, 5 | getNameProperty, 6 | getNonDerivedName, 7 | getNameExceptNr2, 8 | getOldName, 9 | kDefinitionExists, 10 | isSpaceSeparator, 11 | isAnyMark, 12 | hasDerivedNameNr1, 13 | hasDerivedNameNr2, 14 | getNextClusterBreak, 15 | getEmojiPresentationRuns, 16 | } from "./data"; 17 | import { pointsToString, stringToPoints } from "./encoding"; 18 | import { pointsToYouPlus } from "./formatting"; 19 | import { getData } from "./testing"; 20 | 21 | test("getString returns correct string", () => { 22 | 
// (continues the "getString returns correct string" test opened on the previous line)
  expect(getString(getData(), "gc", 0)).toBe("d");
  expect(getString(getData(), "gc", 1)).toBe("e");
});

test("getString returns null when index is out of bounds", () => {
  expect(getString(getData(), "block", 0)).toBe(null);
});

test("getString returns null when index is sentinel", () => {
  expect(getString(getData(), "block", 1)).toBe(null);
});

// In the table-style tests below, the array index is the code point passed to
// the function under test and the element is the value it must return.

test("getNameProperty returns correct value", () => {
  const expectedByPoint = ["i0000", "hxyz", "g"];
  expectedByPoint.forEach((expected, point) => {
    expect(getNameProperty(getData(), point)).toBe(expected);
  });
});

test("getNonDerivedName returns correct value", () => {
  const expectedByPoint = [null, null, "g"];
  expectedByPoint.forEach((expected, point) => {
    expect(getNonDerivedName(getData(), point)).toBe(expected);
  });
});

test("getNameExceptNr2 returns correct value", () => {
  const expectedByPoint = [null, "hxyz", "g"];
  expectedByPoint.forEach((expected, point) => {
    expect(getNameExceptNr2(getData(), point)).toBe(expected);
  });
});

test("getOldName returns correct value", () => {
  const expectedByPoint = ["b", "f", "g"];
  expectedByPoint.forEach((expected, point) => {
    expect(getOldName(getData(), point)).toBe(expected);
  });
});

test("kDefinitionExists returns correct value", () => {
  [false, true].forEach((expected, point) => {
    expect(kDefinitionExists(getData(), point)).toBe(expected);
  });
});

test("isSpaceSeparator returns correct value", () => {
  [false, true].forEach((expected, point) => {
    expect(isSpaceSeparator(getData(), point)).toBe(expected);
  });
});

test("isAnyMark returns correct value", () => {
  [true, false].forEach((expected, point) => {
    expect(isAnyMark(getData(), point)).toBe(expected);
  });
});

test("hasDerivedNameNr1 returns correct value", () => {
  [false, true].forEach((expected, point) => {
    expect(hasDerivedNameNr1(getData(), point)).toBe(expected);
  });
});

test("hasDerivedNameNr2 returns correct value", () => {
  [true, false].forEach((expected, point) => {
    expect(hasDerivedNameNr2(getData(), point)).toBe(expected);
  });
});

// Replays every line of data/GraphemeBreakTest.txt: each "÷" in a line marks
// a position where the next cluster must start, each "×" extends the current
// cluster by one code point.
test("getNextClusterBreak returns correct values", () => {
  const fixture: string = readFileSync("data/GraphemeBreakTest.txt", "utf8");
  const data = getData();
  data.gb = bufferToDataView(readFileSync("data/data.gb.bin"));
  data.ebits = bufferToDataView(readFileSync("data/data.ebits.bin"));

  for (const line of fixture.match(/^÷[ ÷×0-9A-F]+/gm)!) {
    const points = line.match(/[0-9A-F]+/g)!.map((hex) => parseInt(hex, 16));
    const marks = line.match(/[÷×]/g)!;
    const string = pointsToString(points);

    let context = null;
    let pointStart = 0;
    let pointLen = 0;
    for (const mark of marks) {
      if (mark === "÷") {
        context = getNextClusterBreak(data, string, context)!;
        const actual = context.startPointIndex;
        const expected = pointStart + pointLen;
        // Only go through expect() on a mismatch, so the offending line and
        // state can be logged first.
        if (actual != expected) {
          console.error([line, pointStart, pointLen, context]);
          expect(actual).toBe(expected);
        }
        pointStart += pointLen;
        pointLen = 1;
      } else if (mark === "×") {
        pointLen += 1;
      }
    }
  }
});

test("getEmojiPresentationRuns returns correct values", () => {
  const data = getData();
  data.gb = bufferToDataView(readFileSync("data/data.gb.bin"));
  data.ebits = bufferToDataView(readFileSync("data/data.ebits.bin"));

  expect(getEmojiPresentationRuns(data, "")).toEqual([0]);
  expect(getEmojiPresentationRuns(data, " ")).toEqual([0]);

  expect(getEmojiPresentationRuns(data, "⌚")).toEqual([0, 0]);
  expect(getEmojiPresentationRuns(data, " ⌚")).toEqual([0, 1]);
  expect(getEmojiPresentationRuns(data, "⌚ ")).toEqual([0, 0, 1]);
  // (this test continues on the next line)
expect(getEmojiPresentationRuns(data, " ⌚ ")).toEqual([0, 1, 2]); 127 | expect(getEmojiPresentationRuns(data, " ⌚⌚ ")).toEqual([0, 1, 3]); 128 | expect(getEmojiPresentationRuns(data, " ⌚ ⌚ ")).toEqual([0, 1, 2, 3, 4]); 129 | 130 | const test: string = readFileSync("data/emoji-test.txt", "utf8"); 131 | for (const line of test.match( 132 | /^[0-9A-F]+(?: [0-9A-F]+)*(?=\s*; fully-qualified(?:\s|#))/gm, 133 | )!) { 134 | // if (line != "1F3F3 FE0F 200D 26A7 FE0F") continue; 135 | const points = line.match(/[0-9A-F]+/g)!.map((x) => parseInt(x, 16)); 136 | const string = pointsToString(points); 137 | const len = string.length; 138 | e(0, `${string}`, [0, 0]); 139 | e(1, ` ${string}`, [0, 1]); 140 | e(2, `${string} `, [0, 0, len]); 141 | e(3, ` ${string} `, [0, 1, 1 + len]); 142 | e(4, ` ${string}${string} `, [0, 1, 1 + 2 * len]); 143 | e(5, ` ${string} ${string} `, [0, 1, 1 + len, 2 + len, 2 + 2 * len]); 144 | } 145 | 146 | function e(i: number, string: string, expected: number[]) { 147 | const actual = getEmojiPresentationRuns(data, string); 148 | if (String(actual) != String(expected)) { 149 | console.error(pointsToYouPlus(stringToPoints(string)), `#${i}`); 150 | expect(actual).toEqual(expected); 151 | } 152 | } 153 | }); 154 | 155 | function bufferToDataView(buffer: Buffer): DataView { 156 | const array = new Uint8Array(buffer.byteLength); 157 | buffer.copy(array, 0, 0, buffer.byteLength); 158 | return new DataView(array.buffer); 159 | } 160 | -------------------------------------------------------------------------------- /src/Display.tsx: -------------------------------------------------------------------------------- 1 | import React, { useContext } from "react"; 2 | 3 | import { pointToString, isSurrogate, pointsToString } from "./encoding"; 4 | import { pointToTofu } from "./formatting"; 5 | import { DataContext } from "./state"; 6 | import { 7 | Data, 8 | isSpaceSeparator, 9 | isAnyMark, 10 | isEmojiPresentation, 11 | getEmojiPresentationRuns, 12 | } from 
"./data"; 13 | 14 | export function Display({ points }: { points: number[] }) { 15 | const data = useContext(DataContext); 16 | const className = (...xs: string[]) => ["Display", ...xs].join(" "); 17 | 18 | if (points.length > 1) { 19 | const string = pointsToString(points); 20 | const runs = data == null ? [0] : getEmojiPresentationRuns(data, string); 21 | const result = runs.map((x, i, xs) => ( 22 | 23 | {slice(x, i, xs)} 24 | 25 | )); 26 | return {result}; 27 | 28 | function slice(x: number, i: number, xs: number[]): string { 29 | return i < xs.length - 1 ? string.slice(x, xs[i + 1]) : string.slice(x); 30 | } 31 | } 32 | 33 | const point = points[0]; 34 | const tofu = pointToSyntheticTofu(point); 35 | 36 | if (tofu != null) { 37 | return ( 38 | 39 | {[...tofu].map((x, i) => ( 40 | {x} 41 | ))} 42 | 43 | ); 44 | } 45 | 46 | const diagonal = pointToDiagonal(point); 47 | 48 | if (diagonal != null) { 49 | return ( 50 | 51 | {[...diagonal].map((x, i) => ( 52 | {x} 53 | ))} 54 | 55 | ); 56 | } 57 | 58 | const substitute = pointToSubstitute(data, point); 59 | 60 | if (substitute != null) { 61 | return ( 62 | {substitute} 63 | ); 64 | } 65 | 66 | const result = 67 | data != null && isEmojiPresentation(data, point) ? 
( 68 | {pointToString(point)} 69 | ) : ( 70 | <>{pointToString(point)} 71 | ); 72 | 73 | return {result}; 74 | } 75 | 76 | // see dist/scratch/edge-points.html 77 | export function pointToSyntheticTofu(point: number): string | null { 78 | if (isSurrogate(point) || (0xfdd0 <= point && point < 0xfdf0)) { 79 | return pointToTofu(point); 80 | } 81 | 82 | return null; 83 | } 84 | 85 | export function pointToDiagonal(point: number): string | null { 86 | const result: { [index: number]: string } = { 87 | 0x0080: "PAD", 88 | 0x0081: "HOP", 89 | 0x0082: "BPH", 90 | 0x0083: "NBH", 91 | 0x0084: "IND", 92 | 0x0085: "NEL", 93 | 0x0086: "SSA", 94 | 0x0087: "ESA", 95 | 0x0088: "HTS", 96 | 0x0089: "HTJ", 97 | 0x008a: "VTS", 98 | 0x008b: "PLD", 99 | 0x008c: "PLU", 100 | 0x008d: "RI", 101 | 0x008e: "SS2", 102 | 0x008f: "SS3", 103 | 0x0090: "DCS", 104 | 0x0091: "PU1", 105 | 0x0092: "PU2", 106 | 0x0093: "STS", 107 | 0x0094: "CCH", 108 | 0x0095: "MW", 109 | 0x0096: "SPA", 110 | 0x0097: "EPA", 111 | 0x0098: "SOS", 112 | 0x0099: "SGCI", 113 | 0x009a: "SCI", 114 | 0x009b: "CSI", 115 | 0x009c: "ST", 116 | 0x009d: "OSC", 117 | 0x009e: "PM", 118 | 0x009f: "APC", 119 | 0x00ad: "SHY", 120 | 0x034f: "CGJ", 121 | 0x061c: "ALM", 122 | 0x180e: "MVS", 123 | 0x200b: "ZWSP", 124 | 0x200c: "ZWNJ", 125 | 0x200d: "ZWJ", 126 | 0x200e: "LRM", 127 | 0x200f: "RLM", 128 | 0x2028: "LS", 129 | 0x2029: "PS", 130 | 0x202a: "LRE", 131 | 0x202b: "RLE", 132 | 0x202c: "PDF", 133 | 0x202d: "LRO", 134 | 0x202e: "RLO", 135 | 0x2060: "WJ", 136 | 0x2066: "LRI", 137 | 0x2067: "RLI", 138 | 0x2068: "FSI", 139 | 0x2069: "PDI", 140 | 0x206a: "ISS", 141 | 0x206b: "ASS", 142 | 0x206c: "IAFS", 143 | 0x206d: "AAFS", 144 | 0x206e: "NAT", 145 | 0x206f: "NOM", 146 | 0x3164: "HF", 147 | 0xfeff: "BOM", 148 | 0xffa0: "HHF", 149 | 0xfff9: "IAA", 150 | 0xfffa: "IAS", 151 | 0xfffb: "IAT", 152 | 0xfffc: "OBJ", 153 | 0xe0000: "LT", 154 | 0xe007f: "CT", 155 | }; 156 | 157 | if (point in result) { 158 | return result[point]; 159 | } 160 | 161 
| if (0x180b <= point && point < 0x180e) { 162 | return `FVS${point - 0x180b + 1}`; 163 | } 164 | 165 | if (0xfe00 <= point && point < 0xfe10) { 166 | return `VS${point - 0xfe00 + 1}`; 167 | } 168 | 169 | if (0xe0100 <= point && point < 0xe01f0) { 170 | return `VS${point - 0xe0100 + 17}`; 171 | } 172 | 173 | return null; 174 | } 175 | 176 | export function pointToSubstitute( 177 | data: Data | null, 178 | point: number, 179 | ): string | null { 180 | const result: { [index: number]: string } = { 181 | 0x007f: "\u2421", 182 | 0x2061: "f\u2061()", 183 | 0x2062: "13\u2062x", 184 | 0x2063: "Mᵢ\u2063ⱼ", 185 | 0x2064: "9\u2064¾", 186 | 0xe0020: "\u2420ₜ", 187 | }; 188 | 189 | if (point in result) { 190 | return result[point]; 191 | } 192 | 193 | if (point < 0x0020) { 194 | return pointToString(point + 0x2400); 195 | } 196 | 197 | if (0xe0021 <= point && point < 0xe007f) { 198 | return `${pointToString(point - 0xe0000)}ₜ`; 199 | } 200 | 201 | if (data != null) { 202 | if (isSpaceSeparator(data, point)) { 203 | return `]${pointToString(point)}[`; 204 | } 205 | 206 | if (isAnyMark(data, point)) { 207 | switch (point & 0xfffffff0) { 208 | case 0x0300: // Combining Diacritical Marks 209 | case 0x0310: 210 | case 0x0320: 211 | case 0x0330: 212 | case 0x0340: 213 | case 0x0350: 214 | case 0x0360: 215 | case 0x0480: // Cyrillic 216 | case 0x1dc0: // Combining Diacritical Marks Supplement 217 | case 0x1dd0: 218 | case 0x1de0: 219 | case 0x1df0: 220 | case 0x20d0: // Combining Diacritical Marks for Symbols 221 | case 0x20e0: 222 | case 0x20f0: 223 | case 0x2ce0: // Coptic 224 | case 0x2cf0: 225 | case 0x2de0: // Cyrillic Extended-A 226 | case 0x2df0: 227 | case 0xa660: // Cyrillic Extended-B 228 | case 0xa670: 229 | case 0xa690: 230 | case 0xfe00: // Variation Selectors 231 | case 0xfe20: // Combining Half Marks 232 | case 0x101f0: // Phaistos Disc 233 | case 0x102e0: // Coptic Epact Numbers 234 | case 0x1d160: // Musical Symbols 235 | case 0x1d170: 236 | case 0x1d180: 237 | case 
0x1d1a0: 238 | case 0x1d240: // Ancient Greek Musical Notation 239 | case 0xe0100: // Variation Selectors Supplement 240 | case 0xe0110: 241 | case 0xe0120: 242 | case 0xe0130: 243 | case 0xe0140: 244 | case 0xe0150: 245 | case 0xe0160: 246 | case 0xe0170: 247 | case 0xe0180: 248 | case 0xe0190: 249 | case 0xe01a0: 250 | case 0xe01b0: 251 | case 0xe01c0: 252 | case 0xe01d0: 253 | case 0xe01e0: 254 | return `\u25CC${pointToString(point)}`; 255 | } 256 | } 257 | } 258 | 259 | return null; 260 | } 261 | -------------------------------------------------------------------------------- /data/src/uax29.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | collections::HashMap, 3 | fmt::{self, Display, Formatter}, 4 | }; 5 | 6 | use color_eyre::Result; 7 | use nom::{ 8 | branch::alt, 9 | bytes::complete::{tag, take_while1}, 10 | character::complete::{multispace0, newline, one_of, satisfy, space0, space1}, 11 | combinator::{all_consuming, map, opt}, 12 | multi::separated_list1, 13 | sequence::{delimited, separated_pair, tuple}, 14 | IResult, ParseTo, 15 | }; 16 | 17 | use crate::details::GraphemeBreak; 18 | 19 | pub(crate) fn generate_egcbreak() -> Result { 20 | // UAX #29 revision 45, Table 1b + Table 1c 21 | // https://www.unicode.org/reports/tr29/tr29-45.html#Table_Combining_Char_Sequences_and_Grapheme_Clusters 22 | // (note the lowercase ri-sequence, and RI → Regional_Indicator) 23 | let (_, mut grammar) = Grammar::parse( 24 | r#" 25 | egc := crlf | Control | precore* core postcore* 26 | crlf := CR LF | CR | LF 27 | precore := Prepend 28 | core := hangul-syllable | ri-sequence | xpicto-sequence | [^Control CR LF] 29 | postcore := [Extend ZWJ SpacingMark] 30 | hangul-syllable := L* (V+ | LV V* | LVT) T* | L+ | T+ 31 | ri-sequence := Regional_Indicator Regional_Indicator 32 | xpicto-sequence := \p{Extended_Pictographic} (Extend* ZWJ \p{Extended_Pictographic})* 33 | "#, 34 | )?; 35 | 36 | grammar.expand(); 37 | 
Ok(format!("{}", grammar)) 38 | } 39 | 40 | #[derive(Debug, Clone)] 41 | struct Grammar<'i>(Vec>); 42 | impl<'i> Grammar<'i> { 43 | fn parse(input: &'i str) -> IResult<&'i str, Self> { 44 | map( 45 | all_consuming(delimited( 46 | multispace0, 47 | separated_list1(newline, Derivation::parse), 48 | multispace0, 49 | )), 50 | Self, 51 | )(input) 52 | } 53 | 54 | fn expand(&mut self) -> &Alternate<'i> { 55 | let mut nonterminals = HashMap::new(); 56 | for Derivation((lhs, rhs)) in self.0.clone() { 57 | nonterminals.insert(lhs, rhs); 58 | } 59 | 60 | let Derivation((_, root)) = &mut self.0[0]; 61 | root.expand(&nonterminals); 62 | 63 | root 64 | } 65 | } 66 | impl Display for Grammar<'_> { 67 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 68 | self.0[0].0 .1.fmt(f) 69 | } 70 | } 71 | 72 | #[derive(Debug, Clone)] 73 | struct Derivation<'i>((&'i str, Alternate<'i>)); 74 | impl<'i> Derivation<'i> { 75 | fn parse(input: &'i str) -> IResult<&'i str, Self> { 76 | map( 77 | separated_pair( 78 | delimited(space0, parse_nonterminal, space0), 79 | tag(":="), 80 | delimited(space0, Alternate::parse, space0), 81 | ), 82 | Self, 83 | )(input) 84 | } 85 | } 86 | 87 | #[derive(Debug, Clone)] 88 | struct Alternate<'i>(Vec>); 89 | impl<'i> Alternate<'i> { 90 | fn parse(input: &'i str) -> IResult<&'i str, Self> { 91 | map( 92 | separated_list1(tag("|"), delimited(space0, Sequence::parse, space0)), 93 | Self, 94 | )(input) 95 | } 96 | fn expand(&mut self, nonterminals: &HashMap<&'i str, Alternate<'i>>) { 97 | for sequence in &mut self.0 { 98 | sequence.expand(nonterminals); 99 | } 100 | } 101 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 102 | let (first, rest) = self.0.split_first().unwrap(); 103 | first.fmt(f)?; 104 | for x in rest { 105 | write!(f, "|")?; 106 | x.fmt(f)?; 107 | } 108 | Ok(()) 109 | } 110 | } 111 | 112 | #[derive(Debug, Clone)] 113 | struct Sequence<'i>(Vec>); 114 | impl<'i> Sequence<'i> { 115 | fn parse(input: &'i str) -> IResult<&'i str, Self> { 116 | 
map(separated_list1(space1, TermRepeat::parse), Self)(input) 117 | } 118 | fn expand(&mut self, nonterminals: &HashMap<&'i str, Alternate<'i>>) { 119 | for TermRepeat((term, _)) in &mut self.0 { 120 | term.expand(nonterminals); 121 | } 122 | } 123 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 124 | for x in &self.0 { 125 | x.fmt(f)?; 126 | } 127 | Ok(()) 128 | } 129 | } 130 | 131 | #[derive(Debug, Clone)] 132 | struct TermRepeat<'i>((Term<'i>, Repeat)); 133 | impl<'i> TermRepeat<'i> { 134 | fn parse(input: &'i str) -> IResult<&'i str, Self> { 135 | map(tuple((Term::parse, Repeat::parse)), Self)(input) 136 | } 137 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 138 | self.0 .0.fmt(f)?; 139 | self.0 .1.fmt(f) 140 | } 141 | } 142 | 143 | #[derive(Debug, Clone)] 144 | enum Term<'i> { 145 | Nonterminal(&'i str), 146 | GcbValue(GcbValue), 147 | PropertyName(&'i str), 148 | GcbClass(bool, Vec), 149 | Group(Alternate<'i>), 150 | } 151 | impl<'i> Term<'i> { 152 | fn parse(input: &'i str) -> IResult<&'i str, Self> { 153 | alt(( 154 | map(parse_nonterminal, Self::Nonterminal), 155 | map(GcbValue::parse, Self::GcbValue), 156 | map( 157 | delimited(tag("\\p{"), parse_name, tag("}")), 158 | Self::PropertyName, 159 | ), 160 | map( 161 | delimited( 162 | tag("["), 163 | tuple((opt(tag("^")), separated_list1(space1, GcbValue::parse))), 164 | tag("]"), 165 | ), 166 | |(not, gcbs)| Self::GcbClass(not.is_some(), gcbs), 167 | ), 168 | map(delimited(tag("("), Alternate::parse, tag(")")), Self::Group), 169 | ))(input) 170 | } 171 | fn expand(&mut self, nonterminals: &HashMap<&'i str, Alternate<'i>>) { 172 | match self { 173 | Self::Nonterminal(k) => { 174 | let mut inner = nonterminals[k].clone(); 175 | inner.expand(nonterminals); 176 | *self = Self::Group(inner); 177 | } 178 | Self::Group(alternate) => alternate.expand(nonterminals), 179 | _ => {} 180 | } 181 | } 182 | fn fmt(&self, f: &mut Formatter) -> fmt::Result { 183 | match self { 184 | Self::Nonterminal(_) => panic!(), 
// (continuation of the `match self` inside `Term::fmt`, which opens on the
// previous line; these arms are unchanged)
            Self::GcbValue(x) => {
                write!(f, "[")?;
                x.fmt(f)?;
                write!(f, "]")?;
            }
            Self::PropertyName("Extended_Pictographic") => {
                write!(f, "[\\x{:02X}-\\x{:02X}]", 0x80, 0xFF)?;
            }
            Self::PropertyName(_) => panic!(),
            Self::GcbClass(not, gcbs) => {
                let not = ["", "^"][*not as usize];
                write!(f, "[{}", not)?;
                for x in gcbs {
                    x.fmt(f)?;
                }
                write!(f, "]")?;
            }
            Self::Group(alternate) => {
                write!(f, "(?:")?;
                alternate.fmt(f)?;
                write!(f, ")")?;
            }
        };
        Ok(())
    }
}

/// A single Grapheme_Cluster_Break value named in the grammar.
#[derive(Debug, Clone)]
struct GcbValue(GraphemeBreak);
impl GcbValue {
    /// Parses a property-value name (see `parse_name`) and converts it to a
    /// `GraphemeBreak` through `ParseTo`.
    ///
    /// A name that is not a known `GraphemeBreak` yields a recoverable nom
    /// error (`ErrorKind::MapOpt`) instead of panicking, so the failure is
    /// reported through `IResult` like every other parse failure in this
    /// grammar.
    fn parse(input: &str) -> IResult<&str, Self> {
        map(
            nom::combinator::map_opt(parse_name, |name: &str| name.parse_to()),
            GcbValue,
        )(input)
    }

    /// Writes the value as two `\xNN` escapes: the raw discriminant and the
    /// same discriminant with bit 7 set.
    ///
    /// NOTE(review): presumably the high-bit form is the same value flagged as
    /// Extended_Pictographic (`Term::fmt` emits `[\x80-\xFF]` for that
    /// property) — confirm against the code that encodes the input bytes.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let x = self.0 as u8;
        // The discriminant must leave bit 7 free for the flagged variant.
        assert!(x < 0x80);
        write!(f, "\\x{:02X}\\x{:02X}", x, x | 0x80)
    }
}

/// Repetition suffix that may follow a term: none, `*` or `+`.
#[derive(Debug, Clone, Copy)]
enum Repeat {
    One,
    Star,
    Plus,
}
impl Repeat {
    /// Parses an optional `*` or `+`; a missing suffix is `Repeat::One`.
    fn parse(input: &str) -> IResult<&str, Self> {
        map(opt(one_of("*+")), |x| match x {
            None => Self::One,
            Some('*') => Self::Star,
            Some('+') => Self::Plus,
            // `one_of("*+")` can only ever produce one of the two characters above.
            Some(other) => unreachable!("one_of(\"*+\") returned {:?}", other),
        })(input)
    }

    /// Writes the suffix back out: nothing for `One`, `*` for `Star`, `+` for `Plus`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::One => {}
            Self::Star => f.write_str("*")?,
            Self::Plus => f.write_str("+")?,
        };
        Ok(())
    }
}

/// Parses a nonterminal name: starts with an ASCII lowercase letter and
/// continues with ASCII lowercase letters or `-`.
fn parse_nonterminal(input: &str) -> IResult<&str, &str> {
    // Check the first character only; the result is discarded so that
    // `take_while1` below still starts from the original input.
    satisfy(|x| x.is_ascii_lowercase())(input)?;
    take_while1(|x: char| x == '-' || x.is_ascii_lowercase())(input)
}

/// Parses a property or property-value name: starts with an ASCII uppercase
/// letter and continues with ASCII letters or `_`.
fn parse_name(input: &str) -> IResult<&str, &str> {
    // Same peek-then-take shape as `parse_nonterminal`.
    satisfy(|x| x.is_ascii_uppercase())(input)?;
    take_while1(|x: char| x == '_' || x.is_ascii_alphabetic())(input)
}
-------------------------------------------------------------------------------- /src/new.sass: -------------------------------------------------------------------------------- 1 | @use "sass:math" 2 | 3 | .icon 4 | width: 1.5rem 5 | height: 1.5rem 6 | 7 | @font-face 8 | font-family: 'Material Symbols Outlined' 9 | src: url('../helper/dist/MaterialSymbolsOutlined.woff2') format('woff2') 10 | .material-symbols-outlined 11 | font-family: 'Material Symbols Outlined' 12 | font-weight: normal 13 | font-style: normal 14 | // font-size: 24px 15 | line-height: 1 16 | letter-spacing: normal 17 | text-transform: none 18 | display: inline-block 19 | white-space: nowrap 20 | word-wrap: normal 21 | direction: ltr 22 | -moz-font-feature-settings: 'liga' 23 | -moz-osx-font-smoothing: grayscale 24 | 25 | @font-face 26 | font-family: Twitter Color Emoji 27 | src: url("../helper/dist/twemoji-15.1.0.woff2") 28 | font-display: swap 29 | 30 | @font-face 31 | font-family: Symbola 32 | src: url("../helper/dist/Symbola-14.00.woff2") 33 | font-display: swap 34 | 35 | @font-face 36 | font-family: LastResort 37 | src: url("LastResort-6.0d1e3.ttf") 38 | font-display: swap 39 | 40 | @font-face 41 | font-family: AdobeBlank 42 | src: url("AdobeBlank-1.045.otf.woff") 43 | font-display: swap 44 | 45 | $interface-font: system-ui, -apple-system, Segoe UI, Roboto, Helvetica Neue LT Std, Helvetica Neue, Helvetica, sans-serif 46 | $display-font: Symbola // FIXME Symbola, , LastResort, AdobeBlank 47 | 48 | $choice-cell-size: 2.5rem 49 | $choice-glyph-size: 1.5rem 50 | 51 | * 52 | box-sizing: border-box 53 | 54 | :root 55 | // prevent scrollbar flapping when zoomed 56 | overflow: hidden 57 | 58 | body 59 | margin: 0 60 | 61 | .Charming 62 | // better than 100vh on iOS 63 | position: absolute 64 | top: 0 65 | bottom: 0 66 | left: 0 67 | right: 0 68 | 69 | display: flex 70 | flex-direction: column 71 | font-family: $interface-font 72 | 73 | .Detail 74 | flex: 1 0 10em 75 | 76 | > .big 77 | 
$choice-glyph-size: 4rem 78 | $box-size: 6rem 79 | 80 | font-size: $choice-glyph-size 81 | line-height: math.div($box-size, $choice-glyph-size) 82 | 83 | .Map 84 | flex: 1 0 10em 85 | 86 | &.measurer 87 | overflow-y: scroll 88 | 89 | .Search 90 | padding: 0 91 | 92 | > .toolbar 93 | padding-left: 1em 94 | 95 | @media (min-width: 384px) 96 | .Charming 97 | .Search 98 | padding: 0 1em 99 | 100 | > .toolbar 101 | padding-left: 0 102 | 103 | @media (min-width: 512px) 104 | .Charming 105 | flex-direction: row 106 | 107 | .Detail 108 | flex: 1 0 16em 109 | 110 | > .big 111 | $glyph-size: 8rem 112 | $box-size: 12rem 113 | 114 | font-size: $glyph-size 115 | line-height: math.div($box-size, $glyph-size) 116 | 117 | .Map 118 | flex: 3 0 16em 119 | 120 | .Detail 121 | padding: 1em 122 | overflow: auto 123 | position: relative 124 | 125 | * 126 | margin-top: 0 127 | margin-bottom: 0 128 | 129 | * + * 130 | margin-top: 0.5em 131 | 132 | > .loading 133 | font-size: 13em 134 | text-align: center 135 | color: #C0C0C0 136 | 137 | > .toolbar 138 | display: flex 139 | flex-direction: row 140 | gap: 1rem 141 | 142 | > * 143 | margin: 0 144 | 145 | > a 146 | flex: 0 0 auto 147 | font-size: 1.5rem 148 | 149 | &[aria-label=search] 150 | left: 1rem 151 | 152 | &[aria-label=source] 153 | right: 1rem 154 | 155 | > .space 156 | flex: 1 1 auto 157 | 158 | > h1 159 | flex: 1 1 auto 160 | margin: 0 161 | font-size: 1.5em 162 | text-align: center 163 | 164 | > .big 165 | display: block 166 | margin: 0 167 | text-align: center 168 | text-decoration: none 169 | color: inherit 170 | 171 | > p 172 | font-size: 1.5em 173 | text-align: center 174 | 175 | > dl 176 | margin-top: 1em 177 | 178 | > dt.compact 179 | float: left 180 | clear: left 181 | 182 | > dd.compact 183 | float: right 184 | clear: right 185 | 186 | .Search 187 | position: absolute 188 | top: 0 189 | bottom: 0 190 | left: 0 191 | right: 0 192 | background: #FFFFFF 193 | display: flex 194 | flex-direction: column 195 | 196 | &[hidden] 
197 | display: none 198 | 199 | > .toolbar 200 | flex: 0 0 $choice-glyph-size*2 201 | display: flex 202 | flex-direction: row 203 | gap: 1rem 204 | 205 | > * 206 | margin: 0 207 | 208 | > a 209 | flex: 0 0 auto 210 | align-self: center 211 | font-size: 1.5rem 212 | height: 1em 213 | 214 | > input 215 | flex: 1 1 auto 216 | min-width: 0 217 | font-size: $choice-glyph-size 218 | 219 | > .results 220 | flex: 1 0 auto 221 | padding-top: 1em 222 | 223 | > div > div 224 | // react-window sets overflow:auto in @style 225 | overflow-y: scroll !important 226 | 227 | .Search > div > div > div > ul, 228 | .Detail.sequence > ul 229 | list-style: none 230 | margin: 0 231 | padding: 0 232 | 233 | > li 234 | margin: 0 235 | padding: 0 236 | 237 | > a 238 | // --ascent: calc(#{$choice-cell-size} - (#{$choice-cell-size} - #{$font-size}) / 2) 239 | // background: linear-gradient(to bottom, #00ff0080 0 var(--ascent), transparent var(--ascent) 100%) 240 | $font-size: 1rem 241 | 242 | display: block 243 | height: $choice-cell-size 244 | font-size: $font-size 245 | line-height: math.div($choice-cell-size, $font-size) 246 | text-decoration: none 247 | 248 | overflow: hidden 249 | white-space: nowrap 250 | text-overflow: ellipsis 251 | 252 | > li.sequence 253 | > a 254 | > span.label 255 | display: inline-block 256 | line-height: 1 257 | font-size: 0.75em 258 | vertical-align: middle 259 | 260 | .choice 261 | vertical-align: middle 262 | 263 | .Detail.sequence 264 | > ul 265 | margin-top: 1em 266 | 267 | .AliasHint 268 | line-height: 1 269 | font-size: 0.75rem 270 | padding: 0.25rem 271 | border: 1px solid 272 | border-radius: 0.5rem 273 | color: #606060 274 | 275 | .AliasList 276 | position: relative 277 | list-style: none 278 | padding: 0 0 0 3rem 279 | 280 | > li 281 | // background: #00ff0080 282 | line-height: calc(1px + 0.25rem + 1 * 0.75rem + 0.25rem + 1px) 283 | 284 | > .marker 285 | // background: #ff00ff80 286 | width: 3rem 287 | position: absolute 288 | left: 0 289 | 
text-align: left 290 | 291 | .choice 292 | $outline-size: 1px 293 | 294 | display: inline-block 295 | vertical-align: top 296 | width: $choice-cell-size 297 | height: $choice-cell-size 298 | 299 | font-size: $choice-glyph-size 300 | line-height: math.div($choice-cell-size, $choice-glyph-size) 301 | text-align: center 302 | text-decoration: none 303 | color: black 304 | 305 | outline: $outline-size solid #C0C0C0 306 | outline-offset: math.div(-$outline-size*3, 2) 307 | overflow: hidden // Firefox 308 | 309 | &.active 310 | background: #C0C0C0 311 | 312 | > .Display.synthetic 313 | color: #606060 314 | 315 | .Display 316 | font-family: $display-font 317 | 318 | > .emoji 319 | font-family: Twitter Color Emoji, $display-font 320 | 321 | &.synthetic 322 | color: #A0A0A0 323 | 324 | &.tofu 325 | font-size: 0.5em 326 | font-family: monospace, monospace 327 | 328 | > span 329 | position: relative 330 | 331 | > span:first-child:nth-last-child(4) 332 | top: math.div(-7em, 12) 333 | left: 1ch 334 | 335 | + span 336 | top: math.div(-7em, 12) 337 | left: 1ch 338 | 339 | + span 340 | top: math.div(3em, 12) 341 | left: -1ch 342 | 343 | + span 344 | top: math.div(3em, 12) 345 | left: -1ch 346 | 347 | > span:first-child:nth-last-child(6) 348 | top: math.div(-7em, 12) 349 | left: 1.5ch 350 | 351 | + span 352 | top: math.div(-7em, 12) 353 | left: 1.5ch 354 | 355 | + span 356 | top: math.div(-7em, 12) 357 | left: 1.5ch 358 | 359 | + span 360 | top: math.div(3em, 12) 361 | left: -1.5ch 362 | 363 | + span 364 | top: math.div(3em, 12) 365 | left: -1.5ch 366 | 367 | + span 368 | top: math.div(3em, 12) 369 | left: -1.5ch 370 | 371 | &.diagonal 372 | font-size: 0.5em 373 | font-family: monospace, monospace 374 | 375 | > span:first-child:nth-last-child(2) 376 | vertical-align: math.div(2em, 12) 377 | 378 | + span 379 | vertical-align: math.div(-1em, 12) 380 | 381 | > span:first-child:nth-last-child(3) 382 | vertical-align: math.div(4em, 12) 383 | 384 | + span 385 | vertical-align: 
math.div(1em, 12) 386 | 387 | + span 388 | vertical-align: math.div(-2em, 12) 389 | 390 | > span:first-child:nth-last-child(4) 391 | vertical-align: math.div(5em, 12) 392 | 393 | + span 394 | vertical-align: math.div(2em, 12) 395 | 396 | + span 397 | vertical-align: math.div(-1em, 12) 398 | 399 | + span 400 | vertical-align: math.div(-4em, 12) 401 | 402 | > span:first-child:nth-last-child(5) 403 | vertical-align: math.div(5em, 12) 404 | 405 | + span 406 | vertical-align: math.div(3em, 12) 407 | 408 | + span 409 | vertical-align: math.div(1em, 12) 410 | 411 | + span 412 | vertical-align: math.div(-1em, 12) 413 | 414 | + span 415 | vertical-align: math.div(-3em, 12) 416 | -------------------------------------------------------------------------------- /src/old.js: -------------------------------------------------------------------------------- 1 | import "core-js/stable"; 2 | import "regenerator-runtime/runtime"; 3 | 4 | import { getString, kDefinitionExists, isEmojiPresentation, isSpaceSeparator, getOldName } from "./data"; 5 | import { fetchAllData } from "./fetch"; 6 | import { pointToString, stringToPoint } from "./encoding"; 7 | import { toHexadecimal, pointToYouPlus, pointToString16, pointToString8, pointToEntity10 } from "./formatting"; 8 | import { pointToDiagonal, pointToSubstitute } from "./Display"; 9 | 10 | var ucd_version = '14.0.0', 11 | grid_elements = [], 12 | grid_base, 13 | current_cp, 14 | data_ready = false, 15 | data = null, 16 | data_defaults = { 17 | u16: cp => pointToString16(cp) || "(none)", 18 | u8: cp => pointToString8(cp) || "(none)", 19 | ent: cp => pointToEntity10(cp) || "(none)", 20 | name: '(unknown or unassigned)', 21 | block: '(unknown)', 22 | age: '(unknown)', 23 | gc: 'Unassigned (Cn)', 24 | uhman: '(not applicable)', 25 | }; 26 | 27 | function init_grid() { 28 | var row, cell, div; 29 | for (var i = 0; i < 256; i++) { 30 | if (i % 16 == 0) { 31 | row = $(''); 32 | $('#grid').append(row); 33 | } 34 | cell = $(''); 35 | div = $('
'); 36 | div.attr('id', 'g' + i); 37 | div.click(click_handler); 38 | cell.append(div); 39 | row.append(cell); 40 | grid_elements.push(div); 41 | } 42 | } 43 | 44 | function cp_display(cp) { 45 | var diagonal = pointToDiagonal(cp); 46 | 47 | if (diagonal != null) { 48 | return diagonal; 49 | } 50 | 51 | var substitute = pointToSubstitute(data, cp); 52 | 53 | if (substitute != null) { 54 | return substitute; 55 | } 56 | 57 | return pointToString(cp); 58 | } 59 | 60 | function update_grid() { 61 | grid_elements.forEach(function(e, i) { 62 | var cp = grid_base + i; 63 | e.text(cp_display(cp)); 64 | e.removeClass("like_emoji"); 65 | e.removeClass("like_C0"); 66 | e.removeClass("like_C1"); 67 | e.removeClass("like_space"); 68 | if (like_emoji(cp)) 69 | e.addClass("like_emoji"); 70 | if (like_C0(cp)) 71 | e.addClass("like_C0"); 72 | if (like_C1(cp)) 73 | e.addClass("like_C1"); 74 | if (like_space(cp)) 75 | e.addClass("like_space"); 76 | }); 77 | } 78 | 79 | function update_info() { 80 | var cp = current_cp; 81 | $('#cp').text(pointToYouPlus(cp)); 82 | $('#big').val(cp_display(cp)); 83 | $("#big, #goto_char").removeClass("like_emoji"); 84 | $("#big, #goto_char").removeClass("like_C0"); 85 | $("#big, #goto_char").removeClass("like_C1"); 86 | $("#big, #goto_char").removeClass("like_space"); 87 | if (like_emoji(cp)) 88 | $("#big, #goto_char").addClass("like_emoji"); 89 | if (like_C0(cp)) 90 | $("#big, #goto_char").addClass("like_C0"); 91 | if (like_C1(cp)) 92 | $("#big, #goto_char").addClass("like_C1"); 93 | if (like_space(cp)) 94 | $("#big, #goto_char").addClass("like_space"); 95 | if (!data_ready) 96 | return; 97 | document.title = pointToYouPlus(cp) + ' ' + get_data(cp, 'name'); 98 | for (var x in data_defaults) 99 | $('#data_' + x).text(get_data(cp, x)); 100 | } 101 | 102 | function set_hash(cp) { 103 | location.hash = toHexadecimal(cp); 104 | } 105 | 106 | function set_hash_text(text, field) { 107 | if (text.length == 0) 108 | return; 109 | else 110 | var cp = 
stringToPoint(text); 111 | if (field !== void 0) { 112 | $(field).val(cp_display(cp)); 113 | yield_then_select(field); 114 | } 115 | set_hash(cp); 116 | } 117 | 118 | function yield_then_select(field) { 119 | if (!$(field).data("composing")) { 120 | setTimeout(function() { 121 | $(field).select(); 122 | }, 0); 123 | } 124 | } 125 | 126 | function replace(cp) { 127 | location.replace("#" + toHexadecimal(cp)); 128 | } 129 | 130 | function hashchange_handler() { 131 | var cp = parseInt(location.hash.slice(1), 16); 132 | if (isNaN(cp) || cp < 0 || cp > 0x10ffff) 133 | if (current_cp == undefined) 134 | return replace(0); 135 | else 136 | return replace(current_cp); 137 | if (location.hash.slice(1) != toHexadecimal(cp)) 138 | return replace(cp); 139 | current_cp = cp; 140 | var new_grid_base = cp - cp % 256; 141 | if (new_grid_base != grid_base) { 142 | grid_base = new_grid_base; 143 | update_grid(); 144 | } 145 | $('#goto_hex').val(toHexadecimal(cp)); 146 | $('#goto_dec').val(cp); 147 | $('#goto_char').val(cp_display(cp)); 148 | $('#grid td').removeClass('selected'); 149 | grid_elements[cp % 256].parent().addClass('selected'); 150 | update_info(); 151 | } 152 | 153 | function click_handler() { 154 | var i = parseInt(this.id.substr(1)); 155 | set_hash(grid_base + i); 156 | } 157 | 158 | function load_data() { 159 | $('#loading_noscript').hide(); 160 | $('#loading_files').show(); 161 | fetchAllData().then(result => { 162 | data = result; 163 | data_ready = true; 164 | $('#loading').hide(); 165 | $('#ui').show(); 166 | update_grid(); 167 | update_info(); 168 | }); 169 | } 170 | 171 | function get_data(cp, prop) { 172 | if (!data || !(prop in data)) { 173 | var substitute = data_defaults[prop]; 174 | if (typeof substitute == "function") 175 | return substitute(cp); 176 | return substitute; 177 | } 178 | if ( 179 | prop == "name" 180 | || prop == "gc" 181 | || prop == "block" 182 | || prop == "age" 183 | || prop == "uhman" 184 | ) { 185 | var result = prop == "name" 186 | ? 
getOldName(data, cp) 187 | : getString(data, prop, cp); 188 | if (result != null) { 189 | return result; 190 | } 191 | 192 | var substitute = data_defaults[prop]; 193 | if (typeof substitute == "function") 194 | return substitute(cp); 195 | return substitute; 196 | } 197 | throw new Error; 198 | } 199 | 200 | function get_clipboard(event) { 201 | if ("clipboardData" in event) { 202 | return event.clipboardData; 203 | } 204 | if ("originalEvent" in event) { 205 | return get_clipboard(event.originalEvent); 206 | } 207 | if ("clipboardData" in window) { 208 | return window.clipboardData; 209 | } 210 | return null; 211 | } 212 | 213 | function like_emoji(cp) { 214 | return data ? isEmojiPresentation(data, cp) : false; 215 | } 216 | 217 | function like_space(cp) { 218 | return data ? isSpaceSeparator(data, cp) : false; 219 | } 220 | 221 | function is_C0(cp) { 222 | return cp < 0x0020; 223 | } 224 | 225 | function like_C0(cp) { 226 | return is_C0(cp) 227 | || cp == 0x007F 228 | || cp == 0x2061 229 | || cp == 0x2062 230 | || cp == 0x2063 231 | || cp == 0x2064 232 | || cp >= 0xE0020 && cp < 0xE007F; 233 | } 234 | 235 | function is_C1(cp) { 236 | return cp >= 0x0080 && cp < 0x00A0; 237 | } 238 | 239 | function like_C1(cp) { 240 | return is_C1(cp) 241 | || cp == 0x00AD 242 | || cp == 0x034F 243 | || cp == 0x061C 244 | || cp >= 0x180B && cp < 0x180F 245 | || cp >= 0x200B && cp < 0x2010 246 | || cp >= 0x2028 && cp < 0x202F 247 | || cp >= 0x2060 && cp < 0x2061 248 | || cp >= 0x2066 && cp < 0x2070 249 | || cp >= 0x3164 && cp < 0x3165 250 | || cp >= 0xFFA0 && cp < 0xFFA1 251 | || cp >= 0xFFF9 && cp < 0xFFFD 252 | || cp >= 0xFE00 && cp < 0xFE10 253 | || cp >= 0xFEFF && cp < 0xFF00 254 | || cp >= 0xE0001 && cp < 0xE0002 255 | || cp >= 0xE007F && cp < 0xE0080 256 | || cp >= 0xE0100 && cp < 0xE01F0; 257 | } 258 | 259 | init_grid(); 260 | hashchange_handler(); 261 | load_data(); 262 | 263 | $(window).on('hashchange', hashchange_handler); 264 | $(window).keydown(function(e) { 265 | if 
(e.metaKey || e.shiftKey || e.ctrlKey || e.altKey) 266 | return; 267 | switch (e.keyCode) { 268 | case 33: // page up 269 | set_hash(current_cp - 256); break; 270 | case 34: // page down 271 | set_hash(current_cp + 256); break; 272 | case 35: // end 273 | if (current_cp == grid_base + 255) 274 | set_hash(0x10ffff); 275 | else 276 | set_hash(grid_base + 255); 277 | break; 278 | case 36: // home 279 | if (current_cp == grid_base) 280 | set_hash(0); 281 | else 282 | set_hash(grid_base); 283 | break; 284 | case 37: // left arrow 285 | set_hash(current_cp - 1); break; 286 | case 38: // up arrow 287 | set_hash(current_cp - 16); break; 288 | case 39: // right arrow 289 | set_hash(current_cp + 1); break; 290 | case 40: // down arrow 291 | set_hash(current_cp + 16); break; 292 | } 293 | }); 294 | $('input').keydown(function(e) { 295 | e.stopPropagation(); 296 | }); 297 | $('#ui_tabs a').click(function(e) { 298 | $('#ui_content > div').hide(); 299 | $('#' + this.id.substr(4)).show(); 300 | $('#ui_tabs a').removeClass('selected'); 301 | $(this).addClass('selected'); 302 | e.preventDefault(); 303 | }); 304 | $('#ucd_version').text(ucd_version); 305 | $('#search_form, #search_han').on('change keydown paste input submit', function(e) { 306 | var q = $('#search_query').val().toUpperCase(); 307 | var sr = $('#search_results'); 308 | if (!q.length) 309 | return; 310 | sr.empty(); 311 | var han = $("#search_han").is(":checked"); 312 | for (var n = 0, i = 0; n < 50 && i < 0x110000; i++) { 313 | if (!han && kDefinitionExists(data, i)) 314 | continue; 315 | var name = getOldName(data, i); 316 | if (name == null) 317 | continue; 318 | if (name.toUpperCase().indexOf(q) > -1) { 319 | n++; 320 | sr.append($("
") 321 | .text(pointToYouPlus(i) + "\u2001" + name) 322 | .click(set_hash.bind(null, i))); 323 | } 324 | } 325 | e.preventDefault(); 326 | }); 327 | $('#goto_hex').on('change keydown paste input', function() { 328 | if (this.value.length == 0) 329 | return; 330 | set_hash(parseInt(this.value, 16)); 331 | }); 332 | $('#goto_dec').on('change keydown paste input', function() { 333 | if (this.value.length == 0) 334 | return; 335 | set_hash(parseInt(this.value, 10)); 336 | }); 337 | 338 | $("#big, #goto_char") 339 | .on("cut copy", function(event) { 340 | event.preventDefault(); 341 | var text = pointToString(current_cp); 342 | get_clipboard(event).setData("text", text); 343 | }) 344 | .on("paste", function(event) { 345 | event.preventDefault(); 346 | var text = get_clipboard(event).getData("text"); 347 | set_hash_text(text, this); 348 | }) 349 | .on("compositionstart", function() { 350 | $(this).data("composing", true); 351 | }) 352 | .on("compositionend", function() { 353 | $(this).data("composing", false); 354 | yield_then_select(this); 355 | }) 356 | .on("input", function(event) { 357 | set_hash_text(this.value, this); 358 | }) 359 | .on("focus", function(event) { 360 | $(this).select(); 361 | }); 362 | -------------------------------------------------------------------------------- /src/search.worker.ts: -------------------------------------------------------------------------------- 1 | import "core-js/stable"; 2 | import "regenerator-runtime/runtime"; 3 | 4 | import { 5 | Data, 6 | findSequenceIndex, 7 | getAliasCount, 8 | getAliasType, 9 | getAliasValue, 10 | getNameExceptNr2, 11 | getNextClusterBreak, 12 | getSequenceNames, 13 | getSequencePoints, 14 | getString, 15 | hasAnyAlias, 16 | hasAnyNameExceptNr2, 17 | hasAnyUhdef, 18 | } from "./data"; 19 | import { pointToString, stringToPoint, stringToPoints } from "./encoding"; 20 | import { toHexadecimal, toDecimal } from "./formatting"; 21 | import { KeyedSearchResult, SearchResult } from "./search"; 22 | 23 | // 
https://github.com/webpack-contrib/worker-loader/issues/94#issuecomment-449861198
export default {} as typeof Worker & { new (): Worker };

declare function postMessage(message: any): void;

// Most recent Data object received in a message; used as the default when a
// later message does not carry one.
let cache: Data | null = null;

/**
 * Interpret the query as a hexadecimal code point.
 *
 * Yields a single "hex" result when the query parses to a value in
 * [0, 0x110000) and has exactly the length toHexadecimal() produces for that
 * value; otherwise yields nothing. The length check rejects input that
 * parseInt() only partly consumed or that carries extra leading characters.
 */
function* searchByHexadecimal(query: string): Generator<KeyedSearchResult> {
  const point = parseInt(query, 16);

  if (Number.isNaN(point) || point < 0) {
    return;
  }

  if (toHexadecimal(point).length != query.length) {
    return;
  }

  if (point >= 0x110000) {
    return;
  }

  yield { key: `hex/${point}`, points: [point], reason: "hex", score: 0 };
}

/**
 * Interpret the query as a decimal code point.
 *
 * Same contract as searchByHexadecimal, using base 10 and toDecimal().
 */
function* searchByDecimal(query: string): Generator<KeyedSearchResult> {
  const point = parseInt(query, 10);

  if (Number.isNaN(point) || point < 0) {
    return;
  }

  if (toDecimal(point).length != query.length) {
    return;
  }

  if (point >= 0x110000) {
    return;
  }

  yield { key: `dec/${point}`, points: [point], reason: "dec", score: 0 };
}

/**
 * Break the query down into its individual code points, one result each,
 * stopping after `graphemes` cluster breaks have been consumed.
 *
 * @param data      loaded data, passed through to getNextClusterBreak()
 * @param query     the text to break down
 * @param graphemes maximum number of clusters to report
 */
function* searchByBreakdown(
  data: Data,
  query: string,
  graphemes: number,
): Generator<KeyedSearchResult> {
  let context = getNextClusterBreak(data, query);
  if (context == null) return;

  let graphemeCount = 0;
  let pointCount = 0;
  // `i` is only ever used as an argument to String.prototype.slice, which
  // counts UTF-16 code units, so it is seeded from the unit index like every
  // later assignment to it.
  // NOTE(review): this assumes the initial context describes the start of the
  // query, where the point and unit indices coincide; confirm against
  // getNextClusterBreak() in ./data.
  let i = context.startUnitIndex;
  while ((context = getNextClusterBreak(data, query, context)) != null) {
    // Iterating a string walks it by code point, so `pointish` is one
    // code point's worth of text.
    for (const pointish of query.slice(i, context.startUnitIndex)) {
      const point = stringToPoint(pointish);

      if (point != null) {
        yield {
          key: `breakdown/${pointCount++}/${point}`,
          points: [point],
          reason: "breakdown",
          score: 0,
        };
      }
    }

    i = context.startUnitIndex;
    if (++graphemeCount >= graphemes) {
      return;
    }
  }
}

98 | function* searchBySequenceValue( 99 | data: Data, 100 | query: string, 101 | ): Generator { 102 | if (query.length == 0) return; 103 | if (query.length ==
pointToString(stringToPoint(query)!).length) return; 104 | 105 | const points = stringToPoints(query); 106 | const sequenceIndex = findSequenceIndex(data, points); 107 | if (sequenceIndex == null) return; 108 | 109 | yield { 110 | key: `sequenceValue/${points.join("+")}`, 111 | points, 112 | reason: "sequenceValue", 113 | sequenceIndex, 114 | score: 0, 115 | }; 116 | } 117 |

/**
 * Find named sequences whose name contains the query (case-insensitively).
 *
 * Every matching name yields its own result, so a sequence with several
 * matching names is reported more than once under the same key; each result
 * carries the score and offset computed by scoreMatch(). An empty query
 * yields nothing.
 */
function* searchBySequenceName(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  if (query.length == 0) return;
  const upper = query.toUpperCase();

  for (let i = 0; i < data.info.sequenceCount; i++) {
    const sequenceNames = getSequenceNames(data, i);
    if (sequenceNames == null) continue;

    for (const [j, sequenceName] of sequenceNames.entries()) {
      const search = sequenceName.toUpperCase();
      if (!search.includes(upper)) continue;

      const points = getSequencePoints(data, i)!;
      const [score, offset] = scoreMatch(search, upper);
      yield {
        key: `sequenceName/${points.join("+")}`,
        points,
        reason: "sequenceName",
        sequenceIndex: i,
        sequenceNameIndex: j,
        score,
        offset,
      };
    }
  }
}

148 | function* searchByName( 149 | data: Data, 150 | query: string, 151 | ): Generator { 152 | const upper = query.toUpperCase(); 153 | 154 | for (let page = 0; page < 0x1100; page++) { 155 | if (page % 0x100 == 0) 156 | performance.mark(`sBN ${Math.floor(page / 0x100)} <`); 157 | if (page % 0x100 == 0xff) 158 | performance.mark(`sBN ${Math.floor(page / 0x100)} >`); 159 | if (!hasAnyNameExceptNr2(data, page)) continue; 160 | 161 | for (let point = page * 0x100; point < (page + 1) * 0x100; point++) { 162 | const name = getNameExceptNr2(data, point); 163 | if (name == null) continue; 164 | 165 | const search = name.toUpperCase(); 166 | if (search.includes(upper)) { 167 | const [score, offset] = scoreMatch(search, upper); 168 | yield { 169 | key: `nameish/${point}`, 170 | points:
[point], 171 | reason: "name", 172 | score, 173 | offset, 174 | }; 175 | } 176 | } 177 | } 178 | 179 | for (let i = 0; i < 17; i++) 180 | performance.measure(`sBN ${i}`, `sBN ${i} <`, `sBN ${i} >`); 181 | } 182 |

/**
 * Find code points that have an alias containing the query
 * (case-insensitively).
 *
 * Results use the same `nameish/<point>` key as name matches. Performance
 * marks are recorded at the first and last page of each block of 0x100 pages
 * and turned into 17 "sBNA <n>" measures once the scan is complete.
 */
function* searchByNameAlias(
  data: Data,
  query: string,
): Generator<KeyedSearchResult> {
  const upper = query.toUpperCase();
  // Running index passed to getAliasValue()/getAliasType(). It advances once
  // per alias of every visited code point, matched or not; pages skipped via
  // hasAnyAlias() do not advance it.
  let aliasIndex = 0;

  for (let page = 0; page < 0x1100; page++) {
    // The marks are taken before the hasAnyAlias() shortcut so that both ends
    // of every measure exist even when those pages are skipped.
    if (page % 0x100 == 0)
      performance.mark(`sBNA ${Math.floor(page / 0x100)} <`);
    if (page % 0x100 == 0xff)
      performance.mark(`sBNA ${Math.floor(page / 0x100)} >`);
    if (!hasAnyAlias(data, page)) continue;

    for (let point = page * 0x100; point < (page + 1) * 0x100; point++) {
      const aliasCount = getAliasCount(data, point);
      for (let i = 0; i < aliasCount; i++, aliasIndex++) {
        const name = getAliasValue(data, aliasIndex)!;
        const type = getAliasType(data, aliasIndex)!;

        const search = name.toUpperCase();
        if (search.includes(upper)) {
          const [score, offset] = scoreMatch(search, upper);
          yield {
            key: `nameish/${point}`,
            points: [point],
            reason: "alias",
            aliasIndex,
            aliasType: type,
            score,
            offset,
          };
        }
      }
    }
  }

  for (let i = 0; i < 17; i++)
    performance.measure(`sBNA ${i}`, `sBNA ${i} <`, `sBNA ${i} >`);
}

224 | function* searchByUhdef( 225 | data: Data, 226 | query: string, 227 | ): Generator { 228 | const upper = query.toUpperCase(); 229 | 230 | for (let page = 0; page < 0x1100; page++) { 231 | if (page % 0x100 == 0) 232 | performance.mark(`sBU ${Math.floor(page / 0x100)} <`); 233 | if (page % 0x100 == 0xff) 234 | performance.mark(`sBU ${Math.floor(page / 0x100)} >`); 235 | if (!hasAnyUhdef(data, page)) continue; 236 | 237 | for (let point = page * 0x100; point < (page + 1) * 0x100; point++) { 238 | const uhdef = getString(data, "uhdef", point); 239 | if (uhdef ==
null) continue; 240 | 241 | const search = uhdef.toUpperCase(); 242 | if (search.includes(upper)) { 243 | const [score, offset] = scoreMatch(search, upper); 244 | yield { 245 | key: `uhdef/${point}`, 246 | points: [point], 247 | reason: "uhdef", 248 | score, 249 | offset, 250 | }; 251 | } 252 | } 253 | } 254 | 255 | for (let i = 0; i < 17; i++) 256 | performance.measure(`sBU ${i}`, `sBU ${i} <`, `sBU ${i} >`); 257 | } 258 |

/**
 * Rate how well `needle` matches inside `haystack`.
 *
 * Returns `[score, offset]`. The score is a sum of up to four weights, each
 * counted at most once:
 *   1 - the match is followed by a space or ends the haystack,
 *   2 - the match is preceded by a space or starts the haystack,
 *   4 - the match is delimited that way on both sides,
 *   8 - the haystack equals the needle.
 * The offset starts as the first occurrence of the needle and is overwritten
 * by each rule that fires, so it ends up at the highest-weighted match.
 */
function scoreMatch(haystack: string, needle: string): [number, number] {
  let total = 0;
  let where = haystack.indexOf(needle);
  const tailStart = haystack.length - needle.length;

  // weight 1: ends at a word boundary
  if (haystack.endsWith(needle)) {
    total += 1;
    where = tailStart;
  } else {
    const at = haystack.indexOf(`${needle} `);
    if (at != -1) {
      total += 1;
      where = at;
    }
  }

  // weight 2: starts at a word boundary
  if (haystack.startsWith(needle)) {
    total += 2;
    where = 0;
  } else {
    const at = haystack.indexOf(` ${needle}`);
    if (at != -1) {
      total += 2;
      where = at + 1;
    }
  }

  // weight 4: bounded on both sides
  if (haystack.startsWith(`${needle} `)) {
    total += 4;
    where = 0;
  } else if (haystack.endsWith(` ${needle}`)) {
    total += 4;
    where = tailStart;
  } else {
    const at = haystack.indexOf(` ${needle} `);
    if (at != -1) {
      total += 4;
      where = at + 1;
    }
  }

  // weight 8: exact match
  if (haystack == needle) {
    total += 8;
    where = 0;
  }

  return [total, where];
}

/**
 * Sort results in place by score, highest first, breaking ties by ascending
 * code points; returns the same array.
 */
function sortByScore(results: KeyedSearchResult[]): KeyedSearchResult[] {
  results.sort((left, right) => {
    const byScore = right.score - left.score;
    if (byScore) return byScore;
    return comparePoints(left, right);
  });
  return results;
}

299 | function dedupResults(results: KeyedSearchResult[]): KeyedSearchResult[] { 300 |
// sort by point ascending, then by score descending, then keep best result for each key 301 | return results 302 | .sort((p, q) => comparePoints(p, q) || q.score - p.score) 303 | .filter((x, i, xs) => x.key != xs[i - 1]?.key); 304 | } 305 |

/**
 * Order two results by their `points` arrays: element by element, and when
 * one array is a prefix of the other, the shorter one first.
 */
function comparePoints(p: SearchResult, q: SearchResult): number {
  const shared = Math.min(p.points.length, q.points.length);
  for (let i = 0; i < shared; i++) {
    const mine = p.points[i];
    const theirs = q.points[i];
    if (mine != theirs) return mine - theirs;
  }
  return p.points.length - q.points.length;
}

// Handle one search request. The message carries the query and, optionally,
// the data to search; when data is omitted the previously cached data is used.
// The reply lists results in a fixed order: hexadecimal, decimal, sequence
// value, breakdown, then the ranked name-like matches, then the ranked uhdef
// matches.
addEventListener("message", (event) => {
  const { data = cache, query } = event.data;

  const direct: KeyedSearchResult[] = [
    ...searchByHexadecimal(query),
    ...searchByDecimal(query),
    ...searchBySequenceValue(data, query),
    // three graphemes allows checking for invisible characters between two visible characters
    ...searchByBreakdown(data, query, 3),
  ];

  const nameLike = sortByScore(
    dedupResults([
      ...searchByName(data, query),
      ...searchByNameAlias(data, query),
      ...searchBySequenceName(data, query),
    ]),
  );

  const definitions = sortByScore([...searchByUhdef(data, query)]);

  const result: KeyedSearchResult[] = [...direct, ...nameLike, ...definitions];

  cache = data;
  postMessage(result);
});
332 | -------------------------------------------------------------------------------- /data/Blocks.txt: -------------------------------------------------------------------------------- 1 | # Blocks-16.0.0.txt 2 | # Date: 2024-02-02 3 | # © 2024 Unicode®, Inc. 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 5 | # For terms of use and license, see https://www.unicode.org/terms_of_use.html 6 | # 7 | # Unicode Character Database 8 | # For documentation, see https://www.unicode.org/reports/tr44/ 9 | # 10 | # Format: 11 | # Start Code..End Code; Block Name 12 | 13 | # ================================================ 14 | 15 | # Note: When comparing block names, casing, whitespace, hyphens, 16 | # and underbars are ignored.
17 | # For example, "Latin Extended-A" and "latin extended a" are equivalent. 18 | # For more information on the comparison of property values, 19 | # see UAX #44: https://www.unicode.org/reports/tr44/ 20 | # 21 | # All block ranges start with a value where (cp MOD 16) = 0, 22 | # and end with a value where (cp MOD 16) = 15. In other words, 23 | # the last hexadecimal digit of the start of range is ...0 24 | # and the last hexadecimal digit of the end of range is ...F. 25 | # This constraint on block ranges guarantees that allocations 26 | # are done in terms of whole columns, and that code chart display 27 | # never involves splitting columns in the charts. 28 | # 29 | # All code points not explicitly listed for Block 30 | # have the value No_Block. 31 | 32 | # Property: Block 33 | # 34 | # @missing: 0000..10FFFF; No_Block 35 | 36 | 0000..007F; Basic Latin 37 | 0080..00FF; Latin-1 Supplement 38 | 0100..017F; Latin Extended-A 39 | 0180..024F; Latin Extended-B 40 | 0250..02AF; IPA Extensions 41 | 02B0..02FF; Spacing Modifier Letters 42 | 0300..036F; Combining Diacritical Marks 43 | 0370..03FF; Greek and Coptic 44 | 0400..04FF; Cyrillic 45 | 0500..052F; Cyrillic Supplement 46 | 0530..058F; Armenian 47 | 0590..05FF; Hebrew 48 | 0600..06FF; Arabic 49 | 0700..074F; Syriac 50 | 0750..077F; Arabic Supplement 51 | 0780..07BF; Thaana 52 | 07C0..07FF; NKo 53 | 0800..083F; Samaritan 54 | 0840..085F; Mandaic 55 | 0860..086F; Syriac Supplement 56 | 0870..089F; Arabic Extended-B 57 | 08A0..08FF; Arabic Extended-A 58 | 0900..097F; Devanagari 59 | 0980..09FF; Bengali 60 | 0A00..0A7F; Gurmukhi 61 | 0A80..0AFF; Gujarati 62 | 0B00..0B7F; Oriya 63 | 0B80..0BFF; Tamil 64 | 0C00..0C7F; Telugu 65 | 0C80..0CFF; Kannada 66 | 0D00..0D7F; Malayalam 67 | 0D80..0DFF; Sinhala 68 | 0E00..0E7F; Thai 69 | 0E80..0EFF; Lao 70 | 0F00..0FFF; Tibetan 71 | 1000..109F; Myanmar 72 | 10A0..10FF; Georgian 73 | 1100..11FF; Hangul Jamo 74 | 1200..137F; Ethiopic 75 | 1380..139F; Ethiopic Supplement 76 | 
13A0..13FF; Cherokee 77 | 1400..167F; Unified Canadian Aboriginal Syllabics 78 | 1680..169F; Ogham 79 | 16A0..16FF; Runic 80 | 1700..171F; Tagalog 81 | 1720..173F; Hanunoo 82 | 1740..175F; Buhid 83 | 1760..177F; Tagbanwa 84 | 1780..17FF; Khmer 85 | 1800..18AF; Mongolian 86 | 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 87 | 1900..194F; Limbu 88 | 1950..197F; Tai Le 89 | 1980..19DF; New Tai Lue 90 | 19E0..19FF; Khmer Symbols 91 | 1A00..1A1F; Buginese 92 | 1A20..1AAF; Tai Tham 93 | 1AB0..1AFF; Combining Diacritical Marks Extended 94 | 1B00..1B7F; Balinese 95 | 1B80..1BBF; Sundanese 96 | 1BC0..1BFF; Batak 97 | 1C00..1C4F; Lepcha 98 | 1C50..1C7F; Ol Chiki 99 | 1C80..1C8F; Cyrillic Extended-C 100 | 1C90..1CBF; Georgian Extended 101 | 1CC0..1CCF; Sundanese Supplement 102 | 1CD0..1CFF; Vedic Extensions 103 | 1D00..1D7F; Phonetic Extensions 104 | 1D80..1DBF; Phonetic Extensions Supplement 105 | 1DC0..1DFF; Combining Diacritical Marks Supplement 106 | 1E00..1EFF; Latin Extended Additional 107 | 1F00..1FFF; Greek Extended 108 | 2000..206F; General Punctuation 109 | 2070..209F; Superscripts and Subscripts 110 | 20A0..20CF; Currency Symbols 111 | 20D0..20FF; Combining Diacritical Marks for Symbols 112 | 2100..214F; Letterlike Symbols 113 | 2150..218F; Number Forms 114 | 2190..21FF; Arrows 115 | 2200..22FF; Mathematical Operators 116 | 2300..23FF; Miscellaneous Technical 117 | 2400..243F; Control Pictures 118 | 2440..245F; Optical Character Recognition 119 | 2460..24FF; Enclosed Alphanumerics 120 | 2500..257F; Box Drawing 121 | 2580..259F; Block Elements 122 | 25A0..25FF; Geometric Shapes 123 | 2600..26FF; Miscellaneous Symbols 124 | 2700..27BF; Dingbats 125 | 27C0..27EF; Miscellaneous Mathematical Symbols-A 126 | 27F0..27FF; Supplemental Arrows-A 127 | 2800..28FF; Braille Patterns 128 | 2900..297F; Supplemental Arrows-B 129 | 2980..29FF; Miscellaneous Mathematical Symbols-B 130 | 2A00..2AFF; Supplemental Mathematical Operators 131 | 2B00..2BFF; Miscellaneous 
Symbols and Arrows 132 | 2C00..2C5F; Glagolitic 133 | 2C60..2C7F; Latin Extended-C 134 | 2C80..2CFF; Coptic 135 | 2D00..2D2F; Georgian Supplement 136 | 2D30..2D7F; Tifinagh 137 | 2D80..2DDF; Ethiopic Extended 138 | 2DE0..2DFF; Cyrillic Extended-A 139 | 2E00..2E7F; Supplemental Punctuation 140 | 2E80..2EFF; CJK Radicals Supplement 141 | 2F00..2FDF; Kangxi Radicals 142 | 2FF0..2FFF; Ideographic Description Characters 143 | 3000..303F; CJK Symbols and Punctuation 144 | 3040..309F; Hiragana 145 | 30A0..30FF; Katakana 146 | 3100..312F; Bopomofo 147 | 3130..318F; Hangul Compatibility Jamo 148 | 3190..319F; Kanbun 149 | 31A0..31BF; Bopomofo Extended 150 | 31C0..31EF; CJK Strokes 151 | 31F0..31FF; Katakana Phonetic Extensions 152 | 3200..32FF; Enclosed CJK Letters and Months 153 | 3300..33FF; CJK Compatibility 154 | 3400..4DBF; CJK Unified Ideographs Extension A 155 | 4DC0..4DFF; Yijing Hexagram Symbols 156 | 4E00..9FFF; CJK Unified Ideographs 157 | A000..A48F; Yi Syllables 158 | A490..A4CF; Yi Radicals 159 | A4D0..A4FF; Lisu 160 | A500..A63F; Vai 161 | A640..A69F; Cyrillic Extended-B 162 | A6A0..A6FF; Bamum 163 | A700..A71F; Modifier Tone Letters 164 | A720..A7FF; Latin Extended-D 165 | A800..A82F; Syloti Nagri 166 | A830..A83F; Common Indic Number Forms 167 | A840..A87F; Phags-pa 168 | A880..A8DF; Saurashtra 169 | A8E0..A8FF; Devanagari Extended 170 | A900..A92F; Kayah Li 171 | A930..A95F; Rejang 172 | A960..A97F; Hangul Jamo Extended-A 173 | A980..A9DF; Javanese 174 | A9E0..A9FF; Myanmar Extended-B 175 | AA00..AA5F; Cham 176 | AA60..AA7F; Myanmar Extended-A 177 | AA80..AADF; Tai Viet 178 | AAE0..AAFF; Meetei Mayek Extensions 179 | AB00..AB2F; Ethiopic Extended-A 180 | AB30..AB6F; Latin Extended-E 181 | AB70..ABBF; Cherokee Supplement 182 | ABC0..ABFF; Meetei Mayek 183 | AC00..D7AF; Hangul Syllables 184 | D7B0..D7FF; Hangul Jamo Extended-B 185 | D800..DB7F; High Surrogates 186 | DB80..DBFF; High Private Use Surrogates 187 | DC00..DFFF; Low Surrogates 188 | E000..F8FF; 
Private Use Area 189 | F900..FAFF; CJK Compatibility Ideographs 190 | FB00..FB4F; Alphabetic Presentation Forms 191 | FB50..FDFF; Arabic Presentation Forms-A 192 | FE00..FE0F; Variation Selectors 193 | FE10..FE1F; Vertical Forms 194 | FE20..FE2F; Combining Half Marks 195 | FE30..FE4F; CJK Compatibility Forms 196 | FE50..FE6F; Small Form Variants 197 | FE70..FEFF; Arabic Presentation Forms-B 198 | FF00..FFEF; Halfwidth and Fullwidth Forms 199 | FFF0..FFFF; Specials 200 | 10000..1007F; Linear B Syllabary 201 | 10080..100FF; Linear B Ideograms 202 | 10100..1013F; Aegean Numbers 203 | 10140..1018F; Ancient Greek Numbers 204 | 10190..101CF; Ancient Symbols 205 | 101D0..101FF; Phaistos Disc 206 | 10280..1029F; Lycian 207 | 102A0..102DF; Carian 208 | 102E0..102FF; Coptic Epact Numbers 209 | 10300..1032F; Old Italic 210 | 10330..1034F; Gothic 211 | 10350..1037F; Old Permic 212 | 10380..1039F; Ugaritic 213 | 103A0..103DF; Old Persian 214 | 10400..1044F; Deseret 215 | 10450..1047F; Shavian 216 | 10480..104AF; Osmanya 217 | 104B0..104FF; Osage 218 | 10500..1052F; Elbasan 219 | 10530..1056F; Caucasian Albanian 220 | 10570..105BF; Vithkuqi 221 | 105C0..105FF; Todhri 222 | 10600..1077F; Linear A 223 | 10780..107BF; Latin Extended-F 224 | 10800..1083F; Cypriot Syllabary 225 | 10840..1085F; Imperial Aramaic 226 | 10860..1087F; Palmyrene 227 | 10880..108AF; Nabataean 228 | 108E0..108FF; Hatran 229 | 10900..1091F; Phoenician 230 | 10920..1093F; Lydian 231 | 10980..1099F; Meroitic Hieroglyphs 232 | 109A0..109FF; Meroitic Cursive 233 | 10A00..10A5F; Kharoshthi 234 | 10A60..10A7F; Old South Arabian 235 | 10A80..10A9F; Old North Arabian 236 | 10AC0..10AFF; Manichaean 237 | 10B00..10B3F; Avestan 238 | 10B40..10B5F; Inscriptional Parthian 239 | 10B60..10B7F; Inscriptional Pahlavi 240 | 10B80..10BAF; Psalter Pahlavi 241 | 10C00..10C4F; Old Turkic 242 | 10C80..10CFF; Old Hungarian 243 | 10D00..10D3F; Hanifi Rohingya 244 | 10D40..10D8F; Garay 245 | 10E60..10E7F; Rumi Numeral Symbols 246 | 
10E80..10EBF; Yezidi 247 | 10EC0..10EFF; Arabic Extended-C 248 | 10F00..10F2F; Old Sogdian 249 | 10F30..10F6F; Sogdian 250 | 10F70..10FAF; Old Uyghur 251 | 10FB0..10FDF; Chorasmian 252 | 10FE0..10FFF; Elymaic 253 | 11000..1107F; Brahmi 254 | 11080..110CF; Kaithi 255 | 110D0..110FF; Sora Sompeng 256 | 11100..1114F; Chakma 257 | 11150..1117F; Mahajani 258 | 11180..111DF; Sharada 259 | 111E0..111FF; Sinhala Archaic Numbers 260 | 11200..1124F; Khojki 261 | 11280..112AF; Multani 262 | 112B0..112FF; Khudawadi 263 | 11300..1137F; Grantha 264 | 11380..113FF; Tulu-Tigalari 265 | 11400..1147F; Newa 266 | 11480..114DF; Tirhuta 267 | 11580..115FF; Siddham 268 | 11600..1165F; Modi 269 | 11660..1167F; Mongolian Supplement 270 | 11680..116CF; Takri 271 | 116D0..116FF; Myanmar Extended-C 272 | 11700..1174F; Ahom 273 | 11800..1184F; Dogra 274 | 118A0..118FF; Warang Citi 275 | 11900..1195F; Dives Akuru 276 | 119A0..119FF; Nandinagari 277 | 11A00..11A4F; Zanabazar Square 278 | 11A50..11AAF; Soyombo 279 | 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 280 | 11AC0..11AFF; Pau Cin Hau 281 | 11B00..11B5F; Devanagari Extended-A 282 | 11BC0..11BFF; Sunuwar 283 | 11C00..11C6F; Bhaiksuki 284 | 11C70..11CBF; Marchen 285 | 11D00..11D5F; Masaram Gondi 286 | 11D60..11DAF; Gunjala Gondi 287 | 11EE0..11EFF; Makasar 288 | 11F00..11F5F; Kawi 289 | 11FB0..11FBF; Lisu Supplement 290 | 11FC0..11FFF; Tamil Supplement 291 | 12000..123FF; Cuneiform 292 | 12400..1247F; Cuneiform Numbers and Punctuation 293 | 12480..1254F; Early Dynastic Cuneiform 294 | 12F90..12FFF; Cypro-Minoan 295 | 13000..1342F; Egyptian Hieroglyphs 296 | 13430..1345F; Egyptian Hieroglyph Format Controls 297 | 13460..143FF; Egyptian Hieroglyphs Extended-A 298 | 14400..1467F; Anatolian Hieroglyphs 299 | 16100..1613F; Gurung Khema 300 | 16800..16A3F; Bamum Supplement 301 | 16A40..16A6F; Mro 302 | 16A70..16ACF; Tangsa 303 | 16AD0..16AFF; Bassa Vah 304 | 16B00..16B8F; Pahawh Hmong 305 | 16D40..16D7F; Kirat Rai 306 | 
16E40..16E9F; Medefaidrin 307 | 16F00..16F9F; Miao 308 | 16FE0..16FFF; Ideographic Symbols and Punctuation 309 | 17000..187FF; Tangut 310 | 18800..18AFF; Tangut Components 311 | 18B00..18CFF; Khitan Small Script 312 | 18D00..18D7F; Tangut Supplement 313 | 1AFF0..1AFFF; Kana Extended-B 314 | 1B000..1B0FF; Kana Supplement 315 | 1B100..1B12F; Kana Extended-A 316 | 1B130..1B16F; Small Kana Extension 317 | 1B170..1B2FF; Nushu 318 | 1BC00..1BC9F; Duployan 319 | 1BCA0..1BCAF; Shorthand Format Controls 320 | 1CC00..1CEBF; Symbols for Legacy Computing Supplement 321 | 1CF00..1CFCF; Znamenny Musical Notation 322 | 1D000..1D0FF; Byzantine Musical Symbols 323 | 1D100..1D1FF; Musical Symbols 324 | 1D200..1D24F; Ancient Greek Musical Notation 325 | 1D2C0..1D2DF; Kaktovik Numerals 326 | 1D2E0..1D2FF; Mayan Numerals 327 | 1D300..1D35F; Tai Xuan Jing Symbols 328 | 1D360..1D37F; Counting Rod Numerals 329 | 1D400..1D7FF; Mathematical Alphanumeric Symbols 330 | 1D800..1DAAF; Sutton SignWriting 331 | 1DF00..1DFFF; Latin Extended-G 332 | 1E000..1E02F; Glagolitic Supplement 333 | 1E030..1E08F; Cyrillic Extended-D 334 | 1E100..1E14F; Nyiakeng Puachue Hmong 335 | 1E290..1E2BF; Toto 336 | 1E2C0..1E2FF; Wancho 337 | 1E4D0..1E4FF; Nag Mundari 338 | 1E5D0..1E5FF; Ol Onal 339 | 1E7E0..1E7FF; Ethiopic Extended-B 340 | 1E800..1E8DF; Mende Kikakui 341 | 1E900..1E95F; Adlam 342 | 1EC70..1ECBF; Indic Siyaq Numbers 343 | 1ED00..1ED4F; Ottoman Siyaq Numbers 344 | 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 345 | 1F000..1F02F; Mahjong Tiles 346 | 1F030..1F09F; Domino Tiles 347 | 1F0A0..1F0FF; Playing Cards 348 | 1F100..1F1FF; Enclosed Alphanumeric Supplement 349 | 1F200..1F2FF; Enclosed Ideographic Supplement 350 | 1F300..1F5FF; Miscellaneous Symbols and Pictographs 351 | 1F600..1F64F; Emoticons 352 | 1F650..1F67F; Ornamental Dingbats 353 | 1F680..1F6FF; Transport and Map Symbols 354 | 1F700..1F77F; Alchemical Symbols 355 | 1F780..1F7FF; Geometric Shapes Extended 356 | 1F800..1F8FF; 
Supplemental Arrows-C 357 | 1F900..1F9FF; Supplemental Symbols and Pictographs 358 | 1FA00..1FA6F; Chess Symbols 359 | 1FA70..1FAFF; Symbols and Pictographs Extended-A 360 | 1FB00..1FBFF; Symbols for Legacy Computing 361 | 20000..2A6DF; CJK Unified Ideographs Extension B 362 | 2A700..2B73F; CJK Unified Ideographs Extension C 363 | 2B740..2B81F; CJK Unified Ideographs Extension D 364 | 2B820..2CEAF; CJK Unified Ideographs Extension E 365 | 2CEB0..2EBEF; CJK Unified Ideographs Extension F 366 | 2EBF0..2EE5F; CJK Unified Ideographs Extension I 367 | 2F800..2FA1F; CJK Compatibility Ideographs Supplement 368 | 30000..3134F; CJK Unified Ideographs Extension G 369 | 31350..323AF; CJK Unified Ideographs Extension H 370 | E0000..E007F; Tags 371 | E0100..E01EF; Variation Selectors Supplement 372 | F0000..FFFFF; Supplementary Private Use Area-A 373 | 100000..10FFFF; Supplementary Private Use Area-B 374 | 375 | # EOF 376 | -------------------------------------------------------------------------------- /data/Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 4 4 | 5 | [[package]] 6 | name = "addr2line" 7 | version = "0.21.0" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" 10 | dependencies = [ 11 | "gimli", 12 | ] 13 | 14 | [[package]] 15 | name = "adler" 16 | version = "1.0.2" 17 | source = "registry+https://github.com/rust-lang/crates.io-index" 18 | checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" 19 | 20 | [[package]] 21 | name = "aho-corasick" 22 | version = "1.1.3" 23 | source = "registry+https://github.com/rust-lang/crates.io-index" 24 | checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" 25 | dependencies = [ 26 | "memchr", 27 | ] 28 | 29 | [[package]] 30 | name = "backtrace" 31 | version = "0.3.71" 32 | source = "registry+https://github.com/rust-lang/crates.io-index" 33 | checksum = "26b05800d2e817c8b3b4b54abd461726265fa9789ae34330622f2db9ee696f9d" 34 | dependencies = [ 35 | "addr2line", 36 | "cc", 37 | "cfg-if", 38 | "libc", 39 | "miniz_oxide", 40 | "object", 41 | "rustc-demangle", 42 | ] 43 | 44 | [[package]] 45 | name = "bon" 46 | version = "3.4.0" 47 | source = "registry+https://github.com/rust-lang/crates.io-index" 48 | checksum = "8a8a41e51fda5f7d87152d00f50d08ce24bf5cee8a962facf7f2526a66f8a5fa" 49 | dependencies = [ 50 | "bon-macros", 51 | "rustversion", 52 | ] 53 | 54 | [[package]] 55 | name = "bon-macros" 56 | version = "3.4.0" 57 | source = "registry+https://github.com/rust-lang/crates.io-index" 58 | checksum = "6b592add4016ac26ca340298fed5cc2524abe8bacae78ebca3780286da588304" 59 | dependencies = [ 60 | "darling", 61 | "ident_case", 62 | "prettyplease", 63 | "proc-macro2", 64 | "quote", 65 | "rustversion", 66 | "syn", 67 | ] 68 | 69 | [[package]] 70 | name = "byteorder" 71 | version = "1.5.0" 72 | source = "registry+https://github.com/rust-lang/crates.io-index" 73 | checksum = 
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" 74 | 75 | [[package]] 76 | name = "cc" 77 | version = "1.2.16" 78 | source = "registry+https://github.com/rust-lang/crates.io-index" 79 | checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c" 80 | dependencies = [ 81 | "shlex", 82 | ] 83 | 84 | [[package]] 85 | name = "cfg-if" 86 | version = "1.0.0" 87 | source = "registry+https://github.com/rust-lang/crates.io-index" 88 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" 89 | 90 | [[package]] 91 | name = "color-eyre" 92 | version = "0.6.3" 93 | source = "registry+https://github.com/rust-lang/crates.io-index" 94 | checksum = "55146f5e46f237f7423d74111267d4597b59b0dad0ffaf7303bce9945d843ad5" 95 | dependencies = [ 96 | "backtrace", 97 | "color-spantrace", 98 | "eyre", 99 | "indenter", 100 | "once_cell", 101 | "owo-colors", 102 | "tracing-error", 103 | ] 104 | 105 | [[package]] 106 | name = "color-spantrace" 107 | version = "0.2.1" 108 | source = "registry+https://github.com/rust-lang/crates.io-index" 109 | checksum = "cd6be1b2a7e382e2b98b43b2adcca6bb0e465af0bdd38123873ae61eb17a72c2" 110 | dependencies = [ 111 | "once_cell", 112 | "owo-colors", 113 | "tracing-core", 114 | "tracing-error", 115 | ] 116 | 117 | [[package]] 118 | name = "darling" 119 | version = "0.20.10" 120 | source = "registry+https://github.com/rust-lang/crates.io-index" 121 | checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" 122 | dependencies = [ 123 | "darling_core", 124 | "darling_macro", 125 | ] 126 | 127 | [[package]] 128 | name = "darling_core" 129 | version = "0.20.10" 130 | source = "registry+https://github.com/rust-lang/crates.io-index" 131 | checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5" 132 | dependencies = [ 133 | "fnv", 134 | "ident_case", 135 | "proc-macro2", 136 | "quote", 137 | "strsim", 138 | "syn", 139 | ] 140 | 141 | [[package]] 142 | name = "darling_macro" 
143 | version = "0.20.10" 144 | source = "registry+https://github.com/rust-lang/crates.io-index" 145 | checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" 146 | dependencies = [ 147 | "darling_core", 148 | "quote", 149 | "syn", 150 | ] 151 | 152 | [[package]] 153 | name = "data" 154 | version = "0.0.0" 155 | dependencies = [ 156 | "bon", 157 | "byteorder", 158 | "color-eyre", 159 | "enumflags2", 160 | "nom", 161 | "regex", 162 | "serde", 163 | "serde_json", 164 | ] 165 | 166 | [[package]] 167 | name = "enumflags2" 168 | version = "0.7.11" 169 | source = "registry+https://github.com/rust-lang/crates.io-index" 170 | checksum = "ba2f4b465f5318854c6f8dd686ede6c0a9dc67d4b1ac241cf0eb51521a309147" 171 | dependencies = [ 172 | "enumflags2_derive", 173 | ] 174 | 175 | [[package]] 176 | name = "enumflags2_derive" 177 | version = "0.7.11" 178 | source = "registry+https://github.com/rust-lang/crates.io-index" 179 | checksum = "fc4caf64a58d7a6d65ab00639b046ff54399a39f5f2554728895ace4b297cd79" 180 | dependencies = [ 181 | "proc-macro2", 182 | "quote", 183 | "syn", 184 | ] 185 | 186 | [[package]] 187 | name = "eyre" 188 | version = "0.6.12" 189 | source = "registry+https://github.com/rust-lang/crates.io-index" 190 | checksum = "7cd915d99f24784cdc19fd37ef22b97e3ff0ae756c7e492e9fbfe897d61e2aec" 191 | dependencies = [ 192 | "indenter", 193 | "once_cell", 194 | ] 195 | 196 | [[package]] 197 | name = "fnv" 198 | version = "1.0.7" 199 | source = "registry+https://github.com/rust-lang/crates.io-index" 200 | checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" 201 | 202 | [[package]] 203 | name = "gimli" 204 | version = "0.28.1" 205 | source = "registry+https://github.com/rust-lang/crates.io-index" 206 | checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" 207 | 208 | [[package]] 209 | name = "ident_case" 210 | version = "1.0.1" 211 | source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum 
= "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" 213 | 214 | [[package]] 215 | name = "indenter" 216 | version = "0.3.3" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" 219 | 220 | [[package]] 221 | name = "itoa" 222 | version = "1.0.4" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc" 225 | 226 | [[package]] 227 | name = "lazy_static" 228 | version = "1.5.0" 229 | source = "registry+https://github.com/rust-lang/crates.io-index" 230 | checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 231 | 232 | [[package]] 233 | name = "libc" 234 | version = "0.2.171" 235 | source = "registry+https://github.com/rust-lang/crates.io-index" 236 | checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" 237 | 238 | [[package]] 239 | name = "memchr" 240 | version = "2.7.4" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 243 | 244 | [[package]] 245 | name = "minimal-lexical" 246 | version = "0.2.1" 247 | source = "registry+https://github.com/rust-lang/crates.io-index" 248 | checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 249 | 250 | [[package]] 251 | name = "miniz_oxide" 252 | version = "0.7.4" 253 | source = "registry+https://github.com/rust-lang/crates.io-index" 254 | checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" 255 | dependencies = [ 256 | "adler", 257 | ] 258 | 259 | [[package]] 260 | name = "nom" 261 | version = "7.1.3" 262 | source = "registry+https://github.com/rust-lang/crates.io-index" 263 | checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" 264 | dependencies = [ 265 | "memchr", 266 | "minimal-lexical", 267 
| ] 268 | 269 | [[package]] 270 | name = "object" 271 | version = "0.32.2" 272 | source = "registry+https://github.com/rust-lang/crates.io-index" 273 | checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" 274 | dependencies = [ 275 | "memchr", 276 | ] 277 | 278 | [[package]] 279 | name = "once_cell" 280 | version = "1.21.1" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "d75b0bedcc4fe52caa0e03d9f1151a323e4aa5e2d78ba3580400cd3c9e2bc4bc" 283 | 284 | [[package]] 285 | name = "owo-colors" 286 | version = "3.5.0" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" 289 | 290 | [[package]] 291 | name = "pin-project-lite" 292 | version = "0.2.16" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" 295 | 296 | [[package]] 297 | name = "prettyplease" 298 | version = "0.2.31" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "5316f57387668042f561aae71480de936257848f9c43ce528e311d89a07cadeb" 301 | dependencies = [ 302 | "proc-macro2", 303 | "syn", 304 | ] 305 | 306 | [[package]] 307 | name = "proc-macro2" 308 | version = "1.0.94" 309 | source = "registry+https://github.com/rust-lang/crates.io-index" 310 | checksum = "a31971752e70b8b2686d7e46ec17fb38dad4051d94024c88df49b667caea9c84" 311 | dependencies = [ 312 | "unicode-ident", 313 | ] 314 | 315 | [[package]] 316 | name = "quote" 317 | version = "1.0.40" 318 | source = "registry+https://github.com/rust-lang/crates.io-index" 319 | checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" 320 | dependencies = [ 321 | "proc-macro2", 322 | ] 323 | 324 | [[package]] 325 | name = "regex" 326 | version = "1.11.1" 327 | source = "registry+https://github.com/rust-lang/crates.io-index" 328 | checksum = 
"b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" 329 | dependencies = [ 330 | "aho-corasick", 331 | "memchr", 332 | "regex-automata", 333 | "regex-syntax", 334 | ] 335 | 336 | [[package]] 337 | name = "regex-automata" 338 | version = "0.4.9" 339 | source = "registry+https://github.com/rust-lang/crates.io-index" 340 | checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" 341 | dependencies = [ 342 | "aho-corasick", 343 | "memchr", 344 | "regex-syntax", 345 | ] 346 | 347 | [[package]] 348 | name = "regex-syntax" 349 | version = "0.8.5" 350 | source = "registry+https://github.com/rust-lang/crates.io-index" 351 | checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" 352 | 353 | [[package]] 354 | name = "rustc-demangle" 355 | version = "0.1.24" 356 | source = "registry+https://github.com/rust-lang/crates.io-index" 357 | checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" 358 | 359 | [[package]] 360 | name = "rustversion" 361 | version = "1.0.20" 362 | source = "registry+https://github.com/rust-lang/crates.io-index" 363 | checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" 364 | 365 | [[package]] 366 | name = "ryu" 367 | version = "1.0.0" 368 | source = "registry+https://github.com/rust-lang/crates.io-index" 369 | checksum = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" 370 | 371 | [[package]] 372 | name = "serde" 373 | version = "1.0.219" 374 | source = "registry+https://github.com/rust-lang/crates.io-index" 375 | checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" 376 | dependencies = [ 377 | "serde_derive", 378 | ] 379 | 380 | [[package]] 381 | name = "serde_derive" 382 | version = "1.0.219" 383 | source = "registry+https://github.com/rust-lang/crates.io-index" 384 | checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" 385 | dependencies = [ 386 | "proc-macro2", 387 | "quote", 388 
| "syn", 389 | ] 390 | 391 | [[package]] 392 | name = "serde_json" 393 | version = "1.0.140" 394 | source = "registry+https://github.com/rust-lang/crates.io-index" 395 | checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" 396 | dependencies = [ 397 | "itoa", 398 | "memchr", 399 | "ryu", 400 | "serde", 401 | ] 402 | 403 | [[package]] 404 | name = "sharded-slab" 405 | version = "0.1.7" 406 | source = "registry+https://github.com/rust-lang/crates.io-index" 407 | checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" 408 | dependencies = [ 409 | "lazy_static", 410 | ] 411 | 412 | [[package]] 413 | name = "shlex" 414 | version = "1.3.0" 415 | source = "registry+https://github.com/rust-lang/crates.io-index" 416 | checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" 417 | 418 | [[package]] 419 | name = "strsim" 420 | version = "0.11.1" 421 | source = "registry+https://github.com/rust-lang/crates.io-index" 422 | checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" 423 | 424 | [[package]] 425 | name = "syn" 426 | version = "2.0.100" 427 | source = "registry+https://github.com/rust-lang/crates.io-index" 428 | checksum = "b09a44accad81e1ba1cd74a32461ba89dee89095ba17b32f5d03683b1b1fc2a0" 429 | dependencies = [ 430 | "proc-macro2", 431 | "quote", 432 | "unicode-ident", 433 | ] 434 | 435 | [[package]] 436 | name = "thread_local" 437 | version = "1.1.8" 438 | source = "registry+https://github.com/rust-lang/crates.io-index" 439 | checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" 440 | dependencies = [ 441 | "cfg-if", 442 | "once_cell", 443 | ] 444 | 445 | [[package]] 446 | name = "tracing" 447 | version = "0.1.41" 448 | source = "registry+https://github.com/rust-lang/crates.io-index" 449 | checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" 450 | dependencies = [ 451 | "pin-project-lite", 452 | "tracing-core", 453 | ] 454 | 455 | 
[[package]] 456 | name = "tracing-core" 457 | version = "0.1.33" 458 | source = "registry+https://github.com/rust-lang/crates.io-index" 459 | checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" 460 | dependencies = [ 461 | "once_cell", 462 | "valuable", 463 | ] 464 | 465 | [[package]] 466 | name = "tracing-error" 467 | version = "0.2.1" 468 | source = "registry+https://github.com/rust-lang/crates.io-index" 469 | checksum = "8b1581020d7a273442f5b45074a6a57d5757ad0a47dac0e9f0bd57b81936f3db" 470 | dependencies = [ 471 | "tracing", 472 | "tracing-subscriber", 473 | ] 474 | 475 | [[package]] 476 | name = "tracing-subscriber" 477 | version = "0.3.19" 478 | source = "registry+https://github.com/rust-lang/crates.io-index" 479 | checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" 480 | dependencies = [ 481 | "sharded-slab", 482 | "thread_local", 483 | "tracing-core", 484 | ] 485 | 486 | [[package]] 487 | name = "unicode-ident" 488 | version = "1.0.5" 489 | source = "registry+https://github.com/rust-lang/crates.io-index" 490 | checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" 491 | 492 | [[package]] 493 | name = "valuable" 494 | version = "0.1.1" 495 | source = "registry+https://github.com/rust-lang/crates.io-index" 496 | checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" 497 | -------------------------------------------------------------------------------- /src/data.ts: -------------------------------------------------------------------------------- 1 | import { EGCBREAK } from "../data/egcbreak"; 2 | 3 | import { pointToYouPlus } from "./formatting"; 4 | import { pointToString, stringToPoint, stringToPoints } from "./encoding"; 5 | 6 | export type StringField = 7 | | "dnrp" 8 | | "gc" 9 | | "block" 10 | | "age" 11 | | "hjsn" 12 | | "uhdef" 13 | | "uhman"; 14 | 15 | export interface Data { 16 | info: DataInfo; 17 | 18 | string: string[]; 19 | 20 | bits: DataView; 21 | 
ebits: DataView; 22 | pagebits: DataView; 23 | name: DataView; 24 | aliasc: DataView; 25 | aliasi: DataView; 26 | aliass: DataView; 27 | aliast: DataView; 28 | dnrp: DataView; 29 | gb: DataView; 30 | gc: DataView; 31 | block: DataView; 32 | age: DataView; 33 | hlvt: DataView; 34 | hjsn: DataView; 35 | uhdef: DataView; 36 | uhman: DataView; 37 | 38 | seqb: DataView; 39 | seqp: DataView; 40 | seqn: DataView; 41 | } 42 | 43 | export interface DataInfo { 44 | sequenceBucketCount: number; 45 | sequenceCount: number; 46 | } 47 | 48 | export enum AliasType { 49 | Correction = 0, 50 | Control = 1, 51 | Alternate = 2, 52 | Figment = 3, 53 | Abbreviation = 4, 54 | Unicode1 = 5, 55 | Cldr = 6, 56 | } 57 | 58 | export interface SequenceBucket { 59 | start: number; 60 | len: number; 61 | } 62 | 63 | export enum GraphemeBreak { 64 | Cr = 1, 65 | Lf = 2, 66 | Control = 3, 67 | Extend = 4, 68 | Zwj = 5, 69 | RegionalIndicator = 6, 70 | Prepend = 7, 71 | SpacingMark = 8, 72 | HangulL = 9, 73 | HangulV = 10, 74 | HangulT = 11, 75 | HangulLV = 12, 76 | HangulLVT = 13, 77 | } 78 | 79 | // https://stackoverflow.com/q/51419176 80 | type KeyOfType<T, U> = { [P in keyof T]: T[P] extends U ? 
P : never }[keyof T]; 81 | type SparseMemberType = { /* method: name of a DataView getter that can be called with just a byte offset; len: bytes read per value */ 82 | method: KeyOfType<DataView, (_: number) => number>; 83 | len: number; 84 | }; 85 | const Uint8: SparseMemberType = { method: "getUint8", len: 1 }; 86 | const Uint16: SparseMemberType = { method: "getUint16", len: 2 }; 87 | /* Two-level lookup of the value stored for point in field. The u16 at byte floor(point / 256) * 2 is the slot of the point's 256-point page; 0xffff there means the page is absent and def is returned. Values start at byte 8704 (= 4352 two-byte page entries, presumably one per page of U+0000..U+10FFFF) and take ty.len bytes each. No littleEndian argument is passed, so multi-byte reads use DataView's default big-endian order. */ 88 | function getSparse<T>( 89 | ty: SparseMemberType, 90 | field: DataView, 91 | def: T, 92 | point: number, 93 | ): number | T { 94 | const page_offset = field.getUint16(Math.floor(point / 256) * 2); 95 | if (page_offset == 0xffff) return def; 96 | 97 | const offset = 8704 + (page_offset * 256 + (point % 256)) * ty.len; 98 | return field[ty.method](offset); 99 | } 100 | 101 | function getFlag(data: Data, shift: number, point: number): boolean { 102 | return !!((getSparse(Uint8, data.bits, 0, point) >> shift) & 1); 103 | } 104 | 105 | function getEmojiFlag(data: Data, shift: number, point: number): boolean { 106 | return !!((getSparse(Uint8, data.ebits, 0, point) >> shift) & 1); 107 | } 108 | 109 | function getPageFlag(data: Data, shift: number, page: number): boolean { 110 | return !!((data.pagebits.getUint8(page) >> shift) & 1); 111 | } 112 | 113 | /** 114 | * Returns the string value for the given point and field. 115 | * 116 | * Do not use this function for the name field. For names, choose a 117 | * semantics appropriate for the context, then define a higher-level 118 | * function that wraps getString0. 
119 | */ 120 | export function getString( 121 | data: Data, 122 | field: StringField, 123 | point: number, 124 | ): string | null { 125 | return getString0(data, field, point); 126 | } 127 | /* Same lookup as getString, but the field may also be "name": reads the point's u16 string index from data[field] (0xffff when the point's page is absent) and resolves it through getStringByIndex. */ 128 | function getString0( 129 | data: Data, 130 | field: "name" | StringField, 131 | point: number, 132 | ): string | null { 133 | const index = getSparse(Uint16, data[field], 0xffff, point); 134 | return getStringByIndex(data, index); 135 | } 136 | /* Returns data.string[index], or null when index is the 0xffff sentinel or lies past the end of data.string. */ 137 | function getStringByIndex(data: Data, index: number): string | null { 138 | if (index == 0xffff || index >= data.string.length) { 139 | return null; 140 | } 141 | 142 | return data.string[index]; 143 | } 144 | 145 | /** 146 | * Returns the Name property for the given point, regardless of 147 | * whether the property is defined by enumeration or by rule. 148 | * 149 | * This name is displayed in the details panel and search results. 150 | * 151 | * Note that charming currently overrides this name while generating 152 | * data files, with the last formal name alias of type figment or 153 | * control or correction (if any). 154 | */ 155 | export function getNameProperty(data: Data, point: number): string | null { 156 | if (hasDerivedNameNr1(data, point)) { /* NR1: the "dnrp" string followed by the Hangul syllable name. NOTE(review): a null from either lookup is interpolated as the text "null". */ 157 | const prefix = getString(data, "dnrp", point); 158 | return `${prefix}${getHangulSyllableName(data, point)}`; 159 | } else if (hasDerivedNameNr2(data, point)) { /* NR2: the "dnrp" string followed by pointToYouPlus(point, "") */ 160 | const prefix = getString(data, "dnrp", point); 161 | return `${prefix}${pointToYouPlus(point, "")}`; 162 | } 163 | /* neither rule applies: use the enumerated "name" field */ 164 | return getString0(data, "name", point); 165 | } 166 | 167 | /** 168 | * Returns the Name property for the given point, but only if it can’t 169 | * be derived by a rule (regardless of whether the name is stated in 170 | * UnicodeData.txt explicitly). 
171 | */ 172 | export function getNonDerivedName(data: Data, point: number): string | null { 173 | if (hasDerivedNameNr1(data, point) || hasDerivedNameNr2(data, point)) { 174 | return null; 175 | } 176 | 177 | return getNameProperty(data, point); 178 | } 179 | 180 | /** 181 | * Returns the Name property for the given point, but only if it can’t 182 | * be derived by rule NR2 (regardless of whether the name is stated in 183 | * UnicodeData.txt explicitly). 184 | * 185 | * This name is used by the search algorithm. 186 | */ 187 | export function getNameExceptNr2(data: Data, point: number): string | null { 188 | if (hasDerivedNameNr2(data, point)) { 189 | return null; 190 | } 191 | 192 | return getNameProperty(data, point); 193 | } 194 | 195 | /** 196 | * Returns the old-charming character name for the given point. 197 | * 198 | * Old-charming overrides character names with Unihan kDefinition (if 199 | * defined), allowing users to search for CJK ideographs by definition 200 | * when #search_han is checked. 201 | */ 202 | export function getOldName(data: Data, point: number): string | null { 203 | // FIXME figment/control/correction 204 | return getString(data, "uhdef", point) ?? getString0(data, "name", point); 205 | } 206 | /* Builds the syllable-name text for point from its packed u16 entry in data.hlvt by concatenating the "hjsn" strings of up to three jamo; returns null when the entry's present bit is clear (0 is the default for an absent page). */ 207 | export function getHangulSyllableName( 208 | data: Data, 209 | point: number, 210 | ): string | null { 211 | // 3.12 Conjoining Jamo Behavior 212 | const L_BASE = 0x1100; 213 | const V_BASE = 0x1161; 214 | const T_BASE = 0x11a7; 215 | 216 | const lvt = getSparse(Uint16, data.hlvt, 0, point); /* bit 15: present flag; bits 14-10, 9-5 and 4-0: offsets added to L_BASE, V_BASE and T_BASE */ 217 | const [present, l, v, t] = [ 218 | (lvt >> 15) & 0b1, 219 | (lvt >> 10) & 0b11111, 220 | (lvt >> 5) & 0b11111, 221 | lvt & 0b11111, 222 | ]; 223 | 224 | if (present == 1) { 225 | const ln = getString(data, "hjsn", L_BASE + l); 226 | const vn = getString(data, "hjsn", V_BASE + v); /* t == 0 contributes an empty third part */ 227 | const tn = t > 0 ? 
getString(data, "hjsn", T_BASE + t) : ""; 228 | return `${ln}${vn}${tn}`; 229 | } 230 | 231 | return null; 232 | } 233 | 234 | export function getAliasCount(data: Data, point: number): number { 235 | return getSparse(Uint8, data.aliasc, 0, point); 236 | } 237 | 238 | export function getAliasBaseIndex(data: Data, point: number): number | null { 239 | return getSparse(Uint16, data.aliasi, null, point); 240 | } 241 | /* data.aliass is read as packed u16 string indices, one per alias index. */ 242 | export function getAliasValue(data: Data, aliasIndex: number): string | null { 243 | const ty = Uint16; 244 | const offset = aliasIndex * ty.len; 245 | return getStringByIndex(data, data.aliass[ty.method](offset)); 246 | } 247 | /* data.aliast is read as packed u8 values, one per alias index; the byte is returned as-is, so this never actually yields null. */ 248 | export function getAliasType(data: Data, aliasIndex: number): AliasType | null { 249 | const ty = Uint8; 250 | const offset = aliasIndex * ty.len; 251 | return data.aliast[ty.method](offset); 252 | } 253 | /* Binary search of data.seqb for the record whose two leading u32 fields equal (firstPoint, secondPoint). Records are 11 bytes: u32 at +0, u32 at +4, u16 start at +8, u8 len at +10. Assumes records are sorted by (first, second) - TODO confirm against the data generator. Returns null when no record matches. */ 254 | export function findSequenceBucket( 255 | data: Data, 256 | firstPoint: number, 257 | secondPoint: number, 258 | ): SequenceBucket | null { 259 | let h = 0, 260 | i = 0, 261 | j = data.seqb.byteLength / 11; 262 | while (h < j) { /* [h, j) is the live window; h = i keeps record i inside it, so a mismatch in a one-record window means not found */ 263 | i = h + Math.floor((j - h) / 2); 264 | const x = data.seqb.getUint32(i * 11 + 0); 265 | if (x != firstPoint) { 266 | if (j - h == 1) return null; 267 | else if (x < firstPoint) h = i; 268 | else if (x > firstPoint) j = i; 269 | continue; 270 | } 271 | const y = data.seqb.getUint32(i * 11 + 4); 272 | if (y != secondPoint) { 273 | if (j - h == 1) return null; 274 | else if (y < secondPoint) h = i; 275 | else if (y > secondPoint) j = i; 276 | continue; 277 | } else { 278 | break; 279 | } 280 | } /* NOTE(review): with an empty seqb the loop never runs and the reads below throw RangeError */ 281 | const start = data.seqb.getUint16(i * 11 + 8); 282 | const len = data.seqb.getUint8(i * 11 + 10); 283 | return { start, len }; 284 | } 285 | 286 | export function findSequenceIndex(data: Data, points: number[]): number | null { 287 | if (points.length < 2) return null; 288 | const bucket = findSequenceBucket(data, points[0], points[1]); 289 | if (bucket == null) return null; 290 | 291 | 
for (let i = bucket.start; i < bucket.start + bucket.len; i++) { 292 | const ps = getSequencePoints(data, i)!; 293 | if (ps.length == points.length && ps.every((p, i) => p == points[i])) { 294 | return i; 295 | } 296 | } 297 | 298 | return null; 299 | } 300 | /* data.seqp starts with data.info.sequenceCount 3-byte entries (u16 start, u8 len), followed by u32 points indexed from start. */ 301 | export function getSequencePoints( 302 | data: Data, 303 | sequenceIndex: number, 304 | ): number[] | null { 305 | const start = data.seqp.getUint16(sequenceIndex * 3 + 0); 306 | const len = data.seqp.getUint8(sequenceIndex * 3 + 2); 307 | const base = data.info.sequenceCount * 3; 308 | const result = []; 309 | for (let i = start; i < start + len; i++) 310 | result.push(data.seqp.getUint32(base + i * 4)); 311 | return result; 312 | } 313 | /* data.seqn uses the same 3-byte entries (u16 start, u8 len), followed by u16 string indices indexed from start. */ 314 | export function getSequenceNames( 315 | data: Data, 316 | sequenceIndex: number, 317 | ): string[] | null { 318 | const start = data.seqn.getUint16(sequenceIndex * 3 + 0); 319 | const len = data.seqn.getUint8(sequenceIndex * 3 + 2); 320 | const base = data.info.sequenceCount * 3; 321 | const result = []; 322 | for (let i = start; i < start + len; i++) 323 | result.push(getStringByIndex(data, data.seqn.getUint16(base + i * 2))!); 324 | return result; 325 | } 326 | /* Returns the sequenceNameIndex-th name of the given sequence, or null when that index is outside [0, len) or the stored string index does not resolve. */ 327 | export function getSequenceNameByIndices( 328 | data: Data, 329 | sequenceIndex: number, 330 | sequenceNameIndex: number, 331 | ): string | null { 332 | const start = data.seqn.getUint16(sequenceIndex * 3 + 0); 333 | const len = data.seqn.getUint8(sequenceIndex * 3 + 2); /* u8, as in getSequenceNames: entries are 3 bytes, so a u16 read here would also take in the next entry's first byte and defeat the bounds check below */ 334 | if (sequenceNameIndex < 0 || sequenceNameIndex >= len) return null; 335 | 336 | const base = data.info.sequenceCount * 3; 337 | return getStringByIndex( 338 | data, 339 | data.seqn.getUint16(base + (start + sequenceNameIndex) * 2), 340 | ); 341 | } 342 | 343 | export function getGraphemeBreak( 344 | data: Data, 345 | point: number, 346 | ): GraphemeBreak | null { 347 | return getSparse(Uint8, data.gb, null, point); 348 | } 349 | 350 | export function kDefinitionExists(data: Data, point: number): boolean { 351 | return 
getFlag(data, 0, point); 352 | } 353 | /* The point-level predicates from here to hasDerivedNameNr2 each test one bit (the shift argument) of the point's byte in data.bits. */ 354 | export function isSpaceSeparator(data: Data, point: number): boolean { 355 | return getFlag(data, 2, point); 356 | } 357 | 358 | export function isAnyMark(data: Data, point: number): boolean { 359 | return getFlag(data, 3, point); 360 | } 361 | 362 | export function hasDerivedNameNr1(data: Data, point: number): boolean { 363 | return getFlag(data, 4, point); 364 | } 365 | 366 | export function hasDerivedNameNr2(data: Data, point: number): boolean { 367 | return getFlag(data, 5, point); 368 | } 369 | /* Page-level predicates: each tests one bit of the byte at index page in data.pagebits. */ 370 | export function hasAnyNameExceptNr2(data: Data, page: number): boolean { 371 | return getPageFlag(data, 0, page); 372 | } 373 | 374 | export function hasAnyUhdef(data: Data, page: number): boolean { 375 | return getPageFlag(data, 1, page); 376 | } 377 | 378 | export function hasAnyAlias(data: Data, page: number): boolean { 379 | return getPageFlag(data, 2, page); 380 | } 381 | /* Emoji predicates: each tests one bit of the point's byte in data.ebits. */ 382 | export function isEmoji(data: Data, point: number): boolean { 383 | return getEmojiFlag(data, 0, point); 384 | } 385 | 386 | export function isExtendedPictographic(data: Data, point: number): boolean { 387 | return getEmojiFlag(data, 1, point); 388 | } 389 | 390 | export function isEmojiComponent(data: Data, point: number): boolean { 391 | return getEmojiFlag(data, 2, point); 392 | } 393 | 394 | export function isEmojiPresentation(data: Data, point: number): boolean { 395 | return getEmojiFlag(data, 3, point); 396 | } 397 | 398 | export function isEmojiModifier(data: Data, point: number): boolean { 399 | return getEmojiFlag(data, 4, point); 400 | } 401 | 402 | export function isEmojiModifierBase(data: Data, point: number): boolean { 403 | return getEmojiFlag(data, 5, point); 404 | } 405 | /* State threaded through getNextClusterBreak: startUnitIndex counts UTF-16 code units, startPointIndex counts code points, and kind holds one char per code point of the string. */ 406 | interface ClusterBreaker { 407 | startUnitIndex: number; 408 | startPointIndex: number; 409 | kind: string; 410 | } 411 | /* Call with context == null to get the initial state (null for an empty string). Call again with the returned context to advance it in place to the next break; returns null once startUnitIndex has reached string.length. */ 412 | export function getNextClusterBreak( 413 | data: Data, 414 | string: string, 415 | context: ClusterBreaker | null = null, 
416 | ): ClusterBreaker | null { 417 | if (context == null) { 418 | if (string.length == 0) return null; 419 | 420 | let kind = ""; /* one char per code point: bit 7 is the Extended_Pictographic flag, the low bits are the GraphemeBreak value (0 when the lookup returns null) */ 421 | for (const pointish of string) { 422 | const point = stringToPoint(pointish)!; 423 | const gb = getGraphemeBreak(data, point) ?? 0; 424 | const exp = Number(isExtendedPictographic(data, point)); 425 | kind += String.fromCharCode((exp << 7) | gb); 426 | } 427 | 428 | // GB1: sot / Any 429 | return { 430 | startUnitIndex: 0, 431 | startPointIndex: 0, 432 | kind, 433 | }; 434 | } 435 | 436 | if (context.startUnitIndex == string.length) return null; 437 | /* EGCBREAK is run over kind, so its lastIndex is a code point index; assumes EGCBREAK honours lastIndex (sticky or global flag) - TODO confirm in ../data/egcbreak */ 438 | EGCBREAK.lastIndex = context.startPointIndex; 439 | EGCBREAK.exec(context.kind); 440 | /* convert the code points just consumed into UTF-16 code units */ 441 | for (let i = context.startPointIndex; i < EGCBREAK.lastIndex; i++) 442 | context.startUnitIndex += 443 | string.codePointAt(context.startUnitIndex)! > 0xffff ? 2 : 1; 444 | context.startPointIndex = EGCBREAK.lastIndex; 445 | 446 | return context; 447 | } 448 | /* Returns UTF-16 offsets, beginning with 0, at which the string alternates between non-emoji and emoji runs; the first run is non-emoji and may be empty. In the loop, i indexes points and j indexes UTF-16 code units. */ 449 | export function getEmojiPresentationRuns(data: Data, string: string): number[] { 450 | const points = stringToPoints(string); 451 | const result = [0]; 452 | let emojiRun = false; 453 | for (let i = 0, j = 0; j < string.length /* nothing */; ) { 454 | const n = consumeEmojiSeq(i); 455 | // console.log(`gEPR j=${j} i=${i} n=${n} point=${pointToYouPlus(points[i])} emojiRun=${emojiRun}`); 456 | if ((n != null) != emojiRun) { 457 | emojiRun = !emojiRun; 458 | result.push(j); 459 | } 460 | for (let k = 0; k < (n ?? 1); i++, k++) j += points[i] > 0xffff ? 2 : 1; 461 | } 462 | return result; 463 | 464 | function consumeEmojiSeq(i: number): number | null { 465 | return consumeStandaloneSeq(i) ?? consumeSeqSeq(i); 466 | } 467 | 468 | function consumeStandaloneSeq(i: number): number | null { 469 | return consumeKeycapSeq(i) ?? consumeFlagSeq(i); 470 | } 471 | 472 | function consumeSeqSeq(i: number): number | null { 473 | const n = consumeTagBaseOrZwjElement(i); 474 | if (n == null) return null; 475 | return consumeZwjSeq(i, n) ?? 
consumeTagSeq(i, n) ?? n; 476 | } 477 | /* Returns how many points (1 or 2) form a single emoji element at i, or null. Braceless chain: each else binds to the nearest unmatched if. A point with the emoji-presentation flag counts as 1 unless followed by 0xfe0e; any other emoji needs a following 0xfe0f, or, for a modifier base, a following emoji modifier (2 points either way). */ 478 | function consumeTagBaseOrZwjElement(i: number): number | null { 479 | const point = lookahead(i, 0, (x) => x); 480 | if (point == null) return null; 481 | if (isEmoji(data, point)) 482 | if (isEmojiPresentation(data, point)) 483 | if (lookahead(i, 1, (x) => x == 0xfe0e)) return null; 484 | else return 1; 485 | else if (lookahead(i, 1, (x) => x == 0xfe0f)) return 2; 486 | else if (isEmojiModifierBase(data, point)) 487 | if (lookahead(i, 1, (x) => isEmojiModifier(data, x))) return 2; 488 | else return null; 489 | return null; 490 | } 491 | /* After the first n points, requires at least one (0x200d, element) pair, then greedily takes further pairs; returns the total length in points, or null when the first pair is missing. */ 492 | function consumeZwjSeq(i: number, n: number): number | null { 493 | const isZwj = (x: number) => x == 0x200d; 494 | let n_ = add(n, consume(i, n, isZwj)); 495 | if (n_ == null) return null; 496 | n_ = add(n_, consumeTagBaseOrZwjElement(i + n_)); 497 | if (n_ == null) return null; 498 | while (i + n_ < points.length) { 499 | let updated = add(n_, consume(i, n_, isZwj)); 500 | if (updated == null) break; 501 | updated = add(updated, consumeTagBaseOrZwjElement(i + updated)); 502 | if (updated == null) break; 503 | n_ = updated; 504 | } 505 | return n_; 506 | } 507 | /* After the first n points, skips any run of points in 0xe0020..0xe007e; the run must be terminated by 0xe007f, otherwise null. */ 508 | function consumeTagSeq(i: number, n: number): number | null { 509 | while (lookahead(i, n, (x) => 0xe0020 <= x && x <= 0xe007e)) n += 1; 510 | if (lookahead(i, n, (x) => x == 0xe007f)) return n + 1; 511 | return null; 512 | } 513 | /* Matches exactly three points: one of [0-9#*], then 0xfe0f, then 0x20e3. */ 514 | function consumeKeycapSeq(i: number): number | null { 515 | if (lookahead(i, 0, (x) => /[0-9#*]/.test(pointToString(x)))) 516 | if (lookahead(i, 1, (x) => x == 0xfe0f)) 517 | if (lookahead(i, 2, (x) => x == 0x20e3)) return 3; 518 | return null; 519 | } 520 | /* Matches two consecutive points whose grapheme break value is RegionalIndicator. */ 521 | function consumeFlagSeq(i: number): number | null { 522 | const RI = GraphemeBreak.RegionalIndicator; 523 | if (!lookahead(i, 0, (x) => getGraphemeBreak(data, x) == RI)) return null; 524 | if (!lookahead(i, 1, (x) => getGraphemeBreak(data, x) == RI)) return null; 525 | return 2; 526 | } 527 | 528 | function add(result: 
number, add: number | null): number | null { /* result + add, or null when add is null */ 529 | return add != null ? result + add : null; 530 | } 531 | /* Returns 1 when the point n places after i satisfies pred, otherwise null (including when i + n is past the end of points). */ 532 | function consume( 533 | i: number, 534 | n: number, 535 | pred: (_: number) => boolean, 536 | ): number | null { 537 | return lookahead(i, n, (x) => (pred(x) == true ? 1 : null)); 538 | } 539 | /* Applies fun to the point n places after i and returns its result, or null when i + n is past the end of points. */ 540 | function lookahead<T>(i: number, n: number, fun: (_: number) => T): T | null { 541 | return i + n < points.length ? fun(points[i + n]) : null; 542 | } 543 | } 544 | -------------------------------------------------------------------------------- /data/NameAliases.txt: -------------------------------------------------------------------------------- 1 | # NameAliases-16.0.0.txt 2 | # Date: 2024-04-24 3 | # © 2024 Unicode®, Inc. 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 5 | # For terms of use and license, see https://www.unicode.org/terms_of_use.html 6 | # 7 | # Unicode Character Database 8 | # For documentation, see https://www.unicode.org/reports/tr44/ 9 | # 10 | # This file is a normative contributory data file in the 11 | # Unicode Character Database. 12 | # 13 | # This file defines the formal name aliases for Unicode characters. 14 | # 15 | # For informative aliases, see NamesList.txt 16 | # 17 | # The formal name aliases are divided into five types, each with a distinct label. 18 | # 19 | # Type Labels: 20 | # 21 | # 1. correction 22 | # Corrections for serious problems in the character names 23 | # 2. control 24 | # ISO 6429 names for C0 and C1 control functions, and other 25 | # commonly occurring names for control codes 26 | # 3. alternate 27 | # A few widely used alternate names for format characters 28 | # 4. figment 29 | # Several documented labels for C1 control code points which 30 | # were never actually approved in any standard 31 | # 5. 
abbreviation 32 | # Commonly occurring abbreviations (or acronyms) for control codes, 33 | # format characters, spaces, and variation selectors 34 | # 35 | # The formal name aliases are part of the Unicode character namespace, which 36 | # includes the character names and the names of named character sequences. 37 | # The inclusion of ISO 6429 names and other commonly occurring names and 38 | # abbreviations for control codes and format characters as formal name aliases 39 | # is to help avoid name collisions between Unicode character names and the 40 | # labels which commonly appear in text and/or in implementations such as regex, for 41 | # control codes (which for historical reasons have no Unicode character name) 42 | # or for format characters. 43 | # 44 | # For documentation, see NamesList.html and https://www.unicode.org/reports/tr44/ 45 | # 46 | # FORMAT 47 | # 48 | # Each line has three fields, as described here: 49 | # 50 | # First field: Code point 51 | # Second field: Alias 52 | # Third field: Type 53 | # 54 | # The type labels used are defined above. As for property values, comparisons 55 | # of type labels should ignore case. 56 | # 57 | # The type labels can be mapped to other strings for display, if desired. 58 | # 59 | # In case multiple aliases are assigned, additional aliases 60 | # are provided on separate lines. Parsers of this data file should 61 | # take note that the same code point can (and does) occur more than once. 62 | # 63 | # Note that currently the only instances of multiple aliases of the same 64 | # type for a single code point are either of type "control" or "abbreviation". 65 | # An alias of type "abbreviation" can, in principle, be added for any code 66 | # point, although currently aliases of type "correction" do not have 67 | # any additional aliases of type "abbreviation". Such relationships 68 | # are not enforced by stability policies. 
69 | # 70 | #----------------------------------------------------------------- 71 | 72 | 0000;NULL;control 73 | 0000;NUL;abbreviation 74 | 0001;START OF HEADING;control 75 | 0001;SOH;abbreviation 76 | 0002;START OF TEXT;control 77 | 0002;STX;abbreviation 78 | 0003;END OF TEXT;control 79 | 0003;ETX;abbreviation 80 | 0004;END OF TRANSMISSION;control 81 | 0004;EOT;abbreviation 82 | 0005;ENQUIRY;control 83 | 0005;ENQ;abbreviation 84 | 0006;ACKNOWLEDGE;control 85 | 0006;ACK;abbreviation 86 | 87 | # Note that no formal name alias for the ISO 6429 "BELL" is 88 | # provided for U+0007, because of the existing name collision 89 | # with U+1F514 BELL. 90 | 91 | 0007;ALERT;control 92 | 0007;BEL;abbreviation 93 | 94 | 0008;BACKSPACE;control 95 | 0008;BS;abbreviation 96 | 0009;CHARACTER TABULATION;control 97 | 0009;HORIZONTAL TABULATION;control 98 | 0009;HT;abbreviation 99 | 0009;TAB;abbreviation 100 | 000A;LINE FEED;control 101 | 000A;NEW LINE;control 102 | 000A;END OF LINE;control 103 | 000A;LF;abbreviation 104 | 000A;NL;abbreviation 105 | 000A;EOL;abbreviation 106 | 000B;LINE TABULATION;control 107 | 000B;VERTICAL TABULATION;control 108 | 000B;VT;abbreviation 109 | 000C;FORM FEED;control 110 | 000C;FF;abbreviation 111 | 000D;CARRIAGE RETURN;control 112 | 000D;CR;abbreviation 113 | 000E;SHIFT OUT;control 114 | 000E;LOCKING-SHIFT ONE;control 115 | 000E;SO;abbreviation 116 | 000F;SHIFT IN;control 117 | 000F;LOCKING-SHIFT ZERO;control 118 | 000F;SI;abbreviation 119 | 0010;DATA LINK ESCAPE;control 120 | 0010;DLE;abbreviation 121 | 0011;DEVICE CONTROL ONE;control 122 | 0011;DC1;abbreviation 123 | 0012;DEVICE CONTROL TWO;control 124 | 0012;DC2;abbreviation 125 | 0013;DEVICE CONTROL THREE;control 126 | 0013;DC3;abbreviation 127 | 0014;DEVICE CONTROL FOUR;control 128 | 0014;DC4;abbreviation 129 | 0015;NEGATIVE ACKNOWLEDGE;control 130 | 0015;NAK;abbreviation 131 | 0016;SYNCHRONOUS IDLE;control 132 | 0016;SYN;abbreviation 133 | 0017;END OF TRANSMISSION BLOCK;control 134 | 
0017;ETB;abbreviation 135 | 0018;CANCEL;control 136 | 0018;CAN;abbreviation 137 | 0019;END OF MEDIUM;control 138 | 0019;EOM;abbreviation 139 | 0019;EM;abbreviation 140 | 001A;SUBSTITUTE;control 141 | 001A;SUB;abbreviation 142 | 001B;ESCAPE;control 143 | 001B;ESC;abbreviation 144 | 001C;INFORMATION SEPARATOR FOUR;control 145 | 001C;FILE SEPARATOR;control 146 | 001C;FS;abbreviation 147 | 001D;INFORMATION SEPARATOR THREE;control 148 | 001D;GROUP SEPARATOR;control 149 | 001D;GS;abbreviation 150 | 001E;INFORMATION SEPARATOR TWO;control 151 | 001E;RECORD SEPARATOR;control 152 | 001E;RS;abbreviation 153 | 001F;INFORMATION SEPARATOR ONE;control 154 | 001F;UNIT SEPARATOR;control 155 | 001F;US;abbreviation 156 | 0020;SP;abbreviation 157 | 007F;DELETE;control 158 | 007F;DEL;abbreviation 159 | 160 | # PADDING CHARACTER and HIGH OCTET PRESET represent 161 | # architectural concepts initially proposed for early 162 | # drafts of ISO/IEC 10646-1. They were never actually 163 | # approved or standardized: hence their designation 164 | # here as the "figment" type. Formal name aliases 165 | # (and corresponding abbreviations) for these code 166 | # points are included here because these names leaked 167 | # out from the draft documents and were published in 168 | # at least one RFC whose names for code points were 169 | # implemented in Perl regex expressions. 
170 | 171 | 0080;PADDING CHARACTER;figment 172 | 0080;PAD;abbreviation 173 | 0081;HIGH OCTET PRESET;figment 174 | 0081;HOP;abbreviation 175 | 176 | 0082;BREAK PERMITTED HERE;control 177 | 0082;BPH;abbreviation 178 | 0083;NO BREAK HERE;control 179 | 0083;NBH;abbreviation 180 | 0084;INDEX;control 181 | 0084;IND;abbreviation 182 | 0085;NEXT LINE;control 183 | 0085;NEL;abbreviation 184 | 0086;START OF SELECTED AREA;control 185 | 0086;SSA;abbreviation 186 | 0087;END OF SELECTED AREA;control 187 | 0087;ESA;abbreviation 188 | 0088;CHARACTER TABULATION SET;control 189 | 0088;HORIZONTAL TABULATION SET;control 190 | 0088;HTS;abbreviation 191 | 0089;CHARACTER TABULATION WITH JUSTIFICATION;control 192 | 0089;HORIZONTAL TABULATION WITH JUSTIFICATION;control 193 | 0089;HTJ;abbreviation 194 | 008A;LINE TABULATION SET;control 195 | 008A;VERTICAL TABULATION SET;control 196 | 008A;VTS;abbreviation 197 | 008B;PARTIAL LINE FORWARD;control 198 | 008B;PARTIAL LINE DOWN;control 199 | 008B;PLD;abbreviation 200 | 008C;PARTIAL LINE BACKWARD;control 201 | 008C;PARTIAL LINE UP;control 202 | 008C;PLU;abbreviation 203 | 008D;REVERSE LINE FEED;control 204 | 008D;REVERSE INDEX;control 205 | 008D;RI;abbreviation 206 | 008E;SINGLE SHIFT TWO;control 207 | 008E;SINGLE-SHIFT-2;control 208 | 008E;SS2;abbreviation 209 | 008F;SINGLE SHIFT THREE;control 210 | 008F;SINGLE-SHIFT-3;control 211 | 008F;SS3;abbreviation 212 | 0090;DEVICE CONTROL STRING;control 213 | 0090;DCS;abbreviation 214 | 0091;PRIVATE USE ONE;control 215 | 0091;PRIVATE USE-1;control 216 | 0091;PU1;abbreviation 217 | 0092;PRIVATE USE TWO;control 218 | 0092;PRIVATE USE-2;control 219 | 0092;PU2;abbreviation 220 | 0093;SET TRANSMIT STATE;control 221 | 0093;STS;abbreviation 222 | 0094;CANCEL CHARACTER;control 223 | 0094;CCH;abbreviation 224 | 0095;MESSAGE WAITING;control 225 | 0095;MW;abbreviation 226 | 0096;START OF GUARDED AREA;control 227 | 0096;START OF PROTECTED AREA;control 228 | 0096;SPA;abbreviation 229 | 0097;END OF GUARDED 
AREA;control 230 | 0097;END OF PROTECTED AREA;control 231 | 0097;EPA;abbreviation 232 | 0098;START OF STRING;control 233 | 0098;SOS;abbreviation 234 | 235 | # SINGLE GRAPHIC CHARACTER INTRODUCER is another 236 | # architectural concept from early drafts of ISO/IEC 10646-1 237 | # which was never approved and standardized. 238 | 239 | 0099;SINGLE GRAPHIC CHARACTER INTRODUCER;figment 240 | 0099;SGC;abbreviation 241 | 242 | 009A;SINGLE CHARACTER INTRODUCER;control 243 | 009A;SCI;abbreviation 244 | 009B;CONTROL SEQUENCE INTRODUCER;control 245 | 009B;CSI;abbreviation 246 | 009C;STRING TERMINATOR;control 247 | 009C;ST;abbreviation 248 | 009D;OPERATING SYSTEM COMMAND;control 249 | 009D;OSC;abbreviation 250 | 009E;PRIVACY MESSAGE;control 251 | 009E;PM;abbreviation 252 | 009F;APPLICATION PROGRAM COMMAND;control 253 | 009F;APC;abbreviation 254 | 00A0;NBSP;abbreviation 255 | 00AD;SHY;abbreviation 256 | 01A2;LATIN CAPITAL LETTER GHA;correction 257 | 01A3;LATIN SMALL LETTER GHA;correction 258 | 034F;CGJ;abbreviation 259 | 0616;ARABIC SMALL HIGH LIGATURE ALEF WITH YEH BARREE;correction 260 | 061C;ALM;abbreviation 261 | 0709;SYRIAC SUBLINEAR COLON SKEWED LEFT;correction 262 | 0CDE;KANNADA LETTER LLLA;correction 263 | 0E9D;LAO LETTER FO FON;correction 264 | 0E9F;LAO LETTER FO FAY;correction 265 | 0EA3;LAO LETTER RO;correction 266 | 0EA5;LAO LETTER LO;correction 267 | 0FD0;TIBETAN MARK BKA- SHOG GI MGO RGYAN;correction 268 | 11EC;HANGUL JONGSEONG YESIEUNG-KIYEOK;correction 269 | 11ED;HANGUL JONGSEONG YESIEUNG-SSANGKIYEOK;correction 270 | 11EE;HANGUL JONGSEONG SSANGYESIEUNG;correction 271 | 11EF;HANGUL JONGSEONG YESIEUNG-KHIEUKH;correction 272 | 180B;FVS1;abbreviation 273 | 180C;FVS2;abbreviation 274 | 180D;FVS3;abbreviation 275 | 180E;MVS;abbreviation 276 | 180F;FVS4;abbreviation 277 | 1BBD;SUNDANESE LETTER ARCHAIC I;correction 278 | 200B;ZWSP;abbreviation 279 | 200C;ZWNJ;abbreviation 280 | 200D;ZWJ;abbreviation 281 | 200E;LRM;abbreviation 282 | 200F;RLM;abbreviation 283 | 
202A;LRE;abbreviation 284 | 202B;RLE;abbreviation 285 | 202C;PDF;abbreviation 286 | 202D;LRO;abbreviation 287 | 202E;RLO;abbreviation 288 | 202F;NNBSP;abbreviation 289 | 205F;MMSP;abbreviation 290 | 2060;WJ;abbreviation 291 | 2066;LRI;abbreviation 292 | 2067;RLI;abbreviation 293 | 2068;FSI;abbreviation 294 | 2069;PDI;abbreviation 295 | 2118;WEIERSTRASS ELLIPTIC FUNCTION;correction 296 | 2448;MICR ON US SYMBOL;correction 297 | 2449;MICR DASH SYMBOL;correction 298 | 2B7A;LEFTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction 299 | 2B7C;RIGHTWARDS TRIANGLE-HEADED ARROW WITH DOUBLE VERTICAL STROKE;correction 300 | A015;YI SYLLABLE ITERATION MARK;correction 301 | AA6E;MYANMAR LETTER KHAMTI LLA;correction 302 | FE00;VS1;abbreviation 303 | FE01;VS2;abbreviation 304 | FE02;VS3;abbreviation 305 | FE03;VS4;abbreviation 306 | FE04;VS5;abbreviation 307 | FE05;VS6;abbreviation 308 | FE06;VS7;abbreviation 309 | FE07;VS8;abbreviation 310 | FE08;VS9;abbreviation 311 | FE09;VS10;abbreviation 312 | FE0A;VS11;abbreviation 313 | FE0B;VS12;abbreviation 314 | FE0C;VS13;abbreviation 315 | FE0D;VS14;abbreviation 316 | FE0E;VS15;abbreviation 317 | FE0F;VS16;abbreviation 318 | FE18;PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET;correction 319 | FEFF;BYTE ORDER MARK;alternate 320 | FEFF;BOM;abbreviation 321 | FEFF;ZWNBSP;abbreviation 322 | 122D4;CUNEIFORM SIGN NU11 TENU;correction 323 | 122D5;CUNEIFORM SIGN NU11 OVER NU11 BUR OVER BUR;correction 324 | 12327;CUNEIFORM SIGN KALAM;correction 325 | 1680B;BAMUM LETTER PHASE-A MAEMGBIEE;correction 326 | 16E56;MEDEFAIDRIN CAPITAL LETTER H;correction 327 | 16E57;MEDEFAIDRIN CAPITAL LETTER NG;correction 328 | 16E76;MEDEFAIDRIN SMALL LETTER H;correction 329 | 16E77;MEDEFAIDRIN SMALL LETTER NG;correction 330 | 1B001;HENTAIGANA LETTER E-1;correction 331 | 1D0C5;BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS;correction 332 | 1E899;MENDE KIKAKUI SYLLABLE M172 MBO;correction 333 | 1E89A;MENDE KIKAKUI SYLLABLE M174 
MBOO;correction 334 | E0100;VS17;abbreviation 335 | E0101;VS18;abbreviation 336 | E0102;VS19;abbreviation 337 | E0103;VS20;abbreviation 338 | E0104;VS21;abbreviation 339 | E0105;VS22;abbreviation 340 | E0106;VS23;abbreviation 341 | E0107;VS24;abbreviation 342 | E0108;VS25;abbreviation 343 | E0109;VS26;abbreviation 344 | E010A;VS27;abbreviation 345 | E010B;VS28;abbreviation 346 | E010C;VS29;abbreviation 347 | E010D;VS30;abbreviation 348 | E010E;VS31;abbreviation 349 | E010F;VS32;abbreviation 350 | E0110;VS33;abbreviation 351 | E0111;VS34;abbreviation 352 | E0112;VS35;abbreviation 353 | E0113;VS36;abbreviation 354 | E0114;VS37;abbreviation 355 | E0115;VS38;abbreviation 356 | E0116;VS39;abbreviation 357 | E0117;VS40;abbreviation 358 | E0118;VS41;abbreviation 359 | E0119;VS42;abbreviation 360 | E011A;VS43;abbreviation 361 | E011B;VS44;abbreviation 362 | E011C;VS45;abbreviation 363 | E011D;VS46;abbreviation 364 | E011E;VS47;abbreviation 365 | E011F;VS48;abbreviation 366 | E0120;VS49;abbreviation 367 | E0121;VS50;abbreviation 368 | E0122;VS51;abbreviation 369 | E0123;VS52;abbreviation 370 | E0124;VS53;abbreviation 371 | E0125;VS54;abbreviation 372 | E0126;VS55;abbreviation 373 | E0127;VS56;abbreviation 374 | E0128;VS57;abbreviation 375 | E0129;VS58;abbreviation 376 | E012A;VS59;abbreviation 377 | E012B;VS60;abbreviation 378 | E012C;VS61;abbreviation 379 | E012D;VS62;abbreviation 380 | E012E;VS63;abbreviation 381 | E012F;VS64;abbreviation 382 | E0130;VS65;abbreviation 383 | E0131;VS66;abbreviation 384 | E0132;VS67;abbreviation 385 | E0133;VS68;abbreviation 386 | E0134;VS69;abbreviation 387 | E0135;VS70;abbreviation 388 | E0136;VS71;abbreviation 389 | E0137;VS72;abbreviation 390 | E0138;VS73;abbreviation 391 | E0139;VS74;abbreviation 392 | E013A;VS75;abbreviation 393 | E013B;VS76;abbreviation 394 | E013C;VS77;abbreviation 395 | E013D;VS78;abbreviation 396 | E013E;VS79;abbreviation 397 | E013F;VS80;abbreviation 398 | E0140;VS81;abbreviation 399 | E0141;VS82;abbreviation 400 
| E0142;VS83;abbreviation 401 | E0143;VS84;abbreviation 402 | E0144;VS85;abbreviation 403 | E0145;VS86;abbreviation 404 | E0146;VS87;abbreviation 405 | E0147;VS88;abbreviation 406 | E0148;VS89;abbreviation 407 | E0149;VS90;abbreviation 408 | E014A;VS91;abbreviation 409 | E014B;VS92;abbreviation 410 | E014C;VS93;abbreviation 411 | E014D;VS94;abbreviation 412 | E014E;VS95;abbreviation 413 | E014F;VS96;abbreviation 414 | E0150;VS97;abbreviation 415 | E0151;VS98;abbreviation 416 | E0152;VS99;abbreviation 417 | E0153;VS100;abbreviation 418 | E0154;VS101;abbreviation 419 | E0155;VS102;abbreviation 420 | E0156;VS103;abbreviation 421 | E0157;VS104;abbreviation 422 | E0158;VS105;abbreviation 423 | E0159;VS106;abbreviation 424 | E015A;VS107;abbreviation 425 | E015B;VS108;abbreviation 426 | E015C;VS109;abbreviation 427 | E015D;VS110;abbreviation 428 | E015E;VS111;abbreviation 429 | E015F;VS112;abbreviation 430 | E0160;VS113;abbreviation 431 | E0161;VS114;abbreviation 432 | E0162;VS115;abbreviation 433 | E0163;VS116;abbreviation 434 | E0164;VS117;abbreviation 435 | E0165;VS118;abbreviation 436 | E0166;VS119;abbreviation 437 | E0167;VS120;abbreviation 438 | E0168;VS121;abbreviation 439 | E0169;VS122;abbreviation 440 | E016A;VS123;abbreviation 441 | E016B;VS124;abbreviation 442 | E016C;VS125;abbreviation 443 | E016D;VS126;abbreviation 444 | E016E;VS127;abbreviation 445 | E016F;VS128;abbreviation 446 | E0170;VS129;abbreviation 447 | E0171;VS130;abbreviation 448 | E0172;VS131;abbreviation 449 | E0173;VS132;abbreviation 450 | E0174;VS133;abbreviation 451 | E0175;VS134;abbreviation 452 | E0176;VS135;abbreviation 453 | E0177;VS136;abbreviation 454 | E0178;VS137;abbreviation 455 | E0179;VS138;abbreviation 456 | E017A;VS139;abbreviation 457 | E017B;VS140;abbreviation 458 | E017C;VS141;abbreviation 459 | E017D;VS142;abbreviation 460 | E017E;VS143;abbreviation 461 | E017F;VS144;abbreviation 462 | E0180;VS145;abbreviation 463 | E0181;VS146;abbreviation 464 | E0182;VS147;abbreviation 465 | 
E0183;VS148;abbreviation 466 | E0184;VS149;abbreviation 467 | E0185;VS150;abbreviation 468 | E0186;VS151;abbreviation 469 | E0187;VS152;abbreviation 470 | E0188;VS153;abbreviation 471 | E0189;VS154;abbreviation 472 | E018A;VS155;abbreviation 473 | E018B;VS156;abbreviation 474 | E018C;VS157;abbreviation 475 | E018D;VS158;abbreviation 476 | E018E;VS159;abbreviation 477 | E018F;VS160;abbreviation 478 | E0190;VS161;abbreviation 479 | E0191;VS162;abbreviation 480 | E0192;VS163;abbreviation 481 | E0193;VS164;abbreviation 482 | E0194;VS165;abbreviation 483 | E0195;VS166;abbreviation 484 | E0196;VS167;abbreviation 485 | E0197;VS168;abbreviation 486 | E0198;VS169;abbreviation 487 | E0199;VS170;abbreviation 488 | E019A;VS171;abbreviation 489 | E019B;VS172;abbreviation 490 | E019C;VS173;abbreviation 491 | E019D;VS174;abbreviation 492 | E019E;VS175;abbreviation 493 | E019F;VS176;abbreviation 494 | E01A0;VS177;abbreviation 495 | E01A1;VS178;abbreviation 496 | E01A2;VS179;abbreviation 497 | E01A3;VS180;abbreviation 498 | E01A4;VS181;abbreviation 499 | E01A5;VS182;abbreviation 500 | E01A6;VS183;abbreviation 501 | E01A7;VS184;abbreviation 502 | E01A8;VS185;abbreviation 503 | E01A9;VS186;abbreviation 504 | E01AA;VS187;abbreviation 505 | E01AB;VS188;abbreviation 506 | E01AC;VS189;abbreviation 507 | E01AD;VS190;abbreviation 508 | E01AE;VS191;abbreviation 509 | E01AF;VS192;abbreviation 510 | E01B0;VS193;abbreviation 511 | E01B1;VS194;abbreviation 512 | E01B2;VS195;abbreviation 513 | E01B3;VS196;abbreviation 514 | E01B4;VS197;abbreviation 515 | E01B5;VS198;abbreviation 516 | E01B6;VS199;abbreviation 517 | E01B7;VS200;abbreviation 518 | E01B8;VS201;abbreviation 519 | E01B9;VS202;abbreviation 520 | E01BA;VS203;abbreviation 521 | E01BB;VS204;abbreviation 522 | E01BC;VS205;abbreviation 523 | E01BD;VS206;abbreviation 524 | E01BE;VS207;abbreviation 525 | E01BF;VS208;abbreviation 526 | E01C0;VS209;abbreviation 527 | E01C1;VS210;abbreviation 528 | E01C2;VS211;abbreviation 529 | 
E01C3;VS212;abbreviation 530 | E01C4;VS213;abbreviation 531 | E01C5;VS214;abbreviation 532 | E01C6;VS215;abbreviation 533 | E01C7;VS216;abbreviation 534 | E01C8;VS217;abbreviation 535 | E01C9;VS218;abbreviation 536 | E01CA;VS219;abbreviation 537 | E01CB;VS220;abbreviation 538 | E01CC;VS221;abbreviation 539 | E01CD;VS222;abbreviation 540 | E01CE;VS223;abbreviation 541 | E01CF;VS224;abbreviation 542 | E01D0;VS225;abbreviation 543 | E01D1;VS226;abbreviation 544 | E01D2;VS227;abbreviation 545 | E01D3;VS228;abbreviation 546 | E01D4;VS229;abbreviation 547 | E01D5;VS230;abbreviation 548 | E01D6;VS231;abbreviation 549 | E01D7;VS232;abbreviation 550 | E01D8;VS233;abbreviation 551 | E01D9;VS234;abbreviation 552 | E01DA;VS235;abbreviation 553 | E01DB;VS236;abbreviation 554 | E01DC;VS237;abbreviation 555 | E01DD;VS238;abbreviation 556 | E01DE;VS239;abbreviation 557 | E01DF;VS240;abbreviation 558 | E01E0;VS241;abbreviation 559 | E01E1;VS242;abbreviation 560 | E01E2;VS243;abbreviation 561 | E01E3;VS244;abbreviation 562 | E01E4;VS245;abbreviation 563 | E01E5;VS246;abbreviation 564 | E01E6;VS247;abbreviation 565 | E01E7;VS248;abbreviation 566 | E01E8;VS249;abbreviation 567 | E01E9;VS250;abbreviation 568 | E01EA;VS251;abbreviation 569 | E01EB;VS252;abbreviation 570 | E01EC;VS253;abbreviation 571 | E01ED;VS254;abbreviation 572 | E01EE;VS255;abbreviation 573 | E01EF;VS256;abbreviation 574 | 575 | # EOF 576 | --------------------------------------------------------------------------------