├── .gitignore
├── README.md
├── gitcommitpush
├── gitcommitpush.bat
├── gitforceupdate
├── gitforceupdate.bat
└── src
├── PUCU.pas
├── PUCUBuild.cfg
├── PUCUBuild.dof
├── PUCUBuild.dpr
├── PUCUBuild.lpi
├── PUCUBuildUnicode.cfg
├── PUCUBuildUnicode.dof
├── PUCUCode.pas
├── PUCUConvertUnicode.cfg
├── PUCUConvertUnicode.dof
├── PUCUConvertUnicode.dpr
├── PUCUConvertUnicode.lpi
├── PUCUDebug.cfg
├── PUCUDebug.dof
├── PUCUDebug.dpr
├── PUCUGenCodePages.cfg
├── PUCUGenCodePages.dof
├── PUCUGenCodePages.dpr
├── PUCUGenCodePages.lpi
├── UnicodeData
├── Blocks.txt
├── CaseFolding.txt
├── CompositionExclusions.txt
├── DerivedGeneralCategory.txt
├── NormalizationCorrections.txt
├── NormalizationTest.txt
├── Scripts.txt
├── SpecialCasing.txt
├── UnicodeData.txt
└── empty.txt
└── howto.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Uncomment these types if you want an even cleaner repository. But be careful.
2 | # Doing so can harm an existing project's source. Read the explanations below.
3 | #
4 | # Resource files are binaries containing manifest, project icon and version info.
5 | # They can not be viewed as text or compared by diff-tools. Consider replacing them with .rc files.
6 | #*.res
7 | #
8 | # Type library file (binary). In old Delphi versions it should be stored.
9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored.
10 | #*.tlb
11 | #
12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7.
13 | # Uncomment this if you are not using diagrams or use newer Delphi version.
14 | #*.ddp
15 | #
16 | # Visual LiveBindings file. Added in Delphi XE2.
17 | # Uncomment this if you are not using LiveBindings Designer.
18 | #*.vlb
19 | #
20 | # Deployment Manager configuration file for your project. Added in Delphi XE2.
21 | # Uncomment this if it is not mobile development and you do not use remote debug feature.
22 | #*.deployproj
23 | #
24 | # C++ object files produced when C/C++ Output file generation is configured.
25 | # Uncomment this if you are not using external objects (zlib library for example).
26 | #*.obj
27 | #
28 |
29 | # Delphi compiler-generated binaries (safe to delete)
30 | *.exe
31 | *.dll
32 | *.bpl
33 | *.bpi
34 | *.dcp
35 | *.so
36 | *.apk
37 | *.drc
38 | *.map
39 | *.dres
40 | *.rsm
41 | *.tds
42 | *.dcu
43 | *.lib
44 | *.a
45 | *.o
46 | *.ocx
47 |
48 | # Delphi autogenerated files (duplicated info)
49 | *.cfg
50 | *.hpp
51 | *Resource.rc
52 |
53 | # Delphi local files (user-specific info)
54 | *.local
55 | *.identcache
56 | *.projdata
57 | *.tvsconfig
58 | *.dsk
59 |
60 | # Delphi history and backups
61 | __history/
62 | *.~*
63 |
64 | # Castalia statistics file (since XE7 Castalia is distributed with Delphi)
65 | *.stat
66 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PUCU Pascal UniCode Utils Library
2 | You only need the src\PUCU.pas file for normal usage of this library.
3 |
4 |
--------------------------------------------------------------------------------
/gitcommitpush:
--------------------------------------------------------------------------------
#!/bin/sh
# Commit every modification to tracked files and push the result.
#
# Usage: gitcommitpush [message]
#   message  Optional commit message; defaults to "More work".
#
# The push is attempted even when the commit step fails (for example when
# there is nothing to commit), so commits made earlier still reach the remote.
git commit -am "${1:-More work}"
git push
4 |
--------------------------------------------------------------------------------
/gitcommitpush.bat:
--------------------------------------------------------------------------------
@echo off
rem Commit every modification to tracked files and push the result.
rem
rem Usage: gitcommitpush [message]
rem   message  Optional commit message; defaults to "More work".
rem
rem The push is attempted even when the commit step fails (for example when
rem there is nothing to commit), so commits made earlier still reach the remote.
setlocal
set "COMMIT_MESSAGE=%~1"
if not defined COMMIT_MESSAGE set "COMMIT_MESSAGE=More work"
call git commit -am "%COMMIT_MESSAGE%"
call git push
rem --set-upstream origin master
5 |
--------------------------------------------------------------------------------
/gitforceupdate:
--------------------------------------------------------------------------------
#!/bin/sh
# Discard local modifications to tracked files, then pull from the remote.
#
# The modifications are parked in a temporary stash entry which is dropped
# after the pull. "git stash save" creates no entry when there is nothing to
# stash; in that case the drop must be skipped, because it would otherwise
# delete an older, unrelated stash entry. Whether a new entry was created is
# detected by comparing the commit that refs/stash points to before and after.
stash_before=$(git rev-parse -q --verify refs/stash)
git stash save
stash_after=$(git rev-parse -q --verify refs/stash)
git pull
if [ "$stash_after" != "$stash_before" ]; then
  git stash drop
fi
--------------------------------------------------------------------------------
/gitforceupdate.bat:
--------------------------------------------------------------------------------
@echo off
rem Discard local modifications to tracked files, then pull from the remote.
rem
rem The modifications are parked in a temporary stash entry which is dropped
rem after the pull. "git stash save" creates no entry when there is nothing to
rem stash; in that case the drop must be skipped, because it would otherwise
rem delete an older, unrelated stash entry. Whether a new entry was created is
rem detected by comparing the commit that refs/stash points to before and after.
setlocal
set "STASH_BEFORE="
for /f "delims=" %%i in ('git rev-parse -q --verify refs/stash 2^>nul') do set "STASH_BEFORE=%%i"
call git stash save
set "STASH_AFTER="
for /f "delims=" %%i in ('git rev-parse -q --verify refs/stash 2^>nul') do set "STASH_AFTER=%%i"
call git pull
if not "%STASH_BEFORE%"=="%STASH_AFTER%" call git stash drop
--------------------------------------------------------------------------------
/src/PUCUBuild.cfg:
--------------------------------------------------------------------------------
1 | -$A8
2 | -$B-
3 | -$C+
4 | -$D+
5 | -$E-
6 | -$F-
7 | -$G+
8 | -$H+
9 | -$I+
10 | -$J-
11 | -$K-
12 | -$L+
13 | -$M-
14 | -$N+
15 | -$O+
16 | -$P+
17 | -$Q-
18 | -$R-
19 | -$S-
20 | -$T-
21 | -$U-
22 | -$V+
23 | -$W-
24 | -$X+
25 | -$YD
26 | -$Z1
27 | -cg
28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
29 | -H+
30 | -W+
31 | -M
32 | -$M16384,1048576
33 | -K$00400000
34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl"
35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl"
36 | -w-SYMBOL_DEPRECATED
37 | -w-SYMBOL_LIBRARY
38 | -w-SYMBOL_PLATFORM
39 | -w-UNSAFE_TYPE
40 | -w-UNSAFE_CODE
41 | -w-UNSAFE_CAST
42 |
--------------------------------------------------------------------------------
/src/PUCUBuild.dof:
--------------------------------------------------------------------------------
1 | [FileVersion]
2 | Version=7.0
3 | [Compiler]
4 | A=8
5 | B=0
6 | C=1
7 | D=1
8 | E=0
9 | F=0
10 | G=1
11 | H=1
12 | I=1
13 | J=0
14 | K=0
15 | L=1
16 | M=0
17 | N=1
18 | O=1
19 | P=1
20 | Q=0
21 | R=0
22 | S=0
23 | T=0
24 | U=0
25 | V=1
26 | W=0
27 | X=1
28 | Y=1
29 | Z=1
30 | ShowHints=1
31 | ShowWarnings=1
32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
33 | NamespacePrefix=
34 | SymbolDeprecated=0
35 | SymbolLibrary=0
36 | SymbolPlatform=0
37 | UnitLibrary=1
38 | UnitPlatform=1
39 | UnitDeprecated=1
40 | HResultCompat=1
41 | HidingMember=1
42 | HiddenVirtual=1
43 | Garbage=1
44 | BoundsError=1
45 | ZeroNilCompat=1
46 | StringConstTruncated=1
47 | ForLoopVarVarPar=1
48 | TypedConstVarPar=1
49 | AsgToTypedConst=1
50 | CaseLabelRange=1
51 | ForVariable=1
52 | ConstructingAbstract=1
53 | ComparisonFalse=1
54 | ComparisonTrue=1
55 | ComparingSignedUnsigned=1
56 | CombiningSignedUnsigned=1
57 | UnsupportedConstruct=1
58 | FileOpen=1
59 | FileOpenUnitSrc=1
60 | BadGlobalSymbol=1
61 | DuplicateConstructorDestructor=1
62 | InvalidDirective=1
63 | PackageNoLink=1
64 | PackageThreadVar=1
65 | ImplicitImport=1
66 | HPPEMITIgnored=1
67 | NoRetVal=1
68 | UseBeforeDef=1
69 | ForLoopVarUndef=1
70 | UnitNameMismatch=1
71 | NoCFGFileFound=1
72 | MessageDirective=1
73 | ImplicitVariants=1
74 | UnicodeToLocale=1
75 | LocaleToUnicode=1
76 | ImagebaseMultiple=1
77 | SuspiciousTypecast=1
78 | PrivatePropAccessor=1
79 | UnsafeType=0
80 | UnsafeCode=0
81 | UnsafeCast=0
82 | [Linker]
83 | MapFile=0
84 | OutputObjs=0
85 | ConsoleApp=1
86 | DebugInfo=0
87 | RemoteSymbols=0
88 | MinStackSize=16384
89 | MaxStackSize=1048576
90 | ImageBase=4194304
91 | ExeDescription=
92 | [Directories]
93 | OutputDir=
94 | UnitOutputDir=
95 | PackageDLLOutputDir=
96 | PackageDCPOutputDir=
97 | SearchPath=
98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls
99 | Conditionals=
100 | DebugSourceDirs=
101 | UsePackages=0
102 | [Parameters]
103 | RunParams=
104 | HostApplication=
105 | Launcher=
106 | UseLauncher=0
107 | DebugCWD=
108 | [Language]
109 | ActiveLang=
110 | ProjectLang=
111 | RootDir=
112 | [Version Info]
113 | IncludeVerInfo=0
114 | AutoIncBuild=0
115 | MajorVer=1
116 | MinorVer=0
117 | Release=0
118 | Build=0
119 | Debug=0
120 | PreRelease=0
121 | Special=0
122 | Private=0
123 | DLL=0
124 | Locale=1031
125 | CodePage=1252
126 | [HistoryLists\hlUnitAliases]
127 | Count=1
128 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
129 |
--------------------------------------------------------------------------------
/src/PUCUBuild.lpi:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 | -
62 |
63 |
64 | -
65 |
66 |
67 | -
68 |
69 |
70 |
71 |
72 |
73 |
--------------------------------------------------------------------------------
/src/PUCUBuildUnicode.cfg:
--------------------------------------------------------------------------------
1 | -$A8
2 | -$B-
3 | -$C+
4 | -$D+
5 | -$E-
6 | -$F-
7 | -$G+
8 | -$H+
9 | -$I+
10 | -$J-
11 | -$K-
12 | -$L+
13 | -$M-
14 | -$N+
15 | -$O+
16 | -$P+
17 | -$Q-
18 | -$R-
19 | -$S-
20 | -$T-
21 | -$U-
22 | -$V+
23 | -$W-
24 | -$X+
25 | -$YD
26 | -$Z1
27 | -cg
28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
29 | -H+
30 | -W+
31 | -M
32 | -$M16384,1048576
33 | -K$00400000
34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl"
35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl"
36 | -w-SYMBOL_DEPRECATED
37 | -w-SYMBOL_LIBRARY
38 | -w-SYMBOL_PLATFORM
39 | -w-UNSAFE_TYPE
40 | -w-UNSAFE_CODE
41 | -w-UNSAFE_CAST
42 |
--------------------------------------------------------------------------------
/src/PUCUBuildUnicode.dof:
--------------------------------------------------------------------------------
1 | [FileVersion]
2 | Version=7.0
3 | [Compiler]
4 | A=8
5 | B=0
6 | C=1
7 | D=1
8 | E=0
9 | F=0
10 | G=1
11 | H=1
12 | I=1
13 | J=0
14 | K=0
15 | L=1
16 | M=0
17 | N=1
18 | O=1
19 | P=1
20 | Q=0
21 | R=0
22 | S=0
23 | T=0
24 | U=0
25 | V=1
26 | W=0
27 | X=1
28 | Y=1
29 | Z=1
30 | ShowHints=1
31 | ShowWarnings=1
32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
33 | NamespacePrefix=
34 | SymbolDeprecated=0
35 | SymbolLibrary=0
36 | SymbolPlatform=0
37 | UnitLibrary=1
38 | UnitPlatform=1
39 | UnitDeprecated=1
40 | HResultCompat=1
41 | HidingMember=1
42 | HiddenVirtual=1
43 | Garbage=1
44 | BoundsError=1
45 | ZeroNilCompat=1
46 | StringConstTruncated=1
47 | ForLoopVarVarPar=1
48 | TypedConstVarPar=1
49 | AsgToTypedConst=1
50 | CaseLabelRange=1
51 | ForVariable=1
52 | ConstructingAbstract=1
53 | ComparisonFalse=1
54 | ComparisonTrue=1
55 | ComparingSignedUnsigned=1
56 | CombiningSignedUnsigned=1
57 | UnsupportedConstruct=1
58 | FileOpen=1
59 | FileOpenUnitSrc=1
60 | BadGlobalSymbol=1
61 | DuplicateConstructorDestructor=1
62 | InvalidDirective=1
63 | PackageNoLink=1
64 | PackageThreadVar=1
65 | ImplicitImport=1
66 | HPPEMITIgnored=1
67 | NoRetVal=1
68 | UseBeforeDef=1
69 | ForLoopVarUndef=1
70 | UnitNameMismatch=1
71 | NoCFGFileFound=1
72 | MessageDirective=1
73 | ImplicitVariants=1
74 | UnicodeToLocale=1
75 | LocaleToUnicode=1
76 | ImagebaseMultiple=1
77 | SuspiciousTypecast=1
78 | PrivatePropAccessor=1
79 | UnsafeType=0
80 | UnsafeCode=0
81 | UnsafeCast=0
82 | [Linker]
83 | MapFile=0
84 | OutputObjs=0
85 | ConsoleApp=1
86 | DebugInfo=0
87 | RemoteSymbols=0
88 | MinStackSize=16384
89 | MaxStackSize=1048576
90 | ImageBase=4194304
91 | ExeDescription=
92 | [Directories]
93 | OutputDir=
94 | UnitOutputDir=
95 | PackageDLLOutputDir=
96 | PackageDCPOutputDir=
97 | SearchPath=
98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls
99 | Conditionals=
100 | DebugSourceDirs=
101 | UsePackages=0
102 | [Parameters]
103 | RunParams=
104 | HostApplication=
105 | Launcher=
106 | UseLauncher=0
107 | DebugCWD=
108 | [Language]
109 | ActiveLang=
110 | ProjectLang=
111 | RootDir=
112 | [Version Info]
113 | IncludeVerInfo=0
114 | AutoIncBuild=0
115 | MajorVer=1
116 | MinorVer=0
117 | Release=0
118 | Build=0
119 | Debug=0
120 | PreRelease=0
121 | Special=0
122 | Private=0
123 | DLL=0
124 | Locale=1031
125 | CodePage=1252
126 | [HistoryLists\hlUnitAliases]
127 | Count=1
128 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
129 |
--------------------------------------------------------------------------------
/src/PUCUConvertUnicode.cfg:
--------------------------------------------------------------------------------
1 | -$A8
2 | -$B-
3 | -$C+
4 | -$D+
5 | -$E-
6 | -$F-
7 | -$G+
8 | -$H+
9 | -$I+
10 | -$J-
11 | -$K-
12 | -$L+
13 | -$M-
14 | -$N+
15 | -$O+
16 | -$P+
17 | -$Q-
18 | -$R-
19 | -$S-
20 | -$T-
21 | -$U-
22 | -$V+
23 | -$W-
24 | -$X+
25 | -$YD
26 | -$Z1
27 | -cg
28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
29 | -H+
30 | -W+
31 | -M
32 | -$M16384,1048576
33 | -K$00400000
34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl"
35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl"
36 | -w-UNSAFE_TYPE
37 | -w-UNSAFE_CODE
38 | -w-UNSAFE_CAST
39 |
--------------------------------------------------------------------------------
/src/PUCUConvertUnicode.dof:
--------------------------------------------------------------------------------
1 | [FileVersion]
2 | Version=7.0
3 | [Compiler]
4 | A=8
5 | B=0
6 | C=1
7 | D=1
8 | E=0
9 | F=0
10 | G=1
11 | H=1
12 | I=1
13 | J=0
14 | K=0
15 | L=1
16 | M=0
17 | N=1
18 | O=1
19 | P=1
20 | Q=0
21 | R=0
22 | S=0
23 | T=0
24 | U=0
25 | V=1
26 | W=0
27 | X=1
28 | Y=1
29 | Z=1
30 | ShowHints=1
31 | ShowWarnings=1
32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
33 | NamespacePrefix=
34 | SymbolDeprecated=1
35 | SymbolLibrary=1
36 | SymbolPlatform=1
37 | UnitLibrary=1
38 | UnitPlatform=1
39 | UnitDeprecated=1
40 | HResultCompat=1
41 | HidingMember=1
42 | HiddenVirtual=1
43 | Garbage=1
44 | BoundsError=1
45 | ZeroNilCompat=1
46 | StringConstTruncated=1
47 | ForLoopVarVarPar=1
48 | TypedConstVarPar=1
49 | AsgToTypedConst=1
50 | CaseLabelRange=1
51 | ForVariable=1
52 | ConstructingAbstract=1
53 | ComparisonFalse=1
54 | ComparisonTrue=1
55 | ComparingSignedUnsigned=1
56 | CombiningSignedUnsigned=1
57 | UnsupportedConstruct=1
58 | FileOpen=1
59 | FileOpenUnitSrc=1
60 | BadGlobalSymbol=1
61 | DuplicateConstructorDestructor=1
62 | InvalidDirective=1
63 | PackageNoLink=1
64 | PackageThreadVar=1
65 | ImplicitImport=1
66 | HPPEMITIgnored=1
67 | NoRetVal=1
68 | UseBeforeDef=1
69 | ForLoopVarUndef=1
70 | UnitNameMismatch=1
71 | NoCFGFileFound=1
72 | MessageDirective=1
73 | ImplicitVariants=1
74 | UnicodeToLocale=1
75 | LocaleToUnicode=1
76 | ImagebaseMultiple=1
77 | SuspiciousTypecast=1
78 | PrivatePropAccessor=1
79 | UnsafeType=0
80 | UnsafeCode=0
81 | UnsafeCast=0
82 | [Linker]
83 | MapFile=0
84 | OutputObjs=0
85 | ConsoleApp=1
86 | DebugInfo=0
87 | RemoteSymbols=0
88 | MinStackSize=16384
89 | MaxStackSize=1048576
90 | ImageBase=4194304
91 | ExeDescription=
92 | [Directories]
93 | OutputDir=
94 | UnitOutputDir=
95 | PackageDLLOutputDir=
96 | PackageDCPOutputDir=
97 | SearchPath=
98 | Packages=
99 | Conditionals=
100 | DebugSourceDirs=
101 | UsePackages=0
102 | [Parameters]
103 | RunParams=
104 | HostApplication=
105 | Launcher=
106 | UseLauncher=0
107 | DebugCWD=
108 | [Language]
109 | ActiveLang=
110 | ProjectLang=
111 | RootDir=
112 | [Version Info]
113 | IncludeVerInfo=0
114 | AutoIncBuild=0
115 | MajorVer=1
116 | MinorVer=0
117 | Release=0
118 | Build=0
119 | Debug=0
120 | PreRelease=0
121 | Special=0
122 | Private=0
123 | DLL=0
124 | Locale=1031
125 | CodePage=1252
126 | [Version Info Keys]
127 | CompanyName=
128 | FileDescription=
129 | FileVersion=1.0.0.0
130 | InternalName=
131 | LegalCopyright=
132 | LegalTrademarks=
133 | OriginalFilename=
134 | ProductName=
135 | ProductVersion=1.0.0.0
136 | Comments=
137 |
--------------------------------------------------------------------------------
/src/PUCUConvertUnicode.dpr:
--------------------------------------------------------------------------------
1 | (******************************************************************************
2 | * PUCU Pascal UniCode Utils Libary *
3 | ******************************************************************************
4 | * zlib license *
5 | *============================================================================*
6 | * *
7 | * Copyright (C) 2016-2022, Benjamin Rosseaux (benjamin@rosseaux.de) *
8 | * *
9 | * This software is provided 'as-is', without any express or implied *
10 | * warranty. In no event will the authors be held liable for any damages *
11 | * arising from the use of this software. *
12 | * *
13 | * Permission is granted to anyone to use this software for any purpose, *
14 | * including commercial applications, and to alter it and redistribute it *
15 | * freely, subject to the following restrictions: *
16 | * *
17 | * 1. The origin of this software must not be misrepresented; you must not *
18 | * claim that you wrote the original software. If you use this software *
19 | * in a product, an acknowledgement in the product documentation would be *
20 | * appreciated but is not required. *
21 | * 2. Altered source versions must be plainly marked as such, and must not be *
22 | * misrepresented as being the original software. *
23 | * 3. This notice may not be removed or altered from any source distribution. *
24 | * *
25 | ******************************************************************************
26 | * General guidelines for code contributors *
27 | *============================================================================*
28 | * *
29 | * 1. Make sure you are legally allowed to make a contribution under the zlib *
30 | * license. *
31 | * 2. The zlib license header goes at the top of each source file, with *
32 | * appropriate copyright notice. *
33 | * 3. After a pull request, check the status of your pull request on *
34 | http://github.com/BeRo1985/pucu *
35 | * 4. Write code, which is compatible with Delphi 7-XE7 and FreePascal >= 3.0 *
36 | * so don't use generics/templates, operator overloading and another newer *
37 | * syntax features than Delphi 7 has support for that, but if needed, make *
38 | * it out-ifdef-able. *
39 | * 5. Don't use Delphi-only, FreePascal-only or Lazarus-only libraries/units, *
40 | * but if needed, make it out-ifdef-able. *
41 | * 6. No use of third-party libraries/units as possible, but if needed, make *
42 | * it out-ifdef-able. *
43 | * 7. Try to use const when possible. *
44 | * 8. Make sure to comment out writeln, used while debugging. *
45 | * 9. Make sure the code compiles on 32-bit and 64-bit platforms (x86-32, *
46 | * x86-64, ARM, ARM64, etc.). *
47 | * *
48 | ******************************************************************************)
49 | program PUCUConvertUnicode;
50 | {$ifdef fpc}
51 | {$mode delphi}
52 | {$ifdef cpui386}
53 | {$define cpu386}
54 | {$endif}
55 | {$ifdef cpu386}
56 | {$asmmode intel}
57 | {$endif}
58 | {$ifdef cpuamd64}
59 | {$asmmode intel}
60 | {$endif}
61 | {$ifdef FPC_LITTLE_ENDIAN}
62 | {$define LITTLE_ENDIAN}
63 | {$else}
64 | {$ifdef FPC_BIG_ENDIAN}
65 | {$define BIG_ENDIAN}
66 | {$endif}
67 | {$endif}
68 | {-$pic off}
69 | {$define caninline}
70 | {$ifdef FPC_HAS_TYPE_EXTENDED}
71 | {$define HAS_TYPE_EXTENDED}
72 | {$else}
73 | {$undef HAS_TYPE_EXTENDED}
74 | {$endif}
75 | {$ifdef FPC_HAS_TYPE_DOUBLE}
76 | {$define HAS_TYPE_DOUBLE}
77 | {$else}
78 | {$undef HAS_TYPE_DOUBLE}
79 | {$endif}
80 | {$ifdef FPC_HAS_TYPE_SINGLE}
81 | {$define HAS_TYPE_SINGLE}
82 | {$else}
83 | {$undef HAS_TYPE_SINGLE}
84 | {$endif}
85 | {$if declared(RawByteString)}
86 | {$define HAS_TYPE_RAWBYTESTRING}
87 | {$else}
88 | {$undef HAS_TYPE_RAWBYTESTRING}
89 | {$ifend}
90 | {$else}
91 | {$realcompatibility off}
92 | {$localsymbols on}
93 | {$define LITTLE_ENDIAN}
94 | {$ifndef cpu64}
95 | {$define cpu32}
96 | {$endif}
97 | {$define HAS_TYPE_EXTENDED}
98 | {$define HAS_TYPE_DOUBLE}
99 | {$ifdef conditionalexpressions}
100 | {$if declared(RawByteString)}
101 | {$define HAS_TYPE_RAWBYTESTRING}
102 | {$else}
103 | {$undef HAS_TYPE_RAWBYTESTRING}
104 | {$ifend}
105 | {$else}
106 | {$undef HAS_TYPE_RAWBYTESTRING}
107 | {$endif}
108 | {$endif}
109 | {$ifdef win32}
110 | {$define windows}
111 | {$endif}
112 | {$ifdef win64}
113 | {$define windows}
114 | {$endif}
115 | {$ifdef wince}
116 | {$define windows}
117 | {$endif}
118 | {$ifdef windows}
119 | {$apptype console}
120 | {$endif}
121 | {$rangechecks off}
122 | {$extendedsyntax on}
123 | {$writeableconst on}
124 | {$hints off}
125 | {$booleval off}
126 | {$typedaddress off}
127 | {$stackframes off}
128 | {$varstringchecks on}
129 | {$typeinfo on}
130 | {$overflowchecks off}
131 | {$longstrings on}
132 | {$openstrings on}
133 | {$assertions on}
134 |
135 | uses SysUtils,Classes;
136 |
137 | const MaxUnicodeChar=$10ffff;
138 | CountUnicodeChars=$110000;
139 |
140 | type TPUCURawByteString={$ifdef HAS_TYPE_RAWBYTESTRING}RawByteString{$else}AnsiString{$endif};
141 |
142 | TPUCUUnicodeDWords=array[0..MaxUnicodeChar] of longint;
143 |
144 | TPUCUCodePoints=array of longint;
145 |
146 | PPUCUUnicodeCharacterDecompositionMappingItem=^TPUCUUnicodeCharacterDecompositionMappingItem;
147 | TPUCUUnicodeCharacterDecompositionMappingItem=record
148 | Type_:TPUCURawByteString;
149 | Mapping:TPUCUCodePoints;
150 | end;
151 |
152 | PPUCUUnicodeCharacterDecompositionMappingItems=^TPUCUUnicodeCharacterDecompositionMappingItems;
153 | TPUCUUnicodeCharacterDecompositionMappingItems=array[0..MaxUnicodeChar] of TPUCUUnicodeCharacterDecompositionMappingItem;
154 |
155 | PPUCUUnicodeCompositionExclusions=^TPUCUUnicodeCompositionExclusions;
156 | TPUCUUnicodeCompositionExclusions=array[0..((MaxUnicodeChar+31) shr 5)-1] of longword;
157 |
158 | PPUCUUnicodeCharacterDecompositionMapItem=^TPUCUUnicodeCharacterDecompositionMapItem;
159 | TPUCUUnicodeCharacterDecompositionMapItem=record
160 | CodePoint:longword;
161 | Decomposition:TPUCUCodePoints;
162 | end;
163 |
164 | TPUCUUnicodeCharacterDecompositionMap=array of TPUCUUnicodeCharacterDecompositionMapItem;
165 |
166 | PPUCUUnicodeCharacterCompositionMapItem=^TPUCUUnicodeCharacterCompositionMapItem;
167 | TPUCUUnicodeCharacterCompositionMapItem=record
168 | Composition:TPUCUCodePoints;
169 | CodePoint:longword;
170 | HashValue:longword;
171 | Next:longint;
172 | end;
173 |
174 | TPUCUUnicodeCharacterCompositionMap=array of TPUCUUnicodeCharacterCompositionMapItem;
175 |
176 | TPUCUUnicodeDecompositionSequences=array of longint;
177 |
178 | var PUCUUnicodeCategories:TPUCUUnicodeDWords;
179 | PUCUUnicodeScripts:TPUCUUnicodeDWords;
180 | PUCUUnicodeCanonicalCombiningClasses:TPUCUUnicodeDWords;
181 | PUCUUnicodeLowerCaseDeltas:TPUCUUnicodeDWords;
182 | PUCUUnicodeUpperCaseDeltas:TPUCUUnicodeDWords;
183 | PUCUUnicodeTitleCaseDeltas:TPUCUUnicodeDWords;
184 | PUCUUnicodeCharacterDecompositionMappingItems:TPUCUUnicodeCharacterDecompositionMappingItems;
185 | PUCUUnicodeCompositionExclusions:TPUCUUnicodeCompositionExclusions;
186 | PUCUUnicodeCharacterDecompositionMap:TPUCUUnicodeCharacterDecompositionMap;
187 | PUCUUnicodeDecompositionSequences:TPUCUUnicodeDecompositionSequences;
188 | PUCUUnicodeDecompositionStarts:TPUCUUnicodeDWords;
189 | PUCUUnicodeCharacterCompositionMap:TPUCUUnicodeCharacterCompositionMap;
190 | PUCUCategories:TStringList;
191 | PUCUScripts:TStringList;
192 | OutputList:TStringList;
193 |
// Cuts the leading field off s.
// Returns the trimmed text in front of the first occurrence of Splitter and
// leaves the trimmed text behind that occurrence in s. When Splitter does not
// occur in s, the whole trimmed s is returned and s becomes empty.
function GetUntilSplitter(const Splitter:TPUCURawByteString;var s:TPUCURawByteString):TPUCURawByteString;
var SplitterPosition:longint;
begin
 SplitterPosition:=pos(Splitter,s);
 if SplitterPosition<=0 then begin
  // No further splitter: the remaining text is the last field
  result:=trim(s);
  s:='';
  exit;
 end;
 result:=trim(copy(s,1,SplitterPosition-1));
 // Keep only what follows the splitter itself
 s:=trim(copy(s,SplitterPosition+length(Splitter),length(s)));
end;
207 |
208 | procedure PackTable(const Table:array of longint;Level:integer;const Name:TPUCURawByteString);
209 | type TBlock=array of longint;
210 | TBlocks=array of TBlock;
211 | TIndices=array of longint;
212 | var BestBlockSize,BlockSize,CountBlocks,CountIndices,Index,BlockPosition,Bytes,BestBytes,Bits,BestBits,EntryBytes,IndicesEntryBytes,BestIndicesEntryBytes,i,j,k:longint;
213 | Block:TBlock;
214 | Blocks:TBlocks;
215 | Indices:TIndices;
216 | BestBlocks:TBlocks;
217 | BestIndices:TIndices;
218 | OK:boolean;
219 | s:TPUCURawByteString;
220 | begin
221 | if Level<2 then begin
222 | Block:=nil;
223 | Blocks:=nil;
224 | Indices:=nil;
225 | BestBlocks:=nil;
226 | BestIndices:=nil;
227 | try
228 | BestBlockSize:=length(Table)*2;
229 | BestBits:=24;
230 | BlockSize:=1;
231 | Bits:=0;
232 | BestBytes:=-1;
233 | i:=0;
234 | OK:=true;
235 | for Index:=0 to length(Table)-1 do begin
236 | j:=Table[Index];
237 | if j<0 then begin
238 | OK:=false;
239 | end;
240 | j:=abs(j);
241 | if iBlock[j] then begin
285 | OK:=false;
286 | break;
287 | end;
288 | end;
289 | if OK then begin
290 | k:=i;
291 | break;
292 | end;
293 | end;
294 | if k<0 then begin
295 | k:=CountBlocks;
296 | Blocks[CountBlocks]:=copy(Block);
297 | inc(CountBlocks);
298 | end;
299 | if (CountIndices+1)>=length(Indices) then begin
300 | i:=1;
301 | j:=CountIndices+1;
302 | while i<=j do begin
303 | inc(i,i);
304 | end;
305 | SetLength(Indices,i);
306 | end;
307 | Indices[CountIndices]:=k;
308 | inc(CountIndices);
309 | BlockPosition:=0;
310 | end;
311 | end;
312 | if CountBlocks<256 then begin
313 | IndicesEntryBytes:=1;
314 | end else if CountBlocks<65536 then begin
315 | IndicesEntryBytes:=2;
316 | end else begin
317 | IndicesEntryBytes:=4;
318 | end;
319 | Bytes:=((CountBlocks*BlockSize)*EntryBytes)+(CountIndices*IndicesEntryBytes);
320 | if (BestBytes<0) or (Bytes<=BestBytes) then begin
321 | BestBytes:=Bytes;
322 | BestBlockSize:=BlockSize;
323 | BestBits:=Bits;
324 | BestIndicesEntryBytes:=EntryBytes;
325 | BestBlocks:=copy(Blocks,0,CountBlocks);
326 | BestIndices:=copy(Indices,0,CountIndices);
327 | end;
328 | SetLength(Blocks,0);
329 | SetLength(Indices,0);
330 | inc(BlockSize,BlockSize);
331 | inc(Bits);
332 | end;
333 | OutputList.Add('// '+Name+': '+IntToStr(BestBytes)+' bytes, '+IntToStr(length(BestBlocks))+' blocks with '+IntToStr(BestBlockSize)+' items per '+IntToStr(EntryBytes)+' bytes and '+IntToStr(length(BestIndices))+' indices per '+IntToStr(BestIndicesEntryBytes)+' bytes');
334 | OutputList.Add('const '+Name+'BlockBits='+IntToStr(BestBits)+';');
335 | OutputList.Add(' '+Name+'BlockMask='+IntToStr((1 shl BestBits)-1)+';');
336 | OutputList.Add(' '+Name+'BlockSize='+IntToStr(BestBlockSize)+';');
337 | OutputList.Add(' '+Name+'BlockCount='+IntToStr(length(BestBlocks))+';');
338 | OutputList.Add(' '+Name+'BlockData:array[0..'+IntToStr(length(BestBlocks)-1)+',0..'+IntToStr(BestBlockSize-1)+'] of '+s+'=(');
339 | s:='';
340 | for i:=0 to length(BestBlocks)-1 do begin
341 | s:=s+'(';
342 | for j:=0 to BestBlockSize-1 do begin
343 | s:=s+IntToStr(BestBlocks[i,j]);
344 | if (j+1)80 then begin
348 | OutputList.Add(s);
349 | s:='';
350 | end;
351 | end;
352 | s:=s+')';
353 | if (i+1)0 then begin
360 | OutputList.Add(s);
361 | s:='';
362 | end;
363 | OutputList.Add(');');
364 | if Level=1 then begin
365 | case BestIndicesEntryBytes of
366 | 1:begin
367 | s:='byte';
368 | end;
369 | 2:begin
370 | s:='word';
371 | end;
372 | else begin
373 | s:='longword';
374 | end;
375 | end;
376 | OutputList.Add(' '+Name+'IndexCount='+IntToStr(length(BestBlocks))+';');
377 | OutputList.Add(' '+Name+'IndexData:array[0..'+IntToStr(length(BestIndices)-1)+'] of '+s+'=(');
378 | s:='';
379 | for i:=0 to length(BestIndices)-1 do begin
380 | s:=s+IntToStr(BestIndices[i]);
381 | if (i+1)80 then begin
385 | OutputList.Add(s);
386 | s:='';
387 | end;
388 | end;
389 | if length(s)>0 then begin
390 | OutputList.Add(s);
391 | s:='';
392 | end;
393 | OutputList.Add(');');
394 | OutputList.Add('');
395 | end else begin
396 | OutputList.Add('');
397 | PackTable(BestIndices,Level+1,Name+'Index');
398 | end;
399 | finally
400 | SetLength(Block,0);
401 | SetLength(Blocks,0);
402 | SetLength(Indices,0);
403 | SetLength(BestBlocks,0);
404 | SetLength(BestIndices,0);
405 | end;
406 | end;
407 | end;
408 |
409 | procedure WriteTable(const Table:array of longint;Level:integer;const Name:TPUCURawByteString);
410 | var Index,EntryBytes,i,j,k:longint;
411 | OK:boolean;
412 | s:TPUCURawByteString;
413 | begin
414 | i:=0;
415 | OK:=true;
416 | for Index:=0 to length(Table)-1 do begin
417 | j:=Table[Index];
418 | if j<0 then begin
419 | OK:=false;
420 | end;
421 | j:=abs(j);
422 | if i80 then begin
457 | OutputList.Add(s);
458 | s:='';
459 | end;
460 | end;
461 | s:=s+');';
462 | OutputList.Add(s);
463 | end;
464 |
465 | procedure ParseBlocks;
466 | type TPUCUUnicodeBlock=record
467 | Name:TPUCURawByteString;
468 | FromChar,ToChar:longword;
469 | end;
470 | var List:TStringList;
471 | i,j,k,FromChar,ToChar,Count:longint;
472 | s,p:TPUCURawByteString;
473 | Blocks:array of TPUCUUnicodeBlock;
474 | begin
475 | Blocks:=nil;
476 | try
477 | Count:=0;
478 | OutputList.Add('type TPUCUUnicodeBlock=record');
479 | OutputList.Add(' Name:TPUCURawByteString;');
480 | OutputList.Add(' FromChar,ToChar:longword;');
481 | OutputList.Add(' end;');
482 | List:=TStringList.Create;
483 | try
484 | List.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'Blocks.txt');
485 | for i:=0 to List.Count-1 do begin
486 | s:=trim(List[i]);
487 | if (length(s)=0) or ((length(s)>0) and (s[1]='#')) then begin
488 | continue;
489 | end;
490 | j:=pos('#',s);
491 | if j>0 then begin
492 | s:=trim(copy(s,1,j-1));
493 | end;
494 | j:=pos(';',s);
495 | if j=0 then begin
496 | continue;
497 | end;
498 | p:=trim(copy(s,j+1,length(s)-j));
499 | s:=trim(copy(s,1,j-1));
500 | j:=pos('..',s);
501 | if j=0 then begin
502 | FromChar:=StrToInt('$'+trim(s));
503 | ToChar:=FromChar;
504 | end else begin
505 | FromChar:=StrToInt('$'+trim(copy(s,1,j-1)));
506 | ToChar:=StrToInt('$'+trim(copy(s,j+2,length(s)-(j+1))));
507 | end;
508 | if (Count+1)>=length(Blocks) then begin
509 | j:=1;
510 | k:=Count+1;
511 | while j<=k do begin
512 | inc(j,j);
513 | end;
514 | SetLength(Blocks,j);
515 | end;
516 | Blocks[Count].Name:=p;
517 | Blocks[Count].FromChar:=FromChar;
518 | Blocks[Count].ToChar:=ToChar;
519 | inc(Count);
520 | end;
521 | SetLength(Blocks,Count);
522 | finally
523 | List.Free;
524 | end;
525 | OutputList.Add('const PUCUUnicodeBlockCount='+IntToStr(Count)+';');
526 | OutputList.Add(' PUCUUnicodeBlocks:array[0..'+IntToStr(Count-1)+'] of TPUCUUnicodeBlock=(');
527 | for i:=0 to Count-1 do begin
528 | if (i+1)0) and (s[1]='#')) then begin
554 | continue;
555 | end;
556 | j:=pos('#',s);
557 | if j>0 then begin
558 | s:=trim(copy(s,1,j-1));
559 | end;
560 | j:=pos(';',s);
561 | if j=0 then begin
562 | continue;
563 | end;
564 | p:=trim(copy(s,j+1,length(s)-j));
565 | ci:=PUCUCategories.IndexOf(p);
566 | if ci<0 then begin
567 | ci:=PUCUCategories.Add(p);
568 | end;
569 | s:=trim(copy(s,1,j-1));
570 | j:=pos('..',s);
571 | if j=0 then begin
572 | CurrentChar:=StrToInt('$'+trim(s));
573 | PUCUUnicodeCategories[CurrentChar]:=ci;
574 | end else begin
575 | FromChar:=StrToInt('$'+trim(copy(s,1,j-1)));
576 | ToChar:=StrToInt('$'+trim(copy(s,j+2,length(s)-(j+1))));
577 | for CurrentChar:=FromChar to ToChar do begin
578 | PUCUUnicodeCategories[CurrentChar]:=ci;
579 | end;
580 | end;
581 | end;
582 | finally
583 | List.Free;
584 | end;
585 | end;
586 |
// Reads UnicodeData/Scripts.txt and fills PUCUUnicodeScripts with, for every
// listed code point, the index of its script name inside PUCUScripts.
// Script names that are not yet in PUCUScripts are appended to it.
// Each data line has the form "XXXX ; Name" or "XXXX..YYYY ; Name" (hex code
// points); '#' starts a comment. Lines without a ';' are ignored.
procedure ParseScripts;
var Lines:TStringList;
    LineIndex,Position,ScriptIndex,FirstChar,LastChar,CodePoint:longint;
    Line,ScriptName:TPUCURawByteString;
begin
 Lines:=TStringList.Create;
 try
  Lines.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'Scripts.txt');
  for LineIndex:=0 to Lines.Count-1 do begin
   Line:=trim(Lines[LineIndex]);
   // Only non-empty lines that are not pure comment lines carry data
   if (length(Line)>0) and (Line[1]<>'#') then begin
    // Strip a trailing comment
    Position:=pos('#',Line);
    if Position>0 then begin
     Line:=trim(copy(Line,1,Position-1));
    end;
    Position:=pos(';',Line);
    if Position>0 then begin
     // Right of the ';' is the script name, left of it the code point (range)
     ScriptName:=trim(copy(Line,Position+1,length(Line)-Position));
     ScriptIndex:=PUCUScripts.IndexOf(ScriptName);
     if ScriptIndex<0 then begin
      ScriptIndex:=PUCUScripts.Add(ScriptName);
     end;
     Line:=trim(copy(Line,1,Position-1));
     Position:=pos('..',Line);
     if Position=0 then begin
      // Single code point: handled as a range of length one
      FirstChar:=StrToInt('$'+trim(Line));
      LastChar:=FirstChar;
     end else begin
      FirstChar:=StrToInt('$'+trim(copy(Line,1,Position-1)));
      LastChar:=StrToInt('$'+trim(copy(Line,Position+2,length(Line)-(Position+1))));
     end;
     for CodePoint:=FirstChar to LastChar do begin
      PUCUUnicodeScripts[CodePoint]:=ScriptIndex;
     end;
    end;
   end;
  end;
 finally
  Lines.Free;
 end;
end;
630 |
// Fills the generator's global tables from two input files:
//  - UnicodeData/UnicodeData.txt: general category, canonical combining class,
//    decomposition mapping and upper/lower/title case deltas per code point
//  - UnicodeData/CompositionExclusions.txt: one bit per excluded code point in
//    PUCUUnicodeCompositionExclusions (32 code points per array element)
// Before the data file is read, default case deltas are seeded for the ASCII
// letters and for the ranges $ff21..$ff3a / $ff41..$ff5a.
// The semicolon separated fields of a line are consumed strictly in order with
// GetUntilSplitter, which is defined elsewhere in this file (presumably it
// returns the text in front of the splitter and removes it from the string -
// confirm against its definition). Do not reorder those calls.
procedure ParseUnicodeData;
var Lines:TStringList;
    LineIndex,Offset,Position,SkipIndex,CategoryIndex,MappingCount,MappedCodePoint,CodePoint:longint;
    Line,Field:TPUCURawByteString;
    MappingItem:PPUCUUnicodeCharacterDecompositionMappingItem;
begin

 // ---- UnicodeData.txt ----
 Lines:=TStringList.Create;
 try
  Lines.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'UnicodeData.txt');

  // Default case deltas; entries in the data file may overwrite them below
  for Offset:=0 to 25 do begin
   PUCUUnicodeUpperCaseDeltas[ord('a')+Offset]:=longint(ord('A')-ord('a'));
   PUCUUnicodeLowerCaseDeltas[ord('A')+Offset]:=ord('a')-ord('A');
   PUCUUnicodeLowerCaseDeltas[$ff21+Offset]:=$ff41-$ff21;
   PUCUUnicodeUpperCaseDeltas[$ff41+Offset]:=longint($ff21-$ff41);
  end;

  for LineIndex:=0 to Lines.Count-1 do begin
   Line:=trim(Lines[LineIndex]);
   if length(Line)=0 then begin
    continue;
   end;
   if Line[1]='#' then begin
    continue;
   end;
   // Drop a trailing comment, if any
   Position:=pos('#',Line);
   if Position>0 then begin
    Line:=trim(copy(Line,1,Position-1));
   end;
   // Lines without any field separator are not data
   if pos(';',Line)=0 then begin
    continue;
   end;

   CodePoint:=StrToInt('$'+GetUntilSplitter(';',Line)); // Code
   GetUntilSplitter(';',Line); // Name

   // Class (general category): report and overwrite when it disagrees with
   // the value already stored for this code point
   Field:=GetUntilSplitter(';',Line);
   CategoryIndex:=PUCUCategories.IndexOf(Field);
   if CategoryIndex<0 then begin
    CategoryIndex:=PUCUCategories.Add(Field);
   end;
   if PUCUUnicodeCategories[CodePoint]<>CategoryIndex then begin
    writeln(ErrOutput,CodePoint,' has multiple categories?');
    PUCUUnicodeCategories[CodePoint]:=CategoryIndex;
   end;

   // Canonical Combining Class, 0 when the field is not a number
   PUCUUnicodeCanonicalCombiningClasses[CodePoint]:=StrToIntDef(GetUntilSplitter(';',Line),0);

   GetUntilSplitter(';',Line); // Bidirectional Category

   // Character Decomposition Mapping: optional "<type>" tag followed by
   // space separated hexadecimal code points
   Field:=GetUntilSplitter(';',Line);
   MappingItem:=@PUCUUnicodeCharacterDecompositionMappingItems[CodePoint];
   if length(Field)=0 then begin
    MappingItem^.Type_:='none';
    MappingItem^.Mapping:=nil;
   end else begin
    if pos('<',Field)>0 then begin
     GetUntilSplitter('<',Field);
     MappingItem^.Type_:=GetUntilSplitter('>',Field);
    end else begin
     MappingItem^.Type_:='canonical';
    end;
    Field:=trim(Field);
    MappingItem^.Mapping:=nil;
    MappingCount:=0;
    try
     while length(Field)>0 do begin
      // Grow with some slack; the array is cut to its real size afterwards
      if length(MappingItem^.Mapping)<=MappingCount then begin
       SetLength(MappingItem^.Mapping,(MappingCount+1)*2);
      end;
      MappingItem^.Mapping[MappingCount]:=StrToIntDef('$'+GetUntilSplitter(' ',Field),0);
      inc(MappingCount);
     end;
    finally
     SetLength(MappingItem^.Mapping,MappingCount);
    end;
   end;

   // Skip, in this order: Decimal digit value, Digit value, Numeric value,
   // Mirrored, Unicode 1.0 Name, 10646 comment field
   for SkipIndex:=1 to 6 do begin
    GetUntilSplitter(';',Line);
   end;

   // UpperChar Code
   MappedCodePoint:=StrToIntDef('$'+GetUntilSplitter(';',Line),-1);
   if (MappedCodePoint>=0) and (MappedCodePoint<>CodePoint) then begin
    PUCUUnicodeUpperCaseDeltas[CodePoint]:=MappedCodePoint-CodePoint;
   end;

   // LowerChar Code
   MappedCodePoint:=StrToIntDef('$'+GetUntilSplitter(';',Line),-1);
   if (MappedCodePoint>=0) and (MappedCodePoint<>CodePoint) then begin
    PUCUUnicodeLowerCaseDeltas[CodePoint]:=MappedCodePoint-CodePoint;
   end;

   // TitleChar Code
   MappedCodePoint:=StrToIntDef('$'+GetUntilSplitter(';',Line),-1);
   if (MappedCodePoint>=0) and (MappedCodePoint<>CodePoint) then begin
    PUCUUnicodeTitleCaseDeltas[CodePoint]:=MappedCodePoint-CodePoint;
   end;

  end;
 finally
  Lines.Free;
 end;

 // ---- CompositionExclusions.txt ----
 Lines:=TStringList.Create;
 try
  Lines.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'CompositionExclusions.txt');
  for LineIndex:=0 to Lines.Count-1 do begin
   Line:=trim(Lines[LineIndex]);
   if length(Line)=0 then begin
    continue;
   end;
   if Line[1]='#' then begin
    continue;
   end;
   // Drop a trailing comment, if any
   Position:=pos('#',Line);
   if Position>0 then begin
    Line:=trim(copy(Line,1,Position-1));
   end;
   Line:=trim(Line);
   // Keep only the first space separated token, the hexadecimal code point
   Position:=pos(' ',Line);
   if Position>0 then begin
    Line:=trim(copy(Line,1,Position-1));
   end;
   CodePoint:=StrToInt('$'+GetUntilSplitter(' ',Line)); // Code
   // Set the bit of this code point in the exclusion bit set
   PUCUUnicodeCompositionExclusions[CodePoint shr 5]:=PUCUUnicodeCompositionExclusions[CodePoint shr 5] or (longword(1) shl (CodePoint and 31));
  end;
 finally
  Lines.Free;
 end;

end;
763 |
764 | function RecursiveDecomposition(const aCodePoint:longword):TPUCUCodePoints;
765 | var Index,Len,NewLen:longint;
766 | CodePoints:TPUCUCodePoints;
767 | begin
768 | result:=nil;
769 | if (aCodePoint0 then begin
778 | NewLen:=Len+length(CodePoints);
779 | if length(result)PUCUUnicodeCanonicalCombiningClasses[PUCUUnicodeCharacterDecompositionMap[Index+1].CodePoint] then begin
818 | PUCUUnicodeCharacterDecompositionMapItem:=PUCUUnicodeCharacterDecompositionMap[Index];
819 | PUCUUnicodeCharacterDecompositionMap[Index]:=PUCUUnicodeCharacterDecompositionMap[Index+1];
820 | PUCUUnicodeCharacterDecompositionMap[Index+1]:=PUCUUnicodeCharacterDecompositionMapItem;
821 | if Index>0 then begin
822 | dec(Index);
823 | end else begin
824 | inc(Index);
825 | end;
826 | end else begin
827 | inc(Index);
828 | end;
829 | end;
830 | PUCUUnicodeDecompositionSequences:=nil;
831 | SequenceLen:=1;
832 | try
833 | SetLength(PUCUUnicodeDecompositionSequences,1);
834 | PUCUUnicodeDecompositionSequences[0]:=0;
835 | StartLen:=0;
836 | for Index:=0 to Count-1 do begin
837 | Decomposition:=PUCUUnicodeCharacterDecompositionMap[Index].Decomposition;
838 | FoundIndex:=-1;
839 | for SubIndex:=0 to SequenceLen-length(Decomposition) do begin
840 | Count:=0;
841 | for SubSubIndex:=0 to length(Decomposition)-1 do begin
842 | if Decomposition[SubSubIndex]<>PUCUUnicodeDecompositionSequences[SubIndex+SubSubIndex] then begin
843 | break;
844 | end;
845 | inc(Count);
846 | end;
847 | if Count=length(Decomposition) then begin
848 | FoundIndex:=SubIndex;
849 | break;
850 | end;
851 | end;
852 | if FoundIndex<0 then begin
853 | FoundIndex:=SequenceLen;
854 | NewLen:=SequenceLen+length(Decomposition);
855 | if length(PUCUUnicodeDecompositionSequences)PUCUUnicodeCharacterCompositionMap[Index+1].Composition[0]) or
918 | ((PUCUUnicodeCharacterCompositionMap[Index].Composition[0]=PUCUUnicodeCharacterCompositionMap[Index+1].Composition[0]) and
919 | (PUCUUnicodeCharacterCompositionMap[Index].Composition[1]>PUCUUnicodeCharacterCompositionMap[Index+1].Composition[1])) then begin
920 | PUCUUnicodeCharacterCompositionMapItem:=PUCUUnicodeCharacterCompositionMap[Index];
921 | PUCUUnicodeCharacterCompositionMap[Index]:=PUCUUnicodeCharacterCompositionMap[Index+1];
922 | PUCUUnicodeCharacterCompositionMap[Index+1]:=PUCUUnicodeCharacterCompositionMapItem;
923 | if Index>0 then begin
924 | dec(Index);
925 | end else begin
926 | inc(Index);
927 | end;
928 | end else begin
929 | inc(Index);
930 | end;
931 | end;
932 | HashTable:=nil;
933 | HashTableLength:=nil;
934 | try
935 | SetLength(HashTable,HashTableSize);
936 | SetLength(HashTableLength,HashTableSize);
937 | for Index:=0 to HashTableSize-1 do begin
938 | HashTable[Index]:=-1;
939 | HashTableLength[Index]:=0;
940 | end;
941 | for Index:=0 to length(PUCUUnicodeCharacterCompositionMap)-1 do begin
942 | HashIndex:=PUCUUnicodeCharacterCompositionMap[Index].HashValue and HashTableMask;
943 | PUCUUnicodeCharacterCompositionMap[Index].Next:=HashTable[HashIndex];
944 | HashTable[HashIndex]:=Index;
945 | inc(HashTableLength[HashIndex]);
946 | end;
947 | for Index:=0 to HashTableSize-1 do begin
948 | inc(HashTable[Index]);
949 | end;
950 | OutputList.Add('const PUCUUnicodeCharacterCompositionHashTableBits='+IntToStr(HashTableBits)+';');
951 | OutputList.Add(' PUCUUnicodeCharacterCompositionHashTableSize='+IntToStr(HashTableSize)+';');
952 | OutputList.Add(' PUCUUnicodeCharacterCompositionHashTableMask='+IntToStr(HashTableMask)+';');
953 | WriteTable(HashTable,0,'PUCUUnicodeCharacterCompositionHashTable');
954 | //WriteTable(HashTableLength,0,'PUCUUnicodeCharacterCompositionHashTableLength'); // for debugging usages
955 | OutputList.Add('type PPUCUUnicodeCharacterCompositionSequence=^TPUCUUnicodeCharacterCompositionSequence;');
956 | OutputList.Add(' TPUCUUnicodeCharacterCompositionSequence=record');
957 | OutputList.Add(' Sequence:array[0..1] of longword;');
958 | OutputList.Add(' CodePoint:longword;');
959 | case length(PUCUUnicodeCharacterCompositionMap)+1 of
960 | 0..255:begin
961 | OutputList.Add(' Next:byte;');
962 | end;
963 | 256..65535:begin
964 | OutputList.Add(' Next:word;');
965 | end;
966 | else begin
967 | OutputList.Add(' Next:longword;');
968 | end;
969 | end;
970 | OutputList.Add(' end;');
971 | OutputList.Add('const PUCUUnicodeCharacterCompositionSequenceCount='+IntToStr(length(PUCUUnicodeCharacterCompositionMap)+1)+';');
972 | OutputList.Add(' PUCUUnicodeCharacterCompositionSequences:array[0..'+IntToStr(length(PUCUUnicodeCharacterCompositionMap))+'] of TPUCUUnicodeCharacterCompositionSequence=(');
973 | if length(PUCUUnicodeCharacterCompositionMap)>0 then begin
974 | OutputList.Add(' (Sequence:(0,0);CodePoint:0;Next:0),');
975 | for Index:=0 to length(PUCUUnicodeCharacterCompositionMap)-1 do begin
976 | if (Index+1)
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 | -
58 |
59 |
60 | -
61 |
62 |
63 | -
64 |
65 |
66 |
67 |
68 |
69 |
--------------------------------------------------------------------------------
/src/PUCUDebug.cfg:
--------------------------------------------------------------------------------
1 | -$A8
2 | -$B-
3 | -$C+
4 | -$D+
5 | -$E-
6 | -$F-
7 | -$G+
8 | -$H+
9 | -$I+
10 | -$J-
11 | -$K-
12 | -$L+
13 | -$M-
14 | -$N+
15 | -$O+
16 | -$P+
17 | -$Q-
18 | -$R-
19 | -$S-
20 | -$T-
21 | -$U-
22 | -$V+
23 | -$W-
24 | -$X+
25 | -$YD
26 | -$Z1
27 | -GD
28 | -cg
29 | -vn
30 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
31 | -H+
32 | -W+
33 | -M
34 | -$M16384,1048576
35 | -K$00400000
36 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl"
37 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl"
38 | -w-UNSAFE_TYPE
39 | -w-UNSAFE_CODE
40 | -w-UNSAFE_CAST
41 |
--------------------------------------------------------------------------------
/src/PUCUDebug.dof:
--------------------------------------------------------------------------------
1 | [FileVersion]
2 | Version=7.0
3 | [Compiler]
4 | A=8
5 | B=0
6 | C=1
7 | D=1
8 | E=0
9 | F=0
10 | G=1
11 | H=1
12 | I=1
13 | J=0
14 | K=0
15 | L=1
16 | M=0
17 | N=1
18 | O=1
19 | P=1
20 | Q=0
21 | R=0
22 | S=0
23 | T=0
24 | U=0
25 | V=1
26 | W=0
27 | X=1
28 | Y=1
29 | Z=1
30 | ShowHints=1
31 | ShowWarnings=1
32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
33 | NamespacePrefix=
34 | SymbolDeprecated=1
35 | SymbolLibrary=1
36 | SymbolPlatform=1
37 | UnitLibrary=1
38 | UnitPlatform=1
39 | UnitDeprecated=1
40 | HResultCompat=1
41 | HidingMember=1
42 | HiddenVirtual=1
43 | Garbage=1
44 | BoundsError=1
45 | ZeroNilCompat=1
46 | StringConstTruncated=1
47 | ForLoopVarVarPar=1
48 | TypedConstVarPar=1
49 | AsgToTypedConst=1
50 | CaseLabelRange=1
51 | ForVariable=1
52 | ConstructingAbstract=1
53 | ComparisonFalse=1
54 | ComparisonTrue=1
55 | ComparingSignedUnsigned=1
56 | CombiningSignedUnsigned=1
57 | UnsupportedConstruct=1
58 | FileOpen=1
59 | FileOpenUnitSrc=1
60 | BadGlobalSymbol=1
61 | DuplicateConstructorDestructor=1
62 | InvalidDirective=1
63 | PackageNoLink=1
64 | PackageThreadVar=1
65 | ImplicitImport=1
66 | HPPEMITIgnored=1
67 | NoRetVal=1
68 | UseBeforeDef=1
69 | ForLoopVarUndef=1
70 | UnitNameMismatch=1
71 | NoCFGFileFound=1
72 | MessageDirective=1
73 | ImplicitVariants=1
74 | UnicodeToLocale=1
75 | LocaleToUnicode=1
76 | ImagebaseMultiple=1
77 | SuspiciousTypecast=1
78 | PrivatePropAccessor=1
79 | UnsafeType=0
80 | UnsafeCode=0
81 | UnsafeCast=0
82 | [Linker]
83 | MapFile=3
84 | OutputObjs=0
85 | ConsoleApp=1
86 | DebugInfo=1
87 | RemoteSymbols=0
88 | MinStackSize=16384
89 | MaxStackSize=1048576
90 | ImageBase=4194304
91 | ExeDescription=
92 | [Directories]
93 | OutputDir=
94 | UnitOutputDir=
95 | PackageDLLOutputDir=
96 | PackageDCPOutputDir=
97 | SearchPath=
98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls;KHexEditor_D7R
99 | Conditionals=
100 | DebugSourceDirs=
101 | UsePackages=0
102 | [Parameters]
103 | RunParams=
104 | HostApplication=
105 | Launcher=
106 | UseLauncher=0
107 | DebugCWD=
108 | [Language]
109 | ActiveLang=
110 | ProjectLang=
111 | RootDir=
112 | [Version Info]
113 | IncludeVerInfo=0
114 | AutoIncBuild=0
115 | MajorVer=1
116 | MinorVer=0
117 | Release=0
118 | Build=0
119 | Debug=0
120 | PreRelease=0
121 | Special=0
122 | Private=0
123 | DLL=0
124 | Locale=1031
125 | CodePage=1252
126 | [HistoryLists\hlUnitAliases]
127 | Count=1
128 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
129 |
--------------------------------------------------------------------------------
/src/PUCUDebug.dpr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BeRo1985/pucu/ea0b2a5fc5dbd4669f774442ee2bd88075909618/src/PUCUDebug.dpr
--------------------------------------------------------------------------------
/src/PUCUGenCodePages.cfg:
--------------------------------------------------------------------------------
1 | -$A8
2 | -$B-
3 | -$C+
4 | -$D+
5 | -$E-
6 | -$F-
7 | -$G+
8 | -$H+
9 | -$I+
10 | -$J+
11 | -$K-
12 | -$L+
13 | -$M-
14 | -$N+
15 | -$O+
16 | -$P+
17 | -$Q-
18 | -$R-
19 | -$S-
20 | -$T-
21 | -$U-
22 | -$V+
23 | -$W-
24 | -$X+
25 | -$YD
26 | -$Z1
27 | -cg
28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
29 | -H+
30 | -W+
31 | -M
32 | -$M16384,1048576
33 | -K$00400000
34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl"
35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl"
36 | -w-UNSAFE_TYPE
37 | -w-UNSAFE_CODE
38 | -w-UNSAFE_CAST
39 |
--------------------------------------------------------------------------------
/src/PUCUGenCodePages.dof:
--------------------------------------------------------------------------------
1 | [FileVersion]
2 | Version=7.0
3 | [Compiler]
4 | A=8
5 | B=0
6 | C=1
7 | D=1
8 | E=0
9 | F=0
10 | G=1
11 | H=1
12 | I=1
13 | J=1
14 | K=0
15 | L=1
16 | M=0
17 | N=1
18 | O=1
19 | P=1
20 | Q=0
21 | R=0
22 | S=0
23 | T=0
24 | U=0
25 | V=1
26 | W=0
27 | X=1
28 | Y=1
29 | Z=1
30 | ShowHints=1
31 | ShowWarnings=1
32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
33 | NamespacePrefix=
34 | SymbolDeprecated=1
35 | SymbolLibrary=1
36 | SymbolPlatform=1
37 | UnitLibrary=1
38 | UnitPlatform=1
39 | UnitDeprecated=1
40 | HResultCompat=1
41 | HidingMember=1
42 | HiddenVirtual=1
43 | Garbage=1
44 | BoundsError=1
45 | ZeroNilCompat=1
46 | StringConstTruncated=1
47 | ForLoopVarVarPar=1
48 | TypedConstVarPar=1
49 | AsgToTypedConst=1
50 | CaseLabelRange=1
51 | ForVariable=1
52 | ConstructingAbstract=1
53 | ComparisonFalse=1
54 | ComparisonTrue=1
55 | ComparingSignedUnsigned=1
56 | CombiningSignedUnsigned=1
57 | UnsupportedConstruct=1
58 | FileOpen=1
59 | FileOpenUnitSrc=1
60 | BadGlobalSymbol=1
61 | DuplicateConstructorDestructor=1
62 | InvalidDirective=1
63 | PackageNoLink=1
64 | PackageThreadVar=1
65 | ImplicitImport=1
66 | HPPEMITIgnored=1
67 | NoRetVal=1
68 | UseBeforeDef=1
69 | ForLoopVarUndef=1
70 | UnitNameMismatch=1
71 | NoCFGFileFound=1
72 | MessageDirective=1
73 | ImplicitVariants=1
74 | UnicodeToLocale=1
75 | LocaleToUnicode=1
76 | ImagebaseMultiple=1
77 | SuspiciousTypecast=1
78 | PrivatePropAccessor=1
79 | UnsafeType=0
80 | UnsafeCode=0
81 | UnsafeCast=0
82 | [Linker]
83 | MapFile=0
84 | OutputObjs=0
85 | ConsoleApp=1
86 | DebugInfo=0
87 | RemoteSymbols=0
88 | MinStackSize=16384
89 | MaxStackSize=1048576
90 | ImageBase=4194304
91 | ExeDescription=
92 | [Directories]
93 | OutputDir=
94 | UnitOutputDir=
95 | PackageDLLOutputDir=
96 | PackageDCPOutputDir=
97 | SearchPath=
98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls;KHexEditor_D7R
99 | Conditionals=
100 | DebugSourceDirs=
101 | UsePackages=0
102 | [Parameters]
103 | RunParams=
104 | HostApplication=
105 | Launcher=
106 | UseLauncher=0
107 | DebugCWD=
108 | [Language]
109 | ActiveLang=
110 | ProjectLang=
111 | RootDir=
112 | [Version Info]
113 | IncludeVerInfo=0
114 | AutoIncBuild=0
115 | MajorVer=1
116 | MinorVer=0
117 | Release=0
118 | Build=0
119 | Debug=0
120 | PreRelease=0
121 | Special=0
122 | Private=0
123 | DLL=0
124 | Locale=1031
125 | CodePage=1252
126 | [Version Info Keys]
127 | CompanyName=
128 | FileDescription=
129 | FileVersion=1.0.0.0
130 | InternalName=
131 | LegalCopyright=
132 | LegalTrademarks=
133 | OriginalFilename=
134 | ProductName=
135 | ProductVersion=1.0.0.0
136 | Comments=
137 | [HistoryLists\hlUnitAliases]
138 | Count=1
139 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE;
140 |
--------------------------------------------------------------------------------
/src/PUCUGenCodePages.dpr:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/BeRo1985/pucu/ea0b2a5fc5dbd4669f774442ee2bd88075909618/src/PUCUGenCodePages.dpr
--------------------------------------------------------------------------------
/src/PUCUGenCodePages.lpi:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 | -
59 |
60 |
61 | -
62 |
63 |
64 | -
65 |
66 |
67 |
68 |
69 |
70 |
--------------------------------------------------------------------------------
/src/UnicodeData/Blocks.txt:
--------------------------------------------------------------------------------
1 | # Blocks-15.0.0.txt
2 | # Date: 2022-01-28, 20:58:00 GMT [KW]
3 | # © 2022 Unicode®, Inc.
4 | # For terms of use, see https://www.unicode.org/terms_of_use.html
5 | #
6 | # Unicode Character Database
7 | # For documentation, see https://www.unicode.org/reports/tr44/
8 | #
9 | # Format:
10 | # Start Code..End Code; Block Name
11 |
12 | # ================================================
13 |
14 | # Note: When comparing block names, casing, whitespace, hyphens,
15 | # and underbars are ignored.
16 | # For example, "Latin Extended-A" and "latin extended a" are equivalent.
17 | # For more information on the comparison of property values,
18 | # see UAX #44: https://www.unicode.org/reports/tr44/
19 | #
20 | # All block ranges start with a value where (cp MOD 16) = 0,
21 | # and end with a value where (cp MOD 16) = 15. In other words,
22 | # the last hexadecimal digit of the start of range is ...0
23 | # and the last hexadecimal digit of the end of range is ...F.
24 | # This constraint on block ranges guarantees that allocations
25 | # are done in terms of whole columns, and that code chart display
26 | # never involves splitting columns in the charts.
27 | #
28 | # All code points not explicitly listed for Block
29 | # have the value No_Block.
30 |
31 | # Property: Block
32 | #
33 | # @missing: 0000..10FFFF; No_Block
34 |
35 | 0000..007F; Basic Latin
36 | 0080..00FF; Latin-1 Supplement
37 | 0100..017F; Latin Extended-A
38 | 0180..024F; Latin Extended-B
39 | 0250..02AF; IPA Extensions
40 | 02B0..02FF; Spacing Modifier Letters
41 | 0300..036F; Combining Diacritical Marks
42 | 0370..03FF; Greek and Coptic
43 | 0400..04FF; Cyrillic
44 | 0500..052F; Cyrillic Supplement
45 | 0530..058F; Armenian
46 | 0590..05FF; Hebrew
47 | 0600..06FF; Arabic
48 | 0700..074F; Syriac
49 | 0750..077F; Arabic Supplement
50 | 0780..07BF; Thaana
51 | 07C0..07FF; NKo
52 | 0800..083F; Samaritan
53 | 0840..085F; Mandaic
54 | 0860..086F; Syriac Supplement
55 | 0870..089F; Arabic Extended-B
56 | 08A0..08FF; Arabic Extended-A
57 | 0900..097F; Devanagari
58 | 0980..09FF; Bengali
59 | 0A00..0A7F; Gurmukhi
60 | 0A80..0AFF; Gujarati
61 | 0B00..0B7F; Oriya
62 | 0B80..0BFF; Tamil
63 | 0C00..0C7F; Telugu
64 | 0C80..0CFF; Kannada
65 | 0D00..0D7F; Malayalam
66 | 0D80..0DFF; Sinhala
67 | 0E00..0E7F; Thai
68 | 0E80..0EFF; Lao
69 | 0F00..0FFF; Tibetan
70 | 1000..109F; Myanmar
71 | 10A0..10FF; Georgian
72 | 1100..11FF; Hangul Jamo
73 | 1200..137F; Ethiopic
74 | 1380..139F; Ethiopic Supplement
75 | 13A0..13FF; Cherokee
76 | 1400..167F; Unified Canadian Aboriginal Syllabics
77 | 1680..169F; Ogham
78 | 16A0..16FF; Runic
79 | 1700..171F; Tagalog
80 | 1720..173F; Hanunoo
81 | 1740..175F; Buhid
82 | 1760..177F; Tagbanwa
83 | 1780..17FF; Khmer
84 | 1800..18AF; Mongolian
85 | 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
86 | 1900..194F; Limbu
87 | 1950..197F; Tai Le
88 | 1980..19DF; New Tai Lue
89 | 19E0..19FF; Khmer Symbols
90 | 1A00..1A1F; Buginese
91 | 1A20..1AAF; Tai Tham
92 | 1AB0..1AFF; Combining Diacritical Marks Extended
93 | 1B00..1B7F; Balinese
94 | 1B80..1BBF; Sundanese
95 | 1BC0..1BFF; Batak
96 | 1C00..1C4F; Lepcha
97 | 1C50..1C7F; Ol Chiki
98 | 1C80..1C8F; Cyrillic Extended-C
99 | 1C90..1CBF; Georgian Extended
100 | 1CC0..1CCF; Sundanese Supplement
101 | 1CD0..1CFF; Vedic Extensions
102 | 1D00..1D7F; Phonetic Extensions
103 | 1D80..1DBF; Phonetic Extensions Supplement
104 | 1DC0..1DFF; Combining Diacritical Marks Supplement
105 | 1E00..1EFF; Latin Extended Additional
106 | 1F00..1FFF; Greek Extended
107 | 2000..206F; General Punctuation
108 | 2070..209F; Superscripts and Subscripts
109 | 20A0..20CF; Currency Symbols
110 | 20D0..20FF; Combining Diacritical Marks for Symbols
111 | 2100..214F; Letterlike Symbols
112 | 2150..218F; Number Forms
113 | 2190..21FF; Arrows
114 | 2200..22FF; Mathematical Operators
115 | 2300..23FF; Miscellaneous Technical
116 | 2400..243F; Control Pictures
117 | 2440..245F; Optical Character Recognition
118 | 2460..24FF; Enclosed Alphanumerics
119 | 2500..257F; Box Drawing
120 | 2580..259F; Block Elements
121 | 25A0..25FF; Geometric Shapes
122 | 2600..26FF; Miscellaneous Symbols
123 | 2700..27BF; Dingbats
124 | 27C0..27EF; Miscellaneous Mathematical Symbols-A
125 | 27F0..27FF; Supplemental Arrows-A
126 | 2800..28FF; Braille Patterns
127 | 2900..297F; Supplemental Arrows-B
128 | 2980..29FF; Miscellaneous Mathematical Symbols-B
129 | 2A00..2AFF; Supplemental Mathematical Operators
130 | 2B00..2BFF; Miscellaneous Symbols and Arrows
131 | 2C00..2C5F; Glagolitic
132 | 2C60..2C7F; Latin Extended-C
133 | 2C80..2CFF; Coptic
134 | 2D00..2D2F; Georgian Supplement
135 | 2D30..2D7F; Tifinagh
136 | 2D80..2DDF; Ethiopic Extended
137 | 2DE0..2DFF; Cyrillic Extended-A
138 | 2E00..2E7F; Supplemental Punctuation
139 | 2E80..2EFF; CJK Radicals Supplement
140 | 2F00..2FDF; Kangxi Radicals
141 | 2FF0..2FFF; Ideographic Description Characters
142 | 3000..303F; CJK Symbols and Punctuation
143 | 3040..309F; Hiragana
144 | 30A0..30FF; Katakana
145 | 3100..312F; Bopomofo
146 | 3130..318F; Hangul Compatibility Jamo
147 | 3190..319F; Kanbun
148 | 31A0..31BF; Bopomofo Extended
149 | 31C0..31EF; CJK Strokes
150 | 31F0..31FF; Katakana Phonetic Extensions
151 | 3200..32FF; Enclosed CJK Letters and Months
152 | 3300..33FF; CJK Compatibility
153 | 3400..4DBF; CJK Unified Ideographs Extension A
154 | 4DC0..4DFF; Yijing Hexagram Symbols
155 | 4E00..9FFF; CJK Unified Ideographs
156 | A000..A48F; Yi Syllables
157 | A490..A4CF; Yi Radicals
158 | A4D0..A4FF; Lisu
159 | A500..A63F; Vai
160 | A640..A69F; Cyrillic Extended-B
161 | A6A0..A6FF; Bamum
162 | A700..A71F; Modifier Tone Letters
163 | A720..A7FF; Latin Extended-D
164 | A800..A82F; Syloti Nagri
165 | A830..A83F; Common Indic Number Forms
166 | A840..A87F; Phags-pa
167 | A880..A8DF; Saurashtra
168 | A8E0..A8FF; Devanagari Extended
169 | A900..A92F; Kayah Li
170 | A930..A95F; Rejang
171 | A960..A97F; Hangul Jamo Extended-A
172 | A980..A9DF; Javanese
173 | A9E0..A9FF; Myanmar Extended-B
174 | AA00..AA5F; Cham
175 | AA60..AA7F; Myanmar Extended-A
176 | AA80..AADF; Tai Viet
177 | AAE0..AAFF; Meetei Mayek Extensions
178 | AB00..AB2F; Ethiopic Extended-A
179 | AB30..AB6F; Latin Extended-E
180 | AB70..ABBF; Cherokee Supplement
181 | ABC0..ABFF; Meetei Mayek
182 | AC00..D7AF; Hangul Syllables
183 | D7B0..D7FF; Hangul Jamo Extended-B
184 | D800..DB7F; High Surrogates
185 | DB80..DBFF; High Private Use Surrogates
186 | DC00..DFFF; Low Surrogates
187 | E000..F8FF; Private Use Area
188 | F900..FAFF; CJK Compatibility Ideographs
189 | FB00..FB4F; Alphabetic Presentation Forms
190 | FB50..FDFF; Arabic Presentation Forms-A
191 | FE00..FE0F; Variation Selectors
192 | FE10..FE1F; Vertical Forms
193 | FE20..FE2F; Combining Half Marks
194 | FE30..FE4F; CJK Compatibility Forms
195 | FE50..FE6F; Small Form Variants
196 | FE70..FEFF; Arabic Presentation Forms-B
197 | FF00..FFEF; Halfwidth and Fullwidth Forms
198 | FFF0..FFFF; Specials
199 | 10000..1007F; Linear B Syllabary
200 | 10080..100FF; Linear B Ideograms
201 | 10100..1013F; Aegean Numbers
202 | 10140..1018F; Ancient Greek Numbers
203 | 10190..101CF; Ancient Symbols
204 | 101D0..101FF; Phaistos Disc
205 | 10280..1029F; Lycian
206 | 102A0..102DF; Carian
207 | 102E0..102FF; Coptic Epact Numbers
208 | 10300..1032F; Old Italic
209 | 10330..1034F; Gothic
210 | 10350..1037F; Old Permic
211 | 10380..1039F; Ugaritic
212 | 103A0..103DF; Old Persian
213 | 10400..1044F; Deseret
214 | 10450..1047F; Shavian
215 | 10480..104AF; Osmanya
216 | 104B0..104FF; Osage
217 | 10500..1052F; Elbasan
218 | 10530..1056F; Caucasian Albanian
219 | 10570..105BF; Vithkuqi
220 | 10600..1077F; Linear A
221 | 10780..107BF; Latin Extended-F
222 | 10800..1083F; Cypriot Syllabary
223 | 10840..1085F; Imperial Aramaic
224 | 10860..1087F; Palmyrene
225 | 10880..108AF; Nabataean
226 | 108E0..108FF; Hatran
227 | 10900..1091F; Phoenician
228 | 10920..1093F; Lydian
229 | 10980..1099F; Meroitic Hieroglyphs
230 | 109A0..109FF; Meroitic Cursive
231 | 10A00..10A5F; Kharoshthi
232 | 10A60..10A7F; Old South Arabian
233 | 10A80..10A9F; Old North Arabian
234 | 10AC0..10AFF; Manichaean
235 | 10B00..10B3F; Avestan
236 | 10B40..10B5F; Inscriptional Parthian
237 | 10B60..10B7F; Inscriptional Pahlavi
238 | 10B80..10BAF; Psalter Pahlavi
239 | 10C00..10C4F; Old Turkic
240 | 10C80..10CFF; Old Hungarian
241 | 10D00..10D3F; Hanifi Rohingya
242 | 10E60..10E7F; Rumi Numeral Symbols
243 | 10E80..10EBF; Yezidi
244 | 10EC0..10EFF; Arabic Extended-C
245 | 10F00..10F2F; Old Sogdian
246 | 10F30..10F6F; Sogdian
247 | 10F70..10FAF; Old Uyghur
248 | 10FB0..10FDF; Chorasmian
249 | 10FE0..10FFF; Elymaic
250 | 11000..1107F; Brahmi
251 | 11080..110CF; Kaithi
252 | 110D0..110FF; Sora Sompeng
253 | 11100..1114F; Chakma
254 | 11150..1117F; Mahajani
255 | 11180..111DF; Sharada
256 | 111E0..111FF; Sinhala Archaic Numbers
257 | 11200..1124F; Khojki
258 | 11280..112AF; Multani
259 | 112B0..112FF; Khudawadi
260 | 11300..1137F; Grantha
261 | 11400..1147F; Newa
262 | 11480..114DF; Tirhuta
263 | 11580..115FF; Siddham
264 | 11600..1165F; Modi
265 | 11660..1167F; Mongolian Supplement
266 | 11680..116CF; Takri
267 | 11700..1174F; Ahom
268 | 11800..1184F; Dogra
269 | 118A0..118FF; Warang Citi
270 | 11900..1195F; Dives Akuru
271 | 119A0..119FF; Nandinagari
272 | 11A00..11A4F; Zanabazar Square
273 | 11A50..11AAF; Soyombo
274 | 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
275 | 11AC0..11AFF; Pau Cin Hau
276 | 11B00..11B5F; Devanagari Extended-A
277 | 11C00..11C6F; Bhaiksuki
278 | 11C70..11CBF; Marchen
279 | 11D00..11D5F; Masaram Gondi
280 | 11D60..11DAF; Gunjala Gondi
281 | 11EE0..11EFF; Makasar
282 | 11F00..11F5F; Kawi
283 | 11FB0..11FBF; Lisu Supplement
284 | 11FC0..11FFF; Tamil Supplement
285 | 12000..123FF; Cuneiform
286 | 12400..1247F; Cuneiform Numbers and Punctuation
287 | 12480..1254F; Early Dynastic Cuneiform
288 | 12F90..12FFF; Cypro-Minoan
289 | 13000..1342F; Egyptian Hieroglyphs
290 | 13430..1345F; Egyptian Hieroglyph Format Controls
291 | 14400..1467F; Anatolian Hieroglyphs
292 | 16800..16A3F; Bamum Supplement
293 | 16A40..16A6F; Mro
294 | 16A70..16ACF; Tangsa
295 | 16AD0..16AFF; Bassa Vah
296 | 16B00..16B8F; Pahawh Hmong
297 | 16E40..16E9F; Medefaidrin
298 | 16F00..16F9F; Miao
299 | 16FE0..16FFF; Ideographic Symbols and Punctuation
300 | 17000..187FF; Tangut
301 | 18800..18AFF; Tangut Components
302 | 18B00..18CFF; Khitan Small Script
303 | 18D00..18D7F; Tangut Supplement
304 | 1AFF0..1AFFF; Kana Extended-B
305 | 1B000..1B0FF; Kana Supplement
306 | 1B100..1B12F; Kana Extended-A
307 | 1B130..1B16F; Small Kana Extension
308 | 1B170..1B2FF; Nushu
309 | 1BC00..1BC9F; Duployan
310 | 1BCA0..1BCAF; Shorthand Format Controls
311 | 1CF00..1CFCF; Znamenny Musical Notation
312 | 1D000..1D0FF; Byzantine Musical Symbols
313 | 1D100..1D1FF; Musical Symbols
314 | 1D200..1D24F; Ancient Greek Musical Notation
315 | 1D2C0..1D2DF; Kaktovik Numerals
316 | 1D2E0..1D2FF; Mayan Numerals
317 | 1D300..1D35F; Tai Xuan Jing Symbols
318 | 1D360..1D37F; Counting Rod Numerals
319 | 1D400..1D7FF; Mathematical Alphanumeric Symbols
320 | 1D800..1DAAF; Sutton SignWriting
321 | 1DF00..1DFFF; Latin Extended-G
322 | 1E000..1E02F; Glagolitic Supplement
323 | 1E030..1E08F; Cyrillic Extended-D
324 | 1E100..1E14F; Nyiakeng Puachue Hmong
325 | 1E290..1E2BF; Toto
326 | 1E2C0..1E2FF; Wancho
327 | 1E4D0..1E4FF; Nag Mundari
328 | 1E7E0..1E7FF; Ethiopic Extended-B
329 | 1E800..1E8DF; Mende Kikakui
330 | 1E900..1E95F; Adlam
331 | 1EC70..1ECBF; Indic Siyaq Numbers
332 | 1ED00..1ED4F; Ottoman Siyaq Numbers
333 | 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
334 | 1F000..1F02F; Mahjong Tiles
335 | 1F030..1F09F; Domino Tiles
336 | 1F0A0..1F0FF; Playing Cards
337 | 1F100..1F1FF; Enclosed Alphanumeric Supplement
338 | 1F200..1F2FF; Enclosed Ideographic Supplement
339 | 1F300..1F5FF; Miscellaneous Symbols and Pictographs
340 | 1F600..1F64F; Emoticons
341 | 1F650..1F67F; Ornamental Dingbats
342 | 1F680..1F6FF; Transport and Map Symbols
343 | 1F700..1F77F; Alchemical Symbols
344 | 1F780..1F7FF; Geometric Shapes Extended
345 | 1F800..1F8FF; Supplemental Arrows-C
346 | 1F900..1F9FF; Supplemental Symbols and Pictographs
347 | 1FA00..1FA6F; Chess Symbols
348 | 1FA70..1FAFF; Symbols and Pictographs Extended-A
349 | 1FB00..1FBFF; Symbols for Legacy Computing
350 | 20000..2A6DF; CJK Unified Ideographs Extension B
351 | 2A700..2B73F; CJK Unified Ideographs Extension C
352 | 2B740..2B81F; CJK Unified Ideographs Extension D
353 | 2B820..2CEAF; CJK Unified Ideographs Extension E
354 | 2CEB0..2EBEF; CJK Unified Ideographs Extension F
355 | 2F800..2FA1F; CJK Compatibility Ideographs Supplement
356 | 30000..3134F; CJK Unified Ideographs Extension G
357 | 31350..323AF; CJK Unified Ideographs Extension H
358 | E0000..E007F; Tags
359 | E0100..E01EF; Variation Selectors Supplement
360 | F0000..FFFFF; Supplementary Private Use Area-A
361 | 100000..10FFFF; Supplementary Private Use Area-B
362 |
363 | # EOF
364 |
--------------------------------------------------------------------------------
/src/UnicodeData/CaseFolding.txt:
--------------------------------------------------------------------------------
1 | # CaseFolding-15.0.0.txt
2 | # Date: 2022-02-02, 23:35:35 GMT
3 | # © 2022 Unicode®, Inc.
4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
5 | # For terms of use, see https://www.unicode.org/terms_of_use.html
6 | #
7 | # Unicode Character Database
8 | # For documentation, see https://www.unicode.org/reports/tr44/
9 | #
10 | # Case Folding Properties
11 | #
12 | # This file is a supplement to the UnicodeData file.
13 | # It provides a case folding mapping generated from the Unicode Character Database.
14 | # If all characters are mapped according to the full mapping below, then
15 | # case differences (according to UnicodeData.txt and SpecialCasing.txt)
16 | # are eliminated.
17 | #
18 | # The data supports both implementations that require simple case foldings
19 | # (where string lengths don't change), and implementations that allow full case folding
20 | # (where string lengths may grow). Note that where they can be supported, the
21 | # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match.
22 | #
23 | # All code points not listed in this file map to themselves.
24 | #
25 | # NOTE: case folding does not preserve normalization formats!
26 | #
27 | # For information on case folding, including how to have case folding
28 | # preserve normalization formats, see Section 3.13 Default Case Algorithms in
29 | # The Unicode Standard.
30 | #
31 | # ================================================================================
32 | # Format
33 | # ================================================================================
34 | # The entries in this file are in the following machine-readable format:
35 | #
36 | # <code>; <status>; <mapping>; # <name>
37 | #
38 | # The status field is:
39 | # C: common case folding, common mappings shared by both simple and full mappings.
40 | # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces.
41 | # S: simple case folding, mappings to single characters where different from F.
42 | # T: special case for uppercase I and dotted uppercase I
43 | # - For non-Turkic languages, this mapping is normally not used.
44 | # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters.
45 | # Note that the Turkic mappings do not maintain canonical equivalence without additional processing.
46 | # See the discussions of case mapping in the Unicode Standard for more information.
47 | #
48 | # Usage:
49 | # A. To do a simple case folding, use the mappings with status C + S.
50 | # B. To do a full case folding, use the mappings with status C + F.
51 | #
52 | # The mappings with status T can be used or omitted depending on the desired case-folding
53 | # behavior. (The default option is to exclude them.)
54 | #
55 | # =================================================================
56 |
57 | # Property: Case_Folding
58 |
59 | # All code points not explicitly listed for Case_Folding
60 | # have the value C for the status field, and the code point itself for the mapping field.
61 |
62 | # =================================================================
63 | 0041; C; 0061; # LATIN CAPITAL LETTER A
64 | 0042; C; 0062; # LATIN CAPITAL LETTER B
65 | 0043; C; 0063; # LATIN CAPITAL LETTER C
66 | 0044; C; 0064; # LATIN CAPITAL LETTER D
67 | 0045; C; 0065; # LATIN CAPITAL LETTER E
68 | 0046; C; 0066; # LATIN CAPITAL LETTER F
69 | 0047; C; 0067; # LATIN CAPITAL LETTER G
70 | 0048; C; 0068; # LATIN CAPITAL LETTER H
71 | 0049; C; 0069; # LATIN CAPITAL LETTER I
72 | 0049; T; 0131; # LATIN CAPITAL LETTER I
73 | 004A; C; 006A; # LATIN CAPITAL LETTER J
74 | 004B; C; 006B; # LATIN CAPITAL LETTER K
75 | 004C; C; 006C; # LATIN CAPITAL LETTER L
76 | 004D; C; 006D; # LATIN CAPITAL LETTER M
77 | 004E; C; 006E; # LATIN CAPITAL LETTER N
78 | 004F; C; 006F; # LATIN CAPITAL LETTER O
79 | 0050; C; 0070; # LATIN CAPITAL LETTER P
80 | 0051; C; 0071; # LATIN CAPITAL LETTER Q
81 | 0052; C; 0072; # LATIN CAPITAL LETTER R
82 | 0053; C; 0073; # LATIN CAPITAL LETTER S
83 | 0054; C; 0074; # LATIN CAPITAL LETTER T
84 | 0055; C; 0075; # LATIN CAPITAL LETTER U
85 | 0056; C; 0076; # LATIN CAPITAL LETTER V
86 | 0057; C; 0077; # LATIN CAPITAL LETTER W
87 | 0058; C; 0078; # LATIN CAPITAL LETTER X
88 | 0059; C; 0079; # LATIN CAPITAL LETTER Y
89 | 005A; C; 007A; # LATIN CAPITAL LETTER Z
90 | 00B5; C; 03BC; # MICRO SIGN
91 | 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
92 | 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
93 | 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
94 | 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
95 | 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
96 | 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
97 | 00C6; C; 00E6; # LATIN CAPITAL LETTER AE
98 | 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
99 | 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
100 | 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
101 | 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
102 | 00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
103 | 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
104 | 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
105 | 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
106 | 00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
107 | 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
108 | 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
109 | 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
110 | 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
111 | 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
112 | 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
113 | 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
114 | 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
115 | 00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
116 | 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
117 | 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
118 | 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
119 | 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
120 | 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
121 | 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
122 | 0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
123 | 0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
124 | 0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
125 | 0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
126 | 0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
127 | 010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
128 | 010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
129 | 010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
130 | 0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
131 | 0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
132 | 0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
133 | 0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
134 | 0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
135 | 011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
136 | 011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
137 | 011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
138 | 0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
139 | 0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
140 | 0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
141 | 0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
142 | 0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
143 | 012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
144 | 012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
145 | 012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
146 | 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
147 | 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
148 | 0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
149 | 0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
150 | 0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
151 | 0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
152 | 013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
153 | 013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
154 | 013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
155 | 0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
156 | 0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
157 | 0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
158 | 0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
159 | 0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
160 | 014A; C; 014B; # LATIN CAPITAL LETTER ENG
161 | 014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
162 | 014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
163 | 0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
164 | 0152; C; 0153; # LATIN CAPITAL LIGATURE OE
165 | 0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
166 | 0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
167 | 0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
168 | 015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
169 | 015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
170 | 015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
171 | 0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
172 | 0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
173 | 0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
174 | 0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
175 | 0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
176 | 016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
177 | 016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
178 | 016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
179 | 0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
180 | 0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
181 | 0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
182 | 0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
183 | 0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
184 | 0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
185 | 017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
186 | 017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
187 | 017F; C; 0073; # LATIN SMALL LETTER LONG S
188 | 0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
189 | 0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
190 | 0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
191 | 0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
192 | 0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
193 | 0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
194 | 018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
195 | 018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
196 | 018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
197 | 018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
198 | 0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
199 | 0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
200 | 0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
201 | 0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
202 | 0196; C; 0269; # LATIN CAPITAL LETTER IOTA
203 | 0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
204 | 0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
205 | 019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
206 | 019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
207 | 019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
208 | 01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
209 | 01A2; C; 01A3; # LATIN CAPITAL LETTER OI
210 | 01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
211 | 01A6; C; 0280; # LATIN LETTER YR
212 | 01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
213 | 01A9; C; 0283; # LATIN CAPITAL LETTER ESH
214 | 01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
215 | 01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
216 | 01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
217 | 01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
218 | 01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
219 | 01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
220 | 01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
221 | 01B7; C; 0292; # LATIN CAPITAL LETTER EZH
222 | 01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
223 | 01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
224 | 01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
225 | 01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
226 | 01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
227 | 01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
228 | 01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
229 | 01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
230 | 01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
231 | 01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
232 | 01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
233 | 01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
234 | 01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
235 | 01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
236 | 01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
237 | 01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
238 | 01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
239 | 01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
240 | 01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
241 | 01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
242 | 01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
243 | 01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
244 | 01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
245 | 01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
246 | 01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
247 | 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
248 | 01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
249 | 01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
250 | 01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
251 | 01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
252 | 01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
253 | 01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
254 | 01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
255 | 01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
256 | 01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
257 | 0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
258 | 0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
259 | 0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
260 | 0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
261 | 0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
262 | 020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
263 | 020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
264 | 020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
265 | 0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
266 | 0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
267 | 0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
268 | 0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
269 | 0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
270 | 021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
271 | 021C; C; 021D; # LATIN CAPITAL LETTER YOGH
272 | 021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
273 | 0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
274 | 0222; C; 0223; # LATIN CAPITAL LETTER OU
275 | 0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
276 | 0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
277 | 0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
278 | 022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
279 | 022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
280 | 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
281 | 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
282 | 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
283 | 023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE
284 | 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE
285 | 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR
286 | 023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
287 | 0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP
288 | 0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE
289 | 0244; C; 0289; # LATIN CAPITAL LETTER U BAR
290 | 0245; C; 028C; # LATIN CAPITAL LETTER TURNED V
291 | 0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE
292 | 0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE
293 | 024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
294 | 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE
295 | 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE
296 | 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
297 | 0370; C; 0371; # GREEK CAPITAL LETTER HETA
298 | 0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI
299 | 0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
300 | 037F; C; 03F3; # GREEK CAPITAL LETTER YOT
301 | 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
302 | 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
303 | 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
304 | 038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
305 | 038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
306 | 038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
307 | 038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
308 | 0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
309 | 0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
310 | 0392; C; 03B2; # GREEK CAPITAL LETTER BETA
311 | 0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
312 | 0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
313 | 0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
314 | 0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
315 | 0397; C; 03B7; # GREEK CAPITAL LETTER ETA
316 | 0398; C; 03B8; # GREEK CAPITAL LETTER THETA
317 | 0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
318 | 039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
319 | 039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
320 | 039C; C; 03BC; # GREEK CAPITAL LETTER MU
321 | 039D; C; 03BD; # GREEK CAPITAL LETTER NU
322 | 039E; C; 03BE; # GREEK CAPITAL LETTER XI
323 | 039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
324 | 03A0; C; 03C0; # GREEK CAPITAL LETTER PI
325 | 03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
326 | 03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
327 | 03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
328 | 03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
329 | 03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
330 | 03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
331 | 03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
332 | 03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
333 | 03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
334 | 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
335 | 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
336 | 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
337 | 03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL
338 | 03D0; C; 03B2; # GREEK BETA SYMBOL
339 | 03D1; C; 03B8; # GREEK THETA SYMBOL
340 | 03D5; C; 03C6; # GREEK PHI SYMBOL
341 | 03D6; C; 03C0; # GREEK PI SYMBOL
342 | 03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
343 | 03DA; C; 03DB; # GREEK LETTER STIGMA
344 | 03DC; C; 03DD; # GREEK LETTER DIGAMMA
345 | 03DE; C; 03DF; # GREEK LETTER KOPPA
346 | 03E0; C; 03E1; # GREEK LETTER SAMPI
347 | 03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
348 | 03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
349 | 03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
350 | 03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
351 | 03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
352 | 03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
353 | 03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
354 | 03F0; C; 03BA; # GREEK KAPPA SYMBOL
355 | 03F1; C; 03C1; # GREEK RHO SYMBOL
356 | 03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
357 | 03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
358 | 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO
359 | 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL
360 | 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN
361 | 03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
362 | 03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
363 | 03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
364 | 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
365 | 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
366 | 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
367 | 0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
368 | 0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
369 | 0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
370 | 0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
371 | 0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
372 | 0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
373 | 0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
374 | 040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
375 | 040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
376 | 040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
377 | 040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
378 | 040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
379 | 040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
380 | 0410; C; 0430; # CYRILLIC CAPITAL LETTER A
381 | 0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
382 | 0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
383 | 0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
384 | 0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
385 | 0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
386 | 0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
387 | 0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
388 | 0418; C; 0438; # CYRILLIC CAPITAL LETTER I
389 | 0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
390 | 041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
391 | 041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
392 | 041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
393 | 041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
394 | 041E; C; 043E; # CYRILLIC CAPITAL LETTER O
395 | 041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
396 | 0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
397 | 0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
398 | 0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
399 | 0423; C; 0443; # CYRILLIC CAPITAL LETTER U
400 | 0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
401 | 0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
402 | 0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
403 | 0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
404 | 0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
405 | 0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
406 | 042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
407 | 042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
408 | 042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
409 | 042D; C; 044D; # CYRILLIC CAPITAL LETTER E
410 | 042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
411 | 042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
412 | 0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
413 | 0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
414 | 0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
415 | 0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
416 | 0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
417 | 046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
418 | 046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
419 | 046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
420 | 0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
421 | 0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
422 | 0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
423 | 0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
424 | 0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
425 | 047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
426 | 047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
427 | 047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
428 | 0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
429 | 048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
430 | 048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
431 | 048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
432 | 0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
433 | 0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
434 | 0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
435 | 0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
436 | 0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
437 | 049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
438 | 049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
439 | 049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
440 | 04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
441 | 04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
442 | 04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
443 | 04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
444 | 04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
445 | 04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
446 | 04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
447 | 04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
448 | 04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
449 | 04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
450 | 04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
451 | 04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
452 | 04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
453 | 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
454 | 04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
455 | 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
456 | 04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA
457 | 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
458 | 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
459 | 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
460 | 04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
461 | 04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
462 | 04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
463 | 04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
464 | 04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
465 | 04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
466 | 04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
467 | 04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
468 | 04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
469 | 04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
470 | 04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
471 | 04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
472 | 04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
473 | 04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
474 | 04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
475 | 04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
476 | 04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
477 | 04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
478 | 04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
479 | 04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
480 | 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
481 | 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
482 | 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
483 | 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
484 | 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
485 | 04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
486 | 04FC; C; 04FD; # CYRILLIC CAPITAL LETTER HA WITH HOOK
487 | 04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE
488 | 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
489 | 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
490 | 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
491 | 0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
492 | 0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
493 | 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
494 | 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
495 | 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
496 | 0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE
497 | 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK
498 | 0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA
499 | 0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA
500 | 0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE
501 | 051A; C; 051B; # CYRILLIC CAPITAL LETTER QA
502 | 051C; C; 051D; # CYRILLIC CAPITAL LETTER WE
503 | 051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA
504 | 0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
505 | 0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
506 | 0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
507 | 0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
508 | 0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
509 | 052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE
510 | 052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE
511 | 052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
512 | 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
513 | 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
514 | 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
515 | 0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
516 | 0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
517 | 0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
518 | 0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
519 | 0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
520 | 0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
521 | 053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
522 | 053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
523 | 053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
524 | 053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
525 | 053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
526 | 053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
527 | 0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
528 | 0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
529 | 0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
530 | 0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
531 | 0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
532 | 0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
533 | 0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
534 | 0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
535 | 0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
536 | 0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
537 | 054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
538 | 054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
539 | 054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
540 | 054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
541 | 054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
542 | 054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
543 | 0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
544 | 0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
545 | 0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
546 | 0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
547 | 0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
548 | 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
549 | 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
550 | 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
551 | 10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN
552 | 10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN
553 | 10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN
554 | 10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON
555 | 10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN
556 | 10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN
557 | 10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN
558 | 10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN
559 | 10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN
560 | 10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN
561 | 10AA; C; 2D0A; # GEORGIAN CAPITAL LETTER LAS
562 | 10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN
563 | 10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR
564 | 10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON
565 | 10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR
566 | 10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR
567 | 10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE
568 | 10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN
569 | 10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR
570 | 10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN
571 | 10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR
572 | 10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR
573 | 10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN
574 | 10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR
575 | 10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN
576 | 10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN
577 | 10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN
578 | 10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL
579 | 10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL
580 | 10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR
581 | 10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN
582 | 10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN
583 | 10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE
584 | 10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE
585 | 10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE
586 | 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE
587 | 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR
588 | 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE
589 | 10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN
590 | 10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN
591 | 13F8; C; 13F0; # CHEROKEE SMALL LETTER YE
592 | 13F9; C; 13F1; # CHEROKEE SMALL LETTER YI
593 | 13FA; C; 13F2; # CHEROKEE SMALL LETTER YO
594 | 13FB; C; 13F3; # CHEROKEE SMALL LETTER YU
595 | 13FC; C; 13F4; # CHEROKEE SMALL LETTER YV
596 | 13FD; C; 13F5; # CHEROKEE SMALL LETTER MV
597 | 1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE
598 | 1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE
599 | 1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O
600 | 1C83; C; 0441; # CYRILLIC SMALL LETTER WIDE ES
601 | 1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE
602 | 1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE
603 | 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN
604 | 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT
605 | 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK
606 | 1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN
607 | 1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN
608 | 1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN
609 | 1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON
610 | 1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN
611 | 1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN
612 | 1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN
613 | 1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN
614 | 1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN
615 | 1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN
616 | 1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS
617 | 1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN
618 | 1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR
619 | 1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON
620 | 1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR
621 | 1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR
622 | 1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE
623 | 1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN
624 | 1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR
625 | 1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN
626 | 1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR
627 | 1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR
628 | 1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN
629 | 1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR
630 | 1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN
631 | 1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN
632 | 1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN
633 | 1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL
634 | 1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL
635 | 1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR
636 | 1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN
637 | 1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN
638 | 1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE
639 | 1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE
640 | 1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE
641 | 1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE
642 | 1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR
643 | 1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE
644 | 1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI
645 | 1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN
646 | 1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI
647 | 1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN
648 | 1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN
649 | 1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN
650 | 1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN
651 | 1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN
652 | 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
653 | 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
654 | 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
655 | 1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
656 | 1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
657 | 1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
658 | 1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
659 | 1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
660 | 1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
661 | 1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
662 | 1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
663 | 1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
664 | 1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
665 | 1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
666 | 1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
667 | 1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
668 | 1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
669 | 1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
670 | 1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
671 | 1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
672 | 1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
673 | 1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
674 | 1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
675 | 1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
676 | 1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
677 | 1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
678 | 1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
679 | 1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
680 | 1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
681 | 1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
682 | 1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
683 | 1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
684 | 1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
685 | 1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
686 | 1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
687 | 1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
688 | 1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
689 | 1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
690 | 1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
691 | 1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
692 | 1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
693 | 1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
694 | 1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
695 | 1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
696 | 1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
697 | 1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
698 | 1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
699 | 1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
700 | 1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
701 | 1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
702 | 1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
703 | 1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
704 | 1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
705 | 1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
706 | 1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
707 | 1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
708 | 1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
709 | 1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
710 | 1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
711 | 1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
712 | 1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
713 | 1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
714 | 1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
715 | 1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
716 | 1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
717 | 1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
718 | 1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
719 | 1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
720 | 1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
721 | 1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
722 | 1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
723 | 1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
724 | 1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
725 | 1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
726 | 1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
727 | 1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
728 | 1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
729 | 1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
730 | 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
731 | 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
732 | 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
733 | 1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S
734 | 1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S
735 | 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
736 | 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
737 | 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
738 | 1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
739 | 1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
740 | 1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
741 | 1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
742 | 1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
743 | 1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
744 | 1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
745 | 1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
746 | 1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
747 | 1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
748 | 1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
749 | 1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
750 | 1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
751 | 1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
752 | 1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
753 | 1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
754 | 1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
755 | 1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
756 | 1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
757 | 1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
758 | 1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
759 | 1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
760 | 1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
761 | 1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
762 | 1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
763 | 1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
764 | 1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
765 | 1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
766 | 1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
767 | 1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
768 | 1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
769 | 1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
770 | 1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
771 | 1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
772 | 1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
773 | 1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
774 | 1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
775 | 1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
776 | 1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
777 | 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
778 | 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
779 | 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
780 | 1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL
781 | 1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V
782 | 1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP
783 | 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
784 | 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
785 | 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
786 | 1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
787 | 1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
788 | 1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
789 | 1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
790 | 1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
791 | 1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
792 | 1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
793 | 1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
794 | 1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
795 | 1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
796 | 1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
797 | 1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
798 | 1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
799 | 1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
800 | 1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
801 | 1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
802 | 1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
803 | 1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
804 | 1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
805 | 1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
806 | 1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
807 | 1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
808 | 1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
809 | 1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
810 | 1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
811 | 1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
812 | 1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
813 | 1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
814 | 1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
815 | 1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
816 | 1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
817 | 1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
818 | 1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
819 | 1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
820 | 1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
821 | 1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
822 | 1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
823 | 1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
824 | 1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
825 | 1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
826 | 1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
827 | 1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
828 | 1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
829 | 1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
830 | 1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
831 | 1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
832 | 1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
833 | 1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
834 | 1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
835 | 1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
836 | 1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
837 | 1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
838 | 1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
839 | 1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
840 | 1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
841 | 1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
842 | 1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
843 | 1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
844 | 1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
845 | 1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
846 | 1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
847 | 1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
848 | 1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
849 | 1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
850 | 1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
851 | 1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
852 | 1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
853 | 1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
854 | 1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
855 | 1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
856 | 1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
857 | 1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
858 | 1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
859 | 1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
860 | 1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
861 | 1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
862 | 1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
863 | 1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
864 | 1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
865 | 1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
866 | 1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
867 | 1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
868 | 1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
869 | 1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
870 | 1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
871 | 1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
872 | 1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
873 | 1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
874 | 1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
875 | 1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
876 | 1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
877 | 1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
878 | 1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
879 | 1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
880 | 1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
881 | 1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
882 | 1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
883 | 1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
884 | 1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
885 | 1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
886 | 1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
887 | 1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
888 | 1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
889 | 1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
890 | 1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
891 | 1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
892 | 1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
893 | 1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
894 | 1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
895 | 1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
896 | 1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
897 | 1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
898 | 1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
899 | 1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
900 | 1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
901 | 1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
902 | 1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
903 | 1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
904 | 1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
905 | 1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
906 | 1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
907 | 1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
908 | 1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
909 | 1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
910 | 1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
911 | 1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
912 | 1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
913 | 1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
914 | 1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
915 | 1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
916 | 1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
917 | 1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
918 | 1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
919 | 1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
920 | 1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
921 | 1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
922 | 1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
923 | 1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
924 | 1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
925 | 1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
926 | 1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
927 | 1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
928 | 1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
929 | 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
930 | 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
931 | 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
932 | 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
933 | 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
934 | 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
935 | 1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
936 | 1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
937 | 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
938 | 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
939 | 1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
940 | 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
941 | 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
942 | 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
943 | 1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
944 | 1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
945 | 1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
946 | 1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
947 | 1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
948 | 1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
949 | 1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
950 | 1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
951 | 1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
952 | 1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
953 | 1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
954 | 1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
955 | 1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
956 | 1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
957 | 1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
958 | 1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
959 | 2126; C; 03C9; # OHM SIGN
960 | 212A; C; 006B; # KELVIN SIGN
961 | 212B; C; 00E5; # ANGSTROM SIGN
962 | 2132; C; 214E; # TURNED CAPITAL F
963 | 2160; C; 2170; # ROMAN NUMERAL ONE
964 | 2161; C; 2171; # ROMAN NUMERAL TWO
965 | 2162; C; 2172; # ROMAN NUMERAL THREE
966 | 2163; C; 2173; # ROMAN NUMERAL FOUR
967 | 2164; C; 2174; # ROMAN NUMERAL FIVE
968 | 2165; C; 2175; # ROMAN NUMERAL SIX
969 | 2166; C; 2176; # ROMAN NUMERAL SEVEN
970 | 2167; C; 2177; # ROMAN NUMERAL EIGHT
971 | 2168; C; 2178; # ROMAN NUMERAL NINE
972 | 2169; C; 2179; # ROMAN NUMERAL TEN
973 | 216A; C; 217A; # ROMAN NUMERAL ELEVEN
974 | 216B; C; 217B; # ROMAN NUMERAL TWELVE
975 | 216C; C; 217C; # ROMAN NUMERAL FIFTY
976 | 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
977 | 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
978 | 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
979 | 2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED
980 | 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
981 | 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
982 | 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
983 | 24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
984 | 24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
985 | 24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
986 | 24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
987 | 24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
988 | 24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
989 | 24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
990 | 24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
991 | 24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
992 | 24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
993 | 24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
994 | 24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
995 | 24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
996 | 24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
997 | 24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
998 | 24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
999 | 24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
1000 | 24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
1001 | 24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
1002 | 24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
1003 | 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
1004 | 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
1005 | 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
1006 | 2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU
1007 | 2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY
1008 | 2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE
1009 | 2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI
1010 | 2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO
1011 | 2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU
1012 | 2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE
1013 | 2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO
1014 | 2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA
1015 | 2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE
1016 | 2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
1017 | 2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I
1018 | 2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI
1019 | 2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO
1020 | 2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE
1021 | 2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE
1022 | 2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI
1023 | 2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU
1024 | 2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI
1025 | 2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI
1026 | 2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO
1027 | 2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO
1028 | 2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU
1029 | 2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU
1030 | 2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU
1031 | 2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU
1032 | 2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE
1033 | 2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA
1034 | 2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI
1035 | 2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI
1036 | 2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA
1037 | 2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU
1038 | 2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI
1039 | 2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI
1040 | 2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA
1041 | 2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU
1042 | 2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS
1043 | 2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
1044 | 2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO
1045 | 2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
1046 | 2C28; C; 2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS
1047 | 2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
1048 | 2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA
1049 | 2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA
1050 | 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC
1051 | 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
1052 | 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
1053 | 2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI
1054 | 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR
1055 | 2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
1056 | 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE
1057 | 2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL
1058 | 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER
1059 | 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER
1060 | 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER
1061 | 2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA
1062 | 2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK
1063 | 2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A
1064 | 2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA
1065 | 2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK
1066 | 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H
1067 | 2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL
1068 | 2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL
1069 | 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA
1070 | 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA
1071 | 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA
1072 | 2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA
1073 | 2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE
1074 | 2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU
1075 | 2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA
1076 | 2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE
1077 | 2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE
1078 | 2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA
1079 | 2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA
1080 | 2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA
1081 | 2C98; C; 2C99; # COPTIC CAPITAL LETTER MI
1082 | 2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI
1083 | 2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI
1084 | 2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O
1085 | 2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI
1086 | 2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO
1087 | 2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA
1088 | 2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU
1089 | 2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA
1090 | 2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI
1091 | 2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI
1092 | 2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI
1093 | 2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU
1094 | 2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF
1095 | 2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN
1096 | 2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
1097 | 2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA
1098 | 2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI
1099 | 2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
1100 | 2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU
1101 | 2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI
1102 | 2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI
1103 | 2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI
1104 | 2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH
1105 | 2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI
1106 | 2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI
1107 | 2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI
1108 | 2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA
1109 | 2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA
1110 | 2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI
1111 | 2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT
1112 | 2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
1113 | 2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA
1114 | 2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
1115 | 2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
1116 | 2CDE; C; 2CDF; # COPTIC CAPITAL LETTER OLD NUBIAN NGI
1117 | 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI
1118 | 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU
1119 | 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
1120 | 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
1121 | 2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI
1122 | A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA
1123 | A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO
1124 | A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE
1125 | A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA
1126 | A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV
1127 | A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK
1128 | A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA
1129 | A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER
1130 | A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
1131 | A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT
1132 | A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU
1133 | A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A
1134 | A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
1135 | A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS
1136 | A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
1137 | A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN
1138 | A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE
1139 | A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE
1140 | A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL
1141 | A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM
1142 | A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O
1143 | A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O
1144 | A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
1145 | A680; C; A681; # CYRILLIC CAPITAL LETTER DWE
1146 | A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE
1147 | A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE
1148 | A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE
1149 | A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE
1150 | A68A; C; A68B; # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
1151 | A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE
1152 | A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE
1153 | A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE
1154 | A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE
1155 | A694; C; A695; # CYRILLIC CAPITAL LETTER HWE
1156 | A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE
1157 | A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O
1158 | A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O
1159 | A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
1160 | A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
1161 | A726; C; A727; # LATIN CAPITAL LETTER HENG
1162 | A728; C; A729; # LATIN CAPITAL LETTER TZ
1163 | A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO
1164 | A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO
1165 | A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
1166 | A732; C; A733; # LATIN CAPITAL LETTER AA
1167 | A734; C; A735; # LATIN CAPITAL LETTER AO
1168 | A736; C; A737; # LATIN CAPITAL LETTER AU
1169 | A738; C; A739; # LATIN CAPITAL LETTER AV
1170 | A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
1171 | A73C; C; A73D; # LATIN CAPITAL LETTER AY
1172 | A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT
1173 | A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE
1174 | A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
1175 | A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
1176 | A746; C; A747; # LATIN CAPITAL LETTER BROKEN L
1177 | A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE
1178 | A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
1179 | A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP
1180 | A74E; C; A74F; # LATIN CAPITAL LETTER OO
1181 | A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
1182 | A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH
1183 | A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
1184 | A756; C; A757; # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
1185 | A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
1186 | A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA
1187 | A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA
1188 | A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
1189 | A760; C; A761; # LATIN CAPITAL LETTER VY
1190 | A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z
1191 | A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE
1192 | A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
1193 | A768; C; A769; # LATIN CAPITAL LETTER VEND
1194 | A76A; C; A76B; # LATIN CAPITAL LETTER ET
1195 | A76C; C; A76D; # LATIN CAPITAL LETTER IS
1196 | A76E; C; A76F; # LATIN CAPITAL LETTER CON
1197 | A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D
1198 | A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F
1199 | A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G
1200 | A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G
1201 | A780; C; A781; # LATIN CAPITAL LETTER TURNED L
1202 | A782; C; A783; # LATIN CAPITAL LETTER INSULAR R
1203 | A784; C; A785; # LATIN CAPITAL LETTER INSULAR S
1204 | A786; C; A787; # LATIN CAPITAL LETTER INSULAR T
1205 | A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO
1206 | A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H
1207 | A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER
1208 | A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR
1209 | A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH
1210 | A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE
1211 | A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE
1212 | A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE
1213 | A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE
1214 | A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
1215 | A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
1216 | A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
1217 | A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
1218 | A7A8; C; A7A9; # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
1219 | A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK
1220 | A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E
1221 | A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G
1222 | A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT
1223 | A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I
1224 | A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K
1225 | A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T
1226 | A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL
1227 | A7B3; C; AB53; # LATIN CAPITAL LETTER CHI
1228 | A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA
1229 | A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA
1230 | A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE
1231 | A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A
1232 | A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I
1233 | A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U
1234 | A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O
1235 | A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W
1236 | A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK
1237 | A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK
1238 | A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK
1239 | A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY
1240 | A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY
1241 | A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G
1242 | A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S
1243 | A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S
1244 | A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H
1245 | AB70; C; 13A0; # CHEROKEE SMALL LETTER A
1246 | AB71; C; 13A1; # CHEROKEE SMALL LETTER E
1247 | AB72; C; 13A2; # CHEROKEE SMALL LETTER I
1248 | AB73; C; 13A3; # CHEROKEE SMALL LETTER O
1249 | AB74; C; 13A4; # CHEROKEE SMALL LETTER U
1250 | AB75; C; 13A5; # CHEROKEE SMALL LETTER V
1251 | AB76; C; 13A6; # CHEROKEE SMALL LETTER GA
1252 | AB77; C; 13A7; # CHEROKEE SMALL LETTER KA
1253 | AB78; C; 13A8; # CHEROKEE SMALL LETTER GE
1254 | AB79; C; 13A9; # CHEROKEE SMALL LETTER GI
1255 | AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO
1256 | AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU
1257 | AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV
1258 | AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA
1259 | AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE
1260 | AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI
1261 | AB80; C; 13B0; # CHEROKEE SMALL LETTER HO
1262 | AB81; C; 13B1; # CHEROKEE SMALL LETTER HU
1263 | AB82; C; 13B2; # CHEROKEE SMALL LETTER HV
1264 | AB83; C; 13B3; # CHEROKEE SMALL LETTER LA
1265 | AB84; C; 13B4; # CHEROKEE SMALL LETTER LE
1266 | AB85; C; 13B5; # CHEROKEE SMALL LETTER LI
1267 | AB86; C; 13B6; # CHEROKEE SMALL LETTER LO
1268 | AB87; C; 13B7; # CHEROKEE SMALL LETTER LU
1269 | AB88; C; 13B8; # CHEROKEE SMALL LETTER LV
1270 | AB89; C; 13B9; # CHEROKEE SMALL LETTER MA
1271 | AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME
1272 | AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI
1273 | AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO
1274 | AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU
1275 | AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA
1276 | AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA
1277 | AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH
1278 | AB91; C; 13C1; # CHEROKEE SMALL LETTER NE
1279 | AB92; C; 13C2; # CHEROKEE SMALL LETTER NI
1280 | AB93; C; 13C3; # CHEROKEE SMALL LETTER NO
1281 | AB94; C; 13C4; # CHEROKEE SMALL LETTER NU
1282 | AB95; C; 13C5; # CHEROKEE SMALL LETTER NV
1283 | AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA
1284 | AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE
1285 | AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI
1286 | AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO
1287 | AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU
1288 | AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV
1289 | AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA
1290 | AB9D; C; 13CD; # CHEROKEE SMALL LETTER S
1291 | AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE
1292 | AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI
1293 | ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO
1294 | ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU
1295 | ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV
1296 | ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA
1297 | ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA
1298 | ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE
1299 | ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE
1300 | ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI
1301 | ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI
1302 | ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO
1303 | ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU
1304 | ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV
1305 | ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA
1306 | ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA
1307 | ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE
1308 | ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI
1309 | ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO
1310 | ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU
1311 | ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV
1312 | ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA
1313 | ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE
1314 | ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI
1315 | ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO
1316 | ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU
1317 | ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV
1318 | ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA
1319 | ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE
1320 | ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI
1321 | ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO
1322 | ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU
1323 | ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV
1324 | ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA
1325 | FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
1326 | FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
1327 | FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
1328 | FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
1329 | FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
1330 | FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
1331 | FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
1332 | FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
1333 | FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
1334 | FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
1335 | FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
1336 | FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
1337 | FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
1338 | FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
1339 | FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
1340 | FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
1341 | FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
1342 | FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
1343 | FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
1344 | FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
1345 | FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
1346 | FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
1347 | FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
1348 | FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
1349 | FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
1350 | FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
1351 | FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
1352 | FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
1353 | FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
1354 | FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
1355 | FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
1356 | FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
1357 | FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
1358 | FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
1359 | FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
1360 | FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
1361 | FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
1362 | FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
1363 | 10400; C; 10428; # DESERET CAPITAL LETTER LONG I
1364 | 10401; C; 10429; # DESERET CAPITAL LETTER LONG E
1365 | 10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
1366 | 10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
1367 | 10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
1368 | 10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
1369 | 10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
1370 | 10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
1371 | 10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
1372 | 10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
1373 | 1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
1374 | 1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
1375 | 1040C; C; 10434; # DESERET CAPITAL LETTER AY
1376 | 1040D; C; 10435; # DESERET CAPITAL LETTER OW
1377 | 1040E; C; 10436; # DESERET CAPITAL LETTER WU
1378 | 1040F; C; 10437; # DESERET CAPITAL LETTER YEE
1379 | 10410; C; 10438; # DESERET CAPITAL LETTER H
1380 | 10411; C; 10439; # DESERET CAPITAL LETTER PEE
1381 | 10412; C; 1043A; # DESERET CAPITAL LETTER BEE
1382 | 10413; C; 1043B; # DESERET CAPITAL LETTER TEE
1383 | 10414; C; 1043C; # DESERET CAPITAL LETTER DEE
1384 | 10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
1385 | 10416; C; 1043E; # DESERET CAPITAL LETTER JEE
1386 | 10417; C; 1043F; # DESERET CAPITAL LETTER KAY
1387 | 10418; C; 10440; # DESERET CAPITAL LETTER GAY
1388 | 10419; C; 10441; # DESERET CAPITAL LETTER EF
1389 | 1041A; C; 10442; # DESERET CAPITAL LETTER VEE
1390 | 1041B; C; 10443; # DESERET CAPITAL LETTER ETH
1391 | 1041C; C; 10444; # DESERET CAPITAL LETTER THEE
1392 | 1041D; C; 10445; # DESERET CAPITAL LETTER ES
1393 | 1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
1394 | 1041F; C; 10447; # DESERET CAPITAL LETTER ESH
1395 | 10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
1396 | 10421; C; 10449; # DESERET CAPITAL LETTER ER
1397 | 10422; C; 1044A; # DESERET CAPITAL LETTER EL
1398 | 10423; C; 1044B; # DESERET CAPITAL LETTER EM
1399 | 10424; C; 1044C; # DESERET CAPITAL LETTER EN
1400 | 10425; C; 1044D; # DESERET CAPITAL LETTER ENG
1401 | 10426; C; 1044E; # DESERET CAPITAL LETTER OI
1402 | 10427; C; 1044F; # DESERET CAPITAL LETTER EW
1403 | 104B0; C; 104D8; # OSAGE CAPITAL LETTER A
1404 | 104B1; C; 104D9; # OSAGE CAPITAL LETTER AI
1405 | 104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN
1406 | 104B3; C; 104DB; # OSAGE CAPITAL LETTER AH
1407 | 104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA
1408 | 104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA
1409 | 104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA
1410 | 104B7; C; 104DF; # OSAGE CAPITAL LETTER E
1411 | 104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN
1412 | 104B9; C; 104E1; # OSAGE CAPITAL LETTER HA
1413 | 104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA
1414 | 104BB; C; 104E3; # OSAGE CAPITAL LETTER I
1415 | 104BC; C; 104E4; # OSAGE CAPITAL LETTER KA
1416 | 104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA
1417 | 104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA
1418 | 104BF; C; 104E7; # OSAGE CAPITAL LETTER LA
1419 | 104C0; C; 104E8; # OSAGE CAPITAL LETTER MA
1420 | 104C1; C; 104E9; # OSAGE CAPITAL LETTER NA
1421 | 104C2; C; 104EA; # OSAGE CAPITAL LETTER O
1422 | 104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN
1423 | 104C4; C; 104EC; # OSAGE CAPITAL LETTER PA
1424 | 104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA
1425 | 104C6; C; 104EE; # OSAGE CAPITAL LETTER SA
1426 | 104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA
1427 | 104C8; C; 104F0; # OSAGE CAPITAL LETTER TA
1428 | 104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA
1429 | 104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA
1430 | 104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA
1431 | 104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA
1432 | 104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA
1433 | 104CE; C; 104F6; # OSAGE CAPITAL LETTER U
1434 | 104CF; C; 104F7; # OSAGE CAPITAL LETTER WA
1435 | 104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA
1436 | 104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA
1437 | 104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA
1438 | 104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA
1439 | 10570; C; 10597; # VITHKUQI CAPITAL LETTER A
1440 | 10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE
1441 | 10572; C; 10599; # VITHKUQI CAPITAL LETTER BE
1442 | 10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE
1443 | 10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE
1444 | 10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE
1445 | 10576; C; 1059D; # VITHKUQI CAPITAL LETTER DHE
1446 | 10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI
1447 | 10578; C; 1059F; # VITHKUQI CAPITAL LETTER E
1448 | 10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE
1449 | 1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA
1450 | 1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA
1451 | 1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA
1452 | 1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I
1453 | 1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE
1454 | 10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE
1455 | 10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA
1456 | 10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA
1457 | 10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA
1458 | 10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME
1459 | 10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE
1460 | 10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE
1461 | 10587; C; 105AE; # VITHKUQI CAPITAL LETTER O
1462 | 10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE
1463 | 10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA
1464 | 1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE
1465 | 1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE
1466 | 1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE
1467 | 1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE
1468 | 1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE
1469 | 10590; C; 105B7; # VITHKUQI CAPITAL LETTER U
1470 | 10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE
1471 | 10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE
1472 | 10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y
1473 | 10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE
1474 | 10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A
1475 | 10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA
1476 | 10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB
1477 | 10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB
1478 | 10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC
1479 | 10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC
1480 | 10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS
1481 | 10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED
1482 | 10C88; C; 10CC8; # OLD HUNGARIAN CAPITAL LETTER AND
1483 | 10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E
1484 | 10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E
1485 | 10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE
1486 | 10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF
1487 | 10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG
1488 | 10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY
1489 | 10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH
1490 | 10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I
1491 | 10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II
1492 | 10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ
1493 | 10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK
1494 | 10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK
1495 | 10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK
1496 | 10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL
1497 | 10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY
1498 | 10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM
1499 | 10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN
1500 | 10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY
1501 | 10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O
1502 | 10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO
1503 | 10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE
1504 | 10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE
1505 | 10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE
1506 | 10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP
1507 | 10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP
1508 | 10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER
1509 | 10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER
1510 | 10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES
1511 | 10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ
1512 | 10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET
1513 | 10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT
1514 | 10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY
1515 | 10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH
1516 | 10CAA; C; 10CEA; # OLD HUNGARIAN CAPITAL LETTER U
1517 | 10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU
1518 | 10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE
1519 | 10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE
1520 | 10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV
1521 | 10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ
1522 | 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS
1523 | 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN
1524 | 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US
1525 | 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA
1526 | 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A
1527 | 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI
1528 | 118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU
1529 | 118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA
1530 | 118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO
1531 | 118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II
1532 | 118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU
1533 | 118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E
1534 | 118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O
1535 | 118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG
1536 | 118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA
1537 | 118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO
1538 | 118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY
1539 | 118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ
1540 | 118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC
1541 | 118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN
1542 | 118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD
1543 | 118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE
1544 | 118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG
1545 | 118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA
1546 | 118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT
1547 | 118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM
1548 | 118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU
1549 | 118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU
1550 | 118B9; C; 118D9; # WARANG CITI CAPITAL LETTER HIYO
1551 | 118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO
1552 | 118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR
1553 | 118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR
1554 | 118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU
1555 | 118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII
1556 | 118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO
1557 | 16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M
1558 | 16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S
1559 | 16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V
1560 | 16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W
1561 | 16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU
1562 | 16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z
1563 | 16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP
1564 | 16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P
1565 | 16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T
1566 | 16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G
1567 | 16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F
1568 | 16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I
1569 | 16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K
1570 | 16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A
1571 | 16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J
1572 | 16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E
1573 | 16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B
1574 | 16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C
1575 | 16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U
1576 | 16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU
1577 | 16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L
1578 | 16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q
1579 | 16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP
1580 | 16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY
1581 | 16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X
1582 | 16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D
1583 | 16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE
1584 | 16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N
1585 | 16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R
1586 | 16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL LETTER O
1587 | 16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI
1588 | 16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y
1589 | 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF
1590 | 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI
1591 | 1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM
1592 | 1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM
1593 | 1E904; C; 1E926; # ADLAM CAPITAL LETTER BA
1594 | 1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE
1595 | 1E906; C; 1E928; # ADLAM CAPITAL LETTER PE
1596 | 1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE
1597 | 1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA
1598 | 1E909; C; 1E92B; # ADLAM CAPITAL LETTER E
1599 | 1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA
1600 | 1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I
1601 | 1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O
1602 | 1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA
1603 | 1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE
1604 | 1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW
1605 | 1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN
1606 | 1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF
1607 | 1E912; C; 1E934; # ADLAM CAPITAL LETTER YA
1608 | 1E913; C; 1E935; # ADLAM CAPITAL LETTER U
1609 | 1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM
1610 | 1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI
1611 | 1E916; C; 1E938; # ADLAM CAPITAL LETTER HA
1612 | 1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF
1613 | 1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA
1614 | 1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA
1615 | 1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU
1616 | 1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA
1617 | 1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA
1618 | 1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA
1619 | 1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE
1620 | 1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL
1621 | 1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO
1622 | 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA
1623 | #
1624 | # EOF
1625 |
--------------------------------------------------------------------------------
/src/UnicodeData/CompositionExclusions.txt:
--------------------------------------------------------------------------------
1 | # CompositionExclusions-15.0.0.txt
2 | # Date: 2022-05-03, 18:50:00 GMT [KW, LI]
3 | # © 2022 Unicode®, Inc.
4 | # For terms of use, see https://www.unicode.org/terms_of_use.html
5 | #
6 | # Unicode Character Database
7 | # For documentation, see https://www.unicode.org/reports/tr44/
8 | #
9 | # This file lists the characters for the Composition Exclusion Table
10 | # defined in UAX #15, Unicode Normalization Forms.
11 | #
12 | # This file is a normative contributory data file in the
13 | # Unicode Character Database.
14 | #
15 | # For more information, see
16 | # https://www.unicode.org/reports/tr15/#Primary_Exclusion_List_Table
17 | #
18 | # For a full derivation of composition exclusions, see the derived property
19 | # Full_Composition_Exclusion in DerivedNormalizationProps.txt
20 | #
21 |
22 | # ================================================
23 | # (1) Script Specifics
24 | #
25 | # This list of characters cannot be derived from the UnicodeData.txt file.
26 | #
27 | # Included are the following subcategories:
28 | #
29 | # - Many precomposed characters using a nukta diacritic in the Devanagari,
30 | # Bangla/Bengali, Gurmukhi, or Odia/Oriya scripts.
31 | # - Tibetan letters and subjoined letters with decompositions including
32 | # U+0FB7 TIBETAN SUBJOINED LETTER HA or U+0FB5 TIBETAN SUBJOINED LETTER SSA.
33 | # - Two two-part Tibetan vowel signs involving top and bottom pieces.
34 | # - A large collection of compatibility precomposed characters for Hebrew
35 | # involving dagesh and/or other combining marks.
36 | #
37 | # This list is unlikely to grow.
38 | #
39 | # ================================================
40 |
41 | 0958 # DEVANAGARI LETTER QA
42 | 0959 # DEVANAGARI LETTER KHHA
43 | 095A # DEVANAGARI LETTER GHHA
44 | 095B # DEVANAGARI LETTER ZA
45 | 095C # DEVANAGARI LETTER DDDHA
46 | 095D # DEVANAGARI LETTER RHA
47 | 095E # DEVANAGARI LETTER FA
48 | 095F # DEVANAGARI LETTER YYA
49 | 09DC # BENGALI LETTER RRA
50 | 09DD # BENGALI LETTER RHA
51 | 09DF # BENGALI LETTER YYA
52 | 0A33 # GURMUKHI LETTER LLA
53 | 0A36 # GURMUKHI LETTER SHA
54 | 0A59 # GURMUKHI LETTER KHHA
55 | 0A5A # GURMUKHI LETTER GHHA
56 | 0A5B # GURMUKHI LETTER ZA
57 | 0A5E # GURMUKHI LETTER FA
58 | 0B5C # ORIYA LETTER RRA
59 | 0B5D # ORIYA LETTER RHA
60 | 0F43 # TIBETAN LETTER GHA
61 | 0F4D # TIBETAN LETTER DDHA
62 | 0F52 # TIBETAN LETTER DHA
63 | 0F57 # TIBETAN LETTER BHA
64 | 0F5C # TIBETAN LETTER DZHA
65 | 0F69 # TIBETAN LETTER KSSA
66 | 0F76 # TIBETAN VOWEL SIGN VOCALIC R
67 | 0F78 # TIBETAN VOWEL SIGN VOCALIC L
68 | 0F93 # TIBETAN SUBJOINED LETTER GHA
69 | 0F9D # TIBETAN SUBJOINED LETTER DDHA
70 | 0FA2 # TIBETAN SUBJOINED LETTER DHA
71 | 0FA7 # TIBETAN SUBJOINED LETTER BHA
72 | 0FAC # TIBETAN SUBJOINED LETTER DZHA
73 | 0FB9 # TIBETAN SUBJOINED LETTER KSSA
74 | FB1D # HEBREW LETTER YOD WITH HIRIQ
75 | FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
76 | FB2A # HEBREW LETTER SHIN WITH SHIN DOT
77 | FB2B # HEBREW LETTER SHIN WITH SIN DOT
78 | FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
79 | FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
80 | FB2E # HEBREW LETTER ALEF WITH PATAH
81 | FB2F # HEBREW LETTER ALEF WITH QAMATS
82 | FB30 # HEBREW LETTER ALEF WITH MAPIQ
83 | FB31 # HEBREW LETTER BET WITH DAGESH
84 | FB32 # HEBREW LETTER GIMEL WITH DAGESH
85 | FB33 # HEBREW LETTER DALET WITH DAGESH
86 | FB34 # HEBREW LETTER HE WITH MAPIQ
87 | FB35 # HEBREW LETTER VAV WITH DAGESH
88 | FB36 # HEBREW LETTER ZAYIN WITH DAGESH
89 | FB38 # HEBREW LETTER TET WITH DAGESH
90 | FB39 # HEBREW LETTER YOD WITH DAGESH
91 | FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
92 | FB3B # HEBREW LETTER KAF WITH DAGESH
93 | FB3C # HEBREW LETTER LAMED WITH DAGESH
94 | FB3E # HEBREW LETTER MEM WITH DAGESH
95 | FB40 # HEBREW LETTER NUN WITH DAGESH
96 | FB41 # HEBREW LETTER SAMEKH WITH DAGESH
97 | FB43 # HEBREW LETTER FINAL PE WITH DAGESH
98 | FB44 # HEBREW LETTER PE WITH DAGESH
99 | FB46 # HEBREW LETTER TSADI WITH DAGESH
100 | FB47 # HEBREW LETTER QOF WITH DAGESH
101 | FB48 # HEBREW LETTER RESH WITH DAGESH
102 | FB49 # HEBREW LETTER SHIN WITH DAGESH
103 | FB4A # HEBREW LETTER TAV WITH DAGESH
104 | FB4B # HEBREW LETTER VAV WITH HOLAM
105 | FB4C # HEBREW LETTER BET WITH RAFE
106 | FB4D # HEBREW LETTER KAF WITH RAFE
107 | FB4E # HEBREW LETTER PE WITH RAFE
108 |
109 | # Total code points: 67
110 |
111 | # ================================================
112 | # (2) Post Composition Version precomposed characters
113 | #
114 | # These characters cannot be derived solely from the UnicodeData.txt file
115 | # in this version of Unicode.
116 | #
117 | # Note that characters added to the standard after the
118 | # Composition Version and which have canonical decomposition mappings
119 | # are not automatically added to this list of Post Composition
120 | # Version precomposed characters.
121 | # ================================================
122 |
123 | 2ADC # FORKING
124 | 1D15E # MUSICAL SYMBOL HALF NOTE
125 | 1D15F # MUSICAL SYMBOL QUARTER NOTE
126 | 1D160 # MUSICAL SYMBOL EIGHTH NOTE
127 | 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
128 | 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
129 | 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
130 | 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
131 | 1D1BB # MUSICAL SYMBOL MINIMA
132 | 1D1BC # MUSICAL SYMBOL MINIMA BLACK
133 | 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
134 | 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
135 | 1D1BF # MUSICAL SYMBOL FUSA WHITE
136 | 1D1C0 # MUSICAL SYMBOL FUSA BLACK
137 |
138 | # Total code points: 14
139 |
140 | # ================================================
141 | # (3) Singleton Decompositions
142 | #
143 | # These characters can be derived from the UnicodeData.txt file
144 | # by including all canonically decomposable characters whose
145 | # canonical decomposition consists of a single character.
146 | #
147 | # These characters are simply quoted here for reference.
148 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
149 | # ================================================
150 |
151 | # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
152 | # 0343 COMBINING GREEK KORONIS
153 | # 0374 GREEK NUMERAL SIGN
154 | # 037E GREEK QUESTION MARK
155 | # 0387 GREEK ANO TELEIA
156 | # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
157 | # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
158 | # 1F75 GREEK SMALL LETTER ETA WITH OXIA
159 | # 1F77 GREEK SMALL LETTER IOTA WITH OXIA
160 | # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
161 | # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
162 | # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
163 | # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
164 | # 1FBE GREEK PROSGEGRAMMENI
165 | # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
166 | # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
167 | # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
168 | # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
169 | # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
170 | # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
171 | # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
172 | # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
173 | # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
174 | # 1FFD GREEK OXIA
175 | # 2000..2001 [2] EN QUAD..EM QUAD
176 | # 2126 OHM SIGN
177 | # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
178 | # 2329 LEFT-POINTING ANGLE BRACKET
179 | # 232A RIGHT-POINTING ANGLE BRACKET
180 | # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
181 | # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
182 | # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
183 | # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
184 | # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
185 | # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
186 | # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
187 | # FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA6D
188 | # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
189 | # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
190 |
191 | # Total code points: 1035
192 |
193 | # ================================================
194 | # (4) Non-Starter Decompositions
195 | #
196 | # These characters can be derived from the UnicodeData.txt file
197 | # by including each expanding canonical decomposition
198 | # (i.e., those which canonically decompose to a sequence
199 | # of characters instead of a single character), such that:
200 | #
201 | # A. The character is not a Starter.
202 | #
203 | # OR (inclusive)
204 | #
205 | # B. The character's canonical decomposition begins
206 | # with a character that is not a Starter.
207 | #
208 | # Note that a "Starter" is any character with a zero combining class.
209 | #
210 | # These characters are simply quoted here for reference.
211 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
212 | # ================================================
213 |
214 | # 0344 COMBINING GREEK DIALYTIKA TONOS
215 | # 0F73 TIBETAN VOWEL SIGN II
216 | # 0F75 TIBETAN VOWEL SIGN UU
217 | # 0F81 TIBETAN VOWEL SIGN REVERSED II
218 |
219 | # Total code points: 4
220 |
221 | # EOF
222 |
--------------------------------------------------------------------------------
/src/UnicodeData/NormalizationCorrections.txt:
--------------------------------------------------------------------------------
1 | # NormalizationCorrections-15.0.0.txt
2 | # Date: 2022-05-03, 18:53:00 GMT [KW, LI]
3 | # © 2022 Unicode®, Inc.
4 | # For terms of use, see https://www.unicode.org/terms_of_use.html
5 | #
6 | # Unicode Character Database
7 | # For documentation, see https://www.unicode.org/reports/tr44/
8 | #
9 | # This file is a normative contributory data file in the
10 | # Unicode Character Database.
11 | #
12 | # The normalization stability policy of the Unicode Consortium
13 | # ordinarily precludes any change to the decomposition
14 | # for any character, once established in a relevant version
15 | # of the UnicodeData.txt data file. However, under certain
16 | # exceptional (and rare) conditions, an error in a decomposition
17 | # mapping may be discovered that is truly just an unintended
18 | # typo in the data, and not a matter of dubious interpretation.
19 | #
20 | # Whenever such an error may be found, and if it meets the
21 | # requirements for possible exceptions to normalization
22 | # stability, the correction is entered in this data file,
23 | # so that any implementation depending on absolute stability
24 | # of normalization, *including* any errors in the data, can
25 | # safely reconstruct the exact state of the data tables at
26 | # any given version of Unicode.
27 | #
28 | # Currently this list has exactly six entries in it, one for the
29 | # typo found and corrected in Corrigendum #3, and five for
30 | # the typos and misidentifications found and corrected in
31 | # Corrigendum #4. All efforts
32 | # will be made to keep the entries limited to just those fixes.
33 | #
34 | # Interpretation of the fields:
35 | # Field 0: Unicode code point
36 | # Field 1: Original (erroneous) decomposition
37 | # Field 2: Corrected decomposition
38 | # Field 3: Version of Unicode for which the correction was
39 | # entered into UnicodeData.txt, in n.n.n format.
40 | # Comment: Indicates the Unicode Corrigendum which documents
41 | # the correction
42 | #
43 | # For more information, see UAX #15, Unicode Normalization Forms.
44 | #
45 | F951;96FB;964B;3.2.0 # Corrigendum 3
46 | 2F868;2136A;36FC;4.0.0 # Corrigendum 4
47 | 2F874;5F33;5F53;4.0.0 # Corrigendum 4
48 | 2F91F;43AB;243AB;4.0.0 # Corrigendum 4
49 | 2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4
50 | 2F9BF;4D57;45D7;4.0.0 # Corrigendum 4
51 |
52 | # EOF
53 |
--------------------------------------------------------------------------------
/src/UnicodeData/SpecialCasing.txt:
--------------------------------------------------------------------------------
1 | # SpecialCasing-15.0.0.txt
2 | # Date: 2022-02-02, 23:35:52 GMT
3 | # © 2022 Unicode®, Inc.
4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
5 | # For terms of use, see https://www.unicode.org/terms_of_use.html
6 | #
7 | # Unicode Character Database
8 | # For documentation, see https://www.unicode.org/reports/tr44/
9 | #
10 | # Special Casing
11 | #
12 | # This file is a supplement to the UnicodeData.txt file. It does not define any
13 | # properties, but rather provides additional information about the casing of
14 | # Unicode characters, for situations when casing incurs a change in string length
15 | # or is dependent on context or locale. For compatibility, the UnicodeData.txt
16 | # file only contains simple case mappings for characters where they are one-to-one
17 | # and independent of context and language. The data in this file, combined with
18 | # the simple case mappings in UnicodeData.txt, defines the full case mappings
19 | # Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc).
20 | #
21 | # Note that the preferred mechanism for defining tailored casing operations is
22 | # the Unicode Common Locale Data Repository (CLDR). For more information, see the
23 | # discussion of case mappings and case algorithms in the Unicode Standard.
24 | #
25 | # All code points not listed in this file that do not have simple case mappings
26 | # in UnicodeData.txt map to themselves.
27 | # ================================================================================
28 | # Format
29 | # ================================================================================
30 | # The entries in this file are in the following machine-readable format:
31 | #
32 | # <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment>
33 | #
34 | # <code>, <lower>, <title>, and <upper> provide the respective full case mappings
35 | # of <code>, expressed as character values in hex. If there is more than one character,
36 | # they are separated by spaces. Other than as used to separate elements, spaces are
37 | # to be ignored.
38 | #
39 | # The <condition_list> is optional. Where present, it consists of one or more language IDs
40 | # or casing contexts, separated by spaces. In these conditions:
41 | # - A condition list overrides the normal behavior if all of the listed conditions are true.
42 | # - The casing context is always the context of the characters in the original string,
43 | # NOT in the resulting string.
44 | # - Case distinctions in the condition list are not significant.
45 | # - Conditions preceded by "Not_" represent the negation of the condition.
46 | # The condition list is not represented in the UCD as a formal property.
47 | #
48 | # A language ID is defined by BCP 47, with '-' and '_' treated equivalently.
49 | #
50 | # A casing context for a character is defined by Section 3.13 Default Case Algorithms
51 | # of The Unicode Standard.
52 | #
53 | # Parsers of this file must be prepared to deal with future additions to this format:
54 | # * Additional contexts
55 | # * Additional fields
56 | # ================================================================================
57 |
58 | # ================================================================================
59 | # Unconditional mappings
60 | # ================================================================================
61 |
62 | # The German es-zed is special--the normal mapping is to SS.
63 | # Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase())
64 |
65 | 00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S
66 |
67 | # Preserve canonical equivalence for I with dot. Turkic is handled below.
68 |
69 | 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
70 |
71 | # Ligatures
72 |
73 | FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF
74 | FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI
75 | FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL
76 | FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI
77 | FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL
78 | FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T
79 | FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST
80 |
81 | 0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN
82 | FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW
83 | FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH
84 | FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI
85 | FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW
86 | FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH
87 |
88 | # No corresponding uppercase precomposed character
89 |
90 | 0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
91 | 0390; 0390; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
92 | 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
93 | 01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON
94 | 1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW
95 | 1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS
96 | 1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE
97 | 1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
98 | 1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
99 | 1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
100 | 1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
101 | 1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
102 | 1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
103 | 1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
104 | 1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
105 | 1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
106 | 1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
107 | 1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
108 | 1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
109 | 1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
110 | 1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
111 | 1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI
112 | 1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
113 | 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
114 | 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
115 |
116 | # IMPORTANT-when iota-subscript (0345) is uppercased or titlecased,
117 | # the result will be incorrect unless the iota-subscript is moved to the end
118 | # of any sequence of combining marks. Otherwise, the accents will go on the capital iota.
119 | # This process can be achieved by first transforming the text to NFC before casing.
120 | # E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA>
121 |
122 | # The following cases are already in the UnicodeData.txt file, so are only commented here.
123 |
124 | # 0345; 0345; 0399; 0399; # COMBINING GREEK YPOGEGRAMMENI
125 |
126 | # All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript)
127 | # have special uppercases.
128 | # Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase!
129 |
130 | 1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
131 | 1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
132 | 1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
133 | 1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
134 | 1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
135 | 1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
136 | 1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
137 | 1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
138 | 1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
139 | 1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
140 | 1F8A; 1F82; 1F8A; 1F0A 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
141 | 1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
142 | 1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
143 | 1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
144 | 1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
145 | 1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
146 | 1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
147 | 1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
148 | 1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
149 | 1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
150 | 1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
151 | 1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
152 | 1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
153 | 1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
154 | 1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
155 | 1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
156 | 1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
157 | 1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
158 | 1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
159 | 1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
160 | 1F9E; 1F96; 1F9E; 1F2E 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
161 | 1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
162 | 1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
163 | 1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
164 | 1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
165 | 1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
166 | 1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
167 | 1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
168 | 1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
169 | 1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
170 | 1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
171 | 1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
172 | 1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
173 | 1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
174 | 1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
175 | 1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
176 | 1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
177 | 1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
178 | 1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
179 | 1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
180 | 1FC3; 1FC3; 1FCC; 0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
181 | 1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
182 | 1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
183 | 1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
184 |
185 | # Some characters with YPOGEGRAMMENI also have no corresponding titlecases
186 |
187 | 1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
188 | 1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
189 | 1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
190 | 1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
191 | 1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
192 | 1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
193 |
194 | 1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
195 | 1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
196 | 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
197 |
198 | # ================================================================================
199 | # Conditional Mappings
200 | # The remainder of this file provides conditional casing data used to produce
201 | # full case mappings.
202 | # ================================================================================
203 | # Language-Insensitive Mappings
204 | # These are characters whose full case mappings do not depend on language, but do
205 | # depend on context (which characters come before or after). For more information
206 | # see the header of this file and the Unicode Standard.
207 | # ================================================================================
208 |
209 | # Special case for final form of sigma
210 |
211 | 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
212 |
213 | # Note: the following cases for non-final are already in the UnicodeData.txt file.
214 |
215 | # 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA
216 | # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA
217 | # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA
218 |
219 | # Note: the following cases are not included, since they would case-fold in lowercasing
220 |
221 | # 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA
222 | # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA
223 |
224 | # ================================================================================
225 | # Language-Sensitive Mappings
226 | # These are characters whose full case mappings depend on language and perhaps also
227 | # context (which characters come before or after). For more information
228 | # see the header of this file and the Unicode Standard.
229 | # ================================================================================
230 |
231 | # Lithuanian
232 |
233 | # Lithuanian retains the dot in a lowercase i when followed by accents.
234 |
235 | # Remove DOT ABOVE after "i" with upper or titlecase
236 |
237 | 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
238 |
239 | # Introduce an explicit dot above when lowercasing capital I's and J's
240 | # whenever there are more accents above.
241 | # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
242 |
243 | 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
244 | 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
245 | 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
246 | 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
247 | 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
248 | 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
249 |
250 | # ================================================================================
251 |
252 | # Turkish and Azeri
253 |
254 | # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
255 | # The following rules handle those cases.
256 |
257 | 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
258 | 0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE
259 |
260 | # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
261 | # This matches the behavior of the canonically equivalent I-dot_above
262 |
263 | 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
264 | 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
265 |
266 | # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
267 |
268 | 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
269 | 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I
270 |
271 | # When uppercasing, i turns into a dotted capital I
272 |
273 | 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I
274 | 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I
275 |
276 | # Note: the following case is already in the UnicodeData.txt file.
277 |
278 | # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I
279 |
280 | # EOF
281 |
282 |
--------------------------------------------------------------------------------
/src/UnicodeData/empty.txt:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/howto.txt:
--------------------------------------------------------------------------------
1 |
2 | 1. First, compile and run PUCUConvertUnicode.
3 | 2. Second, compile and run PUCUGenCodePages (which is Windows-only for now).
4 | 3. Then !!!recompile!!! and run PUCUBuild.
5 | 4. Delete the no-longer-needed files PUCUUnicodePass1.pas, PUCUUnicodePass2.pas and PUCUCodePages.inc from the directory that contains the PUCUConvertUnicode, PUCUGenCodePages and PUCUBuild binaries.
6 |
7 |
8 |
--------------------------------------------------------------------------------