├── .gitignore ├── README.md ├── gitcommitpush ├── gitcommitpush.bat ├── gitforceupdate ├── gitforceupdate.bat └── src ├── PUCU.pas ├── PUCUBuild.cfg ├── PUCUBuild.dof ├── PUCUBuild.dpr ├── PUCUBuild.lpi ├── PUCUBuildUnicode.cfg ├── PUCUBuildUnicode.dof ├── PUCUCode.pas ├── PUCUConvertUnicode.cfg ├── PUCUConvertUnicode.dof ├── PUCUConvertUnicode.dpr ├── PUCUConvertUnicode.lpi ├── PUCUDebug.cfg ├── PUCUDebug.dof ├── PUCUDebug.dpr ├── PUCUGenCodePages.cfg ├── PUCUGenCodePages.dof ├── PUCUGenCodePages.dpr ├── PUCUGenCodePages.lpi ├── UnicodeData ├── Blocks.txt ├── CaseFolding.txt ├── CompositionExclusions.txt ├── DerivedGeneralCategory.txt ├── NormalizationCorrections.txt ├── NormalizationTest.txt ├── Scripts.txt ├── SpecialCasing.txt ├── UnicodeData.txt └── empty.txt └── howto.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Uncomment these types if you want even more clean repository. But be careful. 2 | # It can make harm to an existing project source. Read explanations below. 3 | # 4 | # Resource files are binaries containing manifest, project icon and version info. 5 | # They can not be viewed as text or compared by diff-tools. Consider replacing them with .rc files. 6 | #*.res 7 | # 8 | # Type library file (binary). In old Delphi versions it should be stored. 9 | # Since Delphi 2009 it is produced from .ridl file and can safely be ignored. 10 | #*.tlb 11 | # 12 | # Diagram Portfolio file. Used by the diagram editor up to Delphi 7. 13 | # Uncomment this if you are not using diagrams or use newer Delphi version. 14 | #*.ddp 15 | # 16 | # Visual LiveBindings file. Added in Delphi XE2. 17 | # Uncomment this if you are not using LiveBindings Designer. 18 | #*.vlb 19 | # 20 | # Deployment Manager configuration file for your project. Added in Delphi XE2. 21 | # Uncomment this if it is not mobile development and you do not use remote debug feature. 
22 | #*.deployproj 23 | # 24 | # C++ object files produced when C/C++ Output file generation is configured. 25 | # Uncomment this if you are not using external objects (zlib library for example). 26 | #*.obj 27 | # 28 | 29 | # Delphi compiler-generated binaries (safe to delete) 30 | *.exe 31 | *.dll 32 | *.bpl 33 | *.bpi 34 | *.dcp 35 | *.so 36 | *.apk 37 | *.drc 38 | *.map 39 | *.dres 40 | *.rsm 41 | *.tds 42 | *.dcu 43 | *.lib 44 | *.a 45 | *.o 46 | *.ocx 47 | 48 | # Delphi autogenerated files (duplicated info) 49 | *.cfg 50 | *.hpp 51 | *Resource.rc 52 | 53 | # Delphi local files (user-specific info) 54 | *.local 55 | *.identcache 56 | *.projdata 57 | *.tvsconfig 58 | *.dsk 59 | 60 | # Delphi history and backups 61 | __history/ 62 | *.~* 63 | 64 | # Castalia statistics file (since XE7 Castalia is distributed with Delphi) 65 | *.stat 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PUCU Pascal UniCode Utils Library 2 | You only need the src\PUCU.pas file for normal use of this library. 3 | 4 | -------------------------------------------------------------------------------- /gitcommitpush: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git commit -am "More work" 3 | git push 4 | -------------------------------------------------------------------------------- /gitcommitpush.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | call git commit -am "More work" 3 | call git push 4 | rem --set-upstream origin master 5 | -------------------------------------------------------------------------------- /gitforceupdate: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | git stash save 3 | git pull 4 | git stash drop
-------------------------------------------------------------------------------- /gitforceupdate.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | call git stash save 3 | call git pull 4 | call git stash drop -------------------------------------------------------------------------------- /src/PUCUBuild.cfg: -------------------------------------------------------------------------------- 1 | -$A8 2 | -$B- 3 | -$C+ 4 | -$D+ 5 | -$E- 6 | -$F- 7 | -$G+ 8 | -$H+ 9 | -$I+ 10 | -$J- 11 | -$K- 12 | -$L+ 13 | -$M- 14 | -$N+ 15 | -$O+ 16 | -$P+ 17 | -$Q- 18 | -$R- 19 | -$S- 20 | -$T- 21 | -$U- 22 | -$V+ 23 | -$W- 24 | -$X+ 25 | -$YD 26 | -$Z1 27 | -cg 28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 29 | -H+ 30 | -W+ 31 | -M 32 | -$M16384,1048576 33 | -K$00400000 34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl" 35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl" 36 | -w-SYMBOL_DEPRECATED 37 | -w-SYMBOL_LIBRARY 38 | -w-SYMBOL_PLATFORM 39 | -w-UNSAFE_TYPE 40 | -w-UNSAFE_CODE 41 | -w-UNSAFE_CAST 42 | -------------------------------------------------------------------------------- /src/PUCUBuild.dof: -------------------------------------------------------------------------------- 1 | [FileVersion] 2 | Version=7.0 3 | [Compiler] 4 | A=8 5 | B=0 6 | C=1 7 | D=1 8 | E=0 9 | F=0 10 | G=1 11 | H=1 12 | I=1 13 | J=0 14 | K=0 15 | L=1 16 | M=0 17 | N=1 18 | O=1 19 | P=1 20 | Q=0 21 | R=0 22 | S=0 23 | T=0 24 | U=0 25 | V=1 26 | W=0 27 | X=1 28 | Y=1 29 | Z=1 30 | ShowHints=1 31 | ShowWarnings=1 32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 33 | NamespacePrefix= 34 | SymbolDeprecated=0 35 | SymbolLibrary=0 36 | SymbolPlatform=0 37 | UnitLibrary=1 38 | UnitPlatform=1 39 | UnitDeprecated=1 40 | HResultCompat=1 41 | HidingMember=1 42 | HiddenVirtual=1 43 | Garbage=1 44 | BoundsError=1 45 | ZeroNilCompat=1 46 | StringConstTruncated=1 47 | 
ForLoopVarVarPar=1 48 | TypedConstVarPar=1 49 | AsgToTypedConst=1 50 | CaseLabelRange=1 51 | ForVariable=1 52 | ConstructingAbstract=1 53 | ComparisonFalse=1 54 | ComparisonTrue=1 55 | ComparingSignedUnsigned=1 56 | CombiningSignedUnsigned=1 57 | UnsupportedConstruct=1 58 | FileOpen=1 59 | FileOpenUnitSrc=1 60 | BadGlobalSymbol=1 61 | DuplicateConstructorDestructor=1 62 | InvalidDirective=1 63 | PackageNoLink=1 64 | PackageThreadVar=1 65 | ImplicitImport=1 66 | HPPEMITIgnored=1 67 | NoRetVal=1 68 | UseBeforeDef=1 69 | ForLoopVarUndef=1 70 | UnitNameMismatch=1 71 | NoCFGFileFound=1 72 | MessageDirective=1 73 | ImplicitVariants=1 74 | UnicodeToLocale=1 75 | LocaleToUnicode=1 76 | ImagebaseMultiple=1 77 | SuspiciousTypecast=1 78 | PrivatePropAccessor=1 79 | UnsafeType=0 80 | UnsafeCode=0 81 | UnsafeCast=0 82 | [Linker] 83 | MapFile=0 84 | OutputObjs=0 85 | ConsoleApp=1 86 | DebugInfo=0 87 | RemoteSymbols=0 88 | MinStackSize=16384 89 | MaxStackSize=1048576 90 | ImageBase=4194304 91 | ExeDescription= 92 | [Directories] 93 | OutputDir= 94 | UnitOutputDir= 95 | PackageDLLOutputDir= 96 | PackageDCPOutputDir= 97 | SearchPath= 98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls 99 | Conditionals= 100 | DebugSourceDirs= 101 | UsePackages=0 102 | [Parameters] 103 | RunParams= 104 | HostApplication= 105 | Launcher= 106 | UseLauncher=0 107 | DebugCWD= 108 | [Language] 109 | ActiveLang= 110 | ProjectLang= 111 | RootDir= 
112 | [Version Info] 113 | IncludeVerInfo=0 114 | AutoIncBuild=0 115 | MajorVer=1 116 | MinorVer=0 117 | Release=0 118 | Build=0 119 | Debug=0 120 | PreRelease=0 121 | Special=0 122 | Private=0 123 | DLL=0 124 | Locale=1031 125 | CodePage=1252 126 | [HistoryLists\hlUnitAliases] 127 | Count=1 128 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 129 | -------------------------------------------------------------------------------- /src/PUCUBuild.lpi: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | <UseAppBundle Value="False"/> 15 | <ResourceType Value="res"/> 16 | </General> 17 | <BuildModes> 18 | <Item Name="Default" Default="True"/> 19 | </BuildModes> 20 | <PublishOptions> 21 | <Version Value="2"/> 22 | <UseFileFilters Value="True"/> 23 | </PublishOptions> 24 | <RunParams> 25 | <FormatVersion Value="2"/> 26 | </RunParams> 27 | <Units> 28 | <Unit> 29 | <Filename Value="PUCUBuild.dpr"/> 30 | <IsPartOfProject Value="True"/> 31 | </Unit> 32 | <Unit> 33 | <Filename Value="PUCUUnicodePass1.pas"/> 34 | <IsPartOfProject Value="True"/> 35 | </Unit> 36 | </Units> 37 | </ProjectOptions> 38 | <CompilerOptions> 39 | <Version Value="11"/> 40 | <PathDelim Value="\"/> 41 | <Target> 42 | <Filename Value="PUCUBuild"/> 43 | </Target> 44 | <SearchPaths> 45 | <IncludeFiles Value="$(ProjOutDir)"/> 46 | <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> 47 | </SearchPaths> 48 | <Parsing> 49 | <SyntaxOptions> 50 | <SyntaxMode Value="Delphi"/> 51 | </SyntaxOptions> 52 | </Parsing> 53 | <Linking> 54 | <Debugging> 55 | <DebugInfoType Value="dsDwarf3"/> 56 | </Debugging> 57 | </Linking> 58 | </CompilerOptions> 59 | <Debugging> 60 | <Exceptions> 61 | <Item> 62 | <Name Value="EAbort"/> 63 | </Item> 64 | <Item> 65 | <Name Value="ECodetoolError"/> 66 | </Item> 67 | <Item> 68 | <Name Value="EFOpenError"/> 69 | </Item> 70 | </Exceptions> 71 | </Debugging> 72 | 
</CONFIG> 73 | -------------------------------------------------------------------------------- /src/PUCUBuildUnicode.cfg: -------------------------------------------------------------------------------- 1 | -$A8 2 | -$B- 3 | -$C+ 4 | -$D+ 5 | -$E- 6 | -$F- 7 | -$G+ 8 | -$H+ 9 | -$I+ 10 | -$J- 11 | -$K- 12 | -$L+ 13 | -$M- 14 | -$N+ 15 | -$O+ 16 | -$P+ 17 | -$Q- 18 | -$R- 19 | -$S- 20 | -$T- 21 | -$U- 22 | -$V+ 23 | -$W- 24 | -$X+ 25 | -$YD 26 | -$Z1 27 | -cg 28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 29 | -H+ 30 | -W+ 31 | -M 32 | -$M16384,1048576 33 | -K$00400000 34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl" 35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl" 36 | -w-SYMBOL_DEPRECATED 37 | -w-SYMBOL_LIBRARY 38 | -w-SYMBOL_PLATFORM 39 | -w-UNSAFE_TYPE 40 | -w-UNSAFE_CODE 41 | -w-UNSAFE_CAST 42 | -------------------------------------------------------------------------------- /src/PUCUBuildUnicode.dof: -------------------------------------------------------------------------------- 1 | [FileVersion] 2 | Version=7.0 3 | [Compiler] 4 | A=8 5 | B=0 6 | C=1 7 | D=1 8 | E=0 9 | F=0 10 | G=1 11 | H=1 12 | I=1 13 | J=0 14 | K=0 15 | L=1 16 | M=0 17 | N=1 18 | O=1 19 | P=1 20 | Q=0 21 | R=0 22 | S=0 23 | T=0 24 | U=0 25 | V=1 26 | W=0 27 | X=1 28 | Y=1 29 | Z=1 30 | ShowHints=1 31 | ShowWarnings=1 32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 33 | NamespacePrefix= 34 | SymbolDeprecated=0 35 | SymbolLibrary=0 36 | SymbolPlatform=0 37 | UnitLibrary=1 38 | UnitPlatform=1 39 | UnitDeprecated=1 40 | HResultCompat=1 41 | HidingMember=1 42 | HiddenVirtual=1 43 | Garbage=1 44 | BoundsError=1 45 | ZeroNilCompat=1 46 | StringConstTruncated=1 47 | ForLoopVarVarPar=1 48 | TypedConstVarPar=1 49 | AsgToTypedConst=1 50 | CaseLabelRange=1 51 | ForVariable=1 52 | ConstructingAbstract=1 53 | ComparisonFalse=1 54 | ComparisonTrue=1 55 | ComparingSignedUnsigned=1 56 | 
CombiningSignedUnsigned=1 57 | UnsupportedConstruct=1 58 | FileOpen=1 59 | FileOpenUnitSrc=1 60 | BadGlobalSymbol=1 61 | DuplicateConstructorDestructor=1 62 | InvalidDirective=1 63 | PackageNoLink=1 64 | PackageThreadVar=1 65 | ImplicitImport=1 66 | HPPEMITIgnored=1 67 | NoRetVal=1 68 | UseBeforeDef=1 69 | ForLoopVarUndef=1 70 | UnitNameMismatch=1 71 | NoCFGFileFound=1 72 | MessageDirective=1 73 | ImplicitVariants=1 74 | UnicodeToLocale=1 75 | LocaleToUnicode=1 76 | ImagebaseMultiple=1 77 | SuspiciousTypecast=1 78 | PrivatePropAccessor=1 79 | UnsafeType=0 80 | UnsafeCode=0 81 | UnsafeCast=0 82 | [Linker] 83 | MapFile=0 84 | OutputObjs=0 85 | ConsoleApp=1 86 | DebugInfo=0 87 | RemoteSymbols=0 88 | MinStackSize=16384 89 | MaxStackSize=1048576 90 | ImageBase=4194304 91 | ExeDescription= 92 | [Directories] 93 | OutputDir= 94 | UnitOutputDir= 95 | PackageDLLOutputDir= 96 | PackageDCPOutputDir= 97 | SearchPath= 98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls 99 | Conditionals= 100 | DebugSourceDirs= 101 | UsePackages=0 102 | [Parameters] 103 | RunParams= 104 | HostApplication= 105 | Launcher= 106 | UseLauncher=0 107 | DebugCWD= 108 | [Language] 109 | ActiveLang= 110 | ProjectLang= 111 | RootDir= 112 | [Version Info] 113 | IncludeVerInfo=0 114 | AutoIncBuild=0 115 | MajorVer=1 116 | MinorVer=0 117 | Release=0 118 | Build=0 119 | Debug=0 120 | PreRelease=0 121 | Special=0 122 | Private=0 123 | DLL=0 124 | 
Locale=1031 125 | CodePage=1252 126 | [HistoryLists\hlUnitAliases] 127 | Count=1 128 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 129 | -------------------------------------------------------------------------------- /src/PUCUConvertUnicode.cfg: -------------------------------------------------------------------------------- 1 | -$A8 2 | -$B- 3 | -$C+ 4 | -$D+ 5 | -$E- 6 | -$F- 7 | -$G+ 8 | -$H+ 9 | -$I+ 10 | -$J- 11 | -$K- 12 | -$L+ 13 | -$M- 14 | -$N+ 15 | -$O+ 16 | -$P+ 17 | -$Q- 18 | -$R- 19 | -$S- 20 | -$T- 21 | -$U- 22 | -$V+ 23 | -$W- 24 | -$X+ 25 | -$YD 26 | -$Z1 27 | -cg 28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 29 | -H+ 30 | -W+ 31 | -M 32 | -$M16384,1048576 33 | -K$00400000 34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl" 35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl" 36 | -w-UNSAFE_TYPE 37 | -w-UNSAFE_CODE 38 | -w-UNSAFE_CAST 39 | -------------------------------------------------------------------------------- /src/PUCUConvertUnicode.dof: -------------------------------------------------------------------------------- 1 | [FileVersion] 2 | Version=7.0 3 | [Compiler] 4 | A=8 5 | B=0 6 | C=1 7 | D=1 8 | E=0 9 | F=0 10 | G=1 11 | H=1 12 | I=1 13 | J=0 14 | K=0 15 | L=1 16 | M=0 17 | N=1 18 | O=1 19 | P=1 20 | Q=0 21 | R=0 22 | S=0 23 | T=0 24 | U=0 25 | V=1 26 | W=0 27 | X=1 28 | Y=1 29 | Z=1 30 | ShowHints=1 31 | ShowWarnings=1 32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 33 | NamespacePrefix= 34 | SymbolDeprecated=1 35 | SymbolLibrary=1 36 | SymbolPlatform=1 37 | UnitLibrary=1 38 | UnitPlatform=1 39 | UnitDeprecated=1 40 | HResultCompat=1 41 | HidingMember=1 42 | HiddenVirtual=1 43 | Garbage=1 44 | BoundsError=1 45 | ZeroNilCompat=1 46 | StringConstTruncated=1 47 | ForLoopVarVarPar=1 48 | TypedConstVarPar=1 49 | AsgToTypedConst=1 50 | CaseLabelRange=1 51 | ForVariable=1 52 | ConstructingAbstract=1 53 | 
ComparisonFalse=1 54 | ComparisonTrue=1 55 | ComparingSignedUnsigned=1 56 | CombiningSignedUnsigned=1 57 | UnsupportedConstruct=1 58 | FileOpen=1 59 | FileOpenUnitSrc=1 60 | BadGlobalSymbol=1 61 | DuplicateConstructorDestructor=1 62 | InvalidDirective=1 63 | PackageNoLink=1 64 | PackageThreadVar=1 65 | ImplicitImport=1 66 | HPPEMITIgnored=1 67 | NoRetVal=1 68 | UseBeforeDef=1 69 | ForLoopVarUndef=1 70 | UnitNameMismatch=1 71 | NoCFGFileFound=1 72 | MessageDirective=1 73 | ImplicitVariants=1 74 | UnicodeToLocale=1 75 | LocaleToUnicode=1 76 | ImagebaseMultiple=1 77 | SuspiciousTypecast=1 78 | PrivatePropAccessor=1 79 | UnsafeType=0 80 | UnsafeCode=0 81 | UnsafeCast=0 82 | [Linker] 83 | MapFile=0 84 | OutputObjs=0 85 | ConsoleApp=1 86 | DebugInfo=0 87 | RemoteSymbols=0 88 | MinStackSize=16384 89 | MaxStackSize=1048576 90 | ImageBase=4194304 91 | ExeDescription= 92 | [Directories] 93 | OutputDir= 94 | UnitOutputDir= 95 | PackageDLLOutputDir= 96 | PackageDCPOutputDir= 97 | SearchPath= 98 | Packages= 99 | Conditionals= 100 | DebugSourceDirs= 101 | UsePackages=0 102 | [Parameters] 103 | RunParams= 104 | HostApplication= 105 | Launcher= 106 | UseLauncher=0 107 | DebugCWD= 108 | [Language] 109 | ActiveLang= 110 | ProjectLang= 111 | RootDir= 112 | [Version Info] 113 | IncludeVerInfo=0 114 | AutoIncBuild=0 115 | MajorVer=1 116 | MinorVer=0 117 | Release=0 118 | Build=0 119 | Debug=0 120 | PreRelease=0 121 | Special=0 122 | Private=0 123 | DLL=0 124 | Locale=1031 125 | CodePage=1252 126 | [Version Info Keys] 127 | CompanyName= 128 | FileDescription= 129 | FileVersion=1.0.0.0 130 | InternalName= 131 | LegalCopyright= 132 | LegalTrademarks= 133 | OriginalFilename= 134 | ProductName= 135 | ProductVersion=1.0.0.0 136 | Comments= 137 | -------------------------------------------------------------------------------- /src/PUCUConvertUnicode.dpr: -------------------------------------------------------------------------------- 1 | 
(****************************************************************************** 2 | * PUCU Pascal UniCode Utils Libary * 3 | ****************************************************************************** 4 | * zlib license * 5 | *============================================================================* 6 | * * 7 | * Copyright (C) 2016-2022, Benjamin Rosseaux (benjamin@rosseaux.de) * 8 | * * 9 | * This software is provided 'as-is', without any express or implied * 10 | * warranty. In no event will the authors be held liable for any damages * 11 | * arising from the use of this software. * 12 | * * 13 | * Permission is granted to anyone to use this software for any purpose, * 14 | * including commercial applications, and to alter it and redistribute it * 15 | * freely, subject to the following restrictions: * 16 | * * 17 | * 1. The origin of this software must not be misrepresented; you must not * 18 | * claim that you wrote the original software. If you use this software * 19 | * in a product, an acknowledgement in the product documentation would be * 20 | * appreciated but is not required. * 21 | * 2. Altered source versions must be plainly marked as such, and must not be * 22 | * misrepresented as being the original software. * 23 | * 3. This notice may not be removed or altered from any source distribution. * 24 | * * 25 | ****************************************************************************** 26 | * General guidelines for code contributors * 27 | *============================================================================* 28 | * * 29 | * 1. Make sure you are legally allowed to make a contribution under the zlib * 30 | * license. * 31 | * 2. The zlib license header goes at the top of each source file, with * 32 | * appropriate copyright notice. * 33 | * 3. After a pull request, check the status of your pull request on * 34 | http://github.com/BeRo1985/pucu * 35 | * 4. 
Write code, which is compatible with Delphi 7-XE7 and FreePascal >= 3.0 * 36 | * so don't use generics/templates, operator overloading and another newer * 37 | * syntax features than Delphi 7 has support for that, but if needed, make * 38 | * it out-ifdef-able. * 39 | * 5. Don't use Delphi-only, FreePascal-only or Lazarus-only libraries/units, * 40 | * but if needed, make it out-ifdef-able. * 41 | * 6. No use of third-party libraries/units as possible, but if needed, make * 42 | * it out-ifdef-able. * 43 | * 7. Try to use const when possible. * 44 | * 8. Make sure to comment out writeln, used while debugging. * 45 | * 9. Make sure the code compiles on 32-bit and 64-bit platforms (x86-32, * 46 | * x86-64, ARM, ARM64, etc.). * 47 | * * 48 | ******************************************************************************) 49 | program PUCUConvertUnicode; 50 | {$ifdef fpc} 51 | {$mode delphi} 52 | {$ifdef cpui386} 53 | {$define cpu386} 54 | {$endif} 55 | {$ifdef cpu386} 56 | {$asmmode intel} 57 | {$endif} 58 | {$ifdef cpuamd64} 59 | {$asmmode intel} 60 | {$endif} 61 | {$ifdef FPC_LITTLE_ENDIAN} 62 | {$define LITTLE_ENDIAN} 63 | {$else} 64 | {$ifdef FPC_BIG_ENDIAN} 65 | {$define BIG_ENDIAN} 66 | {$endif} 67 | {$endif} 68 | {-$pic off} 69 | {$define caninline} 70 | {$ifdef FPC_HAS_TYPE_EXTENDED} 71 | {$define HAS_TYPE_EXTENDED} 72 | {$else} 73 | {$undef HAS_TYPE_EXTENDED} 74 | {$endif} 75 | {$ifdef FPC_HAS_TYPE_DOUBLE} 76 | {$define HAS_TYPE_DOUBLE} 77 | {$else} 78 | {$undef HAS_TYPE_DOUBLE} 79 | {$endif} 80 | {$ifdef FPC_HAS_TYPE_SINGLE} 81 | {$define HAS_TYPE_SINGLE} 82 | {$else} 83 | {$undef HAS_TYPE_SINGLE} 84 | {$endif} 85 | {$if declared(RawByteString)} 86 | {$define HAS_TYPE_RAWBYTESTRING} 87 | {$else} 88 | {$undef HAS_TYPE_RAWBYTESTRING} 89 | {$ifend} 90 | {$else} 91 | {$realcompatibility off} 92 | {$localsymbols on} 93 | {$define LITTLE_ENDIAN} 94 | {$ifndef cpu64} 95 | {$define cpu32} 96 | {$endif} 97 | {$define HAS_TYPE_EXTENDED} 98 | {$define HAS_TYPE_DOUBLE} 
99 | {$ifdef conditionalexpressions} 100 | {$if declared(RawByteString)} 101 | {$define HAS_TYPE_RAWBYTESTRING} 102 | {$else} 103 | {$undef HAS_TYPE_RAWBYTESTRING} 104 | {$ifend} 105 | {$else} 106 | {$undef HAS_TYPE_RAWBYTESTRING} 107 | {$endif} 108 | {$endif} 109 | {$ifdef win32} 110 | {$define windows} 111 | {$endif} 112 | {$ifdef win64} 113 | {$define windows} 114 | {$endif} 115 | {$ifdef wince} 116 | {$define windows} 117 | {$endif} 118 | {$ifdef windows} 119 | {$apptype console} 120 | {$endif} 121 | {$rangechecks off} 122 | {$extendedsyntax on} 123 | {$writeableconst on} 124 | {$hints off} 125 | {$booleval off} 126 | {$typedaddress off} 127 | {$stackframes off} 128 | {$varstringchecks on} 129 | {$typeinfo on} 130 | {$overflowchecks off} 131 | {$longstrings on} 132 | {$openstrings on} 133 | {$assertions on} 134 | 135 | uses SysUtils,Classes; 136 | 137 | const MaxUnicodeChar=$10ffff; 138 | CountUnicodeChars=$110000; 139 | 140 | type TPUCURawByteString={$ifdef HAS_TYPE_RAWBYTESTRING}RawByteString{$else}AnsiString{$endif}; 141 | 142 | TPUCUUnicodeDWords=array[0..MaxUnicodeChar] of longint; 143 | 144 | TPUCUCodePoints=array of longint; 145 | 146 | PPUCUUnicodeCharacterDecompositionMappingItem=^TPUCUUnicodeCharacterDecompositionMappingItem; 147 | TPUCUUnicodeCharacterDecompositionMappingItem=record 148 | Type_:TPUCURawByteString; 149 | Mapping:TPUCUCodePoints; 150 | end; 151 | 152 | PPUCUUnicodeCharacterDecompositionMappingItems=^TPUCUUnicodeCharacterDecompositionMappingItems; 153 | TPUCUUnicodeCharacterDecompositionMappingItems=array[0..MaxUnicodeChar] of TPUCUUnicodeCharacterDecompositionMappingItem; 154 | 155 | PPUCUUnicodeCompositionExclusions=^TPUCUUnicodeCompositionExclusions; 156 | TPUCUUnicodeCompositionExclusions=array[0..((MaxUnicodeChar+31) shr 5)-1] of longword; 157 | 158 | PPUCUUnicodeCharacterDecompositionMapItem=^TPUCUUnicodeCharacterDecompositionMapItem; 159 | TPUCUUnicodeCharacterDecompositionMapItem=record 160 | CodePoint:longword; 161 | 
Decomposition:TPUCUCodePoints; 162 | end; 163 | 164 | TPUCUUnicodeCharacterDecompositionMap=array of TPUCUUnicodeCharacterDecompositionMapItem; 165 | 166 | PPUCUUnicodeCharacterCompositionMapItem=^TPUCUUnicodeCharacterCompositionMapItem; 167 | TPUCUUnicodeCharacterCompositionMapItem=record 168 | Composition:TPUCUCodePoints; 169 | CodePoint:longword; 170 | HashValue:longword; 171 | Next:longint; 172 | end; 173 | 174 | TPUCUUnicodeCharacterCompositionMap=array of TPUCUUnicodeCharacterCompositionMapItem; 175 | 176 | TPUCUUnicodeDecompositionSequences=array of longint; 177 | 178 | var PUCUUnicodeCategories:TPUCUUnicodeDWords; 179 | PUCUUnicodeScripts:TPUCUUnicodeDWords; 180 | PUCUUnicodeCanonicalCombiningClasses:TPUCUUnicodeDWords; 181 | PUCUUnicodeLowerCaseDeltas:TPUCUUnicodeDWords; 182 | PUCUUnicodeUpperCaseDeltas:TPUCUUnicodeDWords; 183 | PUCUUnicodeTitleCaseDeltas:TPUCUUnicodeDWords; 184 | PUCUUnicodeCharacterDecompositionMappingItems:TPUCUUnicodeCharacterDecompositionMappingItems; 185 | PUCUUnicodeCompositionExclusions:TPUCUUnicodeCompositionExclusions; 186 | PUCUUnicodeCharacterDecompositionMap:TPUCUUnicodeCharacterDecompositionMap; 187 | PUCUUnicodeDecompositionSequences:TPUCUUnicodeDecompositionSequences; 188 | PUCUUnicodeDecompositionStarts:TPUCUUnicodeDWords; 189 | PUCUUnicodeCharacterCompositionMap:TPUCUUnicodeCharacterCompositionMap; 190 | PUCUCategories:TStringList; 191 | PUCUScripts:TStringList; 192 | OutputList:TStringList; 193 | 194 | { Returns the trimmed part of s that precedes the first occurrence of Splitter and removes that part and the splitter from s, trimming the remainder. If Splitter is not found, returns trim(s) and sets s to the empty string. } function GetUntilSplitter(const Splitter:TPUCURawByteString;var s:TPUCURawByteString):TPUCURawByteString; 195 | var i:longint; 196 | begin 197 | i:=pos(Splitter,s); 198 | if i>0 then begin 199 | result:=trim(copy(s,1,i-1)); 200 | Delete(s,1,(i+length(Splitter))-1); 201 | s:=trim(s); 202 | end else begin 203 | result:=trim(s); 204 | s:=''; 205 | end; 206 | end; 207 | 208 | { Writes Table to OutputList as a block-deduplicated lookup table. Does nothing when Level is 2 or more. Block sizes 1,2,4,... below length(Table) are tried and the size with the smallest estimated byte count is kept, ties going to the larger size. Emits the constants Name+BlockBits, Name+BlockMask, Name+BlockSize, Name+BlockCount and Name+BlockData. At Level 1 the index array is emitted as Name+IndexCount and Name+IndexData; otherwise it is packed again by a recursive call with Level+1 and Name+Index. Only complete blocks are stored, so trailing entries that do not fill a block are dropped. The data element type is the narrowest unsigned type when no entry is negative, otherwise the narrowest signed type, chosen by the largest absolute value. } procedure PackTable(const Table:array of longint;Level:integer;const Name:TPUCURawByteString); 209 | type TBlock=array of longint; 210 | TBlocks=array of
TBlock; 211 | TIndices=array of longint; 212 | var BestBlockSize,BlockSize,CountBlocks,CountIndices,Index,BlockPosition,Bytes,BestBytes,Bits,BestBits,EntryBytes,IndicesEntryBytes,BestIndicesEntryBytes,i,j,k:longint; 213 | Block:TBlock; 214 | Blocks:TBlocks; 215 | Indices:TIndices; 216 | BestBlocks:TBlocks; 217 | BestIndices:TIndices; 218 | OK:boolean; 219 | s:TPUCURawByteString; 220 | begin 221 | if Level<2 then begin 222 | Block:=nil; 223 | Blocks:=nil; 224 | Indices:=nil; 225 | BestBlocks:=nil; 226 | BestIndices:=nil; 227 | try 228 | BestBlockSize:=length(Table)*2; 229 | BestBits:=24; 230 | BlockSize:=1; 231 | Bits:=0; 232 | BestBytes:=-1; 233 | i:=0; 234 | OK:=true; 235 | { scan once: OK stays true only if no entry is negative, i becomes the largest absolute value } for Index:=0 to length(Table)-1 do begin 236 | j:=Table[Index]; 237 | if j<0 then begin 238 | OK:=false; 239 | end; 240 | j:=abs(j); 241 | if i<j then begin 242 | i:=j; 243 | end; 244 | end; 245 | if OK then begin 246 | if i<256 then begin 247 | EntryBytes:=1; 248 | s:='byte'; 249 | end else if i<65536 then begin 250 | EntryBytes:=2; 251 | s:='word'; 252 | end else begin 253 | EntryBytes:=4; 254 | s:='longword'; 255 | end; 256 | end else begin 257 | if i<128 then begin 258 | EntryBytes:=1; 259 | s:='shortint'; 260 | end else if i<32768 then begin 261 | EntryBytes:=2; 262 | s:='smallint'; 263 | end else begin 264 | EntryBytes:=4; 265 | s:='longint'; 266 | end; 267 | end; 268 | BestIndicesEntryBytes:=4; 269 | while BlockSize<length(Table) do begin 270 | SetLength(Block,BlockSize); 271 | SetLength(Blocks,(length(Table) div BlockSize)+1); 272 | FillChar(Block[0],BlockSize*SizeOf(longint),#$ff); { the count is in bytes, so scale by the element size } 273 | BlockPosition:=0; 274 | CountBlocks:=0; 275 | CountIndices:=0; 276 | for Index:=0 to length(Table)-1 do begin 277 | Block[BlockPosition]:=Table[Index]; 278 | inc(BlockPosition); 279 | if BlockPosition=BlockSize then begin 280 | k:=-1; 281 | { linear search for an identical block that was already stored } for i:=0 to CountBlocks-1 do begin 282 | OK:=true; 283 | for j:=0 to BlockSize-1 do begin 284 | if Blocks[i,j]<>Block[j] then begin 285 | OK:=false; 286 | break; 287 | end; 288 |
end; 289 | if OK then begin 290 | k:=i; 291 | break; 292 | end; 293 | end; 294 | if k<0 then begin 295 | k:=CountBlocks; 296 | Blocks[CountBlocks]:=copy(Block); 297 | inc(CountBlocks); 298 | end; 299 | if (CountIndices+1)>=length(Indices) then begin 300 | i:=1; 301 | j:=CountIndices+1; 302 | while i<=j do begin 303 | inc(i,i); 304 | end; 305 | SetLength(Indices,i); 306 | end; 307 | Indices[CountIndices]:=k; 308 | inc(CountIndices); 309 | BlockPosition:=0; 310 | end; 311 | end; 312 | if CountBlocks<256 then begin 313 | IndicesEntryBytes:=1; 314 | end else if CountBlocks<65536 then begin 315 | IndicesEntryBytes:=2; 316 | end else begin 317 | IndicesEntryBytes:=4; 318 | end; 319 | Bytes:=((CountBlocks*BlockSize)*EntryBytes)+(CountIndices*IndicesEntryBytes); 320 | if (BestBytes<0) or (Bytes<=BestBytes) then begin 321 | BestBytes:=Bytes; 322 | BestBlockSize:=BlockSize; 323 | BestBits:=Bits; 324 | BestIndicesEntryBytes:=IndicesEntryBytes; { keep the index entry width that Bytes was computed with, not the data entry width } 325 | BestBlocks:=copy(Blocks,0,CountBlocks); 326 | BestIndices:=copy(Indices,0,CountIndices); 327 | end; 328 | SetLength(Blocks,0); 329 | SetLength(Indices,0); 330 | inc(BlockSize,BlockSize); 331 | inc(Bits); 332 | end; 333 | OutputList.Add('// '+Name+': '+IntToStr(BestBytes)+' bytes, '+IntToStr(length(BestBlocks))+' blocks with '+IntToStr(BestBlockSize)+' items per '+IntToStr(EntryBytes)+' bytes and '+IntToStr(length(BestIndices))+' indices per '+IntToStr(BestIndicesEntryBytes)+' bytes'); 334 | OutputList.Add('const '+Name+'BlockBits='+IntToStr(BestBits)+';'); 335 | OutputList.Add(' '+Name+'BlockMask='+IntToStr((1 shl BestBits)-1)+';'); 336 | OutputList.Add(' '+Name+'BlockSize='+IntToStr(BestBlockSize)+';'); 337 | OutputList.Add(' '+Name+'BlockCount='+IntToStr(length(BestBlocks))+';'); 338 | OutputList.Add(' '+Name+'BlockData:array[0..'+IntToStr(length(BestBlocks)-1)+',0..'+IntToStr(BestBlockSize-1)+'] of '+s+'=('); 339 | s:=''; 340 | for i:=0 to length(BestBlocks)-1 do begin 341 | s:=s+'('; 342 | for j:=0 to BestBlockSize-1 do begin 343 |
s:=s+IntToStr(BestBlocks[i,j]); 344 | if (j+1)<BestBlockSize then begin 345 | s:=s+','; 346 | end; 347 | if length(s)>80 then begin 348 | OutputList.Add(s); 349 | s:=''; 350 | end; 351 | end; 352 | s:=s+')'; 353 | if (i+1)<length(BestBlocks) then begin 354 | s:=s+','; 355 | end; 356 | OutputList.Add(s); 357 | s:=''; 358 | end; 359 | if length(s)>0 then begin 360 | OutputList.Add(s); 361 | s:=''; 362 | end; 363 | OutputList.Add(');'); 364 | if Level=1 then begin 365 | case BestIndicesEntryBytes of 366 | 1:begin 367 | s:='byte'; 368 | end; 369 | 2:begin 370 | s:='word'; 371 | end; 372 | else begin 373 | s:='longword'; 374 | end; 375 | end; 376 | { NOTE(review): IndexCount is written as the number of blocks, not length(BestIndices) - confirm this is intended } OutputList.Add(' '+Name+'IndexCount='+IntToStr(length(BestBlocks))+';'); 377 | OutputList.Add(' '+Name+'IndexData:array[0..'+IntToStr(length(BestIndices)-1)+'] of '+s+'=('); 378 | s:=''; 379 | for i:=0 to length(BestIndices)-1 do begin 380 | s:=s+IntToStr(BestIndices[i]); 381 | if (i+1)<length(BestIndices) then begin 382 | s:=s+','; 383 | end; 384 | if length(s)>80 then begin 385 | OutputList.Add(s); 386 | s:=''; 387 | end; 388 | end; 389 | if length(s)>0 then begin 390 | OutputList.Add(s); 391 | s:=''; 392 | end; 393 | OutputList.Add(');'); 394 | OutputList.Add(''); 395 | end else begin 396 | OutputList.Add(''); 397 | PackTable(BestIndices,Level+1,Name+'Index'); 398 | end; 399 | finally 400 | SetLength(Block,0); 401 | SetLength(Blocks,0); 402 | SetLength(Indices,0); 403 | SetLength(BestBlocks,0); 404 | SetLength(BestIndices,0); 405 | end; 406 | end; 407 | end; 408 | 409 | { Writes Table to OutputList as one flat constant array named Name+Data. The element type is the narrowest unsigned type when no entry is negative, otherwise the narrowest signed type, chosen by the largest absolute value. Level is not used, and EntryBytes is assigned but never read. } procedure WriteTable(const Table:array of longint;Level:integer;const Name:TPUCURawByteString); 410 | var Index,EntryBytes,i,j,k:longint; 411 | OK:boolean; 412 | s:TPUCURawByteString; 413 | begin 414 | i:=0; 415 | OK:=true; 416 | for Index:=0 to length(Table)-1 do begin 417 | j:=Table[Index]; 418 | if j<0 then begin 419 | OK:=false; 420 | end; 421 | j:=abs(j); 422 | if i<j then begin 423 | i:=j; 424 | end; 425 | end; 426 | if OK then begin 427 | if i<256
then begin 428 | EntryBytes:=1; 429 | s:='byte'; 430 | end else if i<65536 then begin 431 | EntryBytes:=2; 432 | s:='word'; 433 | end else begin 434 | EntryBytes:=4; 435 | s:='longword'; 436 | end; 437 | end else begin 438 | if i<128 then begin 439 | EntryBytes:=1; 440 | s:='shortint'; 441 | end else if i<32768 then begin 442 | EntryBytes:=2; 443 | s:='smallint'; 444 | end else begin 445 | EntryBytes:=4; 446 | s:='longint'; 447 | end; 448 | end; 449 | OutputList.Add('const '+Name+'Data:array[0..'+IntToStr(length(Table)-1)+'] of '+s+'=('); 450 | s:=''; 451 | for j:=0 to length(Table)-1 do begin 452 | s:=s+IntToStr(Table[j]); 453 | if (j+1)<length(Table) then begin 454 | s:=s+','; 455 | end; 456 | if length(s)>80 then begin 457 | OutputList.Add(s); 458 | s:=''; 459 | end; 460 | end; 461 | s:=s+');'; 462 | OutputList.Add(s); 463 | end; 464 | 465 | { Loads Blocks.txt from the UnicodeData directory, skips empty lines and lines starting with #, strips trailing # comments, and reads each remaining line as a hexadecimal code point or range (From..To), a semicolon and a block name. Lines without a semicolon are ignored. Appends the TPUCUUnicodeBlock record declaration, PUCUUnicodeBlockCount and the PUCUUnicodeBlocks constant array to OutputList. } procedure ParseBlocks; 466 | type TPUCUUnicodeBlock=record 467 | Name:TPUCURawByteString; 468 | FromChar,ToChar:longword; 469 | end; 470 | var List:TStringList; 471 | i,j,k,FromChar,ToChar,Count:longint; 472 | s,p:TPUCURawByteString; 473 | Blocks:array of TPUCUUnicodeBlock; 474 | begin 475 | Blocks:=nil; 476 | try 477 | Count:=0; 478 | OutputList.Add('type TPUCUUnicodeBlock=record'); 479 | OutputList.Add(' Name:TPUCURawByteString;'); 480 | OutputList.Add(' FromChar,ToChar:longword;'); 481 | OutputList.Add(' end;'); 482 | List:=TStringList.Create; 483 | try 484 | List.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'Blocks.txt'); 485 | for i:=0 to List.Count-1 do begin 486 | s:=trim(List[i]); 487 | if (length(s)=0) or ((length(s)>0) and (s[1]='#')) then begin 488 | continue; 489 | end; 490 | j:=pos('#',s); 491 | if j>0 then begin 492 | s:=trim(copy(s,1,j-1)); 493 | end; 494 | j:=pos(';',s); 495 | if j=0 then begin 496 | continue; 497 | end; 498 | { p is the block name after the semicolon, s the code point field before it } p:=trim(copy(s,j+1,length(s)-j)); 499 | s:=trim(copy(s,1,j-1)); 500 | j:=pos('..',s); 501 | if j=0 then begin 502 | FromChar:=StrToInt('$'+trim(s)); 503 | ToChar:=FromChar;
504 | end else begin 505 | FromChar:=StrToInt('$'+trim(copy(s,1,j-1))); 506 | ToChar:=StrToInt('$'+trim(copy(s,j+2,length(s)-(j+1)))); 507 | end; 508 | { grow the array to the next power of two above Count+1 before storing } if (Count+1)>=length(Blocks) then begin 509 | j:=1; 510 | k:=Count+1; 511 | while j<=k do begin 512 | inc(j,j); 513 | end; 514 | SetLength(Blocks,j); 515 | end; 516 | Blocks[Count].Name:=p; 517 | Blocks[Count].FromChar:=FromChar; 518 | Blocks[Count].ToChar:=ToChar; 519 | inc(Count); 520 | end; 521 | SetLength(Blocks,Count); 522 | finally 523 | List.Free; 524 | end; 525 | OutputList.Add('const PUCUUnicodeBlockCount='+IntToStr(Count)+';'); 526 | OutputList.Add(' PUCUUnicodeBlocks:array[0..'+IntToStr(Count-1)+'] of TPUCUUnicodeBlock=('); 527 | for i:=0 to Count-1 do begin 528 | if (i+1)<Count then begin 529 | OutputList.Add(' (Name:'''+Blocks[i].Name+''';FromChar:'+inttostr(Blocks[i].FromChar)+';ToChar:'+inttostr(Blocks[i].ToChar)+'),'); 530 | end else begin 531 | OutputList.Add(' (Name:'''+Blocks[i].Name+''';FromChar:'+inttostr(Blocks[i].FromChar)+';ToChar:'+inttostr(Blocks[i].ToChar)+'));'); 532 | end; 533 | end; 534 | { closes the array literal when no entry was written; NOTE(review): the bound written above is then 0..-1 - confirm the generated declaration is acceptable } if Count=0 then begin 535 | OutputList.Add(');'); 536 | end; 537 | OutputList.Add(''); 538 | finally 539 | SetLength(Blocks,0); 540 | end; 541 | end; 542 | 543 | { Loads DerivedGeneralCategory.txt from the UnicodeData directory and, for every listed code point or range, stores the index of its category name in PUCUCategories into PUCUUnicodeCategories. A name not yet in PUCUCategories is added first. Empty lines, # comments and lines without a semicolon are skipped. } procedure ParseDerivedGeneralCategory; 544 | var List:TStringList; 545 | i,j,ci,FromChar,ToChar,CurrentChar:longint; 546 | s,p:TPUCURawByteString; 547 | begin 548 | List:=TStringList.Create; 549 | try 550 | List.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'DerivedGeneralCategory.txt'); 551 | for i:=0 to List.Count-1 do begin 552 | s:=trim(List[i]); 553 | if (length(s)=0) or ((length(s)>0) and (s[1]='#')) then begin 554 | continue; 555 | end; 556 | j:=pos('#',s); 557 | if j>0 then begin 558 | s:=trim(copy(s,1,j-1)); 559 | end; 560 | j:=pos(';',s); 561 | if j=0 then begin 562 | continue; 563 | end; 564 | p:=trim(copy(s,j+1,length(s)-j)); 565 | ci:=PUCUCategories.IndexOf(p); 566 | if ci<0 then begin 567 | ci:=PUCUCategories.Add(p);
568 | end; 569 | s:=trim(copy(s,1,j-1)); 570 | j:=pos('..',s); 571 | if j=0 then begin 572 | CurrentChar:=StrToInt('$'+trim(s)); 573 | PUCUUnicodeCategories[CurrentChar]:=ci; 574 | end else begin 575 | FromChar:=StrToInt('$'+trim(copy(s,1,j-1))); 576 | ToChar:=StrToInt('$'+trim(copy(s,j+2,length(s)-(j+1)))); 577 | for CurrentChar:=FromChar to ToChar do begin 578 | PUCUUnicodeCategories[CurrentChar]:=ci; 579 | end; 580 | end; 581 | end; 582 | finally 583 | List.Free; 584 | end; 585 | end; 586 | 587 | { Loads Scripts.txt from the UnicodeData directory and, for every listed code point or range, stores the index of its script name in PUCUScripts into PUCUUnicodeScripts. A name not yet in PUCUScripts is added first. Empty lines, # comments and lines without a semicolon are skipped. } procedure ParseScripts; 588 | var List:TStringList; 589 | i,j,si,FromChar,ToChar,CurrentChar:longint; 590 | s,p:TPUCURawByteString; 591 | begin 592 | List:=TStringList.Create; 593 | try 594 | List.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'Scripts.txt'); 595 | for i:=0 to List.Count-1 do begin 596 | s:=trim(List[i]); 597 | if (length(s)=0) or ((length(s)>0) and (s[1]='#')) then begin 598 | continue; 599 | end; 600 | j:=pos('#',s); 601 | if j>0 then begin 602 | s:=trim(copy(s,1,j-1)); 603 | end; 604 | j:=pos(';',s); 605 | if j=0 then begin 606 | continue; 607 | end; 608 | p:=trim(copy(s,j+1,length(s)-j)); 609 | si:=PUCUScripts.IndexOf(p); 610 | if si<0 then begin 611 | si:=PUCUScripts.Add(p); 612 | end; 613 | s:=trim(copy(s,1,j-1)); 614 | j:=pos('..',s); 615 | if j=0 then begin 616 | CurrentChar:=StrToInt('$'+trim(s)); 617 | PUCUUnicodeScripts[CurrentChar]:=si; 618 | end else begin 619 | FromChar:=StrToInt('$'+trim(copy(s,1,j-1))); 620 | ToChar:=StrToInt('$'+trim(copy(s,j+2,length(s)-(j+1)))); 621 | for CurrentChar:=FromChar to ToChar do begin 622 | PUCUUnicodeScripts[CurrentChar]:=si; 623 | end; 624 | end; 625 | end; 626 | finally 627 | List.Free; 628 | end; 629 | end; 630 | 631 | procedure ParseUnicodeData; 632 | var List:TStringList; 633 | i,j,k,ci,OtherChar,CurrentChar:longint; 634 | s,cs:TPUCURawByteString; 635 | cdmi:PPUCUUnicodeCharacterDecompositionMappingItem; 636 | begin 637 | List:=TStringList.Create; 638 | try 639 |
List.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'UnicodeData.txt'); 640 | for i:=ord('a') to ord('z') do begin 641 | PUCUUnicodeUpperCaseDeltas[i]:=longint(ord('A')-ord('a')); 642 | end; 643 | for i:=ord('A') to ord('Z') do begin 644 | PUCUUnicodeLowerCaseDeltas[i]:=ord('a')-ord('A'); 645 | end; 646 | for i:=$ff21 to $ff3a do begin 647 | PUCUUnicodeLowerCaseDeltas[i]:=$ff41-$ff21; 648 | end; 649 | for i:=$ff41 to $ff5a do begin 650 | PUCUUnicodeUpperCaseDeltas[i]:=longint($ff21-$ff41); 651 | end; 652 | for i:=0 to List.Count-1 do begin 653 | s:=trim(List[i]); 654 | if (length(s)=0) or ((length(s)>0) and (s[1]='#')) then begin 655 | continue; 656 | end; 657 | j:=pos('#',s); 658 | if j>0 then begin 659 | s:=trim(copy(s,1,j-1)); 660 | end; 661 | j:=pos(';',s); 662 | if j=0 then begin 663 | continue; 664 | end; 665 | CurrentChar:=StrToInt('$'+GetUntilSplitter(';',s)); // Code 666 | GetUntilSplitter(';',s); // Name 667 | begin 668 | cs:=GetUntilSplitter(';',s); // Class 669 | ci:=PUCUCategories.IndexOf(cs); 670 | if ci<0 then begin 671 | ci:=PUCUCategories.Add(cs); 672 | end; 673 | if PUCUUnicodeCategories[CurrentChar]<>ci then begin 674 | writeln(ErrOutput,CurrentChar,' has multiple categories?'); 675 | PUCUUnicodeCategories[CurrentChar]:=ci; 676 | end; 677 | end; 678 | begin 679 | PUCUUnicodeCanonicalCombiningClasses[CurrentChar]:=StrToIntDef(GetUntilSplitter(';',s),0); // Canonical Combining Class 680 | end; 681 | GetUntilSplitter(';',s); // Bidirectional Category 682 | begin 683 | cs:=GetUntilSplitter(';',s); // Character Decomposition Mapping 684 | cdmi:=@PUCUUnicodeCharacterDecompositionMappingItems[CurrentChar]; 685 | if length(cs)>0 then begin 686 | if pos('<',cs)>0 then begin 687 | GetUntilSplitter('<',cs); 688 | cdmi^.Type_:=GetUntilSplitter('>',cs); 689 | end else begin 690 | cdmi^.Type_:='canonical'; 691 | end; 692 | cs:=trim(cs); 693 | cdmi^.Mapping:=nil; 694 | j:=0; 695 | try 696 | while length(cs)>0 do begin 697 | if 
length(cdmi^.Mapping)<(j+1) then begin 698 | SetLength(cdmi^.Mapping,(j+1)*2); 699 | end; 700 | cdmi^.Mapping[j]:=StrToIntDef('$'+GetUntilSplitter(' ',cs),0); 701 | inc(j); 702 | end; 703 | finally 704 | SetLength(cdmi^.Mapping,j); 705 | end; 706 | end else begin 707 | cdmi^.Type_:='none'; 708 | cdmi^.Mapping:=nil; 709 | end; 710 | end; 711 | GetUntilSplitter(';',s); // Decimal digit value 712 | GetUntilSplitter(';',s); // Digit value 713 | GetUntilSplitter(';',s); // Numeric value 714 | GetUntilSplitter(';',s); // Mirrored 715 | GetUntilSplitter(';',s); // Unicode 1.0 Name 716 | GetUntilSplitter(';',s); // 10646 comment field 717 | begin 718 | OtherChar:=StrToIntDef('$'+GetUntilSplitter(';',s),-1); // UpperChar Code 719 | if (OtherChar>=0) and (OtherChar<>CurrentChar) then begin 720 | PUCUUnicodeUpperCaseDeltas[CurrentChar]:=OtherChar-CurrentChar; 721 | end; 722 | end; 723 | begin 724 | OtherChar:=StrToIntDef('$'+GetUntilSplitter(';',s),-1); // LowerChar Code 725 | if (OtherChar>=0) and (OtherChar<>CurrentChar) then begin 726 | PUCUUnicodeLowerCaseDeltas[CurrentChar]:=OtherChar-CurrentChar; 727 | end; 728 | end; 729 | begin 730 | OtherChar:=StrToIntDef('$'+GetUntilSplitter(';',s),-1); // TitleChar Code 731 | if (OtherChar>=0) and (OtherChar<>CurrentChar) then begin 732 | PUCUUnicodeTitleCaseDeltas[CurrentChar]:=OtherChar-CurrentChar; 733 | end; 734 | end; 735 | end; 736 | finally 737 | List.Free; 738 | end; 739 | List:=TStringList.Create; 740 | try 741 | List.LoadFromFile(IncludeTrailingPathDelimiter('UnicodeData')+'CompositionExclusions.txt'); 742 | for i:=0 to List.Count-1 do begin 743 | s:=trim(List[i]); 744 | if (length(s)=0) or ((length(s)>0) and (s[1]='#')) then begin 745 | continue; 746 | end; 747 | j:=pos('#',s); 748 | if j>0 then begin 749 | s:=trim(copy(s,1,j-1)); 750 | end; 751 | s:=trim(s); 752 | j:=pos(' ',s); 753 | if j>0 then begin 754 | s:=trim(copy(s,1,j-1)); 755 | end; 756 | CurrentChar:=StrToInt('$'+GetUntilSplitter(' ',s)); // Code 757 | 
PUCUUnicodeCompositionExclusions[CurrentChar shr 5]:=PUCUUnicodeCompositionExclusions[CurrentChar shr 5] or (longword(1) shl (CurrentChar and 31)); 758 | end; 759 | finally 760 | List.Free; 761 | end; 762 | end; 763 | 764 | function RecursiveDecomposition(const aCodePoint:longword):TPUCUCodePoints; 765 | var Index,Len,NewLen:longint; 766 | CodePoints:TPUCUCodePoints; 767 | begin 768 | result:=nil; 769 | if (aCodePoint<CountUnicodeChars) and 770 | (PUCUUnicodeCharacterDecompositionMappingItems[aCodePoint].Type_='canonical') then begin 771 | Len:=0; 772 | try 773 | for Index:=0 to length(PUCUUnicodeCharacterDecompositionMappingItems[aCodePoint].Mapping)-1 do begin 774 | CodePoints:=nil; 775 | try 776 | CodePoints:=RecursiveDecomposition(PUCUUnicodeCharacterDecompositionMappingItems[aCodePoint].Mapping[Index]); 777 | if length(CodePoints)>0 then begin 778 | NewLen:=Len+length(CodePoints); 779 | if length(result)<NewLen then begin 780 | SetLength(result,NewLen shl 1); 781 | end; 782 | Move(CodePoints[0],result[Len],length(CodePoints)*SizeOf(longword)); 783 | Len:=NewLen; 784 | end; 785 | finally 786 | CodePoints:=nil; 787 | end; 788 | end; 789 | finally 790 | SetLength(result,Len); 791 | end; 792 | end else begin 793 | SetLength(result,1); 794 | result[0]:=aCodePoint; 795 | end; 796 | end; 797 | 798 | procedure ResolveRecursiveDecompositions; 799 | var CodePoint:longword; 800 | Count,Index,SequenceLen,StartLen,FoundIndex,SubIndex,SubSubIndex,NewLen:longint; 801 | PUCUUnicodeCharacterDecompositionMapItem:TPUCUUnicodeCharacterDecompositionMapItem; 802 | Decomposition:TPUCUCodePoints; 803 | begin 804 | Count:=0; 805 | for CodePoint:=0 to MaxUnicodeChar do begin 806 | if PUCUUnicodeCharacterDecompositionMappingItems[CodePoint].Type_='canonical' then begin 807 | if length(PUCUUnicodeCharacterDecompositionMap)<(Count+1) then begin 808 | SetLength(PUCUUnicodeCharacterDecompositionMap,(Count+1)*2); 809 | end; 810 | 
PUCUUnicodeCharacterDecompositionMap[Count].CodePoint:=CodePoint; 811 | PUCUUnicodeCharacterDecompositionMap[Count].Decomposition:=RecursiveDecomposition(CodePoint); 812 | inc(Count); 813 | end; 814 | end; 815 | Index:=0; 816 | while (Index+1)<Count do begin 817 | if PUCUUnicodeCanonicalCombiningClasses[PUCUUnicodeCharacterDecompositionMap[Index].CodePoint]>PUCUUnicodeCanonicalCombiningClasses[PUCUUnicodeCharacterDecompositionMap[Index+1].CodePoint] then begin 818 | PUCUUnicodeCharacterDecompositionMapItem:=PUCUUnicodeCharacterDecompositionMap[Index]; 819 | PUCUUnicodeCharacterDecompositionMap[Index]:=PUCUUnicodeCharacterDecompositionMap[Index+1]; 820 | PUCUUnicodeCharacterDecompositionMap[Index+1]:=PUCUUnicodeCharacterDecompositionMapItem; 821 | if Index>0 then begin 822 | dec(Index); 823 | end else begin 824 | inc(Index); 825 | end; 826 | end else begin 827 | inc(Index); 828 | end; 829 | end; 830 | PUCUUnicodeDecompositionSequences:=nil; 831 | SequenceLen:=1; 832 | try 833 | SetLength(PUCUUnicodeDecompositionSequences,1); 834 | PUCUUnicodeDecompositionSequences[0]:=0; 835 | StartLen:=0; 836 | for Index:=0 to Count-1 do begin 837 | Decomposition:=PUCUUnicodeCharacterDecompositionMap[Index].Decomposition; 838 | FoundIndex:=-1; 839 | for SubIndex:=0 to SequenceLen-length(Decomposition) do begin 840 | Count:=0; 841 | for SubSubIndex:=0 to length(Decomposition)-1 do begin 842 | if Decomposition[SubSubIndex]<>PUCUUnicodeDecompositionSequences[SubIndex+SubSubIndex] then begin 843 | break; 844 | end; 845 | inc(Count); 846 | end; 847 | if Count=length(Decomposition) then begin 848 | FoundIndex:=SubIndex; 849 | break; 850 | end; 851 | end; 852 | if FoundIndex<0 then begin 853 | FoundIndex:=SequenceLen; 854 | NewLen:=SequenceLen+length(Decomposition); 855 | if length(PUCUUnicodeDecompositionSequences)<NewLen then begin 856 | SetLength(PUCUUnicodeDecompositionSequences,NewLen*2); 857 | end; 858 | 
Move(Decomposition[0],PUCUUnicodeDecompositionSequences[SequenceLen],length(Decomposition)*SizeOf(longword)); 859 | SequenceLen:=NewLen; 860 | end; 861 | PUCUUnicodeDecompositionStarts[PUCUUnicodeCharacterDecompositionMap[Index].CodePoint]:=FoundIndex or ((length(Decomposition)-1) shl 14); 862 | end; 863 | finally 864 | SetLength(PUCUUnicodeDecompositionSequences,SequenceLen); 865 | end; 866 | end; 867 | 868 | procedure ResolveCompositions; 869 | const HashTableBits=10; 870 | HashTableSize=1 shl HashTableBits; 871 | HashTableMask=HashTableSize-1; 872 | {function Hash(const s:TPUCUCodePoints):longword; 873 | begin 874 | result:=(s[0]*92821) xor (s[1]*486187739); 875 | end;} 876 | function Hash(const s:TPUCUCodePoints):longword; 877 | //var Key:uint64; 878 | begin 879 | result:=(s[0]*98303927) xor (s[1]*24710753); 880 | //result:=(s[0]*12582917) xor (s[1]*25165843); 881 | { Key:=(uint64(s[0]) shl 32) or s[1]; 882 | Key:=(not Key)+(Key shl 18); 883 | Key:=(Key xor (Key shr 31))*21; 884 | Key:=Key xor (Key shr 11); 885 | Key:=Key+(Key shl 6); 886 | result:=longword(Key xor (Key shr 22));} 887 | end; 888 | var CodePoint:longword; 889 | Count,Index,SequenceLen,StartLen,FoundIndex,SubIndex,SubSubIndex,NewLen,HashIndex:longint; 890 | PUCUUnicodeCharacterCompositionMapItem:TPUCUUnicodeCharacterCompositionMapItem; 891 | HashTable,HashTableLength:array of longint; 892 | begin 893 | Count:=0; 894 | try 895 | for Index:=0 to length(PUCUUnicodeCharacterDecompositionMap)-1 do begin 896 | CodePoint:=PUCUUnicodeCharacterDecompositionMap[Index].CodePoint; 897 | if ((PUCUUnicodeCompositionExclusions[CodePoint shr 5] and (longword(1) shl (CodePoint and 31)))=0) and 898 | (length(PUCUUnicodeCharacterDecompositionMap[Index].Decomposition)=2) and 899 | (PUCUUnicodeCharacterDecompositionMappingItems[CodePoint].Type_='canonical') and 900 | (PUCUUnicodeCanonicalCombiningClasses[CodePoint]=0) and 901 | 
(PUCUUnicodeCanonicalCombiningClasses[PUCUUnicodeCharacterDecompositionMappingItems[CodePoint].Mapping[0]]=0) then begin 902 | if length(PUCUUnicodeCharacterCompositionMap)<(Count+1) then begin 903 | SetLength(PUCUUnicodeCharacterCompositionMap,(Count+1)*2); 904 | end; 905 | PUCUUnicodeCharacterCompositionMap[Count].Composition:=PUCUUnicodeCharacterDecompositionMap[Index].Decomposition; 906 | PUCUUnicodeCharacterCompositionMap[Count].CodePoint:=CodePoint; 907 | PUCUUnicodeCharacterCompositionMap[Count].HashValue:=Hash(PUCUUnicodeCharacterDecompositionMap[Index].Decomposition); 908 | PUCUUnicodeCharacterCompositionMap[Count].Next:=-1; 909 | inc(Count); 910 | end; 911 | end; 912 | finally 913 | SetLength(PUCUUnicodeCharacterCompositionMap,Count); 914 | end; 915 | Index:=0; 916 | while (Index+1)<Count do begin 917 | if (PUCUUnicodeCharacterCompositionMap[Index].Composition[0]>PUCUUnicodeCharacterCompositionMap[Index+1].Composition[0]) or 918 | ((PUCUUnicodeCharacterCompositionMap[Index].Composition[0]=PUCUUnicodeCharacterCompositionMap[Index+1].Composition[0]) and 919 | (PUCUUnicodeCharacterCompositionMap[Index].Composition[1]>PUCUUnicodeCharacterCompositionMap[Index+1].Composition[1])) then begin 920 | PUCUUnicodeCharacterCompositionMapItem:=PUCUUnicodeCharacterCompositionMap[Index]; 921 | PUCUUnicodeCharacterCompositionMap[Index]:=PUCUUnicodeCharacterCompositionMap[Index+1]; 922 | PUCUUnicodeCharacterCompositionMap[Index+1]:=PUCUUnicodeCharacterCompositionMapItem; 923 | if Index>0 then begin 924 | dec(Index); 925 | end else begin 926 | inc(Index); 927 | end; 928 | end else begin 929 | inc(Index); 930 | end; 931 | end; 932 | HashTable:=nil; 933 | HashTableLength:=nil; 934 | try 935 | SetLength(HashTable,HashTableSize); 936 | SetLength(HashTableLength,HashTableSize); 937 | for Index:=0 to HashTableSize-1 do begin 938 | HashTable[Index]:=-1; 939 | HashTableLength[Index]:=0; 940 | end; 941 | for Index:=0 to length(PUCUUnicodeCharacterCompositionMap)-1 do begin 942 | 
HashIndex:=PUCUUnicodeCharacterCompositionMap[Index].HashValue and HashTableMask; 943 | PUCUUnicodeCharacterCompositionMap[Index].Next:=HashTable[HashIndex]; 944 | HashTable[HashIndex]:=Index; 945 | inc(HashTableLength[HashIndex]); 946 | end; 947 | for Index:=0 to HashTableSize-1 do begin 948 | inc(HashTable[Index]); 949 | end; 950 | OutputList.Add('const PUCUUnicodeCharacterCompositionHashTableBits='+IntToStr(HashTableBits)+';'); 951 | OutputList.Add(' PUCUUnicodeCharacterCompositionHashTableSize='+IntToStr(HashTableSize)+';'); 952 | OutputList.Add(' PUCUUnicodeCharacterCompositionHashTableMask='+IntToStr(HashTableMask)+';'); 953 | WriteTable(HashTable,0,'PUCUUnicodeCharacterCompositionHashTable'); 954 | //WriteTable(HashTableLength,0,'PUCUUnicodeCharacterCompositionHashTableLength'); // for debugging usages 955 | OutputList.Add('type PPUCUUnicodeCharacterCompositionSequence=^TPUCUUnicodeCharacterCompositionSequence;'); 956 | OutputList.Add(' TPUCUUnicodeCharacterCompositionSequence=record'); 957 | OutputList.Add(' Sequence:array[0..1] of longword;'); 958 | OutputList.Add(' CodePoint:longword;'); 959 | case length(PUCUUnicodeCharacterCompositionMap)+1 of 960 | 0..255:begin 961 | OutputList.Add(' Next:byte;'); 962 | end; 963 | 256..65535:begin 964 | OutputList.Add(' Next:word;'); 965 | end; 966 | else begin 967 | OutputList.Add(' Next:longword;'); 968 | end; 969 | end; 970 | OutputList.Add(' end;'); 971 | OutputList.Add('const PUCUUnicodeCharacterCompositionSequenceCount='+IntToStr(length(PUCUUnicodeCharacterCompositionMap)+1)+';'); 972 | OutputList.Add(' PUCUUnicodeCharacterCompositionSequences:array[0..'+IntToStr(length(PUCUUnicodeCharacterCompositionMap))+'] of TPUCUUnicodeCharacterCompositionSequence=('); 973 | if length(PUCUUnicodeCharacterCompositionMap)>0 then begin 974 | OutputList.Add(' (Sequence:(0,0);CodePoint:0;Next:0),'); 975 | for Index:=0 to length(PUCUUnicodeCharacterCompositionMap)-1 do begin 976 | if 
(Index+1)<length(PUCUUnicodeCharacterCompositionMap) then begin 977 | OutputList.Add(' (Sequence:('+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].Composition[0])+','+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].Composition[1])+');CodePoint:'+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].CodePoint)+';Next:'+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].Next+1)+'),'); 978 | end else begin 979 | OutputList.Add(' (Sequence:('+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].Composition[0])+','+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].Composition[1])+');CodePoint:'+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].CodePoint)+';Next:'+IntToStr(PUCUUnicodeCharacterCompositionMap[Index].Next+1)+')'); 980 | end; 981 | end; 982 | end else begin 983 | OutputList.Add(' (Sequence:(0,0);CodePoint:0)'); 984 | end; 985 | OutputList.Add(' );'); 986 | finally 987 | HashTable:=nil; 988 | end; 989 | end; 990 | 991 | var i:longint; 992 | begin 993 | FillChar(PUCUUnicodeCategories,sizeof(TPUCUUnicodeDWords),#0); 994 | FillChar(PUCUUnicodeScripts,sizeof(TPUCUUnicodeDWords),#$0); 995 | FillChar(PUCUUnicodeCanonicalCombiningClasses,sizeof(TPUCUUnicodeDWords),#$0); 996 | FillChar(PUCUUnicodeUpperCaseDeltas,sizeof(TPUCUUnicodeDWords),#$0); 997 | FillChar(PUCUUnicodeLowerCaseDeltas,sizeof(TPUCUUnicodeDWords),#$0); 998 | FillChar(PUCUUnicodeTitleCaseDeltas,sizeof(TPUCUUnicodeDWords),#$0); 999 | FillChar(PUCUUnicodeCompositionExclusions,sizeof(TPUCUUnicodeCompositionExclusions),#$0); 1000 | FillChar(PUCUUnicodeCharacterDecompositionMappingItems,sizeof(TPUCUUnicodeCharacterDecompositionMappingItems),#$0); 1001 | FillChar(PUCUUnicodeDecompositionStarts,sizeof(TPUCUUnicodeDWords),#$0); 1002 | PUCUUnicodeCharacterDecompositionMap:=nil; 1003 | PUCUUnicodeCharacterCompositionMap:=nil; 1004 | PUCUUnicodeDecompositionSequences:=nil; 1005 | try 1006 | OutputList:=TStringList.Create; 1007 | try 1008 | PUCUCategories:=TStringList.Create; 1009 | PUCUCategories.Add('Cn'); 
1010 | try 1011 | PUCUScripts:=TStringList.Create; 1012 | PUCUScripts.Add('Unknown'); 1013 | PUCUScripts.Add('Common'); 1014 | try 1015 | ParseDerivedGeneralCategory; 1016 | ParseScripts; 1017 | ParseUnicodeData; 1018 | OutputList.Add('unit PUCUUnicodePass1;'); 1019 | OutputList.Add('{$ifdef fpc}'); 1020 | OutputList.Add(' {$mode delphi}'); 1021 | OutputList.Add('{$endif}'); 1022 | OutputList.Add('interface'); 1023 | OutputList.Add(''); 1024 | OutputList.Add('type TPUCURawByteString={$ifdef HAS_TYPE_RAWBYTESTRING}RawByteString{$else}AnsiString{$endif};'); 1025 | OutputList.Add('type TPUCURawByteChar=AnsiChar;'); 1026 | OutputList.Add(''); 1027 | ParseBlocks; 1028 | begin 1029 | OutputList.Add('const PUCUUnicodeCategoryIDs:array[0..'+IntToStr(PUCUCategories.Count-1)+'] of TPUCURawByteString=('); 1030 | for i:=0 to PUCUCategories.Count-1 do begin 1031 | if (i+1)<PUCUCategories.Count then begin 1032 | OutputList.Add(''''+PUCUCategories[i]+''','); 1033 | end else begin 1034 | OutputList.Add(''''+PUCUCategories[i]+''''); 1035 | end; 1036 | end; 1037 | OutputList.Add(');'); 1038 | for i:=0 to PUCUCategories.Count-1 do begin 1039 | OutputList.Add(' PUCUUnicodeCategory'+PUCUCategories[i]+'='+IntToStr(i)+';'); 1040 | end; 1041 | OutputList.Add(' PUCUUnicodeCategoryCount='+IntToStr(PUCUCategories.Count)+';'); 1042 | OutputList.Add(' PUCU_CT_UNASSIGNED=PUCUUnicodeCategoryCn;'); 1043 | OutputList.Add(' PUCU_CT_UPPERCASE_LETTER=PUCUUnicodeCategoryLu;'); 1044 | OutputList.Add(' PUCU_CT_LOWERCASE_LETTER=PUCUUnicodeCategoryLl;'); 1045 | OutputList.Add(' PUCU_CT_TITLECASE_LETTER=PUCUUnicodeCategoryLt;'); 1046 | OutputList.Add(' PUCU_CT_MODIFIER_LETTER=PUCUUnicodeCategoryLm;'); 1047 | OutputList.Add(' PUCU_CT_OTHER_LETTER=PUCUUnicodeCategoryLo;'); 1048 | OutputList.Add(' PUCU_CT_NON_SPACING_MARK=PUCUUnicodeCategoryMn;'); 1049 | OutputList.Add(' PUCU_CT_ENCLOSING_MARK=PUCUUnicodeCategoryMe;'); 1050 | OutputList.Add(' PUCU_CT_COMBINING_SPACING_MARK=PUCUUnicodeCategoryMc;'); 1051 | 
OutputList.Add(' PUCU_CT_DECIMAL_DIGIT_NUMBER=PUCUUnicodeCategoryNd;'); 1052 | OutputList.Add(' PUCU_CT_LETTER_NUMBER=PUCUUnicodeCategoryNl;'); 1053 | OutputList.Add(' PUCU_CT_OTHER_NUMBER=PUCUUnicodeCategoryNo;'); 1054 | OutputList.Add(' PUCU_CT_SPACE_SEPARATOR=PUCUUnicodeCategoryZs;'); 1055 | OutputList.Add(' PUCU_CT_LINE_SEPARATOR=PUCUUnicodeCategoryZl;'); 1056 | OutputList.Add(' PUCU_CT_PARAGRAPH_SEPARATOR=PUCUUnicodeCategoryZp;'); 1057 | OutputList.Add(' PUCU_CT_CONTROL=PUCUUnicodeCategoryCc;'); 1058 | OutputList.Add(' PUCU_CT_FORMAT=PUCUUnicodeCategoryCf;'); 1059 | OutputList.Add(' PUCU_CT_PRIVATE_USE=PUCUUnicodeCategoryCo;'); 1060 | OutputList.Add(' PUCU_CT_SURROGATE=PUCUUnicodeCategoryCs;'); 1061 | OutputList.Add(' PUCU_CT_DASH_PUNCTUATION=PUCUUnicodeCategoryPd;'); 1062 | OutputList.Add(' PUCU_CT_START_PUNCTUATION=PUCUUnicodeCategoryPs;'); 1063 | OutputList.Add(' PUCU_CT_END_PUNCTUATION=PUCUUnicodeCategoryPe;'); 1064 | OutputList.Add(' PUCU_CT_INITIAL_PUNCTUATION=PUCUUnicodeCategoryPi;'); 1065 | OutputList.Add(' PUCU_CT_FINAL_PUNCTUATION=PUCUUnicodeCategoryPf;'); 1066 | OutputList.Add(' PUCU_CT_CONNECTOR_PUNCTUATION=PUCUUnicodeCategoryPc;'); 1067 | OutputList.Add(' PUCU_CT_OTHER_PUNCTUATION=PUCUUnicodeCategoryPo;'); 1068 | OutputList.Add(' PUCU_CT_MATH_SYMBOL=PUCUUnicodeCategorySm;'); 1069 | OutputList.Add(' PUCU_CT_CURRENCY_SYMBOL=PUCUUnicodeCategorySc;'); 1070 | OutputList.Add(' PUCU_CT_MODIFIER_SYMBOL=PUCUUnicodeCategorySk;'); 1071 | OutputList.Add(' PUCU_CT_OTHER_SYMBOL=PUCUUnicodeCategorySo;'); 1072 | OutputList.Add(''); 1073 | end; 1074 | begin 1075 | OutputList.Add('const PUCUUnicodeScriptIDs:array[0..'+IntToStr(PUCUScripts.Count-1)+'] of TPUCURawByteString=('); 1076 | for i:=0 to PUCUScripts.Count-1 do begin 1077 | if (i+1)<PUCUScripts.Count then begin 1078 | OutputList.Add(''''+PUCUScripts[i]+''','); 1079 | end else begin 1080 | OutputList.Add(''''+PUCUScripts[i]+''''); 1081 | end; 1082 | end; 1083 | OutputList.Add(');'); 1084 | for i:=0 to 
PUCUScripts.Count-1 do begin 1085 | OutputList.Add(' PUCUUnicodeScript'+PUCUScripts[i]+'='+IntToStr(i)+';'); 1086 | end; 1087 | OutputList.Add(' PUCUUnicodeScriptCount='+IntToStr(PUCUScripts.Count)+';'); 1088 | OutputList.Add(''); 1089 | end; 1090 | ResolveRecursiveDecompositions; 1091 | ResolveCompositions; 1092 | PackTable(PUCUUnicodeCategories,0,'PUCUUnicodeCategoryArray'); 1093 | OutputList.Add(''); 1094 | PackTable(PUCUUnicodeScripts,0,'PUCUUnicodeScriptArray'); 1095 | OutputList.Add(''); 1096 | PackTable(PUCUUnicodeCanonicalCombiningClasses,0,'PUCUUnicodeCanonicalCombiningClassArray'); 1097 | OutputList.Add(''); 1098 | PackTable(PUCUUnicodeDecompositionStarts,0,'PUCUUnicodeDecompositionStartArray'); 1099 | OutputList.Add(''); 1100 | WriteTable(PUCUUnicodeDecompositionSequences,0,'PUCUUnicodeDecompositionSequenceArray'); 1101 | OutputList.Add(''); 1102 | PackTable(PUCUUnicodeUpperCaseDeltas,0,'PUCUUnicodeUpperCaseDeltaArray'); 1103 | OutputList.Add(''); 1104 | PackTable(PUCUUnicodeLowerCaseDeltas,0,'PUCUUnicodeLowerCaseDeltaArray'); 1105 | OutputList.Add(''); 1106 | PackTable(PUCUUnicodeTitleCaseDeltas,0,'PUCUUnicodeTitleCaseDeltaArray'); 1107 | OutputList.Add(''); 1108 | OutputList.Add('implementation'); 1109 | OutputList.Add('end.'); 1110 | OutputList.SaveToFile('PUCUUnicodePass1.pas'); 1111 | finally 1112 | PUCUScripts.Free; 1113 | end; 1114 | finally 1115 | PUCUCategories.Free; 1116 | end; 1117 | finally 1118 | OutputList.Free; 1119 | end; 1120 | finally 1121 | PUCUUnicodeCharacterDecompositionMap:=nil; 1122 | PUCUUnicodeCharacterCompositionMap:=nil; 1123 | PUCUUnicodeDecompositionSequences:=nil; 1124 | end; 1125 | end. 
1126 | -------------------------------------------------------------------------------- /src/PUCUConvertUnicode.lpi: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <CONFIG> 3 | <ProjectOptions> 4 | <Version Value="12"/> 5 | <PathDelim Value="\"/> 6 | <General> 7 | <Flags> 8 | <MainUnitHasCreateFormStatements Value="False"/> 9 | <MainUnitHasTitleStatement Value="False"/> 10 | <MainUnitHasScaledStatement Value="False"/> 11 | </Flags> 12 | <SessionStorage Value="InProjectDir"/> 13 | <Title Value="PUCUConvertUnicode"/> 14 | <UseAppBundle Value="False"/> 15 | <ResourceType Value="res"/> 16 | </General> 17 | <BuildModes> 18 | <Item Name="Default" Default="True"/> 19 | </BuildModes> 20 | <PublishOptions> 21 | <Version Value="2"/> 22 | <UseFileFilters Value="True"/> 23 | </PublishOptions> 24 | <RunParams> 25 | <FormatVersion Value="2"/> 26 | </RunParams> 27 | <Units> 28 | <Unit> 29 | <Filename Value="PUCUConvertUnicode.dpr"/> 30 | <IsPartOfProject Value="True"/> 31 | </Unit> 32 | </Units> 33 | </ProjectOptions> 34 | <CompilerOptions> 35 | <Version Value="11"/> 36 | <PathDelim Value="\"/> 37 | <Target> 38 | <Filename Value="PUCUConvertUnicode"/> 39 | </Target> 40 | <SearchPaths> 41 | <IncludeFiles Value="$(ProjOutDir)"/> 42 | <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> 43 | </SearchPaths> 44 | <Parsing> 45 | <SyntaxOptions> 46 | <SyntaxMode Value="Delphi"/> 47 | </SyntaxOptions> 48 | </Parsing> 49 | <Linking> 50 | <Debugging> 51 | <DebugInfoType Value="dsDwarf3"/> 52 | </Debugging> 53 | </Linking> 54 | </CompilerOptions> 55 | <Debugging> 56 | <Exceptions> 57 | <Item> 58 | <Name Value="EAbort"/> 59 | </Item> 60 | <Item> 61 | <Name Value="ECodetoolError"/> 62 | </Item> 63 | <Item> 64 | <Name Value="EFOpenError"/> 65 | </Item> 66 | </Exceptions> 67 | </Debugging> 68 | </CONFIG> 69 | -------------------------------------------------------------------------------- /src/PUCUDebug.cfg: 
-------------------------------------------------------------------------------- 1 | -$A8 2 | -$B- 3 | -$C+ 4 | -$D+ 5 | -$E- 6 | -$F- 7 | -$G+ 8 | -$H+ 9 | -$I+ 10 | -$J- 11 | -$K- 12 | -$L+ 13 | -$M- 14 | -$N+ 15 | -$O+ 16 | -$P+ 17 | -$Q- 18 | -$R- 19 | -$S- 20 | -$T- 21 | -$U- 22 | -$V+ 23 | -$W- 24 | -$X+ 25 | -$YD 26 | -$Z1 27 | -GD 28 | -cg 29 | -vn 30 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 31 | -H+ 32 | -W+ 33 | -M 34 | -$M16384,1048576 35 | -K$00400000 36 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl" 37 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl" 38 | -w-UNSAFE_TYPE 39 | -w-UNSAFE_CODE 40 | -w-UNSAFE_CAST 41 | -------------------------------------------------------------------------------- /src/PUCUDebug.dof: -------------------------------------------------------------------------------- 1 | [FileVersion] 2 | Version=7.0 3 | [Compiler] 4 | A=8 5 | B=0 6 | C=1 7 | D=1 8 | E=0 9 | F=0 10 | G=1 11 | H=1 12 | I=1 13 | J=0 14 | K=0 15 | L=1 16 | M=0 17 | N=1 18 | O=1 19 | P=1 20 | Q=0 21 | R=0 22 | S=0 23 | T=0 24 | U=0 25 | V=1 26 | W=0 27 | X=1 28 | Y=1 29 | Z=1 30 | ShowHints=1 31 | ShowWarnings=1 32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 33 | NamespacePrefix= 34 | SymbolDeprecated=1 35 | SymbolLibrary=1 36 | SymbolPlatform=1 37 | UnitLibrary=1 38 | UnitPlatform=1 39 | UnitDeprecated=1 40 | HResultCompat=1 41 | HidingMember=1 42 | HiddenVirtual=1 43 | Garbage=1 44 | BoundsError=1 45 | ZeroNilCompat=1 46 | StringConstTruncated=1 47 | ForLoopVarVarPar=1 48 | TypedConstVarPar=1 49 | AsgToTypedConst=1 50 | CaseLabelRange=1 51 | ForVariable=1 52 | ConstructingAbstract=1 53 | ComparisonFalse=1 54 | ComparisonTrue=1 55 | ComparingSignedUnsigned=1 56 | CombiningSignedUnsigned=1 57 | UnsupportedConstruct=1 58 | FileOpen=1 59 | FileOpenUnitSrc=1 60 | BadGlobalSymbol=1 61 | DuplicateConstructorDestructor=1 62 | InvalidDirective=1 63 | PackageNoLink=1 64 | 
PackageThreadVar=1 65 | ImplicitImport=1 66 | HPPEMITIgnored=1 67 | NoRetVal=1 68 | UseBeforeDef=1 69 | ForLoopVarUndef=1 70 | UnitNameMismatch=1 71 | NoCFGFileFound=1 72 | MessageDirective=1 73 | ImplicitVariants=1 74 | UnicodeToLocale=1 75 | LocaleToUnicode=1 76 | ImagebaseMultiple=1 77 | SuspiciousTypecast=1 78 | PrivatePropAccessor=1 79 | UnsafeType=0 80 | UnsafeCode=0 81 | UnsafeCast=0 82 | [Linker] 83 | MapFile=3 84 | OutputObjs=0 85 | ConsoleApp=1 86 | DebugInfo=1 87 | RemoteSymbols=0 88 | MinStackSize=16384 89 | MaxStackSize=1048576 90 | ImageBase=4194304 91 | ExeDescription= 92 | [Directories] 93 | OutputDir= 94 | UnitOutputDir= 95 | PackageDLLOutputDir= 96 | PackageDCPOutputDir= 97 | SearchPath= 98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls;KHexEditor_D7R 99 | Conditionals= 100 | DebugSourceDirs= 101 | UsePackages=0 102 | [Parameters] 103 | RunParams= 104 | HostApplication= 105 | Launcher= 106 | UseLauncher=0 107 | DebugCWD= 108 | [Language] 109 | ActiveLang= 110 | ProjectLang= 111 | RootDir= 112 | [Version Info] 113 | IncludeVerInfo=0 114 | AutoIncBuild=0 115 | MajorVer=1 116 | MinorVer=0 117 | Release=0 118 | Build=0 119 | Debug=0 120 | PreRelease=0 121 | Special=0 122 | Private=0 123 | DLL=0 124 | Locale=1031 125 | CodePage=1252 126 | [HistoryLists\hlUnitAliases] 127 | Count=1 128 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 129 | 
-------------------------------------------------------------------------------- /src/PUCUDebug.dpr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BeRo1985/pucu/ea0b2a5fc5dbd4669f774442ee2bd88075909618/src/PUCUDebug.dpr -------------------------------------------------------------------------------- /src/PUCUGenCodePages.cfg: -------------------------------------------------------------------------------- 1 | -$A8 2 | -$B- 3 | -$C+ 4 | -$D+ 5 | -$E- 6 | -$F- 7 | -$G+ 8 | -$H+ 9 | -$I+ 10 | -$J+ 11 | -$K- 12 | -$L+ 13 | -$M- 14 | -$N+ 15 | -$O+ 16 | -$P+ 17 | -$Q- 18 | -$R- 19 | -$S- 20 | -$T- 21 | -$U- 22 | -$V+ 23 | -$W- 24 | -$X+ 25 | -$YD 26 | -$Z1 27 | -cg 28 | -AWinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 29 | -H+ 30 | -W+ 31 | -M 32 | -$M16384,1048576 33 | -K$00400000 34 | -LE"c:\program files (x86)\borland\delphi7\Projects\Bpl" 35 | -LN"c:\program files (x86)\borland\delphi7\Projects\Bpl" 36 | -w-UNSAFE_TYPE 37 | -w-UNSAFE_CODE 38 | -w-UNSAFE_CAST 39 | -------------------------------------------------------------------------------- /src/PUCUGenCodePages.dof: -------------------------------------------------------------------------------- 1 | [FileVersion] 2 | Version=7.0 3 | [Compiler] 4 | A=8 5 | B=0 6 | C=1 7 | D=1 8 | E=0 9 | F=0 10 | G=1 11 | H=1 12 | I=1 13 | J=1 14 | K=0 15 | L=1 16 | M=0 17 | N=1 18 | O=1 19 | P=1 20 | Q=0 21 | R=0 22 | S=0 23 | T=0 24 | U=0 25 | V=1 26 | W=0 27 | X=1 28 | Y=1 29 | Z=1 30 | ShowHints=1 31 | ShowWarnings=1 32 | UnitAliases=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 33 | NamespacePrefix= 34 | SymbolDeprecated=1 35 | SymbolLibrary=1 36 | SymbolPlatform=1 37 | UnitLibrary=1 38 | UnitPlatform=1 39 | UnitDeprecated=1 40 | HResultCompat=1 41 | HidingMember=1 42 | HiddenVirtual=1 43 | Garbage=1 44 | BoundsError=1 45 | ZeroNilCompat=1 46 | StringConstTruncated=1 47 | ForLoopVarVarPar=1 48 | 
TypedConstVarPar=1 49 | AsgToTypedConst=1 50 | CaseLabelRange=1 51 | ForVariable=1 52 | ConstructingAbstract=1 53 | ComparisonFalse=1 54 | ComparisonTrue=1 55 | ComparingSignedUnsigned=1 56 | CombiningSignedUnsigned=1 57 | UnsupportedConstruct=1 58 | FileOpen=1 59 | FileOpenUnitSrc=1 60 | BadGlobalSymbol=1 61 | DuplicateConstructorDestructor=1 62 | InvalidDirective=1 63 | PackageNoLink=1 64 | PackageThreadVar=1 65 | ImplicitImport=1 66 | HPPEMITIgnored=1 67 | NoRetVal=1 68 | UseBeforeDef=1 69 | ForLoopVarUndef=1 70 | UnitNameMismatch=1 71 | NoCFGFileFound=1 72 | MessageDirective=1 73 | ImplicitVariants=1 74 | UnicodeToLocale=1 75 | LocaleToUnicode=1 76 | ImagebaseMultiple=1 77 | SuspiciousTypecast=1 78 | PrivatePropAccessor=1 79 | UnsafeType=0 80 | UnsafeCode=0 81 | UnsafeCast=0 82 | [Linker] 83 | MapFile=0 84 | OutputObjs=0 85 | ConsoleApp=1 86 | DebugInfo=0 87 | RemoteSymbols=0 88 | MinStackSize=16384 89 | MaxStackSize=1048576 90 | ImageBase=4194304 91 | ExeDescription= 92 | [Directories] 93 | OutputDir= 94 | UnitOutputDir= 95 | PackageDLLOutputDir= 96 | PackageDCPOutputDir= 97 | SearchPath= 98 | Packages=vcl;rtl;vclx;indy;vclie;xmlrtl;inetdbbde;inet;inetdbxpress;dbrtl;soaprtl;dsnap;VclSmp;dbexpress;vcldb;dbxcds;inetdb;bdertl;vcldbx;adortl;teeui;teedb;tee;ibxpress;visualclx;visualdbclx;vclactnband;vclshlctrls;IntrawebDB_50_70;Intraweb_50_70;Rave50CLX;Rave50VCL;dclOfficeXP;acntD7_R;JclDeveloperTools;Jcl;JclVcl;JclContainers;JvCore;JvSystem;JvStdCtrls;JvAppFrm;JvBands;JvDB;JvDlgs;JvBDE;JvControls;JvCmp;JvCrypt;JvCustom;JvDocking;JvDotNetCtrls;JvGlobus;JvHMI;JvJans;JvManagedThreads;JvMM;JvNet;JvPageComps;JvPascalInterpreter;JvPluginSystem;JvPrintPreview;JvRuntimeDesign;JvTimeFramework;JvWizards;JvXPCtrls;KHexEditor_D7R 99 | Conditionals= 100 | DebugSourceDirs= 101 | UsePackages=0 102 | [Parameters] 103 | RunParams= 104 | HostApplication= 105 | Launcher= 106 | UseLauncher=0 107 | DebugCWD= 108 | [Language] 109 | ActiveLang= 110 | ProjectLang= 111 | RootDir= 112 | 
[Version Info] 113 | IncludeVerInfo=0 114 | AutoIncBuild=0 115 | MajorVer=1 116 | MinorVer=0 117 | Release=0 118 | Build=0 119 | Debug=0 120 | PreRelease=0 121 | Special=0 122 | Private=0 123 | DLL=0 124 | Locale=1031 125 | CodePage=1252 126 | [Version Info Keys] 127 | CompanyName= 128 | FileDescription= 129 | FileVersion=1.0.0.0 130 | InternalName= 131 | LegalCopyright= 132 | LegalTrademarks= 133 | OriginalFilename= 134 | ProductName= 135 | ProductVersion=1.0.0.0 136 | Comments= 137 | [HistoryLists\hlUnitAliases] 138 | Count=1 139 | Item0=WinTypes=Windows;WinProcs=Windows;DbiTypes=BDE;DbiProcs=BDE;DbiErrs=BDE; 140 | -------------------------------------------------------------------------------- /src/PUCUGenCodePages.dpr: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/BeRo1985/pucu/ea0b2a5fc5dbd4669f774442ee2bd88075909618/src/PUCUGenCodePages.dpr -------------------------------------------------------------------------------- /src/PUCUGenCodePages.lpi: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <CONFIG> 3 | <ProjectOptions> 4 | <Version Value="12"/> 5 | <PathDelim Value="\"/> 6 | <General> 7 | <Flags> 8 | <MainUnitHasCreateFormStatements Value="False"/> 9 | <MainUnitHasTitleStatement Value="False"/> 10 | <MainUnitHasScaledStatement Value="False"/> 11 | </Flags> 12 | <SessionStorage Value="InProjectDir"/> 13 | <Title Value="PUCUGenCodePages"/> 14 | <UseAppBundle Value="False"/> 15 | <ResourceType Value="res"/> 16 | </General> 17 | <BuildModes> 18 | <Item Name="Default" Default="True"/> 19 | </BuildModes> 20 | <PublishOptions> 21 | <Version Value="2"/> 22 | <UseFileFilters Value="True"/> 23 | </PublishOptions> 24 | <RunParams> 25 | <FormatVersion Value="2"/> 26 | </RunParams> 27 | <Units> 28 | <Unit> 29 | <Filename Value="PUCUGenCodePages.dpr"/> 30 | <IsPartOfProject Value="True"/> 31 | <UnitName 
Value="PUCUGenCodePages"/> 32 | </Unit> 33 | </Units> 34 | </ProjectOptions> 35 | <CompilerOptions> 36 | <Version Value="11"/> 37 | <PathDelim Value="\"/> 38 | <Target> 39 | <Filename Value="PUCUGenCodePages"/> 40 | </Target> 41 | <SearchPaths> 42 | <IncludeFiles Value="$(ProjOutDir)"/> 43 | <UnitOutputDirectory Value="lib\$(TargetCPU)-$(TargetOS)"/> 44 | </SearchPaths> 45 | <Parsing> 46 | <SyntaxOptions> 47 | <SyntaxMode Value="Delphi"/> 48 | </SyntaxOptions> 49 | </Parsing> 50 | <Linking> 51 | <Debugging> 52 | <DebugInfoType Value="dsDwarf3"/> 53 | </Debugging> 54 | </Linking> 55 | </CompilerOptions> 56 | <Debugging> 57 | <Exceptions> 58 | <Item> 59 | <Name Value="EAbort"/> 60 | </Item> 61 | <Item> 62 | <Name Value="ECodetoolError"/> 63 | </Item> 64 | <Item> 65 | <Name Value="EFOpenError"/> 66 | </Item> 67 | </Exceptions> 68 | </Debugging> 69 | </CONFIG> 70 | -------------------------------------------------------------------------------- /src/UnicodeData/Blocks.txt: -------------------------------------------------------------------------------- 1 | # Blocks-15.0.0.txt 2 | # Date: 2022-01-28, 20:58:00 GMT [KW] 3 | # © 2022 Unicode®, Inc. 4 | # For terms of use, see https://www.unicode.org/terms_of_use.html 5 | # 6 | # Unicode Character Database 7 | # For documentation, see https://www.unicode.org/reports/tr44/ 8 | # 9 | # Format: 10 | # Start Code..End Code; Block Name 11 | 12 | # ================================================ 13 | 14 | # Note: When comparing block names, casing, whitespace, hyphens, 15 | # and underbars are ignored. 16 | # For example, "Latin Extended-A" and "latin extended a" are equivalent. 17 | # For more information on the comparison of property values, 18 | # see UAX #44: https://www.unicode.org/reports/tr44/ 19 | # 20 | # All block ranges start with a value where (cp MOD 16) = 0, 21 | # and end with a value where (cp MOD 16) = 15.
In other words, 22 | # the last hexadecimal digit of the start of range is ...0 23 | # and the last hexadecimal digit of the end of range is ...F. 24 | # This constraint on block ranges guarantees that allocations 25 | # are done in terms of whole columns, and that code chart display 26 | # never involves splitting columns in the charts. 27 | # 28 | # All code points not explicitly listed for Block 29 | # have the value No_Block. 30 | 31 | # Property: Block 32 | # 33 | # @missing: 0000..10FFFF; No_Block 34 | 35 | 0000..007F; Basic Latin 36 | 0080..00FF; Latin-1 Supplement 37 | 0100..017F; Latin Extended-A 38 | 0180..024F; Latin Extended-B 39 | 0250..02AF; IPA Extensions 40 | 02B0..02FF; Spacing Modifier Letters 41 | 0300..036F; Combining Diacritical Marks 42 | 0370..03FF; Greek and Coptic 43 | 0400..04FF; Cyrillic 44 | 0500..052F; Cyrillic Supplement 45 | 0530..058F; Armenian 46 | 0590..05FF; Hebrew 47 | 0600..06FF; Arabic 48 | 0700..074F; Syriac 49 | 0750..077F; Arabic Supplement 50 | 0780..07BF; Thaana 51 | 07C0..07FF; NKo 52 | 0800..083F; Samaritan 53 | 0840..085F; Mandaic 54 | 0860..086F; Syriac Supplement 55 | 0870..089F; Arabic Extended-B 56 | 08A0..08FF; Arabic Extended-A 57 | 0900..097F; Devanagari 58 | 0980..09FF; Bengali 59 | 0A00..0A7F; Gurmukhi 60 | 0A80..0AFF; Gujarati 61 | 0B00..0B7F; Oriya 62 | 0B80..0BFF; Tamil 63 | 0C00..0C7F; Telugu 64 | 0C80..0CFF; Kannada 65 | 0D00..0D7F; Malayalam 66 | 0D80..0DFF; Sinhala 67 | 0E00..0E7F; Thai 68 | 0E80..0EFF; Lao 69 | 0F00..0FFF; Tibetan 70 | 1000..109F; Myanmar 71 | 10A0..10FF; Georgian 72 | 1100..11FF; Hangul Jamo 73 | 1200..137F; Ethiopic 74 | 1380..139F; Ethiopic Supplement 75 | 13A0..13FF; Cherokee 76 | 1400..167F; Unified Canadian Aboriginal Syllabics 77 | 1680..169F; Ogham 78 | 16A0..16FF; Runic 79 | 1700..171F; Tagalog 80 | 1720..173F; Hanunoo 81 | 1740..175F; Buhid 82 | 1760..177F; Tagbanwa 83 | 1780..17FF; Khmer 84 | 1800..18AF; Mongolian 85 | 18B0..18FF; Unified Canadian Aboriginal Syllabics 
Extended 86 | 1900..194F; Limbu 87 | 1950..197F; Tai Le 88 | 1980..19DF; New Tai Lue 89 | 19E0..19FF; Khmer Symbols 90 | 1A00..1A1F; Buginese 91 | 1A20..1AAF; Tai Tham 92 | 1AB0..1AFF; Combining Diacritical Marks Extended 93 | 1B00..1B7F; Balinese 94 | 1B80..1BBF; Sundanese 95 | 1BC0..1BFF; Batak 96 | 1C00..1C4F; Lepcha 97 | 1C50..1C7F; Ol Chiki 98 | 1C80..1C8F; Cyrillic Extended-C 99 | 1C90..1CBF; Georgian Extended 100 | 1CC0..1CCF; Sundanese Supplement 101 | 1CD0..1CFF; Vedic Extensions 102 | 1D00..1D7F; Phonetic Extensions 103 | 1D80..1DBF; Phonetic Extensions Supplement 104 | 1DC0..1DFF; Combining Diacritical Marks Supplement 105 | 1E00..1EFF; Latin Extended Additional 106 | 1F00..1FFF; Greek Extended 107 | 2000..206F; General Punctuation 108 | 2070..209F; Superscripts and Subscripts 109 | 20A0..20CF; Currency Symbols 110 | 20D0..20FF; Combining Diacritical Marks for Symbols 111 | 2100..214F; Letterlike Symbols 112 | 2150..218F; Number Forms 113 | 2190..21FF; Arrows 114 | 2200..22FF; Mathematical Operators 115 | 2300..23FF; Miscellaneous Technical 116 | 2400..243F; Control Pictures 117 | 2440..245F; Optical Character Recognition 118 | 2460..24FF; Enclosed Alphanumerics 119 | 2500..257F; Box Drawing 120 | 2580..259F; Block Elements 121 | 25A0..25FF; Geometric Shapes 122 | 2600..26FF; Miscellaneous Symbols 123 | 2700..27BF; Dingbats 124 | 27C0..27EF; Miscellaneous Mathematical Symbols-A 125 | 27F0..27FF; Supplemental Arrows-A 126 | 2800..28FF; Braille Patterns 127 | 2900..297F; Supplemental Arrows-B 128 | 2980..29FF; Miscellaneous Mathematical Symbols-B 129 | 2A00..2AFF; Supplemental Mathematical Operators 130 | 2B00..2BFF; Miscellaneous Symbols and Arrows 131 | 2C00..2C5F; Glagolitic 132 | 2C60..2C7F; Latin Extended-C 133 | 2C80..2CFF; Coptic 134 | 2D00..2D2F; Georgian Supplement 135 | 2D30..2D7F; Tifinagh 136 | 2D80..2DDF; Ethiopic Extended 137 | 2DE0..2DFF; Cyrillic Extended-A 138 | 2E00..2E7F; Supplemental Punctuation 139 | 2E80..2EFF; CJK Radicals Supplement 
140 | 2F00..2FDF; Kangxi Radicals 141 | 2FF0..2FFF; Ideographic Description Characters 142 | 3000..303F; CJK Symbols and Punctuation 143 | 3040..309F; Hiragana 144 | 30A0..30FF; Katakana 145 | 3100..312F; Bopomofo 146 | 3130..318F; Hangul Compatibility Jamo 147 | 3190..319F; Kanbun 148 | 31A0..31BF; Bopomofo Extended 149 | 31C0..31EF; CJK Strokes 150 | 31F0..31FF; Katakana Phonetic Extensions 151 | 3200..32FF; Enclosed CJK Letters and Months 152 | 3300..33FF; CJK Compatibility 153 | 3400..4DBF; CJK Unified Ideographs Extension A 154 | 4DC0..4DFF; Yijing Hexagram Symbols 155 | 4E00..9FFF; CJK Unified Ideographs 156 | A000..A48F; Yi Syllables 157 | A490..A4CF; Yi Radicals 158 | A4D0..A4FF; Lisu 159 | A500..A63F; Vai 160 | A640..A69F; Cyrillic Extended-B 161 | A6A0..A6FF; Bamum 162 | A700..A71F; Modifier Tone Letters 163 | A720..A7FF; Latin Extended-D 164 | A800..A82F; Syloti Nagri 165 | A830..A83F; Common Indic Number Forms 166 | A840..A87F; Phags-pa 167 | A880..A8DF; Saurashtra 168 | A8E0..A8FF; Devanagari Extended 169 | A900..A92F; Kayah Li 170 | A930..A95F; Rejang 171 | A960..A97F; Hangul Jamo Extended-A 172 | A980..A9DF; Javanese 173 | A9E0..A9FF; Myanmar Extended-B 174 | AA00..AA5F; Cham 175 | AA60..AA7F; Myanmar Extended-A 176 | AA80..AADF; Tai Viet 177 | AAE0..AAFF; Meetei Mayek Extensions 178 | AB00..AB2F; Ethiopic Extended-A 179 | AB30..AB6F; Latin Extended-E 180 | AB70..ABBF; Cherokee Supplement 181 | ABC0..ABFF; Meetei Mayek 182 | AC00..D7AF; Hangul Syllables 183 | D7B0..D7FF; Hangul Jamo Extended-B 184 | D800..DB7F; High Surrogates 185 | DB80..DBFF; High Private Use Surrogates 186 | DC00..DFFF; Low Surrogates 187 | E000..F8FF; Private Use Area 188 | F900..FAFF; CJK Compatibility Ideographs 189 | FB00..FB4F; Alphabetic Presentation Forms 190 | FB50..FDFF; Arabic Presentation Forms-A 191 | FE00..FE0F; Variation Selectors 192 | FE10..FE1F; Vertical Forms 193 | FE20..FE2F; Combining Half Marks 194 | FE30..FE4F; CJK Compatibility Forms 195 | FE50..FE6F; Small 
Form Variants 196 | FE70..FEFF; Arabic Presentation Forms-B 197 | FF00..FFEF; Halfwidth and Fullwidth Forms 198 | FFF0..FFFF; Specials 199 | 10000..1007F; Linear B Syllabary 200 | 10080..100FF; Linear B Ideograms 201 | 10100..1013F; Aegean Numbers 202 | 10140..1018F; Ancient Greek Numbers 203 | 10190..101CF; Ancient Symbols 204 | 101D0..101FF; Phaistos Disc 205 | 10280..1029F; Lycian 206 | 102A0..102DF; Carian 207 | 102E0..102FF; Coptic Epact Numbers 208 | 10300..1032F; Old Italic 209 | 10330..1034F; Gothic 210 | 10350..1037F; Old Permic 211 | 10380..1039F; Ugaritic 212 | 103A0..103DF; Old Persian 213 | 10400..1044F; Deseret 214 | 10450..1047F; Shavian 215 | 10480..104AF; Osmanya 216 | 104B0..104FF; Osage 217 | 10500..1052F; Elbasan 218 | 10530..1056F; Caucasian Albanian 219 | 10570..105BF; Vithkuqi 220 | 10600..1077F; Linear A 221 | 10780..107BF; Latin Extended-F 222 | 10800..1083F; Cypriot Syllabary 223 | 10840..1085F; Imperial Aramaic 224 | 10860..1087F; Palmyrene 225 | 10880..108AF; Nabataean 226 | 108E0..108FF; Hatran 227 | 10900..1091F; Phoenician 228 | 10920..1093F; Lydian 229 | 10980..1099F; Meroitic Hieroglyphs 230 | 109A0..109FF; Meroitic Cursive 231 | 10A00..10A5F; Kharoshthi 232 | 10A60..10A7F; Old South Arabian 233 | 10A80..10A9F; Old North Arabian 234 | 10AC0..10AFF; Manichaean 235 | 10B00..10B3F; Avestan 236 | 10B40..10B5F; Inscriptional Parthian 237 | 10B60..10B7F; Inscriptional Pahlavi 238 | 10B80..10BAF; Psalter Pahlavi 239 | 10C00..10C4F; Old Turkic 240 | 10C80..10CFF; Old Hungarian 241 | 10D00..10D3F; Hanifi Rohingya 242 | 10E60..10E7F; Rumi Numeral Symbols 243 | 10E80..10EBF; Yezidi 244 | 10EC0..10EFF; Arabic Extended-C 245 | 10F00..10F2F; Old Sogdian 246 | 10F30..10F6F; Sogdian 247 | 10F70..10FAF; Old Uyghur 248 | 10FB0..10FDF; Chorasmian 249 | 10FE0..10FFF; Elymaic 250 | 11000..1107F; Brahmi 251 | 11080..110CF; Kaithi 252 | 110D0..110FF; Sora Sompeng 253 | 11100..1114F; Chakma 254 | 11150..1117F; Mahajani 255 | 11180..111DF; Sharada 256 | 
111E0..111FF; Sinhala Archaic Numbers 257 | 11200..1124F; Khojki 258 | 11280..112AF; Multani 259 | 112B0..112FF; Khudawadi 260 | 11300..1137F; Grantha 261 | 11400..1147F; Newa 262 | 11480..114DF; Tirhuta 263 | 11580..115FF; Siddham 264 | 11600..1165F; Modi 265 | 11660..1167F; Mongolian Supplement 266 | 11680..116CF; Takri 267 | 11700..1174F; Ahom 268 | 11800..1184F; Dogra 269 | 118A0..118FF; Warang Citi 270 | 11900..1195F; Dives Akuru 271 | 119A0..119FF; Nandinagari 272 | 11A00..11A4F; Zanabazar Square 273 | 11A50..11AAF; Soyombo 274 | 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A 275 | 11AC0..11AFF; Pau Cin Hau 276 | 11B00..11B5F; Devanagari Extended-A 277 | 11C00..11C6F; Bhaiksuki 278 | 11C70..11CBF; Marchen 279 | 11D00..11D5F; Masaram Gondi 280 | 11D60..11DAF; Gunjala Gondi 281 | 11EE0..11EFF; Makasar 282 | 11F00..11F5F; Kawi 283 | 11FB0..11FBF; Lisu Supplement 284 | 11FC0..11FFF; Tamil Supplement 285 | 12000..123FF; Cuneiform 286 | 12400..1247F; Cuneiform Numbers and Punctuation 287 | 12480..1254F; Early Dynastic Cuneiform 288 | 12F90..12FFF; Cypro-Minoan 289 | 13000..1342F; Egyptian Hieroglyphs 290 | 13430..1345F; Egyptian Hieroglyph Format Controls 291 | 14400..1467F; Anatolian Hieroglyphs 292 | 16800..16A3F; Bamum Supplement 293 | 16A40..16A6F; Mro 294 | 16A70..16ACF; Tangsa 295 | 16AD0..16AFF; Bassa Vah 296 | 16B00..16B8F; Pahawh Hmong 297 | 16E40..16E9F; Medefaidrin 298 | 16F00..16F9F; Miao 299 | 16FE0..16FFF; Ideographic Symbols and Punctuation 300 | 17000..187FF; Tangut 301 | 18800..18AFF; Tangut Components 302 | 18B00..18CFF; Khitan Small Script 303 | 18D00..18D7F; Tangut Supplement 304 | 1AFF0..1AFFF; Kana Extended-B 305 | 1B000..1B0FF; Kana Supplement 306 | 1B100..1B12F; Kana Extended-A 307 | 1B130..1B16F; Small Kana Extension 308 | 1B170..1B2FF; Nushu 309 | 1BC00..1BC9F; Duployan 310 | 1BCA0..1BCAF; Shorthand Format Controls 311 | 1CF00..1CFCF; Znamenny Musical Notation 312 | 1D000..1D0FF; Byzantine Musical Symbols 313 | 
1D100..1D1FF; Musical Symbols 314 | 1D200..1D24F; Ancient Greek Musical Notation 315 | 1D2C0..1D2DF; Kaktovik Numerals 316 | 1D2E0..1D2FF; Mayan Numerals 317 | 1D300..1D35F; Tai Xuan Jing Symbols 318 | 1D360..1D37F; Counting Rod Numerals 319 | 1D400..1D7FF; Mathematical Alphanumeric Symbols 320 | 1D800..1DAAF; Sutton SignWriting 321 | 1DF00..1DFFF; Latin Extended-G 322 | 1E000..1E02F; Glagolitic Supplement 323 | 1E030..1E08F; Cyrillic Extended-D 324 | 1E100..1E14F; Nyiakeng Puachue Hmong 325 | 1E290..1E2BF; Toto 326 | 1E2C0..1E2FF; Wancho 327 | 1E4D0..1E4FF; Nag Mundari 328 | 1E7E0..1E7FF; Ethiopic Extended-B 329 | 1E800..1E8DF; Mende Kikakui 330 | 1E900..1E95F; Adlam 331 | 1EC70..1ECBF; Indic Siyaq Numbers 332 | 1ED00..1ED4F; Ottoman Siyaq Numbers 333 | 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 334 | 1F000..1F02F; Mahjong Tiles 335 | 1F030..1F09F; Domino Tiles 336 | 1F0A0..1F0FF; Playing Cards 337 | 1F100..1F1FF; Enclosed Alphanumeric Supplement 338 | 1F200..1F2FF; Enclosed Ideographic Supplement 339 | 1F300..1F5FF; Miscellaneous Symbols and Pictographs 340 | 1F600..1F64F; Emoticons 341 | 1F650..1F67F; Ornamental Dingbats 342 | 1F680..1F6FF; Transport and Map Symbols 343 | 1F700..1F77F; Alchemical Symbols 344 | 1F780..1F7FF; Geometric Shapes Extended 345 | 1F800..1F8FF; Supplemental Arrows-C 346 | 1F900..1F9FF; Supplemental Symbols and Pictographs 347 | 1FA00..1FA6F; Chess Symbols 348 | 1FA70..1FAFF; Symbols and Pictographs Extended-A 349 | 1FB00..1FBFF; Symbols for Legacy Computing 350 | 20000..2A6DF; CJK Unified Ideographs Extension B 351 | 2A700..2B73F; CJK Unified Ideographs Extension C 352 | 2B740..2B81F; CJK Unified Ideographs Extension D 353 | 2B820..2CEAF; CJK Unified Ideographs Extension E 354 | 2CEB0..2EBEF; CJK Unified Ideographs Extension F 355 | 2F800..2FA1F; CJK Compatibility Ideographs Supplement 356 | 30000..3134F; CJK Unified Ideographs Extension G 357 | 31350..323AF; CJK Unified Ideographs Extension H 358 | E0000..E007F; Tags 359 | 
E0100..E01EF; Variation Selectors Supplement 360 | F0000..FFFFF; Supplementary Private Use Area-A 361 | 100000..10FFFF; Supplementary Private Use Area-B 362 | 363 | # EOF 364 | -------------------------------------------------------------------------------- /src/UnicodeData/CaseFolding.txt: -------------------------------------------------------------------------------- 1 | # CaseFolding-15.0.0.txt 2 | # Date: 2022-02-02, 23:35:35 GMT 3 | # © 2022 Unicode®, Inc. 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 5 | # For terms of use, see https://www.unicode.org/terms_of_use.html 6 | # 7 | # Unicode Character Database 8 | # For documentation, see https://www.unicode.org/reports/tr44/ 9 | # 10 | # Case Folding Properties 11 | # 12 | # This file is a supplement to the UnicodeData file. 13 | # It provides a case folding mapping generated from the Unicode Character Database. 14 | # If all characters are mapped according to the full mapping below, then 15 | # case differences (according to UnicodeData.txt and SpecialCasing.txt) 16 | # are eliminated. 17 | # 18 | # The data supports both implementations that require simple case foldings 19 | # (where string lengths don't change), and implementations that allow full case folding 20 | # (where string lengths may grow). Note that where they can be supported, the 21 | # full case foldings are superior: for example, they allow "MASSE" and "Maße" to match. 22 | # 23 | # All code points not listed in this file map to themselves. 24 | # 25 | # NOTE: case folding does not preserve normalization formats! 26 | # 27 | # For information on case folding, including how to have case folding 28 | # preserve normalization formats, see Section 3.13 Default Case Algorithms in 29 | # The Unicode Standard. 
30 | # 31 | # ================================================================================ 32 | # Format 33 | # ================================================================================ 34 | # The entries in this file are in the following machine-readable format: 35 | # 36 | # <code>; <status>; <mapping>; # <name> 37 | # 38 | # The status field is: 39 | # C: common case folding, common mappings shared by both simple and full mappings. 40 | # F: full case folding, mappings that cause strings to grow in length. Multiple characters are separated by spaces. 41 | # S: simple case folding, mappings to single characters where different from F. 42 | # T: special case for uppercase I and dotted uppercase I 43 | # - For non-Turkic languages, this mapping is normally not used. 44 | # - For Turkic languages (tr, az), this mapping can be used instead of the normal mapping for these characters. 45 | # Note that the Turkic mappings do not maintain canonical equivalence without additional processing. 46 | # See the discussions of case mapping in the Unicode Standard for more information. 47 | # 48 | # Usage: 49 | # A. To do a simple case folding, use the mappings with status C + S. 50 | # B. To do a full case folding, use the mappings with status C + F. 51 | # 52 | # The mappings with status T can be used or omitted depending on the desired case-folding 53 | # behavior. (The default option is to exclude them.) 54 | # 55 | # ================================================================= 56 | 57 | # Property: Case_Folding 58 | 59 | # All code points not explicitly listed for Case_Folding 60 | # have the value C for the status field, and the code point itself for the mapping field. 
61 | 62 | # ================================================================= 63 | 0041; C; 0061; # LATIN CAPITAL LETTER A 64 | 0042; C; 0062; # LATIN CAPITAL LETTER B 65 | 0043; C; 0063; # LATIN CAPITAL LETTER C 66 | 0044; C; 0064; # LATIN CAPITAL LETTER D 67 | 0045; C; 0065; # LATIN CAPITAL LETTER E 68 | 0046; C; 0066; # LATIN CAPITAL LETTER F 69 | 0047; C; 0067; # LATIN CAPITAL LETTER G 70 | 0048; C; 0068; # LATIN CAPITAL LETTER H 71 | 0049; C; 0069; # LATIN CAPITAL LETTER I 72 | 0049; T; 0131; # LATIN CAPITAL LETTER I 73 | 004A; C; 006A; # LATIN CAPITAL LETTER J 74 | 004B; C; 006B; # LATIN CAPITAL LETTER K 75 | 004C; C; 006C; # LATIN CAPITAL LETTER L 76 | 004D; C; 006D; # LATIN CAPITAL LETTER M 77 | 004E; C; 006E; # LATIN CAPITAL LETTER N 78 | 004F; C; 006F; # LATIN CAPITAL LETTER O 79 | 0050; C; 0070; # LATIN CAPITAL LETTER P 80 | 0051; C; 0071; # LATIN CAPITAL LETTER Q 81 | 0052; C; 0072; # LATIN CAPITAL LETTER R 82 | 0053; C; 0073; # LATIN CAPITAL LETTER S 83 | 0054; C; 0074; # LATIN CAPITAL LETTER T 84 | 0055; C; 0075; # LATIN CAPITAL LETTER U 85 | 0056; C; 0076; # LATIN CAPITAL LETTER V 86 | 0057; C; 0077; # LATIN CAPITAL LETTER W 87 | 0058; C; 0078; # LATIN CAPITAL LETTER X 88 | 0059; C; 0079; # LATIN CAPITAL LETTER Y 89 | 005A; C; 007A; # LATIN CAPITAL LETTER Z 90 | 00B5; C; 03BC; # MICRO SIGN 91 | 00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE 92 | 00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE 93 | 00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX 94 | 00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE 95 | 00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS 96 | 00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE 97 | 00C6; C; 00E6; # LATIN CAPITAL LETTER AE 98 | 00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA 99 | 00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE 100 | 00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE 101 | 00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX 102 | 00CB; C; 00EB; # LATIN CAPITAL LETTER 
E WITH DIAERESIS 103 | 00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE 104 | 00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE 105 | 00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX 106 | 00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS 107 | 00D0; C; 00F0; # LATIN CAPITAL LETTER ETH 108 | 00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE 109 | 00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE 110 | 00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE 111 | 00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX 112 | 00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE 113 | 00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS 114 | 00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE 115 | 00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE 116 | 00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE 117 | 00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX 118 | 00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS 119 | 00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE 120 | 00DE; C; 00FE; # LATIN CAPITAL LETTER THORN 121 | 00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S 122 | 0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON 123 | 0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE 124 | 0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK 125 | 0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE 126 | 0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX 127 | 010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE 128 | 010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON 129 | 010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON 130 | 0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE 131 | 0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON 132 | 0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE 133 | 0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE 134 | 0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK 135 | 011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON 136 | 011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX 137 | 011E; 
C; 011F; # LATIN CAPITAL LETTER G WITH BREVE 138 | 0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE 139 | 0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA 140 | 0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX 141 | 0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE 142 | 0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE 143 | 012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON 144 | 012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE 145 | 012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK 146 | 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE 147 | 0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE 148 | 0132; C; 0133; # LATIN CAPITAL LIGATURE IJ 149 | 0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX 150 | 0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA 151 | 0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE 152 | 013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA 153 | 013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON 154 | 013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT 155 | 0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE 156 | 0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE 157 | 0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA 158 | 0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON 159 | 0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 160 | 014A; C; 014B; # LATIN CAPITAL LETTER ENG 161 | 014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON 162 | 014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE 163 | 0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE 164 | 0152; C; 0153; # LATIN CAPITAL LIGATURE OE 165 | 0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE 166 | 0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA 167 | 0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON 168 | 015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE 169 | 015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX 170 | 015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA 171 | 0160; C; 0161; # 
LATIN CAPITAL LETTER S WITH CARON 172 | 0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA 173 | 0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON 174 | 0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE 175 | 0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE 176 | 016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON 177 | 016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE 178 | 016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE 179 | 0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE 180 | 0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK 181 | 0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX 182 | 0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX 183 | 0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS 184 | 0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE 185 | 017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE 186 | 017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON 187 | 017F; C; 0073; # LATIN SMALL LETTER LONG S 188 | 0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK 189 | 0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR 190 | 0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX 191 | 0186; C; 0254; # LATIN CAPITAL LETTER OPEN O 192 | 0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK 193 | 0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D 194 | 018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK 195 | 018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR 196 | 018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E 197 | 018F; C; 0259; # LATIN CAPITAL LETTER SCHWA 198 | 0190; C; 025B; # LATIN CAPITAL LETTER OPEN E 199 | 0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK 200 | 0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK 201 | 0194; C; 0263; # LATIN CAPITAL LETTER GAMMA 202 | 0196; C; 0269; # LATIN CAPITAL LETTER IOTA 203 | 0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE 204 | 0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK 205 | 019C; C; 026F; # LATIN CAPITAL LETTER TURNED M 206 | 019D; C; 0272; # LATIN CAPITAL LETTER N WITH 
LEFT HOOK 207 | 019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE 208 | 01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN 209 | 01A2; C; 01A3; # LATIN CAPITAL LETTER OI 210 | 01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK 211 | 01A6; C; 0280; # LATIN LETTER YR 212 | 01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO 213 | 01A9; C; 0283; # LATIN CAPITAL LETTER ESH 214 | 01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK 215 | 01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK 216 | 01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN 217 | 01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON 218 | 01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK 219 | 01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK 220 | 01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE 221 | 01B7; C; 0292; # LATIN CAPITAL LETTER EZH 222 | 01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED 223 | 01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE 224 | 01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON 225 | 01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON 226 | 01C7; C; 01C9; # LATIN CAPITAL LETTER LJ 227 | 01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J 228 | 01CA; C; 01CC; # LATIN CAPITAL LETTER NJ 229 | 01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J 230 | 01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON 231 | 01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON 232 | 01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON 233 | 01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON 234 | 01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON 235 | 01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE 236 | 01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON 237 | 01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE 238 | 01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON 239 | 01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON 240 | 01E2; C; 01E3; # LATIN CAPITAL LETTER AE 
WITH MACRON 241 | 01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE 242 | 01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON 243 | 01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON 244 | 01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK 245 | 01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON 246 | 01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON 247 | 01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON 248 | 01F1; C; 01F3; # LATIN CAPITAL LETTER DZ 249 | 01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z 250 | 01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE 251 | 01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR 252 | 01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN 253 | 01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE 254 | 01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE 255 | 01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE 256 | 01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE 257 | 0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE 258 | 0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE 259 | 0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE 260 | 0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE 261 | 0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE 262 | 020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE 263 | 020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE 264 | 020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE 265 | 0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE 266 | 0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE 267 | 0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE 268 | 0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE 269 | 0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW 270 | 021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW 271 | 021C; C; 021D; # LATIN CAPITAL LETTER YOGH 272 | 021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON 273 | 0220; C; 019E; # LATIN 
CAPITAL LETTER N WITH LONG RIGHT LEG 274 | 0222; C; 0223; # LATIN CAPITAL LETTER OU 275 | 0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK 276 | 0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE 277 | 0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA 278 | 022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON 279 | 022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON 280 | 022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE 281 | 0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON 282 | 0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON 283 | 023A; C; 2C65; # LATIN CAPITAL LETTER A WITH STROKE 284 | 023B; C; 023C; # LATIN CAPITAL LETTER C WITH STROKE 285 | 023D; C; 019A; # LATIN CAPITAL LETTER L WITH BAR 286 | 023E; C; 2C66; # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE 287 | 0241; C; 0242; # LATIN CAPITAL LETTER GLOTTAL STOP 288 | 0243; C; 0180; # LATIN CAPITAL LETTER B WITH STROKE 289 | 0244; C; 0289; # LATIN CAPITAL LETTER U BAR 290 | 0245; C; 028C; # LATIN CAPITAL LETTER TURNED V 291 | 0246; C; 0247; # LATIN CAPITAL LETTER E WITH STROKE 292 | 0248; C; 0249; # LATIN CAPITAL LETTER J WITH STROKE 293 | 024A; C; 024B; # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL 294 | 024C; C; 024D; # LATIN CAPITAL LETTER R WITH STROKE 295 | 024E; C; 024F; # LATIN CAPITAL LETTER Y WITH STROKE 296 | 0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI 297 | 0370; C; 0371; # GREEK CAPITAL LETTER HETA 298 | 0372; C; 0373; # GREEK CAPITAL LETTER ARCHAIC SAMPI 299 | 0376; C; 0377; # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA 300 | 037F; C; 03F3; # GREEK CAPITAL LETTER YOT 301 | 0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS 302 | 0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS 303 | 0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS 304 | 038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS 305 | 038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS 306 | 038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS 307 | 038F; 
C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS 308 | 0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 309 | 0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA 310 | 0392; C; 03B2; # GREEK CAPITAL LETTER BETA 311 | 0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA 312 | 0394; C; 03B4; # GREEK CAPITAL LETTER DELTA 313 | 0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON 314 | 0396; C; 03B6; # GREEK CAPITAL LETTER ZETA 315 | 0397; C; 03B7; # GREEK CAPITAL LETTER ETA 316 | 0398; C; 03B8; # GREEK CAPITAL LETTER THETA 317 | 0399; C; 03B9; # GREEK CAPITAL LETTER IOTA 318 | 039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA 319 | 039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA 320 | 039C; C; 03BC; # GREEK CAPITAL LETTER MU 321 | 039D; C; 03BD; # GREEK CAPITAL LETTER NU 322 | 039E; C; 03BE; # GREEK CAPITAL LETTER XI 323 | 039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON 324 | 03A0; C; 03C0; # GREEK CAPITAL LETTER PI 325 | 03A1; C; 03C1; # GREEK CAPITAL LETTER RHO 326 | 03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA 327 | 03A4; C; 03C4; # GREEK CAPITAL LETTER TAU 328 | 03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON 329 | 03A6; C; 03C6; # GREEK CAPITAL LETTER PHI 330 | 03A7; C; 03C7; # GREEK CAPITAL LETTER CHI 331 | 03A8; C; 03C8; # GREEK CAPITAL LETTER PSI 332 | 03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA 333 | 03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 334 | 03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 335 | 03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 336 | 03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA 337 | 03CF; C; 03D7; # GREEK CAPITAL KAI SYMBOL 338 | 03D0; C; 03B2; # GREEK BETA SYMBOL 339 | 03D1; C; 03B8; # GREEK THETA SYMBOL 340 | 03D5; C; 03C6; # GREEK PHI SYMBOL 341 | 03D6; C; 03C0; # GREEK PI SYMBOL 342 | 03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA 343 | 03DA; C; 03DB; # GREEK LETTER STIGMA 344 | 03DC; C; 03DD; # GREEK LETTER DIGAMMA 345 | 03DE; C; 03DF; # GREEK LETTER KOPPA 346 | 03E0; C; 03E1; # GREEK 
LETTER SAMPI 347 | 03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI 348 | 03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI 349 | 03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI 350 | 03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI 351 | 03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA 352 | 03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA 353 | 03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI 354 | 03F0; C; 03BA; # GREEK KAPPA SYMBOL 355 | 03F1; C; 03C1; # GREEK RHO SYMBOL 356 | 03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL 357 | 03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL 358 | 03F7; C; 03F8; # GREEK CAPITAL LETTER SHO 359 | 03F9; C; 03F2; # GREEK CAPITAL LUNATE SIGMA SYMBOL 360 | 03FA; C; 03FB; # GREEK CAPITAL LETTER SAN 361 | 03FD; C; 037B; # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL 362 | 03FE; C; 037C; # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL 363 | 03FF; C; 037D; # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL 364 | 0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE 365 | 0401; C; 0451; # CYRILLIC CAPITAL LETTER IO 366 | 0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE 367 | 0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE 368 | 0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE 369 | 0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE 370 | 0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I 371 | 0407; C; 0457; # CYRILLIC CAPITAL LETTER YI 372 | 0408; C; 0458; # CYRILLIC CAPITAL LETTER JE 373 | 0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE 374 | 040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE 375 | 040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE 376 | 040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE 377 | 040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE 378 | 040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U 379 | 040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE 380 | 0410; C; 0430; # CYRILLIC CAPITAL LETTER A 381 | 0411; C; 0431; # CYRILLIC CAPITAL LETTER BE 382 | 0412; C; 0432; # CYRILLIC CAPITAL LETTER VE 383 | 0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE 384 | 0414; C; 0434; # 
CYRILLIC CAPITAL LETTER DE 385 | 0415; C; 0435; # CYRILLIC CAPITAL LETTER IE 386 | 0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE 387 | 0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE 388 | 0418; C; 0438; # CYRILLIC CAPITAL LETTER I 389 | 0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I 390 | 041A; C; 043A; # CYRILLIC CAPITAL LETTER KA 391 | 041B; C; 043B; # CYRILLIC CAPITAL LETTER EL 392 | 041C; C; 043C; # CYRILLIC CAPITAL LETTER EM 393 | 041D; C; 043D; # CYRILLIC CAPITAL LETTER EN 394 | 041E; C; 043E; # CYRILLIC CAPITAL LETTER O 395 | 041F; C; 043F; # CYRILLIC CAPITAL LETTER PE 396 | 0420; C; 0440; # CYRILLIC CAPITAL LETTER ER 397 | 0421; C; 0441; # CYRILLIC CAPITAL LETTER ES 398 | 0422; C; 0442; # CYRILLIC CAPITAL LETTER TE 399 | 0423; C; 0443; # CYRILLIC CAPITAL LETTER U 400 | 0424; C; 0444; # CYRILLIC CAPITAL LETTER EF 401 | 0425; C; 0445; # CYRILLIC CAPITAL LETTER HA 402 | 0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE 403 | 0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE 404 | 0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA 405 | 0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA 406 | 042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN 407 | 042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU 408 | 042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN 409 | 042D; C; 044D; # CYRILLIC CAPITAL LETTER E 410 | 042E; C; 044E; # CYRILLIC CAPITAL LETTER YU 411 | 042F; C; 044F; # CYRILLIC CAPITAL LETTER YA 412 | 0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA 413 | 0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT 414 | 0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E 415 | 0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS 416 | 0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS 417 | 046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS 418 | 046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS 419 | 046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI 420 | 0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI 421 | 0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA 422 | 0474; C; 0475; # CYRILLIC 
CAPITAL LETTER IZHITSA 423 | 0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT 424 | 0478; C; 0479; # CYRILLIC CAPITAL LETTER UK 425 | 047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA 426 | 047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO 427 | 047E; C; 047F; # CYRILLIC CAPITAL LETTER OT 428 | 0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA 429 | 048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL 430 | 048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN 431 | 048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK 432 | 0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN 433 | 0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE 434 | 0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK 435 | 0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER 436 | 0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER 437 | 049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER 438 | 049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE 439 | 049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE 440 | 04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA 441 | 04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER 442 | 04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE 443 | 04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK 444 | 04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA 445 | 04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER 446 | 04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER 447 | 04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U 448 | 04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE 449 | 04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER 450 | 04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE 451 | 04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER 452 | 04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE 453 | 04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA 454 | 04BC; C; 04BD; # CYRILLIC 
CAPITAL LETTER ABKHASIAN CHE 455 | 04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER 456 | 04C0; C; 04CF; # CYRILLIC LETTER PALOCHKA 457 | 04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE 458 | 04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK 459 | 04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL 460 | 04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK 461 | 04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL 462 | 04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE 463 | 04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL 464 | 04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE 465 | 04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS 466 | 04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE 467 | 04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE 468 | 04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA 469 | 04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS 470 | 04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS 471 | 04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS 472 | 04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE 473 | 04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON 474 | 04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS 475 | 04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS 476 | 04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O 477 | 04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS 478 | 04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS 479 | 04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON 480 | 04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS 481 | 04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE 482 | 04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS 483 | 04F6; C; 04F7; # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER 484 | 04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS 485 | 04FA; C; 04FB; # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK 486 | 04FC; C; 04FD; # 
CYRILLIC CAPITAL LETTER HA WITH HOOK 487 | 04FE; C; 04FF; # CYRILLIC CAPITAL LETTER HA WITH STROKE 488 | 0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE 489 | 0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE 490 | 0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE 491 | 0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE 492 | 0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE 493 | 050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE 494 | 050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE 495 | 050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE 496 | 0510; C; 0511; # CYRILLIC CAPITAL LETTER REVERSED ZE 497 | 0512; C; 0513; # CYRILLIC CAPITAL LETTER EL WITH HOOK 498 | 0514; C; 0515; # CYRILLIC CAPITAL LETTER LHA 499 | 0516; C; 0517; # CYRILLIC CAPITAL LETTER RHA 500 | 0518; C; 0519; # CYRILLIC CAPITAL LETTER YAE 501 | 051A; C; 051B; # CYRILLIC CAPITAL LETTER QA 502 | 051C; C; 051D; # CYRILLIC CAPITAL LETTER WE 503 | 051E; C; 051F; # CYRILLIC CAPITAL LETTER ALEUT KA 504 | 0520; C; 0521; # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK 505 | 0522; C; 0523; # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK 506 | 0524; C; 0525; # CYRILLIC CAPITAL LETTER PE WITH DESCENDER 507 | 0526; C; 0527; # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER 508 | 0528; C; 0529; # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK 509 | 052A; C; 052B; # CYRILLIC CAPITAL LETTER DZZHE 510 | 052C; C; 052D; # CYRILLIC CAPITAL LETTER DCHE 511 | 052E; C; 052F; # CYRILLIC CAPITAL LETTER EL WITH DESCENDER 512 | 0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB 513 | 0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN 514 | 0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM 515 | 0534; C; 0564; # ARMENIAN CAPITAL LETTER DA 516 | 0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH 517 | 0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA 518 | 0537; C; 0567; # ARMENIAN CAPITAL LETTER EH 519 | 0538; C; 0568; # ARMENIAN CAPITAL LETTER ET 520 | 0539; C; 0569; # ARMENIAN CAPITAL LETTER TO 521 | 053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE 522 | 053B; C; 
056B; # ARMENIAN CAPITAL LETTER INI 523 | 053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN 524 | 053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH 525 | 053E; C; 056E; # ARMENIAN CAPITAL LETTER CA 526 | 053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN 527 | 0540; C; 0570; # ARMENIAN CAPITAL LETTER HO 528 | 0541; C; 0571; # ARMENIAN CAPITAL LETTER JA 529 | 0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD 530 | 0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH 531 | 0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN 532 | 0545; C; 0575; # ARMENIAN CAPITAL LETTER YI 533 | 0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW 534 | 0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA 535 | 0548; C; 0578; # ARMENIAN CAPITAL LETTER VO 536 | 0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA 537 | 054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH 538 | 054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH 539 | 054C; C; 057C; # ARMENIAN CAPITAL LETTER RA 540 | 054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH 541 | 054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW 542 | 054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN 543 | 0550; C; 0580; # ARMENIAN CAPITAL LETTER REH 544 | 0551; C; 0581; # ARMENIAN CAPITAL LETTER CO 545 | 0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN 546 | 0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR 547 | 0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH 548 | 0555; C; 0585; # ARMENIAN CAPITAL LETTER OH 549 | 0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH 550 | 0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN 551 | 10A0; C; 2D00; # GEORGIAN CAPITAL LETTER AN 552 | 10A1; C; 2D01; # GEORGIAN CAPITAL LETTER BAN 553 | 10A2; C; 2D02; # GEORGIAN CAPITAL LETTER GAN 554 | 10A3; C; 2D03; # GEORGIAN CAPITAL LETTER DON 555 | 10A4; C; 2D04; # GEORGIAN CAPITAL LETTER EN 556 | 10A5; C; 2D05; # GEORGIAN CAPITAL LETTER VIN 557 | 10A6; C; 2D06; # GEORGIAN CAPITAL LETTER ZEN 558 | 10A7; C; 2D07; # GEORGIAN CAPITAL LETTER TAN 559 | 10A8; C; 2D08; # GEORGIAN CAPITAL LETTER IN 560 | 10A9; C; 2D09; # GEORGIAN CAPITAL LETTER KAN 561 | 10AA; C; 
2D0A; # GEORGIAN CAPITAL LETTER LAS 562 | 10AB; C; 2D0B; # GEORGIAN CAPITAL LETTER MAN 563 | 10AC; C; 2D0C; # GEORGIAN CAPITAL LETTER NAR 564 | 10AD; C; 2D0D; # GEORGIAN CAPITAL LETTER ON 565 | 10AE; C; 2D0E; # GEORGIAN CAPITAL LETTER PAR 566 | 10AF; C; 2D0F; # GEORGIAN CAPITAL LETTER ZHAR 567 | 10B0; C; 2D10; # GEORGIAN CAPITAL LETTER RAE 568 | 10B1; C; 2D11; # GEORGIAN CAPITAL LETTER SAN 569 | 10B2; C; 2D12; # GEORGIAN CAPITAL LETTER TAR 570 | 10B3; C; 2D13; # GEORGIAN CAPITAL LETTER UN 571 | 10B4; C; 2D14; # GEORGIAN CAPITAL LETTER PHAR 572 | 10B5; C; 2D15; # GEORGIAN CAPITAL LETTER KHAR 573 | 10B6; C; 2D16; # GEORGIAN CAPITAL LETTER GHAN 574 | 10B7; C; 2D17; # GEORGIAN CAPITAL LETTER QAR 575 | 10B8; C; 2D18; # GEORGIAN CAPITAL LETTER SHIN 576 | 10B9; C; 2D19; # GEORGIAN CAPITAL LETTER CHIN 577 | 10BA; C; 2D1A; # GEORGIAN CAPITAL LETTER CAN 578 | 10BB; C; 2D1B; # GEORGIAN CAPITAL LETTER JIL 579 | 10BC; C; 2D1C; # GEORGIAN CAPITAL LETTER CIL 580 | 10BD; C; 2D1D; # GEORGIAN CAPITAL LETTER CHAR 581 | 10BE; C; 2D1E; # GEORGIAN CAPITAL LETTER XAN 582 | 10BF; C; 2D1F; # GEORGIAN CAPITAL LETTER JHAN 583 | 10C0; C; 2D20; # GEORGIAN CAPITAL LETTER HAE 584 | 10C1; C; 2D21; # GEORGIAN CAPITAL LETTER HE 585 | 10C2; C; 2D22; # GEORGIAN CAPITAL LETTER HIE 586 | 10C3; C; 2D23; # GEORGIAN CAPITAL LETTER WE 587 | 10C4; C; 2D24; # GEORGIAN CAPITAL LETTER HAR 588 | 10C5; C; 2D25; # GEORGIAN CAPITAL LETTER HOE 589 | 10C7; C; 2D27; # GEORGIAN CAPITAL LETTER YN 590 | 10CD; C; 2D2D; # GEORGIAN CAPITAL LETTER AEN 591 | 13F8; C; 13F0; # CHEROKEE SMALL LETTER YE 592 | 13F9; C; 13F1; # CHEROKEE SMALL LETTER YI 593 | 13FA; C; 13F2; # CHEROKEE SMALL LETTER YO 594 | 13FB; C; 13F3; # CHEROKEE SMALL LETTER YU 595 | 13FC; C; 13F4; # CHEROKEE SMALL LETTER YV 596 | 13FD; C; 13F5; # CHEROKEE SMALL LETTER MV 597 | 1C80; C; 0432; # CYRILLIC SMALL LETTER ROUNDED VE 598 | 1C81; C; 0434; # CYRILLIC SMALL LETTER LONG-LEGGED DE 599 | 1C82; C; 043E; # CYRILLIC SMALL LETTER NARROW O 600 | 1C83; C; 0441; # 
CYRILLIC SMALL LETTER WIDE ES 601 | 1C84; C; 0442; # CYRILLIC SMALL LETTER TALL TE 602 | 1C85; C; 0442; # CYRILLIC SMALL LETTER THREE-LEGGED TE 603 | 1C86; C; 044A; # CYRILLIC SMALL LETTER TALL HARD SIGN 604 | 1C87; C; 0463; # CYRILLIC SMALL LETTER TALL YAT 605 | 1C88; C; A64B; # CYRILLIC SMALL LETTER UNBLENDED UK 606 | 1C90; C; 10D0; # GEORGIAN MTAVRULI CAPITAL LETTER AN 607 | 1C91; C; 10D1; # GEORGIAN MTAVRULI CAPITAL LETTER BAN 608 | 1C92; C; 10D2; # GEORGIAN MTAVRULI CAPITAL LETTER GAN 609 | 1C93; C; 10D3; # GEORGIAN MTAVRULI CAPITAL LETTER DON 610 | 1C94; C; 10D4; # GEORGIAN MTAVRULI CAPITAL LETTER EN 611 | 1C95; C; 10D5; # GEORGIAN MTAVRULI CAPITAL LETTER VIN 612 | 1C96; C; 10D6; # GEORGIAN MTAVRULI CAPITAL LETTER ZEN 613 | 1C97; C; 10D7; # GEORGIAN MTAVRULI CAPITAL LETTER TAN 614 | 1C98; C; 10D8; # GEORGIAN MTAVRULI CAPITAL LETTER IN 615 | 1C99; C; 10D9; # GEORGIAN MTAVRULI CAPITAL LETTER KAN 616 | 1C9A; C; 10DA; # GEORGIAN MTAVRULI CAPITAL LETTER LAS 617 | 1C9B; C; 10DB; # GEORGIAN MTAVRULI CAPITAL LETTER MAN 618 | 1C9C; C; 10DC; # GEORGIAN MTAVRULI CAPITAL LETTER NAR 619 | 1C9D; C; 10DD; # GEORGIAN MTAVRULI CAPITAL LETTER ON 620 | 1C9E; C; 10DE; # GEORGIAN MTAVRULI CAPITAL LETTER PAR 621 | 1C9F; C; 10DF; # GEORGIAN MTAVRULI CAPITAL LETTER ZHAR 622 | 1CA0; C; 10E0; # GEORGIAN MTAVRULI CAPITAL LETTER RAE 623 | 1CA1; C; 10E1; # GEORGIAN MTAVRULI CAPITAL LETTER SAN 624 | 1CA2; C; 10E2; # GEORGIAN MTAVRULI CAPITAL LETTER TAR 625 | 1CA3; C; 10E3; # GEORGIAN MTAVRULI CAPITAL LETTER UN 626 | 1CA4; C; 10E4; # GEORGIAN MTAVRULI CAPITAL LETTER PHAR 627 | 1CA5; C; 10E5; # GEORGIAN MTAVRULI CAPITAL LETTER KHAR 628 | 1CA6; C; 10E6; # GEORGIAN MTAVRULI CAPITAL LETTER GHAN 629 | 1CA7; C; 10E7; # GEORGIAN MTAVRULI CAPITAL LETTER QAR 630 | 1CA8; C; 10E8; # GEORGIAN MTAVRULI CAPITAL LETTER SHIN 631 | 1CA9; C; 10E9; # GEORGIAN MTAVRULI CAPITAL LETTER CHIN 632 | 1CAA; C; 10EA; # GEORGIAN MTAVRULI CAPITAL LETTER CAN 633 | 1CAB; C; 10EB; # GEORGIAN MTAVRULI CAPITAL LETTER JIL 
634 | 1CAC; C; 10EC; # GEORGIAN MTAVRULI CAPITAL LETTER CIL 635 | 1CAD; C; 10ED; # GEORGIAN MTAVRULI CAPITAL LETTER CHAR 636 | 1CAE; C; 10EE; # GEORGIAN MTAVRULI CAPITAL LETTER XAN 637 | 1CAF; C; 10EF; # GEORGIAN MTAVRULI CAPITAL LETTER JHAN 638 | 1CB0; C; 10F0; # GEORGIAN MTAVRULI CAPITAL LETTER HAE 639 | 1CB1; C; 10F1; # GEORGIAN MTAVRULI CAPITAL LETTER HE 640 | 1CB2; C; 10F2; # GEORGIAN MTAVRULI CAPITAL LETTER HIE 641 | 1CB3; C; 10F3; # GEORGIAN MTAVRULI CAPITAL LETTER WE 642 | 1CB4; C; 10F4; # GEORGIAN MTAVRULI CAPITAL LETTER HAR 643 | 1CB5; C; 10F5; # GEORGIAN MTAVRULI CAPITAL LETTER HOE 644 | 1CB6; C; 10F6; # GEORGIAN MTAVRULI CAPITAL LETTER FI 645 | 1CB7; C; 10F7; # GEORGIAN MTAVRULI CAPITAL LETTER YN 646 | 1CB8; C; 10F8; # GEORGIAN MTAVRULI CAPITAL LETTER ELIFI 647 | 1CB9; C; 10F9; # GEORGIAN MTAVRULI CAPITAL LETTER TURNED GAN 648 | 1CBA; C; 10FA; # GEORGIAN MTAVRULI CAPITAL LETTER AIN 649 | 1CBD; C; 10FD; # GEORGIAN MTAVRULI CAPITAL LETTER AEN 650 | 1CBE; C; 10FE; # GEORGIAN MTAVRULI CAPITAL LETTER HARD SIGN 651 | 1CBF; C; 10FF; # GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN 652 | 1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW 653 | 1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE 654 | 1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW 655 | 1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW 656 | 1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE 657 | 1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE 658 | 1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW 659 | 1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW 660 | 1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA 661 | 1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW 662 | 1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE 663 | 1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE 664 | 1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW 665 | 1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW 666 | 
1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE 667 | 1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE 668 | 1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON 669 | 1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE 670 | 1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW 671 | 1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS 672 | 1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA 673 | 1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW 674 | 1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW 675 | 1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE 676 | 1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE 677 | 1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW 678 | 1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW 679 | 1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW 680 | 1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON 681 | 1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW 682 | 1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW 683 | 1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE 684 | 1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE 685 | 1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW 686 | 1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE 687 | 1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW 688 | 1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW 689 | 1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW 690 | 1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE 691 | 1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS 692 | 1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE 693 | 1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE 694 | 1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE 695 | 1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE 696 | 1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE 697 | 1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH 
DOT BELOW 698 | 1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON 699 | 1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW 700 | 1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE 701 | 1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW 702 | 1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE 703 | 1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE 704 | 1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE 705 | 1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE 706 | 1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW 707 | 1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW 708 | 1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW 709 | 1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW 710 | 1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW 711 | 1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW 712 | 1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE 713 | 1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS 714 | 1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE 715 | 1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW 716 | 1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE 717 | 1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE 718 | 1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS 719 | 1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE 720 | 1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW 721 | 1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE 722 | 1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS 723 | 1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE 724 | 1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX 725 | 1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW 726 | 1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW 727 | 1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW 728 | 1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS 729 | 1E98; F; 
0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE 730 | 1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 731 | 1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 732 | 1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE 733 | 1E9E; F; 0073 0073; # LATIN CAPITAL LETTER SHARP S 734 | 1E9E; S; 00DF; # LATIN CAPITAL LETTER SHARP S 735 | 1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW 736 | 1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE 737 | 1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE 738 | 1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE 739 | 1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE 740 | 1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE 741 | 1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW 742 | 1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE 743 | 1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE 744 | 1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE 745 | 1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE 746 | 1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW 747 | 1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW 748 | 1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE 749 | 1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE 750 | 1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE 751 | 1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE 752 | 1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE 753 | 1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE 754 | 1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW 755 | 1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE 756 | 1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW 757 | 1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW 758 | 1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE 759 | 
1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE 760 | 1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE 761 | 1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE 762 | 1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE 763 | 1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW 764 | 1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE 765 | 1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE 766 | 1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE 767 | 1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE 768 | 1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW 769 | 1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW 770 | 1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE 771 | 1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE 772 | 1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE 773 | 1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE 774 | 1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE 775 | 1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW 776 | 1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE 777 | 1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW 778 | 1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE 779 | 1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE 780 | 1EFA; C; 1EFB; # LATIN CAPITAL LETTER MIDDLE-WELSH LL 781 | 1EFC; C; 1EFD; # LATIN CAPITAL LETTER MIDDLE-WELSH V 782 | 1EFE; C; 1EFF; # LATIN CAPITAL LETTER Y WITH LOOP 783 | 1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI 784 | 1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA 785 | 1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 786 | 1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 787 | 1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 788 | 1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 789 | 1F0E; C; 
1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 790 | 1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 791 | 1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI 792 | 1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA 793 | 1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 794 | 1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 795 | 1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 796 | 1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 797 | 1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI 798 | 1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA 799 | 1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 800 | 1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 801 | 1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 802 | 1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA 803 | 1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 804 | 1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 805 | 1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI 806 | 1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA 807 | 1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 808 | 1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 809 | 1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 810 | 1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 811 | 1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 812 | 1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 813 | 1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI 814 | 1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA 815 | 1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 816 | 1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 817 | 1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 818 | 
1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 819 | 1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI 820 | 1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 821 | 1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 822 | 1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 823 | 1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA 824 | 1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 825 | 1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 826 | 1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 827 | 1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI 828 | 1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA 829 | 1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 830 | 1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 831 | 1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 832 | 1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 833 | 1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 834 | 1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 835 | 1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 836 | 1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 837 | 1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 838 | 1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 839 | 1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 840 | 1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 841 | 1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 842 | 1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 843 | 1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER 
ALPHA WITH PSILI AND PROSGEGRAMMENI 844 | 1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 845 | 1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 846 | 1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 847 | 1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 848 | 1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 849 | 1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 850 | 1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 851 | 1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 852 | 1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 853 | 1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 854 | 1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 855 | 1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 856 | 1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 857 | 1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 858 | 1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 859 | 1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 860 | 1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 861 | 1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 862 | 1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 863 | 1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 864 | 1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 865 | 1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND 
PERISPOMENI AND YPOGEGRAMMENI 866 | 1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 867 | 1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 868 | 1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 869 | 1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 870 | 1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 871 | 1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 872 | 1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 873 | 1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 874 | 1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 875 | 1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 876 | 1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 877 | 1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 878 | 1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 879 | 1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 880 | 1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 881 | 1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 882 | 1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 883 | 1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 884 | 1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 885 | 1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 886 | 1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 887 | 1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 888 | 
1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 889 | 1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 890 | 1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 891 | 1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 892 | 1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 893 | 1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 894 | 1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 895 | 1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 896 | 1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 897 | 1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 898 | 1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 899 | 1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 900 | 1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 901 | 1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 902 | 1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 903 | 1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 904 | 1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 905 | 1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 906 | 1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 907 | 1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 908 | 1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 909 | 1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 910 | 1FB6; F; 03B1 
0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 911 | 1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 912 | 1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY 913 | 1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON 914 | 1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA 915 | 1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA 916 | 1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 917 | 1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 918 | 1FBE; C; 03B9; # GREEK PROSGEGRAMMENI 919 | 1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 920 | 1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 921 | 1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 922 | 1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI 923 | 1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 924 | 1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA 925 | 1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA 926 | 1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA 927 | 1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA 928 | 1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 929 | 1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 930 | 1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 931 | 1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 932 | 1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 933 | 1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 934 | 1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY 935 | 1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON 936 | 1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA 937 | 1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA 938 | 1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 939 | 
1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 940 | 1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI 941 | 1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 942 | 1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 943 | 1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY 944 | 1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON 945 | 1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA 946 | 1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA 947 | 1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA 948 | 1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 949 | 1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 950 | 1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 951 | 1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 952 | 1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 953 | 1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA 954 | 1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA 955 | 1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA 956 | 1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA 957 | 1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 958 | 1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 959 | 2126; C; 03C9; # OHM SIGN 960 | 212A; C; 006B; # KELVIN SIGN 961 | 212B; C; 00E5; # ANGSTROM SIGN 962 | 2132; C; 214E; # TURNED CAPITAL F 963 | 2160; C; 2170; # ROMAN NUMERAL ONE 964 | 2161; C; 2171; # ROMAN NUMERAL TWO 965 | 2162; C; 2172; # ROMAN NUMERAL THREE 966 | 2163; C; 2173; # ROMAN NUMERAL FOUR 967 | 2164; C; 2174; # ROMAN NUMERAL FIVE 968 | 2165; C; 2175; # ROMAN NUMERAL SIX 969 | 2166; C; 2176; # ROMAN NUMERAL SEVEN 970 | 2167; C; 2177; # ROMAN NUMERAL EIGHT 971 | 2168; C; 2178; # ROMAN NUMERAL NINE 972 | 2169; C; 2179; # ROMAN NUMERAL TEN 973 | 216A; C; 217A; # ROMAN 
NUMERAL ELEVEN 974 | 216B; C; 217B; # ROMAN NUMERAL TWELVE 975 | 216C; C; 217C; # ROMAN NUMERAL FIFTY 976 | 216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED 977 | 216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED 978 | 216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND 979 | 2183; C; 2184; # ROMAN NUMERAL REVERSED ONE HUNDRED 980 | 24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A 981 | 24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B 982 | 24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C 983 | 24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D 984 | 24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E 985 | 24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F 986 | 24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G 987 | 24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H 988 | 24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I 989 | 24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J 990 | 24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K 991 | 24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L 992 | 24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M 993 | 24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N 994 | 24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O 995 | 24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P 996 | 24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q 997 | 24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R 998 | 24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S 999 | 24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T 1000 | 24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U 1001 | 24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V 1002 | 24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W 1003 | 24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X 1004 | 24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y 1005 | 24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z 1006 | 2C00; C; 2C30; # GLAGOLITIC CAPITAL LETTER AZU 1007 | 2C01; C; 2C31; # GLAGOLITIC CAPITAL LETTER BUKY 1008 | 2C02; C; 2C32; # GLAGOLITIC CAPITAL LETTER VEDE 1009 | 2C03; C; 2C33; # GLAGOLITIC CAPITAL LETTER GLAGOLI 1010 | 2C04; C; 2C34; # GLAGOLITIC CAPITAL LETTER DOBRO 
1011 | 2C05; C; 2C35; # GLAGOLITIC CAPITAL LETTER YESTU 1012 | 2C06; C; 2C36; # GLAGOLITIC CAPITAL LETTER ZHIVETE 1013 | 2C07; C; 2C37; # GLAGOLITIC CAPITAL LETTER DZELO 1014 | 2C08; C; 2C38; # GLAGOLITIC CAPITAL LETTER ZEMLJA 1015 | 2C09; C; 2C39; # GLAGOLITIC CAPITAL LETTER IZHE 1016 | 2C0A; C; 2C3A; # GLAGOLITIC CAPITAL LETTER INITIAL IZHE 1017 | 2C0B; C; 2C3B; # GLAGOLITIC CAPITAL LETTER I 1018 | 2C0C; C; 2C3C; # GLAGOLITIC CAPITAL LETTER DJERVI 1019 | 2C0D; C; 2C3D; # GLAGOLITIC CAPITAL LETTER KAKO 1020 | 2C0E; C; 2C3E; # GLAGOLITIC CAPITAL LETTER LJUDIJE 1021 | 2C0F; C; 2C3F; # GLAGOLITIC CAPITAL LETTER MYSLITE 1022 | 2C10; C; 2C40; # GLAGOLITIC CAPITAL LETTER NASHI 1023 | 2C11; C; 2C41; # GLAGOLITIC CAPITAL LETTER ONU 1024 | 2C12; C; 2C42; # GLAGOLITIC CAPITAL LETTER POKOJI 1025 | 2C13; C; 2C43; # GLAGOLITIC CAPITAL LETTER RITSI 1026 | 2C14; C; 2C44; # GLAGOLITIC CAPITAL LETTER SLOVO 1027 | 2C15; C; 2C45; # GLAGOLITIC CAPITAL LETTER TVRIDO 1028 | 2C16; C; 2C46; # GLAGOLITIC CAPITAL LETTER UKU 1029 | 2C17; C; 2C47; # GLAGOLITIC CAPITAL LETTER FRITU 1030 | 2C18; C; 2C48; # GLAGOLITIC CAPITAL LETTER HERU 1031 | 2C19; C; 2C49; # GLAGOLITIC CAPITAL LETTER OTU 1032 | 2C1A; C; 2C4A; # GLAGOLITIC CAPITAL LETTER PE 1033 | 2C1B; C; 2C4B; # GLAGOLITIC CAPITAL LETTER SHTA 1034 | 2C1C; C; 2C4C; # GLAGOLITIC CAPITAL LETTER TSI 1035 | 2C1D; C; 2C4D; # GLAGOLITIC CAPITAL LETTER CHRIVI 1036 | 2C1E; C; 2C4E; # GLAGOLITIC CAPITAL LETTER SHA 1037 | 2C1F; C; 2C4F; # GLAGOLITIC CAPITAL LETTER YERU 1038 | 2C20; C; 2C50; # GLAGOLITIC CAPITAL LETTER YERI 1039 | 2C21; C; 2C51; # GLAGOLITIC CAPITAL LETTER YATI 1040 | 2C22; C; 2C52; # GLAGOLITIC CAPITAL LETTER SPIDERY HA 1041 | 2C23; C; 2C53; # GLAGOLITIC CAPITAL LETTER YU 1042 | 2C24; C; 2C54; # GLAGOLITIC CAPITAL LETTER SMALL YUS 1043 | 2C25; C; 2C55; # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL 1044 | 2C26; C; 2C56; # GLAGOLITIC CAPITAL LETTER YO 1045 | 2C27; C; 2C57; # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS 1046 | 2C28; C; 
2C58; # GLAGOLITIC CAPITAL LETTER BIG YUS 1047 | 2C29; C; 2C59; # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS 1048 | 2C2A; C; 2C5A; # GLAGOLITIC CAPITAL LETTER FITA 1049 | 2C2B; C; 2C5B; # GLAGOLITIC CAPITAL LETTER IZHITSA 1050 | 2C2C; C; 2C5C; # GLAGOLITIC CAPITAL LETTER SHTAPIC 1051 | 2C2D; C; 2C5D; # GLAGOLITIC CAPITAL LETTER TROKUTASTI A 1052 | 2C2E; C; 2C5E; # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE 1053 | 2C2F; C; 2C5F; # GLAGOLITIC CAPITAL LETTER CAUDATE CHRIVI 1054 | 2C60; C; 2C61; # LATIN CAPITAL LETTER L WITH DOUBLE BAR 1055 | 2C62; C; 026B; # LATIN CAPITAL LETTER L WITH MIDDLE TILDE 1056 | 2C63; C; 1D7D; # LATIN CAPITAL LETTER P WITH STROKE 1057 | 2C64; C; 027D; # LATIN CAPITAL LETTER R WITH TAIL 1058 | 2C67; C; 2C68; # LATIN CAPITAL LETTER H WITH DESCENDER 1059 | 2C69; C; 2C6A; # LATIN CAPITAL LETTER K WITH DESCENDER 1060 | 2C6B; C; 2C6C; # LATIN CAPITAL LETTER Z WITH DESCENDER 1061 | 2C6D; C; 0251; # LATIN CAPITAL LETTER ALPHA 1062 | 2C6E; C; 0271; # LATIN CAPITAL LETTER M WITH HOOK 1063 | 2C6F; C; 0250; # LATIN CAPITAL LETTER TURNED A 1064 | 2C70; C; 0252; # LATIN CAPITAL LETTER TURNED ALPHA 1065 | 2C72; C; 2C73; # LATIN CAPITAL LETTER W WITH HOOK 1066 | 2C75; C; 2C76; # LATIN CAPITAL LETTER HALF H 1067 | 2C7E; C; 023F; # LATIN CAPITAL LETTER S WITH SWASH TAIL 1068 | 2C7F; C; 0240; # LATIN CAPITAL LETTER Z WITH SWASH TAIL 1069 | 2C80; C; 2C81; # COPTIC CAPITAL LETTER ALFA 1070 | 2C82; C; 2C83; # COPTIC CAPITAL LETTER VIDA 1071 | 2C84; C; 2C85; # COPTIC CAPITAL LETTER GAMMA 1072 | 2C86; C; 2C87; # COPTIC CAPITAL LETTER DALDA 1073 | 2C88; C; 2C89; # COPTIC CAPITAL LETTER EIE 1074 | 2C8A; C; 2C8B; # COPTIC CAPITAL LETTER SOU 1075 | 2C8C; C; 2C8D; # COPTIC CAPITAL LETTER ZATA 1076 | 2C8E; C; 2C8F; # COPTIC CAPITAL LETTER HATE 1077 | 2C90; C; 2C91; # COPTIC CAPITAL LETTER THETHE 1078 | 2C92; C; 2C93; # COPTIC CAPITAL LETTER IAUDA 1079 | 2C94; C; 2C95; # COPTIC CAPITAL LETTER KAPA 1080 | 2C96; C; 2C97; # COPTIC CAPITAL LETTER LAULA 1081 | 2C98; C; 2C99; # 
COPTIC CAPITAL LETTER MI 1082 | 2C9A; C; 2C9B; # COPTIC CAPITAL LETTER NI 1083 | 2C9C; C; 2C9D; # COPTIC CAPITAL LETTER KSI 1084 | 2C9E; C; 2C9F; # COPTIC CAPITAL LETTER O 1085 | 2CA0; C; 2CA1; # COPTIC CAPITAL LETTER PI 1086 | 2CA2; C; 2CA3; # COPTIC CAPITAL LETTER RO 1087 | 2CA4; C; 2CA5; # COPTIC CAPITAL LETTER SIMA 1088 | 2CA6; C; 2CA7; # COPTIC CAPITAL LETTER TAU 1089 | 2CA8; C; 2CA9; # COPTIC CAPITAL LETTER UA 1090 | 2CAA; C; 2CAB; # COPTIC CAPITAL LETTER FI 1091 | 2CAC; C; 2CAD; # COPTIC CAPITAL LETTER KHI 1092 | 2CAE; C; 2CAF; # COPTIC CAPITAL LETTER PSI 1093 | 2CB0; C; 2CB1; # COPTIC CAPITAL LETTER OOU 1094 | 2CB2; C; 2CB3; # COPTIC CAPITAL LETTER DIALECT-P ALEF 1095 | 2CB4; C; 2CB5; # COPTIC CAPITAL LETTER OLD COPTIC AIN 1096 | 2CB6; C; 2CB7; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE 1097 | 2CB8; C; 2CB9; # COPTIC CAPITAL LETTER DIALECT-P KAPA 1098 | 2CBA; C; 2CBB; # COPTIC CAPITAL LETTER DIALECT-P NI 1099 | 2CBC; C; 2CBD; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI 1100 | 2CBE; C; 2CBF; # COPTIC CAPITAL LETTER OLD COPTIC OOU 1101 | 2CC0; C; 2CC1; # COPTIC CAPITAL LETTER SAMPI 1102 | 2CC2; C; 2CC3; # COPTIC CAPITAL LETTER CROSSED SHEI 1103 | 2CC4; C; 2CC5; # COPTIC CAPITAL LETTER OLD COPTIC SHEI 1104 | 2CC6; C; 2CC7; # COPTIC CAPITAL LETTER OLD COPTIC ESH 1105 | 2CC8; C; 2CC9; # COPTIC CAPITAL LETTER AKHMIMIC KHEI 1106 | 2CCA; C; 2CCB; # COPTIC CAPITAL LETTER DIALECT-P HORI 1107 | 2CCC; C; 2CCD; # COPTIC CAPITAL LETTER OLD COPTIC HORI 1108 | 2CCE; C; 2CCF; # COPTIC CAPITAL LETTER OLD COPTIC HA 1109 | 2CD0; C; 2CD1; # COPTIC CAPITAL LETTER L-SHAPED HA 1110 | 2CD2; C; 2CD3; # COPTIC CAPITAL LETTER OLD COPTIC HEI 1111 | 2CD4; C; 2CD5; # COPTIC CAPITAL LETTER OLD COPTIC HAT 1112 | 2CD6; C; 2CD7; # COPTIC CAPITAL LETTER OLD COPTIC GANGIA 1113 | 2CD8; C; 2CD9; # COPTIC CAPITAL LETTER OLD COPTIC DJA 1114 | 2CDA; C; 2CDB; # COPTIC CAPITAL LETTER OLD COPTIC SHIMA 1115 | 2CDC; C; 2CDD; # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA 1116 | 2CDE; C; 2CDF; # COPTIC CAPITAL 
LETTER OLD NUBIAN NGI 1117 | 2CE0; C; 2CE1; # COPTIC CAPITAL LETTER OLD NUBIAN NYI 1118 | 2CE2; C; 2CE3; # COPTIC CAPITAL LETTER OLD NUBIAN WAU 1119 | 2CEB; C; 2CEC; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI 1120 | 2CED; C; 2CEE; # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA 1121 | 2CF2; C; 2CF3; # COPTIC CAPITAL LETTER BOHAIRIC KHEI 1122 | A640; C; A641; # CYRILLIC CAPITAL LETTER ZEMLYA 1123 | A642; C; A643; # CYRILLIC CAPITAL LETTER DZELO 1124 | A644; C; A645; # CYRILLIC CAPITAL LETTER REVERSED DZE 1125 | A646; C; A647; # CYRILLIC CAPITAL LETTER IOTA 1126 | A648; C; A649; # CYRILLIC CAPITAL LETTER DJERV 1127 | A64A; C; A64B; # CYRILLIC CAPITAL LETTER MONOGRAPH UK 1128 | A64C; C; A64D; # CYRILLIC CAPITAL LETTER BROAD OMEGA 1129 | A64E; C; A64F; # CYRILLIC CAPITAL LETTER NEUTRAL YER 1130 | A650; C; A651; # CYRILLIC CAPITAL LETTER YERU WITH BACK YER 1131 | A652; C; A653; # CYRILLIC CAPITAL LETTER IOTIFIED YAT 1132 | A654; C; A655; # CYRILLIC CAPITAL LETTER REVERSED YU 1133 | A656; C; A657; # CYRILLIC CAPITAL LETTER IOTIFIED A 1134 | A658; C; A659; # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS 1135 | A65A; C; A65B; # CYRILLIC CAPITAL LETTER BLENDED YUS 1136 | A65C; C; A65D; # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS 1137 | A65E; C; A65F; # CYRILLIC CAPITAL LETTER YN 1138 | A660; C; A661; # CYRILLIC CAPITAL LETTER REVERSED TSE 1139 | A662; C; A663; # CYRILLIC CAPITAL LETTER SOFT DE 1140 | A664; C; A665; # CYRILLIC CAPITAL LETTER SOFT EL 1141 | A666; C; A667; # CYRILLIC CAPITAL LETTER SOFT EM 1142 | A668; C; A669; # CYRILLIC CAPITAL LETTER MONOCULAR O 1143 | A66A; C; A66B; # CYRILLIC CAPITAL LETTER BINOCULAR O 1144 | A66C; C; A66D; # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O 1145 | A680; C; A681; # CYRILLIC CAPITAL LETTER DWE 1146 | A682; C; A683; # CYRILLIC CAPITAL LETTER DZWE 1147 | A684; C; A685; # CYRILLIC CAPITAL LETTER ZHWE 1148 | A686; C; A687; # CYRILLIC CAPITAL LETTER CCHE 1149 | A688; C; A689; # CYRILLIC CAPITAL LETTER DZZE 1150 | A68A; C; A68B; # 
CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK 1151 | A68C; C; A68D; # CYRILLIC CAPITAL LETTER TWE 1152 | A68E; C; A68F; # CYRILLIC CAPITAL LETTER TSWE 1153 | A690; C; A691; # CYRILLIC CAPITAL LETTER TSSE 1154 | A692; C; A693; # CYRILLIC CAPITAL LETTER TCHE 1155 | A694; C; A695; # CYRILLIC CAPITAL LETTER HWE 1156 | A696; C; A697; # CYRILLIC CAPITAL LETTER SHWE 1157 | A698; C; A699; # CYRILLIC CAPITAL LETTER DOUBLE O 1158 | A69A; C; A69B; # CYRILLIC CAPITAL LETTER CROSSED O 1159 | A722; C; A723; # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF 1160 | A724; C; A725; # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN 1161 | A726; C; A727; # LATIN CAPITAL LETTER HENG 1162 | A728; C; A729; # LATIN CAPITAL LETTER TZ 1163 | A72A; C; A72B; # LATIN CAPITAL LETTER TRESILLO 1164 | A72C; C; A72D; # LATIN CAPITAL LETTER CUATRILLO 1165 | A72E; C; A72F; # LATIN CAPITAL LETTER CUATRILLO WITH COMMA 1166 | A732; C; A733; # LATIN CAPITAL LETTER AA 1167 | A734; C; A735; # LATIN CAPITAL LETTER AO 1168 | A736; C; A737; # LATIN CAPITAL LETTER AU 1169 | A738; C; A739; # LATIN CAPITAL LETTER AV 1170 | A73A; C; A73B; # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR 1171 | A73C; C; A73D; # LATIN CAPITAL LETTER AY 1172 | A73E; C; A73F; # LATIN CAPITAL LETTER REVERSED C WITH DOT 1173 | A740; C; A741; # LATIN CAPITAL LETTER K WITH STROKE 1174 | A742; C; A743; # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE 1175 | A744; C; A745; # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE 1176 | A746; C; A747; # LATIN CAPITAL LETTER BROKEN L 1177 | A748; C; A749; # LATIN CAPITAL LETTER L WITH HIGH STROKE 1178 | A74A; C; A74B; # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY 1179 | A74C; C; A74D; # LATIN CAPITAL LETTER O WITH LOOP 1180 | A74E; C; A74F; # LATIN CAPITAL LETTER OO 1181 | A750; C; A751; # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER 1182 | A752; C; A753; # LATIN CAPITAL LETTER P WITH FLOURISH 1183 | A754; C; A755; # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL 1184 | A756; C; A757; # LATIN CAPITAL LETTER Q 
WITH STROKE THROUGH DESCENDER 1185 | A758; C; A759; # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE 1186 | A75A; C; A75B; # LATIN CAPITAL LETTER R ROTUNDA 1187 | A75C; C; A75D; # LATIN CAPITAL LETTER RUM ROTUNDA 1188 | A75E; C; A75F; # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE 1189 | A760; C; A761; # LATIN CAPITAL LETTER VY 1190 | A762; C; A763; # LATIN CAPITAL LETTER VISIGOTHIC Z 1191 | A764; C; A765; # LATIN CAPITAL LETTER THORN WITH STROKE 1192 | A766; C; A767; # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER 1193 | A768; C; A769; # LATIN CAPITAL LETTER VEND 1194 | A76A; C; A76B; # LATIN CAPITAL LETTER ET 1195 | A76C; C; A76D; # LATIN CAPITAL LETTER IS 1196 | A76E; C; A76F; # LATIN CAPITAL LETTER CON 1197 | A779; C; A77A; # LATIN CAPITAL LETTER INSULAR D 1198 | A77B; C; A77C; # LATIN CAPITAL LETTER INSULAR F 1199 | A77D; C; 1D79; # LATIN CAPITAL LETTER INSULAR G 1200 | A77E; C; A77F; # LATIN CAPITAL LETTER TURNED INSULAR G 1201 | A780; C; A781; # LATIN CAPITAL LETTER TURNED L 1202 | A782; C; A783; # LATIN CAPITAL LETTER INSULAR R 1203 | A784; C; A785; # LATIN CAPITAL LETTER INSULAR S 1204 | A786; C; A787; # LATIN CAPITAL LETTER INSULAR T 1205 | A78B; C; A78C; # LATIN CAPITAL LETTER SALTILLO 1206 | A78D; C; 0265; # LATIN CAPITAL LETTER TURNED H 1207 | A790; C; A791; # LATIN CAPITAL LETTER N WITH DESCENDER 1208 | A792; C; A793; # LATIN CAPITAL LETTER C WITH BAR 1209 | A796; C; A797; # LATIN CAPITAL LETTER B WITH FLOURISH 1210 | A798; C; A799; # LATIN CAPITAL LETTER F WITH STROKE 1211 | A79A; C; A79B; # LATIN CAPITAL LETTER VOLAPUK AE 1212 | A79C; C; A79D; # LATIN CAPITAL LETTER VOLAPUK OE 1213 | A79E; C; A79F; # LATIN CAPITAL LETTER VOLAPUK UE 1214 | A7A0; C; A7A1; # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE 1215 | A7A2; C; A7A3; # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE 1216 | A7A4; C; A7A5; # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE 1217 | A7A6; C; A7A7; # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE 1218 | A7A8; C; A7A9; # LATIN CAPITAL LETTER S 
WITH OBLIQUE STROKE 1219 | A7AA; C; 0266; # LATIN CAPITAL LETTER H WITH HOOK 1220 | A7AB; C; 025C; # LATIN CAPITAL LETTER REVERSED OPEN E 1221 | A7AC; C; 0261; # LATIN CAPITAL LETTER SCRIPT G 1222 | A7AD; C; 026C; # LATIN CAPITAL LETTER L WITH BELT 1223 | A7AE; C; 026A; # LATIN CAPITAL LETTER SMALL CAPITAL I 1224 | A7B0; C; 029E; # LATIN CAPITAL LETTER TURNED K 1225 | A7B1; C; 0287; # LATIN CAPITAL LETTER TURNED T 1226 | A7B2; C; 029D; # LATIN CAPITAL LETTER J WITH CROSSED-TAIL 1227 | A7B3; C; AB53; # LATIN CAPITAL LETTER CHI 1228 | A7B4; C; A7B5; # LATIN CAPITAL LETTER BETA 1229 | A7B6; C; A7B7; # LATIN CAPITAL LETTER OMEGA 1230 | A7B8; C; A7B9; # LATIN CAPITAL LETTER U WITH STROKE 1231 | A7BA; C; A7BB; # LATIN CAPITAL LETTER GLOTTAL A 1232 | A7BC; C; A7BD; # LATIN CAPITAL LETTER GLOTTAL I 1233 | A7BE; C; A7BF; # LATIN CAPITAL LETTER GLOTTAL U 1234 | A7C0; C; A7C1; # LATIN CAPITAL LETTER OLD POLISH O 1235 | A7C2; C; A7C3; # LATIN CAPITAL LETTER ANGLICANA W 1236 | A7C4; C; A794; # LATIN CAPITAL LETTER C WITH PALATAL HOOK 1237 | A7C5; C; 0282; # LATIN CAPITAL LETTER S WITH HOOK 1238 | A7C6; C; 1D8E; # LATIN CAPITAL LETTER Z WITH PALATAL HOOK 1239 | A7C7; C; A7C8; # LATIN CAPITAL LETTER D WITH SHORT STROKE OVERLAY 1240 | A7C9; C; A7CA; # LATIN CAPITAL LETTER S WITH SHORT STROKE OVERLAY 1241 | A7D0; C; A7D1; # LATIN CAPITAL LETTER CLOSED INSULAR G 1242 | A7D6; C; A7D7; # LATIN CAPITAL LETTER MIDDLE SCOTS S 1243 | A7D8; C; A7D9; # LATIN CAPITAL LETTER SIGMOID S 1244 | A7F5; C; A7F6; # LATIN CAPITAL LETTER REVERSED HALF H 1245 | AB70; C; 13A0; # CHEROKEE SMALL LETTER A 1246 | AB71; C; 13A1; # CHEROKEE SMALL LETTER E 1247 | AB72; C; 13A2; # CHEROKEE SMALL LETTER I 1248 | AB73; C; 13A3; # CHEROKEE SMALL LETTER O 1249 | AB74; C; 13A4; # CHEROKEE SMALL LETTER U 1250 | AB75; C; 13A5; # CHEROKEE SMALL LETTER V 1251 | AB76; C; 13A6; # CHEROKEE SMALL LETTER GA 1252 | AB77; C; 13A7; # CHEROKEE SMALL LETTER KA 1253 | AB78; C; 13A8; # CHEROKEE SMALL LETTER GE 1254 | AB79; C; 13A9; 
# CHEROKEE SMALL LETTER GI 1255 | AB7A; C; 13AA; # CHEROKEE SMALL LETTER GO 1256 | AB7B; C; 13AB; # CHEROKEE SMALL LETTER GU 1257 | AB7C; C; 13AC; # CHEROKEE SMALL LETTER GV 1258 | AB7D; C; 13AD; # CHEROKEE SMALL LETTER HA 1259 | AB7E; C; 13AE; # CHEROKEE SMALL LETTER HE 1260 | AB7F; C; 13AF; # CHEROKEE SMALL LETTER HI 1261 | AB80; C; 13B0; # CHEROKEE SMALL LETTER HO 1262 | AB81; C; 13B1; # CHEROKEE SMALL LETTER HU 1263 | AB82; C; 13B2; # CHEROKEE SMALL LETTER HV 1264 | AB83; C; 13B3; # CHEROKEE SMALL LETTER LA 1265 | AB84; C; 13B4; # CHEROKEE SMALL LETTER LE 1266 | AB85; C; 13B5; # CHEROKEE SMALL LETTER LI 1267 | AB86; C; 13B6; # CHEROKEE SMALL LETTER LO 1268 | AB87; C; 13B7; # CHEROKEE SMALL LETTER LU 1269 | AB88; C; 13B8; # CHEROKEE SMALL LETTER LV 1270 | AB89; C; 13B9; # CHEROKEE SMALL LETTER MA 1271 | AB8A; C; 13BA; # CHEROKEE SMALL LETTER ME 1272 | AB8B; C; 13BB; # CHEROKEE SMALL LETTER MI 1273 | AB8C; C; 13BC; # CHEROKEE SMALL LETTER MO 1274 | AB8D; C; 13BD; # CHEROKEE SMALL LETTER MU 1275 | AB8E; C; 13BE; # CHEROKEE SMALL LETTER NA 1276 | AB8F; C; 13BF; # CHEROKEE SMALL LETTER HNA 1277 | AB90; C; 13C0; # CHEROKEE SMALL LETTER NAH 1278 | AB91; C; 13C1; # CHEROKEE SMALL LETTER NE 1279 | AB92; C; 13C2; # CHEROKEE SMALL LETTER NI 1280 | AB93; C; 13C3; # CHEROKEE SMALL LETTER NO 1281 | AB94; C; 13C4; # CHEROKEE SMALL LETTER NU 1282 | AB95; C; 13C5; # CHEROKEE SMALL LETTER NV 1283 | AB96; C; 13C6; # CHEROKEE SMALL LETTER QUA 1284 | AB97; C; 13C7; # CHEROKEE SMALL LETTER QUE 1285 | AB98; C; 13C8; # CHEROKEE SMALL LETTER QUI 1286 | AB99; C; 13C9; # CHEROKEE SMALL LETTER QUO 1287 | AB9A; C; 13CA; # CHEROKEE SMALL LETTER QUU 1288 | AB9B; C; 13CB; # CHEROKEE SMALL LETTER QUV 1289 | AB9C; C; 13CC; # CHEROKEE SMALL LETTER SA 1290 | AB9D; C; 13CD; # CHEROKEE SMALL LETTER S 1291 | AB9E; C; 13CE; # CHEROKEE SMALL LETTER SE 1292 | AB9F; C; 13CF; # CHEROKEE SMALL LETTER SI 1293 | ABA0; C; 13D0; # CHEROKEE SMALL LETTER SO 1294 | ABA1; C; 13D1; # CHEROKEE SMALL LETTER SU 1295 
| ABA2; C; 13D2; # CHEROKEE SMALL LETTER SV 1296 | ABA3; C; 13D3; # CHEROKEE SMALL LETTER DA 1297 | ABA4; C; 13D4; # CHEROKEE SMALL LETTER TA 1298 | ABA5; C; 13D5; # CHEROKEE SMALL LETTER DE 1299 | ABA6; C; 13D6; # CHEROKEE SMALL LETTER TE 1300 | ABA7; C; 13D7; # CHEROKEE SMALL LETTER DI 1301 | ABA8; C; 13D8; # CHEROKEE SMALL LETTER TI 1302 | ABA9; C; 13D9; # CHEROKEE SMALL LETTER DO 1303 | ABAA; C; 13DA; # CHEROKEE SMALL LETTER DU 1304 | ABAB; C; 13DB; # CHEROKEE SMALL LETTER DV 1305 | ABAC; C; 13DC; # CHEROKEE SMALL LETTER DLA 1306 | ABAD; C; 13DD; # CHEROKEE SMALL LETTER TLA 1307 | ABAE; C; 13DE; # CHEROKEE SMALL LETTER TLE 1308 | ABAF; C; 13DF; # CHEROKEE SMALL LETTER TLI 1309 | ABB0; C; 13E0; # CHEROKEE SMALL LETTER TLO 1310 | ABB1; C; 13E1; # CHEROKEE SMALL LETTER TLU 1311 | ABB2; C; 13E2; # CHEROKEE SMALL LETTER TLV 1312 | ABB3; C; 13E3; # CHEROKEE SMALL LETTER TSA 1313 | ABB4; C; 13E4; # CHEROKEE SMALL LETTER TSE 1314 | ABB5; C; 13E5; # CHEROKEE SMALL LETTER TSI 1315 | ABB6; C; 13E6; # CHEROKEE SMALL LETTER TSO 1316 | ABB7; C; 13E7; # CHEROKEE SMALL LETTER TSU 1317 | ABB8; C; 13E8; # CHEROKEE SMALL LETTER TSV 1318 | ABB9; C; 13E9; # CHEROKEE SMALL LETTER WA 1319 | ABBA; C; 13EA; # CHEROKEE SMALL LETTER WE 1320 | ABBB; C; 13EB; # CHEROKEE SMALL LETTER WI 1321 | ABBC; C; 13EC; # CHEROKEE SMALL LETTER WO 1322 | ABBD; C; 13ED; # CHEROKEE SMALL LETTER WU 1323 | ABBE; C; 13EE; # CHEROKEE SMALL LETTER WV 1324 | ABBF; C; 13EF; # CHEROKEE SMALL LETTER YA 1325 | FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF 1326 | FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI 1327 | FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL 1328 | FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI 1329 | FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL 1330 | FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T 1331 | FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST 1332 | FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW 1333 | FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH 1334 | 
FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI 1335 | FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW 1336 | FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH 1337 | FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A 1338 | FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B 1339 | FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C 1340 | FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D 1341 | FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E 1342 | FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F 1343 | FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G 1344 | FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H 1345 | FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I 1346 | FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J 1347 | FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K 1348 | FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L 1349 | FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M 1350 | FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N 1351 | FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O 1352 | FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P 1353 | FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q 1354 | FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R 1355 | FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S 1356 | FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T 1357 | FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U 1358 | FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V 1359 | FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W 1360 | FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X 1361 | FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y 1362 | FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z 1363 | 10400; C; 10428; # DESERET CAPITAL LETTER LONG I 1364 | 10401; C; 10429; # DESERET CAPITAL LETTER LONG E 1365 | 10402; C; 1042A; # DESERET CAPITAL LETTER LONG A 1366 | 10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH 1367 | 10404; C; 1042C; # DESERET CAPITAL LETTER LONG O 1368 | 10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO 
1369 | 10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I 1370 | 10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E 1371 | 10408; C; 10430; # DESERET CAPITAL LETTER SHORT A 1372 | 10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH 1373 | 1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O 1374 | 1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO 1375 | 1040C; C; 10434; # DESERET CAPITAL LETTER AY 1376 | 1040D; C; 10435; # DESERET CAPITAL LETTER OW 1377 | 1040E; C; 10436; # DESERET CAPITAL LETTER WU 1378 | 1040F; C; 10437; # DESERET CAPITAL LETTER YEE 1379 | 10410; C; 10438; # DESERET CAPITAL LETTER H 1380 | 10411; C; 10439; # DESERET CAPITAL LETTER PEE 1381 | 10412; C; 1043A; # DESERET CAPITAL LETTER BEE 1382 | 10413; C; 1043B; # DESERET CAPITAL LETTER TEE 1383 | 10414; C; 1043C; # DESERET CAPITAL LETTER DEE 1384 | 10415; C; 1043D; # DESERET CAPITAL LETTER CHEE 1385 | 10416; C; 1043E; # DESERET CAPITAL LETTER JEE 1386 | 10417; C; 1043F; # DESERET CAPITAL LETTER KAY 1387 | 10418; C; 10440; # DESERET CAPITAL LETTER GAY 1388 | 10419; C; 10441; # DESERET CAPITAL LETTER EF 1389 | 1041A; C; 10442; # DESERET CAPITAL LETTER VEE 1390 | 1041B; C; 10443; # DESERET CAPITAL LETTER ETH 1391 | 1041C; C; 10444; # DESERET CAPITAL LETTER THEE 1392 | 1041D; C; 10445; # DESERET CAPITAL LETTER ES 1393 | 1041E; C; 10446; # DESERET CAPITAL LETTER ZEE 1394 | 1041F; C; 10447; # DESERET CAPITAL LETTER ESH 1395 | 10420; C; 10448; # DESERET CAPITAL LETTER ZHEE 1396 | 10421; C; 10449; # DESERET CAPITAL LETTER ER 1397 | 10422; C; 1044A; # DESERET CAPITAL LETTER EL 1398 | 10423; C; 1044B; # DESERET CAPITAL LETTER EM 1399 | 10424; C; 1044C; # DESERET CAPITAL LETTER EN 1400 | 10425; C; 1044D; # DESERET CAPITAL LETTER ENG 1401 | 10426; C; 1044E; # DESERET CAPITAL LETTER OI 1402 | 10427; C; 1044F; # DESERET CAPITAL LETTER EW 1403 | 104B0; C; 104D8; # OSAGE CAPITAL LETTER A 1404 | 104B1; C; 104D9; # OSAGE CAPITAL LETTER AI 1405 | 104B2; C; 104DA; # OSAGE CAPITAL LETTER AIN 1406 | 104B3; C; 104DB; # OSAGE 
CAPITAL LETTER AH 1407 | 104B4; C; 104DC; # OSAGE CAPITAL LETTER BRA 1408 | 104B5; C; 104DD; # OSAGE CAPITAL LETTER CHA 1409 | 104B6; C; 104DE; # OSAGE CAPITAL LETTER EHCHA 1410 | 104B7; C; 104DF; # OSAGE CAPITAL LETTER E 1411 | 104B8; C; 104E0; # OSAGE CAPITAL LETTER EIN 1412 | 104B9; C; 104E1; # OSAGE CAPITAL LETTER HA 1413 | 104BA; C; 104E2; # OSAGE CAPITAL LETTER HYA 1414 | 104BB; C; 104E3; # OSAGE CAPITAL LETTER I 1415 | 104BC; C; 104E4; # OSAGE CAPITAL LETTER KA 1416 | 104BD; C; 104E5; # OSAGE CAPITAL LETTER EHKA 1417 | 104BE; C; 104E6; # OSAGE CAPITAL LETTER KYA 1418 | 104BF; C; 104E7; # OSAGE CAPITAL LETTER LA 1419 | 104C0; C; 104E8; # OSAGE CAPITAL LETTER MA 1420 | 104C1; C; 104E9; # OSAGE CAPITAL LETTER NA 1421 | 104C2; C; 104EA; # OSAGE CAPITAL LETTER O 1422 | 104C3; C; 104EB; # OSAGE CAPITAL LETTER OIN 1423 | 104C4; C; 104EC; # OSAGE CAPITAL LETTER PA 1424 | 104C5; C; 104ED; # OSAGE CAPITAL LETTER EHPA 1425 | 104C6; C; 104EE; # OSAGE CAPITAL LETTER SA 1426 | 104C7; C; 104EF; # OSAGE CAPITAL LETTER SHA 1427 | 104C8; C; 104F0; # OSAGE CAPITAL LETTER TA 1428 | 104C9; C; 104F1; # OSAGE CAPITAL LETTER EHTA 1429 | 104CA; C; 104F2; # OSAGE CAPITAL LETTER TSA 1430 | 104CB; C; 104F3; # OSAGE CAPITAL LETTER EHTSA 1431 | 104CC; C; 104F4; # OSAGE CAPITAL LETTER TSHA 1432 | 104CD; C; 104F5; # OSAGE CAPITAL LETTER DHA 1433 | 104CE; C; 104F6; # OSAGE CAPITAL LETTER U 1434 | 104CF; C; 104F7; # OSAGE CAPITAL LETTER WA 1435 | 104D0; C; 104F8; # OSAGE CAPITAL LETTER KHA 1436 | 104D1; C; 104F9; # OSAGE CAPITAL LETTER GHA 1437 | 104D2; C; 104FA; # OSAGE CAPITAL LETTER ZA 1438 | 104D3; C; 104FB; # OSAGE CAPITAL LETTER ZHA 1439 | 10570; C; 10597; # VITHKUQI CAPITAL LETTER A 1440 | 10571; C; 10598; # VITHKUQI CAPITAL LETTER BBE 1441 | 10572; C; 10599; # VITHKUQI CAPITAL LETTER BE 1442 | 10573; C; 1059A; # VITHKUQI CAPITAL LETTER CE 1443 | 10574; C; 1059B; # VITHKUQI CAPITAL LETTER CHE 1444 | 10575; C; 1059C; # VITHKUQI CAPITAL LETTER DE 1445 | 10576; C; 1059D; # VITHKUQI 
CAPITAL LETTER DHE 1446 | 10577; C; 1059E; # VITHKUQI CAPITAL LETTER EI 1447 | 10578; C; 1059F; # VITHKUQI CAPITAL LETTER E 1448 | 10579; C; 105A0; # VITHKUQI CAPITAL LETTER FE 1449 | 1057A; C; 105A1; # VITHKUQI CAPITAL LETTER GA 1450 | 1057C; C; 105A3; # VITHKUQI CAPITAL LETTER HA 1451 | 1057D; C; 105A4; # VITHKUQI CAPITAL LETTER HHA 1452 | 1057E; C; 105A5; # VITHKUQI CAPITAL LETTER I 1453 | 1057F; C; 105A6; # VITHKUQI CAPITAL LETTER IJE 1454 | 10580; C; 105A7; # VITHKUQI CAPITAL LETTER JE 1455 | 10581; C; 105A8; # VITHKUQI CAPITAL LETTER KA 1456 | 10582; C; 105A9; # VITHKUQI CAPITAL LETTER LA 1457 | 10583; C; 105AA; # VITHKUQI CAPITAL LETTER LLA 1458 | 10584; C; 105AB; # VITHKUQI CAPITAL LETTER ME 1459 | 10585; C; 105AC; # VITHKUQI CAPITAL LETTER NE 1460 | 10586; C; 105AD; # VITHKUQI CAPITAL LETTER NJE 1461 | 10587; C; 105AE; # VITHKUQI CAPITAL LETTER O 1462 | 10588; C; 105AF; # VITHKUQI CAPITAL LETTER PE 1463 | 10589; C; 105B0; # VITHKUQI CAPITAL LETTER QA 1464 | 1058A; C; 105B1; # VITHKUQI CAPITAL LETTER RE 1465 | 1058C; C; 105B3; # VITHKUQI CAPITAL LETTER SE 1466 | 1058D; C; 105B4; # VITHKUQI CAPITAL LETTER SHE 1467 | 1058E; C; 105B5; # VITHKUQI CAPITAL LETTER TE 1468 | 1058F; C; 105B6; # VITHKUQI CAPITAL LETTER THE 1469 | 10590; C; 105B7; # VITHKUQI CAPITAL LETTER U 1470 | 10591; C; 105B8; # VITHKUQI CAPITAL LETTER VE 1471 | 10592; C; 105B9; # VITHKUQI CAPITAL LETTER XE 1472 | 10594; C; 105BB; # VITHKUQI CAPITAL LETTER Y 1473 | 10595; C; 105BC; # VITHKUQI CAPITAL LETTER ZE 1474 | 10C80; C; 10CC0; # OLD HUNGARIAN CAPITAL LETTER A 1475 | 10C81; C; 10CC1; # OLD HUNGARIAN CAPITAL LETTER AA 1476 | 10C82; C; 10CC2; # OLD HUNGARIAN CAPITAL LETTER EB 1477 | 10C83; C; 10CC3; # OLD HUNGARIAN CAPITAL LETTER AMB 1478 | 10C84; C; 10CC4; # OLD HUNGARIAN CAPITAL LETTER EC 1479 | 10C85; C; 10CC5; # OLD HUNGARIAN CAPITAL LETTER ENC 1480 | 10C86; C; 10CC6; # OLD HUNGARIAN CAPITAL LETTER ECS 1481 | 10C87; C; 10CC7; # OLD HUNGARIAN CAPITAL LETTER ED 1482 | 10C88; C; 10CC8; # OLD 
HUNGARIAN CAPITAL LETTER AND 1483 | 10C89; C; 10CC9; # OLD HUNGARIAN CAPITAL LETTER E 1484 | 10C8A; C; 10CCA; # OLD HUNGARIAN CAPITAL LETTER CLOSE E 1485 | 10C8B; C; 10CCB; # OLD HUNGARIAN CAPITAL LETTER EE 1486 | 10C8C; C; 10CCC; # OLD HUNGARIAN CAPITAL LETTER EF 1487 | 10C8D; C; 10CCD; # OLD HUNGARIAN CAPITAL LETTER EG 1488 | 10C8E; C; 10CCE; # OLD HUNGARIAN CAPITAL LETTER EGY 1489 | 10C8F; C; 10CCF; # OLD HUNGARIAN CAPITAL LETTER EH 1490 | 10C90; C; 10CD0; # OLD HUNGARIAN CAPITAL LETTER I 1491 | 10C91; C; 10CD1; # OLD HUNGARIAN CAPITAL LETTER II 1492 | 10C92; C; 10CD2; # OLD HUNGARIAN CAPITAL LETTER EJ 1493 | 10C93; C; 10CD3; # OLD HUNGARIAN CAPITAL LETTER EK 1494 | 10C94; C; 10CD4; # OLD HUNGARIAN CAPITAL LETTER AK 1495 | 10C95; C; 10CD5; # OLD HUNGARIAN CAPITAL LETTER UNK 1496 | 10C96; C; 10CD6; # OLD HUNGARIAN CAPITAL LETTER EL 1497 | 10C97; C; 10CD7; # OLD HUNGARIAN CAPITAL LETTER ELY 1498 | 10C98; C; 10CD8; # OLD HUNGARIAN CAPITAL LETTER EM 1499 | 10C99; C; 10CD9; # OLD HUNGARIAN CAPITAL LETTER EN 1500 | 10C9A; C; 10CDA; # OLD HUNGARIAN CAPITAL LETTER ENY 1501 | 10C9B; C; 10CDB; # OLD HUNGARIAN CAPITAL LETTER O 1502 | 10C9C; C; 10CDC; # OLD HUNGARIAN CAPITAL LETTER OO 1503 | 10C9D; C; 10CDD; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG OE 1504 | 10C9E; C; 10CDE; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA OE 1505 | 10C9F; C; 10CDF; # OLD HUNGARIAN CAPITAL LETTER OEE 1506 | 10CA0; C; 10CE0; # OLD HUNGARIAN CAPITAL LETTER EP 1507 | 10CA1; C; 10CE1; # OLD HUNGARIAN CAPITAL LETTER EMP 1508 | 10CA2; C; 10CE2; # OLD HUNGARIAN CAPITAL LETTER ER 1509 | 10CA3; C; 10CE3; # OLD HUNGARIAN CAPITAL LETTER SHORT ER 1510 | 10CA4; C; 10CE4; # OLD HUNGARIAN CAPITAL LETTER ES 1511 | 10CA5; C; 10CE5; # OLD HUNGARIAN CAPITAL LETTER ESZ 1512 | 10CA6; C; 10CE6; # OLD HUNGARIAN CAPITAL LETTER ET 1513 | 10CA7; C; 10CE7; # OLD HUNGARIAN CAPITAL LETTER ENT 1514 | 10CA8; C; 10CE8; # OLD HUNGARIAN CAPITAL LETTER ETY 1515 | 10CA9; C; 10CE9; # OLD HUNGARIAN CAPITAL LETTER ECH 1516 | 10CAA; C; 
10CEA; # OLD HUNGARIAN CAPITAL LETTER U 1517 | 10CAB; C; 10CEB; # OLD HUNGARIAN CAPITAL LETTER UU 1518 | 10CAC; C; 10CEC; # OLD HUNGARIAN CAPITAL LETTER NIKOLSBURG UE 1519 | 10CAD; C; 10CED; # OLD HUNGARIAN CAPITAL LETTER RUDIMENTA UE 1520 | 10CAE; C; 10CEE; # OLD HUNGARIAN CAPITAL LETTER EV 1521 | 10CAF; C; 10CEF; # OLD HUNGARIAN CAPITAL LETTER EZ 1522 | 10CB0; C; 10CF0; # OLD HUNGARIAN CAPITAL LETTER EZS 1523 | 10CB1; C; 10CF1; # OLD HUNGARIAN CAPITAL LETTER ENT-SHAPED SIGN 1524 | 10CB2; C; 10CF2; # OLD HUNGARIAN CAPITAL LETTER US 1525 | 118A0; C; 118C0; # WARANG CITI CAPITAL LETTER NGAA 1526 | 118A1; C; 118C1; # WARANG CITI CAPITAL LETTER A 1527 | 118A2; C; 118C2; # WARANG CITI CAPITAL LETTER WI 1528 | 118A3; C; 118C3; # WARANG CITI CAPITAL LETTER YU 1529 | 118A4; C; 118C4; # WARANG CITI CAPITAL LETTER YA 1530 | 118A5; C; 118C5; # WARANG CITI CAPITAL LETTER YO 1531 | 118A6; C; 118C6; # WARANG CITI CAPITAL LETTER II 1532 | 118A7; C; 118C7; # WARANG CITI CAPITAL LETTER UU 1533 | 118A8; C; 118C8; # WARANG CITI CAPITAL LETTER E 1534 | 118A9; C; 118C9; # WARANG CITI CAPITAL LETTER O 1535 | 118AA; C; 118CA; # WARANG CITI CAPITAL LETTER ANG 1536 | 118AB; C; 118CB; # WARANG CITI CAPITAL LETTER GA 1537 | 118AC; C; 118CC; # WARANG CITI CAPITAL LETTER KO 1538 | 118AD; C; 118CD; # WARANG CITI CAPITAL LETTER ENY 1539 | 118AE; C; 118CE; # WARANG CITI CAPITAL LETTER YUJ 1540 | 118AF; C; 118CF; # WARANG CITI CAPITAL LETTER UC 1541 | 118B0; C; 118D0; # WARANG CITI CAPITAL LETTER ENN 1542 | 118B1; C; 118D1; # WARANG CITI CAPITAL LETTER ODD 1543 | 118B2; C; 118D2; # WARANG CITI CAPITAL LETTER TTE 1544 | 118B3; C; 118D3; # WARANG CITI CAPITAL LETTER NUNG 1545 | 118B4; C; 118D4; # WARANG CITI CAPITAL LETTER DA 1546 | 118B5; C; 118D5; # WARANG CITI CAPITAL LETTER AT 1547 | 118B6; C; 118D6; # WARANG CITI CAPITAL LETTER AM 1548 | 118B7; C; 118D7; # WARANG CITI CAPITAL LETTER BU 1549 | 118B8; C; 118D8; # WARANG CITI CAPITAL LETTER PU 1550 | 118B9; C; 118D9; # WARANG CITI CAPITAL LETTER 
HIYO 1551 | 118BA; C; 118DA; # WARANG CITI CAPITAL LETTER HOLO 1552 | 118BB; C; 118DB; # WARANG CITI CAPITAL LETTER HORR 1553 | 118BC; C; 118DC; # WARANG CITI CAPITAL LETTER HAR 1554 | 118BD; C; 118DD; # WARANG CITI CAPITAL LETTER SSUU 1555 | 118BE; C; 118DE; # WARANG CITI CAPITAL LETTER SII 1556 | 118BF; C; 118DF; # WARANG CITI CAPITAL LETTER VIYO 1557 | 16E40; C; 16E60; # MEDEFAIDRIN CAPITAL LETTER M 1558 | 16E41; C; 16E61; # MEDEFAIDRIN CAPITAL LETTER S 1559 | 16E42; C; 16E62; # MEDEFAIDRIN CAPITAL LETTER V 1560 | 16E43; C; 16E63; # MEDEFAIDRIN CAPITAL LETTER W 1561 | 16E44; C; 16E64; # MEDEFAIDRIN CAPITAL LETTER ATIU 1562 | 16E45; C; 16E65; # MEDEFAIDRIN CAPITAL LETTER Z 1563 | 16E46; C; 16E66; # MEDEFAIDRIN CAPITAL LETTER KP 1564 | 16E47; C; 16E67; # MEDEFAIDRIN CAPITAL LETTER P 1565 | 16E48; C; 16E68; # MEDEFAIDRIN CAPITAL LETTER T 1566 | 16E49; C; 16E69; # MEDEFAIDRIN CAPITAL LETTER G 1567 | 16E4A; C; 16E6A; # MEDEFAIDRIN CAPITAL LETTER F 1568 | 16E4B; C; 16E6B; # MEDEFAIDRIN CAPITAL LETTER I 1569 | 16E4C; C; 16E6C; # MEDEFAIDRIN CAPITAL LETTER K 1570 | 16E4D; C; 16E6D; # MEDEFAIDRIN CAPITAL LETTER A 1571 | 16E4E; C; 16E6E; # MEDEFAIDRIN CAPITAL LETTER J 1572 | 16E4F; C; 16E6F; # MEDEFAIDRIN CAPITAL LETTER E 1573 | 16E50; C; 16E70; # MEDEFAIDRIN CAPITAL LETTER B 1574 | 16E51; C; 16E71; # MEDEFAIDRIN CAPITAL LETTER C 1575 | 16E52; C; 16E72; # MEDEFAIDRIN CAPITAL LETTER U 1576 | 16E53; C; 16E73; # MEDEFAIDRIN CAPITAL LETTER YU 1577 | 16E54; C; 16E74; # MEDEFAIDRIN CAPITAL LETTER L 1578 | 16E55; C; 16E75; # MEDEFAIDRIN CAPITAL LETTER Q 1579 | 16E56; C; 16E76; # MEDEFAIDRIN CAPITAL LETTER HP 1580 | 16E57; C; 16E77; # MEDEFAIDRIN CAPITAL LETTER NY 1581 | 16E58; C; 16E78; # MEDEFAIDRIN CAPITAL LETTER X 1582 | 16E59; C; 16E79; # MEDEFAIDRIN CAPITAL LETTER D 1583 | 16E5A; C; 16E7A; # MEDEFAIDRIN CAPITAL LETTER OE 1584 | 16E5B; C; 16E7B; # MEDEFAIDRIN CAPITAL LETTER N 1585 | 16E5C; C; 16E7C; # MEDEFAIDRIN CAPITAL LETTER R 1586 | 16E5D; C; 16E7D; # MEDEFAIDRIN CAPITAL 
LETTER O 1587 | 16E5E; C; 16E7E; # MEDEFAIDRIN CAPITAL LETTER AI 1588 | 16E5F; C; 16E7F; # MEDEFAIDRIN CAPITAL LETTER Y 1589 | 1E900; C; 1E922; # ADLAM CAPITAL LETTER ALIF 1590 | 1E901; C; 1E923; # ADLAM CAPITAL LETTER DAALI 1591 | 1E902; C; 1E924; # ADLAM CAPITAL LETTER LAAM 1592 | 1E903; C; 1E925; # ADLAM CAPITAL LETTER MIIM 1593 | 1E904; C; 1E926; # ADLAM CAPITAL LETTER BA 1594 | 1E905; C; 1E927; # ADLAM CAPITAL LETTER SINNYIIYHE 1595 | 1E906; C; 1E928; # ADLAM CAPITAL LETTER PE 1596 | 1E907; C; 1E929; # ADLAM CAPITAL LETTER BHE 1597 | 1E908; C; 1E92A; # ADLAM CAPITAL LETTER RA 1598 | 1E909; C; 1E92B; # ADLAM CAPITAL LETTER E 1599 | 1E90A; C; 1E92C; # ADLAM CAPITAL LETTER FA 1600 | 1E90B; C; 1E92D; # ADLAM CAPITAL LETTER I 1601 | 1E90C; C; 1E92E; # ADLAM CAPITAL LETTER O 1602 | 1E90D; C; 1E92F; # ADLAM CAPITAL LETTER DHA 1603 | 1E90E; C; 1E930; # ADLAM CAPITAL LETTER YHE 1604 | 1E90F; C; 1E931; # ADLAM CAPITAL LETTER WAW 1605 | 1E910; C; 1E932; # ADLAM CAPITAL LETTER NUN 1606 | 1E911; C; 1E933; # ADLAM CAPITAL LETTER KAF 1607 | 1E912; C; 1E934; # ADLAM CAPITAL LETTER YA 1608 | 1E913; C; 1E935; # ADLAM CAPITAL LETTER U 1609 | 1E914; C; 1E936; # ADLAM CAPITAL LETTER JIIM 1610 | 1E915; C; 1E937; # ADLAM CAPITAL LETTER CHI 1611 | 1E916; C; 1E938; # ADLAM CAPITAL LETTER HA 1612 | 1E917; C; 1E939; # ADLAM CAPITAL LETTER QAAF 1613 | 1E918; C; 1E93A; # ADLAM CAPITAL LETTER GA 1614 | 1E919; C; 1E93B; # ADLAM CAPITAL LETTER NYA 1615 | 1E91A; C; 1E93C; # ADLAM CAPITAL LETTER TU 1616 | 1E91B; C; 1E93D; # ADLAM CAPITAL LETTER NHA 1617 | 1E91C; C; 1E93E; # ADLAM CAPITAL LETTER VA 1618 | 1E91D; C; 1E93F; # ADLAM CAPITAL LETTER KHA 1619 | 1E91E; C; 1E940; # ADLAM CAPITAL LETTER GBE 1620 | 1E91F; C; 1E941; # ADLAM CAPITAL LETTER ZAL 1621 | 1E920; C; 1E942; # ADLAM CAPITAL LETTER KPO 1622 | 1E921; C; 1E943; # ADLAM CAPITAL LETTER SHA 1623 | # 1624 | # EOF 1625 | -------------------------------------------------------------------------------- 
/src/UnicodeData/CompositionExclusions.txt: -------------------------------------------------------------------------------- 1 | # CompositionExclusions-15.0.0.txt 2 | # Date: 2022-05-03, 18:50:00 GMT [KW, LI] 3 | # © 2022 Unicode®, Inc. 4 | # For terms of use, see https://www.unicode.org/terms_of_use.html 5 | # 6 | # Unicode Character Database 7 | # For documentation, see https://www.unicode.org/reports/tr44/ 8 | # 9 | # This file lists the characters for the Composition Exclusion Table 10 | # defined in UAX #15, Unicode Normalization Forms. 11 | # 12 | # This file is a normative contributory data file in the 13 | # Unicode Character Database. 14 | # 15 | # For more information, see 16 | # https://www.unicode.org/reports/tr15/#Primary_Exclusion_List_Table 17 | # 18 | # For a full derivation of composition exclusions, see the derived property 19 | # Full_Composition_Exclusion in DerivedNormalizationProps.txt 20 | # 21 | 22 | # ================================================ 23 | # (1) Script Specifics 24 | # 25 | # This list of characters cannot be derived from the UnicodeData.txt file. 26 | # 27 | # Included are the following subcategories: 28 | # 29 | # - Many precomposed characters using a nukta diacritic in the Devanagari, 30 | # Bangla/Bengali, Gurmukhi, or Odia/Oriya scripts. 31 | # - Tibetan letters and subjoined letters with decompositions including 32 | # U+0FB7 TIBETAN SUBJOINED LETTER HA or U+0FB5 TIBETAN SUBJOINED LETTER SSA. 33 | # - Two two-part Tibetan vowel signs involving top and bottom pieces. 34 | # - A large collection of compatibility precomposed characters for Hebrew 35 | # involving dagesh and/or other combining marks. 36 | # 37 | # This list is unlikely to grow. 
38 | # 39 | # ================================================ 40 | 41 | 0958 # DEVANAGARI LETTER QA 42 | 0959 # DEVANAGARI LETTER KHHA 43 | 095A # DEVANAGARI LETTER GHHA 44 | 095B # DEVANAGARI LETTER ZA 45 | 095C # DEVANAGARI LETTER DDDHA 46 | 095D # DEVANAGARI LETTER RHA 47 | 095E # DEVANAGARI LETTER FA 48 | 095F # DEVANAGARI LETTER YYA 49 | 09DC # BENGALI LETTER RRA 50 | 09DD # BENGALI LETTER RHA 51 | 09DF # BENGALI LETTER YYA 52 | 0A33 # GURMUKHI LETTER LLA 53 | 0A36 # GURMUKHI LETTER SHA 54 | 0A59 # GURMUKHI LETTER KHHA 55 | 0A5A # GURMUKHI LETTER GHHA 56 | 0A5B # GURMUKHI LETTER ZA 57 | 0A5E # GURMUKHI LETTER FA 58 | 0B5C # ORIYA LETTER RRA 59 | 0B5D # ORIYA LETTER RHA 60 | 0F43 # TIBETAN LETTER GHA 61 | 0F4D # TIBETAN LETTER DDHA 62 | 0F52 # TIBETAN LETTER DHA 63 | 0F57 # TIBETAN LETTER BHA 64 | 0F5C # TIBETAN LETTER DZHA 65 | 0F69 # TIBETAN LETTER KSSA 66 | 0F76 # TIBETAN VOWEL SIGN VOCALIC R 67 | 0F78 # TIBETAN VOWEL SIGN VOCALIC L 68 | 0F93 # TIBETAN SUBJOINED LETTER GHA 69 | 0F9D # TIBETAN SUBJOINED LETTER DDHA 70 | 0FA2 # TIBETAN SUBJOINED LETTER DHA 71 | 0FA7 # TIBETAN SUBJOINED LETTER BHA 72 | 0FAC # TIBETAN SUBJOINED LETTER DZHA 73 | 0FB9 # TIBETAN SUBJOINED LETTER KSSA 74 | FB1D # HEBREW LETTER YOD WITH HIRIQ 75 | FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH 76 | FB2A # HEBREW LETTER SHIN WITH SHIN DOT 77 | FB2B # HEBREW LETTER SHIN WITH SIN DOT 78 | FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT 79 | FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT 80 | FB2E # HEBREW LETTER ALEF WITH PATAH 81 | FB2F # HEBREW LETTER ALEF WITH QAMATS 82 | FB30 # HEBREW LETTER ALEF WITH MAPIQ 83 | FB31 # HEBREW LETTER BET WITH DAGESH 84 | FB32 # HEBREW LETTER GIMEL WITH DAGESH 85 | FB33 # HEBREW LETTER DALET WITH DAGESH 86 | FB34 # HEBREW LETTER HE WITH MAPIQ 87 | FB35 # HEBREW LETTER VAV WITH DAGESH 88 | FB36 # HEBREW LETTER ZAYIN WITH DAGESH 89 | FB38 # HEBREW LETTER TET WITH DAGESH 90 | FB39 # HEBREW LETTER YOD WITH DAGESH 91 | FB3A # HEBREW LETTER FINAL KAF 
WITH DAGESH 92 | FB3B # HEBREW LETTER KAF WITH DAGESH 93 | FB3C # HEBREW LETTER LAMED WITH DAGESH 94 | FB3E # HEBREW LETTER MEM WITH DAGESH 95 | FB40 # HEBREW LETTER NUN WITH DAGESH 96 | FB41 # HEBREW LETTER SAMEKH WITH DAGESH 97 | FB43 # HEBREW LETTER FINAL PE WITH DAGESH 98 | FB44 # HEBREW LETTER PE WITH DAGESH 99 | FB46 # HEBREW LETTER TSADI WITH DAGESH 100 | FB47 # HEBREW LETTER QOF WITH DAGESH 101 | FB48 # HEBREW LETTER RESH WITH DAGESH 102 | FB49 # HEBREW LETTER SHIN WITH DAGESH 103 | FB4A # HEBREW LETTER TAV WITH DAGESH 104 | FB4B # HEBREW LETTER VAV WITH HOLAM 105 | FB4C # HEBREW LETTER BET WITH RAFE 106 | FB4D # HEBREW LETTER KAF WITH RAFE 107 | FB4E # HEBREW LETTER PE WITH RAFE 108 | 109 | # Total code points: 67 110 | 111 | # ================================================ 112 | # (2) Post Composition Version precomposed characters 113 | # 114 | # These characters cannot be derived solely from the UnicodeData.txt file 115 | # in this version of Unicode. 116 | # 117 | # Note that characters added to the standard after the 118 | # Composition Version and which have canonical decomposition mappings 119 | # are not automatically added to this list of Post Composition 120 | # Version precomposed characters. 
121 | # ================================================ 122 | 123 | 2ADC # FORKING 124 | 1D15E # MUSICAL SYMBOL HALF NOTE 125 | 1D15F # MUSICAL SYMBOL QUARTER NOTE 126 | 1D160 # MUSICAL SYMBOL EIGHTH NOTE 127 | 1D161 # MUSICAL SYMBOL SIXTEENTH NOTE 128 | 1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE 129 | 1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE 130 | 1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 131 | 1D1BB # MUSICAL SYMBOL MINIMA 132 | 1D1BC # MUSICAL SYMBOL MINIMA BLACK 133 | 1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE 134 | 1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK 135 | 1D1BF # MUSICAL SYMBOL FUSA WHITE 136 | 1D1C0 # MUSICAL SYMBOL FUSA BLACK 137 | 138 | # Total code points: 14 139 | 140 | # ================================================ 141 | # (3) Singleton Decompositions 142 | # 143 | # These characters can be derived from the UnicodeData.txt file 144 | # by including all canonically decomposable characters whose 145 | # canonical decomposition consists of a single character. 146 | # 147 | # These characters are simply quoted here for reference. 
148 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt 149 | # ================================================ 150 | 151 | # 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK 152 | # 0343 COMBINING GREEK KORONIS 153 | # 0374 GREEK NUMERAL SIGN 154 | # 037E GREEK QUESTION MARK 155 | # 0387 GREEK ANO TELEIA 156 | # 1F71 GREEK SMALL LETTER ALPHA WITH OXIA 157 | # 1F73 GREEK SMALL LETTER EPSILON WITH OXIA 158 | # 1F75 GREEK SMALL LETTER ETA WITH OXIA 159 | # 1F77 GREEK SMALL LETTER IOTA WITH OXIA 160 | # 1F79 GREEK SMALL LETTER OMICRON WITH OXIA 161 | # 1F7B GREEK SMALL LETTER UPSILON WITH OXIA 162 | # 1F7D GREEK SMALL LETTER OMEGA WITH OXIA 163 | # 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA 164 | # 1FBE GREEK PROSGEGRAMMENI 165 | # 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA 166 | # 1FCB GREEK CAPITAL LETTER ETA WITH OXIA 167 | # 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 168 | # 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA 169 | # 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 170 | # 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA 171 | # 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA 172 | # 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA 173 | # 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA 174 | # 1FFD GREEK OXIA 175 | # 2000..2001 [2] EN QUAD..EM QUAD 176 | # 2126 OHM SIGN 177 | # 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN 178 | # 2329 LEFT-POINTING ANGLE BRACKET 179 | # 232A RIGHT-POINTING ANGLE BRACKET 180 | # F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D 181 | # FA10 CJK COMPATIBILITY IDEOGRAPH-FA10 182 | # FA12 CJK COMPATIBILITY IDEOGRAPH-FA12 183 | # FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E 184 | # FA20 CJK COMPATIBILITY IDEOGRAPH-FA20 185 | # FA22 CJK COMPATIBILITY IDEOGRAPH-FA22 186 | # FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26 187 | # FA2A..FA6D [68] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK 
COMPATIBILITY IDEOGRAPH-FA6D 188 | # FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 189 | # 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D 190 | 191 | # Total code points: 1035 192 | 193 | # ================================================ 194 | # (4) Non-Starter Decompositions 195 | # 196 | # These characters can be derived from the UnicodeData.txt file 197 | # by including each expanding canonical decomposition 198 | # (i.e., those which canonically decompose to a sequence 199 | # of characters instead of a single character), such that: 200 | # 201 | # A. The character is not a Starter. 202 | # 203 | # OR (inclusive) 204 | # 205 | # B. The character's canonical decomposition begins 206 | # with a character that is not a Starter. 207 | # 208 | # Note that a "Starter" is any character with a zero combining class. 209 | # 210 | # These characters are simply quoted here for reference. 211 | # See also Full_Composition_Exclusion in DerivedNormalizationProps.txt 212 | # ================================================ 213 | 214 | # 0344 COMBINING GREEK DIALYTIKA TONOS 215 | # 0F73 TIBETAN VOWEL SIGN II 216 | # 0F75 TIBETAN VOWEL SIGN UU 217 | # 0F81 TIBETAN VOWEL SIGN REVERSED II 218 | 219 | # Total code points: 4 220 | 221 | # EOF 222 | -------------------------------------------------------------------------------- /src/UnicodeData/NormalizationCorrections.txt: -------------------------------------------------------------------------------- 1 | # NormalizationCorrections-15.0.0.txt 2 | # Date: 2022-05-03, 18:53:00 GMT [KW, LI] 3 | # © 2022 Unicode®, Inc. 4 | # For terms of use, see https://www.unicode.org/terms_of_use.html 5 | # 6 | # Unicode Character Database 7 | # For documentation, see https://www.unicode.org/reports/tr44/ 8 | # 9 | # This file is a normative contributory data file in the 10 | # Unicode Character Database. 
11 | # 12 | # The normalization stability policy of the Unicode Consortium 13 | # ordinarily precludes any change to the decomposition 14 | # for any character, once established in a relevant version 15 | # of the UnicodeData.txt data file. However, under certain 16 | # exceptional (and rare) conditions, an error in a decomposition 17 | # mapping may be discovered that is truly just an unintended 18 | # typo in the data, and not a matter of dubious interpretation. 19 | # 20 | # Whenever such an error may be found, and if it meets the 21 | # requirements for possible exceptions to normalization 22 | # stability, the correction is entered in this data file, 23 | # so that any implementation depending on absolute stability 24 | # of normalization, *including* any errors in the data, can 25 | # safely reconstruct the exact state of the data tables at 26 | # any given version of Unicode. 27 | # 28 | # Currently this list has exactly six entries in it, one for the 29 | # typo found and corrected in Corrigendum #3, and five for 30 | # the typos and misidentifications found and corrected in 31 | # Corrigendum #4. All efforts 32 | # will be made to keep the entries limited to just those fixes. 33 | # 34 | # Interpretation of the fields: 35 | # Field 0: Unicode code point 36 | # Field 1: Original (erroneous) decomposition 37 | # Field 2: Corrected decomposition 38 | # Field 3: Version of Unicode for which the correction was 39 | # entered into UnicodeData.txt, in n.n.n format. 40 | # Comment: Indicates the Unicode Corrigendum which documents 41 | # the correction 42 | # 43 | # For more information, see UAX #15, Unicode Normalization Forms. 
44 | # 45 | F951;96FB;964B;3.2.0 # Corrigendum 3 46 | 2F868;2136A;36FC;4.0.0 # Corrigendum 4 47 | 2F874;5F33;5F53;4.0.0 # Corrigendum 4 48 | 2F91F;43AB;243AB;4.0.0 # Corrigendum 4 49 | 2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4 50 | 2F9BF;4D57;45D7;4.0.0 # Corrigendum 4 51 | 52 | # EOF 53 | -------------------------------------------------------------------------------- /src/UnicodeData/SpecialCasing.txt: -------------------------------------------------------------------------------- 1 | # SpecialCasing-15.0.0.txt 2 | # Date: 2022-02-02, 23:35:52 GMT 3 | # © 2022 Unicode®, Inc. 4 | # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 5 | # For terms of use, see https://www.unicode.org/terms_of_use.html 6 | # 7 | # Unicode Character Database 8 | # For documentation, see https://www.unicode.org/reports/tr44/ 9 | # 10 | # Special Casing 11 | # 12 | # This file is a supplement to the UnicodeData.txt file. It does not define any 13 | # properties, but rather provides additional information about the casing of 14 | # Unicode characters, for situations when casing incurs a change in string length 15 | # or is dependent on context or locale. For compatibility, the UnicodeData.txt 16 | # file only contains simple case mappings for characters where they are one-to-one 17 | # and independent of context and language. The data in this file, combined with 18 | # the simple case mappings in UnicodeData.txt, defines the full case mappings 19 | # Lowercase_Mapping (lc), Titlecase_Mapping (tc), and Uppercase_Mapping (uc). 20 | # 21 | # Note that the preferred mechanism for defining tailored casing operations is 22 | # the Unicode Common Locale Data Repository (CLDR). For more information, see the 23 | # discussion of case mappings and case algorithms in the Unicode Standard. 24 | # 25 | # All code points not listed in this file that do not have a simple case mappings 26 | # in UnicodeData.txt map to themselves. 
27 | # ================================================================================ 28 | # Format 29 | # ================================================================================ 30 | # The entries in this file are in the following machine-readable format: 31 | # 32 | # <code>; <lower>; <title>; <upper>; (<condition_list>;)? # <comment> 33 | # 34 | # <code>, <lower>, <title>, and <upper> provide the respective full case mappings 35 | # of <code>, expressed as character values in hex. If there is more than one character, 36 | # they are separated by spaces. Other than as used to separate elements, spaces are 37 | # to be ignored. 38 | # 39 | # The <condition_list> is optional. Where present, it consists of one or more language IDs 40 | # or casing contexts, separated by spaces. In these conditions: 41 | # - A condition list overrides the normal behavior if all of the listed conditions are true. 42 | # - The casing context is always the context of the characters in the original string, 43 | # NOT in the resulting string. 44 | # - Case distinctions in the condition list are not significant. 45 | # - Conditions preceded by "Not_" represent the negation of the condition. 46 | # The condition list is not represented in the UCD as a formal property. 47 | # 48 | # A language ID is defined by BCP 47, with '-' and '_' treated equivalently. 49 | # 50 | # A casing context for a character is defined by Section 3.13 Default Case Algorithms 51 | # of The Unicode Standard. 
52 | # 53 | # Parsers of this file must be prepared to deal with future additions to this format: 54 | # * Additional contexts 55 | # * Additional fields 56 | # ================================================================================ 57 | 58 | # ================================================================================ 59 | # Unconditional mappings 60 | # ================================================================================ 61 | 62 | # The German es-zed is special--the normal mapping is to SS. 63 | # Note: the titlecase should never occur in practice. It is equal to titlecase(uppercase(<es-zed>)) 64 | 65 | 00DF; 00DF; 0053 0073; 0053 0053; # LATIN SMALL LETTER SHARP S 66 | 67 | # Preserve canonical equivalence for I with dot. Turkic is handled below. 68 | 69 | 0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE 70 | 71 | # Ligatures 72 | 73 | FB00; FB00; 0046 0066; 0046 0046; # LATIN SMALL LIGATURE FF 74 | FB01; FB01; 0046 0069; 0046 0049; # LATIN SMALL LIGATURE FI 75 | FB02; FB02; 0046 006C; 0046 004C; # LATIN SMALL LIGATURE FL 76 | FB03; FB03; 0046 0066 0069; 0046 0046 0049; # LATIN SMALL LIGATURE FFI 77 | FB04; FB04; 0046 0066 006C; 0046 0046 004C; # LATIN SMALL LIGATURE FFL 78 | FB05; FB05; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE LONG S T 79 | FB06; FB06; 0053 0074; 0053 0054; # LATIN SMALL LIGATURE ST 80 | 81 | 0587; 0587; 0535 0582; 0535 0552; # ARMENIAN SMALL LIGATURE ECH YIWN 82 | FB13; FB13; 0544 0576; 0544 0546; # ARMENIAN SMALL LIGATURE MEN NOW 83 | FB14; FB14; 0544 0565; 0544 0535; # ARMENIAN SMALL LIGATURE MEN ECH 84 | FB15; FB15; 0544 056B; 0544 053B; # ARMENIAN SMALL LIGATURE MEN INI 85 | FB16; FB16; 054E 0576; 054E 0546; # ARMENIAN SMALL LIGATURE VEW NOW 86 | FB17; FB17; 0544 056D; 0544 053D; # ARMENIAN SMALL LIGATURE MEN XEH 87 | 88 | # No corresponding uppercase precomposed character 89 | 90 | 0149; 0149; 02BC 004E; 02BC 004E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE 91 | 0390; 0390; 0399 0308 
0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 92 | 03B0; 03B0; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 93 | 01F0; 01F0; 004A 030C; 004A 030C; # LATIN SMALL LETTER J WITH CARON 94 | 1E96; 1E96; 0048 0331; 0048 0331; # LATIN SMALL LETTER H WITH LINE BELOW 95 | 1E97; 1E97; 0054 0308; 0054 0308; # LATIN SMALL LETTER T WITH DIAERESIS 96 | 1E98; 1E98; 0057 030A; 0057 030A; # LATIN SMALL LETTER W WITH RING ABOVE 97 | 1E99; 1E99; 0059 030A; 0059 030A; # LATIN SMALL LETTER Y WITH RING ABOVE 98 | 1E9A; 1E9A; 0041 02BE; 0041 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING 99 | 1F50; 1F50; 03A5 0313; 03A5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI 100 | 1F52; 1F52; 03A5 0313 0300; 03A5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 101 | 1F54; 1F54; 03A5 0313 0301; 03A5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 102 | 1F56; 1F56; 03A5 0313 0342; 03A5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 103 | 1FB6; 1FB6; 0391 0342; 0391 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI 104 | 1FC6; 1FC6; 0397 0342; 0397 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI 105 | 1FD2; 1FD2; 0399 0308 0300; 0399 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 106 | 1FD3; 1FD3; 0399 0308 0301; 0399 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 107 | 1FD6; 1FD6; 0399 0342; 0399 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI 108 | 1FD7; 1FD7; 0399 0308 0342; 0399 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 109 | 1FE2; 1FE2; 03A5 0308 0300; 03A5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 110 | 1FE3; 1FE3; 03A5 0308 0301; 03A5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 111 | 1FE4; 1FE4; 03A1 0313; 03A1 0313; # GREEK SMALL LETTER RHO WITH PSILI 112 | 1FE6; 1FE6; 03A5 0342; 03A5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI 113 | 1FE7; 1FE7; 03A5 0308 0342; 03A5 0308 
0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 114 | 1FF6; 1FF6; 03A9 0342; 03A9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI 115 | 116 | # IMPORTANT-when iota-subscript (0345) is uppercased or titlecased, 117 | # the result will be incorrect unless the iota-subscript is moved to the end 118 | # of any sequence of combining marks. Otherwise, the accents will go on the capital iota. 119 | # This process can be achieved by first transforming the text to NFC before casing. 120 | # E.g. <alpha><iota_subscript><acute> is uppercased to <ALPHA><acute><IOTA> 121 | 122 | # The following cases are already in the UnicodeData.txt file, so are only commented here. 123 | 124 | # 0345; 0345; 0399; 0399; # COMBINING GREEK YPOGEGRAMMENI 125 | 126 | # All letters with YPOGEGRAMMENI (iota-subscript) or PROSGEGRAMMENI (iota adscript) 127 | # have special uppercases. 128 | # Note: characters with PROSGEGRAMMENI are actually titlecase, not uppercase! 129 | 130 | 1F80; 1F80; 1F88; 1F08 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 131 | 1F81; 1F81; 1F89; 1F09 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 132 | 1F82; 1F82; 1F8A; 1F0A 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 133 | 1F83; 1F83; 1F8B; 1F0B 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 134 | 1F84; 1F84; 1F8C; 1F0C 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 135 | 1F85; 1F85; 1F8D; 1F0D 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 136 | 1F86; 1F86; 1F8E; 1F0E 0399; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 137 | 1F87; 1F87; 1F8F; 1F0F 0399; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 138 | 1F88; 1F80; 1F88; 1F08 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 139 | 1F89; 1F81; 1F89; 1F09 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 140 | 1F8A; 1F82; 1F8A; 1F0A 0399; # 
GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 141 | 1F8B; 1F83; 1F8B; 1F0B 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 142 | 1F8C; 1F84; 1F8C; 1F0C 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 143 | 1F8D; 1F85; 1F8D; 1F0D 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 144 | 1F8E; 1F86; 1F8E; 1F0E 0399; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 145 | 1F8F; 1F87; 1F8F; 1F0F 0399; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 146 | 1F90; 1F90; 1F98; 1F28 0399; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 147 | 1F91; 1F91; 1F99; 1F29 0399; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 148 | 1F92; 1F92; 1F9A; 1F2A 0399; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 149 | 1F93; 1F93; 1F9B; 1F2B 0399; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 150 | 1F94; 1F94; 1F9C; 1F2C 0399; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 151 | 1F95; 1F95; 1F9D; 1F2D 0399; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 152 | 1F96; 1F96; 1F9E; 1F2E 0399; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 153 | 1F97; 1F97; 1F9F; 1F2F 0399; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 154 | 1F98; 1F90; 1F98; 1F28 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 155 | 1F99; 1F91; 1F99; 1F29 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 156 | 1F9A; 1F92; 1F9A; 1F2A 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 157 | 1F9B; 1F93; 1F9B; 1F2B 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 158 | 1F9C; 1F94; 1F9C; 1F2C 0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 159 | 1F9D; 1F95; 1F9D; 1F2D 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 160 | 1F9E; 1F96; 1F9E; 1F2E 
0399; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 161 | 1F9F; 1F97; 1F9F; 1F2F 0399; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 162 | 1FA0; 1FA0; 1FA8; 1F68 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 163 | 1FA1; 1FA1; 1FA9; 1F69 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 164 | 1FA2; 1FA2; 1FAA; 1F6A 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 165 | 1FA3; 1FA3; 1FAB; 1F6B 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 166 | 1FA4; 1FA4; 1FAC; 1F6C 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 167 | 1FA5; 1FA5; 1FAD; 1F6D 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 168 | 1FA6; 1FA6; 1FAE; 1F6E 0399; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 169 | 1FA7; 1FA7; 1FAF; 1F6F 0399; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 170 | 1FA8; 1FA0; 1FA8; 1F68 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 171 | 1FA9; 1FA1; 1FA9; 1F69 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 172 | 1FAA; 1FA2; 1FAA; 1F6A 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 173 | 1FAB; 1FA3; 1FAB; 1F6B 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 174 | 1FAC; 1FA4; 1FAC; 1F6C 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 175 | 1FAD; 1FA5; 1FAD; 1F6D 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 176 | 1FAE; 1FA6; 1FAE; 1F6E 0399; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 177 | 1FAF; 1FA7; 1FAF; 1F6F 0399; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 178 | 1FB3; 1FB3; 1FBC; 0391 0399; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 179 | 1FBC; 1FB3; 1FBC; 0391 0399; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 180 | 1FC3; 1FC3; 1FCC; 
0397 0399; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 181 | 1FCC; 1FC3; 1FCC; 0397 0399; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 182 | 1FF3; 1FF3; 1FFC; 03A9 0399; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 183 | 1FFC; 1FF3; 1FFC; 03A9 0399; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 184 | 185 | # Some characters with YPOGEGRAMMENI also have no corresponding titlecases 186 | 187 | 1FB2; 1FB2; 1FBA 0345; 1FBA 0399; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 188 | 1FB4; 1FB4; 0386 0345; 0386 0399; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 189 | 1FC2; 1FC2; 1FCA 0345; 1FCA 0399; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 190 | 1FC4; 1FC4; 0389 0345; 0389 0399; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 191 | 1FF2; 1FF2; 1FFA 0345; 1FFA 0399; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 192 | 1FF4; 1FF4; 038F 0345; 038F 0399; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 193 | 194 | 1FB7; 1FB7; 0391 0342 0345; 0391 0342 0399; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 195 | 1FC7; 1FC7; 0397 0342 0345; 0397 0342 0399; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 196 | 1FF7; 1FF7; 03A9 0342 0345; 03A9 0342 0399; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 197 | 198 | # ================================================================================ 199 | # Conditional Mappings 200 | # The remainder of this file provides conditional casing data used to produce 201 | # full case mappings. 202 | # ================================================================================ 203 | # Language-Insensitive Mappings 204 | # These are characters whose full case mappings do not depend on language, but do 205 | # depend on context (which characters come before or after). For more information 206 | # see the header of this file and the Unicode Standard. 
207 | # ================================================================================ 208 | 209 | # Special case for final form of sigma 210 | 211 | 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA 212 | 213 | # Note: the following cases for non-final are already in the UnicodeData.txt file. 214 | 215 | # 03A3; 03C3; 03A3; 03A3; # GREEK CAPITAL LETTER SIGMA 216 | # 03C3; 03C3; 03A3; 03A3; # GREEK SMALL LETTER SIGMA 217 | # 03C2; 03C2; 03A3; 03A3; # GREEK SMALL LETTER FINAL SIGMA 218 | 219 | # Note: the following cases are not included, since they would case-fold in lowercasing 220 | 221 | # 03C3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK SMALL LETTER SIGMA 222 | # 03C2; 03C3; 03A3; 03A3; Not_Final_Sigma; # GREEK SMALL LETTER FINAL SIGMA 223 | 224 | # ================================================================================ 225 | # Language-Sensitive Mappings 226 | # These are characters whose full case mappings depend on language and perhaps also 227 | # context (which characters come before or after). For more information 228 | # see the header of this file and the Unicode Standard. 229 | # ================================================================================ 230 | 231 | # Lithuanian 232 | 233 | # Lithuanian retains the dot in a lowercase i when followed by accents. 234 | 235 | # Remove DOT ABOVE after "i" with upper or titlecase 236 | 237 | 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE 238 | 239 | # Introduce an explicit dot above when lowercasing capital I's and J's 240 | # whenever there are more accents above. 
241 | # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) 242 | 243 | 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I 244 | 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J 245 | 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK 246 | 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE 247 | 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE 248 | 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE 249 | 250 | # ================================================================================ 251 | 252 | # Turkish and Azeri 253 | 254 | # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri 255 | # The following rules handle those cases. 256 | 257 | 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE 258 | 0130; 0069; 0130; 0130; az; # LATIN CAPITAL LETTER I WITH DOT ABOVE 259 | 260 | # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. 261 | # This matches the behavior of the canonically equivalent I-dot_above 262 | 263 | 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE 264 | 0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE 265 | 266 | # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. 267 | 268 | 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I 269 | 0049; 0131; 0049; 0049; az Not_Before_Dot; # LATIN CAPITAL LETTER I 270 | 271 | # When uppercasing, i turns into a dotted capital I 272 | 273 | 0069; 0069; 0130; 0130; tr; # LATIN SMALL LETTER I 274 | 0069; 0069; 0130; 0130; az; # LATIN SMALL LETTER I 275 | 276 | # Note: the following case is already in the UnicodeData.txt file. 
277 | 278 | # 0131; 0131; 0049; 0049; tr; # LATIN SMALL LETTER DOTLESS I 279 | 280 | # EOF 281 | 282 | -------------------------------------------------------------------------------- /src/UnicodeData/empty.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/howto.txt: -------------------------------------------------------------------------------- 1 | 2 | 1. First, compile and run PUCUConvertUnicode 3 | 2. Second, compile and run PUCUGenCodePages (which is Windows-only for now) 4 | 3. Then !!!!recompile!!! and run PUCUBuild 5 | 4. Delete the no-longer-needed PUCUUnicodePass1.pas, PUCUUnicodePass2.pas and PUCUCodePages.inc files from the same directory as the PUCUConvertUnicode, PUCUGenCodePages and PUCUBuild binaries 6 | 7 | 8 | --------------------------------------------------------------------------------