├── UniHax ├── UniHax.pidb ├── Properties │ ├── Resources.resources │ ├── AssemblyInfo.cs │ ├── Resources.Designer.cs │ └── Resources.resx ├── ClassDiagram1.cd ├── Exceptions.cs ├── UnicodeMapping.cs ├── UniHax.csproj ├── BestFitMapping.cs ├── UnicodeChar.cs ├── Fuzzer.cs └── Mappings.cs ├── TestUniHax ├── TestUniHax.pidb ├── Properties │ ├── Settings.settings │ ├── DataSources │ │ └── UniMap.Mappings.datasource │ ├── Settings.Designer.cs │ ├── AssemblyInfo.cs │ ├── Resources.Designer.cs │ └── Resources.resx ├── Program.cs ├── TestUniHax.csproj ├── Form1.resx ├── Form1.cs └── Form1.Designer.cs ├── LICENSE.html ├── .gitignore ├── UniHax.sln └── README.md /UniHax/UniHax.pidb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cweb/unicode-hax/HEAD/UniHax/UniHax.pidb -------------------------------------------------------------------------------- /TestUniHax/TestUniHax.pidb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cweb/unicode-hax/HEAD/TestUniHax/TestUniHax.pidb -------------------------------------------------------------------------------- /UniHax/Properties/Resources.resources: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cweb/unicode-hax/HEAD/UniHax/Properties/Resources.resources -------------------------------------------------------------------------------- /TestUniHax/Properties/Settings.settings: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /UniHax/ClassDiagram1.cd: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | ACAAAAAAAAAAAAAAoAACQBAACEAAAAgAAAACAAAgAAA= 7 | Mappings.cs 8 | 9 | 10 | 11 | 
-------------------------------------------------------------------------------- /TestUniHax/Program.cs: --------------------------------------------------------------------------------
using System;
using System.Windows.Forms;

namespace TestUniMap
{
    /// <summary>
    /// Hosts the process entry point of the test application.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// Application entry point: turns on visual styles, selects the default
        /// text rendering mode, then shows <c>FormUniMapTest</c> and runs the
        /// message loop until that form closes.
        /// </summary>
        [STAThread]
        static void Main()
        {
            // Both settings are applied before the form is constructed.
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            FormUniMapTest mainForm = new FormUniMapTest();
            Application.Run(mainForm);
        }
    }
}
-------------------------------------------------------------------------------- /TestUniHax/Properties/DataSources/UniMap.Mappings.datasource: -------------------------------------------------------------------------------- 1 | 2 | 8 | 9 | UniMap.Mappings, UniMap, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null 10 | -------------------------------------------------------------------------------- /LICENSE.html: -------------------------------------------------------------------------------- 1 | Creative Commons License
Unicode-Hax by Chris Weber is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
Based on a work at https://github.com/cweb/unicode-hax. 2 | -------------------------------------------------------------------------------- /TestUniHax/Properties/Settings.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.1 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace TestUniHax.Properties { 12 | 13 | 14 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 15 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")] 16 | internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase { 17 | 18 | private static Settings defaultInstance = ((Settings)(global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings()))); 19 | 20 | public static Settings Default { 21 | get { 22 | return defaultInstance; 23 | } 24 | } 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /UniHax/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 
8 | [assembly: AssemblyTitle("UniMap")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("Microsoft")] 12 | [assembly: AssemblyProduct("UniMap")] 13 | [assembly: AssemblyCopyright("Copyright © Microsoft 2011")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("5f203b2d-de63-4204-bfb3-7f811bf0ebc1")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /TestUniHax/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.CompilerServices; 3 | using System.Runtime.InteropServices; 4 | 5 | // General Information about an assembly is controlled through the following 6 | // set of attributes. Change these attribute values to modify the information 7 | // associated with an assembly. 
8 | [assembly: AssemblyTitle("TestUniMap")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("Microsoft")] 12 | [assembly: AssemblyProduct("TestUniMap")] 13 | [assembly: AssemblyCopyright("Copyright © Microsoft 2011")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | [assembly: ComVisible(false)] 21 | 22 | // The following GUID is for the ID of the typelib if this project is exposed to COM 23 | [assembly: Guid("248bce11-a986-4cc6-9c61-ce90bd108952")] 24 | 25 | // Version information for an assembly consists of the following four values: 26 | // 27 | // Major Version 28 | // Minor Version 29 | // Build Number 30 | // Revision 31 | // 32 | // You can specify all the values or you can default the Build and Revision Numbers 33 | // by using the '*' as shown below: 34 | // [assembly: AssemblyVersion("1.0.*")] 35 | [assembly: AssemblyVersion("1.0.0.0")] 36 | [assembly: AssemblyFileVersion("1.0.0.0")] 37 | -------------------------------------------------------------------------------- /UniHax/Exceptions.cs: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2011 by Christopher Weber 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | 11 | // The above copyright notice and 
// this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Authors:
//  Christopher Weber (chris@lookout.net)

using System;


namespace UniHax
{
    /// <summary>
    /// Exception type for best-fit mapping failures. <see cref="Message"/> always
    /// starts with the fixed prefix "Bestfit mapping error:" followed by the detail
    /// text supplied at construction (empty when none was given).
    /// </summary>
    public class BestFitMappingException : ApplicationException
    {
        // Detail text appended to the fixed prefix by Message; never null.
        private readonly string messageDetails = String.Empty;

        /// <summary>
        /// Time associated with the error. This class never assigns it; callers set it.
        /// </summary>
        public DateTime TimeStamp { get; set; }

        /// <summary>
        /// Free-form description of the cause. This class never assigns it; callers set it.
        /// </summary>
        public string CauseOfError { get; set; }

        /// <summary>
        /// Creates the exception with no detail text.
        /// </summary>
        public BestFitMappingException()
        {
        }

        /// <summary>
        /// Creates the exception with detail text that is appended to the message prefix.
        /// </summary>
        /// <param name="message">Detail text; null is treated as empty.</param>
        public BestFitMappingException(string message)
            : base(message)
        {
            messageDetails = message ?? String.Empty;
        }

        /// <summary>
        /// Creates the exception with detail text and the exception that caused it.
        /// </summary>
        /// <param name="message">Detail text; null is treated as empty.</param>
        /// <param name="innerException">The underlying cause, or null.</param>
        public BestFitMappingException(string message, Exception innerException)
            : base(message, innerException)
        {
            messageDetails = message ?? String.Empty;
        }

        /// <summary>
        /// Gets "Bestfit mapping error:" followed by the detail text.
        /// </summary>
        public override string Message
        {
            get
            {
                return String.Format("Bestfit mapping error:{0}", messageDetails);
            }
        }
    }
}
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs) 2 | [Bb]in/ 3 | [Oo]bj/ 4 | 5 | # mstest test results 6 | TestResults 7 | 8 | ## Ignore Visual Studio temporary files, build results, and 9 | ## files generated by popular Visual Studio add-ons. 
10 | 11 | # User-specific files 12 | *.suo 13 | *.user 14 | *.sln.docstates 15 | 16 | # Build results 17 | [Dd]ebug/ 18 | [Rr]elease/ 19 | x64/ 20 | *_i.c 21 | *_p.c 22 | *.ilk 23 | *.meta 24 | *.obj 25 | *.pch 26 | *.pdb 27 | *.pgc 28 | *.pgd 29 | *.rsp 30 | *.sbr 31 | *.tlb 32 | *.tli 33 | *.tlh 34 | *.tmp 35 | *.log 36 | *.vspscc 37 | *.vssscc 38 | .builds 39 | 40 | # Visual C++ cache files 41 | ipch/ 42 | *.aps 43 | *.ncb 44 | *.opensdf 45 | *.sdf 46 | 47 | # Visual Studio profiler 48 | *.psess 49 | *.vsp 50 | *.vspx 51 | 52 | # Guidance Automation Toolkit 53 | *.gpState 54 | 55 | # ReSharper is a .NET coding add-in 56 | _ReSharper* 57 | 58 | # NCrunch 59 | *.ncrunch* 60 | .*crunch*.local.xml 61 | 62 | # Installshield output folder 63 | [Ee]xpress 64 | 65 | # DocProject is a documentation generator add-in 66 | DocProject/buildhelp/ 67 | DocProject/Help/*.HxT 68 | DocProject/Help/*.HxC 69 | DocProject/Help/*.hhc 70 | DocProject/Help/*.hhk 71 | DocProject/Help/*.hhp 72 | DocProject/Help/Html2 73 | DocProject/Help/html 74 | 75 | # Click-Once directory 76 | publish 77 | 78 | # Publish Web Output 79 | *.Publish.xml 80 | 81 | # NuGet Packages Directory 82 | packages 83 | 84 | # Windows Azure Build Output 85 | csx 86 | *.build.csdef 87 | 88 | # Windows Store app package directory 89 | AppPackages/ 90 | 91 | # Others 92 | [Bb]in 93 | [Oo]bj 94 | sql 95 | TestResults 96 | [Tt]est[Rr]esult* 97 | *.Cache 98 | ClientBin 99 | [Ss]tyle[Cc]op.* 100 | ~$* 101 | *.dbmdl 102 | Generated_Code #added for RIA/Silverlight projects 103 | 104 | # Backup & report files from converting an old project file to a newer 105 | # Visual Studio version. 
Backup files are not needed, because we have git ;-) 106 | _UpgradeReport_Files/ 107 | Backup*/ 108 | UpgradeLog*.XML 109 | -------------------------------------------------------------------------------- /UniHax.sln: -------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 11.00 3 | # Visual Studio 2010 4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UniHax", "UniHax\UniHax.csproj", "{434C7AAC-316B-4425-A459-730E3E127505}" 5 | EndProject 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestUniHax", "TestUniHax\TestUniHax.csproj", "{6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}" 7 | EndProject 8 | Global 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 10 | Debug|Any CPU = Debug|Any CPU 11 | Debug|Mixed Platforms = Debug|Mixed Platforms 12 | Debug|x86 = Debug|x86 13 | Release|Any CPU = Release|Any CPU 14 | Release|Mixed Platforms = Release|Mixed Platforms 15 | Release|x86 = Release|x86 16 | EndGlobalSection 17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 18 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 19 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Any CPU.Build.0 = Debug|Any CPU 20 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 21 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 22 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|x86.ActiveCfg = Debug|Any CPU 23 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Any CPU.ActiveCfg = Release|Any CPU 24 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Any CPU.Build.0 = Release|Any CPU 25 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 26 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Mixed Platforms.Build.0 = Release|Any CPU 27 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|x86.ActiveCfg = Release|Any CPU 28 | 
{6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|Any CPU.ActiveCfg = Debug|x86 29 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|Mixed Platforms.ActiveCfg = Debug|x86 30 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|Mixed Platforms.Build.0 = Debug|x86 31 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|x86.ActiveCfg = Debug|x86 32 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|x86.Build.0 = Debug|x86 33 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|Any CPU.ActiveCfg = Release|x86 34 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|Mixed Platforms.ActiveCfg = Release|x86 35 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|Mixed Platforms.Build.0 = Release|x86 36 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|x86.ActiveCfg = Release|x86 37 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|x86.Build.0 = Release|x86 38 | EndGlobalSection 39 | GlobalSection(SolutionProperties) = preSolution 40 | HideSolutionNode = FALSE 41 | EndGlobalSection 42 | EndGlobal 43 | -------------------------------------------------------------------------------- /TestUniHax/Properties/Resources.Designer.cs: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.1 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace TestUniHax.Properties { 12 | using System; 13 | 14 | 15 | /// 16 | /// A strongly-typed resource class, for looking up localized strings, etc. 17 | /// 18 | // This class was auto-generated by the StronglyTypedResourceBuilder 19 | // class via a tool like ResGen or Visual Studio. 20 | // To add or remove a member, edit your .ResX file then rerun ResGen 21 | // with the /str option, or rebuild your VS project. 
22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] 23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] 24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 25 | internal class Resources { 26 | 27 | private static global::System.Resources.ResourceManager resourceMan; 28 | 29 | private static global::System.Globalization.CultureInfo resourceCulture; 30 | 31 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] 32 | internal Resources() { 33 | } 34 | 35 | /// 36 | /// Returns the cached ResourceManager instance used by this class. 37 | /// 38 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 39 | internal static global::System.Resources.ResourceManager ResourceManager { 40 | get { 41 | if (object.ReferenceEquals(resourceMan, null)) { 42 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("TestUniHax.Properties.Resources", typeof(Resources).Assembly); 43 | resourceMan = temp; 44 | } 45 | return resourceMan; 46 | } 47 | } 48 | 49 | /// 50 | /// Overrides the current thread's CurrentUICulture property for all 51 | /// resource lookups using this strongly typed resource class. 
52 | /// 53 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 54 | internal static global::System.Globalization.CultureInfo Culture { 55 | get { 56 | return resourceCulture; 57 | } 58 | set { 59 | resourceCulture = value; 60 | } 61 | } 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /UniHax/UnicodeMapping.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 by Christopher Weber 2 | 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | // SOFTWARE. 20 | 21 | // Authors: 22 | // Christopher Weber (chris@lookout.net) 23 | 24 | namespace UniHax 25 | { 26 | /// 27 | /// Represents a Unicode mapping to an ASCII character. 
28 | /// 29 | public class UnicodeMapping 30 | { 31 | private string character; 32 | private string ascii; 33 | private string unicode; 34 | private string name; 35 | private string transform; 36 | 37 | public readonly string[] transformations = { 38 | "Simple_Lowercase_Mapping", 39 | "Lowercase_Mapping", 40 | "Simple_Case_Folding", 41 | "cf", 42 | "Simple_Uppercase_Mapping", 43 | "Simple_Titlecase_Mapping", 44 | "Uppercase_Mapping", 45 | "Titlecase_Mapping", 46 | "Decomposition_Mapping", 47 | "FC_NFKC" 48 | }; 49 | 50 | public string Character 51 | { 52 | get { return character; } 53 | set { character = value; } 54 | } 55 | 56 | public string Ascii 57 | { 58 | get { return ascii; } 59 | set { ascii = value; } 60 | } 61 | 62 | public string Unicode 63 | { 64 | get { return unicode; } 65 | set { unicode = value; } 66 | } 67 | 68 | public string Name 69 | { 70 | get { return name; } 71 | set { name = value; } 72 | } 73 | 74 | public string Transform 75 | { 76 | get { return transform; } 77 | set { transform = value; } 78 | } 79 | 80 | 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /UniHax/UniHax.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Debug 5 | AnyCPU 6 | 8.0.30703 7 | 2.0 8 | {434C7AAC-316B-4425-A459-730E3E127505} 9 | Library 10 | Properties 11 | UniHax 12 | UniHax 13 | v4.0 14 | 512 15 | 16 | 17 | true 18 | full 19 | false 20 | bin\Debug\ 21 | DEBUG;TRACE 22 | prompt 23 | 4 24 | 25 | 26 | pdbonly 27 | true 28 | bin\Release\ 29 | TRACE 30 | prompt 31 | 4 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | True 49 | True 50 | Resources.resx 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | ResXFileCodeGenerator 61 | Resources.Designer.cs 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | Designer 70 | 71 | 72 | 73 | 74 | 75 | 76 | 83 | -------------------------------------------------------------------------------- /README.md: 
-------------------------------------------------------------------------------- 1 | unicode-hax 2 | =========== 3 | 4 | A library to assist in security-testing Unicode-enabled applications. The original intent of putting this together 5 | was threefold: 6 | 7 | 1. To provide a reduced set of useful Unicode input to a software fuzzer 8 | 2. To document historically problematic Unicode character sequences which 9 | might negatively affect protocols and Web applications. 10 | 3. To look up mappings for ASCII-equivalent characters 11 | 12 | For example, the __best-fit__ and __normalization__ mappings can be useful for testing Web applications for 13 | cross-site scripting (XSS) or SQL injection (SQLi) vulnerabilities, by providing you with alternative 14 | characters which map back, or transform, to the intended ASCII-encoded input - such as "<", "'", etc. 15 | 16 | Additionally, many __problem characters__ have been pre-defined as a small set, reducing the number of iterations 17 | a fuzzer might need to perform. 18 | 19 | Major features: 20 | - best-fit mappings 21 | - Unicode normalization mappings 22 | - hard-coded Unicode characters useful in fuzzing 23 | 24 | For fuzzing applications it includes: 25 | - ill-formed byte sequences 26 | - non-characters 27 | - private use area (PUA) 28 | - unassigned code points 29 | - code points with special meaning such as the BOM and RLO 30 | - half-surrogate values 31 | 32 | /TestUniHax 33 | ----------- 34 | This Windows Forms application loads the UniHax library mainly to test the best-fit and normalization mappings. 35 | If you simply input a single ASCII character, all of its equivalent characters will be displayed. 36 | 37 | For example, if you're testing a Web application and want to test equivalents for the "<" character U+003C, 38 | enter that as input and select either "best-fit mapping", which is linked to a charset encoding, 39 | or "normalization" equivalents. 
For this character, the following are best-fits: 40 | 41 | - U+003B in the APL-ISO-IR-68 encoding 42 | - U+0014 in the CP424 encoding 43 | - etc. 44 | 45 | Also, the following are normalization decomposition mappings: 46 | 47 | - U+FE64 SMALL LESS-THAN SIGN 48 | - U+FF1C FULLWIDTH LESS-THAN SIGN 49 | 50 | /UniHax 51 | ------- 52 | This library contains a small set of __problematic Unicode characters__ in **Fuzzer.cs** such as the following: 53 | 54 | ```csharp 55 | /// 56 | /// An unassigned code point U+0FED 57 | /// 58 | public static readonly string uUnassigned = "\u0FED"; 59 | /// 60 | /// An illegal low half-surrogate U+DEAD 61 | /// 62 | public static readonly string uDEAD = "\uDEAD"; 63 | ``` 64 | 65 | It also provides the following method, which returns those characters as a byte array in any encoding: 66 | 67 | ```csharp 68 | public byte[] GetCharacterBytes(string encoding, string character) 69 | ``` 70 | 71 | There's also the following method, which returns any Unicode character as a malformed byte sequence, simply by 72 | trimming the last byte. 73 | 74 | ```csharp 75 | public byte[] GetCharacterBytesMalformed(string encoding, string character) 76 | ``` 77 | 78 | This project also contains the data files, pre-created in the __/data__ folder, and a __Mappings.cs__ Mappings 79 | class which can look up mapping equivalents for the following: 80 | 81 | - ASCII-equivalent best-fit mappings across legacy character encodings 82 | - ASCII-equivalent mappings for Unicode normalization types. For example, Web browsers commonly use 83 | a form of normalization for keeping URL content and host names compatible. 84 | 85 | For more on Unicode Normalization see TR15: http://www.unicode.org/reports/tr15/ 86 | 87 | License 88 | ------- 89 | Unicode-Hax by Chris Weber is licensed under a 90 | 91 | Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License. 92 | Based on a work at https://github.com/cweb/unicode-hax. 
93 | -------------------------------------------------------------------------------- /UniHax/BestFitMapping.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 by Christopher Weber 2 | 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | // SOFTWARE. 20 | 21 | // Authors: 22 | // Christopher Weber (chris@lookout.net) 23 | 24 | namespace UniHax 25 | { 26 | /// 27 | /// Represents a bestfit mapping between two characters. 
28 | /// 29 | public class BestFitMapping 30 | { 31 | private string character; 32 | private string ascii; 33 | private string unicode; 34 | private string name; 35 | private string charset; 36 | 37 | public readonly string[] charsets = { 38 | "APL-ISO-IR-68", 39 | "CP424", 40 | "IBMGRAPH", 41 | "US-ASCII-QUOTES", 42 | "windows-1250", 43 | "windows-1251", 44 | "windows-1252", 45 | "windows-1253", 46 | "windows-1254", 47 | "windows-1255", 48 | "windows-1256", 49 | "windows-1257", 50 | "windows-1258", 51 | "windows-874", 52 | "CP864", 53 | "CP037", 54 | "CP1026", 55 | "CP500", 56 | "CP875", 57 | "DINGBATS", 58 | "KEYBOARD", 59 | "SYMBOL", 60 | "symbol", 61 | "zdingbat", 62 | "JAPANESE", 63 | "GSM0338" 64 | }; 65 | 66 | public string Character 67 | { 68 | get { return character; } 69 | set { character = value; } 70 | } 71 | 72 | public string Ascii 73 | { 74 | get { return ascii; } 75 | set { ascii = value; } 76 | } 77 | 78 | public string Unicode 79 | { 80 | get { return unicode; } 81 | set { unicode = value; } 82 | } 83 | 84 | public string Name 85 | { 86 | get { return name; } 87 | set { name = value; } 88 | } 89 | 90 | public string Charset 91 | { 92 | get { return charset; } 93 | set { charset = value; } 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /UniHax/UnicodeChar.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 by Christopher Weber 2 | 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | 10 | // The above copyright 
// notice and this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Authors:
//  Christopher Weber (chris@lookout.net)

using System;

namespace UniHax
{
    /// <summary>
    /// Pairs a Unicode code point, written as hexadecimal digits such as "003C",
    /// with its string form, and offers helpers that convert between the two.
    /// </summary>
    /// <remarks>
    /// [!] The helpers that take a <see cref="char"/> work on a single UTF-16 code
    /// unit, so they can only describe Basic Multilingual Plane (BMP) values.
    /// [!] Surrogate pairs are never combined by those helpers.
    /// </remarks>
    public struct UniChar
    {
        /// <summary>
        /// Code point as hexadecimal digits. The methods of this struct neither read
        /// nor update it; callers manage it.
        /// </summary>
        public string CodePoint;

        /// <summary>
        /// String form of the character. The methods of this struct neither read nor
        /// update it; callers manage it.
        /// </summary>
        public string Character;

        /// <summary>
        /// Formats a UTF-16 code unit as at least four upper-case hexadecimal digits.
        /// </summary>
        /// <param name="c">The code unit to format.</param>
        /// <returns>For example "003C" for '&lt;'.</returns>
        public string GetCodePoint(char c)
        {
            // Widening a char to int cannot fail, so no exception handling is needed.
            return String.Format("{0:X4}", (int)c);
        }

        /// <summary>
        /// Returns the given UTF-16 code unit as a one-character string.
        /// </summary>
        /// <param name="character">The code unit to convert.</param>
        /// <returns>A string of length one containing <paramref name="character"/>.</returns>
        public string ConvertCharacterToString(char character)
        {
            // Convert the argument itself; the CodePoint field is not consulted.
            return character.ToString();
        }

        /// <summary>
        /// Returns the code point of the given UTF-16 code unit as hexadecimal digits.
        /// </summary>
        /// <param name="character">The code unit to describe.</param>
        /// <returns>The same text <see cref="GetCodePoint"/> produces for the argument.</returns>
        public string ConvertCharacterToCodePoint(char character)
        {
            return GetCodePoint(character);
        }

        /// <summary>
        /// Converts hexadecimal code point text into the corresponding string.
        /// </summary>
        /// <param name="codepoint">
        /// Hexadecimal digits; surrounding white space is trimmed before parsing.
        /// </param>
        /// <returns>
        /// The string for the parsed code point. Text that cannot be parsed as a
        /// 32-bit hexadecimal number (malformed, empty, or too long) is treated as
        /// code point 0 and yields "\0".
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="codepoint"/> is null.
        /// </exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The parsed value is rejected by <see cref="Char.ConvertFromUtf32"/>.
        /// </exception>
        public string ConvertCodePointToString(string codepoint)
        {
            if (codepoint == null)
            {
                throw new ArgumentNullException("codepoint");
            }

            int value;
            try
            {
                value = Convert.ToInt32(codepoint.Trim(), 16); // 0x00 to 0x10ffff
            }
            catch (FormatException)
            {
                value = 0;
            }
            catch (ArgumentOutOfRangeException)
            {
                // Convert.ToInt32 is documented to raise this for empty input.
                value = 0;
            }
            catch (OverflowException)
            {
                // More digits than fit in 32 bits: same fallback as other bad input.
                value = 0;
            }

            return Char.ConvertFromUtf32(value);
        }
    }
}
-------------------------------------------------------------------------------- /UniHax/Properties/Resources.Designer.cs: -------------------------------------------------------------------------------- 1 | 
//------------------------------------------------------------------------------ 2 | // 3 | // This code was generated by a tool. 4 | // Runtime Version:4.0.30319.1 5 | // 6 | // Changes to this file may cause incorrect behavior and will be lost if 7 | // the code is regenerated. 8 | // 9 | //------------------------------------------------------------------------------ 10 | 11 | namespace UniHax.Properties { 12 | using System; 13 | 14 | 15 | /// 16 | /// A strongly-typed resource class, for looking up localized strings, etc. 17 | /// 18 | // This class was auto-generated by the StronglyTypedResourceBuilder 19 | // class via a tool like ResGen or Visual Studio. 20 | // To add or remove a member, edit your .ResX file then rerun ResGen 21 | // with the /str option, or rebuild your VS project. 22 | [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")] 23 | [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] 24 | [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] 25 | internal class Resources { 26 | 27 | private static global::System.Resources.ResourceManager resourceMan; 28 | 29 | private static global::System.Globalization.CultureInfo resourceCulture; 30 | 31 | [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] 32 | internal Resources() { 33 | } 34 | 35 | /// 36 | /// Returns the cached ResourceManager instance used by this class. 
37 | /// 38 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 39 | internal static global::System.Resources.ResourceManager ResourceManager { 40 | get { 41 | if (object.ReferenceEquals(resourceMan, null)) { 42 | global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("UniHax.Properties.Resources", typeof(Resources).Assembly); 43 | resourceMan = temp; 44 | } 45 | return resourceMan; 46 | } 47 | } 48 | 49 | /// 50 | /// Overrides the current thread's CurrentUICulture property for all 51 | /// resource lookups using this strongly typed resource class. 52 | /// 53 | [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] 54 | internal static global::System.Globalization.CultureInfo Culture { 55 | get { 56 | return resourceCulture; 57 | } 58 | set { 59 | resourceCulture = value; 60 | } 61 | } 62 | 63 | /// 64 | /// Looks up a localized string similar to <Bestfit> 65 | /// <Mapping> 66 | /// <Ascii>0021</Ascii> 67 | /// <Unicode>00A8</Unicode> 68 | /// <Name>#</Name> 69 | /// <Charset>APL-ISO-IR-68</Charset> 70 | /// </Mapping> 71 | /// <Mapping> 72 | /// <Ascii>0022</Ascii> 73 | /// <Unicode>0029</Unicode> 74 | /// <Name>#</Name> 75 | /// <Charset>APL-ISO-IR-68</Charset> 76 | /// </Mapping> 77 | /// <Mapping> 78 | /// <Ascii>0023</Ascii> 79 | /// <Unicode>003C</Unicode> 80 | /// <Name>#</Name> 81 | /// <Charset>APL-ISO-IR-68</Charset> 82 | /// </Mapping> 83 | /// <Mapping> 84 | /// <Ascii>0024</Ascii> 85 | /// <Unicode>2264</Unicode> 86 | /// <Name>#</Name [rest of string was truncated]";. 
87 | /// 88 | internal static string bestfit { 89 | get { 90 | return ResourceManager.GetString("bestfit", resourceCulture); 91 | } 92 | } 93 | 94 | /// 95 | /// Looks up a localized string similar to <Bestfit> 96 | /// <Mapping> 97 | /// <Ascii>006A</Ascii> 98 | /// <Unicode>004A</Unicode> 99 | /// <Name>LATIN CAPITAL LETTER J</Name> 100 | /// <Transform>Simple_Lowercase_Mapping</Transform> 101 | /// </Mapping> 102 | /// <Mapping> 103 | /// <Ascii>006A</Ascii> 104 | /// <Unicode>004A</Unicode> 105 | /// <Name>LATIN CAPITAL LETTER J</Name> 106 | /// <Transform>Lowercase_Mapping</Transform> 107 | /// </Mapping> 108 | /// <Mapping> 109 | /// <Ascii>006A</Ascii> 110 | /// <Unicode>004A</Unicode> 111 | /// <Name>LATIN CAPITAL LETTER J</Name> 112 | /// <Transform>Simple_Case_Folding</Transform> 113 | /// [rest of string was truncated]";. 114 | /// 115 | internal static string unicode { 116 | get { 117 | return ResourceManager.GetString("unicode", resourceCulture); 118 | } 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /TestUniHax/Properties/Resources.resx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | text/microsoft-resx 107 | 108 | 109 | 2.0 110 | 111 | 112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 113 | 114 | 115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | -------------------------------------------------------------------------------- /TestUniHax/TestUniHax.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 
| 4 | Debug 5 | x86 6 | 8.0.30703 7 | 2.0 8 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A} 9 | WinExe 10 | Properties 11 | TestUniHax 12 | TestUniHax 13 | v4.0 14 | Client 15 | 512 16 | publish\ 17 | true 18 | Disk 19 | false 20 | Foreground 21 | 7 22 | Days 23 | false 24 | false 25 | true 26 | 0 27 | 1.0.0.%2a 28 | false 29 | false 30 | true 31 | 32 | 33 | x86 34 | true 35 | full 36 | false 37 | bin\Debug\ 38 | DEBUG;TRACE 39 | prompt 40 | 4 41 | 42 | 43 | x86 44 | pdbonly 45 | true 46 | bin\Release\ 47 | TRACE 48 | prompt 49 | 4 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | Form 66 | 67 | 68 | Form1.cs 69 | 70 | 71 | 72 | 73 | Form1.cs 74 | 75 | 76 | ResXFileCodeGenerator 77 | Resources.Designer.cs 78 | Designer 79 | 80 | 81 | True 82 | Resources.resx 83 | True 84 | 85 | 86 | 87 | SettingsSingleFileGenerator 88 | Settings.Designer.cs 89 | 90 | 91 | True 92 | Settings.settings 93 | True 94 | 95 | 96 | 97 | 98 | False 99 | Microsoft .NET Framework 4 Client Profile %28x86 and x64%29 100 | true 101 | 102 | 103 | False 104 | .NET Framework 3.5 SP1 Client Profile 105 | false 106 | 107 | 108 | False 109 | .NET Framework 3.5 SP1 110 | false 111 | 112 | 113 | False 114 | Windows Installer 3.1 115 | true 116 | 117 | 118 | 119 | 120 | {434C7AAC-316B-4425-A459-730E3E127505} 121 | UniHax 122 | 123 | 124 | 125 | 132 | -------------------------------------------------------------------------------- /TestUniHax/Form1.resx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | text/microsoft-resx 110 | 111 | 112 | 2.0 113 | 114 | 115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 
| 118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 119 | 120 | -------------------------------------------------------------------------------- /UniHax/Properties/Resources.resx: -------------------------------------------------------------------------------- 1 | 2 | 3 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | text/microsoft-resx 110 | 111 | 112 | 2.0 113 | 114 | 115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 116 | 117 | 118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 119 | 120 | 121 | 122 | ..\data\bestfit.xml;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;utf-8 123 | 124 | 125 | ..\data\unicode.xml;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;utf-8 126 | 127 | -------------------------------------------------------------------------------- /TestUniHax/Form1.cs: -------------------------------------------------------------------------------- 1 | 2 | // Copyright (c) 2011 by Christopher Weber 3 | 4 | // Permission is hereby granted, free of charge, to any person obtaining a copy 5 | // of this software and associated documentation files (the "Software"), to deal 6 | // in the Software without restriction, including without limitation the rights 7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | // copies of the Software, and to permit persons to whom the Software is 9 | // furnished to do so, subject to the following conditions: 10 | 11 | // The above copyright notice and this permission 
// notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Authors:
// Christopher Weber (chris@lookout.net)

using System;
using System.Collections.Generic;
using System.Text;
using System.Windows.Forms;
using UniHax;

namespace TestUniMap
{
    /// <summary>
    /// Test form for the UniHax library: takes one input character, optionally a
    /// charset or transform filter, and shows the mappings returned by
    /// <see cref="Mappings"/> both as text and in a grid.
    /// </summary>
    public partial class FormUniMapTest : Form
    {
        /// <summary>Status text shown when the input box holds more than one character.</summary>
        private const string InputFormatError =
            "Error: Input was not in a correct format. Only a single ASCII character is allowed. The first character you entered will be used.";

        private char input;
        private string charset;
        private string transform;
        private Mappings data = new Mappings();

        /// <summary>
        /// Builds the form and fills the charset and transform combo boxes (each with a
        /// leading blank entry) and the special-characters text box.
        /// </summary>
        public FormUniMapTest()
        {
            InitializeComponent();

            // Setup the charset combobox; the blank first entry means "no filter".
            List<string> charsets = Data.GetAvailableBestfitCharsets();
            charsets.Insert(0, "");
            comboBoxCharsets.DataSource = charsets;
            comboBoxCharsets.DisplayMember = "Charset";

            // Setup the transform combobox the same way.
            List<string> transforms = Data.GetAvailableTransforms();
            transforms.Insert(0, "");
            comboBoxTransformations.DataSource = transforms;
            comboBoxTransformations.DisplayMember = "Transform";

            // Populate unichar properties
            string[] aProps =
            {
                Fuzzer.uBOM, Fuzzer.uMVS, Fuzzer.uReservedCodePoint, Fuzzer.uRLO,
                Fuzzer.uDEAD, Fuzzer.uDAAD, Fuzzer.uPrivate, Fuzzer.uNotACharacter
            };
            textBoxUnicharProps.Text = String.Join("\r\n", aProps);
        }

        /// <summary>The mapping database queried by the button handlers.</summary>
        public Mappings Data
        {
            get { return data; }
            set { data = value; }
        }

        /// <summary>The character whose mappings are looked up.</summary>
        public char Input
        {
            get { return input; }
            // Assigning a char cannot fail, so the former Convert.ToChar/try-catch is gone.
            set { input = value; }
        }

        /// <summary>Charset filter for the best-fit grid; empty or null means no filter.</summary>
        public string Charset
        {
            get { return charset; }
            set { charset = value; }
        }

        /// <summary>Transform filter for the transformations grid; empty or null means no filter.</summary>
        public string Transform
        {
            get { return transform; }
            set { transform = value; }
        }

        private void labelHelp_Click(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Stores the first typed character in <see cref="Input"/>. When more than one
        /// character is present the status box explains that only the first is used.
        /// </summary>
        private void textBoxInput_TextChanged(object sender, EventArgs e)
        {
            string sInput = textBoxInput.Text;
            textBoxStatus.Text = "";

            if (String.IsNullOrEmpty(sInput))
            {
                // Nothing to convert; keep the previously entered character.
                return;
            }

            if (sInput.Length > 1)
            {
                textBoxStatus.Text = InputFormatError;
            }

            // Previously Convert.ToChar(string) threw for multi-character text, so the
            // promised "first character" was never actually stored.
            Input = sInput[0];
        }

        private void textBoxStatus_TextChanged(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Shows the best-fit mappings of <see cref="Input"/> as text and fills the grid
        /// with the rows built for the selected <see cref="Charset"/>.
        /// </summary>
        private void buttonGetBestfit_Click(object sender, EventArgs e)
        {
            textBoxOutput.Text = RenderCodePoints(Data.GetBestfitMappings(Input));

            // Fill DataGrid
            List<BestFitMapping> lBestfits = new List<BestFitMapping>();
            dataGridViewBestFit.DataSource = null;
            Data.BuildBestfitTable(Input, ref lBestfits, Charset);
            dataGridViewBestFit.DataSource = lBestfits;
        }

        private void textBoxOutput_TextChanged(object sender, EventArgs e)
        {
        }

        private void dataGridViewBestFit_CellContentClick(object sender, DataGridViewCellEventArgs e)
        {
        }

        private void FormUniMapTest_Load(object sender, EventArgs e)
        {
        }

        /// <summary>Copies the selected charset into <see cref="Charset"/>.</summary>
        private void comboBoxCharsets_SelectedIndexChanged(object sender, EventArgs e)
        {
            // Convert.ToString(object) yields "" for null, so an empty selection no
            // longer throws a NullReferenceException.
            Charset = Convert.ToString(comboBoxCharsets.SelectedValue);
        }

        private void label1_Click(object sender, EventArgs e)
        {
        }

        /// <summary>Copies the selected transform into <see cref="Transform"/>.</summary>
        private void comboBoxTransformations_SelectedIndexChanged(object sender, EventArgs e)
        {
            Transform = Convert.ToString(comboBoxTransformations.SelectedValue);
        }

        /// <summary>
        /// Shows the normalization mappings of <see cref="Input"/> as text and fills the
        /// grid with the rows built for the selected <see cref="Transform"/>.
        /// </summary>
        private void buttonGetUnicode_Click(object sender, EventArgs e)
        {
            textBoxOutput.Text = RenderCodePoints(Data.GetNormalizationMappings(Input));

            // Fill DataGrid
            List<UnicodeMapping> lTransformations = new List<UnicodeMapping>();
            dataGridViewBestFit.DataSource = null;
            Data.BuildTransformationsTable(Input, ref lTransformations, Transform);
            dataGridViewBestFit.DataSource = lTransformations;
        }

        private void textBoxUnicharProps_TextChanged(object sender, EventArgs e)
        {
        }

        private void labelInput_Click(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Converts each non-empty hex code point to its character and joins them,
        /// one per line, each followed by "\r\n".
        /// </summary>
        private static string RenderCodePoints(IEnumerable<string> codePoints)
        {
            StringBuilder output = new StringBuilder();
            UniChar uc = new UniChar();

            foreach (string codePoint in codePoints)
            {
                if (!String.IsNullOrEmpty(codePoint))
                {
                    output.Append(uc.ConvertCodePointToString(codePoint)).Append("\r\n");
                }
            }

            return output.ToString();
        }
    }
}

// --------------------------------------------------------------------------------
// /UniHax/Fuzzer.cs:
// --------------------------------------------------------------------------------

// Copyright (c) 2011 by Christopher Weber

// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:

// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.

// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 | // SOFTWARE. 21 | 22 | // Authors: 23 | // Christopher Weber (chris@lookout.net) 24 | 25 | 26 | using System; 27 | using System.IO; 28 | 29 | namespace UniHax 30 | { 31 | /// 32 | /// The Fuzzer has cases for some of the oddball manifestations of Unicode that can trip up software including: 33 | /// 34 | /// - non-character, reserved, and private use area code points 35 | /// - special meaning characters such as the BOM and RLO 36 | /// - ill-formed byte sequences 37 | /// - a half-surrogate code point 38 | /// 39 | /// 40 | /// 41 | public class Fuzzer 42 | { 43 | 44 | /// 45 | /// The Byte Order Mark U+FEFF is a special character defining the byte order and endianess 46 | /// of text data. 47 | /// UTF-8 percent encoding is %EF%BB%BF 48 | /// 49 | public static readonly string uBOM = "\uFEFF"; 50 | /// 51 | /// The Right to Left Override U+202E defines special meaning to re-order the 52 | /// display of text for right-to-left reading. 53 | /// UTF-8 percent encoding is %E2%80%AE 54 | /// 55 | public static readonly string uRLO = "\u202E"; 56 | /// 57 | /// Mongolian Vowel Separator U+180E is invisible and has the whitespace property. 58 | /// UTF-8 percent encoding is %E1%A0%8E 59 | /// 60 | public static readonly string uMVS = "\u180E"; 61 | /// 62 | /// Word Joiner U+2060 is an invisible zero-width character. 
63 | /// UTF-8 percent encoding is %E2%81%A0 64 | /// 65 | public static readonly string uWordJoiner = "\u2060"; 66 | /// 67 | /// A reserved code point U+FEFE 68 | /// UTF-8 percent encoding is %ef%bb%be 69 | /// 70 | public static readonly string uReservedCodePoint = "\uFEFE"; 71 | /// 72 | /// The code point U+FFFF is guaranteed to not be a Unicode character at all 73 | /// UTF-8 percent encoding is %ef%bf%bf 74 | /// 75 | public static readonly string uNotACharacter = "\uFFFF"; 76 | /// 77 | /// An unassigned code point U+0FED 78 | /// UTF-8 percent encoding is %e0%bf%ad 79 | /// 80 | public static readonly string uUnassigned = "\u0FED"; 81 | /// 82 | /// An illegal low half-surrogate U+DEAD 83 | /// UTF-8 percent encoding is %ed%ba%ad 84 | /// 85 | public static readonly string uDEAD = "\uDEAD"; 86 | /// 87 | /// An illegal high half-surrogate U+DAAD 88 | /// UTF-8 percent encoding is %ed%aa%ad 89 | /// 90 | public static readonly string uDAAD = "\uDAAD"; 91 | /// 92 | /// A Private Use Area code point U+F8FF which Apple happens to use for its logo. 93 | /// UTF-8 percent encoding is %EF%A3%BF 94 | /// 95 | public static readonly string uPrivate = "\uF8FF"; 96 | /// 97 | /// U+FF0F FULLWIDTH SOLIDUS should normalize to / in a hostname 98 | /// UTF-8 percent encoding is %EF%BC%8F 99 | /// 100 | public static readonly string uFullwidthSolidus = "\uFF0F"; 101 | /// 102 | /// Code point with a numerical mapping and value U+1D7D6 MATHEMATICAL BOLD DIGIT EIGHT 103 | /// UTF-8 percent encoding is %F0%9D%9F%96 104 | /// 105 | public static readonly string uBoldEight = char.ConvertFromUtf32(0x1D7D6); 106 | /// 107 | /// IDNA2003/2008 Deviant - U+00DF normalizes to "ss" during IDNA2003's mapping phase, 108 | /// different from its IDNA2008 mapping. 
/// See http://www.unicode.org/reports/tr46/
/// UTF-8 percent encoding is %C3%9F
/// </summary>
public static readonly string uIdnaSs = "\u00DF";
/// <summary>
/// U+FDFA expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
/// UTF-8 percent encoding is %EF%B7%BA
/// </summary>
public static readonly string uFDFA = "\uFDFA";
/// <summary>
/// U+0390 expands by 3x (UTF-8) under NFD
/// UTF-8 percent encoding is %CE%90
/// </summary>
public static readonly string u0390 = "\u0390";
/// <summary>
/// U+1F82 expands by 4x (UTF-16) under NFD
/// UTF-8 percent encoding is %E1%BE%82
/// </summary>
public static readonly string u1F82 = "\u1F82";
/// <summary>
/// U+FB2C expands by 3x (UTF-16) under NFC
/// UTF-8 percent encoding is %EF%AC%AC
/// </summary>
public static readonly string uFB2C = "\uFB2C";
/// <summary>
/// U+1D160 expands by 3x (UTF-8) under NFC
/// UTF-8 percent encoding is %F0%9D%85%A0
/// </summary>
public static readonly string u1D160 = char.ConvertFromUtf32(0x1D160);

/// <summary>
/// Encodes the given text with the requested Unicode encoding form.
/// </summary>
/// <param name="encoding">
/// "utf-16le" or "utf-16be" (matched exactly); any other value, including null,
/// selects UTF-8.
/// </param>
/// <param name="character">A single character sent as a string.</param>
/// <returns>The bytes produced by the selected encoder, without a byte order mark.</returns>
public byte[] GetCharacterBytes(string encoding, string character)
{
    switch (encoding)
    {
        case "utf-16le":
            return new System.Text.UnicodeEncoding().GetBytes(character);

        case "utf-16be":
            // bigEndian = true, byteOrderMark = false
            return new System.Text.UnicodeEncoding(true, false).GetBytes(character);

        default:
            return new System.Text.UTF8Encoding().GetBytes(character);
    }
}

/// <summary>
/// Malforms the bytes by removing the last byte from whichever encoding you specify.
/// </summary>
/// <param name="encoding">The encoding you want a byte representation in.
/// Specify utf-8, utf-16le, or utf-16be</param>
/// <param name="character">A single character sent as a string.</param>
/// <returns>
/// The encoded bytes with the final byte dropped, or the bytes unchanged when the
/// encoding produced one byte or none.
/// </returns>
public byte[] GetCharacterBytesMalformed(string encoding, string character)
{
    System.Text.Encoding encoder;
    switch (encoding)
    {
        case "utf-16le":
            encoder = new System.Text.UnicodeEncoding();
            break;

        case "utf-16be":
            encoder = new System.Text.UnicodeEncoding(true, false);
            break;

        default:
            encoder = new System.Text.UTF8Encoding();
            break;
    }

    byte[] encoded = encoder.GetBytes(character);

    // Only truncate when something would be left over; an empty array makes for a
    // useless test case, so a single byte is returned as-is.
    int keep = encoded.Length > 1 ? encoded.Length - 1 : encoded.Length;

    byte[] result = new byte[keep];
    Array.Copy(encoded, result, keep);
    return result;
}

/// <summary>
/// Returns the byte order mark constant <c>uBOM</c>.
/// </summary>
public string GetBom()
{
    return Fuzzer.uBOM;
}

/// <summary>
/// Return a UTF32 byte encoding for an illegal code point value U+1FFFFF.
/// Note that Unicode 6.0 supports only up to U+10FFFF.
/// UTF-8 percent encoding for something out of range is %F4%8F%BF%BE
/// </summary>
/// <returns>A raw byte array because .NET will not allow illegal code points in the System.String class.</returns>
221 | public byte[] OutOfRangeCodePointAsUtf32BE() 222 | { 223 | byte[] bytes = {0x00, 0x1F, 0xFF, 0xFF}; 224 | return bytes; 225 | } 226 | } 227 | } 228 | -------------------------------------------------------------------------------- /UniHax/Mappings.cs: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2011 by Christopher Weber 2 | 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | // SOFTWARE. 20 | 21 | // Authors: 22 | // Christopher Weber (chris@lookout.net) 23 | 24 | using System; 25 | using System.Collections.Generic; 26 | using System.Linq; 27 | using System.Xml.Linq; 28 | using UniHax.Properties; 29 | 30 | namespace UniHax 31 | { 32 | /// 33 | /// The Mappings class gives access to bestfit and Unicode normalization character mappings. 
34 | /// 35 | public class Mappings 36 | { 37 | 38 | #region fields 39 | 40 | // This stores the entire Bestfit XML as an in memory database 41 | private XDocument xDocBestfit; 42 | // This stores the entire Bestfit XML as an in memory database 43 | private XDocument xDocUnicode; 44 | 45 | #endregion 46 | 47 | #region Ctor(s) 48 | public Mappings() 49 | { 50 | Init(); 51 | } 52 | #endregion 53 | 54 | 55 | #region properties 56 | public XDocument XDocBestfit 57 | { 58 | get { return xDocBestfit; } 59 | set { xDocBestfit = value; } 60 | } 61 | 62 | public XDocument XDocUnicode 63 | { 64 | get { return xDocUnicode; } 65 | set { xDocUnicode = value; } 66 | } 67 | 68 | #endregion 69 | 70 | private void Init() 71 | { 72 | // The bestfit.xml file embedded as a resource 73 | var sBestfitXml = Resources.bestfit; 74 | // The unicode.xml file embedded as a resource 75 | var sUnicodeXml = Resources.unicode; 76 | 77 | // Build the in-memory databases 78 | XDocBestfit = XDocument.Parse(sBestfitXml); 79 | XDocUnicode = XDocument.Parse(sUnicodeXml); 80 | 81 | 82 | } 83 | 84 | /// 85 | /// Get a unique list of the charsets from the bestfit database. 86 | /// 87 | /// 88 | public List GetAvailableBestfitCharsets() 89 | { 90 | List charsets = new List(); 91 | IEnumerable query; 92 | 93 | try 94 | { 95 | query = (from mapping in XDocBestfit.Descendants("Mapping") 96 | select (string)mapping.Element("Charset").Value).Distinct(); 97 | } 98 | catch (Exception) 99 | { 100 | throw; 101 | } 102 | 103 | 104 | try 105 | { 106 | foreach (var charset in query.Distinct()) 107 | { 108 | charsets.Add(charset); 109 | } 110 | } 111 | catch (Exception) 112 | { 113 | // fine just return an empty list 114 | charsets.Add(""); 115 | throw; 116 | } 117 | 118 | return charsets; 119 | } 120 | 121 | /// 122 | /// Build a data table for all of an ASCII character's bestfit mappings. 123 | /// 124 | /// The ASCII character to query on. 125 | /// Reference to a List you want to populate with data. 
/// <param name="sCharset">An optional charset to filter results by.</param>
/// <returns></returns>
/// <remarks>
/// Appends one <c>BestFitMapping</c> to <paramref name="lBestFit"/> for every
/// "Mapping" element of the best-fit document whose "Ascii" value equals the code
/// point of <paramref name="cAscii"/>. When <paramref name="sCharset"/> is null or
/// empty no charset filter is applied. A null list is replaced by a new one, which
/// the caller receives through the <c>ref</c> parameter.
/// </remarks>
public void BuildBestfitTable(char cAscii,
    ref List<BestFitMapping> lBestFit,
    string sCharset = "")
{
    if (lBestFit == null)
    {
        lBestFit = new List<BestFitMapping>();
    }

    UniChar uc = new UniChar();
    string codePoint = uc.GetCodePoint(cAscii);
    bool anyCharset = String.IsNullOrEmpty(sCharset);

    // One query covers both the filtered and the unfiltered case.
    IEnumerable<XElement> query =
        from mapping in XDocBestfit.Descendants("Mapping")
        where (string)mapping.Element("Ascii") == codePoint
              && (anyCharset || (string)mapping.Element("Charset") == sCharset)
        select mapping;

    // The former query.Count() result was never used and cost a second pass over the
    // document; Distinct() on XElement compares references and so removed nothing.
    foreach (XElement item in query)
    {
        BestFitMapping bf = new BestFitMapping();
        bf.Ascii = item.Element("Ascii").Value;
        bf.Unicode = item.Element("Unicode").Value;
        bf.Character = uc.ConvertCodePointToString(bf.Unicode);
        bf.Charset = item.Element("Charset").Value;
        bf.Name = item.Element("Name").Value;
        lBestFit.Add(bf);
    }
}

/// <summary>
/// Build a data table for all of an ASCII character's Unicode transformation mappings.
/// </summary>
/// <param name="cAscii">The ASCII character to query on.</param>
/// <param name="lTransformations">Reference to a List you want to populate with data.</param>
170 | public void BuildTransformationsTable(char cAscii, 171 | ref List lTransformations, 172 | string sTransform = "") 173 | { 174 | UniChar uc = new UniChar(); 175 | uc.CodePoint = uc.GetCodePoint(cAscii); 176 | IEnumerable query; 177 | 178 | if (String.IsNullOrEmpty(sTransform)) 179 | { 180 | query = from mapping in xDocUnicode.Descendants("Mapping") 181 | where (string)mapping.Element("Ascii") == uc.CodePoint 182 | select mapping; 183 | } 184 | else 185 | { 186 | query = (from mapping in XDocUnicode.Descendants("Mapping") 187 | where (string)mapping.Element("Ascii") == uc.CodePoint && 188 | (string)mapping.Element("Transform") == sTransform 189 | select mapping); 190 | } 191 | 192 | var count = query.Count(); 193 | foreach (var item in query.Distinct()) 194 | { 195 | UnicodeMapping um = new UnicodeMapping(); 196 | UniChar uc2 = new UniChar(); 197 | um.Ascii = item.Element("Ascii").Value; 198 | um.Unicode = item.Element("Unicode").Value; 199 | um.Character = uc2.ConvertCodePointToString(um.Unicode); 200 | um.Transform= item.Element("Transform").Value; 201 | um.Name = item.Element("Name").Value; 202 | lTransformations.Add(um); 203 | } 204 | } 205 | 206 | /// 207 | /// Send me an ASCII character and I'll return you a list of Unicode characters that 208 | /// best fit map to it. Since you're not telling me a specific charset your're 209 | /// interested in, I'm going to send you data for all of them. 210 | /// 211 | /// The ASCII character to query on. 
/// <param name="sCharset">An optional charset name to filter by, valid values include:
/// APL-ISO-IR-68
/// CP424
/// IBMGRAPH
/// US-ASCII-QUOTES
/// windows-1250
/// windows-1251
/// windows-1252
/// windows-1253
/// windows-1254
/// windows-1255
/// windows-1256
/// windows-1257
/// windows-1258
/// windows-874
/// CP864
/// CP037
/// CP1026
/// CP500
/// CP875
/// DINGBATS
/// KEYBOARD
/// SYMBOL
/// symbol
/// zdingbat
/// JAPANESE
/// GSM0338
/// </param>
/// <returns>
/// The distinct "Unicode" values of every best-fit mapping whose "Ascii" value is the
/// code point of <paramref name="cAscii"/>, restricted to <paramref name="sCharset"/>
/// when that names a charset known to <c>BestFitMapping.charsets</c>.
/// </returns>
public List<string> GetBestfitMappings(char cAscii, string sCharset = "")
{
    BestFitMapping bm = new BestFitMapping();

    // If an invalid charset was entered then set it to the wildcard
    if (!bm.charsets.Contains(sCharset))
    {
        sCharset = "";
    }

    UniChar uc = new UniChar();
    string codePoint = uc.GetCodePoint(cAscii);
    bool anyCharset = String.IsNullOrEmpty(sCharset);

    // The charset-filtered branch used to compare against the literal "0043" ('C'),
    // so it returned mappings for 'C' whatever character was requested.
    IEnumerable<string> query =
        from mapping in XDocBestfit.Descendants("Mapping")
        where (string)mapping.Element("Ascii") == codePoint
              && (anyCharset || (string)mapping.Element("Charset") == sCharset)
        select mapping.Element("Unicode").Value;

    return query.Distinct().ToList();
}


/// <summary>
/// Get a unique list of the transforms from the Unicode mappings database.
///
        ///
        public List<string> GetAvailableTransforms()
        {
            // Every mapping carries a "Transform" element; Distinct() leaves one entry per transform name
            var query = (from mapping in XDocUnicode.Descendants("Mapping")
                         select mapping.Element("Transform").Value).Distinct();

            return query.ToList();
        }


        /// <summary>
        /// Get all normalization mappings for an ASCII character. Optionally specify a specific transform
        /// as a filter.
        /// </summary>
        /// <param name="cAscii">A single ASCII character.</param>
        /// <param name="sTransform">Valid transformations specified as:
        ///
        /// Simple_Lowercase_Mapping
        /// Lowercase_Mapping
        /// Simple_Case_Folding
        /// cf
        /// Simple_Uppercase_Mapping
        /// Simple_Titlecase_Mapping
        /// Uppercase_Mapping
        /// Titlecase_Mapping
        /// Decomposition_Mapping
        /// FC_NFKC
        /// </param>
        /// <returns>The distinct "Unicode" values of every matching mapping; empty when nothing matches.</returns>
        public List<string> GetNormalizationMappings(char cAscii, string sTransform = "")
        {
            UnicodeMapping um = new UnicodeMapping();

            // If something is passed in we don't know about then set transform to a wildcard
            if (!um.transformations.Contains(sTransform))
            {
                sTransform = "";
            }

            // Convert the input character to the code point string stored in the "Ascii" element
            UniChar uc = new UniChar();
            uc.CodePoint = uc.GetCodePoint(cAscii);
            IEnumerable<string> query;

            // If a transformation wasn't specified, filter by the ASCII character only
            if (String.IsNullOrEmpty(sTransform))
            {
                query = from mapping in XDocUnicode.Descendants("Mapping")
                        where (string)mapping.Element("Ascii") == uc.CodePoint
                        select mapping.Element("Unicode").Value;
            }

            // else filter by the transformation too
            else
            {
                // Match on the requested character's code point here as well; a fixed
                // "0043" literal would make every transform-filtered lookup ignore cAscii.
                query = from mapping in XDocUnicode.Descendants("Mapping")
                        where (string)mapping.Element("Ascii") == uc.CodePoint &&
                              (string)mapping.Element("Transform") == sTransform
                        select mapping.Element("Unicode").Value;
            }

            // The same Unicode value can appear under several transforms, so de-duplicate
            return query.Distinct().ToList();
        }
    }
}

--------------------------------------------------------------------------------
/TestUniHax/Form1.Designer.cs:
--------------------------------------------------------------------------------
namespace TestUniMap
{
    partial class FormUniMapTest
    {
        /// <summary>
        /// Required designer variable.
        /// </summary>
        private System.ComponentModel.IContainer components = null;

        /// <summary>
        /// Clean up any resources being used.
        /// </summary>
        /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
        protected override void Dispose(bool disposing)
        {
            if (disposing && (components != null))
            {
                components.Dispose();
            }
            base.Dispose(disposing);
        }

        #region Windows Form Designer generated code

        ///
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
///
        private void InitializeComponent()
        {
            // Designer-generated: creates every control on the form, configures it,
            // wires its event handler, and adds it to the form's Controls collection.
            System.Windows.Forms.DataGridViewCellStyle dataGridViewCellStyle7 = new System.Windows.Forms.DataGridViewCellStyle();
            System.Windows.Forms.DataGridViewCellStyle dataGridViewCellStyle8 = new System.Windows.Forms.DataGridViewCellStyle();
            System.Windows.Forms.DataGridViewCellStyle dataGridViewCellStyle9 = new System.Windows.Forms.DataGridViewCellStyle();
            this.textBoxInput = new System.Windows.Forms.TextBox();
            this.buttonGetBestfit = new System.Windows.Forms.Button();
            this.buttonGetUnicode = new System.Windows.Forms.Button();
            this.textBoxOutput = new System.Windows.Forms.TextBox();
            this.textBoxStatus = new System.Windows.Forms.TextBox();
            this.dataGridViewBestFit = new System.Windows.Forms.DataGridView();
            this.comboBoxCharsets = new System.Windows.Forms.ComboBox();
            this.labelCharsetCombo = new System.Windows.Forms.Label();
            this.labelInput = new System.Windows.Forms.Label();
            this.comboBoxTransformations = new System.Windows.Forms.ComboBox();
            this.textBoxUnicharProps = new System.Windows.Forms.TextBox();
            this.labelTransformCombo = new System.Windows.Forms.Label();
            this.labelResultsAsString = new System.Windows.Forms.Label();
            this.labelResultsAsGrid = new System.Windows.Forms.Label();
            this.labelSpecialCharacters = new System.Windows.Forms.Label();
            // Paired with the EndInit()/ResumeLayout() calls at the end of this method.
            ((System.ComponentModel.ISupportInitialize)(this.dataGridViewBestFit)).BeginInit();
            this.SuspendLayout();
            //
            // textBoxInput
            //
            this.textBoxInput.Font = new System.Drawing.Font("Microsoft Sans Serif", 20F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.textBoxInput.Location = new System.Drawing.Point(7, 52);
            this.textBoxInput.Name = "textBoxInput";
            this.textBoxInput.Size = new System.Drawing.Size(100, 38);
            this.textBoxInput.TabIndex = 0;
            this.textBoxInput.TextChanged += new System.EventHandler(this.textBoxInput_TextChanged);
            //
            // buttonGetBestfit
            //
            this.buttonGetBestfit.Location = new System.Drawing.Point(7, 160);
            this.buttonGetBestfit.Name = "buttonGetBestfit";
            this.buttonGetBestfit.Size = new System.Drawing.Size(75, 43);
            this.buttonGetBestfit.TabIndex = 1;
            this.buttonGetBestfit.Text = "Get Bestfit Equivalents";
            this.buttonGetBestfit.UseVisualStyleBackColor = true;
            this.buttonGetBestfit.Click += new System.EventHandler(this.buttonGetBestfit_Click);
            //
            // buttonGetUnicode
            //
            this.buttonGetUnicode.Location = new System.Drawing.Point(7, 268);
            this.buttonGetUnicode.Name = "buttonGetUnicode";
            this.buttonGetUnicode.Size = new System.Drawing.Size(75, 39);
            this.buttonGetUnicode.TabIndex = 2;
            this.buttonGetUnicode.Text = "Get Unicode Transforms";
            this.buttonGetUnicode.UseVisualStyleBackColor = true;
            this.buttonGetUnicode.Click += new System.EventHandler(this.buttonGetUnicode_Click);
            //
            // textBoxOutput
            //
            this.textBoxOutput.Location = new System.Drawing.Point(332, 32);
            this.textBoxOutput.Multiline = true;
            this.textBoxOutput.Name = "textBoxOutput";
            this.textBoxOutput.ReadOnly = true;
            this.textBoxOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
            this.textBoxOutput.Size = new System.Drawing.Size(154, 368);
            this.textBoxOutput.TabIndex = 3;
            this.textBoxOutput.TextChanged += new System.EventHandler(this.textBoxOutput_TextChanged);
            //
            // textBoxStatus
            //
            this.textBoxStatus.BorderStyle = System.Windows.Forms.BorderStyle.None;
            this.textBoxStatus.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            this.textBoxStatus.ForeColor = System.Drawing.SystemColors.Highlight;
            this.textBoxStatus.Location = new System.Drawing.Point(144, 12);
            this.textBoxStatus.Multiline = true;
            this.textBoxStatus.Name = "textBoxStatus";
            this.textBoxStatus.ReadOnly = true;
            this.textBoxStatus.Size = new System.Drawing.Size(155, 118);
            this.textBoxStatus.TabIndex = 5;
            this.textBoxStatus.TextChanged += new System.EventHandler(this.textBoxStatus_TextChanged);
            //
            // dataGridViewBestFit
            //
            // dataGridViewCellStyle7 is applied to the column headers.
            dataGridViewCellStyle7.Alignment = System.Windows.Forms.DataGridViewContentAlignment.MiddleLeft;
            dataGridViewCellStyle7.BackColor = System.Drawing.SystemColors.Control;
            dataGridViewCellStyle7.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            dataGridViewCellStyle7.ForeColor = System.Drawing.SystemColors.WindowText;
            dataGridViewCellStyle7.SelectionBackColor = System.Drawing.SystemColors.Highlight;
            dataGridViewCellStyle7.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
            dataGridViewCellStyle7.WrapMode = System.Windows.Forms.DataGridViewTriState.True;
            this.dataGridViewBestFit.ColumnHeadersDefaultCellStyle = dataGridViewCellStyle7;
            this.dataGridViewBestFit.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
            // dataGridViewCellStyle8 is the default style for the data cells.
            dataGridViewCellStyle8.Alignment = System.Windows.Forms.DataGridViewContentAlignment.MiddleLeft;
            dataGridViewCellStyle8.BackColor = System.Drawing.SystemColors.Window;
            dataGridViewCellStyle8.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            dataGridViewCellStyle8.ForeColor = System.Drawing.SystemColors.ControlText;
            dataGridViewCellStyle8.SelectionBackColor = System.Drawing.SystemColors.Highlight;
            dataGridViewCellStyle8.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
            dataGridViewCellStyle8.WrapMode = System.Windows.Forms.DataGridViewTriState.False;
            this.dataGridViewBestFit.DefaultCellStyle = dataGridViewCellStyle8;
            this.dataGridViewBestFit.Location = new System.Drawing.Point(492, 32);
            this.dataGridViewBestFit.Name = "dataGridViewBestFit";
            this.dataGridViewBestFit.ReadOnly = true;
            // dataGridViewCellStyle9 is applied to the row headers.
            dataGridViewCellStyle9.Alignment = System.Windows.Forms.DataGridViewContentAlignment.MiddleLeft;
            dataGridViewCellStyle9.BackColor = System.Drawing.SystemColors.Control;
            dataGridViewCellStyle9.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
            dataGridViewCellStyle9.ForeColor = System.Drawing.SystemColors.WindowText;
            dataGridViewCellStyle9.SelectionBackColor = System.Drawing.SystemColors.Highlight;
            dataGridViewCellStyle9.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
            dataGridViewCellStyle9.WrapMode = System.Windows.Forms.DataGridViewTriState.True;
            this.dataGridViewBestFit.RowHeadersDefaultCellStyle = dataGridViewCellStyle9;
            this.dataGridViewBestFit.Size = new System.Drawing.Size(570, 368);
            this.dataGridViewBestFit.TabIndex = 6;
            this.dataGridViewBestFit.CellContentClick += new System.Windows.Forms.DataGridViewCellEventHandler(this.dataGridViewBestFit_CellContentClick);
            //
            // comboBoxCharsets
            //
            this.comboBoxCharsets.FormattingEnabled = true;
            this.comboBoxCharsets.Location = new System.Drawing.Point(7, 133);
            this.comboBoxCharsets.Name = "comboBoxCharsets";
            this.comboBoxCharsets.Size = new System.Drawing.Size(121, 21);
            this.comboBoxCharsets.TabIndex = 7;
            this.comboBoxCharsets.SelectedIndexChanged += new System.EventHandler(this.comboBoxCharsets_SelectedIndexChanged);
            //
            // labelCharsetCombo
            //
            this.labelCharsetCombo.AutoSize = true;
            this.labelCharsetCombo.Location = new System.Drawing.Point(6, 117);
            this.labelCharsetCombo.Name = "labelCharsetCombo";
            this.labelCharsetCombo.Size = new System.Drawing.Size(117, 13);
            this.labelCharsetCombo.TabIndex = 8;
            this.labelCharsetCombo.Text = "Filter results by charset:";
            // NOTE: the handler is named label1_Click, not after this control's current name.
            this.labelCharsetCombo.Click += new System.EventHandler(this.label1_Click);
            //
            // labelInput
            //
            this.labelInput.AutoSize = true;
            this.labelInput.Location = new System.Drawing.Point(6, 13);
            this.labelInput.Name = "labelInput";
            this.labelInput.Size = new System.Drawing.Size(85, 26);
            this.labelInput.TabIndex = 9;
            this.labelInput.Text = "Enter a single \r\nASCII character:";
            this.labelInput.Click += new System.EventHandler(this.labelInput_Click);
            //
            // comboBoxTransformations
            //
            this.comboBoxTransformations.FormattingEnabled = true;
            this.comboBoxTransformations.Location = new System.Drawing.Point(7, 237);
            this.comboBoxTransformations.Name = "comboBoxTransformations";
            this.comboBoxTransformations.Size = new System.Drawing.Size(121, 21);
            this.comboBoxTransformations.TabIndex = 10;
            this.comboBoxTransformations.SelectedIndexChanged += new System.EventHandler(this.comboBoxTransformations_SelectedIndexChanged);
            //
            // textBoxUnicharProps
            //
            this.textBoxUnicharProps.Location = new System.Drawing.Point(182, 172);
            this.textBoxUnicharProps.Multiline = true;
            this.textBoxUnicharProps.Name = "textBoxUnicharProps";
            this.textBoxUnicharProps.ReadOnly = true;
            this.textBoxUnicharProps.Size = new System.Drawing.Size(100, 130);
            this.textBoxUnicharProps.TabIndex = 11;
            this.textBoxUnicharProps.TextChanged += new System.EventHandler(this.textBoxUnicharProps_TextChanged);
            //
            // labelTransformCombo
            //
            this.labelTransformCombo.AutoSize = true;
            this.labelTransformCombo.Location = new System.Drawing.Point(4, 221);
            this.labelTransformCombo.Name = "labelTransformCombo";
            this.labelTransformCombo.Size = new System.Drawing.Size(148, 13);
            this.labelTransformCombo.TabIndex = 12;
            this.labelTransformCombo.Text = "Filter results by transformation:";
            //
            // labelResultsAsString
            //
            this.labelResultsAsString.AutoSize = true;
            this.labelResultsAsString.Location = new System.Drawing.Point(333, 13);
            this.labelResultsAsString.Name = "labelResultsAsString";
            this.labelResultsAsString.Size = new System.Drawing.Size(96, 13);
            this.labelResultsAsString.TabIndex = 13;
            this.labelResultsAsString.Text = "Results as a string:";
            //
            // labelResultsAsGrid
            //
            this.labelResultsAsGrid.AutoSize = true;
            this.labelResultsAsGrid.Location = new System.Drawing.Point(492, 12);
            this.labelResultsAsGrid.Name = "labelResultsAsGrid";
            this.labelResultsAsGrid.Size = new System.Drawing.Size(116, 13);
            this.labelResultsAsGrid.TabIndex = 14;
            this.labelResultsAsGrid.Text = "Results as a Data Grid:";
            //
            // labelSpecialCharacters
            //
            this.labelSpecialCharacters.AutoSize = true;
            this.labelSpecialCharacters.Location = new System.Drawing.Point(182, 153);
            this.labelSpecialCharacters.Name = "labelSpecialCharacters";
            this.labelSpecialCharacters.Size = new System.Drawing.Size(99, 13);
            this.labelSpecialCharacters.TabIndex = 15;
            this.labelSpecialCharacters.Text = "Special Characters:";
            //
            // FormUniMapTest
            //
            this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
            this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
            this.ClientSize = new System.Drawing.Size(1074, 623);
            this.Controls.Add(this.labelSpecialCharacters);
            this.Controls.Add(this.labelResultsAsGrid);
            this.Controls.Add(this.labelResultsAsString);
            this.Controls.Add(this.labelTransformCombo);
            this.Controls.Add(this.textBoxUnicharProps);
            this.Controls.Add(this.comboBoxTransformations);
            this.Controls.Add(this.labelInput);
            this.Controls.Add(this.labelCharsetCombo);
            this.Controls.Add(this.comboBoxCharsets);
            this.Controls.Add(this.dataGridViewBestFit);
            this.Controls.Add(this.textBoxStatus);
            this.Controls.Add(this.textBoxOutput);
            this.Controls.Add(this.buttonGetUnicode);
            this.Controls.Add(this.buttonGetBestfit);
            this.Controls.Add(this.textBoxInput);
            this.Name = "FormUniMapTest";
            this.Text = "Get Unicode and Bestfit Mappings";
            this.Load += new System.EventHandler(this.FormUniMapTest_Load);
            // Close the initialization/layout bracket opened at the top of this method.
            ((System.ComponentModel.ISupportInitialize)(this.dataGridViewBestFit)).EndInit();
            this.ResumeLayout(false);
            this.PerformLayout();

        }

        #endregion

        // Control fields; each one is instantiated and configured in InitializeComponent().
        private System.Windows.Forms.TextBox textBoxInput;
        private System.Windows.Forms.Button buttonGetBestfit;
        private System.Windows.Forms.Button buttonGetUnicode;
        private System.Windows.Forms.TextBox textBoxOutput;
        private System.Windows.Forms.TextBox textBoxStatus;
        private System.Windows.Forms.DataGridView dataGridViewBestFit;
        private System.Windows.Forms.ComboBox comboBoxCharsets;
        private System.Windows.Forms.Label labelCharsetCombo;
        private System.Windows.Forms.Label labelInput;
        private System.Windows.Forms.ComboBox comboBoxTransformations;
        private System.Windows.Forms.TextBox textBoxUnicharProps;
        private System.Windows.Forms.Label labelTransformCombo;
        private System.Windows.Forms.Label labelResultsAsString;
        private System.Windows.Forms.Label labelResultsAsGrid;
        private System.Windows.Forms.Label labelSpecialCharacters;
    }
}

--------------------------------------------------------------------------------