├── UniHax
├── UniHax.pidb
├── Properties
│ ├── Resources.resources
│ ├── AssemblyInfo.cs
│ ├── Resources.Designer.cs
│ └── Resources.resx
├── ClassDiagram1.cd
├── Exceptions.cs
├── UnicodeMapping.cs
├── UniHax.csproj
├── BestFitMapping.cs
├── UnicodeChar.cs
├── Fuzzer.cs
└── Mappings.cs
├── TestUniHax
├── TestUniHax.pidb
├── Properties
│ ├── Settings.settings
│ ├── DataSources
│ │ └── UniMap.Mappings.datasource
│ ├── Settings.Designer.cs
│ ├── AssemblyInfo.cs
│ ├── Resources.Designer.cs
│ └── Resources.resx
├── Program.cs
├── TestUniHax.csproj
├── Form1.resx
├── Form1.cs
└── Form1.Designer.cs
├── LICENSE.html
├── .gitignore
├── UniHax.sln
└── README.md
/UniHax/UniHax.pidb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cweb/unicode-hax/HEAD/UniHax/UniHax.pidb
--------------------------------------------------------------------------------
/TestUniHax/TestUniHax.pidb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cweb/unicode-hax/HEAD/TestUniHax/TestUniHax.pidb
--------------------------------------------------------------------------------
/UniHax/Properties/Resources.resources:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cweb/unicode-hax/HEAD/UniHax/Properties/Resources.resources
--------------------------------------------------------------------------------
/TestUniHax/Properties/Settings.settings:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/UniHax/ClassDiagram1.cd:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | ACAAAAAAAAAAAAAAoAACQBAACEAAAAgAAAACAAAgAAA=
7 | Mappings.cs
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/TestUniHax/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Windows.Forms;
3 |
namespace TestUniMap
{
    /// <summary>
    /// Holds the process entry point of the Windows Forms test application.
    /// </summary>
    static class Program
    {
        /// <summary>
        /// The main entry point for the application.
        /// </summary>
        [STAThread]
        static void Main()
        {
            // Application-wide rendering options are applied before the
            // form instance is created.
            Application.EnableVisualStyles();
            Application.SetCompatibleTextRenderingDefault(false);

            // Build the main form, then hand it to the message loop.
            FormUniMapTest mainForm = new FormUniMapTest();
            Application.Run(mainForm);
        }
    }
}
22 |
--------------------------------------------------------------------------------
/TestUniHax/Properties/DataSources/UniMap.Mappings.datasource:
--------------------------------------------------------------------------------
1 |
2 |
8 |
9 | UniMap.Mappings, UniMap, Version=1.0.0.0, Culture=neutral, PublicKeyToken=null
10 |
--------------------------------------------------------------------------------
/LICENSE.html:
--------------------------------------------------------------------------------
1 | 
Unicode-Hax by Chris Weber is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
Based on a work at https://github.com/cweb/unicode-hax.
2 |
--------------------------------------------------------------------------------
/TestUniHax/Properties/Settings.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.1
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | //
9 | //------------------------------------------------------------------------------
10 |
namespace TestUniHax.Properties {

    /// <summary>
    /// Designer-generated settings class. It declares no settings of its own
    /// and exposes one shared instance through <see cref="Default"/>.
    /// </summary>
    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("Microsoft.VisualStudio.Editors.SettingsDesigner.SettingsSingleFileGenerator", "10.0.0.0")]
    internal sealed partial class Settings : global::System.Configuration.ApplicationSettingsBase {

        // Built once by the field initializer: a new Settings object passed
        // through ApplicationSettingsBase.Synchronized and cast back.
        private static Settings defaultInstance =
            (Settings)global::System.Configuration.ApplicationSettingsBase.Synchronized(new Settings());

        /// <summary>
        /// Gets the shared <see cref="Settings"/> instance.
        /// </summary>
        public static Settings Default {
            get { return defaultInstance; }
        }
    }
}
27 |
--------------------------------------------------------------------------------
/UniHax/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("UniMap")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("Microsoft")]
12 | [assembly: AssemblyProduct("UniMap")]
13 | [assembly: AssemblyCopyright("Copyright © Microsoft 2011")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("5f203b2d-de63-4204-bfb3-7f811bf0ebc1")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/TestUniHax/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
1 | using System.Reflection;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.InteropServices;
4 |
5 | // General Information about an assembly is controlled through the following
6 | // set of attributes. Change these attribute values to modify the information
7 | // associated with an assembly.
8 | [assembly: AssemblyTitle("TestUniMap")]
9 | [assembly: AssemblyDescription("")]
10 | [assembly: AssemblyConfiguration("")]
11 | [assembly: AssemblyCompany("Microsoft")]
12 | [assembly: AssemblyProduct("TestUniMap")]
13 | [assembly: AssemblyCopyright("Copyright © Microsoft 2011")]
14 | [assembly: AssemblyTrademark("")]
15 | [assembly: AssemblyCulture("")]
16 |
17 | // Setting ComVisible to false makes the types in this assembly not visible
18 | // to COM components. If you need to access a type in this assembly from
19 | // COM, set the ComVisible attribute to true on that type.
20 | [assembly: ComVisible(false)]
21 |
22 | // The following GUID is for the ID of the typelib if this project is exposed to COM
23 | [assembly: Guid("248bce11-a986-4cc6-9c61-ce90bd108952")]
24 |
25 | // Version information for an assembly consists of the following four values:
26 | //
27 | // Major Version
28 | // Minor Version
29 | // Build Number
30 | // Revision
31 | //
32 | // You can specify all the values or you can default the Build and Revision Numbers
33 | // by using the '*' as shown below:
34 | // [assembly: AssemblyVersion("1.0.*")]
35 | [assembly: AssemblyVersion("1.0.0.0")]
36 | [assembly: AssemblyFileVersion("1.0.0.0")]
37 |
--------------------------------------------------------------------------------
/UniHax/Exceptions.cs:
--------------------------------------------------------------------------------
1 |
2 | // Copyright (c) 2011 by Christopher Weber
3 |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
5 | // of this software and associated documentation files (the "Software"), to deal
6 | // in the Software without restriction, including without limitation the rights
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the Software is
9 | // furnished to do so, subject to the following conditions:
10 |
11 | // The above copyright notice and this permission notice shall be included in
12 | // all copies or substantial portions of the Software.
13 |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | // SOFTWARE.
21 |
22 | // Authors:
23 | // Christopher Weber (chris@lookout.net)
24 |
25 | using System;
26 |
27 |
namespace UniHax
{
    /// <summary>
    /// Exception type for best-fit mapping errors. Its <see cref="Message"/>
    /// is always prefixed with "Bestfit mapping error:".
    /// </summary>
    public class BestFitMappingException : ApplicationException
    {
        // Detail text appended after the fixed prefix in Message.
        // Stays empty when the parameterless constructor is used.
        private readonly string messageDetails = String.Empty;

        /// <summary>Gets or sets a caller-supplied timestamp for the error.</summary>
        public DateTime TimeStamp { get; set; }

        /// <summary>Gets or sets a caller-supplied description of the cause.</summary>
        public string CauseOfError { get; set; }

        /// <summary>
        /// Creates an exception with no detail text.
        /// </summary>
        public BestFitMappingException()
        {
        }

        /// <summary>
        /// Creates an exception whose message carries the given detail text.
        /// </summary>
        /// <param name="message">Detail text; null is treated as empty.</param>
        public BestFitMappingException(string message)
            : base(message)
        {
            messageDetails = message ?? String.Empty;
        }

        /// <summary>
        /// Creates an exception with detail text and the exception that caused it.
        /// </summary>
        /// <param name="message">Detail text; null is treated as empty.</param>
        /// <param name="innerException">The underlying cause, exposed via InnerException.</param>
        public BestFitMappingException(string message, Exception innerException)
            : base(message, innerException)
        {
            messageDetails = message ?? String.Empty;
        }

        /// <summary>
        /// Gets the fixed prefix "Bestfit mapping error:" followed by the detail text.
        /// </summary>
        public override string Message
        {
            get
            {
                // The original had a second, unreachable "return base.Message;" here.
                return String.Format("Bestfit mapping error:{0}", messageDetails);
            }
        }
    }
}
51 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build Folders (you can keep bin if you'd like, to store dlls and pdbs)
2 | [Bb]in/
3 | [Oo]bj/
4 |
5 | # mstest test results
6 | TestResults
7 |
8 | ## Ignore Visual Studio temporary files, build results, and
9 | ## files generated by popular Visual Studio add-ons.
10 |
11 | # User-specific files
12 | *.suo
13 | *.user
14 | *.sln.docstates
15 |
16 | # Build results
17 | [Dd]ebug/
18 | [Rr]elease/
19 | x64/
20 | *_i.c
21 | *_p.c
22 | *.ilk
23 | *.meta
24 | *.obj
25 | *.pch
26 | *.pdb
27 | *.pgc
28 | *.pgd
29 | *.rsp
30 | *.sbr
31 | *.tlb
32 | *.tli
33 | *.tlh
34 | *.tmp
35 | *.log
36 | *.vspscc
37 | *.vssscc
38 | .builds
39 |
40 | # Visual C++ cache files
41 | ipch/
42 | *.aps
43 | *.ncb
44 | *.opensdf
45 | *.sdf
46 |
47 | # Visual Studio profiler
48 | *.psess
49 | *.vsp
50 | *.vspx
51 |
52 | # Guidance Automation Toolkit
53 | *.gpState
54 |
55 | # ReSharper is a .NET coding add-in
56 | _ReSharper*
57 |
58 | # NCrunch
59 | *.ncrunch*
60 | .*crunch*.local.xml
61 |
62 | # Installshield output folder
63 | [Ee]xpress
64 |
65 | # DocProject is a documentation generator add-in
66 | DocProject/buildhelp/
67 | DocProject/Help/*.HxT
68 | DocProject/Help/*.HxC
69 | DocProject/Help/*.hhc
70 | DocProject/Help/*.hhk
71 | DocProject/Help/*.hhp
72 | DocProject/Help/Html2
73 | DocProject/Help/html
74 |
75 | # Click-Once directory
76 | publish
77 |
78 | # Publish Web Output
79 | *.Publish.xml
80 |
81 | # NuGet Packages Directory
82 | packages
83 |
84 | # Windows Azure Build Output
85 | csx
86 | *.build.csdef
87 |
88 | # Windows Store app package directory
89 | AppPackages/
90 |
91 | # Others
92 | [Bb]in
93 | [Oo]bj
94 | sql
95 | TestResults
96 | [Tt]est[Rr]esult*
97 | *.Cache
98 | ClientBin
99 | [Ss]tyle[Cc]op.*
100 | ~$*
101 | *.dbmdl
102 | Generated_Code #added for RIA/Silverlight projects
103 |
104 | # Backup & report files from converting an old project file to a newer
105 | # Visual Studio version. Backup files are not needed, because we have git ;-)
106 | _UpgradeReport_Files/
107 | Backup*/
108 | UpgradeLog*.XML
109 |
--------------------------------------------------------------------------------
/UniHax.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 11.00
3 | # Visual Studio 2010
4 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UniHax", "UniHax\UniHax.csproj", "{434C7AAC-316B-4425-A459-730E3E127505}"
5 | EndProject
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TestUniHax", "TestUniHax\TestUniHax.csproj", "{6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}"
7 | EndProject
8 | Global
9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 | Debug|Any CPU = Debug|Any CPU
11 | Debug|Mixed Platforms = Debug|Mixed Platforms
12 | Debug|x86 = Debug|x86
13 | Release|Any CPU = Release|Any CPU
14 | Release|Mixed Platforms = Release|Mixed Platforms
15 | Release|x86 = Release|x86
16 | EndGlobalSection
17 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
18 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
19 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Any CPU.Build.0 = Debug|Any CPU
20 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU
21 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU
22 | {434C7AAC-316B-4425-A459-730E3E127505}.Debug|x86.ActiveCfg = Debug|Any CPU
23 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Any CPU.ActiveCfg = Release|Any CPU
24 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Any CPU.Build.0 = Release|Any CPU
25 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU
26 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|Mixed Platforms.Build.0 = Release|Any CPU
27 | {434C7AAC-316B-4425-A459-730E3E127505}.Release|x86.ActiveCfg = Release|Any CPU
28 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|Any CPU.ActiveCfg = Debug|x86
29 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|Mixed Platforms.ActiveCfg = Debug|x86
30 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|Mixed Platforms.Build.0 = Debug|x86
31 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|x86.ActiveCfg = Debug|x86
32 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Debug|x86.Build.0 = Debug|x86
33 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|Any CPU.ActiveCfg = Release|x86
34 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|Mixed Platforms.ActiveCfg = Release|x86
35 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|Mixed Platforms.Build.0 = Release|x86
36 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|x86.ActiveCfg = Release|x86
37 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}.Release|x86.Build.0 = Release|x86
38 | EndGlobalSection
39 | GlobalSection(SolutionProperties) = preSolution
40 | HideSolutionNode = FALSE
41 | EndGlobalSection
42 | EndGlobal
43 |
--------------------------------------------------------------------------------
/TestUniHax/Properties/Resources.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.1
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | //
9 | //------------------------------------------------------------------------------
10 |
namespace TestUniHax.Properties {
    using System;


    /// <summary>
    ///   A strongly-typed resource class, for looking up localized strings, etc.
    /// </summary>
    // This class was auto-generated by the StronglyTypedResourceBuilder
    // class via a tool like ResGen or Visual Studio.
    // To add or remove a member, edit your .ResX file then rerun ResGen
    // with the /str option, or rebuild your VS project.
    [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
    [global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
    [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
    internal class Resources {

        // Lazily created by the ResourceManager property.
        private static global::System.Resources.ResourceManager resourceMan;

        // Value stored and returned by the Culture property.
        private static global::System.Globalization.CultureInfo resourceCulture;

        [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
        internal Resources() {
        }

        /// <summary>
        ///   Returns the cached ResourceManager instance used by this class.
        /// </summary>
        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
        internal static global::System.Resources.ResourceManager ResourceManager {
            get {
                // Create the manager on first access and reuse it afterwards.
                if (resourceMan == null) {
                    resourceMan = new global::System.Resources.ResourceManager(
                        "TestUniHax.Properties.Resources",
                        typeof(Resources).Assembly);
                }
                return resourceMan;
            }
        }

        /// <summary>
        ///   Overrides the current thread's CurrentUICulture property for all
        ///   resource lookups using this strongly typed resource class.
        /// </summary>
        [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
        internal static global::System.Globalization.CultureInfo Culture {
            get { return resourceCulture; }
            set { resourceCulture = value; }
        }
    }
}
64 |
--------------------------------------------------------------------------------
/UniHax/UnicodeMapping.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2011 by Christopher Weber
2 |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
4 | // of this software and associated documentation files (the "Software"), to deal
5 | // in the Software without restriction, including without limitation the rights
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | // copies of the Software, and to permit persons to whom the Software is
8 | // furnished to do so, subject to the following conditions:
9 |
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | // SOFTWARE.
20 |
21 | // Authors:
22 | // Christopher Weber (chris@lookout.net)
23 |
namespace UniHax
{
    /// <summary>
    /// Represents a Unicode mapping to an ASCII character.
    /// All values are plain read/write strings; nothing is validated here.
    /// </summary>
    public class UnicodeMapping
    {
        /// <summary>
        /// Transformation names carried by every instance.
        /// NOTE(review): presumably the values <see cref="Transform"/> may take; confirm against the mapping loader.
        /// </summary>
        public readonly string[] transformations =
        {
            "Simple_Lowercase_Mapping",
            "Lowercase_Mapping",
            "Simple_Case_Folding",
            "cf",
            "Simple_Uppercase_Mapping",
            "Simple_Titlecase_Mapping",
            "Uppercase_Mapping",
            "Titlecase_Mapping",
            "Decomposition_Mapping",
            "FC_NFKC"
        };

        /// <summary>Gets or sets the character text of this mapping.</summary>
        public string Character { get; set; }

        /// <summary>Gets or sets the ASCII text of this mapping.</summary>
        public string Ascii { get; set; }

        /// <summary>Gets or sets the Unicode text of this mapping.</summary>
        public string Unicode { get; set; }

        /// <summary>Gets or sets the name text of this mapping.</summary>
        public string Name { get; set; }

        /// <summary>Gets or sets the transform text of this mapping.</summary>
        public string Transform { get; set; }
    }
}
83 |
--------------------------------------------------------------------------------
/UniHax/UniHax.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | AnyCPU
6 | 8.0.30703
7 | 2.0
8 | {434C7AAC-316B-4425-A459-730E3E127505}
9 | Library
10 | Properties
11 | UniHax
12 | UniHax
13 | v4.0
14 | 512
15 |
16 |
17 | true
18 | full
19 | false
20 | bin\Debug\
21 | DEBUG;TRACE
22 | prompt
23 | 4
24 |
25 |
26 | pdbonly
27 | true
28 | bin\Release\
29 | TRACE
30 | prompt
31 | 4
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 | True
49 | True
50 | Resources.resx
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 | ResXFileCodeGenerator
61 | Resources.Designer.cs
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 | Designer
70 |
71 |
72 |
73 |
74 |
75 |
76 |
83 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | unicode-hax
2 | ===========
3 |
4 | A library to assist in security-testing Unicode enabled applications. The original intent of putting this together
5 | was threefold:
6 |
7 | 1. To provide a reduced set of useful Unicode input to a software fuzzer
8 | 2. To document historically problematic Unicode character sequences which
9 | might negatively affect protocols and Web applications.
10 | 3. To look up mappings for ASCII-equivalent characters
11 |
12 | For example, the __best-fit__ and __normalization__ mappings can be useful for testing Web applications for
13 | cross-site scripting (XSS) or SQL injection (SQLi) vulnerabilities, by providing you with alternative
14 | characters which map back, or transform, to the intended ASCII encoded input - such as "<", "'", etc.
15 |
16 | Additionally, many __problem characters__ have been pre-defined as a small set, reducing the number of iterations
17 | a fuzzer might need to perform.
18 |
19 | Major features:
20 | - best fit mappings
21 | - Unicode normalization mappings
22 | - hard-coded Unicode characters useful in fuzzing
23 |
24 | For fuzzing applications it includes:
25 | - ill-formed byte sequences
26 | - non-characters
27 | - private use area (PUA)
28 | - unassigned code points
29 | - code points with special meaning such as the BOM and RLO
30 | - half-surrogate values
31 |
32 | /TestUniHax
33 | -----------
34 | This Windows form application loads the UniHax library mainly to test the best-fit and normalization mappings.
35 | If you simply input a single ASCII character, all of its equivalent characters will be displayed.
36 |
37 | e.g. If you're testing a Web-application and want to test equivalents for the "<" character U+003C,
38 | enter that as input and select either "best-fit mapping", which is linked to a charset encoding,
39 | or "normalization" equivalents. For this character, the following are best-fits:
40 |
41 | - U+003B in the APL-ISO-IR-68 encoding
42 | - U+0014 in the CP424 encoding
43 | - etc...
44 |
45 | Also, the following are normalization decomposition mappings:
46 |
47 | - U+FE64 SMALL LESS-THAN SIGN
48 | - U+FF1C FULLWIDTH LESS-THAN SIGN
49 |
50 | /UniHax
51 | -------
52 | This library contains a small set of __problematic Unicode characters__ in **Fuzzer.cs** such as the following:
53 |
54 | ```csharp
55 | ///
56 | /// An unassigned code point U+0FED
57 | ///
58 | public static readonly string uUnassigned = "\u0FED";
59 | ///
60 | /// An illegal low half-surrogate U+DEAD
61 | ///
62 | public static readonly string uDEAD = "\uDEAD";
63 | ```
64 |
65 | It also provides the following method, which returns those characters as a byte array in any encoding.
66 |
67 | ```csharp
68 | public byte[] GetCharacterBytes(string encoding, string character)
69 | ```
70 |
71 | There's also the following method to return any Unicode character as a malformed byte sequence, simply by
72 | trimming the last byte.
73 |
74 | ```csharp
75 | public byte[] GetCharacterBytesMalformed(string encoding, string character)
76 | ```
77 |
78 | This project also contains the data files, pre-created in the __/data__ folder, and a __Mappings.cs__ Mappings
79 | class which can look up mapping equivalents for the following:
80 |
81 | - ASCII equivalent best-fit mappings across legacy character encodings
82 | - ASCII equivalent mappings for Unicode normalization types. For example, Web browsers commonly use
83 | a form of normalization for keeping URL content and host names compatible.
84 |
85 | For more on Unicode Normalization see TR15: http://www.unicode.org/reports/tr15/
86 |
87 | License
88 | -------
89 | Unicode-Hax by Chris Weber is licensed under a
90 |
91 | Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported License.
92 | Based on a work at https://github.com/cweb/unicode-hax.
93 |
--------------------------------------------------------------------------------
/UniHax/BestFitMapping.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2011 by Christopher Weber
2 |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
4 | // of this software and associated documentation files (the "Software"), to deal
5 | // in the Software without restriction, including without limitation the rights
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | // copies of the Software, and to permit persons to whom the Software is
8 | // furnished to do so, subject to the following conditions:
9 |
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | // SOFTWARE.
20 |
21 | // Authors:
22 | // Christopher Weber (chris@lookout.net)
23 |
namespace UniHax
{
    /// <summary>
    /// Represents a bestfit mapping between two characters.
    /// All values are plain read/write strings; nothing is validated here.
    /// </summary>
    public class BestFitMapping
    {
        /// <summary>
        /// Charset names carried by every instance.
        /// NOTE(review): presumably the values <see cref="Charset"/> may take; confirm against the mapping loader.
        /// "SYMBOL" and "symbol" are both listed, exactly as in the original table.
        /// </summary>
        public readonly string[] charsets =
        {
            "APL-ISO-IR-68",
            "CP424",
            "IBMGRAPH",
            "US-ASCII-QUOTES",
            "windows-1250",
            "windows-1251",
            "windows-1252",
            "windows-1253",
            "windows-1254",
            "windows-1255",
            "windows-1256",
            "windows-1257",
            "windows-1258",
            "windows-874",
            "CP864",
            "CP037",
            "CP1026",
            "CP500",
            "CP875",
            "DINGBATS",
            "KEYBOARD",
            "SYMBOL",
            "symbol",
            "zdingbat",
            "JAPANESE",
            "GSM0338"
        };

        /// <summary>Gets or sets the character text of this mapping.</summary>
        public string Character { get; set; }

        /// <summary>Gets or sets the ASCII text of this mapping.</summary>
        public string Ascii { get; set; }

        /// <summary>Gets or sets the Unicode text of this mapping.</summary>
        public string Unicode { get; set; }

        /// <summary>Gets or sets the name text of this mapping.</summary>
        public string Name { get; set; }

        /// <summary>Gets or sets the charset text of this mapping.</summary>
        public string Charset { get; set; }
    }
}
97 |
--------------------------------------------------------------------------------
/UniHax/UnicodeChar.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2011 by Christopher Weber
2 |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
4 | // of this software and associated documentation files (the "Software"), to deal
5 | // in the Software without restriction, including without limitation the rights
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | // copies of the Software, and to permit persons to whom the Software is
8 | // furnished to do so, subject to the following conditions:
9 |
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | // SOFTWARE.
20 |
21 | // Authors:
22 | // Christopher Weber (chris@lookout.net)
23 |
24 | using System;
25 |
namespace UniHax
{
    /// <summary>
    /// Helpers for converting between a character, its string form and its
    /// code point written as hexadecimal text.
    /// [!] The char-based methods take a single UTF-16 code unit, so they only
    ///     cover the Basic Multilingual Plane (BMP).
    /// [!] Does NOT support surrogate pairs as input.
    /// </summary>
    public struct UniChar
    {
        /// <summary>Code point text; not read or written by any method of this struct.</summary>
        public string CodePoint;

        /// <summary>Character text; not read or written by any method of this struct.</summary>
        public string Character;

        /// <summary>
        /// Returns the code point of <paramref name="c"/> as upper-case
        /// hexadecimal, zero-padded to at least four digits (e.g. "003C").
        /// </summary>
        /// <param name="c">The UTF-16 code unit to describe.</param>
        /// <returns>The hexadecimal code point text.</returns>
        public string GetCodePoint(char c)
        {
            // A char-to-int conversion cannot fail, so the original
            // try/catch (with a dead assignment before "throw;") is gone.
            return FormatCodePoint(c);
        }

        /// <summary>
        /// Returns <paramref name="character"/> as a one-character string.
        /// </summary>
        /// <remarks>
        /// Previously the argument was ignored and the <see cref="CodePoint"/>
        /// field was parsed instead, which threw NullReferenceException on a
        /// default-initialized struct.
        /// </remarks>
        /// <param name="character">The character to convert.</param>
        /// <returns>A string containing only <paramref name="character"/>.</returns>
        public string ConvertCharacterToString(char character)
        {
            return character.ToString();
        }

        /// <summary>
        /// Returns the code point of <paramref name="character"/> as upper-case
        /// hexadecimal, zero-padded to at least four digits.
        /// </summary>
        /// <remarks>
        /// Previously the argument was ignored and the <see cref="CodePoint"/>
        /// field was re-formatted instead.
        /// </remarks>
        /// <param name="character">The character to describe.</param>
        /// <returns>The hexadecimal code point text.</returns>
        public string ConvertCharacterToCodePoint(char character)
        {
            return FormatCodePoint(character);
        }

        /// <summary>
        /// Converts hexadecimal code point text (surrounding whitespace allowed)
        /// to the corresponding string via Char.ConvertFromUtf32.
        /// </summary>
        /// <param name="codepoint">
        /// Hexadecimal digits such as "FF1C". Null, blank or unparseable text
        /// is treated as code point 0.
        /// </param>
        /// <returns>The string for the code point, or "\0" for unparseable input.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// The parsed value is not accepted by Char.ConvertFromUtf32, i.e. it
        /// lies in the surrogate range D800-DFFF or outside 0-10FFFF.
        /// </exception>
        public string ConvertCodePointToString(string codepoint)
        {
            return Char.ConvertFromUtf32(ParseCodePointOrZero(codepoint));
        }

        // Formats one UTF-16 code unit as at-least-four-digit upper-case hex.
        private static string FormatCodePoint(char c)
        {
            return String.Format("{0:X4}", (int)c);
        }

        // Parses hex text and returns 0 for anything that cannot be parsed,
        // so every kind of malformed input is handled the same way.
        private static int ParseCodePointOrZero(string codepoint)
        {
            if (String.IsNullOrWhiteSpace(codepoint))
            {
                return 0;
            }

            try
            {
                return Convert.ToInt32(codepoint.Trim(), 16);
            }
            catch (FormatException)
            {
                // Contains something that is not a hexadecimal digit.
                return 0;
            }
            catch (OverflowException)
            {
                // Too many digits to fit in 32 bits.
                return 0;
            }
            catch (ArgumentException)
            {
                // For example a leading minus sign on non-decimal input.
                return 0;
            }
        }
    }
}
122 |
--------------------------------------------------------------------------------
/UniHax/Properties/Resources.Designer.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // This code was generated by a tool.
4 | // Runtime Version:4.0.30319.1
5 | //
6 | // Changes to this file may cause incorrect behavior and will be lost if
7 | // the code is regenerated.
8 | //
9 | //------------------------------------------------------------------------------
10 |
11 | namespace UniHax.Properties {
12 | using System;
13 |
14 |
/// <summary>
/// Strongly-typed accessor for the localized string resources of this assembly.
/// </summary>
// Emitted by the StronglyTypedResourceBuilder class (ResGen / Visual Studio).
// To add or remove a member, edit the .ResX file and regenerate (ResGen /str,
// or rebuild the VS project) instead of editing this class by hand.
[global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "4.0.0.0")]
[global::System.Diagnostics.DebuggerNonUserCodeAttribute()]
[global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()]
internal class Resources
{
    private static global::System.Resources.ResourceManager resourceMan;

    private static global::System.Globalization.CultureInfo resourceCulture;

    [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")]
    internal Resources()
    {
    }

    /// <summary>
    /// Gets the ResourceManager used by this class, creating and caching it on first use.
    /// </summary>
    [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
    internal static global::System.Resources.ResourceManager ResourceManager
    {
        get
        {
            if (resourceMan == null)
            {
                resourceMan = new global::System.Resources.ResourceManager(
                    "UniHax.Properties.Resources",
                    typeof(Resources).Assembly);
            }

            return resourceMan;
        }
    }

    /// <summary>
    /// Gets or sets the culture passed to every resource lookup made through this
    /// class, overriding the current thread's CurrentUICulture.
    /// </summary>
    [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)]
    internal static global::System.Globalization.CultureInfo Culture
    {
        get { return resourceCulture; }
        set { resourceCulture = value; }
    }

    /// <summary>
    /// Looks up the localized string stored under the "bestfit" key. Its content is a
    /// &lt;Bestfit&gt; XML document made of &lt;Mapping&gt; entries, each with
    /// &lt;Ascii&gt;, &lt;Unicode&gt;, &lt;Name&gt; and &lt;Charset&gt; elements
    /// (for example Ascii 0021, Unicode 00A8, Charset APL-ISO-IR-68).
    /// </summary>
    internal static string bestfit
    {
        get { return ResourceManager.GetString("bestfit", resourceCulture); }
    }

    /// <summary>
    /// Looks up the localized string stored under the "unicode" key. Its content is a
    /// &lt;Bestfit&gt; XML document made of &lt;Mapping&gt; entries, each with
    /// &lt;Ascii&gt;, &lt;Unicode&gt;, &lt;Name&gt; and &lt;Transform&gt; elements
    /// (for example Ascii 006A, Unicode 004A, Transform Simple_Lowercase_Mapping).
    /// </summary>
    internal static string unicode
    {
        get { return ResourceManager.GetString("unicode", resourceCulture); }
    }
}
121 | }
122 |
--------------------------------------------------------------------------------
/TestUniHax/Properties/Resources.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 | text/microsoft-resx
107 |
108 |
109 | 2.0
110 |
111 |
112 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
113 |
114 |
115 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=2.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
--------------------------------------------------------------------------------
/TestUniHax/TestUniHax.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Debug
5 | x86
6 | 8.0.30703
7 | 2.0
8 | {6B734BA5-A17B-4E85-8465-B8AAAFFAE56A}
9 | WinExe
10 | Properties
11 | TestUniHax
12 | TestUniHax
13 | v4.0
14 | Client
15 | 512
16 | publish\
17 | true
18 | Disk
19 | false
20 | Foreground
21 | 7
22 | Days
23 | false
24 | false
25 | true
26 | 0
27 | 1.0.0.%2a
28 | false
29 | false
30 | true
31 |
32 |
33 | x86
34 | true
35 | full
36 | false
37 | bin\Debug\
38 | DEBUG;TRACE
39 | prompt
40 | 4
41 |
42 |
43 | x86
44 | pdbonly
45 | true
46 | bin\Release\
47 | TRACE
48 | prompt
49 | 4
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 | Form
66 |
67 |
68 | Form1.cs
69 |
70 |
71 |
72 |
73 | Form1.cs
74 |
75 |
76 | ResXFileCodeGenerator
77 | Resources.Designer.cs
78 | Designer
79 |
80 |
81 | True
82 | Resources.resx
83 | True
84 |
85 |
86 |
87 | SettingsSingleFileGenerator
88 | Settings.Designer.cs
89 |
90 |
91 | True
92 | Settings.settings
93 | True
94 |
95 |
96 |
97 |
98 | False
99 | Microsoft .NET Framework 4 Client Profile %28x86 and x64%29
100 | true
101 |
102 |
103 | False
104 | .NET Framework 3.5 SP1 Client Profile
105 | false
106 |
107 |
108 | False
109 | .NET Framework 3.5 SP1
110 | false
111 |
112 |
113 | False
114 | Windows Installer 3.1
115 | true
116 |
117 |
118 |
119 |
120 | {434C7AAC-316B-4425-A459-730E3E127505}
121 | UniHax
122 |
123 |
124 |
125 |
132 |
--------------------------------------------------------------------------------
/TestUniHax/Form1.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | text/microsoft-resx
110 |
111 |
112 | 2.0
113 |
114 |
115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
119 |
120 |
--------------------------------------------------------------------------------
/UniHax/Properties/Resources.resx:
--------------------------------------------------------------------------------
1 |
2 |
3 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 | text/microsoft-resx
110 |
111 |
112 | 2.0
113 |
114 |
115 | System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
116 |
117 |
118 | System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089
119 |
120 |
121 |
122 | ..\data\bestfit.xml;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;utf-8
123 |
124 |
125 | ..\data\unicode.xml;System.String, mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089;utf-8
126 |
127 |
--------------------------------------------------------------------------------
/TestUniHax/Form1.cs:
--------------------------------------------------------------------------------
1 |
2 | // Copyright (c) 2011 by Christopher Weber
3 |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
5 | // of this software and associated documentation files (the "Software"), to deal
6 | // in the Software without restriction, including without limitation the rights
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the Software is
9 | // furnished to do so, subject to the following conditions:
10 |
11 | // The above copyright notice and this permission notice shall be included in
12 | // all copies or substantial portions of the Software.
13 |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | // SOFTWARE.
21 |
22 | // Authors:
23 | // Christopher Weber (chris@lookout.net)
24 |
25 | using System;
26 | using System.Collections.Generic;
27 | using System.Windows.Forms;
28 | using UniHax;
29 |
30 | namespace TestUniMap
31 | {
/// <summary>
/// Test form for the UniHax library. It takes a single input character and shows
/// its best-fit mappings or Unicode transformation mappings, both as raw
/// characters in a text box and as rows in a data grid.
/// </summary>
public partial class FormUniMapTest : Form
{
    // Status text shown when the input box does not contain exactly one character.
    private const string InputFormatError =
        "Error: Input was not in a correct format. Only a single ASCII character is allowed. The first character you entered will be used.";

    private char input;
    private string charset;
    private string transform;
    private Mappings data = new Mappings();

    /// <summary>
    /// Builds the form and fills the charset and transform combo boxes and the
    /// sample-character text box.
    /// </summary>
    public FormUniMapTest()
    {
        InitializeComponent();

        // Charset combobox: every available charset, preceded by a blank entry.
        // NOTE(review): the list items are plain strings, which expose no "Charset"
        // property - confirm that this DisplayMember value is intended.
        List<string> charsets = Data.GetAvailableBestfitCharsets();
        charsets.Insert(0, "");
        comboBoxCharsets.DataSource = charsets;
        comboBoxCharsets.DisplayMember = "Charset";

        // Transform combobox: same layout as the charset combobox.
        List<string> transforms = Data.GetAvailableTransforms();
        transforms.Insert(0, "");
        comboBoxTransformations.DataSource = transforms;
        comboBoxTransformations.DisplayMember = "Transform";

        // Populate the text box with a selection of the Fuzzer's special characters.
        string[] aProps = { Fuzzer.uBOM, Fuzzer.uMVS, Fuzzer.uReservedCodePoint, Fuzzer.uRLO, Fuzzer.uDEAD, Fuzzer.uDAAD, Fuzzer.uPrivate, Fuzzer.uNotACharacter };
        textBoxUnicharProps.Text = String.Join("\r\n", aProps);
    }

    /// <summary>The mapping database queried by this form.</summary>
    public Mappings Data
    {
        get { return data; }
        set { data = value; }
    }

    /// <summary>The character whose mappings are looked up.</summary>
    public char Input
    {
        get { return input; }
        // Assigning a char cannot fail, so no conversion or error handling is needed.
        set { input = value; }
    }

    /// <summary>The charset filter chosen in the charset combo box (blank means no filter).</summary>
    public string Charset
    {
        get { return charset; }
        set { charset = value; }
    }

    /// <summary>The transform filter chosen in the transform combo box (blank means no filter).</summary>
    public string Transform
    {
        get { return transform; }
        set { transform = value; }
    }

    private void labelHelp_Click(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Takes the first character of the input box as the new <see cref="Input"/>.
    /// When the box does not hold exactly one character the status box shows an
    /// error; an empty box leaves <see cref="Input"/> unchanged.
    /// </summary>
    private void textBoxInput_TextChanged(object sender, EventArgs e)
    {
        string sInput = textBoxInput.Text;
        textBoxStatus.Text = "";

        if (sInput.Length > 0)
        {
            // Honour the status message: the first character entered is the one used.
            Input = sInput[0];
        }

        if (sInput.Length != 1)
        {
            textBoxStatus.Text = InputFormatError;
        }
    }

    private void textBoxStatus_TextChanged(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Shows the best-fit mappings of <see cref="Input"/>: the characters in the
    /// output box and the detailed rows, filtered by <see cref="Charset"/>, in the grid.
    /// </summary>
    private void buttonGetBestfit_Click(object sender, EventArgs e)
    {
        textBoxOutput.Text = "";
        List<string> bestfits = Data.GetBestfitMappings(Input);
        textBoxOutput.Text = RenderCodePoints(bestfits);

        // Fill DataGrid
        List<BestFitMapping> lBestfits = new List<BestFitMapping>();
        dataGridViewBestFit.DataSource = null;
        Data.BuildBestfitTable(Input, ref lBestfits, Charset);
        dataGridViewBestFit.DataSource = lBestfits;
    }

    private void textBoxOutput_TextChanged(object sender, EventArgs e)
    {
    }

    private void dataGridViewBestFit_CellContentClick(object sender, DataGridViewCellEventArgs e)
    {
    }

    private void FormUniMapTest_Load(object sender, EventArgs e)
    {
    }

    private void comboBoxCharsets_SelectedIndexChanged(object sender, EventArgs e)
    {
        // SelectedValue is null when nothing is selected; Convert.ToString maps that
        // to an empty string, which the lookups treat as "no filter".
        Charset = Convert.ToString(comboBoxCharsets.SelectedValue);
    }

    private void label1_Click(object sender, EventArgs e)
    {
    }

    private void comboBoxTransformations_SelectedIndexChanged(object sender, EventArgs e)
    {
        // See comboBoxCharsets_SelectedIndexChanged for the null handling.
        Transform = Convert.ToString(comboBoxTransformations.SelectedValue);
    }

    /// <summary>
    /// Shows the Unicode transformation mappings of <see cref="Input"/>: the characters
    /// in the output box and the detailed rows, filtered by <see cref="Transform"/>, in the grid.
    /// </summary>
    private void buttonGetUnicode_Click(object sender, EventArgs e)
    {
        textBoxOutput.Text = "";
        List<string> transforms = Data.GetNormalizationMappings(Input);
        textBoxOutput.Text = RenderCodePoints(transforms);

        // Fill DataGrid
        List<UnicodeMapping> lTransformations = new List<UnicodeMapping>();
        dataGridViewBestFit.DataSource = null;
        Data.BuildTransformationsTable(Input, ref lTransformations, Transform);
        dataGridViewBestFit.DataSource = lTransformations;
    }

    private void textBoxUnicharProps_TextChanged(object sender, EventArgs e)
    {
    }

    private void labelInput_Click(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Converts hexadecimal code point strings to their characters, one per line
    /// (each followed by CR LF). Null and empty entries are skipped.
    /// </summary>
    private static string RenderCodePoints(IEnumerable<string> codePoints)
    {
        UniChar converter = new UniChar();
        System.Text.StringBuilder output = new System.Text.StringBuilder();

        foreach (string codePoint in codePoints)
        {
            if (!String.IsNullOrEmpty(codePoint))
            {
                output.Append(converter.ConvertCodePointToString(codePoint)).Append("\r\n");
            }
        }

        return output.ToString();
    }
}
221 | }
222 |
--------------------------------------------------------------------------------
/UniHax/Fuzzer.cs:
--------------------------------------------------------------------------------
1 |
2 | // Copyright (c) 2011 by Christopher Weber
3 |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy
5 | // of this software and associated documentation files (the "Software"), to deal
6 | // in the Software without restriction, including without limitation the rights
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | // copies of the Software, and to permit persons to whom the Software is
9 | // furnished to do so, subject to the following conditions:
10 |
11 | // The above copyright notice and this permission notice shall be included in
12 | // all copies or substantial portions of the Software.
13 |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 | // SOFTWARE.
21 |
22 | // Authors:
23 | // Christopher Weber (chris@lookout.net)
24 |
25 |
26 | using System;
27 | using System.IO;
28 |
29 | namespace UniHax
30 | {
/// <summary>
/// The Fuzzer has cases for some of the oddball manifestations of Unicode that can trip up software including:
/// <list type="bullet">
/// <item><description>non-character, reserved, and private use area code points</description></item>
/// <item><description>special meaning characters such as the BOM and RLO</description></item>
/// <item><description>ill-formed byte sequences</description></item>
/// <item><description>a half-surrogate code point</description></item>
/// </list>
/// </summary>
public class Fuzzer
{
    /// <summary>
    /// The Byte Order Mark U+FEFF is a special character defining the byte order and endianness
    /// of text data.
    /// UTF-8 percent encoding is %EF%BB%BF
    /// </summary>
    public static readonly string uBOM = "\uFEFF";

    /// <summary>
    /// The Right to Left Override U+202E defines special meaning to re-order the
    /// display of text for right-to-left reading.
    /// UTF-8 percent encoding is %E2%80%AE
    /// </summary>
    public static readonly string uRLO = "\u202E";

    /// <summary>
    /// Mongolian Vowel Separator U+180E is invisible and has the whitespace property.
    /// UTF-8 percent encoding is %E1%A0%8E
    /// </summary>
    public static readonly string uMVS = "\u180E";

    /// <summary>
    /// Word Joiner U+2060 is an invisible zero-width character.
    /// UTF-8 percent encoding is %E2%81%A0
    /// </summary>
    public static readonly string uWordJoiner = "\u2060";

    /// <summary>
    /// A reserved code point U+FEFE
    /// UTF-8 percent encoding is %EF%BB%BE
    /// </summary>
    public static readonly string uReservedCodePoint = "\uFEFE";

    /// <summary>
    /// The code point U+FFFF is guaranteed to not be a Unicode character at all
    /// UTF-8 percent encoding is %EF%BF%BF
    /// </summary>
    public static readonly string uNotACharacter = "\uFFFF";

    /// <summary>
    /// An unassigned code point U+0FED
    /// UTF-8 percent encoding is %E0%BF%AD
    /// </summary>
    public static readonly string uUnassigned = "\u0FED";

    /// <summary>
    /// An illegal low half-surrogate U+DEAD
    /// UTF-8 percent encoding is %ED%BA%AD
    /// </summary>
    public static readonly string uDEAD = "\uDEAD";

    /// <summary>
    /// An illegal high half-surrogate U+DAAD
    /// UTF-8 percent encoding is %ED%AA%AD
    /// </summary>
    public static readonly string uDAAD = "\uDAAD";

    /// <summary>
    /// A Private Use Area code point U+F8FF which Apple happens to use for its logo.
    /// UTF-8 percent encoding is %EF%A3%BF
    /// </summary>
    public static readonly string uPrivate = "\uF8FF";

    /// <summary>
    /// U+FF0F FULLWIDTH SOLIDUS should normalize to / in a hostname
    /// UTF-8 percent encoding is %EF%BC%8F
    /// </summary>
    public static readonly string uFullwidthSolidus = "\uFF0F";

    /// <summary>
    /// Code point with a numerical mapping and value U+1D7D6 MATHEMATICAL BOLD DIGIT EIGHT
    /// UTF-8 percent encoding is %F0%9D%9F%96
    /// </summary>
    public static readonly string uBoldEight = char.ConvertFromUtf32(0x1D7D6);

    /// <summary>
    /// IDNA2003/2008 Deviant - U+00DF normalizes to "ss" during IDNA2003's mapping phase,
    /// different from its IDNA2008 mapping.
    /// See http://www.unicode.org/reports/tr46/
    /// UTF-8 percent encoding is %C3%9F
    /// </summary>
    public static readonly string uIdnaSs = "\u00DF";

    /// <summary>
    /// U+FDFA expands by 11x (UTF-8) and 18x (UTF-16) under NFKC/NFKD
    /// UTF-8 percent encoding is %EF%B7%BA
    /// </summary>
    public static readonly string uFDFA = "\uFDFA";

    /// <summary>
    /// U+0390 expands by 3x (UTF-8) under NFD
    /// UTF-8 percent encoding is %CE%90
    /// </summary>
    public static readonly string u0390 = "\u0390";

    /// <summary>
    /// U+1F82 expands by 4x (UTF-16) under NFD
    /// UTF-8 percent encoding is %E1%BE%82
    /// </summary>
    public static readonly string u1F82 = "\u1F82";

    /// <summary>
    /// U+FB2C expands by 3x (UTF-16) under NFC
    /// UTF-8 percent encoding is %EF%AC%AC
    /// </summary>
    public static readonly string uFB2C = "\uFB2C";

    /// <summary>
    /// U+1D160 expands by 3x (UTF-8) under NFC
    /// UTF-8 percent encoding is %F0%9D%85%A0
    /// </summary>
    public static readonly string u1D160 = char.ConvertFromUtf32(0x1D160);

    /// <summary>
    /// Gets the requested byte representation of a Unicode character.
    /// </summary>
    /// <param name="encoding">
    /// The encoding you want a byte representation in: "utf-8", "utf-16le" or "utf-16be"
    /// (case-insensitive). Any other value, including null, selects UTF-8.
    /// </param>
    /// <param name="character">A single character sent as a string.</param>
    /// <returns>The encoded bytes, without a byte order mark.</returns>
    /// <remarks>
    /// NOTE(review): the encoders are created with their default settings, which are
    /// expected to substitute U+FFFD for unpaired surrogates such as <see cref="uDEAD"/>
    /// rather than emit the ill-formed bytes listed on those fields - confirm before
    /// relying on this method for half-surrogate test cases.
    /// </remarks>
    public byte[] GetCharacterBytes(string encoding, string character)
    {
        return ResolveEncoding(encoding).GetBytes(character);
    }

    /// <summary>
    /// Malforms the bytes by removing the last byte from whichever encoding you specify.
    /// </summary>
    /// <param name="encoding">
    /// The encoding you want a byte representation in: "utf-8", "utf-16le" or "utf-16be"
    /// (case-insensitive). Any other value, including null, selects UTF-8.
    /// </param>
    /// <param name="character">A single character sent as a string.</param>
    /// <returns>
    /// The encoded bytes minus the final byte. An encoding of one byte or less is
    /// returned whole, because an empty array makes a useless test case.
    /// </returns>
    public byte[] GetCharacterBytesMalformed(string encoding, string character)
    {
        byte[] characterBytes = ResolveEncoding(encoding).GetBytes(character);

        // Only drop the trailing byte when something would be left afterwards.
        int keep = characterBytes.Length > 1 ? characterBytes.Length - 1 : characterBytes.Length;

        byte[] shorter = new byte[keep];
        Array.Copy(characterBytes, shorter, keep);
        return shorter;
    }

    /// <summary>Returns the Byte Order Mark string, <see cref="uBOM"/>.</summary>
    public string GetBom()
    {
        return Fuzzer.uBOM;
    }

    /// <summary>
    /// Return a UTF32 byte encoding for an illegal code point value U+1FFFFF.
    /// Note that Unicode 6.0 supports only up to U+10FFFF.
    /// UTF-8 percent encoding for something out of range is %F4%8F%BF%BE
    /// </summary>
    /// <remarks>
    /// NOTE(review): %F4%8F%BF%BE decodes to U+10FFFE, which lies inside the Unicode
    /// range - confirm which UTF-8 sequence was intended here.
    /// </remarks>
    /// <returns>A raw byte array because .NET will not allow illegal code points in the System.String class.</returns>
    public byte[] OutOfRangeCodePointAsUtf32BE()
    {
        // 0x001FFFFF as four big-endian bytes.
        byte[] bytes = { 0x00, 0x1F, 0xFF, 0xFF };
        return bytes;
    }

    /// <summary>
    /// Maps an encoding name to an encoder: "utf-16le" and "utf-16be" select UTF-16
    /// in that byte order, everything else selects UTF-8.
    /// </summary>
    private static System.Text.Encoding ResolveEncoding(string encoding)
    {
        if (String.Equals(encoding, "utf-16le", StringComparison.OrdinalIgnoreCase))
        {
            return new System.Text.UnicodeEncoding();
        }

        if (String.Equals(encoding, "utf-16be", StringComparison.OrdinalIgnoreCase))
        {
            return new System.Text.UnicodeEncoding(true, false);
        }

        return new System.Text.UTF8Encoding();
    }
}
227 | }
228 |
--------------------------------------------------------------------------------
/UniHax/Mappings.cs:
--------------------------------------------------------------------------------
1 | // Copyright (c) 2011 by Christopher Weber
2 |
3 | // Permission is hereby granted, free of charge, to any person obtaining a copy
4 | // of this software and associated documentation files (the "Software"), to deal
5 | // in the Software without restriction, including without limitation the rights
6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | // copies of the Software, and to permit persons to whom the Software is
8 | // furnished to do so, subject to the following conditions:
9 |
10 | // The above copyright notice and this permission notice shall be included in
11 | // all copies or substantial portions of the Software.
12 |
13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19 | // SOFTWARE.
20 |
21 | // Authors:
22 | // Christopher Weber (chris@lookout.net)
23 |
24 | using System;
25 | using System.Collections.Generic;
26 | using System.Linq;
27 | using System.Xml.Linq;
28 | using UniHax.Properties;
29 |
30 | namespace UniHax
31 | {
/// <summary>
/// The Mappings class gives access to bestfit and Unicode normalization character mappings.
/// </summary>
public class Mappings
{

    #region fields

    // This stores the entire Bestfit XML as an in memory database
    private XDocument xDocBestfit;
    // This stores the entire Unicode XML as an in memory database
    private XDocument xDocUnicode;

    #endregion

    #region Ctor(s)
    /// <summary>Loads both embedded XML databases.</summary>
    public Mappings()
    {
        Init();
    }
    #endregion


    #region properties
    /// <summary>The parsed bestfit.xml document.</summary>
    public XDocument XDocBestfit
    {
        get { return xDocBestfit; }
        set { xDocBestfit = value; }
    }

    /// <summary>The parsed unicode.xml document.</summary>
    public XDocument XDocUnicode
    {
        get { return xDocUnicode; }
        set { xDocUnicode = value; }
    }

    #endregion

    // Parses the bestfit.xml and unicode.xml files, which are embedded as string
    // resources, into the two in-memory databases.
    private void Init()
    {
        XDocBestfit = XDocument.Parse(Resources.bestfit);
        XDocUnicode = XDocument.Parse(Resources.unicode);
    }

    /// <summary>
    /// Get a unique list of the charsets from the bestfit database.
    /// </summary>
    /// <returns>Each distinct Charset value found on a Mapping element.</returns>
    public List<string> GetAvailableBestfitCharsets()
    {
        return DistinctElementValues(XDocBestfit, "Charset");
    }

    /// <summary>
    /// Build a data table for all of an ASCII character's bestfit mappings.
    /// </summary>
    /// <param name="cAscii">The ASCII character to query on.</param>
    /// <param name="lBestFit">Reference to a List you want to populate with data; a null list is replaced by a new one.</param>
    /// <param name="sCharset">An optional charset to filter results by.</param>
    public void BuildBestfitTable(char cAscii,
        ref List<BestFitMapping> lBestFit,
        string sCharset = "")
    {
        if (lBestFit == null)
        {
            lBestFit = new List<BestFitMapping>();
        }

        UniChar uc = new UniChar();
        string codePoint = uc.GetCodePoint(cAscii);

        foreach (XElement item in SelectMappings(XDocBestfit, codePoint, "Charset", sCharset))
        {
            BestFitMapping bf = new BestFitMapping();
            bf.Ascii = item.Element("Ascii").Value;
            bf.Unicode = item.Element("Unicode").Value;
            bf.Character = uc.ConvertCodePointToString(bf.Unicode);
            bf.Charset = item.Element("Charset").Value;
            bf.Name = item.Element("Name").Value;
            lBestFit.Add(bf);
        }
    }

    /// <summary>
    /// Build a data table for all of an ASCII character's Unicode transformation mappings.
    /// </summary>
    /// <param name="cAscii">The ASCII character to query on.</param>
    /// <param name="lTransformations">Reference to a List you want to populate with data; a null list is replaced by a new one.</param>
    /// <param name="sTransform">An optional transform to filter results by.</param>
    public void BuildTransformationsTable(char cAscii,
        ref List<UnicodeMapping> lTransformations,
        string sTransform = "")
    {
        if (lTransformations == null)
        {
            lTransformations = new List<UnicodeMapping>();
        }

        UniChar uc = new UniChar();
        string codePoint = uc.GetCodePoint(cAscii);

        foreach (XElement item in SelectMappings(XDocUnicode, codePoint, "Transform", sTransform))
        {
            UnicodeMapping um = new UnicodeMapping();
            um.Ascii = item.Element("Ascii").Value;
            um.Unicode = item.Element("Unicode").Value;
            um.Character = uc.ConvertCodePointToString(um.Unicode);
            um.Transform = item.Element("Transform").Value;
            um.Name = item.Element("Name").Value;
            lTransformations.Add(um);
        }
    }

    /// <summary>
    /// Send me an ASCII character and I'll return you a list of Unicode characters that
    /// best fit map to it. If you don't tell me a specific charset you're
    /// interested in, I'm going to send you data for all of them.
    /// </summary>
    /// <param name="cAscii">The ASCII character to query on.</param>
    /// <param name="sCharset">An optional charset name to filter by. A name that
    /// <c>BestFitMapping.charsets</c> does not contain is treated as "no filter".
    /// Valid values include:
    /// APL-ISO-IR-68, CP424, IBMGRAPH, US-ASCII-QUOTES,
    /// windows-1250, windows-1251, windows-1252, windows-1253, windows-1254,
    /// windows-1255, windows-1256, windows-1257, windows-1258, windows-874,
    /// CP864, CP037, CP1026, CP500, CP875,
    /// DINGBATS, KEYBOARD, SYMBOL, symbol, zdingbat, JAPANESE, GSM0338
    /// </param>
    /// <returns>The distinct Unicode code points (as hexadecimal strings) that map to the character.</returns>
    public List<string> GetBestfitMappings(char cAscii, string sCharset = "")
    {
        BestFitMapping bm = new BestFitMapping();
        // If an invalid charset was entered then set it to the wildcard
        if (!bm.charsets.Contains(sCharset))
        {
            sCharset = "";
        }

        // The filtered lookup uses the requested character, exactly like the
        // unfiltered one.
        string codePoint = new UniChar().GetCodePoint(cAscii);
        return SelectUnicodeValues(XDocBestfit, codePoint, "Charset", sCharset);
    }


    /// <summary>
    /// Get a unique list of the transforms from the Unicode database.
    /// </summary>
    /// <returns>Each distinct Transform value found on a Mapping element.</returns>
    public List<string> GetAvailableTransforms()
    {
        return DistinctElementValues(XDocUnicode, "Transform");
    }


    /// <summary>
    /// Get all normalization mappings for an ASCII character. Optionally specify a specific transform
    /// as a filter.
    /// </summary>
    /// <param name="cAscii">A single ASCII character.</param>
    /// <param name="sTransform">An optional transformation to filter by. A name that
    /// <c>UnicodeMapping.transformations</c> does not contain is treated as "no filter".
    /// Valid transformations specified as:
    /// Simple_Lowercase_Mapping, Lowercase_Mapping, Simple_Case_Folding, cf,
    /// Simple_Uppercase_Mapping, Simple_Titlecase_Mapping, Uppercase_Mapping,
    /// Titlecase_Mapping, Decomposition_Mapping, FC_NFKC
    /// </param>
    /// <returns>The distinct Unicode code points (as hexadecimal strings) that map to the character.</returns>
    public List<string> GetNormalizationMappings(char cAscii, string sTransform = "")
    {
        UnicodeMapping um = new UnicodeMapping();

        // If something is passed in we don't know about then set transform to a wildcard
        if (!um.transformations.Contains(sTransform))
        {
            sTransform = "";
        }

        // The filtered lookup uses the requested character, exactly like the
        // unfiltered one.
        string codePoint = new UniChar().GetCodePoint(cAscii);
        return SelectUnicodeValues(XDocUnicode, codePoint, "Transform", sTransform);
    }

    // Returns the distinct values of the named child element across all Mapping elements.
    private static List<string> DistinctElementValues(XDocument source, string elementName)
    {
        return (from mapping in source.Descendants("Mapping")
                select mapping.Element(elementName).Value).Distinct().ToList();
    }

    // Selects the Mapping elements whose Ascii child equals codePoint and, when
    // filterValue is not empty, whose filterElement child equals filterValue.
    private static IEnumerable<XElement> SelectMappings(XDocument source,
        string codePoint,
        string filterElement,
        string filterValue)
    {
        bool unfiltered = String.IsNullOrEmpty(filterValue);

        return from mapping in source.Descendants("Mapping")
               where (string)mapping.Element("Ascii") == codePoint &&
                     (unfiltered || (string)mapping.Element(filterElement) == filterValue)
               select mapping;
    }

    // Returns the distinct Unicode child values of the mappings chosen by SelectMappings.
    private static List<string> SelectUnicodeValues(XDocument source,
        string codePoint,
        string filterElement,
        string filterValue)
    {
        return SelectMappings(source, codePoint, filterElement, filterValue)
            .Select(mapping => mapping.Element("Unicode").Value)
            .Distinct()
            .ToList();
    }
}
362 | }
363 |
--------------------------------------------------------------------------------
/TestUniHax/Form1.Designer.cs:
--------------------------------------------------------------------------------
1 | namespace TestUniMap
2 | {
3 | partial class FormUniMapTest
4 | {
5 | /// <summary>
6 | /// Required designer variable.
7 | /// </summary>
8 | private System.ComponentModel.IContainer components = null;
9 |
10 | /// <summary>
11 | /// Clean up any resources being used.
12 | /// </summary>
13 | /// <param name="disposing">true if managed resources should be disposed; otherwise, false.</param>
14 | protected override void Dispose(bool disposing)
15 | {
16 | if (disposing && (components != null))
17 | {
18 | components.Dispose();
19 | }
20 | base.Dispose(disposing);
21 | }
22 |
23 | #region Windows Form Designer generated code
24 |
25 | /// <summary>
26 | /// Required method for Designer support - do not modify
27 | /// the contents of this method with the code editor.
28 | /// </summary>
29 | private void InitializeComponent()
30 | {
31 | System.Windows.Forms.DataGridViewCellStyle dataGridViewCellStyle7 = new System.Windows.Forms.DataGridViewCellStyle();
32 | System.Windows.Forms.DataGridViewCellStyle dataGridViewCellStyle8 = new System.Windows.Forms.DataGridViewCellStyle();
33 | System.Windows.Forms.DataGridViewCellStyle dataGridViewCellStyle9 = new System.Windows.Forms.DataGridViewCellStyle();
34 | this.textBoxInput = new System.Windows.Forms.TextBox();
35 | this.buttonGetBestfit = new System.Windows.Forms.Button();
36 | this.buttonGetUnicode = new System.Windows.Forms.Button();
37 | this.textBoxOutput = new System.Windows.Forms.TextBox();
38 | this.textBoxStatus = new System.Windows.Forms.TextBox();
39 | this.dataGridViewBestFit = new System.Windows.Forms.DataGridView();
40 | this.comboBoxCharsets = new System.Windows.Forms.ComboBox();
41 | this.labelCharsetCombo = new System.Windows.Forms.Label();
42 | this.labelInput = new System.Windows.Forms.Label();
43 | this.comboBoxTransformations = new System.Windows.Forms.ComboBox();
44 | this.textBoxUnicharProps = new System.Windows.Forms.TextBox();
45 | this.labelTransformCombo = new System.Windows.Forms.Label();
46 | this.labelResultsAsString = new System.Windows.Forms.Label();
47 | this.labelResultsAsGrid = new System.Windows.Forms.Label();
48 | this.labelSpecialCharacters = new System.Windows.Forms.Label();
49 | ((System.ComponentModel.ISupportInitialize)(this.dataGridViewBestFit)).BeginInit();
50 | this.SuspendLayout();
51 | //
52 | // textBoxInput
53 | // 20pt input box; text changes are routed to textBoxInput_TextChanged.
54 | this.textBoxInput.Font = new System.Drawing.Font("Microsoft Sans Serif", 20F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
55 | this.textBoxInput.Location = new System.Drawing.Point(7, 52);
56 | this.textBoxInput.Name = "textBoxInput";
57 | this.textBoxInput.Size = new System.Drawing.Size(100, 38);
58 | this.textBoxInput.TabIndex = 0;
59 | this.textBoxInput.TextChanged += new System.EventHandler(this.textBoxInput_TextChanged);
60 | //
61 | // buttonGetBestfit
62 | // Clicks are routed to buttonGetBestfit_Click.
63 | this.buttonGetBestfit.Location = new System.Drawing.Point(7, 160);
64 | this.buttonGetBestfit.Name = "buttonGetBestfit";
65 | this.buttonGetBestfit.Size = new System.Drawing.Size(75, 43);
66 | this.buttonGetBestfit.TabIndex = 1;
67 | this.buttonGetBestfit.Text = "Get Bestfit Equivalents";
68 | this.buttonGetBestfit.UseVisualStyleBackColor = true;
69 | this.buttonGetBestfit.Click += new System.EventHandler(this.buttonGetBestfit_Click);
70 | //
71 | // buttonGetUnicode
72 | // Clicks are routed to buttonGetUnicode_Click.
73 | this.buttonGetUnicode.Location = new System.Drawing.Point(7, 268);
74 | this.buttonGetUnicode.Name = "buttonGetUnicode";
75 | this.buttonGetUnicode.Size = new System.Drawing.Size(75, 39);
76 | this.buttonGetUnicode.TabIndex = 2;
77 | this.buttonGetUnicode.Text = "Get Unicode Transforms";
78 | this.buttonGetUnicode.UseVisualStyleBackColor = true;
79 | this.buttonGetUnicode.Click += new System.EventHandler(this.buttonGetUnicode_Click);
80 | //
81 | // textBoxOutput
82 | // Read-only multiline box with a vertical scroll bar.
83 | this.textBoxOutput.Location = new System.Drawing.Point(332, 32);
84 | this.textBoxOutput.Multiline = true;
85 | this.textBoxOutput.Name = "textBoxOutput";
86 | this.textBoxOutput.ReadOnly = true;
87 | this.textBoxOutput.ScrollBars = System.Windows.Forms.ScrollBars.Vertical;
88 | this.textBoxOutput.Size = new System.Drawing.Size(154, 368);
89 | this.textBoxOutput.TabIndex = 3;
90 | this.textBoxOutput.TextChanged += new System.EventHandler(this.textBoxOutput_TextChanged);
91 | //
92 | // textBoxStatus
93 | // Borderless read-only multiline box, drawn in the system highlight color.
94 | this.textBoxStatus.BorderStyle = System.Windows.Forms.BorderStyle.None;
95 | this.textBoxStatus.Font = new System.Drawing.Font("Microsoft Sans Serif", 10F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
96 | this.textBoxStatus.ForeColor = System.Drawing.SystemColors.Highlight;
97 | this.textBoxStatus.Location = new System.Drawing.Point(144, 12);
98 | this.textBoxStatus.Multiline = true;
99 | this.textBoxStatus.Name = "textBoxStatus";
100 | this.textBoxStatus.ReadOnly = true;
101 | this.textBoxStatus.Size = new System.Drawing.Size(155, 118);
102 | this.textBoxStatus.TabIndex = 5;
103 | this.textBoxStatus.TextChanged += new System.EventHandler(this.textBoxStatus_TextChanged);
104 | //
105 | // dataGridViewBestFit
106 | // Read-only grid. Styles 7, 8 and 9 are its column-header, default-cell and row-header styles.
107 | dataGridViewCellStyle7.Alignment = System.Windows.Forms.DataGridViewContentAlignment.MiddleLeft;
108 | dataGridViewCellStyle7.BackColor = System.Drawing.SystemColors.Control;
109 | dataGridViewCellStyle7.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
110 | dataGridViewCellStyle7.ForeColor = System.Drawing.SystemColors.WindowText;
111 | dataGridViewCellStyle7.SelectionBackColor = System.Drawing.SystemColors.Highlight;
112 | dataGridViewCellStyle7.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
113 | dataGridViewCellStyle7.WrapMode = System.Windows.Forms.DataGridViewTriState.True;
114 | this.dataGridViewBestFit.ColumnHeadersDefaultCellStyle = dataGridViewCellStyle7;
115 | this.dataGridViewBestFit.ColumnHeadersHeightSizeMode = System.Windows.Forms.DataGridViewColumnHeadersHeightSizeMode.AutoSize;
116 | dataGridViewCellStyle8.Alignment = System.Windows.Forms.DataGridViewContentAlignment.MiddleLeft;
117 | dataGridViewCellStyle8.BackColor = System.Drawing.SystemColors.Window;
118 | dataGridViewCellStyle8.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
119 | dataGridViewCellStyle8.ForeColor = System.Drawing.SystemColors.ControlText;
120 | dataGridViewCellStyle8.SelectionBackColor = System.Drawing.SystemColors.Highlight;
121 | dataGridViewCellStyle8.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
122 | dataGridViewCellStyle8.WrapMode = System.Windows.Forms.DataGridViewTriState.False;
123 | this.dataGridViewBestFit.DefaultCellStyle = dataGridViewCellStyle8;
124 | this.dataGridViewBestFit.Location = new System.Drawing.Point(492, 32);
125 | this.dataGridViewBestFit.Name = "dataGridViewBestFit";
126 | this.dataGridViewBestFit.ReadOnly = true;
127 | dataGridViewCellStyle9.Alignment = System.Windows.Forms.DataGridViewContentAlignment.MiddleLeft;
128 | dataGridViewCellStyle9.BackColor = System.Drawing.SystemColors.Control;
129 | dataGridViewCellStyle9.Font = new System.Drawing.Font("Microsoft Sans Serif", 8.25F, System.Drawing.FontStyle.Regular, System.Drawing.GraphicsUnit.Point, ((byte)(0)));
130 | dataGridViewCellStyle9.ForeColor = System.Drawing.SystemColors.WindowText;
131 | dataGridViewCellStyle9.SelectionBackColor = System.Drawing.SystemColors.Highlight;
132 | dataGridViewCellStyle9.SelectionForeColor = System.Drawing.SystemColors.HighlightText;
133 | dataGridViewCellStyle9.WrapMode = System.Windows.Forms.DataGridViewTriState.True;
134 | this.dataGridViewBestFit.RowHeadersDefaultCellStyle = dataGridViewCellStyle9;
135 | this.dataGridViewBestFit.Size = new System.Drawing.Size(570, 368);
136 | this.dataGridViewBestFit.TabIndex = 6;
137 | this.dataGridViewBestFit.CellContentClick += new System.Windows.Forms.DataGridViewCellEventHandler(this.dataGridViewBestFit_CellContentClick);
138 | //
139 | // comboBoxCharsets
140 | // Selection changes are routed to comboBoxCharsets_SelectedIndexChanged.
141 | this.comboBoxCharsets.FormattingEnabled = true;
142 | this.comboBoxCharsets.Location = new System.Drawing.Point(7, 133);
143 | this.comboBoxCharsets.Name = "comboBoxCharsets";
144 | this.comboBoxCharsets.Size = new System.Drawing.Size(121, 21);
145 | this.comboBoxCharsets.TabIndex = 7;
146 | this.comboBoxCharsets.SelectedIndexChanged += new System.EventHandler(this.comboBoxCharsets_SelectedIndexChanged);
147 | //
148 | // labelCharsetCombo
149 | // Auto-sized caption; note that its Click handler is named label1_Click.
150 | this.labelCharsetCombo.AutoSize = true;
151 | this.labelCharsetCombo.Location = new System.Drawing.Point(6, 117);
152 | this.labelCharsetCombo.Name = "labelCharsetCombo";
153 | this.labelCharsetCombo.Size = new System.Drawing.Size(117, 13);
154 | this.labelCharsetCombo.TabIndex = 8;
155 | this.labelCharsetCombo.Text = "Filter results by charset:";
156 | this.labelCharsetCombo.Click += new System.EventHandler(this.label1_Click);
157 | //
158 | // labelInput
159 | // Auto-sized two-line caption (the text embeds \r\n); Click goes to labelInput_Click.
160 | this.labelInput.AutoSize = true;
161 | this.labelInput.Location = new System.Drawing.Point(6, 13);
162 | this.labelInput.Name = "labelInput";
163 | this.labelInput.Size = new System.Drawing.Size(85, 26);
164 | this.labelInput.TabIndex = 9;
165 | this.labelInput.Text = "Enter a single \r\nASCII character:";
166 | this.labelInput.Click += new System.EventHandler(this.labelInput_Click);
167 | //
168 | // comboBoxTransformations
169 | // Selection changes are routed to comboBoxTransformations_SelectedIndexChanged.
170 | this.comboBoxTransformations.FormattingEnabled = true;
171 | this.comboBoxTransformations.Location = new System.Drawing.Point(7, 237);
172 | this.comboBoxTransformations.Name = "comboBoxTransformations";
173 | this.comboBoxTransformations.Size = new System.Drawing.Size(121, 21);
174 | this.comboBoxTransformations.TabIndex = 10;
175 | this.comboBoxTransformations.SelectedIndexChanged += new System.EventHandler(this.comboBoxTransformations_SelectedIndexChanged);
176 | //
177 | // textBoxUnicharProps
178 | // Read-only multiline box; text changes are routed to textBoxUnicharProps_TextChanged.
179 | this.textBoxUnicharProps.Location = new System.Drawing.Point(182, 172);
180 | this.textBoxUnicharProps.Multiline = true;
181 | this.textBoxUnicharProps.Name = "textBoxUnicharProps";
182 | this.textBoxUnicharProps.ReadOnly = true;
183 | this.textBoxUnicharProps.Size = new System.Drawing.Size(100, 130);
184 | this.textBoxUnicharProps.TabIndex = 11;
185 | this.textBoxUnicharProps.TextChanged += new System.EventHandler(this.textBoxUnicharProps_TextChanged);
186 | //
187 | // labelTransformCombo
188 | // Auto-sized caption; no event handlers attached.
189 | this.labelTransformCombo.AutoSize = true;
190 | this.labelTransformCombo.Location = new System.Drawing.Point(4, 221);
191 | this.labelTransformCombo.Name = "labelTransformCombo";
192 | this.labelTransformCombo.Size = new System.Drawing.Size(148, 13);
193 | this.labelTransformCombo.TabIndex = 12;
194 | this.labelTransformCombo.Text = "Filter results by transformation:";
195 | //
196 | // labelResultsAsString
197 | // Auto-sized caption; no event handlers attached.
198 | this.labelResultsAsString.AutoSize = true;
199 | this.labelResultsAsString.Location = new System.Drawing.Point(333, 13);
200 | this.labelResultsAsString.Name = "labelResultsAsString";
201 | this.labelResultsAsString.Size = new System.Drawing.Size(96, 13);
202 | this.labelResultsAsString.TabIndex = 13;
203 | this.labelResultsAsString.Text = "Results as a string:";
204 | //
205 | // labelResultsAsGrid
206 | // Auto-sized caption; no event handlers attached.
207 | this.labelResultsAsGrid.AutoSize = true;
208 | this.labelResultsAsGrid.Location = new System.Drawing.Point(492, 12);
209 | this.labelResultsAsGrid.Name = "labelResultsAsGrid";
210 | this.labelResultsAsGrid.Size = new System.Drawing.Size(116, 13);
211 | this.labelResultsAsGrid.TabIndex = 14;
212 | this.labelResultsAsGrid.Text = "Results as a Data Grid:";
213 | //
214 | // labelSpecialCharacters
215 | // Auto-sized caption; no event handlers attached.
216 | this.labelSpecialCharacters.AutoSize = true;
217 | this.labelSpecialCharacters.Location = new System.Drawing.Point(182, 153);
218 | this.labelSpecialCharacters.Name = "labelSpecialCharacters";
219 | this.labelSpecialCharacters.Size = new System.Drawing.Size(99, 13);
220 | this.labelSpecialCharacters.TabIndex = 15;
221 | this.labelSpecialCharacters.Text = "Special Characters:";
222 | //
223 | // FormUniMapTest
224 | // The form itself: sets the 1074x623 client area, adds every control above, and hooks Load.
225 | this.AutoScaleDimensions = new System.Drawing.SizeF(6F, 13F);
226 | this.AutoScaleMode = System.Windows.Forms.AutoScaleMode.Font;
227 | this.ClientSize = new System.Drawing.Size(1074, 623);
228 | this.Controls.Add(this.labelSpecialCharacters);
229 | this.Controls.Add(this.labelResultsAsGrid);
230 | this.Controls.Add(this.labelResultsAsString);
231 | this.Controls.Add(this.labelTransformCombo);
232 | this.Controls.Add(this.textBoxUnicharProps);
233 | this.Controls.Add(this.comboBoxTransformations);
234 | this.Controls.Add(this.labelInput);
235 | this.Controls.Add(this.labelCharsetCombo);
236 | this.Controls.Add(this.comboBoxCharsets);
237 | this.Controls.Add(this.dataGridViewBestFit);
238 | this.Controls.Add(this.textBoxStatus);
239 | this.Controls.Add(this.textBoxOutput);
240 | this.Controls.Add(this.buttonGetUnicode);
241 | this.Controls.Add(this.buttonGetBestfit);
242 | this.Controls.Add(this.textBoxInput);
243 | this.Name = "FormUniMapTest";
244 | this.Text = "Get Unicode and Bestfit Mappings";
245 | this.Load += new System.EventHandler(this.FormUniMapTest_Load);
246 | ((System.ComponentModel.ISupportInitialize)(this.dataGridViewBestFit)).EndInit();
247 | this.ResumeLayout(false);
248 | this.PerformLayout();
249 |
250 | }
251 |
252 | #endregion
253 |
254 | private System.Windows.Forms.TextBox textBoxInput;
255 | private System.Windows.Forms.Button buttonGetBestfit;
256 | private System.Windows.Forms.Button buttonGetUnicode;
257 | private System.Windows.Forms.TextBox textBoxOutput;
258 | private System.Windows.Forms.TextBox textBoxStatus;
259 | private System.Windows.Forms.DataGridView dataGridViewBestFit;
260 | private System.Windows.Forms.ComboBox comboBoxCharsets;
261 | private System.Windows.Forms.Label labelCharsetCombo;
262 | private System.Windows.Forms.Label labelInput;
263 | private System.Windows.Forms.ComboBox comboBoxTransformations;
264 | private System.Windows.Forms.TextBox textBoxUnicharProps;
265 | private System.Windows.Forms.Label labelTransformCombo;
266 | private System.Windows.Forms.Label labelResultsAsString;
267 | private System.Windows.Forms.Label labelResultsAsGrid;
268 | private System.Windows.Forms.Label labelSpecialCharacters;
269 | }
270 | }
271 |
272 |
--------------------------------------------------------------------------------