├── .gitignore
├── LICENSE
├── README.md
├── nuget.config
├── scripts
├── 35MSSharedLib1024.snk
├── release.yml
└── test.yml
├── srm.sln
├── srm
├── AutomataException.cs
├── Match.cs
├── Regex.cs
├── RegexOptions.cs
├── RegexToAutomatonConverter.cs
├── SymbolicRegexBuilder.cs
├── SymbolicRegexNode.cs
├── algebras
│ ├── BDD.cs
│ ├── BDDAlgebra.cs
│ ├── BV.cs
│ ├── BV64Algebra.cs
│ ├── BVAlgebra.cs
│ ├── CharSetSolver.cs
│ ├── CharacterEncoding.cs
│ ├── IBooleanAlgebra.cs
│ ├── ICharAlgebra.cs
│ ├── IntervalSet.cs
│ ├── MintermGenerator.cs
│ └── RangeConverter.cs
├── icon.png
├── matcher
│ ├── BooleanDecisionTree.cs
│ ├── DecisionTree.cs
│ ├── IMatcher.cs
│ ├── SymbolicRegexMatcher.cs
│ ├── UTF8Encoding.cs
│ └── VectorizedIndexOf.cs
├── parser
│ ├── RegexBoyerMoore.cs
│ ├── RegexCharClass.cs
│ ├── RegexCode.cs
│ ├── RegexFCD.cs
│ ├── RegexNode.cs
│ ├── RegexParser.cs
│ ├── RegexReplacement.cs
│ ├── RegexTree.cs
│ └── SR.cs
├── printing
│ └── RegexCharSetPrinter.cs
├── srm.csproj
├── unicode
│ ├── IgnoreCaseRelation.cs
│ ├── IgnoreCaseRelationGenerator.cs
│ ├── IgnoreCaseTransformer.cs
│ ├── UnicodeCategoryRanges.cs
│ ├── UnicodeCategoryRangesGenerator.cs
│ └── UnicodeCategoryTheory.cs
└── utils
│ └── StringUtility.cs
├── tests
├── MatchingTests.cs
├── SerializationTests.cs
└── tests.csproj
└── unicode_table_gen
├── Program.cs
└── unicode_table_gen.csproj
/.gitignore:
--------------------------------------------------------------------------------
1 | # IDE directories
2 | .vs/
3 | .vscode/
4 |
5 | # Build directories
6 | bin/
7 | obj/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Microsoft Corporation
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Symbolic Regex Matcher (SRM)
2 |
3 | SRM is a high-performance regular expression matching engine with predictable performance characteristics. SRM implements a fully compatible subset of the .NET regex language, which mainly omits non-regular features. It provides comparable throughput to popular native libraries, such as RE2, with a pure C# codebase.
4 |
5 | SRM combines advanced symbolic reasoning with a regex derivatives based matching approach. For an overview of the theory behind SRM please see:
6 | [Olli Saarikivi, Margus Veanes, Tiki Wan, Eric Xu. *Symbolic Regex Matcher*. In TACAS 2019.](https://doi.org/10.1007/978-3-030-17462-0_24)
7 |
8 | # Usage
9 |
10 | The API mostly follows that of `System.Text.RegularExpressions`:
11 |
12 | ```
13 | using Microsoft.SRM;
14 | ...
15 | string input = "Hello World!";
16 | var regex = new Regex(".l+.");
17 | bool hasLs = regex.IsMatch(input); // True
18 | var matches = regex.Matches(input); // list of Match structs for "ello" and "rld"
19 | ```
20 |
21 | # Building and running tests
22 |
23 | The library is built and tested with [.NET Core 3.1](https://dotnet.microsoft.com/download/dotnet-core/3.1). To build the project and run the tests run:
24 |
25 | ```
26 | dotnet build
27 | dotnet test
28 | ```
29 |
30 | # Regenerate unicode character tables
31 |
32 | SRM uses unicode character tables recovered from the .NET runtime. To regenerate them for a new version of the runtime run:
33 |
34 | ```
35 | cd unicode_table_gen
36 | dotnet run ../srm/unicode
37 | ```
38 |
--------------------------------------------------------------------------------
/nuget.config:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/scripts/35MSSharedLib1024.snk:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutomataDotNet/srm/7c7eec9c4c974610f246e2502d93730335e70fa9/scripts/35MSSharedLib1024.snk
--------------------------------------------------------------------------------
/scripts/release.yml:
--------------------------------------------------------------------------------
1 | trigger: none
2 |
3 | variables:
4 | ReleaseVersion: '1.2.2'
5 |
6 | pool:
7 | vmImage: "windows-latest"
8 |
9 | steps:
10 | - script: dotnet build srm --configuration Release -p:BuildType=Official -p:Version=$(ReleaseVersion) --output $(Build.ArtifactStagingDirectory)
11 | displayName: 'Build'
12 | - task: EsrpCodeSigning@1
13 | displayName: 'StrongName sign'
14 | inputs:
15 | ConnectedServiceName: 'srm-esrp-signing'
16 | FolderPath: $(Build.ArtifactStagingDirectory)
17 | Pattern: srm.dll
18 | signConfigType: 'inlineSignParams'
19 | inlineOperation: |
20 | [
21 | {
22 | "KeyCode" : "CP-233863-SN",
23 | "OperationCode" : "StrongNameSign",
24 | "Parameters" : {},
25 | "ToolName" : "sign",
26 | "ToolVersion" : "1.0"
27 | },
28 | {
29 | "KeyCode" : "CP-233863-SN",
30 | "OperationCode" : "StrongNameVerify",
31 | "Parameters" : {},
32 | "ToolName" : "sign",
33 | "ToolVersion" : "1.0"
34 | }
35 | ]
36 | SessionTimeout: '60'
37 | MaxConcurrency: '50'
38 | MaxRetryAttempts: '5'
39 | - script: dotnet pack srm -p:PackageVersion=$(ReleaseVersion) --output $(Build.ArtifactStagingDirectory) --no-build -p:OutputPath=$(Build.ArtifactStagingDirectory)
40 | displayName: 'Pack'
41 | - task: EsrpCodeSigning@1
42 | displayName: 'NuGet sign'
43 | inputs:
44 | ConnectedServiceName: 'srm-esrp-signing'
45 | FolderPath: $(Build.ArtifactStagingDirectory)
46 | Pattern: Microsoft.Automata.SRM.$(ReleaseVersion).nupkg
47 | signConfigType: 'inlineSignParams'
48 | inlineOperation: |
49 | [
50 | {
51 | "KeyCode" : "CP-401405",
52 | "OperationCode" : "NuGetSign",
53 | "Parameters" : {},
54 | "ToolName" : "sign",
55 | "ToolVersion" : "1.0"
56 | },
57 | {
58 | "KeyCode" : "CP-401405",
59 | "OperationCode" : "NuGetVerify",
60 | "Parameters" : {},
61 | "ToolName" : "sign",
62 | "ToolVersion" : "1.0"
63 | }
64 | ]
65 | SessionTimeout: '60'
66 | MaxConcurrency: '50'
67 | MaxRetryAttempts: '5'
68 | - task: PublishPipelineArtifact@1
69 | inputs:
70 | targetPath: $(Build.ArtifactStagingDirectory)\Microsoft.Automata.SRM.$(ReleaseVersion).nupkg
71 | artifactName: 'NuGetPackage'
--------------------------------------------------------------------------------
/scripts/test.yml:
--------------------------------------------------------------------------------
1 | pool:
2 | vmImage: "windows-latest"
3 |
4 | steps:
5 | - task: DotNetCoreCLI@2
6 | displayName: 'Test'
7 | inputs:
8 | command: test
--------------------------------------------------------------------------------
/srm.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 15
4 | VisualStudioVersion = 15.0.26124.0
5 | MinimumVisualStudioVersion = 15.0.26124.0
6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "srm", "srm\srm.csproj", "{69ED8C3B-1140-441B-8FEB-AA05855C84F5}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "tests", "tests\tests.csproj", "{70878658-B583-496F-A113-BE95FDF2E4EF}"
9 | EndProject
10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "unicode_table_gen", "unicode_table_gen\unicode_table_gen.csproj", "{548048A4-FC83-41E1-A070-BDA5B814C254}"
11 | EndProject
12 | Global
13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
14 | Debug|Any CPU = Debug|Any CPU
15 | Debug|x64 = Debug|x64
16 | Debug|x86 = Debug|x86
17 | Release|Any CPU = Release|Any CPU
18 | Release|x64 = Release|x64
19 | Release|x86 = Release|x86
20 | EndGlobalSection
21 | GlobalSection(SolutionProperties) = preSolution
22 | HideSolutionNode = FALSE
23 | EndGlobalSection
24 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
25 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
26 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|Any CPU.Build.0 = Debug|Any CPU
27 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x64.ActiveCfg = Debug|Any CPU
28 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x64.Build.0 = Debug|Any CPU
29 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x86.ActiveCfg = Debug|Any CPU
30 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Debug|x86.Build.0 = Debug|Any CPU
31 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|Any CPU.ActiveCfg = Release|Any CPU
32 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|Any CPU.Build.0 = Release|Any CPU
33 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x64.ActiveCfg = Release|Any CPU
34 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x64.Build.0 = Release|Any CPU
35 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x86.ActiveCfg = Release|Any CPU
36 | {69ED8C3B-1140-441B-8FEB-AA05855C84F5}.Release|x86.Build.0 = Release|Any CPU
37 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
38 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|Any CPU.Build.0 = Debug|Any CPU
39 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x64.ActiveCfg = Debug|Any CPU
40 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x64.Build.0 = Debug|Any CPU
41 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x86.ActiveCfg = Debug|Any CPU
42 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Debug|x86.Build.0 = Debug|Any CPU
43 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Release|Any CPU.ActiveCfg = Release|Any CPU
44 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Release|Any CPU.Build.0 = Release|Any CPU
45 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x64.ActiveCfg = Release|Any CPU
46 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x64.Build.0 = Release|Any CPU
47 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x86.ActiveCfg = Release|Any CPU
48 | {70878658-B583-496F-A113-BE95FDF2E4EF}.Release|x86.Build.0 = Release|Any CPU
49 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
50 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|Any CPU.Build.0 = Debug|Any CPU
51 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x64.ActiveCfg = Debug|Any CPU
52 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x64.Build.0 = Debug|Any CPU
53 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x86.ActiveCfg = Debug|Any CPU
54 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Debug|x86.Build.0 = Debug|Any CPU
55 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Release|Any CPU.ActiveCfg = Release|Any CPU
56 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Release|Any CPU.Build.0 = Release|Any CPU
57 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x64.ActiveCfg = Release|Any CPU
58 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x64.Build.0 = Release|Any CPU
59 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x86.ActiveCfg = Release|Any CPU
60 | {548048A4-FC83-41E1-A070-BDA5B814C254}.Release|x86.Build.0 = Release|Any CPU
61 | EndGlobalSection
62 | EndGlobal
63 |
--------------------------------------------------------------------------------
/srm/AutomataException.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace Microsoft.SRM
6 | {
7 | ///
8 | /// Exception thrown by the automata constructions
9 | ///
10 | public class AutomataException : Exception
11 | {
12 | ///
13 | /// the kind of exception
14 | ///
15 | public readonly AutomataExceptionKind kind;
16 |
17 | ///
18 | /// construct an exception
19 | ///
20 | public AutomataException(string message, Exception innerException)
21 | : base(message, innerException)
22 | {
23 | kind = AutomataExceptionKind.Unspecified;
24 | }
25 |
26 | ///
27 | /// construct an exception with given message
28 | ///
29 | public AutomataException(string message)
30 | : base(message)
31 | {
32 | kind = AutomataExceptionKind.Unspecified;
33 | }
34 |
35 | ///
36 | /// construct an exception with given kind
37 | ///
38 | public AutomataException(AutomataExceptionKind kind)
39 | : base(GetMessage(kind))
40 | {
41 | this.kind = kind;
42 | }
43 |
44 | ///
45 | /// construct an exception with given kind and inner exception
46 | ///
47 | public AutomataException(AutomataExceptionKind kind, Exception innerException)
48 | : base(GetMessage(kind), innerException)
49 | {
50 | this.kind = kind;
51 | }
52 |
53 | private static string GetMessage(AutomataExceptionKind kind)
54 | {
55 | switch (kind)
56 | {
57 | case AutomataExceptionKind.CharacterEncodingIsUnspecified:
58 | return CharacterEncodingIsUnspecified;
59 | case AutomataExceptionKind.CharSetMustBeNonempty:
60 | return CharSetMustBeNonempty;
61 | case AutomataExceptionKind.UnrecognizedRegex:
62 | return UnrecognizedRegex;
63 | case AutomataExceptionKind.InternalError:
64 | return InternalError;
65 | default:
66 | return kind.ToString();
67 | }
68 | }
69 |
70 | public const string UnrecognizedRegex =
71 | "Unrecognized regex construct";
72 | public const string CharSetMustBeNonempty =
73 | "Set must be nonempty";
74 | public const string CharacterEncodingIsUnspecified =
75 | "Character encoding is unspecified";
76 | public const string InternalError =
77 | "Internal error";
78 | }
79 |
80 |
81 | ///
82 | /// Kinds of exceptions that may be thrown by the Automata library operations.
83 | ///
84 | public enum AutomataExceptionKind
85 | {
86 | UnrecognizedRegex,
87 | CharSetMustBeNonempty,
88 | CharacterEncodingIsUnspecified,
89 | InternalError,
90 | Unspecified,
91 | InvalidArguments,
92 | CharSetMustBeNontrivial,
93 | CompactSerializationNodeLimitViolation,
94 | CompactSerializationBitLimitViolation,
95 | CompactDeserializationError,
96 | SetIsEmpty,
97 | InvalidArgument,
98 | IncompatibleAlgebras,
99 | NotSupported,
100 | BooleanAlgebraIsNotAtomic,
101 | OrdinalIsTooLarge,
102 | UnexpectedMTBDDTerminal,
103 | AlgebraMustBeCharSetSolver,
104 | MTBDDsNotSupportedForThisOperation,
105 | BDDSerializationNodeLimitViolation,
106 | BDDSerializationBitLimitViolation,
107 | BDDDeserializationError,
108 | BitOutOfRange,
109 | InternalError_SymbolicRegex,
110 | MustNotAcceptEmptyString,
111 | NrOfMintermsCanBeAtMost64,
112 | }
113 | }
--------------------------------------------------------------------------------
/srm/Match.cs:
--------------------------------------------------------------------------------
1 | namespace Microsoft.SRM
2 | {
3 | public struct Match
4 | {
5 | public int Index { get; private set; }
6 | public int Length { get; private set; }
7 |
8 | public Match(int index, int length)
9 | {
10 | Index = index;
11 | Length = length;
12 | }
13 |
14 | public static bool operator==(Match left, Match right)
15 | => left.Index == right.Index && left.Length == right.Length;
16 |
17 | public static bool operator!=(Match left, Match right) => !(left == right);
18 |
19 | public override bool Equals(object obj) => obj is Match other && this == other;
20 |
21 | public override int GetHashCode() => (Index, Length).GetHashCode();
22 |
23 | public override string ToString()
24 | {
25 | return string.Format("Match({0},{1})", Index, Length);
26 | }
27 | }
28 | }
--------------------------------------------------------------------------------
/srm/Regex.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.IO;
4 | using System.Runtime.Serialization;
5 | using System.Runtime.Serialization.Formatters.Binary;
6 |
7 | namespace Microsoft.SRM
8 | {
9 | [Serializable]
10 | public class Regex
11 | {
12 | private static readonly CharSetSolver solver;
13 | private static readonly RegexToAutomatonConverter converter;
14 | static Regex()
15 | {
16 | solver = new CharSetSolver();
17 | converter = new RegexToAutomatonConverter(solver);
18 | }
19 |
20 | private IMatcher matcher;
21 |
22 | public Regex(string pattern) : this(pattern, RegexOptions.None) { }
23 |
24 | public Regex(string pattern, RegexOptions options)
25 | {
26 | var root = converter.ConvertToSymbolicRegex(pattern, options, keepAnchors: true);
27 | var partition = root.ComputeMinterms();
28 | if (partition.Length > 64)
29 | {
30 | //more than 64 bits needed to represent a set
31 | matcher = new SymbolicRegexBV(root, solver, converter.srBuilder, partition, options);
32 | }
33 | else
34 | {
35 | //enough to use 64 bits
36 | matcher = new SymbolicRegexUInt64(root, solver, converter.srBuilder, partition, options);
37 | }
38 | }
39 |
40 | /// <summary>
41 | /// Returns true iff the input string matches.
42 | /// </summary>
43 | /// <param name="input">given input string</param>
44 | /// <param name="startat">start position in the input</param>
45 | /// <param name="endat">end position in the input, -1 means that the value is unspecified and taken to be input.Length-1</param>
46 | public bool IsMatch(string input, int startat = 0, int endat = -1)
47 | => matcher.IsMatch(input, startat, endat);
48 |
49 | /// <summary>
50 | /// Returns all matches as pairs (startindex, length) in the input string.
51 | /// </summary>
52 | /// <param name="input">given input string</param>
53 | /// <param name="limit">as soon as this many matches have been found the search terminates, 0 or negative value means that there is no bound, default is 0</param>
54 | /// <param name="startat">start position in the input, default is 0</param>
55 | /// <param name="endat">end position in the input, -1 means that the value is unspecified and taken to be input.Length-1</param>
56 | public List<Match> Matches(string input, int limit = 0, int startat = 0, int endat = -1)
57 | => matcher.Matches(input, limit, startat, endat);
58 |
59 | /// <summary>
60 | /// Serialize this symbolic regex matcher to the given file.
61 | /// If formatter is null then an instance of
62 | /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
63 | /// </summary>
64 | /// <param name="file">file where the serialization is stored</param>
65 | /// <param name="formatter">given formatter</param>
66 | public void Serialize(string file, IFormatter formatter = null)
67 | {
68 | // The using statement releases the file handle even when serialization throws.
69 | using (var stream = new FileStream(file, FileMode.Create, FileAccess.Write, FileShare.None))
70 | Serialize(stream, formatter);
71 | }
72 |
73 | /// <summary>
74 | /// Serialize this symbolic regex matcher to the given stream.
75 | /// If formatter is null then an instance of
76 | /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
77 | /// </summary>
78 | /// <param name="stream">stream where the serialization is stored</param>
79 | /// <param name="formatter">given formatter</param>
80 | public void Serialize(Stream stream, IFormatter formatter = null)
81 | {
82 | // Fall back to the binary formatter when the caller supplies none.
83 | IFormatter effective = formatter ?? new BinaryFormatter();
84 | effective.Serialize(stream, this);
85 | }
86 |
87 | /// <summary>
88 | /// Deserialize the matcher of a symbolic regex from the given file using the given formatter.
89 | /// If formatter is null then an instance of
90 | /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
91 | /// </summary>
92 | /// <param name="file">source file of the serialized matcher</param>
93 | /// <param name="formatter">given formatter</param>
94 | /// <returns>the deserialized regex</returns>
95 | public static Regex Deserialize(string file, IFormatter formatter = null)
96 | {
97 | // The using statement releases the file handle even when
98 | // deserialization throws.
99 | using (Stream stream = new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read))
100 | return Deserialize(stream, formatter);
101 | }
102 |
103 | /// <summary>
104 | /// Deserialize the matcher of a symbolic regex from the given stream using the given formatter.
105 | /// If formatter is null then an instance of
106 | /// System.Runtime.Serialization.Formatters.Binary.BinaryFormatter is used.
107 | /// </summary>
108 | /// <param name="stream">source stream of the serialized matcher</param>
109 | /// <param name="formatter">given formatter</param>
110 | /// <returns>the deserialized regex</returns>
111 | public static Regex Deserialize(Stream stream, IFormatter formatter = null)
112 | {
113 | // Fall back to the binary formatter when the caller supplies none.
114 | IFormatter effective = formatter ?? new BinaryFormatter();
115 | object graph = effective.Deserialize(stream);
116 | return (Regex)graph;
117 | }
118 | }
119 | }
120 |
--------------------------------------------------------------------------------
/srm/RegexOptions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Diagnostics;
3 |
4 | namespace Microsoft.SRM
5 | {
6 | [Serializable]
7 | public struct RegexOptions
8 | {
9 | // .NET compatible options
10 | public static RegexOptions None = new RegexOptions(0);
11 | public static RegexOptions IgnoreCase = new RegexOptions(1);
12 | public static RegexOptions Multiline = new RegexOptions(2);
13 | public static RegexOptions Singleline = new RegexOptions(4);
14 | public static RegexOptions IgnorePatternWhitespace = new RegexOptions(8);
15 | public static RegexOptions CultureInvariant = new RegexOptions(16);
16 | public static RegexOptions ECMAScript = new RegexOptions(32);
17 |
18 | // SRM specific options
19 | public static RegexOptions Vectorize = new RegexOptions(1024);
20 |
21 | private int value;
22 |
23 | private RegexOptions(int value)
24 | {
25 | this.value = value;
26 | }
27 |
28 | public static RegexOptions operator|(RegexOptions left, RegexOptions right)
29 | {
30 | return new RegexOptions(left.value | right.value);
31 | }
32 |
33 | public static RegexOptions operator^(RegexOptions left, RegexOptions right)
34 | {
35 | return new RegexOptions(left.value ^ right.value);
36 | }
37 |
38 | public static RegexOptions operator&(RegexOptions left, RegexOptions right)
39 | {
40 | return new RegexOptions(left.value & right.value);
41 | }
42 |
43 | public static implicit operator int(RegexOptions ourOptions)
44 | {
45 | return ourOptions.value;
46 | }
47 | /// <summary>Converts SRM options to System.Text.RegularExpressions options; the SRM-only Vectorize flag is dropped.</summary>
48 | public static implicit operator System.Text.RegularExpressions.RegexOptions(RegexOptions ourOptions)
49 | {
50 | var theirOptions = System.Text.RegularExpressions.RegexOptions.None;
51 | var handledOptions = None;
52 | Action<RegexOptions, System.Text.RegularExpressions.RegexOptions> handleEquivalentOption = (o, t) => // when o is set, set t in the result
53 | {
54 | if ((ourOptions & o) != 0)
55 | {
56 | theirOptions |= t;
57 | handledOptions |= o;
58 | }
59 | };
60 | Action<RegexOptions> ignoreOption = t => // when t is set, mark it handled without translating it
61 | {
62 | if ((ourOptions & t) != 0)
63 | {
64 | handledOptions |= t;
65 | }
66 | };
67 | handleEquivalentOption(IgnoreCase, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
68 | handleEquivalentOption(Multiline, System.Text.RegularExpressions.RegexOptions.Multiline);
69 | handleEquivalentOption(Singleline, System.Text.RegularExpressions.RegexOptions.Singleline);
70 | handleEquivalentOption(IgnorePatternWhitespace, System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace);
71 | handleEquivalentOption(CultureInvariant, System.Text.RegularExpressions.RegexOptions.CultureInvariant);
72 | handleEquivalentOption(ECMAScript, System.Text.RegularExpressions.RegexOptions.ECMAScript);
73 | ignoreOption(Vectorize);
74 | Debug.Assert(handledOptions == ourOptions); // every set flag must have been accounted for above
75 | return theirOptions;
76 | }
77 | /// <summary>Converts System.Text.RegularExpressions options to SRM options; RightToLeft, Compiled and ExplicitCapture are dropped.</summary>
78 | public static implicit operator RegexOptions(System.Text.RegularExpressions.RegexOptions theirOptions)
79 | {
80 | var ourOptions = None;
81 | var handledOptions = System.Text.RegularExpressions.RegexOptions.None;
82 | Action<RegexOptions, System.Text.RegularExpressions.RegexOptions> handleEquivalentOption = (o, t) => // when t is set, set o in the result
83 | {
84 | if ((theirOptions & t) != 0)
85 | {
86 | ourOptions |= o;
87 | handledOptions |= t;
88 | }
89 | };
90 | Action<System.Text.RegularExpressions.RegexOptions> ignoreOption = t => // when t is set, mark it handled without translating it
91 | {
92 | if ((theirOptions & t) != 0)
93 | {
94 | handledOptions |= t;
95 | }
96 | };
97 | handleEquivalentOption(IgnoreCase, System.Text.RegularExpressions.RegexOptions.IgnoreCase);
98 | handleEquivalentOption(Multiline, System.Text.RegularExpressions.RegexOptions.Multiline);
99 | handleEquivalentOption(Singleline, System.Text.RegularExpressions.RegexOptions.Singleline);
100 | handleEquivalentOption(IgnorePatternWhitespace, System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace);
101 | handleEquivalentOption(CultureInvariant, System.Text.RegularExpressions.RegexOptions.CultureInvariant);
102 | handleEquivalentOption(ECMAScript, System.Text.RegularExpressions.RegexOptions.ECMAScript);
103 | ignoreOption(System.Text.RegularExpressions.RegexOptions.RightToLeft);
104 | ignoreOption(System.Text.RegularExpressions.RegexOptions.Compiled);
105 | ignoreOption(System.Text.RegularExpressions.RegexOptions.ExplicitCapture);
106 | Debug.Assert(handledOptions == theirOptions); // every set flag must have been accounted for above
107 | return ourOptions;
108 | }
109 | }
110 | }
--------------------------------------------------------------------------------
/srm/algebras/BDD.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 |
4 | namespace Microsoft.SRM
5 | {
6 | ///
7 | /// Represents a Binary Decision Diagram.
8 | ///
9 | public class BDD
10 | {
11 | ///
12 | /// The encoding of the set for lower ordinals for the case when the current bit is 1.
13 | /// The value is null iff IsLeaf is true.
14 | ///
15 | public readonly BDD One;
16 |
17 | ///
18 | /// The encoding of the set for lower ordinals for the case when the current bit is 0.
19 | /// The value is null iff IsLeaf is true.
20 | ///
21 | public readonly BDD Zero;
22 |
23 |
24 | public readonly BDDAlgebra algebra;
25 |
26 | ///
27 | /// Ordinal of this bit if nonleaf
28 | ///
29 | public readonly int Ordinal;
30 |
31 | internal BDD(BDDAlgebra algebra, int ordinal, BDD one, BDD zero)
32 | {
33 | this.One = one;
34 | this.Zero = zero;
35 | this.Ordinal = ordinal;
36 | this.algebra = algebra;
37 | }
38 |
39 | ///
40 | /// True iff the node is a terminal (One and Zero are null).
41 | ///
42 | public bool IsLeaf
43 | {
44 | get { return One == null; }
45 | }
46 |
47 | ///
48 | /// True iff the set is full.
49 | ///
50 | public bool IsFull
51 | {
52 | get { return this == algebra.True; }
53 | }
54 |
55 | ///
56 | /// True iff the set is empty.
57 | ///
58 | public bool IsEmpty
59 | {
60 | get { return this == algebra.False; }
61 | }
62 |
63 | /// <summary>
64 | /// Counts the number of nodes (both terminals and nonterminals) in the BDD.
65 | /// </summary>
66 | public int CountNodes()
67 | {
68 | if (IsLeaf)
69 | return 1;
70 |
71 | HashSet<BDD> visited = new HashSet<BDD>();
72 | Stack<BDD> stack = new Stack<BDD>();
73 | stack.Push(this);
74 | visited.Add(this);
75 | while (stack.Count > 0)
76 | {
77 | BDD a = stack.Pop();
78 | if (!a.IsLeaf)
79 | {
80 | if (visited.Add(a.One)) // Add returns false for an already-seen node, so shared nodes are counted once
81 | stack.Push(a.One);
82 | if (visited.Add(a.Zero))
83 | stack.Push(a.Zero);
84 | }
85 | }
86 | return visited.Count;
87 | }
88 |
89 | ///
90 | /// Gets the lexicographically minimum bitvector in this BDD as a ulong.
91 | /// Assumes that this BDD is nonempty and that its ordinal is at most 63.
92 | ///
93 | public ulong GetMin()
94 | {
95 | var set = this;
96 |
97 | if (set.IsFull)
98 | return (ulong)0;
99 |
100 | if (set.IsEmpty)
101 | throw new AutomataException(AutomataExceptionKind.SetIsEmpty);
102 |
103 | if (set.Ordinal > 63)
104 | throw new AutomataException(AutomataExceptionKind.OrdinalIsTooLarge);
105 |
106 | ulong res = 0;
107 |
108 | while (!set.IsLeaf)
109 | {
110 | if (set.Zero.IsEmpty) //the bit must be set to 1
111 | {
112 | res = res | ((ulong)1 << set.Ordinal);
113 | set = set.One;
114 | }
115 | else
116 | set = set.Zero;
117 | }
118 |
119 | return res;
120 | }
121 |
122 | public static BDD operator >>(BDD x, int k)
123 | {
124 | return x.algebra.ShiftRight(x, k);
125 | }
126 |
127 | public static BDD operator <<(BDD x, int k)
128 | {
129 | return x.algebra.ShiftLeft(x, k);
130 | }
131 |
132 | public static BDD operator &(BDD x, BDD y)
133 | {
134 | return x.algebra.MkAnd(x, y);
135 | }
136 |
137 | public static BDD operator |(BDD x, BDD y)
138 | {
139 | return x.algebra.MkOr(x, y);
140 | }
141 |
142 | public static BDD operator !(BDD x)
143 | {
144 | return x.algebra.MkNot(x);
145 | }
146 | }
147 | }
148 |
149 |
--------------------------------------------------------------------------------
/srm/algebras/BV.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Runtime.CompilerServices;
4 | using System.Runtime.Serialization;
5 |
6 |
7 | namespace Microsoft.SRM
8 | {
9 | ///
10 | /// Represents a bitvector
11 | ///
12 | [Serializable]
13 | public class BV : IComparable, ISerializable
14 | {
15 | internal ulong first;
16 | internal ulong[] more;
17 |
18 | ///
19 | /// Constructs a bitvector
20 | ///
21 | /// first 64 bits
22 | /// remaining bits in 64 increments
23 | public BV(ulong first, params ulong[] more)
24 | {
25 | this.first = first;
26 | this.more = more;
27 | }
28 |
29 | ///
30 | /// Bitwise AND
31 | ///
32 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
33 | public static BV operator &(BV x, BV y)
34 | {
35 | int k = (x.more.Length <= y.more.Length ? x.more.Length : y.more.Length);
36 | var first = x.first & y.first;
37 | var more = new ulong[k];
38 | for (int i = 0; i < k; i++)
39 | {
40 | more[i] = x.more[i] & y.more[i];
41 | }
42 | return new BV(first, more);
43 | }
44 |
45 | ///
46 | /// Bitwise OR
47 | ///
48 | public static BV operator |(BV x, BV y)
49 | {
50 | int k = (x.more.Length <= y.more.Length ? x.more.Length : y.more.Length);
51 | var first = x.first | y.first;
52 | var more = new ulong[k];
53 | for (int i = 0; i < k; i++)
54 | {
55 | more[i] = x.more[i] | y.more[i];
56 | }
57 | return new BV(first, more);
58 | }
59 |
60 | /// <summary>
61 | /// Bitwise XOR; the 'more' part of the result covers only the words present in both operands, as in AND and OR
62 | /// </summary>
63 | public static BV operator ^(BV x, BV y)
64 | {
65 | int k = (x.more.Length <= y.more.Length ? x.more.Length : y.more.Length);
66 | var first = x.first ^ y.first;
67 | var more = new ulong[k];
68 | for (int i = 0; i < k; i++) // bounded by k so a shorter y.more is never indexed out of range
69 | {
70 | more[i] = x.more[i] ^ y.more[i];
71 | }
72 | return new BV(first, more);
73 | }
74 |
75 | ///
76 | /// Bitwise NOT
77 | ///
78 | public static BV operator ~(BV x)
79 | {
80 | var first_compl = ~x.first;
81 | var more_compl = Array.ConvertAll(x.more, n => ~n);
82 | var compl = new BV(first_compl, more_compl);
83 | return compl;
84 | }
85 |
86 | ///
87 | /// less than
88 | ///
89 | public static bool operator <(BV x, BV y)
90 | {
91 | return x.CompareTo(y) < 0;
92 | }
93 |
94 | ///
95 | /// greater than
96 | ///
97 | public static bool operator >(BV x, BV y)
98 | {
99 | return x.CompareTo(y) > 0;
100 | }
101 |
102 | ///
103 | /// less than or equal
104 | ///
105 | public static bool operator <=(BV x, BV y)
106 | {
107 | return x.CompareTo(y) <= 0;
108 | }
109 |
110 | ///
111 | /// greater than or equal
112 | ///
113 | public static bool operator >=(BV x, BV y)
114 | {
115 | return x.CompareTo(y) >= 0;
116 | }
117 |
118 | ///
119 | /// Shows the serialized representation
120 | ///
121 | public override string ToString()
122 | {
123 | return Serialize();
124 | }
125 |
126 | public override bool Equals(object obj)
127 | {
128 | BV that = obj as BV;
129 | if (that == null)
130 | return false;
131 | if (this == that)
132 | return true;
133 | if (this.first != that.first)
134 | return false;
135 | if (that.more.Length != this.more.Length)
136 | return false;
137 | for (int i = 0; i < more.Length; i++)
138 | {
139 | if (more[i] != that.more[i])
140 | return false;
141 | }
142 | return true;
143 | }
144 |
145 | public override int GetHashCode()
146 | {
147 | int h = first.GetHashCode();
148 | for (int i = 0; i < more.Length; i++)
149 | {
150 | h = (h << 5) ^ more[i].GetHashCode();
151 | }
152 | return h;
153 | }
154 |
/// <summary>
/// Orders bit vectors: anything that is not a BV (including null) sorts before this;
/// vectors with fewer additional words sort first; otherwise the additional words
/// are compared from the last one down, and the first word breaks the tie.
/// </summary>
public int CompareTo(object obj)
{
    BV other = obj as BV;
    if (other == null)
    {
        return 1;
    }
    if (more.Length != other.more.Length)
    {
        return more.Length.CompareTo(other.more.Length);
    }
    for (int i = more.Length - 1; i >= 0; i--)
    {
        int wordOrder = more[i].CompareTo(other.more[i]);
        if (wordOrder != 0)
        {
            return wordOrder;
        }
    }
    return first.CompareTo(other.first);
}
182 |
183 | #region serialization
/// <summary>
/// Serialize: stores the textual form produced by Serialize under the key "bv".
/// </summary>
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
    string serialized = Serialize();
    info.AddValue("bv", serialized);
}
/// <summary>
/// Deserialize: rebuilds the vector from the string stored under the key "bv".
/// </summary>
public BV(SerializationInfo info, StreamingContext context)
{
    Deserialize_Helper(info.GetString("bv"), out first, out more);
}
199 |
/// <summary>
/// Serialize BV into a string of hexadecimal numerals, separated by '.',
/// each numeral representing an unsigned 64-bit integer in hexadecimal using lowercase a-f
/// </summary>
/// <returns>the first word, a '.', then the additional words joined by '.'</returns>
public string Serialize()
{
    var builder = new System.Text.StringBuilder(first.ToString("x"));
    // The separator after the first word is always written, even without further words.
    builder.Append('.');
    for (int i = 0; i < more.Length; i++)
    {
        if (i > 0)
        {
            builder.Append('.');
        }
        builder.Append(more[i].ToString("x"));
    }
    return builder.ToString();
}
210 |
/// <summary>
/// Deserialize BV from given string that was produced by Serialize
/// </summary>
/// <param name="s">BV in serialized form</param>
public static BV Deserialize(string s)
{
    Deserialize_Helper(s, out ulong firstWord, out ulong[] remainingWords);
    return new BV(firstWord, remainingWords);
}
222 |
// Parses the text written by Serialize: the first word, a '.', then zero or more
// '.'-separated words, every word in hexadecimal.
// s: serialized form; first: receives the first word; rest: receives the remaining
// words (empty array when there are none).
// Malformed numerals still surface as the exceptions thrown by ulong.Parse.
private static void Deserialize_Helper(string s, out ulong first, out ulong[] rest)
{
    const System.Globalization.NumberStyles hex = System.Globalization.NumberStyles.HexNumber;
    int dot = s.IndexOf('.');
    if (dot < 0)
    {
        // No separator at all: the whole string is the first word.
        first = ulong.Parse(s, hex);
        rest = new ulong[0];
        return;
    }
    first = ulong.Parse(s.Substring(0, dot), hex);
    string tail = s.Substring(dot + 1);
    // Serialize emits "<first>." when there are no additional words. Splitting the
    // empty tail would yield a single empty token, which ulong.Parse rejects.
    rest = tail.Length == 0
        ? new ulong[0]
        : Array.ConvertAll(tail.Split('.'), word => ulong.Parse(word, hex));
}
229 | #endregion
230 | }
231 | }
232 |
--------------------------------------------------------------------------------
/srm/algebras/BV64Algebra.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Runtime.CompilerServices;
4 | using System.Runtime.Serialization;
5 |
6 | namespace Microsoft.SRM
7 | {
/// <summary>
/// Bit vector algebra of up to 64 bits.
/// A predicate is a ulong in which bit i stands for block i of the partition.
/// </summary>
[Serializable]
public class BV64Algebra : BVAlgebraBase, ICharAlgebra<ulong>, ISerializable
{
    [NonSerialized]
    MintermGenerator<ulong> mtg;
    [NonSerialized]
    ulong zero = 0;
    [NonSerialized]
    ulong all;
    [NonSerialized]
    internal ulong[] atoms;

    /// <summary>
    /// Sums the Count of every partition block whose atom intersects the given set.
    /// </summary>
    public ulong ComputeDomainSize(ulong set)
    {
        int size = 0;
        for (int i = 0; i < atoms.Length; i++)
        {
            if (IsSatisfiable(set & atoms[i]))
                size += partition[i].Count;
        }
        return (ulong)size;
    }

    /// <summary>
    /// Builds the algebra whose atoms correspond, in order, to the given minterms.
    /// </summary>
    /// <exception cref="AutomataException">more than 64 minterms are given</exception>
    public static BV64Algebra Create(CharSetSolver solver, BDD[] minterms)
    {
        if (minterms.Length > 64)
            throw new AutomataException(AutomataExceptionKind.NrOfMintermsCanBeAtMost64);
        var dtree = DecisionTree.Create(solver, minterms);
        var partitionBase = Array.ConvertAll(minterms, m => solver.ToRanges(m));
        var partition = Array.ConvertAll(partitionBase, p => new IntervalSet(p));
        return new BV64Algebra(dtree, partition);
    }

    private BV64Algebra(DecisionTree dtree, IntervalSet[] partition) : base(dtree, partition, partition.Length)
    {
        // For 1..64 bits this sets exactly the low nrOfBits bits.
        this.all = ulong.MaxValue >> (64 - this.nrOfBits);
        this.mtg = new MintermGenerator<ulong>(this);
        this.atoms = new ulong[this.nrOfBits];
        for (int i = 0; i < this.nrOfBits; i++)
        {
            atoms[i] = ((ulong)1) << i;
        }
    }

    /// <summary>
    /// Create a variant of the algebra where each minterm is replaced with a singleton set:
    /// minterms 1..26 map to 'A'.., 27..52 to 'a'.., 53..62 to '0'.., any later one to '=';
    /// minterm 0 receives the remaining characters.
    /// Used for testing purposes.
    /// </summary>
    internal BV64Algebra ReplaceMintermsWithVisibleCharacters()
    {
        Func<int, int> f = x =>
        {
            int k;
            if (x <= 26)
                k = ('A' + (x - 1));
            else if (x <= 52)
                k = ('a' + (x - 27));
            else if (x <= 62)
                k = ('0' + (x - 53));
            else
                k = '=';
            return k;
        };
        var simplified_partition = new IntervalSet[this.partition.Length];
        // precomp[c] is the minterm index assigned to character c (0 when unassigned).
        int[] precomp = new int[256];
        for (int i = 1; i < simplified_partition.Length; i++)
        {
            int k = f(i);
            simplified_partition[i] = new IntervalSet(new Tuple<uint, uint>((uint)k, (uint)k));
            precomp[k] = i;
        }
        // Collect the maximal runs of unassigned characters up to 'z' + 1.
        var zeroIntervals = new List<Tuple<uint, uint>>();
        int lower = 0;
        int upper = 0;
        for (int i = 1; i <= 'z' + 1; i++)
        {
            if (precomp[i] == 0)
            {
                if (upper == i - 1)
                    upper += 1;
                else
                {
                    zeroIntervals.Add(new Tuple<uint, uint>((uint)lower, (uint)upper));
                    lower = i;
                    upper = i;
                }
            }
        }
        // The last run extends to the end of the 16-bit range.
        zeroIntervals.Add(new Tuple<uint, uint>((uint)lower, 0xFFFF));
        simplified_partition[0] = new IntervalSet(zeroIntervals.ToArray());

        var simplified_dtree = new DecisionTree(precomp, new DecisionTree.BST(0, null, null));
        return new BV64Algebra(simplified_dtree, simplified_partition);
    }

    /// <summary>
    /// The empty predicate (no bit set).
    /// </summary>
    public ulong False
    {
        get
        {
            return zero;
        }
    }

    /// <summary>
    /// Always true: AreEquivalent is plain equality of the bit vectors.
    /// </summary>
    public bool IsExtensional
    {
        get
        {
            return true;
        }
    }

    /// <summary>
    /// The full predicate (the mask computed in the constructor).
    /// </summary>
    public ulong True
    {
        get
        {
            return all;
        }
    }

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public BitWidth Encoding
    {
        get
        {
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public CharSetSolver CharSetProvider
    {
        get
        {
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// True iff both predicates are the same bit vector.
    /// </summary>
    public bool AreEquivalent(ulong predicate1, ulong predicate2)
    {
        return predicate1 == predicate2;
    }

    /// <summary>
    /// Delegates to the minterm generator created for this algebra.
    /// </summary>
    public IEnumerable<Tuple<bool[], ulong>> GenerateMinterms(params ulong[] constraints)
    {
        return this.mtg.GenerateMinterms(constraints);
    }

    /// <summary>
    /// True iff at least one bit is set.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool IsSatisfiable(ulong predicate)
    {
        return predicate != zero;
    }

    /// <summary>
    /// Conjunction of all the predicates; True when none are given.
    /// </summary>
    public ulong MkAnd(params ulong[] predicates)
    {
        var and = all;
        for (int i = 0; i < predicates.Length; i++)
        {
            and = and & predicates[i];
            if (and == zero)
                return zero;
        }
        return and;
    }

    /// <summary>
    /// Conjunction of all the predicates in the enumeration; True when it is empty.
    /// Previously threw NotImplementedException; now mirrors the params overload.
    /// </summary>
    public ulong MkAnd(IEnumerable<ulong> predicates)
    {
        var and = all;
        foreach (var p in predicates)
        {
            and = and & p;
            // Nothing can bring bits back once the conjunction is empty.
            if (and == zero)
                return zero;
        }
        return and;
    }

    /// <summary>
    /// Bitwise conjunction of the two predicates.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ulong MkAnd(ulong predicate1, ulong predicate2)
    {
        return predicate1 & predicate2;
    }

    /// <summary>
    /// Bits of predicate1 that are not in predicate2.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ulong MkDiff(ulong predicate1, ulong predicate2)
    {
        return predicate1 & ~predicate2;
    }

    /// <summary>
    /// Complement of the predicate, restricted to the bits of True.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ulong MkNot(ulong predicate)
    {
        return all & ~predicate;
    }

    /// <summary>
    /// Disjunction of all the predicates in the enumeration; False when it is empty.
    /// </summary>
    public ulong MkOr(IEnumerable<ulong> predicates)
    {
        var res = zero;
        foreach (var p in predicates)
        {
            res = res | p;
            if (res == all)
                return all;
        }
        return res;
    }

    /// <summary>
    /// Bitwise disjunction of the two predicates.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ulong MkOr(ulong predicate1, ulong predicate2)
    {
        return predicate1 | predicate2;
    }

    /// <summary>
    /// Bitwise exclusive or of the two predicates.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ulong MkSymmetricDifference(ulong p1, ulong p2)
    {
        return (p1 ^ p2);
    }

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public ulong MkRangeConstraint(char lower, char upper, bool caseInsensitive = false)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Returns the atom of the partition block that the decision tree assigns to c.
    /// </summary>
    /// <exception cref="AutomataException">caseInsensitive is true</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public ulong MkCharConstraint(char c, bool caseInsensitive = false)
    {
        if (caseInsensitive == true)
            throw new AutomataException(AutomataExceptionKind.NotSupported);
        return this.atoms[this.dtree.GetId(c)];
    }

    /// <summary>
    /// Assumes that set is a union of some minterms (or empty).
    /// If null then 0 is returned.
    /// </summary>
    public ulong ConvertFromCharSet(BDD set)
    {
        if (set == null)
            return zero;
        var alg = set.algebra;
        ulong res = this.zero;
        for (int i = 0; i < partition.Length; i++)
        {
            // Include atom i whenever block i overlaps the given set.
            BDD bdd_i = partition[i].AsBDD(alg);
            var conj = alg.MkAnd(bdd_i, set);
            if (alg.IsSatisfiable(conj))
            {
                res = res | atoms[i];
            }
        }
        return res;
    }

    /// <summary>
    /// Pretty print the bitvector predicate as a character class.
    /// Renders both the predicate and its complement and returns the shorter text.
    /// </summary>
    /// <param name="bv">given bitvector predicate</param>
    public string PrettyPrint(ulong bv)
    {
        var lab1 = PrettyPrintHelper(bv, false);
        var lab2 = PrettyPrintHelper(~bv, true);
        if (lab1.Length <= lab2.Length)
            return lab1;
        else
            return lab2;

    }

    // Merges the partition blocks selected by bv and prints them as a character class.
    string PrettyPrintHelper(ulong bv, bool complement)
    {
        List<IntervalSet> sets = new List<IntervalSet>();
        for (int i = 0; i < atoms.Length; i++)
            if (IsSatisfiable(bv & atoms[i]))
                sets.Add(partition[i]);
        var set = IntervalSet.Merge(sets);
        var res = set.ToCharacterClass(complement);
        return res;
    }

    /// <summary>
    /// Union, in the given BDD algebra, of the partition blocks whose atoms occur in pred.
    /// </summary>
    public BDD ConvertToCharSet(BDDAlgebra solver, ulong pred)
    {
        BDD res = solver.False;
        if (!pred.Equals(this.zero))
        {
            for (int i = 0; i < atoms.Length; i++)
            {
                //construct the union of the corresponding atoms
                if (!(pred & atoms[i]).Equals(this.zero))
                {
                    BDD bdd_i = partition[i].AsBDD(solver);
                    res = solver.MkOr(res, bdd_i);
                }
            }
        }
        return res;
    }

    /// <summary>
    /// Returns the atoms array itself (not a copy).
    /// </summary>
    public ulong[] GetPartition()
    {
        return atoms;
    }

    /// <summary>
    /// Enumerates the elements of every partition block whose atom intersects the set.
    /// </summary>
    public IEnumerable<char> GenerateAllCharacters(ulong set)
    {
        for (int i = 0; i < atoms.Length; i++)
        {
            if (IsSatisfiable(atoms[i] & set))
                foreach (uint elem in partition[i].Enumerate())
                    yield return (char)elem;
        }
    }

    #region serialization
    /// <summary>
    /// Serialize: stores the decision tree under "d" and the serialized partition under "p".
    /// </summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("d", dtree);
        info.AddValue("p", SerializePartition());
    }

    /// <summary>
    /// Deserialize
    /// </summary>
    public BV64Algebra(SerializationInfo info, StreamingContext context)
        : this((DecisionTree)info.GetValue("d", typeof(DecisionTree)),
              DeserializePartition(info.GetString("p")))
    {
    }

    /// <summary>
    /// Serialize s as a hexadecimal numeral using lowercase letters
    /// </summary>
    /// <param name="s">given predicate</param>
    public string SerializePredicate(ulong s)
    {
        return s.ToString("x");
    }

    /// <summary>
    /// Deserialize s from a string created by SerializePredicate
    /// </summary>
    /// <param name="s">given hexadecimal numeral representation</param>
    public ulong DeserializePredicate(string s)
    {
        return ulong.Parse(s, System.Globalization.NumberStyles.HexNumber);
    }
    #endregion

    /// <summary>
    /// Not implemented.
    /// </summary>
    public ulong MkCharPredicate(string name, ulong pred)
    {
        throw new NotImplementedException();
    }

}
359 | }
--------------------------------------------------------------------------------
/srm/algebras/BVAlgebra.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Runtime.CompilerServices;
4 | using System.Runtime.Serialization;
5 |
6 | namespace Microsoft.SRM
7 | {
/// <summary>
/// State shared by the bit vector algebras: the decision tree, the partition
/// (one IntervalSet per bit) and the number of bits.
/// </summary>
public abstract class BVAlgebraBase
{
    internal DecisionTree dtree;
    internal IntervalSet[] partition;
    internal int nrOfBits;

    internal BVAlgebraBase(DecisionTree dtree, IntervalSet[] partition, int nrOfBits)
    {
        this.dtree = dtree;
        this.partition = partition;
        this.nrOfBits = nrOfBits;
    }

    /// <summary>
    /// Serializes every block of the partition and joins the results with ';'.
    /// </summary>
    protected string SerializePartition()
    {
        // string.Join builds the result in one pass instead of re-copying the
        // accumulated string for every block.
        return string.Join(";", Array.ConvertAll(partition, block => block.Serialize()));
    }

    /// <summary>
    /// Inverse of SerializePartition: splits on ';' and parses each piece with IntervalSet.Parse.
    /// </summary>
    protected static IntervalSet[] DeserializePartition(string s)
    {
        var blocks = s.Split(';');
        var intervalSets = Array.ConvertAll(blocks, IntervalSet.Parse);
        return intervalSets;
    }
}
/// <summary>
/// Bit vector algebra.
/// A predicate is a BV in which bit i stands for block i of the partition.
/// </summary>
[Serializable]
public class BVAlgebra : BVAlgebraBase, ICharAlgebra<BV>, ISerializable
{
    [NonSerialized]
    MintermGenerator<BV> mtg;
    [NonSerialized]
    BV zero;
    [NonSerialized]
    BV ones;
    [NonSerialized]
    ulong[] all0;
    [NonSerialized]
    ulong[] all1;
    [NonSerialized]
    internal BV[] atoms;

    /// <summary>
    /// Sums the Count of every partition block whose atom intersects the given set.
    /// </summary>
    public ulong ComputeDomainSize(BV set)
    {
        int size = 0;
        for (int i = 0; i < atoms.Length; i++)
        {
            if (IsSatisfiable(set & atoms[i]))
                size += partition[i].Count;
        }
        return (ulong)size;
    }

    /// <summary>
    /// Builds the algebra whose atoms correspond, in order, to the given minterms.
    /// </summary>
    public static BVAlgebra Create(CharSetSolver solver, BDD[] minterms)
    {
        var dtree = DecisionTree.Create(solver, minterms);
        var partitionBase = Array.ConvertAll(minterms, m => solver.ToRanges(m));
        var partition = Array.ConvertAll(partitionBase, p => new IntervalSet(p));
        return new BVAlgebra(dtree, partition);
    }

    private BVAlgebra(DecisionTree dtree, IntervalSet[] partition) : base(dtree, partition, partition.Length)
    {
        // K additional 64-bit words are needed beyond the first word.
        var K = (nrOfBits - 1) / 64;
        int last = nrOfBits % 64;
        // Mask for the last word in use: all 64 bits when nrOfBits is a multiple of 64.
        ulong lastMask = (last == 0 ? ulong.MaxValue : (((ulong)1 << last) - 1));
        // new ulong[K] is already all zero, so all0 needs no explicit fill
        // (the original loop wrote all0[0] on every iteration instead of all0[i]).
        all0 = new ulong[K];
        all1 = new ulong[K];
        for (int i = 0; i < K; i++)
        {
            all1[i] = (i < K - 1 ? ulong.MaxValue : lastMask);
        }
        this.zero = new BV(0, all0);
        this.ones = new BV((K == 0 ? lastMask : ulong.MaxValue), all1);
        this.mtg = new MintermGenerator<BV>(this);
        this.atoms = new BV[nrOfBits];
        for (int i = 0; i < nrOfBits; i++)
        {
            atoms[i] = MkBV(i);
        }
    }

    /// <summary>
    /// The empty predicate (no bit set).
    /// </summary>
    public BV False
    {
        get
        {
            return zero;
        }
    }

    /// <summary>
    /// Always true: AreEquivalent is equality of the bit vectors.
    /// </summary>
    public bool IsExtensional
    {
        get
        {
            return true;
        }
    }

    /// <summary>
    /// The full predicate (all nrOfBits bits set).
    /// </summary>
    public BV True
    {
        get
        {
            return ones;
        }
    }

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public BitWidth Encoding
    {
        get
        {
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public CharSetSolver CharSetProvider
    {
        get
        {
            throw new NotSupportedException();
        }
    }

    /// <summary>
    /// True iff predicate1.Equals(predicate2).
    /// </summary>
    public bool AreEquivalent(BV predicate1, BV predicate2)
    {
        return predicate1.Equals(predicate2);
    }

    /// <summary>
    /// Delegates to the minterm generator created for this algebra.
    /// </summary>
    public IEnumerable<Tuple<bool[], BV>> GenerateMinterms(params BV[] constraints)
    {
        return this.mtg.GenerateMinterms(constraints);
    }

    /// <summary>
    /// True iff the predicate differs from False.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public bool IsSatisfiable(BV predicate)
    {
        return !predicate.Equals(zero);
    }

    /// <summary>
    /// Conjunction of all the predicates; True when none are given.
    /// </summary>
    public BV MkAnd(params BV[] predicates)
    {
        var and = ones;
        for (int i = 0; i < predicates.Length; i++)
        {
            and = and & predicates[i];
            if (and.Equals(zero))
                return zero;
        }
        return and;
    }

    /// <summary>
    /// Conjunction of all the predicates in the enumeration; True when it is empty.
    /// Previously threw NotImplementedException; now mirrors the params overload.
    /// </summary>
    public BV MkAnd(IEnumerable<BV> predicates)
    {
        var and = ones;
        foreach (var p in predicates)
        {
            and = and & p;
            // Nothing can bring bits back once the conjunction is empty.
            if (and.Equals(zero))
                return zero;
        }
        return and;
    }

    /// <summary>
    /// Bitwise conjunction of the two predicates.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public BV MkAnd(BV predicate1, BV predicate2)
    {
        return predicate1 & predicate2;
    }

    /// <summary>
    /// Bits of predicate1 that are not in predicate2.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public BV MkDiff(BV predicate1, BV predicate2)
    {
        return predicate1 & ~predicate2;
    }

    /// <summary>
    /// Complement of the predicate, restricted to the bits of True.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public BV MkNot(BV predicate)
    {
        return ones & ~predicate;
    }

    /// <summary>
    /// Disjunction of all the predicates in the enumeration; False when it is empty.
    /// </summary>
    public BV MkOr(IEnumerable<BV> predicates)
    {
        var res = zero;
        foreach (var p in predicates)
        {
            res = res | p;
            if (res.Equals(ones))
                return ones;
        }
        return res;
    }

    /// <summary>
    /// Bitwise disjunction of the two predicates.
    /// </summary>
    public BV MkOr(BV predicate1, BV predicate2)
    {
        return predicate1 | predicate2;
    }

    /// <summary>
    /// Makes the bit vector in which exactly the listed bit positions are set.
    /// Bits 0..63 live in the first word; bit b >= 64 lives in additional word b/64 - 1.
    /// </summary>
    /// <exception cref="AutomataException">a position is negative or not below nrOfBits</exception>
    public BV MkBV(params int[] truebits)
    {
        ulong first = 0;
        var more = new ulong[this.all0.Length];
        for (int i = 0; i < truebits.Length; i++)
        {
            int b = truebits[i];
            if (b >= nrOfBits || b < 0)
                throw new AutomataException(AutomataExceptionKind.BitOutOfRange);
            int k = b / 64;
            int j = b % 64;
            if (k == 0)
                first = first | ((ulong)1 << j);
            else
                more[k - 1] = more[k - 1] | ((ulong)1 << j);
        }
        var bv = new BV(first, more);
        return bv;
    }

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public BV MkRangeConstraint(char lower, char upper, bool caseInsensitive = false)
    {
        throw new NotSupportedException();
    }

    /// <summary>
    /// Returns the atom of the partition block that the decision tree assigns to c.
    /// </summary>
    /// <exception cref="AutomataException">caseInsensitive is true</exception>
    public BV MkCharConstraint(char c, bool caseInsensitive = false)
    {
        if (caseInsensitive == true)
            throw new AutomataException(AutomataExceptionKind.NotSupported);

        int i = this.dtree.GetId(c);
        return this.atoms[i];
    }

    /// <summary>
    /// Assumes that set is a union of some minterms (or empty).
    /// If null then null is returned.
    /// </summary>
    public BV ConvertFromCharSet(BDD set)
    {
        if (set == null)
            return null;
        var alg = set.algebra;
        BV res = this.zero;
        for (int i = 0; i < partition.Length; i++)
        {
            // Include atom i whenever block i overlaps the given set.
            BDD bdd_i = partition[i].AsBDD(alg);
            var conj = alg.MkAnd(bdd_i, set);
            if (alg.IsSatisfiable(conj))
            {
                res = res | atoms[i];
            }
        }
        return res;
    }

    /// <summary>
    /// Union, in the given BDD algebra, of the partition blocks whose atoms occur in pred.
    /// </summary>
    public BDD ConvertToCharSet(BDDAlgebra solver, BV pred)
    {
        BDD res = solver.False;
        if (!pred.Equals(this.zero))
        {
            for (int i = 0; i < atoms.Length; i++)
            {
                //construct the union of the corresponding atoms
                if (!(pred & atoms[i]).Equals(this.zero))
                {
                    BDD bdd_i = partition[i].AsBDD(solver);
                    res = solver.MkOr(res, bdd_i);
                }
            }
        }
        return res;
    }

    /// <summary>
    /// Returns the atoms array itself (not a copy).
    /// </summary>
    public BV[] GetPartition()
    {
        return atoms;
    }

    /// <summary>
    /// Enumerates the elements of every partition block whose atom intersects the set.
    /// </summary>
    public IEnumerable<char> GenerateAllCharacters(BV set)
    {
        for (int i = 0; i < atoms.Length; i++)
        {
            if (IsSatisfiable(atoms[i] & set))
                foreach (uint elem in partition[i].Enumerate())
                    yield return (char)elem;
        }
    }

    #region serialization
    /// <summary>
    /// Serialize: stores the decision tree under "d" and the serialized partition under "p".
    /// </summary>
    public void GetObjectData(SerializationInfo info, StreamingContext context)
    {
        info.AddValue("d", dtree);
        info.AddValue("p", SerializePartition());
    }

    /// <summary>
    /// Deserialize
    /// </summary>
    public BVAlgebra(SerializationInfo info, StreamingContext context)
        : this((DecisionTree)info.GetValue("d", typeof(DecisionTree)),
              DeserializePartition(info.GetString("p")))
    {
    }

    /// <summary>
    /// calls bv.Serialize()
    /// </summary>
    public string SerializePredicate(BV bv)
    {
        return bv.Serialize();
    }

    /// <summary>
    /// calls BV.Deserialize(s)
    /// </summary>
    public BV DeserializePredicate(string s)
    {
        return BV.Deserialize(s);
    }
    #endregion

    /// <summary>
    /// Not implemented.
    /// </summary>
    public BV MkCharPredicate(string name, BV pred)
    {
        throw new NotImplementedException();
    }
}
345 | }
346 |
--------------------------------------------------------------------------------
/srm/algebras/CharSetSolver.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | //using RestrictKeyType = System.Int64;
4 | using System.IO;
5 | using System.Text.RegularExpressions;
6 |
7 | namespace Microsoft.SRM
8 | {
/// <summary>
/// Provides functionality to build character sets, to perform boolean operations over character sets,
/// and to construct an SFA over character sets from a regex.
/// Character sets are represented by bitvector sets.
/// </summary>
public class CharSetSolver : BDDAlgebra, ICharAlgebra<BDD>
{

    int _bw;

    /// <summary>
    /// The bit width this solver was constructed with.
    /// </summary>
    public BitWidth Encoding
    {
        get { return (BitWidth)_bw; }
    }

    /// <summary>
    /// Construct the solver for BitWidth.BV16
    /// </summary>
    public CharSetSolver() : this(BitWidth.BV16)
    {
    }

    /// <summary>
    /// Construct a character set solver for the given character encoding (nr of bits).
    /// </summary>
    /// <exception cref="AutomataException">bits is not one of the BitWidth values</exception>
    public CharSetSolver(BitWidth bits) : base()
    {
        if (!CharacterEncodingTool.IsSpecified(bits))
            throw new AutomataException(AutomataExceptionKind.CharacterEncodingIsUnspecified);
        _bw = (int)bits;
    }

    IgnoreCaseTransformer _IgnoreCase = null;
    // Created on first use.
    IgnoreCaseTransformer IgnoreCase
    {
        get
        {
            if (_IgnoreCase == null)
                _IgnoreCase = new IgnoreCaseTransformer(this);
            return _IgnoreCase;
        }
    }

    // Cache of single-character sets, indexed by character code.
    BDD[] charPredTable = new BDD[1 << 16];

    /// <summary>
    /// Make a character containing the given character c.
    /// If c is a lower case or upper case character and ignoreCase is true
    /// then add both the upper case and the lower case characters.
    /// </summary>
    public BDD MkCharConstraint(char c, bool ignoreCase = false)
    {
        int i = (int)c;
        if (charPredTable[i] == null)
            charPredTable[i] = MkSetFrom((uint)c, _bw - 1);
        if (ignoreCase)
            return IgnoreCase.Apply(charPredTable[i]);
        return charPredTable[i];
    }

    /// <summary>
    /// Make a CharSet from all the characters in the range from m to n.
    /// Returns the empty set if n is less than m
    /// </summary>
    public BDD MkCharSetFromRange(char m, char n)
    {
        return MkSetFromRange((uint)m, (uint)n, _bw - 1);
    }

    /// <summary>
    /// Make a character set that is the union of the character sets of the given ranges.
    /// </summary>
    public BDD MkCharSetFromRanges(IEnumerable<Tuple<uint, uint>> ranges)
    {
        BDD res = False;
        foreach (var range in ranges)
            res = MkOr(res, MkSetFromRange(range.Item1, range.Item2, _bw - 1));
        return res;
    }

    /// <summary>
    /// Make a character set of all the characters in the interval from c to d.
    /// If ignoreCase is true ignore cases for upper and lower case characters by including both versions.
    /// </summary>
    public BDD MkRangeConstraint(char c, char d, bool ignoreCase = false)
    {
        var res = MkSetFromRange((uint)c, (uint)d, _bw - 1);
        if (ignoreCase)
            res = IgnoreCase.Apply(res);
        return res;
    }

    /// <summary>
    /// Make a BDD encoding of k least significant bits of all the integers in the ranges.
    /// Each range is an array whose elements 0 and 1 are the lower and upper bound.
    /// </summary>
    internal BDD MkBddForIntRanges(IEnumerable<int[]> ranges)
    {
        BDD bdd = False;
        foreach (var range in ranges)
            bdd = MkOr(bdd, MkSetFromRange((uint)range[0], (uint)range[1], _bw - 1));
        return bdd;
    }

    #region Serialializing and deserializing BDDs

    /// <summary>
    /// Represent the set as an integer array.
    /// Assumes that the bdd has less than 2^14 nodes and at most 16 variables.
    /// </summary>
    internal int[] SerializeCompact(BDD bdd)
    {
        //return SerializeBasedOnRanges(bdd);
        return SerializeCompact2(bdd);
    }

    /// <summary>
    /// Represent the set as an integer array.
    /// Assumes that the bdd has at most 2^14 nodes and at most 16 variables.
    /// </summary>
    /// <exception cref="AutomataException">the node or variable limit is exceeded</exception>
    int[] SerializeCompact2(BDD bdd)
    {
        // encode the bdd directly
        //
        // the element at index 0 is the false node
        // the element at index 1 is the true node
        // and entry at index i>1 is node i and has the structure
        // (ordinal trueNode falseNode)
        // where ordinal uses 4 bits and trueNode and falseNode each use 14 bits
        // Assumes that the bdd has less than 2^14 nodes and at most 16 variables.
        // BDD.False is represented by int[]{0}.
        // BDD.True is represented by int[]{0,0}.
        // The root of the BDD (Other than True or False) is node 2

        if (bdd.IsEmpty)
            return new int[] { 0 };
        if (bdd.IsFull)
            return new int[] { 0, 0 };

        int nrOfNodes = bdd.CountNodes();

        if (nrOfNodes > (1 << 14))
            throw new AutomataException(AutomataExceptionKind.CompactSerializationNodeLimitViolation);

        int[] res = new int[nrOfNodes];


        //here we know that bdd is neither empty nor full
        var done = new Dictionary<BDD, int>();
        done[False] = 0;
        done[True] = 1;

        Stack<BDD> stack = new Stack<BDD>();
        stack.Push(bdd);
        done[bdd] = 2;

        int doneCount = 3;

        while (stack.Count > 0)
        {
            BDD b = stack.Pop();
            // Number each child the first time it is seen and schedule it for encoding.
            if (!done.ContainsKey(b.One))
            {
                done[b.One] = (doneCount++);
                stack.Push(b.One);
            }
            if (!done.ContainsKey(b.Zero))
            {
                done[b.Zero] = (doneCount++);
                stack.Push(b.Zero);
            }
            int bId = done[b];
            int fId = done[b.Zero];
            int tId = done[b.One];

            if (b.Ordinal > 15)
                throw new AutomataException(AutomataExceptionKind.CompactSerializationBitLimitViolation);

            res[bId] = (b.Ordinal << 28) | (tId << 14) | fId;
        }
        return res;
    }

    /// <summary>
    /// Recreates a BDD from an int array that has been created using SerializeCompact
    /// </summary>
    internal BDD DeserializeCompact(int[] arcs)
    {
        //return DeserializeBasedOnRanges(arcs);
        return DeserializeCompact2(arcs);
    }

    /// <summary>
    /// Recreates a BDD from an int array that has been created using SerializeCompact
    /// </summary>
    /// <exception cref="AutomataException">the array is empty or is not a well-formed encoding</exception>
    BDD DeserializeCompact2(int[] arcs)
    {
        // An empty array is not a valid encoding (the smallest one is int[]{0}).
        if (arcs.Length == 0)
            throw new AutomataException(AutomataExceptionKind.CompactDeserializationError);
        if (arcs.Length == 1)
            return False;
        if (arcs.Length == 2)
            return True;

        //organized by order
        //note that all arcs are strictly increasing in levels
        var levels = new List<int>[16];

        BDD[] bddMap = new BDD[arcs.Length];
        bddMap[0] = False;
        bddMap[1] = True;

        for (int i = 2; i < arcs.Length; i++)
        {
            int x = ((arcs[i] >> 28) & 0xF);
            if (levels[x] == null)
                levels[x] = new List<int>();
            levels[x].Add(i);
        }

        //create the BDD nodes according to the levels x
        //this is to ensure proper internalization
        for (int x = 0; x < 16; x++)
        {
            if (levels[x] != null)
            {
                foreach (int i in levels[x])
                {
                    int one = ((arcs[i] >> 14) & 0x3FFF);
                    int zero = (arcs[i] & 0x3FFF);
                    // Valid indices are 0..Length-1; the original '>' test let an index equal
                    // to Length through and failed with IndexOutOfRangeException instead.
                    if (one >= bddMap.Length || zero >= bddMap.Length)
                        throw new AutomataException(AutomataExceptionKind.CompactDeserializationError);
                    var oneBranch = bddMap[one];
                    var zeroBranch = bddMap[zero];
                    // A child that has not been built yet means the encoding is malformed.
                    if (oneBranch == null || zeroBranch == null)
                        throw new AutomataException(AutomataExceptionKind.CompactDeserializationError);
                    var bdd = MkBvSet(x, oneBranch, zeroBranch);
                    bddMap[i] = bdd;
                    if (bdd.Ordinal <= bdd.One.Ordinal || bdd.Ordinal <= bdd.Zero.Ordinal)
                        throw new AutomataException(AutomataExceptionKind.CompactDeserializationError);
                }
            }
        }

        return bddMap[2];
    }
    #endregion

    /// <summary>
    /// Identity function, returns s.
    /// </summary>
    public BDD ConvertFromCharSet(BDD s)
    {
        return s;
    }

    /// <summary>
    /// Returns this character set solver.
    /// </summary>
    public CharSetSolver CharSetProvider
    {
        get { return this; }
    }

    /// <summary>
    /// Returns pred.
    /// </summary>
    public BDD MkCharPredicate(string name, BDD pred)
    {
        return pred;
    }

    /// <summary>
    /// Enumerates the members of the set as characters, in the order chosen by inRevereseOrder.
    /// </summary>
    public IEnumerable<char> GenerateAllCharacters(BDD bvSet, bool inRevereseOrder = false)
    {
        foreach (var c in GenerateAllElements(bvSet, inRevereseOrder))
            yield return (char)c;
    }

    /// <summary>
    /// Enumerates the members of the set as characters, smallest first.
    /// </summary>
    public IEnumerable<char> GenerateAllCharacters(BDD set)
    {
        return GenerateAllCharacters(set, false);
    }


    /// <summary>
    /// Calculate the number of elements in the set.
    /// </summary>
    /// <param name="set">the given set</param>
    /// <returns>the cardinality of the set</returns>
    public ulong ComputeDomainSize(BDD set)
    {
        var card = ComputeDomainSize(set, _bw - 1);
        return card;
    }

    /// <summary>
    /// Returns true iff the set contains exactly one element.
    /// </summary>
    /// <param name="set">the given set</param>
    /// <returns>true iff the set is a singleton</returns>
    public bool IsSingleton(BDD set)
    {
        var card = ComputeDomainSize(set, _bw - 1);
        return card == (long)1;
    }

    /// <summary>
    /// Convert the set into an equivalent array of ranges. The ranges are nonoverlapping and ordered.
    /// If limit > 0 then returns null if the total number of ranges exceeds limit.
    /// </summary>
    public Tuple<uint, uint>[] ToRanges(BDD set, int limit = 0)
    {
        return ToRanges(set, _bw - 1, limit);
    }

    // Yields every member of the set, range by range, smallest first.
    IEnumerable<uint> GenerateAllCharactersInOrder(BDD set)
    {
        var ranges = ToRanges(set);
        foreach (var range in ranges)
        {
            uint upper = range.Item2;
            uint i = range.Item1;
            if (i > upper)
                continue;
            // Stop on equality rather than testing i <= upper after the increment:
            // with upper == uint.MaxValue the increment would wrap around to 0.
            while (true)
            {
                yield return i;
                if (i == upper)
                    break;
                i++;
            }
        }
    }

    // Yields every member of the set, range by range, largest first.
    IEnumerable<uint> GenerateAllCharactersInReverseOrder(BDD set)
    {
        var ranges = ToRanges(set);
        for (int j = ranges.Length - 1; j >= 0; j--)
        {
            uint lower = ranges[j].Item1;
            uint i = ranges[j].Item2;
            if (i < lower)
                continue;
            // Stop on equality rather than testing i >= lower after the decrement:
            // with lower == 0 that test is always true for a uint and the loop never ended.
            // The value is yielded as is (no narrowing through char), matching the in-order variant.
            while (true)
            {
                yield return i;
                if (i == lower)
                    break;
                i--;
            }
        }
    }

    /// <summary>
    /// Generate all characters that are members of the set in alphabetical order, smallest first, provided that inReverseOrder is false.
    /// </summary>
    /// <param name="set">the given set</param>
    /// <param name="inReverseOrder">if true the members are generated in reverse alphabetical order with the largest first, otherwise in alphabetical order</param>
    /// <returns>enumeration of all characters in the set, the enumeration is empty if the set is empty</returns>
    public IEnumerable<uint> GenerateAllElements(BDD set, bool inReverseOrder)
    {
        if (set == False)
            return GenerateNothing();
        else if (inReverseOrder)
            return GenerateAllCharactersInReverseOrder(set);
        else
            return GenerateAllCharactersInOrder(set);
    }

    // The empty enumeration.
    IEnumerable<uint> GenerateNothing()
    {
        yield break;
    }

    /// <summary>
    /// Identity function, returns pred.
    /// </summary>
    public BDD ConvertToCharSet(BDDAlgebra alg, BDD pred)
    {
        return pred;
    }

    #region code generation

    /// <summary>
    /// Not supported by this algebra.
    /// </summary>
    public BDD[] GetPartition()
    {
        throw new NotSupportedException();
    }

    #endregion

    /// <summary>
    /// Not implemented.
    /// </summary>
    public override string SerializePredicate(BDD s)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Not implemented.
    /// </summary>
    public override BDD DeserializePredicate(string s)
    {
        throw new NotImplementedException();
    }
}
380 | }
381 |
--------------------------------------------------------------------------------
/srm/algebras/CharacterEncoding.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace Microsoft.SRM
6 | {
/// <summary>
/// Number of bits used in bitvectors.
/// </summary>
public enum BitWidth
{
    /// <summary>
    /// 7 bit ASCII encoding
    /// </summary>
    BV7 = 7,
    /// <summary>
    /// 8 bit Extended ASCII encoding
    /// </summary>
    BV8 = 8,
    /// <summary>
    /// 16 bit bit-vector encoding
    /// </summary>
    BV16 = 16,
    /// <summary>
    /// 32 bit bit-vector encoding
    /// </summary>
    BV32 = 32,
    /// <summary>
    /// 64 bit bit-vector encoding
    /// </summary>
    BV64 = 64
}
33 |
/// <summary>
/// Provides functionality for character encodings.
/// </summary>
public static class CharacterEncodingTool
{
    /// <summary>
    /// Maps ASCII to 7, extended ASCII to 8, and other encodings to 16.
    /// Throws AutomataException if IsSpecified(encoding) is false.
    /// </summary>
    /// <param name="encoding">the bit width to truncate</param>
    /// <returns>either 7, 8, or 16</returns>
    public static int Truncate(BitWidth encoding)
    {
        if (!IsSpecified(encoding))
        {
            throw new AutomataException(AutomataExceptionKind.CharacterEncodingIsUnspecified);
        }
        if (encoding == BitWidth.BV7)
        {
            return 7;
        }
        if (encoding == BitWidth.BV8)
        {
            return 8;
        }
        // BV16, BV32 and BV64 all truncate to 16 bits.
        return 16;
    }

    /// <summary>
    /// Returns true iff encoding equals to one of the enums in CharacterEncoding.
    /// </summary>
    public static bool IsSpecified(BitWidth encoding)
    {
        switch (encoding)
        {
            case BitWidth.BV7:
            case BitWidth.BV8:
            case BitWidth.BV16:
            case BitWidth.BV32:
            case BitWidth.BV64:
                return true;
            default:
                return false;
        }
    }
}
71 | }
72 |
--------------------------------------------------------------------------------
/srm/algebras/IBooleanAlgebra.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 |
4 | namespace Microsoft.SRM
5 | {
6 | ///
7 | /// Generic Boolean Algebra solver.
8 | /// Provides operations for conjunction, disjunction, and negation.
9 | /// Allows to decide if a predicate is satisfiable and if two predicates are equivalent.
10 | ///
11 | /// predicates
12 | public interface IBooleanAlgebra
13 | {
14 | ///
15 | /// Top element of the Boolean algebra, corresponds to the value true.
16 | ///
17 | S True { get; }
18 |
19 | ///
20 | /// Bottom element of the Boolean algebra, corresponds to the value false.
21 | ///
22 | S False { get; }
23 |
24 | ///
25 | /// Make a conjunction of predicate1 and predicate2.
26 | ///
27 | S MkAnd(S predicate1, S predicate2);
28 |
29 | ///
30 | /// Make a conjunction of all the predicates in the enumeration.
31 | /// Returns True if the enumeration is empty.
32 | ///
33 | S MkAnd(IEnumerable predicates);
34 |
35 | ///
36 | /// Make a conjunction of all the predicates.
37 | /// Returns True if the enumeration is empty.
38 | ///
39 | S MkAnd(params S[] predicates);
40 |
41 | ///
42 | /// Make a disjunction of predicate1 and predicate2.
43 | ///
44 | S MkOr(S predicate1, S predicate2);
45 |
46 | ///
47 | /// Make a disjunction of all the predicates in the enumeration.
48 | /// Must return False if the enumeration is empty.
49 | ///
50 | S MkOr(IEnumerable predicates);
51 |
52 | ///
53 | /// Negate the predicate.
54 | ///
55 | S MkNot(S predicate);
56 |
57 | ///
58 | /// Compute the predicate and(predicate1,not(predicate2))
59 | ///
60 | S MkDiff(S predicate1, S predicate2);
61 |
62 | ///
63 | /// Returns true iff the predicate is satisfiable.
64 | ///
65 | bool IsSatisfiable(S predicate);
66 |
67 | ///
68 | /// Returns true iff predicate1 is equivalent to predicate2.
69 | ///
70 | bool AreEquivalent(S predicate1, S predicate2);
71 |
72 | ///
73 | /// True iff any two equivalent predicates are identical.
74 | ///
75 | bool IsExtensional { get; }
76 |
77 | ///
78 | /// Given an array of constraints {c_1, c_2, ..., c_n} where n>=0.
79 | /// Enumerate all satisfiable Boolean combinations Tuple({b_1, b_2, ..., b_n}, c)
80 | /// where c is satisfisable and equivalent to c'_1 & c'_2 & ... & c'_n,
81 | /// where c'_i = c_i if b_i = true and c'_i is Not(c_i) otherwise.
82 | /// If n=0 return Tuple({},True)
83 | ///
84 | /// array of constraints
85 | /// Booolean combinations that are satisfiable
86 | IEnumerable> GenerateMinterms(params S[] constraints);
87 |
88 | ///
89 | /// Serialize the predicate using characters in [0-9a-f\-\.]
90 | ///
91 | /// given predicate
92 | string SerializePredicate(S s);
93 |
94 | ///
95 | /// Deserialize the predicate from a string constructed with Serialize
96 | ///
97 | /// given serialized predicate
98 | S DeserializePredicate(string s);
99 | }
100 | }
101 |
--------------------------------------------------------------------------------
/srm/algebras/ICharAlgebra.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Microsoft.SRM
7 | {
8 | ///
9 | /// Extends ICharAlgebra with character predicate solving and predicate pretty printing.
10 | ///
11 | /// predicates
12 | public interface ICharAlgebra : IBooleanAlgebra
13 | {
14 | BitWidth Encoding { get; }
15 |
16 | ///
17 | /// Make a constraint describing the set of all characters between a (inclusive) and b (inclusive).
18 | /// Add both uppercase and lowercase elelements if caseInsensitive is true.
19 | ///
20 | PRED MkRangeConstraint(char lower, char upper, bool caseInsensitive = false);
21 |
22 | ///
23 | /// Make a constraint describing a singleton set containing the character c, or
24 | /// a set containing also the upper and lowercase versions of c if caseInsensitive is true.
25 | ///
26 | /// if true include both the uppercase and the lowercase versions of the given character
27 | /// the given character
28 | PRED MkCharConstraint(char c, bool caseInsensitive = false);
29 |
30 | ///
31 | /// Make a term that encodes the given character set.
32 | ///
33 | PRED ConvertFromCharSet(BDD set);
34 |
35 | ///
36 | /// Compute the number of elements in the set
37 | ///
38 | ulong ComputeDomainSize(PRED set);
39 |
40 | ///
41 | /// Enumerate all characters in the set
42 | ///
43 | /// given set
44 | IEnumerable GenerateAllCharacters(PRED set);
45 |
46 | ///
47 | /// Convert a predicate into a set of characters.
48 | ///
49 | BDD ConvertToCharSet(BDDAlgebra solver, PRED pred);
50 |
51 | ///
52 | /// Gets the underlying character set solver.
53 | ///
54 | CharSetSolver CharSetProvider { get; }
55 |
56 | ///
57 | /// If named definitions are possible,
58 | /// makes a named definition of pred, as a unary relation symbol,
59 | /// such that, for all x, name(x) holds iff body(x) holds. Returns the
60 | /// atom name(x) that is equivalent to pred(x).
61 | /// If named definitions are not supported, returns pred.
62 | ///
63 | PRED MkCharPredicate(string name, PRED pred);
64 |
65 | ///
66 | /// Returns a partition of the full domain.
67 | ///
68 | PRED[] GetPartition();
69 | }
70 | }
71 |
72 |
--------------------------------------------------------------------------------
/srm/algebras/IntervalSet.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Runtime.Serialization;
4 | using System.Text;
5 |
6 | namespace Microsoft.SRM
7 | {
8 | ///
9 | /// Represents a sorted finite set of finite intervals representing characters
10 | ///
11 | [Serializable]
12 | public class IntervalSet : ISerializable
13 | {
14 | Tuple[] intervals;
15 |
16 | ///
17 | /// Create a new interval set
18 | ///
19 | /// given intervals
20 | public IntervalSet(params Tuple[] intervals)
21 | {
22 | this.intervals = intervals;
23 | }
24 |
25 | ///
26 | /// Gets the index'th element where index is in [0..Count-1].
27 | /// Throws IndexOutOfRangeException() if index is out of range.
28 | ///
29 | public uint this[int index]
30 | {
31 | get
32 | {
33 | int k = index;
34 | for (int i = 0; i < intervals.Length; i++)
35 | {
36 | int ith_size = (int)intervals[i].Item2 - (int)intervals[i].Item1 + 1;
37 | if (k < ith_size)
38 | return intervals[i].Item1 + (uint)k;
39 | else
40 | k = k - ith_size;
41 | }
42 | throw new IndexOutOfRangeException();
43 | }
44 | }
45 |
46 | int count = -1;
47 |
48 | ///
49 | /// Number of elements in the set
50 | ///
51 | public int Count
52 | {
53 | get
54 | {
55 | if (count == -1)
56 | {
57 | int s = 0;
58 | for (int i = 0; i < intervals.Length; i++)
59 | {
60 | s += (int)intervals[i].Item2 - (int)intervals[i].Item1 + 1;
61 | }
62 | count = s;
63 | }
64 | return count;
65 | }
66 | }
67 |
68 | public bool IsEmpty
69 | {
70 | get { return Count == 0; }
71 | }
72 |
73 | private static int CompareTuples(Tuple x, Tuple y)
74 | {
75 | return x.Item1.CompareTo(y.Item1);
76 | }
77 |
78 | internal static IntervalSet Merge(IEnumerable sets)
79 | {
80 | List> merged = new List>();
81 | foreach (var set in sets)
82 | merged.AddRange(set.intervals);
83 |
84 | merged.Sort(CompareTuples);
85 | return new IntervalSet(merged.ToArray());
86 | }
87 |
88 | public BDD AsBDD(BDDAlgebra alg)
89 | {
90 | var res = alg.False;
91 | for (int i = 0; i < intervals.Length; i++)
92 | res = res | alg.MkSetFromRange(intervals[i].Item1, intervals[i].Item2, 15);
93 | return res;
94 | }
95 |
96 | public IEnumerable Enumerate()
97 | {
98 | for (int i = 0; i < intervals.Length; i++)
99 | {
100 | for (uint j = intervals[i].Item1; j < intervals[i].Item2; j++)
101 | yield return j;
102 | yield return intervals[i].Item2;
103 | }
104 | }
105 |
106 | internal string ToCharacterClass(bool isComplement)
107 | {
108 | if (IsEmpty)
109 | return "[0-[0]]";
110 |
111 | string res = "";
112 | uint m = intervals[0].Item1;
113 | uint n = intervals[0].Item2;
114 | for (int i = 1; i < intervals.Length; i++)
115 | {
116 | if (intervals[i].Item1 == n + 1)
117 | n = intervals[i].Item2;
118 | else
119 | {
120 | res += ToCharacterClassInterval(m, n);
121 | m = intervals[i].Item1;
122 | n = intervals[i].Item2;
123 | }
124 | }
125 | res += ToCharacterClassInterval(m, n);
126 | if (isComplement || res.Length > 1)
127 | {
128 | res = "[" + (isComplement ? "^" : "") + res + "]";
129 | }
130 | return res;
131 | }
132 |
133 | private static string ToCharacterClassInterval(uint m, uint n)
134 | {
135 | if (m == 0 && n == 0xFFFF)
136 | return ".";
137 |
138 | if (m == n)
139 | return StringUtility.Escape((char)m);
140 |
141 | string res = StringUtility.Escape((char)m);
142 | if (n > m + 1)
143 | res += "-";
144 | res += StringUtility.Escape((char)n);
145 | return res;
146 | }
147 |
148 | public override string ToString()
149 | {
150 | return ToCharacterClass(false);
151 | }
152 |
153 | #region custom serialization
154 | ///
155 | /// Serialize
156 | ///
157 | public void GetObjectData(SerializationInfo info, StreamingContext context)
158 | {
159 | string s = Serialize();
160 | info.AddValue("i", s);
161 | }
162 | ///
163 | /// Deserialize
164 | ///
165 | public IntervalSet(SerializationInfo info, StreamingContext context)
166 | {
167 | string s = info.GetString("i");
168 | intervals = Deserialize(s);
169 | }
170 |
171 | ///
172 | /// Returns a string that can be parsed back to IntervalSet
173 | ///
174 | public string Serialize()
175 | {
176 | string s = "";
177 | for (int i=0; i < intervals.Length; i++)
178 | {
179 | if (i > 0)
180 | s += ",";
181 | s += intervals[i].Item1.ToString();
182 | s += "-";
183 | s += intervals[i].Item2.ToString();
184 | }
185 | return s;
186 | }
187 |
188 | static Tuple[] Deserialize(string s)
189 | {
190 | Func> f = pair =>
191 | {
192 | string[] vals = pair.Split('-');
193 | return new Tuple(uint.Parse(vals[0]), uint.Parse(vals[1]));
194 | };
195 | var intervals = Array.ConvertAll(s.Split(','), pair => f(pair));
196 | return intervals;
197 | }
198 |
199 | ///
200 | /// Parse the interval set from a string s that was produced with Serialize
201 | ///
202 | /// given serialization
203 | public static IntervalSet Parse(string s)
204 | {
205 | var intervals = Deserialize(s);
206 | return new IntervalSet(intervals);
207 | }
208 | #endregion
209 | }
210 | }
211 |
--------------------------------------------------------------------------------
/srm/algebras/MintermGenerator.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace Microsoft.SRM
6 | {
7 |
8 | ///
9 | /// Provides a generic implementation for minterm generation over a given Boolean Algebra.
10 | ///
11 | /// type of predicates
12 | public class MintermGenerator
13 | {
14 | IBooleanAlgebra ba;
15 |
16 | bool hashCodesRespectEquivalence;
17 |
18 | ///
19 | /// Constructs a minterm generator for a given Boolean Algebra.
20 | ///
21 | /// given Boolean Algebra
22 | public MintermGenerator(IBooleanAlgebra ba)
23 | {
24 | this.ba = ba;
25 | hashCodesRespectEquivalence = ba.IsExtensional;
26 | }
27 |
28 | ///
29 | /// Returns GenerateMinterms(true, preds).
30 | ///
31 | public IEnumerable> GenerateMinterms(params PRED[] preds)
32 | {
33 | return GenerateMinterms(true, preds);
34 | }
35 |
36 | ///
37 | /// Given an array of predidates {p_1, p_2, ..., p_n} where n>=0.
38 | /// Enumerate all satisfiable Boolean combinations Tuple({b_1, b_2, ..., b_n}, p)
39 | /// where p is satisfiable and equivalent to p'_1 & p'_2 & ... & p'_n,
40 | /// where p'_i = p_i if b_i = true and p'_i is Not(p_i) otherwise.
41 | /// If n=0 return Tuple({},True).
42 | ///
43 | /// array of predicates
44 | /// optimization flag: if true, uses equivalence checking to cluster equivalent predicates; otherwise does not use equivalence checking
45 | /// all minterms of the given predicate sequence
46 | public IEnumerable> GenerateMinterms(bool useEquivalenceChecking, params PRED[] preds)
47 | {
48 | if (preds.Length == 0)
49 | {
50 | yield return new Tuple(new bool[] { }, ba.True);
51 | }
52 | else
53 | {
54 | var count = preds.Length;
55 |
56 | List nonequivalentSets = new List();
57 |
58 | //work only with nonequivalent sets as distinct elements
59 | var indexLookup = new Dictionary();
60 | var newIndexMap = new Dictionary();
61 | var equivs = new List>();
62 |
63 | for (int i = 0; i < count; i++)
64 | {
65 | int newIndex;
66 | EquivClass equiv = CreateEquivalenceClass(useEquivalenceChecking, preds[i]);
67 | if (!newIndexMap.TryGetValue(equiv, out newIndex))
68 | {
69 | newIndex = newIndexMap.Count;
70 | newIndexMap[equiv] = newIndex;
71 | nonequivalentSets.Add(preds[i]);
72 | equivs.Add(new List());
73 | }
74 | indexLookup[i] = newIndex;
75 | equivs[newIndex].Add(i);
76 | }
77 |
78 | //var pairs = new List>(GenerateMinterms1(nonequivalentSets.ToArray()));
79 | //foreach (var pair in pairs)
80 | //{
81 | // var characteristic = new bool[preds.Length];
82 | // for (int i = 0; i < count; i++)
83 | // if (pair.First.Contains(indexLookup[i]))
84 | // characteristic[i] = true;
85 | // yield return
86 | // new Tuple(characteristic, pair.Second);
87 | //}
88 |
89 | var tree = new PartitonTree(ba);
90 | foreach (var psi in nonequivalentSets)
91 | tree.Refine(psi);
92 | foreach (var leaf in tree.GetLeaves())
93 | {
94 | var characteristic = new bool[preds.Length];
95 | foreach (var k in leaf.GetPath())
96 | foreach (var n in equivs[k])
97 | characteristic[n] = true;
98 | yield return
99 | new Tuple(characteristic, leaf.phi);
100 | }
101 | }
102 | }
103 |
104 | EquivClass CreateEquivalenceClass(bool useEquivalenceChecking, PRED set)
105 | {
106 | return new EquivClass(useEquivalenceChecking, this, set);
107 | }
108 |
109 | private class EquivClass
110 | {
111 | PRED set;
112 | MintermGenerator gen;
113 | bool useEquivalenceChecking;
114 |
115 | internal EquivClass(bool useEquivalenceChecking, MintermGenerator gen, PRED set)
116 | {
117 | this.set = set;
118 | this.gen = gen;
119 | this.useEquivalenceChecking = useEquivalenceChecking;
120 | }
121 |
122 | public override int GetHashCode()
123 | {
124 | if (useEquivalenceChecking && !gen.hashCodesRespectEquivalence)
125 | //cannot rely on equivalent predicates having the same hashcode
126 | //so all predicates end up in the same bucket that causes a linear search
127 | //with Equals to check equivalence when useEquivalenceChecking=true
128 | return 0;
129 | else
130 | return set.GetHashCode();
131 | }
132 |
133 | public override bool Equals(object obj)
134 | {
135 | if (useEquivalenceChecking)
136 | return gen.ba.AreEquivalent(set, ((EquivClass)obj).set);
137 | else
138 | return set.Equals(((EquivClass)obj).set);
139 | }
140 | }
141 | }
142 |
143 | internal class PartitonTree
144 | {
145 | PartitonTree parent;
146 | int nr;
147 | internal PRED phi;
148 | IBooleanAlgebra solver;
149 | PartitonTree left;
150 | PartitonTree right; //complement
151 | internal PartitonTree(IBooleanAlgebra solver)
152 | {
153 | this.solver = solver;
154 | nr = -1;
155 | parent = null;
156 | this.phi = solver.True;
157 | this.left = null;
158 | this.right = null;
159 | }
160 | PartitonTree(IBooleanAlgebra solver, int depth, PartitonTree parent, PRED phi, PartitonTree left, PartitonTree right)
161 | {
162 | this.solver = solver;
163 | this.parent = parent;
164 | this.nr = depth;
165 | this.phi = phi;
166 | this.left = left;
167 | this.right = right;
168 | }
169 |
170 | internal void Refine(PRED psi)
171 | {
172 |
173 | if (left == null && right == null)
174 | {
175 | #region leaf
176 | var phi_and_psi = solver.MkAnd(phi, psi);
177 | if (solver.IsSatisfiable(phi_and_psi))
178 | {
179 | var phi_min_psi = solver.MkAnd(phi, solver.MkNot(psi));
180 | if (solver.IsSatisfiable(phi_min_psi))
181 | {
182 | left = new PartitonTree(solver, nr + 1, this, phi_and_psi, null, null);
183 | right = new PartitonTree(solver, nr + 1, this, phi_min_psi, null, null);
184 | }
185 | else // [[phi]] subset of [[psi]]
186 | left = new PartitonTree(solver, nr + 1, this, phi, null, null); //psi must true
187 | }
188 | else // [[phi]] subset of [[not(psi)]]
189 | right = new PartitonTree(solver, nr + 1, this, phi, null, null); //psi must be false
190 | #endregion
191 | }
192 | else if (left == null)
193 | right.Refine(psi);
194 | else if (right == null)
195 | left.Refine(psi);
196 | else
197 | {
198 | #region nonleaf
199 | var phi_and_psi = solver.MkAnd(phi, psi);
200 | if (solver.IsSatisfiable(phi_and_psi))
201 | {
202 | var phi_min_psi = solver.MkAnd(phi, solver.MkNot(psi));
203 | if (solver.IsSatisfiable(phi_min_psi))
204 | {
205 | left.Refine(psi);
206 | right.Refine(psi);
207 | }
208 | else // [[phi]] subset of [[psi]]
209 | {
210 | left.ExtendLeft(); //psi is true
211 | right.ExtendLeft();
212 | }
213 | }
214 | else // [[phi]] subset of [[not(psi)]]
215 | {
216 | left.ExtendRight();
217 | right.ExtendRight(); //psi is false
218 | }
219 | #endregion
220 | }
221 | }
222 |
223 | private void ExtendRight()
224 | {
225 | if (left == null && right == null)
226 | right = new PartitonTree(solver, nr + 1, this, phi, null, null);
227 | else if (left == null)
228 | right.ExtendRight();
229 | else if (right == null)
230 | left.ExtendRight();
231 | else
232 | {
233 | left.ExtendRight();
234 | right.ExtendRight();
235 | }
236 | }
237 |
238 | private void ExtendLeft()
239 | {
240 | if (left == null && right == null)
241 | left = new PartitonTree(solver, nr + 1, this, phi, null, null);
242 | else if (left == null)
243 | right.ExtendLeft();
244 | else if (right == null)
245 | left.ExtendLeft();
246 | else
247 | {
248 | left.ExtendLeft();
249 | right.ExtendLeft();
250 | }
251 | }
252 |
253 | internal IEnumerable GetPath()
254 | {
255 | for (var curr = this; curr.parent != null; curr = curr.parent)
256 | if (curr.parent.left == curr) //curr is the left child of its parent
257 | yield return curr.nr;
258 | }
259 |
260 | internal IEnumerable> GetLeaves()
261 | {
262 | if (left == null && right == null)
263 | yield return this;
264 | else if (right == null)
265 | foreach (var leaf in left.GetLeaves())
266 | yield return leaf;
267 | else if (left == null)
268 | foreach (var leaf in right.GetLeaves())
269 | yield return leaf;
270 | else
271 | {
272 | foreach (var leaf in left.GetLeaves())
273 | yield return leaf;
274 | foreach (var leaf in right.GetLeaves())
275 | yield return leaf;
276 | }
277 | }
278 | }
279 | }
280 |
--------------------------------------------------------------------------------
/srm/algebras/RangeConverter.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 |
4 | namespace Microsoft.SRM
5 | {
6 | internal class RangeConverter
7 | {
8 | Dictionary[]> rangeCache = new Dictionary[]>();
9 |
10 | internal RangeConverter()
11 | {
12 | }
13 |
14 | //e.g. if b = 6 and p = 2 and ranges = (in binary form) {[0000 1010, 0000 1110]} i.e. [x0A,x0E]
15 | //then res = {[0000 1010, 0000 1110], [0001 1010, 0001 1110],
16 | // [0010 1010, 0010 1110], [0011 1010, 0011 1110]},
17 | Tuple[] LiftRanges(int b, int p, Tuple[] ranges)
18 | {
19 | if (p == 0)
20 | return ranges;
21 |
22 | int k = b - p;
23 | uint maximal = ((uint)1 << k) - 1;
24 |
25 | Tuple[] res = new Tuple[(1 << p) * (ranges.Length)];
26 | int j = 0;
27 | for (uint i = 0; i < (1 << p); i++)
28 | {
29 | uint prefix = (i << k);
30 | foreach (var range in ranges)
31 | res[j++] = new Tuple(range.Item1 | prefix, range.Item2 | prefix);
32 | }
33 |
34 | //the range wraps around : [0...][...2^k-1][2^k...][...2^(k+1)-1]
35 | if (ranges[0].Item1 == 0 && ranges[ranges.Length - 1].Item2 == maximal)
36 | {
37 | //merge consequtive ranges, we know that res has at least two elements here
38 | List> res1 = new List>();
39 | var from = res[0].Item1;
40 | var to = res[0].Item2;
41 | for (int i = 1; i < res.Length; i++)
42 | {
43 | if (to == res[i].Item1 - 1)
44 | to = res[i].Item2;
45 | else
46 | {
47 | res1.Add(new Tuple(from, to));
48 | from = res[i].Item1;
49 | to = res[i].Item2;
50 | }
51 | }
52 | res1.Add(new Tuple(from, to));
53 | res = res1.ToArray();
54 | }
55 |
56 | //CheckBug(res);
57 | return res;
58 | }
59 |
60 | Tuple[] ToRanges1(BDD set)
61 | {
62 | Tuple[] ranges;
63 | if (!rangeCache.TryGetValue(set, out ranges))
64 | {
65 | int b = set.Ordinal;
66 | uint mask = (uint)1 << b;
67 | if (set.Zero.IsEmpty)
68 | {
69 | #region 0-case is empty
70 | if (set.One.IsFull)
71 | {
72 | var range = new Tuple(mask, (mask << 1) - 1);
73 | ranges = new Tuple[] { range };
74 | }
75 | else //1-case is neither full nor empty
76 | {
77 | var ranges1 = LiftRanges(b, (b - set.One.Ordinal) - 1, ToRanges1(set.One));
78 | ranges = new Tuple[ranges1.Length];
79 | for (int i = 0; i < ranges1.Length; i++)
80 | {
81 | ranges[i] = new Tuple(ranges1[i].Item1 | mask, ranges1[i].Item2 | mask);
82 | }
83 | }
84 | #endregion
85 | }
86 | else if (set.Zero.IsFull)
87 | {
88 | #region 0-case is full
89 | if (set.One.IsEmpty)
90 | {
91 | var range = new Tuple(0, mask - 1);
92 | ranges = new Tuple[] { range };
93 | }
94 | else
95 | {
96 | var rangesR = LiftRanges(b, (b - set.One.Ordinal) - 1, ToRanges1(set.One));
97 | var range = rangesR[0];
98 | if (range.Item1 == 0)
99 | {
100 | ranges = new Tuple[rangesR.Length];
101 | ranges[0] = new Tuple(0, range.Item2 | mask);
102 | for (int i = 1; i < rangesR.Length; i++)
103 | {
104 | ranges[i] = new Tuple(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
105 | }
106 | }
107 | else
108 | {
109 | ranges = new Tuple[rangesR.Length + 1];
110 | ranges[0] = new Tuple(0, mask - 1);
111 | for (int i = 0; i < rangesR.Length; i++)
112 | {
113 | ranges[i + 1] = new Tuple(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
114 | }
115 | }
116 | }
117 | #endregion
118 | }
119 | else
120 | {
121 | #region 0-case is neither full nor empty
122 | var rangesL = LiftRanges(b, (b - set.Zero.Ordinal) - 1, ToRanges1(set.Zero));
123 | var last = rangesL[rangesL.Length - 1];
124 |
125 | if (set.One.IsEmpty)
126 | {
127 | ranges = rangesL;
128 | }
129 |
130 | else if (set.One.IsFull)
131 | {
132 | var ranges1 = new List>();
133 | for (int i = 0; i < rangesL.Length - 1; i++)
134 | ranges1.Add(rangesL[i]);
135 | if (last.Item2 == (mask - 1))
136 | {
137 | ranges1.Add(new Tuple(last.Item1, (mask << 1) - 1));
138 | }
139 | else
140 | {
141 | ranges1.Add(last);
142 | ranges1.Add(new Tuple(mask, (mask << 1) - 1));
143 | }
144 | ranges = ranges1.ToArray();
145 | }
146 | else //general case: neither 0-case, not 1-case is full or empty
147 | {
148 | var rangesR0 = ToRanges1(set.One);
149 |
150 | var rangesR = LiftRanges(b, (b - set.One.Ordinal) - 1, rangesR0);
151 |
152 | var first = rangesR[0];
153 |
154 | if (last.Item2 == (mask - 1) && first.Item1 == 0) //merge together the last and first ranges
155 | {
156 | ranges = new Tuple[rangesL.Length + rangesR.Length - 1];
157 | for (int i = 0; i < rangesL.Length - 1; i++)
158 | ranges[i] = rangesL[i];
159 | ranges[rangesL.Length - 1] = new Tuple(last.Item1, first.Item2 | mask);
160 | for (int i = 1; i < rangesR.Length; i++)
161 | ranges[rangesL.Length - 1 + i] = new Tuple(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
162 | }
163 | else
164 | {
165 | ranges = new Tuple[rangesL.Length + rangesR.Length];
166 | for (int i = 0; i < rangesL.Length; i++)
167 | ranges[i] = rangesL[i];
168 | for (int i = 0; i < rangesR.Length; i++)
169 | ranges[rangesL.Length + i] = new Tuple(rangesR[i].Item1 | mask, rangesR[i].Item2 | mask);
170 | }
171 |
172 | }
173 | #endregion
174 | }
175 | rangeCache[set] = ranges;
176 | }
177 | return ranges;
178 | }
179 |
180 | ///
181 | /// Convert the set into an equivalent array of ranges.
182 | /// The ranges are nonoverlapping and ordered.
183 | ///
184 | public Tuple[] ToRanges(BDD set, int maxBit)
185 | {
186 | if (set.IsEmpty)
187 | return new Tuple[] { };
188 | else if (set.IsFull)
189 | return new Tuple[] { new Tuple(0, ((((uint)1 << maxBit) << 1) - 1)) }; //note: maxBit could be 31
190 | else
191 | return LiftRanges(maxBit + 1, maxBit - set.Ordinal, ToRanges1(set));
192 | }
193 | }
194 | }
195 |
--------------------------------------------------------------------------------
/srm/icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AutomataDotNet/srm/7c7eec9c4c974610f246e2502d93730335e70fa9/srm/icon.png
--------------------------------------------------------------------------------
/srm/matcher/BooleanDecisionTree.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.Serialization;
4 |
5 | namespace Microsoft.SRM
6 | {
7 | ///
8 | /// Decision tree for mapping character ranges into corresponding partition block ids
9 | ///
10 | [Serializable]
11 | internal class BooleanDecisionTree : ISerializable
12 | {
13 | [NonSerialized]
14 | internal bool[] precomputed;
15 | [NonSerialized]
16 | internal DecisionTree.BST bst;
17 |
18 | internal BooleanDecisionTree(bool[] precomputed, DecisionTree.BST bst)
19 | {
20 | this.precomputed = precomputed;
21 | this.bst = bst;
22 | }
23 |
24 | ///
25 | /// Crteate a Boolean decision tree.
26 | /// References to solver and domain are not saved in the resulting decision tree.
27 | ///
28 | /// character alberbra
29 | /// elements that map to true
30 | /// upper limit for block ids for characters to be precomputed in an array (default is 0xFF, i.e. extended ASCII)
31 | ///
32 | internal static BooleanDecisionTree Create(CharSetSolver solver, BDD domain, ushort precomputeLimit = 0xFF)
33 | {
34 | BDD domain_compl = solver.MkNot(domain);
35 | var partition = new BDD[] { domain_compl, domain };
36 | if (precomputeLimit == 0)
37 | {
38 | return new BooleanDecisionTree(new bool[] { }, MkBST(new DecisionTree.PartitionCut(solver, partition), 0, 0xFFFF));
39 | }
40 |
41 | bool[] precomp = Precompute(solver, domain, precomputeLimit);
42 | DecisionTree.BST bst = null;
43 | if (precomputeLimit < ushort.MaxValue)
44 | bst = MkBST(new DecisionTree.PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue);
45 |
46 | return new BooleanDecisionTree(precomp, bst);
47 | }
48 |
49 | private static bool[] Precompute(CharSetSolver solver, BDD domain, int precomputeLimit)
50 | {
51 | bool[] precomp = new bool[precomputeLimit + 1];
52 | Func F = i =>
53 | {
54 | var bdd = solver.MkCharConstraint((char)i);
55 | if (solver.IsSatisfiable(solver.MkAnd(bdd, domain)))
56 | return true;
57 | else
58 | return false;
59 | };
60 | for (int c = 0; c <= precomputeLimit; c++)
61 | {
62 | precomp[c] = F(c);
63 | }
64 | return precomp;
65 | }
66 |
67 | private static DecisionTree.BST MkBST(DecisionTree.PartitionCut partition, int from, int to)
68 | {
69 | var cut = partition.Cut(from, to);
70 | if (cut.IsEmpty)
71 | return null;
72 | else
73 | {
74 | int block_id = cut.GetSigletonId();
75 | if (block_id >= 0)
76 | //there is precisely one block remaining
77 | return new DecisionTree.BST(block_id, null, null);
78 | else
79 | {
80 | //it must be that 'from < to'
81 | //or else there could only have been one block
82 | int mid = (from + to) / 2;
83 | var left = MkBST(cut, from, mid);
84 | var right = MkBST(cut, mid + 1, to);
85 | //it must be that either left != null or right != null
86 | if (left == null)
87 | return right;
88 | else if (right == null)
89 | return left;
90 | else
91 | return new DecisionTree.BST(mid + 1, left, right);
92 | }
93 | }
94 | }
95 |
96 | [MethodImpl(MethodImplOptions.AggressiveInlining)]
97 | public bool Contains(ushort c)
98 | {
99 | return (c < precomputed.Length ? precomputed[c] : bst.Find(c) == 1);
100 | }
101 |
102 | #region serialization
103 | ///
104 | /// Serialize
105 | ///
106 | public void GetObjectData(SerializationInfo info, StreamingContext context)
107 | {
108 | info.AddValue("p", SerializePrecomputed());
109 | info.AddValue("b", bst.Serialize());
110 | }
111 | ///
112 | /// Deserialize
113 | ///
114 | public BooleanDecisionTree(SerializationInfo info, StreamingContext context)
115 | {
116 | precomputed = DeserializePrecomputed(info.GetString("p"));
117 | this.bst = DecisionTree.BST.Deserialize(info.GetString("b"));
118 | }
119 |
120 | string SerializePrecomputed()
121 | {
122 | char[] chars = Array.ConvertAll(precomputed, b => (b ? '1' : '0'));
123 | var s = new String(chars);
124 | return s;
125 | }
126 |
127 | static bool[] DeserializePrecomputed(string s)
128 | {
129 | var vals = Array.ConvertAll(s.ToCharArray(), c => (c == '1' ? true : false));
130 | return vals;
131 | }
132 | #endregion
133 | }
134 | }
135 |
--------------------------------------------------------------------------------
/srm/matcher/DecisionTree.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.CompilerServices;
3 | using System.Runtime.Serialization;
4 | using System.Text;
5 |
6 | namespace Microsoft.SRM
7 | {
8 | ///
9 | /// Decision tree for mapping character ranges into corresponding partition block ids
10 | ///
11 | [Serializable]
12 | public class DecisionTree : ISerializable
13 | {
14 | [NonSerialized]
15 | internal int[] precomputed;
16 | [NonSerialized]
17 | internal BST bst;
18 |
19 | internal BST Tree
20 | {
21 | get
22 | {
23 | return bst;
24 | }
25 | }
26 |
27 | public DecisionTree(int[] precomputed, BST bst)
28 | {
29 | this.precomputed = precomputed;
30 | this.bst = bst;
31 | }
32 |
33 | ///
34 | /// Crteate a decision tree that maps a character into a partion block id
35 | ///
36 | /// character alberbra
37 | /// partition of the whole set of all characters into pairwise disjoint nonempty sets
38 | /// upper limit for block ids for characters to be precomputed in an array (default is 0xFF, i.e. extended ASCII)
39 | ///
40 | internal static DecisionTree Create(CharSetSolver solver, BDD[] partition, ushort precomputeLimit = 0xFF)
41 | {
42 | if (partition.Length == 1)
43 | //there is no actual partition, everything maps to one id 0, e.g. as in .*
44 | return new DecisionTree(new int[(int)precomputeLimit], new BST(0, null, null));
45 |
46 | if (precomputeLimit == 0)
47 | return new DecisionTree(new int[] { }, MkBST(new PartitionCut(solver, partition), 0, 0xFFFF));
48 |
49 | int[] precomp = Precompute(solver, partition, precomputeLimit);
50 | BST bst = null;
51 | if (precomputeLimit < ushort.MaxValue)
52 | bst = MkBST(new PartitionCut(solver, partition), precomputeLimit + 1, ushort.MaxValue);
53 |
54 | return new DecisionTree(precomp, bst);
55 | }
56 |
/// <summary>
/// Computes the block id of every character code in 0..precomputeLimit (inclusive).
/// </summary>
/// <param name="solver">character algebra used to build and intersect character sets</param>
/// <param name="partition">blocks to search; the id of a character is the index of the first block containing it</param>
/// <param name="precomputeLimit">largest character code to include in the table</param>
/// <returns>array of length precomputeLimit + 1 holding the block id of each character code</returns>
/// <exception cref="AutomataException">some character code is not contained in any block</exception>
private static int[] Precompute(CharSetSolver solver, BDD[] partition, int precomputeLimit)
{
    int[] precomp = new int[precomputeLimit + 1];
    for (int c = 0; c <= precomputeLimit; c++)
    {
        //the singleton set {c} does not depend on the block being tested, so build it once per character
        var c_bdd = solver.MkCharConstraint((char)c);
        int id = -1;
        for (int j = 0; j < partition.Length; j++)
        {
            if (solver.IsSatisfiable(solver.MkAnd(c_bdd, partition[j])))
            {
                id = j;
                break;
            }
        }
        if (id < 0)
            throw new AutomataException(AutomataExceptionKind.InternalError);
        precomp[c] = id;
    }
    return precomp;
}
81 |
/// <summary>
/// Recursively builds the search tree for the character codes in the interval from..to.
/// </summary>
/// <param name="partition">partition (or an earlier cut of it) to restrict to the interval</param>
/// <param name="from">lower bound of the interval</param>
/// <param name="to">upper bound of the interval</param>
/// <returns>null when no block intersects the interval, a leaf when exactly one does, otherwise an inner node</returns>
private static BST MkBST(PartitionCut partition, int from, int to)
{
    PartitionCut restricted = partition.Cut(from, to);
    if (restricted.IsEmpty)
        return null;

    int onlyBlock = restricted.GetSigletonId();
    if (onlyBlock >= 0)
    {
        //precisely one block is left in this interval
        return new BST(onlyBlock, null, null);
    }

    //more than one block remains, so it must be that 'from < to'
    int middle = (from + to) / 2;
    BST lower = MkBST(restricted, from, middle);
    BST upper = MkBST(restricted, middle + 1, to);

    //at least one of the two halves must be nonempty
    if (lower == null)
        return upper;
    if (upper == null)
        return lower;

    //Find sends character codes below middle + 1 to the lower half
    return new BST(middle + 1, lower, upper);
}
110 |
/// <summary>
/// Returns the block id of the given character code, taken from the precomputed table
/// when the code is within it and from the search tree otherwise.
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public int GetId(ushort c)
{
    return c < precomputed.Length ? precomputed[c] : bst.Find(c);
}
123 |
/// <summary>
/// Used in the decision tree to locate minterm ids of nonascii characters
/// </summary>
public class BST
{
    //in a leaf this is the id that Find returns; in an inner node it is the split point:
    //character codes below it go left, all others go right
    int node;
    BST left;
    BST right;

    internal BST Left
    {
        get { return left; }
    }

    internal BST Right
    {
        get { return right; }
    }

    /// <summary>
    /// True iff this node has no left child.
    /// </summary>
    internal bool IsLeaf
    {
        get { return left == null; }
    }

    internal int Node
    {
        get { return node; }
    }

    /// <summary>
    /// Walks down from this node and returns the value stored in the leaf that is reached.
    /// </summary>
    /// <param name="charCode">character code to look up</param>
    internal int Find(int charCode)
    {
        BST current = this;
        while (current.left != null)
            current = charCode < current.node ? current.left : current.right;
        return current.node; //return the leaf
    }

    public BST(int node, BST left, BST right)
    {
        this.node = node;
        this.left = left;
        this.right = right;
    }

    public override string ToString()
    {
        return this.Serialize();
    }

    #region custom serialization
    //a leaf is written as "node#" and an inner node as "(node,left,right)"
    void SerializeHelper(StringBuilder sb)
    {
        //the invariant culture keeps the text independent of the machine's regional settings
        string nodeText = node.ToString(System.Globalization.CultureInfo.InvariantCulture);
        if (IsLeaf)
        {
            sb.Append(nodeText).Append('#');
        }
        else
        {
            sb.Append('(').Append(nodeText).Append(',');
            left.SerializeHelper(sb);
            sb.Append(',');
            right.SerializeHelper(sb);
            sb.Append(')');
        }
    }

    /// <summary>
    /// Returns the textual form of the tree rooted at this node.
    /// </summary>
    public string Serialize()
    {
        var sb = new StringBuilder();
        SerializeHelper(sb);
        return sb.ToString();
    }

    /// <summary>
    /// Rebuilds a tree from text produced by Serialize.
    /// </summary>
    /// <param name="s">serialized tree</param>
    public static BST Deserialize(string s)
    {
        int tmp;
        return DeserializeHelper(s, 0, out tmp);
    }

    //parses the subtree that starts at position i; next_i is the position right after it
    static BST DeserializeHelper(string s, int i, out int next_i)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        if (s[i] == '(')
        {
            int j = s.IndexOf(',', i + 1);
            int node = int.Parse(s.Substring(i + 1, j - (i + 1)), invariant);
            int k;
            var left = DeserializeHelper(s, j + 1, out k);
            //position k holds the ',' between the two children
            int m;
            var right = DeserializeHelper(s, k + 1, out m);
            //position m holds the closing ')'
            next_i = m + 1;
            return new BST(node, left, right);
        }
        else
        {
            //leaf: digits terminated by '#'
            int j = s.IndexOf('#', i);
            int node = int.Parse(s.Substring(i, j - i), invariant);
            next_i = j + 1;
            return new BST(node, null, null);
        }
    }
    #endregion
}
248 |
/// <summary>
/// Represents a cut of the original partition wrt some interval
/// </summary>
internal class PartitionCut
{
    BDD[] blocks;
    CharSetSolver solver;

    internal PartitionCut(CharSetSolver solver, BDD[] blocks)
    {
        this.solver = solver;
        this.blocks = blocks;
    }

    /// <summary>
    /// True iff every block of this cut is empty.
    /// </summary>
    internal bool IsEmpty
    {
        get
        {
            for (int i = 0; i < blocks.Length; i++)
            {
                if (!blocks[i].IsEmpty)
                    return false;
            }
            return true;
        }
    }

    /// <summary>
    /// Returns the index of the only nonempty block,
    /// or -1 when the number of nonempty blocks is not exactly one.
    /// </summary>
    internal int GetSigletonId()
    {
        int found = -1;
        for (int i = 0; i < blocks.Length; i++)
        {
            if (blocks[i].IsEmpty)
                continue;
            if (found >= 0)
            {
                //a second nonempty block was found
                return -1;
            }
            found = i;
        }
        return found;
    }

    /// <summary>
    /// Intersects every block with the character range lower..upper and returns the result as a new cut.
    /// </summary>
    internal PartitionCut Cut(int lower, int upper)
    {
        var range = solver.MkCharSetFromRange((char)lower, (char)upper);
        BDD[] restricted = new BDD[blocks.Length];
        for (int i = 0; i < blocks.Length; i++)
            restricted[i] = solver.MkAnd(blocks[i], range);
        return new PartitionCut(solver, restricted);
    }
}
294 |
#region serialization
/// <summary>
/// Serialize
/// </summary>
/// <param name="info">receives the table under key "p" and the search tree under key "b"</param>
/// <param name="context">not used</param>
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
    info.AddValue("p", SerializePrecomputed());
    //Create leaves bst null when the table covers every character code; store that as an empty string
    info.AddValue("b", bst == null ? "" : bst.Serialize());
}
/// <summary>
/// Deserialize
/// </summary>
/// <param name="info">holds the table under key "p" and the search tree under key "b"</param>
/// <param name="context">not used</param>
public DecisionTree(SerializationInfo info, StreamingContext context)
{
    precomputed = DeserializePrecomputed(info.GetString("p"));
    string tree = info.GetString("b");
    //an empty string stands for "no search tree"
    bst = string.IsNullOrEmpty(tree) ? null : BST.Deserialize(tree);
}

//writes the table as comma separated decimal numbers; an empty table becomes the empty string
string SerializePrecomputed()
{
    string[] parts = Array.ConvertAll(precomputed,
        x => x.ToString(System.Globalization.CultureInfo.InvariantCulture));
    return string.Join(",", parts);
}

//inverse of SerializePrecomputed
static int[] DeserializePrecomputed(string s)
{
    //Split would turn the empty string into one empty entry that cannot be parsed
    if (s.Length == 0)
        return new int[0];
    return Array.ConvertAll(s.Split(','),
        x => int.Parse(x, System.Globalization.CultureInfo.InvariantCulture));
}
#endregion
331 | }
332 | }
333 |
--------------------------------------------------------------------------------
/srm/matcher/IMatcher.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 | using System.Reflection;
7 | using System.Runtime.Serialization;
8 | using System.IO;
9 |
10 | namespace Microsoft.SRM
11 | {
/// <summary>
/// Provides IsMatch and Matches methods.
/// </summary>
public interface IMatcher
{
    /// <summary>
    /// Returns true iff the input string matches.
    /// </summary>
    /// <param name="input">given input string</param>
    /// <param name="startat">start position in the input, default is 0</param>
    /// <param name="endat">end position in the input, -1 means that the value is unspecified and taken to be input.Length-1</param>
    bool IsMatch(string input, int startat = 0, int endat = -1);

    /// <summary>
    /// Returns all matches as pairs (startindex, length) in the input string.
    /// </summary>
    /// <param name="input">given input string</param>
    /// <param name="limit">as soon as this many matches have been found the search terminates, 0 or negative value means that there is no bound, default is 0</param>
    /// <param name="startat">start position in the input, default is 0</param>
    /// <param name="endat">end position in the input, -1 means that the value is unspecified and taken to be input.Length-1</param>
    // NOTE(review): the element type of the returned List is not visible in this listing - confirm it against the original file
    List Matches(string input, int limit = 0, int startat = 0, int endat = -1);
}
34 | }
35 |
--------------------------------------------------------------------------------
/srm/matcher/UTF8Encoding.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 | using System.Runtime.CompilerServices;
7 |
8 | namespace Microsoft.SRM
9 | {
/// <summary>
/// Methods for decoding UTF8 encoded strings.
/// </summary>
public static class UTF8Encoding
{
    /// <summary>
    /// Decode the next codepoint in the input.
    /// Here input[i] is assumed to be non-ASCII.
    /// The input byte array is assumed to be valid UTF8 encoded Unicode text.
    /// </summary>
    /// <param name="input">UTF8 encoded Unicode text</param>
    /// <param name="i">position of the current start byte</param>
    /// <param name="step">how many bytes were consumed</param>
    /// <param name="codepoint">computed Unicode codepoint</param>
    internal static void DecodeNextNonASCII(byte[] input, int i, out int step, out int codepoint)
    {
        int lead = input[i];

        // lead byte of the form 110x.xxxx starts a two byte encoding
        if ((lead & 0xE0) == 0xC0)
        {
            int high = (lead & 0x1F) << 6;
            int low = input[i + 1] & 0x3F;
            codepoint = high | low;
            step = 2;
            return;
        }

        // lead byte of the form 1110.xxxx starts a three byte encoding
        if ((lead & 0xF0) == 0xE0)
        {
            int high = (lead & 0x0F) << 12;
            int middle = (input[i + 1] & 0x3F) << 6;
            int low = input[i + 2] & 0x3F;
            codepoint = high | middle | low;
            step = 3;
            return;
        }

        // anything else is taken to be 1111.0xxx, the lead byte of a four byte encoding
        int bits18 = (lead & 0x07) << 18;
        int bits12 = (input[i + 1] & 0x3F) << 12;
        int bits6 = (input[i + 2] & 0x3F) << 6;
        int bits0 = input[i + 3] & 0x3F;
        codepoint = bits18 | bits12 | bits6 | bits0;
        step = 4;
    }

    /// <summary>
    /// Computes H for codepoint = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static ushort HighSurrogate(int codepoint)
    {
        int offset = codepoint - 0x10000;
        return (ushort)((offset >> 10) | 0xD800);
    }

    /// <summary>
    /// Computes L for codepoint = ((H - 0xD800) * 0x400) + (L - 0xDC00) + 0x10000.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    internal static ushort LowSurrogate(int codepoint)
    {
        int offset = codepoint - 0x10000;
        return (ushort)((offset & 0x3FF) | 0xDC00);
    }
}
71 | }
72 |
--------------------------------------------------------------------------------
/srm/matcher/VectorizedIndexOf.cs:
--------------------------------------------------------------------------------
1 | using System.Linq;
2 | using System.Numerics;
3 | using System.Runtime.CompilerServices;
4 | using System.Collections;
5 | using System.Collections.Generic;
6 | using System;
7 | using System.Linq.Expressions;
8 |
9 | namespace Microsoft.SRM
10 | {
11 | public static class VectorizedIndexOf
12 | {
13 | static int vecUshortSize = Vector.Count;
14 | static int vecUintSize = Vector.Count;
15 | static int vecByteSize = Vector.Count;
16 |
17 | #if UNSAFE
18 |
/// <summary>
/// Finds the first occurrence of toMatch in chars at or after start.
/// </summary>
/// <param name="chars">pointer to the characters to search</param>
/// <param name="length">number of characters available at chars</param>
/// <param name="start">position where the search begins</param>
/// <param name="toMatch">string to look for; for the empty string start is returned</param>
/// <returns>index of the first occurrence, or -1 if there is none</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int UnsafeIndexOf(char* chars, int length, int start, string toMatch)
{
    if (toMatch.Length == 0)
    {
        return start;
    }
    if (toMatch.Length == 1)
    {
        return UnsafeIndexOf1(chars, length, start, toMatch[0], new Vector<ushort>(toMatch[0]));
    }

    fixed (char* toMatchp = toMatch)
    {
        var first = new Vector<ushort>((ushort)toMatchp[0]);
        int lastOffset = toMatch.Length - 1;
        var last = new Vector<ushort>((ushort)toMatchp[lastOffset]);

        int i = start;
        //last block start for which both vector reads below stay inside the input
        int lastVec = length - vecUshortSize - lastOffset;
        for (; i <= lastVec; i += vecUshortSize)
        {
            var vecFirst = Unsafe.Read<Vector<ushort>>(chars + i);
            var vecLast = Unsafe.Read<Vector<ushort>>(chars + i + lastOffset);

            //lane j is set when chars[i + j] equals the first and chars[i + j + lastOffset] the last character
            var mask = Vector.BitwiseAnd(Vector.Equals(vecFirst, first), Vector.Equals(vecLast, last));
            if (Vector.EqualsAll(mask, Vector<ushort>.Zero))
                continue;

            for (int j = 0; j < vecUshortSize; ++j)
            {
                if (mask[j] == 0)
                    continue;
                //candidate position: compare the complete string
                int ij = i + j;
                int k = 0;
                while (k <= lastOffset && chars[ij + k] == toMatchp[k])
                    ++k;
                if (k > lastOffset)
                    return ij;
            }
        }

        //scalar remainder: only positions where the whole string still fits are tried,
        //so chars[i + k] never reads beyond the end of the input
        int lastStart = length - toMatch.Length;
        for (; i <= lastStart; ++i)
        {
            int k = 0;
            while (k <= lastOffset && chars[i + k] == toMatchp[k])
                ++k;
            if (k > lastOffset)
                return i;
        }
        return -1;
    }
}
80 |
/// <summary>
/// Finds the first position at or after start whose character is accepted by toMatch.
/// Whole vectors are first screened against each vector in toMatchVecs.
/// </summary>
/// <param name="chars">pointer to the characters to search</param>
/// <param name="length">number of characters available at chars</param>
/// <param name="start">position where the search begins</param>
/// <param name="toMatch">decision tree that decides membership of a character</param>
/// <param name="toMatchVecs">vectors used to screen a whole block of characters at once</param>
/// <returns>index of the first accepted character, or -1 if there is none</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int UnsafeIndexOf(char* chars, int length, int start, BooleanDecisionTree toMatch, Vector<ushort>[] toMatchVecs)
{
    fixed (bool* table = toMatch.precomputed)
    {
        int pos = start;
        int lastBlockStart = length - vecUshortSize;
        int tableLength = toMatch.precomputed.Length;
        int vecCount = toMatchVecs.Length;
        while (pos <= lastBlockStart)
        {
            var block = Unsafe.Read<Vector<ushort>>(chars + pos);
            for (int v = 0; v < vecCount; v++)
            {
                if (!Vector.EqualsAny(block, toMatchVecs[v]))
                    continue;

                //some lane matched: check the characters of this block one by one
                for (int offset = 0; offset < vecUshortSize; ++offset)
                {
                    int at = pos + offset;
                    char c = chars[at];
                    bool accepted = c < tableLength ? table[c] : (toMatch.bst.Find(c) == 1);
                    if (accepted)
                        return at;
                }
            }
            pos += vecUshortSize;
        }

        //fewer characters than one vector remain
        while (pos < length)
        {
            if (toMatch.Contains(chars[pos]))
                return pos;
            ++pos;
        }
        return -1;
    }
}
116 |
/// <summary>
/// Finds the first position at or after start whose character is accepted by toMatch.
/// Whole vectors are first screened against toMatchVec.
/// </summary>
/// <param name="chars">pointer to the characters to search</param>
/// <param name="length">number of characters available at chars</param>
/// <param name="start">position where the search begins</param>
/// <param name="toMatch">decision tree that decides membership of a character</param>
/// <param name="toMatchVec">vector used to screen a whole block of characters at once</param>
/// <returns>index of the first accepted character, or -1 if there is none</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int UnsafeIndexOf(char* chars, int length, int start, BooleanDecisionTree toMatch, Vector<ushort> toMatchVec)
{
    fixed (bool* table = toMatch.precomputed)
    {
        int pos = start;
        int lastBlockStart = length - vecUshortSize;
        int tableLength = toMatch.precomputed.Length;
        while (pos <= lastBlockStart)
        {
            var block = Unsafe.Read<Vector<ushort>>(chars + pos);
            if (Vector.EqualsAny(block, toMatchVec))
            {
                //some lane matched: check the characters of this block one by one
                for (int offset = 0; offset < vecUshortSize; ++offset)
                {
                    int at = pos + offset;
                    char c = chars[at];
                    bool accepted = c < tableLength ? table[c] : (toMatch.bst.Find(c) == 1);
                    if (accepted)
                        return at;
                }
            }
            pos += vecUshortSize;
        }

        //fewer characters than one vector remain
        while (pos < length)
        {
            if (toMatch.Contains(chars[pos]))
                return pos;
            ++pos;
        }
        return -1;
    }
}
148 |
/// <summary>
/// Finds the first position at or after start whose character equals toMatch.
/// </summary>
/// <param name="chars">pointer to the characters to search</param>
/// <param name="length">number of characters available at chars</param>
/// <param name="start">position where the search begins</param>
/// <param name="toMatch">character code to look for</param>
/// <param name="toMatchVec">vector compared against a whole block of characters at once</param>
/// <returns>index of the first occurrence, or -1 if there is none</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int UnsafeIndexOf1(char* chars, int length, int start, ushort toMatch, Vector<ushort> toMatchVec)
{
    int pos = start;
    int lastBlockStart = length - vecUshortSize;
    while (pos <= lastBlockStart)
    {
        var block = Unsafe.Read<Vector<ushort>>(chars + pos);
        if (Vector.EqualsAny(block, toMatchVec))
        {
            //some lane matched: locate it
            for (int offset = 0; offset < vecUshortSize; ++offset)
            {
                if (chars[pos + offset] == toMatch)
                    return pos + offset;
            }
        }
        pos += vecUshortSize;
    }

    //fewer characters than one vector remain
    while (pos < length)
    {
        if (chars[pos] == toMatch)
            return pos;
        ++pos;
    }
    return -1;
}
173 |
/// <summary>
/// Finds the first position at or after start whose byte is one of the bytes in toMatch.
/// </summary>
/// <param name="input">bytes to search</param>
/// <param name="start">position where the search begins</param>
/// <param name="toMatch">set of bytes to look for</param>
/// <returns>index of the first byte contained in toMatch, or -1 if there is none</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int UnsafeIndexOfByte(byte[] input, int start, byte[] toMatch)
{
    var toMatchVecs = toMatch.Select(x => new Vector<byte>(x)).ToArray();
    fixed (byte* bytes = input)
    {
        var length = input.Length;
        int i = start;
        int lastVec = length - vecByteSize;
        for (; i <= lastVec; i += vecByteSize)
        {
            var vec = Unsafe.Read<Vector<byte>>(bytes + i);
            foreach (var searchVec in toMatchVecs)
            {
                if (Vector.EqualsAny(vec, searchVec))
                {
                    //the block holds vecByteSize bytes, so every one of them has to be examined
                    for (int j = 0; j < vecByteSize; ++j)
                    {
                        if (toMatch.Contains(input[i + j])) return i + j;
                    }
                }
            }
        }
        //fewer bytes than one vector remain
        for (; i < length; ++i)
        {
            if (toMatch.Contains(input[i])) return i;
        }
        return -1;
    }
}
204 |
/// <summary>
/// Finds the first position at or after i whose byte equals toMatch.
/// </summary>
/// <param name="input">bytes to search</param>
/// <param name="i">position where the search begins</param>
/// <param name="toMatch">byte to look for</param>
/// <param name="toMatchVec">vector compared against a whole block of bytes at once</param>
/// <returns>result of Array.IndexOf from the first block that contains the byte, or from the remainder</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal unsafe static int UnsafeIndexOfByte(byte[] input, int i, byte toMatch, Vector<byte> toMatchVec)
{
    int lastBlockStart = input.Length - vecByteSize;
    fixed (byte* bytes = input)
    {
        //skip over whole blocks that do not contain the byte
        while (i <= lastBlockStart)
        {
            var block = Unsafe.Read<Vector<byte>>(bytes + i);
            if (Vector.EqualsAny(block, toMatchVec))
                break;
            i += vecByteSize;
        }
        return Array.IndexOf(input, toMatch, i);
    }
}
223 |
224 | #endif
225 |
/// <summary>
/// Finds the first position at or after i whose byte equals toMatch.
/// </summary>
/// <param name="input">bytes to search</param>
/// <param name="i">position where the search begins</param>
/// <param name="toMatch">byte to look for</param>
/// <param name="toMatchVec">vector compared against a whole block of bytes at once</param>
/// <returns>result of Array.IndexOf from the first block that contains the byte, or from the remainder</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOfByte(byte[] input, int i, byte toMatch, Vector<byte> toMatchVec)
{
    int lastBlockStart = input.Length - vecByteSize;
    //skip over whole blocks that do not contain the byte
    for (; i <= lastBlockStart; i += vecByteSize)
    {
        if (Vector.EqualsAny(new Vector<byte>(input, i), toMatchVec))
            break;
    }
    return Array.IndexOf(input, toMatch, i);
}
234 |
/// <summary>
/// Finds the first occurrence of the byte sequence seqToMatch in input at or after i.
/// </summary>
/// <param name="input">bytes to search</param>
/// <param name="i">position where the search begins</param>
/// <param name="seqToMatch">nonempty byte sequence to look for</param>
/// <param name="firstToMatchVec">vector used to screen whole blocks for the first byte of the sequence</param>
/// <returns>index of the first occurrence, or -1 if there is none</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int IndexOfByteSeq(byte[] input, int i, byte[] seqToMatch, Vector<byte> firstToMatchVec)
{
    int length = input.Length;
    int lastVec = length - vecByteSize;
    byte firstToMatch = seqToMatch[0];
    int seqToMatch_length = seqToMatch.Length;
    for (;;)
    {
        //skip over whole blocks that do not contain the first byte
        while (i <= lastVec && !Vector.EqualsAny(new Vector<byte>(input, i), firstToMatchVec))
            i += vecByteSize;

        i = Array.IndexOf(input, firstToMatch, i);
        //no candidate left, or the sequence no longer fits at this or any later position
        if (i < 0 || i + seqToMatch_length > length)
            return -1;

        int j = 1;
        while (j < seqToMatch_length && input[i + j] == seqToMatch[j])
            j += 1;
        if (j == seqToMatch_length)
            return i;

        //mismatch: keep looking from the next position, also in the part
        //of the input that is shorter than one vector
        i += 1;
    }
}
275 | }
276 | }
277 |
--------------------------------------------------------------------------------
/srm/parser/RegexBoyerMoore.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // Copyright (c) Microsoft Corporation. All rights reserved.
4 | //
5 | //------------------------------------------------------------------------------
6 |
7 | // The RegexBoyerMoore object precomputes the Boyer-Moore
8 | // tables for fast string scanning. These tables allow
// you to scan for the first occurrence of a string within
10 | // a large body of text without examining every character.
11 | // The performance of the heuristic depends on the actual
12 | // string and the text being searched, but usually, the longer
13 | // the string that is being searched for, the fewer characters
14 | // need to be examined.
15 |
16 | namespace System.Text.RegularExpressions
17 | {
18 |
19 | using System.Collections;
20 | using System.Diagnostics;
21 | using System.Globalization;
22 |
23 | internal sealed class RegexBoyerMoore {
24 | internal int[] _positive;
25 | internal int[] _negativeASCII;
26 | internal int[][] _negativeUnicode;
27 | internal String _pattern;
28 | internal int _lowASCII;
29 | internal int _highASCII;
30 | internal bool _rightToLeft;
31 | internal bool _caseInsensitive;
32 | internal CultureInfo _culture;
33 |
34 | internal const int infinite = 0x7FFFFFFF;
35 |
/*
 * Constructs a Boyer-Moore state machine for searching for the string
 * pattern. The string must not be zero-length.
 *
 * pattern         - the string to search for
 * caseInsensitive - when true the pattern is lower-cased here and the text is
 *                   lower-cased while it is being compared
 * rightToLeft     - when true the tables are built for scanning backwards
 * culture         - culture used for lower-casing
 */
internal RegexBoyerMoore(String pattern, bool caseInsensitive, bool rightToLeft, CultureInfo culture) {
    /*
     * Sorry, you just can't use Boyer-Moore to find an empty pattern.
     * We're doing this for your own protection. (Really, for speed.)
     */
    Debug.Assert(pattern.Length != 0, "RegexBoyerMoore called with an empty string. This is bad for perf");

    int beforefirst;
    int last;
    int bump;
    int examine;
    int scan;
    int match;
    char ch;

    // We do the ToLower character by character for consistency. With surrogate chars, doing
    // a ToLower on the entire string could actually change the surrogate pair. This is more correct
    // linguistically, but since Regex doesn't support surrogates, it's more important to be
    // consistent.
    if (caseInsensitive) {
        StringBuilder sb = new StringBuilder(pattern.Length);
        for (int i = 0; i < pattern.Length; i++)
            sb.Append(Char.ToLower(pattern[i], culture));
        pattern = sb.ToString();
    }

    _pattern = pattern;
    _rightToLeft = rightToLeft;
    _caseInsensitive = caseInsensitive;
    _culture = culture;

    // "last" is the pattern position that Scan compares first, "beforefirst" is the
    // position one step beyond the opposite end, and "bump" is the reading direction.
    if (!rightToLeft) {
        beforefirst = -1;
        last = pattern.Length - 1;
        bump = 1;
    }
    else {
        beforefirst = pattern.Length;
        last = 0;
        bump = -1;
    }

    /*
     * PART I - the good-suffix shift table
     *
     * compute the positive requirement:
     * if char "i" is the first one from the right that doesn't match,
     * then we know the matcher can advance by _positive[i].
     *
     * This algorithm appears to be a simplified variant of the
     * standard Boyer-Moore good suffix calculation. It could
     * be one of D.M. Sunday's variations, but I have not found which one.
     *
     * Maybe someday rewrite this with the real Boyer-Moore algorithm and split it
     * out into a separate piece of code in the BCL.
     *
     */
    _positive = new int[pattern.Length];

    examine = last;
    ch = pattern[examine];
    _positive[examine] = bump;
    examine -= bump;

    for (;;) {
        // find an internal char (examine) that matches the tail

        for (;;) {
            if (examine == beforefirst)
                goto OuterloopBreak;
            if (pattern[examine] == ch)
                break;
            examine -= bump;
        }

        match = last;
        scan = examine;

        // find the length of the match

        for (;;) {
            if (scan == beforefirst || pattern[match] != pattern[scan]) {
                // at the end of the match, note the difference in _positive
                // this is not the length of the match, but the distance from the internal match
                // to the tail suffix.
                if (_positive[match] == 0)
                    _positive[match] = match - scan;

                // System.Diagnostics.Debug.WriteLine("Set positive[" + match + "] to " + (match - scan));

                break;
            }

            scan -= bump;
            match -= bump;
        }

        examine -= bump;
    }

    OuterloopBreak:

    match = last - bump;

    // scan for the chars for which there are no shifts that yield a different candidate

    /*
     * The inside of the if statement used to say
     * "_positive[match] = last - beforefirst;"
     * I've changed it to the below code. This
     * is slightly less aggressive in how much we skip, but at worst it
     * should mean a little more work rather than skipping a potential
     * match.
     *
     */
    while (match != beforefirst) {
        if (_positive[match] == 0)
            _positive[match] = bump;

        match -= bump;
    }

    //System.Diagnostics.Debug.WriteLine("good suffix shift table:");
    //for (int i=0; i<_positive.Length; i++)
    //    System.Diagnostics.Debug.WriteLine("\t_positive[" + i + "] = " + _positive[i]);


    /*
     * PART II - the bad-character shift table
     *
     * compute the negative requirement:
     * if char "ch" is the reject character when testing position "i",
     * we can slide up by _negative[ch];
     * (_negative[ch] = str.Length - 1 - str.LastIndexOf(ch))
     *
     * the lookup table is divided into ASCII and Unicode portions;
     * only those parts of the Unicode 16-bit code set that actually
     * appear in the string are in the table. (Maximum size with
     * Unicode is 65K; ASCII only case is 512 bytes.)
     */

    _negativeASCII = new int[128];

    // default shift: the whole pattern length, signed by the scan direction
    for (int i = 0; i < 128; i++)
        _negativeASCII[i] = last - beforefirst;

    _lowASCII = 127;
    _highASCII = 0;

    for (examine = last; examine != beforefirst; examine -= bump) {
        ch = pattern[examine];

        if (ch < 128) {
            if (_lowASCII > ch)
                _lowASCII = ch;

            if (_highASCII < ch)
                _highASCII = ch;

            // only the first time a character is seen (closest to "last") sets its shift
            if (_negativeASCII[ch] == last - beforefirst)
                _negativeASCII[ch] = last - examine;
        }
        else {
            int i = ch >> 8;
            int j = ch & 0xFF;

            if (_negativeUnicode == null) {
                _negativeUnicode = new int[256][];
            }

            if (_negativeUnicode[i] == null) {
                int[] newarray = new int[256];

                for (int k = 0; k < 256; k++)
                    newarray[k] = last - beforefirst;

                // page 0 shares its lower half with the ASCII table
                if (i == 0) {
                    System.Array.Copy(_negativeASCII, newarray, 128);
                    _negativeASCII = newarray;
                }

                _negativeUnicode[i] = newarray;
            }

            if (_negativeUnicode[i][j] == last - beforefirst)
                _negativeUnicode[i][j] = last - examine;
        }
    }
}
229 |
/*
 * Tests whether the pattern occurs in text starting exactly at index.
 * In the case-insensitive mode each text character is lower-cased with
 * the culture before it is compared.
 */
private bool MatchPattern(string text, int index) {
    if (!_caseInsensitive) {
        return String.CompareOrdinal(_pattern, 0, text, index, _pattern.Length) == 0;
    }

    if (text.Length - index < _pattern.Length) {
        return false;
    }

    TextInfo lowering = _culture.TextInfo;
    int pos = 0;
    while (pos < _pattern.Length) {
        Debug.Assert(lowering.ToLower(_pattern[pos]) == _pattern[pos], "pattern should be converted to lower case in constructor!");
        if (lowering.ToLower(text[index + pos]) != _pattern[pos]) {
            return false;
        }
        pos++;
    }
    return true;
}
249 |
/*
 * When a regex is anchored, we can do a quick IsMatch test instead of a Scan.
 * Left-to-right the pattern has to start at index, right-to-left it has to end
 * just before index; in both cases it has to fit between beglimit and endlimit.
 */
internal bool IsMatch(String text, int index, int beglimit, int endlimit) {
    if (_rightToLeft) {
        bool fits = index <= endlimit && index - beglimit >= _pattern.Length;
        return fits && MatchPattern(text, index - _pattern.Length);
    }
    else {
        bool fits = index >= beglimit && endlimit - index >= _pattern.Length;
        return fits && MatchPattern(text, index);
    }
}
268 |
269 |
/*
 * Scan uses the Boyer-Moore algorithm to find the first occurrence
 * of the specified string within text, beginning at index, and
 * constrained within beglimit and endlimit.
 *
 * The direction and case-sensitivity of the match is determined
 * by the arguments to the RegexBoyerMoore constructor.
 *
 * Returns -1 as soon as the probe position leaves the range
 * beglimit <= test < endlimit. When the whole pattern has been compared
 * successfully it returns test2, or test2 + 1 when scanning right to left.
 */
internal int Scan(String text, int index, int beglimit, int endlimit) {
    int test;
    int test2;
    int match;
    int startmatch;
    int endmatch;
    int advance;
    int defadv;
    int bump;
    char chMatch;
    char chTest;
    int[] unicodeLookup;

    // startmatch is the pattern position compared first, endmatch the one compared last;
    // test is the text position lined up with startmatch
    if (!_rightToLeft) {
        defadv = _pattern.Length;
        startmatch = _pattern.Length - 1;
        endmatch = 0;
        test = index + defadv - 1;
        bump = 1;
    }
    else {
        defadv = -_pattern.Length;
        startmatch = 0;
        endmatch = -defadv - 1;
        test = index + defadv;
        bump = -1;
    }

    chMatch = _pattern[startmatch];

    for (;;) {
        if (test >= endlimit || test < beglimit)
            return -1;

        chTest = text[test];

        if (_caseInsensitive)
            chTest = Char.ToLower(chTest, _culture);

        if (chTest != chMatch) {
            // mismatch on the first comparison: shift by the table entry of the
            // text character, or by the default advance when it has no entry
            if (chTest < 128)
                advance = _negativeASCII[chTest];
            else if (null != _negativeUnicode && (null != (unicodeLookup = _negativeUnicode[chTest >> 8])))
                advance = unicodeLookup[chTest & 0xFF];
            else
                advance = defadv;

            test += advance;
        }
        else { // if (chTest == chMatch)
            test2 = test;
            match = startmatch;

            // compare the remaining pattern characters, moving against the scan direction
            for (;;) {
                if (match == endmatch)
                    return(_rightToLeft ? test2 + 1 : test2);

                match -= bump;
                test2 -= bump;

                chTest = text[test2];

                if (_caseInsensitive)
                    chTest = Char.ToLower(chTest, _culture);

                if (chTest != _pattern[match]) {
                    advance = _positive[match];
                    // from here on test2 is reused to hold the shift derived from the
                    // _negative tables for the mismatching text character
                    if ((chTest & 0xFF80) == 0)
                        test2 = (match - startmatch) + _negativeASCII[chTest];
                    else if (null != _negativeUnicode && (null != (unicodeLookup = _negativeUnicode[chTest >> 8])))
                        test2 = (match - startmatch) + unicodeLookup[chTest & 0xFF];
                    else {
                        test += advance;
                        break;
                    }

                    // take whichever of the two shifts moves further in the scan direction
                    if (_rightToLeft ? test2 < advance : test2 > advance)
                        advance = test2;

                    test += advance;
                    break;
                }
            }
        }
    }
}
364 |
/*
 * Used when dumping for debugging: returns the pattern held by this object.
 */
public override String ToString() {
    return this._pattern;
}
371 |
#if DBG
/*
 * Debug-only listing of the pattern, the _positive table and those
 * _negativeASCII entries that differ from the pattern length.
 * Every line is prefixed with indent.
 */
public String Dump(String indent) {
    StringBuilder text = new StringBuilder();

    text.Append(indent).Append("BM Pattern: ").Append(_pattern).Append("\n");
    text.Append(indent).Append("Positive: ");
    foreach (int shift in _positive) {
        text.Append(shift.ToString(CultureInfo.InvariantCulture)).Append(" ");
    }
    text.Append("\n");

    if (_negativeASCII != null) {
        text.Append(indent).Append("Negative table\n");
        for (int code = 0; code < _negativeASCII.Length; code++) {
            if (_negativeASCII[code] == _pattern.Length) {
                continue;
            }
            string shown = Regex.Escape(Convert.ToString((char)code, CultureInfo.InvariantCulture));
            string shift = _negativeASCII[code].ToString(CultureInfo.InvariantCulture);
            text.Append(indent).Append(" ").Append(shown).Append(" ").Append(shift).Append("\n");
        }
    }

    return text.ToString();
}
#endif
395 | }
396 | }
397 |
--------------------------------------------------------------------------------
/srm/parser/RegexReplacement.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // Copyright (c) Microsoft Corporation. All rights reserved.
4 | //
5 | //------------------------------------------------------------------------------
6 |
7 | // The RegexReplacement class represents a substitution string for
8 | // use when using regexs to search/replace, etc. It's logically
// a sequence of intermixed (1) constant strings and (2) group numbers.
10 |
11 | namespace System.Text.RegularExpressions {
12 |
13 | using System.Collections;
14 | using System.Collections.Generic;
15 |
internal sealed class RegexReplacement {
    /*
     * Since RegexReplacement shares the same parser as Regex,
     * the constructor takes a RegexNode which is a concatenation
     * of constant strings and backreferences.
     *
     * Adjacent constant pieces are merged into one string. Each merged string
     * and each backreference becomes one entry of _rules, in order.
     */
#if SILVERLIGHT
    internal RegexReplacement(String rep, RegexNode concat, Dictionary<Int32, Int32> _caps) {
#else
    internal RegexReplacement(String rep, RegexNode concat, Hashtable _caps) {
#endif
        _rep = rep;

        if (concat.Type() != RegexNode.Concatenate)
            throw new ArgumentException(SR.GetString(SR.ReplacementError));

        StringBuilder pending = new StringBuilder();
        List<String> constants = new List<String>();
        List<Int32> steps = new List<Int32>();

        for (int i = 0; i < concat.ChildCount(); i++) {
            RegexNode piece = concat.Child(i);
            int kind = piece.Type();

            if (kind == RegexNode.Multi) {
                pending.Append(piece._str);
            }
            else if (kind == RegexNode.One) {
                pending.Append(piece._ch);
            }
            else if (kind == RegexNode.Ref) {
                // flush the constant text collected so far before recording the group
                if (pending.Length > 0) {
                    steps.Add(constants.Count);
                    constants.Add(pending.ToString());
                    pending.Length = 0;
                }

                int slot = piece._m;
                if (_caps != null && slot >= 0)
                    slot = (int)_caps[slot];

                // groups are stored as negative numbers below the special insertion codes
                steps.Add(-Specials - 1 - slot);
            }
            else {
                throw new ArgumentException(SR.GetString(SR.ReplacementError));
            }
        }

        // constant text after the last backreference
        if (pending.Length > 0) {
            steps.Add(constants.Count);
            constants.Add(pending.ToString());
        }

        _strings = constants;
        _rules = steps;
    }

    internal String _rep;
    internal List<String> _strings;   // table of string constants
    internal List<Int32> _rules;      // negative -> group #, positive -> string #

    // constants for special insertion patterns

    internal const int Specials = 4;
    internal const int LeftPortion = -1;
    internal const int RightPortion = -2;
    internal const int LastGroup = -3;
    internal const int WholeString = -4;

    /*
     * The original pattern string
     */
    internal String Pattern {
        get { return _rep; }
    }
}
99 |
100 | }
101 |
--------------------------------------------------------------------------------
/srm/parser/RegexTree.cs:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | //
3 | // Copyright (c) Microsoft Corporation. All rights reserved.
4 | //
5 | //------------------------------------------------------------------------------
6 |
7 | // RegexTree is just a wrapper for a node tree with some
8 | // global information attached.
9 |
10 | namespace System.Text.RegularExpressions {
11 |
12 | using System.Collections;
13 | using System.Collections.Generic;
14 |
15 | // PIETER: made public instead of internal for
16 | // direct access from Automata.Tests
17 | public sealed class RegexTree { // plain holder: the constructor only stores its arguments in the fields below
18 | #if SILVERLIGHT
19 | internal RegexTree(RegexNode root, Dictionary caps, Int32[] capnumlist, int captop, Dictionary capnames, String[] capslist, RegexOptions opts)
20 | #else
21 | internal RegexTree(RegexNode root, Hashtable caps, Int32[] capnumlist, int captop, Hashtable capnames, String[] capslist, RegexOptions opts)
22 | #endif
23 |
24 | {
25 | _root = root; // every argument is stored as passed; nothing is copied or validated here
26 | _caps = caps;
27 | _capnumlist = capnumlist;
28 | _capnames = capnames;
29 | _capslist = capslist;
30 | _captop = captop;
31 | _options = opts;
32 | }
33 |
34 | internal RegexNode _root; // root of the wrapped node tree
35 | #if SILVERLIGHT
36 | internal Dictionary _caps; // SILVERLIGHT builds use Dictionary where other builds use Hashtable
37 | #else
38 | internal Hashtable _caps; // NOTE(review): presumably capture number -> slot; confirm against RegexParser
39 | #endif
40 | internal Int32[] _capnumlist;
41 | #if SILVERLIGHT
42 | internal Dictionary _capnames;
43 | #else
44 | internal Hashtable _capnames; // NOTE(review): presumably capture name -> number; confirm against RegexParser
45 | #endif
46 | internal String[] _capslist;
47 | internal RegexOptions _options; // the options value handed to the constructor
48 | internal int _captop;
49 |
50 | #if DBG
51 | internal void Dump() { // DBG builds only: forwards to the root node's Dump()
52 | _root.Dump();
53 | }
54 |
55 | internal bool Debug { // DBG builds only: true when the RegexOptions.Debug flag is set in _options
56 | get {
57 | return(_options & RegexOptions.Debug) != 0;
58 | }
59 | }
60 | #endif
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/srm/parser/SR.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 |
5 | namespace System.Text.RegularExpressions
6 | {
7 | static class SR // stand-in resource table: each constant's value is simply its own identifier
8 | {
9 | public static string GetString(string s, params object[] o) // returns s unchanged; the format arguments in o are ignored
10 | {
11 | return s;
12 | }
13 |
14 | public const string ReplacementError = "ReplacementError"; // used as the ArgumentException message when a replacement pattern holds an unsupported node
15 | public const string UnexpectedOpcode = "UnexpectedOpcode";
16 | public const string TooManyParens = "TooManyParens";
17 | public const string NestedQuantify = "NestedQuantify";
18 | public const string QuantifyAfterNothing = "QuantifyAfterNothing";
19 | public const string InternalError = "InternalError";
20 | public const string IllegalRange = "IllegalRange";
21 | public const string NotEnoughParens = "NotEnoughParens";
22 | public const string BadClassInCharRange = "BadClassInCharRange";
23 | public const string SubtractionMustBeLast = "SubtractionMustBeLast";
24 | public const string ReversedCharRange = "ReversedCharRange";
25 | public const string UnterminatedBracket = "UnterminatedBracket";
26 | public const string InvalidGroupName = "InvalidGroupName";
27 | public const string CapnumNotZero = "CapnumNotZero";
28 | public const string UndefinedBackref = "UndefinedBackref";
29 | public const string MalformedReference = "MalformedReference";
30 | public const string AlternationCantHaveComment = "AlternationCantHaveComment";
31 | public const string AlternationCantCapture = "AlternationCantCapture";
32 | public const string UnrecognizedGrouping = "UnrecognizedGrouping";
33 | public const string IllegalEndEscape = "IllegalEndEscape";
34 | public const string CaptureGroupOutOfRange = "CaptureGroupOutOfRange";
35 | public const string TooFewHex = "TooFewHex";
36 | public const string MissingControl = "MissingControl";
37 | public const string UnrecognizedControl = "UnrecognizedControl";
38 | public const string UnrecognizedEscape = "UnrecognizedEscape";
39 | public const string IncompleteSlashP = "IncompleteSlashP";
40 | public const string MalformedSlashP = "MalformedSlashP";
41 | public const string IllegalCondition = "IllegalCondition";
42 | public const string TooManyAlternates = "TooManyAlternates";
43 | public const string MakeException = "MakeException";
44 | public const string UndefinedNameRef = "UndefinedNameRef";
45 | public const string UndefinedReference = "UndefinedReference";
46 | public const string UnterminatedComment = "UnterminatedComment";
47 | public const string MalformedNameRef = "MalformedNameRef";
48 | public const string UnknownProperty = "UnknownProperty";
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/srm/printing/RegexCharSetPrinter.cs:
--------------------------------------------------------------------------------
1 | using System.Text;
2 |
3 | namespace Microsoft.SRM
4 | {
5 | internal static class RegexCharSetPrinter // renders a character-set BDD as regex character-class text
6 | {
7 | internal static string ToRegexCharSet(BDD label, IUnicodeCategoryTheory categorizer, CharSetSolver solver) // returns a shorthand class, a \p{..} class, one escaped char, or a [..] set
8 | {
9 | if (categorizer.CategoryCondition(8) == label) // category 8 is DecimalDigitNumber (Nd)
10 | return @"\d";
11 | if (solver.MkNot(categorizer.CategoryCondition(8)) == label)
12 | return @"\D";
13 | if (categorizer.WordLetterCondition == label)
14 | return @"\w";
15 | if (solver.MkNot(categorizer.WordLetterCondition) == label)
16 | return @"\W";
17 | if (categorizer.WhiteSpaceCondition == label)
18 | return @"\s";
19 | if (solver.MkNot(categorizer.WhiteSpaceCondition) == label)
20 | return @"\S";
21 | for (int i = 0; i < categorizer.UnicodeCategoryStandardAbbreviations.Length; i++)
22 | if (categorizer.CategoryCondition(i) == label)
23 | {
24 | return @"\p{" + categorizer.UnicodeCategoryStandardAbbreviations[i] + "}"; // lower-case \p: the label IS the category (upper-case \P would denote its complement)
25 | }
26 |
27 | var ranges = solver.ToRanges(label); // no named class matched: fall back to explicit ranges
28 | if (ranges.Length == 1 && ranges[0].Item1 == ranges[0].Item2) // exactly one character: print it without brackets
29 | {
30 | return StringUtility.Escape((char)ranges[0].Item1);
31 | }
32 |
33 | var res = new StringBuilder("[");
34 | for (int i = 0; i < ranges.Length; i++ )
35 | {
36 | var range = ranges[i];
37 | if (range.Item1 == range.Item2) // single character
38 | res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
39 | else if (range.Item1 == range.Item2 - 1) // two adjacent characters: list both, no dash
40 | {
41 | res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
42 | res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
43 | }
44 | else // three or more characters: low-high
45 | {
46 | res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item1));
47 | res.Append("-");
48 | res.Append(StringUtility.EscapeWithNumericSpace((char)range.Item2));
49 | }
50 | }
51 | res.Append("]");
52 | return res.ToString();
53 | }
54 | }
55 | }
--------------------------------------------------------------------------------
/srm/srm.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netstandard2.0
5 | true
6 | UNSAFE
7 | Microsoft.Automata.SRM
8 | Microsoft
9 | MIT
10 | © Microsoft Corporation. All rights reserved.
11 | https://github.com/AutomataDotNet/srm
12 | High-performance .NET regex engine with predictable performance
13 | regular expression regex matching unicode
14 | icon.png
15 |
16 |
17 |
18 | true
19 | true
20 | ../scripts/35MSSharedLib1024.snk
21 | ../scripts/35MSSharedLib1024.snk
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
--------------------------------------------------------------------------------
/srm/unicode/IgnoreCaseRelationGenerator.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 | using System.Globalization;
5 | using System.IO;
6 |
7 | namespace Microsoft.SRM
8 | {
9 | public static class IgnoreCaseRelationGenerator
10 | {
11 | public static void Generate(string namespacename, string classname, string path)
12 | { // writes <path><classname>.cs containing the serialized ignore-case relation; throws ArgumentNullException for a null classname or path
13 | if (classname == null)
14 | throw new ArgumentNullException("classname");
15 | if (path == null)
16 | throw new ArgumentNullException("path");
17 |
18 | if (path != "" && !path.EndsWith("/")) // make sure a non-empty directory path ends with a separator
19 | path = path + "/";
20 |
21 | string version = System.Environment.Version.ToString();
22 |
23 | string prefix = @"///
24 | /// Automatically generated by IgnoreCaseRelationGenerator for System.Environment.Version = " + version + @"
25 | ///
26 | namespace " + namespacename + @"
27 | {
28 | internal static class " + classname + @"
29 | {";
30 |
31 | string suffix = @"}
32 | }
33 | ";
34 | FileInfo fi = new FileInfo(string.Format("{1}{0}.cs", classname, path));
35 | if (fi.Exists)
36 | fi.IsReadOnly = false; // clear the read-only flag so an existing file can be overwritten
37 | using (StreamWriter sw = new StreamWriter(string.Format("{1}{0}.cs", classname, path)))
38 | {
39 | sw.WriteLine(prefix);
40 | CreateUlongArray(sw);
41 | //CreateStringArray(sw);
42 |
43 | sw.WriteLine(suffix);
44 | } // the using block flushes and closes the file even when one of the writes above throws
45 | }
46 |
47 | private static void CreateUlongArray(StreamWriter sw) // emits the 'ignorecase' ulong[] field holding the serialized relation BDD
48 | {
49 | sw.WriteLine("/// ");
50 | sw.WriteLine("/// Serialized BDD for mapping characters to their case-ignoring equivalence classes.");
51 | sw.WriteLine("/// ");
52 | sw.WriteLine("public static ulong[] ignorecase = new ulong[]{");
53 | CharSetSolver solver = new CharSetSolver();
54 |
55 | Dictionary ignoreCase = ComputeIgnoreCaseDistionary(solver); // character -> BDD of its equivalence class
56 |
57 | BDD ignorecase = solver.False;
58 | foreach (var kv in ignoreCase)
59 | {
60 | var a = solver.MkCharSetFromRange(kv.Key, kv.Key); // singleton set holding just the key character
61 | var b = kv.Value;
62 | ignorecase = ignorecase | (a << 16) & b; // '&' binds tighter than '|', so this accumulates ((a << 16) & b)
63 | }
64 | var ignorecaseArray = solver.Serialize(ignorecase);
65 | for (int i = 0; i < ignorecaseArray.Length; i++)
66 | sw.WriteLine("0x{0:X16},", ignorecaseArray[i]); // one 16-digit hexadecimal literal per line
67 |
68 | sw.WriteLine("};"); //end of array
69 | }
70 |
71 | private static Dictionary ComputeIgnoreCaseDistionary(CharSetSolver solver) // builds the map character -> BDD of all characters treated as equal to it when case is ignored
72 | {
73 | var ignoreCase = new Dictionary();
74 | for (uint i = 0; i <= 0xFFFF; i++) // visits every 16-bit character code
75 | {
76 | char c = (char)i;
77 | char cU = char.ToUpper(c); // (char.IsLetter(char.ToUpper(c)) ? char.ToUpper(c) : c);
78 | char cL = char.ToLower(c); // (char.IsLetter(char.ToLower(c)) ? char.ToLower(c) : c);
79 | if (c != cU || c != cL || cU != cL) // only characters that have some case variant are considered
80 | {
81 | //make sure that the regex engine considers c as being equivalent to cU and cL, else ignore c
82 | //in some cases c != cU but the regex engine does not consider the characters equivalent wrt the ignore-case option.
83 | //These characters are:
84 | //c=\xB5,cU=\u039C
85 | //c=\u0131,cU=I
86 | //c=\u017F,cU=S
87 | //c=\u0345,cU=\u0399
88 | //c=\u03C2,cU=\u03A3
89 | //c=\u03D0,cU=\u0392
90 | //c=\u03D1,cU=\u0398
91 | //c=\u03D5,cU=\u03A6
92 | //c=\u03D6,cU=\u03A0
93 | //c=\u03F0,cU=\u039A
94 | //c=\u03F1,cU=\u03A1
95 | //c=\u03F5,cU=\u0395
96 | //c=\u1E9B,cU=\u1E60
97 | //c=\u1FBE,cU=\u0399
98 | if (System.Text.RegularExpressions.Regex.IsMatch(cU.ToString() + cL.ToString(), "^(?i:" + StringUtility.Escape(c) + ")+$")) // the two-character string cU+cL must match c case-insensitively in full
99 | {
100 | BDD equiv = solver.False;
101 |
102 | if (ignoreCase.ContainsKey(c)) // merge with any classes already recorded for c, cU and cL
103 | equiv = equiv | ignoreCase[c];
104 | if (ignoreCase.ContainsKey(cU))
105 | equiv = equiv | ignoreCase[cU];
106 | if (ignoreCase.ContainsKey(cL))
107 | equiv = equiv | ignoreCase[cL];
108 |
109 | equiv = equiv | solver.MkCharSetFromRange(c, c) | solver.MkCharSetFromRange(cU, cU) | solver.MkCharSetFromRange(cL, cL);
110 |
111 | foreach (char d in solver.GenerateAllCharacters(equiv)) // every member of the merged class now maps to the merged class
112 | ignoreCase[d] = equiv;
113 | }
114 | //else
115 | //{
116 | // outp += "c=" + StringUtility.Escape(c) + "," + "cU=" + StringUtility.Escape(cU);
117 | // Console.WriteLine("c=" + StringUtility.Escape(c) + "," + "cL=" + StringUtility.Escape(cL) + "," + "cU=" + StringUtility.Escape(cU));
118 | //}
119 | }
120 | }
121 | return ignoreCase;
122 | }
123 |
124 | private static void CreateStringArray(StreamWriter sw) // alternative emitter (one string per equivalence class); its only call in Generate is commented out
125 | {
126 | sw.WriteLine("/// ");
127 | sw.WriteLine("/// Each string correponds to an equivalence class of characters when case is ignored.");
128 | sw.WriteLine("/// ");
129 | sw.WriteLine("public static string[] ignorecase = new string[]{");
130 | CharSetSolver solver = new CharSetSolver();
131 |
132 | Dictionary ignoreCase = ComputeIgnoreCaseDistionary(solver);
133 |
134 | HashSet done = new HashSet();
135 | foreach (var kv in ignoreCase)
136 | if (done.Add(kv.Value)) // Add returns false for a class already written, so each class is emitted once
137 | {
138 | var ranges = solver.ToRanges(kv.Value);
139 | List s = new List();
140 | for (int i = 0; i < ranges.Length; i++)
141 | {
142 | var l = (int)ranges[i].Item1;
143 | var h = (int)ranges[i].Item2;
144 | for (int j = l; j <= h; j++) // expand the inclusive range l..h into individual characters
145 | s.Add((char)j);
146 | }
147 | var str = StringUtility.Escape(new String(s.ToArray())); // Escape(string) also wraps the result in double quotes
148 | sw.WriteLine(@"{0},", str);
149 | }
150 | sw.WriteLine("};"); //end of array
151 | }
152 | };
153 | }
154 |
155 |
156 |
--------------------------------------------------------------------------------
/srm/unicode/IgnoreCaseTransformer.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Microsoft.SRM
8 | {
9 | internal class IgnoreCaseTransformer
10 | {
11 | BDD IgnoreCaseRel;
12 | BDD domain;
13 | CharSetSolver solver;
14 |
15 | public IgnoreCaseTransformer(CharSetSolver charSetSolver)
16 | {
17 | solver = charSetSolver;
18 | IgnoreCaseRel = solver.Deserialize(Microsoft.SRM.Generated.IgnoreCaseRelation.ignorecase);
19 | domain = IgnoreCaseRel >> 16; // NOTE(review): presumably the set of characters that have a case variant; confirm against BDD operator >>
20 | }
21 |
22 | /// <summary>
23 | /// Returns bdd extended with the case variants of its characters; bdd is returned as-is when it shares nothing with the domain.
24 | /// </summary>
25 | public BDD Apply(BDD bdd)
26 | {
27 | // Guard: nothing to add when bdd and the domain do not intersect.
28 | if ((domain & bdd).IsEmpty)
29 | {
30 | return bdd;
31 | }
32 |
33 | // Look the characters up in the relation and union the result with the input.
34 | return ((bdd & IgnoreCaseRel) >> 16) | bdd;
35 | }
36 |
37 | public bool IsInDomain(char c)
38 | {
39 | // True exactly when the singleton set for c intersects the domain.
40 | BDD singleton = solver.MkCharConstraint(c);
41 | bool disjoint = (singleton & domain).IsEmpty;
42 |
43 | return !disjoint;
44 | }
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/srm/unicode/UnicodeCategoryRangesGenerator.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Text;
4 | using System.Globalization;
5 | using System.IO;
6 |
7 | namespace Microsoft.SRM
8 | {
9 | /// <summary>
10 | /// Utility for generating Unicode category ranges and corresponding binary decision diagrams
11 | /// </summary>
12 | public static class UnicodeCategoryRangesGenerator
13 | {
14 | /// <summary>
15 | /// Create a file classname.cs in the directory path.
16 | /// The file contains the static class with name classname and has namespace namespacename.
17 | /// The class has static fields that map unicode categories to their character ranges
18 | /// and provide whitespace ranges.
19 | /// The fields are for the ASCII case (7 bits), CP437 case (8 bits) and for UTF16 (16 bits).
20 | /// Overwrites an existing file, even if the existing file is write protected.
21 | /// </summary>
22 | /// <param name="namespacename">namespace for the class</param>
23 | /// <param name="classname">name of the class</param>
24 | /// <param name="path">path where the file is written</param>
25 | public static void Generate(string namespacename, string classname, string path)
26 | {
27 | if (classname == null)
28 | throw new ArgumentNullException("classname");
29 | if (path == null)
30 | throw new ArgumentNullException("path");
31 |
32 | if (path != "" && !path.EndsWith("/")) // make sure a non-empty directory path ends with a separator
33 | path = path + "/";
34 |
35 | string version = System.Environment.Version.ToString();
36 |
37 | string prefix = @"///
38 | /// Automatically generated by UnicodeCategoryRangesGenerator for System.Environment.Version = " + version + @"
39 | ///
40 | namespace " + namespacename + @"
41 | {
42 | internal static class " + classname + @"
43 | {";
44 |
45 | string suffix = @"}
46 | }
47 | ";
48 | FileInfo fi = new FileInfo(string.Format("{1}{0}.cs", classname, path));
49 | if (fi.Exists)
50 | fi.IsReadOnly = false; // clear the read-only flag so an existing file can be overwritten
51 | using (StreamWriter sw = new StreamWriter(string.Format("{1}{0}.cs", classname, path)))
52 | {
53 | sw.WriteLine(prefix);
54 | sw.WriteLine("#region ASCII");
55 | WriteRangeFields(BitWidth.BV7, sw, "ASCII");
56 | sw.WriteLine("#endregion");
57 | sw.WriteLine();
58 |
59 | sw.WriteLine("#region CP437");
60 | WriteRangeFields(BitWidth.BV8, sw, "CP437");
61 | sw.WriteLine("#endregion");
62 | sw.WriteLine();
63 |
64 | sw.WriteLine("#region Unicode (UTF16)");
65 | WriteRangeFields(BitWidth.BV16, sw, "Unicode");
66 | sw.WriteLine("#endregion");
67 | sw.WriteLine();
68 |
69 | sw.WriteLine(suffix);
70 | } // the using block flushes and closes the file even when one of the writes above throws
71 | }
72 |
73 | private static void WriteRangeFields(BitWidth encoding, StreamWriter sw, string field) // writes the range/BDD tables for one encoding; 'field' is the prefix of the generated field names
74 | {
75 | int bits = (int)encoding; // the enum value is used directly as the number of bits
76 | int maxChar = (1 << bits) - 1;
77 | var catMap = new Dictionary();
78 | for (int c = 0; c < 30; c++) // pre-create an (initially empty) range list for each of the 30 category codes
79 | catMap[(UnicodeCategory)c] = new Ranges();
80 | Ranges whitespace = new Ranges();
81 | Ranges wordcharacter = new Ranges();
82 | for (int i = 0; i <= maxChar; i++)
83 | {
84 | char ch = (char)i;
85 | if (char.IsWhiteSpace(ch))
86 | whitespace.Add(i);
87 | UnicodeCategory cat = char.GetUnicodeCategory(ch);
88 | catMap[cat].Add(i);
89 | int catCode = (int)cat;
90 | //word-character ranges are only collected (and only written, below) for the 7-bit table
91 | if (bits == 7)
92 | if (catCode == 0 || catCode == 1 || catCode == 2 || catCode == 3 || catCode == 4 || catCode == 5 || catCode == 8 || catCode == 18)
93 | wordcharacter.Add(i);
94 | }
95 | //generate bdd reprs for each of the category ranges
96 | BDD[] catBDDs = new BDD[30];
97 | CharSetSolver bddb = new CharSetSolver(encoding);
98 | for (int c = 0; c < 30; c++)
99 | catBDDs[c] = bddb.MkBddForIntRanges(catMap[(UnicodeCategory)c].ranges);
100 |
101 | BDD whitespaceBdd = bddb.MkBddForIntRanges(whitespace.ranges);
102 |
103 | //union of categories 0,1,2,3,4,5,8,18
104 | //(in .NET 3.5 category 5 was NOT a word character; it is included here)
105 | BDD wordCharBdd = bddb.MkOr(catBDDs[0],
106 | bddb.MkOr(catBDDs[1],
107 | bddb.MkOr(catBDDs[2],
108 | bddb.MkOr(catBDDs[3],
109 | bddb.MkOr(catBDDs[4],
110 | bddb.MkOr(catBDDs[5],
111 | bddb.MkOr(catBDDs[8], catBDDs[18])))))));
112 | if (bits == 7)
113 | {
114 | sw.WriteLine(@"///
115 | /// Array of 30 UnicodeCategory ranges. Each entry is a pair of integers.
116 | /// corresponding to the lower and upper bounds of the unicodes of the characters
117 | /// that have the given UnicodeCategory code (between 0 and 29).
118 | /// ");
119 | sw.WriteLine("public static int[][][] " + field + " = new int[][][]{");
120 | for (UnicodeCategory c = 0; (int)c < 30; c++) // explicit code order: the table is indexed by category code and Dictionary key order is unspecified
121 | {
122 | sw.WriteLine("//{0}({1}):", c, (int)c);
123 | if (catMap[c].Count == 0)
124 | sw.WriteLine("null,");
125 | else
126 | {
127 | sw.WriteLine("new int[][]{");
128 | foreach (int[] range in catMap[c].ranges)
129 | sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
130 | sw.WriteLine("},");
131 | }
132 | }
133 | sw.WriteLine("};");
134 | }
135 |
136 | sw.WriteLine(@"///
137 | /// Compact BDD encodings of the categories.
138 | /// ");
139 | sw.WriteLine("public static int[][] " + field + "Bdd = new int[][]{");
140 | for (UnicodeCategory c = 0; (int)c < 30; c++) // explicit code order, same reason as for the range table above
141 | {
142 | sw.WriteLine("//{0}({1}):", c, (int)c);
143 | BDD catBdd = catBDDs[(int)c];
144 | if (catBdd == null || catBdd.IsEmpty) // an empty category is written as a null entry
145 | sw.WriteLine("null, //false");
146 | else if (catBdd.IsFull)
147 | sw.WriteLine("new int[]{0,0}, //true");
148 | else
149 | {
150 | sw.WriteLine("new int[]{");
151 | foreach (var arc in bddb.SerializeCompact(catBdd))
152 | sw.WriteLine("{0},", arc);
153 | sw.WriteLine("},");
154 | }
155 | }
156 | sw.WriteLine("};");
157 |
158 | if (bits == 7) // explicit whitespace / word-character range tables exist only for the 7-bit encoding
159 | {
160 | sw.WriteLine(@"///
161 | /// Whitespace character ranges.
162 | /// ");
163 | sw.WriteLine("public static int[][] " + field + "Whitespace = new int[][]{");
164 | foreach (int[] range in whitespace.ranges)
165 | sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
166 | sw.WriteLine("};");
167 |
168 | sw.WriteLine(@"///
169 | /// Word character ranges.
170 | /// ");
171 | sw.WriteLine("public static int[][] " + field + "WordCharacter = new int[][]{");
172 | foreach (int[] range in wordcharacter.ranges)
173 | sw.WriteLine(" new int[]{" + string.Format("{0},{1}", range[0], range[1]) + "},");
174 | sw.WriteLine("};");
175 | }
176 |
177 | sw.WriteLine(@"///
178 | /// Compact BDD encoding of the whitespace characters.
179 | /// ");
180 | sw.WriteLine("public static int[] " + field + "WhitespaceBdd = new int[]{");
181 | foreach (var arc in bddb.SerializeCompact(whitespaceBdd))
182 | sw.WriteLine("{0},", arc);
183 | sw.WriteLine("};");
184 |
185 | sw.WriteLine(@"///
186 | /// Compact BDD encoding of word characters
187 | /// ");
188 | sw.WriteLine("public static int[] " + field + "WordCharacterBdd = new int[]{");
189 | foreach (var arc in bddb.SerializeCompact(wordCharBdd))
190 | sw.WriteLine("{0},", arc);
191 | sw.WriteLine("};");
192 | }
193 | }
194 |
195 | /// <summary>
196 | /// Used internally to collect integers into inclusive [low, high] ranges for serialization.
197 | /// </summary>
198 | internal class Ranges
199 | {
200 | public List ranges = new List();
201 | public Ranges()
202 | {
203 | }
204 | public void Add(int n)
205 | {
206 | // Grow the first stored range that ends right before n; otherwise start a new one.
207 | foreach (int[] range in ranges)
208 | {
209 | if (range[1] != (n - 1))
210 | continue;
211 | range[1] = n;
212 | return;
213 | }
214 | ranges.Add(new int[] { n, n });
215 | }
216 |
217 | public int Count
218 | {
219 | get { return ranges.Count; } // number of ranges, not number of integers added
220 | }
221 | }
222 | }
223 |
--------------------------------------------------------------------------------
/srm/unicode/UnicodeCategoryTheory.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using Microsoft.SRM.Generated;
6 |
7 | namespace Microsoft.SRM
8 | {
9 | /// <summary>
10 | /// Maps unicode categories to corresponding character predicates.
11 | /// </summary>
12 | /// <typeparam name="PRED">predicates</typeparam>
13 | public interface IUnicodeCategoryTheory
14 | {
15 | /// <summary>
16 | /// Gets the unicode category condition for unicode category cat, that must be an integer between 0 and 29
17 | /// </summary>
18 | PRED CategoryCondition(int cat);
19 |
20 | /// <summary>
21 | /// Gets the white space condition
22 | /// </summary>
23 | PRED WhiteSpaceCondition { get; }
24 |
25 | /// <summary>
26 | /// Gets the word letter (\w) condition
27 | /// </summary>
28 | PRED WordLetterCondition { get; }
29 |
30 | string[] UnicodeCategoryStandardAbbreviations { get; } // two-letter category abbreviations, indexed by category code
31 | }
32 |
33 | internal class UnicodeCategoryTheory : IUnicodeCategoryTheory
34 | {
35 | ICharAlgebra solver; // algebra used to build every predicate in this class
36 | PRED[] catConditions = new PRED[30]; // one predicate per category code 0..29
37 | PRED whiteSpaceCondition = default(PRED); // default(PRED) is treated as "not built yet" by the getters below
38 | PRED wordLetterCondition = default(PRED);
39 |
40 | public string[] UnicodeCategoryStandardAbbreviations // returns the shared static array itself, not a copy
41 | {
42 | get
43 | {
44 | return unicodeCategoryStandardAbbreviations;
45 | }
46 | }
47 |
48 | #region unicode category abbreviations
49 | public static string[] unicodeCategoryStandardAbbreviations = new string[30]{
50 | "Lu", //0: UppercaseLetter
51 | "Ll", //1: LowercaseLetter
52 | "Lt", //2: TitlecaseLetter
53 | "Lm", //3: ModifierLetter
54 | "Lo", //4: OtherLetter
55 | "Mn", //5: NonSpacingMark
56 | "Mc", //6: SpacingCombiningMark
57 | "Me", //7: EnclosingMark
58 | "Nd", //8: DecimalDigitNumber
59 | "Nl", //9: LetterNumber
60 | "No", //10: OtherNumber
61 | "Zs", //11: SpaceSeparator
62 | "Zl", //12: LineSeparator
63 | "Zp", //13: ParagraphSeparator
64 | "Cc", //14: Control
65 | "Cf", //15: Format
66 | "Cs", //16: Surrogate
67 | "Co", //17: PrivateUse
68 | "Pc", //18: ConnectorPunctuation
69 | "Pd", //19: DashPunctuation
70 | "Ps", //20: OpenPunctuation
71 | "Pe", //21: ClosePunctuation
72 | "Pi", //22: InitialQuotePunctuation
73 | "Pf", //23: FinalQuotePunctuation
74 | "Po", //24: OtherPunctuation
75 | "Sm", //25: MathSymbol
76 | "Sc", //26: CurrencySymbol
77 | "Sk", //27: ModifierSymbol
78 | "So", //28: OtherSymbol
79 | "Cn", //29: OtherNotAssigned
80 | };
81 | #endregion
82 |
83 | public static string UnicodeCategoryPredicateName(int cat)
84 | {
85 | // "Is" followed by the UnicodeCategory member name for cat, e.g. 0 gives "IsUppercaseLetter".
86 | return string.Concat("Is", ((System.Globalization.UnicodeCategory)cat).ToString());
87 | }
88 |
89 | public UnicodeCategoryTheory(ICharAlgebra solver) // stores the algebra and builds all category predicates up front
90 | {
91 | this.solver = solver;
92 | InitializeUnicodeCategoryDefinitions(); // fills catConditions, whiteSpaceCondition and wordLetterCondition
93 | }
94 |
95 | PRED MkRangesConstraint(IEnumerable ranges)
96 | {
97 | PRED union = solver.False; // start empty; an empty 'ranges' therefore yields solver.False
98 | foreach (var bounds in ranges) // bounds[0] is the low end, bounds[1] the high end of one range
99 | union = solver.MkOr(union, solver.MkRangeConstraint((char)bounds[0], (char)bounds[1]));
100 | return union;
101 | }
102 |
103 | private void InitializeUnicodeCategoryDefinitions() // builds all 30 category predicates plus the whitespace and word-letter predicates for the solver's encoding
104 | {
105 | if (solver.Encoding == BitWidth.BV7)
106 | {
107 | //use ranges directly
108 | for (int i = 0; i < 30; i++)
109 | if (UnicodeCategoryRanges.ASCII[i] == null) // a null table entry stands for an empty category
110 | catConditions[i] = solver.False;
111 | else
112 | catConditions[i] = solver.MkCharPredicate(
113 | UnicodeCategoryPredicateName(i), MkRangesConstraint(UnicodeCategoryRanges.ASCII[i]));
114 |
115 | whiteSpaceCondition = solver.MkCharPredicate(
116 | "IsWhitespace", MkRangesConstraint(UnicodeCategoryRanges.ASCIIWhitespace));
117 | wordLetterCondition = solver.MkCharPredicate(
118 | "IsWordletter", MkRangesConstraint(UnicodeCategoryRanges.ASCIIWordCharacter));
119 | }
120 | else if (solver.Encoding == BitWidth.BV8)
121 | {
122 | //use BDDs
123 | for (int i = 0; i < 30; i++)
124 | if (UnicodeCategoryRanges.CP437Bdd[i] == null) // a null table entry stands for an empty category
125 | catConditions[i] = solver.False;
126 | else
127 | catConditions[i] = solver.MkCharPredicate(
128 | UnicodeCategoryPredicateName(i),
129 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.CP437Bdd[i])));
130 | whiteSpaceCondition = solver.MkCharPredicate("IsWhitespace",
131 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.CP437WhitespaceBdd)));
132 | wordLetterCondition = solver.MkCharPredicate("IsWordletter",
133 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.CP437WordCharacterBdd)));
134 | }
135 | else
136 | {
137 | //use BDDs
138 | for (int i = 0; i < 30; i++) // NOTE(review): no null check here, unlike the two branches above; confirm UnicodeBdd has no null entries
139 | catConditions[i] = solver.MkCharPredicate(
140 | UnicodeCategoryPredicateName(i),
141 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.UnicodeBdd[i])));
142 | whiteSpaceCondition = solver.MkCharPredicate("IsWhitespace",
143 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.UnicodeWhitespaceBdd)));
144 | wordLetterCondition = solver.MkCharPredicate("IsWordletter",
145 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.UnicodeWordCharacterBdd)));
146 | }
147 | }
148 |
149 | #region IUnicodeCategoryTheory Members
150 |
151 | public PRED CategoryCondition(int i) // returns the predicate for category code i; i indexes a 30-element array and is not range-checked here
152 | {
153 | if (object.Equals(catConditions[i], default(PRED))) //uninitialized
154 | { // the constructor already fills catConditions, so this rebuild runs only if the stored value equals default(PRED)
155 | if (solver.Encoding == BitWidth.BV7)
156 | {
157 | if (UnicodeCategoryRanges.ASCII[i] == null)
158 | catConditions[i] = solver.False;
159 | else
160 | catConditions[i] = solver.MkCharPredicate(
161 | UnicodeCategoryPredicateName(i), MkRangesConstraint(UnicodeCategoryRanges.ASCII[i]));
162 | }
163 | else if (solver.Encoding == BitWidth.BV8)
164 | {
165 | //use BDDs
166 | if (UnicodeCategoryRanges.CP437Bdd[i] == null)
167 | catConditions[i] = solver.False;
168 | else
169 | catConditions[i] = solver.MkCharPredicate(
170 | UnicodeCategoryPredicateName(i),
171 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.CP437Bdd[i])));
172 | }
173 | else
174 | {
175 | catConditions[i] = solver.MkCharPredicate(
176 | UnicodeCategoryPredicateName(i),
177 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.UnicodeBdd[i])));
178 | }
179 | }
180 | return catConditions[i];
181 | }
182 |
183 | public PRED WhiteSpaceCondition // whitespace predicate; rebuilt only if the stored value equals default(PRED)
184 | {
185 | get {
186 | if (object.Equals(whiteSpaceCondition, default(PRED)))
187 | {
188 | if (solver.Encoding == BitWidth.BV7)
189 | {
190 | whiteSpaceCondition = solver.MkCharPredicate(
191 | "IsWhitespace", MkRangesConstraint(UnicodeCategoryRanges.ASCIIWhitespace)); // 7-bit: built from the integer range table
192 | }
193 | else if (solver.Encoding == BitWidth.BV8)
194 | {
195 | //use BDDs
196 | whiteSpaceCondition = solver.MkCharPredicate("IsWhitespace",
197 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.CP437WhitespaceBdd)));
198 | }
199 | else
200 | {
201 | //use BDDs
202 | whiteSpaceCondition = solver.MkCharPredicate("IsWhitespace",
203 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.UnicodeWhitespaceBdd)));
204 | }
205 | }
206 | return whiteSpaceCondition;
207 | }
208 | }
209 |
210 | public PRED WordLetterCondition // word-letter (\w) predicate; rebuilt only if the stored value equals default(PRED)
211 | {
212 | get {
213 | if (object.Equals(wordLetterCondition, default(PRED)))
214 | {
215 | if (solver.Encoding == BitWidth.BV7)
216 | {
217 | wordLetterCondition = solver.MkCharPredicate(
218 | "IsWordletter", MkRangesConstraint(UnicodeCategoryRanges.ASCIIWordCharacter)); // 7-bit: built from the integer range table
219 | }
220 | else if (solver.Encoding == BitWidth.BV8)
221 | {
222 | //use BDDs
223 | wordLetterCondition = solver.MkCharPredicate("IsWordletter",
224 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.CP437WordCharacterBdd)));
225 | }
226 | else
227 | {
228 | //use BDDs
229 | wordLetterCondition = solver.MkCharPredicate("IsWordletter",
230 | solver.ConvertFromCharSet(solver.CharSetProvider.DeserializeCompact(UnicodeCategoryRanges.UnicodeWordCharacterBdd)));
231 | }
232 | }
233 | return wordLetterCondition;
234 | }
235 | }
236 |
237 | #endregion
238 | }
239 | }
240 |
--------------------------------------------------------------------------------
/srm/utils/StringUtility.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 |
6 | namespace Microsoft.SRM
7 | {
8 | ///
9 | /// Provides some character escaping routines for strings.
10 | ///
11 | internal static class StringUtility
12 | {
13 | #region Escaping strings
14 | /////
15 | /////
16 | /////
17 |
18 | /// <summary>
19 | /// Make an escaped string from a character.
20 | /// </summary>
21 | /// <param name="c">given character</param>
22 | /// <param name="useNumericRepresentationOnly">if true then use numeric hexadecimal escaping of all characters</param>
23 | /// <returns>the escaped text for c</returns>
24 | public static string Escape(char c, bool useNumericRepresentationOnly = false)
25 | {
26 | int code = (int)c;
27 |
28 | if (useNumericRepresentationOnly) // \xNN for codes up to 0xFF, \uNNNN above; a leading 0 is added where the hex text is one digit short
29 | {
30 | if (code <= 0xF)
31 | return string.Format("\\x0{0:X}", code);
32 | else if (code <= 0xFF)
33 | return string.Format("\\x{0:X}", code);
34 | else if (code <= 0xFFF)
35 | return string.Format("\\u0{0:X}", code);
36 | else
37 | return string.Format("\\u{0:X}", code);
38 | }
39 |
40 | if (code > 255) // above 0xFF: always the four-digit \u form
41 | return ToUnicodeRepr(code);
42 |
43 | if (code <= 255 && code > 126) // 127..255: two-digit \x form
44 | return string.Format("\\x{0:X}", code);
45 |
46 | switch (c) // from here on code is in 0..126
47 | {
48 | case '\0':
49 | return @"\0";
50 | //case '\a':
51 | // return @"\a";
52 | //case '\b':
53 | // return @"\b";
54 | //case '\t':
55 | // return @"\t";
56 | //case '\r':
57 | // return @"\r";
58 | //case '\v':
59 | // return @"\v";
60 | //case '\f':
61 | // return @"\f";
62 | case '\n':
63 | return @"\n";
64 | case '=': // these four punctuation characters are returned unescaped
65 | return "=";
66 | case ';':
67 | return ";";
68 | case '/':
69 | return "/";
70 | case '!':
71 | return "!";
72 | //case '>':
73 | // return ">";
74 | //case '\"':
75 | // return "\\\"";
76 | //case '\'':
77 | // return "\\\'";
78 | //case ' ':
79 | // return " ";
80 | //case '\\' :
81 | // return @"\\";
82 | default:
83 | if (code <= 15) // single hex digit: pad to \x0N
84 | {
85 | return string.Format("\\x0{0:X}", code);
86 | }
87 | else if (!(((int)'a') <= code && code <= ((int)'z'))
88 | && !(((int)'A') <= code && code <= ((int)'Z'))
89 | && !(((int)'0') <= code && code <= ((int)'9'))) // anything else that is not an ASCII letter or digit (this includes the space) becomes \xNN
90 | {
91 | return string.Format("\\x{0:X}", code);
92 | }
93 | else
94 | return c.ToString(); // ASCII letters and digits are returned as-is
95 | }
96 | }
97 |
98 | /// <summary>
99 | /// Make an escaped string from a character; the space character is written in \x hexadecimal form.
100 | /// </summary>
101 | internal static string EscapeWithNumericSpace(char c)
102 | {
103 | // Every character other than the space is handled by Escape(char).
104 | if (c != ' ')
105 | return Escape(c);
106 |
107 | return string.Format("\\x{0:X}", (int)c);
108 | }
109 |
110 | static string ToUnicodeRepr(int i)
111 | {
112 | // \u followed by the upper-case hex digits of i, zero-padded on the left
113 | // to four digits; longer hex text is kept unchanged.
114 | string hex = string.Format("{0:X}", i);
115 | switch (hex.Length)
116 | {
117 | case 1: return "\\u000" + hex;
118 | case 2: return "\\u00" + hex;
119 | case 3: return "\\u0" + hex;
120 | default: return "\\u" + hex;
121 | }
122 | }
123 |
124 | /// <summary>
125 | /// Makes an escaped string from a literal string s: every character goes through
126 | /// Escape(char) and the result is wrapped in double quotes.
127 | /// </summary>
128 | public static string Escape(string s)
129 | {
130 | // The buffer starts out holding the opening quote.
131 | var quoted = new StringBuilder("\"");
132 | foreach (char ch in s)
133 | quoted.Append(Escape(ch));
134 |
135 | // Add the closing quote and produce the final string.
136 | quoted.Append('"');
137 | return quoted.ToString();
138 | }
139 |
/// <summary>
/// Converts any escaped characters in the input string back to their literal form.
/// Delegates directly to System.Text.RegularExpressions.Regex.Unescape.
/// </summary>
/// <param name="s">The string containing escaped characters.</param>
/// <returns>The unescaped string.</returns>
public static string Unescape(string s)
    => System.Text.RegularExpressions.Regex.Unescape(s);
148 | #endregion
149 |
/// <summary>
/// Serializes <paramref name="s"/> as the decimal codes of its characters, separated
/// by commas. A null or empty input is returned unchanged.
/// </summary>
/// <param name="s">The string to serialize.</param>
/// <returns>A comma-separated list of character codes, or <paramref name="s"/> itself when it is null or empty.</returns>
internal static string SerializeStringToCharCodeSequence(string s)
{
    if (string.IsNullOrEmpty(s))
        return s;

    // One decimal code per character, in input order.
    var codes = new string[s.Length];
    for (int i = 0; i < s.Length; i++)
    {
        codes[i] = ((int)s[i]).ToString();
    }
    return string.Join(",", codes);
}
158 |
/// <summary>
/// Rebuilds a string from a comma-separated list of decimal character codes.
/// A null or empty input is returned unchanged.
/// </summary>
/// <param name="s">The comma-separated character codes.</param>
/// <returns>The decoded string, or <paramref name="s"/> itself when it is null or empty.</returns>
internal static string DeserializeStringFromCharCodeSequence(string s)
{
    if (string.IsNullOrEmpty(s))
        return s;

    string[] codes = s.Split(',');
    char[] chars = new char[codes.Length];
    for (int i = 0; i < codes.Length; i++)
    {
        // Each field is parsed as an integer and cast to the character with that code.
        chars[i] = (char)int.Parse(codes[i]);
    }
    return new String(chars);
}
167 | }
168 | }
169 |
--------------------------------------------------------------------------------
/tests/SerializationTests.cs:
--------------------------------------------------------------------------------
1 |
2 | using System.Runtime.Serialization.Formatters.Binary;
3 | using System.Text.RegularExpressions;
4 | using Microsoft.VisualStudio.TestTools.UnitTesting;
5 |
6 | namespace Microsoft.SRM
7 | {
/// <summary>
/// Checks that Microsoft.SRM.Regex reports the expected matches and that a regex
/// restored from its serialized form produces the same matches on the same input.
/// </summary>
[TestClass]
public class RegexMatcherTests
{
    [TestMethod]
    public void TestSRM()
    {
        AssertMatchesAndRoundTrip(@"a[^ab]+b", "xaTAG1bxaTAG2bc", (1, 6), (8, 6));
    }

    [TestMethod]
    public void TestSRM_singlePass()
    {
        AssertMatchesAndRoundTrip(@"abcbc1|cbc2", "xxxabcbc1yyyccbc2xxx", (3, 6), (13, 4));
    }

    [TestMethod]
    public void TestSRM_singletonSeq()
    {
        AssertMatchesAndRoundTrip(@"a[bB]c", "xxxabcyyyaBcxxx", (3, 3), (9, 3));
    }

    /// <summary>
    /// Matches <paramref name="pattern"/> against <paramref name="input"/>, verifies the
    /// index and length of every match, then serializes the regex to a file, deserializes
    /// it and verifies that the restored regex yields an equal match collection.
    /// </summary>
    /// <param name="pattern">The regex pattern under test.</param>
    /// <param name="input">The text to search.</param>
    /// <param name="expected">The expected (index, length) of each match, in order.</param>
    private static void AssertMatchesAndRoundTrip(
        string pattern,
        string input,
        params (int Index, int Length)[] expected)
    {
        var sr = new Microsoft.SRM.Regex(pattern);
        var matches = sr.Matches(input);

        // AreEqual reports expected/actual values on failure, unlike IsTrue(a == b).
        Assert.AreEqual(expected.Length, matches.Count, "unexpected number of matches");
        for (int i = 0; i < expected.Length; i++)
        {
            Assert.AreEqual(expected[i].Index, matches[i].Index, "unexpected index of match " + i);
            Assert.AreEqual(expected[i].Length, matches[i].Length, "unexpected length of match " + i);
        }

        // Use a unique temporary file per call so tests neither share state through
        // a fixed file name nor leave artifacts in the working directory.
        string path = System.IO.Path.Combine(
            System.IO.Path.GetTempPath(),
            System.IO.Path.GetRandomFileName());
        try
        {
            sr.Serialize(path);
            var sr2 = Microsoft.SRM.Regex.Deserialize(path);
            var matches2 = sr2.Matches(input);
            CollectionAssert.AreEqual(matches, matches2);
        }
        finally
        {
            // File.Delete is a no-op when the file was never created.
            System.IO.File.Delete(path);
        }
    }
}
62 | }
--------------------------------------------------------------------------------
/tests/tests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netcoreapp3.1
5 | false
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/unicode_table_gen/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 | using System.Text;
5 | using System.Threading.Tasks;
6 |
7 | namespace Microsoft.SRM
8 | {
/// <summary>
/// Command-line entry point of the unicode_table_gen tool. It takes a single
/// argument, the target directory, and passes it to the two table generators.
/// </summary>
class Program
{
    /// <summary>
    /// Runs UnicodeCategoryRangesGenerator and IgnoreCaseRelationGenerator for the
    /// namespace "Microsoft.SRM.Generated" with the given target directory.
    /// </summary>
    /// <param name="args">The command-line arguments; exactly one is required.</param>
    /// <returns>0 after both generators have run, 1 when the argument count is wrong.</returns>
    static int Main(string[] args)
    {
        if (args.Length != 1)
        {
            // Name the required argument so the message tells the user what to pass.
            System.Console.WriteLine("usage: unicode_table_gen <target_directory>");
            return 1;
        }

        string targetDirectory = args[0];
        UnicodeCategoryRangesGenerator.Generate("Microsoft.SRM.Generated", "UnicodeCategoryRanges", targetDirectory);
        IgnoreCaseRelationGenerator.Generate("Microsoft.SRM.Generated", "IgnoreCaseRelation", targetDirectory);
        return 0;
    }
}
24 | }
25 |
--------------------------------------------------------------------------------
/unicode_table_gen/unicode_table_gen.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Exe
5 | netcoreapp3.1
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------