├── Maybe
├── assets
│ └── maybe-icon.png
├── CountMinSketch
│ ├── IncompatibleMergeException.cs
│ ├── CountMinSketchBase.cs
│ └── CountMinSketch.cs
├── BloomFilter
│ ├── IBloomFilter.cs
│ ├── ScalableBloomFilter.cs
│ ├── BloomFilter.cs
│ ├── BloomFilterBase.cs
│ └── CountingBloomFilter.cs
├── Utilities
│ ├── ByteConverter.cs
│ └── MurmurHash3.cs
├── SkipList
│ ├── Node.cs
│ └── SkipList.cs
└── Maybe.csproj
├── appveyor.yml
├── Maybe.Test
├── Maybe.Test.csproj
├── Utilities
│ └── ByteConverterTests.cs
├── BloomFilter
│ ├── ScalableBloomFilterTests.cs
│ ├── CountingBloomFilterTests.cs
│ └── BloomFilterTests.cs
├── SkipList
│ ├── NodeTests.cs
│ └── SkipListTests.cs
└── CountMinSketch
│ └── CountMinSketchTests.cs
├── LICENSE
├── Maybe.sln
├── README.md
└── .gitignore
/Maybe/assets/maybe-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmc00/Maybe/HEAD/Maybe/assets/maybe-icon.png
--------------------------------------------------------------------------------
/Maybe/CountMinSketch/IncompatibleMergeException.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace Maybe.CountMinSketch
4 | {
5 | /// <summary>
6 | /// Represent an error encountered when merging
7 | /// </summary>
8 | public class IncompatibleMergeException : Exception
9 | {
10 | /// <summary>
11 | /// Creates a new instance of this exception with a custom error message
12 | /// </summary>
13 | /// <param name="message">The message to be set on the exception; forwarded unchanged to the base Exception constructor</param>
14 | public IncompatibleMergeException(string message) : base(message) { }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
1 | version: 1.0.{build}
2 | configuration: Release
3 | dotnet_csproj:
4 | patch: true
5 | file: Maybe\Maybe.csproj
6 | version: '{version}'
7 | package_version: '{version}'
8 | assembly_version: '{version}'
9 | file_version: '{version}'
10 | informational_version: '{version}'
11 | build_script:
12 | - ps: .\build.ps1
13 | artifacts:
14 | - path: '**\Maybe*.nupkg'
15 | name: nuget-push
16 | deploy:
17 | provider: NuGet
18 | name: NuGet
19 | on:
20 | branch: master
21 | api_key:
22 | secure: N4s21dksdls5sci+5lmxc4ef6h8qQ6u1knvqGSLwSEXXXTGhjrGNbXGrHP/To6TR
23 | skip_symbols: true
24 | artifact: /Maybe.*\.nupkg/
25 |
--------------------------------------------------------------------------------
/Maybe/BloomFilter/IBloomFilter.cs:
--------------------------------------------------------------------------------
1 | namespace Maybe.BloomFilter
2 | {
3 | /// <summary>
4 | /// Generic bloom filter interface to describe basic operations for any type of bloom filter.
5 | /// </summary>
6 | /// <typeparam name="T">Type of the items added to and looked up in the filter</typeparam>
7 | public interface IBloomFilter<T>
8 | {
9 | /// <summary>
10 | /// Adds an item to the bloom filter
11 | /// </summary>
12 | /// <param name="item">The item which should be added</param>
13 | void Add(T item);
14 |
15 | /// <summary>
16 | /// Checks if this bloom filter currently contains an item
17 | /// </summary>
18 | /// <param name="item">The item for which to search in the bloom filter</param>
19 | /// <returns>False if the item is NOT in the bloom filter. True if the item MIGHT be in the bloom filter.</returns>
20 | bool Contains(T item);
21 | }
22 | }
23 |
--------------------------------------------------------------------------------
/Maybe.Test/Maybe.Test.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netcoreapp2.0
4 | true
5 | false
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | all
15 | runtime; build; native; contentfiles; analyzers
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/Maybe/Utilities/ByteConverter.cs:
--------------------------------------------------------------------------------
1 | using System.IO;
2 |
3 | namespace Maybe.Utilities
4 | {
5 | /// <summary>
6 | /// Static helper that turns arbitrary objects into their serialized byte form.
7 | /// </summary>
8 | public static class ByteConverter
9 | {
10 | /// <summary>
11 | /// Serializes <paramref name="item"/> with BinaryFormatter and returns the resulting bytes.
12 | /// </summary>
13 | /// <param name="item">Object to serialize; null is passed straight through.</param>
14 | /// <returns>The serialized bytes, or null when <paramref name="item"/> is null.</returns>
15 | public static byte[] ConvertToByteArray(object item)
16 | {
17 | if (item is null) { return null; }
18 |
19 | using (var buffer = new MemoryStream())
20 | {
21 | // NOTE(review): BinaryFormatter is obsolete and was removed in .NET 9; kept here because replacing it would change the produced bytes.
22 | var serializer = new System.Runtime.Serialization.Formatters.Binary.BinaryFormatter();
23 | serializer.Serialize(buffer, item);
24 | return buffer.ToArray();
25 | }
26 | }
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Ryan McCoy
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Maybe.Test/Utilities/ByteConverterTests.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.Serialization;
3 | using Maybe.Utilities;
4 | using Xunit;
5 |
6 | namespace Maybe.Test.Utilities
7 | {
8 | public class ByteConverterTests
9 | {
10 | [Fact]
11 | [Trait("Category", "Unit")]
12 | public void ConvertToByteArray_WithNull_ShouldReturnNull()
13 | {
14 | var bytes = ByteConverter.ConvertToByteArray(null);
15 | Assert.Null(bytes);
16 | }
17 |
18 | [Fact]
19 | [Trait("Category", "Unit")]
20 | public void ConvertToByteArray_WithNonSerializableInput_ShouldThrowException()
21 | {
22 | var test = new DontSerializeMe();
23 | Assert.Throws<SerializationException>(() => ByteConverter.ConvertToByteArray(test)); // DontSerializeMe lacks [Serializable], so the formatter must refuse it
24 | }
25 |
26 | [Fact]
27 | [Trait("Category", "Unit")]
28 | public void ConvertToByteArray_WithSerializableInput_ShouldReturnBytes()
29 | {
30 | var test = new SerializeMe();
31 | var bytes = ByteConverter.ConvertToByteArray(test);
32 | Assert.NotNull(bytes);
33 | }
34 |
35 | private class DontSerializeMe // deliberately NOT marked [Serializable]
36 | {
37 | public string Test;
38 | public int Data;
39 | }
40 |
41 | [Serializable]
42 | private class SerializeMe
43 | {
44 | public string MoreTest;
45 | public string MoreData;
46 | }
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/Maybe.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio 15
4 | VisualStudioVersion = 15.0.27130.2027
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Maybe", "Maybe\Maybe.csproj", "{18F63560-39BA-405E-B799-2C6AA5B3AA62}"
7 | EndProject
8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Maybe.Test", "Maybe.Test\Maybe.Test.csproj", "{E987735C-5184-4014-860E-725D72721ECC}"
9 | EndProject
10 | Global
11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 | Debug|Any CPU = Debug|Any CPU
13 | Release|Any CPU = Release|Any CPU
14 | EndGlobalSection
15 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
16 | {18F63560-39BA-405E-B799-2C6AA5B3AA62}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
17 | {18F63560-39BA-405E-B799-2C6AA5B3AA62}.Debug|Any CPU.Build.0 = Debug|Any CPU
18 | {18F63560-39BA-405E-B799-2C6AA5B3AA62}.Release|Any CPU.ActiveCfg = Release|Any CPU
19 | {18F63560-39BA-405E-B799-2C6AA5B3AA62}.Release|Any CPU.Build.0 = Release|Any CPU
20 | {E987735C-5184-4014-860E-725D72721ECC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
21 | {E987735C-5184-4014-860E-725D72721ECC}.Debug|Any CPU.Build.0 = Debug|Any CPU
22 | {E987735C-5184-4014-860E-725D72721ECC}.Release|Any CPU.ActiveCfg = Release|Any CPU
23 | {E987735C-5184-4014-860E-725D72721ECC}.Release|Any CPU.Build.0 = Release|Any CPU
24 | EndGlobalSection
25 | GlobalSection(SolutionProperties) = preSolution
26 | HideSolutionNode = FALSE
27 | EndGlobalSection
28 | GlobalSection(ExtensibilityGlobals) = postSolution
29 | SolutionGuid = {3372518D-BB6E-4B5E-A5F9-26AB9CBF2DE2}
30 | EndGlobalSection
31 | EndGlobal
32 |
--------------------------------------------------------------------------------
/Maybe/SkipList/Node.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace Maybe.SkipList
4 | {
5 | /// <summary>
6 | /// Represents a single node on a SkipList -- Contains a value and a set of follow up nodes at various levels.
7 | /// </summary>
8 | /// <typeparam name="T">Type of the value held by the node</typeparam>
9 | [Serializable]
10 | public class Node<T>
11 | {
12 | /// <summary>
13 | /// Nodes that follow this current node at a given level (where the array index is the level)
14 | /// </summary>
15 | public Node<T>[] Next { get; }
16 |
17 | /// <summary>
18 | /// The value of this node.
19 | /// </summary>
20 | public T Value { get; }
21 |
22 | /// <summary>
23 | /// Creates a new instance of this node.
24 | /// </summary>
25 | /// <param name="value">The value of the node.</param>
26 | /// <param name="level">Number of forward-link slots to allocate, i.e. the length of <see cref="Next"/>. Must be >= 0.</param>
27 | public Node(T value, int level)
28 | {
29 | if(level < 0) { throw new ArgumentException("Level must be >= 0!", nameof(level)); }
30 | Value = value;
31 | Next = new Node<T>[level];
32 | }
33 |
34 | /// <summary>
35 | /// Checks if this node has a link to the next node at a given level
36 | /// </summary>
37 | /// <param name="level">The level of link to search for. Must be >= 0; levels at or beyond the length of <see cref="Next"/> simply yield false.</param>
38 | /// <returns>True if the node has a link to another node at that level. Otherwise false.</returns>
39 | public bool HasNextAtLevel(int level)
40 | {
41 | if (level < 0) { throw new ArgumentException("Level must be >= 0!", nameof(level)); }
42 | return level < Next.Length && Next[level] != null;
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/Maybe.Test/BloomFilter/ScalableBloomFilterTests.cs:
--------------------------------------------------------------------------------
1 | using Maybe.BloomFilter;
2 | using System.IO;
3 | using System.Runtime.Serialization;
4 | using System.Runtime.Serialization.Formatters.Binary;
5 | using Xunit;
6 |
7 | namespace Maybe.Test.BloomFilter
8 | {
9 | public class ScalableBloomFilterTests
10 | {
11 | [Fact]
12 | [Trait("Category", "Unit")]
13 | public void Contains_WhenItemHasBeenAdded_ShouldReturnTrue()
14 | {
15 | var filter = new ScalableBloomFilter<int>(0.02);
16 | filter.Add(42);
17 | Assert.True(filter.Contains(42));
18 | }
19 |
20 | [Fact]
21 | [Trait("Category", "Unit")]
22 | public void Contains_WithFreshFilter_ShouldReturnFalse()
23 | {
24 | var filter = new ScalableBloomFilter<int>(0.02);
25 | Assert.False(filter.Contains(42));
26 | }
27 |
28 | [Fact]
29 | [Trait("Category", "Unit")]
30 | public void NumberFilters_WithThreeTimesFirstCapacity_ShouldBeTwo()
31 | {
32 | var filter = new ScalableBloomFilter<int>(0.02);
33 | for (var i = 0; i < 3*ScalableBloomFilter<int>.MinimumCapacity; i++) // first filter holds MinimumCapacity items, the second twice that
34 | {
35 | filter.Add(i);
36 | }
37 | Assert.Equal(2, filter.NumberFilters);
38 | }
39 |
40 | [Fact]
41 | [Trait("Category", "Unit")]
42 | public void Contains_WhenItemHasBeenAdded_AndFilterHasBeenSerializedAndUnserialized_ShouldReturnTrue()
43 | {
44 | using (var stream = new MemoryStream())
45 | {
46 | var filterOld = new ScalableBloomFilter<int>(0.02);
47 | filterOld.Add(42);
48 | IFormatter formatter = new BinaryFormatter();
49 | formatter.Serialize(stream, filterOld);
50 | stream.Flush();
51 | stream.Position = 0; // rewind so Deserialize reads from the start
52 | ScalableBloomFilter<int> filterNew = (ScalableBloomFilter<int>)formatter.Deserialize(stream);
53 | Assert.True(filterNew.Contains(42));
54 | }
55 | }
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/Maybe/Maybe.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | netstandard2.0
5 | true
6 | true
7 | 1.0.80
8 | Ryan McCoy
9 | Ryan McCoy
10 | Maybe.NET
11 | Maybe.NET
12 | Maybe.NET is a library of probabilistic data structures. Instantly start using bloom filters, skip lists, count min sketch, and more!
13 | Copyright 2016
14 | https://github.com/rmc00/Maybe/blob/master/LICENSE
15 | https://github.com/rmc00/Maybe
16 | https://github.com/rmc00/Maybe/raw/master/Maybe/assets/maybe-icon.png
17 | https://github.com/rmc00/Maybe.git
18 | Git
19 | bloom filter count-min sketch skiplist probabilistic netstandard20
20 | New support for count min sketch!
21 |
22 |
23 |
24 | TRACE
25 | True
26 |
27 |
28 |
29 |
30 |
31 | true
32 |
33 | bin\Debug\netstandard2.0\Maybe.xml
34 |
35 |
36 |
37 | bin\Release\netstandard2.0\Maybe.xml
38 |
39 |
40 |
41 |
42 | 1.3.0
43 |
44 |
45 |
46 |
47 |
48 | 4.3.0
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
--------------------------------------------------------------------------------
/Maybe/BloomFilter/ScalableBloomFilter.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Linq;
4 |
5 | namespace Maybe.BloomFilter
6 | {
7 | /// <summary>
8 | /// Represents a composite bloom filter, which will create many internal bloom filters to hold more values without increasing expected error rate.
9 | /// </summary>
10 | [Serializable]
11 | public class ScalableBloomFilter<T> : IBloomFilter<T>
12 | {
13 | /// <summary>
14 | /// The minimum number of items that this scalable bloom filter will handle.
15 | /// </summary>
16 | public const int MinimumCapacity = 50;
17 |
18 | private IEnumerable<BloomFilter<T>> _filters; // null until the first Add creates the first inner filter
19 | private readonly double _maxErrorRate;
20 | private int _activeItemCount; // items added to the newest inner filter only
21 | private int _capacity; // capacity of the newest inner filter; 0 before the first Add
22 |
23 | /// <summary>
24 | /// Creates a new bloom filter with error rate limited to the desired ratio.
25 | /// </summary>
26 | /// <param name="maximumErrorRate">Maximum error rate to tolerate -- more memory will be used to reduce error rate.</param>
27 | public ScalableBloomFilter(double maximumErrorRate)
28 | {
29 | _maxErrorRate = maximumErrorRate;
30 | }
31 |
32 | /// <summary>
33 | /// Adds a new item to the bloom filter and scales the bloom filter as needed.
34 | /// </summary>
35 | /// <param name="item">The item to add; it is stored in the most recently created inner filter</param>
36 | public void Add(T item)
37 | {
38 | if (_activeItemCount >= _capacity)
39 | {
40 | _capacity = Math.Max(MinimumCapacity, _capacity * 2); // each new inner filter doubles the previous capacity
41 | _filters = AddNewFilter(_maxErrorRate, _capacity, _filters);
42 | _activeItemCount = 0;
43 | }
44 | _activeItemCount++;
45 | _filters.Last().Add(item);
46 | }
47 |
48 | /// <summary>
49 | /// Checks whether an item may currently exist in the bloom filter.
50 | /// </summary>
51 | /// <param name="item">The item to check for membership in this <see cref="ScalableBloomFilter{T}"/></param>
52 | /// <returns>True if the item MIGHT be in the collection. False if the item is NOT in the collection.</returns>
53 | public bool Contains(T item) => _filters != null && _filters.Any(filter => filter.Contains(item));
54 |
55 | /// <summary>
56 | /// Gets the number of filters that are currently being used internally to hold items without exceeding the error rate. Zero before the first item is added.
57 | /// </summary>
58 | public int NumberFilters => _filters?.Count() ?? 0; // _filters is null on a fresh instance; report 0 instead of throwing
59 |
60 | private static IEnumerable<BloomFilter<T>> AddNewFilter(double maxError, int capacity, IEnumerable<BloomFilter<T>> currentFilters)
61 | {
62 | var filters = (currentFilters ?? new List<BloomFilter<T>>()).ToList();
63 | filters.Add(new BloomFilter<T>(capacity, maxError));
64 | return filters;
65 | }
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/Maybe.Test/SkipList/NodeTests.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Runtime.Serialization.Formatters.Binary;
4 | using Maybe.SkipList;
5 | using Xunit;
6 |
7 | namespace Maybe.Test.SkipList
8 | {
9 | public class NodeTests
10 | {
11 | [Fact]
12 | [Trait("Category", "Unit")]
13 | public void Constructor_WithValue_SetsValueProperty()
14 | {
15 | const int value = 42;
16 | var node = new Node<int>(value, 0);
17 | Assert.Equal(value, node.Value);
18 | }
19 |
20 | [Fact]
21 | [Trait("Category", "Unit")]
22 | public void Constructor_WithLevel_ShouldSetupNodesToThatLevel()
23 | {
24 | const int level = 3;
25 | var node = new Node<int>(42, level);
26 | Assert.Equal(level, node.Next.Length);
27 | }
28 |
29 | [Fact]
30 | [Trait("Category", "Unit")]
31 | public void Constructor_WithNegativeLevel_ShouldThrowArgumentException()
32 | {
33 | Assert.Throws<ArgumentException>(() => new Node<int>(42, -5));
34 | }
35 |
36 | [Fact]
37 | [Trait("Category", "Unit")]
38 | public void HasNextAtLevel_WithNegativeLevel_ShouldThrowArgumentException()
39 | {
40 | var node = new Node<int>(42, 2);
41 | Assert.Throws<ArgumentException>(() => node.HasNextAtLevel(-2));
42 | }
43 |
44 | [Fact]
45 | [Trait("Category", "Unit")]
46 | public void HasNextAtLevel_WithLevelGreaterThanNodeLevel_ShouldReturnFalse()
47 | {
48 | var node = new Node<int>(42, 1);
49 | Assert.False(node.HasNextAtLevel(2));
50 | }
51 |
52 | [Fact]
53 | [Trait("Category", "Unit")]
54 | public void HasNextAtLevel_WithNodeSetNull_ShouldReturnFalse()
55 | {
56 | var node = new Node<int>(42, 1);
57 | Assert.False(node.HasNextAtLevel(1));
58 | }
59 |
60 | [Fact]
61 | [Trait("Category", "Unit")]
62 | public void HasNextAtLevel_WithValidNodeAtIndex_ShouldReturnTrue()
63 | {
64 | var node = new Node<int>(42, 2);
65 | node.Next[1] = new Node<int>(46, 1);
66 | Assert.True(node.HasNextAtLevel(1));
67 | }
68 |
69 | [Fact]
70 | [Trait("Category", "Unit")]
71 | public void Deserialize_WithValueAndLevels_ShouldBeSameAfterDeserialization()
72 | {
73 | using (var stream = new MemoryStream())
74 | {
75 | const int level = 3;
76 | const int value = 42;
77 | var node = new Node<int>(value, level);
78 |
79 | var formatter = new BinaryFormatter();
80 | formatter.Serialize(stream, node);
81 | stream.Flush();
82 | stream.Position = 0; // rewind so Deserialize reads from the start
83 |
84 | var newNode = (Node<int>)formatter.Deserialize(stream);
85 |
86 | Assert.Equal(value, newNode.Value);
87 | Assert.Equal(level, newNode.Next.Length);
88 | }
89 | }
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/Maybe/BloomFilter/BloomFilter.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections;
3 | using System.Linq;
4 |
5 | namespace Maybe.BloomFilter
6 | {
7 | /// <summary>
8 | /// Basic bloom filter collection
9 | /// </summary>
10 | /// <typeparam name="T">Type of data that will be contained in the bloom filter.</typeparam>
11 | [Serializable]
12 | public class BloomFilter<T> : BloomFilterBase<T>
13 | {
14 | private readonly BitArray _collectionState; // one bit per hash bucket
15 |
16 | /// <inheritdoc />
17 | protected BloomFilter(int bitArraySize, int numHashes) : base(bitArraySize, numHashes)
18 | {
19 | _collectionState = new BitArray(bitArraySize, false);
20 | }
21 |
22 | /// <inheritdoc />
23 | public BloomFilter(int expectedItems, double acceptableErrorRate) : base(expectedItems, acceptableErrorRate)
24 | {
25 | _collectionState = new BitArray(CollectionLength, false);
26 | }
27 |
28 | /// <summary>
29 | /// Gets the ratio of how many bits in the bloom filter are set to the total number of bits. When this ratio is too high, the chance for error increases.
30 | /// </summary>
31 | public override double FillRatio => _collectionState.Cast<bool>().Count(bit => bit) / (double)_collectionState.Length;
32 |
33 | /// <summary>
34 | /// Creates a new bloom filter with appropriate bit width and hash functions for your expected size and error rate.
35 | /// </summary>
36 | /// <param name="expectedItems">The maximum number of items you expect to be in the bloom filter</param>
37 | /// <param name="acceptableErrorRate">The maximum rate of false positives you can accept. Must be a value between 0.00-1.00</param>
38 | /// <returns>A new bloom filter configured appropriately for number of items and error rate</returns>
39 | public static BloomFilter<T> Create(int expectedItems, double acceptableErrorRate)
40 | {
41 | return new BloomFilter<T>(expectedItems, acceptableErrorRate);
42 | }
43 |
44 | /// <summary>
45 | /// Adds an item to the bloom filter
46 | /// </summary>
47 | /// <param name="item">The item which should be added</param>
48 | public override void Add(T item) => DoHashAction(item, hash => _collectionState[hash] = true);
49 |
50 | /// <summary>
51 | /// Adds an item to the bloom filter and returns if it might already be contained before
52 | /// </summary>
53 | /// <param name="item">The item which should be added and searched in the bloom filter</param>
54 | /// <returns>False if the item was NOT in the bloom filter before. True if the item MIGHT have been in the bloom filter.</returns>
55 | public override bool AddAndCheck(T item)
56 | {
57 | var containsItem = true;
58 | DoHashAction(item, hash =>
59 | {
60 | containsItem = containsItem && _collectionState[hash]; // read the bit before it is overwritten below
61 | _collectionState[hash] = true;
62 | });
63 | return containsItem;
64 | }
65 |
66 | /// <summary>
67 | /// Checks if this bloom filter currently contains an item
68 | /// </summary>
69 | /// <param name="item">The item for which to search in the bloom filter</param>
70 | /// <returns>False if the item is NOT in the bloom filter. True if the item MIGHT be in the bloom filter.</returns>
71 | public override bool Contains(T item)
72 | {
73 | var containsItem = true;
74 | DoHashAction(item, hash => containsItem = containsItem && _collectionState[hash]);
75 | return containsItem;
76 | }
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/Maybe/CountMinSketch/CountMinSketchBase.cs:
--------------------------------------------------------------------------------
1 | using System;
2 |
3 | namespace Maybe.CountMinSketch
4 | {
5 | /// <summary>
6 | /// An abstract class representing a general Count-Min Sketch data structure.
7 | /// </summary>
8 | /// <typeparam name="T">Type of the items counted by the sketch</typeparam>
9 | [Serializable]
10 | public abstract class CountMinSketchBase<T>
11 | {
12 | /// <summary>
13 | /// Returns the relative error, epsilon, of this <see cref="CountMinSketchBase{T}"/>
14 | /// </summary>
15 | public abstract double RelativeError { get; }
16 | /// <summary>
17 | /// Returns the confidence, delta, of this <see cref="CountMinSketchBase{T}"/>
18 | /// </summary>
19 | public abstract double Confidence { get; }
20 | /// <summary>
21 | /// Returns the depth of this <see cref="CountMinSketchBase{T}"/>
22 | /// </summary>
23 | public abstract int Depth { get; }
24 | /// <summary>
25 | /// Returns the width of this <see cref="CountMinSketchBase{T}"/>
26 | /// </summary>
27 | public abstract int Width { get; }
28 | /// <summary>
29 | /// Returns the total count of items added to this <see cref="CountMinSketchBase{T}"/>
30 | /// </summary>
31 | public abstract long TotalCount { get; }
32 |
33 | /// <summary>
34 | /// Returns the random seed that was used to initialize this <see cref="CountMinSketchBase{T}"/>
35 | /// </summary>
36 | public abstract int Seed { get; }
37 |
38 | /// <summary>
39 | /// Exposes the <see cref="CountMinSketchBase{T}"/>'s counter table
40 | /// </summary>
41 | public abstract long[,] Table { get; }
42 |
43 | /// <summary>
44 | /// Increments counters for the item
45 | /// </summary>
46 | /// <param name="item">Item whose counters should be incremented</param>
47 | public abstract void Add(T item);
48 |
49 | /// <summary>
50 | /// Returns the estimated frequency of the item
51 | /// </summary>
52 | /// <param name="item">Item for which an estimated count is desired</param>
53 | /// <returns>Estimated frequency of this item in the <see cref="CountMinSketchBase{T}"/></returns>
54 | public abstract long EstimateCount(T item);
55 |
56 | /// <summary>
57 | /// Merges another <see cref="CountMinSketchBase{T}"/> with this one in place. Other must have the same depth, width, and seed to be merged.
58 | /// </summary>
59 | /// <param name="other"><see cref="CountMinSketchBase{T}"/> to be merged with this <see cref="CountMinSketchBase{T}"/></param>
60 | /// <returns>The sketch resulting from the merge (implementation-defined; presumably this instance -- confirm against the concrete class)</returns>
61 | public abstract CountMinSketchBase<T> MergeInPlace(CountMinSketchBase<T> other);
62 |
63 | /// <summary>
64 | /// Creates a <see cref="CountMinSketchBase{T}"/> with the given depth, width, and seed
65 | /// </summary>
66 | /// <param name="depth">The depth of the count-min sketch. Must be positive.</param>
67 | /// <param name="width">The width of the count-min sketch. Must be positive.</param>
68 | /// <param name="seed">A random seed for hashing.</param>
69 | /// <returns>A new <see cref="CountMinSketchBase{T}"/> created with the provided parameters</returns>
70 | public static CountMinSketchBase<T> Create(int depth, int width, int seed) => new CountMinSketch<T>(depth, width, seed);
71 |
72 | /// <summary>
73 | /// Creates a new <see cref="CountMinSketchBase{T}"/> with given relative error (epsilon), confidence, and random seed.
74 | /// </summary>
75 | /// <param name="epsilon">Relative error of the sketch. Must be positive.</param>
76 | /// <param name="confidence">Confidence of frequency estimations. Must be between 0 and 1</param>
77 | /// <param name="seed">A random seed for hashing.</param>
78 | /// <returns>A new <see cref="CountMinSketchBase{T}"/> created with the provided parameters</returns>
79 | public static CountMinSketchBase<T> Create(double epsilon, double confidence, int seed) => new CountMinSketch<T>(epsilon, confidence, seed);
80 | }
81 | }
82 |
--------------------------------------------------------------------------------
/Maybe/Utilities/MurmurHash3.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 |
4 | namespace Maybe.Utilities
5 | {
6 | [Serializable]
7 | internal class MurmurHash3
8 | {
9 | /// <summary>
10 | /// Uses Dillinger and Manolios algorithm to calculate many hashes from 2 base hashes (Murmur3 with two different fixed seeds)
11 | /// </summary>
12 | /// <param name="item">Item to hash</param>
13 | /// <param name="numHashes">Desired number of hashes to compute</param>
14 | /// <param name="maxHashValue">Exclusive upper bound of the returned values; modulus is used to limit results. Must be non-zero.</param>
15 | /// <returns>A lazily evaluated sequence of <paramref name="numHashes"/> values in the range [0, maxHashValue)</returns>
16 | public IEnumerable<int> GetHashes(byte[] item, int numHashes, int maxHashValue)
17 | {
18 | var primaryHash = Hash(item, 293);
19 | var secondaryHash = Hash(item, 697);
20 |
21 | for (var i = 0; i < numHashes; i++)
22 | {
23 | yield return Math.Abs((primaryHash + i * secondaryHash) % maxHashValue); // remainder magnitude is below maxHashValue, so Abs cannot overflow
24 | }
25 | }
26 |
27 | /// <summary>
28 | /// Maps a stream of data to an integer hash
29 | /// </summary>
30 | /// <param name="bytes">Stream of data to hash</param>
31 | /// <param name="seed">Seed used for hashing functions</param>
32 | /// <returns>Int hash</returns>
33 | public int Hash(byte[] bytes, uint seed)
34 | {
35 | const uint c1 = 0xcc9e2d51;
36 | const uint c2 = 0x1b873593;
37 | var h1 = seed;
38 |
39 | var stream = new Span<byte>(bytes);
40 |
41 | int index;
42 | for (index = 0; index + 4 <= stream.Length; index += 4) // was "index < Length-4", which skipped the final full block; NOTE: changes hashes of inputs whose length is a multiple of 4
43 | {
44 | var slice = stream.Slice(index, 4);
45 | uint k1;
46 | /* Get four bytes from the input into an uint */
47 | k1 = (uint)
48 | (slice[0]
49 | | slice[1] << 8
50 | | slice[2] << 16
51 | | slice[3] << 24);
52 |
53 | /* bitmagic hash */
54 | k1 *= c1;
55 | k1 = Rotl32(k1, 15);
56 | k1 *= c2;
57 |
58 | h1 ^= k1;
59 | h1 = Rotl32(h1, 13);
60 | h1 = h1 * 5 + 0xe6546b64;
61 | }
62 |
63 | // handle remainder (1 to 3 trailing bytes; full blocks are all consumed by the loop above)
64 | if (index < stream.Length)
65 | {
66 | var slice = stream.Slice(index, stream.Length-index);
67 | uint k1;
68 | switch (slice.Length)
69 | {
70 | case 3:
71 | k1 = (uint)
72 | (slice[0]
73 | | slice[1] << 8
74 | | slice[2] << 16);
75 | k1 *= c1;
76 | k1 = Rotl32(k1, 15);
77 | k1 *= c2;
78 | h1 ^= k1;
79 | break;
80 | case 2:
81 | k1 = (uint)
82 | (slice[0]
83 | | slice[1] << 8);
84 | k1 *= c1;
85 | k1 = Rotl32(k1, 15);
86 | k1 *= c2;
87 | h1 ^= k1;
88 | break;
89 | case 1:
90 | k1 = slice[0];
91 | k1 *= c1;
92 | k1 = Rotl32(k1, 15);
93 | k1 *= c2;
94 | h1 ^= k1;
95 | break;
96 | }
97 | }
98 |
99 | // finalization: mix in the input length, then avalanche the bits
100 | h1 ^= (uint)stream.Length;
101 | h1 = Fmix(h1);
102 |
103 | unchecked //ignore overflow
104 | {
105 | return (int)h1;
106 | }
107 | }
108 |
109 | private static uint Rotl32(uint x, byte r) => (x << r) | (x >> (32 - r));
110 |
111 | private static uint Fmix(uint h)
112 | {
113 | h ^= h >> 16;
114 | h *= 0x85ebca6b;
115 | h ^= h >> 13;
116 | h *= 0xc2b2ae35;
117 | h ^= h >> 16;
118 | return h;
119 | }
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/Maybe/BloomFilter/BloomFilterBase.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using Maybe.Utilities;
3 |
4 | namespace Maybe.BloomFilter
5 | {
6 | /// <summary>
7 | /// Base class for bloom filter to contain some common member variables and hashing helper functions.
8 | /// </summary>
9 | /// <typeparam name="T">Type of the items stored in the bloom filter</typeparam>
10 | [Serializable]
11 | public abstract class BloomFilterBase<T> : IBloomFilter<T>
12 | {
13 | private readonly MurmurHash3 _hasher = new MurmurHash3();
14 |
15 | /// <summary>
16 | /// The number of times an item should be hashed when being added to or checked for membership in the collection
17 | /// </summary>
18 | protected int NumberHashes;
19 |
20 | /// <summary>
21 | /// Number of buckets for storing hash info (bits or ints)
22 | /// </summary>
23 | protected readonly int CollectionLength;
24 |
25 | /// <summary>
26 | /// Creates a new bloom filter with appropriate bit width and hash functions for your expected size and error rate.
27 | /// </summary>
28 | /// <param name="expectedItems">The maximum number of items you expect to be in the bloom filter</param>
29 | /// <param name="acceptableErrorRate">The maximum rate of false positives you can accept. Must be greater than 0 and at most 1.</param>
30 | /// <exception cref="ArgumentException">Thrown when expectedItems is less than 1, or acceptableErrorRate is NaN, not greater than 0, or greater than 1.</exception>
31 | protected BloomFilterBase(int expectedItems, double acceptableErrorRate)
32 | {
33 | if (expectedItems <= 0) { throw new ArgumentException("Expected items must be at least 1.", nameof(expectedItems)); }
34 | if (!(acceptableErrorRate > 0 && acceptableErrorRate <= 1)) { throw new ArgumentException("Acceptable error rate must be between 0 and 1.", nameof(acceptableErrorRate)); } // 0 and NaN would make Math.Log below yield an infinite/NaN bit width
35 |
36 | var bitWidth = (int)Math.Ceiling(expectedItems * Math.Log(acceptableErrorRate) / Math.Log(1.0 / Math.Pow(2.0, Math.Log(2.0)))) * 2;
37 | var numHashes = (int)Math.Round(Math.Log(2.0) * bitWidth / expectedItems) * 2;
38 | NumberHashes = numHashes;
39 | CollectionLength = bitWidth;
40 | }
41 |
42 | /// <summary>
43 | /// Protected constructor to create a new bloom filter
44 | /// </summary>
45 | /// <param name="bitArraySize">The number of bits that should be used internally to store items.</param>
46 | /// <param name="numberHashes">The number of times an input should be hashed before working against the internal bit array.</param>
47 | protected BloomFilterBase(int bitArraySize, int numberHashes)
48 | {
49 | NumberHashes = numberHashes;
50 | CollectionLength = bitArraySize;
51 | }
52 |
53 | /// <summary>
54 | /// Adds an item to the bloom filter
55 | /// </summary>
56 | /// <param name="item">The item which should be added</param>
57 | public abstract void Add(T item);
58 |
59 | /// <summary>
60 | /// Checks if this bloom filter currently contains an item
61 | /// </summary>
62 | /// <param name="item">The item for which to search in the bloom filter</param>
63 | /// <returns>False if the item is NOT in the bloom filter. True if the item MIGHT be in the bloom filter.</returns>
64 | public abstract bool Contains(T item);
65 |
66 | /// <summary>
67 | /// Adds an item to the bloom filter and returns if it might already be contained before
68 | /// </summary>
69 | /// <param name="item">The item which should be added and searched in the bloom filter</param>
70 | /// <returns>False if the item was NOT in the bloom filter before. True if the item MIGHT have been in the bloom filter.</returns>
71 | public abstract bool AddAndCheck(T item);
72 |
73 | /// <summary>
74 | /// Represents the ratio of positions that are set in the bloom filter to the total number of positions
75 | /// </summary>
76 | public abstract double FillRatio { get; }
77 |
78 | /// <summary>
79 | /// Hashes the provided <paramref name="item"/> and passes the hashed result to an action for processing (typically setting bits in the bit array or checking if those bits are set)
80 | /// </summary>
81 | /// <param name="item">The item to hash; a byte[] is hashed as-is, anything else is first serialized to bytes</param>
82 | /// <param name="hashAction">Callback invoked once per computed hash (NumberHashes times) with a bucket index below CollectionLength</param>
83 | protected void DoHashAction(T item, Action<int> hashAction)
84 | {
85 | var bytes = item as byte[] ?? ByteConverter.ConvertToByteArray(item);
86 | var hashes = _hasher.GetHashes(bytes, NumberHashes, CollectionLength);
87 | foreach (var hash in hashes)
88 | {
89 | hashAction(hash);
90 | }
91 | }
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Maybe.NET
2 | [Build status](https://ci.appveyor.com/project/rmc00/maybe)
3 | [Coverage status](https://coveralls.io/github/rmc00/Maybe?branch=master)
4 |
5 | Maybe.NET is a lightweight library of probabilistic data structures for .NET. The library currently features Bloom Filters, Counting Bloom Filters, Scalable Bloom Filters, Skip Lists, and Count-Min Sketch, and more data structures are coming soon! Stop scouring the Internet and re-writing the same classes over and over -- use Maybe.NET.
6 |
7 | ## Installation
8 |
9 | Installation is super simple with NuGet! Just use this command to install from the Visual Studio Package Manager Console:
10 |
11 | Install-Package Maybe.NET
12 |
13 | ## Usage
14 | Maybe.NET has a clear, intuitive API that is easy to pick up. You can check out the Maybe.Test project for examples of using each method. Here are some quick examples to get you started.
15 |
16 | ### Bloom Filter Usage
17 | The bloom filter is a handy collection to which you can add items and check if an item is contained in the collection. Bloom filters are very fast and memory-efficient, but this comes at a small cost: the filter can definitely say if an item is *NOT* in the collection, but it can't say for sure that an item is in the collection, only that it *MIGHT* be. You can use the constructor to specify your targeted maximum rate of errors. (Lower error rates may use more memory.)
18 |
19 | ```
20 | var filter = new BloomFilter<int>(50, 0.02);
21 | filter.Add(42);
22 | filter.Add(27);
23 | filter.Add(33);
24 |
25 | filter.Contains(55); // returns false (the item is NOT in the collection)
26 | filter.Contains(27); // returns true (the item MIGHT be in the collection)
27 | ```
28 |
29 | ### Counting Bloom Filter Usage
30 | Counting bloom filters extend regular bloom filter functionality by allowing items to be removed from the collection. This can be useful functionality, but it opens the possibility of false negatives.
31 |
32 | ```
33 | var filter = new CountingBloomFilter<int>(50, 0.02);
34 | filter.Add(42);
35 | filter.Contains(42); // returns true
36 | filter.Remove(42);
37 | filter.Contains(42); // returns false
38 | ```
39 |
40 | ### Scalable Bloom Filter Usage
41 | Scalable bloom filters offer the same Add and Contains operations that normal BloomFilter offers. The difference is that ScalableBloomFilter only needs to know the max error rate. The capacity of the bloom filter will grow by adding additional bloom filters internally, which allows the developer to add more items to the bloom filter without worrying about incurring too high of a false positive rate.
42 |
43 | ```
44 | var filter = new ScalableBloomFilter<int>(0.02); // specifying 2% max error rate, no capacity required
45 | filter.Add(42);
46 | filter.Add(27);
47 | filter.Add(33); // add as many items as needed. The scalable bloom filter will create as many filters as needed to hold data and keep error rates within tolerance.
48 |
49 | filter.Contains(55); // returns false (the item is NOT in the collection)
50 | filter.Contains(27); // returns true (the item MIGHT be in the collection)
51 | ```
52 |
53 | ### Skip List Usage
54 | Skip lists are sort of like a singly linked list, but they have additional links to other nodes further out in the list. The structure of the links is similar to building Binary Search into the Skip List. However, the Skip List uses randomness to avoid expensive balancing operations when the list is being modified. This structure allows for searching in logarithmic time on average, but doesn't incur the cost of balancing a tree that is normally incurred for fast search. See the [wikipedia article](https://en.wikipedia.org/wiki/Skip_list) for detailed information.
55 |
56 | ```
57 | var list = new SkipList<int> {42, 33};
58 | list.Contains(42); // returns true
59 | list.Contains(91); // returns false
60 | ```
61 |
62 | ### Count Min Sketch Usage
63 | Count min sketch is a data structure that allows you to track the frequency of an item occurring within a large set. The count min sketch will never undercount items, but it can overcount by a controllable confidence interval. This is great for counting in very large (think big data) datasets where you can't deterministically fit everything into memory.
64 |
65 | ```
66 | var sketch = new CountMinSketch<string>(5d, 0.95d, 42);
67 | sketch.Add("test");
68 | sketch.Add("foobar");
69 | var estimate = sketch.EstimateCount("test"); // returns 1
70 | ```
71 |
72 | Count min sketch also supports merging for parallel work. You can divide your workload and process in multiple Count Min Sketches in parallel. Then, merge the sketches from each workload at the end to see the overall result. Just make sure to initialize the sketches with the same configuration.
73 |
74 | ## Contributing
75 | Contributions are always welcome! Please feel free to submit pull requests and to open issues. I prefer to have tests on all public methods if possible and wherever else makes sense.
76 |
77 | ## License
78 |
79 | Free to use under MIT license
80 |
--------------------------------------------------------------------------------
/Maybe.Test/SkipList/SkipListTests.cs:
--------------------------------------------------------------------------------
1 | using Maybe.SkipList;
2 | using System.Collections.Generic;
3 | using System.IO;
4 | using System.Linq;
5 | using System.Runtime.Serialization.Formatters.Binary;
6 | using Xunit;
7 |
8 | namespace Maybe.Test.SkipList
9 | {
10 | public class SkipListTests
11 | {
12 | [Fact]
13 | [Trait("Category", "Unit")]
14 | public void Add_WithValue_ShouldInsertNewNode()
15 | {
16 | var list = new SkipList<int>();
17 | list.Add(42);
18 | Assert.Single(list);
19 | }
20 |
21 | [Fact]
22 | [Trait("Category", "Unit")]
23 | public void AddRange_WithValues_ShouldInsertNewNodeForEachValue()
24 | {
25 | var list = new SkipList<int>();
26 | list.AddRange(new[] { 42, 27, 33 });
27 | Assert.Equal(3, list.Count());
28 | }
29 |
30 | [Fact]
31 | [Trait("Category", "Unit")]
32 | public void Deserialize_WithValues_ShouldCreateNewListWithSameValues()
33 | {
34 | using (var stream = new MemoryStream())
35 | {
36 | var list = new SkipList<int>();
37 | list.AddRange(new[] { 42, 27, 33 });
38 |
39 | var formatter = new BinaryFormatter();
40 | formatter.Serialize(stream, list);
41 | stream.Flush();
42 | stream.Position = 0; // rewind so the formatter reads back what was just written
43 | var newList = (SkipList<int>) formatter.Deserialize(stream);
44 |
45 | Assert.True(newList.Contains(42));
46 | Assert.True(newList.Contains(27));
47 | Assert.True(newList.Contains(33));
48 | Assert.Equal(list.Count(), newList.Count());
49 | }
50 | }
51 |
52 | [Fact]
53 | [Trait("Category", "Unit")]
54 | public void Contains_WithValueInList_ShouldReturnTrue()
55 | {
56 | var list = new SkipList<int> {42, 33};
57 | Assert.True(list.Contains(42));
58 | }
59 |
60 | [Fact]
61 | [Trait("Category", "Unit")]
62 | public void Contains_WithValueNotInList_ShouldReturnFalse()
63 | {
64 | var list = new SkipList<int> { 42, 33 };
65 | Assert.False(list.Contains(27));
66 | }
67 |
68 | [Fact]
69 | [Trait("Category", "Unit")]
70 | public void Remove_WhenValueFoundAndRemoved_ShouldReturnTrue()
71 | {
72 | var list = new SkipList<int> {42};
73 | Assert.True(list.Remove(42));
74 | }
75 |
76 | [Fact]
77 | [Trait("Category", "Unit")]
78 | public void Remove_WhenValueNotFound_ShouldReturnFalse()
79 | {
80 | var list = new SkipList<int>();
81 | Assert.False(list.Remove(42));
82 | }
83 |
84 | [Fact]
85 | [Trait("Category", "Unit")]
86 | public void Remove_WhenValueFound_ShouldDeleteValueFromList()
87 | {
88 | var list = new SkipList<int> {42};
89 | list.Remove(42);
90 | Assert.False(list.Contains(42));
91 | }
92 |
93 | [Fact]
94 | [Trait("Category", "Unit")]
95 | public void GetEnumerator_WithValues_ShouldReturnAllValuesAsIEnumerable()
96 | {
97 | var list = new SkipList<int> {42, 27, 39};
98 | using (var ie = list.GetEnumerator())
99 | {
100 | var count = 0;
101 | while (ie.MoveNext()) { count++; }
102 | Assert.Equal(3, count);
103 | }
104 | }
105 |
106 | [Fact]
107 | [Trait("Category", "Unit")]
108 | public void Enumeration_WithNoSpecifiedComparer_ShouldUseDefaultSortOrder()
109 | {
110 | var list = new SkipList<int>{42,27,33};
111 | var content = list.ToList();
112 | Assert.Equal(27, content[0]);
113 | Assert.Equal(33, content[1]);
114 | Assert.Equal(42, content[2]);
115 | }
116 |
117 | [Fact]
118 | [Trait("Category", "Unit")]
119 | public void Enumeration_WithSpecificComparer_ShouldUseCustomSortOrder()
120 | {
121 | var list = new SkipList<int>(new MyComparer()) {42,27,33};
122 | var content = list.ToList();
123 | Assert.Equal(42, content[0]);
124 | Assert.Equal(33, content[1]);
125 | Assert.Equal(27, content[2]);
126 | }
127 |
128 | [Fact]
129 | [Trait("Category", "Unit")]
130 | public void GetEnumerator_WhenListIsEmpty_ShouldReturnEmptyEnumerator()
131 | {
132 | var list = new SkipList<int>();
133 | using (var enumerator = list.GetEnumerator())
134 | {
135 | Assert.False(enumerator.MoveNext());
136 | }
137 | }
138 |
139 | private class MyComparer : IComparer<int> // reverses the natural integer order (larger values sort first)
140 | {
141 | public int Compare(int x, int y)
142 | {
143 | if (x < y) { return 1; }
144 | if (x > y) { return -1; }
145 | return 0;
146 | }
147 | }
148 | }
149 | }
150 |
--------------------------------------------------------------------------------
/Maybe/BloomFilter/CountingBloomFilter.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Linq;
3 |
4 | namespace Maybe.BloomFilter
5 | {
6 | /// <summary>
7 | /// A bloom filter modified to store counters and allow elements to be removed from the collection.
8 | /// </summary>
9 | /// <typeparam name="T">The type of item to be stored in the collection</typeparam>
10 | [Serializable]
11 | public class CountingBloomFilter<T> : BloomFilterBase<T>
12 | {
13 | private readonly byte[] _collectionState;
14 |
15 | /// <summary>
16 | /// Creates a new counting bloom filter -- a bloom filter capable of tracking how many times a bit has been set
17 | /// </summary>
18 | /// <param name="arraySize">Size of the internal bit array to track items</param>
19 | /// <param name="numHashes">Number of times the input should be hashed before working with the bit array.</param>
20 | protected CountingBloomFilter(int arraySize, int numHashes) : base(arraySize, numHashes)
21 | {
22 | _collectionState = new byte[arraySize];
23 | for (var i = 0; i < _collectionState.Length; i++)
24 | {
25 | _collectionState[i] = 0;
26 | }
27 | }
28 |
29 |
30 | /// <summary>
31 | /// Creates a new counting bloom filter
32 | /// </summary>
33 | /// <param name="expectedItems">Expected number of items for the bloom filter to hold</param>
34 | /// <param name="acceptableErrorRate">The maximum error rate for this counting bloom filter when items are below expected value</param>
35 | public CountingBloomFilter(int expectedItems, double acceptableErrorRate) : base(expectedItems, acceptableErrorRate)
36 | {
37 | _collectionState = new byte[CollectionLength];
38 | for (var i = 0; i < _collectionState.Length; i++)
39 | {
40 | _collectionState[i] = 0;
41 | }
42 | }
43 |
44 |
45 | /// <inheritdoc />
46 | public override double FillRatio => _collectionState.Count(position => position > 0) / (double)_collectionState.Length;
47 |
48 | /// <summary>
49 | /// Creates a new counting bloom filter with appropriate bit width and hash functions for your expected size and error rate.
50 | /// </summary>
51 | /// <param name="expectedItems">The maximum number of items you expect to be in the counting bloom filter</param>
52 | /// <param name="acceptableErrorRate">The maximum rate of false positives you can accept. Must be a value between 0.00-1.00</param>
53 | /// <returns>A new bloom filter configured appropriately for number of items and error rate</returns>
54 | public static CountingBloomFilter<T> Create(int expectedItems, double acceptableErrorRate)
55 | {
56 | return new CountingBloomFilter<T>(expectedItems, acceptableErrorRate);
57 | }
58 |
59 | /// <summary>
60 | /// Adds an item to the counting bloom filter
61 | /// </summary>
62 | /// <param name="item">The item which should be added</param>
63 | public override void Add(T item) => DoHashAction(item, hash =>
64 | {
65 | if (_collectionState[hash] < byte.MaxValue) // saturate at 255 instead of wrapping back to 0
66 | {
67 | _collectionState[hash]++;
68 | }
69 | });
70 |
71 | /// <summary>
72 | /// Checks if this counting bloom filter currently contains an item
73 | /// </summary>
74 | /// <param name="item">The item for which to search in the bloom filter</param>
75 | /// <returns>False if the item is NOT in the counting bloom filter. True if the item MIGHT be in the counting bloom filter.</returns>
76 | public override bool Contains(T item)
77 | {
78 | var containsItem = true;
79 | DoHashAction(item, hash => containsItem = containsItem && _collectionState[hash] > 0);
80 | return containsItem;
81 | }
82 |
83 | /// <summary>
84 | /// Adds an item to the bloom filter and returns if it might already be contained before
85 | /// </summary>
86 | /// <param name="item">The item which should be added and searched in the bloom filter</param>
87 | /// <returns>False if the item was NOT in the bloom filter before. True if the item MIGHT have been in the bloom filter.</returns>
88 | public override bool AddAndCheck(T item)
89 | {
90 | var containsItem = true;
91 | DoHashAction(item, hash =>
92 | {
93 | containsItem = containsItem && _collectionState[hash] > 0; // test the counter before this call increments it
94 | if (_collectionState[hash] < byte.MaxValue)
95 | {
96 | _collectionState[hash]++;
97 | }
98 | });
99 | return containsItem;
100 | }
101 |
102 | /// <summary>
103 | /// Removes an item from the counting bloom filter
104 | /// </summary>
105 | /// <param name="item">The item to remove</param>
106 | /// <returns>True if the counting bloom filter might contain the item and the item was removed. False otherwise.</returns>
107 | public bool Remove(T item)
108 | {
109 | if (!Contains(item)) return false;
110 | // Several hashes of one item can land on the same counter, so guard each decrement: a byte at 0 would wrap to 255.
111 | DoHashAction(item, hash => { if (_collectionState[hash] > 0) { _collectionState[hash]--; } });
112 | return true;
113 | }
114 |
115 | /// <summary>
116 | /// Returns the counter value at a given index if the index is valid. 0 if the index is invalid.
117 | /// </summary>
118 | public byte CounterAt(int index)
119 | {
120 | return index < 0 || index >= _collectionState.Length ? (byte)0 : _collectionState[index];
121 | }
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.suo
8 | *.user
9 | *.userosscache
10 | *.sln.docstates
11 | *.vcxproj.filters
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Build results
17 | [Dd]ebug/
18 | [Dd]ebugPublic/
19 | [Rr]elease/
20 | [Rr]eleases/
21 | x64/
22 | x86/
23 | bld/
24 | [Bb]in/
25 | [Oo]bj/
26 | [Ll]og/
27 |
28 | # Visual Studio 2015 cache/options directory
29 | .vs/
30 | # Uncomment if you have tasks that create the project's static files in wwwroot
31 | #wwwroot/
32 |
33 | # MSTest test Results
34 | [Tt]est[Rr]esult*/
35 | [Bb]uild[Ll]og.*
36 |
37 | # NUNIT
38 | *.VisualState.xml
39 | TestResult.xml
40 |
41 | # Build Results of an ATL Project
42 | [Dd]ebugPS/
43 | [Rr]eleasePS/
44 | dlldata.c
45 |
46 | # .NET Core
47 | project.lock.json
48 | project.fragment.lock.json
49 | artifacts/
50 | **/Properties/launchSettings.json
51 |
52 | *_i.c
53 | *_p.c
54 | *_i.h
55 | *.ilk
56 | *.meta
57 | *.obj
58 | *.pch
59 | *.pdb
60 | *.pgc
61 | *.pgd
62 | *.rsp
63 | *.sbr
64 | *.tlb
65 | *.tli
66 | *.tlh
67 | *.tmp
68 | *.tmp_proj
69 | *.log
70 | *.vspscc
71 | *.vssscc
72 | .builds
73 | *.pidb
74 | *.svclog
75 | *.scc
76 |
77 | # Chutzpah Test files
78 | _Chutzpah*
79 |
80 | # Visual C++ cache files
81 | ipch/
82 | *.aps
83 | *.ncb
84 | *.opendb
85 | *.opensdf
86 | *.sdf
87 | *.cachefile
88 | *.VC.db
89 | *.VC.VC.opendb
90 |
91 | # Visual Studio profiler
92 | *.psess
93 | *.vsp
94 | *.vspx
95 | *.sap
96 |
97 | # TFS 2012 Local Workspace
98 | $tf/
99 |
100 | # Guidance Automation Toolkit
101 | *.gpState
102 |
103 | # ReSharper is a .NET coding add-in
104 | _ReSharper*/
105 | *.[Rr]e[Ss]harper
106 | *.DotSettings.user
107 |
108 | # JustCode is a .NET coding add-in
109 | .JustCode
110 |
111 | # TeamCity is a build add-in
112 | _TeamCity*
113 |
114 | # DotCover is a Code Coverage Tool
115 | *.dotCover
116 |
117 | # Visual Studio code coverage results
118 | *.coverage
119 | *.coveragexml
120 |
121 | # NCrunch
122 | _NCrunch_*
123 | .*crunch*.local.xml
124 | nCrunchTemp_*
125 |
126 | # MightyMoose
127 | *.mm.*
128 | AutoTest.Net/
129 |
130 | # Web workbench (sass)
131 | .sass-cache/
132 |
133 | # Installshield output folder
134 | [Ee]xpress/
135 |
136 | # DocProject is a documentation generator add-in
137 | DocProject/buildhelp/
138 | DocProject/Help/*.HxT
139 | DocProject/Help/*.HxC
140 | DocProject/Help/*.hhc
141 | DocProject/Help/*.hhk
142 | DocProject/Help/*.hhp
143 | DocProject/Help/Html2
144 | DocProject/Help/html
145 |
146 | # Click-Once directory
147 | publish/
148 |
149 | # Publish Web Output
150 | *.[Pp]ublish.xml
151 | *.azurePubxml
152 | # TODO: Comment the next line if you want to checkin your web deploy settings
153 | # but database connection strings (with potential passwords) will be unencrypted
154 | *.pubxml
155 | *.publishproj
156 |
157 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
158 | # checkin your Azure Web App publish settings, but sensitive information contained
159 | # in these scripts will be unencrypted
160 | PublishScripts/
161 |
162 | # NuGet Packages
163 | *.nupkg
164 | # The packages folder can be ignored because of Package Restore
165 | **/packages/*
166 | # except build/, which is used as an MSBuild target.
167 | !**/packages/build/
168 | # Uncomment if necessary however generally it will be regenerated when needed
169 | #!**/packages/repositories.config
170 | # NuGet v3's project.json files produces more ignoreable files
171 | *.nuget.props
172 | *.nuget.targets
173 |
174 | # Microsoft Azure Build Output
175 | csx/
176 | *.build.csdef
177 |
178 | # Microsoft Azure Emulator
179 | ecf/
180 | rcf/
181 |
182 | # Windows Store app package directories and files
183 | AppPackages/
184 | BundleArtifacts/
185 | Package.StoreAssociation.xml
186 | _pkginfo.txt
187 |
188 | # Visual Studio cache files
189 | # files ending in .cache can be ignored
190 | *.[Cc]ache
191 | # but keep track of directories ending in .cache
192 | !*.[Cc]ache/
193 |
194 | # Others
195 | ClientBin/
196 | ~$*
197 | *~
198 | *.dbmdl
199 | *.dbproj.schemaview
200 | *.jfm
201 | *.pfx
202 | *.publishsettings
203 | node_modules/
204 | orleans.codegen.cs
205 |
206 | # Since there are multiple workflows, uncomment next line to ignore bower_components
207 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
208 | #bower_components/
209 |
210 | # RIA/Silverlight projects
211 | Generated_Code/
212 |
213 | # Backup & report files from converting an old project file
214 | # to a newer Visual Studio version. Backup files are not needed,
215 | # because we have git ;-)
216 | _UpgradeReport_Files/
217 | Backup*/
218 | UpgradeLog*.XML
219 | UpgradeLog*.htm
220 |
221 | # SQL Server files
222 | *.mdf
223 | *.ldf
224 |
225 | # Business Intelligence projects
226 | *.rdl.data
227 | *.bim.layout
228 | *.bim_*.settings
229 |
230 | # Microsoft Fakes
231 | FakesAssemblies/
232 |
233 | # GhostDoc plugin setting file
234 | *.GhostDoc.xml
235 |
236 | # Node.js Tools for Visual Studio
237 | .ntvs_analysis.dat
238 |
239 | # Visual Studio 6 build log
240 | *.plg
241 |
242 | # Visual Studio 6 workspace options file
243 | *.opt
244 |
245 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
246 | *.vbw
247 |
248 | # Visual Studio LightSwitch build output
249 | **/*.HTMLClient/GeneratedArtifacts
250 | **/*.DesktopClient/GeneratedArtifacts
251 | **/*.DesktopClient/ModelManifest.xml
252 | **/*.Server/GeneratedArtifacts
253 | **/*.Server/ModelManifest.xml
254 | _Pvt_Extensions
255 |
256 | # Paket dependency manager
257 | .paket/paket.exe
258 | paket-files/
259 |
260 | # FAKE - F# Make
261 | .fake/
262 |
263 | # JetBrains Rider
264 | .idea/
265 | *.sln.iml
266 |
267 | # CodeRush
268 | .cr/
269 |
270 | # Python Tools for Visual Studio (PTVS)
271 | __pycache__/
272 | *.pyc
273 |
274 | # Cake - Uncomment if you are using it
275 | build/tools/
276 | tools/
277 | coverage.xml
278 |
--------------------------------------------------------------------------------
/Maybe.Test/CountMinSketch/CountMinSketchTests.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Runtime.Serialization;
4 | using System.Runtime.Serialization.Formatters.Binary;
5 | using Maybe.CountMinSketch;
6 | using Xunit;
7 |
8 | namespace Maybe.Test.CountMinSketch
9 | {
10 | public class CountMinSketchTests
11 | {
12 | [Fact]
13 | [Trait("Category", "Unit")]
14 | public void Constructor_WithNegativeDepth_ShouldThrowArgumentException() => Assert.Throws<ArgumentException>(() => new CountMinSketch<int>(-2, 5, 42));
15 |
16 | [Fact]
17 | [Trait("Category", "Unit")]
18 | public void Constructor_WithNegativeWidth_ShouldThrowArgumentException() => Assert.Throws<ArgumentException>(() => new CountMinSketch<int>(5, -5, 42));
19 |
20 | [Fact]
21 | [Trait("Category", "Unit")]
22 | public void Constructor_WithNegativeEpsilon_ShouldThrowArgumentException() => Assert.Throws<ArgumentException>(() => new CountMinSketch<int>(-5d, 5d, 42));
23 |
24 | [Fact]
25 | [Trait("Category", "Unit")]
26 | public void Constructor_WithNegativeConfidence_ShouldThrowArgumentException() => Assert.Throws<ArgumentException>(() => new CountMinSketch<int>(5d, -5d, 42));
27 |
28 | [Fact]
29 | [Trait("Category", "Unit")]
30 | public void Constructor_WithConfidenceOverOne_ShouldThrowArgumentException() => Assert.Throws<ArgumentException>(() => new CountMinSketch<int>(5d, 2, 42));
31 |
32 | [Fact]
33 | [Trait("Category", "Unit")]
34 | public void TotalCount_ShouldIncrement_WhenItemIsAdded()
35 | {
36 | var sketch = new CountMinSketch<int>(5d, 0.95d, 42);
37 | sketch.Add(31337);
38 | Assert.Equal(1, sketch.TotalCount);
39 | }
40 |
41 | [Fact]
42 | [Trait("Category", "Unit")]
43 | public void EstimateCount_ShouldBeWithinConfidenceInterval_ForItemThatHasBeenAdded()
44 | {
45 | const string input = "Testing!!";
46 | var sketch = new CountMinSketch<string>(5d, 0.95, 42);
47 | for (var i = 0; i < 1000; i++)
48 | {
49 | sketch.Add(input);
50 | }
51 | var estimate = sketch.EstimateCount(input);
52 | Assert.InRange(estimate, 1000, 1050);
53 | }
54 |
55 | [Fact]
56 | [Trait("Category", "Unit")]
57 | public void EstimateCount_ShouldBeWithinConfidenceInterval_ForDeserializedSketch()
58 | {
59 | using (var stream = new MemoryStream())
60 | {
61 | const string input = "Testing!!";
62 | var originalSketch = new CountMinSketch<string>(5d, 0.95, 42);
63 | for (var i = 0; i < 1000; i++)
64 | {
65 | originalSketch.Add(input);
66 | }
67 | IFormatter formatter = new BinaryFormatter();
68 | formatter.Serialize(stream, originalSketch);
69 | stream.Flush();
70 | stream.Position = 0; // rewind so the formatter reads back what was just written
71 | var newSketch = (CountMinSketch<string>) formatter.Deserialize(stream);
72 | var estimate = newSketch.EstimateCount(input);
73 | Assert.InRange(estimate, 1000, 1050);
74 | }
75 | }
76 |
77 | [Fact]
78 | [Trait("Category", "Unit")]
79 | public void MergeInPlace_WithNullOther_ShouldThrowIncompatibleMergeException()
80 | {
81 | var sketch = new CountMinSketch<int>(5d, 0.95d, 42);
82 | Assert.Throws<IncompatibleMergeException>(() => sketch.MergeInPlace(null));
83 | }
84 |
85 | [Fact]
86 | [Trait("Category", "Unit")]
87 | public void MergeInPlace_WithDifferentDepths_ShouldThrowIncompatibleMergeException()
88 | {
89 | var sketch = new CountMinSketch<int>(20, 20, 42);
90 | var sketch2 = new CountMinSketch<int>(10, 20, 42);
91 | Assert.Throws<IncompatibleMergeException>(() => sketch.MergeInPlace(sketch2));
92 | }
93 |
94 | [Fact]
95 | [Trait("Category", "Unit")]
96 | public void MergeInPlace_WithDifferentWidths_ShouldThrowIncompatibleMergeException()
97 | {
98 | var sketch = new CountMinSketch<int>(20, 20, 42);
99 | var sketch2 = new CountMinSketch<int>(20, 10, 42);
100 | Assert.Throws<IncompatibleMergeException>(() => sketch.MergeInPlace(sketch2));
101 | }
102 |
103 | [Fact]
104 | [Trait("Category", "Unit")]
105 | public void MergeInPlace_WithDifferentSeeds_ShouldThrowIncompatibleMergeException()
106 | {
107 | var sketch = new CountMinSketch<int>(20, 20, 42);
108 | var sketch2 = new CountMinSketch<int>(20, 20, 22);
109 | Assert.Throws<IncompatibleMergeException>(() => sketch.MergeInPlace(sketch2));
110 | }
111 |
112 | [Fact]
113 | [Trait("Category", "Unit")]
114 | public void TotalCount_AfterMergeInPlace_ShouldBeSumOfMergedTotals()
115 | {
116 | var sketch = new CountMinSketch<int>(5d, 0.95d, 42);
117 | var sketch2 = new CountMinSketch<int>(5d, 0.95d, 42);
118 | for (var i = 0; i < 100; i++)
119 | {
120 | sketch.Add(42);
121 | sketch2.Add(42);
122 | }
123 | sketch.MergeInPlace(sketch2);
124 |
125 | Assert.Equal(200, sketch.TotalCount);
126 | }
127 |
128 | [Fact]
129 | [Trait("Category", "Unit")]
130 | public void EstimateCount_AfterMergeInPlace_ShouldBeWithinConfidenceInterval()
131 | {
132 | const string input = "Testing!!";
133 | var sketch = new CountMinSketch<string>(5d, 0.95, 42);
134 | var sketch2 = new CountMinSketch<string>(5d, 0.95, 42);
135 | for (var i = 0; i < 1000; i++)
136 | {
137 | sketch.Add(input);
138 | sketch2.Add(input);
139 | }
140 | sketch.MergeInPlace(sketch2);
141 | var estimate = sketch.EstimateCount(input);
142 | Assert.InRange(estimate, 2000, 2100);
143 | }
144 | }
145 | }
146 |
--------------------------------------------------------------------------------
/Maybe.Test/BloomFilter/CountingBloomFilterTests.cs:
--------------------------------------------------------------------------------
1 | using System.IO;
2 | using System.Linq;
3 | using System.Runtime.Serialization;
4 | using System.Runtime.Serialization.Formatters.Binary;
5 | using FsCheck;
6 | using FsCheck.Xunit;
7 | using Maybe.BloomFilter;
8 | using Xunit;
9 |
10 | namespace Maybe.Test.BloomFilter
11 | {
12 | public class CountingBloomFilterTests
13 | {
14 | [Property]
15 | [Trait("Category", "Property")]
16 | public Property Contains_WhenItemHasBeenAdded_ShouldReturnTrue()
17 | {
18 | return Prop.ForAll(Arb.Default.Int32(), testData =>
19 | {
20 | var filter = new CountingBloomFilter<int>(50, 0.02);
21 | filter.Add(testData);
22 | return filter.Contains(testData).ToProperty();
23 | });
24 | }
25 |
26 | [Property]
27 | [Trait("Category", "Property")]
28 | public Property Contains_WithFreshFilter_ShouldReturnFalse()
29 | {
30 | return Prop.ForAll(Arb.Default.Int32(), testData =>
31 | {
32 | var filter = new CountingBloomFilter<int>(50, 0.02);
33 | return (!filter.Contains(testData)).ToProperty();
34 | });
35 | }
36 |
37 | [Property]
38 | [Trait("Category", "Property")]
39 | public Property Contains_With5PercentFalsePositives_ShouldHaveLessThan5PercentErrors()
40 | {
41 | return Prop.ForAll(Arb.From(Gen.Choose(1, 5000)), Arb.From(Gen.Choose(1, 99)), (stepRange, errorRate) =>
42 | {
43 | var filter = new CountingBloomFilter<int>(stepRange, errorRate/100d);
44 | foreach (var num in Enumerable.Range(1, stepRange))
45 | {
46 | filter.Add(num);
47 | }
48 | var errorCount = Enumerable.Range(stepRange + 1, stepRange * 2).Count(num => filter.Contains(num));
49 | var highError = errorRate * stepRange;
50 | return (0 <= errorCount && errorCount <= highError).ToProperty(); // must be returned, otherwise FsCheck has nothing to check
51 | });
52 | }
53 |
54 | [Property]
55 | [Trait("Category", "Unit")]
56 | public Property Remove_WithItemNotInCollection_ShouldReturnFalse()
57 | {
58 | return Prop.ForAll(Arb.Default.Int32(), testData =>
59 | {
60 | var filter = new CountingBloomFilter<int>(100, 0.2);
61 | return (!filter.Remove(testData)).ToProperty(); // must be returned, otherwise FsCheck has nothing to check
62 | });
63 | }
64 |
65 | [Property]
66 | [Trait("Category", "Property")]
67 | public Property Remove_WithItemInCollection_ShouldReturnTrue()
68 | {
69 | return Prop.ForAll(Arb.Default.Int32(), testData =>
70 | {
71 | var filter = new CountingBloomFilter<int>(100, 0.2);
72 | filter.Add(testData);
73 | return filter.Remove(testData).ToProperty();
74 | });
75 | }
76 |
77 | [Property]
78 | [Trait("Category", "Property")]
79 | public Property Remove_WithItemInCollection_ShouldRemoveItemFromCollection()
80 | {
81 | return Prop.ForAll(Arb.Default.Int32(), testData =>
82 | {
83 | var filter = new CountingBloomFilter<int>(100, 0.2);
84 | filter.Add(testData);
85 | filter.Remove(testData);
86 | return (!filter.Remove(testData)).ToProperty();
87 | });
88 | }
89 |
90 | [Fact]
91 | [Trait("Category", "Unit")]
92 | public void FillRatio_WithNewFilter_ShouldBeZero()
93 | {
94 | var filter = new CountingBloomFilter<int>(1000, 0.05);
95 | Assert.Equal(0d, filter.FillRatio);
96 | }
97 |
98 | [Property]
99 | [Trait("Category", "Property")]
100 | public Property FillRatio_WithOneItem_ShouldBeNumHashesDividedByBitArraySize()
101 | {
102 | return Prop.ForAll(Arb.Default.Int32(), Arb.From(Gen.Choose(1, 10000)), Arb.From(Gen.Choose(1, 99)), (testData, bitArraySize, errorRate) =>
103 | {
104 | var realErrorRate = (int) (errorRate / 100d); // NOTE(review): errorRate is 1..99, so this is always 0 and is passed below as numHashes -- the check looks vacuous, confirm intent
105 | var filter = new MyTestBloomFilter(bitArraySize, realErrorRate);
106 | filter.Add(testData);
107 | return (realErrorRate / bitArraySize == filter.FillRatio).ToProperty();
108 | });
109 | }
110 |
111 | [Fact]
112 | [Trait("Category", "Unit")]
113 | public void Add_WithCounterAtMaxValue_ShouldRemainConstant()
114 | {
115 | var filter = new CountingBloomFilter<int>(50, 0.01);
116 | while(filter.CounterAt(42) < byte.MaxValue) // NOTE(review): terminates only if item 42 hashes to position 42 -- confirm
117 | {
118 | filter.Add(42);
119 | }
120 | filter.Add(42); // one additional add to attempt to roll over byte.maxvalue
121 | Assert.True(filter.Contains(42));
122 | }
123 |
124 | [Property]
125 | [Trait("Category", "Property")]
126 | public Property AddAndCheck_WhenItemHasBeenAddedBefore_ShouldReturnTrue()
127 | {
128 | return Prop.ForAll(Arb.Default.Int32(), testData =>
129 | {
130 | var filter = new CountingBloomFilter<int>(50, 0.02);
131 | filter.Add(testData);
132 | return filter.AddAndCheck(testData).ToProperty(); // must be returned, otherwise FsCheck has nothing to check
133 | });
134 | }
135 |
136 | [Property]
137 | [Trait("Category", "Property")]
138 | public Property AddAndCheck_WhenItemHasntBeenAddedBefore_ShouldReturnFalse()
139 | {
140 | return Prop.ForAll(Arb.Default.Int32(), testData =>
141 | {
142 | var filter = new CountingBloomFilter<int>(50, 0.02);
143 | return (!filter.AddAndCheck(testData)).ToProperty(); // a fresh filter must report the item as not seen before
144 | });
145 | }
146 |
147 | [Fact]
148 | [Trait("Category", "Unit")]
149 | public void Contains_WhenItemHasBeenAdded_AndFilterHasBeenSerializedAndUnserialized_ShouldReturnTrue()
150 | {
151 | using (var stream = new MemoryStream())
152 | {
153 | var filterOld = new CountingBloomFilter<int>(50, 0.02);
154 | filterOld.Add(42);
155 | IFormatter formatter = new BinaryFormatter();
156 | formatter.Serialize(stream, filterOld);
157 | stream.Flush();
158 | stream.Position = 0; // rewind so the formatter reads back what was just written
159 | CountingBloomFilter<int> filterNew = (CountingBloomFilter<int>)formatter.Deserialize(stream);
160 | Assert.True(filterNew.Contains(42));
161 | }
162 | }
163 |
164 | private class MyTestBloomFilter : CountingBloomFilter<int> // exposes the protected (arraySize, numHashes) constructor to the tests
165 | {
166 | public MyTestBloomFilter(int bitArraySize, int numHashes)
167 | : base(bitArraySize, numHashes)
168 | {
169 |
170 | }
171 | }
172 | }
173 | }
174 |
--------------------------------------------------------------------------------
/Maybe/SkipList/SkipList.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections;
3 | using System.Collections.Generic;
4 | using System.Runtime.Serialization;
5 | using System.Security.Permissions;
6 |
7 | namespace Maybe.SkipList
8 | {
9 | ///
10 | /// A sorted collection which allows for fast search by creating hierarchies of links between nodes
11 | ///
12 | /// The type to be contained in the
13 | [Serializable]
14 | public class SkipList : IEnumerable, ISerializable
15 | {
16 | private readonly Node<T> _headNode = new Node<T>(default(T), 33); // The max. number of levels is 33
17 | private readonly Random _randomGenerator = new Random(); // source of the coin flips that pick each new node's level
18 | private int _levels = 1; // number of levels currently in use; grows by at most one per Add
19 | private readonly IComparer<T> _comparer = Comparer<T>.Default; // replaced by the constructor when a custom comparer is supplied
20 |
21 | /// <summary>
22 | /// Creates a new <see cref="SkipList{T}"/>
23 | /// </summary>
24 | /// <param name="comparer">An optional comparer for sorting values. If null the default will be used.</param>
25 | public SkipList(IComparer<T> comparer = null)
26 | {
27 | if (comparer != null)
28 | {
29 | _comparer = comparer;
30 | }
31 | }
32 |
33 | /// <summary>
34 | /// Protected constructor used to deserialize an instance of <see cref="SkipList{T}"/>
35 | /// </summary>
36 | /// <param name="info">Serialization data; the entries named "headNode", "levels" and "comparer" are read back</param>
37 | /// <param name="context">Streaming context passed in by the formatter; not read by this constructor</param>
38 | protected SkipList(SerializationInfo info, StreamingContext context)
39 | {
40 | _headNode = (Node<T>) info.GetValue("headNode", typeof(Node<T>));
41 | _levels = info.GetInt32("levels");
42 | _comparer = (IComparer<T>)info.GetValue("comparer", typeof(IComparer<T>));
43 | }
44 |
45 | /// <summary>
46 | /// Adds the value to the skip list, placing it after any existing values that do not compare greater than it
47 | /// </summary>
48 | public void Add(T value)
49 | {
50 | var level = 0;
51 | for (var r = _randomGenerator.Next(); (r & 1) == 1; r >>= 1) // every consecutive low 1-bit promotes the new node one level
52 | {
53 | level++;
54 | if (level == _levels)
55 | {
56 | _levels++; // the list grows by at most one level per insert
57 | break;
58 | }
59 | }
60 |
61 | var addNode = new Node<T>(value, level + 1);
62 | var currentNode = _headNode;
63 | for (var currentLevel = _levels - 1; currentLevel >= 0; currentLevel--)
64 | {
65 | while (currentNode.HasNextAtLevel(currentLevel))
66 | {
67 | if (_comparer.Compare(currentNode.Next[currentLevel].Value, value) > 0) // comparers may return any positive number for "greater", not only 1
68 | {
69 | // current value has skipped over the needed position, need to drop down a level and look there
70 | break;
71 | }
72 | currentNode = currentNode.Next[currentLevel];
73 | }
74 |
75 | if (currentLevel <= level)
76 | {
77 | // add the node here
78 | addNode.Next[currentLevel] = currentNode.Next[currentLevel];
79 | currentNode.Next[currentLevel] = addNode;
80 | }
81 | }
82 | }
83 |
84 | ///
85 | /// Adds multiple values to the collection
86 | ///
87 | /// A collection of values which should all be added
88 | public void AddRange(IEnumerable values)
89 | {
90 | if (values == null) return;
91 | foreach (var value in values)
92 | {
93 | Add(value);
94 | }
95 | }
96 |
97 | ///
98 | /// Returns whether a particular value already exists in the skip list
99 | ///
100 | public bool Contains(T value)
101 | {
102 | var currentNode = _headNode;
103 | for (var currentLevel = _levels - 1; currentLevel >= 0; currentLevel--)
104 | {
105 | while (currentNode.HasNextAtLevel(currentLevel))
106 | {
107 | if (_comparer.Compare(currentNode.Next[currentLevel].Value, value) == 1) break;
108 | if (_comparer.Compare(currentNode.Next[currentLevel].Value, value) == 0) return true;
109 | currentNode = currentNode.Next[currentLevel];
110 | }
111 | }
112 | return false;
113 | }
114 |
115 | ///
116 | /// Attempts to remove one occurence of a particular value from the skip list. Returns
117 | /// whether the value was found in the skip list.
118 | ///
119 | public bool Remove(T value)
120 | {
121 | var currentNode = _headNode;
122 |
123 | var found = false;
124 | for (var currentLevel = _levels - 1; currentLevel >= 0; currentLevel--)
125 | {
126 | while (currentNode.HasNextAtLevel(currentLevel))
127 | {
128 | if (_comparer.Compare(currentNode.Next[currentLevel].Value, value) == 0)
129 | {
130 | found = true;
131 | currentNode.Next[currentLevel] = currentNode.Next[currentLevel].Next[currentLevel];
132 | break;
133 | }
134 |
135 | if (_comparer.Compare(currentNode.Next[currentLevel].Value, value) == 1) { break; }
136 |
137 | currentNode = currentNode.Next[currentLevel];
138 | }
139 | }
140 |
141 | return found;
142 | }
143 |
144 | IEnumerator IEnumerable.GetEnumerator()
145 | {
146 | return GetEnumerator();
147 | }
148 |
149 | ///
150 | /// Enumerates all nodes of this collection
151 | ///
152 | ///
153 | public IEnumerator GetEnumerator()
154 | {
155 | var currentNode = _headNode.Next[0];
156 | while (currentNode != null && currentNode.HasNextAtLevel(0))
157 | {
158 | yield return currentNode.Value;
159 | currentNode = currentNode.Next[0];
160 | }
161 |
162 | if (currentNode != null)
163 | {
164 | yield return currentNode.Value;
165 | }
166 | }
167 |
168 | ///
169 | /// Helper method for serialization of this class."/>
170 | ///
171 | ///
172 | ///
173 | public virtual void GetObjectData(SerializationInfo info, StreamingContext context)
174 | {
175 | info.AddValue("headNode", _headNode);
176 | info.AddValue("levels", _levels);
177 | info.AddValue("comparer", _comparer);
178 | }
179 | }
180 | }
181 |
--------------------------------------------------------------------------------
/Maybe.Test/BloomFilter/BloomFilterTests.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Linq;
4 | using System.Runtime.Serialization;
5 | using System.Runtime.Serialization.Formatters.Binary;
6 | using FsCheck;
7 | using FsCheck.Xunit;
8 | using Maybe.BloomFilter;
9 | using Xunit;
10 |
11 | namespace Maybe.Test.BloomFilter
12 | {
13 |     /// <summary>
14 |     /// Unit and property-based tests for <see cref="BloomFilter{T}"/>.
15 |     /// NOTE(review): the element type <c>int</c> and the expected <see cref="ArgumentException"/> are assumed
16 |     /// from the Int32 generators and the test names - confirm against the library.
17 |     /// </summary>
18 |     public class BloomFilterTests
19 |     {
20 |         [Property]
21 |         [Trait("Category", "Property")]
22 |         public Property Contains_WhenItemHasBeenAdded_ShouldReturnTrue()
23 |         {
24 |             return Prop.ForAll(Arb.Default.Int32(), item =>
25 |             {
26 |                 var filter = new BloomFilter<int>(50, 0.02);
27 |                 filter.Add(item);
28 |                 return filter.Contains(item).ToProperty();
29 |             });
30 |         }
31 |
32 |         [Property]
33 |         [Trait("Category", "Property")]
34 |         public Property Contains_WithFreshFilter_ShouldReturnFalse()
35 |         {
36 |             return Prop.ForAll(Arb.Default.Int32(), item =>
37 |             {
38 |                 var filter = new BloomFilter<int>(50, 0.02);
39 |                 return (!filter.Contains(item)).ToProperty();
40 |             });
41 |         }
42 |
43 |         [Property]
44 |         [Trait("Category", "Property")]
45 |         public Property Contains_With5PercentFalsePositives_ShouldHaveLessThan5PercentErrors()
46 |         {
47 |             return Prop.ForAll(Arb.From(Gen.Choose(1, 5000)), Arb.From(Gen.Choose(1, 99)), (stepRange, errorRate) =>
48 |             {
49 |                 // errorRate is generated as a whole percentage (1-99) and converted to a fraction here.
50 |                 var filter = new BloomFilter<int>(stepRange, errorRate / 100d);
51 |                 foreach (var num in Enumerable.Range(1, stepRange))
52 |                 {
53 |                     filter.Add(num);
54 |                 }
55 |
56 |                 // Probe stepRange * 2 values that were never added; every hit is a false positive.
57 |                 var errorCount = Enumerable.Range(stepRange + 1, stepRange * 2).Count(num => filter.Contains(num));
58 |                 var highError = errorRate * stepRange;
59 |
60 |                 // The result has to be returned: a Property that is built and discarded is never checked.
61 |                 return (0 <= errorCount && errorCount <= highError).ToProperty();
62 |             });
63 |         }
64 |
65 |         [Fact]
66 |         [Trait("Category", "Unit")]
67 |         public void FillRatio_WithNewFilter_ShouldBeZero()
68 |         {
69 |             var filter = new BloomFilter<int>(1000, 0.05);
70 |             Assert.Equal(0d, filter.FillRatio);
71 |         }
72 |
73 |         [Property]
74 |         [Trait("Category", "Property")]
75 |         public Property FillRatio_WithOneItem_ShouldBeNumHashesDevidedByBitArraySize_Prop()
76 |         {
77 |             return Prop.ForAll(Arb.From(Gen.Choose(1, 10000)), Arb.From(Gen.Choose(1, 99)), (bitArraySize, numHashes) =>
78 |             {
79 |                 var filter = new MyTestBloomFilter(bitArraySize, numHashes);
80 |                 filter.Add(42);
81 |
82 |                 // NOTE(review): this result is discarded, so the property only fails if the body throws.
83 |                 // Returning it as written would presumably fail when hash positions collide or when
84 |                 // numHashes exceeds bitArraySize - confirm the intended bound before enabling the check.
85 |                 (numHashes / (double)bitArraySize == filter.FillRatio).ToProperty();
86 |             });
87 |         }
88 |
89 |         [Fact]
90 |         [Trait("Category", "Unit")]
91 |         public void Contains_WhenItemHasBeenAdded_AndFilterHasBeenSerializedAndUnserialized_ShouldReturnTrue()
92 |         {
93 |             using (var stream = new MemoryStream())
94 |             {
95 |                 var filterOld = new BloomFilter<int>(50, 0.02);
96 |                 filterOld.Add(42);
97 |
98 |                 IFormatter formatter = new BinaryFormatter();
99 |                 formatter.Serialize(stream, filterOld);
100 |                 stream.Flush();
101 |                 stream.Position = 0; // rewind so the formatter reads what was just written
102 |
103 |                 var filterNew = (BloomFilter<int>)formatter.Deserialize(stream);
104 |                 Assert.True(filterNew.Contains(42));
105 |             }
106 |         }
107 |
108 |         // Forwards to the base constructor taking (bitArraySize, numHashes) so a test can fix both values directly.
109 |         private class MyTestBloomFilter : BloomFilter<int>
110 |         {
111 |             public MyTestBloomFilter(int bitArraySize, int numHashes)
112 |                 : base(bitArraySize, numHashes)
113 |             {
114 |             }
115 |         }
116 |
117 |         [Property]
118 |         [Trait("Category", "Property")]
119 |         public Property AddAndCheck_WhenItemHasBeenAddedBefore_ShouldReturnTrue()
120 |         {
121 |             return Prop.ForAll(Arb.Default.Int32(), testData =>
122 |             {
123 |                 var filter = new BloomFilter<int>(50, 0.02);
124 |                 filter.Add(testData);
125 |                 return filter.AddAndCheck(testData).ToProperty();
126 |             });
127 |         }
128 |
129 |         [Property]
130 |         [Trait("Category", "Property")]
131 |         public Property AddAndCheck_WhenItemHasntBeenAddedBefore_ShouldReturnFalse()
132 |         {
133 |             return Prop.ForAll(Arb.Default.Int32(), testData =>
134 |             {
135 |                 var filter = new BloomFilter<int>(50, 0.02);
136 |                 return (!filter.AddAndCheck(testData)).ToProperty();
137 |             });
138 |         }
139 |
140 |         [Fact]
141 |         [Trait("Category", "Unit")]
142 |         public void Create_WithZeroExpectedSize_ShouldThrowArgumentException()
143 |         {
144 |             Assert.Throws<ArgumentException>(() => BloomFilter<int>.Create(0, 0.5));
145 |         }
146 |
147 |         [Fact]
148 |         [Trait("Category", "Unit")]
149 |         public void Create_WithNegativeExpectedSize_ShouldThrowArgumentException()
150 |         {
151 |             Assert.Throws<ArgumentException>(() => BloomFilter<int>.Create(-100, 0.5));
152 |         }
153 |
154 |         [Fact]
155 |         [Trait("Category", "Unit")]
156 |         public void Create_WithErrorRateLessThanZero_ShouldThrowArgumentException()
157 |         {
158 |             Assert.Throws<ArgumentException>(() => BloomFilter<int>.Create(100, -5));
159 |         }
160 |
161 |         [Fact]
162 |         [Trait("Category", "Unit")]
163 |         public void Create_WithErrorRateGreaterThanOne_ShouldThrowArgumentException()
164 |         {
165 |             Assert.Throws<ArgumentException>(() => BloomFilter<int>.Create(100, 5));
166 |         }
167 |
168 |         [Fact]
169 |         [Trait("Category", "Unit")]
170 |         public void Create_WithValidParameters_ShouldReturnBloomFilter()
171 |         {
172 |             var filter = BloomFilter<int>.Create(50, 0.03);
173 |             Assert.NotNull(filter);
174 |         }
175 |
176 |         [Fact]
177 |         [Trait("Category", "Unit")]
178 |         public void Constructor_WithZeroExpectedSize_ShouldThrowArgumentException()
179 |         {
180 |             Assert.Throws<ArgumentException>(() => new BloomFilter<int>(0, 0.5d));
181 |         }
182 |
183 |         [Fact]
184 |         [Trait("Category", "Unit")]
185 |         public void Constructor_WithNegativeExpectedSize_ShouldThrowArgumentException()
186 |         {
187 |             Assert.Throws<ArgumentException>(() => new BloomFilter<int>(-100, 0.5d));
188 |         }
189 |
190 |         [Fact]
191 |         [Trait("Category", "Unit")]
192 |         public void Constructor_WithErrorRateLessThanZero_ShouldThrowArgumentException()
193 |         {
194 |             Assert.Throws<ArgumentException>(() => new BloomFilter<int>(100, -5d));
195 |         }
196 |
197 |         [Fact]
198 |         [Trait("Category", "Unit")]
199 |         public void Constructor_WithErrorRateGreaterThanOne_ShouldThrowArgumentException()
200 |         {
201 |             Assert.Throws<ArgumentException>(() => new BloomFilter<int>(100, 5d));
202 |         }
203 |     }
204 | }
205 |
--------------------------------------------------------------------------------
/Maybe/CountMinSketch/CountMinSketch.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Linq;
3 | using Maybe.Utilities;
4 |
5 | namespace Maybe.CountMinSketch
6 | {
7 |     /// <summary>
8 |     /// Count min sketch is a data structure that allows you to track the frequency of an item occurring within a large set. The count min sketch will never undercount items, but it can overcount by a controllable confidence interval.
9 |     /// </summary>
10 |     /// <typeparam name="T">The type of the items whose frequency is tracked.</typeparam>
11 |     [Serializable]
12 |     public class CountMinSketch<T> : CountMinSketchBase<T>
13 |     {
14 |         private readonly MurmurHash3 _hasher = new MurmurHash3();
15 |         private readonly int _depth;
16 |         private readonly int _width;
17 |         private long[,] _table;
18 |         private long[] _hashA; // filled from Random(seed) in InitTablesWith; not read anywhere else in this class
19 |         private long _totalCount;
20 |
21 |         /// <summary>
22 |         /// Creates a new instance of <see cref="CountMinSketch{T}"/>
23 |         /// </summary>
24 |         /// <param name="depth">The number of buckets to be used for counting. More buckets will increase the probability of computing the value correctly.</param>
25 |         /// <param name="width">The size of buckets for counting items. Wider buckets will increase accuracy but use more memory.</param>
26 |         /// <param name="seed">Some seed for random number generation. This is passed in to allow multiple sketches to be sync'ed with the same seed.</param>
27 |         /// <exception cref="ArgumentException">Thrown when <paramref name="depth"/> or <paramref name="width"/> is not positive.</exception>
28 |         public CountMinSketch(int depth, int width, int seed)
29 |         {
30 |             if (depth <= 0) { throw new ArgumentException("Depth must be a positive integer.", nameof(depth)); }
31 |             if (width <= 0) { throw new ArgumentException("Width must be a positive integer.", nameof(width)); }
32 |
33 |             Seed = seed;
34 |             _depth = depth;
35 |             _width = width;
36 |             RelativeError = 2d / width;
37 |             Confidence = 1 - 1 / Math.Pow(2, depth);
38 |             InitTablesWith(depth, width, seed);
39 |         }
40 |
41 |         /// <summary>
42 |         /// Creates a new instance of <see cref="CountMinSketch{T}"/>
43 |         /// </summary>
44 |         /// <param name="epsilon">The accuracy of the counts produced by this data structure.</param>
45 |         /// <param name="confidence">The probability of computing the value correctly.</param>
46 |         /// <param name="seed">Some seed for random number generation. This is passed in to allow multiple sketches to be sync'ed with the same seed.</param>
47 |         /// <exception cref="ArgumentException">Thrown when <paramref name="epsilon"/> is not positive or <paramref name="confidence"/> is not strictly between 0 and 1.</exception>
48 |         public CountMinSketch(double epsilon, double confidence, int seed)
49 |         {
50 |             if (epsilon <= 0d) { throw new ArgumentException("Relative error must be positive.", nameof(epsilon)); }
51 |             if (confidence <= 0d || confidence >= 1d) { throw new ArgumentException("Confidence must be greater than 0 and less than 1", nameof(confidence)); }
52 |
53 |             // Record the seed here as well; MergeInPlace compares Seed values to reject mismatched sketches.
54 |             Seed = seed;
55 |             RelativeError = epsilon;
56 |             Confidence = confidence;
57 |             _width = (int)Math.Ceiling(2 / epsilon);
58 |             _depth = (int)Math.Ceiling(-Math.Log(1 - confidence) / Math.Log(2));
59 |             InitTablesWith(_depth, _width, seed);
60 |         }
61 |
62 |         // Allocates the depth x width counter table and fills _hashA with one seeded random value per row.
63 |         private void InitTablesWith(int depth, int width, int seed)
64 |         {
65 |             _table = new long[depth, width];
66 |             _hashA = new long[depth];
67 |             var r = new Random(seed);
68 |             for (var i = 0; i < depth; i++)
69 |             {
70 |                 _hashA[i] = r.Next();
71 |             }
72 |         }
73 |
74 |         /// <summary>
75 |         /// Gets the seed that was used to initialize this CountMinSketch.
76 |         /// </summary>
77 |         public override int Seed { get; }
78 |
79 |         /// <summary>
80 |         /// Gets the epsilon setting used to initialize this <see cref="CountMinSketch{T}"/>.
81 |         /// </summary>
82 |         public override double RelativeError { get; }
83 |
84 |         /// <summary>
85 |         /// Gets the confidence interval used to initialize this <see cref="CountMinSketch{T}"/>
86 |         /// </summary>
87 |         public override double Confidence { get; }
88 |
89 |         /// <summary>
90 |         /// The number of buckets used for tracking items.
91 |         /// </summary>
92 |         public override int Depth => _depth;
93 |
94 |         /// <summary>
95 |         /// Gets the size of each bucket used for tracking frequency of items.
96 |         /// </summary>
97 |         public override int Width => _width;
98 |
99 |         /// <summary>
100 |         /// Gets the total number of items in this collection.
101 |         /// </summary>
102 |         public override long TotalCount => _totalCount;
103 |
104 |         /// <summary>
105 |         /// Gets the table that is currently being used to track frequency of items.
106 |         /// </summary>
107 |         public override long[,] Table => _table;
108 |
109 |         /// <summary>
110 |         /// Adds a new item to the collection.
111 |         /// </summary>
112 |         /// <param name="item">The item to be added to the collection</param>
113 |         public override void Add(T item) => Add(item, 1);
114 |
115 |         /// <summary>
116 |         /// Adds an item to the collection a specified number of times
117 |         /// </summary>
118 |         /// <param name="item">The item to be added</param>
119 |         /// <param name="count">The number of times the item should be added</param>
120 |         public void Add(T item, long count)
121 |         {
122 |             // One bucket per row: bump the selected counter in every row, then the running total.
123 |             var buckets = GetHashBuckets(item, Depth, Width);
124 |             for (var i = 0; i < _depth; i++)
125 |             {
126 |                 _table[i, buckets[i]] += count;
127 |             }
128 |             _totalCount += count;
129 |         }
130 |
131 |         /// <summary>
132 |         /// Estimates the number of times an item has been added to this <see cref="CountMinSketch{T}"/>
133 |         /// </summary>
134 |         /// <param name="item">The item to check</param>
135 |         /// <returns>An estimated number of times that the item has been added to the collection. This will never be low but could be higher than the actual result.</returns>
136 |         public override long EstimateCount(T item)
137 |         {
138 |             var res = long.MaxValue;
139 |             var buckets = GetHashBuckets(item, Depth, Width);
140 |
141 |             // The estimate is the smallest of the item's counters across all rows.
142 |             for (var i = 0; i < Depth; i++)
143 |             {
144 |                 res = Math.Min(res, _table[i, buckets[i]]);
145 |             }
146 |             return res;
147 |         }
148 |
149 |         // Returns the column selected for the item in each of the hashCount rows; byte arrays are hashed as-is,
150 |         // any other item is first converted with ByteConverter.
151 |         private int[] GetHashBuckets(T item, int hashCount, int max)
152 |         {
153 |             var bytes = item as byte[] ?? ByteConverter.ConvertToByteArray(item);
154 |
155 |             var result = new int[hashCount];
156 |             var hashes = _hasher.GetHashes(bytes, hashCount, max).ToList();
157 |             for (var i = 0; i < hashCount; i++)
158 |             {
159 |                 result[i] = hashes[i];
160 |             }
161 |             return result;
162 |         }
163 |
164 |         /// <summary>
165 |         /// Merges another instance of <see cref="CountMinSketchBase{T}"/> with this collection. The results will be an aggregate of both collections after merging.
166 |         /// </summary>
167 |         /// <param name="other">The <see cref="CountMinSketchBase{T}"/> that should be merged into the current collection.</param>
168 |         /// <returns>This <see cref="CountMinSketch{T}"/> with the results from the other collection included.</returns>
169 |         /// <exception cref="IncompatibleMergeException">Thrown when <paramref name="other"/> is null or differs in depth, width or seed.</exception>
170 |         public override CountMinSketchBase<T> MergeInPlace(CountMinSketchBase<T> other)
171 |         {
172 |             if (other == null) { throw new IncompatibleMergeException("Cannot merge null estimator"); }
173 |             if (other.Depth != Depth) { throw new IncompatibleMergeException("Cannot merge estimators with different depths"); }
174 |             if (other.Width != Width) { throw new IncompatibleMergeException("Cannot merge estimators with different widths"); }
175 |             if (other.Seed != Seed) { throw new IncompatibleMergeException("Cannot merge sketches that were initialized with different seeds"); }
176 |
177 |             // Dimensions were checked above, so both tables can be summed cell by cell.
178 |             var otherTable = other.Table;
179 |             for (var i = 0; i < _table.GetLength(0); i++)
180 |             {
181 |                 for (var j = 0; j < _table.GetLength(1); j++)
182 |                 {
183 |                     _table[i, j] += otherTable[i, j];
184 |                 }
185 |             }
186 |             _totalCount += other.TotalCount;
187 |             return this;
188 |         }
189 |     }
190 | }
191 |
--------------------------------------------------------------------------------