├── .gitignore ├── DbscanDemo ├── DbscanDemo.csproj ├── Eventing │ ├── MyFeatureConsoleLogger.cs │ ├── QueueExchange.cs │ ├── QueueExchangePublisher.cs │ └── QueueExchangeSubscriber.cs ├── MyFeature.cs ├── MyFeatureDataSource.cs └── Program.cs ├── DbscanImplementation ├── DbscanAlgorithm.cs ├── DbscanImplementation.csproj ├── DbscanPoint.cs ├── DbscanResult.cs ├── Eventing │ ├── EmptyDbscanEventPublisher.cs │ ├── Events.cs │ ├── IDbscanEventPublisher.cs │ └── IDbscanEventSubscriber.cs ├── PointType.cs └── ResultBuilding │ └── DbscanResultBuilder.cs ├── README.md └── dbscan.sln /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.*~ 3 | project.lock.json 4 | .DS_Store 5 | *.pyc 6 | nupkg/ 7 | 8 | # Visual Studio Code 9 | .vscode 10 | 11 | # Rider 12 | .idea 13 | 14 | # User-specific files 15 | *.suo 16 | *.user 17 | *.userosscache 18 | *.sln.docstates 19 | 20 | # Build results 21 | [Dd]ebug/ 22 | [Dd]ebugPublic/ 23 | [Rr]elease/ 24 | [Rr]eleases/ 25 | x64/ 26 | x86/ 27 | build/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Oo]ut/ 32 | msbuild.log 33 | msbuild.err 34 | msbuild.wrn 35 | 36 | # Visual Studio 2015 37 | .vs/ -------------------------------------------------------------------------------- /DbscanDemo/DbscanDemo.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Exe 9 | netcoreapp3.0 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /DbscanDemo/Eventing/MyFeatureConsoleLogger.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using DbscanImplementation.Eventing; 3 | 4 | namespace DbscanDemo.Eventing 5 | { 6 | public class MyFeatureConsoleLogger : IDbscanEventPublisher 7 | { 8 | public void Publish(TObj e) 9 | { 10 | //INFO: match the events you want to process 11 | var info = e switch 12 | { 13 | PointTypeAssigned pta => 
$"{pta.Point.ClusterId}: {pta.AssignedType}", 14 | _ => null 15 | }; 16 | 17 | if (info != null) 18 | { 19 | Console.WriteLine(info); 20 | } 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /DbscanDemo/Eventing/QueueExchange.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Concurrent; 2 | using System.Collections.Generic; 3 | 4 | namespace DbscanDemo.Eventing 5 | { 6 | /// 7 | /// Example for an exchange 8 | /// 9 | /// 10 | public class QueueExchange 11 | { 12 | public QueueExchange() 13 | { 14 | Queue = new ConcurrentQueue(); 15 | } 16 | 17 | public ConcurrentQueue Queue { get; private set; } 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /DbscanDemo/Eventing/QueueExchangePublisher.cs: -------------------------------------------------------------------------------- 1 | using DbscanImplementation.Eventing; 2 | 3 | namespace DbscanDemo.Eventing 4 | { 5 | /// 6 | /// An example publisher of QueueExchange 7 | /// 8 | public class QueueExchangePublisher : IDbscanEventPublisher 9 | { 10 | private readonly QueueExchange exchange; 11 | 12 | public QueueExchangePublisher(QueueExchange exchange) 13 | { 14 | this.exchange = exchange; 15 | } 16 | 17 | public void Publish(TObj @event) 18 | { 19 | exchange.Queue.Enqueue(@event); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /DbscanDemo/Eventing/QueueExchangeSubscriber.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | using DbscanImplementation; 3 | using DbscanImplementation.Eventing; 4 | using DbscanImplementation.ResultBuilding; 5 | 6 | namespace DbscanDemo.Eventing 7 | { 8 | /// 9 | /// An example for how to get data from an exchange and build result asynchronously 10 | /// 11 | /// queue item type 12 | public class 
QueueExchangeSubscriber : IDbscanEventSubscriber> 13 | { 14 | private readonly QueueExchange exchange; 15 | 16 | public QueueExchangeSubscriber(QueueExchange exchange) 17 | { 18 | this.exchange = exchange; 19 | } 20 | 21 | public Task> Subscribe() 22 | { 23 | return Task.Factory.StartNew(() => 24 | { 25 | var resultBuilder = new DbscanResultBuilder(); 26 | 27 | var stopDequeue = false; 28 | 29 | while (!stopDequeue) 30 | { 31 | if (exchange.Queue.Count == 0) 32 | { 33 | Task.Delay(100).GetAwaiter().GetResult(); 34 | continue; 35 | } 36 | 37 | if (!exchange.Queue.TryDequeue(out TQ qEvent)) 38 | { 39 | continue; 40 | } 41 | 42 | //you can get extra event information from algorithm here and process as you wish 43 | switch (qEvent) 44 | { 45 | case PointProcessFinished e: 46 | resultBuilder.Process(e.Point); 47 | break; 48 | case ComputeFinished e: 49 | stopDequeue = true; 50 | break; 51 | } 52 | if (stopDequeue) 53 | { 54 | break; 55 | } 56 | } 57 | 58 | return resultBuilder.Result; 59 | }); 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /DbscanDemo/MyFeature.cs: -------------------------------------------------------------------------------- 1 | namespace DbscanDemo 2 | { 3 | public class MyFeature 4 | { 5 | public double X; 6 | public double Y; 7 | 8 | public MyFeature(double x, double y) 9 | { 10 | X = x; 11 | Y = y; 12 | } 13 | } 14 | } -------------------------------------------------------------------------------- /DbscanDemo/MyFeatureDataSource.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace DbscanDemo 4 | { 5 | public class MyFeatureDataSource 6 | { 7 | /// 8 | /// Builds a feature data that contains two clusters 9 | /// according to metric and 10 | /// 1- around the coordinates of (1,1) in total of 4001 11 | /// 2- around the coordinates of (5,5) in total of 4001 12 | /// Also contains a noise point at 
coordinate of (10, 10) 13 | /// 14 | /// List of feature set 15 | public List GetFeatureData() 16 | { 17 | var testPoints = new List() { }; 18 | 19 | //points around (1,1) with most 1 distance 20 | testPoints.Add(new MyFeature(1, 1)); 21 | for (int i = 1; i <= 1000; i++) 22 | { 23 | var v = (float)i / 1000; 24 | testPoints.Add(new MyFeature(1, 1 + v)); 25 | testPoints.Add(new MyFeature(1, 1 - v)); 26 | testPoints.Add(new MyFeature(1 - v, 1)); 27 | testPoints.Add(new MyFeature(1 + v, 1)); 28 | } 29 | 30 | //points around (5,5) with most 1 distance 31 | testPoints.Add(new MyFeature(5, 5)); 32 | for (int i = 1; i <= 1000; i++) 33 | { 34 | var v = (float)i / 1000; 35 | testPoints.Add(new MyFeature(5, 5 + v)); 36 | testPoints.Add(new MyFeature(5, 5 - v)); 37 | testPoints.Add(new MyFeature(5 - v, 5)); 38 | testPoints.Add(new MyFeature(5 + v, 5)); 39 | } 40 | 41 | //noise point 42 | testPoints.Add(new MyFeature(10, 10)); 43 | 44 | return testPoints; 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /DbscanDemo/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Threading; 4 | using DbscanDemo.Eventing; 5 | using DbscanImplementation; 6 | 7 | namespace DbscanDemo 8 | { 9 | public class Program 10 | { 11 | public static void Main() 12 | { 13 | var features = new MyFeatureDataSource().GetFeatureData(); 14 | 15 | RunOfflineDbscan(features); 16 | 17 | RunOfflineDbscanWithEventPublisher(features); 18 | 19 | RunOfflineDbscanWithResultBuilderAsync(features); 20 | } 21 | 22 | /// 23 | /// async compute call and publishes all events inside dbscan to an exchange 24 | /// By using an async subscriber all events processed and a result created. 
25 | /// 26 | /// 27 | private static void RunOfflineDbscanWithResultBuilderAsync(List features) 28 | { 29 | var exchange = new QueueExchange(); 30 | 31 | var publisher = new QueueExchangePublisher(exchange); 32 | 33 | var dbscanAsync = new DbscanAlgorithm( 34 | EuclidienDistance, 35 | publisher 36 | ); 37 | 38 | var subscriber = new QueueExchangeSubscriber(exchange); 39 | 40 | var subscriptionTask = subscriber.Subscribe(); 41 | 42 | var computeDbscanTask = dbscanAsync.ComputeClusterDbscanAsync(allPoints: features.ToArray(), 43 | epsilon: .01, minimumPoints: 10, CancellationToken.None); 44 | 45 | Console.WriteLine("Async dbscan operation continues..."); 46 | 47 | var computeResult = computeDbscanTask.GetAwaiter().GetResult(); 48 | //INFO: eventResult is the result the subscriber rebuilt from the published events 49 | var eventResult = subscriptionTask.GetAwaiter().GetResult(); 50 | //INFO: report each result separately; the second line must read eventResult, not computeResult 51 | Console.WriteLine($"Clusters from Computed: {computeResult.Clusters.Count}"); 52 | Console.WriteLine($"Clusters from Events: {eventResult.Clusters.Count}"); 53 | } 54 | 55 | /// 56 | /// uses console logger to process events and writes to console 57 | /// 58 | /// 59 | private static void RunOfflineDbscanWithEventPublisher(List features) 60 | { 61 | //INFO: second argument of constructor takes an instance implemented with IDbscanEventPublisher interface 62 | var dbscanWithEventing = new DbscanAlgorithm( 63 | EuclidienDistance, 64 | new MyFeatureConsoleLogger() 65 | ); 66 | 67 | var resultWithEventing = dbscanWithEventing.ComputeClusterDbscan(allPoints: features.ToArray(), 68 | epsilon: .01, minimumPoints: 10); 69 | 70 | Console.WriteLine($"Noise: {resultWithEventing.Noise.Count}"); 71 | 72 | Console.WriteLine($"# of Clusters: {resultWithEventing.Clusters.Count}"); 73 | } 74 | 75 | /// 76 | /// Most basic usage of Dbscan implementation, with no eventing and async mechanism 77 | /// 78 | /// Features provided 79 | private static void RunOfflineDbscan(List features) 80 | { 81 | var simpleDbscan = new DbscanAlgorithm(EuclidienDistance); 82 | 83 | var result = 
simpleDbscan.ComputeClusterDbscan(allPoints: features.ToArray(), 84 | epsilon: .01, minimumPoints: 10); 85 | 86 | Console.WriteLine($"Noise: {result.Noise.Count}"); 87 | 88 | Console.WriteLine($"# of Clusters: {result.Clusters.Count}"); 89 | } 90 | 91 | /// 92 | /// Euclidien distance function 93 | /// 94 | /// 95 | /// 96 | /// 97 | private static double EuclidienDistance(MyFeature feature1, MyFeature feature2) 98 | { 99 | return Math.Sqrt( 100 | ((feature1.X - feature2.X) * (feature1.X - feature2.X)) + 101 | ((feature1.Y - feature2.Y) * (feature1.Y - feature2.Y)) 102 | ); 103 | } 104 | 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /DbscanImplementation/DbscanAlgorithm.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | using System.Threading; 4 | using System.Threading.Tasks; 5 | using DbscanImplementation.Eventing; 6 | using DbscanImplementation.ResultBuilding; 7 | 8 | namespace DbscanImplementation 9 | { 10 | /// 11 | /// DBSCAN algorithm implementation type 12 | /// 13 | /// Takes dataset item row (features, preferences, vector) 14 | public class DbscanAlgorithm 15 | { 16 | /// 17 | /// distance calculation metric function between two feature 18 | /// 19 | public readonly Func MetricFunction; 20 | 21 | /// 22 | /// Curried Function that checking two feature as neighbor 23 | /// 24 | public readonly Func, bool>> RegionQueryPredicate; 25 | 26 | private readonly IDbscanEventPublisher publisher; 27 | 28 | /// 29 | /// Takes metric function to compute distances between two 30 | /// 31 | /// 32 | public DbscanAlgorithm(Func metricFunc) 33 | { 34 | MetricFunction = metricFunc ?? 
throw new ArgumentNullException(nameof(metricFunc)); 35 | 36 | RegionQueryPredicate = 37 | (mainFeature, epsilon) => relatedPoint => MetricFunction(mainFeature, relatedPoint.Feature) <= epsilon; 38 | 39 | publisher = new EmptyDbscanEventPublisher(); 40 | } 41 | 42 | public DbscanAlgorithm(Func metricFunc, IDbscanEventPublisher publisher) 43 | : this(metricFunc) 44 | { 45 | this.publisher = publisher ?? throw new ArgumentNullException(nameof(publisher)); 46 | } 47 | 48 | public Task> ComputeClusterDbscanAsync(TF[] allPoints, double epsilon, int minimumPoints, 49 | CancellationToken cancellationToken) 50 | { 51 | return Task.Factory.StartNew(() => 52 | ComputeClusterDbscan(allPoints, epsilon, minimumPoints), 53 | cancellationToken, TaskCreationOptions.LongRunning, TaskScheduler.Current); 54 | } 55 | 56 | /// 57 | /// Performs the DBSCAN clustering algorithm. 58 | /// 59 | /// feature set 60 | /// Desired region ball radius 61 | /// Minimum number of points to be in a region 62 | /// Overall result of cluster compute operation 63 | public DbscanResult ComputeClusterDbscan(TF[] allPoints, double epsilon, int minimumPoints) 64 | { 65 | if (epsilon <= 0) 66 | { 67 | throw new ArgumentOutOfRangeException(nameof(epsilon), "Must be greater than zero"); 68 | } 69 | 70 | if (minimumPoints <= 0) 71 | { 72 | throw new ArgumentOutOfRangeException(nameof(minimumPoints), "Must be greater than zero"); 73 | } 74 | 75 | var allPointsDbscan = allPoints.Select(x => new DbscanPoint(x)).ToArray(); 76 | 77 | int clusterId = 0; 78 | 79 | var computeId = Guid.NewGuid(); 80 | 81 | publisher.Publish(new ComputeStarted(computeId)); 82 | 83 | for (int i = 0; i < allPointsDbscan.Length; i++) 84 | { 85 | var currentPoint = allPointsDbscan[i]; 86 | 87 | if (currentPoint.PointType.HasValue) 88 | { 89 | publisher.Publish(new PointAlreadyProcessed(currentPoint)); 90 | 91 | continue; 92 | } 93 | 94 | publisher.Publish(new PointProcessStarted(currentPoint)); 95 | 96 | publisher.Publish(new 
RegionQueryStarted(currentPoint, epsilon, minimumPoints)); 97 | 98 | var neighborPoints = RegionQuery(allPointsDbscan, currentPoint.Feature, epsilon); 99 | 100 | publisher.Publish(new RegionQueryFinished(currentPoint, neighborPoints)); 101 | 102 | if (neighborPoints.Length < minimumPoints) 103 | { 104 | currentPoint.PointType = PointType.Noise; 105 | 106 | publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Noise)); 107 | 108 | publisher.Publish(new PointProcessFinished(currentPoint)); 109 | 110 | continue; 111 | } 112 | 113 | clusterId++; 114 | 115 | currentPoint.ClusterId = clusterId; 116 | 117 | currentPoint.PointType = PointType.Core; 118 | 119 | publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Core)); 120 | 121 | publisher.Publish(new PointProcessFinished(currentPoint)); 122 | 123 | publisher.Publish( 124 | new ClusteringStarted(currentPoint, neighborPoints, clusterId, epsilon, minimumPoints)); 125 | 126 | ExpandCluster(allPointsDbscan, neighborPoints, clusterId, epsilon, minimumPoints); 127 | 128 | publisher.Publish( 129 | new ClusteringFinished(currentPoint, neighborPoints, clusterId, epsilon, minimumPoints)); 130 | } 131 | 132 | publisher.Publish(new ComputeFinished(computeId)); 133 | 134 | var resultBuilder = new DbscanResultBuilder(); 135 | 136 | foreach (var p in allPointsDbscan) 137 | { 138 | resultBuilder.Process(p); 139 | } 140 | 141 | return resultBuilder.Result; 142 | } 143 | 144 | /// 145 | /// Checks current cluster for expanding it 146 | /// 147 | /// Dataset 148 | /// other points in same region 149 | /// given clusterId 150 | /// Desired region ball radius 151 | /// Minimum number of points to be in a region 152 | private void ExpandCluster(DbscanPoint[] allPoints, DbscanPoint[] neighborPoints, 153 | int clusterId, double epsilon, int minimumPoints) 154 | { 155 | for (int i = 0; i < neighborPoints.Length; i++) 156 | { 157 | var currentPoint = neighborPoints[i]; 158 | 159 | publisher.Publish(new 
PointProcessStarted(currentPoint)); 160 | 161 | if (currentPoint.PointType == PointType.Noise) 162 | { 163 | currentPoint.ClusterId = clusterId; 164 | 165 | currentPoint.PointType = PointType.Border; 166 | 167 | publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Border)); 168 | 169 | publisher.Publish(new PointProcessFinished(currentPoint)); 170 | 171 | continue; 172 | } 173 | 174 | if (currentPoint.PointType.HasValue) 175 | { 176 | publisher.Publish(new PointAlreadyProcessed(currentPoint)); 177 | 178 | continue; 179 | } 180 | 181 | currentPoint.ClusterId = clusterId; 182 | 183 | publisher.Publish(new RegionQueryStarted(currentPoint, epsilon, minimumPoints)); 184 | 185 | var otherNeighborPoints = RegionQuery(allPoints, currentPoint.Feature, epsilon); 186 | 187 | publisher.Publish(new RegionQueryFinished(currentPoint, otherNeighborPoints)); 188 | 189 | if (otherNeighborPoints.Length < minimumPoints) 190 | { 191 | currentPoint.PointType = PointType.Border; 192 | 193 | publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Border)); 194 | 195 | publisher.Publish(new PointProcessFinished(currentPoint)); 196 | 197 | continue; 198 | } 199 | 200 | currentPoint.PointType = PointType.Core; 201 | 202 | publisher.Publish(new PointTypeAssigned(currentPoint, PointType.Core)); 203 | 204 | publisher.Publish(new PointProcessFinished(currentPoint)); 205 | 206 | neighborPoints = neighborPoints.Union(otherNeighborPoints).ToArray(); 207 | } 208 | } 209 | 210 | /// 211 | /// Checks and searchs neighbor points for given point 212 | /// 213 | /// Dbscan points converted from feature set 214 | /// Focused feature to be searched neighbors 215 | /// Desired region ball radius 216 | /// Calculated neighbor points 217 | public DbscanPoint[] RegionQuery(DbscanPoint[] allPoints, TF mainFeature, double epsilon) 218 | { 219 | return allPoints.Where(RegionQueryPredicate(mainFeature, epsilon)).ToArray(); 220 | } 221 | } 222 | } 
-------------------------------------------------------------------------------- /DbscanImplementation/DbscanImplementation.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Library 5 | netcoreapp3.0 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /DbscanImplementation/DbscanPoint.cs: -------------------------------------------------------------------------------- 1 | namespace DbscanImplementation 2 | { 3 | /// 4 | /// Algorithm point definition 5 | /// 6 | /// Feature data contribute into algorithm 7 | public class DbscanPoint 8 | { 9 | public TF Feature { get; internal set; } 10 | 11 | public int? ClusterId { get; internal set; } 12 | 13 | public PointType? PointType { get; internal set; } 14 | 15 | public DbscanPoint(TF feature) 16 | { 17 | Feature = feature; 18 | ClusterId = null; 19 | PointType = null; 20 | } 21 | } 22 | } -------------------------------------------------------------------------------- /DbscanImplementation/DbscanResult.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace DbscanImplementation 4 | { 5 | /// 6 | /// Result object of algorithm after clusters computed 7 | /// 8 | /// Feature data contribute into algorithm 9 | public class DbscanResult 10 | { 11 | public DbscanResult() 12 | { 13 | Noise = new List>(); 14 | Clusters = new Dictionary>>(); 15 | } 16 | 17 | public Dictionary>> Clusters { get; private set; } 18 | 19 | public List> Noise { get; private set; } 20 | } 21 | } -------------------------------------------------------------------------------- /DbscanImplementation/Eventing/EmptyDbscanEventPublisher.cs: -------------------------------------------------------------------------------- 1 | namespace DbscanImplementation.Eventing 2 | { 3 | public class EmptyDbscanEventPublisher : IDbscanEventPublisher 4 | { 5 | public void Publish(TQ @event) 6 
| { 7 | 8 | } 9 | } 10 | } -------------------------------------------------------------------------------- /DbscanImplementation/Eventing/Events.cs: -------------------------------------------------------------------------------- 1 | 2 | using System; 3 | 4 | namespace DbscanImplementation.Eventing 5 | { 6 | public class ComputeFinished 7 | { 8 | public Guid ComputeId { get; } 9 | 10 | public ComputeFinished(Guid computeId) 11 | { 12 | ComputeId = computeId; 13 | } 14 | } 15 | 16 | public class ComputeStarted 17 | { 18 | public Guid ComputeId { get; } 19 | 20 | public ComputeStarted(Guid computeId) 21 | { 22 | ComputeId = computeId; 23 | } 24 | } 25 | 26 | public class PointProcessFinished 27 | { 28 | public DbscanPoint Point { get; } 29 | 30 | public PointProcessFinished(DbscanPoint point) 31 | { 32 | Point = point; 33 | } 34 | } 35 | 36 | public class ClusteringFinished 37 | { 38 | public DbscanPoint Point { get; } 39 | 40 | public DbscanPoint[] NeighborPoints { get; } 41 | 42 | public int ClusterId { get; } 43 | 44 | public double Epsilon { get; } 45 | 46 | public int MinimumPoints { get; } 47 | 48 | public ClusteringFinished(DbscanPoint point, DbscanPoint[] neighborPoints, 49 | int clusterId, double epsilon, int minimumPoints) 50 | { 51 | Point = point; 52 | NeighborPoints = neighborPoints; 53 | ClusterId = clusterId; 54 | Epsilon = epsilon; 55 | MinimumPoints = minimumPoints; 56 | } 57 | } 58 | 59 | public class ClusteringStarted 60 | { 61 | 62 | public DbscanPoint Point { get; } 63 | 64 | public DbscanPoint[] NeighborPoints { get; } 65 | 66 | public int ClusterId { get; } 67 | 68 | public double Epsilon { get; } 69 | 70 | public int MinimumPoints { get; } 71 | 72 | public ClusteringStarted(DbscanPoint point, DbscanPoint[] neighborPoints, 73 | int clusterId, double epsilon, int minimumPoints) 74 | { 75 | Point = point; 76 | NeighborPoints = neighborPoints; 77 | ClusterId = clusterId; 78 | Epsilon = epsilon; 79 | MinimumPoints = minimumPoints; 80 | } 81 | } 82 
| 83 | public class PointTypeAssigned 84 | { 85 | public DbscanPoint Point { get; } 86 | 87 | public PointType AssignedType { get; } 88 | 89 | public PointTypeAssigned(DbscanPoint point, PointType assignedType) 90 | { 91 | Point = point; 92 | AssignedType = assignedType; 93 | } 94 | } 95 | 96 | public class RegionQueryFinished 97 | { 98 | public DbscanPoint Point { get; } 99 | 100 | public DbscanPoint[] NeighborPoints { get; } 101 | 102 | public RegionQueryFinished(DbscanPoint point, DbscanPoint[] neighborPoints) 103 | { 104 | Point = point; 105 | NeighborPoints = neighborPoints; 106 | } 107 | } 108 | 109 | public class RegionQueryStarted 110 | { 111 | public DbscanPoint Point { get; private set; } 112 | 113 | public double Epsilon { get; } 114 | 115 | public int MinimumPoints { get; } 116 | 117 | public RegionQueryStarted(DbscanPoint point, double epsilon, int minimumPoints) 118 | { 119 | Point = point; 120 | Epsilon = epsilon; 121 | MinimumPoints = minimumPoints; 122 | } 123 | } 124 | 125 | public class PointAlreadyProcessed 126 | { 127 | public DbscanPoint Point { get; private set; } 128 | 129 | public PointAlreadyProcessed(DbscanPoint point) 130 | { 131 | Point = point; 132 | } 133 | } 134 | 135 | public class PointProcessStarted 136 | { 137 | public DbscanPoint Point { get; private set; } 138 | 139 | public PointProcessStarted(DbscanPoint point) 140 | { 141 | Point = point; 142 | } 143 | } 144 | } -------------------------------------------------------------------------------- /DbscanImplementation/Eventing/IDbscanEventPublisher.cs: -------------------------------------------------------------------------------- 1 | namespace DbscanImplementation.Eventing 2 | { 3 | /// 4 | /// A basic interface for publishing occurring events inside dbscan algorithm. 
5 | /// 6 | public interface IDbscanEventPublisher 7 | { 8 | /// 9 | /// Use for publishing single event 10 | /// 11 | void Publish(TObj @event); 12 | } 13 | } -------------------------------------------------------------------------------- /DbscanImplementation/Eventing/IDbscanEventSubscriber.cs: -------------------------------------------------------------------------------- 1 | using System.Threading.Tasks; 2 | 3 | namespace DbscanImplementation.Eventing 4 | { 5 | public interface IDbscanEventSubscriber 6 | { 7 | Task Subscribe(); 8 | } 9 | } -------------------------------------------------------------------------------- /DbscanImplementation/PointType.cs: -------------------------------------------------------------------------------- 1 | namespace DbscanImplementation 2 | { 3 | public enum PointType 4 | { 5 | Noise = 0, 6 | Core = 1, 7 | Border = 2 8 | } 9 | } -------------------------------------------------------------------------------- /DbscanImplementation/ResultBuilding/DbscanResultBuilder.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace DbscanImplementation.ResultBuilding 4 | { 5 | public class DbscanResultBuilder 6 | { 7 | public DbscanResult Result { get; private set; } 8 | 9 | public DbscanResultBuilder() 10 | { 11 | Result = new DbscanResult(); 12 | } 13 | 14 | public void Process(DbscanPoint point) 15 | { 16 | if (point.ClusterId.HasValue && !Result.Clusters.ContainsKey(point.ClusterId.Value)) 17 | { 18 | Result.Clusters.Add(point.ClusterId.Value, new List>()); 19 | } 20 | 21 | switch (point) 22 | { 23 | case DbscanPoint core when core.PointType == PointType.Core: 24 | Result.Clusters[core.ClusterId.Value].Add(core); 25 | break; 26 | 27 | case DbscanPoint border when border.PointType == PointType.Border: 28 | Result.Clusters[border.ClusterId.Value].Add(border); 29 | break; 30 | 31 | case DbscanPoint noise when noise.PointType == PointType.Noise: 32 | 
Result.Noise.Add(noise); 33 | break; 34 | } 35 | } 36 | } 37 | } -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DBSCAN 2 | DBSCAN Clustering Algorithm C# Implementation 3 | 4 | This is a small project that implements the DBSCAN clustering algorithm in C# on .NET Core. 5 | 6 | To use it, you only need to define your own dataset item class and create a DbscanAlgorithm instance to perform clustering. After that, just call ComputeClusterDbscan with the desired clustering parameters. 7 | 8 | You can check previous git tags for a more primitive DBSCAN implementation. 9 | 10 | Example: 11 | -------- 12 | Your dataset items (preference, feature, vector, row, etc.): 13 | ```cs 14 | public class MyFeature 15 | { 16 | public double X; 17 | public double Y; 18 | 19 | public MyFeature(double x, double y) 20 | { 21 | X = x; 22 | Y = y; 23 | } 24 | } 25 | ``` 26 | 27 | Your distance function: 28 | ```cs 29 | private static double EuclidienDistance(MyFeature feature1, MyFeature feature2) 30 | { 31 | return Math.Sqrt( 32 | ((feature1.X - feature2.X) * (feature1.X - feature2.X)) + 33 | ((feature1.Y - feature2.Y) * (feature1.Y - feature2.Y)) 34 | ); 35 | } 36 | ``` 37 | 38 | Then, for clustering in its simplest form: 39 | ```cs 40 | var features = new MyFeatureDataSource().GetFeatureData(); 41 | 42 | //INFO: applied euclidean distance as metric calculation function 43 | var simpleDbscan = new DbscanAlgorithm(EuclidienDistance); 44 | 45 | //returns a DbscanResult typed object describing the outcome of the algorithm 46 | var result = simpleDbscan.ComputeClusterDbscan(allPoints: features.ToArray(), epsilon: .01, minimumPoints: 10); 47 | ``` 48 | 49 | If you want to receive the events happening inside the algorithm, you can create it with the other constructor, which takes a publisher instance: 50 | ```cs 51 | //INFO: second argument of constructor takes an instance implemented with 
IDbscanEventPublisher interface 52 | var dbscanWithEventing = new DbscanAlgorithm( 53 | EuclidienDistance, 54 | new MyFeatureConsoleLogger() 55 | ); 56 | 57 | var resultWithEventing = dbscanWithEventing.ComputeClusterDbscan(allPoints: features.ToArray(), epsilon: .01, minimumPoints: 10); 58 | ``` 59 | 60 | An example implementation of the IDbscanEventPublisher interface: 61 | ```cs 62 | public class DbscanLogger : IDbscanEventPublisher 63 | { 64 | public void Publish(params object[] events) 65 | { 66 | foreach (var e in events) 67 | { 68 | //INFO: match the events you want to process 69 | var info = e switch 70 | { 71 | PointTypeAssigned pta => $"{pta.Point.ClusterId}: {pta.AssignedType}", 72 | _ => null 73 | }; 74 | 75 | if (info != null) 76 | { 77 | Console.WriteLine(info); 78 | } 79 | } 80 | } 81 | } 82 | ``` 83 | 84 | Another example of an IDbscanEventPublisher could be a Pub/Sub application, like this: 85 | ```cs 86 | var exchange = new QueueExchange(); 87 | 88 | var publisher = new QueueExchangePublisher(exchange); 89 | 90 | var dbscanAsync = new DbscanAlgorithm( 91 | EuclidienDistance, 92 | publisher 93 | ); 94 | 95 | var subscriber = new QueueExchangeSubscriber(exchange); 96 | 97 | var subscriptionTask = subscriber.Subscribe(); 98 | ``` 99 | This can be anything that follows the Pub/Sub design. You can asynchronously build your own analytics result through a subscription. 
100 | -------------------------------------------------------------------------------- /dbscan.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.26124.0 5 | MinimumVisualStudioVersion = 15.0.26124.0 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DbscanImplementation", "DbscanImplementation\DbscanImplementation.csproj", "{EA602F4F-FED7-4853-831A-5AC98217CCCF}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "DbscanDemo", "DbscanDemo\DbscanDemo.csproj", "{6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}" 9 | EndProject 10 | Global 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 12 | Debug|Any CPU = Debug|Any CPU 13 | Debug|x64 = Debug|x64 14 | Debug|x86 = Debug|x86 15 | Release|Any CPU = Release|Any CPU 16 | Release|x64 = Release|x64 17 | Release|x86 = Release|x86 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 23 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 24 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Debug|Any CPU.Build.0 = Debug|Any CPU 25 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Debug|x64.ActiveCfg = Debug|Any CPU 26 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Debug|x64.Build.0 = Debug|Any CPU 27 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Debug|x86.ActiveCfg = Debug|Any CPU 28 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Debug|x86.Build.0 = Debug|Any CPU 29 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Release|Any CPU.ActiveCfg = Release|Any CPU 30 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Release|Any CPU.Build.0 = Release|Any CPU 31 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Release|x64.ActiveCfg = Release|Any CPU 32 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Release|x64.Build.0 = Release|Any CPU 33 | 
{EA602F4F-FED7-4853-831A-5AC98217CCCF}.Release|x86.ActiveCfg = Release|Any CPU 34 | {EA602F4F-FED7-4853-831A-5AC98217CCCF}.Release|x86.Build.0 = Release|Any CPU 35 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 36 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Debug|Any CPU.Build.0 = Debug|Any CPU 37 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Debug|x64.ActiveCfg = Debug|Any CPU 38 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Debug|x64.Build.0 = Debug|Any CPU 39 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Debug|x86.ActiveCfg = Debug|Any CPU 40 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Debug|x86.Build.0 = Debug|Any CPU 41 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Release|Any CPU.ActiveCfg = Release|Any CPU 42 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Release|Any CPU.Build.0 = Release|Any CPU 43 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Release|x64.ActiveCfg = Release|Any CPU 44 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Release|x64.Build.0 = Release|Any CPU 45 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Release|x86.ActiveCfg = Release|Any CPU 46 | {6A3D2E5D-9A97-4F9C-9833-0FCFDD87BC54}.Release|x86.Build.0 = Release|Any CPU 47 | EndGlobalSection 48 | EndGlobal 49 | --------------------------------------------------------------------------------