├── .gitignore ├── .travis.yml ├── CHANGELOG ├── EWAH.RunTests ├── EWAH.RunTests.csproj ├── Properties │ └── AssemblyInfo.cs └── example.cs ├── EWAH.Tests ├── EWAH.Tests.csproj ├── EWAHCompressedBitArraySerializerTest.cs └── EWAHCompressedBitmapTest.cs ├── EWAH.sln ├── EWAH ├── BufferedRunningLengthWord.cs ├── EWAH.csproj ├── EwahCompressedBitArray.cs ├── EwahCompressedBitArraySerializer.cs ├── EwahEnumerator.cs ├── PlaceHolders.cs ├── Properties │ └── AssemblyInfo.cs └── RunningLengthWord.cs ├── LICENSE ├── README.md └── package.sh /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Ww][Ii][Nn]32/ 27 | [Aa][Rr][Mm]/ 28 | [Aa][Rr][Mm]64/ 29 | bld/ 30 | [Bb]in/ 31 | [Oo]bj/ 32 | [Ll]og/ 33 | [Ll]ogs/ 34 | 35 | # Visual Studio 2015/2017 cache/options directory 36 | .vs/ 37 | # Uncomment if you have tasks that create the project's static files in wwwroot 38 | #wwwroot/ 39 | 40 | # Visual Studio 2017 auto generated files 41 | Generated\ Files/ 42 | 43 | # MSTest test Results 44 | [Tt]est[Rr]esult*/ 45 | [Bb]uild[Ll]og.* 46 | 47 | # NUnit 48 | *.VisualState.xml 49 | TestResult.xml 50 | nunit-*.xml 51 | 52 | # Build Results of an ATL Project 53 | [Dd]ebugPS/ 54 | [Rr]eleasePS/ 55 | dlldata.c 56 | 57 | # Benchmark Results 58 | BenchmarkDotNet.Artifacts/ 59 | 60 | # .NET Core 61 | project.lock.json 62 | project.fragment.lock.json 63 | artifacts/ 64 | 65 | # 
Tye 66 | .tye/ 67 | 68 | # ASP.NET Scaffolding 69 | ScaffoldingReadMe.txt 70 | 71 | # StyleCop 72 | StyleCopReport.xml 73 | 74 | # Files built by Visual Studio 75 | *_i.c 76 | *_p.c 77 | *_h.h 78 | *.ilk 79 | *.meta 80 | *.obj 81 | *.iobj 82 | *.pch 83 | *.pdb 84 | *.ipdb 85 | *.pgc 86 | *.pgd 87 | *.rsp 88 | *.sbr 89 | *.tlb 90 | *.tli 91 | *.tlh 92 | *.tmp 93 | *.tmp_proj 94 | *_wpftmp.csproj 95 | *.log 96 | *.vspscc 97 | *.vssscc 98 | .builds 99 | *.pidb 100 | *.svclog 101 | *.scc 102 | 103 | # Chutzpah Test files 104 | _Chutzpah* 105 | 106 | # Visual C++ cache files 107 | ipch/ 108 | *.aps 109 | *.ncb 110 | *.opendb 111 | *.opensdf 112 | *.sdf 113 | *.cachefile 114 | *.VC.db 115 | *.VC.VC.opendb 116 | 117 | # Visual Studio profiler 118 | *.psess 119 | *.vsp 120 | *.vspx 121 | *.sap 122 | 123 | # Visual Studio Trace Files 124 | *.e2e 125 | 126 | # TFS 2012 Local Workspace 127 | $tf/ 128 | 129 | # Guidance Automation Toolkit 130 | *.gpState 131 | 132 | # ReSharper is a .NET coding add-in 133 | _ReSharper*/ 134 | *.[Rr]e[Ss]harper 135 | *.DotSettings.user 136 | 137 | # TeamCity is a build add-in 138 | _TeamCity* 139 | 140 | # DotCover is a Code Coverage Tool 141 | *.dotCover 142 | 143 | # AxoCover is a Code Coverage Tool 144 | .axoCover/* 145 | !.axoCover/settings.json 146 | 147 | # Coverlet is a free, cross platform Code Coverage Tool 148 | coverage*.json 149 | coverage*.xml 150 | coverage*.info 151 | 152 | # Visual Studio code coverage results 153 | *.coverage 154 | *.coveragexml 155 | 156 | # NCrunch 157 | _NCrunch_* 158 | .*crunch*.local.xml 159 | nCrunchTemp_* 160 | 161 | # MightyMoose 162 | *.mm.* 163 | AutoTest.Net/ 164 | 165 | # Web workbench (sass) 166 | .sass-cache/ 167 | 168 | # Installshield output folder 169 | [Ee]xpress/ 170 | 171 | # DocProject is a documentation generator add-in 172 | DocProject/buildhelp/ 173 | DocProject/Help/*.HxT 174 | DocProject/Help/*.HxC 175 | DocProject/Help/*.hhc 176 | DocProject/Help/*.hhk 177 | DocProject/Help/*.hhp 178 
| DocProject/Help/Html2 179 | DocProject/Help/html 180 | 181 | # Click-Once directory 182 | publish/ 183 | 184 | # Publish Web Output 185 | *.[Pp]ublish.xml 186 | *.azurePubxml 187 | # Note: Comment the next line if you want to checkin your web deploy settings, 188 | # but database connection strings (with potential passwords) will be unencrypted 189 | *.pubxml 190 | *.publishproj 191 | 192 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 193 | # checkin your Azure Web App publish settings, but sensitive information contained 194 | # in these scripts will be unencrypted 195 | PublishScripts/ 196 | 197 | # NuGet Packages 198 | *.nupkg 199 | # NuGet Symbol Packages 200 | *.snupkg 201 | # The packages folder can be ignored because of Package Restore 202 | **/[Pp]ackages/* 203 | # except build/, which is used as an MSBuild target. 204 | !**/[Pp]ackages/build/ 205 | # Uncomment if necessary however generally it will be regenerated when needed 206 | #!**/[Pp]ackages/repositories.config 207 | # NuGet v3's project.json files produces more ignorable files 208 | *.nuget.props 209 | *.nuget.targets 210 | 211 | # Microsoft Azure Build Output 212 | csx/ 213 | *.build.csdef 214 | 215 | # Microsoft Azure Emulator 216 | ecf/ 217 | rcf/ 218 | 219 | # Windows Store app package directories and files 220 | AppPackages/ 221 | BundleArtifacts/ 222 | Package.StoreAssociation.xml 223 | _pkginfo.txt 224 | *.appx 225 | *.appxbundle 226 | *.appxupload 227 | 228 | # Visual Studio cache files 229 | # files ending in .cache can be ignored 230 | *.[Cc]ache 231 | # but keep track of directories ending in .cache 232 | !?*.[Cc]ache/ 233 | 234 | # Others 235 | ClientBin/ 236 | ~$* 237 | *~ 238 | *.dbmdl 239 | *.dbproj.schemaview 240 | *.jfm 241 | *.pfx 242 | *.publishsettings 243 | orleans.codegen.cs 244 | 245 | # Including strong name files can present a security risk 246 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 247 | #*.snk 248 | 249 | # 
Since there are multiple workflows, uncomment next line to ignore bower_components 250 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 251 | #bower_components/ 252 | 253 | # RIA/Silverlight projects 254 | Generated_Code/ 255 | 256 | # Backup & report files from converting an old project file 257 | # to a newer Visual Studio version. Backup files are not needed, 258 | # because we have git ;-) 259 | _UpgradeReport_Files/ 260 | Backup*/ 261 | UpgradeLog*.XML 262 | UpgradeLog*.htm 263 | ServiceFabricBackup/ 264 | *.rptproj.bak 265 | 266 | # SQL Server files 267 | *.mdf 268 | *.ldf 269 | *.ndf 270 | 271 | # Business Intelligence projects 272 | *.rdl.data 273 | *.bim.layout 274 | *.bim_*.settings 275 | *.rptproj.rsuser 276 | *- [Bb]ackup.rdl 277 | *- [Bb]ackup ([0-9]).rdl 278 | *- [Bb]ackup ([0-9][0-9]).rdl 279 | 280 | # Microsoft Fakes 281 | FakesAssemblies/ 282 | 283 | # GhostDoc plugin setting file 284 | *.GhostDoc.xml 285 | 286 | # Node.js Tools for Visual Studio 287 | .ntvs_analysis.dat 288 | node_modules/ 289 | 290 | # Visual Studio 6 build log 291 | *.plg 292 | 293 | # Visual Studio 6 workspace options file 294 | *.opt 295 | 296 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
297 | *.vbw 298 | 299 | # Visual Studio LightSwitch build output 300 | **/*.HTMLClient/GeneratedArtifacts 301 | **/*.DesktopClient/GeneratedArtifacts 302 | **/*.DesktopClient/ModelManifest.xml 303 | **/*.Server/GeneratedArtifacts 304 | **/*.Server/ModelManifest.xml 305 | _Pvt_Extensions 306 | 307 | # Paket dependency manager 308 | .paket/paket.exe 309 | paket-files/ 310 | 311 | # FAKE - F# Make 312 | .fake/ 313 | 314 | # CodeRush personal settings 315 | .cr/personal 316 | 317 | # Python Tools for Visual Studio (PTVS) 318 | __pycache__/ 319 | *.pyc 320 | 321 | # Cake - Uncomment if you are using it 322 | # tools/** 323 | # !tools/packages.config 324 | 325 | # Tabs Studio 326 | *.tss 327 | 328 | # Telerik's JustMock configuration file 329 | *.jmconfig 330 | 331 | # BizTalk build output 332 | *.btp.cs 333 | *.btm.cs 334 | *.odx.cs 335 | *.xsd.cs 336 | 337 | # OpenCover UI analysis results 338 | OpenCover/ 339 | 340 | # Azure Stream Analytics local run output 341 | ASALocalRun/ 342 | 343 | # MSBuild Binary and Structured Log 344 | *.binlog 345 | 346 | # NVidia Nsight GPU debugger configuration file 347 | *.nvuser 348 | 349 | # MFractors (Xamarin productivity tool) working folder 350 | .mfractor/ 351 | 352 | # Local History for Visual Studio 353 | .localhistory/ 354 | 355 | # BeatPulse healthcheck temp database 356 | healthchecksdb 357 | 358 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 359 | MigrationBackup/ 360 | 361 | # Ionide (cross platform F# VS Code tools) working folder 362 | .ionide/ 363 | 364 | # Fody - auto-generated XML schema 365 | FodyWeavers.xsd 366 | 367 | ## 368 | ## Visual studio for Mac 369 | ## 370 | 371 | 372 | # globs 373 | Makefile.in 374 | *.userprefs 375 | *.usertasks 376 | config.make 377 | config.status 378 | aclocal.m4 379 | install-sh 380 | autom4te.cache/ 381 | *.tar.gz 382 | tarballs/ 383 | test-results/ 384 | 385 | # Mac bundle stuff 386 | *.dmg 387 | *.app 388 | 389 | # content below from: 
https://github.com/github/gitignore/blob/master/Global/macOS.gitignore 390 | # General 391 | .DS_Store 392 | .AppleDouble 393 | .LSOverride 394 | 395 | # Icon must end with two \r 396 | Icon 397 | 398 | 399 | # Thumbnails 400 | ._* 401 | 402 | # Files that might appear in the root of a volume 403 | .DocumentRevisions-V100 404 | .fseventsd 405 | .Spotlight-V100 406 | .TemporaryItems 407 | .Trashes 408 | .VolumeIcon.icns 409 | .com.apple.timemachine.donotpresent 410 | 411 | # Directories potentially created on remote AFP share 412 | .AppleDB 413 | .AppleDesktop 414 | Network Trash Folder 415 | Temporary Items 416 | .apdisk 417 | 418 | # content below from: https://github.com/github/gitignore/blob/master/Global/Windows.gitignore 419 | # Windows thumbnail cache files 420 | Thumbs.db 421 | ehthumbs.db 422 | ehthumbs_vista.db 423 | 424 | # Dump file 425 | *.stackdump 426 | 427 | # Folder config file 428 | [Dd]esktop.ini 429 | 430 | # Recycle Bin used on file shares 431 | $RECYCLE.BIN/ 432 | 433 | # Windows Installer files 434 | *.cab 435 | *.msi 436 | *.msix 437 | *.msm 438 | *.msp 439 | 440 | # Windows shortcuts 441 | *.lnk 442 | 443 | # JetBrains Rider 444 | .idea/ 445 | *.sln.iml 446 | 447 | ## 448 | ## Visual Studio Code 449 | ## 450 | .vscode/* 451 | !.vscode/settings.json 452 | !.vscode/tasks.json 453 | !.vscode/launch.json 454 | !.vscode/extensions.json 455 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: csharp 2 | solution: EWAH.sln 3 | install: 4 | - nuget restore EWAH.sln 5 | script: 6 | - msbuild /p:Configuration=Release EWAH.sln 7 | - mono ./EWAH.RunTests/bin/Release/EWAH.RunTests.exe 8 | -------------------------------------------------------------------------------- /CHANGELOG: -------------------------------------------------------------------------------- 1 | 0.2.6 (June 12th 2015) 2 | Fixed Tests that would not 
run, standardized C# formatting. 3 | 4 | 0.2.5 (May 21st 2013) 5 | Fixed bug with addStreamOfNegatedDirtyWords (Vicent Marti) 6 | 7 | 0.2.4 (May 15th 2013) 8 | Fixed bug with Intersects (Ciaran Jessup) 9 | 10 | 0.2.3 (May 14th 2013) 11 | Fixed violation of GetHashCode contract (Ciaran Jessup) 12 | 13 | 0.2.2 (May 13th 2013) 14 | Fixed bug with Intersects (ynosa) 15 | 16 | 17 | 0.2.1 (April 16th 2013) 18 | Clone method does not clone the _Rlw variable 19 | Fixed bug in function Not when SizeInBits is not a multiple of 64 20 | added method "bitmapOf" 21 | made method ToString return something friendlier 22 | change the semantics of "equals" so that a Xor is computed (small perf penalty, but nicer in practice) 23 | added many unit tests 24 | 25 | 26 | 0.2.0 (April 20th 2012) 27 | New lightweight serialization infrastructure (Ciaran Jessup). 28 | 29 | 0.1.0 (April 19th 2012) 30 | New file layout. 31 | Fixed a bug in how the cardinality is computed. 32 | Made the computation of the cardinality faster. 33 | 34 | 35 | 36 | Note: A version 0.2.1 was released on May 1st 2012, but it had 37 | faulty decoding. 38 | 39 | -------------------------------------------------------------------------------- /EWAH.RunTests/EWAH.RunTests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | netcoreapp3.1 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /EWAH.RunTests/Properties/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Reflection; 2 | using System.Runtime.InteropServices; 3 | 4 | // General Information about an assembly is controlled through the following 5 | // set of attributes. Change these attribute values to modify the information 6 | // associated with an assembly. 
7 | 8 | [assembly: AssemblyTitle("EWAH.RunTests")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("OECD")] 12 | [assembly: AssemblyProduct("EWAH.RunTests")] 13 | [assembly: AssemblyCopyright("Copyright © OECD 2012")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | 21 | [assembly: ComVisible(false)] 22 | 23 | // The following GUID is for the ID of the typelib if this project is exposed to COM 24 | 25 | [assembly: Guid("0666e9e3-73bd-4a3f-9633-9f7299d6b509")] 26 | 27 | // Version information for an assembly consists of the following four values: 28 | // 29 | // Major Version 30 | // Minor Version 31 | // Build Number 32 | // Revision 33 | // 34 | // You can specify all the values or you can default the Build and Revision Numbers 35 | // by using the '*' as shown below: 36 | // [assembly: AssemblyVersion("1.0.*")] 37 | 38 | [assembly: AssemblyVersion("1.0.0.0")] 39 | [assembly: AssemblyFileVersion("1.0.0.0")] -------------------------------------------------------------------------------- /EWAH.RunTests/example.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Ewah; 3 | 4 | /* 5 | * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup 6 | * Licensed under APL 2.0. 
*/

/// <summary>
/// Command-line demo and smoke-test driver for the EWAH compressed bitmap library.
/// </summary>
public class example
{
    /// <summary>
    /// Builds two small bitmaps, prints their OR, AND and XOR together with the
    /// size in bytes of each result, then runs the unit tests by calling the
    /// test methods directly.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    public static void Main(string[] args)
    {
        var ewahBitmap1 = EwahCompressedBitArray.BitmapOf(0, 2, 64, 1 << 30);
        var ewahBitmap2 = EwahCompressedBitArray.BitmapOf(1, 3, 64, 1 << 30);
        Console.WriteLine("Running demo program:");
        Console.WriteLine("bitmap 1: " + ewahBitmap1);
        Console.WriteLine("bitmap 2:" + ewahBitmap2);

        EwahCompressedBitArray orbitmap = ewahBitmap1.Or(ewahBitmap2);
        Console.WriteLine();
        Console.WriteLine("bitmap 1 OR bitmap 2:" + orbitmap);
        Console.WriteLine("memory usage: " + orbitmap.SizeInBytes + " bytes");
        Console.WriteLine();

        EwahCompressedBitArray andbitmap = ewahBitmap1.And(ewahBitmap2);
        Console.WriteLine("bitmap 1 AND bitmap 2:" + andbitmap);
        Console.WriteLine("memory usage: " + andbitmap.SizeInBytes + " bytes");

        EwahCompressedBitArray xorbitmap = ewahBitmap1.Xor(ewahBitmap2);
        Console.WriteLine("bitmap 1 XOR bitmap 2:" + xorbitmap);
        // Report the size of the XOR result itself (this line used to print the
        // size of the AND result a second time).
        Console.WriteLine("memory usage: " + xorbitmap.SizeInBytes + " bytes");
        Console.WriteLine("End of demo.");
        Console.WriteLine("");

        // Run the unit tests by hand, in a fixed order.
        var tr = new EwahCompressedBitArrayTest();
        tr.TestYnosa();
        tr.TestIntersectOddNess();
        tr.testsetSizeInBits();
        tr.SsiYanKaiTest();
        tr.testDebugSetSizeInBitsTest();
        tr.EwahIteratorProblem();
        tr.TayaraTest();
        tr.TestNot();
        tr.TestCardinality();
        tr.TestEwahCompressedBitArray();
        tr.TestExternalization();
        tr.TestLargeEwahCompressedBitArray();
        tr.TestMassiveAnd();
        tr.TestMassiveAndNot();
        tr.TestMassiveOr();
        tr.TestMassiveXOR();
        tr.HabermaasTest();
        tr.VanSchaikTest();
        tr.TestRunningLengthWord();
        tr.TestSizeInBits1();
        tr.TestHasNextSafe();
        tr.TestCloneEwahCompressedBitArray();
        tr.TestSetGet();
        tr.TestWithParameters();

        new EWAHCompressedBitArraySerializerTest().TestCustomSerializationStrategy();
    }
}
-------------------------------------------------------------------------------- /EWAH.Tests/EWAH.Tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netcoreapp3.1 5 | 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /EWAH.Tests/EWAHCompressedBitArraySerializerTest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Microsoft.VisualStudio.TestTools.UnitTesting; 3 | using System.IO; 4 | using System.Runtime.Serialization.Formatters.Binary; 5 | 6 | namespace Ewah 7 | { 8 | /* 9 | * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup 10 | * Licensed under APL 2.0. 11 | */ 12 | [TestClass] 13 | public class EWAHCompressedBitArraySerializerTest 14 | { 15 | 16 | /// <summary> 17 | /// Tests the custom serialization strategy: round-trips a random bitmap through BinaryFormatter and through EwahCompressedBitArraySerializer, then checks that the custom form is smaller and that both round-trips equal the original. 18 | /// </summary> 19 | [TestMethod] 20 | public void TestCustomSerializationStrategy() { 21 | Console.WriteLine("testing Custom serialization strategy"); 22 | 23 | // Create a compressed bit array and set each of the first 23,000 positions with probability 0.5. 24 | var bmp = new EwahCompressedBitArray(); 25 | var r= new Random(); 26 | for (int i = 0; i < 23000; i++) { 27 | if (r.NextDouble() < 0.5) { 28 | bmp.Set(i); 29 | } 30 | } 31 | 32 | byte[] originalDeserialized= null; 33 | byte[] newFormDeserialized= null; 34 | EwahCompressedBitArray newFormReserialized= null; 35 | EwahCompressedBitArray originalReserialized= null; 36 | 37 | // First serialize + de-serialize 'normally' via BinaryFormatter (despite its name, originalDeserialized holds the serialized bytes). 38 | using (var ms = new MemoryStream()) { 39 | BinaryFormatter bf = new BinaryFormatter(); 40 | bf.Serialize(ms, bmp); 41 | originalDeserialized = ms.ToArray(); 42 | ms.Seek(0, SeekOrigin.Begin); 43 | originalReserialized = (EwahCompressedBitArray)bf.Deserialize(ms); 44 | } 45 | 46 | // Now serialize + de-serialize with the new form (EwahCompressedBitArraySerializer). 
47 | using (var ms = new MemoryStream()) { 48 | EwahCompressedBitArraySerializer bf = new EwahCompressedBitArraySerializer(); 49 | bf.Serialize(ms, bmp); 50 | newFormDeserialized = ms.ToArray(); 51 | ms.Seek(0, SeekOrigin.Begin); 52 | newFormReserialized = (EwahCompressedBitArray)bf.Deserialize(ms); 53 | } 54 | 55 | // Assert that the new serialized form is strictly smaller (in bytes) than the BinaryFormatter form. 56 | Assert.IsTrue(newFormDeserialized.Length < originalDeserialized.Length); 57 | 58 | // Compare the bitmap round-tripped through BinaryFormatter against the original. 59 | Assert.AreEqual(bmp, originalReserialized); 60 | 61 | // Compare the bitmap round-tripped through the new form against the original. 62 | Assert.AreEqual(bmp, newFormReserialized); 63 | 64 | // Compare the two round-tripped bitmaps against each other. 65 | Assert.AreEqual(newFormReserialized, originalReserialized); 66 | 67 | Console.WriteLine("testing Custom serialization strategy:ok"); 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /EWAH.Tests/EWAHCompressedBitmapTest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections; 3 | using System.Collections.Generic; 4 | using System.IO; 5 | using System.Runtime.Serialization.Formatters.Binary; 6 | using Microsoft.VisualStudio.TestTools.UnitTesting; 7 | 8 | namespace Ewah 9 | { 10 | /* 11 | * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup 12 | * Licensed under APL 2.0. 
*/
    /// <summary>
    /// Extension helpers that give the CLR collections the few operations the
    /// tests need in order to act as an uncompressed reference implementation.
    /// </summary>
    public static class Ext
    {
        /// <summary>Adds every element of <paramref name="newElems"/> to <paramref name="set"/>.</summary>
        /// <param name="set">The set that receives the elements.</param>
        /// <param name="newElems">The elements to add; duplicates are ignored by the set.</param>
        public static void AddRange<T>(this HashSet<T> set, IEnumerable<T> newElems)
        {
            foreach (T elem in newElems)
            {
                set.Add(elem);
            }
        }

        /// <summary>Finds the first set bit at or after <paramref name="startPos"/>.</summary>
        /// <param name="bitArray">The bit array to scan.</param>
        /// <param name="startPos">Index at which the scan starts (inclusive).</param>
        /// <returns>The index of the first set bit found, or -1 when there is none.</returns>
        public static int NextSetBit(this BitArray bitArray, int startPos)
        {
            for (int ii = startPos; ii < bitArray.Length; ii++)
            {
                if (bitArray[ii])
                    return ii;
            }
            return -1;
        }

        /// <summary>Counts the set bits of <paramref name="bitArray"/>.</summary>
        /// <returns>The number of bits whose value is true.</returns>
        public static ulong Cardinality(this BitArray bitArray)
        {
            ulong res = 0;

            for (int ii = 0; ii < bitArray.Length; ii++)
            {
                if (bitArray.Get(ii))
                    res++;
            }
            return res;
        }

        /// <summary>
        /// In-place AND NOT: clears in <paramref name="bitArray"/> every bit that is
        /// set in <paramref name="other"/>, over the first min(Count) positions only.
        /// </summary>
        /// <returns>The number of positions processed (the smaller of the two counts).</returns>
        public static int AndNot(this BitArray bitArray, BitArray other)
        {
            int res = Math.Min(bitArray.Count, other.Count);
            for (int ii = 0; ii < res; ii++)
            {
                bitArray.Set(ii, bitArray[ii] && !other[ii]);
            }
            return res;
        }

        /// <summary>
        /// Removes from <paramref name="list"/>, in place, every element that does
        /// not occur in <paramref name="retain"/>. The relative order of the
        /// surviving elements is kept.
        /// </summary>
        /// <param name="list">The list to filter.</param>
        /// <param name="retain">The elements that may stay.</param>
        public static void Retain<T>(this List<T> list, List<T> retain)
        {
            list.RemoveAll(item => !retain.Contains(item));
        }
    }

    /**
     * This class is used for unit testing.
     */


    [TestClass]
    public sealed class EwahCompressedBitArrayTest
    {
        /** The Constant MEGA: a large integer. */
        private const int Mega = 8 * 1024 * 1024;

        /** The Constant TEST_BS_SIZE: used to represent the size of a large bitmap. */
        private const int TestBsSize = 8 * Mega;

        /**
         * Function used in a test inspired by Federico Fissore.
*
         * @param size the number of set bits
         * @param seed the random seed
         * @return the pseudo-random array int[]
         */

        private static int[] CreateSortedIntArrayOfBitsToSet(int size, int seed)
        {
            var generator = new Random(seed);

            // Draw `size` raw positions below TestBsSize; duplicates are possible.
            var raw = new int[size];
            int filled = 0;
            while (filled < raw.Length)
            {
                raw[filled++] = generator.Next(TestBsSize);
            }
            Array.Sort(raw);

            // After sorting, equal values are adjacent: keep one of each run.
            var distinct = new List<int>(raw.Length);
            foreach (int value in raw)
            {
                if (distinct.Count == 0 || distinct[distinct.Count - 1] != value)
                {
                    distinct.Add(value);
                }
            }
            return distinct.ToArray();
        }

        /**
         * Pseudo-non-deterministic test inspired by S.J.vanSchaik.
         * (Yes, non-deterministic tests are bad, but the test is actually deterministic.)
         */

        /**
         * Pseudo-non-deterministic test inspired by Federico Fissore.
         * Sets a fixed-seed pseudo-random collection of bits in a fresh bitmap,
         * then checks the enumerated positions and the cardinality.
         *
         * @param length the number of set bits in a bitmap
         */

        private static void ShouldSetBits(int length)
        {
            Console.WriteLine("testing shouldSetBits " + length);

            int[] expected = CreateSortedIntArrayOfBitsToSet(length, 434222);
            var compressed = new EwahCompressedBitArray();

            Console.WriteLine(" ... setting " + expected.Length + " values");

            for (int idx = 0; idx < expected.Length; idx++)
            {
                compressed.Set(expected[idx]);
            }

            Console.WriteLine(" ... verifying " + expected.Length + " values");
            AreEqual(compressed, expected);

            Console.WriteLine(" ... checking GetCardinality");
            Assert.AreEqual((ulong)expected.Length, compressed.GetCardinality());
        }

        /**
         * Test running length word.
*/

        /**
         * Convenience function to assess equality between an array and an enumerator over
         * Integers. Every enumerated value must equal the array element at the same
         * index, and the enumeration must yield exactly as many values as the array holds.
         *
         * @param i the enumerator
         * @param array the array
         */

        private static void AreEqual(IEnumerable i, int[] array)
        {
            int cursor = 0;
            foreach (int pos in i)
            {
                int x = pos;
                int y = array[cursor++];
                Assert.AreEqual(y, x);
            }
            // Without this check a truncated enumeration (a strict prefix of the
            // array) would be accepted as "equal".
            Assert.AreEqual(array.Length, cursor);
        }

        /**
         * Asserts that two lists have the same length and equal elements at
         * every index.
         *
         * @param i the first list
         * @param array the second list
         */

        private static void AreEqual(IList i, IList array)
        {
            Assert.AreEqual(i.Count, array.Count);
            for (int k = 0; k < i.Count; k++)
            {
                Assert.AreEqual(i[k], array[k]);
            }
        }

        /**
         * Convenience function to assess equality between a compressed BitArray
         * and an uncompressed BitArray: same cardinality and same set positions.
         *
         * @param x the compressed BitArray/bitmap
         * @param y the uncompressed BitArray/bitmap
         */

        private static void AreEqual(EwahCompressedBitArray x, BitArray y)
        {
            Assert.AreEqual(x.GetCardinality(), y.Cardinality());
            // Collect the indices of the set bits of the uncompressed bitmap.
            var positions = new List<int>();
            for (int ii = 0; ii < y.Count; ii++)
            {
                if (y[ii])
                {
                    positions.Add(ii);
                }
            }
            AreEqual(x.GetPositions(), positions);
        }


        /**
         * a non-deterministic test proposed by Marc Polizzi.
*
         * In each round two random bitmaps are built both as EWAH bitmaps and as
         * plain BitArrays; XOR, AND, AND NOT and OR are then computed on both
         * representations and the results compared.
         *
         * @param maxlength the maximum uncompressed size of the bitmap
         */

        public static void PolizziTest(int maxlength)
        {
            Console.WriteLine("Polizzi test with max length = " + maxlength);

            // One generator for the whole run: creating a new time-seeded Random per
            // round can hand out the same seed to consecutive rounds on some runtimes.
            var rnd = new Random();

            // The reference BitArrays must be able to hold every position below
            // maxlength; 10000 is kept as the minimum so that small values of
            // maxlength behave as before.
            int referenceSize = Math.Max(10000, maxlength);

            for (int k = 0; k < 10000; k += 77)
            {
                var ewahBitmap1 = new EwahCompressedBitArray();
                var clrBitArray1 = new BitArray(referenceSize);
                var ewahBitmap2 = new EwahCompressedBitArray();
                var clrBitArray2 = new BitArray(referenceSize);
                int len = rnd.Next(maxlength);
                for (int pos = 0; pos < len; pos++)
                {
                    // random *** number of bits set ***
                    if (rnd.Next(7) == 0)
                    {
                        // random *** increasing *** values
                        ewahBitmap1.Set(pos);
                        clrBitArray1.Set(pos, true);
                    }
                    if (rnd.Next(11) == 0)
                    {
                        // random *** increasing *** values
                        ewahBitmap2.Set(pos);
                        clrBitArray2.Set(pos, true);
                    }
                }
                assertEquals(clrBitArray1, ewahBitmap1);
                assertEquals(clrBitArray2, ewahBitmap2);
                // XOR
                {
                    EwahCompressedBitArray xorEwahBitmap = ewahBitmap1.Xor(ewahBitmap2);
                    var xorclrBitArray = (BitArray)clrBitArray1.Clone();
                    xorclrBitArray.Xor(clrBitArray2);
                    assertEquals(xorclrBitArray, xorEwahBitmap);
                }
                // AND
                {
                    EwahCompressedBitArray andEwahBitmap = ewahBitmap1.And(ewahBitmap2);
                    var andclrBitArray = (BitArray)clrBitArray1.Clone();
                    andclrBitArray.And(clrBitArray2);
                    assertEquals(andclrBitArray, andEwahBitmap);
                }
                // AND (operands swapped on the EWAH side; AND is symmetric, so the
                // reference result is the same)
                {
                    EwahCompressedBitArray andEwahBitmap = ewahBitmap2.And(ewahBitmap1);
                    var andclrBitArray = (BitArray)clrBitArray1.Clone();
                    andclrBitArray.And(clrBitArray2);
                    assertEquals(andclrBitArray, andEwahBitmap);
                }
                // AND NOT
                {
                    EwahCompressedBitArray andNotEwahBitmap = ewahBitmap1
                        .AndNot(ewahBitmap2);
                    var andNotclrBitArray = (BitArray)clrBitArray1.Clone();
                    andNotclrBitArray.AndNot(clrBitArray2);
                    assertEquals(andNotclrBitArray, andNotEwahBitmap);
                }
                // AND NOT (not symmetric: the reference is rebuilt from bitmap 2)
                {
                    EwahCompressedBitArray andNotEwahBitmap = ewahBitmap2
                        .AndNot(ewahBitmap1);
                    var andNotclrBitArray = (BitArray)clrBitArray2.Clone();
                    andNotclrBitArray.AndNot(clrBitArray1);
                    assertEquals(andNotclrBitArray, andNotEwahBitmap);
                }
                // OR
                {
                    EwahCompressedBitArray orEwahBitmap = ewahBitmap1.Or(ewahBitmap2);
                    var orclrBitArray = (BitArray)clrBitArray1.Clone();
                    orclrBitArray.Or(clrBitArray2);
                    assertEquals(orclrBitArray, orEwahBitmap);
                }
                // OR (operands swapped on the EWAH side; OR is symmetric)
                {
                    EwahCompressedBitArray orEwahBitmap = ewahBitmap2.Or(ewahBitmap1);
                    var orclrBitArray = (BitArray)clrBitArray1.Clone();
                    orclrBitArray.Or(clrBitArray2);
                    assertEquals(orclrBitArray, orEwahBitmap);
                }
            }
        }

        /**
         * Assess equality between an uncompressed bitmap and a compressed one,
         * part of a test contributed by Marc Polizzi.
*
         * @param clrBitArray the uncompressed bitmap
         * @param ewahBitmap the compressed bitmap
         */

        private static void assertEquals(BitArray clrBitArray, EwahCompressedBitArray ewahBitmap)
        {
            // Three independent views of the compressed bitmap are checked against
            // the reference: its enumerator, its position list and its cardinality.
            assertEqualsIterator(clrBitArray, ewahBitmap);
            assertEqualsPositions(clrBitArray, ewahBitmap);
            assertCardinality(clrBitArray, ewahBitmap);
        }

        /**
         * Assess that an uncompressed bitmap and a compressed one have the same
         * number of set bits, part of a test contributed by Marc Polizzi
         *
         * @param clrBitArray the uncompressed bitmap
         * @param ewahBitmap the compressed bitmap
         */

        private static void assertCardinality(BitArray clrBitArray,
                                              EwahCompressedBitArray ewahBitmap)
        {
            Assert.AreEqual(ewahBitmap.GetCardinality(), clrBitArray.Cardinality());
        }

        /**
         * Assess equality between an uncompressed bitmap and the values produced
         * by enumerating a compressed one, part of a test contributed by Marc Polizzi
         *
         * @param clrBitArray the clr BitArray
         * @param ewahBitmap the ewah BitArray
         */

        private static void assertEqualsIterator(BitArray clrBitArray, EwahCompressedBitArray ewahBitmap)
        {
            // Every enumerated position must be set in the reference bitmap ...
            var positions = new HashSet<int>();
            foreach (int bit in ewahBitmap)
            {
                Assert.IsTrue(clrBitArray.Get(bit), "enumerator: BitArray got different bits");
                positions.Add(bit);
            }

            // ... and every set bit of the reference must have been enumerated.
            for (int pos = clrBitArray.NextSetBit(0); pos >= 0; pos = clrBitArray.NextSetBit(pos + 1))
            {
                Assert.IsTrue(positions.Contains(pos), "enumerator: BitArray got different bits");
            }
        }

        // part of a test contributed by Marc Polizzi
        /**
         * Assert equals positions.
*
         * @param clrBitArray the uncompressed (CLR) bitmap
         * @param ewahBitmap the ewah bitmap
         */

        private static void assertEqualsPositions(BitArray clrBitArray,
                                                  EwahCompressedBitArray ewahBitmap)
        {
            // Every reported position must be set in the reference bitmap ...
            List<int> positions = ewahBitmap.GetPositions();
            foreach (int position in positions)
            {
                Assert.IsTrue(clrBitArray.Get(position),
                              "positions: BitArray got different bits");
            }
            // ... and every set bit of the reference must have been reported.
            var ps = new HashSet<int>(positions);
            for (int pos = clrBitArray.NextSetBit(0);
                 pos >= 0;
                 pos = clrBitArray
                           .NextSetBit(pos + 1))
            {
                Assert.IsTrue(ps.Contains(pos),
                              "positions: BitArray got different bits");
            }
        }

        /**
         * Assert that two position lists have the same length and the same
         * value at every index.
         *
         * @param positions1 the first list of positions
         * @param positions2 the second list of positions
         */

        private static void assertEqualsPositions(IList positions1, IList positions2)
        {
            Assert.AreEqual(positions1.Count, positions2.Count);
            for (int ii = 0; ii < positions1.Count; ii++)
            {
                Assert.AreEqual(positions1[ii], positions2[ii], "positions: alternative got different bits");
            }
        }

        /**
         * Checks that enumerating a bitmap yields the same values, in the same
         * order, as GetPositions(), first for one dense run of bits and then for
         * pseudo-random bitmaps of doubling size.
         */
        [TestMethod]
        public void EwahIteratorProblem()
        {
            Console.WriteLine("testing ArnonMoscona");
            var bitmap = new EwahCompressedBitArray();
            for (int i = 9434560; i <= 9435159; i++)
            {
                bitmap.Set(i);
            }

            List<int> v = bitmap.GetPositions();
            int k = 0;
            foreach (int ival in bitmap)
            {
                Assert.AreEqual(ival, v[k++]);
            }
            // The enumerator must not stop early either.
            Assert.AreEqual(k, v.Count);

            for (k = 2; k <= 1024; k *= 2)
            {
                int[] bitsToSet = CreateSortedIntArrayOfBitsToSet(k, 434455 + 5 * k);
                var ewah = new EwahCompressedBitArray();
                foreach (int i in bitsToSet)
                {
                    ewah.Set(i);
                }
                assertEqualsPositions(bitsToSet, ewah.GetPositions());
            }
        }


        [TestMethod]
        public void
TayaraTest() 437 | { 438 | Console.WriteLine("Tayara test"); 439 | for (int offset = 64; offset < (1 << 30); offset *= 2) 440 | { 441 | EwahCompressedBitArray a = new EwahCompressedBitArray(); 442 | EwahCompressedBitArray b = new EwahCompressedBitArray(); 443 | for (int k = 0; k < 64; ++k) 444 | { 445 | a.Set(offset + k); 446 | b.Set(offset + k); 447 | } 448 | Assert.AreEqual(a.And(b).Equals(a), true); 449 | Assert.AreEqual(a.Or(b).Equals(a), true); 450 | } 451 | } 452 | 453 | 454 | [TestMethod] 455 | public void TestNot() 456 | { 457 | Console.WriteLine("testing not"); 458 | var bmp = new EwahCompressedBitArray(); 459 | for (int i = 0; i <= 184; i++) 460 | { 461 | bmp.Set(i); 462 | } 463 | Assert.AreEqual(185UL, bmp.GetCardinality()); 464 | bmp.Not(); 465 | Assert.AreEqual(0UL, bmp.GetCardinality()); 466 | Console.WriteLine("testing not:ok"); 467 | } 468 | 469 | [TestMethod] 470 | public void HabermaasTest() 471 | { 472 | Console.WriteLine("testing habermaasTest"); 473 | var bitArrayaa = new BitArray(1000131); 474 | var aa = new EwahCompressedBitArray(); 475 | int[] val = { 55400, 1000000, 1000128 }; 476 | foreach (int t in val) 477 | { 478 | aa.Set(t); 479 | bitArrayaa.Set(t, true); 480 | } 481 | assertEquals(bitArrayaa, aa); 482 | var bitArrayab = new BitArray(1000131); 483 | var ab = new EwahCompressedBitArray(); 484 | for (int i = 4096; i < (4096 + 5); i++) 485 | { 486 | ab.Set(i); 487 | bitArrayab.Set(i, true); 488 | } 489 | ab.Set(99000); 490 | bitArrayab.Set(99000, true); 491 | ab.Set(1000130); 492 | bitArrayab.Set(1000130, true); 493 | assertEquals(bitArrayab, ab); 494 | EwahCompressedBitArray bb = aa.Or(ab); 495 | EwahCompressedBitArray bbAnd = aa.And(ab); 496 | var bitArraybb = (BitArray)bitArrayaa.Clone(); 497 | bitArraybb.Or(bitArrayab); 498 | var bitArraybbAnd = (BitArray)bitArrayaa.Clone(); 499 | bitArraybbAnd.And(bitArrayab); 500 | AreEqual(bbAnd, bitArraybbAnd); 501 | AreEqual(bb, bitArraybb); 502 | Console.WriteLine("testing habermaasTest:ok"); 
503 | } 504 | 505 | [TestMethod] 506 | public void TestYnosa() 507 | { 508 | Console.WriteLine("testing Ynosa"); 509 | var a1 = new EwahCompressedBitArray(); 510 | var a2 = new EwahCompressedBitArray(); 511 | a1.Set(5); 512 | a1.Set(15); 513 | a2.Set(5); 514 | Assert.IsTrue(a1.Intersects(a2)); 515 | Console.WriteLine("testing Ynosa:ok"); 516 | } 517 | 518 | [TestMethod] 519 | public void TestIntersectOddNess() 520 | { 521 | Console.WriteLine("testing IntersectOddNess"); 522 | var a1 = new EwahCompressedBitArray(); 523 | var a2 = new EwahCompressedBitArray(); 524 | a1.Set(12); 525 | a2.Set(0); 526 | a2.Set(1); 527 | a2.Set(4); 528 | a2.Set(14); 529 | Assert.IsFalse(a1.Intersects(a2)); 530 | Console.WriteLine("testing IntersectOddNess:ok"); 531 | } 532 | 533 | 534 | [TestMethod] 535 | public void TestCardinality() 536 | { 537 | Console.WriteLine("testing EWAH GetCardinality"); 538 | var bitmap = new EwahCompressedBitArray(); 539 | bitmap.Set(int.MaxValue); 540 | 541 | //Assert.AreEqual(true, false); 542 | Console.WriteLine("Total Items {0:d}\n", bitmap.GetCardinality()); 543 | Assert.AreEqual(bitmap.GetCardinality(), 1UL); 544 | Console.WriteLine("testing EWAH GetCardinality:ok"); 545 | } 546 | 547 | [TestMethod] 548 | public void TestEwahCompressedBitArray() 549 | { 550 | Console.WriteLine("testing EWAH (basic)"); 551 | const long zero = 0; 552 | const long specialval = 1L | (1L << 4) | (1L << 63); 553 | const long notzero = ~zero; 554 | var myarray1 = new EwahCompressedBitArray 555 | {zero, zero, zero, specialval, specialval, notzero, zero}; 556 | Assert.AreEqual(myarray1.GetPositions().Count, 6 + 64); 557 | var myarray2 = new EwahCompressedBitArray(); 558 | myarray2.Add(zero); 559 | myarray2.Add(specialval); 560 | myarray2.Add(specialval); 561 | myarray2.Add(notzero); 562 | myarray2.Add(zero); 563 | myarray2.Add(zero); 564 | myarray2.Add(zero); 565 | Assert.AreEqual(myarray2.GetPositions().Count, 6 + 64); 566 | List data1 = myarray1.GetPositions(); 567 | List 
data2 = myarray2.GetPositions(); 568 | var logicalor = new List(); 569 | { 570 | var tmp = new HashSet(); 571 | tmp.AddRange(data1); 572 | tmp.AddRange(data2); 573 | logicalor.AddRange(tmp); 574 | } 575 | logicalor.Sort(); 576 | var logicaland = new List(); 577 | logicaland.AddRange(data1); 578 | logicaland.Retain(data2); 579 | logicaland.Sort(); 580 | EwahCompressedBitArray arrayand = myarray1.And(myarray2); 581 | AreEqual(arrayand.GetPositions(), logicaland); 582 | EwahCompressedBitArray arrayor = myarray1.Or(myarray2); 583 | AreEqual(arrayor.GetPositions(), logicalor); 584 | EwahCompressedBitArray arrayandbis = myarray2.And(myarray1); 585 | AreEqual(arrayandbis.GetPositions(), logicaland); 586 | EwahCompressedBitArray arrayorbis = myarray2.Or(myarray1); 587 | AreEqual(arrayorbis.GetPositions(), logicalor); 588 | var x = new EwahCompressedBitArray(); 589 | foreach (int i in myarray1.GetPositions()) 590 | { 591 | x.Set(i); 592 | } 593 | AreEqual(x.GetPositions(), myarray1.GetPositions()); 594 | x = new EwahCompressedBitArray(); 595 | foreach (int i in myarray2.GetPositions()) 596 | { 597 | x.Set(i); 598 | } 599 | AreEqual(x.GetPositions(), myarray2.GetPositions()); 600 | x = new EwahCompressedBitArray(); 601 | foreach (int pos in myarray1) 602 | { 603 | x.Set(pos); 604 | } 605 | AreEqual(x.GetPositions(), myarray1.GetPositions()); 606 | x = new EwahCompressedBitArray(); 607 | foreach (int pos in myarray2) 608 | { 609 | x.Set(pos); 610 | } 611 | AreEqual(x.GetPositions(), myarray2.GetPositions()); 612 | Console.WriteLine("testing EWAH (basic):ok"); 613 | } 614 | 615 | [TestMethod] 616 | public void TestExternalization() 617 | { 618 | Console.WriteLine("testing EWAH externalization"); 619 | var ewcb = new EwahCompressedBitArray(); 620 | int[] val = { 5, 4400, 44600, 55400, 1000000 }; 621 | foreach (int t in val) 622 | { 623 | ewcb.Set(t); 624 | } 625 | 626 | var bos = new MemoryStream(); 627 | var bf = new BinaryFormatter(); 628 | bf.Serialize(bos, ewcb); 629 | 
bos.Position = 0; 630 | 631 | ewcb = (EwahCompressedBitArray)bf.Deserialize(bos); 632 | 633 | List result = ewcb.GetPositions(); 634 | AreEqual(val, result); 635 | Console.WriteLine("testing EWAH externalization:ok"); 636 | } 637 | 638 | [TestMethod] 639 | public void TestLargeEwahCompressedBitArray() 640 | { 641 | Console.WriteLine("testing EWAH over a large array"); 642 | var myarray1 = new EwahCompressedBitArray(); 643 | const int n = 11000000; 644 | for (int i = 0; i < n; ++i) 645 | { 646 | myarray1.Set(i); 647 | } 648 | Assert.AreEqual(myarray1.SizeInBits, n); 649 | Console.WriteLine("testing EWAH over a large array:ok"); 650 | } 651 | 652 | /** 653 | * Test massive and. 654 | */ 655 | 656 | [TestMethod] 657 | public void TestMassiveAnd() 658 | { 659 | Console.WriteLine("testing massive logical and"); 660 | var ewah = new EwahCompressedBitArray[1024]; 661 | for (int k = 0; k < ewah.Length; ++k) 662 | ewah[k] = new EwahCompressedBitArray(); 663 | for (int k = 0; k < 30000; ++k) 664 | { 665 | ewah[(k + 2 * k * k) % ewah.Length].Set(k); 666 | } 667 | EwahCompressedBitArray answer = ewah[0]; 668 | for (int k = 1; k < ewah.Length; ++k) 669 | answer = answer.And(ewah[k]); 670 | // result should be empty 671 | if (answer.GetPositions().Count != 0) 672 | Console.WriteLine(answer.ToDebugString()); 673 | Assert.IsTrue(answer.GetPositions().Count == 0); 674 | Console.WriteLine("testing massive logical and:ok"); 675 | } 676 | 677 | /** 678 | * Test massive xor. 679 | */ 680 | 681 | /** 682 | * Test massive and not. 
*/
        [TestMethod]
        public void TestMassiveAndNot()
        {
            Console.WriteLine("testing massive and not");
            int bitmapCount = 1024;
            var bitmaps = new EwahCompressedBitArray[bitmapCount];
            for (int idx = 0; idx < bitmaps.Length; ++idx)
            {
                bitmaps[idx] = new EwahCompressedBitArray();
            }

            // Scatter the values 0..29999 over the bitmaps; each value lands in one bitmap.
            for (int value = 0; value < 30000; ++value)
            {
                int slot = (value + 2 * value * value) % bitmaps.Length;
                bitmaps[slot].Set(value);
            }

            EwahCompressedBitArray answer = bitmaps[0];
            EwahCompressedBitArray answer2 = bitmaps[0];
            for (int idx = 1; idx < bitmaps.Length; ++idx)
            {
                answer = answer.AndNot(bitmaps[idx]);
                EwahCompressedBitArray copy = null;
                try
                {
                    copy = (EwahCompressedBitArray)bitmaps[idx].Clone();
                    copy.Not();
                    // NOTE(review): the value returned by And is discarded here, while other
                    // tests in this file use And's return value as the result. Confirm that
                    // answer2 is meant to stay unchanged, otherwise this comparison checks
                    // less than it appears to.
                    answer2.And(copy);
                    assertEqualsPositions(answer.GetPositions(), answer2.GetPositions());
                }
                catch (InvalidOperationException e)
                {
                    // NOTE(review): the exception is only logged, so it does not fail the test.
                    Console.Error.WriteLine(e.StackTrace);
                }
            }
            Console.WriteLine("testing massive and not:ok");
        }

        /**
         * Test massive or.
*/
        [TestMethod]
        public void TestMassiveOr()
        {
            Console.WriteLine("testing massive logical or (can take a couple of minutes)");
            int N = 128;
            for (int howmany = 512; howmany <= 10000; howmany *= 2)
            {
                var ewah = new EwahCompressedBitArray[N];
                var bset = new BitArray[N];
                int k;
                for (k = 0; k < ewah.Length; ++k)
                    ewah[k] = new EwahCompressedBitArray();
                for (k = 0; k < bset.Length; ++k)
                    bset[k] = new BitArray(10000);
                for (k = 0; k < N; ++k)
                    assertEqualsPositions(bset[k], ewah[k]);
                // Scatter 0..howmany-1 over the bitmaps, mirrored in the reference BitArrays.
                for (k = 0; k < howmany; ++k)
                {
                    ewah[(k + 2 * k * k) % ewah.Length].Set(k);
                    bset[(k + 2 * k * k) % ewah.Length].Set(k, true);
                }
                for (k = 0; k < N; ++k)
                    assertEqualsPositions(bset[k], ewah[k]);
                EwahCompressedBitArray answer = ewah[0];
                // BitArray.Or mutates its receiver, so bset[0] accumulates the union.
                BitArray BitArrayanswer = bset[0];
                for (k = 1; k < ewah.Length; ++k)
                {
                    EwahCompressedBitArray tmp = answer.Or(ewah[k]);
                    BitArrayanswer.Or(bset[k]);
                    answer = tmp;
                    assertEqualsPositions(BitArrayanswer, answer);
                }
                assertEqualsPositions(BitArrayanswer, answer);
                // The union must enumerate 0, 1, 2, ... without gaps.
                k = 0;
                foreach (int j in answer)
                {
                    if (k != j)
                        Console.WriteLine(answer.ToDebugString());
                    Assert.AreEqual(k, j);
                    k += 1;
                }
            }
            Console.WriteLine("testing massive logical or:ok");
        }

        /**
         * Test massive xor.
         */
        [TestMethod]
        public void TestMassiveXOR()
        {
            Console.WriteLine("testing massive xor (can take a couple of minutes)");
            int N = 16;
            var ewah = new EwahCompressedBitArray[N];
            var bset = new BitArray[N];
            for (int k = 0; k < ewah.Length; ++k)
                ewah[k] = new EwahCompressedBitArray();
            for (int k = 0; k < bset.Length; ++k)
                bset[k] = new BitArray(30000);
            for (int k = 0; k < 30000; ++k)
            {
                ewah[(k + 2 * k * k) % ewah.Length].Set(k);
                bset[(k + 2 * k * k) % ewah.Length].Set(k, true);
            }
            EwahCompressedBitArray answer = ewah[0];
            // BitArray.Xor mutates its receiver, so bset[0] accumulates the result.
            BitArray BitArrayanswer = bset[0];
            for (int k = 1; k < ewah.Length; ++k)
            {
                answer = answer.Xor(ewah[k]);
                BitArrayanswer.Xor(bset[k]);
                assertEqualsPositions(BitArrayanswer, answer);
            }
            // The bitmaps are disjoint, so the xor must enumerate 0, 1, 2, ... without gaps.
            int k2 = 0;
            foreach (int j in answer)
            {
                if (k2 != j)
                    Console.WriteLine(answer.ToDebugString());
                Assert.AreEqual(k2, j);
                k2 += 1;
            }
            Console.WriteLine("testing massive xor:ok");
        }

        /**
         * Checks that the three fields of a RunningLengthWord (running bit,
         * running length, number of literal words) can be set independently
         * without disturbing one another.
         */
        [TestMethod]
        public void TestRunningLengthWord()
        {
            Console.WriteLine("testing RunningLengthWord");
            var x = new long[1];
            var rlw = new RunningLengthWord(x, 0);
            Assert.AreEqual(0, rlw.NumberOfLiteralWords);
            Assert.AreEqual(false, rlw.RunningBit);
            Assert.AreEqual(0, rlw.RunningLength);
            rlw.RunningBit = true;
            Assert.AreEqual(0, rlw.NumberOfLiteralWords);
            Assert.AreEqual(true, rlw.RunningBit);
            Assert.AreEqual(0, rlw.RunningLength);
            rlw.RunningBit = false;
            Assert.AreEqual(0, rlw.NumberOfLiteralWords);
            Assert.AreEqual(false, rlw.RunningBit);
            Assert.AreEqual(0, rlw.RunningLength);

            // Literal-word count, running bit cleared.
            for (var rl = (int)RunningLengthWord.LargestLiteralCount; rl >= 0; rl -= 64 * 1024)
            {
                rlw.NumberOfLiteralWords = rl;
                Assert.AreEqual(rl, rlw.NumberOfLiteralWords);
                Assert.AreEqual(false, rlw.RunningBit);
                Assert.AreEqual(0, rlw.RunningLength);
                rlw.NumberOfLiteralWords = 0;
                Assert.AreEqual(0, rlw.NumberOfLiteralWords);
                Assert.AreEqual(false, rlw.RunningBit);
                Assert.AreEqual(0, rlw.RunningLength);
            }

            // Running length, running bit cleared.
            for (long rl = 0; rl <= RunningLengthWord.LargestRunningLengthCount; rl += 64 * 1024)
            {
                rlw.RunningLength = rl;
                Assert.AreEqual(0, rlw.NumberOfLiteralWords);
                Assert.AreEqual(false, rlw.RunningBit);
                Assert.AreEqual(rl, rlw.RunningLength);
                rlw.RunningLength = 0;
                Assert.AreEqual(0, rlw.NumberOfLiteralWords);
                Assert.AreEqual(false, rlw.RunningBit);
                Assert.AreEqual(0, rlw.RunningLength);
            }

            // Running length, running bit set.
            rlw.RunningBit = true;
            for (long rl = 0; rl <= RunningLengthWord.LargestRunningLengthCount; rl += 64 * 1024)
            {
                rlw.RunningLength = rl;
                Assert.AreEqual(0, rlw.NumberOfLiteralWords);
                Assert.AreEqual(true, rlw.RunningBit);
                Assert.AreEqual(rl, rlw.RunningLength);
                rlw.RunningLength = 0;
                Assert.AreEqual(0, rlw.NumberOfLiteralWords);
                Assert.AreEqual(true, rlw.RunningBit);
                Assert.AreEqual(0, rlw.RunningLength);
            }

            // Literal-word count, running bit set.
            for (long rl = 0; rl <= RunningLengthWord.LargestLiteralCount; rl += 64 * 128)
            {
                rlw.NumberOfLiteralWords = rl;
                Assert.AreEqual(rl, rlw.NumberOfLiteralWords);
                Assert.AreEqual(true, rlw.RunningBit);
                Assert.AreEqual(0, rlw.RunningLength);
                rlw.NumberOfLiteralWords = 0;
                Assert.AreEqual(0, rlw.NumberOfLiteralWords);
                Assert.AreEqual(true, rlw.RunningBit);
                Assert.AreEqual(0, rlw.RunningLength);
            }
            Console.WriteLine("testing RunningLengthWord:ok");
        }

        /**
         * For every size k below 4096, checks SizeInBits and cardinality after
         * sizing the bitmap through the property, through SetSizeInBits with
         * either default value, and by setting bits one at a time.
         */
        [TestMethod]
        public void testsetSizeInBits()
        {
            Console.WriteLine("testing setSizeInBits");
            for (int k = 0; k < 4096; ++k)
            {
                EwahCompressedBitArray ewah = new EwahCompressedBitArray();
                ewah.SizeInBits = k;
                Assert.AreEqual(k, ewah.SizeInBits);
                Assert.AreEqual(0UL, ewah.GetCardinality());

                EwahCompressedBitArray ewah2 = new EwahCompressedBitArray();
                ewah2.SetSizeInBits(k, false);
                Assert.AreEqual(k, ewah2.SizeInBits);
                Assert.AreEqual(0UL, ewah2.GetCardinality());

                EwahCompressedBitArray ewah3 = new EwahCompressedBitArray();
                for (int i = 0; i < k; ++i)
                {
                    ewah3.Set(i);
                }
                Assert.AreEqual(k, ewah3.SizeInBits);
                Assert.AreEqual((ulong)k, ewah3.GetCardinality());

                EwahCompressedBitArray ewah4 = new EwahCompressedBitArray();
                ewah4.SetSizeInBits(k, true);
                Assert.AreEqual(k, ewah4.SizeInBits);
                Assert.AreEqual((ulong)k, ewah4.GetCardinality());
            }
        }

        /**
         * A one-bit bitmap padded with false must have exactly one bit set
         * after negation.
         */
        [TestMethod]
        public void TestSizeInBits1()
        {
            Console.WriteLine("testing TestSizeInBits1");

            EwahCompressedBitArray bitmap = new EwahCompressedBitArray();
            bitmap.SetSizeInBits(1, false);
            Assert.AreEqual(1, bitmap.SizeInBits);

            bitmap.Not();
            Assert.AreEqual(1UL, bitmap.GetCardinality());
        }

        /**
         * The enumerator of a bitmap holding only bit 0 must yield 0 first.
         */
        [TestMethod]
        public void TestHasNextSafe()
        {
            Console.WriteLine("testing TestHasNextSafe");
            EwahCompressedBitArray bitmap = new EwahCompressedBitArray();
            bitmap.Set(0);
            IEnumerator it = ((IEnumerable)bitmap).GetEnumerator();
            Assert.IsTrue(it.MoveNext());
            Assert.AreEqual(0, it.Current);
        }

        /**
         * After Set(4) and SetSizeInBits(6, true), the bitmap must hold exactly
         * the positions 4 and 5, through both GetPositions() and the enumerator.
         */
        [TestMethod]
        public void testDebugSetSizeInBitsTest()
        {
            Console.WriteLine("testing DebugSetSizeInBits");
            EwahCompressedBitArray b = new EwahCompressedBitArray();

            b.Set(4);

            b.SetSizeInBits(6, true);

            List<int> positions = b.GetPositions();

            Assert.AreEqual(2, positions.Count);
            Assert.AreEqual(4, positions[0]);
            Assert.AreEqual(5, positions[1]);

            IEnumerator iterator = ((IEnumerable)b).GetEnumerator();
            Assert.AreEqual(true, iterator.MoveNext());
            Assert.AreEqual(4, iterator.Current);
            Assert.AreEqual(true, iterator.MoveNext());
            Assert.AreEqual(5, iterator.Current);
            Assert.AreEqual(false, iterator.MoveNext());
        }

        /**
         * Compares And() of two bitmaps with an intersection computed by hand
         * from their position sets: same positions, equality, hash code and
         * cardinality.
         */
        [TestMethod]
        public void SsiYanKaiTest()
        {
            Console.WriteLine("testing SsiYanKaiTest");
            EwahCompressedBitArray a = EwahCompressedBitArray.BitmapOf(
                39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944,
                39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954,
                39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964,
                39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974,
                39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984,
                39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994,
                39995, 39996, 39997, 39998, 39999, 40000, 40001, 40002, 40003, 40004,
                40005, 40006, 40007, 40008, 40009, 40010, 40011, 40012, 40013, 40014,
                40015, 40016, 40017, 40018, 40019, 40020, 40021, 40022, 40023, 40024,
                40025, 40026, 40027, 40028, 40029, 40030, 40031, 40032, 40033, 40034,
                40035, 40036, 40037, 40038, 40039, 40040, 40041, 40042, 40043, 40044,
                40045, 40046, 40047, 40048, 40049, 40050, 40051, 40052, 40053, 40054,
                40055, 40056, 40057, 40058, 40059, 40060, 40061, 40062, 40063, 40064,
                40065, 40066, 40067, 40068, 40069, 40070, 40071, 40072, 40073, 40074,
                40075, 40076, 40077, 40078, 40079, 40080, 40081, 40082, 40083, 40084,
                40085, 40086, 40087, 40088, 40089, 40090, 40091, 40092, 40093, 40094,
                40095, 40096, 40097, 40098, 40099, 40100);
            EwahCompressedBitArray b = EwahCompressedBitArray.BitmapOf(
                39935, 39936, 39937, 39938, 39939, 39940, 39941, 39942, 39943, 39944,
                39945, 39946, 39947, 39948, 39949, 39950, 39951, 39952, 39953, 39954,
                39955, 39956, 39957, 39958, 39959, 39960, 39961, 39962, 39963, 39964,
                39965, 39966, 39967, 39968, 39969, 39970, 39971, 39972, 39973, 39974,
                39975, 39976, 39977, 39978, 39979, 39980, 39981, 39982, 39983, 39984,
                39985, 39986, 39987, 39988, 39989, 39990, 39991, 39992, 39993, 39994,
                39995, 39996, 39997, 39998, 39999, 270000);

            // Reference intersection built from the position sets.
            HashSet<int> aPositions = new HashSet<int>(a.GetPositions());
            ulong intersection = 0;
            EwahCompressedBitArray inter = new EwahCompressedBitArray();
            HashSet<int> bPositions = new HashSet<int>(b.GetPositions());
            foreach (int integer in bPositions)
            {
                if (aPositions.Contains(integer))
                {
                    inter.Set(integer);
                    ++intersection;
                }
            }

            EwahCompressedBitArray and2 = a.And(b);
            List<int> l1 = inter.GetPositions();
            List<int> l2 = and2.GetPositions();
            var ok = true;
            if (l1.Count != l2.Count)
            {
                Console.WriteLine("cardinality differs = " + l1.Count + " " + l2.Count);
                ok = false;
            }
            // Only compare the common prefix: when the lengths differ the test must
            // fail through the assertion below, not through an out-of-range index.
            int common = Math.Min(l1.Count, l2.Count);
            for (int k = 0; k < common; ++k)
            {
                if (l1[k] != l2[k])
                {
                    Console.WriteLine("differ at " + k + " = " + l1[k] + " " + l2[k]);
                    ok = false;
                }
            }
            Assert.IsTrue(ok);
            Assert.AreEqual(true, and2.Equals(inter));
            Assert.AreEqual(inter.GetHashCode(), and2.GetHashCode());
            Assert.AreEqual(intersection, and2.GetCardinality());
        }

        /**
         * A clone must stay equal to its source after both are extended to the
         * same size.
         */
        [TestMethod]
        public void TestCloneEwahCompressedBitArray()
        {
            Console.WriteLine("testing EWAH clone");
            EwahCompressedBitArray a = new EwahCompressedBitArray();
            a.Set(410018);
            a.Set(410019);
            a.Set(410020);
            a.Set(410021);
            a.Set(410022);
            a.Set(410023);

            EwahCompressedBitArray b = (EwahCompressedBitArray)a.Clone();

            a.SetSizeInBits(487123, false);
            b.SetSizeInBits(487123, false);

            Assert.AreEqual(a, b);
        }

        /**
         * GetPositions() must return exactly the bits that were set.
         */
        [TestMethod]
        public void TestSetGet()
        {
            Console.WriteLine("testing EWAH Set/get");
            var ewcb = new EwahCompressedBitArray();
            int[] val = { 5, 4400, 44600, 55400, 1000000 };
            for (int k = 0; k < val.Length; ++k)
            {
                ewcb.Set(val[k]);
            }
            List<int> result = ewcb.GetPositions();
            AreEqual(val, result);
            Console.WriteLine("testing EWAH Set/get:ok");
        }

        /**
         * Created: 2/4/11 6:03 PM By: Arnon Moscona.
         */

        /**
         * Test with parameters: runs ShouldSetBits for k = 2, 16, 128, ... below
         * 2^24, then PolizziTest for several sizes.
         */
        [TestMethod]
        public void TestWithParameters()
        {
            Console
                .WriteLine("These tests can run for several minutes. Please be patient.");
            for (int k = 2; k < 1 << 24; k *= 8)
                ShouldSetBits(k);

            Console.WriteLine("64");

            PolizziTest(64);
            PolizziTest(128);
            PolizziTest(256);
            PolizziTest(2048);
            Console.WriteLine("Your code is probably ok.");
        }

        /**
         * Sets random bits with a fixed seed and checks that GetCardinality()
         * matches the number of bits set.
         */
        [TestMethod]
        public void VanSchaikTest()
        {
            Console.WriteLine("testing vanSchaikTest (this takes some time)");
            const int totalNumBits = 32768;
            const double odds = 0.9;
            var rand = new Random(323232323);

            for (int t = 0; t < 100; t++)
            {
                ulong numBitsSet = 0;
                var cBitMap = new EwahCompressedBitArray();
                for (int i = 0; i < totalNumBits; i++)
                {
                    if (rand.NextDouble() < odds)
                    {
                        cBitMap.Set(i);
                        numBitsSet++;
                    }
                }
                Assert.AreEqual(numBitsSet, cBitMap.GetCardinality());
            }
            Console.WriteLine("testing vanSchaikTest:ok");
        }
    }
}
-------------------------------------------------------------------------------- /EWAH.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.0.31903.59 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "EWAH", "EWAH\EWAH.csproj", "{13FE7CCD-A03A-4B97-A89C-6CD301624997}" 7 | EndProject 8 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{F813A4C4-A3DB-461F-907E-6C69FDA4D8E2}" 9 | EndProject 10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EWAH.Tests", "EWAH.Tests\EWAH.Tests.csproj", "{1611081B-537F-4F2D-9670-5CE42AD30983}" 11 | EndProject 12 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "EWAH.RunTests", "EWAH.RunTests\EWAH.RunTests.csproj", "{34766D05-85E8-44A7-A0CD-8AA9391AA302}" 13 | EndProject 14 | Global 15 |
GlobalSection(SolutionConfigurationPlatforms) = preSolution 16 | Debug|Any CPU = Debug|Any CPU 17 | Debug|Mixed Platforms = Debug|Mixed Platforms 18 | Debug|x86 = Debug|x86 19 | Release|Any CPU = Release|Any CPU 20 | Release|Mixed Platforms = Release|Mixed Platforms 21 | Release|x86 = Release|x86 22 | EndGlobalSection 23 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 24 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 25 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Debug|Any CPU.Build.0 = Debug|Any CPU 26 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 27 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 28 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Debug|x86.ActiveCfg = Debug|Any CPU 29 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Release|Any CPU.ActiveCfg = Release|Any CPU 30 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Release|Any CPU.Build.0 = Release|Any CPU 31 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Release|Mixed Platforms.ActiveCfg = Release|Any CPU 32 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Release|Mixed Platforms.Build.0 = Release|Any CPU 33 | {13FE7CCD-A03A-4B97-A89C-6CD301624997}.Release|x86.ActiveCfg = Release|Any CPU 34 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 35 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Debug|Any CPU.Build.0 = Debug|Any CPU 36 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 37 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 38 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Debug|x86.ActiveCfg = Debug|x86 39 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Debug|x86.Build.0 = Debug|x86 40 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Release|Any CPU.ActiveCfg = Release|Any CPU 41 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Release|Any CPU.Build.0 = Release|Any CPU 42 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Release|Mixed 
Platforms.ActiveCfg = Release|x86 43 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Release|Mixed Platforms.Build.0 = Release|x86 44 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Release|x86.ActiveCfg = Release|x86 45 | {1611081B-537F-4F2D-9670-5CE42AD30983}.Release|x86.Build.0 = Release|x86 46 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 47 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Debug|Any CPU.Build.0 = Debug|Any CPU 48 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Debug|Mixed Platforms.ActiveCfg = Debug|Any CPU 49 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Debug|Mixed Platforms.Build.0 = Debug|Any CPU 50 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Debug|x86.ActiveCfg = Debug|x86 51 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Debug|x86.Build.0 = Debug|x86 52 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Release|Any CPU.ActiveCfg = Release|Any CPU 53 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Release|Any CPU.Build.0 = Release|Any CPU 54 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Release|Mixed Platforms.ActiveCfg = Release|x86 55 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Release|Mixed Platforms.Build.0 = Release|x86 56 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Release|x86.ActiveCfg = Release|x86 57 | {34766D05-85E8-44A7-A0CD-8AA9391AA302}.Release|x86.Build.0 = Release|x86 58 | EndGlobalSection 59 | GlobalSection(SolutionProperties) = preSolution 60 | HideSolutionNode = FALSE 61 | EndGlobalSection 62 | GlobalSection(ExtensibilityGlobals) = postSolution 63 | SolutionGuid = {41233BE1-099C-475F-AE63-B5F7D0A5DC2F} 64 | EndGlobalSection 65 | EndGlobal 66 | -------------------------------------------------------------------------------- /EWAH/BufferedRunningLengthWord.cs: -------------------------------------------------------------------------------- 1 | namespace Ewah 2 | { 3 | /* 4 | * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup 5 | * Licensed under APL 2.0. 6 | */ 7 | 8 | 9 | /// 10 | /// Mostly for internal use. 
/// Similar to RunningLengthWord, but can
    /// be modified without access to the array, and has faster access.
    ///
    internal sealed class BufferedRunningLengthWord
    {
        #region Fields

        /// <summary>
        /// how many dirty words have we read so far?
        /// </summary>
        public int DirtyWordOffset;

        /// <summary>
        /// The Number of literal words
        /// </summary>
        public int NumberOfLiteralWords;

        /// <summary>
        /// The Running bit
        /// </summary>
        public bool RunningBit;

        /// <summary>
        /// The Running length
        /// </summary>
        public long RunningLength;

        #endregion

        #region C'tors

        /// <summary>
        /// Instantiates a new buffered running length word from the word that
        /// <paramref name="rlw"/> currently points at.
        /// </summary>
        /// <param name="rlw">the rlw</param>
        public BufferedRunningLengthWord(RunningLengthWord rlw)
            : this(rlw.ArrayOfWords[rlw.Position])
        {
        }

        /// <summary>
        /// Instantiates a new buffered running length word
        /// </summary>
        /// <param name="a">the word</param>
        public BufferedRunningLengthWord(long a)
        {
            // Same decoding as Reset; DirtyWordOffset starts at 0 either way.
            Reset(a);
        }

        #endregion

        #region Instance Properties

        /// <summary>
        /// Size in uncompressed words: the running length plus the number of
        /// literal words.
        /// </summary>
        public long Count
        {
            get { return RunningLength + NumberOfLiteralWords; }
        }

        #endregion

        #region Instance Methods

        /// <summary>
        /// Describes the running bit, running length and number of literal words.
        /// </summary>
        /// <returns>the description string</returns>
        public override string ToString()
        {
            return "running bit = " + RunningBit + " running length = "
                   + RunningLength + " number of lit. words "
                   + NumberOfLiteralWords;
        }

        /// <summary>
        /// Discard first words: consumes <paramref name="x"/> words, taking them
        /// from the running length first and from the literal words afterwards.
        /// </summary>
        /// <param name="x">the number of words to discard</param>
        public void DiscardFirstWords(long x)
        {
            if (RunningLength >= x)
            {
                RunningLength -= x;
                return;
            }
            // The run is exhausted; the remainder comes out of the literal words.
            x -= RunningLength;
            RunningLength = 0;
            DirtyWordOffset += (int) x;
            NumberOfLiteralWords -= (int) x;
        }

        /// <summary>
        /// Reset the values of this running length word so that it has the same values
        /// as the other running length word.
        /// </summary>
        /// <param name="rlw">the other running length word</param>
        public void Reset(RunningLengthWord rlw)
        {
            Reset(rlw.ArrayOfWords[rlw.Position]);
        }

        /// <summary>
        /// Reset the values using the provided word.
        /// </summary>
        /// <param name="a">the word</param>
        public void Reset(long a)
        {
            // Bit 0 is the running bit, the next RunningLengthBits bits hold the
            // running length, and the remaining high bits hold the literal count.
            NumberOfLiteralWords = (int) (((ulong) a) >> (1 + RunningLengthWord.RunningLengthBits));
            RunningBit = (a & 1) != 0;
            // RunningLength is a long: cast to long rather than int so a run length
            // that does not fit in 31 bits is not wrapped to a negative value.
            RunningLength = (long) ((((ulong) a) >> 1) & RunningLengthWord.LargestRunningLengthCount);
            DirtyWordOffset = 0;
        }

        #endregion
    }
}
-------------------------------------------------------------------------------- /EWAH/EWAH.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netstandard2.0 5 | false 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /EWAH/EwahCompressedBitArray.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections; 3 | using System.Collections.Generic; 4 | using System.Runtime.Serialization; 5 | using System.Text; 6 | 7 | namespace Ewah 8 | { 9 | /* 10 | * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup 11 | * Licensed under APL 2.0. 12 | */ 13 | 14 | 15 | /// 16 | ///

This implements the patent-free(1) EWAH scheme. Roughly speaking, it is a 17 | /// 64-bit variant of the BBC compression scheme used by Oracle for its bitmap 18 | /// indexes.

19 | /// 20 | ///

The objective of this compression type is to provide some compression, while 21 | /// reducing as much as possible the CPU cycle usage.

22 | /// 23 | /// 24 | ///

Because this implementation is 64-bit, it assumes a 64-bit CPU together with a 25 | /// 64-bit .NET runtime. The same code on a 32-bit machine may not be as 26 | /// fast.

27 | /// 28 | ///

For more details, see the following paper:

29 | /// 30 | ///
  • Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned 31 | /// bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. 32 | /// http://arxiv.org/abs/0901.3751
  • 33 | ///
34 | /// 35 | ///

It was first described by Wu et al. and named WBC:

36 | /// 37 | ///
  • K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and 38 | /// implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161, Lawrence 39 | /// Berkeley National Laboratory, available from 40 | /// http://crd.lbl.gov/~kewu/ps/PUB-3161.html (2001).
  • 41 | ///
42 | /// 43 | ///

We can view this scheme as a 64-bit equivalent to the 44 | /// Oracle bitmap compression scheme:

45 | ///
  • G. Antoshenkov, Byte-Aligned Bitmap Compression, DCC'95, 1995.
46 | /// 47 | ///

1- The author (D. Lemire) does not know of any patent infringed by the 48 | /// following implementation. However, similar schemes, like WAH, are covered by 49 | /// patents.

50 | /// 51 | /// Ported to C# by Kemal Erdogan 52 | ///
[Serializable]
public sealed class EwahCompressedBitArray : ICloneable, IEnumerable, ISerializable
{
    #region Constants

    /// <summary>
    /// Number of bits held by one buffer word (a 64-bit long).
    /// </summary>
    public const int WordInBits = 64;

    /// <summary>
    /// Number of words allocated by the parameterless constructor.
    /// </summary>
    private const int DefaultBufferSize = 4;

    #endregion

    #region Readonly & Static Fields

    //internal readonly RunningLengthWord _Rlw;

    #endregion

    #region Fields

    /// <summary>
    /// Marker for the current (last) running length word inside <see cref="_Buffer"/>.
    /// </summary>
    internal RunningLengthWord _Rlw;

    /// <summary>
    /// Number of words of <see cref="_Buffer"/> that are in use; starts at 1.
    /// </summary>
    internal int _ActualSizeInWords = 1;

    /// <summary>
    /// Backing storage: an array of 64-bit words.
    /// </summary>
    internal long[] _Buffer;

    #endregion

    #region C'tors

    /// <summary>
    /// Creates an empty bitmap (no bit set to true) with the default buffer size.
    /// </summary>
    public EwahCompressedBitArray()
        : this(DefaultBufferSize)
    {
    }

    /// <summary>
    /// Creates an empty bitmap with an explicit initial buffer size. The initial
    /// memory usage will be "buffersize * 64" bits. For large, poorly compressible
    /// bitmaps, a larger value may improve performance.
    /// </summary>
    /// <param name="buffersize">number of 64-bit words reserved when the object is created</param>
    public EwahCompressedBitArray(int buffersize)
    {
        _Buffer = new long[buffersize];
        _Rlw = new RunningLengthWord(_Buffer, 0);
    }

    /// <summary>
    /// Deserialization constructor: reads the entries "sb", "aw", "bu" and "rp"
    /// and forwards them to the internal four-argument constructor.
    /// </summary>
    /// <param name="input">serialized state</param>
    /// <param name="context">streaming context (unused)</param>
    private EwahCompressedBitArray(SerializationInfo input, StreamingContext context)
        : this(input.GetInt32("sb"),
               input.GetInt32("aw"),
               (long[])input.GetValue("bu", typeof(long[])),
               input.GetInt32("rp"))
    {
    }

    /// <summary>
    /// Rebuilds a bitmap from already-known state (used by the serialization code).
    /// The supplied buffer is stored as-is, not copied.
    /// </summary>
    /// <param name="sizeInBits">The size in bits.</param>
    /// <param name="actualSizeInWords">The actual size in words.</param>
    /// <param name="buffer">The buffer.</param>
    /// <param name="runningLengthWordPosition">The running length word position.</param>
    internal EwahCompressedBitArray(int sizeInBits, int actualSizeInWords, long[] buffer, int runningLengthWordPosition)
    {
        SizeInBits = sizeInBits;
        _ActualSizeInWords = actualSizeInWords;
        _Buffer = buffer;
        _Rlw = new RunningLengthWord(_Buffer, runningLengthWordPosition);
    }

    #endregion

    #region Instance Properties

    /// <summary>
    /// The size in bits of the *uncompressed* bitmap represented by this
    /// compressed bitmap. Initially, the SizeInBits is zero. It is extended
    /// automatically when you set bits to true.
    /// </summary>
    public int SizeInBits { get; set; }

    /// <summary>
    /// The *compressed* size of the bitmap in bytes: eight bytes per used word.
    /// </summary>
    public int SizeInBytes
    {
        get
        {
            const int bytesPerWord = 8;
            return _ActualSizeInWords * bytesPerWord;
        }
    }

    #endregion

    #region Instance Methods

    /// <summary>
    /// Checks whether the two compressed bitmaps contain the same data, i.e.
    /// whether the cardinality of their XOR is zero.
    /// </summary>
    /// <param name="o">the other bitmap</param>
    /// <returns>false when <paramref name="o"/> is not an EwahCompressedBitArray</returns>
    public override bool Equals(Object o)
    {
        var that = o as EwahCompressedBitArray;
        if (that == null)
        {
            return false;
        }
        // could be more efficient: this materializes the whole XOR
        return Xor(that).GetCardinality() == 0;
    }

    ///
    /// Returns a customized hash code (based on Karp-Rabin).
    /// Naturally, if the bitmaps are equal, they will hash to the same value.
///
///
public override int GetHashCode()
{
    // Karp-Rabin style rolling hash; every 64-bit value is folded in as two
    // 32-bit halves (low half first, then high half).
    const int B = 31;
    const long lowHalfMask = (1L << 32) - 1;
    long hash = 0;
    EwahEnumerator cursor = this.GetEwahEnumerator();
    while (cursor.HasNext())
    {
        cursor.Next();
        // Runs of zero words do not contribute; only runs of ones are hashed.
        if (cursor._Rlw.RunningBit == true)
        {
            long runLength = cursor._Rlw.RunningLength;
            hash += B * hash + (runLength & lowHalfMask);
            hash += B * hash + (long)(((ulong)runLength) >> 32);
        }
        int firstLiteral = cursor.DirtyWords;
        long literalCount = cursor._Rlw.NumberOfLiteralWords;
        for (int k = 0; k < literalCount; ++k)
        {
            long word = this._Buffer[firstLiteral + k];
            hash += B * hash + (word & lowHalfMask);
            hash += B * hash + (long)(((ulong)word) >> 32);
        }
    }
    return (int)hash;
}

/// <summary>
/// A string describing the bitmap: the enumerated values, comma-separated,
/// wrapped in curly braces.
/// </summary>
/// <returns>the description string</returns>
public override string ToString()
{
    var text = new StringBuilder("{");

    IEnumerator values = ((IEnumerable)this).GetEnumerator();

    bool needsSeparator = false;
    while (values.MoveNext())
    {
        if (needsSeparator)
        {
            text.Append(",");
        }
        text.Append(values.Current);
        needsSeparator = true;
    }
    text.Append("}");
    return text.ToString();
}

/// <summary>
/// Adding words directly to the bitmap (for expert use).
///
/// This is normally how you add data to the array. So you add bits in streams
/// of 8*8 bits.
/// </summary>
/// <param name="newdata">the word</param>
/// <returns>the number of words added to the buffer</returns>
public int Add(long newdata)
{
    // A full word: all WordInBits bits are significant.
    int wordsAdded = Add(newdata, WordInBits);
    return wordsAdded;
}

///
/// Adding words directly to the bitmap (for expert use).
///
/// the word
/// the number of significant bits (by default it should be 64)
/// the number of words added to the buffer
public int Add(long newdata, int bitsthatmatter)
{
    // The uncompressed size grows by the number of significant bits only.
    SizeInBits += bitsthatmatter;
    // All-zero and all-one words are stored as part of a run, not as literals.
    if (newdata == 0)
    {
        return AddEmptyWord(false);
    }
    if (newdata == ~0L)
    {
        return AddEmptyWord(true);
    }
    return AddLiteralWord(newdata);
}

/// <summary>
/// For experts: You want to add many
/// zeroes or ones? This is the method you use.
/// SizeInBits grows by 64 bits for every word added.
/// </summary>
/// <param name="v">the bool value</param>
/// <param name="number">the number of words to add</param>
/// <returns>the number of words added to the buffer</returns>
public int AddStreamOfEmptyWords(bool v, long number)
{
    if (number == 0)
    {
        return 0;
    }
    bool noliteralword = (_Rlw.NumberOfLiteralWords == 0);
    long runlen = _Rlw.RunningLength;
    // A marker with no run and no literals yet can simply adopt the requested bit.
    if ((noliteralword) && (runlen == 0))
    {
        _Rlw.RunningBit = v;
    }
    int wordsadded = 0;
    if ((noliteralword) && (_Rlw.RunningBit == v)
        && (runlen < RunningLengthWord.LargestRunningLengthCount))
    {
        // Extend the current run, up to the largest run length one marker can hold.
        long whatwecanadd = number < RunningLengthWord.LargestRunningLengthCount
                            - runlen
                                ? number
                                : RunningLengthWord.LargestRunningLengthCount
                                  - runlen;
        _Rlw.RunningLength = runlen + whatwecanadd;
        SizeInBits += (int)whatwecanadd * WordInBits;
        // Whatever did not fit is added by a recursive call.
        if (number - whatwecanadd > 0)
        {
            wordsadded += AddStreamOfEmptyWords(v, number - whatwecanadd);
        }
    }
    else
    {
        // The current marker cannot take the run: append a new marker word
        // and make it the current one.
        PushBack(0);
        ++wordsadded;
        _Rlw.Position = _ActualSizeInWords - 1;
        long whatwecanadd = number < RunningLengthWord.LargestRunningLengthCount
                                ? number
                                : RunningLengthWord.LargestRunningLengthCount;
        _Rlw.RunningBit = v;
        _Rlw.RunningLength = whatwecanadd;
        SizeInBits += (int)whatwecanadd * WordInBits;
        if (number - whatwecanadd > 0)
        {
            wordsadded += AddStreamOfEmptyWords(v, number - whatwecanadd);
        }
    }
    return wordsadded;
}

/// <summary>
/// Returns a new compressed bitmap containing the bitwise AND values of the
/// current bitmap with some other bitmap.
///
/// The running time is proportional to the sum of the compressed sizes (as
/// reported by SizeInBytes).
/// </summary>
/// <param name="a">the other bitmap</param>
/// <returns>the EWAH compressed bitmap; its SizeInBits is the larger of the two inputs</returns>
public EwahCompressedBitArray And(EwahCompressedBitArray a)
{
    var container = new EwahCompressedBitArray();
    // Reserve room for the larger of the two compressed inputs.
    container
        .Reserve(_ActualSizeInWords > a._ActualSizeInWords
                     ? _ActualSizeInWords
                     : a._ActualSizeInWords);
    // i walks the argument, j walks this bitmap.
    EwahEnumerator i = a.GetEwahEnumerator();
    EwahEnumerator j = GetEwahEnumerator();
    if (!(i.HasNext() && j.HasNext()))
    {
        // this never happens...
        container.SizeInBits = SizeInBits;
        return container;
    }
    // at this point, this is safe:
    var rlwi = new BufferedRunningLengthWord(i.Next());
    var rlwj = new BufferedRunningLengthWord(j.Next());
    while (true)
    {
        // The "prey" is the buffered word with the smaller Count; it is fully
        // consumed in this iteration. The "predator" is consumed by the same amount.
        // NOTE(review): Count is presumably run length + literal count - confirm
        // in BufferedRunningLengthWord.
        bool iIsPrey = rlwi.Count < rlwj.Count;
        BufferedRunningLengthWord prey = iIsPrey ? rlwi : rlwj;
        BufferedRunningLengthWord predator = iIsPrey ? rlwj : rlwi;
        long predatorrl;
        long tobediscarded;
        if (prey.RunningBit == false)
        {
            // Prey run of zeros: the result is zeros whatever the predator holds.
            container.AddStreamOfEmptyWords(false, prey.RunningLength);
            predator.DiscardFirstWords(prey.RunningLength);
            prey.RunningLength = 0;
        }
        else
        {
            // we have a stream of 1x11
            // Prey run of ones: the result is a copy of the predator over that span -
            // first the overlapping part of its run, then its literal words.
            predatorrl = predator.RunningLength;
            long preyrl = prey.RunningLength;
            tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
            container
                .AddStreamOfEmptyWords(predator.RunningBit, tobediscarded);
            int dwPredator = predator.DirtyWordOffset
                             + (iIsPrey ? j.DirtyWords : i.DirtyWords);
            container.AddStreamOfDirtyWords(iIsPrey ? j.Buffer : i.Buffer,
                                            dwPredator,
                                            preyrl - tobediscarded);
            predator.DiscardFirstWords(preyrl);
            prey.RunningLength = 0;
        }
        // The prey's run is exhausted; match what is left of the predator's run
        // against the prey's literal words.
        predatorrl = predator.RunningLength;
        long nbreDirtyPrey;
        if (predatorrl > 0)
        {
            if (predator.RunningBit == false)
            {
                // Predator zeros wipe out the prey literals they overlap.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
                container.AddStreamOfEmptyWords(false, tobediscarded);
            }
            else
            {
                // Predator ones let the overlapping prey literals through unchanged.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                int dwPrey = prey.DirtyWordOffset
                             + (iIsPrey ? i.DirtyWords : j.DirtyWords);
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                container.AddStreamOfDirtyWords(iIsPrey ? i.Buffer : j.Buffer,
                                                dwPrey,
                                                tobediscarded);
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
            }
        }
        // all that is left to do now is to AND the dirty words
        nbreDirtyPrey = prey.NumberOfLiteralWords;
        if (nbreDirtyPrey > 0)
        {
            for (int k = 0; k < nbreDirtyPrey; ++k)
            {
                // i.Buffer is always the left operand; only the offsets depend on
                // which side is the prey.
                if (iIsPrey)
                {
                    container.Add(i.Buffer[prey.DirtyWordOffset + i.DirtyWords + k]
                                  & j.Buffer[predator.DirtyWordOffset + j.DirtyWords + k]);
                }
                else
                {
                    container.Add(i.Buffer[predator.DirtyWordOffset + i.DirtyWords
                                           + k]
                                  & j.Buffer[prey.DirtyWordOffset + j.DirtyWords + k]);
                }
            }
            predator.DiscardFirstWords(nbreDirtyPrey);
        }
        // Refill the prey from its enumerator; when that side has no more words,
        // mark it as finished (null) and leave the loop.
        if (iIsPrey)
        {
            if (!i.HasNext())
            {
                rlwi = null;
                break;
            }
            rlwi.Reset(i.Next());
        }
        else
        {
            if (!j.HasNext())
            {
                rlwj = null;
                break;
            }
            rlwj.Reset(j.Next());
        }
    }
    // The side that still has data is handed to DischargeAsEmpty (defined
    // elsewhere in this class).
    // NOTE(review): presumably it appends the remainder as zero words - confirm.
    if (rlwi != null)
    {
        DischargeAsEmpty(rlwi, i, container);
    }
    if (rlwj != null)
    {
        DischargeAsEmpty(rlwj, j, container);
    }
    container.SizeInBits = Math.Max(SizeInBits, a.SizeInBits);
    return container;
}

/// <summary>
/// Returns a new compressed bitmap containing the bitwise AND NOT values of
/// the current bitmap with some other bitmap.
///
/// The running time is proportional to the sum of the compressed sizes (as
/// reported by SizeInBytes).
/// </summary>
/// <param name="a">the other bitmap</param>
/// <returns>the EWAH compressed bitmap; its SizeInBits is the larger of the two inputs</returns>
public EwahCompressedBitArray AndNot(EwahCompressedBitArray a)
{
    var container = new EwahCompressedBitArray();
    container
        .Reserve(_ActualSizeInWords > a._ActualSizeInWords
                     ? _ActualSizeInWords
                     : a._ActualSizeInWords);
    // i walks the argument (the side that is negated), j walks this bitmap.
    EwahEnumerator i = a.GetEwahEnumerator();
    EwahEnumerator j = GetEwahEnumerator();
    if (!(i.HasNext() && j.HasNext()))
    {
        // this never happens...
        container.SizeInBits = SizeInBits;
        return container;
    }
    // at this point, this is safe:
    var rlwi = new BufferedRunningLengthWord(i.Next());
    // Negate the argument's run bit in the buffered copy; its literal words are
    // negated where they are read (AddStreamOfNegatedDirtyWords / ~ below).
    rlwi.RunningBit = !rlwi.RunningBit;
    var rlwj = new BufferedRunningLengthWord(j.Next());
    while (true)
    {
        // Same prey/predator scheme as And: the word with the smaller Count is the prey.
        bool iIsPrey = rlwi.Count < rlwj.Count;
        BufferedRunningLengthWord prey = iIsPrey ? rlwi : rlwj;
        BufferedRunningLengthWord predator = iIsPrey ? rlwj : rlwi;

        long predatorrl;
        long tobediscarded;
        if (prey.RunningBit == false)
        {
            // Prey run of zeros: the result is zeros over that span.
            container.AddStreamOfEmptyWords(false, prey.RunningLength);
            predator.DiscardFirstWords(prey.RunningLength);
            prey.RunningLength = 0;
        }
        else
        {
            // we have a stream of 1x11
            // Prey run of ones: copy the predator over that span.
            predatorrl = predator.RunningLength;
            long preyrl = prey.RunningLength;
            tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
            container
                .AddStreamOfEmptyWords(predator.RunningBit, tobediscarded);
            int dwPredator = predator.DirtyWordOffset
                             + (iIsPrey ? j.DirtyWords : i.DirtyWords);
            if (iIsPrey)
            {
                // Predator is this bitmap: its literals are copied as they are.
                container.AddStreamOfDirtyWords(j.Buffer,
                                                dwPredator,
                                                preyrl
                                                - tobediscarded);
            }
            else
            {
                // Predator is the argument: its literals must be negated.
                container.AddStreamOfNegatedDirtyWords(i.Buffer,
                                                       dwPredator,
                                                       preyrl - tobediscarded);
            }
            predator.DiscardFirstWords(preyrl);
            prey.RunningLength = 0;
        }
        // Match what is left of the predator's run against the prey's literals.
        predatorrl = predator.RunningLength;
        long nbreDirtyPrey;
        if (predatorrl > 0)
        {
            if (predator.RunningBit == false)
            {
                // Predator zeros wipe out the prey literals they overlap.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
                container.AddStreamOfEmptyWords(false, tobediscarded);
            }
            else
            {
                // Predator ones let the prey literals through (negated when the
                // prey is the argument).
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                int dwPrey = prey.DirtyWordOffset
                             + (iIsPrey ? i.DirtyWords : j.DirtyWords);
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                if (iIsPrey)
                {
                    container.AddStreamOfNegatedDirtyWords(i.Buffer,
                                                           dwPrey,
                                                           tobediscarded);
                }
                else
                {
                    container.AddStreamOfDirtyWords(j.Buffer, dwPrey, tobediscarded);
                }
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
            }
        }
        // all that is left to do now is to AND NOT the dirty words
        // (this bitmap's word AND the complement of the argument's word)
        nbreDirtyPrey = prey.NumberOfLiteralWords;
        if (nbreDirtyPrey > 0)
        {
            for (int k = 0; k < nbreDirtyPrey; ++k)
            {
                if (iIsPrey)
                {
                    container.Add((~i.Buffer[prey.DirtyWordOffset + i.DirtyWords
                                             + k])
                                  & j.Buffer[predator.DirtyWordOffset + j.DirtyWords + k]);
                }
                else
                {
                    container.Add((~i.Buffer[predator.DirtyWordOffset
                                             + i.DirtyWords + k])
                                  & j.Buffer[prey.DirtyWordOffset + j.DirtyWords + k]);
                }
            }
            predator.DiscardFirstWords(nbreDirtyPrey);
        }
        if (iIsPrey)
        {
            if (!i.HasNext())
            {
                rlwi = null;
                break;
            }
            rlwi.Reset(i.Next());
            // Every freshly loaded word of the argument gets its run bit negated.
            rlwi.RunningBit = !rlwi.RunningBit;
        }
        else
        {
            if (!j.HasNext())
            {
                rlwj = null;
                break;
            }
            rlwj.Reset(j.Next());
        }
    }
    // Leftovers of the argument go through DischargeAsEmpty, leftovers of this
    // bitmap through Discharge (both defined elsewhere in this class).
    // NOTE(review): presumably "as zeros" vs "copied as-is" - confirm.
    if (rlwi != null)
    {
        DischargeAsEmpty(rlwi, i, container);
    }
    if (rlwj != null)
    {
        Discharge(rlwj, j, container);
    }
    container.SizeInBits = Math.Max(SizeInBits, a.SizeInBits);
    return container;
}

///
///
/// reports the number of bits set to true. Running time is proportional to
/// compressed size (as reported by SizeInBytes).
///
/// the number of bits set to true
public ulong GetCardinality()
{
    ulong counter = 0;
    var i = new EwahEnumerator(_Buffer, _ActualSizeInWords);
    while (i.HasNext())
    {
        RunningLengthWord localrlw = i.Next();

        // A run of ones contributes 64 bits per word; runs of zeros contribute nothing.
        if (localrlw.RunningBit)
        {
            counter += (ulong)(WordInBits * localrlw.RunningLength);
        }
        // Literal words are counted bit by bit (bitCount is defined elsewhere in this class).
        for (int j = 0; j < localrlw.NumberOfLiteralWords; ++j)
        {
            long data = i.Buffer[i.DirtyWords + j];
            counter += bitCount((ulong)data);
        }
    }
    return counter;
}

/// <summary>
/// Gets the locations of the true values as one list (may use more memory
/// than GetEnumerator()). Positions at or beyond SizeInBits are trimmed from
/// the end of the result.
/// </summary>
/// <returns>the positions of the set bits, in increasing order</returns>
public List<int> GetPositions()
{
    var v = new List<int>();
    var i = new EwahEnumerator(_Buffer, _ActualSizeInWords);
    int pos = 0;
    while (i.HasNext())
    {
        RunningLengthWord localrlw = i.Next();
        if (localrlw.RunningBit)
        {
            // Run of ones: every bit of every word in the run is set.
            for (int j = 0; j < localrlw.RunningLength; ++j)
            {
                for (int c = 0; c < WordInBits; ++c)
                {
                    v.Add(pos++);
                }
            }
        }
        else
        {
            // Run of zeros: skip over it.
            pos += WordInBits * (int)localrlw.RunningLength;
        }

        for (int j = 0; j < localrlw.NumberOfLiteralWords; ++j)
        {
            long data = i.Buffer[i.DirtyWords + j];
            for (int c = 0; c < WordInBits; ++c)
            {
                if (((1L << c) & data) != 0)
                {
                    v.Add(pos);
                }
                ++pos;
            }
        }
    }
    // Drop trailing positions that lie beyond the logical size.
    // RemoveAt removes by index; List<int>.Remove(int) would remove by *value*
    // and could delete a valid position or never terminate.
    while ((v.Count > 0)
           && (v[v.Count - 1] >= SizeInBits))
    {
        v.RemoveAt(v.Count - 1);
    }
    return v;
}

/// <summary>
/// Return true if the two EwahCompressedBitArray have both at least one
/// true bit in the same Position. Equivalently, you could call "And"
/// and check whether there is a set bit, but intersects will run faster
/// if you don't need the result of the "and" operation.
/// </summary>
/// <remarks>
/// Literal (dirty) words are assumed to be non-zero when they are matched
/// against a run of ones; they are only inspected bit by bit when matched
/// against other literal words.
/// </remarks>
/// <param name="a">the other bitmap</param>
/// <returns>true if a common set bit was found</returns>
public bool Intersects(EwahCompressedBitArray a)
{
    // i walks the argument, j walks this bitmap.
    EwahEnumerator i = a.GetEwahEnumerator();
    EwahEnumerator j = GetEwahEnumerator();
    if (!(i.HasNext() && j.HasNext()))
    {
        return false;
    }
    // at this point, this is safe:
    var rlwi = new BufferedRunningLengthWord(i.Next());
    var rlwj = new BufferedRunningLengthWord(j.Next());
    while (true)
    {
        // The "prey" is the buffered word with the smaller Count; it is fully
        // consumed in this iteration.
        bool iIsPrey = rlwi.Count < rlwj.Count;
        BufferedRunningLengthWord prey = iIsPrey ? rlwi : rlwj;
        BufferedRunningLengthWord predator = iIsPrey ? rlwj : rlwi;
        long predatorrl;
        long tobediscarded;
        if (prey.RunningBit == false)
        {
            // Prey run of zeros: nothing can intersect over that span.
            predator.DiscardFirstWords(prey.RunningLength);
            prey.RunningLength = 0;
        }
        else
        {
            // we have a stream of 1x11
            predatorrl = predator.RunningLength;
            long preyrl = prey.RunningLength;
            tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
            // Two runs of ones intersect only if they overlap by at least one
            // word. The running bit may be set on a zero-length run (Not() flips
            // it unconditionally), so the overlap length must be checked too.
            if (predator.RunningBit && tobediscarded > 0) return true;
            // The rest of the prey's ones faces predator literal words.
            if (preyrl - tobediscarded > 0) return true;
            predator.DiscardFirstWords(preyrl);
            prey.RunningLength = 0;
        }
        // Match what is left of the predator's run against the prey's literals.
        predatorrl = predator.RunningLength;
        long nbreDirtyPrey;
        if (predatorrl > 0)
        {
            if (predator.RunningBit == false)
            {
                // Predator zeros: the overlapped prey literals cannot intersect.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
            }
            else
            {
                // Predator ones against at least one prey literal word.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                if (tobediscarded > 0) return true;
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
            }
        }
        // all that is left to do now is to AND the dirty words
        nbreDirtyPrey = prey.NumberOfLiteralWords;

        if (nbreDirtyPrey > 0)
        {
            for (int k = 0; k < nbreDirtyPrey; ++k)
            {
                if (iIsPrey)
                {
                    if ((i.Buffer[prey.DirtyWordOffset + i.DirtyWords + k]
                         & j.Buffer[predator.DirtyWordOffset + j.DirtyWords + k]) != 0)
                        return true;
                }
                else
                {
                    if ((i.Buffer[predator.DirtyWordOffset + i.DirtyWords
                                  + k]
                         & j.Buffer[prey.DirtyWordOffset + j.DirtyWords + k]) != 0)
                        return true;
                }
            }
            predator.DiscardFirstWords(nbreDirtyPrey);
        }
        // Refill the prey; once either side runs out there is nothing left to intersect.
        if (iIsPrey)
        {
            if (!i.HasNext())
            {
                rlwi = null;
                break;
            }
            rlwi.Reset(i.Next());
        }
        else
        {
            if (!j.HasNext())
            {
                rlwj = null;
                break;
            }
            rlwj.Reset(j.Next());
        }
    }
    return false;
}

///
/// Negate (bitwise) the current bitmap. To get a negated copy, do
/// ((EwahCompressedBitArray) mybitmap.Clone()).not();
///
/// The running time is proportional to the compressed size (as reported by
/// SizeInBytes).
///
public void Not()
{
    // Works in place: every marker's running bit is flipped and every literal
    // word is complemented directly in the buffer.
    var i = new EwahEnumerator(_Buffer, _ActualSizeInWords);
    if (!i.HasNext())
    {
        return;
    }
    while (true)
    {

        RunningLengthWord rlw1 = i.Next();

        rlw1.RunningBit = !rlw1.RunningBit;
        for (int j = 0; j < rlw1.NumberOfLiteralWords; ++j)
        {
            i.Buffer[i.DirtyWords + j] = ~i.Buffer[i.DirtyWords + j];
        }
        if (!i.HasNext())
        {
            // Last marker: bits beyond SizeInBits in the final word must stay
            // zero after the negation.
            int usedbitsinlast = SizeInBits % WordInBits;

            // The bitmap ends exactly on a word boundary: nothing to mask.
            if (usedbitsinlast == 0)
                return;

            if (rlw1.NumberOfLiteralWords == 0)
            {
                // The bitmap ends in a run. If it is now a run of ones, take its
                // last word out of the run and re-add it as a literal holding
                // only the used low bits.
                if ((rlw1.RunningLength > 0) && (rlw1.RunningBit))
                {
                    rlw1.RunningLength = rlw1.RunningLength - 1;
                    AddLiteralWord((long)((~0UL) >> (WordInBits - usedbitsinlast)));
                }
                return;
            }
            // The bitmap ends in a literal word: clear its unused high bits.
            i.Buffer[i.DirtyWords + rlw1.NumberOfLiteralWords - 1] &= (long)((~0UL) >>
                                                                       (WordInBits - usedbitsinlast));

            return;
        }

    }
}

/// <summary>
/// Returns a new compressed bitmap containing the bitwise OR values of the
/// current bitmap with some other bitmap.
///
/// The running time is proportional to the sum of the compressed sizes (as
/// reported by SizeInBytes).
/// </summary>
/// <param name="a">the other bitmap</param>
/// <returns>the EWAH compressed bitmap; its SizeInBits is the larger of the two inputs</returns>
public EwahCompressedBitArray Or(EwahCompressedBitArray a)
{
    var container = new EwahCompressedBitArray();
    // Reserve room for the sum of both compressed sizes.
    container.Reserve(_ActualSizeInWords + a._ActualSizeInWords);
    // i walks the argument, j walks this bitmap.
    EwahEnumerator i = a.GetEwahEnumerator();
    EwahEnumerator j = GetEwahEnumerator();
    if (!(i.HasNext() && j.HasNext()))
    {
        // this never happens...
        container.SizeInBits = SizeInBits;
        return container;
    }
    // at this point, this is safe:
    var rlwi = new BufferedRunningLengthWord(i.Next());
    var rlwj = new BufferedRunningLengthWord(j.Next());
    while (true)
    {
        // The "prey" is the buffered word with the smaller Count; it is fully
        // consumed in this iteration.
        // NOTE(review): Count is presumably run length + literal count - confirm
        // in BufferedRunningLengthWord.
        bool iIsPrey = rlwi.Count < rlwj.Count;
        BufferedRunningLengthWord prey = iIsPrey ? rlwi : rlwj;
        BufferedRunningLengthWord predator = iIsPrey ? rlwj : rlwi;
        long predatorrl;
        long tobediscarded;
        if (prey.RunningBit == false)
        {
            // Prey run of zeros: the result is a copy of the predator over that
            // span - the overlapping part of its run, then its literal words.
            predatorrl = predator.RunningLength;
            long preyrl = prey.RunningLength;
            tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
            container
                .AddStreamOfEmptyWords(predator.RunningBit, tobediscarded);
            long dwPredator = predator.DirtyWordOffset
                              + (iIsPrey ? j.DirtyWords : i.DirtyWords);
            container.AddStreamOfDirtyWords(iIsPrey ? j.Buffer : i.Buffer,
                                            dwPredator,
                                            preyrl - tobediscarded);
            predator.DiscardFirstWords(preyrl);
            prey.DiscardFirstWords(preyrl);
            prey.RunningLength = 0;
        }
        else
        {
            // we have a stream of 1x11
            // Prey run of ones: the result is ones whatever the predator holds.
            container.AddStreamOfEmptyWords(true, prey.RunningLength);
            predator.DiscardFirstWords(prey.RunningLength);
            prey.RunningLength = 0;
        }
        // Match what is left of the predator's run against the prey's literals.
        predatorrl = predator.RunningLength;
        long nbreDirtyPrey;
        if (predatorrl > 0)
        {
            if (predator.RunningBit == false)
            {
                // Predator zeros let the overlapping prey literals through unchanged.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                // The offset is captured before the discard calls below.
                long dwPrey = prey.DirtyWordOffset
                              + (iIsPrey ? i.DirtyWords : j.DirtyWords);
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
                container.AddStreamOfDirtyWords(iIsPrey ? i.Buffer : j.Buffer,
                                                dwPrey,
                                                tobediscarded);
            }
            else
            {
                // Predator ones override the prey literals they overlap.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                container.AddStreamOfEmptyWords(true, tobediscarded);
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
            }
        }
        // all that is left to do now is to OR the dirty words
        nbreDirtyPrey = prey.NumberOfLiteralWords;
        if (nbreDirtyPrey > 0)
        {
            for (int k = 0; k < nbreDirtyPrey; ++k)
            {
                if (iIsPrey)
                {
                    container.Add(i.Buffer[prey.DirtyWordOffset + i.DirtyWords + k]
                                  | j.Buffer[predator.DirtyWordOffset + j.DirtyWords + k]);
                }
                else
                {
                    container.Add(i.Buffer[predator.DirtyWordOffset + i.DirtyWords
                                           + k]
                                  | j.Buffer[prey.DirtyWordOffset + j.DirtyWords + k]);
                }
            }
            predator.DiscardFirstWords(nbreDirtyPrey);
        }
        // Refill the prey from its enumerator (the buffered word is reused via
        // Reset); when that side has no more words, mark it finished and stop.
        if (iIsPrey)
        {
            if (!i.HasNext())
            {
                rlwi = null;
                break;
            }
            rlwi.Reset(i.Next());
        }
        else
        {
            if (!j.HasNext())
            {
                rlwj = null;
                break;
            }
            rlwj.Reset(j.Next());
        }
    }
    // The side that still has data is handed to Discharge (defined elsewhere in
    // this class).
    // NOTE(review): presumably it copies the remainder into the container - confirm.
    if (rlwi != null)
    {
        Discharge(rlwi, i, container);
    }
    if (rlwj != null)
    {
        Discharge(rlwj, j, container);
    }
    container.SizeInBits = Math.Max(SizeInBits, a.SizeInBits);
    return container;
}

///
/// set the bit at Position i to true, the bits must be set in increasing
/// order. For example, Set(15) and then Set(7) will fail. You must do Set(7)
/// and then Set(15).
///
/// the index
/// true if the value was set (always true when i>= SizeInBits)
public bool Set(int i)
{
    // Bits can only be appended: anything below the current size is refused.
    if (i < SizeInBits)
    {
        return false;
    }

    // When the current size ends inside a word, round it up to the next word
    // boundary - unless the new bit falls before that boundary.
    bool endsInsideWord = (SizeInBits % WordInBits) != 0;
    if (endsInsideWord)
    {
        int roundedUpSize = (SizeInBits / WordInBits) * WordInBits + WordInBits;
        if (roundedUpSize < i + 1)
        {
            SizeInBits = roundedUpSize;
        }
    }

    // Fill the gap up to the word that holds bit i with zero words
    // (this also advances SizeInBits).
    int targetWord = i / WordInBits;
    AddStreamOfEmptyWords(false, targetWord - SizeInBits / WordInBits);

    int bittoflip = i - (SizeInBits / WordInBits * WordInBits);
    long bitMask = 1L << bittoflip;

    // A fresh literal word is needed when the current marker has none yet, or
    // when bit i lies in a word after the last one covered so far.
    bool startsNewWord = (_Rlw.NumberOfLiteralWords == 0)
                         || ((SizeInBits - 1) / WordInBits < targetWord);
    if (startsNewWord)
    {
        AddLiteralWord(bitMask);
    }
    else
    {
        int lastIndex = _ActualSizeInWords - 1;
        _Buffer[lastIndex] |= bitMask;
        // Did this bit complete a word of all ones?
        if (_Buffer[lastIndex] == ~0L)
        {
            // Drop the last literal word ...
            _Buffer[lastIndex] = 0L;
            --_ActualSizeInWords;
            _Rlw.NumberOfLiteralWords = _Rlw.NumberOfLiteralWords - 1;
            // ... and record it as one clean word of ones instead.
            AddEmptyWord(true);
        }
    }

    SizeInBits = i + 1;
    return true;
}

///
/// Change the reported size in bits of the *uncompressed* bitmap represented
/// by this compressed bitmap. It is not possible to reduce the SizeInBits, but
/// it can be extended. The new bits are set to false or true depending on the
/// value of defaultvalue.
///
/// the size in bits
/// the default bool value
/// true if the update was possible
public bool SetSizeInBits(int size, bool defaultvalue)
{
    // Shrinking is not supported.
    if (size < SizeInBits)
    {
        return false;
    }

    if (defaultvalue)
    {
        // next bit could be optimized:
        // set bits one at a time until the current word is complete
        while (((SizeInBits % WordInBits) != 0) && (SizeInBits < size))
        {
            Set(SizeInBits);
        }

        // then add whole words of ones
        AddStreamOfEmptyWords(defaultvalue, (size / WordInBits)
                                            - SizeInBits / WordInBits);

        // next bit could be optimized:
        // finally set the bits of the trailing partial word one at a time
        while (SizeInBits < size)
        {
            Set(SizeInBits);
        }
    }
    else
    {
        // Number of zero words to append: the difference in whole words, plus
        // one if the new size ends inside a word, minus one if the current size
        // already does.
        int wordsToAppend = (size / WordInBits) - SizeInBits / WordInBits;
        if ((size % WordInBits) != 0)
        {
            wordsToAppend += 1;
        }
        if ((SizeInBits % WordInBits) != 0)
        {
            wordsToAppend -= 1;
        }
        AddStreamOfEmptyWords(false, wordsToAppend);
    }

    SizeInBits = size;
    return true;
}
/// <summary>
/// Sets the internal buffer to the minimum possible size required to contain
/// the current bitarray.
///
/// This method is useful when dealing with static bitmasks, if it is called
/// after the final bit has been set, some memory can be free-ed.
///
/// Please note, the next bit set after a call to shrink will cause the memory
/// usage of the bit-array to double.
/// </summary>
public void Shrink()
{
    int usedWords = _ActualSizeInWords;
    Array.Resize(ref _Buffer, usedWords);
    // Array.Resize may have replaced the array instance; re-point the marker at it.
    _Rlw.ArrayOfWords = _Buffer;
}

///
/// A more detailed string describing the bitmap (useful for debugging).
///
/// detailed debug string
public string ToDebugString()
{
    // A StringBuilder avoids re-copying the whole text on every append, which
    // repeated string concatenation in the loop would do.
    var ans = new StringBuilder();
    ans.Append(" EwahCompressedBitArray, size in bits = ").Append(SizeInBits)
       .Append(" size in words = ").Append(_ActualSizeInWords).Append("\n");
    var i = new EwahEnumerator(_Buffer, _ActualSizeInWords);
    while (i.HasNext())
    {
        RunningLengthWord localrlw = i.Next();
        // One line for the run: its length followed by the kind of run.
        if (localrlw.RunningBit)
        {
            ans.Append(localrlw.RunningLength).Append(" 1x11\n");
        }
        else
        {
            ans.Append(localrlw.RunningLength).Append(" 0x00\n");
        }
        // Then the literal-word count and one tab-indented line per literal word.
        ans.Append(localrlw.NumberOfLiteralWords).Append(" dirties\n");
        for (int j = 0; j < localrlw.NumberOfLiteralWords; ++j)
        {
            long data = i.Buffer[i.DirtyWords + j];
            ans.Append("\t").Append(data).Append("\n");
        }
    }
    return ans.ToString();
}




/// <summary>
/// Returns a new compressed bitmap containing the bitwise XOR values of the
/// current bitmap with some other bitmap.
///
/// The running time is proportional to the sum of the compressed sizes (as
/// reported by SizeInBytes).
/// </summary>
/// <param name="a">the other bitmap</param>
/// <returns>the EWAH compressed bitmap; its SizeInBits is the larger of the two inputs</returns>
public EwahCompressedBitArray Xor(EwahCompressedBitArray a)
{
    var container = new EwahCompressedBitArray();
    // Reserve room for the sum of both compressed sizes.
    container.Reserve(_ActualSizeInWords + a._ActualSizeInWords);
    // i walks the argument, j walks this bitmap.
    EwahEnumerator i = a.GetEwahEnumerator();
    EwahEnumerator j = GetEwahEnumerator();
    if (!(i.HasNext() && j.HasNext()))
    {
        // this never happens...
        container.SizeInBits = SizeInBits;
        return container;
    }
    // at this point, this is safe:
    var rlwi = new BufferedRunningLengthWord(i.Next());
    var rlwj = new BufferedRunningLengthWord(j.Next());
    while (true)
    {
        // The "prey" is the buffered word with the smaller Count; it is fully
        // consumed in this iteration.
        // NOTE(review): Count is presumably run length + literal count - confirm
        // in BufferedRunningLengthWord.
        bool iIsPrey = rlwi.Count < rlwj.Count;
        BufferedRunningLengthWord prey = iIsPrey ? rlwi : rlwj;
        BufferedRunningLengthWord predator = iIsPrey ? rlwj : rlwi;
        long predatorrl;
        long preyrl;
        long tobediscarded;

        if (prey.RunningBit == false)
        {
            // Prey run of zeros: XOR with zero copies the predator unchanged -
            // the overlapping part of its run, then its literal words.
            predatorrl = predator.RunningLength;
            preyrl = prey.RunningLength;
            tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
            container
                .AddStreamOfEmptyWords(predator.RunningBit, tobediscarded);
            long dwPredator = predator.DirtyWordOffset
                              + (iIsPrey ? j.DirtyWords : i.DirtyWords);
            container.AddStreamOfDirtyWords(iIsPrey ? j.Buffer : i.Buffer,
                                            dwPredator,
                                            preyrl - tobediscarded);
            predator.DiscardFirstWords(preyrl);
            prey.DiscardFirstWords(preyrl);
        }
        else
        {
            // we have a stream of 1x11
            // Prey run of ones: XOR with ones yields the complement of the
            // predator - an inverted run, then inverted literal words.
            predatorrl = predator.RunningLength;
            preyrl = prey.RunningLength;
            tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
            container.AddStreamOfEmptyWords(!predator.RunningBit,
                                            tobediscarded);
            int dwPredator = predator.DirtyWordOffset
                             + (iIsPrey ? j.DirtyWords : i.DirtyWords);
            long[] buf = iIsPrey ? j.Buffer : i.Buffer;
            for (int k = 0; k < preyrl - tobediscarded; ++k)
            {
                container.Add(~buf[k + dwPredator]);
            }
            predator.DiscardFirstWords(preyrl);
            prey.DiscardFirstWords(preyrl);
        }
        // Match what is left of the predator's run against the prey's literals.
        predatorrl = predator.RunningLength;
        long nbreDirtyPrey;
        if (predatorrl > 0)
        {
            if (predator.RunningBit == false)
            {
                // Predator zeros: the overlapping prey literals pass through unchanged.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                // The offset is captured before the discard calls below.
                long dwPrey = prey.DirtyWordOffset
                              + (iIsPrey ? i.DirtyWords : j.DirtyWords);
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
                container.AddStreamOfDirtyWords(iIsPrey ? i.Buffer : j.Buffer,
                                                dwPrey,
                                                tobediscarded);
            }
            else
            {
                // Predator ones: the overlapping prey literals are complemented.
                nbreDirtyPrey = prey.NumberOfLiteralWords;
                tobediscarded = (predatorrl >= nbreDirtyPrey)
                                    ? nbreDirtyPrey
                                    : predatorrl;
                int dwPrey = prey.DirtyWordOffset
                             + (iIsPrey ? i.DirtyWords : j.DirtyWords);
                predator.DiscardFirstWords(tobediscarded);
                prey.DiscardFirstWords(tobediscarded);
                long[] buf = iIsPrey ? i.Buffer : j.Buffer;
                for (int k = 0; k < tobediscarded; ++k)
                {
                    container.Add(~buf[k + dwPrey]);
                }
            }
        }
        // all that is left to do now is to XOR the dirty words
        nbreDirtyPrey = prey.NumberOfLiteralWords;
        if (nbreDirtyPrey > 0)
        {
            for (int k = 0; k < nbreDirtyPrey; ++k)
            {
                if (iIsPrey)
                {
                    container.Add(i.Buffer[prey.DirtyWordOffset + i.DirtyWords + k]
                                  ^ j.Buffer[predator.DirtyWordOffset + j.DirtyWords + k]);
                }
                else
                {
                    container.Add(i.Buffer[predator.DirtyWordOffset + i.DirtyWords
                                           + k]
                                  ^ j.Buffer[prey.DirtyWordOffset + j.DirtyWords + k]);
                }
            }
            predator.DiscardFirstWords(nbreDirtyPrey);
        }
        // Refill the prey from its enumerator; when that side has no more words,
        // mark it finished (null) and leave the loop.
        if (iIsPrey)
        {
            if (!i.HasNext())
            {
                rlwi = null;
                break;
            }
            rlwi.Reset(i.Next());
        }
        else
        {
            if (!j.HasNext())
            {
                rlwj = null;
                break;
            }
            rlwj.Reset(j.Next());
        }
    }
    // The side that still has data is handed to Discharge (defined elsewhere in
    // this class).
    // NOTE(review): presumably it copies the remainder into the container - confirm.
    if (rlwi != null)
    {
        Discharge(rlwi, i, container);
    }
    if (rlwj != null)
    {
        Discharge(rlwj, j, container);
    }
    container.SizeInBits = Math.Max(SizeInBits, a.SizeInBits);
    return container;
}

///
/// For internal use.
///
/// <param name="v">the bool value</param>
/// <returns>the storage cost of the addition</returns>
private int AddEmptyWord(bool v)
{
    if (_Rlw.NumberOfLiteralWords == 0)
    {
        long runLength = _Rlw.RunningLength;
        if (runLength == 0)
        {
            // The current marker is still blank, so it can adopt this bit value.
            _Rlw.RunningBit = v;
        }
        if (_Rlw.RunningBit == v
            && runLength < RunningLengthWord.LargestRunningLengthCount)
        {
            // Same bit value and room left: just lengthen the current run.
            _Rlw.RunningLength = runLength + 1;
            return 0;
        }
    }
    // Otherwise a fresh marker word is appended and becomes the current one.
    PushBack(0);
    _Rlw.Position = _ActualSizeInWords - 1;
    _Rlw.RunningBit = v;
    _Rlw.RunningLength = 1;
    return 1;
}

/// <summary>
/// For internal use. Appends one literal (dirty) word, starting a new marker
/// word first when the current marker's literal counter is already full.
/// </summary>
/// <param name="newdata">the dirty word</param>
/// <returns>the storage cost of the addition</returns>
private int AddLiteralWord(long newdata)
{
    long literalCount = _Rlw.NumberOfLiteralWords;
    if (literalCount < RunningLengthWord.LargestLiteralCount)
    {
        _Rlw.NumberOfLiteralWords = (int)literalCount + 1;
        PushBack(newdata);
        return 1;
    }
    // Counter saturated: one word for the new marker plus one for the literal.
    PushBack(0);
    _Rlw.Position = _ActualSizeInWords - 1;
    _Rlw.NumberOfLiteralWords = 1;
    PushBack(newdata);
    return 2;
}

/// <summary>
/// if you have several dirty words to copy over, this might be faster.
/// </summary>
/// <param name="data">the dirty words</param>
/// <param name="start">the starting point in the array</param>
/// <param name="number">the number of dirty words to add</param>
/// <returns>how many (compressed) words were added to the bitmap</returns>
private long AddStreamOfDirtyWords(long[] data,
                                   long start,
                                   long number)
{
    if (number == 0)
    {
        return 0;
    }
    long wordsadded = 0;
    for (; ; )
    {
        // Copy as many words as the current marker's literal counter can still take.
        long literalsSoFar = _Rlw.NumberOfLiteralWords;
        long whatwecanadd = Math.Min(number,
                                     RunningLengthWord.LargestLiteralCount - literalsSoFar);
        _Rlw.NumberOfLiteralWords = (int)(literalsSoFar + whatwecanadd);
        PushBack(data, (int)start, (int)whatwecanadd);
        SizeInBits += (int)whatwecanadd * WordInBits;
        wordsadded += whatwecanadd;

        long leftovernumber = number - whatwecanadd;
        if (leftovernumber <= 0)
        {
            return wordsadded;
        }
        // Marker is full: open a new one and continue with the remainder.
        PushBack(0);
        _Rlw.Position = _ActualSizeInWords - 1;
        ++wordsadded;
        start += whatwecanadd;
        number = leftovernumber;
    }
}

/// <summary>
/// Same as addStreamOfDirtyWords, but the words are negated.
/// </summary>
/// <param name="data">the dirty words</param>
/// <param name="start">the starting point in the array</param>
/// <param name="number">the number of dirty words to add</param>
/// <returns>how many (compressed) words were added to the bitmap</returns>
private long AddStreamOfNegatedDirtyWords(long[] data,
                                          long start,
                                          long number)
{
    if (number == 0)
    {
        return 0;
    }
    long wordsadded = 0;
    for (; ; )
    {
        // Copy (negated) as many words as the current marker can still take.
        long literalsSoFar = _Rlw.NumberOfLiteralWords;
        long whatwecanadd = Math.Min(number,
                                     RunningLengthWord.LargestLiteralCount - literalsSoFar);
        _Rlw.NumberOfLiteralWords = (int)(literalsSoFar + whatwecanadd);
        NegativePushBack(data, (int)start, (int)whatwecanadd);
        SizeInBits += (int)whatwecanadd * WordInBits;
        wordsadded += whatwecanadd;

        long leftovernumber = number - whatwecanadd;
        if (leftovernumber <= 0)
        {
            return wordsadded;
        }
        // Marker is full: open a new one and continue with the remainder.
        PushBack(0);
        _Rlw.Position = _ActualSizeInWords - 1;
        ++wordsadded;
        start += whatwecanadd;
        number = leftovernumber;
    }
}

///
/// Gets an EwahEnumerator over the data.
/// This is a customized enumerator which iterates over run length word.
/// For experts only.
///
/// <returns>the EwahEnumerator</returns>
private EwahEnumerator GetEwahEnumerator()
{
    return new EwahEnumerator(_Buffer, _ActualSizeInWords);
}

/// <summary>
/// For internal use. Appends the bitwise complement of a range of words.
/// </summary>
/// <param name="data">the array of words to be added</param>
/// <param name="start">the starting point</param>
/// <param name="number">the number of words to add</param>
private void NegativePushBack(long[] data,
                              int start,
                              int number)
{
    // Same growth threshold as before: the buffer ends up strictly longer
    // than the number of words in use after the append.
    EnsureBufferHolds(_ActualSizeInWords + number + 1);
    for (int k = 0; k < number; ++k)
    {
        _Buffer[_ActualSizeInWords + k] = ~data[start + k];
    }
    _ActualSizeInWords += number;
}

/// <summary>
/// For internal use. Appends a single word at the end of the buffer.
/// </summary>
/// <param name="data">the word to be added</param>
private void PushBack(long data)
{
    EnsureBufferHolds(_ActualSizeInWords + 1);
    _Buffer[_ActualSizeInWords++] = data;
}

/// <summary>
/// For internal use. Appends a range of words at the end of the buffer.
/// </summary>
/// <param name="data">the array of words to be added</param>
/// <param name="start">the starting point</param>
/// <param name="number">the number of words to add</param>
private void PushBack(long[] data, int start, int number)
{
    // Same growth threshold as before: the buffer ends up strictly longer
    // than the number of words in use after the append.
    EnsureBufferHolds(_ActualSizeInWords + number + 1);
    Array.Copy(data, start, _Buffer, _ActualSizeInWords, number);
    _ActualSizeInWords += number;
}

/// <summary>
/// Grows the buffer by repeated doubling until it has at least
/// <paramref name="minimumLength"/> slots, resizing (and copying) only once,
/// and re-points the current running length word at the new array.
/// </summary>
/// <param name="minimumLength">the smallest acceptable buffer length</param>
private void EnsureBufferHolds(int minimumLength)
{
    if (_Buffer.Length >= minimumLength)
    {
        return;
    }
    // Doubling a zero-length buffer never makes progress, so start from one
    // slot in that case. The arithmetic is done in long so that doubling
    // cannot wrap around before the checked conversion below.
    long capacity = Math.Max(1, _Buffer.Length);
    while (capacity < minimumLength)
    {
        capacity *= 2;
    }
    Array.Resize(ref _Buffer, checked((int)capacity));
    _Rlw.ArrayOfWords = _Buffer;
}

///
/// For internal use (trading off memory for speed).
///
/// <param name="size">the number of words to allocate</param>
/// <returns>True if the buffer was enlarged, false if it was already at least that long</returns>
private bool Reserve(int size)
{
    if (size <= _Buffer.Length)
    {
        return false;
    }
    Array.Resize(ref _Buffer, size);
    // The current running length word must keep pointing at the live array.
    _Rlw.ArrayOfWords = _Buffer;
    return true;
}

#endregion

#region ICloneable Members

/// <summary>
/// Creates an independent copy: the word buffer is duplicated and the copy's
/// running length word is positioned like this one's, over the new buffer.
/// </summary>
/// <returns>the copy, as an object</returns>
public object Clone()
{
    var copy = new EwahCompressedBitArray();
    long[] bufferCopy = (long[])_Buffer.Clone();
    copy._Buffer = bufferCopy;
    copy._Rlw = new RunningLengthWord(bufferCopy, _Rlw.Position);
    copy._ActualSizeInWords = _ActualSizeInWords;
    copy.SizeInBits = SizeInBits;
    return copy;
}

#endregion

#region IEnumerable Members

IEnumerator IEnumerable.GetEnumerator()
{
    // Explicit interface implementation: forwards to the public enumerator.
    return GetEnumerator();
}

///
/// Iterator over the set bits (this is what most people will want to use to
/// browse the content). The location of the set bits is returned, in
/// increasing order.
///
/// <returns>the int enumerator</returns>
public IEnumerator GetEnumerator()
{
    var words = new EwahEnumerator(_Buffer, _ActualSizeInWords);
    return new IntIteratorImpl(words, WordInBits);
}

#endregion

#region ISerializable Members

/// <summary>
/// Writes the state of this bitmap into <paramref name="info"/> under the
/// keys "sb", "aw", "bu" and "rp". Shrink() is called first, so this
/// instance may be modified by the call.
/// </summary>
/// <param name="info">the store receiving the values</param>
/// <param name="context">not used by this method</param>
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
    Shrink();
    info.AddValue("sb", SizeInBits);
    info.AddValue("aw", _ActualSizeInWords);
    info.AddValue("bu", _Buffer, typeof(long[]));
    info.AddValue("rp", _Rlw.Position);
}

#endregion

#region Class Methods

/// <summary>
/// Builds a bitmap by calling Set for each given position, in the given order.
/// </summary>
/// <param name="setbits">the positions of the bits to set</param>
/// <returns>the new bitmap</returns>
public static EwahCompressedBitArray BitmapOf(params int[] setbits)
{
    var bitmap = new EwahCompressedBitArray();
    foreach (int position in setbits)
    {
        bitmap.Set(position);
    }
    return bitmap;
}

/// <summary>
/// For internal use. Copies the given buffered word, and then every word
/// the enumerator still has to offer, into the container.
/// </summary>
/// <param name="initialWord">the initial word</param>
/// <param name="enumerator">the enumerator</param>
/// <param name="container">the container</param>
private static void Discharge(BufferedRunningLengthWord initialWord,
                              EwahEnumerator enumerator,
                              EwahCompressedBitArray container)
{
    for (BufferedRunningLengthWord word = initialWord;
         ;
         word = new BufferedRunningLengthWord(enumerator.Next()))
    {
        // First the run of clean words, then the literal words that follow it.
        container.AddStreamOfEmptyWords(word.RunningBit, word.RunningLength);
        container.AddStreamOfDirtyWords(enumerator.Buffer,
                                        enumerator.DirtyWords + word.DirtyWordOffset,
                                        word.NumberOfLiteralWords);
        if (!enumerator.HasNext())
        {
            return;
        }
    }
}

///
/// Counts the number of set (1) bits.
///
/// <param name="v">the value to be processed</param>
/// <returns>the number of bits set in <paramref name="v"/></returns>
public static UInt64 bitCount(UInt64 v)
{
    const UInt64 OddBits = 0xAAAAAAAAAAAAAAAA;      // (~0UL) / 3 << 1
    const UInt64 LowPairs = 0x3333333333333333;     // (~0UL) / 5
    const UInt64 LowNibbles = 0x0F0F0F0F0F0F0F0F;   // (~0UL) / 17
    const UInt64 ByteOnes = 0x0101010101010101;

    v -= (v & OddBits) >> 1;                          // counts per 2-bit group
    v = (v & LowPairs) + ((v >> 2) & LowPairs);       // counts per 4-bit group
    v = (v + (v >> 4)) & LowNibbles;                  // counts per byte
    return (v * ByteOnes) >> 56;                      // byte counts summed into the top byte
}

/// <summary>
/// For internal use. Adds to the container, as zero-filled words, everything
/// covered by the given buffered word and by every word the enumerator still
/// has to offer (clean runs and literal words alike).
/// </summary>
/// <param name="initialWord">the initial word</param>
/// <param name="enumerator">the enumerator</param>
/// <param name="container">the container</param>
private static void DischargeAsEmpty(BufferedRunningLengthWord initialWord,
                                     EwahEnumerator enumerator,
                                     EwahCompressedBitArray container)
{
    for (BufferedRunningLengthWord word = initialWord;
         ;
         word = new BufferedRunningLengthWord(enumerator.Next()))
    {
        long runningLength = word.RunningLength;
        container.AddStreamOfEmptyWords(false,
                                        runningLength + word.NumberOfLiteralWords);
        if (!enumerator.HasNext())
        {
            return;
        }
    }
}

#endregion

#region Nested type: IntIteratorImpl

/// <summary>
/// Enumerator over the positions of the set bits. It decodes one running
/// length word at a time into a local buffer of positions and then hands
/// those positions out one by one.
/// </summary>
private sealed class IntIteratorImpl : IEnumerator
{
    #region Constants

    // Initial length of the local position buffer.
    private const int InitCapacity = 512;

    #endregion

    #region Readonly & Static Fields

    private readonly EwahEnumerator _EwahEnumerator;
    private readonly int _WordInBits;

    #endregion

    #region Fields

    private int _BufferPos;                                  // next slot of _LocalBuffer to hand out
    private int _Current = -1;                               // -1 while there is no current element
    private int[] _LocalBuffer = new int[InitCapacity];      // decoded bit positions
    private int _LocalBufferSize;                            // number of valid entries in _LocalBuffer
    private RunningLengthWord _LocalRlw;                     // word most recently taken from the enumerator
    private int _Pos;                                        // bit position reached by the decoding

    #endregion

    #region C'tors

    /// <summary>
    /// Creates the iterator.
    /// </summary>
    /// <param name="ewahEnumerator">the enumerator over the running length words</param>
    /// <param name="wordInBits">the number of bits accounted for per word</param>
    public IntIteratorImpl(EwahEnumerator ewahEnumerator, int wordInBits)
    {
        _EwahEnumerator = ewahEnumerator;
        _WordInBits = wordInBits;
    }

    #endregion

    #region Instance Methods

    /// <summary>Appends a position to the local buffer, doubling it as needed.</summary>
    private void Add(int val)
    {
        int slot = _LocalBufferSize++;
        while (_LocalBufferSize > _LocalBuffer.Length)
        {
            Array.Resize(ref _LocalBuffer, _LocalBuffer.Length * 2);
        }
        _LocalBuffer[slot] = val;
    }

    /// <summary>
    /// Makes sure the local buffer holds at least one position, decoding further
    /// running length words for as long as they yield no set bit.
    /// </summary>
    /// <returns>false once no word is left and the buffer is empty</returns>
    private bool HasNext()
    {
        for (; ; )
        {
            if (_LocalBufferSize != 0)
            {
                return true;
            }
            if (!LoadNextRle())
            {
                return false;
            }
            LoadBuffer();
        }
    }

    /// <summary>
    /// Decodes the current running length word: a run of ones contributes every
    /// position it covers, a run of zeros only advances the position, and each
    /// literal word contributes the positions of its set bits.
    /// </summary>
    private void LoadBuffer()
    {
        _BufferPos = 0;
        _LocalBufferSize = 0;
        long runLength = _LocalRlw.RunningLength;
        if (_LocalRlw.RunningBit)
        {
            long bitsInRun = runLength * _WordInBits;
            for (long n = 0; n < bitsInRun; ++n)
            {
                Add(_Pos++);
            }
        }
        else
        {
            _Pos += (int)(_WordInBits * runLength);
        }
        long literalWords = _LocalRlw.NumberOfLiteralWords;
        for (int j = 0; j < literalWords; ++j)
        {
            long data = _EwahEnumerator.Buffer[_EwahEnumerator.DirtyWords + j];
            for (int c = 0; c < _WordInBits; ++c, ++_Pos)
            {
                if (((data >> c) & 1L) != 0)
                {
                    Add(_Pos);
                }
            }
        }
    }

    /// <summary>Fetches the next running length word, if there is one.</summary>
    /// <returns>true when a word was fetched</returns>
    private bool LoadNextRle()
    {
        if (!_EwahEnumerator.HasNext())
        {
            return false;
        }
        _LocalRlw = _EwahEnumerator.Next();
        return true;
    }

    /// <summary>
    /// Hands out the next buffered position; the buffer is marked empty once
    /// its last entry has been handed out.
    /// </summary>
    private int Next()
    {
        int answer = _LocalBuffer[_BufferPos];
        ++_BufferPos;
        if (_BufferPos == _LocalBufferSize)
        {
            _LocalBufferSize = 0;
        }
        return answer;
    }

    #endregion

    #region IEnumerator Members

    public int Current
    {
        get { return _Current; }
    }

    public void Dispose()
    {
        // nothing to release
    }

    object IEnumerator.Current
    {
        get { return _Current; }
    }

    public bool MoveNext()
    {
        if (HasNext())
        {
            _Current = Next();
            return true;
        }
        _Current = -1;
        return false;
    }

    public void Reset()
    {
        _EwahEnumerator.Reset();
        _Current = -1;
        _Pos = 0;
        _LocalRlw = null;
        _LocalBuffer = new int[InitCapacity];
        _LocalBufferSize = 0;
        _BufferPos = 0;
    }

    #endregion
}

#endregion
}
}
--------------------------------------------------------------------------------
/EWAH/EwahCompressedBitArraySerializer.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;

namespace Ewah
{
    /*
     * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup
     * Licensed under APL 2.0.
     */

    ///
    /// A very simple serialization schema for serialising and de-serialising instances of EwahCompressedBitArray.
    ///
    /// The current implementation lacks serialization version information, and type information, it is rigid, brittle, simplistic but
    /// results in less byte bloat than a traditional BinaryFormatter.
    ///
    /// Consists of a very simple, fixed width header, and arbitrary length buffer.
    ///
    /// Bytes 1-4 : 'SizeInBits'
    /// Bytes 5-8 : 'ActualSizeInWords'
    /// Bytes 9-12 : 'RunningLengthWordPosition'
    /// Bytes 13-End : Contents of the internal long[] buffer
    ///
    /// The encoding scheme is that of Microsoft's Built in System.BitConverter methods.
/// Unfortunately the endian-ness of these calls
/// is architecture specific, so beware that to be certain of the endian order on the machine serializing this class one must use the
/// BitConverter.IsLittleEndian property.
///
public class EwahCompressedBitArraySerializer
{
    /// <summary>
    /// Deserializes the specified stream into an instance of EwahCompressedBitArray.
    /// Reads three 4-byte integers (size in bits, actual size in words, running
    /// length word position) followed by one 8-byte word per buffer entry.
    /// </summary>
    /// <param name="serializationStream">The stream containing the data that constructs a valid instance of EwahCompressedBitArray.</param>
    /// <returns>the bit array rebuilt from the stream</returns>
    /// <exception cref="EndOfStreamException">the stream ends before all expected bytes were read</exception>
    public EwahCompressedBitArray Deserialize(Stream serializationStream)
    {
        byte[] buff = new byte[8];
        ReadFully(serializationStream, buff, 4);
        int sizeInBits = BitConverter.ToInt32(buff, 0);
        ReadFully(serializationStream, buff, 4);
        int actualSizeInWords = BitConverter.ToInt32(buff, 0);
        ReadFully(serializationStream, buff, 4);
        int runningLengthWordPosition = BitConverter.ToInt32(buff, 0);
        long[] buffer = new long[actualSizeInWords];
        for (int i = 0; i < actualSizeInWords; i++)
        {
            ReadFully(serializationStream, buff, 8);
            buffer[i] = BitConverter.ToInt64(buff, 0);
        }
        return new EwahCompressedBitArray(sizeInBits, actualSizeInWords, buffer, runningLengthWordPosition);
    }

    /// <summary>
    /// Serializes an instance of EwahCompressedBitArray into the given stream:
    /// the three 4-byte header integers, then the first ActualSizeInWords
    /// entries of the internal buffer, 8 bytes each.
    /// </summary>
    /// <param name="serializationStream">The serialization stream.</param>
    /// <param name="bitArray">The bit array.</param>
    public void Serialize(Stream serializationStream, EwahCompressedBitArray bitArray)
    {
        // No actual need to call Shrink with this serialisation strategy, so we can avoid
        // mutating the source type (side-effects are bad ;) )
        serializationStream.Write(BitConverter.GetBytes(bitArray.SizeInBits), 0, 4);
        serializationStream.Write(BitConverter.GetBytes(bitArray._ActualSizeInWords), 0, 4);
        serializationStream.Write(BitConverter.GetBytes(bitArray._Rlw.Position), 0, 4);
        for (int i = 0; i < bitArray._ActualSizeInWords; i++)
        {
            serializationStream.Write(BitConverter.GetBytes(bitArray._Buffer[i]), 0, 8);
        }
    }

    /// <summary>
    /// Fills the first <paramref name="count"/> bytes of <paramref name="target"/>
    /// from the stream. Stream.Read may return fewer bytes than requested, so the
    /// read is repeated until the requested amount has arrived.
    /// </summary>
    /// <exception cref="EndOfStreamException">the stream ends first</exception>
    private static void ReadFully(Stream stream, byte[] target, int count)
    {
        int filled = 0;
        while (filled < count)
        {
            int got = stream.Read(target, filled, count - filled);
            if (got <= 0)
            {
                throw new EndOfStreamException(
                    "Unexpected end of stream while reading a serialized EwahCompressedBitArray.");
            }
            filled += got;
        }
    }
}
}

--------------------------------------------------------------------------------
/EWAH/EwahEnumerator.cs:
--------------------------------------------------------------------------------
namespace Ewah
{
    /*
     * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup
     * Licensed under APL 2.0.
     */


    /// <summary>
    /// The class EwahEnumerator represents a special type of
    /// efficient enumerator iterating over (uncompressed) words of bits.
    /// </summary>
    public sealed class EwahEnumerator
    {
        #region Readonly & Static Fields

        /// <summary>
        /// current running length word
        /// </summary>
        public readonly RunningLengthWord _Rlw;

        /// <summary>
        /// The size in words
        /// </summary>
        private readonly int _SizeInWords;

        #endregion

        #region Fields

        /// <summary>
        /// The pointer represent the location of the current running length
        /// word in the array of words (embedded in the rlw attribute).
        /// </summary>
        private int _Pointer;

        #endregion

        #region C'tors

        /// <summary>
        /// Instantiates a new eWAH enumerator
        /// </summary>
        /// <param name="a">the array of words</param>
        /// <param name="sizeinwords">the number of words that are significant in the array of words</param>
        public EwahEnumerator(long[] a, int sizeinwords)
        {
            _Rlw = new RunningLengthWord(a, 0);
            _SizeInWords = sizeinwords;
            _Pointer = 0;
        }

        #endregion

        #region Instance Properties

        /// <summary>
        /// Access to the array of words
        /// </summary>
        public long[] Buffer
        {
            get { return _Rlw.ArrayOfWords; }
        }

        /// <summary>
        /// Position of the dirty words represented by this running length word.
        /// Right after Next() this is the index just past the returned word.
        /// </summary>
        public int DirtyWords
        {
            get { return _Pointer - (int) _Rlw.NumberOfLiteralWords; }
        }

        #endregion

        #region Instance Methods

        /// <summary>
        /// Checks for next
        /// </summary>
        /// <returns>true, if successful</returns>
        public bool HasNext()
        {
            return _Pointer < _SizeInWords;
        }

        /// <summary>
        /// Next running length word. The same RunningLengthWord instance is
        /// repositioned and returned on every call.
        /// </summary>
        /// <returns>the running length word at the current pointer</returns>
        public RunningLengthWord Next()
        {
            _Rlw.Position = _Pointer;
            // Skip the marker itself plus the literal words that follow it.
            _Pointer += (int) _Rlw.NumberOfLiteralWords + 1;
            return _Rlw;
        }

        /// <summary>
        /// Reset the enumerator to the beginning
        /// </summary>
        internal void Reset()
        {
            _Rlw.Position = 0;
            _Pointer = 0;
        }

        #endregion
    }
}
--------------------------------------------------------------------------------
/EWAH/PlaceHolders.cs:
--------------------------------------------------------------------------------
namespace Ewah
{
}
--------------------------------------------------------------------------------
/EWAH/Properties/AssemblyInfo.cs:
--------------------------------------------------------------------------------
using System.Reflection;
using System.Runtime.InteropServices;

// General Information about an assembly is controlled through
the following 5 | // set of attributes. Change these attribute values to modify the information 6 | // associated with an assembly. 7 | 8 | [assembly: AssemblyTitle("EWAH")] 9 | [assembly: AssemblyDescription("")] 10 | [assembly: AssemblyConfiguration("")] 11 | [assembly: AssemblyCompany("OECD")] 12 | [assembly: AssemblyProduct("EWAH")] 13 | [assembly: AssemblyCopyright("Copyright © OECD 2012")] 14 | [assembly: AssemblyTrademark("")] 15 | [assembly: AssemblyCulture("")] 16 | 17 | // Setting ComVisible to false makes the types in this assembly not visible 18 | // to COM components. If you need to access a type in this assembly from 19 | // COM, set the ComVisible attribute to true on that type. 20 | 21 | [assembly: ComVisible(false)] 22 | 23 | // The following GUID is for the ID of the typelib if this project is exposed to COM 24 | 25 | [assembly: Guid("0666e9e3-73bd-4a3f-9633-9f7299d6b509")] 26 | 27 | // Version information for an assembly consists of the following four values: 28 | // 29 | // Major Version 30 | // Minor Version 31 | // Build Number 32 | // Revision 33 | // 34 | // You can specify all the values or you can default the Build and Revision Numbers 35 | // by using the '*' as shown below: 36 | // [assembly: AssemblyVersion("1.0.*")] 37 | 38 | [assembly: AssemblyVersion("1.0.0.0")] 39 | [assembly: AssemblyFileVersion("1.0.0.0")] -------------------------------------------------------------------------------- /EWAH/RunningLengthWord.cs: -------------------------------------------------------------------------------- 1 | [assembly: System.Runtime.CompilerServices.InternalsVisibleTo("EWAH.Tests")] 2 | 3 | namespace Ewah 4 | { 5 | 6 | /* 7 | * Copyright 2012, Kemal Erdogan, Daniel Lemire and Ciaran Jessup 8 | * Licensed under APL 2.0. 9 | */ 10 | 11 | /// 12 | /// Mostly for internal use. 
///
public sealed class RunningLengthWord
{
    #region Constants

    /// <summary>
    /// largest number of dirty words in a run
    /// </summary>
    public const long LargestLiteralCount = (1L << LiteralBits) - 1;

    /// <summary>
    /// largest number of clean words in a run
    /// </summary>
    public const long LargestRunningLengthCount = (1L << RunningLengthBits) - 1;

    /// <summary>
    /// number of bits dedicated to marking of the running length of clean words
    /// </summary>
    public const int RunningLengthBits = 32;

    // Word layout, from the least significant bit up: 1 running bit,
    // RunningLengthBits bits of running length, LiteralBits bits of literal count.
    private const int LiteralBits = 64 - 1 - RunningLengthBits;
    private const long NotRunningLengthPlusRunningBit = ~RunningLengthPlusRunningBit;
    private const long NotShiftedLargestRunningLengthCount = ~ShiftedLargestRunningLengthCount;

    private const long RunningLengthPlusRunningBit = (1L << (RunningLengthBits + 1)) - 1;
    private const long ShiftedLargestRunningLengthCount = LargestRunningLengthCount << 1;

    #endregion

    #region Fields

    /// <summary>
    /// The array of words.
    /// </summary>
    public long[] ArrayOfWords;

    /// <summary>
    /// The Position in array.
    /// </summary>
    public int Position;

    #endregion

    #region C'tors

    /// <summary>
    /// Instantiates a new running length word
    /// </summary>
    /// <param name="a">an array of 64-bit words</param>
    /// <param name="p">Position in the array where the running length word is located</param>
    internal RunningLengthWord(long[] a, int p)
    {
        ArrayOfWords = a;
        Position = p;
    }

    #endregion

    #region Instance Properties

    /// <summary>
    /// Return the size in uncompressed words represented by
    /// this running length word.
    /// </summary>
    public long Count
    {
        get { return RunningLength + NumberOfLiteralWords; }
    }

    /// <summary>
    /// the number of literal words
    /// </summary>
    public long NumberOfLiteralWords
    {
        get
        {
            ulong word = (ulong) ArrayOfWords[Position];
            return (long) (word >> (1 + RunningLengthBits));
        }
        set
        {
            // Raise every bit of the literal-count field, then mask it down to
            // the new value; the running bit and running length stay untouched.
            long allLiteralBitsSet = ArrayOfWords[Position] | NotRunningLengthPlusRunningBit;
            long keepMask = (value << (RunningLengthBits + 1)) | RunningLengthPlusRunningBit;
            ArrayOfWords[Position] = allLiteralBitsSet & keepMask;
        }
    }

    /// <summary>
    /// the running bit
    /// </summary>
    public bool RunningBit
    {
        get { return (ArrayOfWords[Position] & 1) != 0; }
        set
        {
            long word = ArrayOfWords[Position];
            ArrayOfWords[Position] = value ? (word | 1L) : (word & ~1L);
        }
    }

    /// <summary>
    /// the running length
    /// </summary>
    public long RunningLength
    {
        get { return (long) ((((ulong) ArrayOfWords[Position]) >> 1) & LargestRunningLengthCount); }
        set
        {
            // Raise every bit of the running-length field, then mask it down to
            // the new value; the running bit and literal count stay untouched.
            long allLengthBitsSet = ArrayOfWords[Position] | ShiftedLargestRunningLengthCount;
            long keepMask = (value << 1) | NotShiftedLargestRunningLengthCount;
            ArrayOfWords[Position] = allLengthBitsSet & keepMask;
        }
    }

    #endregion

    #region ICloneable Members

    /// <summary>
    /// Returns a new running length word over the same array (the array is
    /// not copied) at the same position.
    /// </summary>
    public object Clone()
    {
        return new RunningLengthWord(ArrayOfWords, Position);
    }

    #endregion
    #region Instance Methods

    public override string ToString()
    {
        return "running bit = " + RunningBit
               + " running length = " + RunningLength
               + " number of lit. words " + NumberOfLiteralWords;
    }

    #endregion
}
}
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/

TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

1. Definitions.

"License" shall mean the terms and conditions for use, reproduction, and
distribution as defined by Sections 1 through 9 of this document.

"Licensor" shall mean the copyright owner or entity authorized by the copyright
owner that is granting the License.

"Legal Entity" shall mean the union of the acting entity and all other entities
that control, are controlled by, or are under common control with that entity.
For the purposes of this definition, "control" means (i) the power, direct or
indirect, to cause the direction or management of such entity, whether by
contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.

"You" (or "Your") shall mean an individual or Legal Entity exercising
permissions granted by this License.

"Source" form shall mean the preferred form for making modifications, including
but not limited to software source code, documentation source, and configuration
files.

"Object" form shall mean any form resulting from mechanical transformation or
translation of a Source form, including but not limited to compiled object code,
generated documentation, and conversions to other media types.
32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 
62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 
85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 
111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 
145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "[]" replaced with your own 173 | identifying information. (Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. 
We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright [yyyy] [name of copyright owner] 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CSharpEWAH 2 | == 3 | 4 | 5 | This is a compressed variant of 6 | the standard bitarray class. It uses a 64-bit RLE-like 7 | compression scheme. It can be used to implement 8 | bitmap indexes. 9 | 10 | The goal of word-aligned compression is not to 11 | achieve the best compression, but rather to 12 | improve query processing time. Hence, we try 13 | to save CPU cycles, maybe at the expense of 14 | storage. However, the EWAH scheme we implemented 15 | is always more efficient storage-wise than an 16 | uncompressed bitarray. 17 | 18 | 19 | Real-world usage 20 | ---------------- 21 | 22 | CSharpEWAH has been reviewed by Matt Warren as part of his work on the Stack Overflow tag system: 23 | 24 | http://mattwarren.org/2015/10/29/the-stack-overflow-tag-engine-part-3/ 25 | 26 | The Java counterpart of this library (JavaEWAH) is part of Apache Hive and its derivatives (e.g., Apache Spark) and Eclipse JGit. 
It has been used in production systems for many years. It is part of major Linux distributions. 27 | 28 | EWAH is used to accelerate the distributed version control system Git (http://githubengineering.com/counting-objects/). You can find the C port of EWAH written by the Git team at https://github.com/git/git/tree/master/ewah 29 | 30 | When should you use a bitmap? 31 | ---------------------------------------- 32 | 33 | Sets are a fundamental abstraction in 34 | software. They can be implemented in various 35 | ways, as hash sets, as trees, and so forth. 36 | In databases and search engines, sets are often an integral 37 | part of indexes. For example, we may need to maintain a set 38 | of all documents or rows (represented by numerical identifiers) 39 | that satisfy some property. Besides adding or removing 40 | elements from the set, we need fast functions 41 | to compute the intersection, the union, the difference between sets, and so on. 42 | 43 | 44 | To implement a set 45 | of integers, a particularly appealing strategy is the 46 | bitmap (also called bitset or bit vector). Using n bits, 47 | we can represent any set made of the integers from the range 48 | [0,n): it suffices to set the ith bit to one if integer i is present in the set. 49 | Commodity processors use words of W=32 or W=64 bits. By combining many such words, we can 50 | support large values of n. Intersections, unions and differences can then be implemented 51 | as bitwise AND, OR and ANDNOT operations. 52 | More complicated set functions can also be implemented as bitwise operations. 53 | 54 | When the bitset approach is applicable, it can be orders of 55 | magnitude faster than other possible implementations of a set (e.g., as a hash set) 56 | while using several times less memory. 57 | 58 | 59 | When should you use compressed bitmaps? 60 | ---------------------------------------- 61 | 62 | An uncompressed BitSet can use a lot of memory.
For example, if you take a BitSet 63 | and set the bit at position 1,000,000 to true, you have just over 100kB. That's over 100kB 64 | to store the position of one bit. This is wasteful even if you do not care about memory: 65 | suppose that you need to compute the intersection between this BitSet and another one 66 | that has the bit at position 1,000,001 set to true, then you need to go through all these zeroes, 67 | whether you like it or not. That can become very wasteful. 68 | 69 | This being said, there are definitely cases where attempting to use compressed bitmaps is wasteful. 70 | For example, if you have a small universe size. E.g., your bitmaps represent sets of integers 71 | from [0,n) where n is small (e.g., n=64 or n=128). If you can use an uncompressed BitSet and 72 | it does not blow up your memory usage, then compressed bitmaps are probably not useful 73 | to you. In fact, if you do not need compression, then a BitSet offers remarkable speed. 74 | One of the downsides of a compressed bitmap like those provided by JavaEWAH is slower random access: 75 | checking whether a bit is set to true in a compressed bitmap takes longer. 76 | 77 | 78 | How does EWAH compare with the alternatives? 79 | ------------------------------------------- 80 | 81 | EWAH is part of a larger family of compressed bitmaps that are run-length-encoded 82 | bitmaps. They identify long runs of 1s or 0s and they represent them with a marker word. 83 | If you have a local mix of 1s and 0s, you use an uncompressed word. 84 | 85 | There are many formats in this family besides EWAH: 86 | 87 | * Oracle's BBC is an obsolete format at this point: though it may provide good compression, 88 | it is likely much slower than more recent alternatives due to excessive branching. 89 | * WAH is a patented variation on BBC that provides better performance. 90 | * Concise is a variation on the patented WAH.
In some specific instances, it can compress 91 | much better than WAH (up to 2x better), but it is generally slower. 92 | * EWAH is both free of patents and faster than all of the above. On the downside, it 93 | does not compress quite as well. It is faster because it allows some form of "skipping" 94 | over uncompressed words. So though none of these formats are great at random access, EWAH 95 | is better than the alternatives. 96 | 97 | There are other alternatives, however. For example, the Roaring 98 | format (https://github.com/lemire/RoaringBitmap) is not a run-length-encoded hybrid. It provides faster random access 99 | than even EWAH. 100 | 101 | 102 | Data format 103 | ------------ 104 | 105 | For more details regarding the compression format, please 106 | see Section 3 of the following paper: 107 | 108 | Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned bitmap indexes. Data & Knowledge Engineering 69 (1), pages 3-28, 2010. 109 | http://arxiv.org/abs/0901.3751 110 | 111 | (The PDF file is freely available on the arXiv site.) 112 | 113 | Unit testing 114 | == 115 | 116 | Building using Mono 117 | 118 | 119 | You can build CSharpEWAH using the open source 120 | Mono toolchain using the msbuild command. 121 | Then you can run the executable using 122 | the mono command: 123 | ``` 124 | $ nuget restore EWAH.sln 125 | $ msbuild 126 | $ mono ./EWAH.RunTests/bin/Debug/EWAH.RunTests.exe 127 | ``` 128 | 129 | This will run unit tests. 130 | 131 | 132 | Usage 133 | == 134 | 135 | See example.cs.
136 | 137 | Credit 138 | == 139 | 140 | 141 | 142 | (c) Kemal Erdogan, Daniel Lemire, Ciaran Jessup, Michael Rice, Matt Warren 143 | This code is licensed under the Apache 144 | License, Version 2.0 (ASL2.0) 145 | -------------------------------------------------------------------------------- /package.sh: -------------------------------------------------------------------------------- 1 | name=CSharpEWAH_`date +%Y%m%d`.zip 2 | cd ..;zip -9 -r -v ./csharpewah/CSharpEWAH.0.2.5-src.zip ./csharpewah/EWAH/EWAH.csproj ./csharpewah/EWAH.sln ./csharpewah/README ./csharpewah/CHANGELOG ./csharpewah/EWAH.RunTests/EWAH.RunTests.csproj ./csharpewah/EWAH.RunTests/Properties/AssemblyInfo.cs ./csharpewah/EWAH.RunTests/app.config ./csharpewah/EWAH.RunTests/example.cs ./csharpewah/EWAH.Tests/EWAH.Tests.csproj ./csharpewah/EWAH.Tests/EWAHCompressedBitmapTest.cs ./csharpewah/EWAH.Tests/Properties/AssemblyInfo.cs ./csharpewah/EWAH.Tests/app.config ./csharpewah/EWAH/BufferedRunningLengthWord.cs ./csharpewah/EWAH/EWAH.csproj ./csharpewah/EWAH/EwahCompressedBitArray.cs ./csharpewah/EWAH/EwahEnumerator.cs ./csharpewah/EWAH/PlaceHolders.cs ./csharpewah/EWAH/Properties/AssemblyInfo.cs ./csharpewah/EWAH/RunningLengthWord.cs ./csharpewah/EWAH.Tests/EWAHCompressedBitArraySerializerTest.cs ./csharpewah/EWAH/EwahCompressedBitArraySerializer.cs 3 | --------------------------------------------------------------------------------