├── .gitattributes ├── .gitignore ├── Directory.Build.targets ├── Global.ruleset ├── LICENSE.txt ├── README.md └── src ├── CSharpFastPFOR.Benchmarks ├── Benchmark.cs ├── BenchmarkBitPacking.cs ├── BenchmarkCSV.cs ├── BenchmarkOffsettedSeries.cs ├── BenchmarkSkippable.cs ├── CSharpFastPFOR.Benchmarks.csproj ├── PerformanceLogger.cs ├── Port │ └── System.cs └── Program.cs ├── CSharpFastPFOR.Tests ├── AdhocTest.cs ├── BasicTest.cs ├── BoundaryTest.cs ├── ByteBasicTest.cs ├── CSharpFastPFOR.Tests.csproj ├── DeltaZigzagEncodingTest.cs ├── ExampleTest.cs ├── IntCompressorTest.cs ├── Port │ └── Assert2.cs ├── SkippableBasicTest.cs ├── Utils │ └── TestUtils.cs ├── UtilsTest.cs └── XorBinaryPackingTest.cs ├── CSharpFastPFOR.sln └── CSharpFastPFOR ├── BinaryPacking.cs ├── BitPacking.cs ├── ByteIntegerCODEC.cs ├── CSharpFastPFOR.csproj ├── Composition.cs ├── DeltaZigzagBinaryPacking.cs ├── DeltaZigzagEncoding.cs ├── DeltaZigzagVariableByte.cs ├── Differential ├── Delta.cs ├── IntegratedBinaryPacking.cs ├── IntegratedBitPacking.cs ├── IntegratedByteIntegerCODEC.cs ├── IntegratedComposition.cs ├── IntegratedIntCompressor.cs ├── IntegratedIntegerCODEC.cs ├── IntegratedVariableByte.cs ├── SkippableIntegratedComposition.cs ├── SkippableIntegratedIntegerCODEC.cs └── XorBinaryPacking.cs ├── FastPFOR.cs ├── FastPFOR128.cs ├── IntCompressor.cs ├── IntWrapper.cs ├── IntegerCODEC.cs ├── JustCopy.cs ├── NewPFD.cs ├── NewPFDS16.cs ├── NewPFDS9.cs ├── OptPFD.cs ├── OptPFDS16.cs ├── OptPFDS9.cs ├── Port ├── Arrays.cs ├── BitSet.cs ├── ByteBuffer.cs ├── ByteOrder.cs ├── IntBuffer.cs └── Integer.cs ├── S16.cs ├── S9.cs ├── Simple16.cs ├── Simple9.cs ├── SkippableComposition.cs ├── SkippableIntegerCODEC.cs ├── Synth ├── ClusteredDataGenerator.cs └── UniformDataGenerator.cs ├── Util.cs └── VariableByte.cs /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Visual Studio 3 | ################# 4 | 5 | ## Ignore Visual Studio temporary files, build results, and 6 | ## files generated by popular Visual Studio add-ons. 7 | 8 | # User-specific files 9 | *.suo 10 | *.user 11 | *.sln.docstates 12 | 13 | # Build results 14 | 15 | [Dd]ebug/ 16 | [Rr]elease/ 17 | x64/ 18 | build/ 19 | [Bb]in/ 20 | [Oo]bj/ 21 | 22 | # MSTest test Results 23 | [Tt]est[Rr]esult*/ 24 | [Bb]uild[Ll]og.* 25 | 26 | *_i.c 27 | *_p.c 28 | *.ilk 29 | *.meta 30 | *.obj 31 | *.pch 32 | *.pdb 33 | *.pgc 34 | *.pgd 35 | *.rsp 36 | *.sbr 37 | *.tlb 38 | *.tli 39 | *.tlh 40 | *.tmp 41 | *.tmp_proj 42 | *.log 43 | *.vspscc 44 | *.vssscc 45 | .builds 46 | *.pidb 47 | *.log 48 | *.scc 49 | 50 | # Visual C++ cache files 51 | ipch/ 52 | *.aps 53 | *.ncb 54 | *.opensdf 55 | *.sdf 56 | *.cachefile 57 | 58 | # Visual Studio profiler 59 | *.psess 60 | *.vsp 61 | *.vspx 62 | 63 | # Guidance Automation Toolkit 64 | *.gpState 65 | 66 | # ReSharper is a .NET coding add-in 67 | _ReSharper*/ 68 | *.[Rr]e[Ss]harper 69 | 70 | # TeamCity is a build add-in 71 | _TeamCity* 72 | 73 | # DotCover is a Code Coverage Tool 74 | *.dotCover 75 | 76 | # NCrunch 77 | *.ncrunch* 78 | .*crunch*.local.xml 79 | 80 | # Installshield output folder 81 | [Ee]xpress/ 82 | 83 | # DocProject is a documentation generator add-in 84 | DocProject/buildhelp/ 85 | DocProject/Help/*.HxT 86 | DocProject/Help/*.HxC 87 | DocProject/Help/*.hhc 88 | DocProject/Help/*.hhk 89 | DocProject/Help/*.hhp 90 | DocProject/Help/Html2 91 | DocProject/Help/html 92 | 93 | # Click-Once directory 94 | publish/ 95 | 96 | # Publish Web Output 97 | *.Publish.xml 98 | *.pubxml 99 | *.publishproj 100 | 101 | # NuGet Packages Directory 102 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 103 | #packages/ 104 | 105 | # Windows Azure Build Output 106 | csx 107 | *.build.csdef 108 | 109 | # Windows Store app package directory 110 | AppPackages/ 111 | 112 | # Others 113 | sql/ 114 | *.Cache 115 | ClientBin/ 116 | [Ss]tyle[Cc]op.* 117 | ~$* 118 | *~ 119 | *.dbmdl 120 | *.[Pp]ublish.xml 121 | *.pfx 122 | *.publishsettings 123 | 124 | # RIA/Silverlight projects 125 | Generated_Code/ 126 | 127 | # Backup & report files from converting an old project file to a newer 128 | # Visual Studio version. Backup files are not needed, because we have git ;-) 129 | _UpgradeReport_Files/ 130 | Backup*/ 131 | UpgradeLog*.XML 132 | UpgradeLog*.htm 133 | 134 | # SQL Server files 135 | App_Data/*.mdf 136 | App_Data/*.ldf 137 | 138 | ############# 139 | ## Windows detritus 140 | ############# 141 | 142 | # Windows image file caches 143 | Thumbs.db 144 | ehthumbs.db 145 | 146 | # Folder config file 147 | Desktop.ini 148 | 149 | # Recycle Bin used on file shares 150 | $RECYCLE.BIN/ 151 | 152 | # Mac crap 153 | .DS_Store 154 | 155 | BenchmarkDotNet.Artifacts/ 156 | .vs/ -------------------------------------------------------------------------------- /Directory.Build.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Ian Qvist 5 | Copyright 2020, by Ian Qvist. All rights reserved. 6 | A C# port of the simple integer compression library JavaFastPFOR by Daniel Lemire 7 | 1.0.0 8 | 9 | 10 | 2 11 | 500 12 | 13 | 8.0 14 | false 15 | portable 16 | CS0108,CS0109,CS0114,CS0162,CS0251,CS0659,CS0660,CS1717,CS1718 17 | $(MSBuildThisFileDirectory)\Global.ruleset 18 | Git 19 | Genbox.$(MSBuildProjectName) 20 | 21 | https://github.com/Genbox/$(MSBuildProjectName) 22 | Apache-2.0 23 | 24 | 25 | true 26 | true 27 | true 28 | snupkg 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | false 42 | 43 | 44 | 45 | true 46 | 47 | 48 | -------------------------------------------------------------------------------- /Global.ruleset: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CSharpFastPFOR: A C# port of the simple integer compression library JavaFastPFOR 2 | ========================================================== 3 | 4 | Why? 5 | ---- 6 | While building a in-memory compressed datastructure, I needed a library for efficient integer compression. JavaFastPFOR was what I needed, so I ported it to C#. 7 | See the original project by Daniel Lemire here: https://github.com/lemire/JavaFastPFOR 8 | 9 | Requirements 10 | ------------ 11 | * .NET Core 2.0 SDK 12 | * Visual Studio 2019 13 | 14 | API 15 | ----------------- 16 | The API is excatly the same as JavaFastPFOR. See the documentation here: http://www.javadoc.io/doc/me.lemire.integercompression/JavaFastPFOR/ 17 | 18 | References 19 | ----------------- 20 | 21 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second through vectorization, Software Practice & Experience 45 (1), 2015. http://arxiv.org/abs/1209.2137 http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract 22 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the Intersection of Sorted Integers, Software Practice & Experience (to appear) http://arxiv.org/abs/1401.6399 23 | * Matteo Catena, Craig Macdonald, Iadh Ounis, On Inverted Index Compression for Search Engine Efficiency, Lecture Notes in Computer Science 8416 (ECIR 2014), 2014. 24 | http://dx.doi.org/10.1007/978-3-319-06028-6_30 25 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Benchmarks/BenchmarkBitPacking.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Class used to benchmark the speed of bit packing. (For expert use.) 10 | * 11 | * @author Daniel Lemire 12 | * 13 | */ 14 | 15 | using System; 16 | using Genbox.CSharpFastPFOR.Differential; 17 | using Genbox.CSharpFastPFOR.Port; 18 | 19 | namespace Genbox.CSharpFastPFOR.Benchmarks 20 | { 21 | public static class BenchmarkBitPacking 22 | { 23 | public static void test(bool verbose) 24 | { 25 | const int N = 32; 26 | const int times = 100000; 27 | Random r = new Random(0); 28 | int[] data = new int[N]; 29 | int[] compressed = new int[N]; 30 | int[] uncompressed = new int[N]; 31 | 32 | for (int bit = 0; bit < 31; ++bit) 33 | { 34 | long comp = 0; 35 | long compwm = 0; 36 | long decomp = 0; 37 | for (int t = 0; t < times; ++t) 38 | { 39 | for (int k = 0; k < N; ++k) 40 | { 41 | data[k] = r.Next(1 << bit); 42 | } 43 | 44 | long time1 = Port.System.nanoTime(); 45 | BitPacking 46 | .fastpack(data, 0, compressed, 0, bit); 47 | long time2 = Port.System.nanoTime(); 48 | BitPacking.fastpackwithoutmask(data, 0, 49 | compressed, 0, bit); 50 | long time3 = Port.System.nanoTime(); 51 | BitPacking.fastunpack(compressed, 0, 52 | uncompressed, 0, bit); 53 | long time4 = Port.System.nanoTime(); 54 | comp += time2 - time1; 55 | compwm += time3 - time2; 56 | decomp += time4 - time3; 57 | } 58 | if (verbose) 59 | Console.WriteLine("bit = " 60 | + bit 61 | + " comp. speed = " 62 | + (N * times * 1000.0 / (comp)).ToString("0") 63 | + " comp. speed wm = " 64 | + (N * times * 1000.0 / (compwm)).ToString("0") 65 | + " decomp. speed = " 66 | + (N * times * 1000.0 / (decomp)).ToString("0")); 67 | } 68 | } 69 | 70 | public static void testWithDeltas(bool verbose) 71 | { 72 | const int N = 32; 73 | const int times = 100000; 74 | 75 | Random r = new Random(0); 76 | int[] data = new int[N]; 77 | int[] compressed = new int[N]; 78 | int[] icompressed = new int[N]; 79 | int[] uncompressed = new int[N]; 80 | 81 | for (int bit = 1; bit < 31; ++bit) 82 | { 83 | long comp = 0; 84 | long decomp = 0; 85 | long icomp = 0; 86 | long idecomp = 0; 87 | 88 | for (int t = 0; t < times; ++t) 89 | { 90 | data[0] = r.Next(1 << bit); 91 | for (int k = 1; k < N; ++k) 92 | { 93 | data[k] = r.Next(1 << bit) 94 | + data[k - 1]; 95 | } 96 | 97 | int[] tmpdata = Arrays.copyOf(data, data.Length); 98 | 99 | long time1 = Port.System.nanoTime(); 100 | Delta.delta(tmpdata); 101 | BitPacking.fastpackwithoutmask(tmpdata, 0, 102 | compressed, 0, bit); 103 | long time2 = Port.System.nanoTime(); 104 | BitPacking.fastunpack(compressed, 0, 105 | uncompressed, 0, bit); 106 | Delta.fastinverseDelta(uncompressed); 107 | long time3 = Port.System.nanoTime(); 108 | if (!Arrays.equals(data, uncompressed)) 109 | throw new Exception("bug"); 110 | comp += time2 - time1; 111 | decomp += time3 - time2; 112 | tmpdata = Arrays.copyOf(data, data.Length); 113 | time1 = Port.System.nanoTime(); 114 | IntegratedBitPacking.integratedpack(0, tmpdata, 115 | 0, icompressed, 0, bit); 116 | time2 = Port.System.nanoTime(); 117 | IntegratedBitPacking.integratedunpack(0, 118 | icompressed, 0, uncompressed, 0, bit); 119 | time3 = Port.System.nanoTime(); 120 | if (!Arrays.equals(icompressed, compressed)) 121 | throw new Exception("ibug " + bit); 122 | 123 | if (!Arrays.equals(data, uncompressed)) 124 | throw new Exception("bug " + bit); 125 | icomp += time2 - time1; 126 | idecomp += time3 - time2; 127 | } 128 | 129 | if (verbose) 130 | Console.WriteLine("bit = " 131 | + bit 132 | + " comp. speed = " 133 | + (N * times * 1000.0 / (comp)).ToString("0") 134 | + " decomp. speed = " 135 | + (N * times * 1000.0 / (decomp)).ToString("0") 136 | + " icomp. speed = " 137 | + (N * times * 1000.0 / (icomp)).ToString("0") 138 | + " idecomp. speed = " 139 | + (N * times * 1000.0 / (idecomp)).ToString("0")); 140 | } 141 | } 142 | } 143 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Benchmarks/CSharpFastPFOR.Benchmarks.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Exe 5 | netcoreapp2.2 6 | false 7 | 8 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Benchmarks/PerformanceLogger.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * PerformanceLogger for IntegerCODEC. 8 | * 9 | * @author MURAOKA Taro http://github.com/koron 10 | */ 11 | namespace Genbox.CSharpFastPFOR.Benchmarks 12 | { 13 | public class PerformanceLogger 14 | { 15 | public class Timer 16 | { 17 | private long startNano; 18 | private long duration = 0; 19 | 20 | public void start() 21 | { 22 | this.startNano = Port.System.nanoTime(); 23 | } 24 | 25 | public long end() 26 | { 27 | return this.duration += Port.System.nanoTime()- this.startNano; 28 | } 29 | 30 | public long getDuration() 31 | { 32 | return this.duration; 33 | } 34 | } 35 | 36 | public readonly Timer compressionTimer = new Timer(); 37 | public readonly Timer decompressionTimer = new Timer(); 38 | 39 | private long originalSize = 0; 40 | private long compressedSize = 0; 41 | 42 | public long addOriginalSize(long value) 43 | { 44 | return this.originalSize += value; 45 | } 46 | 47 | public long addCompressedSize(long value) 48 | { 49 | return this.compressedSize += value; 50 | } 51 | 52 | private long getOriginalSize() 53 | { 54 | return this.originalSize; 55 | } 56 | 57 | private long getCompressedSize() 58 | { 59 | return this.compressedSize; 60 | } 61 | 62 | public double getBitPerInt() 63 | { 64 | return this.compressedSize * 32.0 / this.originalSize; 65 | } 66 | 67 | private static double getMiS(long size, long nanoTime) 68 | { 69 | return (size * 1e-6) / (nanoTime * 1.0e-9); 70 | } 71 | 72 | public double getCompressSpeed() 73 | { 74 | return getMiS(this.originalSize, this.compressionTimer.getDuration()); 75 | } 76 | 77 | public double getDecompressSpeed() 78 | { 79 | return getMiS(this.originalSize, this.decompressionTimer.getDuration()); 80 | } 81 | } 82 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Benchmarks/Port/System.cs: -------------------------------------------------------------------------------- 1 | using System.Diagnostics; 2 | 3 | namespace Genbox.CSharpFastPFOR.Benchmarks.Port 4 | { 5 | public static class System 6 | { 7 | public static long nanoTime() 8 | { 9 | return (long)(Stopwatch.GetTimestamp() / (Stopwatch.Frequency / 1000000000.0)); 10 | } 11 | } 12 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Benchmarks/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.IO; 4 | using Genbox.CSharpFastPFOR.Port; 5 | 6 | namespace Genbox.CSharpFastPFOR.Benchmarks 7 | { 8 | internal class Program 9 | { 10 | private static void Main(string[] args) 11 | { 12 | BenchmarkMethod(); 13 | BenchmarkBitPackingMethod(); 14 | BenchmarkCSVMethod(args); 15 | BenchmarkOffsettedSeriesMethod(); 16 | BenchmarkSkippableMethod(); 17 | } 18 | 19 | private static void BenchmarkMethod() 20 | { 21 | Console.WriteLine("# benchmark based on the ClusterData model from:"); 22 | Console.WriteLine("# Vo Ngoc Anh and Alistair Moffat. "); 23 | Console.WriteLine("# Index compression using 64-bit words."); 24 | Console.WriteLine("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. "); 25 | Console.WriteLine(); 26 | 27 | string path = Path.GetFullPath("benchmark-" + DateTime.Now.ToString("yyyyMMddTHHmmss") + ".csv"); 28 | 29 | using (FileStream csvFile = File.OpenWrite(path)) 30 | using (StreamWriter writer = new StreamWriter(csvFile)) 31 | { 32 | Console.WriteLine("# Results will be written into a CSV file: " + path); 33 | Console.WriteLine(); 34 | Benchmark.test(writer, 20, 18, 10); 35 | Console.WriteLine(); 36 | Console.WriteLine("Results were written into a CSV file: " + path); 37 | } 38 | } 39 | 40 | private static void BenchmarkBitPackingMethod() 41 | { 42 | Console.WriteLine("Testing packing and delta "); 43 | BenchmarkBitPacking.testWithDeltas(false); 44 | BenchmarkBitPacking.testWithDeltas(true); 45 | Console.WriteLine("Testing packing alone "); 46 | BenchmarkBitPacking.test(false); 47 | BenchmarkBitPacking.test(true); 48 | } 49 | 50 | private static void BenchmarkCSVMethod(string[] args) 51 | { 52 | Format myformat = Format.ONEARRAYPERLINE; 53 | CompressionMode cm = CompressionMode.DELTA; 54 | List files = new List(); 55 | foreach (string s in args) 56 | { 57 | if (s.StartsWith("-")) 58 | {// it is a flag 59 | if (s.Equals("--onearrayperfile")) 60 | myformat = Format.ONEARRAYPERFILE; 61 | else if (s.Equals("--nodelta")) 62 | cm = CompressionMode.AS_IS; 63 | else if (s.Equals("--oneintperline")) 64 | myformat = Format.ONEINTPERLINE; 65 | else 66 | throw new Exception("I don't understand: " + s); 67 | } 68 | else 69 | {// it is a filename 70 | files.Add(s); 71 | } 72 | } 73 | if (myformat == Format.ONEARRAYPERFILE) 74 | Console.WriteLine("Treating each file as one array."); 75 | else if (myformat == Format.ONEARRAYPERLINE) 76 | Console.WriteLine("Each line of each file is an array: use --onearrayperfile or --oneintperline to change."); 77 | else if (myformat == Format.ONEINTPERLINE) 78 | Console.WriteLine("Treating each file as one array, with one integer per line."); 79 | if (cm == CompressionMode.AS_IS) 80 | Console.WriteLine("Compressing the integers 'as is' (no differential coding)"); 81 | else 82 | Console.WriteLine("Using differential coding (arrays will be sorted): use --nodelta to prevent sorting"); 83 | 84 | List data = new List(); 85 | foreach (string fn in files) 86 | foreach (int[] x in BenchmarkCSV.loadIntegers(fn, myformat)) 87 | data.Add(x); 88 | Console.WriteLine("Loaded " + data.Count + " array(s)"); 89 | if (cm == CompressionMode.DELTA) 90 | { 91 | Console.WriteLine("Sorting the arrray(s) because you are using differential coding"); 92 | foreach (int[] x in data) 93 | Arrays.sort(x); 94 | } 95 | BenchmarkCSV.bench(data, cm, false); 96 | BenchmarkCSV.bench(data, cm, false); 97 | BenchmarkCSV.bench(data, cm, true); 98 | BenchmarkCSV.bytebench(data, cm, false); 99 | BenchmarkCSV.bytebench(data, cm, false); 100 | BenchmarkCSV.bytebench(data, cm, true); 101 | } 102 | 103 | private static void BenchmarkOffsettedSeriesMethod() 104 | { 105 | string path = Path.GetFullPath("benchmark-offsetted-" + DateTime.Now.ToString("yyyyMMddTHHmmss") + ".csv"); 106 | 107 | using (FileStream csvFile = File.OpenWrite(path)) 108 | using (StreamWriter writer = new StreamWriter(csvFile)) 109 | { 110 | Console.WriteLine("# Results will be written into a CSV file: " + path); 111 | Console.WriteLine(); 112 | BenchmarkOffsettedSeries.run(writer, 8 * 1024, 1280); 113 | Console.WriteLine(); 114 | Console.WriteLine("# Results were written into a CSV file: " + path); 115 | } 116 | } 117 | 118 | private static void BenchmarkSkippableMethod() 119 | { 120 | Console.WriteLine("# benchmark based on the ClusterData model from:"); 121 | Console.WriteLine("# Vo Ngoc Anh and Alistair Moffat. "); 122 | Console.WriteLine("# Index compression using 64-bit words."); 123 | Console.WriteLine("# Softw. Pract. Exper.40, 2 (February 2010), 131-147. "); 124 | Console.WriteLine(); 125 | 126 | string path = Path.GetFullPath("benchmark-skippable-" + DateTime.Now.ToString("yyyyMMddTHHmmss") + ".csv"); 127 | 128 | using (FileStream csvFile = File.OpenWrite(path)) 129 | using (StreamWriter writer = new StreamWriter(csvFile)) 130 | { 131 | Console.WriteLine("# Results will be written into a CSV file: " + path); 132 | Console.WriteLine(); 133 | BenchmarkSkippable.test(writer, 20, 18, 10); 134 | Console.WriteLine(); 135 | Console.WriteLine("Results were written into a CSV file: " + path); 136 | } 137 | } 138 | } 139 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/AdhocTest.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * Collection of adhoc tests. 3 | */ 4 | 5 | using Genbox.CSharpFastPFOR.Tests.Utils; 6 | using Xunit; 7 | 8 | namespace Genbox.CSharpFastPFOR.Tests 9 | { 10 | public class AdhocTest 11 | { 12 | [Fact] 13 | public void biggerCompressedArray0() 14 | { 15 | // No problem: for comparison. 16 | IntegerCODEC c = new Composition(new FastPFOR128(), new VariableByte()); 17 | TestUtils.assertSymmetry(c, 0, 16384); 18 | c = new Composition(new FastPFOR(), new VariableByte()); 19 | TestUtils.assertSymmetry(c, 0, 16384); 20 | } 21 | 22 | [Fact] 23 | public void biggerCompressedArray1() 24 | { 25 | // Compressed array is bigger than original, because of VariableByte. 26 | IntegerCODEC c = new VariableByte(); 27 | TestUtils.assertSymmetry(c, -1); 28 | } 29 | 30 | [Fact] 31 | public void biggerCompressedArray2() 32 | { 33 | // Compressed array is bigger than original, because of Composition. 34 | IntegerCODEC c = new Composition(new FastPFOR128(), new VariableByte()); 35 | TestUtils.assertSymmetry(c, 65535, 65535); 36 | c = new Composition(new FastPFOR(), new VariableByte()); 37 | TestUtils.assertSymmetry(c, 65535, 65535); 38 | } 39 | } 40 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/BoundaryTest.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * @author lemire 3 | * 4 | */ 5 | 6 | using Genbox.CSharpFastPFOR.Differential; 7 | using Genbox.CSharpFastPFOR.Port; 8 | using Genbox.CSharpFastPFOR.Tests.Port; 9 | using Xunit; 10 | using Xunit.Abstractions; 11 | 12 | namespace Genbox.CSharpFastPFOR.Tests 13 | { 14 | public class BoundaryTest 15 | { 16 | private readonly ITestOutputHelper _testOutputHelper; 17 | 18 | public BoundaryTest(ITestOutputHelper testOutputHelper) 19 | { 20 | _testOutputHelper = testOutputHelper; 21 | } 22 | 23 | private void compressAndUncompress(int length, IntegerCODEC c) 24 | { 25 | // Initialize array. 26 | int[] source = new int[length]; 27 | for (int i = 0; i < source.Length; ++i) 28 | { 29 | source[i] = i; 30 | } 31 | 32 | // Compress an array. 33 | int[] compressed = new int[length]; 34 | IntWrapper c_inpos = new IntWrapper(0); 35 | IntWrapper c_outpos = new IntWrapper(0); 36 | c.compress(source, c_inpos, source.Length, compressed, c_outpos); 37 | Assert2.assertTrue(c_outpos.get() <= length); 38 | 39 | // Uncompress an array. 40 | int[] uncompressed = new int[length]; 41 | IntWrapper u_inpos = new IntWrapper(0); 42 | IntWrapper u_outpos = new IntWrapper(0); 43 | c.uncompress(compressed, u_inpos, c_outpos.get(), uncompressed, 44 | u_outpos); 45 | 46 | // Compare between uncompressed and original arrays. 47 | int[] target = Arrays.copyOf(uncompressed, u_outpos.get()); 48 | if (!Arrays.equals(source, target)) 49 | { 50 | _testOutputHelper.WriteLine("problem with length = " + length + " and " + c); 51 | _testOutputHelper.WriteLine(Arrays.toString(source)); 52 | _testOutputHelper.WriteLine(Arrays.toString(target)); 53 | } 54 | Assert2.assertArrayEquals(source, target); 55 | } 56 | 57 | private void around32(IntegerCODEC c) 58 | { 59 | compressAndUncompress(31, c); 60 | compressAndUncompress(32, c); 61 | compressAndUncompress(33, c); 62 | } 63 | 64 | private void around128(IntegerCODEC c) 65 | { 66 | compressAndUncompress(127, c); 67 | compressAndUncompress(128, c); 68 | compressAndUncompress(129, c); 69 | } 70 | 71 | private void around256(IntegerCODEC c) 72 | { 73 | compressAndUncompress(255, c); 74 | compressAndUncompress(256, c); 75 | compressAndUncompress(257, c); 76 | } 77 | 78 | private void testBoundary(IntegerCODEC c) 79 | { 80 | around32(c); 81 | around128(c); 82 | around256(c); 83 | } 84 | 85 | [Fact] 86 | public void testIntegratedComposition() 87 | { 88 | IntegratedComposition c = new IntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); 89 | testBoundary(c); 90 | } 91 | 92 | [Fact] 93 | public void testComposition() 94 | { 95 | Composition c = new Composition(new BinaryPacking(), new VariableByte()); 96 | testBoundary(c); 97 | } 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/ByteBasicTest.cs: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Just some basic sanity tests. 4 | * 5 | * @author Daniel Lemire 6 | */ 7 | 8 | using System; 9 | using Genbox.CSharpFastPFOR.Differential; 10 | using Genbox.CSharpFastPFOR.Port; 11 | using Genbox.CSharpFastPFOR.Tests.Port; 12 | using Genbox.CSharpFastPFOR.Tests.Utils; 13 | using Xunit; 14 | using Xunit.Abstractions; 15 | 16 | namespace Genbox.CSharpFastPFOR.Tests 17 | { 18 | public class ByteBasicTest 19 | { 20 | private readonly ByteIntegerCODEC[] codecs = { 21 | new VariableByte(), 22 | new IntegratedVariableByte(), 23 | }; 24 | 25 | private readonly ITestOutputHelper _testOutputHelper; 26 | 27 | public ByteBasicTest(ITestOutputHelper testOutputHelper) 28 | { 29 | _testOutputHelper = testOutputHelper; 30 | } 31 | 32 | [Fact] 33 | public void saulTest() 34 | { 35 | foreach (ByteIntegerCODEC C in codecs) 36 | { 37 | for (int x = 0; x < 50 * 4; ++x) 38 | { 39 | int[] a = { 2, 3, 4, 5 }; 40 | sbyte[] b = new sbyte[90 * 4]; 41 | int[] c = new int[a.Length]; 42 | 43 | IntWrapper aOffset = new IntWrapper(0); 44 | IntWrapper bOffset = new IntWrapper(x); 45 | C.compress(a, aOffset, a.Length, b, bOffset); 46 | int len = bOffset.get() - x; 47 | 48 | bOffset.set(x); 49 | IntWrapper cOffset = new IntWrapper(0); 50 | C.uncompress(b, bOffset, len, c, cOffset); 51 | if (!Arrays.equals(a, c)) 52 | { 53 | _testOutputHelper.WriteLine("Problem with " + C); 54 | } 55 | Assert2.assertArrayEquals(a, c); 56 | } 57 | } 58 | } 59 | 60 | [Fact] 61 | public void varyingLengthTest() 62 | { 63 | const int N = 4096; 64 | int[] data = new int[N]; 65 | for (int k = 0; k < N; ++k) 66 | data[k] = k; 67 | foreach (ByteIntegerCODEC c in codecs) 68 | { 69 | for (int L = 1; L <= 128; L++) 70 | { 71 | sbyte[] comp = TestUtils.compress(c, Arrays.copyOf(data, L)); 72 | int[] answer = TestUtils.uncompress(c, comp, L); 73 | for (int k = 0; k < L; ++k) 74 | if (answer[k] != data[k]) 75 | throw new Exception("bug " + c + " " + k + " " + answer[k] + " " + data[k]); 76 | } 77 | for (int L = 128; L <= N; L *= 2) 78 | { 79 | sbyte[] comp = TestUtils.compress(c, Arrays.copyOf(data, L)); 80 | int[] answer = TestUtils.uncompress(c, comp, L); 81 | for (int k = 0; k < L; ++k) 82 | if (answer[k] != data[k]) 83 | throw new Exception("bug"); 84 | } 85 | } 86 | } 87 | 88 | [Fact] 89 | public void varyingLengthTest2() 90 | { 91 | const int N = 128; 92 | int[] data = new int[N]; 93 | data[127] = -1; 94 | foreach (ByteIntegerCODEC c in codecs) 95 | { 96 | //TODO: this makes no sense in port 97 | //if (c is Simple9) 98 | // continue; 99 | 100 | for (int L = 1; L <= 128; L++) 101 | { 102 | sbyte[] comp = TestUtils.compress(c, Arrays.copyOf(data, L)); 103 | int[] answer = TestUtils.uncompress(c, comp, L); 104 | for (int k = 0; k < L; ++k) 105 | if (answer[k] != data[k]) 106 | throw new Exception("bug at k = " + k + " " + answer[k] + " " + data[k]); 107 | } 108 | for (int L = 128; L <= N; L *= 2) 109 | { 110 | sbyte[] comp = TestUtils.compress(c, Arrays.copyOf(data, L)); 111 | int[] answer = TestUtils.uncompress(c, comp, L); 112 | for (int k = 0; k < L; ++k) 113 | if (answer[k] != data[k]) 114 | throw new Exception("bug"); 115 | } 116 | } 117 | } 118 | } 119 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/CSharpFastPFOR.Tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | netcoreapp2.2 5 | false 6 | 7 | 8 | 9 | 10 | 11 | 12 | all 13 | runtime; build; native; contentfiles; analyzers; buildtransitive 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/DeltaZigzagEncodingTest.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | using Genbox.CSharpFastPFOR.Tests.Port; 7 | using Xunit; 8 | 9 | namespace Genbox.CSharpFastPFOR.Tests 10 | { 11 | public class DeltaZigzagEncodingTest 12 | { 13 | private static int zigzagEncode(DeltaZigzagEncoding.Encoder e, int value) 14 | { 15 | e.setContextValue(0); 16 | return e.encodeInt(value); 17 | } 18 | 19 | private static int zigzagDecode(DeltaZigzagEncoding.Decoder d, int value) 20 | { 21 | d.setContextValue(0); 22 | return d.decodeInt(value); 23 | } 24 | 25 | private static void checkEncode( 26 | DeltaZigzagEncoding.Encoder e, 27 | int[] data, 28 | int[] expected) 29 | { 30 | Assert2.assertArrayEquals(expected, e.encodeArray(data)); 31 | Assert2.assertEquals(data[data.Length - 1], e.getContextValue()); 32 | } 33 | 34 | private static void checkDecode( 35 | DeltaZigzagEncoding.Decoder d, 36 | int[] data, 37 | int[] expected) 38 | { 39 | int[] r = d.decodeArray(data); 40 | Assert2.assertArrayEquals(expected, r); 41 | Assert2.assertEquals(r[r.Length - 1], d.getContextValue()); 42 | } 43 | 44 | [Fact] 45 | public void checkZigzagEncode() 46 | { 47 | DeltaZigzagEncoding.Encoder e = new DeltaZigzagEncoding.Encoder(0); 48 | Assert2.assertEquals(0, zigzagEncode(e, 0)); 49 | Assert2.assertEquals(2, zigzagEncode(e, 1)); 50 | Assert2.assertEquals(4, zigzagEncode(e, 2)); 51 | Assert2.assertEquals(6, zigzagEncode(e, 3)); 52 | Assert2.assertEquals(1, zigzagEncode(e, -1)); 53 | Assert2.assertEquals(3, zigzagEncode(e, -2)); 54 | Assert2.assertEquals(5, zigzagEncode(e, -3)); 55 | } 56 | 57 | [Fact] 58 | public void checkZigzagDecoder() 59 | { 60 | DeltaZigzagEncoding.Decoder d = new DeltaZigzagEncoding.Decoder(0); 61 | Assert2.assertEquals(0, zigzagDecode(d, 0)); 62 | Assert2.assertEquals(-1, zigzagDecode(d, 1)); 63 | Assert2.assertEquals(1, zigzagDecode(d, 2)); 64 | Assert2.assertEquals(-2, zigzagDecode(d, 3)); 65 | Assert2.assertEquals(2, zigzagDecode(d, 4)); 66 | Assert2.assertEquals(-3, zigzagDecode(d, 5)); 67 | } 68 | 69 | [Fact] 70 | public void checkEncodeSimple() 71 | { 72 | DeltaZigzagEncoding.Encoder e = new DeltaZigzagEncoding.Encoder(0); 73 | checkEncode(e, 74 | new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }, 75 | new int[] { 0, 2, 2, 2, 2, 2, 2, 2, 2, 2 }); 76 | } 77 | 78 | [Fact] 79 | public void checkDecodeSimple() 80 | { 81 | DeltaZigzagEncoding.Decoder d = new DeltaZigzagEncoding.Decoder(0); 82 | checkDecode(d, 83 | new int[] { 0, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 84 | new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }); 85 | } 86 | 87 | private class SpotChecker 88 | { 89 | 90 | private static readonly DeltaZigzagEncoding.Encoder encoder = new DeltaZigzagEncoding.Encoder(0); 91 | private static readonly DeltaZigzagEncoding.Decoder decoder = new DeltaZigzagEncoding.Decoder(0); 92 | 93 | public void check(int value) 94 | { 95 | SpotChecker.encoder.setContextValue(0); 96 | SpotChecker.decoder.setContextValue(0); 97 | int value2 = SpotChecker.decoder.decodeInt(SpotChecker.encoder.encodeInt(value)); 98 | Assert2.assertEquals(value, value2); 99 | } 100 | } 101 | 102 | [Fact] 103 | public void checkSpots() 104 | { 105 | SpotChecker c = new SpotChecker(); 106 | c.check(0); 107 | c.check(1); 108 | c.check(1375228800); 109 | c.check(1 << 30); 110 | c.check(1 << 31); 111 | } 112 | } 113 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/IntCompressorTest.cs: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * Testing IntCompressor objects. 4 | */ 5 | 6 | using System; 7 | using Genbox.CSharpFastPFOR.Differential; 8 | using Genbox.CSharpFastPFOR.Port; 9 | using Genbox.CSharpFastPFOR.Tests.Port; 10 | using Xunit; 11 | using Xunit.Abstractions; 12 | 13 | namespace Genbox.CSharpFastPFOR.Tests 14 | { 15 | public class IntCompressorTest 16 | { 17 | private readonly ITestOutputHelper _testOutputHelper; 18 | 19 | public IntCompressorTest(ITestOutputHelper testOutputHelper) 20 | { 21 | _testOutputHelper = testOutputHelper; 22 | } 23 | 24 | private readonly IntegratedIntCompressor[] iic = { 25 | new IntegratedIntCompressor(new IntegratedVariableByte()), 26 | new IntegratedIntCompressor(new SkippableIntegratedComposition(new IntegratedBinaryPacking(),new IntegratedVariableByte())) }; 27 | 28 | private readonly IntCompressor[] ic = { 29 | new IntCompressor(new VariableByte()), 30 | new IntCompressor(new SkippableComposition(new BinaryPacking(), new VariableByte())) }; 31 | 32 | [Fact] 33 | public void basicTest() 34 | { 35 | for (int N = 1; N <= 10000; N *= 10) 36 | { 37 | int[] orig = new int[N]; 38 | for (int k = 0; k < N; k++) 39 | orig[k] = 3 * k + 5; 40 | foreach (IntCompressor i in ic) 41 | { 42 | int[] comp = i.compress(orig); 43 | int[] back = i.uncompress(comp); 44 | Assert2.assertArrayEquals(back, orig); 45 | } 46 | } 47 | } 48 | 49 | [Fact] 50 | public void superSimpleExample() 51 | { 52 | IntegratedIntCompressor iic2 = new IntegratedIntCompressor(); 53 | int[] data = new int[2342351]; 54 | for (int k = 0; k < data.Length; ++k) 55 | data[k] = k; 56 | _testOutputHelper.WriteLine("Compressing " + data.Length + " integers using friendly interface"); 57 | int[] compressed = iic2.compress(data); 58 | int[] recov = iic2.uncompress(compressed); 59 | _testOutputHelper.WriteLine("compressed from " + data.Length * 4 / 1024 + "KB to " + compressed.Length * 4 / 1024 + "KB"); 60 | if (!Arrays.equals(recov, data)) throw new Exception("bug"); 61 | } 62 | 63 | [Fact] 64 | public void basicIntegratedTest() 65 | { 66 | for (int N = 1; N <= 10000; N *= 10) 67 | { 68 | int[] orig = new int[N]; 69 | for (int k = 0; k < N; k++) 70 | orig[k] = 3 * k + 5; 71 | foreach (IntegratedIntCompressor i in iic) 72 | { 73 | int[] comp = i.compress(orig); 74 | int[] back = i.uncompress(comp); 75 | Assert2.assertArrayEquals(back, orig); 76 | } 77 | } 78 | } 79 | } 80 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/Port/Assert2.cs: -------------------------------------------------------------------------------- 1 | using System.Linq; 2 | using Xunit; 3 | 4 | namespace Genbox.CSharpFastPFOR.Tests.Port 5 | { 6 | public static class Assert2 7 | { 8 | public static void assertArrayEquals(int[] ints, int[] ints1) 9 | { 10 | Assert.True(ints.SequenceEqual(ints1)); 11 | } 12 | 13 | public static void assertEquals(int first, int second) 14 | { 15 | Assert.Equal(first,second); 16 | } 17 | 18 | public static void assertTrue(bool b) 19 | { 20 | Assert.True(b); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/SkippableBasicTest.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * Just some basic sanity tests. 3 | * 4 | * @author Daniel Lemire 5 | */ 6 | 7 | using System; 8 | using Genbox.CSharpFastPFOR.Port; 9 | using Genbox.CSharpFastPFOR.Tests.Utils; 10 | using Xunit; 11 | using Xunit.Abstractions; 12 | 13 | namespace Genbox.CSharpFastPFOR.Tests 14 | { 15 | public class SkippableBasicTest 16 | { 17 | private readonly ITestOutputHelper _testOutputHelper; 18 | 19 | public SkippableBasicTest(ITestOutputHelper testOutputHelper) 20 | { 21 | _testOutputHelper = testOutputHelper; 22 | } 23 | 24 | private readonly SkippableIntegerCODEC[] codecs = { 25 | new JustCopy(), 26 | new VariableByte(), 27 | new SkippableComposition(new BinaryPacking(), new VariableByte()), 28 | new SkippableComposition(new NewPFD(), new VariableByte()), 29 | new SkippableComposition(new NewPFDS9(), new VariableByte()), 30 | new SkippableComposition(new NewPFDS16(), new VariableByte()), 31 | new SkippableComposition(new OptPFD(), new VariableByte()), 32 | new SkippableComposition(new OptPFDS9(), new VariableByte()), 33 | new SkippableComposition(new OptPFDS16(), new VariableByte()), 34 | new SkippableComposition(new FastPFOR128(), new VariableByte()), 35 | new SkippableComposition(new FastPFOR(), new VariableByte()), 36 | new Simple9(), 37 | new Simple16() }; 38 | 39 | [Fact] 40 | public void consistentTest() 41 | { 42 | const int N = 4096; 43 | int[] data = new int[N]; 44 | int[] rev = new int[N]; 45 | for (int k = 0; k < N; ++k) 46 | data[k] = k % 128; 47 | foreach (SkippableIntegerCODEC c in codecs) 48 | { 49 | _testOutputHelper.WriteLine("[SkippeableBasicTest.consistentTest] codec = " + c); 50 | int[] outBuf = new int[N + 1024]; 51 | for (int n = 0; n <= N; ++n) 52 | { 53 | IntWrapper inPos = new IntWrapper(); 54 | IntWrapper outPos = new IntWrapper(); 55 | c.headlessCompress(data, inPos, n, outBuf, outPos); 56 | 57 | IntWrapper inPoso = new IntWrapper(); 58 | IntWrapper outPoso = new IntWrapper(); 59 | c.headlessUncompress(outBuf, inPoso, outPos.get(), rev, 60 | outPoso, n); 61 | if (outPoso.get() != n) 62 | { 63 | throw new Exception("bug " + n); 64 | } 65 | if (inPoso.get() != outPos.get()) 66 | { 67 | throw new Exception("bug " + n + " " + inPoso.get() + " " + outPos.get()); 68 | } 69 | for (int j = 0; j < n; ++j) 70 | if (data[j] != rev[j]) 71 | { 72 | throw new Exception("bug"); 73 | } 74 | } 75 | } 76 | } 77 | 78 | [Fact] 79 | public void varyingLengthTest() 80 | { 81 | const int N = 4096; 82 | int[] data = new int[N]; 83 | for (int k = 0; k < N; ++k) 84 | data[k] = k; 85 | foreach (SkippableIntegerCODEC c in codecs) 86 | { 87 | _testOutputHelper.WriteLine("[SkippeableBasicTest.varyingLengthTest] codec = " + c); 88 | for (int L = 1; L <= 128; L++) 89 | { 90 | int[] comp = TestUtils.compressHeadless(c, Arrays.copyOf(data, L)); 91 | int[] answer = TestUtils.uncompressHeadless(c, comp, L); 92 | for (int k = 0; k < L; ++k) 93 | if (answer[k] != data[k]) 94 | throw new Exception("bug " + c + " " + k + " " + answer[k] + " " + data[k]); 95 | } 96 | for (int L = 128; L <= N; L *= 2) 97 | { 98 | int[] comp = TestUtils.compressHeadless(c, Arrays.copyOf(data, L)); 99 | int[] answer = TestUtils.uncompressHeadless(c, comp, L); 100 | for (int k = 0; k < L; ++k) 101 | if (answer[k] != data[k]) 102 | throw new Exception("bug"); 103 | } 104 | 105 | } 106 | } 107 | 108 | [Fact] 109 | public void varyingLengthTest2() 110 | { 111 | const int N = 128; 112 | int[] data = new int[N]; 113 | data[127] = -1; 114 | foreach (SkippableIntegerCODEC c in codecs) 115 | { 116 | _testOutputHelper.WriteLine("[SkippeableBasicTest.varyingLengthTest2] codec = " + c); 117 | 118 | if (c is Simple9) 119 | continue; 120 | 121 | if (c is Simple16) 122 | continue; 123 | 124 | for (int L = 1; L <= 128; L++) 125 | { 126 | int[] comp = TestUtils.compressHeadless(c, Arrays.copyOf(data, L)); 127 | int[] answer = TestUtils.uncompressHeadless(c, comp, L); 128 | for (int k = 0; k < L; ++k) 129 | if (answer[k] != data[k]) 130 | throw new Exception("bug at k = " + k + " " + answer[k] + " " + data[k] + " for " + c); 131 | } 132 | for (int L = 128; L <= N; L *= 2) 133 | { 134 | int[] comp = TestUtils.compressHeadless(c, Arrays.copyOf(data, L)); 135 | int[] answer = TestUtils.uncompressHeadless(c, comp, L); 136 | for (int k = 0; k < L; ++k) 137 | if (answer[k] != data[k]) 138 | throw new Exception("bug"); 139 | } 140 | } 141 | } 142 | } 143 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/Utils/TestUtils.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * Static utility methods for test. 3 | */ 4 | 5 | using System; 6 | using Genbox.CSharpFastPFOR.Port; 7 | using Genbox.CSharpFastPFOR.Tests.Port; 8 | 9 | namespace Genbox.CSharpFastPFOR.Tests.Utils 10 | { 11 | public class TestUtils 12 | { 13 | /** 14 | * Check that compress and uncompress keep original array. 15 | * 16 | * @param codec CODEC to test. 17 | * @param orig original integers 18 | */ 19 | public static void assertSymmetry(IntegerCODEC codec, params int[] orig) 20 | { 21 | // There are some cases that compressed array is bigger than original 22 | // array. So output array for compress must be larger. 23 | // 24 | // Example: 25 | // - VariableByte compresses an array like [ -1 ]. 26 | // - Composition compresses a short array. 27 | 28 | const int EXTEND = 1; 29 | 30 | int[] compressed = new int[orig.Length + EXTEND]; 31 | IntWrapper c_inpos = new IntWrapper(0); 32 | IntWrapper c_outpos = new IntWrapper(0); 33 | codec.compress(orig, c_inpos, orig.Length, compressed, 34 | c_outpos); 35 | 36 | Assert2.assertTrue(c_outpos.get() <= orig.Length + EXTEND); 37 | 38 | // Uncompress an array. 39 | int[] uncompressed = new int[orig.Length]; 40 | IntWrapper u_inpos = new IntWrapper(0); 41 | IntWrapper u_outpos = new IntWrapper(0); 42 | codec.uncompress(compressed, u_inpos, c_outpos.get(), 43 | uncompressed, u_outpos); 44 | 45 | // Compare between uncompressed and orig arrays. 46 | int[] target = Arrays.copyOf(uncompressed, u_outpos.get()); 47 | Assert2.assertArrayEquals(orig, target); 48 | } 49 | 50 | public static int[] compress(IntegerCODEC codec, int[] data) 51 | { 52 | int[] outBuf = new int[data.Length * 4]; 53 | IntWrapper inPos = new IntWrapper(); 54 | IntWrapper outPos = new IntWrapper(); 55 | codec.compress(data, inPos, data.Length, outBuf, outPos); 56 | return Arrays.copyOf(outBuf, outPos.get()); 57 | } 58 | 59 | public static int[] uncompress(IntegerCODEC codec, int[] data, int len) 60 | { 61 | int[] outBuf = new int[len + 1024]; 62 | IntWrapper inPos = new IntWrapper(); 63 | IntWrapper outPos = new IntWrapper(); 64 | codec.uncompress(data, inPos, data.Length, outBuf, outPos); 65 | return Arrays.copyOf(outBuf, outPos.get()); 66 | } 67 | 68 | public static sbyte[] compress(ByteIntegerCODEC codec, int[] data) 69 | { 70 | sbyte[] outBuf = new sbyte[data.Length * 4 * 4]; 71 | IntWrapper inPos = new IntWrapper(); 72 | IntWrapper outPos = new IntWrapper(); 73 | codec.compress(data, inPos, data.Length, outBuf, outPos); 74 | return Arrays.copyOf(outBuf, outPos.get()); 75 | } 76 | 77 | public static int[] uncompress(ByteIntegerCODEC codec, sbyte[] data, int len) 78 | { 79 | int[] outBuf = new int[len + 1024]; 80 | IntWrapper inPos = new IntWrapper(); 81 | IntWrapper outPos = new IntWrapper(); 82 | codec.uncompress(data, inPos, data.Length, outBuf, outPos); 83 | return Arrays.copyOf(outBuf, outPos.get()); 84 | } 85 | 86 | public static int[] compressHeadless(SkippableIntegerCODEC codec, int[] data) 87 | { 88 | int[] outBuf = new int[data.Length * 4]; 89 | IntWrapper inPos = new IntWrapper(); 90 | IntWrapper outPos = new IntWrapper(); 91 | codec.headlessCompress(data, inPos, data.Length, outBuf, outPos); 92 | return Arrays.copyOf(outBuf, outPos.get()); 93 | } 94 | 95 | public static int[] uncompressHeadless(SkippableIntegerCODEC codec, int[] data, int len) 96 | { 97 | int[] outBuf = new int[len + 1024]; 98 | IntWrapper inPos = new IntWrapper(); 99 | IntWrapper outPos = new IntWrapper(); 100 | codec.headlessUncompress(data, inPos, data.Length, outBuf, outPos, len); 101 | if (outPos.get() < len) throw new Exception("Insufficient output."); 102 | return Arrays.copyOf(outBuf, outPos.get()); 103 | } 104 | } 105 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/UtilsTest.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using Genbox.CSharpFastPFOR.Port; 3 | using Xunit; 4 | using Xunit.Abstractions; 5 | 6 | namespace Genbox.CSharpFastPFOR.Tests 7 | { 8 | public class UtilsTest 9 | { 10 | private readonly ITestOutputHelper _testOutputHelper; 11 | 12 | public UtilsTest(ITestOutputHelper testOutputHelper) 13 | { 14 | _testOutputHelper = testOutputHelper; 15 | } 16 | 17 | [Fact] 18 | public void testPacking() 19 | { 20 | int[] outputarray = new int[32]; 21 | for (int b = 1; b < 32; ++b) 22 | { 23 | int[] data = new int[32]; 24 | int[] newdata = new int[32]; 25 | int mask = (1 << b) - 1; 26 | for (int j = 0; j < data.Length; ++j) 27 | { 28 | data[j] = mask - (j % mask); 29 | } 30 | for (int n = 0; n <= 32; ++n) 31 | { 32 | Arrays.fill(outputarray, 0); 33 | int howmany = Util.pack(outputarray, 0, data, 0, n, b); 34 | if (howmany != Util.packsize(n, b)) throw new Exception("bug " + n + " " + b); 35 | Util.unpack(Arrays.copyOf(outputarray, howmany), 0, newdata, 0, n, b); 36 | for (int i = 0; i < n; ++i) 37 | if (newdata[i] != data[i]) 38 | { 39 | _testOutputHelper.WriteLine(Arrays.toString(Arrays.copyOf(data, n))); 40 | _testOutputHelper.WriteLine(Arrays.toString(Arrays.copyOf(newdata, n))); 41 | throw new Exception("bug " + b + " " + n); 42 | } 43 | } 44 | } 45 | } 46 | 47 | [Fact] 48 | public void testPackingw() 49 | { 50 | int[] outputarray = new int[32]; 51 | for (int b = 1; b < 32; ++b) 52 | { 53 | int[] data = new int[32]; 54 | int[] newdata = new int[32]; 55 | int mask = (1 << b) - 1; 56 | for (int j = 0; j < data.Length; ++j) 57 | { 58 | data[j] = mask - (j % mask); 59 | } 60 | for (int n = 0; n <= 32; ++n) 61 | { 62 | Arrays.fill(outputarray, 0); 63 | int howmany = Util.packw(outputarray, 0, data, n, b); 64 | if (howmany != Util.packsizew(n, b)) throw new Exception("bug " + n + " " + b); 65 | Util.unpackw(Arrays.copyOf(outputarray, howmany), 0, newdata, n, b); 66 | for (int i = 0; i < n; ++i) 67 | if (newdata[i] != data[i]) 68 | { 69 | _testOutputHelper.WriteLine(Arrays.toString(Arrays.copyOf(data, n))); 70 | _testOutputHelper.WriteLine(Arrays.toString(Arrays.copyOf(newdata, n))); 71 | throw new Exception("bug " + b + " " + n); 72 | } 73 | } 74 | } 75 | } 76 | } 77 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR.Tests/XorBinaryPackingTest.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * @author lemire 8 | * 9 | */ 10 | 11 | using Genbox.CSharpFastPFOR.Differential; 12 | using Genbox.CSharpFastPFOR.Port; 13 | using Genbox.CSharpFastPFOR.Tests.Port; 14 | using Genbox.CSharpFastPFOR.Tests.Utils; 15 | using Xunit; 16 | 17 | namespace Genbox.CSharpFastPFOR.Tests 18 | { 19 | public class XorBinaryPackingTest 20 | { 21 | private static void checkCompressAndUncompress(string label, int[] data) 22 | { 23 | XorBinaryPacking codec = new XorBinaryPacking(); 24 | int[] compBuf = TestUtils.compress(codec, data); 25 | int[] decompBuf = TestUtils.uncompress(codec, compBuf, data.Length); 26 | Assert2.assertArrayEquals(data, decompBuf); 27 | } 28 | 29 | [Fact] 30 | public void compressAndUncompress0() 31 | { 32 | int[] data = new int[128]; 33 | Arrays.fill(data, 0, 31, 1); 34 | Arrays.fill(data, 32, 63, 2); 35 | Arrays.fill(data, 64, 95, 4); 36 | Arrays.fill(data, 96, 127, 8); 37 | checkCompressAndUncompress("compressAndUncompress0", data); 38 | } 39 | 40 | [Fact] 41 | public void compressAndUncompress1() 42 | { 43 | int[] data = new int[128]; 44 | for (int i = 0; i < data.Length; ++i) 45 | { 46 | data[i] = i; 47 | } 48 | checkCompressAndUncompress("compressAndUncompress1", data); 49 | } 50 | 51 | [Fact] 52 | public void compressAndUncompress2() 53 | { 54 | int[] data = new int[128]; 55 | for (int i = 0; i < data.Length; ++i) 56 | { 57 | data[i] = i * (i + 1) / 2; 58 | } 59 | checkCompressAndUncompress("compressAndUncompress2", data); 60 | } 61 | 62 | [Fact] 63 | public void compressAndUncompress3() 64 | { 65 | int[] data = new int[256]; 66 | Arrays.fill(data, 0, 127, 2); 67 | Arrays.fill(data, 128, 255, 3); 68 | checkCompressAndUncompress("compressAndUncompress3", data); 69 | } 70 | 71 | [Fact] 72 | public void compressAndUncompress4() 73 | { 74 | int[] data = new int[256]; 75 | Arrays.fill(data, 0, 127, 3); 76 | Arrays.fill(data, 128, 255, 2); 77 | checkCompressAndUncompress("compressAndUncompress4", data); 78 | } 79 | 80 | [Fact] 81 | public void compressAndUncompress5() 82 | { 83 | int[] data = new int[256]; 84 | for (int i = 0; i < data.Length; ++i) 85 | { 86 | data[i] = i; 87 | } 88 | checkCompressAndUncompress("compressAndUncompress5", data); 89 | } 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.29009.5 5 | MinimumVisualStudioVersion = 15.0.26124.0 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSharpFastPFOR", "CSharpFastPFOR\CSharpFastPFOR.csproj", "{D8FC173D-E21E-4493-A8C3-44B22AB15627}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSharpFastPFOR.Benchmarks", "CSharpFastPFOR.Benchmarks\CSharpFastPFOR.Benchmarks.csproj", "{A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}" 9 | EndProject 10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "CSharpFastPFOR.Tests", "CSharpFastPFOR.Tests\CSharpFastPFOR.Tests.csproj", "{1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}" 11 | EndProject 12 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Items", "Items", "{EDAA6B5E-A9FB-4AD2-8FDA-3A0FD7AD5F62}" 13 | ProjectSection(SolutionItems) = preProject 14 | ..\.gitignore = ..\.gitignore 15 | ..\Directory.Build.targets = ..\Directory.Build.targets 16 | ..\README.md = ..\README.md 17 | EndProjectSection 18 | EndProject 19 | Global 20 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 21 | Debug|Any CPU = Debug|Any CPU 22 | Debug|x64 = Debug|x64 23 | Debug|x86 = Debug|x86 24 | Release|Any CPU = Release|Any CPU 25 | Release|x64 = Release|x64 26 | Release|x86 = Release|x86 27 | EndGlobalSection 28 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 29 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 30 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Debug|Any CPU.Build.0 = Debug|Any CPU 31 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Debug|x64.ActiveCfg = Debug|Any CPU 32 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Debug|x64.Build.0 = Debug|Any CPU 33 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Debug|x86.ActiveCfg = Debug|Any CPU 34 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Debug|x86.Build.0 = Debug|Any CPU 35 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Release|Any CPU.ActiveCfg = Release|Any CPU 36 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Release|Any CPU.Build.0 = Release|Any CPU 37 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Release|x64.ActiveCfg = Release|Any CPU 38 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Release|x64.Build.0 = Release|Any CPU 39 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Release|x86.ActiveCfg = Release|Any CPU 40 | {D8FC173D-E21E-4493-A8C3-44B22AB15627}.Release|x86.Build.0 = Release|Any CPU 41 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 42 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Debug|Any CPU.Build.0 = Debug|Any CPU 43 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Debug|x64.ActiveCfg = Debug|Any CPU 44 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Debug|x64.Build.0 = Debug|Any CPU 45 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Debug|x86.ActiveCfg = Debug|Any CPU 46 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Debug|x86.Build.0 = Debug|Any CPU 47 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Release|Any CPU.ActiveCfg = Release|Any CPU 48 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Release|Any CPU.Build.0 = Release|Any CPU 49 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Release|x64.ActiveCfg = Release|Any CPU 50 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Release|x64.Build.0 = Release|Any CPU 51 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Release|x86.ActiveCfg = Release|Any CPU 52 | {A3193F56-14E3-4E88-BBAD-9C6B91A2A2B7}.Release|x86.Build.0 = Release|Any CPU 53 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 54 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Debug|Any CPU.Build.0 = Debug|Any CPU 55 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Debug|x64.ActiveCfg = Debug|Any CPU 56 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Debug|x64.Build.0 = Debug|Any CPU 57 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Debug|x86.ActiveCfg = Debug|Any CPU 58 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Debug|x86.Build.0 = Debug|Any CPU 59 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Release|Any CPU.ActiveCfg = Release|Any CPU 60 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Release|Any CPU.Build.0 = Release|Any CPU 61 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Release|x64.ActiveCfg = Release|Any CPU 62 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Release|x64.Build.0 = Release|Any CPU 63 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Release|x86.ActiveCfg = Release|Any CPU 64 | {1BBE6772-7D66-42F3-867D-DFE6AACFE2DF}.Release|x86.Build.0 = Release|Any CPU 65 | EndGlobalSection 66 | GlobalSection(SolutionProperties) = preSolution 67 | HideSolutionNode = FALSE 68 | EndGlobalSection 69 | GlobalSection(ExtensibilityGlobals) = postSolution 70 | SolutionGuid = {0C8D26C5-574B-42EB-8B53-B5B5E223B04F} 71 | EndGlobalSection 72 | EndGlobal 73 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR/BinaryPacking.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Scheme based on a commonly used idea: can be extremely fast. 10 | * It encodes integers in blocks of 32 integers. For arrays containing 11 | * an arbitrary number of integers, you should use it in conjunction 12 | * with another CODEC: 13 | * 14 | *
IntegerCODEC ic = 
 15 |  *  new Composition(new BinaryPacking(), new VariableByte()).
16 | * 17 | * Note that this does not use differential coding: if you are working on sorted 18 | * lists, use IntegratedBinaryPacking instead. 19 | * 20 | *

21 | * For details, please see 22 | *

23 | *

24 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second 25 | * through vectorization Software: Practice & Experience 26 | * http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract 27 | * http://arxiv.org/abs/1209.2137 28 | *

29 | *

30 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, 31 | * SIMD Compression and the Intersection of Sorted Integers 32 | * http://arxiv.org/abs/1401.6399 33 | *

34 | * 35 | * @author Daniel Lemire 36 | */ 37 | 38 | namespace Genbox.CSharpFastPFOR 39 | { 40 | public class BinaryPacking : IntegerCODEC, SkippableIntegerCODEC 41 | { 42 | private const int BLOCK_SIZE = 32; 43 | 44 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 45 | { 46 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 47 | if (inlength == 0) 48 | return; 49 | @out[outpos.get()] = inlength; 50 | outpos.increment(); 51 | headlessCompress(@in, inpos, inlength, @out, outpos); 52 | } 53 | 54 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 55 | { 56 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 57 | int tmpoutpos = outpos.get(); 58 | int s = inpos.get(); 59 | for (; s + BLOCK_SIZE * 4 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 4) 60 | { 61 | 62 | int mbits1 = Util.maxbits(@in, s, BLOCK_SIZE); 63 | 64 | int mbits2 = Util.maxbits(@in, s + BLOCK_SIZE, BLOCK_SIZE); 65 | 66 | int mbits3 = Util.maxbits(@in, s + 2 * BLOCK_SIZE, BLOCK_SIZE); 67 | 68 | int mbits4 = Util.maxbits(@in, s + 3 * BLOCK_SIZE, BLOCK_SIZE); 69 | @out[tmpoutpos++] = (mbits1 << 24) | (mbits2 << 16) 70 | | (mbits3 << 8) | (mbits4); 71 | BitPacking.fastpackwithoutmask(@in, s, @out, tmpoutpos, 72 | mbits1); 73 | tmpoutpos += mbits1; 74 | BitPacking.fastpackwithoutmask(@in, s + BLOCK_SIZE, @out, 75 | tmpoutpos, mbits2); 76 | tmpoutpos += mbits2; 77 | BitPacking.fastpackwithoutmask(@in, s + 2 * BLOCK_SIZE, @out, 78 | tmpoutpos, mbits3); 79 | tmpoutpos += mbits3; 80 | BitPacking.fastpackwithoutmask(@in, s + 3 * BLOCK_SIZE, @out, 81 | tmpoutpos, mbits4); 82 | tmpoutpos += mbits4; 83 | } 84 | for (; s < inpos.get() + inlength; s += BLOCK_SIZE) 85 | { 86 | 87 | int mbits = Util.maxbits(@in, s, BLOCK_SIZE); 88 | @out[tmpoutpos++] = mbits; 89 | BitPacking.fastpackwithoutmask(@in, s, @out, tmpoutpos, 90 | mbits); 91 | tmpoutpos += mbits; 92 | 93 | } 94 | inpos.add(inlength); 95 | outpos.set(tmpoutpos); 96 | } 97 | 98 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 99 | { 100 | if (inlength == 0) 101 | return; 102 | 103 | int outlength = @in[inpos.get()]; 104 | inpos.increment(); 105 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 106 | } 107 | 108 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num) 109 | { 110 | 111 | int outlength = Util.greatestMultiple(num, BLOCK_SIZE); 112 | int tmpinpos = inpos.get(); 113 | int s = outpos.get(); 114 | for (; s + BLOCK_SIZE * 4 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 4) 115 | { 116 | 117 | int mbits1 = (int)((uint)@in[tmpinpos] >> 24); 118 | 119 | int mbits2 = (int)((uint)@in[tmpinpos] >> 16) & 0xFF; 120 | 121 | int mbits3 = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 122 | 123 | int mbits4 = (int)((uint)@in[tmpinpos]) & 0xFF; 124 | ++tmpinpos; 125 | BitPacking.fastunpack(@in, tmpinpos, @out, s, mbits1); 126 | tmpinpos += mbits1; 127 | BitPacking 128 | .fastunpack(@in, tmpinpos, @out, s + BLOCK_SIZE, mbits2); 129 | tmpinpos += mbits2; 130 | BitPacking.fastunpack(@in, tmpinpos, @out, s + 2 * BLOCK_SIZE, 131 | mbits3); 132 | tmpinpos += mbits3; 133 | BitPacking.fastunpack(@in, tmpinpos, @out, s + 3 * BLOCK_SIZE, 134 | mbits4); 135 | tmpinpos += mbits4; 136 | } 137 | for (; s < outpos.get() + outlength; s += BLOCK_SIZE) 138 | { 139 | 140 | int mbits = @in[tmpinpos]; 141 | ++tmpinpos; 142 | BitPacking.fastunpack(@in, tmpinpos, @out, s, mbits); 143 | tmpinpos += mbits; 144 | } 145 | outpos.add(outlength); 146 | inpos.set(tmpinpos); 147 | } 148 | 149 | public override string ToString() 150 | { 151 | return nameof(BinaryPacking); 152 | } 153 | } 154 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/ByteIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Interface describing a CODEC to compress integers to bytes. 10 | * 11 | * @author Daniel Lemire 12 | * 13 | */ 14 | namespace Genbox.CSharpFastPFOR 15 | { 16 | public interface ByteIntegerCODEC 17 | { 18 | /** 19 | * Compress data from an array to another array. 20 | * 21 | * Both inpos and outpos are modified to represent how much data was 22 | * read and written to if 12 ints (inlength = 12) are compressed to 3 23 | * bytes, then inpos will be incremented by 12 while outpos will be 24 | * incremented by 3 we use IntWrapper to pass the values by reference. 25 | * 26 | * @param in 27 | * input array 28 | * @param inpos 29 | * location in the input array 30 | * @param inlength 31 | * how many integers to compress 32 | * @param out 33 | * output array 34 | * @param outpos 35 | * where to write in the output array 36 | */ 37 | void compress(int[] @in, IntWrapper inpos, int inlength, sbyte[] @out, IntWrapper outpos); 38 | 39 | /** 40 | * Uncompress data from an array to another array. 41 | * 42 | * Both inpos and outpos parameters are modified to indicate new 43 | * positions after read/write. 44 | * 45 | * @param in 46 | * array containing data in compressed form 47 | * @param inpos 48 | * where to start reading in the array 49 | * @param inlength 50 | * length of the compressed data (ignored by some 51 | * schemes) 52 | * @param out 53 | * array where to write the compressed output 54 | * @param outpos 55 | * where to write the compressed output in out 56 | */ 57 | void uncompress(sbyte[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos); 58 | } 59 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/CSharpFastPFOR.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | Library 5 | netstandard2.0 6 | 7 | 8 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Composition.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Helper class to compose schemes. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | namespace Genbox.CSharpFastPFOR 14 | { 15 | public class Composition : IntegerCODEC 16 | { 17 | private IntegerCODEC F1; 18 | private IntegerCODEC F2; 19 | 20 | /** 21 | * Compose a scheme from a first one (f1) and a second one (f2). The 22 | * first one is called first and then the second one tries to compress 23 | * whatever remains from the first run. 24 | * 25 | * By convention, the first scheme should be such that if, during 26 | * decoding, a 32-bit zero is first encountered, then there is no 27 | * output. 28 | * 29 | * @param f1 30 | * first codec 31 | * @param f2 32 | * second codec 33 | */ 34 | public Composition(IntegerCODEC f1, IntegerCODEC f2) 35 | { 36 | F1 = f1; 37 | F2 = f2; 38 | } 39 | 40 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 41 | { 42 | if (inlength == 0) 43 | { 44 | return; 45 | } 46 | int inposInit = inpos.get(); 47 | int outposInit = outpos.get(); 48 | F1.compress(@in, inpos, inlength, @out, outpos); 49 | if (outpos.get() == outposInit) 50 | { 51 | @out[outposInit] = 0; 52 | outpos.increment(); 53 | } 54 | inlength -= inpos.get() - inposInit; 55 | F2.compress(@in, inpos, inlength, @out, outpos); 56 | } 57 | 58 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 59 | { 60 | if (inlength == 0) 61 | return; 62 | 63 | int init = inpos.get(); 64 | F1.uncompress(@in, inpos, inlength, @out, outpos); 65 | inlength -= inpos.get() - init; 66 | F2.uncompress(@in, inpos, inlength, @out, outpos); 67 | } 68 | 69 | public override string ToString() 70 | { 71 | return F1 + " + " + F2; 72 | } 73 | } 74 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/DeltaZigzagBinaryPacking.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * BinaryPacking with Delta+Zigzag Encoding. 8 | * 9 | * It encodes integers in blocks of 128 integers. For arrays containing 10 | * an arbitrary number of integers, you should use it in conjunction 11 | * with another CODEC: 12 | * 13 | *
IntegerCODEC ic = new Composition(new DeltaZigzagBinaryPacking(),
 14 |  *                      new DeltaZigzagVariableByte()).
15 | * 16 | * @author MURAOKA Taro http://github.com/koron 17 | */ 18 | 19 | namespace Genbox.CSharpFastPFOR 20 | { 21 | public class DeltaZigzagBinaryPacking : IntegerCODEC 22 | { 23 | private const int BLOCK_LENGTH = 128; 24 | 25 | public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 26 | { 27 | inLen = inLen - inLen % BLOCK_LENGTH; 28 | if (inLen == 0) 29 | { 30 | return; 31 | } 32 | 33 | outBuf[outPos.get()] = inLen; 34 | outPos.increment(); 35 | 36 | DeltaZigzagEncoding.Encoder ctx = new DeltaZigzagEncoding.Encoder(0); 37 | int[] work = new int[BLOCK_LENGTH]; 38 | 39 | int op = outPos.get(); 40 | int ip = inPos.get(); 41 | int inPosLast = ip + inLen; 42 | for (; ip < inPosLast; ip += BLOCK_LENGTH) 43 | { 44 | ctx.encodeArray(inBuf, ip, BLOCK_LENGTH, work); 45 | int bits1 = Util.maxbits32(work, 0); 46 | int bits2 = Util.maxbits32(work, 32); 47 | int bits3 = Util.maxbits32(work, 64); 48 | int bits4 = Util.maxbits32(work, 96); 49 | outBuf[op++] = (bits1 << 24) | (bits2 << 16) 50 | | (bits3 << 8) | (bits4 << 0); 51 | op += pack(work, 0, outBuf, op, bits1); 52 | op += pack(work, 32, outBuf, op, bits2); 53 | op += pack(work, 64, outBuf, op, bits3); 54 | op += pack(work, 96, outBuf, op, bits4); 55 | } 56 | 57 | inPos.add(inLen); 58 | outPos.set(op); 59 | } 60 | 61 | public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 62 | { 63 | if (inLen == 0) 64 | { 65 | return; 66 | } 67 | 68 | int outLen = inBuf[inPos.get()]; 69 | inPos.increment(); 70 | 71 | DeltaZigzagEncoding.Decoder ctx = new DeltaZigzagEncoding.Decoder(0); 72 | int[] work = new int[BLOCK_LENGTH]; 73 | 74 | int ip = inPos.get(); 75 | int op = outPos.get(); 76 | int outPosLast = op + outLen; 77 | for (; op < outPosLast; op += BLOCK_LENGTH) 78 | { 79 | int n = inBuf[ip++]; 80 | ip += unpack(inBuf, ip, work, 0, (n >> 24) & 0x3F); 81 | ip += unpack(inBuf, ip, work, 32, (n >> 16) & 0x3F); 82 | ip += unpack(inBuf, ip, work, 64, (n >> 8) & 0x3F); 83 | ip += unpack(inBuf, ip, work, 96, (n >> 0) & 0x3F); 84 | ctx.decodeArray(work, 0, BLOCK_LENGTH, outBuf, op); 85 | } 86 | 87 | outPos.add(outLen); 88 | inPos.set(ip); 89 | } 90 | 91 | private static int pack(int[] inBuf, int inOff, int[] outBuf, int outOff, int validBits) 92 | { 93 | BitPacking.fastpackwithoutmask(inBuf, inOff, outBuf, outOff, validBits); 94 | return validBits; 95 | } 96 | 97 | private static int unpack(int[] inBuf, int inOff, int[] outBuf, int outOff, int validBits) 98 | { 99 | BitPacking.fastunpack(inBuf, inOff, outBuf, outOff, validBits); 100 | return validBits; 101 | } 102 | 103 | public override string ToString() 104 | { 105 | return nameof(DeltaZigzagBinaryPacking); 106 | } 107 | } 108 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/DeltaZigzagEncoding.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * Delta+Zigzag Encoding. 8 | * 9 | * @author MURAOKA Taro http://github.com/koron 10 | */ 11 | namespace Genbox.CSharpFastPFOR 12 | { 13 | public class DeltaZigzagEncoding 14 | { 15 | public class Context 16 | { 17 | protected int ContextValue; 18 | 19 | protected Context(int contextValue) 20 | { 21 | ContextValue = contextValue; 22 | } 23 | 24 | public void setContextValue(int contextValue) 25 | { 26 | ContextValue = contextValue; 27 | } 28 | 29 | public int getContextValue() 30 | { 31 | return ContextValue; 32 | } 33 | } 34 | 35 | public class Encoder : Context 36 | { 37 | public Encoder(int contextValue) : base(contextValue) 38 | { 39 | } 40 | 41 | public int encodeInt(int value) 42 | { 43 | int n = value - ContextValue; 44 | ContextValue = value; 45 | return (n << 1) ^ (n >> 31); 46 | } 47 | 48 | public int[] encodeArray(int[] src, int srcoff, int length, int[] dst, int dstoff) 49 | { 50 | for (int i = 0; i < length; ++i) 51 | { 52 | dst[dstoff + i] = encodeInt(src[srcoff + i]); 53 | } 54 | return dst; 55 | } 56 | 57 | public int[] encodeArray(int[] src, int srcoff, int length, int[] dst) 58 | { 59 | return encodeArray(src, srcoff, length, dst, 0); 60 | } 61 | 62 | public int[] encodeArray(int[] src, int offset, int length) 63 | { 64 | return encodeArray(src, offset, length, new int[length], 0); 65 | } 66 | 67 | public int[] encodeArray(int[] src) 68 | { 69 | return encodeArray(src, 0, src.Length, new int[src.Length], 0); 70 | } 71 | } 72 | 73 | public class Decoder : Context 74 | { 75 | public Decoder(int contextValue) : base(contextValue) 76 | { 77 | } 78 | 79 | public int decodeInt(int value) 80 | { 81 | int n = (int)((uint)value >> 1) ^ ((value << 31) >> 31); 82 | n += ContextValue; 83 | ContextValue = n; 84 | return n; 85 | } 86 | 87 | public int[] decodeArray(int[] src, int srcoff, int length, 88 | int[] dst, int dstoff) 89 | { 90 | for (int i = 0; i < length; ++i) 91 | { 92 | dst[dstoff + i] = decodeInt(src[srcoff + i]); 93 | } 94 | return dst; 95 | } 96 | 97 | public int[] decodeArray(int[] src, int offset, int length) 98 | { 99 | return decodeArray(src, offset, length, new int[length], 0); 100 | } 101 | 102 | public int[] decodeArray(int[] src) 103 | { 104 | return decodeArray(src, 0, src.Length); 105 | } 106 | } 107 | } 108 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/DeltaZigzagVariableByte.cs: -------------------------------------------------------------------------------- 1 | /* 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * VariableByte with Delta+Zigzag Encoding. 8 | * 9 | * @author MURAOKA Taro http://github.com/koron 10 | */ 11 | 12 | using Genbox.CSharpFastPFOR.Port; 13 | 14 | namespace Genbox.CSharpFastPFOR 15 | { 16 | public class DeltaZigzagVariableByte : IntegerCODEC 17 | { 18 | public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 19 | { 20 | if (inLen == 0) 21 | { 22 | return; 23 | } 24 | 25 | ByteBuffer byteBuf = ByteBuffer.allocateDirect(inLen * 5 + 3); 26 | DeltaZigzagEncoding.Encoder ctx = new DeltaZigzagEncoding.Encoder(0); 27 | 28 | // Delta+Zigzag+VariableByte encoding. 29 | int ip = inPos.get(); 30 | 31 | int inPosLast = ip + inLen; 32 | for (; ip < inPosLast; ++ip) 33 | { 34 | // Filter with delta+zigzag encoding. 35 | int n = ctx.encodeInt(inBuf[ip]); 36 | // Variable byte encoding. 37 | 38 | //PORT NOTE: The following IF statements are ported from a switch. Fall through switches are not allowed in C# 39 | int zeros = Integer.numberOfLeadingZeros(n); 40 | 41 | if (zeros < 4) 42 | { 43 | byteBuf.put((sbyte)(((int)((uint)n >> 28) & 0x7F) | 0x80)); 44 | } 45 | 46 | if (zeros < 11) 47 | { 48 | byteBuf.put((sbyte)(((int)((uint)n >> 21) & 0x7F) | 0x80)); 49 | } 50 | 51 | if (zeros < 18) 52 | { 53 | byteBuf.put((sbyte)(((int)((uint)n >> 14) & 0x7F) | 0x80)); 54 | } 55 | 56 | if (zeros < 25) 57 | { 58 | byteBuf.put((sbyte)(((int)((uint)n >> 7) & 0x7F) | 0x80)); 59 | } 60 | 61 | byteBuf.put((sbyte)((uint)n & 0x7F)); 62 | } 63 | 64 | // Padding buffer to considerable as IntBuffer. 65 | for (int i = (4 - (byteBuf.position() % 4)) % 4; i > 0; --i) 66 | { 67 | unchecked 68 | { 69 | byteBuf.put((sbyte)(0x80)); 70 | } 71 | } 72 | 73 | int outLen = byteBuf.position() / 4; 74 | byteBuf.flip(); 75 | IntBuffer intBuf = byteBuf.asIntBuffer(); 76 | /* 77 | * Console.WriteLine(String.format( 78 | * "inLen=%d pos=%d limit=%d outLen=%d outBuf.len=%d", inLen, 79 | * intBuf.position(), intBuf.limit(), outLen, outBuf.Length)); 80 | */ 81 | intBuf.get(outBuf, outPos.get(), outLen); 82 | inPos.add(inLen); 83 | outPos.add(outLen); 84 | } 85 | 86 | public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 87 | { 88 | DeltaZigzagEncoding.Decoder ctx = new DeltaZigzagEncoding.Decoder(0); 89 | 90 | int ip = inPos.get(); 91 | int op = outPos.get(); 92 | int vbcNum = 0, vbcShift = 24; // Varialbe Byte Context. 93 | 94 | int inPosLast = ip + inLen; 95 | while (ip < inPosLast) 96 | { 97 | // Fetch a byte value. 98 | int n = (int)((uint)inBuf[ip] >> vbcShift) & 0xFF; 99 | if (vbcShift > 0) 100 | { 101 | vbcShift -= 8; 102 | } 103 | else 104 | { 105 | vbcShift = 24; 106 | ip++; 107 | } 108 | // Decode variable byte and delta+zigzag. 109 | vbcNum = (vbcNum << 7) + (n & 0x7F); 110 | if ((n & 0x80) == 0) 111 | { 112 | outBuf[op++] = ctx.decodeInt(vbcNum); 113 | vbcNum = 0; 114 | } 115 | } 116 | 117 | outPos.set(op); 118 | inPos.set(inPosLast); 119 | } 120 | 121 | public override string ToString() 122 | { 123 | return nameof(DeltaZigzagVariableByte); 124 | } 125 | } 126 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/Delta.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Generic class to compute differential coding. 10 | * 11 | * @author Daniel Lemire 12 | * 13 | */ 14 | namespace Genbox.CSharpFastPFOR.Differential 15 | { 16 | public class Delta 17 | { 18 | /** 19 | * Apply differential coding (in-place). 20 | * 21 | * @param data 22 | * data to be modified 23 | */ 24 | public static void delta(int[] data) 25 | { 26 | for (int i = data.Length - 1; i > 0; --i) 27 | { 28 | data[i] -= data[i - 1]; 29 | } 30 | } 31 | 32 | /** 33 | * Apply differential coding (in-place) given an initial value. 34 | * 35 | * @param data 36 | * data to be modified 37 | * @param start 38 | * starting index 39 | * @param length 40 | * number of integers to process 41 | * @param init 42 | * initial value 43 | * @return next initial vale 44 | */ 45 | public static int delta(int[] data, int start, int length, int init) 46 | { 47 | int nextinit = data[start + length - 1]; 48 | for (int i = length - 1; i > 0; --i) 49 | { 50 | data[start + i] -= data[start + i - 1]; 51 | } 52 | data[start] -= init; 53 | return nextinit; 54 | } 55 | 56 | /** 57 | * Compute differential coding given an initial value. Output is written 58 | * to a provided array: must have length "length" or better. 59 | * 60 | * @param data 61 | * data to be modified 62 | * @param start 63 | * starting index 64 | * @param length 65 | * number of integers to process 66 | * @param init 67 | * initial value 68 | * @param out 69 | * output array 70 | * @return next initial vale 71 | */ 72 | public static int delta(int[] data, int start, int length, int init, int[] @out) 73 | { 74 | for (int i = length - 1; i > 0; --i) 75 | { 76 | @out[i] = data[start + i] - data[start + i - 1]; 77 | } 78 | @out[0] = data[start] - init; 79 | return data[start + length - 1]; 80 | } 81 | 82 | /** 83 | * Undo differential coding (in-place). Effectively computes a prefix 84 | * sum. 85 | * 86 | * @param data 87 | * to be modified. 88 | */ 89 | public static void inverseDelta(int[] data) 90 | { 91 | for (int i = 1; i < data.Length; ++i) 92 | { 93 | data[i] += data[i - 1]; 94 | } 95 | } 96 | 97 | /** 98 | * Undo differential coding (in-place). Effectively computes a prefix 99 | * sum. Like inverseDelta, only faster. 100 | * 101 | * @param data 102 | * to be modified 103 | */ 104 | public static void fastinverseDelta(int[] data) 105 | { 106 | int sz0 = data.Length / 4 * 4; 107 | int i = 1; 108 | if (sz0 >= 4) 109 | { 110 | int a = data[0]; 111 | for (; i < sz0 - 4; i += 4) 112 | { 113 | a = data[i] += a; 114 | a = data[i + 1] += a; 115 | a = data[i + 2] += a; 116 | a = data[i + 3] += a; 117 | } 118 | } 119 | 120 | for (; i != data.Length; ++i) 121 | { 122 | data[i] += data[i - 1]; 123 | } 124 | } 125 | 126 | /** 127 | * Undo differential coding (in-place). Effectively computes a prefix 128 | * sum. Like inverseDelta, only faster. Uses an initial value. 129 | * 130 | * @param data 131 | * to be modified 132 | * @param start 133 | * starting index 134 | * @param length 135 | * number of integers to process 136 | * @param init 137 | * initial value 138 | * @return next initial value 139 | */ 140 | public static int fastinverseDelta(int[] data, int start, int length, int init) 141 | { 142 | data[start] += init; 143 | int sz0 = length / 4 * 4; 144 | int i = 1; 145 | if (sz0 >= 4) 146 | { 147 | int a = data[start]; 148 | for (; i < sz0 - 4; i += 4) 149 | { 150 | a = data[start + i] += a; 151 | a = data[start + i + 1] += a; 152 | a = data[start + i + 2] += a; 153 | a = data[start + i + 3] += a; 154 | } 155 | } 156 | 157 | for (; i != length; ++i) 158 | { 159 | data[start + i] += data[start + i - 1]; 160 | } 161 | return data[start + length - 1]; 162 | } 163 | } 164 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedBinaryPacking.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Scheme based on a commonly used idea: can be extremely fast. 10 | * 11 | * You should only use this scheme on sorted arrays. Use BinaryPacking if you 12 | * have unsorted arrays. 13 | * 14 | * It encodes integers in blocks of 32 integers. For arrays containing an 15 | * arbitrary number of integers, you should use it in conjunction with another 16 | * CODEC: 17 | * 18 | *
 19 |  * IntegratedIntegerCODEC is = 
 20 |  * new IntegratedComposition(new IntegratedBinaryPacking(), 
 21 |  * new IntegratedVariableByte())
 22 |  * 
23 | * 24 | *

25 | * For details, please see 26 | *

27 | *

28 | * Daniel Lemire and Leonid Boytsov, Decoding billions of integers per second 29 | * through vectorization Software: Practice & Experience http://onlinelibrary.wiley.com/doi/10.1002/spe.2203/abstract http://arxiv.org/abs/1209.2137 33 | *

34 | *

35 | * Daniel Lemire, Leonid Boytsov, Nathan Kurz, SIMD Compression and the 36 | * Intersection of Sorted Integers http://arxiv.org/abs/1401.6399 38 | *

39 | * 40 | * @author Daniel Lemire 41 | * 42 | */ 43 | 44 | namespace Genbox.CSharpFastPFOR.Differential 45 | { 46 | public class IntegratedBinaryPacking : IntegratedIntegerCODEC, SkippableIntegratedIntegerCODEC 47 | { 48 | private const int BLOCK_SIZE = 32; 49 | 50 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 51 | { 52 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 53 | if (inlength == 0) 54 | return; 55 | @out[outpos.get()] = inlength; 56 | outpos.increment(); 57 | headlessCompress(@in, inpos, inlength, @out, outpos, new IntWrapper(0)); 58 | } 59 | 60 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 61 | { 62 | if (inlength == 0) 63 | return; 64 | int outlength = @in[inpos.get()]; 65 | inpos.increment(); 66 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength, new IntWrapper(0)); 67 | } 68 | 69 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue) 70 | { 71 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 72 | if (inlength == 0) 73 | return; 74 | int tmpoutpos = outpos.get(); 75 | int initoffset = initvalue.get(); 76 | initvalue.set(@in[inpos.get() + inlength - 1]); 77 | int s = inpos.get(); 78 | for (; s + BLOCK_SIZE * 4 - 1 < inpos.get() + inlength; s += BLOCK_SIZE * 4) 79 | { 80 | int mbits1 = Util.maxdiffbits(initoffset, @in, s, BLOCK_SIZE); 81 | int initoffset2 = @in[s + 31]; 82 | int mbits2 = Util.maxdiffbits(initoffset2, @in, s + BLOCK_SIZE, BLOCK_SIZE); 83 | int initoffset3 = @in[s + BLOCK_SIZE + 31]; 84 | int mbits3 = Util 85 | .maxdiffbits(initoffset3, @in, s + 2 * BLOCK_SIZE, BLOCK_SIZE); 86 | int initoffset4 = @in[s + 2 * BLOCK_SIZE + 31]; 87 | int mbits4 = Util 88 | .maxdiffbits(initoffset4, @in, s + 3 * BLOCK_SIZE, BLOCK_SIZE); 89 | @out[tmpoutpos++] = (mbits1 << 24) | (mbits2 << 16) | (mbits3 << 8) 90 | | (mbits4); 91 | IntegratedBitPacking.integratedpack(initoffset, @in, s, @out, 92 | tmpoutpos, mbits1); 93 | tmpoutpos += mbits1; 94 | IntegratedBitPacking.integratedpack(initoffset2, @in, s + BLOCK_SIZE, @out, 95 | tmpoutpos, mbits2); 96 | tmpoutpos += mbits2; 97 | IntegratedBitPacking.integratedpack(initoffset3, @in, s + 2 * BLOCK_SIZE, 98 | @out, tmpoutpos, mbits3); 99 | tmpoutpos += mbits3; 100 | IntegratedBitPacking.integratedpack(initoffset4, @in, s + 3 * BLOCK_SIZE, 101 | @out, tmpoutpos, mbits4); 102 | tmpoutpos += mbits4; 103 | initoffset = @in[s + 3 * BLOCK_SIZE + 31]; 104 | } 105 | for (; s < inpos.get() + inlength; s += BLOCK_SIZE) 106 | { 107 | int mbits = Util.maxdiffbits(initoffset, @in, s, BLOCK_SIZE); 108 | @out[tmpoutpos++] = mbits; 109 | IntegratedBitPacking.integratedpack(initoffset, @in, s, @out, 110 | tmpoutpos, mbits); 111 | tmpoutpos += mbits; 112 | initoffset = @in[s + 31]; 113 | } 114 | inpos.add(inlength); 115 | outpos.set(tmpoutpos); 116 | } 117 | 118 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue) 119 | { 120 | int outlength = Util.greatestMultiple(num, BLOCK_SIZE); 121 | int tmpinpos = inpos.get(); 122 | int initoffset = initvalue.get(); 123 | int s = outpos.get(); 124 | for (; s + BLOCK_SIZE * 4 - 1 < outpos.get() + outlength; s += BLOCK_SIZE * 4) 125 | { 126 | int mbits1 = (int)((uint)@in[tmpinpos] >> 24); 127 | int mbits2 = (int)((uint)@in[tmpinpos] >> 16) & 0xFF; 128 | int mbits3 = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 129 | int mbits4 = (@in[tmpinpos]) & 0xFF; 130 | 131 | ++tmpinpos; 132 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 133 | @out, s, mbits1); 134 | tmpinpos += mbits1; 135 | initoffset = @out[s + 31]; 136 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 137 | @out, s + BLOCK_SIZE, mbits2); 138 | tmpinpos += mbits2; 139 | initoffset = @out[s + BLOCK_SIZE + 31]; 140 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 141 | @out, s + 2 * BLOCK_SIZE, mbits3); 142 | tmpinpos += mbits3; 143 | initoffset = @out[s + 2 * BLOCK_SIZE + 31]; 144 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 145 | @out, s + 3 * BLOCK_SIZE, mbits4); 146 | tmpinpos += mbits4; 147 | initoffset = @out[s + 3 * BLOCK_SIZE + 31]; 148 | } 149 | for (; s < outpos.get() + outlength; s += BLOCK_SIZE) 150 | { 151 | int mbits = @in[tmpinpos]; 152 | ++tmpinpos; 153 | IntegratedBitPacking.integratedunpack(initoffset, @in, tmpinpos, 154 | @out, s, mbits); 155 | initoffset = @out[s + 31]; 156 | 157 | tmpinpos += mbits; 158 | } 159 | outpos.add(outlength); 160 | initvalue.set(initoffset); 161 | inpos.set(tmpinpos); 162 | } 163 | 164 | public override string ToString() 165 | { 166 | return nameof(IntegratedBinaryPacking); 167 | } 168 | } 169 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedByteIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Interface describing a CODEC to compress integers to bytes. 10 | * 11 | * "Integrated" means that it uses differential coding. 12 | * 13 | * @author Daniel Lemire 14 | * 15 | */ 16 | namespace Genbox.CSharpFastPFOR.Differential 17 | { 18 | public interface IntegratedByteIntegerCODEC : ByteIntegerCODEC 19 | { 20 | } 21 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedComposition.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Helper class to compose schemes. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | 14 | namespace Genbox.CSharpFastPFOR.Differential 15 | { 16 | public class IntegratedComposition : IntegratedIntegerCODEC 17 | { 18 | private IntegratedIntegerCODEC F1; 19 | private IntegratedIntegerCODEC F2; 20 | 21 | /** 22 | * Compose a scheme from a first one (f1) and a second one (f2). The 23 | * first one is called first and then the second one tries to compress 24 | * whatever remains from the first run. 25 | * 26 | * By convention, the first scheme should be such that if, during 27 | * decoding, a 32-bit zero is first encountered, then there is no 28 | * output. 29 | * 30 | * @param f1 31 | * first codec 32 | * @param f2 33 | * second codec 34 | */ 35 | public IntegratedComposition(IntegratedIntegerCODEC f1, IntegratedIntegerCODEC f2) 36 | { 37 | F1 = f1; 38 | F2 = f2; 39 | } 40 | 41 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 42 | { 43 | if (inlength == 0) 44 | { 45 | return; 46 | } 47 | int inposInit = inpos.get(); 48 | int outposInit = outpos.get(); 49 | F1.compress(@in, inpos, inlength, @out, outpos); 50 | if (outpos.get() == outposInit) 51 | { 52 | @out[outposInit] = 0; 53 | outpos.increment(); 54 | } 55 | inlength -= inpos.get() - inposInit; 56 | F2.compress(@in, inpos, inlength, @out, outpos); 57 | } 58 | 59 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 60 | { 61 | if (inlength == 0) 62 | return; 63 | int init = inpos.get(); 64 | F1.uncompress(@in, inpos, inlength, @out, outpos); 65 | inlength -= inpos.get() - init; 66 | F2.uncompress(@in, inpos, inlength, @out, outpos); 67 | } 68 | 69 | public override string ToString() 70 | { 71 | return F1 + " + " + F2; 72 | } 73 | } 74 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedIntCompressor.cs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | /** 8 | * This is a convenience class that wraps a codec to provide 9 | * a "friendly" API. 10 | * 11 | */ 12 | 13 | using Genbox.CSharpFastPFOR.Port; 14 | 15 | namespace Genbox.CSharpFastPFOR.Differential 16 | { 17 | public class IntegratedIntCompressor 18 | { 19 | private SkippableIntegratedIntegerCODEC codec; 20 | 21 | /** 22 | * Constructor wrapping a codec. 23 | * 24 | * @param c the underlying codec 25 | */ 26 | public IntegratedIntCompressor(SkippableIntegratedIntegerCODEC c) 27 | { 28 | codec = c; 29 | } 30 | 31 | /** 32 | * Constructor with default codec. 33 | */ 34 | public IntegratedIntCompressor() 35 | { 36 | codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte()); 37 | } 38 | 39 | /** 40 | * Compress an array and returns the compressed result as a new array. 41 | * 42 | * @param input array to be compressed 43 | * @return compressed array 44 | */ 45 | public int[] compress(int[] input) 46 | { 47 | int[] compressed = new int[input.Length + 1024]; 48 | compressed[0] = input.Length; 49 | IntWrapper outpos = new IntWrapper(1); 50 | IntWrapper initvalue = new IntWrapper(0); 51 | codec.headlessCompress(input, new IntWrapper(0), 52 | input.Length, compressed, outpos, initvalue); 53 | compressed = Arrays.copyOf(compressed, outpos.intValue()); 54 | return compressed; 55 | } 56 | 57 | /** 58 | * Uncompress an array and returns the uncompressed result as a new array. 59 | * 60 | * @param compressed compressed array 61 | * @return uncompressed array 62 | */ 63 | public int[] uncompress(int[] compressed) 64 | { 65 | int[] decompressed = new int[compressed[0]]; 66 | IntWrapper inpos = new IntWrapper(1); 67 | codec.headlessUncompress(compressed, inpos, 68 | compressed.Length - inpos.intValue(), 69 | decompressed, new IntWrapper(0), 70 | decompressed.Length, new IntWrapper(0)); 71 | return decompressed; 72 | } 73 | } 74 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/IntegratedIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * This is just like IntegerCODEC, except that it indicates that delta coding is 10 | * "integrated", so that you don't need a separate step for delta coding. 11 | * 12 | * @author Daniel Lemire 13 | */ 14 | namespace Genbox.CSharpFastPFOR.Differential 15 | { 16 | public interface IntegratedIntegerCODEC : IntegerCODEC 17 | { 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/SkippableIntegratedComposition.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Helper class to compose schemes. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | namespace Genbox.CSharpFastPFOR.Differential 14 | { 15 | public class SkippableIntegratedComposition : SkippableIntegratedIntegerCODEC 16 | { 17 | private SkippableIntegratedIntegerCODEC F1; 18 | private SkippableIntegratedIntegerCODEC F2; 19 | 20 | /** 21 | * Compose a scheme from a first one (f1) and a second one (f2). The first 22 | * one is called first and then the second one tries to compress whatever 23 | * remains from the first run. 24 | * 25 | * By convention, the first scheme should be such that if, during decoding, 26 | * a 32-bit zero is first encountered, then there is no output. 27 | * 28 | * @param f1 29 | * first codec 30 | * @param f2 31 | * second codec 32 | */ 33 | public SkippableIntegratedComposition(SkippableIntegratedIntegerCODEC f1, SkippableIntegratedIntegerCODEC f2) 34 | { 35 | F1 = f1; 36 | F2 = f2; 37 | } 38 | 39 | public override string ToString() 40 | { 41 | return F1 + " + " + F2; 42 | } 43 | 44 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue) 45 | { 46 | if (inlength == 0) 47 | return; 48 | int init = inpos.get(); 49 | F1.headlessCompress(@in, inpos, inlength, @out, outpos, initvalue); 50 | if (outpos.get() == 0) 51 | { 52 | @out[0] = 0; 53 | outpos.increment(); 54 | } 55 | inlength -= inpos.get() - init; 56 | F2.headlessCompress(@in, inpos, inlength, @out, outpos, initvalue); 57 | } 58 | 59 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue) 60 | { 61 | if (inlength == 0) 62 | return; 63 | int init = inpos.get(); 64 | F1.headlessUncompress(@in, inpos, inlength, @out, outpos, num, initvalue); 65 | inlength -= inpos.get() - init; 66 | num -= outpos.get(); 67 | F2.headlessUncompress(@in, inpos, inlength, @out, outpos, num, initvalue); 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/SkippableIntegratedIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Interface describing a standard CODEC to compress integers. This is a 10 | * variation on the IntegerCODEC interface meant to be used for random access 11 | * and with integrated differential coding 12 | * (i.e., given a large array, you can segment it and decode just the subarray you need). 13 | * 14 | * The main differences are that we must specify the number of integers we wish to 15 | * decode as well as the initial value (for differential coding). This information 16 | * might be stored elsewhere. 17 | * 18 | * 19 | * @author Daniel Lemire 20 | * 21 | */ 22 | namespace Genbox.CSharpFastPFOR.Differential 23 | { 24 | public interface SkippableIntegratedIntegerCODEC 25 | { 26 | /** 27 | * Compress data from an array to another array. 28 | * 29 | * Both inpos and outpos are modified to represent how much data was read 30 | * and written to if 12 ints (inlength = 12) are compressed to 3 ints, then 31 | * inpos will be incremented by 12 while outpos will be incremented by 3 we 32 | * use IntWrapper to pass the values by reference. 33 | * 34 | * @param in 35 | * input array 36 | * @param inpos 37 | * location in the input array 38 | * @param inlength 39 | * how many integers to compress 40 | * @param out 41 | * output array 42 | * @param outpos 43 | * where to write in the output array 44 | * @param initvalue initial value for the purpose of differential coding, the value is automatically updated 45 | */ 46 | void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue); 47 | 48 | /** 49 | * Uncompress data from an array to another array. 50 | * 51 | * Both inpos and outpos parameters are modified to indicate new positions 52 | * after read/write. 53 | * 54 | * @param in 55 | * array containing data in compressed form 56 | * @param inpos 57 | * where to start reading in the array 58 | * @param inlength 59 | * length of the compressed data (ignored by some schemes) 60 | * @param out 61 | * array where to write the compressed output 62 | * @param outpos 63 | * where to write the compressed output in out 64 | * @param num 65 | * number of integers we want to decode, the actual number of integers decoded can be less 66 | * @param initvalue initial value for the purpose of differential coding, the value is automatically updated 67 | */ 68 | void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue); 69 | } 70 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Differential/XorBinaryPacking.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | */ 5 | 6 | /** 7 | * BinaryPacking over XOR differential. 8 | * 9 | *
IntegratedIntegerCODEC is = 
 10 |  * new Composition(new XorBinaryPacking(), new VariableByte())
11 | * 12 | * @author MURAOKA Taro http://github.com/koron 13 | */ 14 | 15 | using Genbox.CSharpFastPFOR.Port; 16 | 17 | namespace Genbox.CSharpFastPFOR.Differential 18 | { 19 | public class XorBinaryPacking : IntegratedIntegerCODEC 20 | { 21 | private const int BLOCK_LENGTH = 128; 22 | 23 | public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 24 | { 25 | inLen = inLen - inLen % BLOCK_LENGTH; 26 | if (inLen == 0) 27 | return; 28 | 29 | outBuf[outPos.get()] = inLen; 30 | outPos.increment(); 31 | 32 | int context = 0; 33 | int[] work = new int[32]; 34 | 35 | int op = outPos.get(); 36 | int ip = inPos.get(); 37 | int inPosLast = ip + inLen; 38 | for (; ip < inPosLast; ip += BLOCK_LENGTH) 39 | { 40 | int bits1 = xorMaxBits(inBuf, ip + 0, 32, context); 41 | int bits2 = xorMaxBits(inBuf, ip + 32, 32, 42 | inBuf[ip + 31]); 43 | int bits3 = xorMaxBits(inBuf, ip + 64, 32, 44 | inBuf[ip + 63]); 45 | int bits4 = xorMaxBits(inBuf, ip + 96, 32, 46 | inBuf[ip + 95]); 47 | outBuf[op++] = (bits1 << 24) | (bits2 << 16) 48 | | (bits3 << 8) | (bits4 << 0); 49 | op += xorPack(inBuf, ip + 0, outBuf, op, bits1, 50 | context, work); 51 | op += xorPack(inBuf, ip + 32, outBuf, op, bits2, 52 | inBuf[ip + 31], work); 53 | op += xorPack(inBuf, ip + 64, outBuf, op, bits3, 54 | inBuf[ip + 63], work); 55 | op += xorPack(inBuf, ip + 96, outBuf, op, bits4, 56 | inBuf[ip + 95], work); 57 | context = inBuf[ip + 127]; 58 | } 59 | 60 | inPos.add(inLen); 61 | outPos.set(op); 62 | } 63 | 64 | public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos) 65 | { 66 | if (inLen == 0) 67 | return; 68 | 69 | int outLen = inBuf[inPos.get()]; 70 | inPos.increment(); 71 | 72 | int context = 0; 73 | int[] work = new int[32]; 74 | 75 | int ip = inPos.get(); 76 | int op = outPos.get(); 77 | int outPosLast = op + outLen; 78 | for (; op < outPosLast; op += BLOCK_LENGTH) 79 | { 80 | int bits1 = (int)((uint)inBuf[ip] >> 24); 81 | int bits2 = (int)((uint)inBuf[ip] >> 16) & 0xFF; 82 | int bits3 = (int)((uint)inBuf[ip] >> 8) & 0xFF; 83 | int bits4 = (int)((uint)inBuf[ip] >> 0) & 0xFF; 84 | ++ip; 85 | ip += xorUnpack(inBuf, ip, outBuf, op + 0, bits1, 86 | context, work); 87 | ip += xorUnpack(inBuf, ip, outBuf, op + 32, bits2, 88 | outBuf[op + 31], work); 89 | ip += xorUnpack(inBuf, ip, outBuf, op + 64, bits3, 90 | outBuf[op + 63], work); 91 | ip += xorUnpack(inBuf, ip, outBuf, op + 96, bits4, 92 | outBuf[op + 95], work); 93 | context = outBuf[op + 127]; 94 | } 95 | 96 | outPos.add(outLen); 97 | inPos.set(ip); 98 | } 99 | 100 | private static int xorMaxBits(int[] buf, int offset, int length, int context) 101 | { 102 | int mask = buf[offset] ^ context; 103 | int M = offset + length; 104 | for (int i = offset + 1, prev = offset; i < M; ++i, ++prev) 105 | { 106 | mask |= buf[i] ^ buf[prev]; 107 | } 108 | 109 | return 32 - Integer.numberOfLeadingZeros(mask); 110 | } 111 | 112 | private static int xorPack(int[] inBuf, int inOff, int[] outBuf, int outOff, int validBits, int context, int[] work) 113 | { 114 | work[0] = inBuf[inOff] ^ context; 115 | for (int i = 1, p = inOff + 1; i < 32; ++i, ++p) 116 | { 117 | work[i] = inBuf[p] ^ inBuf[p - 1]; 118 | } 119 | BitPacking.fastpackwithoutmask(work, 0, outBuf, outOff, 120 | validBits); 121 | 122 | return validBits; 123 | } 124 | 125 | private static int xorUnpack(int[] inBuf, int inOff, int[] outBuf, int outOff, int validBits, int context, int[] work) 126 | { 127 | BitPacking.fastunpack(inBuf, inOff, work, 0, validBits); 128 | outBuf[outOff] = context = work[0] ^ context; 129 | for (int i = 1, p = outOff + 1; i < 32; ++i, ++p) 130 | { 131 | outBuf[p] = context = work[i] ^ context; 132 | } 133 | return validBits; 134 | } 135 | 136 | public override string ToString() 137 | { 138 | return nameof(XorBinaryPacking); 139 | } 140 | } 141 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/IntCompressor.cs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | /** 6 | * This is a convenience class that wraps a codec to provide 7 | * a "friendly" API. 8 | * 9 | */ 10 | 11 | using Genbox.CSharpFastPFOR.Port; 12 | 13 | namespace Genbox.CSharpFastPFOR 14 | { 15 | public class IntCompressor 16 | { 17 | private SkippableIntegerCODEC codec; 18 | 19 | /** 20 | * Constructor wrapping a codec. 21 | * 22 | * @param c the underlying codec 23 | */ 24 | public IntCompressor(SkippableIntegerCODEC c) 25 | { 26 | codec = c; 27 | } 28 | 29 | /** 30 | * Constructor with default codec. 31 | */ 32 | public IntCompressor() 33 | { 34 | codec = new SkippableComposition(new BinaryPacking(), new VariableByte()); 35 | } 36 | 37 | /** 38 | * Compress an array and returns the compressed result as a new array. 39 | * 40 | * @param input array to be compressed 41 | * @return compressed array 42 | */ 43 | public int[] compress(int[] input) 44 | { 45 | int[] compressed = new int[input.Length + 1024]; 46 | compressed[0] = input.Length; 47 | IntWrapper outpos = new IntWrapper(1); 48 | codec.headlessCompress(input, new IntWrapper(0), input.Length, compressed, outpos); 49 | compressed = Arrays.copyOf(compressed, outpos.intValue()); 50 | return compressed; 51 | } 52 | 53 | /** 54 | * Uncompress an array and returns the uncompressed result as a new array. 55 | * 56 | * @param compressed compressed array 57 | * @return uncompressed array 58 | */ 59 | public int[] uncompress(int[] compressed) 60 | { 61 | int[] decompressed = new int[compressed[0]]; 62 | IntWrapper inpos = new IntWrapper(1); 63 | codec.headlessUncompress(compressed, inpos, compressed.Length - inpos.intValue(), decompressed, new IntWrapper(0), decompressed.Length); 64 | return decompressed; 65 | } 66 | } 67 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/IntWrapper.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | */ 6 | 7 | /** 8 | * Essentially a mutable wrapper around an integer. 9 | * 10 | * @author dwu 11 | */ 12 | namespace Genbox.CSharpFastPFOR 13 | { 14 | public class IntWrapper 15 | { 16 | private int value; 17 | 18 | /** 19 | * Constructor: value set to 0. 20 | */ 21 | public IntWrapper() : this(0) 22 | { 23 | } 24 | 25 | /** 26 | * Construction: value set to provided argument. 27 | * 28 | * @param v 29 | * value to wrap 30 | */ 31 | public IntWrapper(int v) 32 | { 33 | this.value = v; 34 | } 35 | 36 | /** 37 | * add the provided value to the integer 38 | * @param v value to add 39 | */ 40 | public void add(int v) 41 | { 42 | this.value += v; 43 | } 44 | 45 | 46 | public double doubleValue() 47 | { 48 | return this.value; 49 | } 50 | 51 | public float floatValue() 52 | { 53 | return this.value; 54 | } 55 | 56 | /** 57 | * @return the integer value 58 | */ 59 | public int get() 60 | { 61 | return this.value; 62 | } 63 | 64 | /** 65 | * add 1 to the integer value 66 | */ 67 | public void increment() 68 | { 69 | this.value++; 70 | } 71 | 72 | public int intValue() 73 | { 74 | return this.value; 75 | } 76 | 77 | public long longValue() 78 | { 79 | return this.value; 80 | } 81 | 82 | /** 83 | * Set the value to that of the specified integer. 84 | * 85 | * @param value 86 | * specified integer value 87 | */ 88 | public void set(int value) 89 | { 90 | this.value = value; 91 | } 92 | 93 | public override string ToString() 94 | { 95 | return value.ToString(); 96 | } 97 | } 98 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/IntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Interface describing a standard CODEC to compress integers. 10 | * 11 | * @author Daniel Lemire 12 | * 13 | */ 14 | namespace Genbox.CSharpFastPFOR 15 | { 16 | public interface IntegerCODEC 17 | { 18 | /** 19 | * Compress data from an array to another array. 20 | * 21 | * Both inpos and outpos are modified to represent how much data was 22 | * read and written to if 12 ints (inlength = 12) are compressed to 3 23 | * ints, then inpos will be incremented by 12 while outpos will be 24 | * incremented by 3 we use IntWrapper to pass the values by reference. 25 | * 26 | * @param in 27 | * input array 28 | * @param inpos 29 | * location in the input array 30 | * @param inlength 31 | * how many integers to compress 32 | * @param out 33 | * output array 34 | * @param outpos 35 | * where to write in the output array 36 | */ 37 | void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos); 38 | 39 | /** 40 | * Uncompress data from an array to another array. 41 | * 42 | * Both inpos and outpos parameters are modified to indicate new 43 | * positions after read/write. 44 | * 45 | * @param in 46 | * array containing data in compressed form 47 | * @param inpos 48 | * where to start reading in the array 49 | * @param inlength 50 | * length of the compressed data (ignored by some 51 | * schemes) 52 | * @param out 53 | * array where to write the compressed output 54 | * @param outpos 55 | * where to write the compressed output in out 56 | */ 57 | void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos); 58 | } 59 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/JustCopy.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * @author Daniel Lemire 10 | * 11 | */ 12 | 13 | using System; 14 | 15 | namespace Genbox.CSharpFastPFOR 16 | { 17 | public class JustCopy : IntegerCODEC, SkippableIntegerCODEC 18 | { 19 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 20 | { 21 | Array.Copy(@in, inpos.get(), @out, outpos.get(), inlength); 22 | inpos.add(inlength); 23 | outpos.add(inlength); 24 | } 25 | 26 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 27 | { 28 | headlessUncompress(@in, inpos, inlength, @out, outpos, inlength); 29 | } 30 | 31 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num) 32 | { 33 | Array.Copy(@in, inpos.get(), @out, outpos.get(), num); 34 | inpos.add(num); 35 | outpos.add(num); 36 | } 37 | 38 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 39 | { 40 | headlessCompress(@in, inpos, inlength, @out, outpos); 41 | } 42 | 43 | public override string ToString() 44 | { 45 | return nameof(JustCopy); 46 | } 47 | } 48 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/NewPFD.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * NewPFD/NewPFOR: fast patching scheme by Yan et al. 10 | *

11 | * Follows: 12 | *

13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *

16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *
IntegerCODEC ic = 
 23 | *  new Composition(new NewPDF(), new VariableByte()).
24 | * 25 | * Note that this does not use differential coding: if you are working on sorted 26 | * lists, you must compute the deltas separately. (Yes, this is true even though 27 | * the "D" at the end of the name probably stands for delta.) 28 | * 29 | * For multi-threaded applications, each thread should use its own NewPFD 30 | * object. 31 | * 32 | * @author Daniel Lemire 33 | */ 34 | namespace Genbox.CSharpFastPFOR 35 | { 36 | public class NewPFD : IntegerCODEC, SkippableIntegerCODEC 37 | { 38 | private const int BLOCK_SIZE = 128; 39 | 40 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 41 | 42 | /** 43 | * Constructor for the NewPFD CODEC. 44 | */ 45 | public NewPFD() 46 | { 47 | } 48 | 49 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 50 | { 51 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 52 | if (inlength == 0) 53 | return; 54 | encodePage(@in, inpos, inlength, @out, outpos); 55 | } 56 | 57 | protected static int[] bits = { 0, 1, 2, 3, 4, 5, 58 | 6, 7, 8, 9, 10, 11, 59 | 12, 13, 16, 20, 32 }; 60 | 61 | protected static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 62 | 10, 11, 12, 13, 14, 14, 14, 15, 63 | 15, 15, 15, 16, 16, 16, 16, 16, 64 | 16, 16, 16, 16, 16, 16, 16 }; 65 | 66 | protected static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 67 | { 68 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 69 | int mini = 0; 70 | if (mini + 28 < bits[invbits[mb]]) 71 | mini = bits[invbits[mb]] - 28; // 28 is the max for 72 | // exceptions 73 | int besti = bits.Length - 1; 74 | int exceptcounter = 0; 75 | for (int i = mini; i < bits.Length - 1; ++i) 76 | { 77 | int tmpcounter = 0; 78 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 79 | if ((int)((uint)@in[k] >> bits[i]) != 0) 80 | ++tmpcounter; 81 | if (tmpcounter * 10 <= BLOCK_SIZE) 82 | { 83 | besti = i; 84 | exceptcounter = tmpcounter; 85 | break; 86 | } 87 | } 88 | bestb.set(besti); 89 | bestexcept.set(exceptcounter); 90 | } 91 | 92 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 93 | { 94 | int tmpoutpos = outpos.get(); 95 | int tmpinpos = inpos.get(); 96 | IntWrapper bestb = new IntWrapper(); 97 | IntWrapper bestexcept = new IntWrapper(); 98 | for (int finalinpos = tmpinpos + thissize; tmpinpos 99 | + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 100 | { 101 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 102 | int tmpbestb = bestb.get(); 103 | int nbrexcept = bestexcept.get(); 104 | int exceptsize = 0; 105 | int remember = tmpoutpos; 106 | tmpoutpos++; 107 | if (nbrexcept > 0) 108 | { 109 | for (int i = 0, c = 0; i < BLOCK_SIZE; ++i) 110 | { 111 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 112 | { 113 | exceptbuffer[c + nbrexcept] = i; 114 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 115 | ++c; 116 | } 117 | } 118 | exceptsize = S16.compress(exceptbuffer, 0, 119 | 2 * nbrexcept, @out, tmpoutpos); 120 | tmpoutpos += exceptsize; 121 | } 122 | @out[remember] = tmpbestb | (nbrexcept << 8) 123 | | (exceptsize << 16); 124 | for (int k = 0; k < BLOCK_SIZE; k += 32) 125 | { 126 | BitPacking.fastpack(@in, tmpinpos + k, @out, 127 | tmpoutpos, bits[tmpbestb]); 128 | tmpoutpos += bits[tmpbestb]; 129 | } 130 | } 131 | inpos.set(tmpinpos); 132 | outpos.set(tmpoutpos); 133 | } 134 | 135 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 136 | { 137 | if (inlength == 0) 138 | return; 139 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 140 | decodePage(@in, inpos, @out, outpos, mynvalue); 141 | } 142 | 143 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 144 | { 145 | int tmpoutpos = outpos.get(); 146 | int tmpinpos = inpos.get(); 147 | 148 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 149 | { 150 | int b = @in[tmpinpos] & 0xFF; 151 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 152 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 153 | ++tmpinpos; 154 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 155 | 0, 2 * cexcept); 156 | tmpinpos += exceptsize; 157 | for (int k = 0; k < BLOCK_SIZE; k += 32) 158 | { 159 | BitPacking.fastunpack(@in, tmpinpos, @out, 160 | tmpoutpos + k, bits[b]); 161 | tmpinpos += bits[b]; 162 | } 163 | for (int k = 0; k < cexcept; ++k) 164 | { 165 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 166 | } 167 | } 168 | outpos.set(tmpoutpos); 169 | inpos.set(tmpinpos); 170 | } 171 | 172 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 173 | { 174 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 175 | if (inlength == 0) 176 | return; 177 | @out[outpos.get()] = inlength; 178 | outpos.increment(); 179 | headlessCompress(@in, inpos, inlength, @out, outpos); 180 | } 181 | 182 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 183 | { 184 | if (inlength == 0) 185 | return; 186 | int outlength = @in[inpos.get()]; 187 | inpos.increment(); 188 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 189 | } 190 | 191 | public override string ToString() 192 | { 193 | return nameof(NewPFD); 194 | } 195 | } 196 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/NewPFDS16.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * NewPFD/NewPFOR based on Simple16 by Yan et al. 10 | *

11 | * Follows: 12 | *

13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *

16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *

IntegerCODEC ic =
 23 |  *   new Composition(new PDFS16(), new VariableByte()).
24 | * 25 | * Note that this does not use differential coding: if you are working on sorted 26 | * lists, you must compute the deltas separately. 27 | * 28 | * For multi-threaded applications, each thread should use its own NewPFDS16 29 | * object. 30 | * 31 | * @author Daniel Lemire 32 | */ 33 | namespace Genbox.CSharpFastPFOR 34 | { 35 | public class NewPFDS16 : IntegerCODEC, SkippableIntegerCODEC 36 | { 37 | private const int BLOCK_SIZE = 128; 38 | 39 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 40 | 41 | /** 42 | * Constructor for the NewPFDS16 CODEC. 43 | */ 44 | public NewPFDS16() 45 | { 46 | } 47 | 48 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 49 | { 50 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 51 | if (inlength == 0) 52 | return; 53 | encodePage(@in, inpos, inlength, @out, outpos); 54 | } 55 | 56 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 57 | 6, 7, 8, 9, 10, 11, 58 | 12, 13, 16, 20, 32 }; 59 | 60 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 61 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 62 | 16, 16, 16, 16, 16, 16, 16 }; 63 | 64 | private static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 65 | { 66 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 67 | int mini = 0; 68 | if (mini + 28 < bits[invbits[mb]]) 69 | mini = bits[invbits[mb]] - 28; // 28 is the max for 70 | // exceptions 71 | int besti = bits.Length - 1; 72 | int exceptcounter = 0; 73 | for (int i = mini; i < bits.Length - 1; ++i) 74 | { 75 | int tmpcounter = 0; 76 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 77 | if ((int)((uint)@in[k] >> bits[i]) != 0) 78 | ++tmpcounter; 79 | if (tmpcounter * 10 <= BLOCK_SIZE) 80 | { 81 | besti = i; 82 | exceptcounter = tmpcounter; 83 | break; 84 | } 85 | } 86 | bestb.set(besti); 87 | bestexcept.set(exceptcounter); 88 | } 89 | 90 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 91 | { 92 | int tmpoutpos = outpos.get(); 93 | int tmpinpos = inpos.get(); 94 | IntWrapper bestb = new IntWrapper(); 95 | IntWrapper bestexcept = new IntWrapper(); 96 | for (int finalinpos = tmpinpos + thissize; tmpinpos 97 | + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 98 | { 99 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 100 | int tmpbestb = bestb.get(); 101 | int nbrexcept = bestexcept.get(); 102 | int exceptsize = 0; 103 | int remember = tmpoutpos; 104 | tmpoutpos++; 105 | if (nbrexcept > 0) 106 | { 107 | for (int i = 0, c = 0; i < BLOCK_SIZE; ++i) 108 | { 109 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 110 | { 111 | exceptbuffer[c + nbrexcept] = i; 112 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 113 | ++c; 114 | } 115 | } 116 | exceptsize = S16.compress(exceptbuffer, 0, 117 | 2 * nbrexcept, @out, tmpoutpos); 118 | tmpoutpos += exceptsize; 119 | } 120 | @out[remember] = tmpbestb | (nbrexcept << 8) 121 | | (exceptsize << 16); 122 | for (int k = 0; k < BLOCK_SIZE; k += 32) 123 | { 124 | BitPacking.fastpack(@in, tmpinpos + k, @out, 125 | tmpoutpos, bits[tmpbestb]); 126 | tmpoutpos += bits[tmpbestb]; 127 | } 128 | } 129 | inpos.set(tmpinpos); 130 | outpos.set(tmpoutpos); 131 | } 132 | 133 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 134 | { 135 | if (inlength == 0) 136 | return; 137 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 138 | decodePage(@in, inpos, @out, outpos, mynvalue); 139 | } 140 | 141 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 142 | { 143 | int tmpoutpos = outpos.get(); 144 | int tmpinpos = inpos.get(); 145 | 146 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 147 | { 148 | int b = @in[tmpinpos] & 0xFF; 149 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 150 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 151 | ++tmpinpos; 152 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 153 | 0, 2 * cexcept); 154 | tmpinpos += exceptsize; 155 | for (int k = 0; k < BLOCK_SIZE; k += 32) 156 | { 157 | BitPacking.fastunpack(@in, tmpinpos, @out, 158 | tmpoutpos + k, bits[b]); 159 | tmpinpos += bits[b]; 160 | } 161 | for (int k = 0; k < cexcept; ++k) 162 | { 163 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 164 | } 165 | } 166 | outpos.set(tmpoutpos); 167 | inpos.set(tmpinpos); 168 | } 169 | 170 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 171 | { 172 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 173 | if (inlength == 0) 174 | return; 175 | @out[outpos.get()] = inlength; 176 | outpos.increment(); 177 | headlessCompress(@in, inpos, inlength, @out, outpos); 178 | } 179 | 180 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 181 | { 182 | if (inlength == 0) 183 | return; 184 | int outlength = @in[inpos.get()]; 185 | inpos.increment(); 186 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 187 | } 188 | 189 | public override string ToString() 190 | { 191 | return nameof(NewPFDS16); 192 | } 193 | } 194 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/NewPFDS9.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * NewPFD/NewPFOR based on Simple9 by Yan et al. 10 | *

11 | * Follows: 12 | *

13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *

16 | * using Simple9 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *
IntegerCODEC ic = new Composition(new PDFS9(), new VariableByte()).
23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. 26 | * 27 | * For multi-threaded applications, each thread should use its own NewPFDS9 28 | * object. 29 | * 30 | * @author Daniel Lemire 31 | */ 32 | namespace Genbox.CSharpFastPFOR 33 | { 34 | public class NewPFDS9 : IntegerCODEC, SkippableIntegerCODEC 35 | { 36 | private const int BLOCK_SIZE = 128; 37 | 38 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 39 | 40 | /** 41 | * Constructor for the NewPFDS9 CODEC. 42 | */ 43 | public NewPFDS9() 44 | { 45 | } 46 | 47 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 48 | { 49 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 50 | if (inlength == 0) 51 | return; 52 | encodePage(@in, inpos, inlength, @out, outpos); 53 | } 54 | 55 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 56 | 11, 12, 13, 16, 20, 32 }; 57 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 58 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 59 | 16, 16, 16, 16, 16, 16, 16 }; 60 | 61 | private static void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 62 | { 63 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 64 | int mini = 0; 65 | if (mini + 28 < bits[invbits[mb]]) 66 | mini = bits[invbits[mb]] - 28; // 28 is the max for 67 | // exceptions 68 | int besti = bits.Length - 1; 69 | int exceptcounter = 0; 70 | for (int i = mini; i < bits.Length - 1; ++i) 71 | { 72 | int tmpcounter = 0; 73 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 74 | if ((int)((uint)@in[k] >> bits[i]) != 0) 75 | ++tmpcounter; 76 | if (tmpcounter * 10 <= BLOCK_SIZE) 77 | { 78 | besti = i; 79 | exceptcounter = tmpcounter; 80 | break; 81 | } 82 | } 83 | bestb.set(besti); 84 | bestexcept.set(exceptcounter); 85 | } 86 | 87 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 88 | { 89 | int tmpoutpos = outpos.get(); 90 | int tmpinpos = inpos.get(); 91 | IntWrapper bestb = new IntWrapper(); 92 | IntWrapper bestexcept = new IntWrapper(); 93 | for (int finalinpos = tmpinpos + thissize; tmpinpos 94 | + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 95 | { 96 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 97 | int tmpbestb = bestb.get(); 98 | int nbrexcept = bestexcept.get(); 99 | int exceptsize = 0; 100 | int remember = tmpoutpos; 101 | tmpoutpos++; 102 | if (nbrexcept > 0) 103 | { 104 | for (int i = 0, c = 0; i < BLOCK_SIZE; ++i) 105 | { 106 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 107 | { 108 | exceptbuffer[c + nbrexcept] = i; 109 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 110 | ++c; 111 | } 112 | } 113 | exceptsize = S9.compress(exceptbuffer, 0, 114 | 2 * nbrexcept, @out, tmpoutpos); 115 | tmpoutpos += exceptsize; 116 | } 117 | @out[remember] = tmpbestb | (nbrexcept << 8) 118 | | (exceptsize << 16); 119 | for (int k = 0; k < BLOCK_SIZE; k += 32) 120 | { 121 | BitPacking.fastpack(@in, tmpinpos + k, @out, 122 | tmpoutpos, bits[tmpbestb]); 123 | tmpoutpos += bits[tmpbestb]; 124 | } 125 | } 126 | inpos.set(tmpinpos); 127 | outpos.set(tmpoutpos); 128 | } 129 | 130 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 131 | { 132 | if (inlength == 0) 133 | return; 134 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 135 | decodePage(@in, inpos, @out, outpos, mynvalue); 136 | } 137 | 138 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 139 | { 140 | int tmpoutpos = outpos.get(); 141 | int tmpinpos = inpos.get(); 142 | 143 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 144 | { 145 | int b = @in[tmpinpos] & 0xFF; 146 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 147 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 148 | ++tmpinpos; 149 | S9.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 150 | 0, 2 * cexcept); 151 | tmpinpos += exceptsize; 152 | for (int k = 0; k < BLOCK_SIZE; k += 32) 153 | { 154 | BitPacking.fastunpack(@in, tmpinpos, @out, 155 | tmpoutpos + k, bits[b]); 156 | tmpinpos += bits[b]; 157 | } 158 | for (int k = 0; k < cexcept; ++k) 159 | { 160 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 161 | } 162 | } 163 | outpos.set(tmpoutpos); 164 | inpos.set(tmpinpos); 165 | } 166 | 167 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 168 | { 169 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 170 | if (inlength == 0) 171 | return; 172 | @out[outpos.get()] = inlength; 173 | outpos.increment(); 174 | headlessCompress(@in, inpos, inlength, @out, outpos); 175 | } 176 | 177 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 178 | { 179 | if (inlength == 0) 180 | return; 181 | int outlength = @in[inpos.get()]; 182 | inpos.increment(); 183 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 184 | } 185 | 186 | public override string ToString() 187 | { 188 | return nameof(NewPFDS9); 189 | } 190 | } 191 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/OptPFD.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * OptPFD: fast patching scheme by Yan et al. 10 | *

11 | * Follows: 12 | *

13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *

16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *
IntegerCODEC ic = new Composition(new OptPFD(), new VariableByte()).
23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. (Yes, this is true even though 26 | * the "D" at the end of the name probably stands for delta.) 27 | * 28 | * For multi-threaded applications, each thread should use its own OptPFD 29 | * object. 30 | * 31 | * @author Daniel Lemire 32 | */ 33 | namespace Genbox.CSharpFastPFOR 34 | { 35 | public class OptPFD : IntegerCODEC, SkippableIntegerCODEC 36 | { 37 | private const int BLOCK_SIZE = 128; 38 | 39 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 40 | 41 | /** 42 | * Constructor for the OptPFD CODEC. 43 | */ 44 | public OptPFD() 45 | { 46 | } 47 | 48 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 49 | { 50 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 51 | if (inlength == 0) 52 | return; 53 | encodePage(@in, inpos, inlength, @out, outpos); 54 | } 55 | 56 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 57 | 11, 12, 13, 16, 20, 32 }; 58 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 59 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 60 | 16, 16, 16, 16, 16, 16, 16 }; 61 | 62 | private void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 63 | { 64 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 65 | int mini = 0; 66 | if (mini + 28 < bits[invbits[mb]]) 67 | mini = bits[invbits[mb]] - 28; // 28 is the max for 68 | // exceptions 69 | int besti = bits.Length - 1; 70 | int bestcost = bits[besti] * 4; 71 | int exceptcounter = 0; 72 | for (int i = mini; i < bits.Length - 1; ++i) 73 | { 74 | int tmpcounter = 0; 75 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 76 | if ((int)((uint)@in[k] >> bits[i]) != 0) 77 | { 78 | ++tmpcounter; 79 | } 80 | if (tmpcounter == BLOCK_SIZE) 81 | continue; // no need 82 | for (int k = pos, c = 0; k < pos + BLOCK_SIZE; ++k) 83 | if ((int)((uint)@in[k] >> bits[i]) != 0) 84 | { 85 | exceptbuffer[tmpcounter + c] = k - pos; 86 | exceptbuffer[c] = (int)((uint)@in[k] >> bits[i]); 87 | ++c; 88 | } 89 | 90 | int thiscost = bits[i] * 4 + S16.estimatecompress(exceptbuffer, 0, 2 * tmpcounter); 91 | if (thiscost <= bestcost) 92 | { 93 | bestcost = thiscost; 94 | besti = i; 95 | exceptcounter = tmpcounter; 96 | } 97 | } 98 | 99 | bestb.set(besti); 100 | bestexcept.set(exceptcounter); 101 | } 102 | 103 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 104 | { 105 | int tmpoutpos = outpos.get(); 106 | int tmpinpos = inpos.get(); 107 | IntWrapper bestb = new IntWrapper(); 108 | IntWrapper bestexcept = new IntWrapper(); 109 | for (int finalinpos = tmpinpos + thissize; tmpinpos + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 110 | { 111 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 112 | int tmpbestb = bestb.get(); 113 | int nbrexcept = bestexcept.get(); 114 | int exceptsize = 0; 115 | int remember = tmpoutpos; 116 | tmpoutpos++; 117 | if (nbrexcept > 0) 118 | { 119 | int c = 0; 120 | for (int i = 0; i < BLOCK_SIZE; ++i) 121 | { 122 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 123 | { 124 | exceptbuffer[c + nbrexcept] = i; 125 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 126 | ++c; 127 | } 128 | } 129 | exceptsize = S16.compress(exceptbuffer, 0, 130 | 2 * nbrexcept, @out, tmpoutpos); 131 | tmpoutpos += exceptsize; 132 | } 133 | @out[remember] = tmpbestb | (nbrexcept << 8) 134 | | (exceptsize << 16); 135 | for (int k = 0; k < BLOCK_SIZE; k += 32) 136 | { 137 | BitPacking.fastpack(@in, tmpinpos + k, @out, 138 | tmpoutpos, bits[tmpbestb]); 139 | tmpoutpos += bits[tmpbestb]; 140 | } 141 | } 142 | inpos.set(tmpinpos); 143 | outpos.set(tmpoutpos); 144 | } 145 | 146 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 147 | { 148 | if (inlength == 0) 149 | return; 150 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 151 | decodePage(@in, inpos, @out, outpos, mynvalue); 152 | } 153 | 154 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 155 | { 156 | int tmpoutpos = outpos.get(); 157 | int tmpinpos = inpos.get(); 158 | 159 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 160 | { 161 | int b = @in[tmpinpos] & 0xFF; 162 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 163 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 164 | ++tmpinpos; 165 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 166 | 0, 2 * cexcept); 167 | tmpinpos += exceptsize; 168 | for (int k = 0; k < BLOCK_SIZE; k += 32) 169 | { 170 | BitPacking.fastunpack(@in, tmpinpos, @out, 171 | tmpoutpos + k, bits[b]); 172 | tmpinpos += bits[b]; 173 | } 174 | for (int k = 0; k < cexcept; ++k) 175 | { 176 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 177 | } 178 | } 179 | outpos.set(tmpoutpos); 180 | inpos.set(tmpinpos); 181 | } 182 | 183 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 184 | { 185 | inlength = inlength / BLOCK_SIZE * BLOCK_SIZE; 186 | if (inlength == 0) 187 | return; 188 | @out[outpos.get()] = inlength; 189 | outpos.increment(); 190 | headlessCompress(@in, inpos, inlength, @out, outpos); 191 | } 192 | 193 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 194 | { 195 | if (inlength == 0) 196 | return; 197 | int outlength = @in[inpos.get()]; 198 | inpos.increment(); 199 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 200 | } 201 | 202 | public override string ToString() 203 | { 204 | return nameof(OptPFD); 205 | } 206 | } 207 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/OptPFDS16.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * OptPFD based on Simple16 by Yan et al. 10 | *

11 | * Follows: 12 | *

13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *

16 | * using Simple16 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *
IntegerCODEC ic = new Composition(new OptPFDS16(), new VariableByte()).
23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. 26 | * 27 | * For multi-threaded applications, each thread should use its own OptPFDS16 28 | * object. 29 | * 30 | * @author Daniel Lemire 31 | */ 32 | namespace Genbox.CSharpFastPFOR 33 | { 34 | public class OptPFDS16 : IntegerCODEC, SkippableIntegerCODEC 35 | { 36 | private const int BLOCK_SIZE = 128; 37 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 38 | 39 | /** 40 | * Constructor for the OptPFDS16 CODEC. 41 | */ 42 | public OptPFDS16() 43 | { 44 | } 45 | 46 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 47 | { 48 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 49 | if (inlength == 0) 50 | return; 51 | 52 | encodePage(@in, inpos, inlength, @out, outpos); 53 | } 54 | 55 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 56 | 11, 12, 13, 16, 20, 32 }; 57 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 58 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 59 | 16, 16, 16, 16, 16, 16, 16 }; 60 | 61 | private void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 62 | { 63 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 64 | int mini = 0; 65 | if (mini + 28 < bits[invbits[mb]]) 66 | mini = bits[invbits[mb]] - 28; // 28 is the max for 67 | // exceptions 68 | int besti = bits.Length - 1; 69 | int bestcost = bits[besti] * 4; 70 | int exceptcounter = 0; 71 | for (int i = mini; i < bits.Length - 1; ++i) 72 | { 73 | int tmpcounter = 0; 74 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 75 | if ((int)((uint)@in[k] >> bits[i]) != 0) 76 | { 77 | ++tmpcounter; 78 | } 79 | if (tmpcounter == BLOCK_SIZE) 80 | continue; // no need 81 | for (int k = pos, c = 0; k < pos + BLOCK_SIZE; ++k) 82 | if ((int)((uint)@in[k] >> bits[i]) != 0) 83 | { 84 | exceptbuffer[tmpcounter + c] = k - pos; 85 | exceptbuffer[c] = (int)((uint)@in[k] >> bits[i]); 86 | ++c; 87 | } 88 | 89 | int thiscost = bits[i] * 4 + S16.estimatecompress(exceptbuffer, 0, 2 * tmpcounter); 90 | if (thiscost <= bestcost) 91 | { 92 | bestcost = thiscost; 93 | besti = i; 94 | exceptcounter = tmpcounter; 95 | } 96 | } 97 | bestb.set(besti); 98 | bestexcept.set(exceptcounter); 99 | } 100 | 101 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos) 102 | { 103 | int tmpoutpos = outpos.get(); 104 | int tmpinpos = inpos.get(); 105 | IntWrapper bestb = new IntWrapper(); 106 | IntWrapper bestexcept = new IntWrapper(); 107 | for (int finalinpos = tmpinpos + thissize; tmpinpos + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 108 | { 109 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 110 | int tmpbestb = bestb.get(); 111 | int nbrexcept = bestexcept.get(); 112 | int exceptsize = 0; 113 | int remember = tmpoutpos; 114 | tmpoutpos++; 115 | if (nbrexcept > 0) 116 | { 117 | int c = 0; 118 | for (int i = 0; i < BLOCK_SIZE; ++i) 119 | { 120 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 121 | { 122 | exceptbuffer[c + nbrexcept] = i; 123 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 124 | ++c; 125 | } 126 | } 127 | exceptsize = S16.compress(exceptbuffer, 0, 128 | 2 * nbrexcept, @out, tmpoutpos); 129 | tmpoutpos += exceptsize; 130 | } 131 | @out[remember] = tmpbestb | (nbrexcept << 8) 132 | | (exceptsize << 16); 133 | for (int k = 0; k < BLOCK_SIZE; k += 32) 134 | { 135 | BitPacking.fastpack(@in, tmpinpos + k, @out, 136 | tmpoutpos, bits[tmpbestb]); 137 | tmpoutpos += bits[tmpbestb]; 138 | } 139 | } 140 | inpos.set(tmpinpos); 141 | outpos.set(tmpoutpos); 142 | } 143 | 144 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 145 | { 146 | if (inlength == 0) 147 | return; 148 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 149 | decodePage(@in, inpos, @out, outpos, mynvalue); 150 | } 151 | 152 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 153 | { 154 | int tmpoutpos = outpos.get(); 155 | int tmpinpos = inpos.get(); 156 | 157 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 158 | { 159 | int b = @in[tmpinpos] & 0xFF; 160 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 161 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 162 | ++tmpinpos; 163 | S16.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 164 | 0, 2 * cexcept); 165 | tmpinpos += exceptsize; 166 | for (int k = 0; k < BLOCK_SIZE; k += 32) 167 | { 168 | BitPacking.fastunpack(@in, tmpinpos, @out, 169 | tmpoutpos + k, bits[b]); 170 | tmpinpos += bits[b]; 171 | } 172 | for (int k = 0; k < cexcept; ++k) 173 | { 174 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 175 | } 176 | } 177 | outpos.set(tmpoutpos); 178 | inpos.set(tmpinpos); 179 | } 180 | 181 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 182 | { 183 | inlength = inlength / BLOCK_SIZE * BLOCK_SIZE; 184 | if (inlength == 0) 185 | return; 186 | @out[outpos.get()] = inlength; 187 | outpos.increment(); 188 | headlessCompress(@in, inpos, inlength, @out, outpos); 189 | } 190 | 191 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 192 | { 193 | if (inlength == 0) 194 | return; 195 | int outlength = @in[inpos.get()]; 196 | inpos.increment(); 197 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 198 | } 199 | 200 | public override string ToString() 201 | { 202 | return nameof(OptPFDS16); 203 | } 204 | } 205 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/OptPFDS9.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * OptPFD based on Simple9 by Yan et al. 10 | *

11 | * Follows: 12 | *

13 | * H. Yan, S. Ding, T. Suel, Inverted index compression and query processing 14 | * with optimized document ordering, in: WWW 09, 2009, pp. 401-410. 15 | *

16 | * using Simple9 as the secondary coder. 17 | * 18 | * It encodes integers in blocks of 128 integers. For arrays containing 19 | * an arbitrary number of integers, you should use it in conjunction 20 | * with another CODEC: 21 | * 22 | *
 IntegerCODEC ic = new Composition(new OptPFDS9(), new VariableByte()).
23 | * 24 | * Note that this does not use differential coding: if you are working on sorted 25 | * lists, you must compute the deltas separately. 26 | * 27 | * For multi-threaded applications, each thread should use its own OptPFDS9 28 | * object. 29 | * 30 | * @author Daniel Lemire 31 | */ 32 | namespace Genbox.CSharpFastPFOR 33 | { 34 | public class OptPFDS9 : IntegerCODEC, SkippableIntegerCODEC 35 | { 36 | private const int BLOCK_SIZE = 128; 37 | private int[] exceptbuffer = new int[2 * BLOCK_SIZE]; 38 | 39 | /** 40 | * Constructor for the OptPFDS9 CODEC. 41 | */ 42 | public OptPFDS9() 43 | { 44 | } 45 | 46 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 47 | { 48 | inlength = Util.greatestMultiple(inlength, BLOCK_SIZE); 49 | if (inlength == 0) 50 | return; 51 | encodePage(@in, inpos, inlength, @out, outpos); 52 | } 53 | 54 | private static int[] bits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 55 | 11, 12, 13, 16, 20, 32 }; 56 | private static int[] invbits = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 57 | 10, 11, 12, 13, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 16, 58 | 16, 16, 16, 16, 16, 16, 16 }; 59 | 60 | private void getBestBFromData(int[] @in, int pos, IntWrapper bestb, IntWrapper bestexcept) 61 | { 62 | int mb = Util.maxbits(@in, pos, BLOCK_SIZE); 63 | int mini = 0; 64 | if (mini + 28 < bits[invbits[mb]]) 65 | mini = bits[invbits[mb]] - 28; // 28 is the max for 66 | // exceptions 67 | int besti = bits.Length - 1; 68 | int bestcost = bits[besti] * 4; 69 | int exceptcounter = 0; 70 | for (int i = mini; i < bits.Length - 1; ++i) 71 | { 72 | int tmpcounter = 0; 73 | for (int k = pos; k < BLOCK_SIZE + pos; ++k) 74 | if ((int)((uint)@in[k] >> bits[i]) != 0) 75 | { 76 | ++tmpcounter; 77 | } 78 | if (tmpcounter == BLOCK_SIZE) 79 | continue; // no need 80 | for (int k = pos, c = 0; k < pos + BLOCK_SIZE; ++k) 81 | if ((int)((uint)@in[k] >> bits[i]) != 0) 82 | { 83 | exceptbuffer[tmpcounter + c] = k - pos; 84 | exceptbuffer[c] = (int)((uint)@in[k] >> bits[i]); 85 | ++c; 86 | } 87 | 88 | int thiscost = bits[i] * 4 + S9.estimatecompress(exceptbuffer, 0, 2 * tmpcounter); 89 | if (thiscost <= bestcost) 90 | { 91 | bestcost = thiscost; 92 | besti = i; 93 | exceptcounter = tmpcounter; 94 | } 95 | } 96 | bestb.set(besti); 97 | bestexcept.set(exceptcounter); 98 | } 99 | 100 | private void encodePage(int[] @in, IntWrapper inpos, int thissize, 101 | int[] @out, IntWrapper outpos) 102 | { 103 | int tmpoutpos = outpos.get(); 104 | int tmpinpos = inpos.get(); 105 | IntWrapper bestb = new IntWrapper(); 106 | IntWrapper bestexcept = new IntWrapper(); 107 | for (int finalinpos = tmpinpos + thissize; tmpinpos + BLOCK_SIZE <= finalinpos; tmpinpos += BLOCK_SIZE) 108 | { 109 | getBestBFromData(@in, tmpinpos, bestb, bestexcept); 110 | int tmpbestb = bestb.get(); 111 | int nbrexcept = bestexcept.get(); 112 | int exceptsize = 0; 113 | int remember = tmpoutpos; 114 | tmpoutpos++; 115 | if (nbrexcept > 0) 116 | { 117 | int c = 0; 118 | for (int i = 0; i < BLOCK_SIZE; ++i) 119 | { 120 | if ((int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]) != 0) 121 | { 122 | exceptbuffer[c + nbrexcept] = i; 123 | exceptbuffer[c] = (int)((uint)@in[tmpinpos + i] >> bits[tmpbestb]); 124 | ++c; 125 | } 126 | } 127 | exceptsize = S9.compress(exceptbuffer, 0, 128 | 2 * nbrexcept, @out, tmpoutpos); 129 | tmpoutpos += exceptsize; 130 | } 131 | @out[remember] = tmpbestb | (nbrexcept << 8) 132 | | (exceptsize << 16); 133 | for (int k = 0; k < BLOCK_SIZE; k += 32) 134 | { 135 | BitPacking.fastpack(@in, tmpinpos + k, @out, 136 | tmpoutpos, bits[tmpbestb]); 137 | tmpoutpos += bits[tmpbestb]; 138 | } 139 | } 140 | inpos.set(tmpinpos); 141 | outpos.set(tmpoutpos); 142 | } 143 | 144 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int mynvalue) 145 | { 146 | if (inlength == 0) 147 | return; 148 | mynvalue = Util.greatestMultiple(mynvalue, BLOCK_SIZE); 149 | decodePage(@in, inpos, @out, outpos, mynvalue); 150 | } 151 | 152 | private void decodePage(int[] @in, IntWrapper inpos, int[] @out, IntWrapper outpos, int thissize) 153 | { 154 | int tmpoutpos = outpos.get(); 155 | int tmpinpos = inpos.get(); 156 | 157 | for (int run = 0; run < thissize / BLOCK_SIZE; ++run, tmpoutpos += BLOCK_SIZE) 158 | { 159 | int b = @in[tmpinpos] & 0xFF; 160 | int cexcept = (int)((uint)@in[tmpinpos] >> 8) & 0xFF; 161 | int exceptsize = (int)((uint)@in[tmpinpos] >> 16); 162 | ++tmpinpos; 163 | S9.uncompress(@in, tmpinpos, exceptsize, exceptbuffer, 164 | 0, 2 * cexcept); 165 | tmpinpos += exceptsize; 166 | for (int k = 0; k < BLOCK_SIZE; k += 32) 167 | { 168 | BitPacking.fastunpack(@in, tmpinpos, @out, 169 | tmpoutpos + k, bits[b]); 170 | tmpinpos += bits[b]; 171 | } 172 | for (int k = 0; k < cexcept; ++k) 173 | { 174 | @out[tmpoutpos + exceptbuffer[k + cexcept]] |= (exceptbuffer[k] << bits[b]); 175 | } 176 | } 177 | outpos.set(tmpoutpos); 178 | inpos.set(tmpinpos); 179 | } 180 | 181 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 182 | { 183 | inlength = inlength / BLOCK_SIZE * BLOCK_SIZE; 184 | if (inlength == 0) 185 | return; 186 | @out[outpos.get()] = inlength; 187 | outpos.increment(); 188 | headlessCompress(@in, inpos, inlength, @out, outpos); 189 | } 190 | 191 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 192 | { 193 | if (inlength == 0) 194 | return; 195 | int outlength = @in[inpos.get()]; 196 | inpos.increment(); 197 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 198 | } 199 | 200 | public override string ToString() 201 | { 202 | return nameof(OptPFDS9); 203 | } 204 | } 205 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/Arrays.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Linq; 3 | 4 | namespace Genbox.CSharpFastPFOR.Port 5 | { 6 | public class Arrays 7 | { 8 | private static void rangeCheck(int arrayLen, int fromIndex, int toIndex) 9 | { 10 | if (fromIndex > toIndex) 11 | throw new ArgumentOutOfRangeException("fromIndex(" + fromIndex + ") > toIndex(" + toIndex + ")"); 12 | 13 | if (fromIndex < 0) 14 | throw new ArgumentOutOfRangeException(nameof(fromIndex)); 15 | 16 | if (toIndex > arrayLen) 17 | throw new ArgumentOutOfRangeException(nameof(toIndex)); 18 | } 19 | 20 | public static void fill(T[] array, int start, int end, T value) 21 | { 22 | if (array == null) 23 | throw new ArgumentNullException(nameof(array)); 24 | 25 | rangeCheck(array.Length, start, end); 26 | 27 | for (int i = start; i < end; i++) 28 | { 29 | array[i] = value; 30 | } 31 | } 32 | 33 | public static T[] copyOf(T[] original, int newLength) 34 | { 35 | T[] copy = new T[newLength]; 36 | Array.Copy(original, 0, copy, 0, Math.Min(original.Length, newLength)); 37 | return copy; 38 | } 39 | 40 | public static void fill(T[] array, T value) 41 | { 42 | fill(array, 0, array.Length, value); 43 | } 44 | 45 | public static void sort(int[] array) 46 | { 47 | Array.Sort(array); 48 | } 49 | 50 | public static string toString(int[] ints) 51 | { 52 | return string.Join(", ", ints); 53 | } 54 | 55 | public static bool equals(int[] first, int[] second) 56 | { 57 | return first.SequenceEqual(second); 58 | } 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/BitSet.cs: -------------------------------------------------------------------------------- 1 | using System.Collections; 2 | 3 | namespace Genbox.CSharpFastPFOR.Port 4 | { 5 | public class BitSet 6 | { 7 | private BitArray _bitArray; 8 | 9 | public BitSet(int max) 10 | { 11 | _bitArray = new BitArray(max); 12 | } 13 | 14 | public bool get(int i) 15 | { 16 | return _bitArray[i]; 17 | } 18 | 19 | public void set(int i) 20 | { 21 | _bitArray[i] = true; 22 | } 23 | 24 | public int nextSetBit(int fromIndex) 25 | { 26 | for (int j = fromIndex; j < _bitArray.Length; j++) 27 | { 28 | if (_bitArray[j]) 29 | return j; 30 | } 31 | 32 | return -1; 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/ByteBuffer.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | 3 | namespace Genbox.CSharpFastPFOR.Port 4 | { 5 | public class ByteBuffer 6 | { 7 | public readonly MemoryStream _ms; 8 | private readonly BinaryWriter _bw; 9 | private readonly BinaryReader _br; 10 | private ByteOrder _order; 11 | 12 | public ByteBuffer(int length) 13 | { 14 | _ms = new MemoryStream(); 15 | _ms.SetLength(length); 16 | 17 | _bw = new BinaryWriter(_ms); 18 | _br = new BinaryReader(_ms); 19 | 20 | _order = ByteOrder.BIG_ENDIAN; //To simulate Java 21 | } 22 | 23 | internal static ByteBuffer allocateDirect(int length) 24 | { 25 | return new ByteBuffer(length); 26 | } 27 | 28 | public void order(ByteOrder order) 29 | { 30 | _order = order; 31 | } 32 | 33 | public int position() 34 | { 35 | return (int)_ms.Position; 36 | } 37 | 38 | public void put(sbyte b) 39 | { 40 | _bw.Write(b); 41 | } 42 | 43 | public void flip() 44 | { 45 | _ms.Position = 0; 46 | } 47 | 48 | public IntBuffer asIntBuffer() 49 | { 50 | return new IntBuffer(_ms, _order); 51 | } 52 | 53 | public void clear() 54 | { 55 | byte[] empty = new byte[_ms.Length]; 56 | _ms.Position = 0; 57 | _ms.Write(empty, 0, empty.Length); 58 | _ms.Position = 0; 59 | } 60 | 61 | public sbyte get() 62 | { 63 | return _br.ReadSByte(); 64 | } 65 | 66 | internal void put(int[] src, int offset, int length) 67 | { 68 | //TODO: port this 69 | //checkBounds(offset, length, src.Length); 70 | //if (length > remaining()) 71 | // throw new BufferOverflowException(); 72 | 73 | int end = offset + length; 74 | for (int i = offset; i < end; i++) 75 | _bw.Write(src[i]); 76 | } 77 | } 78 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/ByteOrder.cs: -------------------------------------------------------------------------------- 1 | namespace Genbox.CSharpFastPFOR.Port 2 | { 3 | public enum ByteOrder 4 | { 5 | LITTLE_ENDIAN, 6 | BIG_ENDIAN 7 | } 8 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/IntBuffer.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | 4 | namespace Genbox.CSharpFastPFOR.Port 5 | { 6 | public class IntBuffer 7 | { 8 | private readonly MemoryStream _ms; 9 | private readonly ByteOrder _order; 10 | 11 | public IntBuffer(MemoryStream ms, ByteOrder order) 12 | { 13 | _ms = ms; 14 | _order = order; 15 | } 16 | 17 | public void get(int[] dst, int offset, int length) 18 | { 19 | int end = offset + length; 20 | 21 | BinaryReader br = new BinaryReader(_ms); 22 | 23 | for (int i = offset; i < end; i++) 24 | { 25 | int value = br.ReadInt32(); 26 | 27 | if (BitConverter.IsLittleEndian && _order == ByteOrder.BIG_ENDIAN) 28 | value = reverseBytes(value); 29 | 30 | dst[i] = value; 31 | } 32 | } 33 | 34 | public static int reverseBytes(int i) 35 | { 36 | return ((int)((uint)i >> 24)) | 37 | ((i >> 8) & 0xFF00) | 38 | ((i << 8) & 0xFF0000) | 39 | ((i << 24)); 40 | } 41 | 42 | private static void checkBounds(int off, int len, int size) 43 | { 44 | if ((off | len | (off + len) | (size - (off + len))) < 0) 45 | throw new IndexOutOfRangeException(); 46 | } 47 | } 48 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Port/Integer.cs: -------------------------------------------------------------------------------- 1 | namespace Genbox.CSharpFastPFOR.Port 2 | { 3 | public class Integer 4 | { 5 | private int v; 6 | 7 | public Integer(int v) 8 | { 9 | this.v = v; 10 | } 11 | 12 | public static int numberOfLeadingZeros(int x) 13 | { 14 | x |= (x >> 1); 15 | x |= (x >> 2); 16 | x |= (x >> 4); 17 | x |= (x >> 8); 18 | x |= (x >> 16); 19 | return (sizeof(int) * 8 - Ones(x)); 20 | } 21 | 22 | public static int Ones(int x) 23 | { 24 | x -= ((x >> 1) & 0x55555555); 25 | x = (((x >> 2) & 0x33333333) + (x & 0x33333333)); 26 | x = (((x >> 4) + x) & 0x0f0f0f0f); 27 | x += (x >> 8); 28 | x += (x >> 16); 29 | return (x & 0x0000003f); 30 | } 31 | } 32 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/S16.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Version of Simple16 for NewPFD and OptPFD. 10 | *

11 | * Adapted by D. Lemire from the Apache Lucene project. 12 | *

13 | */ 14 | 15 | using System; 16 | 17 | namespace Genbox.CSharpFastPFOR 18 | { 19 | public class S16 20 | { 21 | /** 22 | * Compress an integer array using Simple16 23 | * 24 | * 25 | * @param in 26 | * array to compress 27 | * @param currentPos 28 | * where to start reading 29 | * @param inlength 30 | * how many integers to read 31 | * @param out output array 32 | * @param tmpoutpos location in the output array 33 | * @return the number of 32-bit words written (in compressed form) 34 | */ 35 | public static int compress(int[] @in, int currentPos, int inlength, int[] @out, int tmpoutpos) 36 | { 37 | int outpos = tmpoutpos; 38 | int finalin = currentPos + inlength; 39 | while (currentPos < finalin) 40 | { 41 | int inoffset = compressblock(@out, outpos++, @in, 42 | currentPos, inlength); 43 | if (inoffset == -1) 44 | throw new Exception("Too big a number"); 45 | currentPos += inoffset; 46 | inlength -= inoffset; 47 | } 48 | return outpos - tmpoutpos; 49 | } 50 | 51 | /** 52 | * Estimate size of the compressed output. 53 | * 54 | * @param in 55 | * array to compress 56 | * @param currentPos 57 | * where to start reading 58 | * @param inlength 59 | * how many integers to read 60 | * @return estimated size of the output (in 32-bit integers) 61 | */ 62 | public static int estimatecompress(int[] @in, int currentPos, int inlength) 63 | { 64 | int finalin = currentPos + inlength; 65 | int counter = 0; 66 | while (currentPos < finalin) 67 | { 68 | int inoffset = fakecompressblock(@in, currentPos, 69 | inlength); 70 | if (inoffset == -1) 71 | throw new Exception("Too big a number"); 72 | currentPos += inoffset; 73 | inlength -= inoffset; 74 | ++counter; 75 | } 76 | return counter; 77 | } 78 | 79 | /** 80 | * Compress an integer array using Simple16 81 | * 82 | * @param out 83 | * the compressed output 84 | * @param outOffset 85 | * the offset of the output in the number of integers 86 | * @param in 87 | * the integer input array 88 | * @param inOffset 89 | * the offset of the input in the number of integers 90 | * @param n 91 | * the number of elements to be compressed 92 | * @return the size of the outputs in 32-bit integers 93 | * 94 | */ 95 | public static int compressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n) 96 | { 97 | int numIdx, j, num, bits; 98 | for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) 99 | { 100 | @out[outOffset] = numIdx << S16_BITSSIZE; 101 | num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; 102 | 103 | for (j = 0, bits = 0; (j < num) 104 | && (@in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) 105 | { 106 | @out[outOffset] |= (@in[inOffset + j] << bits); 107 | bits += S16_BITS[numIdx][j]; 108 | j++; 109 | } 110 | 111 | if (j == num) 112 | { 113 | return num; 114 | } 115 | } 116 | 117 | return -1; 118 | } 119 | 120 | private static int fakecompressblock(int[] @in, int inOffset, int n) 121 | { 122 | int numIdx, j, num; 123 | for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) 124 | { 125 | num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; 126 | 127 | for (j = 0; (j < num) 128 | && (@in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) 129 | { 130 | j++; 131 | } 132 | 133 | if (j == num) 134 | { 135 | return num; 136 | } 137 | } 138 | 139 | return -1; 140 | } 141 | 142 | /** 143 | * Decompress an integer array using Simple16 144 | * 145 | * @param out 146 | * the decompressed output 147 | * @param outOffset 148 | * the offset of the output in the number of integers 149 | * @param in 150 | * the compressed input array 151 | * @param inOffset 152 | * the offset of the input in the number of integers 153 | * @param n 154 | * the number of elements to be compressed 155 | * @return the number of processed integers 156 | */ 157 | public static int decompressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n) 158 | { 159 | int numIdx, j = 0, bits = 0; 160 | numIdx = (int)((uint)@in[inOffset] >> S16_BITSSIZE); 161 | int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; 162 | for (j = 0, bits = 0; j < num; j++) 163 | { 164 | @out[outOffset + j] = (int)((uint)@in[inOffset] >> bits) & (int)((uint)0xffffffff >> (32 - S16_BITS[numIdx][j])); 165 | bits += S16_BITS[numIdx][j]; 166 | } 167 | return num; 168 | } 169 | 170 | /** 171 | * Uncompressed data from an input array into an output array 172 | * 173 | * @param in input array (in compressed form) 174 | * @param tmpinpos starting location in the compressed input array 175 | * @param inlength how much data we wish the read (in 32-bit words) 176 | * @param out output array (in decompressed form) 177 | * @param currentPos current position in the output array 178 | * @param outlength available data in the output array 179 | */ 180 | public static void uncompress(int[] @in, int tmpinpos, int inlength, int[] @out, int currentPos, int outlength) 181 | { 182 | int pos = tmpinpos + inlength; 183 | while (tmpinpos < pos) 184 | { 185 | int howmany = decompressblock(@out, currentPos, 186 | @in, tmpinpos, outlength); 187 | outlength -= howmany; 188 | currentPos += howmany; 189 | tmpinpos += 1; 190 | } 191 | } 192 | 193 | private static int[][] shiftme(int[][] x) 194 | { 195 | int[][] answer = new int[x.Length][]; 196 | for (int k = 0; k < x.Length; ++k) 197 | { 198 | answer[k] = new int[x[k].Length]; 199 | for (int z = 0; z < answer[k].Length; ++z) 200 | answer[k][z] = 1 << x[k][z]; 201 | } 202 | return answer; 203 | } 204 | 205 | private const int S16_NUMSIZE = 16; 206 | private const int S16_BITSSIZE = 28; 207 | // the possible number of bits used to represent one integer 208 | private static int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; 209 | // the corresponding number of elements for each value of the number of 210 | // bits 211 | private static int[][] S16_BITS = { 212 | new[]{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 213 | new[]{ 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 214 | new[]{ 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, 215 | new[]{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, 216 | new[]{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 217 | new[]{ 4, 3, 3, 3, 3, 3, 3, 3, 3 }, new[] { 3, 4, 4, 4, 4, 3, 3, 3 }, 218 | new[]{ 4, 4, 4, 4, 4, 4, 4 }, new[]{ 5, 5, 5, 5, 4, 4 }, 219 | new[]{ 4, 4, 5, 5, 5, 5 }, new[]{ 6, 6, 6, 5, 5 }, new[]{ 5, 5, 6, 6, 6 }, 220 | new[]{ 7, 7, 7, 7 }, new[]{ 10, 9, 9, }, new[]{ 14, 14 }, new[]{ 28 } }; 221 | private static int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); 222 | 223 | public override string ToString() 224 | { 225 | return nameof(S16); 226 | } 227 | } 228 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Simple16.cs: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | /** 7 | * This is an implementation of the popular Simple16 scheme. It is limited to 8 | * 28-bit integers (between 0 and 2^28-1). 9 | * 10 | * Note that this does not use differential coding: if you are working on sorted 11 | * lists, you must compute the deltas separately. 12 | * 13 | *

14 | * Adapted by D. Lemire from the Apache Lucene project. 15 | *

16 | */ 17 | 18 | using System; 19 | 20 | namespace Genbox.CSharpFastPFOR 21 | { 22 | public class Simple16 : IntegerCODEC, SkippableIntegerCODEC 23 | { 24 | 25 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 26 | { 27 | int i_inpos = inpos.get(); 28 | int i_outpos = outpos.get(); 29 | int finalin = i_inpos + inlength; 30 | while (i_inpos < finalin) 31 | { 32 | int inoffset = compressblock(@out, i_outpos++, @in, i_inpos, inlength); 33 | if (inoffset == -1) 34 | throw new Exception("Too big a number"); 35 | i_inpos += inoffset; 36 | inlength -= inoffset; 37 | } 38 | inpos.set(i_inpos); 39 | outpos.set(i_outpos); 40 | } 41 | 42 | /** 43 | * Compress an integer array using Simple16 44 | * 45 | * @param out 46 | * the compressed output 47 | * @param outOffset 48 | * the offset of the output in the number of integers 49 | * @param in 50 | * the integer input array 51 | * @param inOffset 52 | * the offset of the input in the number of integers 53 | * @param n 54 | * the number of elements to be compressed 55 | * @return the number of compressed integers 56 | */ 57 | public static int compressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n) 58 | { 59 | int numIdx, j, num, bits; 60 | for (numIdx = 0; numIdx < S16_NUMSIZE; numIdx++) 61 | { 62 | @out[outOffset] = numIdx << S16_BITSSIZE; 63 | num = (S16_NUM[numIdx] < n) ? S16_NUM[numIdx] : n; 64 | 65 | for (j = 0, bits = 0; (j < num) 66 | && (@in[inOffset + j] < SHIFTED_S16_BITS[numIdx][j]);) 67 | { 68 | @out[outOffset] |= (@in[inOffset + j] << bits); 69 | bits += S16_BITS[numIdx][j]; 70 | j++; 71 | } 72 | 73 | if (j == num) 74 | { 75 | return num; 76 | } 77 | } 78 | 79 | return -1; 80 | } 81 | 82 | /** 83 | * Decompress an integer array using Simple16 84 | * 85 | * @param out 86 | * the decompressed output 87 | * @param outOffset 88 | * the offset of the output in the number of integers 89 | * @param in 90 | * the compressed input array 91 | * @param inOffset 92 | * the offset of the input in the number of integers 93 | * @param n 94 | * the number of elements to be compressed 95 | * @return the number of processed integers 96 | */ 97 | public static int decompressblock(int[] @out, int outOffset, int[] @in, int inOffset, int n) 98 | { 99 | int numIdx, j = 0, bits = 0; 100 | numIdx = (int)((uint)@in[inOffset] >> S16_BITSSIZE); 101 | int num = S16_NUM[numIdx] < n ? S16_NUM[numIdx] : n; 102 | for (j = 0, bits = 0; j < num; j++) 103 | { 104 | @out[outOffset + j] = (int)((uint)@in[inOffset] >> bits) & (int)((uint)0xffffffff >> (32 - S16_BITS[numIdx][j])); 105 | bits += S16_BITS[numIdx][j]; 106 | } 107 | return num; 108 | } 109 | 110 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num) 111 | { 112 | int i_inpos = inpos.get(); 113 | int i_outpos = outpos.get(); 114 | while (num > 0) 115 | { 116 | int howmany = decompressblock(@out, i_outpos, @in, i_inpos, num); 117 | num -= howmany; 118 | i_outpos += howmany; 119 | i_inpos++; 120 | } 121 | inpos.set(i_inpos); 122 | outpos.set(i_outpos); 123 | } 124 | 125 | /** 126 | * Uncompress data from an array to another array. 127 | * 128 | * Both inpos and outpos parameters are modified to indicate new positions 129 | * after read/write. 130 | * 131 | * @param in 132 | * array containing data in compressed form 133 | * @param tmpinpos 134 | * where to start reading in the array 135 | * @param inlength 136 | * length of the compressed data (ignored by some schemes) 137 | * @param out 138 | * array where to write the compressed output 139 | * @param currentPos 140 | * where to write the compressed output in out 141 | * @param outlength 142 | * number of integers we want to decode 143 | */ 144 | public static void uncompress(int[] @in, int tmpinpos, int inlength, int[] @out, int currentPos, int outlength) 145 | { 146 | int pos = tmpinpos + inlength; 147 | while (tmpinpos < pos) 148 | { 149 | int howmany = decompressblock(@out, currentPos, @in, tmpinpos, 150 | outlength); 151 | outlength -= howmany; 152 | currentPos += howmany; 153 | tmpinpos += 1; 154 | } 155 | } 156 | 157 | private static int[][] shiftme(int[][] x) 158 | { 159 | int[][] answer = new int[x.Length][]; 160 | for (int k = 0; k < x.Length; ++k) 161 | { 162 | answer[k] = new int[x[k].Length]; 163 | for (int z = 0; z < answer[k].Length; ++z) 164 | answer[k][z] = 1 << x[k][z]; 165 | } 166 | return answer; 167 | } 168 | 169 | public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 170 | { 171 | if (inlength == 0) 172 | return; 173 | @out[outpos.get()] = inlength; 174 | outpos.increment(); 175 | headlessCompress(@in, inpos, inlength, @out, outpos); 176 | } 177 | 178 | public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 179 | { 180 | if (inlength == 0) 181 | return; 182 | int outlength = @in[inpos.get()]; 183 | inpos.increment(); 184 | headlessUncompress(@in, inpos, inlength, @out, outpos, outlength); 185 | 186 | } 187 | 188 | private const int S16_NUMSIZE = 16; 189 | private const int S16_BITSSIZE = 28; 190 | // the possible number of bits used to represent one integer 191 | private static int[] S16_NUM = { 28, 21, 21, 21, 14, 9, 8, 7, 6, 6, 5, 5, 4, 3, 2, 1 }; 192 | // the corresponding number of elements for each value of the number of bits 193 | private static int[][] S16_BITS = { 194 | new[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1 }, 195 | new[]{ 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, 196 | new[]{ 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1 }, 197 | new[]{ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2 }, 198 | new[]{ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }, 199 | new[]{ 4, 3, 3, 3, 3, 3, 3, 3, 3 }, new[] { 3, 4, 4, 4, 4, 3, 3, 3 }, 200 | new[]{ 4, 4, 4, 4, 4, 4, 4 }, new[]{ 5, 5, 5, 5, 4, 4 }, 201 | new[]{ 4, 4, 5, 5, 5, 5 }, new[] { 6, 6, 6, 5, 5 }, new[] { 5, 5, 6, 6, 6 }, 202 | new[]{ 7, 7, 7, 7 }, new[] { 10, 9, 9, }, new[]{ 14, 14 }, new[]{ 28 } }; 203 | private static int[][] SHIFTED_S16_BITS = shiftme(S16_BITS); 204 | 205 | public override string ToString() 206 | { 207 | return nameof(Simple16); 208 | } 209 | } 210 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/SkippableComposition.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Helper class to compose schemes. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | 14 | namespace Genbox.CSharpFastPFOR 15 | { 16 | public class SkippableComposition : SkippableIntegerCODEC 17 | { 18 | private SkippableIntegerCODEC F1; 19 | private SkippableIntegerCODEC F2; 20 | 21 | /** 22 | * Compose a scheme from a first one (f1) and a second one (f2). The first 23 | * one is called first and then the second one tries to compress whatever 24 | * remains from the first run. 25 | * 26 | * By convention, the first scheme should be such that if, during decoding, 27 | * a 32-bit zero is first encountered, then there is no output. 28 | * 29 | * @param f1 30 | * first codec 31 | * @param f2 32 | * second codec 33 | */ 34 | public SkippableComposition(SkippableIntegerCODEC f1, SkippableIntegerCODEC f2) 35 | { 36 | F1 = f1; 37 | F2 = f2; 38 | } 39 | 40 | public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos) 41 | { 42 | int init = inpos.get(); 43 | F1.headlessCompress(@in, inpos, inlength, @out, outpos); 44 | inlength -= inpos.get() - init; 45 | F2.headlessCompress(@in, inpos, inlength, @out, outpos); 46 | } 47 | 48 | public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num) 49 | { 50 | int init = inpos.get(); 51 | F1.headlessUncompress(@in, inpos, inlength, @out, outpos, num); 52 | inlength -= inpos.get() - init; 53 | num -= outpos.get(); 54 | F2.headlessUncompress(@in, inpos, inlength, @out, outpos, num); 55 | } 56 | 57 | public override string ToString() 58 | { 59 | return F1 + "+" + F2; 60 | } 61 | } 62 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/SkippableIntegerCODEC.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This is code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * Interface describing a standard CODEC to compress integers. This is a 10 | * variation on the IntegerCODEC interface meant to be used for random access. 11 | * 12 | * The main difference is that we must specify the number of integers we wish to 13 | * decode. This information should be stored elsewhere. 14 | * 15 | * This interface was designed by the Terrier team for their search engine. 16 | * 17 | * @author Daniel Lemire 18 | * 19 | */ 20 | namespace Genbox.CSharpFastPFOR 21 | { 22 | public interface SkippableIntegerCODEC 23 | { 24 | /** 25 | * Compress data from an array to another array. 26 | * 27 | * Both inpos and outpos are modified to represent how much data was read 28 | * and written to if 12 ints (inlength = 12) are compressed to 3 ints, then 29 | * inpos will be incremented by 12 while outpos will be incremented by 3 we 30 | * use IntWrapper to pass the values by reference. 31 | * 32 | * @param in 33 | * input array 34 | * @param inpos 35 | * location in the input array 36 | * @param inlength 37 | * how many integers to compress 38 | * @param out 39 | * output array 40 | * @param outpos 41 | * where to write in the output array 42 | */ 43 | void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos); 44 | 45 | /** 46 | * Uncompress data from an array to another array. 47 | * 48 | * Both inpos and outpos parameters are modified to indicate new positions 49 | * after read/write. 50 | * 51 | * @param in 52 | * array containing data in compressed form 53 | * @param inpos 54 | * where to start reading in the array 55 | * @param inlength 56 | * length of the compressed data (ignored by some schemes) 57 | * @param out 58 | * array where to write the compressed output 59 | * @param outpos 60 | * where to write the compressed output in out 61 | * @param num 62 | * number of integers we want to decode, the actual number of integers decoded can be less 63 | */ 64 | void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num); 65 | } 66 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Synth/ClusteredDataGenerator.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * This class will generate lists of random integers based on the clustered 10 | * model: 11 | * 12 | * Reference: Vo Ngoc Anh and Alistair Moffat. 2010. Index compression using 13 | * 64-bit words. Softw. Pract. Exper.40, 2 (February 2010), 131-147. 14 | * 15 | * @author Daniel Lemire 16 | */ 17 | 18 | namespace Genbox.CSharpFastPFOR.Synth 19 | { 20 | public class ClusteredDataGenerator 21 | { 22 | private readonly UniformDataGenerator unidg = new UniformDataGenerator(); 23 | 24 | /** 25 | * Creating random array generator. 26 | */ 27 | public ClusteredDataGenerator() 28 | { 29 | } 30 | 31 | private void fillUniform(int[] array, int offset, int length, int Min, int Max) 32 | { 33 | int[] v = this.unidg.generateUniform(length, Max - Min); 34 | for (int k = 0; k < v.Length; ++k) 35 | array[k + offset] = Min + v[k]; 36 | } 37 | 38 | private void fillClustered(int[] array, int offset, int length, int Min, int Max) 39 | { 40 | int range = Max - Min; 41 | if ((range == length) || (length <= 10)) 42 | { 43 | fillUniform(array, offset, length, Min, Max); 44 | return; 45 | } 46 | int cut = length / 2 + ((range - length - 1 > 0) ? this.unidg.rand.Next(range - length - 1) : 0); 47 | double p = this.unidg.rand.NextDouble(); 48 | if (p < 0.25) 49 | { 50 | fillUniform(array, offset, length / 2, Min, Min + cut); 51 | fillClustered(array, offset + length / 2, length - length / 2, Min + cut, Max); 52 | } 53 | else if (p < 0.5) 54 | { 55 | fillClustered(array, offset, length / 2, Min, Min + cut); 56 | fillUniform(array, offset + length / 2, length - length / 2, Min + cut, Max); 57 | } 58 | else 59 | { 60 | fillClustered(array, offset, length / 2, Min, Min + cut); 61 | fillClustered(array, offset + length / 2, length - length / 2, Min + cut, Max); 62 | } 63 | } 64 | 65 | /** 66 | * generates randomly N distinct integers from 0 to Max. 67 | * 68 | * @param N 69 | * number of integers to generate 70 | * @param Max 71 | * maximal value of the integers 72 | * @return array containing the integers 73 | */ 74 | public int[] generateClustered(int N, int Max) 75 | { 76 | int[] array = new int[N]; 77 | fillClustered(array, 0, N, 0, Max); 78 | return array; 79 | } 80 | } 81 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Synth/UniformDataGenerator.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | /** 9 | * This class will generate "uniform" lists of random integers. 10 | * 11 | * @author Daniel Lemire 12 | */ 13 | 14 | using System; 15 | using System.Collections.Generic; 16 | using Genbox.CSharpFastPFOR.Port; 17 | 18 | namespace Genbox.CSharpFastPFOR.Synth 19 | { 20 | public class UniformDataGenerator 21 | { 22 | /** 23 | * construct generator of random arrays. 24 | */ 25 | public UniformDataGenerator() 26 | { 27 | this.rand = new Random(); 28 | } 29 | 30 | /** 31 | * @param seed 32 | * random seed 33 | */ 34 | public UniformDataGenerator(int seed) 35 | { 36 | this.rand = new Random(seed); 37 | } 38 | 39 | /** 40 | * generates randomly N distinct integers from 0 to Max. 41 | */ 42 | 43 | private int[] generateUniformHash(int N, int Max) 44 | { 45 | if (N > Max) 46 | throw new Exception("not possible"); 47 | 48 | int[] ans = new int[N]; 49 | HashSet s = new HashSet(); 50 | while (s.Count < N) 51 | s.Add(rand.Next(Max)); 52 | 53 | //Iterator i = s.iterator(); 54 | HashSet.Enumerator i = s.GetEnumerator(); 55 | 56 | for (int k = 0; k < N; ++k) 57 | { 58 | ans[k] = i.Current; 59 | i.MoveNext(); 60 | } 61 | 62 | Arrays.sort(ans); 63 | return ans; 64 | } 65 | 66 | /** 67 | * output all integers from the range [0,Max) that are not in the array 68 | */ 69 | private static int[] negate(int[] x, int Max) 70 | { 71 | int[] ans = new int[Max - x.Length]; 72 | int i = 0; 73 | int c = 0; 74 | for (int j = 0; j < x.Length; ++j) 75 | { 76 | int v = x[j]; 77 | for (; i < v; ++i) 78 | ans[c++] = i; 79 | ++i; 80 | } 81 | while (c < ans.Length) 82 | ans[c++] = i++; 83 | return ans; 84 | } 85 | 86 | /** 87 | * generates randomly N distinct integers from 0 to Max. 88 | * 89 | * @param N 90 | * number of integers to generate 91 | * @param Max 92 | * bound on the value of integers 93 | * @return an array containing randomly selected integers 94 | */ 95 | public int[] generateUniform(int N, int Max) 96 | { 97 | if (N * 2 > Max) 98 | { 99 | return negate(generateUniform(Max - N, Max), Max); 100 | } 101 | if (2048 * N > Max) 102 | return generateUniformBitmap(N, Max); 103 | return generateUniformHash(N, Max); 104 | } 105 | 106 | /** 107 | * generates randomly N distinct integers from 0 to Max. 108 | */ 109 | private int[] generateUniformBitmap(int N, int Max) 110 | { 111 | if (N > Max) 112 | throw new Exception("not possible"); 113 | int[] ans = new int[N]; 114 | BitSet bs = new BitSet(Max); 115 | int cardinality = 0; 116 | while (cardinality < N) 117 | { 118 | int v = rand.Next(Max); 119 | if (!bs.get(v)) 120 | { 121 | bs.set(v); 122 | cardinality++; 123 | } 124 | } 125 | int pos = 0; 126 | for (int i = bs.nextSetBit(0); i >= 0; i = bs.nextSetBit(i + 1)) 127 | { 128 | ans[pos++] = i; 129 | } 130 | return ans; 131 | } 132 | 133 | public readonly Random rand = new Random(); 134 | } 135 | } -------------------------------------------------------------------------------- /src/CSharpFastPFOR/Util.cs: -------------------------------------------------------------------------------- 1 | /** 2 | * This code is released under the 3 | * Apache License Version 2.0 http://www.apache.org/licenses/. 4 | * 5 | * (c) Daniel Lemire, http://lemire.me/en/ 6 | */ 7 | 8 | 9 | /** 10 | * Routine utility functions. 11 | * 12 | * @author Daniel Lemire 13 | * 14 | */ 15 | 16 | using System; 17 | using Genbox.CSharpFastPFOR.Port; 18 | 19 | namespace Genbox.CSharpFastPFOR 20 | { 21 | public class Util 22 | { 23 | /** 24 | * Compute the maximum of the integer logarithms (ceil(log(x+1)) of a range 25 | * of value 26 | * 27 | * @param i 28 | * source array 29 | * @param pos 30 | * starting position 31 | * @param length 32 | * number of integers to consider 33 | * @return integer logarithm 34 | */ 35 | public static int maxbits(int[] i, int pos, int length) 36 | { 37 | int mask = 0; 38 | for (int k = pos; k < pos + length; ++k) 39 | mask |= i[k]; 40 | return bits(mask); 41 | } 42 | 43 | public static int maxbits32(int[] i, int pos) 44 | { 45 | int mask = i[pos]; 46 | mask |= i[pos + 1]; 47 | mask |= i[pos + 2]; 48 | mask |= i[pos + 3]; 49 | mask |= i[pos + 4]; 50 | mask |= i[pos + 5]; 51 | mask |= i[pos + 6]; 52 | mask |= i[pos + 7]; 53 | mask |= i[pos + 8]; 54 | mask |= i[pos + 9]; 55 | mask |= i[pos + 10]; 56 | mask |= i[pos + 11]; 57 | mask |= i[pos + 12]; 58 | mask |= i[pos + 13]; 59 | mask |= i[pos + 14]; 60 | mask |= i[pos + 15]; 61 | mask |= i[pos + 16]; 62 | mask |= i[pos + 17]; 63 | mask |= i[pos + 18]; 64 | mask |= i[pos + 19]; 65 | mask |= i[pos + 20]; 66 | mask |= i[pos + 21]; 67 | mask |= i[pos + 22]; 68 | mask |= i[pos + 23]; 69 | mask |= i[pos + 24]; 70 | mask |= i[pos + 25]; 71 | mask |= i[pos + 26]; 72 | mask |= i[pos + 27]; 73 | mask |= i[pos + 28]; 74 | mask |= i[pos + 29]; 75 | mask |= i[pos + 30]; 76 | mask |= i[pos + 31]; 77 | return bits(mask); 78 | } 79 | 80 | /** 81 | * Compute the maximum of the integer logarithms (ceil(log(x+1)) of a the 82 | * successive differences (deltas) of a range of value 83 | * 84 | * @param initoffset 85 | * initial vallue for the computation of the deltas 86 | * @param i 87 | * source array 88 | * @param pos 89 | * starting position 90 | * @param length 91 | * number of integers to consider 92 | * @return integer logarithm 93 | */ 94 | public static int maxdiffbits(int initoffset, int[] i, int pos, int length) 95 | { 96 | int mask = 0; 97 | mask |= (i[pos] - initoffset); 98 | for (int k = pos + 1; k < pos + length; ++k) 99 | { 100 | mask |= i[k] - i[k - 1]; 101 | } 102 | return bits(mask); 103 | } 104 | 105 | /** 106 | * Compute the integer logarithms (ceil(log(x+1)) of a value 107 | * 108 | * @param i 109 | * source value 110 | * @return integer logarithm 111 | */ 112 | public static int bits(int i) 113 | { 114 | return 32 - Integer.numberOfLeadingZeros(i); 115 | } 116 | 117 | public static int packsize(int num, int b) 118 | { 119 | if (b > 16) 120 | return num; 121 | int howmanyfit = 32 / b; 122 | return (num + howmanyfit - 1) / howmanyfit; 123 | } 124 | 125 | public static int pack(int[] outputarray, int arraypos, int[] data, int datapos, int num, int b) 126 | { 127 | if (num == 0) 128 | return arraypos; 129 | if (b > 16) 130 | { 131 | Array.Copy(data, datapos, outputarray, arraypos, num); 132 | return num + arraypos; 133 | } 134 | for (int k = 0; k < packsize(num, b); ++k) 135 | outputarray[k + arraypos] = 0; 136 | int inwordpointer = 0; 137 | for (int k = 0; k < num; ++k) 138 | { 139 | outputarray[arraypos] |= (data[k + datapos] << inwordpointer); 140 | inwordpointer += b; 141 | int increment = ((inwordpointer + b - 1) >> 5); 142 | arraypos += increment; 143 | inwordpointer &= ~(-increment); 144 | } 145 | return arraypos + (inwordpointer > 0 ? 1 : 0); 146 | } 147 | 148 | public static int unpack(int[] sourcearray, int arraypos, int[] data, int datapos, int num, int b) 149 | { 150 | if (b > 16) 151 | { 152 | Array.Copy(sourcearray, arraypos, data, 0, num); 153 | return num + arraypos; 154 | } 155 | int mask = (1 << b) - 1; 156 | int inwordpointer = 0; 157 | for (int k = 0; k < num; ++k) 158 | { 159 | data[k + datapos] = (int)(((uint)sourcearray[arraypos] >> inwordpointer) & mask); 160 | inwordpointer += b; 161 | int increment = ((inwordpointer + b - 1) >> 5); 162 | arraypos += increment; 163 | inwordpointer &= ~(-increment); 164 | } 165 | return arraypos + (inwordpointer > 0 ? 1 : 0); 166 | } 167 | 168 | public static int packsizew(int num, int b) 169 | { 170 | int howmanyfit = 32 / b; 171 | if (num <= howmanyfit) 172 | return 1; 173 | return num; 174 | } 175 | 176 | public static int packw(int[] outputarray, int arraypos, int[] data, int num, int b) 177 | { 178 | int howmanyfit = 32 / b; 179 | if (num > howmanyfit) 180 | { 181 | Array.Copy(data, 0, outputarray, arraypos, num); 182 | return num + arraypos; 183 | } 184 | outputarray[arraypos] = 0; 185 | int inwordpointer = 0; 186 | for (int k = 0; k < num; ++k) 187 | { 188 | outputarray[arraypos] |= (data[k] << inwordpointer); 189 | inwordpointer += b; 190 | } 191 | return arraypos + 1; 192 | } 193 | 194 | public static int unpackw(int[] sourcearray, int arraypos, int[] data, int num, int b) 195 | { 196 | int howmanyfit = 32 / b; 197 | if (num > howmanyfit) 198 | { 199 | Array.Copy(sourcearray, arraypos, data, 0, num); 200 | return num + arraypos; 201 | } 202 | int mask = (1 << b) - 1; 203 | int val = sourcearray[arraypos]; 204 | for (int k = 0; k < num; ++k) 205 | { 206 | data[k] = (val & mask); 207 | val >>= b; 208 | } 209 | return arraypos + 1; 210 | } 211 | 212 | /** 213 | * return floor(value / factor) * factor 214 | * 215 | * @param value 216 | * numerator 217 | * @param factor 218 | * denominator 219 | * @return greatest multiple of factor no larger than value 220 | */ 221 | public static int greatestMultiple(int value, int factor) 222 | { 223 | return value - value % factor; 224 | } 225 | } 226 | } --------------------------------------------------------------------------------