├── .gitignore
├── CMakeLists.txt
├── FecalCommon.cpp
├── FecalCommon.h
├── FecalDecoder.cpp
├── FecalDecoder.h
├── FecalEncoder.cpp
├── FecalEncoder.h
├── License.md
├── README.md
├── fecal.cpp
├── fecal.h
├── gf256.cpp
├── gf256.h
├── proj
    └── msvc
    │   ├── Fecal.sln
    │   ├── LibFecal.vcxproj
    │   └── LibFecal.vcxproj.filters
└── tests
    ├── benchmark.cpp
    └── msvc
        ├── FecalBenchmark.vcxproj
        └── FecalBenchmark.vcxproj.filters


/.gitignore:
--------------------------------------------------------------------------------
  1 | ## Ignore Visual Studio temporary files, build results, and
  2 | ## files generated by popular Visual Studio add-ons.
  3 | 
  4 | # User-specific files
  5 | *.suo
  6 | *.user
  7 | *.userosscache
  8 | *.sln.docstates
  9 | 
 10 | # User-specific files (MonoDevelop/Xamarin Studio)
 11 | *.userprefs
 12 | 
 13 | # Build results
 14 | [Dd]ebug/
 15 | [Dd]ebugPublic/
 16 | [Rr]elease/
 17 | [Rr]eleases/
 18 | x64/
 19 | x86/
 20 | bld/
 21 | [Bb]in/
 22 | [Oo]bj/
 23 | [Ll]og/
 24 | 
 25 | # Visual Studio 2015 cache/options directory
 26 | .vs/
 27 | # Uncomment if you have tasks that create the project's static files in wwwroot
 28 | #wwwroot/
 29 | 
 30 | # MSTest test Results
 31 | [Tt]est[Rr]esult*/
 32 | [Bb]uild[Ll]og.*
 33 | 
 34 | # NUNIT
 35 | *.VisualState.xml
 36 | TestResult.xml
 37 | 
 38 | # Build Results of an ATL Project
 39 | [Dd]ebugPS/
 40 | [Rr]eleasePS/
 41 | dlldata.c
 42 | 
 43 | # DNX
 44 | project.lock.json
 45 | artifacts/
 46 | 
 47 | *_i.c
 48 | *_p.c
 49 | *_i.h
 50 | *.ilk
 51 | *.meta
 52 | *.obj
 53 | *.pch
 54 | *.pdb
 55 | *.pgc
 56 | *.pgd
 57 | *.rsp
 58 | *.sbr
 59 | *.tlb
 60 | *.tli
 61 | *.tlh
 62 | *.tmp
 63 | *.tmp_proj
 64 | *.log
 65 | *.vspscc
 66 | *.vssscc
 67 | .builds
 68 | *.pidb
 69 | *.svclog
 70 | *.scc
 71 | 
 72 | # Chutzpah Test files
 73 | _Chutzpah*
 74 | 
 75 | # Visual C++ cache files
 76 | ipch/
 77 | *.aps
 78 | *.ncb
 79 | *.opendb
 80 | *.opensdf
 81 | *.sdf
 82 | *.cachefile
 83 | *.VC.db
 84 | *.VC.VC.opendb
 85 | 
 86 | # Visual Studio profiler
 87 | *.psess
 88 | *.vsp
 89 | *.vspx
 90 | *.sap
 91 | 
 92 | # TFS 2012 Local Workspace
 93 | $tf/
 94 | 
 95 | # Guidance Automation Toolkit
 96 | *.gpState
 97 | 
 98 | # ReSharper is a .NET coding add-in
 99 | _ReSharper*/
100 | *.[Rr]e[Ss]harper
101 | *.DotSettings.user
102 | 
103 | # JustCode is a .NET coding add-in
104 | .JustCode
105 | 
106 | # TeamCity is a build add-in
107 | _TeamCity*
108 | 
109 | # DotCover is a Code Coverage Tool
110 | *.dotCover
111 | 
112 | # NCrunch
113 | _NCrunch_*
114 | .*crunch*.local.xml
115 | nCrunchTemp_*
116 | 
117 | # MightyMoose
118 | *.mm.*
119 | AutoTest.Net/
120 | 
121 | # Web workbench (sass)
122 | .sass-cache/
123 | 
124 | # Installshield output folder
125 | [Ee]xpress/
126 | 
127 | # DocProject is a documentation generator add-in
128 | DocProject/buildhelp/
129 | DocProject/Help/*.HxT
130 | DocProject/Help/*.HxC
131 | DocProject/Help/*.hhc
132 | DocProject/Help/*.hhk
133 | DocProject/Help/*.hhp
134 | DocProject/Help/Html2
135 | DocProject/Help/html
136 | 
137 | # Click-Once directory
138 | publish/
139 | 
140 | # Publish Web Output
141 | *.[Pp]ublish.xml
142 | *.azurePubxml
143 | # TODO: Comment the next line if you want to checkin your web deploy settings
144 | # but database connection strings (with potential passwords) will be unencrypted
145 | *.pubxml
146 | *.publishproj
147 | 
148 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
149 | # checkin your Azure Web App publish settings, but sensitive information contained
150 | # in these scripts will be unencrypted
151 | PublishScripts/
152 | 
153 | # NuGet Packages
154 | *.nupkg
155 | # The packages folder can be ignored because of Package Restore
156 | **/packages/*
157 | # except build/, which is used as an MSBuild target.
158 | !**/packages/build/
159 | # Uncomment if necessary however generally it will be regenerated when needed
160 | #!**/packages/repositories.config
161 | # NuGet v3's project.json files produces more ignoreable files
162 | *.nuget.props
163 | *.nuget.targets
164 | 
165 | # Microsoft Azure Build Output
166 | csx/
167 | *.build.csdef
168 | 
169 | # Microsoft Azure Emulator
170 | ecf/
171 | rcf/
172 | 
173 | # Windows Store app package directories and files
174 | AppPackages/
175 | BundleArtifacts/
176 | Package.StoreAssociation.xml
177 | _pkginfo.txt
178 | 
179 | # Visual Studio cache files
180 | # files ending in .cache can be ignored
181 | *.[Cc]ache
182 | # but keep track of directories ending in .cache
183 | !*.[Cc]ache/
184 | 
185 | # Others
186 | ClientBin/
187 | ~$*
188 | *~
189 | *.dbmdl
190 | *.dbproj.schemaview
191 | *.pfx
192 | *.publishsettings
193 | node_modules/
194 | orleans.codegen.cs
195 | 
196 | # Since there are multiple workflows, uncomment next line to ignore bower_components
197 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
198 | #bower_components/
199 | 
200 | # RIA/Silverlight projects
201 | Generated_Code/
202 | 
203 | # Backup & report files from converting an old project file
204 | # to a newer Visual Studio version. Backup files are not needed,
205 | # because we have git ;-)
206 | _UpgradeReport_Files/
207 | Backup*/
208 | UpgradeLog*.XML
209 | UpgradeLog*.htm
210 | 
211 | # SQL Server files
212 | *.mdf
213 | *.ldf
214 | 
215 | # Business Intelligence projects
216 | *.rdl.data
217 | *.bim.layout
218 | *.bim_*.settings
219 | 
220 | # Microsoft Fakes
221 | FakesAssemblies/
222 | 
223 | # GhostDoc plugin setting file
224 | *.GhostDoc.xml
225 | 
226 | # Node.js Tools for Visual Studio
227 | .ntvs_analysis.dat
228 | 
229 | # Visual Studio 6 build log
230 | *.plg
231 | 
232 | # Visual Studio 6 workspace options file
233 | *.opt
234 | 
235 | # Visual Studio LightSwitch build output
236 | **/*.HTMLClient/GeneratedArtifacts
237 | **/*.DesktopClient/GeneratedArtifacts
238 | **/*.DesktopClient/ModelManifest.xml
239 | **/*.Server/GeneratedArtifacts
240 | **/*.Server/ModelManifest.xml
241 | _Pvt_Extensions
242 | 
243 | # Paket dependency manager
244 | .paket/paket.exe
245 | paket-files/
246 | 
247 | # FAKE - F# Make
248 | .fake/
249 | 
# JetBrains Rider
.idea/
*.sln.iml

# Loose text files and built binaries
*.txt
# ...but CMake build scripts are source files and must stay trackable,
# so re-include them after the broad *.txt pattern above.
!CMakeLists.txt
*.lib
*.exe
256 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | cmake_minimum_required(VERSION 3.5)
 2 | project(fecal)
 3 | 
 4 | set(CMAKE_CXX_STANDARD 11)
 5 | 
 6 | # Dependency: GF256 library source files
 7 | set(GF256_LIB_SRCFILES
 8 |         gf256.cpp
 9 |         gf256.h)
10 | 
11 | # FEC-AL library source files
12 | set(FECAL_LIB_SRCFILES
13 |         fecal.cpp
14 |         fecal.h
15 |         FecalCommon.cpp
16 |         FecalCommon.h
17 |         FecalDecoder.cpp
18 |         FecalDecoder.h
19 |         FecalEncoder.cpp
20 |         FecalEncoder.h)
21 | 
22 | add_library(gf256 ${GF256_LIB_SRCFILES})
23 | add_library(fecal ${FECAL_LIB_SRCFILES})
24 | 
25 | add_executable(benchmark tests/benchmark.cpp)
26 | target_link_libraries(benchmark gf256 fecal)
27 | 


--------------------------------------------------------------------------------
/FecalCommon.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #include "FecalCommon.h"
 30 | 
 31 | namespace fecal {
 32 | 
 33 | 
 34 | //------------------------------------------------------------------------------
 35 | // AppDataWindow
 36 | 
 37 | bool AppDataWindow::SetParameters(unsigned input_count, uint64_t total_bytes)
 38 | {
 39 |     if (input_count <= 0 || total_bytes < input_count)
 40 |     {
 41 |         FECAL_DEBUG_BREAK; // Invalid input
 42 |         return false;
 43 |     }
 44 | 
 45 |     InputCount = input_count;
 46 |     TotalBytes = total_bytes;
 47 | 
 48 |     SymbolBytes = static_cast<unsigned>((total_bytes + input_count - 1) / input_count);
 49 |     FinalBytes = static_cast<unsigned>(total_bytes % SymbolBytes);
 50 |     if (FinalBytes <= 0)
 51 |         FinalBytes = SymbolBytes;
 52 | 
 53 |     FECAL_DEBUG_ASSERT(SymbolBytes >= FinalBytes && FinalBytes != 0);
 54 | 
 55 |     return true;
 56 | }
 57 | 
 58 | 
 59 | //------------------------------------------------------------------------------
 60 | // AlignedDataBuffer
 61 | 
 62 | AlignedDataBuffer::~AlignedDataBuffer()
 63 | {
 64 |     SIMDSafeFree(Data);
 65 | }
 66 | 
 67 | bool AlignedDataBuffer::Allocate(unsigned bytes)
 68 | {
 69 |     FECAL_DEBUG_ASSERT(bytes > 0);
 70 |     SIMDSafeFree(Data);
 71 |     Data = SIMDSafeAllocate(bytes);
 72 |     return Data != nullptr;
 73 | }
 74 | 
 75 | 
 76 | //------------------------------------------------------------------------------
 77 | // GrowingAlignedByteMatrix
 78 | 
 79 | GrowingAlignedByteMatrix::~GrowingAlignedByteMatrix()
 80 | {
 81 |     SIMDSafeFree(Data);
 82 | }
 83 | 
 84 | void GrowingAlignedByteMatrix::Free()
 85 | {
 86 |     if (Data)
 87 |     {
 88 |         SIMDSafeFree(Data);
 89 |         Data             = nullptr;
 90 |         AllocatedRows    = 0;
 91 |         AllocatedColumns = 0;
 92 |     }
 93 | }
 94 | 
 95 | bool GrowingAlignedByteMatrix::Initialize(unsigned rows, unsigned columns)
 96 | {
 97 |     Rows    = rows;
 98 |     Columns = columns;
 99 |     AllocatedRows    = rows + kExtraRows;
100 |     AllocatedColumns = NextAlignedOffset(columns + kMinExtraColumns);
101 | 
102 |     SIMDSafeFree(Data);
103 |     Data = SIMDSafeAllocate(AllocatedRows * AllocatedColumns);
104 | 
105 |     return Data != nullptr;
106 | }
107 | 
108 | bool GrowingAlignedByteMatrix::Resize(unsigned rows, unsigned columns)
109 | {
110 |     FECAL_DEBUG_ASSERT(rows > 0 && columns > 0);
111 |     if (rows <= AllocatedRows && columns <= AllocatedColumns)
112 |     {
113 |         Rows    = rows;
114 |         Columns = columns;
115 |         return true;
116 |     }
117 | 
118 |     const unsigned allocatedRows    = rows + kExtraRows;
119 |     const unsigned allocatedColumns = NextAlignedOffset(columns + kMinExtraColumns);
120 | 
121 |     uint8_t* buffer = SIMDSafeAllocate(allocatedRows * allocatedColumns);
122 |     if (!buffer)
123 |     {
124 |         Free();
125 |         return false;
126 |     }
127 | 
128 |     // If we already allocated a buffer:
129 |     if (Data)
130 |     {
131 |         uint8_t* oldBuffer        = Data;
132 |         const unsigned oldColumns = Columns;
133 | 
134 |         if (oldColumns > 0)
135 |         {
136 |             // Maintain old data
137 |             const unsigned oldRows   = Rows;
138 |             const unsigned oldStride = AllocatedColumns;
139 |             uint8_t* destRow = buffer;
140 |             uint8_t* srcRow  = oldBuffer;
141 | 
142 |             unsigned copyCount = oldColumns;
143 |             if (copyCount > columns)
144 |             {
145 |                 FECAL_DEBUG_BREAK; // Should never happen
146 |                 copyCount = columns;
147 |             }
148 | 
149 |             for (unsigned i = 0; i < oldRows; ++i, destRow += allocatedColumns, srcRow += oldStride)
150 |                 memcpy(destRow, srcRow, copyCount);
151 |         }
152 | 
153 |         SIMDSafeFree(oldBuffer);
154 |     }
155 | 
156 |     AllocatedRows    = allocatedRows;
157 |     AllocatedColumns = allocatedColumns;
158 |     Rows    = rows;
159 |     Columns = columns;
160 |     Data    = buffer;
161 |     return true;
162 | }
163 | 
164 | 
165 | } // namespace fecal
166 | 


--------------------------------------------------------------------------------
/FecalCommon.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #pragma once
 30 | 
 31 | /*
 32 |     This module provides core tools and constants used by the codec:
 33 | 
 34 |     + Debugging macros
 35 |     + Alignment
 36 |     + PCGRandom, Int32Hash
 37 |     + Parameters of the Siamese and Cauchy matrix structures
 38 |     + ICodec base class for Encoder and Decoder
 39 |     + EncoderAppDataWindow and DecoderAppDataWindow structures
 40 |     + Growing matrix structure
 41 |     + CustomBitSet
 42 | */
 43 | 
 44 | #ifdef _WIN32
 45 |     #include <intrin.h>
 46 | #endif
 47 | 
 48 | #include "fecal.h"
 49 | #include "gf256.h"
 50 | 
 51 | #include <new>
 52 | #include <vector>
 53 | #include <array>
 54 | #include <algorithm>
 55 | 
 56 | namespace fecal {
 57 | 
 58 | 
 59 | //------------------------------------------------------------------------------
 60 | // Debug
 61 | 
 62 | // Some bugs only repro in release mode, so this can be helpful
 63 | //#define FECAL_DEBUG_IN_RELEASE
 64 | 
// Debug support is enabled when the build defines _DEBUG or DEBUG, or when
// FECAL_DEBUG_IN_RELEASE is defined.
//
// FECAL_DEBUG_BREAK         - stops execution at the call site in debug builds
//                             and expands to an empty statement otherwise.
// FECAL_DEBUG_ASSERT(cond)  - breaks when `cond` is false in debug builds.
//                             In other builds `cond` is not evaluated at all,
//                             so it must not carry side effects.
//
// NOTE(review): FECAL_DEBUG_ASSERT expands to a brace block (debug) or a bare
// `;` (release), so `if (x) FECAL_DEBUG_ASSERT(y); else ...` will not compile.
#if defined(_DEBUG) || defined(DEBUG) || defined(FECAL_DEBUG_IN_RELEASE)
    #define FECAL_DEBUG
    #ifdef _WIN32
        // MSVC intrinsic breakpoint
        #define FECAL_DEBUG_BREAK __debugbreak()
    #else
        // GCC/Clang builtin
        #define FECAL_DEBUG_BREAK __builtin_trap()
    #endif
    #define FECAL_DEBUG_ASSERT(cond) { if (!(cond)) { FECAL_DEBUG_BREAK; } }
#else
    // Debug support disabled: both macros compile to empty statements
    #define FECAL_DEBUG_BREAK ;
    #define FECAL_DEBUG_ASSERT(cond) ;
#endif
 77 | 
 78 | 
 79 | //------------------------------------------------------------------------------
 80 | // PCG PRNG
 81 | // From http://www.pcg-random.org/
 82 | 
 83 | class PCGRandom
 84 | {
 85 | public:
 86 |     inline void Seed(uint64_t y, uint64_t x = 0)
 87 |     {
 88 |         State = 0;
 89 |         Inc = (y << 1u) | 1u;
 90 |         Next();
 91 |         State += x;
 92 |         Next();
 93 |     }
 94 | 
 95 |     inline uint32_t Next()
 96 |     {
 97 |         const uint64_t oldstate = State;
 98 |         State = oldstate * UINT64_C(6364136223846793005) + Inc;
 99 |         const uint32_t xorshifted = (uint32_t)(((oldstate >> 18) ^ oldstate) >> 27);
100 |         const uint32_t rot = oldstate >> 59;
101 |         return (xorshifted >> rot) | (xorshifted << ((uint32_t)(-(int32_t)rot) & 31));
102 |     }
103 | 
104 |     uint64_t State = 0, Inc = 0;
105 | };
106 | 
107 | 
108 | //------------------------------------------------------------------------------
109 | // Int32Hash
110 | 
111 | // Thomas Wang's 32-bit -> 32-bit integer hash function
112 | // http://burtleburtle.net/bob/hash/integer.html
// Mixes a 32-bit key through a fixed sequence of shift/add/xor rounds and
// returns the 32-bit result.  All arithmetic wraps modulo 2^32.
inline uint32_t Int32Hash(uint32_t key)
{
    uint32_t mixed = key;

    mixed = mixed + ~(mixed << 15);
    mixed = mixed ^ (mixed >> 10);
    mixed = mixed + (mixed << 3);
    mixed = mixed ^ (mixed >> 6);
    mixed = mixed + ~(mixed << 11);
    mixed = mixed ^ (mixed >> 16);

    return mixed;
}
123 | 
124 | 
125 | //------------------------------------------------------------------------------
126 | // Code Parameters
127 | 
128 | // Number of values 3..255 that we cycle through
129 | static const unsigned kColumnValuePeriod = 253;
130 | 
131 | // Number of values 1..255 that we cycle through
132 | static const unsigned kRowValuePeriod = 255;
133 | 
134 | 
135 | GF256_FORCE_INLINE uint8_t GetColumnValue(unsigned column)
136 | {
137 |     // Note: This LCG visits each value exactly once
138 |     return (uint8_t)(3 + (column * 199) % kColumnValuePeriod);
139 | }
140 | 
141 | GF256_FORCE_INLINE uint8_t GetRowValue(unsigned row)
142 | {
143 |     return (uint8_t)(1 + (row + 1) % kRowValuePeriod);
144 | }
145 | 
146 | 
147 | // Number of parallel lanes to run
148 | // Lane#(Column) = Column % kColumnLaneCount
149 | static const unsigned kColumnLaneCount = 8;
150 | 
151 | // Number of running sums of original data
152 | // Note: This cannot be tuned without making code changes
153 | static const unsigned kColumnSumCount = 3;
154 | // Sum 0 = Parity XOR of all input data
155 | // Sum 1 = Product #1 sum XOR of all input data times its GetColumnValue()
156 | // Sum 2 = Product #2 sum XOR of all input data times its GetColumnValue() squared
157 | 
158 | // Rate at which we add random pairs of data
159 | static const unsigned kPairAddRate = 16;
160 | 
161 | 
162 | // Calculate operation code for the given row and lane
163 | GF256_FORCE_INLINE unsigned GetRowOpcode(unsigned lane, unsigned row)
164 | {
165 |     FECAL_DEBUG_ASSERT(lane < kColumnLaneCount);
166 |     static const uint32_t kSumMask = (1 << (kColumnSumCount * 2)) - 1;
167 |     static const uint32_t kZeroValue = (1 << ((kColumnSumCount - 1) * 2));
168 | 
169 |     // This offset tunes the quality of the upper left of the generated matrix,
170 |     // which is encountered in practice for the first block of input data
171 |     static const unsigned kArbitraryOffset = 3;
172 | 
173 |     const uint32_t opcode = Int32Hash(lane + (row + kArbitraryOffset) * kColumnLaneCount) & kSumMask;
174 |     return (opcode == 0) ? kZeroValue : (unsigned)opcode;
175 | }
176 | 
177 | 
178 | //------------------------------------------------------------------------------
179 | // ICodec
180 | 
// Common polymorphic base: it declares nothing but a virtual destructor, so
// derived objects can be destroyed through an ICodec pointer.
class ICodec
{
public:
    virtual ~ICodec() = default;
};
186 | 
187 | 
188 | //------------------------------------------------------------------------------
189 | // AlignedDataBuffer
190 | //
191 | // Aligned to cache-line boundaries for SIMD
192 | 
// Owns a single buffer obtained from SIMDSafeAllocate().
// NOTE(review): the struct has a freeing destructor but no custom copy
// operations, so copying an instance would presumably free the same buffer
// twice - confirm that no caller copies it.
struct AlignedDataBuffer
{
    // Start of the owned buffer, or nullptr when nothing is allocated
    uint8_t* Data = nullptr;


    // Free memory
    ~AlignedDataBuffer();

    // Allocate memory
    // Releases any previously held buffer first.
    // Returns false if the allocation failed.
    bool Allocate(unsigned bytes);
};
204 | 
205 | 
206 | //------------------------------------------------------------------------------
207 | // GrowingAlignedByteMatrix
208 | //
209 | // This is a matrix of bytes where the elements are stored in row-first order
210 | // and the first byte element of each row is aligned to cache-line boundaries.
211 | // Furthermore the matrix can grow in rows or columns, keeping existing data.
212 | 
213 | struct GrowingAlignedByteMatrix
214 | {
215 |     // Buffer data
216 |     uint8_t* Data = nullptr;
217 | 
218 |     // Used rows, columns
219 |     unsigned Rows    = 0;
220 |     unsigned Columns = 0;
221 | 
222 |     // Allocate a few extra rows, columns whenenver we grow the matrix
223 |     // This is tuned for the expected maximum recovery failure rate
224 |     static const unsigned kExtraRows       = 4;
225 |     static const unsigned kMinExtraColumns = 4;
226 | 
227 |     // Allocated rows, columns
228 |     unsigned AllocatedRows    = 0;
229 |     unsigned AllocatedColumns = 0;
230 | 
231 | 
232 |     ~GrowingAlignedByteMatrix();
233 | 
234 |     // Initialize matrix to the given size
235 |     // New elements have undefined initial state
236 |     bool Initialize(unsigned rows, unsigned columns);
237 | 
238 |     // Growing mantaining existing data in the buffer
239 |     // New elements have undefined initial state
240 |     bool Resize(unsigned rows, unsigned columns);
241 | 
242 |     uint8_t Get(unsigned row, unsigned column)
243 |     {
244 |         FECAL_DEBUG_ASSERT(Data && row < Rows && column < Columns);
245 |         return Data[row * AllocatedColumns + column];
246 |     }
247 | 
248 |     // Free allocated memory
249 |     void Free();
250 | };
251 | 
252 | 
253 | //------------------------------------------------------------------------------
254 | // Portable Intrinsics
255 | 
256 | // Returns number of bits set in the 64-bit value
257 | GF256_FORCE_INLINE unsigned PopCount64(uint64_t x)
258 | {
259 | #ifdef _MSC_VER
260 | #ifdef _WIN64
261 |     return (unsigned)__popcnt64(x);
262 | #else
263 |     return (unsigned)(__popcnt((uint32_t)x) + __popcnt((uint32_t)(x >> 32)));
264 | #endif
265 | #else // GCC
266 |     return (unsigned)__builtin_popcountll(x);
267 | #endif
268 | }
269 | 
270 | // Returns lowest bit index 0..63 where the first non-zero bit is found
271 | // Precondition: x != 0
272 | GF256_FORCE_INLINE unsigned FirstNonzeroBit64(uint64_t x)
273 | {
274 | #ifdef _MSC_VER
275 | #ifdef _WIN64
276 |     unsigned long index;
277 |     // Note: Ignoring result because x != 0
278 |     _BitScanForward64(&index, x);
279 |     return (unsigned)index;
280 | #else
281 |     unsigned long index;
282 |     if (0 != _BitScanForward(&index, (uint32_t)x))
283 |         return (unsigned)index;
284 |     // Note: Ignoring result because x != 0
285 |     _BitScanForward(&index, (uint32_t)(x >> 32));
286 |     return (unsigned)index + 32;
287 | #endif
288 | #else
289 |     // Note: Ignoring return value of 0 because x != 0
290 |     return (unsigned)__builtin_ffsll(x) - 1;
291 | #endif
292 | }
293 | 
294 | 
295 | //------------------------------------------------------------------------------
296 | // CustomBitSet
297 | 
298 | // Custom std::bitset implementation for speed
299 | template<unsigned N>
300 | struct CustomBitSet
301 | {
302 |     static const unsigned kValidBits = N;
303 |     typedef uint64_t WordT;
304 |     static const unsigned kWordBits = sizeof(WordT) * 8;
305 |     static const unsigned kWords = (kValidBits + kWordBits - 1) / kWordBits;
306 |     static const WordT kAllOnes = UINT64_C(0xffffffffffffffff);
307 | 
308 |     WordT Words[kWords];
309 | 
310 | 
311 |     CustomBitSet()
312 |     {
313 |         ClearAll();
314 |     }
315 | 
316 |     void ClearAll()
317 |     {
318 |         for (unsigned i = 0; i < kWords; ++i)
319 |             Words[i] = 0;
320 |     }
321 |     void SetAll()
322 |     {
323 |         for (unsigned i = 0; i < kWords; ++i)
324 |             Words[i] = kAllOnes;
325 |     }
326 |     void Set(unsigned bit)
327 |     {
328 |         const unsigned word = bit / kWordBits;
329 |         const WordT mask = (WordT)1 << (bit % kWordBits);
330 |         Words[word] |= mask;
331 |     }
332 |     void Clear(unsigned bit)
333 |     {
334 |         const unsigned word = bit / kWordBits;
335 |         const WordT mask = (WordT)1 << (bit % kWordBits);
336 |         Words[word] &= ~mask;
337 |     }
338 |     bool Check(unsigned bit) const
339 |     {
340 |         const unsigned word = bit / kWordBits;
341 |         const WordT mask = (WordT)1 << (bit % kWordBits);
342 |         return 0 != (Words[word] & mask);
343 |     }
344 | 
345 |     /*
346 |         Returns the popcount of the bits within the given range.
347 | 
348 |         bitStart < kValidBits: First bit to test
349 |         bitEnd <= kValidBits: Bit to stop at (non-inclusive)
350 |     */
351 |     unsigned RangePopcount(unsigned bitStart, unsigned bitEnd)
352 |     {
353 |         static_assert(kWordBits == 64, "Update this");
354 | 
355 |         if (bitStart >= bitEnd)
356 |             return 0;
357 | 
358 |         unsigned wordIndex = bitStart / kWordBits;
359 |         const unsigned wordEnd = bitEnd / kWordBits;
360 | 
361 |         // Eliminate low bits of first word
362 |         WordT word = Words[wordIndex] >> (bitStart % kWordBits);
363 | 
364 |         // Eliminate high bits of last word if there is just one word
365 |         if (wordEnd == wordIndex)
366 |             return PopCount64(word << (kWordBits - (bitEnd - bitStart)));
367 | 
368 |         // Count remainder of first word
369 |         unsigned count = PopCount64(word);
370 | 
371 |         // Accumulate popcount of full words
372 |         while (++wordIndex < wordEnd)
373 |             count += PopCount64(Words[wordIndex]);
374 | 
375 |         // Eliminate high bits of last word if there is one
376 |         unsigned lastWordBits = bitEnd - wordIndex * kWordBits;
377 |         if (lastWordBits > 0)
378 |             count += PopCount64(Words[wordIndex] << (kWordBits - lastWordBits));
379 | 
380 |         return count;
381 |     }
382 | 
383 |     /*
384 |         Returns the bit index where the first cleared bit is found.
385 |         Returns kValidBits if all bits are set.
386 | 
387 |         bitStart < kValidBits: Index to start looking
388 |     */
389 |     unsigned FindFirstClear(unsigned bitStart)
390 |     {
391 |         static_assert(kWordBits == 64, "Update this");
392 | 
393 |         unsigned wordStart = bitStart / kWordBits;
394 | 
395 |         WordT word = ~Words[wordStart] >> (bitStart % kWordBits);
396 |         if (word != 0)
397 |         {
398 |             unsigned offset = 0;
399 |             if ((word & 1) == 0)
400 |                 offset = FirstNonzeroBit64(word);
401 |             return bitStart + offset;
402 |         }
403 | 
404 |         for (unsigned i = wordStart + 1; i < kWords; ++i)
405 |         {
406 |             word = ~Words[i];
407 |             if (word != 0)
408 |                 return i * kWordBits + FirstNonzeroBit64(word);
409 |         }
410 | 
411 |         return kValidBits;
412 |     }
413 | 
414 |     /*
415 |         Returns the bit index where the first set bit is found.
416 |         Returns 'bitEnd' if all bits are clear.
417 | 
418 |         bitStart < kValidBits: Index to start looking
419 |         bitEnd <= kValidBits: Index to stop looking at
420 |     */
421 |     unsigned FindFirstSet(unsigned bitStart, unsigned bitEnd = kValidBits)
422 |     {
423 |         static_assert(kWordBits == 64, "Update this");
424 | 
425 |         unsigned wordStart = bitStart / kWordBits;
426 | 
427 |         WordT word = Words[wordStart] >> (bitStart % kWordBits);
428 |         if (word != 0)
429 |         {
430 |             unsigned offset = 0;
431 |             if ((word & 1) == 0)
432 |                 offset = FirstNonzeroBit64(word);
433 |             return bitStart + offset;
434 |         }
435 | 
436 |         const unsigned wordEnd = (bitEnd + kWordBits - 1) / kWordBits;
437 | 
438 |         for (unsigned i = wordStart + 1; i < wordEnd; ++i)
439 |         {
440 |             word = Words[i];
441 |             if (word != 0)
442 |                 return i * kWordBits + FirstNonzeroBit64(word);
443 |         }
444 | 
445 |         return bitEnd;
446 |     }
447 | 
448 |     /*
449 |         Set a range of bits
450 | 
451 |         bitStart < kValidBits: Index at which to start setting
452 |         bitEnd <= kValidBits: Bit to stop at (non-inclusive)
453 |     */
454 |     void SetRange(unsigned bitStart, unsigned bitEnd)
455 |     {
456 |         if (bitStart >= bitEnd)
457 |             return;
458 | 
459 |         unsigned wordStart = bitStart / kWordBits;
460 |         const unsigned wordEnd = bitEnd / kWordBits;
461 | 
462 |         bitStart %= kWordBits;
463 | 
464 |         if (wordEnd == wordStart)
465 |         {
466 |             // This implies x=(bitStart % kWordBits) and y=(bitEnd % kWordBits)
467 |             // are in the same word.  Also: x < y, y < 64, y - x < 64.
468 |             bitEnd %= kWordBits;
469 |             WordT mask = ((WordT)1 << (bitEnd - bitStart)) - 1; // 1..63 bits
470 |             mask <<= bitStart;
471 |             Words[wordStart] |= mask;
472 |             return;
473 |         }
474 | 
475 |         // Set the end of the first word
476 |         Words[wordStart] |= kAllOnes << bitStart;
477 | 
478 |         // Whole words at a time
479 |         for (unsigned i = wordStart + 1; i < wordEnd; ++i)
480 |             Words[i] = kAllOnes;
481 | 
482 |         // Set first few bits of the last word
483 |         unsigned lastWordBits = bitEnd - wordEnd * kWordBits;
484 |         if (lastWordBits > 0)
485 |         {
486 |             WordT mask = ((WordT)1 << lastWordBits) - 1; // 1..63 bits
487 |             Words[wordEnd] |= mask;
488 |         }
489 |     }
490 | 
491 |     /*
492 |         Clear a range of bits
493 | 
494 |         bitStart < kValidBits: Index at which to start clearing
495 |         bitEnd <= kValidBits: Bit to stop at (non-inclusive)
496 |     */
497 |     void ClearRange(unsigned bitStart, unsigned bitEnd)
498 |     {
499 |         // An empty or inverted range clears nothing
500 |         if (bitEnd <= bitStart)
501 |             return;
502 | 
503 |         const unsigned firstWord = bitStart / kWordBits;
504 |         const unsigned lastWord = bitEnd / kWordBits;
505 |         const unsigned firstBit = bitStart % kWordBits;
506 |         const unsigned tailBits = bitEnd % kWordBits;
507 | 
508 |         // Range lies inside a single word: since bitStart < bitEnd here,
509 |         // firstBit < tailBits < kWordBits and the span is 1..kWordBits-1 bits.
510 |         if (firstWord == lastWord)
511 |         {
512 |             const WordT span = ((WordT)1 << (tailBits - firstBit)) - 1;
513 |             Words[firstWord] &= ~(span << firstBit);
514 |             return;
515 |         }
516 | 
517 |         // Drop the bits from firstBit upward in the first word
518 |         Words[firstWord] &= ~(kAllOnes << firstBit);
519 | 
520 |         // Interior words are zeroed outright
521 |         unsigned word = firstWord + 1;
522 |         while (word < lastWord)
523 |             Words[word++] = 0;
524 | 
525 |         // Drop the low tailBits bits of the last word.  When the range ends
526 |         // exactly on a word boundary the last word is not touched at all.
527 |         if (tailBits != 0)
528 |         {
529 |             const WordT low = ((WordT)1 << tailBits) - 1; // 1..kWordBits-1 bits
530 |             Words[lastWord] &= ~low;
531 |         }
532 |     }
533 | };
534 | 
535 | 
536 | //------------------------------------------------------------------------------
537 | // SIMD-Safe Aligned Memory Allocations
538 | 
539 | static const unsigned kAlignmentBytes = GF256_ALIGN_BYTES;
540 | 
541 | GF256_FORCE_INLINE unsigned NextAlignedOffset(unsigned offset)
542 | {
543 |     return ((kAlignmentBytes - 1) + offset) & ~(kAlignmentBytes - 1); // Round up (mask form assumes a power-of-two alignment)
544 | }
545 | 
546 | static GF256_FORCE_INLINE uint8_t* SIMDSafeAllocate(size_t size)
547 | {
548 |     const size_t total = size + kAlignmentBytes; // Room to shift the start up to an aligned address
549 |     uint8_t* data = (total < size) ? nullptr : (uint8_t*)calloc(1, total); // total < size: the sum wrapped
550 |     if (!data)
551 |         return nullptr; // Request too large or out of memory
552 |     const unsigned offset = (unsigned)((uintptr_t)data % kAlignmentBytes);
553 |     data[kAlignmentBytes - offset - 1] = (uint8_t)offset; // Byte just below the result, read by SIMDSafeFree()
554 |     return data + (kAlignmentBytes - offset); // Zero-filled and aligned to kAlignmentBytes
555 | }
556 | 
557 | static GF256_FORCE_INLINE void SIMDSafeFree(void* ptr)
558 | {
559 |     // Freeing a null pointer is a no-op, as with free()
560 |     if (ptr == nullptr)
561 |         return;
562 |     uint8_t* const aligned = static_cast<uint8_t*>(ptr);
563 |     const unsigned stored = aligned[-1]; // Byte written by SIMDSafeAllocate()
564 |     if (stored < kAlignmentBytes)
565 |     {
566 |         free(aligned - (kAlignmentBytes - stored)); // Back to the pointer calloc() returned
567 |         return;
568 |     }
569 |     FECAL_DEBUG_BREAK; // Should never happen: the buffer is left unfreed
570 | }
571 | 
572 | 
573 | //------------------------------------------------------------------------------
574 | // AppDataWindow
575 | 
576 | // Base class for app data window shared between encoder and decoder
577 | struct AppDataWindow
578 | {
579 |     // Parameters describing the application data
580 |     unsigned InputCount = 0;   // How many input symbols there are
581 |     uint64_t TotalBytes = 0;   // Byte count of all input data together
582 |     unsigned FinalBytes = 0;   // Byte count of the last symbol
583 |     unsigned SymbolBytes = 0;  // Byte count of every symbol except the last
584 | 
585 | 
586 |     // Store the window parameters.  Call this before anything else.
587 |     // Returns false when the input is invalid
588 |     bool SetParameters(unsigned input_count, uint64_t total_bytes);
589 | 
590 |     GF256_FORCE_INLINE bool IsFinalColumn(unsigned column)
591 |     {
592 |         return (column + 1 == InputCount);
593 |     }
594 | 
595 |     // Byte length of the given column: only the final one uses FinalBytes
596 |     GF256_FORCE_INLINE unsigned GetColumnBytes(unsigned column)
597 |     {
598 |         return (column + 1 == InputCount) ? FinalBytes : SymbolBytes;
599 |     }
600 | };
601 | 
602 | 
603 | //------------------------------------------------------------------------------
604 | // XORSummer
605 | 
606 | // This optimization speeds up the codec by 15%
607 | #define FECAL_ADD2_OPT
608 | 
609 | class XORSummer
610 | {
611 | public:
612 |     // Set the addition destination and byte count, discarding any pending source
613 |     GF256_FORCE_INLINE void Initialize(uint8_t* dest, unsigned bytes)
614 |     {
615 |         DestBuffer = dest;
616 |         Bytes = bytes;
617 |         Waiting = nullptr;
618 |     }
619 | 
620 |     // Accumulate some source data.  src must stay valid until the next Add() or Finalize()
621 |     GF256_FORCE_INLINE void Add(const uint8_t* src)
622 |     {
623 | #ifdef FECAL_ADD2_OPT
624 |         if (Waiting)
625 |         {
626 |             gf256_add2_mem(DestBuffer, src, Waiting, Bytes); // Add the pair in one pass
627 |             src = nullptr;
628 |         }
629 |         Waiting = src; // Either parks the first of a pair or empties the slot
630 | #else
631 |         gf256_add_mem(DestBuffer, src, Bytes);
632 | #endif
633 |     }
634 | 
635 |     // Finalize in the destination buffer: flushes a source still waiting for its pair
636 |     GF256_FORCE_INLINE void Finalize()
637 |     {
638 | #ifdef FECAL_ADD2_OPT
639 |         if (Waiting)
640 |             gf256_add_mem(DestBuffer, Waiting, Bytes);
641 |         Waiting = nullptr; // A second Finalize() or a later Add() must not add it again
642 | #endif
643 |     }
644 | 
645 | protected:
646 |     uint8_t* DestBuffer = nullptr;    // Buffer the sources are added into
647 |     unsigned Bytes = 0;               // Bytes processed per source buffer
648 |     const uint8_t* Waiting = nullptr; // Source held back until a partner arrives
649 | };
650 | 
651 | 
652 | } // namespace fecal
653 | 


--------------------------------------------------------------------------------
/FecalDecoder.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #include "FecalDecoder.h"
 30 | 
 31 | namespace fecal {
 32 | 
 33 | 
 34 | //------------------------------------------------------------------------------
 35 | // DecoderAppDataWindow
 36 | 
 37 | void DecoderAppDataWindow::AllocateOriginals()
 38 | {
 39 |     // One slot per original symbol
 40 |     OriginalData.resize(InputCount);
 41 |     // Pre-reserve room for recovery symbols: one fifth of the input count, plus one
 42 |     RecoveryData.reserve(1 + InputCount / 5);
 43 |     // Enough subwindows to cover every element, rounding up
 44 |     SubwindowCount = (InputCount + (kSubwindowSize - 1)) / kSubwindowSize;
 45 |     Subwindows.resize(SubwindowCount);
 46 | }
 47 | 
 48 | bool DecoderAppDataWindow::AddOriginal(unsigned column, uint8_t* data)
 49 | {
 50 |     auto& slot = OriginalData[column];
 51 |     // Duplicate delivery: keep the pointer stored first and report nothing new
 52 |     if (slot.Data != nullptr)
 53 |         return false;
 54 | 
 55 |     // First sighting: store the pointer and update the bookkeeping
 56 |     slot.Data = data;
 57 |     MarkGotElement(column);
 58 |     ++OriginalGotCount;
 59 |     return true;
 60 | }
 61 | 
 62 | bool DecoderAppDataWindow::AddRecovery(uint8_t* data, unsigned row)
 63 | {
 64 |     FECAL_DEBUG_ASSERT(InputCount > 0); // SetParameters() must be called first
 65 | 
 66 |     // RowSet refuses a row number it already holds; such duplicates are dropped
 67 |     const bool isNewRow = RowSet.insert(row).second;
 68 |     if (!isNewRow)
 69 |         return false;
 70 | 
 71 |     // Append a record for the new row; it has not been used for a solution yet
 72 |     RecoveryInfo entry;
 73 |     entry.UsedForSolution = false;
 74 |     entry.Row = row;
 75 |     entry.Data = data;
 76 |     RecoveryData.push_back(entry);
 77 |     return true;
 78 | }
 79 | 
 80 | void DecoderAppDataWindow::MarkGotElement(unsigned element)
 81 | {
 82 |     FECAL_DEBUG_ASSERT(element < InputCount);
 83 |     Subwindow& owner = Subwindows[element / kSubwindowSize]; // Subwindow holding this element
 84 |     FECAL_DEBUG_ASSERT(!owner.Got.Check(element % kSubwindowSize)); // Must not be marked already
 85 |     owner.Got.Set(element % kSubwindowSize);
 86 |     ++owner.GotCount;
 87 | }
 88 | 
 89 | // Find the next element that has not been marked as received, starting the
 90 | // search at elementStart.
 91 | //
 92 | // The received flags are kept per subwindow of kSubwindowSize elements.
 93 | //
 94 | // elementStart: Element index at which to begin searching
 95 | //
 96 | // Returns the index of the element found.  InputCount is returned instead
 97 | // when elementStart is out of range or when nothing from there on is lost.
 98 | unsigned DecoderAppDataWindow::FindNextLostElement(unsigned elementStart)
 99 | {
100 |     // Nothing to search past the end of the data
101 |     if (elementStart >= InputCount)
102 |         return InputCount;
103 | 
104 |     // Split the start index into a subwindow number and a bit within it
105 |     unsigned firstBit = elementStart % kSubwindowSize;
106 |     unsigned index = elementStart / kSubwindowSize;
107 |     FECAL_DEBUG_ASSERT(index < SubwindowCount);
108 | 
109 |     // Walk the subwindows starting with the one that holds elementStart.
110 |     // Only that first subwindow is searched from a non-zero bit offset.
111 |     for (; index < SubwindowCount; ++index, firstBit = 0)
112 |     {
113 |         Subwindow& subwindow = Subwindows[index];
114 | 
115 |         // A subwindow with every element received has nothing lost in it
116 |         if (subwindow.GotCount >= kSubwindowSize)
117 |             continue;
118 | 
119 |         // Seek the next clear bit, starting from firstBit
120 |         const unsigned clearBit = subwindow.Got.FindFirstClear(firstBit);
121 |         if (clearBit >= kSubwindowSize)
122 |             continue; // None left in this subwindow
123 | 
124 |         // Convert back to an element index.  A clear bit beyond the valid
125 |         // data means no element is lost, which is reported as InputCount.
126 |         const unsigned found = index * kSubwindowSize + clearBit;
127 |         return (found > InputCount) ? InputCount : found;
128 |     }
129 | 
130 |     return InputCount;
131 | }
132 | 
133 | 
134 | //------------------------------------------------------------------------------
135 | // Decoder
136 | 
137 | FecalResult Decoder::Initialize(unsigned input_count, uint64_t total_bytes)
138 | {
139 |     RecoveryMatrix.Window = &Window; // The matrix state works through a pointer to our window
140 | 
141 |     if (Window.SetParameters(input_count, total_bytes))
142 |     {
143 |         Window.AllocateOriginals();
144 |         return Fecal_Success;
145 |     }
146 | 
147 |     FECAL_DEBUG_BREAK; // Invalid input
148 |     return Fecal_InvalidInput;
149 | }
150 | 
151 | FecalResult Decoder::AddOriginal(const FecalSymbol& symbol)
152 | {
153 |     const bool valid = symbol.Index < Window.InputCount &&
154 |         symbol.Data != nullptr &&
155 |         symbol.Bytes == Window.GetColumnBytes(symbol.Index);
156 |     if (!valid)
157 |     {
158 |         FECAL_DEBUG_BREAK; // Invalid input
159 |         return Fecal_InvalidInput;
160 |     }
161 |     // A symbol not seen before makes another recovery attempt worthwhile
162 |     if (Window.AddOriginal(symbol.Index, (uint8_t*)symbol.Data))
163 |         RecoveryAttempted = false;
164 |     return Fecal_Success;
165 | }
166 | 
167 | FecalResult Decoder::AddRecovery(const FecalSymbol& symbol)
168 | {
169 |     // A recovery symbol must carry exactly SymbolBytes bytes of data
170 |     const bool valid = (symbol.Data != nullptr) && (symbol.Bytes == Window.SymbolBytes);
171 |     if (!valid)
172 |     {
173 |         FECAL_DEBUG_BREAK; // Invalid input
174 |         return Fecal_InvalidInput;
175 |     }
176 |     // symbol.Index is the recovery row; a row not seen before permits a new attempt
177 |     if (Window.AddRecovery((uint8_t*)symbol.Data, symbol.Index))
178 |         RecoveryAttempted = false;
179 |     return Fecal_Success;
180 | }
181 | 
182 | FecalResult Decoder::GetOriginal(unsigned column, FecalSymbol& symbol)
183 | {
184 |     // Start from an empty result so every failure path reports no data
185 |     symbol.Bytes = 0;
186 |     symbol.Data = nullptr;
187 |     symbol.Index = column;
188 |     if (column >= Window.InputCount)
189 |     {
190 |         FECAL_DEBUG_BREAK; // Invalid input
191 |         return Fecal_InvalidInput;
192 |     }
193 | 
194 |     // The stored pointer is set when the column is added or recovered
195 |     symbol.Data = Window.OriginalData[column].Data;
196 |     if (!symbol.Data)
197 |         return Fecal_NeedMoreData;
198 |     symbol.Bytes = Window.GetColumnBytes(column);
199 |     return Fecal_Success;
200 | }
201 | 
202 | FecalResult Decoder::Decode(RecoveredSymbols& symbols)
203 | {
204 |     // Report nothing recovered unless the whole pipeline below succeeds
205 |     symbols.Count = 0;
206 |     symbols.Symbols = nullptr;
207 | 
208 |     // Every original arrived, so there is nothing to recover
209 |     if (Window.OriginalGotCount >= Window.InputCount)
210 |         return Fecal_Success;
211 | 
212 |     // Give up for now if too few symbols have arrived for an attempt, or if
213 |     // an attempt was already made and nothing new has been added since
214 |     const unsigned recoveryCount = static_cast<unsigned>(Window.RecoveryData.size());
215 |     if (Window.OriginalGotCount + recoveryCount < Window.InputCount || RecoveryAttempted)
216 |         return Fecal_NeedMoreData;
217 |     RecoveryAttempted = true;
218 | 
219 |     // Bring the recovery matrix up to date; failure is reported as out of memory
220 |     if (!RecoveryMatrix.GenerateMatrix())
221 |         return Fecal_OutOfMemory;
222 | 
223 |     // Try to solve the linear system; if that fails more data is needed
224 |     if (!RecoveryMatrix.GaussianElimination())
225 |         return Fecal_NeedMoreData;
226 | 
227 |     // Apply the solution to the symbol data, stopping at the first failure
228 |     FecalResult result = EliminateOriginalData();
229 |     if (result == Fecal_Success)
230 |     {
231 |         MultiplyLowerTriangle();
232 |         result = BackSubstitution();
233 |     }
234 | 
235 |     // Hand the recovered symbols back to the caller
236 |     // only when every step succeeded
237 |     if (result == Fecal_Success)
238 |     {
239 |         symbols.Count = static_cast<unsigned>(RecoveredData.size());
240 |         symbols.Symbols = &RecoveredData[0];
241 |     }
242 | 
243 |     return result;
244 | }
245 | 
246 | FecalResult Decoder::EliminateOriginalData()
247 | {
248 |     // Allocate workspace
249 |     const unsigned symbolBytes = Window.SymbolBytes;
250 |     if (!ProductWorkspace.Allocate(symbolBytes))
251 |         return Fecal_OutOfMemory;
252 | 
253 |     const unsigned rows = static_cast<unsigned>(Window.RecoveryData.size());
254 | 
255 |     // Eliminate data in sorted row order regardless of pivot order:
256 |     for (unsigned matrixRowIndex = 0; matrixRowIndex < rows; ++matrixRowIndex)
257 |     {
258 |         const RecoveryInfo& recovery = Window.RecoveryData[matrixRowIndex];
259 |         if (!recovery.UsedForSolution)
260 |             continue;
261 | 
262 |         // Zero the product sum
263 |         memset(ProductWorkspace.Data, 0, symbolBytes);
264 | 
265 |         XORSummer summer1;
266 |         summer1.Initialize(recovery.Data, symbolBytes);
267 |         XORSummer summerRX;
268 |         summerRX.Initialize(ProductWorkspace.Data, symbolBytes);
269 | 
270 |         // Eliminate dense recovery data outside of matrix:
271 |         for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex)
272 |         {
273 |             const unsigned opcode = GetRowOpcode(laneIndex, recovery.Row);
274 | 
275 |             // Opcode bits [0, kColumnSumCount) pick lane sums for the recovery
276 |             // buffer; the next kColumnSumCount bits pick them for ProductWorkspace
277 |             for (unsigned bit = 0; bit < 2 * kColumnSumCount; ++bit)
278 |             {
279 |                 if ((opcode & (1u << bit)) == 0)
280 |                     continue;
281 |                 // GetLaneSum() returns null if its buffer cannot be allocated.
282 |                 // Handing that to Add() would silently leave the sum out.
283 |                 const uint8_t* laneSum = GetLaneSum(laneIndex, bit % kColumnSumCount);
284 |                 if (!laneSum)
285 |                     return Fecal_OutOfMemory; // Rows handled so far stay modified
286 |                 if (bit < kColumnSumCount)
287 |                     summer1.Add(laneSum);
288 |                 else
289 |                     summerRX.Add(laneSum);
290 |             }
291 |         }
292 | 
293 |         // Eliminate light recovery data outside of matrix:
294 |         const unsigned inputCount = Window.InputCount;
295 |         PCGRandom prng;
296 |         prng.Seed(recovery.Row, inputCount);
297 | 
298 |         const unsigned pairCount = (inputCount + kPairAddRate - 1) / kPairAddRate;
299 |         for (unsigned i = 0; i < pairCount; ++i)
300 |         {
301 |             const unsigned element1 = prng.Next() % inputCount;
302 |             const uint8_t* original1 = Window.OriginalData[element1].Data;
303 |             if (original1)
304 |             {
305 |                 if (element1 == inputCount - 1)
306 |                     gf256_add_mem(recovery.Data, original1, Window.FinalBytes);
307 |                 else
308 |                     summer1.Add(original1);
309 |             }
310 | 
311 |             const unsigned elementRX = prng.Next() % inputCount;
312 |             const uint8_t* originalRX = Window.OriginalData[elementRX].Data;
313 |             if (originalRX)
314 |             {
315 |                 if (elementRX == inputCount - 1)
316 |                     gf256_add_mem(ProductWorkspace.Data, originalRX, Window.FinalBytes);
317 |                 else
318 |                     summerRX.Add(originalRX);
319 |             }
320 |         }
321 | 
322 |         summer1.Finalize();
323 |         summerRX.Finalize();
324 | 
325 |         const uint8_t RX = GetRowValue(recovery.Row);
326 |         gf256_muladd_mem(recovery.Data, RX, ProductWorkspace.Data, symbolBytes);
327 |     }
328 | 
329 |     return Fecal_Success;
330 | }
331 | 
332 | // Return one of the kColumnSumCount sums over the originals held for a
333 | // column lane.  Each sum is computed on first use and cached in LaneSums.
334 | //
335 | // laneIndex: Lane to sum: columns laneIndex, laneIndex + kColumnLaneCount, ...
336 | // sumIndex:  0 for the plain sum, 1 for columns weighted by their column
337 | //            value, 2 for columns weighted by the square of that value
338 | //
339 | // Returns a buffer of SymbolBytes bytes, or nullptr if allocation failed
340 | const uint8_t* Decoder::GetLaneSum(unsigned laneIndex, unsigned sumIndex)
341 | {
342 |     // Reuse the buffer if this sum was already computed
343 |     AlignedDataBuffer& cached = LaneSums[laneIndex][sumIndex];
344 |     if (cached.Data)
345 |         return cached.Data;
346 | 
347 |     static_assert(kColumnSumCount == 3, "Update this");
348 | 
349 |     const unsigned symbolBytes = Window.SymbolBytes;
350 |     if (!cached.Allocate(symbolBytes))
351 |         return nullptr;
352 |     memset(cached.Data, 0, symbolBytes);
353 | 
354 |     // The final column is handled apart from the rest because it is added
355 |     // with FinalBytes rather than SymbolBytes.  Null if absent or not in lane.
356 |     const unsigned finalColumn = Window.InputCount - 1;
357 |     const uint8_t* finalData = nullptr;
358 |     if (finalColumn % kColumnLaneCount == laneIndex)
359 |         finalData = Window.OriginalData[finalColumn].Data;
360 | 
361 |     if (sumIndex == 0)
362 |     {
363 |         // Sum 0: plain addition of every held column in the lane
364 |         XORSummer summer;
365 |         summer.Initialize(cached.Data, symbolBytes);
366 |         for (unsigned column = laneIndex; column < finalColumn; column += kColumnLaneCount)
367 |         {
368 |             const uint8_t* data = Window.OriginalData[column].Data;
369 |             if (data)
370 |                 summer.Add(data);
371 |         }
372 |         if (finalData)
373 |             gf256_add_mem(cached.Data, finalData, Window.FinalBytes);
374 |         summer.Finalize();
375 |         return cached.Data;
376 |     }
377 | 
378 |     // Sums 1 and 2: multiplier applied to a column's data before adding it
379 |     const auto weightOf = [sumIndex](unsigned column) -> uint8_t
380 |     {
381 |         uint8_t weight = GetColumnValue(column);
382 |         if (sumIndex == 2)
383 |             weight = gf256_sqr(weight);
384 |         return weight;
385 |     };
386 | 
387 |     for (unsigned column = laneIndex; column < finalColumn; column += kColumnLaneCount)
388 |     {
389 |         const uint8_t* data = Window.OriginalData[column].Data;
390 |         if (data)
391 |             gf256_muladd_mem(cached.Data, weightOf(column), data, symbolBytes);
392 |     }
393 |     if (finalData)
394 |         gf256_muladd_mem(cached.Data, weightOf(finalColumn), finalData, Window.FinalBytes);
395 | 
396 |     return cached.Data;
397 | }
398 | 
399 | void Decoder::MultiplyLowerTriangle()
400 | {
401 |     const unsigned columns = static_cast<unsigned>(RecoveryMatrix.Columns.size());
402 |     const unsigned srcBytes = Window.SymbolBytes;
403 | 
404 |     // Multiply lower triangle following solution order from left to right.
405 |     // The bound is col_i + 1 < columns so that an empty matrix is a no-op
406 |     // instead of letting the unsigned columns - 1 wrap around.
407 |     for (unsigned col_i = 0; col_i + 1 < columns; ++col_i)
408 |     {
409 |         const unsigned matrixRowIndex_i = RecoveryMatrix.Pivots[col_i];
410 |         const uint8_t* srcData = Window.RecoveryData[matrixRowIndex_i].Data;
411 |         FECAL_DEBUG_ASSERT(srcData && srcBytes > 0);
412 | 
413 |         for (unsigned col_j = col_i + 1; col_j < columns; ++col_j)
414 |         {
415 |             const unsigned matrixRowIndex_j = RecoveryMatrix.Pivots[col_j];
416 |             const uint8_t y = RecoveryMatrix.Matrix.Get(matrixRowIndex_j, col_i);
417 |             if (y == 0)
418 |                 continue; // A zero coefficient contributes nothing
419 |             uint8_t* destData = Window.RecoveryData[matrixRowIndex_j].Data;
420 |             gf256_muladd_mem(destData, y, srcData, srcBytes);
421 |         }
422 |     }
423 | }
424 | 
425 | FecalResult Decoder::BackSubstitution()
426 | {
427 |     const unsigned columns = static_cast<unsigned>(RecoveryMatrix.Columns.size());
428 | 
429 |     // One recovered symbol is reported per matrix column
430 |     RecoveredData.resize(columns);
431 | 
432 |     // Work from the right-most column back to the first:
433 |     for (int col_i = columns - 1; col_i >= 0; --col_i)
434 |     {
435 |         const unsigned pivotRow = RecoveryMatrix.Pivots[col_i];
436 |         uint8_t* solved = Window.RecoveryData[pivotRow].Data;
437 |         const uint8_t diagonal = RecoveryMatrix.Matrix.Get(pivotRow, col_i);
438 |         FECAL_DEBUG_ASSERT(diagonal != 0);
439 | 
440 |         const unsigned originalColumn = RecoveryMatrix.Columns[col_i].Column;
441 |         const unsigned originalBytes = Window.GetColumnBytes(originalColumn);
442 | 
443 |         // Divide the row's buffer by the diagonal value in place; the buffer
444 |         // is then stored as the data of the original column
445 |         gf256_div_mem(solved, solved, diagonal, originalBytes);
446 |         Window.OriginalData[originalColumn].Data = solved;
447 | 
448 |         // Write recovered packet data
449 |         auto& out = RecoveredData[col_i];
450 |         out.Index = originalColumn;
451 |         out.Bytes = originalBytes;
452 |         out.Data = solved;
453 | 
454 |         // Eliminate from all other pivot rows above it:
455 |         for (unsigned col_j = 0; col_j < (unsigned)col_i; ++col_j)
456 |         {
457 |             const unsigned pivot_j = RecoveryMatrix.Pivots[col_j];
458 |             const uint8_t x = RecoveryMatrix.Matrix.Get(pivot_j, col_i);
459 |             if (x != 0)
460 |                 gf256_muladd_mem(Window.RecoveryData[pivot_j].Data, x, solved, originalBytes);
461 |         }
462 |     }
463 | 
464 |     return Fecal_Success;
465 | }
466 | 
467 | 
468 | //------------------------------------------------------------------------------
469 | // RecoveryMatrixState
470 | 
471 | void RecoveryMatrixState::PopulateColumns(const unsigned columns)
472 | {
473 |     Columns.resize(columns);
474 | 
475 |     // Assign matrix columns to the lost originals in increasing element order
476 |     unsigned searchFrom = 0;
477 |     for (unsigned slot = 0; slot < columns; ++slot)
478 |     {
479 |         const unsigned lost = Window->FindNextLostElement(searchFrom);
480 |         if (lost >= Window->InputCount)
481 |         {
482 |             FECAL_DEBUG_BREAK; // Should never happen
483 |             return;
484 |         }
485 |         searchFrom = lost + 1;
486 | 
487 |         // Record the mapping in both directions
488 |         Columns[slot].Column = lost;
489 |         Columns[slot].CX = GetColumnValue(lost);
490 |         Window->OriginalData[lost].RecoveryMatrixColumn = slot;
491 |     }
492 | }
493 | 
494 | bool RecoveryMatrixState::GenerateMatrix() // Returns false if Matrix.Initialize() or Matrix.Resize() fails
495 | {
496 |     const unsigned input_count = Window->InputCount;
497 |     const unsigned columns = input_count - Window->OriginalGotCount; // One column per original not yet received
498 |     const unsigned rows = static_cast<unsigned>(Window->RecoveryData.size()); // One row per recovery symbol held
499 |     FECAL_DEBUG_ASSERT(rows >= columns);
500 | 
501 |     // If column count changed:
502 |     if (columns != (unsigned)Columns.size())
503 |     {
504 |         PopulateColumns(columns);
505 | 
506 |         // Reset everything
507 |         Pivots.clear();
508 |         GEResumePivot = 0;
509 |         FilledRows = 0; // Every row is generated again by the loop below
510 | 
511 |         if (!Matrix.Initialize(rows, columns))
512 |             return false;
513 |     }
514 |     else
515 |     {
516 |         // Otherwise we just added rows
517 |         FECAL_DEBUG_ASSERT(FilledRows < rows);
518 |         if (!Matrix.Resize(rows, columns))
519 |             return false;
520 |     }
521 | 
522 |     const unsigned stride = Matrix.AllocatedColumns; // Distance in bytes between consecutive rows
523 |     uint8_t* rowData = Matrix.Data + FilledRows * stride; // First row that has not been filled yet
524 | 
525 |     // For each row to fill:
526 |     for (unsigned ii = FilledRows; ii < rows; ++ii, rowData += stride)
527 |     {
528 |         const unsigned row = Window->RecoveryData[ii].Row;
529 | 
530 |         // Calculate row multiplier RX
531 |         const uint8_t RX = GetRowValue(row);
532 | 
533 |         // Fill columns from left for new rows:
534 |         for (unsigned j = 0; j < columns; ++j)
535 |         {
536 |             const unsigned column = Columns[j].Column;
537 | 
538 |             // Generate opcode and parameters
539 |             const uint8_t CX = Columns[j].CX;
540 |             const uint8_t CX2 = gf256_sqr(CX);
541 |             const unsigned lane = column % kColumnLaneCount;
542 |             const unsigned opcode = GetRowOpcode(lane, row);
543 | 
544 |             unsigned value = opcode & 1; // Bits 0..2 add 1, CX, CX2; bits 3..5 add the same terms times RX
545 |             if (opcode & 2)
546 |                 value ^= CX;
547 |             if (opcode & 4)
548 |                 value ^= CX2;
549 |             if (opcode & 8)
550 |                 value ^= RX;
551 |             if (opcode & 16)
552 |                 value ^= gf256_mul(CX, RX);
553 |             if (opcode & 32)
554 |                 value ^= gf256_mul(CX2, RX);
555 |             rowData[j] = (uint8_t)value;
556 |         }
557 | 
558 |         PCGRandom prng;
559 |         prng.Seed(row, input_count); // Seeded from the row number and input count only
560 | 
561 |         const unsigned pairCount = (input_count + kPairAddRate - 1) / kPairAddRate; // input_count / kPairAddRate, rounded up
562 | 
563 |         for (unsigned k = 0; k < pairCount; ++k)
564 |         {
565 |             const unsigned element1 = prng.Next() % input_count;
566 |             if (!Window->OriginalData[element1].Data) // No data held: the element has a matrix column
567 |             {
568 |                 const unsigned matrixColumn = Window->OriginalData[element1].RecoveryMatrixColumn;
569 |                 rowData[matrixColumn] ^= 1;
570 |             }
571 | 
572 |             const unsigned elementRX = prng.Next() % input_count;
573 |             if (!Window->OriginalData[elementRX].Data) // As above, but this term is weighted by RX
574 |             {
575 |                 const unsigned matrixColumn = Window->OriginalData[elementRX].RecoveryMatrixColumn;
576 |                 rowData[matrixColumn] ^= RX;
577 |             }
578 |         } // for each pair of random columns
579 |     } // for each recovery row
580 | 
581 |     // Fill in revealed column pivots with their own value
582 |     Pivots.resize(rows);
583 |     for (unsigned i = FilledRows; i < rows; ++i)
584 |         Pivots[i] = i; // Each new row starts out as its own pivot entry
585 | 
586 |     // If we have already performed some GE, then we need to eliminate new
587 |     // row data and we need to carry on elimination for new columns
588 |     if (GEResumePivot > 0)
589 |         ResumeGE(FilledRows, rows);
590 | 
591 |     FilledRows = rows; // Rows below this index are not regenerated unless the column count changes
592 | 
593 |     return true;
594 | }
595 | 
596 | void RecoveryMatrixState::ResumeGE(const unsigned oldRows, const unsigned rows)
597 | {
598 |     // Without new rows there is nothing to catch up on
599 |     if (oldRows >= rows)
600 |     {
601 |         FECAL_DEBUG_ASSERT(oldRows == rows);
602 |         return;
603 |     }
604 | 
605 |     const unsigned columns = Matrix.Columns;
606 |     const unsigned stride = Matrix.AllocatedColumns;
607 |     uint8_t* const firstNewRow = Matrix.Data + stride * oldRows;
608 | 
609 |     // Replay every pivot determined so far, in order, against the rows
610 |     // that were added since
611 |     for (unsigned pivot = 0; pivot < GEResumePivot; ++pivot)
612 |     {
613 |         const uint8_t* pivotRow = Matrix.Data + stride * Pivots[pivot];
614 |         const uint8_t pivotValue = pivotRow[pivot];
615 |         FECAL_DEBUG_ASSERT(pivotValue != 0);
616 | 
617 |         // The new rows occupy matrix rows oldRows..rows-1 and have not
618 |         // been reordered in the pivots array yet
619 |         for (unsigned row = oldRows; row < rows; ++row)
620 |         {
621 |             uint8_t* target = firstNewRow + stride * (row - oldRows);
622 |             EliminateRow(pivotRow, target, pivot, columns, pivotValue);
623 | 
624 |             FECAL_DEBUG_ASSERT(Pivots[row] == row);
625 |         }
626 |     }
627 | }
628 | 
629 | bool RecoveryMatrixState::GaussianElimination()
630 | {
631 |     // An earlier attempt that got stuck is continued by the pivoted solver
632 |     if (GEResumePivot > 0)
633 |         return PivotedGaussianElimination(GEResumePivot);
634 | 
635 |     // Otherwise eliminate down the diagonal in row order for as long as the
636 |     // diagonal stays non-zero.  This avoids the extra memory operations of the
637 |     // pivots array, and a dense matrix is likely to get far before a zero
638 |     // turns up.
639 |     const unsigned rows = Matrix.Rows;
640 |     const unsigned stride = Matrix.AllocatedColumns;
641 |     const unsigned columns = Matrix.Columns;
642 | 
643 |     for (unsigned pivot = 0; pivot < columns; ++pivot)
644 |     {
645 |         uint8_t* const pivotRow = Matrix.Data + stride * pivot;
646 |         const uint8_t pivotValue = pivotRow[pivot];
647 | 
648 |         // A zero on the diagonal: the pivoted solver takes over from here
649 |         if (pivotValue == 0)
650 |             return PivotedGaussianElimination(pivot);
651 | 
652 |         // This recovery row becomes part of the solution
653 |         Window->RecoveryData[pivot].UsedForSolution = true;
654 | 
655 |         // Run EliminateRow() on each row below the pivot row:
656 |         for (unsigned below = pivot + 1; below < rows; ++below)
657 |         {
658 |             EliminateRow(pivotRow, Matrix.Data + stride * below, pivot, columns, pivotValue);
659 |         }
660 |     }
661 | 
662 |     return true;
663 | }
664 | 
665 | bool RecoveryMatrixState::PivotedGaussianElimination(unsigned pivot_i) // Returns true once every column has a pivot row
666 | {
667 |     const unsigned columns = Matrix.Columns;
668 |     const unsigned stride = Matrix.AllocatedColumns;
669 |     const unsigned rows = Matrix.Rows;
670 | 
671 |     // Resume from next row down...
672 |     // Note: This is designed to be called by the non-pivoted version
673 |     unsigned pivot_j = pivot_i + 1; // First pass only: the search skips the entry at pivot_i itself
674 |     goto UsePivoting;
675 | 
676 |     // For each pivot to determine:
677 |     for (; pivot_i < columns; ++pivot_i)
678 |     {
679 |         pivot_j = pivot_i; // Later passes consider the entry at pivot_i as well
680 | UsePivoting:
681 |         for (; pivot_j < rows; ++pivot_j)
682 |         {
683 |             const unsigned matrixRowIndex_j = Pivots[pivot_j];
684 |             const uint8_t* ge_row = Matrix.Data + stride * matrixRowIndex_j;
685 |             const uint8_t val_i = ge_row[pivot_i];
686 |             if (val_i == 0)
687 |                 continue; // This row cannot serve as the pivot for column pivot_i
688 | 
689 |             // Swap out the pivot index for this one
690 |             if (pivot_i != pivot_j)
691 |             {
692 |                 const unsigned temp = Pivots[pivot_i];
693 |                 Pivots[pivot_i] = Pivots[pivot_j];
694 |                 Pivots[pivot_j] = temp;
695 |             }
696 | 
697 |             RecoveryInfo& rowInfo = Window->RecoveryData[matrixRowIndex_j];
698 |             rowInfo.UsedForSolution = true;
699 | 
700 |             // Skip eliminating extra rows in the case that we just solved the matrix
701 |             if (pivot_i >= columns - 1)
702 |                 return true;
703 | 
704 |             // For each remaining row:
705 |             for (unsigned pivot_k = pivot_i + 1; pivot_k < rows; ++pivot_k)
706 |             {
707 |                 const unsigned matrixRowIndex_k = Pivots[pivot_k];
708 |                 uint8_t* rem_row = Matrix.Data + stride * matrixRowIndex_k;
709 | 
710 |                 EliminateRow(ge_row, rem_row, pivot_i, columns, val_i);
711 |             }
712 | 
713 |             goto NextPivot; // Pivot found for this column: skip the failure path below
714 |         }
715 | 
716 |         // Remember where we failed last time
717 |         GEResumePivot = pivot_i;
718 | 
719 |         return false; // No remaining row has a non-zero value in column pivot_i
720 | NextPivot:;
721 |     }
722 | 
723 |     return true;
724 | }
725 | 
726 | 
727 | } // namespace fecal
728 | 


--------------------------------------------------------------------------------
/FecalDecoder.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #pragma once
 30 | 
 31 | /*
 32 |     Siamese Decoder Data Recovery Process
 33 | 
 34 |     (1) Collect data:
 35 | 
 36 |     This collects original data packets and recovery packets, until a solution
 37 |     may be possible (recovery is possible about 99% of the time).
 38 | 
 39 |     (2) Generate recovery matrix:
 40 | 
 41 |     The recovery matrix is a square GF(2^^8) matrix whose width is
 42 |     the number of losses we are trying to recover.  The recovery matrix elements
 43 |     are sampled from a larger matrix that is implicit (not actually constructed),
 44 |     where the columns correspond to original data and the rows correspond to
 45 |     recovery packets.
 46 | 
 47 |     (3) Solve recovery matrix:
 48 | 
 49 |     We experimentally perform Gaussian elimination on the matrix to put it in
 50 |     upper triangular form.  If this is successful, then recovery can proceed.
 51 |     Note that we have done no operations on the original data yet, so this step
 52 |     is fairly inexpensive.
 53 | 
 54 |     To speed up this step with the density of the matrix in mind, we attempt
 55 |     GE without pivoting first and then switch to a pivoting algorithm as zeroes
 56 |     are encountered.
 57 | 
 58 |     If this fails we attempt to build a larger recovery matrix involving more
 59 |     received recovery packets, which may also involve more lost original data.
 60 |     If recovery is not possible with the received data, then we wait for more.
 61 | 
 62 |     (4) Eliminate received data:
 63 | 
 64 |     This step involves most of the received data and takes the most time.
 65 |     Its complexity is slightly less than that of the encoder.  As a result,
 66 |     any improvement in encoder performance will translate to a faster decoder.
 67 | 
 68 |     For each recovery packet involved in solution we need to eliminate original
 69 |     data that is outside of the recovery matrix, so that the recovery matrix can
 70 |     be applied to recover the lost data.
 71 | 
 72 |     We construct the sums of received original data for each row as in the encoder,
 73 |     and roll the sums up as the left side is eliminated from later recovery packets.
 74 |     The sums are reused on multiple rows to eliminate data faster.
 75 | 
 76 |     (5) Recover original data:
 77 | 
 78 |     The same operations performed to arrive at the GE solution earlier are now
 79 |     performed on the recovery data packets.  We then multiply by the upper
 80 |     triangle of the recovery matrix in back substitution order.  Finally the
 81 |     diagonal is eliminated by dividing each recovery packet by the diagonal.
 82 |     The recovery packets now contain original data.
 83 | 
 84 |     The original data are prefixed by a length field so that the original data
 85 |     length can be recovered, since we support variable length input data.
 86 | */
 87 | 
 88 | #include "FecalCommon.h"
 89 | 
 90 | #include <unordered_set>
 91 | 
 92 | namespace fecal {
 93 | 
 94 | 
 95 | //------------------------------------------------------------------------------
 96 | // DecoderAppDataWindow
 97 | 
 98 | struct RecoveryInfo
 99 | {
100 |     uint8_t* Data = nullptr;
101 |     unsigned Row = 0;
102 |     bool UsedForSolution = false;
103 | };
104 | 
// Bookkeeping record for one entry of DecoderAppDataWindow::OriginalData
struct OriginalInfo
{
    // Pointer to the original symbol bytes (null until assigned)
    uint8_t* Data{ nullptr };

    // Column of the recovery matrix tied to this original
    // NOTE(review): presumably only meaningful for lost originals - confirm in FecalDecoder.cpp
    unsigned RecoveryMatrixColumn{ 0 };
};
110 | 
111 | // Keep this number of columns in each subwindow
112 | static const unsigned kSubwindowSize = kColumnLaneCount * 8;
113 | 
114 | struct Subwindow
115 | {
116 |     CustomBitSet<kSubwindowSize> Got;
117 |     unsigned GotCount = 0;
118 | };
119 | 
120 | // Decoder-specialized app data window
121 | struct DecoderAppDataWindow : AppDataWindow
122 | {
123 |     // Received original data
124 |     std::vector<OriginalInfo> OriginalData;
125 | 
126 |     // Received recovery data
127 |     std::vector<RecoveryInfo> RecoveryData;
128 | 
129 |     // Track which entries are filled in
130 |     unsigned SubwindowCount = 0;
131 |     std::vector<Subwindow> Subwindows;
132 | 
133 |     // Number of unique originals received so far
134 |     unsigned OriginalGotCount = 0;
135 | 
136 |     // Check if row has been seen yet
137 |     std::unordered_set<unsigned> RowSet;
138 | 
139 | 
140 |     // Allocate originals
141 |     void AllocateOriginals();
142 | 
143 |     // Add symbol data
144 |     // Returns false if we already have the data
145 |     bool AddRecovery(uint8_t* data, unsigned row);
146 | 
147 |     // Add original data
148 |     // Returns false if we already have the data
149 |     bool AddOriginal(unsigned column, uint8_t* data);
150 | 
151 |     // Mark that we got an element
152 |     void MarkGotElement(unsigned element);
153 | 
154 |     // Returns Count if no more elements were lost
155 |     // Otherwise returns the next element that was lost at or after the given one
156 |     unsigned FindNextLostElement(unsigned elementStart);
157 | };
158 | 
159 | 
160 | //------------------------------------------------------------------------------
161 | // RecoveryMatrixState
162 | 
163 | /*
164 |     We maintain a GF(2^^8) byte matrix that can grow a little in rows and
165 |     columns to reattempt solving with a larger matrix that includes more
166 |     lost columns and received recovery data, in the case that recovery fails.
167 |     It is expected that recovery fails around 1% of the time.
168 | 
169 |     The matrix is also a bit oversized to allow us to prefetch the next row,
170 |     and to align memory addresses with cache line boundaries for speed.
171 | */
172 | 
173 | class RecoveryMatrixState
174 | {
175 | public:
176 |     DecoderAppDataWindow* Window = nullptr;
177 | 
178 |     struct ColumnInfo
179 |     {
180 |         // Column number for the missing data
181 |         unsigned Column = 0;
182 | 
183 |         // Column multiplier
184 |         uint8_t CX = 0;
185 |     };
186 |     std::vector<ColumnInfo> Columns;
187 | 
188 |     // Recovery matrix
189 |     GrowingAlignedByteMatrix Matrix;
190 | 
191 |     // Array of pivots used for when rows need to be swapped
192 |     // This allows us to swap indices rather than swap whole rows to reduce memory accesses
193 |     std::vector<unsigned> Pivots;
194 | 
195 |     // Pivot to resume at when we get more data
196 |     unsigned GEResumePivot = 0;
197 | 
198 |     // Number of matrix rows we already filled
199 |     unsigned FilledRows = 0;
200 | 
201 | 
202 |     // Populate Rows and Columns arrays
203 |     void PopulateColumns(const unsigned columns);
204 | 
205 |     // Generate the matrix
206 |     bool GenerateMatrix();
207 | 
208 |     // Attempt to put the matrix in upper-triangular form
209 |     bool GaussianElimination();
210 | 
211 | protected:
212 |     // Resume GE from a previous failure point
213 |     void ResumeGE(const unsigned oldRows, const unsigned rows);
214 | 
215 |     // Run GE with pivots after a column is found to be zero
216 |     bool PivotedGaussianElimination(unsigned pivot_i);
217 | 
218 |     // rem_row[] += ge_row[] * y
219 |     GF256_FORCE_INLINE void MulAddRows(
220 |         const uint8_t* ge_row, uint8_t* rem_row, unsigned columnStart,
221 |         const unsigned columnEnd, uint8_t y)
222 |     {
223 | #ifdef GF256_ALIGNED_ACCESSES
224 |         // Do unaligned operations first
225 |         // Note: Each row starts at an aliged address
226 |         unsigned unalignedEnd = NextAlignedOffset(columnStart);
227 |         if (unalignedEnd > columnEnd)
228 |             unalignedEnd = columnEnd;
229 |         for (; columnStart < unalignedEnd; ++columnStart)
230 |             rem_row[columnStart] ^= gf256_mul(ge_row[columnStart], y);
231 |         if (columnStart >= columnEnd)
232 |             return;
233 | #endif
234 | 
235 |         gf256_muladd_mem(rem_row + columnStart, y, ge_row + columnStart, columnEnd - columnStart);
236 |     }
237 | 
238 |     // Internal function common to both GE functions, used to eliminate a row of data
239 |     GF256_FORCE_INLINE void EliminateRow(
240 |         const uint8_t* ge_row, uint8_t* rem_row, const unsigned pivot_i,
241 |         const unsigned columnEnd, const uint8_t val_i)
242 |     {
243 |         // Skip if the element j,i is already zero
244 |         const uint8_t val_j = rem_row[pivot_i];
245 |         if (val_j == 0)
246 |             return;
247 | 
248 |         // Calculate element j,i elimination constant based on pivot row value
249 |         const uint8_t y = gf256_div(val_j, val_i);
250 | 
251 |         // Remember what value was used to zero element j,i
252 |         rem_row[pivot_i] = y;
253 | 
254 |         MulAddRows(ge_row, rem_row, pivot_i + 1, columnEnd, y);
255 |     }
256 | };
257 | 
258 | 
259 | //------------------------------------------------------------------------------
260 | // Decoder
261 | 
262 | class Decoder : public ICodec
263 | {
264 | public:
265 |     virtual ~Decoder() {}
266 | 
267 |     // Initialize the decoder
268 |     FecalResult Initialize(unsigned input_count, uint64_t total_bytes);
269 | 
270 |     // Add original data
271 |     FecalResult AddOriginal(const FecalSymbol& symbol);
272 | 
273 |     // Add recovery data
274 |     FecalResult AddRecovery(const FecalSymbol& symbol);
275 | 
276 |     // Try to decode
277 |     FecalResult Decode(RecoveredSymbols& symbols);
278 | 
279 |     // Get original data
280 |     FecalResult GetOriginal(unsigned column, FecalSymbol& symbol);
281 | 
282 | protected:
283 |     // Window of original data
284 |     DecoderAppDataWindow Window;
285 | 
286 |     // Matrix containing recovery packets that may admit a solution
287 |     RecoveryMatrixState RecoveryMatrix;
288 | 
289 |     // Has recovery been attempted with the latest inputs?
290 |     bool RecoveryAttempted = false;
291 | 
292 |     // Recovered data array returned to application
293 |     std::vector<FecalSymbol> RecoveredData;
294 | 
295 |     // Sums for each lane
296 |     AlignedDataBuffer LaneSums[kColumnLaneCount][kColumnSumCount];
297 | 
298 |     // Output workspace
299 |     AlignedDataBuffer ProductWorkspace;
300 | 
301 | 
302 |     // Recovery step: Eliminate original data that was successfully received
303 |     FecalResult EliminateOriginalData();
304 | 
305 |     // Get lane sum for original data we have
306 |     const uint8_t* GetLaneSum(unsigned laneIndex, unsigned sumIndex);
307 | 
308 |     // Recovery step: Multiply lower triangle following solution order
309 |     void MultiplyLowerTriangle();
310 | 
311 |     // Recovery step: Back-substitute upper triangle to reveal original data
312 |     FecalResult BackSubstitution();
313 | };
314 | 
315 | 
316 | } // namespace fecal
317 | 


--------------------------------------------------------------------------------
/FecalEncoder.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #include "FecalEncoder.h"
 30 | 
 31 | namespace fecal {
 32 | 
 33 | 
 34 | //------------------------------------------------------------------------------
 35 | // EncoderAppDataWindow
 36 | 
 37 | void EncoderAppDataWindow::AllocateOriginals()
 38 | {
 39 |     OriginalData.resize(InputCount);
 40 | }
 41 | 
 42 | void EncoderAppDataWindow::SetEncoderInput(void* const * const input_data)
 43 | {
 44 |     FECAL_DEBUG_ASSERT(InputCount > 0); // SetParameters() must be called first
 45 | 
 46 |     for (unsigned ii = 0, count = InputCount; ii < count; ++ii)
 47 |         OriginalData[ii] = reinterpret_cast<const uint8_t*>(input_data[ii]);
 48 | }
 49 | 
 50 | 
 51 | //------------------------------------------------------------------------------
 52 | // Encoder
 53 | 
 54 | // This optimization speeds up encoding by about 5%
 55 | #ifdef FECAL_ADD2_OPT
 56 | #define FECAL_ADD2_ENC_SETUP_OPT
 57 | #endif
 58 | 
 59 | FecalResult Encoder::Initialize(unsigned input_count, void* const * const input_data, uint64_t total_bytes)
 60 | {
 61 |     // Validate input and set parameters
 62 |     if (!Window.SetParameters(input_count, total_bytes))
 63 |     {
 64 |         FECAL_DEBUG_BREAK; // Invalid input
 65 |         return Fecal_InvalidInput;
 66 |     }
 67 |     Window.AllocateOriginals();
 68 |     Window.SetEncoderInput(input_data);
 69 | 
 70 |     const unsigned symbolBytes = Window.SymbolBytes;
 71 | 
 72 |     // Allocate lane sums
 73 |     for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex)
 74 |     {
 75 |         for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex)
 76 |         {
 77 |             if (!LaneSums[laneIndex][sumIndex].Allocate(symbolBytes))
 78 |                 return Fecal_OutOfMemory;
 79 | 
 80 |             // Clear memory in each lane sum
 81 |             memset(LaneSums[laneIndex][sumIndex].Data, 0, symbolBytes);
 82 |         }
 83 |     }
 84 | 
 85 |     // Allocate workspace
 86 |     if (!ProductWorkspace.Allocate(symbolBytes))
 87 |         return Fecal_OutOfMemory;
 88 | 
 89 |     // TBD: Unroll first set of 8 lanes to avoid the extra memset above?
 90 |     // TBD: Use GetLaneSum() approach do to minimal work for small output?
 91 | 
 92 | #ifdef FECAL_ADD2_ENC_SETUP_OPT
 93 |     for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex)
 94 |     {
 95 |         // Sum[0] += Data
 96 |         XORSummer sum;
 97 |         sum.Initialize(LaneSums[laneIndex][0].Data, symbolBytes);
 98 | 
 99 |         const unsigned columnEnd = input_count - 1;
100 | 
101 |         for (unsigned column = laneIndex; column < columnEnd; column += kColumnLaneCount)
102 |         {
103 |             const uint8_t* columnData = reinterpret_cast<const uint8_t*>(input_data[column]);
104 |             sum.Add(columnData);
105 |         }
106 | 
107 |         if ((columnEnd % kColumnLaneCount) == laneIndex)
108 |         {
109 |             const uint8_t* columnData = reinterpret_cast<const uint8_t*>(input_data[columnEnd]);
110 |             gf256_add_mem(LaneSums[laneIndex][0].Data, columnData, Window.FinalBytes);
111 |         }
112 | 
113 |         sum.Finalize();
114 |     }
115 | #endif
116 | 
117 |     // For each input column:
118 |     for (unsigned column = 0; column < input_count; ++column)
119 |     {
120 |         const uint8_t* columnData = reinterpret_cast<const uint8_t*>(input_data[column]);
121 |         const unsigned columnBytes = Window.GetColumnBytes(column);
122 |         const unsigned laneIndex = column % kColumnLaneCount;
123 |         const uint8_t CX = GetColumnValue(column);
124 |         const uint8_t CX2 = gf256_sqr(CX);
125 | 
126 | #ifndef FECAL_ADD2_ENC_SETUP_OPT
127 |         // Sum[0] += Data
128 |         gf256_add_mem(LaneSums[laneIndex][0].Data, columnData, columnBytes);
129 | #endif
130 | 
131 |         // Sum[1] += CX * Data
132 |         gf256_muladd_mem(LaneSums[laneIndex][1].Data, CX, columnData, columnBytes);
133 | 
134 |         // Sum[2] += CX^2 * Data
135 |         gf256_muladd_mem(LaneSums[laneIndex][2].Data, CX2, columnData, columnBytes);
136 |     }
137 | 
138 |     return Fecal_Success;
139 | 
140 |     static_assert(kColumnSumCount == 3, "Update this");
141 | }
142 | 
143 | FecalResult Encoder::Encode(FecalSymbol& symbol)
144 | {
145 |     // If encoder is not initialized:
146 |     if (!ProductWorkspace.Data)
147 |         return Fecal_InvalidInput;
148 | 
149 |     const unsigned symbolBytes = Window.SymbolBytes;
150 |     if (symbol.Bytes != symbolBytes)
151 |         return Fecal_InvalidInput;
152 | 
153 |     // Load parameters
154 |     const unsigned count = Window.InputCount;
155 |     uint8_t* outputSum = reinterpret_cast<uint8_t*>( symbol.Data );
156 |     uint8_t* outputProduct = ProductWorkspace.Data;
157 | 
158 |     const unsigned row = symbol.Index;
159 | 
160 |     // Initialize LDPC
161 |     PCGRandom prng;
162 |     prng.Seed(row, count);
163 | 
164 |     // Accumulate original data into the two sums
165 |     const unsigned pairCount = (Window.InputCount + kPairAddRate - 1) / kPairAddRate;
166 |     // Unrolled first loop:
167 |     {
168 |         const unsigned element1 = prng.Next() % count;
169 |         const uint8_t* original1 = Window.OriginalData[element1];
170 | 
171 |         const unsigned elementRX = prng.Next() % count;
172 |         const uint8_t* originalRX = Window.OriginalData[elementRX];
173 | 
174 |         // Sum = Original[element1]
175 |         if (Window.IsFinalColumn(element1))
176 |         {
177 |             memcpy(outputSum, original1, Window.FinalBytes);
178 |             memset(outputSum + Window.FinalBytes, 0, symbolBytes - Window.FinalBytes);
179 |         }
180 |         else
181 |             memcpy(outputSum, original1, symbolBytes);
182 | 
183 |         // Product = Original[elementRX]
184 |         if (Window.IsFinalColumn(elementRX))
185 |         {
186 |             memcpy(outputProduct, originalRX, Window.FinalBytes);
187 |             memset(outputProduct + Window.FinalBytes, 0, symbolBytes - Window.FinalBytes);
188 |         }
189 |         else
190 |             memcpy(outputProduct, originalRX, symbolBytes);
191 |     }
192 | 
193 |     XORSummer sum;
194 |     sum.Initialize(outputSum, symbolBytes);
195 |     XORSummer prod;
196 |     prod.Initialize(outputProduct, symbolBytes);
197 | 
198 |     for (unsigned i = 1; i < pairCount; ++i)
199 |     {
200 |         const unsigned element1   = prng.Next() % count;
201 |         const uint8_t* original1  = Window.OriginalData[element1];
202 | 
203 |         const unsigned elementRX  = prng.Next() % count;
204 |         const uint8_t* originalRX = Window.OriginalData[elementRX];
205 | 
206 |         // Sum += Original[element1]
207 |         if (Window.IsFinalColumn(element1))
208 |             gf256_add_mem(outputSum, original1, Window.FinalBytes);
209 |         else
210 |             sum.Add(original1);
211 | 
212 |         // Product += Original[elementRX]
213 |         if (Window.IsFinalColumn(elementRX))
214 |             gf256_add_mem(outputProduct, originalRX, Window.FinalBytes);
215 |         else
216 |             prod.Add(originalRX);
217 |     }
218 | 
219 |     // For each lane:
220 |     for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex)
221 |     {
222 |         // Compute the operations to run for this lane and row
223 |         unsigned opcode = GetRowOpcode(laneIndex, row);
224 | 
225 |         // Sum += Random Lanes
226 |         unsigned mask = 1;
227 |         for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex, mask <<= 1)
228 |             if (opcode & mask)
229 |                 sum.Add(LaneSums[laneIndex][sumIndex].Data);
230 | 
231 |         // Product += Random Lanes
232 |         for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex, mask <<= 1)
233 |             if (opcode & mask)
234 |                 prod.Add(LaneSums[laneIndex][sumIndex].Data);
235 |     }
236 | 
237 |     sum.Finalize();
238 |     prod.Finalize();
239 | 
240 |     // Sum += RX * Product
241 |     gf256_muladd_mem(outputSum, GetRowValue(row), outputProduct, symbolBytes);
242 | 
243 |     return Fecal_Success;
244 | }
245 | 
246 | 
247 | } // namespace fecal
248 | 


--------------------------------------------------------------------------------
/FecalEncoder.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
 3 | 
 4 |     Redistribution and use in source and binary forms, with or without
 5 |     modification, are permitted provided that the following conditions are met:
 6 | 
 7 |     * Redistributions of source code must retain the above copyright notice,
 8 |       this list of conditions and the following disclaimer.
 9 |     * Redistributions in binary form must reproduce the above copyright notice,
10 |       this list of conditions and the following disclaimer in the documentation
11 |       and/or other materials provided with the distribution.
12 |     * Neither the name of Fecal nor the names of its contributors may be
13 |       used to endorse or promote products derived from this software without
14 |       specific prior written permission.
15 | 
16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 |     POSSIBILITY OF SUCH DAMAGE.
27 | */
28 | 
29 | #pragma once
30 | 
31 | /*
32 |     Encoder
33 | 
34 |     The encoder builds up sums of input data on Initialize().
35 | 
36 |     When Encode() is called it will combine these sums in a deterministic way.
37 | 
38 |     Encode() writes the recovery symbol into the buffer at symbol.Data.
39 | */
40 | 
41 | #include "FecalCommon.h"
42 | 
43 | namespace fecal {
44 | 
45 | 
46 | //------------------------------------------------------------------------------
47 | // EncoderAppDataWindow
48 | 
49 | // Encoder-specialized app data window
50 | struct EncoderAppDataWindow : AppDataWindow
51 | {
52 |     // Original data
53 |     std::vector<const uint8_t*> OriginalData;
54 | 
55 | 
56 |     // Set encoder input
57 |     // Returns false if input is invalid
58 |     void SetEncoderInput(void* const * const input_data);
59 | 
60 |     // Allocate originals
61 |     void AllocateOriginals();
62 | };
63 | 
64 | 
65 | //------------------------------------------------------------------------------
66 | // Encoder
67 | 
68 | class Encoder : public ICodec
69 | {
70 | public:
71 |     virtual ~Encoder() {}
72 | 
73 |     // Initialize the encoder
74 |     FecalResult Initialize(unsigned input_count, void* const * const input_data, uint64_t total_bytes);
75 | 
76 |     // Generate the next recovery packet for the data
77 |     FecalResult Encode(FecalSymbol& symbol);
78 | 
79 | protected:
80 |     // Application data set
81 |     EncoderAppDataWindow Window;
82 | 
83 |     // Sums for each lane
84 |     AlignedDataBuffer LaneSums[kColumnLaneCount][kColumnSumCount];
85 | 
86 |     // Output workspace
87 |     AlignedDataBuffer ProductWorkspace;
88 | };
89 | 
90 | 
91 | } // namespace fecal
92 | 


--------------------------------------------------------------------------------
/License.md:
--------------------------------------------------------------------------------
 1 | BSD 3-Clause License
 2 | 
 3 | Copyright (c) 2017, Christopher A. Taylor
 4 | All rights reserved.
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are met:
 8 | 
 9 | * Redistributions of source code must retain the above copyright notice, this
10 |   list of conditions and the following disclaimer.
11 | 
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 |   this list of conditions and the following disclaimer in the documentation
14 |   and/or other materials provided with the distribution.
15 | 
16 | * Neither the name of the copyright holder nor the names of its
17 |   contributors may be used to endorse or promote products derived from
18 |   this software without specific prior written permission.
19 | 
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # FEC-AL
  2 | ## Forward Error Correction at the Application Layer in C
  3 | 
  4 | FEC-AL is a simple, portable, fast library for Forward Error Correction.
  5 | From a block of equally sized original data pieces, it generates recovery
  6 | symbols that can be used to recover lost original data.
  7 | 
  8 | * It requires that data pieces are all a fixed size.
  9 | * It can take as input an unlimited number of input blocks.
 10 | * It can generate an unlimited stream of recovery symbols used for decoding.
 11 | * It has a small (about 1%) chance of failing to recover, so it is not an MDS code.
 12 | 
 13 | The main limitation of the software is that it gets slower as O(N^^2) in
 14 | the number of inputs or outputs.  In trade, the encoder overhead is unusually
 15 | low, and the decoder is extremely efficient when recovering from a small number
 16 | of losses.  It may be the best choice based on practical evaluation.
 17 | 
 18 | FEC-AL is a block codec derived from the [Siamese](https://github.com/catid/siamese) streaming FEC library.
 19 | 
 20 | 
 21 | #### Why fecal matters:
 22 | 
 23 | It supports an unlimited number of inputs and outputs, similar to a Fountain Code,
 24 | but it is designed as a Convolutional Code.  This means that it does not perform
 25 | well with a large number of losses.  It is faster than existing erasure correction
 26 | code (ECC) software when the loss count is expected to be small.
 27 | 
 28 | 
 29 | #### Encoder API:
 30 | 
 31 | ```
 32 | #include "fecal.h"
 33 | ```
 34 | 
 35 | For full documentation please read `fecal.h`.
 36 | 
 37 | + `fecal_init()` : Initialize library.
 38 | + `fecal_encoder_create()`: Create encoder object.
 39 | + `fecal_encode()`: Encode a recovery symbol.
 40 | + `fecal_free()`: Free encoder object.
 41 | 
 42 | 
 43 | #### Decoder API:
 44 | 
 45 | ```
 46 | #include "fecal.h"
 47 | ```
 48 | 
 49 | For full documentation please read `fecal.h`.
 50 | 
 51 | + `fecal_init()` : Initialize library.
 52 | + `fecal_decoder_create()`: Create a decoder object.
 53 | + `fecal_decoder_add_original()`: Add original data to the decoder.
 54 | + `fecal_decoder_add_recovery()`: Add recovery data to the decoder.
 55 | + `fecal_decode()`: Attempt to decode with what has been added so far, returning recovered data.
 56 | + `fecal_decoder_get()`: Read back original data after decode.
 57 | + `fecal_free()`: Free decoder object.
 58 | 
 59 | 
 60 | #### Benchmarks:
 61 | 
 62 | For random losses in 2 MB of data split into 1000 equal-sized 2000 byte pieces:
 63 | 
 64 | ```
 65 | Encoder(2 MB in 1000 pieces, 1 losses): Input=6968.64 MB/s, Output=6.96864 MB/s, (Encode create: 7225.69 MB/s)
 66 | Decoder(2 MB in 1000 pieces, 1 losses): Input=9083.06 MB/s, Output=9.08307 MB/s, (Overhead = 0 pieces)
 67 | 
 68 | Encoder(2 MB in 1000 pieces, 2 losses): Input=7181.33 MB/s, Output=14.5063 MB/s, (Encode create: 7663.72 MB/s)
 69 | Decoder(2 MB in 1000 pieces, 2 losses): Input=7365.13 MB/s, Output=14.7303 MB/s, (Overhead = 0.02 pieces)
 70 | 
 71 | Encoder(2 MB in 1000 pieces, 3 losses): Input=6805.5 MB/s, Output=20.4165 MB/s, (Encode create: 7526.72 MB/s)
 72 | Decoder(2 MB in 1000 pieces, 3 losses): Input=6312.93 MB/s, Output=18.9388 MB/s, (Overhead = 0 pieces)
 73 | 
 74 | Encoder(2 MB in 1000 pieces, 4 losses): Input=6751.28 MB/s, Output=27.0726 MB/s, (Encode create: 7645.84 MB/s)
 75 | Decoder(2 MB in 1000 pieces, 4 losses): Input=6387.12 MB/s, Output=25.5485 MB/s, (Overhead = 0.0100002 pieces)
 76 | 
 77 | Encoder(2 MB in 1000 pieces, 5 losses): Input=6502.16 MB/s, Output=32.5108 MB/s, (Encode create: 7645.55 MB/s)
 78 | Decoder(2 MB in 1000 pieces, 5 losses): Input=5982.11 MB/s, Output=29.9106 MB/s, (Overhead = 0 pieces)
 79 | 
 80 | Encoder(2 MB in 1000 pieces, 6 losses): Input=6014.13 MB/s, Output=36.3855 MB/s, (Encode create: 7238.51 MB/s)
 81 | Decoder(2 MB in 1000 pieces, 6 losses): Input=5520.74 MB/s, Output=33.1245 MB/s, (Overhead = 0.0500002 pieces)
 82 | 
 83 | Encoder(2 MB in 1000 pieces, 7 losses): Input=6284.56 MB/s, Output=44.1176 MB/s, (Encode create: 7764.88 MB/s)
 84 | Decoder(2 MB in 1000 pieces, 7 losses): Input=5601.61 MB/s, Output=39.2113 MB/s, (Overhead = 0.02 pieces)
 85 | 
 86 | Encoder(2 MB in 1000 pieces, 8 losses): Input=5854.97 MB/s, Output=46.8398 MB/s, (Encode create: 7388.25 MB/s)
 87 | Decoder(2 MB in 1000 pieces, 8 losses): Input=5492.54 MB/s, Output=43.9403 MB/s, (Overhead = 0 pieces)
 88 | 
 89 | Encoder(2 MB in 1000 pieces, 9 losses): Input=5843.34 MB/s, Output=52.6485 MB/s, (Encode create: 7645.84 MB/s)
 90 | Decoder(2 MB in 1000 pieces, 9 losses): Input=5221.11 MB/s, Output=46.99 MB/s, (Overhead = 0.0100002 pieces)
 91 | 
 92 | Encoder(2 MB in 1000 pieces, 10 losses): Input=5728.53 MB/s, Output=57.3998 MB/s, (Encode create: 7610.06 MB/s)
 93 | Decoder(2 MB in 1000 pieces, 10 losses): Input=5172.24 MB/s, Output=51.7224 MB/s, (Overhead = 0.0200005 pieces)
 94 | 
 95 | Encoder(2 MB in 1000 pieces, 11 losses): Input=5590.65 MB/s, Output=61.4972 MB/s, (Encode create: 7667.83 MB/s)
 96 | Decoder(2 MB in 1000 pieces, 11 losses): Input=5012.53 MB/s, Output=55.1378 MB/s, (Overhead = 0 pieces)
 97 | 
 98 | Encoder(2 MB in 1000 pieces, 13 losses): Input=5382.13 MB/s, Output=70.0753 MB/s, (Encode create: 7687.28 MB/s)
 99 | Decoder(2 MB in 1000 pieces, 13 losses): Input=4790.53 MB/s, Output=62.2769 MB/s, (Overhead = 0.0200005 pieces)
100 | 
101 | Encoder(2 MB in 1000 pieces, 15 losses): Input=5065.47 MB/s, Output=76.0327 MB/s, (Encode create: 7556.01 MB/s)
102 | Decoder(2 MB in 1000 pieces, 15 losses): Input=4490.45 MB/s, Output=67.3567 MB/s, (Overhead = 0.0100002 pieces)
103 | 
104 | Encoder(2 MB in 1000 pieces, 16 losses): Input=4874.6 MB/s, Output=77.9936 MB/s, (Encode create: 7390.71 MB/s)
105 | Decoder(2 MB in 1000 pieces, 16 losses): Input=4279.45 MB/s, Output=68.4712 MB/s, (Overhead = 0 pieces)
106 | 
107 | Encoder(2 MB in 1000 pieces, 18 losses): Input=4707.99 MB/s, Output=84.7438 MB/s, (Encode create: 7515.69 MB/s)
108 | Decoder(2 MB in 1000 pieces, 18 losses): Input=4008.9 MB/s, Output=72.1602 MB/s, (Overhead = 0 pieces)
109 | 
110 | Encoder(2 MB in 1000 pieces, 20 losses): Input=4619.15 MB/s, Output=92.4754 MB/s, (Encode create: 7679.31 MB/s)
111 | Decoder(2 MB in 1000 pieces, 20 losses): Input=3858.4 MB/s, Output=77.1679 MB/s, (Overhead = 0.0200005 pieces)
112 | 
113 | Encoder(2 MB in 1000 pieces, 25 losses): Input=4176.24 MB/s, Output=104.448 MB/s, (Encode create: 7576.33 MB/s)
114 | Decoder(2 MB in 1000 pieces, 25 losses): Input=3374.22 MB/s, Output=84.3554 MB/s, (Overhead = 0.0100002 pieces)
115 | 
116 | Encoder(2 MB in 1000 pieces, 30 losses): Input=3731.27 MB/s, Output=111.976 MB/s, (Encode create: 7418.12 MB/s)
117 | Decoder(2 MB in 1000 pieces, 30 losses): Input=2950.2 MB/s, Output=88.506 MB/s, (Overhead = 0.0100002 pieces)
118 | 
119 | Encoder(2 MB in 1000 pieces, 35 losses): Input=3542.46 MB/s, Output=124.021 MB/s, (Encode create: 7610.64 MB/s)
120 | Decoder(2 MB in 1000 pieces, 35 losses): Input=2702.99 MB/s, Output=94.6048 MB/s, (Overhead = 0.00999832 pieces)
121 | 
122 | Encoder(2 MB in 1000 pieces, 40 losses): Input=3365.53 MB/s, Output=134.621 MB/s, (Encode create: 7846.52 MB/s)
123 | Decoder(2 MB in 1000 pieces, 40 losses): Input=2410.42 MB/s, Output=96.4169 MB/s, (Overhead = 0 pieces)
124 | 
125 | Encoder(2 MB in 1000 pieces, 50 losses): Input=2658.13 MB/s, Output=132.933 MB/s, (Encode create: 6889.42 MB/s)
126 | Decoder(2 MB in 1000 pieces, 50 losses): Input=1917.88 MB/s, Output=95.8938 MB/s, (Overhead = 0.00999832 pieces)
127 | 
128 | Encoder(2 MB in 1000 pieces, 60 losses): Input=2573.04 MB/s, Output=154.408 MB/s, (Encode create: 7578.92 MB/s)
129 | Decoder(2 MB in 1000 pieces, 60 losses): Input=1612.62 MB/s, Output=96.757 MB/s, (Overhead = 0.00999832 pieces)
130 | 
131 | Encoder(2 MB in 1000 pieces, 70 losses): Input=2141.83 MB/s, Output=149.95 MB/s, (Encode create: 6861.77 MB/s)
132 | Decoder(2 MB in 1000 pieces, 70 losses): Input=1325.65 MB/s, Output=92.7957 MB/s, (Overhead = 0.0100021 pieces)
133 | 
134 | Encoder(2 MB in 1000 pieces, 80 losses): Input=2052.65 MB/s, Output=164.212 MB/s, (Encode create: 7454.34 MB/s)
135 | Decoder(2 MB in 1000 pieces, 80 losses): Input=1112 MB/s, Output=88.9601 MB/s, (Overhead = 0 pieces)
136 | 
137 | Encoder(2 MB in 1000 pieces, 90 losses): Input=1926.69 MB/s, Output=173.402 MB/s, (Encode create: 7593.01 MB/s)
138 | Decoder(2 MB in 1000 pieces, 90 losses): Input=972.81 MB/s, Output=87.5529 MB/s, (Overhead = 0 pieces)
139 | 
140 | Encoder(2 MB in 1000 pieces, 100 losses): Input=1814.67 MB/s, Output=181.467 MB/s, (Encode create: 7866.27 MB/s)
141 | Decoder(2 MB in 1000 pieces, 100 losses): Input=861.668 MB/s, Output=86.1668 MB/s, (Overhead = 0 pieces)
142 | 
143 | Encoder(2 MB in 1000 pieces, 110 losses): Input=1617.09 MB/s, Output=177.88 MB/s, (Encode create: 7514.28 MB/s)
144 | Decoder(2 MB in 1000 pieces, 110 losses): Input=740.198 MB/s, Output=81.4218 MB/s, (Overhead = 0 pieces)
145 | 
146 | Encoder(2 MB in 1000 pieces, 120 losses): Input=1485.21 MB/s, Output=178.225 MB/s, (Encode create: 7274.05 MB/s)
147 | Decoder(2 MB in 1000 pieces, 120 losses): Input=645.417 MB/s, Output=77.4501 MB/s, (Overhead = 0 pieces)
148 | ```
149 | 
150 | 
151 | #### Comparisons:
152 | 
153 | Comparing with `wh256`, which is [Wirehair](https://github.com/catid/wirehair) using the GF256 library instead of the old library so it runs faster:
154 | 
155 | For the same data sizes and about 100 losses:
156 | 
157 | ```
158 | >> wirehair_encode(N = 1000) in 2174.33 usec, 919.825 MB/s after 98.992 avg losses
159 | << wirehair_decode(N = 1000) average overhead = 0.023 blocks, average reconstruct time = 1519.61 usec, 1316.13 MB/s
160 | ```
161 | 
162 | Wirehair is asymptotically O(N) in speed, but for smaller input or output data it can be beaten by other codecs.
163 | In this case the Fecal encoder is twice as fast as Wirehair.  Wirehair is almost twice as fast to decode,
164 | but it takes the same time regardless of the number of losses, so Fecal is much faster for small loss counts.
165 | 
166 | For the same data sizes and about 30 losses:
167 | 
168 | ```
169 | >> wirehair_encode(N = 1000) in 2281.65 usec, 876.559 MB/s after 30.931 avg losses
170 | << wirehair_decode(N = 1000) average overhead = 0.02 blocks, average reconstruct time = 1462.48 usec, 1367.54 MB/s
171 | ```
172 | 
173 | Now Wirehair is 4x slower to encode and 2x slower to decode.  There is definitely a large, useful region of operation
174 | where the Fecal algorithm is preferred.
175 | 
176 | 
177 | #### Smaller input benchmark:
178 | 
179 | For random losses in 0.2 MB of data split into 100 equal-sized 2000 byte pieces:
180 | 
181 | ```
182 | Encoder(0.2 MB in 100 pieces, 1 losses): Input=5899.71 MB/s, Output=58.9971 MB/s, (Encode create: 6251.95 MB/s)
183 | Decoder(0.2 MB in 100 pieces, 1 losses): Input=8257.64 MB/s, Output=82.5764 MB/s, (Overhead = 0 pieces)
184 | 
185 | Encoder(0.2 MB in 100 pieces, 2 losses): Input=6040.47 MB/s, Output=122.018 MB/s, (Encode create: 6680.03 MB/s)
186 | Decoder(0.2 MB in 100 pieces, 2 losses): Input=6572.46 MB/s, Output=131.449 MB/s, (Overhead = 0.02 pieces)
187 | 
188 | Encoder(0.2 MB in 100 pieces, 3 losses): Input=5474.95 MB/s, Output=165.344 MB/s, (Encode create: 6391.82 MB/s)
189 | Decoder(0.2 MB in 100 pieces, 3 losses): Input=5274.26 MB/s, Output=158.228 MB/s, (Overhead = 0.02 pieces)
190 | 
191 | Encoder(0.2 MB in 100 pieces, 4 losses): Input=5298.01 MB/s, Output=212.98 MB/s, (Encode create: 6504.06 MB/s)
192 | Decoder(0.2 MB in 100 pieces, 4 losses): Input=5055.61 MB/s, Output=202.224 MB/s, (Overhead = 0.02 pieces)
193 | 
194 | Encoder(0.2 MB in 100 pieces, 5 losses): Input=5289.61 MB/s, Output=264.48 MB/s, (Encode create: 6768.19 MB/s)
195 | Decoder(0.2 MB in 100 pieces, 5 losses): Input=4785.83 MB/s, Output=239.292 MB/s, (Overhead = 0 pieces)
196 | 
197 | Encoder(0.2 MB in 100 pieces, 6 losses): Input=4945.6 MB/s, Output=297.23 MB/s, (Encode create: 6648.94 MB/s)
198 | Decoder(0.2 MB in 100 pieces, 6 losses): Input=4356.35 MB/s, Output=261.381 MB/s, (Overhead = 0.0100002 pieces)
199 | 
200 | Encoder(0.2 MB in 100 pieces, 7 losses): Input=4621.07 MB/s, Output=324.399 MB/s, (Encode create: 6466.21 MB/s)
201 | Decoder(0.2 MB in 100 pieces, 7 losses): Input=4024.95 MB/s, Output=281.747 MB/s, (Overhead = 0.02 pieces)
202 | 
203 | Encoder(0.2 MB in 100 pieces, 8 losses): Input=4338.4 MB/s, Output=347.072 MB/s, (Encode create: 6287.33 MB/s)
204 | Decoder(0.2 MB in 100 pieces, 8 losses): Input=3762.94 MB/s, Output=301.035 MB/s, (Overhead = 0 pieces)
205 | 
206 | Encoder(0.2 MB in 100 pieces, 9 losses): Input=4346.88 MB/s, Output=391.654 MB/s, (Encode create: 6548.79 MB/s)
207 | Decoder(0.2 MB in 100 pieces, 9 losses): Input=3592.6 MB/s, Output=323.334 MB/s, (Overhead = 0.0100002 pieces)
208 | 
209 | Encoder(0.2 MB in 100 pieces, 10 losses): Input=4168.4 MB/s, Output=417.257 MB/s, (Encode create: 6553.08 MB/s)
210 | Decoder(0.2 MB in 100 pieces, 10 losses): Input=3413.55 MB/s, Output=341.355 MB/s, (Overhead = 0.0100002 pieces)
211 | ```
212 | 
213 | 
214 | #### Comparisons:
215 | 
216 | Comparing with `cm256`, which is a Cauchy Reed-Solomon erasure code library using GF256:
217 | 
218 | ```
219 | Encoder: 2000 bytes k = 100 m = 1 : 7.69775 usec, 25981.6 MBps
220 | Decoder: 2000 bytes k = 100 m = 1 : 15.0289 usec, 13307.7 MBps
221 | Encoder: 2000 bytes k = 100 m = 2 : 37.7556 usec, 5297.23 MBps
222 | Decoder: 2000 bytes k = 100 m = 2 : 36.2894 usec, 5511.25 MBps
223 | Encoder: 2000 bytes k = 100 m = 3 : 69.2797 usec, 2886.85 MBps
224 | Decoder: 2000 bytes k = 100 m = 3 : 43.9871 usec, 4546.78 MBps
225 | Encoder: 2000 bytes k = 100 m = 4 : 56.8167 usec, 3520.09 MBps
226 | Decoder: 2000 bytes k = 100 m = 4 : 74.4116 usec, 2687.75 MBps
227 | Encoder: 2000 bytes k = 100 m = 5 : 107.402 usec, 1862.16 MBps
228 | Decoder: 2000 bytes k = 100 m = 5 : 102.637 usec, 1948.62 MBps
229 | Encoder: 2000 bytes k = 100 m = 6 : 271.987 usec, 735.329 MBps
230 | Decoder: 2000 bytes k = 100 m = 6 : 300.945 usec, 664.573 MBps
231 | Encoder: 2000 bytes k = 100 m = 7 : 371.691 usec, 538.081 MBps
232 | Decoder: 2000 bytes k = 100 m = 7 : 336.135 usec, 594.999 MBps
233 | Encoder: 2000 bytes k = 100 m = 8 : 244.129 usec, 819.241 MBps
234 | Decoder: 2000 bytes k = 100 m = 8 : 251.093 usec, 796.517 MBps
235 | Encoder: 2000 bytes k = 100 m = 9 : 282.251 usec, 708.59 MBps
236 | Decoder: 2000 bytes k = 100 m = 9 : 282.984 usec, 706.754 MBps
237 | Encoder: 2000 bytes k = 100 m = 10 : 307.543 usec, 650.315 MBps
238 | Decoder: 2000 bytes k = 100 m = 10 : 313.775 usec, 637.4 MBps
239 | ```
240 | 
241 | Fecal is only slower for the special single loss case where `cm256` uses XOR;
242 | in all other cases the new library is much faster.  For 10 losses, it is 6x faster.
243 | Note that `cm256` is also limited to 255 inputs or outputs.
244 | 
245 | 
246 | #### How fecal works:
247 | 
248 | The library uses Siamese Codes for a structured convolutional matrix.
249 | This matrix has a fast matrix-vector product involving mostly XOR operations.
250 | This allows Siamese Codes to encode and decode much faster than other
251 | convolutional codes built on Cauchy or Vandermonde matrices.
252 | Let's call this the Siamese Matrix Structure or something similar.
253 | 
254 | To produce an output packet, some preprocessing is performed.
255 | 
256 | The input data is first split into 8 "lanes".  The first "lane" contains every 8th symbol starting from input symbol 0 {e.g. 0, 8, 16, 24, ...}, and the symbols within a lane are summed together.
257 | The second "lane" starts from input symbol 1 and contains every 8th symbol after that {e.g. 1, 9, 17, 25, ...}.
258 | 
259 | For each "lane" there are three running "sums":
260 | 
261 | + Sum 0: Simple XOR between all inputs in that lane.
262 | + Sum 1: Each input is multiplied by a coefficient provided by `GetColumnValue`, and then XORed into the sum.
263 | + Sum 2: Each input is multiplied by the same coefficient squared, and then XORed into the sum.
264 | 
265 | This means there are 24 running sums, each with symbol_bytes bytes of data.
266 | 
267 | When an output is being produced (encoded), two running sums are formed temporarily.  Both are generated
268 | through the same process, and the result of one sum is multiplied by a row coefficient produced by the
269 | `GetRowValue` function and added to the other sum to produce the output.
270 | 
271 | To produce each of the two sums, a formula is followed.
272 | For each lane, the `GetRowOpcode` function returns which sums should be used.
273 | Sums 0, 1, and 2 are incorporated based on the function output.
274 | And then 1/16 of the input data are selected at random and XORed into each sum.
275 | 
276 | The Siamese codec and the Fecal decoder both will compute lane sums only when they are needed.
277 | Since some of the 24 sums (about 50%) are unneeded, the number of operations will vary for each row.
278 | 
279 | The final random XOR is similar to an LDPC code and allows the recovery properties of the code to perform well
280 | on a larger scale above about 32 input symbols.  The GF(2^8) multiplies dominate the recovery properties for smaller
281 | loss counts and input sizes.  The specific code used was selected by experimenting with different parameters until a
282 | desired failure rate was achieved with good performance characteristics.
283 | 
284 | As a result the Siamese Codes mainly use XORs, so they can run a lot faster than codes built on straight GF(2^8) multiply-add operations.
285 | Since they are still Convolutional Codes, the Siamese Codes also lend themselves to streaming use cases.
286 | 
287 | When AVX2 and SSSE3 are unavailable, Siamese takes 4x longer to decode
288 | and 2.6x longer to encode.  Encoding requires a lot more simple XOR ops
289 | so it is still pretty fast.  Decoding is usually really quick because
290 | average loss rates are low, but when needed it requires a lot more
291 | GF multiplies requiring table lookups which is slower.
292 | 
293 | 
294 | #### Credits
295 | 
296 | Software by Christopher A. Taylor <mrcatid@gmail.com>, making shit happen.
297 | 
298 | Please reach out if you need support or would like to collaborate on a project.
299 | 


--------------------------------------------------------------------------------
/fecal.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #include "fecal.h"
 30 | #include "gf256.h"
 31 | #include "FecalEncoder.h"
 32 | #include "FecalDecoder.h"
 33 | 
 34 | extern "C" {
 35 | 
 36 | 
 37 | //------------------------------------------------------------------------------
 38 | // Initialization API
 39 | 
 40 | static bool m_Initialized = false; // Becomes true once fecal_init_() succeeds
 41 | 
 42 | FECAL_EXPORT int fecal_init_(int version)
 43 | {
 44 |     // Refuse to run against a header from a different library version
 45 |     if (FECAL_VERSION != version)
 46 |         return Fecal_InvalidInput;
 47 |     // A nonzero gf256_init() result is surfaced as a platform error
 48 |     if (gf256_init() != 0)
 49 |         return Fecal_Platform;
 50 |     m_Initialized = true;
 51 |     return Fecal_Success;
 52 | }
 53 | 
 54 | 
 55 | //------------------------------------------------------------------------------
 56 | // Encoder API
 57 | 
 58 | FECAL_EXPORT FecalEncoder fecal_encoder_create(unsigned input_count, void* const * const input_data, uint64_t total_bytes)
 59 | {
 60 |     // Require at least one buffer, a non-null buffer array, and at least one byte per buffer
 61 |     if (0 == input_count || nullptr == input_data || input_count > total_bytes)
 62 |     {
 63 |         FECAL_DEBUG_BREAK; // Invalid input
 64 |         return nullptr;
 65 |     }
 66 | 
 67 |     // The library must have been initialized with fecal_init() before creating codecs
 68 |     FECAL_DEBUG_ASSERT(m_Initialized);
 69 |     if (!m_Initialized)
 70 |         return nullptr;
 71 | 
 72 |     // Non-throwing allocation so that failure is reported as a NULL handle
 73 |     fecal::Encoder* const codec = new(std::nothrow) fecal::Encoder;
 74 |     if (nullptr == codec)
 75 |     {
 76 |         FECAL_DEBUG_BREAK; // Out of memory
 77 |         return nullptr;
 78 |     }
 79 | 
 80 |     if (codec->Initialize(input_count, input_data, total_bytes) == Fecal_Success)
 81 |         return reinterpret_cast<FecalEncoder>( codec );
 82 |     delete codec; // Initialization failed: release the partially set up encoder
 83 |     return nullptr;
 84 | }
 85 | 
 86 | FECAL_EXPORT int fecal_encode(FecalEncoder encoder_v, FecalSymbol* symbol)
 87 | {
 88 |     // Both the opaque encoder handle and the symbol descriptor are required
 89 |     if (nullptr == encoder_v || nullptr == symbol)
 90 |         return Fecal_InvalidInput;
 91 |     fecal::Encoder* const impl = reinterpret_cast<fecal::Encoder*>( encoder_v );
 92 |     return impl->Encode(*symbol);
 93 | }
 94 | 
 95 | FECAL_EXPORT void fecal_free(void* codec_v)
 96 | {
 97 |     // Nothing to release for a null handle
 98 |     if (nullptr == codec_v)
 99 |         return;
100 |     // NOTE(review): assumes the handle addresses an fecal::ICodec object - confirm against its declaration
101 |     delete reinterpret_cast<fecal::ICodec*>( codec_v );
102 | }
103 | 
104 | 
105 | //------------------------------------------------------------------------------
106 | // Decoder API
107 | 
108 | FECAL_EXPORT FecalDecoder fecal_decoder_create(unsigned input_count, uint64_t total_bytes)
109 | {
110 |     // Require at least one input piece and at least one byte per piece
111 |     if (0 == input_count || input_count > total_bytes)
112 |     {
113 |         FECAL_DEBUG_BREAK; // Invalid input
114 |         return nullptr;
115 |     }
116 | 
117 |     // The library must have been initialized with fecal_init() before creating codecs
118 |     FECAL_DEBUG_ASSERT(m_Initialized);
119 |     if (!m_Initialized)
120 |         return nullptr;
121 | 
122 |     // Non-throwing allocation so that failure is reported as a NULL handle
123 |     fecal::Decoder* const codec = new(std::nothrow) fecal::Decoder;
124 |     if (nullptr == codec)
125 |     {
126 |         FECAL_DEBUG_BREAK; // Out of memory
127 |         return nullptr;
128 |     }
129 | 
130 |     if (codec->Initialize(input_count, total_bytes) == Fecal_Success)
131 |         return reinterpret_cast<FecalDecoder>( codec );
132 |     delete codec; // Initialization failed: release the partially set up decoder
133 |     return nullptr;
134 | }
135 | 
136 | FECAL_EXPORT int fecal_decoder_add_original(FecalDecoder decoder_v, const FecalSymbol* symbol)
137 | {
138 |     // Reject a missing decoder handle or symbol descriptor up front
139 |     if (nullptr == decoder_v || nullptr == symbol)
140 |         return Fecal_InvalidInput;
141 |     fecal::Decoder* const impl = reinterpret_cast<fecal::Decoder*>( decoder_v );
142 |     return impl->AddOriginal(*symbol);
143 | }
144 | 
145 | FECAL_EXPORT int fecal_decoder_add_recovery(FecalDecoder decoder_v, const FecalSymbol* symbol)
146 | {
147 |     // Reject a missing decoder handle or symbol descriptor up front
148 |     if (nullptr == decoder_v || nullptr == symbol)
149 |         return Fecal_InvalidInput;
150 |     fecal::Decoder* const impl = reinterpret_cast<fecal::Decoder*>( decoder_v );
151 |     return impl->AddRecovery(*symbol);
152 | }
153 | 
154 | FECAL_EXPORT int fecal_decode(FecalDecoder decoder_v, RecoveredSymbols* symbols)
155 | {
156 |     // Reject a missing decoder handle or output structure up front
157 |     if (nullptr == decoder_v || nullptr == symbols)
158 |         return Fecal_InvalidInput;
159 |     fecal::Decoder* const impl = reinterpret_cast<fecal::Decoder*>( decoder_v );
160 |     return impl->Decode(*symbols);
161 | }
162 | 
163 | FECAL_EXPORT int fecal_decoder_get(FecalDecoder decoder_v, unsigned input_index, FecalSymbol* symbol)
164 | {
165 |     // Reject a missing decoder handle or output symbol up front
166 |     if (nullptr == decoder_v || nullptr == symbol)
167 |         return Fecal_InvalidInput;
168 |     fecal::Decoder* const impl = reinterpret_cast<fecal::Decoder*>( decoder_v );
169 |     return impl->GetOriginal(input_index, *symbol);
170 | }
171 | 
172 | 
173 | } // extern "C"
174 | 


--------------------------------------------------------------------------------
/fecal.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #ifndef CAT_FECAL_H
 30 | #define CAT_FECAL_H
 31 | 
 32 | /*
 33 |     FEC-AL: Forward Error Correction at the Application Layer
 34 |     Block erasure code based on math from the Siamese library.
 35 | */
 36 | 
 37 | // Library version
 38 | #define FECAL_VERSION 2
 39 | 
 40 | // Tweak if the functions are exported or statically linked
 41 | //#define FECAL_DLL /* Defined when building/linking as DLL */
 42 | //#define FECAL_BUILDING /* Defined by the library makefile */
 43 | 
 44 | #if defined(FECAL_BUILDING) // Compiling the library itself
 45 | # if defined(FECAL_DLL)
 46 |     #define FECAL_EXPORT __declspec(dllexport) // DLL build: export the API
 47 | # else
 48 |     #define FECAL_EXPORT // Non-DLL library build: no decoration
 49 | # endif
 50 | #else // Header included by code that uses the library
 51 | # if defined(FECAL_DLL)
 52 |     #define FECAL_EXPORT __declspec(dllimport) // Import the API from the DLL
 53 | # else
 54 |     #define FECAL_EXPORT extern // Non-DLL use: plain external declaration
 55 | # endif
 56 | #endif // FECAL_BUILDING
 57 | 
 58 | #include <stdint.h>
 59 | 
 60 | 
 61 | #ifdef __cplusplus
 62 | extern "C" {
 63 | #endif
 64 | 
 65 | 
 66 | //------------------------------------------------------------------------------
 67 | // Initialization API
 68 | //
 69 | // Perform static initialization for the library, verifying that the platform
 70 | // is supported.
 71 | //
 72 | // Returns 0 on success and other values on failure.
 73 | 
 74 | FECAL_EXPORT int fecal_init_(int version);
 75 | #define fecal_init() fecal_init_(FECAL_VERSION)
 76 | 
 77 | 
 78 | //------------------------------------------------------------------------------
 79 | // Shared Constants / Datatypes
 80 | 
 81 | // Result codes of the int-returning API functions: positive = need more data, zero = success, negative = error
 82 | typedef enum FecalResultT
 83 | {
 84 |     Fecal_NeedMoreData      =  1, // More data is needed for this operation to succeed
 85 | 
 86 |     Fecal_Success           =  0, // Operation completed successfully
 87 | 
 88 |     Fecal_InvalidInput      = -1, // A function parameter was invalid
 89 |     Fecal_Platform          = -2, // Platform is unsupported
 90 |     Fecal_OutOfMemory       = -3, // Out of memory error occurred
 91 |     Fecal_Unexpected        = -4, // Unexpected error - Software bug?
 92 | } FecalResult;
 93 | 
 94 | // Encoder and Decoder handle types: distinct pointer types returned by the create functions and released with fecal_free()
 95 | typedef struct FecalEncoderImpl { int impl; }*FecalEncoder;
 96 | typedef struct FecalDecoderImpl { int impl; }*FecalDecoder;
 97 | 
 98 | // One original-data or recovery symbol: a buffer, its size in bytes, and its index
 99 | typedef struct FecalSymbolT
100 | {
101 |     // User-provided data pointer allocated by application.
102 |     void* Data;
103 | 
104 |     // User-provided number of bytes in the data buffer, for validation.
105 |     unsigned Bytes;
106 | 
107 |     // Original data: zero-based index in the input data array.
108 |     // Recovery data: application-provided recovery symbol index, also starting from 0.
109 |     unsigned Index;
110 | } FecalSymbol;
111 | 
112 | // Recovered data, used as the output parameter of fecal_decode()
113 | typedef struct RecoveredSymbolsT
114 | {
115 |     // Array of recovered input symbols
116 |     FecalSymbol* Symbols;
117 | 
118 |     // Number of symbols in the array
119 |     unsigned Count;
120 | } RecoveredSymbols;
121 | 
122 | 
123 | //------------------------------------------------------------------------------
124 | // Encoder API
125 | 
126 | /*
127 |     fecal_encoder_create()
128 | 
129 |     Create an encoder and set the input data.
130 | 
131 |     input_count: Number of input_data[] buffers provided.
132 |     input_data:  Array of pointers to input data.
133 |     total_bytes: Sum of the total bytes in all buffers.
134 | 
135 |     Buffer data must be available until the encoder is freed with fecal_free().
136 |     Buffer data does not need to be aligned.
137 |     Buffer data will not be modified, only read.
138 | 
139 |     Each buffer should have the same number of bytes except for the last one,
140 |     which can be shorter.
141 | 
142 |     Let symbol_bytes = The number of bytes in each input_data buffer:
143 | 
144 |         input_count = static_cast<unsigned>(
145 |             (total_bytes + symbol_bytes - 1) / symbol_bytes);
146 | 
147 |     Or if the number of pieces is known:
148 | 
149 |         symbol_bytes = static_cast<unsigned>(
150 |             (total_bytes + input_count - 1) / input_count);
151 | 
152 |     Let final_bytes = The final piece of input data size in bytes:
153 | 
154 |         final_bytes = static_cast<unsigned>(total_bytes % symbol_bytes);
155 |         if (final_bytes <= 0)
156 |             final_bytes = symbol_bytes;
157 | 
158 |     Returns NULL on failure.
159 | */
160 | FECAL_EXPORT FecalEncoder fecal_encoder_create(unsigned input_count, void* const * const input_data, uint64_t total_bytes);
161 | 
162 | /*
163 |     fecal_encode()
164 | 
165 |     Generate a recovery symbol.
166 | 
167 |     encoder:       Encoder from fecal_encoder_create().
168 |     symbol->Index: Application provided recovery symbol index starting from 0.
169 |     symbol->Data:  Application provided buffer to write the symbol to.
170 |     symbol->Bytes: Application provided number of bytes in the symbol buffer.
171 | 
172 |     Given total_bytes and input_count from fecal_encoder_create():
173 | 
174 |         symbol->Bytes = static_cast<unsigned>(
175 |             (total_bytes + input_count - 1) / input_count);
176 | 
177 |     Returns Fecal_Success on success.
178 |     Returns Fecal_InvalidInput if the symbol parameter was invalid, or the
179 |         codec is not initialized yet.
180 | */
181 | FECAL_EXPORT int fecal_encode(FecalEncoder encoder, FecalSymbol* symbol);
182 | 
183 | /*
184 |     fecal_free()
185 | 
186 |     Free memory associated with the created encoder or decoder.
187 | 
188 |     codec: Pointer returned by fecal_encoder_create() or fecal_decoder_create()
189 | */
190 | FECAL_EXPORT void fecal_free(void* codec);
191 | 
192 | 
193 | //------------------------------------------------------------------------------
194 | // Decoder API
195 | 
196 | /*
197 |     fecal_decoder_create()
198 | 
199 |     Create a decoder and set the input_count and total_bytes.
200 | 
201 |     input_count: Number of input_data[] buffers provided to fecal_encoder_create().
202 |     total_bytes: Sum of the total bytes in all buffers.
203 | 
204 |     See documentation for fecal_encoder_create() above.
205 | 
206 |     Returns NULL on failure.
207 | */
208 | FECAL_EXPORT FecalDecoder fecal_decoder_create(unsigned input_count, uint64_t total_bytes);
209 | 
210 | /*
211 |     fecal_decoder_add_original()
212 | 
213 |     Adds an original symbol to the decoder.
214 | 
215 |     decoder:       Decoder from fecal_decoder_create().
216 |     symbol->Index: Input data index from 0..(input_count-1).
217 |     symbol->Data:  Application provided buffer to read the symbol from.
218 |     symbol->Bytes: Application provided number of bytes in the symbol buffer.
219 | 
220 |     Buffer data must be available until the decoder is freed with fecal_free().
221 |     Buffer data does not need to be aligned.
222 |     Buffer data will not be modified, only read.
223 | 
224 |     Given total_bytes and input_count from fecal_encoder_create():
225 | 
226 |         // Calculate the number of bytes in each symbol
227 |         unsigned symbol_bytes = static_cast<unsigned>(
228 |             (total_bytes + input_count - 1) / input_count);
229 | 
230 |         // If it is the final symbol (final_bytes is computed as shown for fecal_encoder_create()):
231 |         if (symbol->Index == input_count - 1)
232 |             symbol->Bytes = final_bytes;
233 |         else
234 |             symbol->Bytes = symbol_bytes;
235 | 
236 |     Returns Fecal_Success on success.
237 |     Returns Fecal_InvalidInput if the symbol parameter was invalid, or the
238 |     codec is not initialized yet.
239 | */
240 | FECAL_EXPORT int fecal_decoder_add_original(FecalDecoder decoder, const FecalSymbol* symbol);
241 | 
242 | /*
243 |     fecal_decoder_add_recovery()
244 | 
245 |     Adds a recovery symbol to the decoder.
246 | 
247 |     decoder:       Decoder from fecal_decoder_create().
248 |     symbol->Index: Application provided recovery symbol index starting from 0.
249 |     symbol->Data:  Application provided buffer to read the symbol from.
250 |     symbol->Bytes: Application provided number of bytes in the symbol buffer.
251 | 
252 |     Buffer data must be available until the decoder is freed with fecal_free().
253 |     Buffer data does not need to be aligned.
254 |     Buffer data WILL BE MODIFIED.
255 | 
256 |     Given total_bytes and input_count from fecal_encoder_create():
257 | 
258 |         symbol->Bytes = static_cast<unsigned>(
259 |             (total_bytes + input_count - 1) / input_count);
260 | 
261 |     Returns Fecal_Success on success.
262 |     Returns Fecal_InvalidInput if the symbol parameter was invalid, or the
263 |     codec is not initialized yet.
264 | */
265 | FECAL_EXPORT int fecal_decoder_add_recovery(FecalDecoder decoder, const FecalSymbol* symbol);
266 | 
267 | /*
268 |     fecal_decode()
269 | 
270 |     Decode data if possible.
271 | 
272 |     decoder: Decoder from fecal_decoder_create().
273 |     symbols: Returned array of recovered input symbols.
274 | 
275 |     The returned data pointers are valid until fecal_free() is called.
276 |     Note that the final symbol size can be different from the rest.
277 |     The returned data pointers are taken from recovery symbols previously submitted.
278 | 
279 |     After decoding completes, the decoder object should be passed to fecal_free().
280 | 
281 |     Returns Fecal_Success if decode was successful.  `symbols` will contain results,
282 |         and fecal_decoder_get() can be used to request specific pieces.
283 |     Returns Fecal_NeedMoreData if more pieces must be added before decoding can proceed.
284 |     Returns Fecal_InvalidInput if the parameters are invalid.
285 | */
286 | FECAL_EXPORT int fecal_decode(FecalDecoder decoder, RecoveredSymbols* symbols);
287 | 
288 | /*
289 |     fecal_decoder_get()
290 | 
291 |     Get original data.
292 | 
293 |     decoder: Decoder from fecal_decoder_create().
294 |     input_index: Input data index from 0..(input_count-1).
295 |     symbol: Returned original symbol data.
296 | 
297 |     The returned data pointers are taken from original or recovery symbols previously submitted.
298 |     The returned data pointers are valid until fecal_free() is called.
299 |     Note that the final symbol size can be different from the rest.
300 | 
301 |     After decoding completes, the decoder object should be passed to fecal_free().
302 | 
303 |     Returns Fecal_Success on success.
304 |     Returns Fecal_NeedMoreData if the data is unavailable.
305 |     Returns Fecal_InvalidInput if the parameters are invalid.
306 | */
307 | FECAL_EXPORT int fecal_decoder_get(FecalDecoder decoder, unsigned input_index, FecalSymbol* symbol);
308 | 
309 | 
310 | #ifdef __cplusplus
311 | }
312 | #endif
313 | 
314 | 
315 | #endif // CAT_FECAL_H
316 | 


--------------------------------------------------------------------------------
/gf256.h:
--------------------------------------------------------------------------------
  1 | /** \file
  2 |     \brief GF(256) Main C API Header
  3 |     \copyright Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  4 | 
  5 |     Redistribution and use in source and binary forms, with or without
  6 |     modification, are permitted provided that the following conditions are met:
  7 | 
  8 |     * Redistributions of source code must retain the above copyright notice,
  9 |       this list of conditions and the following disclaimer.
 10 |     * Redistributions in binary form must reproduce the above copyright notice,
 11 |       this list of conditions and the following disclaimer in the documentation
 12 |       and/or other materials provided with the distribution.
 13 |     * Neither the name of GF256 nor the names of its contributors may be
 14 |       used to endorse or promote products derived from this software without
 15 |       specific prior written permission.
 16 | 
 17 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 18 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 19 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 20 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 21 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 22 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 23 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 24 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 25 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 26 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 27 |     POSSIBILITY OF SUCH DAMAGE.
 28 | */
 29 | 
 30 | #ifndef CAT_GF256_H
 31 | #define CAT_GF256_H
 32 | 
 33 | /** \page GF256 GF(256) Math Module
 34 | 
 35 |     This module provides efficient implementations of bulk
 36 |     GF(2^8) math operations over memory buffers.
 37 | 
 38 |     Addition is done over the base field in GF(2) meaning
 39 |     that addition is XOR between memory buffers.
 40 | 
 41 |     Multiplication is performed using table lookups via
 42 |     SIMD instructions.  This is somewhat slower than XOR,
 43 |     but fast enough to not become a major bottleneck when
 44 |     used sparingly.
 45 | */
 46 | 
 47 | #include <stdint.h> // uint32_t etc
 48 | #include <cstring> // memcpy, memset
 49 | 
 50 | /// Library header version
 51 | #define GF256_VERSION 2
 52 | 
 53 | //------------------------------------------------------------------------------
 54 | // Platform/Architecture
 55 | 
 56 | #if defined(ANDROID) || defined(IOS) || defined(LINUX_ARM) || defined(__powerpc__) || defined(__s390__)
 57 |     #define GF256_TARGET_MOBILE
 58 | #endif // ANDROID
 59 | 
 60 | #if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900)
 61 |     #define GF256_TRY_AVX2 /* 256-bit */
 62 |     #include <immintrin.h>
 63 |     #define GF256_ALIGN_BYTES 32
 64 | #else // __AVX2__
 65 |     #define GF256_ALIGN_BYTES 16
 66 | #endif // __AVX2__
 67 | 
 68 | #if !defined(GF256_TARGET_MOBILE)
 69 |     // Note: SSSE3/SSE2 headers are included for all non-mobile targets; AVX2 is enabled separately above
 70 |     #include <tmmintrin.h> // SSSE3: _mm_shuffle_epi8
 71 |     #include <emmintrin.h> // SSE2
 72 | #endif // GF256_TARGET_MOBILE
 73 | 
 74 | #if defined(HAVE_ARM_NEON_H)
 75 |     #include <arm_neon.h>
 76 | #endif // HAVE_ARM_NEON_H
 77 | 
 78 | #if defined(GF256_TARGET_MOBILE)
 79 | 
 80 |     #define GF256_ALIGNED_ACCESSES /* Inputs must be aligned to GF256_ALIGN_BYTES */
 81 | 
 82 | # if defined(HAVE_ARM_NEON_H)
 83 |     // Compiler-specific 128-bit SIMD register keyword
 84 |     #define GF256_M128 uint8x16_t
 85 |     #define GF256_TRY_NEON
 86 | #else
 87 |     #define GF256_M128 uint64_t
 88 | # endif
 89 | 
 90 | #else // GF256_TARGET_MOBILE
 91 | 
 92 |     // Compiler-specific 128-bit SIMD register keyword
 93 |     #define GF256_M128 __m128i
 94 | 
 95 | #endif // GF256_TARGET_MOBILE
 96 | 
 97 | #ifdef GF256_TRY_AVX2
 98 |     // Compiler-specific 256-bit SIMD register keyword
 99 |     #define GF256_M256 __m256i
100 | #endif
101 | 
102 | // Compiler-specific restrict keyword (an extension; C++ has no standard restrict)
103 | #define GF256_RESTRICT __restrict
104 | 
105 | // Compiler-specific force inline keyword
106 | #ifdef _MSC_VER
107 |     #define GF256_FORCE_INLINE inline __forceinline
108 | #else
109 |     #define GF256_FORCE_INLINE inline __attribute__((always_inline))
110 | #endif
111 | 
112 | // Compiler-specific alignment keyword
113 | // Note: Alignment only matters for ARM NEON where it should be 16
114 | #ifdef _MSC_VER
115 |     #define GF256_ALIGNED __declspec(align(GF256_ALIGN_BYTES))
116 | #else // _MSC_VER
117 |     #define GF256_ALIGNED __attribute__((aligned(GF256_ALIGN_BYTES)))
118 | #endif // _MSC_VER
119 | 
120 | #ifdef __cplusplus
121 | extern "C" {
122 | #endif // __cplusplus
123 | 
124 | 
125 | //------------------------------------------------------------------------------
126 | // Portability
127 | 
128 | /// Swap two memory buffers in-place
129 | extern void gf256_memswap(void * GF256_RESTRICT vx, void * GF256_RESTRICT vy, int bytes);
130 | 
131 | 
132 | //------------------------------------------------------------------------------
133 | // GF(256) Context
134 | 
135 | #ifdef _MSC_VER
136 |     #pragma warning(push)
137 |     #pragma warning(disable: 4324) // warning C4324: 'gf256_ctx' : structure was padded due to __declspec(align())
138 | #endif // _MSC_VER
139 | 
140 | /// The context object stores the lookup tables required to perform library calculations
141 | struct gf256_ctx
142 | {
143 |     /// GF256_M128 tables with 256 entries each.  Memory is aligned since the SIMD
144 |     /// instructions benefit from or require aligned accesses to the table data.
145 |     struct
146 |     {
147 |         GF256_ALIGNED GF256_M128 TABLE_LO_Y[256];
148 |         GF256_ALIGNED GF256_M128 TABLE_HI_Y[256];
149 |     } MM128;
150 | #ifdef GF256_TRY_AVX2
151 |     struct // GF256_M256 tables, present only when GF256_TRY_AVX2 is defined
152 |     {
153 |         GF256_ALIGNED GF256_M256 TABLE_LO_Y[256];
154 |         GF256_ALIGNED GF256_M256 TABLE_HI_Y[256];
155 |     } MM256;
156 | #endif // GF256_TRY_AVX2
157 | 
158 |     /// Mul/Div/Inv/Sqr tables; MUL and DIV are 256x256 and are read as [(y << 8) + x]
159 |     uint8_t GF256_MUL_TABLE[256 * 256];
160 |     uint8_t GF256_DIV_TABLE[256 * 256];
161 |     uint8_t GF256_INV_TABLE[256];
162 |     uint8_t GF256_SQR_TABLE[256];
163 | 
164 |     /// Log/Exp tables
165 |     uint16_t GF256_LOG_TABLE[256];
166 |     uint8_t GF256_EXP_TABLE[512 * 2 + 1];
167 | 
168 |     /// Polynomial used (NOTE(review): presumably identifies the field's generator polynomial -- confirm in gf256.cpp)
169 |     unsigned Polynomial;
170 | };
171 | 
172 | #ifdef _MSC_VER
173 |     #pragma warning(pop)
174 | #endif // _MSC_VER
175 | 
176 | extern gf256_ctx GF256Ctx;
177 | 
178 | 
179 | //------------------------------------------------------------------------------
180 | // Initialization
181 | 
182 | /**
183 |     Initialize the library context, filling in the tables.
184 |     
185 |     Thread-safety / Usage Notes:
186 |     
187 |     It is perfectly safe and encouraged to use the gf256_ctx object from multiple
188 |     threads.  The gf256_init() is relatively expensive and should only be done
189 |     once, though it will take less than a millisecond.
190 |     
191 |     The context is the global GF256Ctx object declared above.  gf256_init()
192 |     takes no arguments; the macro passes GF256_VERSION to gf256_init_().
193 |     
194 |     Example:
195 |        if (gf256_init() != 0)
196 |            return false; // initialization failed
197 |     
198 |     Returns 0 on success and other values on failure.
199 | */
200 | extern int gf256_init_(int version);
201 | #define gf256_init() gf256_init_(GF256_VERSION)
202 | 
203 | 
204 | //------------------------------------------------------------------------------
205 | // Math Operations
206 | 
207 | /// Return x + y in GF(256); addition in this field is a bitwise XOR of the operands.
208 | static GF256_FORCE_INLINE uint8_t gf256_add(uint8_t x, uint8_t y)
209 | {
210 |     return (uint8_t)(x ^ y);
211 | }
212 | 
213 | /// Return x * y, read from GF256Ctx.GF256_MUL_TABLE at index (y << 8) + x.
214 | /// For repeated multiplication by a constant, it is faster to put the constant in y.
215 | static GF256_FORCE_INLINE uint8_t gf256_mul(uint8_t x, uint8_t y)
216 | {
217 |     return GF256Ctx.GF256_MUL_TABLE[((unsigned)y << 8) + x];
218 | }
219 | 
220 | /// Return x / y, read from GF256Ctx.GF256_DIV_TABLE at index (y << 8) + x.
221 | /// Memory-access optimized for constant divisors in y.
222 | static GF256_FORCE_INLINE uint8_t gf256_div(uint8_t x, uint8_t y)
223 | {
224 |     return GF256Ctx.GF256_DIV_TABLE[((unsigned)y << 8) + x];
225 | }
226 | 
227 | /// Return 1 / x, read from GF256Ctx.GF256_INV_TABLE at index x.
228 | static GF256_FORCE_INLINE uint8_t gf256_inv(uint8_t x)
229 | {
230 |     return GF256Ctx.GF256_INV_TABLE[x];
231 | }
232 | 
233 | /// Return x * x, read from GF256Ctx.GF256_SQR_TABLE at index x.
234 | static GF256_FORCE_INLINE uint8_t gf256_sqr(uint8_t x)
235 | {
236 |     return GF256Ctx.GF256_SQR_TABLE[x];
237 | }
238 | 
239 | 
240 | //------------------------------------------------------------------------------
241 | // Bulk Memory Math Operations
242 | 
243 | /// Performs "x[] += y[]" bulk memory XOR operation
244 | extern void gf256_add_mem(void * GF256_RESTRICT vx,
245 |                           const void * GF256_RESTRICT vy, int bytes);
246 | 
247 | /// Performs "z[] += x[] + y[]" bulk memory operation
248 | extern void gf256_add2_mem(void * GF256_RESTRICT vz, const void * GF256_RESTRICT vx,
249 |                            const void * GF256_RESTRICT vy, int bytes);
250 | 
251 | /// Performs "z[] = x[] + y[]" bulk memory operation
252 | extern void gf256_addset_mem(void * GF256_RESTRICT vz, const void * GF256_RESTRICT vx,
253 |                              const void * GF256_RESTRICT vy, int bytes);
254 | 
255 | /// Performs "z[] = x[] * y" bulk memory operation
256 | extern void gf256_mul_mem(void * GF256_RESTRICT vz,
257 |                           const void * GF256_RESTRICT vx, uint8_t y, int bytes);
258 | 
259 | /// Performs "z[] += x[] * y" bulk memory operation
260 | extern void gf256_muladd_mem(void * GF256_RESTRICT vz, uint8_t y,
261 |                              const void * GF256_RESTRICT vx, int bytes);
262 | 
263 | /// Performs "z[] = x[] / y" bulk memory operation
264 | static GF256_FORCE_INLINE void gf256_div_mem(void * GF256_RESTRICT vz,
265 |                                              const void * GF256_RESTRICT vx, uint8_t y, int bytes)
266 | {
267 |     // Multiply by inverse; 1 is its own inverse, so that case skips the table lookup
268 |     gf256_mul_mem(vz, vx, y == 1 ? (uint8_t)1 : GF256Ctx.GF256_INV_TABLE[y], bytes);
269 | }
270 | 
271 | 
272 | //------------------------------------------------------------------------------
273 | // Misc Operations
274 | 
275 | /// Swap two memory buffers in-place
276 | extern void gf256_memswap(void * GF256_RESTRICT vx, void * GF256_RESTRICT vy, int bytes);
277 | 
278 | 
279 | #ifdef __cplusplus
280 | }
281 | #endif // __cplusplus
282 | 
283 | #endif // CAT_GF256_H
284 | 


--------------------------------------------------------------------------------
/proj/msvc/Fecal.sln:
--------------------------------------------------------------------------------
 1 | ﻿
 2 | Microsoft Visual Studio Solution File, Format Version 12.00
 3 | # Visual Studio 14
 4 | VisualStudioVersion = 14.0.25420.1
 5 | MinimumVisualStudioVersion = 10.0.40219.1
 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Fecal", "LibFecal.vcxproj", "{FF5912EF-7424-4974-B877-62B03D5046C6}"
 7 | EndProject
 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FecalBenchmark", "..\..\tests\msvc\FecalBenchmark.vcxproj", "{32176592-2F30-4BD5-B645-EB11C8D3453E}"
 9 | 	ProjectSection(ProjectDependencies) = postProject
10 | 		{FF5912EF-7424-4974-B877-62B03D5046C6} = {FF5912EF-7424-4974-B877-62B03D5046C6}
11 | 	EndProjectSection
12 | EndProject
13 | Global
14 | 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
15 | 		Debug|Win32 = Debug|Win32
16 | 		Debug|x64 = Debug|x64
17 | 		Release|Win32 = Release|Win32
18 | 		Release|x64 = Release|x64
19 | 	EndGlobalSection
20 | 	GlobalSection(ProjectConfigurationPlatforms) = postSolution
21 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|Win32.ActiveCfg = Debug|Win32
22 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|Win32.Build.0 = Debug|Win32
23 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|x64.ActiveCfg = Debug|x64
24 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|x64.Build.0 = Debug|x64
25 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Release|Win32.ActiveCfg = Release|Win32
26 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Release|Win32.Build.0 = Release|Win32
27 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Release|x64.ActiveCfg = Release|x64
28 | 		{FF5912EF-7424-4974-B877-62B03D5046C6}.Release|x64.Build.0 = Release|x64
29 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|Win32.ActiveCfg = Debug|Win32
30 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|Win32.Build.0 = Debug|Win32
31 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|x64.ActiveCfg = Debug|x64
32 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|x64.Build.0 = Debug|x64
33 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|Win32.ActiveCfg = Release|Win32
34 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|Win32.Build.0 = Release|Win32
35 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|x64.ActiveCfg = Release|x64
36 | 		{E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|x64.Build.0 = Release|x64
37 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|Win32.ActiveCfg = Debug|Win32
38 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|Win32.Build.0 = Debug|Win32
39 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|x64.ActiveCfg = Debug|x64
40 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|x64.Build.0 = Debug|x64
41 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|Win32.ActiveCfg = Release|Win32
42 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|Win32.Build.0 = Release|Win32
43 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|x64.ActiveCfg = Release|x64
44 | 		{32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|x64.Build.0 = Release|x64
45 | 	EndGlobalSection
46 | 	GlobalSection(SolutionProperties) = preSolution
47 | 		HideSolutionNode = FALSE
48 | 	EndGlobalSection
49 | EndGlobal
50 | 


--------------------------------------------------------------------------------
/proj/msvc/LibFecal.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{FF5912EF-7424-4974-B877-62B03D5046C6}</ProjectGuid>
 23 |     <RootNamespace>LibFecal</RootNamespace>
 24 |     <ProjectName>Fecal</ProjectName>
 25 |   </PropertyGroup>
 26 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 27 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 28 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 29 |     <UseDebugLibraries>true</UseDebugLibraries>
 30 |     <PlatformToolset>v140</PlatformToolset>
 31 |     <CharacterSet>MultiByte</CharacterSet>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 34 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 35 |     <UseDebugLibraries>true</UseDebugLibraries>
 36 |     <PlatformToolset>v140</PlatformToolset>
 37 |     <CharacterSet>MultiByte</CharacterSet>
 38 |   </PropertyGroup>
 39 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 40 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 41 |     <UseDebugLibraries>false</UseDebugLibraries>
 42 |     <PlatformToolset>v140</PlatformToolset>
 43 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 44 |     <CharacterSet>MultiByte</CharacterSet>
 45 |   </PropertyGroup>
 46 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 47 |     <ConfigurationType>StaticLibrary</ConfigurationType>
 48 |     <UseDebugLibraries>false</UseDebugLibraries>
 49 |     <PlatformToolset>v140</PlatformToolset>
 50 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 51 |     <CharacterSet>MultiByte</CharacterSet>
 52 |   </PropertyGroup>
 53 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 54 |   <ImportGroup Label="ExtensionSettings">
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 57 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 58 |   </ImportGroup>
 59 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 60 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 61 |   </ImportGroup>
 62 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 63 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 64 |   </ImportGroup>
 65 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 66 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 67 |   </ImportGroup>
 68 |   <PropertyGroup Label="UserMacros" />
 69 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 70 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 71 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 72 |   </PropertyGroup>
 73 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 74 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 75 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 76 |   </PropertyGroup>
 77 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 78 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 79 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 80 |   </PropertyGroup>
 81 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 83 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 84 |   </PropertyGroup>
 85 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 86 |     <ClCompile>
 87 |       <WarningLevel>Level4</WarningLevel>
 88 |       <Optimization>Disabled</Optimization>
 89 |       <SDLCheck>true</SDLCheck>
 90 |       <PreprocessorDefinitions>SIAMESE_BUILDING;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 91 |       <TreatWarningAsError>true</TreatWarningAsError>
 92 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 93 |     </ClCompile>
 94 |     <Link>
 95 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 96 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
 97 |     </Link>
 98 |     <PostBuildEvent>
 99 |       <Command>
100 |       </Command>
101 |     </PostBuildEvent>
102 |   </ItemDefinitionGroup>
103 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
104 |     <ClCompile>
105 |       <WarningLevel>Level4</WarningLevel>
106 |       <Optimization>Disabled</Optimization>
107 |       <SDLCheck>true</SDLCheck>
108 |       <PreprocessorDefinitions>SIAMESE_BUILDING;%(PreprocessorDefinitions)</PreprocessorDefinitions>
109 |       <TreatWarningAsError>true</TreatWarningAsError>
110 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
111 |     </ClCompile>
112 |     <Link>
113 |       <GenerateDebugInformation>true</GenerateDebugInformation>
114 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
115 |     </Link>
116 |     <PostBuildEvent>
117 |       <Command>
118 |       </Command>
119 |     </PostBuildEvent>
120 |   </ItemDefinitionGroup>
121 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
122 |     <ClCompile>
123 |       <WarningLevel>Level4</WarningLevel>
124 |       <Optimization>Full</Optimization>
125 |       <FunctionLevelLinking>true</FunctionLevelLinking>
126 |       <IntrinsicFunctions>true</IntrinsicFunctions>
127 |       <SDLCheck>true</SDLCheck>
128 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
129 |       <BufferSecurityCheck>false</BufferSecurityCheck>
130 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
131 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
132 |       <OmitFramePointers>true</OmitFramePointers>
133 |       <PreprocessorDefinitions>SIAMESE_BUILDING;%(PreprocessorDefinitions)</PreprocessorDefinitions>
134 |       <WholeProgramOptimization>true</WholeProgramOptimization>
135 |       <TreatWarningAsError>true</TreatWarningAsError>
136 |       <RuntimeTypeInfo>false</RuntimeTypeInfo>
137 |     </ClCompile>
138 |     <Link>
139 |       <GenerateDebugInformation>true</GenerateDebugInformation>
140 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
141 |       <OptimizeReferences>true</OptimizeReferences>
142 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
143 |     </Link>
144 |     <PostBuildEvent>
145 |       <Command>
146 |       </Command>
147 |     </PostBuildEvent>
148 |   </ItemDefinitionGroup>
149 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
150 |     <ClCompile>
151 |       <WarningLevel>Level4</WarningLevel>
152 |       <Optimization>Full</Optimization>
153 |       <FunctionLevelLinking>true</FunctionLevelLinking>
154 |       <IntrinsicFunctions>true</IntrinsicFunctions>
155 |       <SDLCheck>true</SDLCheck>
156 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
157 |       <BufferSecurityCheck>false</BufferSecurityCheck>
158 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
159 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
160 |       <OmitFramePointers>true</OmitFramePointers>
161 |       <PreprocessorDefinitions>SIAMESE_BUILDING;%(PreprocessorDefinitions)</PreprocessorDefinitions>
162 |       <WholeProgramOptimization>true</WholeProgramOptimization>
163 |       <TreatWarningAsError>true</TreatWarningAsError>
164 |       <RuntimeTypeInfo>false</RuntimeTypeInfo>
165 |     </ClCompile>
166 |     <Link>
167 |       <GenerateDebugInformation>true</GenerateDebugInformation>
168 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
169 |       <OptimizeReferences>true</OptimizeReferences>
170 |       <OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
171 |     </Link>
172 |     <PostBuildEvent>
173 |       <Command>
174 |       </Command>
175 |     </PostBuildEvent>
176 |   </ItemDefinitionGroup>
177 |   <ItemGroup>
178 |     <ClCompile Include="..\..\fecal.cpp" />
179 |     <ClCompile Include="..\..\gf256.cpp" />
180 |     <ClCompile Include="..\..\FecalCommon.cpp" />
181 |     <ClCompile Include="..\..\FecalDecoder.cpp" />
182 |     <ClCompile Include="..\..\FecalEncoder.cpp" />
183 |   </ItemGroup>
184 |   <ItemGroup>
185 |     <ClInclude Include="..\..\fecal.h" />
186 |     <ClInclude Include="..\..\gf256.h" />
187 |     <ClInclude Include="..\..\FecalCommon.h" />
188 |     <ClInclude Include="..\..\FecalDecoder.h" />
189 |     <ClInclude Include="..\..\FecalEncoder.h" />
190 |   </ItemGroup>
191 |   <ItemGroup>
192 |     <None Include="..\..\README.md" />
193 |   </ItemGroup>
194 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
195 |   <ImportGroup Label="ExtensionTargets">
196 |   </ImportGroup>
197 | </Project>


--------------------------------------------------------------------------------
/proj/msvc/LibFecal.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="Source Files">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="Header Files">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="Resource Files">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="..\..\gf256.cpp">
19 |       <Filter>Source Files</Filter>
20 |     </ClCompile>
21 |     <ClCompile Include="..\..\fecal.cpp">
22 |       <Filter>Source Files</Filter>
23 |     </ClCompile>
24 |     <ClCompile Include="..\..\FecalCommon.cpp">
25 |       <Filter>Source Files</Filter>
26 |     </ClCompile>
27 |     <ClCompile Include="..\..\FecalDecoder.cpp">
28 |       <Filter>Source Files</Filter>
29 |     </ClCompile>
30 |     <ClCompile Include="..\..\FecalEncoder.cpp">
31 |       <Filter>Source Files</Filter>
32 |     </ClCompile>
33 |   </ItemGroup>
34 |   <ItemGroup>
35 |     <ClInclude Include="..\..\gf256.h">
36 |       <Filter>Source Files</Filter>
37 |     </ClInclude>
38 |     <ClInclude Include="..\..\fecal.h">
39 |       <Filter>Header Files</Filter>
40 |     </ClInclude>
41 |     <ClInclude Include="..\..\FecalCommon.h">
42 |       <Filter>Source Files</Filter>
43 |     </ClInclude>
44 |     <ClInclude Include="..\..\FecalDecoder.h">
45 |       <Filter>Source Files</Filter>
46 |     </ClInclude>
47 |     <ClInclude Include="..\..\FecalEncoder.h">
48 |       <Filter>Source Files</Filter>
49 |     </ClInclude>
50 |   </ItemGroup>
51 |   <ItemGroup>
52 |     <None Include="..\..\README.md">
53 |       <Filter>Header Files</Filter>
54 |     </None>
55 |   </ItemGroup>
56 | </Project>


--------------------------------------------------------------------------------
/tests/benchmark.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |     Copyright (c) 2017 Christopher A. Taylor.  All rights reserved.
  3 | 
  4 |     Redistribution and use in source and binary forms, with or without
  5 |     modification, are permitted provided that the following conditions are met:
  6 | 
  7 |     * Redistributions of source code must retain the above copyright notice,
  8 |       this list of conditions and the following disclaimer.
  9 |     * Redistributions in binary form must reproduce the above copyright notice,
 10 |       this list of conditions and the following disclaimer in the documentation
 11 |       and/or other materials provided with the distribution.
 12 |     * Neither the name of Fecal nor the names of its contributors may be
 13 |       used to endorse or promote products derived from this software without
 14 |       specific prior written permission.
 15 | 
 16 |     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 17 |     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 18 |     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 19 |     ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 20 |     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 21 |     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 22 |     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 23 |     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 24 |     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 25 |     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 26 |     POSSIBILITY OF SUCH DAMAGE.
 27 | */
 28 | 
 29 | #include "../FecalCommon.h"
 30 | #include "../fecal.h"
 31 | 
 32 | #include <list>
 33 | #include <memory>
 34 | #include <iostream>
 35 | #include <string>
 36 | using namespace std;
 37 | 
 38 | //#define TEST_DATA_ALL_SAME
 39 | //#define TEST_LOSE_FIRST_K_PACKETS
 40 | 
 41 | 
 42 | //------------------------------------------------------------------------------
 43 | // Windows
 44 | 
 45 | #ifdef _WIN32
 46 |     #define WIN32_LEAN_AND_MEAN
 47 | 
 48 |     #ifndef _WINSOCKAPI_
 49 |         #define DID_DEFINE_WINSOCKAPI
 50 |         #define _WINSOCKAPI_
 51 |     #endif
 52 |     #ifndef NOMINMAX
 53 |         #define NOMINMAX
 54 |     #endif
 55 |     #ifndef _WIN32_WINNT
 56 |         #define _WIN32_WINNT 0x0601 /* Windows 7+ */
 57 |     #endif
 58 | 
 59 |     #include <windows.h>
 60 | #endif
 61 | 
 62 | #ifdef DID_DEFINE_WINSOCKAPI
 63 |     #undef _WINSOCKAPI_
 64 |     #undef DID_DEFINE_WINSOCKAPI
 65 | #endif
 66 | 
 67 | 
 68 | //------------------------------------------------------------------------------
 69 | // Threads
 70 | 
 71 | static bool SetCurrentThreadPriority() // Adjusts the caller's scheduling priority; returns true on success
 72 | {
 73 | #ifdef _WIN32
 74 |     return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL); // nonzero result means success
 75 | #else
 76 |     return -1 != nice(2); // NOTE(review): a positive increment raises niceness (lowers priority), unlike the Windows branch -- confirm intent
 77 | #endif
 78 | }
 79 | 
 80 | 
 81 | //------------------------------------------------------------------------------
 82 | // Timing
 83 | // Returns a timestamp in microseconds: performance counter on Windows (0 if the counter is unavailable), gettimeofday() elsewhere.
 84 | static uint64_t GetTimeUsec()
 85 | {
 86 | #ifdef _WIN32
 87 |     LARGE_INTEGER timeStamp = {};
 88 |     if (!::QueryPerformanceCounter(&timeStamp))
 89 |         return 0;
 90 |     static double PerfFrequencyInverse = 0.; // microseconds per counter tick, computed on first use and cached
 91 |     if (PerfFrequencyInverse == 0.)
 92 |     {
 93 |         LARGE_INTEGER freq = {};
 94 |         if (!::QueryPerformanceFrequency(&freq) || freq.QuadPart == 0)
 95 |             return 0;
 96 |         PerfFrequencyInverse = 1000000. / (double)freq.QuadPart;
 97 |     }
 98 |     return (uint64_t)(PerfFrequencyInverse * timeStamp.QuadPart);
 99 | #else
100 |     struct timeval tv = {}; // zero-initialized so a failed gettimeofday() yields 0 rather than stack garbage
101 |     gettimeofday(&tv, nullptr);
102 |     return (uint64_t)tv.tv_sec * 1000000u + (uint64_t)tv.tv_usec; // widen before multiplying: 1000000 * tv_sec overflows when tv_sec is 32 bits wide
103 | #endif // _WIN32
104 | }
105 | 
106 | 
107 | //------------------------------------------------------------------------------
108 | // Self-Checking Packet
109 | // Fills `bytes` bytes at `packet` with PRNG-driven content laid out so that CheckPacket() can validate it later.
110 | static void WriteRandomSelfCheckingPacket(fecal::PCGRandom& prng, void* packet, unsigned bytes)
111 | {
112 |     uint8_t* buffer = (uint8_t*)packet;
113 | #ifdef TEST_DATA_ALL_SAME
114 |     if (bytes != 0)
115 | #else
116 |     if (bytes < 16) // short packets: no room for the header, so repeat one random byte instead
117 | #endif
118 |     {
119 |         FECAL_DEBUG_ASSERT(bytes >= 2); // CheckPacket() rejects packets shorter than 2 bytes
120 |         buffer[0] = (uint8_t)prng.Next();
121 |         for (unsigned i = 1; i < bytes; ++i)
122 |         {
123 |             buffer[i] = buffer[0];
124 |         }
125 |     }
126 |     else
127 |     {
128 |         uint32_t crc = bytes; // running checksum is seeded with the packet length
129 |         *(uint32_t*)(buffer + 4) = bytes; // bytes 4..7: packet length -- NOTE(review): type-punned store, assumes this access is acceptable on the target
130 |         for (unsigned i = 8; i < bytes; ++i) // bytes 8..end: random payload
131 |         {
132 |             uint8_t v = (uint8_t)prng.Next();
133 |             buffer[i] = v;
134 |             crc = (crc << 3) | (crc >> (32 - 3)); // rotate left by 3 bits
135 |             crc += v;
136 |         }
137 |         *(uint32_t*)buffer = crc; // bytes 0..3: checksum over the payload
138 |     }
139 | }
140 | // Validates a packet produced by WriteRandomSelfCheckingPacket(); returns true when the content is self-consistent for `bytes`.
141 | static bool CheckPacket(const void* packet, unsigned bytes)
142 | {
143 |     uint8_t* buffer = (uint8_t*)packet; // only read from below
144 | #ifdef TEST_DATA_ALL_SAME
145 |     if (bytes != 0)
146 | #else
147 |     if (bytes < 16) // short form: every byte must equal the first one
148 | #endif
149 |     {
150 |         if (bytes < 2)
151 |             return false;
152 | 
153 |         uint8_t v = buffer[0];
154 |         for (unsigned i = 1; i < bytes; ++i)
155 |         {
156 |             if (buffer[i] != v)
157 |                 return false;
158 |         }
159 |     }
160 |     else
161 |     {
162 |         uint32_t crc = bytes; // same seed as the writer
163 |         uint32_t readBytes = *(uint32_t*)(buffer + 4); // bytes 4..7: stored packet length
164 |         if (readBytes != bytes)
165 |             return false;
166 |         for (unsigned i = 8; i < bytes; ++i) // recompute the checksum over the payload
167 |         {
168 |             uint8_t v = buffer[i];
169 |             crc = (crc << 3) | (crc >> (32 - 3)); // rotate left by 3 bits
170 |             crc += v;
171 |         }
172 |         uint32_t readCRC = *(uint32_t*)buffer; // bytes 0..3: stored checksum
173 |         if (readCRC != crc)
174 |             return false;
175 |     }
176 |     return true;
177 | }
178 | 
179 | 
180 | //------------------------------------------------------------------------------
181 | // FunctionTimer
182 | // Accumulates the call count and total elapsed microseconds of one named function across BeginCall()/EndCall() pairs.
183 | class FunctionTimer
184 | {
185 | public:
186 |     FunctionTimer(const std::string& name)
187 |         : FunctionName(name)
188 |     {
189 |     }
190 |     void BeginCall() // marks the start of a timed call; calls must not nest
191 |     {
192 |         FECAL_DEBUG_ASSERT(t0 == 0);
193 |         t0 = GetTimeUsec();
194 |     }
195 |     void EndCall() // adds the time since BeginCall() to the totals
196 |     {
197 |         FECAL_DEBUG_ASSERT(t0 != 0);
198 |         uint64_t t1 = GetTimeUsec();
199 |         ++Invokations;
200 |         TotalUsec += t1 - t0;
201 |         t0 = 0; // zero doubles as the "no call in progress" marker
202 |     }
203 |     void Reset() // clears the accumulated statistics
204 |     {
205 |         FECAL_DEBUG_ASSERT(t0 == 0);
206 |         t0 = 0;
207 |         Invokations = 0;
208 |         TotalUsec = 0;
209 |     }
210 |     void Print(unsigned trials) // prints per-trial and per-call averages; a zero divisor prints 0 instead of nan/inf
211 |     {
212 |         cout << FunctionName << " called " << (trials ? Invokations / (float)trials : 0.f) << " times per trial (avg).  " << (Invokations ? TotalUsec / (double)Invokations : 0.) << " usec avg for all invokations.  " << (trials ? TotalUsec / (float)trials : 0.f) << " usec (avg) of " << trials << " trials" << endl;
213 |     }
214 | 
215 |     uint64_t t0 = 0; // start timestamp of the call in progress, 0 when idle
216 |     uint64_t Invokations = 0; // number of completed BeginCall()/EndCall() pairs
217 |     uint64_t TotalUsec = 0; // sum of elapsed microseconds over all completed calls
218 |     std::string FunctionName; // label printed by Print()
219 | };
220 | 
221 | 
222 | //------------------------------------------------------------------------------
223 | // Utility: Deck Shuffling function
224 | 
225 | /*
226 |     Given a PRNG, generate a deck of cards in a random order.
227 |     The deck will contain elements with values between 0 and count - 1.
228 | */
229 | // deck must hold at least `count` entries and count must be >= 1: deck[0] is written unconditionally. Values are stored as uint16_t.
230 | static void ShuffleDeck16(fecal::PCGRandom &prng, uint16_t * GF256_RESTRICT deck, uint32_t count)
231 | {
232 |     deck[0] = 0;
233 | 
234 |     // If we can unroll 4 times,
235 |     if (count <= 256)
236 |     {
237 |         for (uint32_t ii = 1;;)
238 |         {
239 |             uint32_t jj, rv = prng.Next(); // one 32-bit draw supplies up to four 8-bit picks
240 | 
241 |             // 8-bit unroll
242 |             switch (count - ii) // number of cards still to place
243 |             {
244 |             default: // four or more left: place four, then draw again
245 |                 jj = (uint8_t)rv % ii;
246 |                 deck[ii] = deck[jj]; // move the card at a random earlier slot to the new slot...
247 |                 deck[jj] = ii; // ...and put the new card in its place
248 |                 ++ii;
249 |                 jj = (uint8_t)(rv >> 8) % ii;
250 |                 deck[ii] = deck[jj];
251 |                 deck[jj] = ii;
252 |                 ++ii;
253 |                 jj = (uint8_t)(rv >> 16) % ii;
254 |                 deck[ii] = deck[jj];
255 |                 deck[jj] = ii;
256 |                 ++ii;
257 |                 jj = (uint8_t)(rv >> 24) % ii;
258 |                 deck[ii] = deck[jj];
259 |                 deck[jj] = ii;
260 |                 ++ii;
261 |                 break;
262 | 
263 |             case 3: // tail: the remaining cases fall through on purpose and end at the return
264 |                 jj = (uint8_t)rv % ii;
265 |                 deck[ii] = deck[jj];
266 |                 deck[jj] = ii;
267 |                 ++ii; // fall through
268 |             case 2:
269 |                 jj = (uint8_t)(rv >> 8) % ii;
270 |                 deck[ii] = deck[jj];
271 |                 deck[jj] = ii;
272 |                 ++ii; // fall through
273 |             case 1:
274 |                 jj = (uint8_t)(rv >> 16) % ii;
275 |                 deck[ii] = deck[jj];
276 |                 deck[jj] = ii; // fall through
277 |             case 0:
278 |                 return;
279 |             }
280 |         }
281 |     }
282 |     else
283 |     {
284 |         // For each deck entry,
285 |         for (uint32_t ii = 1;;)
286 |         {
287 |             uint32_t jj, rv = prng.Next(); // one 32-bit draw supplies up to two 16-bit picks
288 | 
289 |             // 16-bit unroll
290 |             switch (count - ii) // number of cards still to place
291 |             {
292 |             default: // two or more left: place two, then draw again
293 |                 jj = (uint16_t)rv % ii;
294 |                 deck[ii] = deck[jj];
295 |                 deck[jj] = ii;
296 |                 ++ii;
297 |                 jj = (uint16_t)(rv >> 16) % ii;
298 |                 deck[ii] = deck[jj];
299 |                 deck[jj] = ii;
300 |                 ++ii;
301 |                 break;
302 | 
303 |             case 1: // tail: place the last card, then fall through to the return
304 |                 jj = (uint16_t)rv % ii;
305 |                 deck[ii] = deck[jj];
306 |                 deck[jj] = ii; // fall through
307 |             case 0:
308 |                 return;
309 |             }
310 |         }
311 |     }
312 | }
313 | 
314 | 
315 | //------------------------------------------------------------------------------
316 | // Tests
317 | // For every loss count from 1 to input_count, runs kTrials encode/lose/recover round trips, verifies the recovered packets and prints throughput.
318 | static void BasicTest(unsigned input_count, unsigned symbol_bytes, unsigned seed = 0)
319 | {
320 |     cout << "Testing performance for input_count=" << input_count << " and symbol_bytes=" << symbol_bytes << endl;
321 | 
322 |     const unsigned final_bytes = symbol_bytes; // not static: a static local would keep the value from the first call
323 | 
324 |     for (unsigned lossCount = 1; lossCount <= input_count; ++lossCount)
325 |     {
326 |         const uint64_t total_bytes = (uint64_t)(input_count - 1) * symbol_bytes + final_bytes; // widen before multiplying to avoid 32-bit wraparound
327 | 
328 |         FunctionTimer t_fecal_encoder_create("fecal_encoder_create");
329 |         FunctionTimer t_fecal_decoder_create("fecal_decoder_create");
330 |         FunctionTimer t_fecal_encode("fecal_encode");
331 |         FunctionTimer t_fecal_decoder_add_original("fecal_decoder_add_original");
332 |         FunctionTimer t_fecal_decoder_add_recovery("fecal_decoder_add_recovery");
333 |         FunctionTimer t_fecal_decode("fecal_decode");
334 | 
335 |         static const unsigned kTrials = 100;
336 | 
337 |         uint64_t recoveryRequired = 0; // recovery symbols generated, summed over all trials
338 | 
339 |         for (unsigned trial = 0; trial < kTrials; ++trial)
340 |         {
341 |             fecal::PCGRandom prng;
342 |             prng.Seed(seed, lossCount * kTrials + trial);
343 | 
344 |             std::vector<uint8_t> OriginalData((size_t)total_bytes + 1);
345 |             OriginalData[total_bytes] = 0xfe; // sentinel byte just past the data, checked after each trial
346 |             std::vector<void*> input_data(input_count);
347 | 
348 |             uint8_t* data_buffer = &OriginalData[0];
349 |             for (unsigned ii = 0; ii < input_count - 1; ++ii)
350 |             {
351 |                 input_data[ii] = data_buffer;
352 |                 WriteRandomSelfCheckingPacket(prng, data_buffer, symbol_bytes);
353 |                 data_buffer += symbol_bytes;
354 |             }
355 |             input_data[input_count - 1] = data_buffer;
356 |             WriteRandomSelfCheckingPacket(prng, data_buffer, final_bytes);
357 | 
358 |             t_fecal_encoder_create.BeginCall();
359 |             FecalEncoder encoder = fecal_encoder_create(input_count, &input_data[0], total_bytes);
360 |             t_fecal_encoder_create.EndCall();
361 | 
362 |             if (!encoder)
363 |             {
364 |                 cout << "Error: Unable to create encoder" << endl;
365 |                 FECAL_DEBUG_BREAK;
366 |                 return;
367 |             }
368 | 
369 |             t_fecal_decoder_create.BeginCall();
370 |             FecalDecoder decoder = fecal_decoder_create(input_count, total_bytes);
371 |             t_fecal_decoder_create.EndCall();
372 | 
373 |             if (!decoder)
374 |             {
375 |                 cout << "Error: Unable to create decoder" << endl;
376 |                 FECAL_DEBUG_BREAK;
377 |                 return;
378 |             }
379 | 
380 | #ifndef TEST_LOSE_FIRST_K_PACKETS
381 |             FECAL_DEBUG_ASSERT(input_count <= 65536);
382 |             std::vector<uint16_t> deck(input_count);
383 |             ShuffleDeck16(prng, &deck[0], input_count); // the first lossCount deck entries are the lost packet indices
384 | #endif
385 | 
386 |             for (unsigned i = 0; i < input_count; ++i)
387 |             {
388 |                 bool isLost = false;
389 | #ifdef TEST_LOSE_FIRST_K_PACKETS
390 |                 if (i < lossCount)
391 |                     isLost = true;
392 | #else
393 |                 for (unsigned k = 0; k < lossCount; ++k)
394 |                 {
395 |                     if (i == deck[k])
396 |                     {
397 |                         isLost = true;
398 |                         break;
399 |                     }
400 |                 }
401 | #endif
402 |                 if (isLost)
403 |                     continue;
404 | 
405 |                 FecalSymbol original;
406 |                 original.Data = input_data[i];
407 |                 original.Bytes = symbol_bytes;
408 |                 if (i == input_count - 1)
409 |                     original.Bytes = final_bytes;
410 |                 original.Index = i;
411 | 
412 |                 t_fecal_decoder_add_original.BeginCall();
413 |                 int result = fecal_decoder_add_original(decoder, &original);
414 |                 t_fecal_decoder_add_original.EndCall();
415 | 
416 |                 if (result)
417 |                 {
418 |                     cout << "Error: Unable to add original data to decoder. error=" << result << endl;
419 |                     FECAL_DEBUG_BREAK;
420 |                     return;
421 |                 }
422 |             }
423 | 
424 |             typedef std::shared_ptr< std::vector<uint8_t> > vecptr_t;
425 |             std::list<vecptr_t> recoveryData; // keeps every recovery buffer alive for the rest of the trial
426 | 
427 |             for (unsigned recoveryIndex = 0;; ++recoveryIndex)
428 |             {
429 |                 vecptr_t data = std::make_shared< std::vector<uint8_t> >(symbol_bytes);
430 |                 recoveryData.push_back(data);
431 | 
432 |                 FecalSymbol recovery;
433 |                 recovery.Index = recoveryIndex;
434 |                 recovery.Data = &data->at(0);
435 |                 recovery.Bytes = symbol_bytes;
436 | 
437 |                 {
438 |                     t_fecal_encode.BeginCall();
439 |                     int result = fecal_encode(encoder, &recovery);
440 |                     t_fecal_encode.EndCall();
441 | 
442 |                     if (result)
443 |                     {
444 |                         cout << "Error: Unable to generate encoded data. error=" << result << endl;
445 |                         FECAL_DEBUG_BREAK; // after the message, matching the other error paths
446 |                         return;
447 |                     }
448 |                 }
449 | 
450 |                 ++recoveryRequired;
451 | 
452 |                 {
453 |                     t_fecal_decoder_add_recovery.BeginCall();
454 |                     int result = fecal_decoder_add_recovery(decoder, &recovery);
455 |                     t_fecal_decoder_add_recovery.EndCall();
456 |                     if (result)
457 |                     {
458 |                         cout << "Error: Unable to add recovery data to decoder. error=" << result << endl;
459 |                         FECAL_DEBUG_BREAK;
460 |                         return;
461 |                     }
462 |                 }
463 | 
464 |                 RecoveredSymbols recovered;
465 | 
466 |                 t_fecal_decode.BeginCall();
467 |                 int decodeResult = fecal_decode(decoder, &recovered);
468 |                 t_fecal_decode.EndCall();
469 | 
470 |                 if (decodeResult == Fecal_Success)
471 |                 {
472 |                     for (unsigned i = 0; i < recovered.Count; ++i)
473 |                     {
474 |                         if (!CheckPacket(
475 |                             recovered.Symbols[i].Data,
476 |                             recovered.Symbols[i].Bytes))
477 |                         {
478 |                             cout << "Error: Packet check failed for " << i << endl;
479 |                             FECAL_DEBUG_BREAK;
480 |                             return;
481 |                         }
482 |                     }
483 | 
484 |                     // Decode success!
485 |                     break;
486 |                 }
487 |                 else if (decodeResult == Fecal_NeedMoreData)
488 |                 {
489 |                     // Not decodable yet: loop around and feed one more recovery symbol
490 |                 }
491 |                 else
492 |                 {
493 |                     cout << "Error: Decode returned " << decodeResult << endl;
494 |                     FECAL_DEBUG_BREAK;
495 |                     return;
496 |                 }
497 |             }
498 | 
499 |             // Decode success!
500 | 
501 |             fecal_free(encoder);
502 |             fecal_free(decoder);
503 | 
504 |             if (OriginalData[total_bytes] != 0xfe)
505 |             {
506 |                 cout << "Error: Corruption after final symbol" << endl;
507 |                 FECAL_DEBUG_BREAK;
508 |                 return;
509 |             }
510 |         }
511 | 
512 |         float avgRecoveryRequired = recoveryRequired / (float)kTrials;
513 | 
514 | #ifdef TEST_PRINT_API_TIMINGS
515 |         t_fecal_encoder_create.Print(kTrials);
516 |         t_fecal_encode.Print(kTrials);
517 |         t_fecal_decoder_create.Print(kTrials);
518 |         t_fecal_decoder_add_original.Print(kTrials);
519 |         t_fecal_decoder_add_recovery.Print(kTrials);
520 |         t_fecal_decode.Print(kTrials);
521 | #endif
522 | 
523 |         float encode_input_MBPS = total_bytes * kTrials / (float)(t_fecal_encoder_create.TotalUsec + t_fecal_encode.TotalUsec); // bytes per microsecond == MB/s
524 |         float encode_setup_MBPS = total_bytes * kTrials / (float)t_fecal_encoder_create.TotalUsec;
525 |         float encode_output_MBPS = avgRecoveryRequired * symbol_bytes * kTrials / (float)(t_fecal_encoder_create.TotalUsec + t_fecal_encode.TotalUsec);
526 |         float decode_input_MBPS = total_bytes * kTrials / (float)(t_fecal_decode.TotalUsec);
527 |         float decode_output_MBPS = (uint64_t)lossCount * symbol_bytes * kTrials / (float)(t_fecal_decode.TotalUsec); // 64-bit product: the 32-bit one can wrap for large inputs
528 | 
529 |         //cout << "Using " << avgRecoveryRequired << " average recovery packets for " << lossCount << " losses of " << input_count << " original packets:" << endl;
530 |         cout << "Encoder(" << total_bytes / 1000000.f << " MB in " << input_count << " pieces, " << lossCount << " losses): Input=" << encode_input_MBPS << " MB/s, Output=" << encode_output_MBPS << " MB/s, (Encode create: " << encode_setup_MBPS << " MB/s)" << endl;
531 |         cout << "Decoder(" << total_bytes / 1000000.f << " MB in " << input_count << " pieces, " << lossCount << " losses): Input=" << decode_input_MBPS << " MB/s, Output=" << decode_output_MBPS << " MB/s, (Overhead = " << avgRecoveryRequired - lossCount << " pieces)" << endl << endl;
532 |     }
533 | }
534 | 
535 | 
536 | //------------------------------------------------------------------------------
537 | // Entrypoint
538 | // Benchmark entry point. Optional arguments: argv[1] = input_count, argv[2] = symbol_bytes.
539 | int main(int argc, char **argv)
540 | {
541 |     SetCurrentThreadPriority();
542 | 
543 |     FunctionTimer t_fecal_init("fecal_init");
544 | 
545 |     t_fecal_init.BeginCall();
546 |     if (0 != fecal_init())
547 |     {
548 |         cout << "Failed to initialize" << endl;
549 |         return -1;
550 |     }
551 |     t_fecal_init.EndCall();
552 |     t_fecal_init.Print(1);
553 | 
554 |     unsigned input_count = 200;
555 | #ifdef FECAL_DEBUG
556 |     unsigned symbol_bytes = 20;
557 | #else
558 |     unsigned symbol_bytes = 1300;
559 | #endif
560 | 
561 |     if (argc >= 2)
562 |     {
563 |         // atoi() returns a signed int: reject values that would wrap when stored as unsigned,
564 |         // and values above the 65536 limit that BasicTest asserts for its 16-bit shuffle deck.
565 |         const int requested = atoi(argv[1]);
566 |         if (requested < 1 || requested > 65536)
567 |         {
568 |             cout << "Error: input_count must be between 1 and 65536" << endl;
569 |             return -1;
570 |         }
571 |         input_count = (unsigned)requested;
572 |     }
573 |     if (argc >= 3)
574 |     {
575 |         // The self-checking packet helpers require at least 2 bytes per packet.
576 |         const int requested = atoi(argv[2]);
577 |         if (requested < 2)
578 |         {
579 |             cout << "Error: symbol_bytes must be at least 2" << endl;
580 |             return -1;
581 |         }
582 |         symbol_bytes = (unsigned)requested;
583 |     }
584 | 
585 |     BasicTest(input_count, symbol_bytes);
586 | 
587 |     getchar(); // wait for a key press before exiting
588 | 
589 |     return 0;
590 | }
591 | 


--------------------------------------------------------------------------------
/tests/msvc/FecalBenchmark.vcxproj:
--------------------------------------------------------------------------------
  1 | ﻿<?xml version="1.0" encoding="utf-8"?>
  2 | <Project DefaultTargets="Build" ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 |   <ItemGroup Label="ProjectConfigurations">
  4 |     <ProjectConfiguration Include="Debug|Win32">
  5 |       <Configuration>Debug</Configuration>
  6 |       <Platform>Win32</Platform>
  7 |     </ProjectConfiguration>
  8 |     <ProjectConfiguration Include="Debug|x64">
  9 |       <Configuration>Debug</Configuration>
 10 |       <Platform>x64</Platform>
 11 |     </ProjectConfiguration>
 12 |     <ProjectConfiguration Include="Release|Win32">
 13 |       <Configuration>Release</Configuration>
 14 |       <Platform>Win32</Platform>
 15 |     </ProjectConfiguration>
 16 |     <ProjectConfiguration Include="Release|x64">
 17 |       <Configuration>Release</Configuration>
 18 |       <Platform>x64</Platform>
 19 |     </ProjectConfiguration>
 20 |   </ItemGroup>
 21 |   <PropertyGroup Label="Globals">
 22 |     <ProjectGuid>{32176592-2F30-4BD5-B645-EB11C8D3453E}</ProjectGuid>
 23 |     <RootNamespace>Fecal</RootNamespace>
 24 |     <ProjectName>FecalBenchmark</ProjectName>
 25 |   </PropertyGroup>
 26 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
 27 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
 28 |     <ConfigurationType>Application</ConfigurationType>
 29 |     <UseDebugLibraries>true</UseDebugLibraries>
 30 |     <CharacterSet>MultiByte</CharacterSet>
 31 |     <PlatformToolset>v140</PlatformToolset>
 32 |   </PropertyGroup>
 33 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
 34 |     <ConfigurationType>Application</ConfigurationType>
 35 |     <UseDebugLibraries>true</UseDebugLibraries>
 36 |     <CharacterSet>MultiByte</CharacterSet>
 37 |     <PlatformToolset>v140</PlatformToolset>
 38 |   </PropertyGroup>
 39 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
 40 |     <ConfigurationType>Application</ConfigurationType>
 41 |     <UseDebugLibraries>false</UseDebugLibraries>
 42 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 43 |     <CharacterSet>MultiByte</CharacterSet>
 44 |     <PlatformToolset>v140</PlatformToolset>
 45 |   </PropertyGroup>
 46 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
 47 |     <ConfigurationType>Application</ConfigurationType>
 48 |     <UseDebugLibraries>false</UseDebugLibraries>
 49 |     <WholeProgramOptimization>true</WholeProgramOptimization>
 50 |     <CharacterSet>MultiByte</CharacterSet>
 51 |     <PlatformToolset>v140</PlatformToolset>
 52 |   </PropertyGroup>
 53 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
 54 |   <ImportGroup Label="ExtensionSettings">
 55 |   </ImportGroup>
 56 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 57 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 58 |   </ImportGroup>
 59 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
 60 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 61 |   </ImportGroup>
 62 |   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 63 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 64 |   </ImportGroup>
 65 |   <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
 66 |     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
 67 |   </ImportGroup>
 68 |   <PropertyGroup Label="UserMacros" />
 69 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 70 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 71 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 72 |   </PropertyGroup>
 73 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
 74 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 75 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 76 |   </PropertyGroup>
 77 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
 78 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 79 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 80 |   </PropertyGroup>
 81 |   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
 82 |     <OutDir>Output/$(ProjectName)/$(Configuration)/$(Platform)/</OutDir>
 83 |     <IntDir>Obj/$(ProjectName)/$(Configuration)/$(Platform)/</IntDir>
 84 |   </PropertyGroup>
 85 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
 86 |     <ClCompile>
 87 |       <WarningLevel>Level3</WarningLevel>
 88 |       <Optimization>Disabled</Optimization>
 89 |       <SDLCheck>true</SDLCheck>
 90 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
 91 |       <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
 92 |     </ClCompile>
 93 |     <Link>
 94 |       <GenerateDebugInformation>true</GenerateDebugInformation>
 95 |       <AdditionalLibraryDirectories>
 96 |       </AdditionalLibraryDirectories>
 97 |     </Link>
 98 |     <PostBuildEvent>
 99 |       <Command>
100 |       </Command>
101 |     </PostBuildEvent>
102 |   </ItemDefinitionGroup>
103 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
104 |     <ClCompile>
105 |       <WarningLevel>Level3</WarningLevel>
106 |       <Optimization>Disabled</Optimization>
107 |       <SDLCheck>true</SDLCheck>
108 |       <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
109 |       <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
110 |     </ClCompile>
111 |     <Link>
112 |       <GenerateDebugInformation>true</GenerateDebugInformation>
113 |       <AdditionalLibraryDirectories>
114 |       </AdditionalLibraryDirectories>
115 |     </Link>
116 |     <PostBuildEvent>
117 |       <Command>
118 |       </Command>
119 |     </PostBuildEvent>
120 |   </ItemDefinitionGroup>
121 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
122 |     <ClCompile>
123 |       <WarningLevel>Level3</WarningLevel>
124 |       <Optimization>MaxSpeed</Optimization>
125 |       <FunctionLevelLinking>true</FunctionLevelLinking>
126 |       <IntrinsicFunctions>true</IntrinsicFunctions>
127 |       <SDLCheck>true</SDLCheck>
128 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
129 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
130 |       <OmitFramePointers>false</OmitFramePointers>
131 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
132 |       <BufferSecurityCheck>true</BufferSecurityCheck>
133 |       <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
134 |     </ClCompile>
135 |     <Link>
136 |       <GenerateDebugInformation>true</GenerateDebugInformation>
137 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
138 |       <OptimizeReferences>true</OptimizeReferences>
139 |       <AdditionalLibraryDirectories>
140 |       </AdditionalLibraryDirectories>
141 |     </Link>
142 |     <PostBuildEvent>
143 |       <Command>
144 |       </Command>
145 |     </PostBuildEvent>
146 |   </ItemDefinitionGroup>
147 |   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
148 |     <ClCompile>
149 |       <WarningLevel>Level3</WarningLevel>
150 |       <Optimization>MaxSpeed</Optimization>
151 |       <FunctionLevelLinking>true</FunctionLevelLinking>
152 |       <IntrinsicFunctions>true</IntrinsicFunctions>
153 |       <SDLCheck>true</SDLCheck>
154 |       <InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
155 |       <FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
156 |       <OmitFramePointers>false</OmitFramePointers>
157 |       <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
158 |       <BufferSecurityCheck>true</BufferSecurityCheck>
159 |       <PreprocessorDefinitions>_MBCS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
160 |     </ClCompile>
161 |     <Link>
162 |       <GenerateDebugInformation>true</GenerateDebugInformation>
163 |       <EnableCOMDATFolding>true</EnableCOMDATFolding>
164 |       <OptimizeReferences>true</OptimizeReferences>
165 |       <AdditionalLibraryDirectories>
166 |       </AdditionalLibraryDirectories>
167 |     </Link>
168 |     <PostBuildEvent>
169 |       <Command>
170 |       </Command>
171 |     </PostBuildEvent>
172 |   </ItemDefinitionGroup>
173 |   <ItemGroup>
174 |     <ProjectReference Include="..\..\proj\msvc\LibFecal.vcxproj">
175 |       <Project>{ff5912ef-7424-4974-b877-62b03d5046c6}</Project>
176 |     </ProjectReference>
177 |   </ItemGroup>
178 |   <ItemGroup>
179 |     <ClCompile Include="..\benchmark.cpp" />
180 |   </ItemGroup>
181 |   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
182 |   <ImportGroup Label="ExtensionTargets">
183 |   </ImportGroup>
184 | </Project>


--------------------------------------------------------------------------------
/tests/msvc/FecalBenchmark.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="Source Files">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="Header Files">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="Resource Files">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="..\benchmark.cpp">
19 |       <Filter>Source Files</Filter>
20 |     </ClCompile>
21 |   </ItemGroup>
22 | </Project>


--------------------------------------------------------------------------------