├── .gitignore ├── CMakeLists.txt ├── FecalCommon.cpp ├── FecalCommon.h ├── FecalDecoder.cpp ├── FecalDecoder.h ├── FecalEncoder.cpp ├── FecalEncoder.h ├── License.md ├── README.md ├── fecal.cpp ├── fecal.h ├── gf256.cpp ├── gf256.h ├── proj └── msvc │ ├── Fecal.sln │ ├── LibFecal.vcxproj │ └── LibFecal.vcxproj.filters └── tests ├── benchmark.cpp └── msvc ├── FecalBenchmark.vcxproj └── FecalBenchmark.vcxproj.filters /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.userosscache 8 | *.sln.docstates 9 | 10 | # User-specific files (MonoDevelop/Xamarin Studio) 11 | *.userprefs 12 | 13 | # Build results 14 | [Dd]ebug/ 15 | [Dd]ebugPublic/ 16 | [Rr]elease/ 17 | [Rr]eleases/ 18 | x64/ 19 | x86/ 20 | bld/ 21 | [Bb]in/ 22 | [Oo]bj/ 23 | [Ll]og/ 24 | 25 | # Visual Studio 2015 cache/options directory 26 | .vs/ 27 | # Uncomment if you have tasks that create the project's static files in wwwroot 28 | #wwwroot/ 29 | 30 | # MSTest test Results 31 | [Tt]est[Rr]esult*/ 32 | [Bb]uild[Ll]og.* 33 | 34 | # NUNIT 35 | *.VisualState.xml 36 | TestResult.xml 37 | 38 | # Build Results of an ATL Project 39 | [Dd]ebugPS/ 40 | [Rr]eleasePS/ 41 | dlldata.c 42 | 43 | # DNX 44 | project.lock.json 45 | artifacts/ 46 | 47 | *_i.c 48 | *_p.c 49 | *_i.h 50 | *.ilk 51 | *.meta 52 | *.obj 53 | *.pch 54 | *.pdb 55 | *.pgc 56 | *.pgd 57 | *.rsp 58 | *.sbr 59 | *.tlb 60 | *.tli 61 | *.tlh 62 | *.tmp 63 | *.tmp_proj 64 | *.log 65 | *.vspscc 66 | *.vssscc 67 | .builds 68 | *.pidb 69 | *.svclog 70 | *.scc 71 | 72 | # Chutzpah Test files 73 | _Chutzpah* 74 | 75 | # Visual C++ cache files 76 | ipch/ 77 | *.aps 78 | *.ncb 79 | *.opendb 80 | *.opensdf 81 | *.sdf 82 | *.cachefile 83 | *.VC.db 84 | *.VC.VC.opendb 85 | 86 | # Visual Studio profiler 87 | *.psess 88 | *.vsp 89 | *.vspx 90 | *.sap 91 
| 92 | # TFS 2012 Local Workspace 93 | $tf/ 94 | 95 | # Guidance Automation Toolkit 96 | *.gpState 97 | 98 | # ReSharper is a .NET coding add-in 99 | _ReSharper*/ 100 | *.[Rr]e[Ss]harper 101 | *.DotSettings.user 102 | 103 | # JustCode is a .NET coding add-in 104 | .JustCode 105 | 106 | # TeamCity is a build add-in 107 | _TeamCity* 108 | 109 | # DotCover is a Code Coverage Tool 110 | *.dotCover 111 | 112 | # NCrunch 113 | _NCrunch_* 114 | .*crunch*.local.xml 115 | nCrunchTemp_* 116 | 117 | # MightyMoose 118 | *.mm.* 119 | AutoTest.Net/ 120 | 121 | # Web workbench (sass) 122 | .sass-cache/ 123 | 124 | # Installshield output folder 125 | [Ee]xpress/ 126 | 127 | # DocProject is a documentation generator add-in 128 | DocProject/buildhelp/ 129 | DocProject/Help/*.HxT 130 | DocProject/Help/*.HxC 131 | DocProject/Help/*.hhc 132 | DocProject/Help/*.hhk 133 | DocProject/Help/*.hhp 134 | DocProject/Help/Html2 135 | DocProject/Help/html 136 | 137 | # Click-Once directory 138 | publish/ 139 | 140 | # Publish Web Output 141 | *.[Pp]ublish.xml 142 | *.azurePubxml 143 | # TODO: Comment the next line if you want to checkin your web deploy settings 144 | # but database connection strings (with potential passwords) will be unencrypted 145 | *.pubxml 146 | *.publishproj 147 | 148 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 149 | # checkin your Azure Web App publish settings, but sensitive information contained 150 | # in these scripts will be unencrypted 151 | PublishScripts/ 152 | 153 | # NuGet Packages 154 | *.nupkg 155 | # The packages folder can be ignored because of Package Restore 156 | **/packages/* 157 | # except build/, which is used as an MSBuild target. 
158 | !**/packages/build/ 159 | # Uncomment if necessary however generally it will be regenerated when needed 160 | #!**/packages/repositories.config 161 | # NuGet v3's project.json files produces more ignoreable files 162 | *.nuget.props 163 | *.nuget.targets 164 | 165 | # Microsoft Azure Build Output 166 | csx/ 167 | *.build.csdef 168 | 169 | # Microsoft Azure Emulator 170 | ecf/ 171 | rcf/ 172 | 173 | # Windows Store app package directories and files 174 | AppPackages/ 175 | BundleArtifacts/ 176 | Package.StoreAssociation.xml 177 | _pkginfo.txt 178 | 179 | # Visual Studio cache files 180 | # files ending in .cache can be ignored 181 | *.[Cc]ache 182 | # but keep track of directories ending in .cache 183 | !*.[Cc]ache/ 184 | 185 | # Others 186 | ClientBin/ 187 | ~$* 188 | *~ 189 | *.dbmdl 190 | *.dbproj.schemaview 191 | *.pfx 192 | *.publishsettings 193 | node_modules/ 194 | orleans.codegen.cs 195 | 196 | # Since there are multiple workflows, uncomment next line to ignore bower_components 197 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 198 | #bower_components/ 199 | 200 | # RIA/Silverlight projects 201 | Generated_Code/ 202 | 203 | # Backup & report files from converting an old project file 204 | # to a newer Visual Studio version. 
# Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm

# SQL Server files
*.mdf
*.ldf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# JetBrains Rider
.idea/
*.sln.iml
*.txt
# The build script is a .txt file; keep it visible to git despite the rule above
!CMakeLists.txt
*.lib
*.exe

--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.5)
project(fecal)

set(CMAKE_CXX_STANDARD 11)

# Dependency: GF256 library source files
set(GF256_LIB_SRCFILES
        gf256.cpp
        gf256.h)

# FEC-AL library source files
set(FECAL_LIB_SRCFILES
        fecal.cpp
        fecal.h
        FecalCommon.cpp
        FecalCommon.h
        FecalDecoder.cpp
        FecalDecoder.h
        FecalEncoder.cpp
        FecalEncoder.h)

add_library(gf256 ${GF256_LIB_SRCFILES})
add_library(fecal ${FECAL_LIB_SRCFILES})

# FecalCommon.h includes gf256.h and calls gf256_add_mem()/gf256_add2_mem(), so
# fecal depends on gf256.  Declaring that here lets CMake place gf256 after
# fecal on the link line, which order-sensitive static linkers require.
target_link_libraries(fecal gf256)

add_executable(benchmark tests/benchmark.cpp)
# Dependents first: fecal, then the gf256 library it relies on
target_link_libraries(benchmark fecal gf256)

--------------------------------------------------------------------------------
/FecalCommon.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #include "FecalCommon.h" 30 | 31 | namespace fecal { 32 | 33 | 34 | //------------------------------------------------------------------------------ 35 | // AppDataWindow 36 | 37 | bool AppDataWindow::SetParameters(unsigned input_count, uint64_t total_bytes) 38 | { 39 | if (input_count <= 0 || total_bytes < input_count) 40 | { 41 | FECAL_DEBUG_BREAK; // Invalid input 42 | return false; 43 | } 44 | 45 | InputCount = input_count; 46 | TotalBytes = total_bytes; 47 | 48 | SymbolBytes = static_cast((total_bytes + input_count - 1) / input_count); 49 | FinalBytes = static_cast(total_bytes % SymbolBytes); 50 | if (FinalBytes <= 0) 51 | FinalBytes = SymbolBytes; 52 | 53 | FECAL_DEBUG_ASSERT(SymbolBytes >= FinalBytes && FinalBytes != 0); 54 | 55 | return true; 56 | } 57 | 58 | 59 | //------------------------------------------------------------------------------ 60 | // AlignedDataBuffer 61 | 62 | AlignedDataBuffer::~AlignedDataBuffer() 63 | { 64 | SIMDSafeFree(Data); 65 | } 66 | 67 | bool AlignedDataBuffer::Allocate(unsigned bytes) 68 | { 69 | FECAL_DEBUG_ASSERT(bytes > 0); 70 | SIMDSafeFree(Data); 71 | Data = SIMDSafeAllocate(bytes); 72 | return Data != nullptr; 73 | } 74 | 75 | 76 | //------------------------------------------------------------------------------ 77 | // GrowingAlignedByteMatrix 78 | 79 | GrowingAlignedByteMatrix::~GrowingAlignedByteMatrix() 80 | { 81 | SIMDSafeFree(Data); 82 | } 83 | 84 | void GrowingAlignedByteMatrix::Free() 85 | { 86 | if (Data) 87 | { 88 | SIMDSafeFree(Data); 89 | Data = nullptr; 90 | AllocatedRows = 0; 91 | AllocatedColumns = 0; 92 | } 93 | } 94 | 95 | bool GrowingAlignedByteMatrix::Initialize(unsigned rows, unsigned columns) 96 | { 97 | Rows = rows; 98 | Columns = columns; 99 | AllocatedRows = rows + kExtraRows; 100 | AllocatedColumns = NextAlignedOffset(columns + kMinExtraColumns); 101 | 102 | SIMDSafeFree(Data); 103 | Data = SIMDSafeAllocate(AllocatedRows * AllocatedColumns); 104 | 105 | return Data != nullptr; 106 
| } 107 | 108 | bool GrowingAlignedByteMatrix::Resize(unsigned rows, unsigned columns) 109 | { 110 | FECAL_DEBUG_ASSERT(rows > 0 && columns > 0); 111 | if (rows <= AllocatedRows && columns <= AllocatedColumns) 112 | { 113 | Rows = rows; 114 | Columns = columns; 115 | return true; 116 | } 117 | 118 | const unsigned allocatedRows = rows + kExtraRows; 119 | const unsigned allocatedColumns = NextAlignedOffset(columns + kMinExtraColumns); 120 | 121 | uint8_t* buffer = SIMDSafeAllocate(allocatedRows * allocatedColumns); 122 | if (!buffer) 123 | { 124 | Free(); 125 | return false; 126 | } 127 | 128 | // If we already allocated a buffer: 129 | if (Data) 130 | { 131 | uint8_t* oldBuffer = Data; 132 | const unsigned oldColumns = Columns; 133 | 134 | if (oldColumns > 0) 135 | { 136 | // Maintain old data 137 | const unsigned oldRows = Rows; 138 | const unsigned oldStride = AllocatedColumns; 139 | uint8_t* destRow = buffer; 140 | uint8_t* srcRow = oldBuffer; 141 | 142 | unsigned copyCount = oldColumns; 143 | if (copyCount > columns) 144 | { 145 | FECAL_DEBUG_BREAK; // Should never happen 146 | copyCount = columns; 147 | } 148 | 149 | for (unsigned i = 0; i < oldRows; ++i, destRow += allocatedColumns, srcRow += oldStride) 150 | memcpy(destRow, srcRow, copyCount); 151 | } 152 | 153 | SIMDSafeFree(oldBuffer); 154 | } 155 | 156 | AllocatedRows = allocatedRows; 157 | AllocatedColumns = allocatedColumns; 158 | Rows = rows; 159 | Columns = columns; 160 | Data = buffer; 161 | return true; 162 | } 163 | 164 | 165 | } // namespace fecal 166 | -------------------------------------------------------------------------------- /FecalCommon.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #pragma once 30 | 31 | /* 32 | This module provides core tools and constants used by the codec: 33 | 34 | + Debugging macros 35 | + Alignment 36 | + PCGRandom, Int32Hash 37 | + Parameters of the Siamese and Cauchy matrix structures 38 | + ICodec base class for Encoder and Decoder 39 | + EncoderAppDataWindow and DecoderAppDataWindow structures 40 | + Growing matrix structure 41 | + CustomBitSet 42 | */ 43 | 44 | #ifdef _WIN32 45 | #include 46 | #endif 47 | 48 | #include "fecal.h" 49 | #include "gf256.h" 50 | 51 | #include 52 | #include 53 | #include 54 | #include 55 | 56 | namespace fecal { 57 | 58 | 59 | //------------------------------------------------------------------------------ 60 | // Debug 61 | 62 | // Some bugs only repro in release mode, so this can be helpful 63 | //#define FECAL_DEBUG_IN_RELEASE 64 | 65 | #if defined(_DEBUG) || defined(DEBUG) || defined(FECAL_DEBUG_IN_RELEASE) 66 | #define FECAL_DEBUG 67 | #ifdef _WIN32 68 | #define FECAL_DEBUG_BREAK __debugbreak() 69 | #else 70 | #define FECAL_DEBUG_BREAK __builtin_trap() 71 | #endif 72 | #define FECAL_DEBUG_ASSERT(cond) { if (!(cond)) { FECAL_DEBUG_BREAK; } } 73 | #else 74 | #define FECAL_DEBUG_BREAK ; 75 | #define FECAL_DEBUG_ASSERT(cond) ; 76 | #endif 77 | 78 | 79 | //------------------------------------------------------------------------------ 80 | // PCG PRNG 81 | // From http://www.pcg-random.org/ 82 | 83 | class PCGRandom 84 | { 85 | public: 86 | inline void Seed(uint64_t y, uint64_t x = 0) 87 | { 88 | State = 0; 89 | Inc = (y << 1u) | 1u; 90 | Next(); 91 | State += x; 92 | Next(); 93 | } 94 | 95 | inline uint32_t Next() 96 | { 97 | const uint64_t oldstate = State; 98 | State = oldstate * UINT64_C(6364136223846793005) + Inc; 99 | const uint32_t xorshifted = (uint32_t)(((oldstate >> 18) ^ oldstate) >> 27); 100 | const uint32_t rot = oldstate >> 59; 101 | return (xorshifted >> rot) | (xorshifted << ((uint32_t)(-(int32_t)rot) & 31)); 102 | } 103 | 104 | uint64_t State = 0, Inc = 
0; 105 | }; 106 | 107 | 108 | //------------------------------------------------------------------------------ 109 | // Int32Hash 110 | 111 | // Thomas Wang's 32-bit -> 32-bit integer hash function 112 | // http://burtleburtle.net/bob/hash/integer.html 113 | inline uint32_t Int32Hash(uint32_t key) 114 | { 115 | key += ~(key << 15); 116 | key ^= (key >> 10); 117 | key += (key << 3); 118 | key ^= (key >> 6); 119 | key += ~(key << 11); 120 | key ^= (key >> 16); 121 | return key; 122 | } 123 | 124 | 125 | //------------------------------------------------------------------------------ 126 | // Code Parameters 127 | 128 | // Number of values 3..255 that we cycle through 129 | static const unsigned kColumnValuePeriod = 253; 130 | 131 | // Number of values 1..255 that we cycle through 132 | static const unsigned kRowValuePeriod = 255; 133 | 134 | 135 | GF256_FORCE_INLINE uint8_t GetColumnValue(unsigned column) 136 | { 137 | // Note: This LCG visits each value exactly once 138 | return (uint8_t)(3 + (column * 199) % kColumnValuePeriod); 139 | } 140 | 141 | GF256_FORCE_INLINE uint8_t GetRowValue(unsigned row) 142 | { 143 | return (uint8_t)(1 + (row + 1) % kRowValuePeriod); 144 | } 145 | 146 | 147 | // Number of parallel lanes to run 148 | // Lane#(Column) = Column % kColumnLaneCount 149 | static const unsigned kColumnLaneCount = 8; 150 | 151 | // Number of running sums of original data 152 | // Note: This cannot be tuned without making code changes 153 | static const unsigned kColumnSumCount = 3; 154 | // Sum 0 = Parity XOR of all input data 155 | // Sum 1 = Product #1 sum XOR of all input data times its GetColumnValue() 156 | // Sum 2 = Product #2 sum XOR of all input data times its GetColumnValue() squared 157 | 158 | // Rate at which we add random pairs of data 159 | static const unsigned kPairAddRate = 16; 160 | 161 | 162 | // Calculate operation code for the given row and lane 163 | GF256_FORCE_INLINE unsigned GetRowOpcode(unsigned lane, unsigned row) 164 | { 165 | 
FECAL_DEBUG_ASSERT(lane < kColumnLaneCount); 166 | static const uint32_t kSumMask = (1 << (kColumnSumCount * 2)) - 1; 167 | static const uint32_t kZeroValue = (1 << ((kColumnSumCount - 1) * 2)); 168 | 169 | // This offset tunes the quality of the upper left of the generated matrix, 170 | // which is encountered in practice for the first block of input data 171 | static const unsigned kArbitraryOffset = 3; 172 | 173 | const uint32_t opcode = Int32Hash(lane + (row + kArbitraryOffset) * kColumnLaneCount) & kSumMask; 174 | return (opcode == 0) ? kZeroValue : (unsigned)opcode; 175 | } 176 | 177 | 178 | //------------------------------------------------------------------------------ 179 | // ICodec 180 | 181 | class ICodec 182 | { 183 | public: 184 | virtual ~ICodec() {} 185 | }; 186 | 187 | 188 | //------------------------------------------------------------------------------ 189 | // AlignedDataBuffer 190 | // 191 | // Aligned to cache-line boundaries for SIMD 192 | 193 | struct AlignedDataBuffer 194 | { 195 | uint8_t* Data = nullptr; 196 | 197 | 198 | // Free memory 199 | ~AlignedDataBuffer(); 200 | 201 | // Allocate memory 202 | bool Allocate(unsigned bytes); 203 | }; 204 | 205 | 206 | //------------------------------------------------------------------------------ 207 | // GrowingAlignedByteMatrix 208 | // 209 | // This is a matrix of bytes where the elements are stored in row-first order 210 | // and the first byte element of each row is aligned to cache-line boundaries. 211 | // Furthermore the matrix can grow in rows or columns, keeping existing data. 
212 | 213 | struct GrowingAlignedByteMatrix 214 | { 215 | // Buffer data 216 | uint8_t* Data = nullptr; 217 | 218 | // Used rows, columns 219 | unsigned Rows = 0; 220 | unsigned Columns = 0; 221 | 222 | // Allocate a few extra rows, columns whenenver we grow the matrix 223 | // This is tuned for the expected maximum recovery failure rate 224 | static const unsigned kExtraRows = 4; 225 | static const unsigned kMinExtraColumns = 4; 226 | 227 | // Allocated rows, columns 228 | unsigned AllocatedRows = 0; 229 | unsigned AllocatedColumns = 0; 230 | 231 | 232 | ~GrowingAlignedByteMatrix(); 233 | 234 | // Initialize matrix to the given size 235 | // New elements have undefined initial state 236 | bool Initialize(unsigned rows, unsigned columns); 237 | 238 | // Growing mantaining existing data in the buffer 239 | // New elements have undefined initial state 240 | bool Resize(unsigned rows, unsigned columns); 241 | 242 | uint8_t Get(unsigned row, unsigned column) 243 | { 244 | FECAL_DEBUG_ASSERT(Data && row < Rows && column < Columns); 245 | return Data[row * AllocatedColumns + column]; 246 | } 247 | 248 | // Free allocated memory 249 | void Free(); 250 | }; 251 | 252 | 253 | //------------------------------------------------------------------------------ 254 | // Portable Intrinsics 255 | 256 | // Returns number of bits set in the 64-bit value 257 | GF256_FORCE_INLINE unsigned PopCount64(uint64_t x) 258 | { 259 | #ifdef _MSC_VER 260 | #ifdef _WIN64 261 | return (unsigned)__popcnt64(x); 262 | #else 263 | return (unsigned)(__popcnt((uint32_t)x) + __popcnt((uint32_t)(x >> 32))); 264 | #endif 265 | #else // GCC 266 | return (unsigned)__builtin_popcountll(x); 267 | #endif 268 | } 269 | 270 | // Returns lowest bit index 0..63 where the first non-zero bit is found 271 | // Precondition: x != 0 272 | GF256_FORCE_INLINE unsigned FirstNonzeroBit64(uint64_t x) 273 | { 274 | #ifdef _MSC_VER 275 | #ifdef _WIN64 276 | unsigned long index; 277 | // Note: Ignoring result because x != 0 
278 | _BitScanForward64(&index, x); 279 | return (unsigned)index; 280 | #else 281 | unsigned long index; 282 | if (0 != _BitScanForward(&index, (uint32_t)x)) 283 | return (unsigned)index; 284 | // Note: Ignoring result because x != 0 285 | _BitScanForward(&index, (uint32_t)(x >> 32)); 286 | return (unsigned)index + 32; 287 | #endif 288 | #else 289 | // Note: Ignoring return value of 0 because x != 0 290 | return (unsigned)__builtin_ffsll(x) - 1; 291 | #endif 292 | } 293 | 294 | 295 | //------------------------------------------------------------------------------ 296 | // CustomBitSet 297 | 298 | // Custom std::bitset implementation for speed 299 | template 300 | struct CustomBitSet 301 | { 302 | static const unsigned kValidBits = N; 303 | typedef uint64_t WordT; 304 | static const unsigned kWordBits = sizeof(WordT) * 8; 305 | static const unsigned kWords = (kValidBits + kWordBits - 1) / kWordBits; 306 | static const WordT kAllOnes = UINT64_C(0xffffffffffffffff); 307 | 308 | WordT Words[kWords]; 309 | 310 | 311 | CustomBitSet() 312 | { 313 | ClearAll(); 314 | } 315 | 316 | void ClearAll() 317 | { 318 | for (unsigned i = 0; i < kWords; ++i) 319 | Words[i] = 0; 320 | } 321 | void SetAll() 322 | { 323 | for (unsigned i = 0; i < kWords; ++i) 324 | Words[i] = kAllOnes; 325 | } 326 | void Set(unsigned bit) 327 | { 328 | const unsigned word = bit / kWordBits; 329 | const WordT mask = (WordT)1 << (bit % kWordBits); 330 | Words[word] |= mask; 331 | } 332 | void Clear(unsigned bit) 333 | { 334 | const unsigned word = bit / kWordBits; 335 | const WordT mask = (WordT)1 << (bit % kWordBits); 336 | Words[word] &= ~mask; 337 | } 338 | bool Check(unsigned bit) const 339 | { 340 | const unsigned word = bit / kWordBits; 341 | const WordT mask = (WordT)1 << (bit % kWordBits); 342 | return 0 != (Words[word] & mask); 343 | } 344 | 345 | /* 346 | Returns the popcount of the bits within the given range. 
347 | 348 | bitStart < kValidBits: First bit to test 349 | bitEnd <= kValidBits: Bit to stop at (non-inclusive) 350 | */ 351 | unsigned RangePopcount(unsigned bitStart, unsigned bitEnd) 352 | { 353 | static_assert(kWordBits == 64, "Update this"); 354 | 355 | if (bitStart >= bitEnd) 356 | return 0; 357 | 358 | unsigned wordIndex = bitStart / kWordBits; 359 | const unsigned wordEnd = bitEnd / kWordBits; 360 | 361 | // Eliminate low bits of first word 362 | WordT word = Words[wordIndex] >> (bitStart % kWordBits); 363 | 364 | // Eliminate high bits of last word if there is just one word 365 | if (wordEnd == wordIndex) 366 | return PopCount64(word << (kWordBits - (bitEnd - bitStart))); 367 | 368 | // Count remainder of first word 369 | unsigned count = PopCount64(word); 370 | 371 | // Accumulate popcount of full words 372 | while (++wordIndex < wordEnd) 373 | count += PopCount64(Words[wordIndex]); 374 | 375 | // Eliminate high bits of last word if there is one 376 | unsigned lastWordBits = bitEnd - wordIndex * kWordBits; 377 | if (lastWordBits > 0) 378 | count += PopCount64(Words[wordIndex] << (kWordBits - lastWordBits)); 379 | 380 | return count; 381 | } 382 | 383 | /* 384 | Returns the bit index where the first cleared bit is found. 385 | Returns kValidBits if all bits are set. 
386 | 387 | bitStart < kValidBits: Index to start looking 388 | */ 389 | unsigned FindFirstClear(unsigned bitStart) 390 | { 391 | static_assert(kWordBits == 64, "Update this"); 392 | 393 | unsigned wordStart = bitStart / kWordBits; 394 | 395 | WordT word = ~Words[wordStart] >> (bitStart % kWordBits); 396 | if (word != 0) 397 | { 398 | unsigned offset = 0; 399 | if ((word & 1) == 0) 400 | offset = FirstNonzeroBit64(word); 401 | return bitStart + offset; 402 | } 403 | 404 | for (unsigned i = wordStart + 1; i < kWords; ++i) 405 | { 406 | word = ~Words[i]; 407 | if (word != 0) 408 | return i * kWordBits + FirstNonzeroBit64(word); 409 | } 410 | 411 | return kValidBits; 412 | } 413 | 414 | /* 415 | Returns the bit index where the first set bit is found. 416 | Returns 'bitEnd' if all bits are clear. 417 | 418 | bitStart < kValidBits: Index to start looking 419 | bitEnd <= kValidBits: Index to stop looking at 420 | */ 421 | unsigned FindFirstSet(unsigned bitStart, unsigned bitEnd = kValidBits) 422 | { 423 | static_assert(kWordBits == 64, "Update this"); 424 | 425 | unsigned wordStart = bitStart / kWordBits; 426 | 427 | WordT word = Words[wordStart] >> (bitStart % kWordBits); 428 | if (word != 0) 429 | { 430 | unsigned offset = 0; 431 | if ((word & 1) == 0) 432 | offset = FirstNonzeroBit64(word); 433 | return bitStart + offset; 434 | } 435 | 436 | const unsigned wordEnd = (bitEnd + kWordBits - 1) / kWordBits; 437 | 438 | for (unsigned i = wordStart + 1; i < wordEnd; ++i) 439 | { 440 | word = Words[i]; 441 | if (word != 0) 442 | return i * kWordBits + FirstNonzeroBit64(word); 443 | } 444 | 445 | return bitEnd; 446 | } 447 | 448 | /* 449 | Set a range of bits 450 | 451 | bitStart < kValidBits: Index at which to start setting 452 | bitEnd <= kValidBits: Bit to stop at (non-inclusive) 453 | */ 454 | void SetRange(unsigned bitStart, unsigned bitEnd) 455 | { 456 | if (bitStart >= bitEnd) 457 | return; 458 | 459 | unsigned wordStart = bitStart / kWordBits; 460 | const unsigned 
wordEnd = bitEnd / kWordBits; 461 | 462 | bitStart %= kWordBits; 463 | 464 | if (wordEnd == wordStart) 465 | { 466 | // This implies x=(bitStart % kWordBits) and y=(bitEnd % kWordBits) 467 | // are in the same word. Also: x < y, y < 64, y - x < 64. 468 | bitEnd %= kWordBits; 469 | WordT mask = ((WordT)1 << (bitEnd - bitStart)) - 1; // 1..63 bits 470 | mask <<= bitStart; 471 | Words[wordStart] |= mask; 472 | return; 473 | } 474 | 475 | // Set the end of the first word 476 | Words[wordStart] |= kAllOnes << bitStart; 477 | 478 | // Whole words at a time 479 | for (unsigned i = wordStart + 1; i < wordEnd; ++i) 480 | Words[i] = kAllOnes; 481 | 482 | // Set first few bits of the last word 483 | unsigned lastWordBits = bitEnd - wordEnd * kWordBits; 484 | if (lastWordBits > 0) 485 | { 486 | WordT mask = ((WordT)1 << lastWordBits) - 1; // 1..63 bits 487 | Words[wordEnd] |= mask; 488 | } 489 | } 490 | 491 | /* 492 | Clear a range of bits 493 | 494 | bitStart < kValidBits: Index at which to start clearing 495 | bitEnd <= kValidBits: Bit to stop at (non-inclusive) 496 | */ 497 | void ClearRange(unsigned bitStart, unsigned bitEnd) 498 | { 499 | if (bitStart >= bitEnd) 500 | return; 501 | 502 | unsigned wordStart = bitStart / kWordBits; 503 | const unsigned wordEnd = bitEnd / kWordBits; 504 | 505 | bitStart %= kWordBits; 506 | 507 | if (wordEnd == wordStart) 508 | { 509 | // This implies x=(bitStart % kWordBits) and y=(bitEnd % kWordBits) 510 | // are in the same word. Also: x < y, y < 64, y - x < 64. 
511 | bitEnd %= kWordBits; 512 | WordT mask = ((WordT)1 << (bitEnd - bitStart)) - 1; // 1..63 bits 513 | mask <<= bitStart; 514 | Words[wordStart] &= ~mask; 515 | return; 516 | } 517 | 518 | // Clear the end of the first word 519 | Words[wordStart] &= ~(kAllOnes << bitStart); 520 | 521 | // Whole words at a time 522 | for (unsigned i = wordStart + 1; i < wordEnd; ++i) 523 | Words[i] = 0; 524 | 525 | // Clear first few bits of the last word 526 | unsigned lastWordBits = bitEnd - wordEnd * kWordBits; 527 | if (lastWordBits > 0) 528 | { 529 | WordT mask = ((WordT)1 << lastWordBits) - 1; // 1..63 bits 530 | Words[wordEnd] &= ~mask; 531 | } 532 | } 533 | }; 534 | 535 | 536 | //------------------------------------------------------------------------------ 537 | // SIMD-Safe Aligned Memory Allocations 538 | 539 | static const unsigned kAlignmentBytes = GF256_ALIGN_BYTES; 540 | 541 | GF256_FORCE_INLINE unsigned NextAlignedOffset(unsigned offset) 542 | { 543 | return (offset + kAlignmentBytes - 1) & ~(kAlignmentBytes - 1); 544 | } 545 | 546 | static GF256_FORCE_INLINE uint8_t* SIMDSafeAllocate(size_t size) 547 | { 548 | uint8_t* data = (uint8_t*)calloc(1, kAlignmentBytes + size); 549 | if (!data) 550 | return nullptr; 551 | unsigned offset = (unsigned)((uintptr_t)data % kAlignmentBytes); 552 | data += kAlignmentBytes - offset; 553 | data[-1] = (uint8_t)offset; 554 | return data; 555 | } 556 | 557 | static GF256_FORCE_INLINE void SIMDSafeFree(void* ptr) 558 | { 559 | if (!ptr) 560 | return; 561 | uint8_t* data = (uint8_t*)ptr; 562 | unsigned offset = data[-1]; 563 | if (offset >= kAlignmentBytes) 564 | { 565 | FECAL_DEBUG_BREAK; // Should never happen 566 | return; 567 | } 568 | data -= kAlignmentBytes - offset; 569 | free(data); 570 | } 571 | 572 | 573 | //------------------------------------------------------------------------------ 574 | // AppDataWindow 575 | 576 | // Base class for app data window shared between encoder and decoder 577 | struct AppDataWindow 578 | { 579 
| // Application parameters 580 | unsigned InputCount = 0; // Number of input symbols 581 | uint64_t TotalBytes = 0; // Total number of input bytes 582 | unsigned FinalBytes = 0; // Number of bytes in the final symbol 583 | unsigned SymbolBytes = 0; // Number of bytes in all other symbols 584 | 585 | 586 | // Set parameter for the window (should be done first) 587 | // Returns false if input is invalid 588 | bool SetParameters(unsigned input_count, uint64_t total_bytes); 589 | 590 | GF256_FORCE_INLINE bool IsFinalColumn(unsigned column) 591 | { 592 | return (column == InputCount - 1); 593 | } 594 | 595 | // Helper function 596 | GF256_FORCE_INLINE unsigned GetColumnBytes(unsigned column) 597 | { 598 | return IsFinalColumn(column) ? FinalBytes : SymbolBytes; 599 | } 600 | }; 601 | 602 | 603 | //------------------------------------------------------------------------------ 604 | // XORSummer 605 | 606 | // This optimization speeds up the codec by 15% 607 | #define FECAL_ADD2_OPT 608 | 609 | class XORSummer 610 | { 611 | public: 612 | // Set the addition destination and byte count 613 | GF256_FORCE_INLINE void Initialize(uint8_t* dest, unsigned bytes) 614 | { 615 | DestBuffer = dest; 616 | Bytes = bytes; 617 | Waiting = nullptr; 618 | } 619 | 620 | // Accumulate some source data 621 | GF256_FORCE_INLINE void Add(const uint8_t* src) 622 | { 623 | #ifdef FECAL_ADD2_OPT 624 | if (Waiting) 625 | { 626 | gf256_add2_mem(DestBuffer, src, Waiting, Bytes); 627 | Waiting = nullptr; 628 | } 629 | else 630 | Waiting = src; 631 | #else 632 | gf256_add_mem(DestBuffer, src, Bytes); 633 | #endif 634 | } 635 | 636 | // Finalize in the destination buffer 637 | GF256_FORCE_INLINE void Finalize() 638 | { 639 | #ifdef FECAL_ADD2_OPT 640 | if (Waiting) 641 | gf256_add_mem(DestBuffer, Waiting, Bytes); 642 | #endif 643 | } 644 | 645 | protected: 646 | uint8_t* DestBuffer; 647 | unsigned Bytes; 648 | const uint8_t* Waiting; 649 | }; 650 | 651 | 652 | } // namespace fecal 653 | 
-------------------------------------------------------------------------------- /FecalDecoder.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #include "FecalDecoder.h" 30 | 31 | namespace fecal { 32 | 33 | 34 | //------------------------------------------------------------------------------ 35 | // DecoderAppDataWindow 36 | 37 | void DecoderAppDataWindow::AllocateOriginals() 38 | { 39 | OriginalData.resize(InputCount); 40 | 41 | // Allocate some space for recovery data too (20% of original data size) 42 | RecoveryData.reserve(InputCount / 5 + 1); 43 | 44 | SubwindowCount = (InputCount + kSubwindowSize - 1) / kSubwindowSize; 45 | Subwindows.resize(SubwindowCount); 46 | } 47 | 48 | bool DecoderAppDataWindow::AddOriginal(unsigned column, uint8_t* data) 49 | { 50 | // If we already have this one: 51 | if (OriginalData[column].Data) 52 | return false; 53 | 54 | // Record this one 55 | OriginalData[column].Data = data; 56 | MarkGotElement(column); 57 | ++OriginalGotCount; 58 | 59 | return true; 60 | } 61 | 62 | bool DecoderAppDataWindow::AddRecovery(uint8_t* data, unsigned row) 63 | { 64 | FECAL_DEBUG_ASSERT(InputCount > 0); // SetParameters() must be called first 65 | 66 | // Trying to insert with duplicate ID: It will not be inserted 67 | auto res = RowSet.insert(row); 68 | if (!res.second) 69 | return false; 70 | 71 | RecoveryInfo info; 72 | info.Data = data; 73 | info.Row = row; 74 | info.UsedForSolution = false; 75 | RecoveryData.push_back(info); 76 | 77 | return true; 78 | } 79 | 80 | void DecoderAppDataWindow::MarkGotElement(unsigned element) 81 | { 82 | FECAL_DEBUG_ASSERT(element < InputCount); 83 | Subwindow& subwindow = Subwindows[element / kSubwindowSize]; 84 | FECAL_DEBUG_ASSERT(!subwindow.Got.Check(element % kSubwindowSize)); 85 | subwindow.Got.Set(element % kSubwindowSize); 86 | subwindow.GotCount++; 87 | } 88 | 89 | unsigned DecoderAppDataWindow::FindNextLostElement(unsigned elementStart) 90 | { 91 | if (elementStart >= InputCount) 92 | return InputCount; 93 | 94 | const unsigned subwindowEnd = SubwindowCount; 95 | unsigned subwindowIndex = elementStart / kSubwindowSize; 96 | 
unsigned bitIndex = elementStart % kSubwindowSize; 97 | FECAL_DEBUG_ASSERT(subwindowEnd <= SubwindowCount); 98 | FECAL_DEBUG_ASSERT(subwindowIndex < SubwindowCount); 99 | 100 | while (subwindowIndex < subwindowEnd) 101 | { 102 | // If there may be any lost packets in this subwindow: 103 | if (Subwindows[subwindowIndex].GotCount < kSubwindowSize) 104 | { 105 | for (;;) 106 | { 107 | // Seek next clear bit 108 | bitIndex = Subwindows[subwindowIndex].Got.FindFirstClear(bitIndex); 109 | 110 | // If there were none, skip this subwindow 111 | if (bitIndex >= kSubwindowSize) 112 | break; 113 | 114 | // Calculate element index and stop if we hit the end of the valid data 115 | unsigned nextElement = subwindowIndex * kSubwindowSize + bitIndex; 116 | if (nextElement > InputCount) 117 | nextElement = InputCount; 118 | 119 | return nextElement; 120 | } 121 | } 122 | 123 | // Reset bit index to the front of the next subwindow 124 | bitIndex = 0; 125 | 126 | // Check next subwindow 127 | ++subwindowIndex; 128 | } 129 | 130 | return InputCount; 131 | } 132 | 133 | 134 | //------------------------------------------------------------------------------ 135 | // Decoder 136 | 137 | FecalResult Decoder::Initialize(unsigned input_count, uint64_t total_bytes) 138 | { 139 | RecoveryMatrix.Window = &Window; 140 | 141 | if (!Window.SetParameters(input_count, total_bytes)) 142 | { 143 | FECAL_DEBUG_BREAK; // Invalid input 144 | return Fecal_InvalidInput; 145 | } 146 | Window.AllocateOriginals(); 147 | 148 | return Fecal_Success; 149 | } 150 | 151 | FecalResult Decoder::AddOriginal(const FecalSymbol& symbol) 152 | { 153 | if (symbol.Index >= Window.InputCount || 154 | symbol.Data == nullptr || 155 | symbol.Bytes != Window.GetColumnBytes(symbol.Index)) 156 | { 157 | FECAL_DEBUG_BREAK; // Invalid input 158 | return Fecal_InvalidInput; 159 | } 160 | 161 | if (Window.AddOriginal(symbol.Index, (uint8_t*)symbol.Data)) 162 | RecoveryAttempted = false; 163 | 164 | return Fecal_Success; 165 | } 166 | 
167 | FecalResult Decoder::AddRecovery(const FecalSymbol& symbol) 168 | { 169 | if (symbol.Data == nullptr || 170 | symbol.Bytes != Window.SymbolBytes) 171 | { 172 | FECAL_DEBUG_BREAK; // Invalid input 173 | return Fecal_InvalidInput; 174 | } 175 | 176 | if (Window.AddRecovery((uint8_t*)symbol.Data, symbol.Index)) 177 | RecoveryAttempted = false; 178 | 179 | return Fecal_Success; 180 | } 181 | 182 | FecalResult Decoder::GetOriginal(unsigned column, FecalSymbol& symbol) 183 | { 184 | symbol.Index = column; 185 | symbol.Data = nullptr; 186 | symbol.Bytes = 0; 187 | 188 | if (column >= Window.InputCount) 189 | { 190 | FECAL_DEBUG_BREAK; // Invalid input 191 | return Fecal_InvalidInput; 192 | } 193 | 194 | symbol.Data = Window.OriginalData[column].Data; 195 | if (symbol.Data == nullptr) 196 | return Fecal_NeedMoreData; 197 | 198 | symbol.Bytes = Window.GetColumnBytes(column); 199 | return Fecal_Success; 200 | } 201 | 202 | FecalResult Decoder::Decode(RecoveredSymbols& symbols) 203 | { 204 | // Default return values 205 | symbols.Symbols = nullptr; 206 | symbols.Count = 0; 207 | 208 | // If all original data arrived: 209 | if (Window.OriginalGotCount >= Window.InputCount) 210 | return Fecal_Success; 211 | 212 | // If we have not received enough data to try to decode: 213 | if (Window.OriginalGotCount + static_cast(Window.RecoveryData.size()) < Window.InputCount) 214 | return Fecal_NeedMoreData; 215 | 216 | // If recovery was already attempted: 217 | if (RecoveryAttempted) 218 | return Fecal_NeedMoreData; 219 | RecoveryAttempted = true; 220 | 221 | // Generate updated recovery matrix 222 | if (!RecoveryMatrix.GenerateMatrix()) 223 | return Fecal_OutOfMemory; 224 | 225 | // Attempt to solve the linear system 226 | if (!RecoveryMatrix.GaussianElimination()) 227 | return Fecal_NeedMoreData; 228 | 229 | FecalResult result = EliminateOriginalData(); 230 | if (result != Fecal_Success) 231 | return result; 232 | 233 | MultiplyLowerTriangle(); 234 | 235 | result = 
BackSubstitution(); 236 | 237 | if (result == Fecal_Success) 238 | { 239 | symbols.Symbols = &RecoveredData[0]; 240 | symbols.Count = static_cast(RecoveredData.size()); 241 | } 242 | 243 | return result; 244 | } 245 | 246 | FecalResult Decoder::EliminateOriginalData() 247 | { 248 | // Allocate workspace 249 | const unsigned symbolBytes = Window.SymbolBytes; 250 | if (!ProductWorkspace.Allocate(symbolBytes)) 251 | return Fecal_OutOfMemory; 252 | 253 | const unsigned rows = static_cast(Window.RecoveryData.size()); 254 | 255 | // Eliminate data in sorted row order regardless of pivot order: 256 | for (unsigned matrixRowIndex = 0; matrixRowIndex < rows; ++matrixRowIndex) 257 | { 258 | const RecoveryInfo& recovery = Window.RecoveryData[matrixRowIndex]; 259 | if (!recovery.UsedForSolution) 260 | continue; 261 | 262 | // Zero the product sum 263 | memset(ProductWorkspace.Data, 0, symbolBytes); 264 | 265 | XORSummer summer1; 266 | summer1.Initialize(recovery.Data, symbolBytes); 267 | XORSummer summerRX; 268 | summerRX.Initialize(ProductWorkspace.Data, symbolBytes); 269 | 270 | // Eliminate dense recovery data outside of matrix: 271 | for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex) 272 | { 273 | const unsigned opcode = GetRowOpcode(laneIndex, recovery.Row); 274 | 275 | // For summations into the RecoveryPacket buffer: 276 | unsigned mask = 1; 277 | for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex) 278 | { 279 | if (opcode & mask) 280 | summer1.Add(GetLaneSum(laneIndex, sumIndex)); 281 | mask <<= 1; 282 | } 283 | 284 | // For summations into the ProductWorkspace buffer: 285 | for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex) 286 | { 287 | if (opcode & mask) 288 | summerRX.Add(GetLaneSum(laneIndex, sumIndex)); 289 | mask <<= 1; 290 | } 291 | } 292 | 293 | // Eliminate light recovery data outside of matrix: 294 | const unsigned inputCount = Window.InputCount; 295 | PCGRandom prng; 296 | prng.Seed(recovery.Row, 
inputCount); 297 | 298 | const unsigned pairCount = (inputCount + kPairAddRate - 1) / kPairAddRate; 299 | for (unsigned i = 0; i < pairCount; ++i) 300 | { 301 | const unsigned element1 = prng.Next() % inputCount; 302 | const uint8_t* original1 = Window.OriginalData[element1].Data; 303 | if (original1) 304 | { 305 | if (element1 == inputCount - 1) 306 | gf256_add_mem(recovery.Data, original1, Window.FinalBytes); 307 | else 308 | summer1.Add(original1); 309 | } 310 | 311 | const unsigned elementRX = prng.Next() % inputCount; 312 | const uint8_t* originalRX = Window.OriginalData[elementRX].Data; 313 | if (originalRX) 314 | { 315 | if (elementRX == inputCount - 1) 316 | gf256_add_mem(ProductWorkspace.Data, originalRX, Window.FinalBytes); 317 | else 318 | summerRX.Add(originalRX); 319 | } 320 | } 321 | 322 | summer1.Finalize(); 323 | summerRX.Finalize(); 324 | 325 | const uint8_t RX = GetRowValue(recovery.Row); 326 | gf256_muladd_mem(recovery.Data, RX, ProductWorkspace.Data, symbolBytes); 327 | } 328 | 329 | return Fecal_Success; 330 | } 331 | 332 | const uint8_t* Decoder::GetLaneSum(unsigned laneIndex, unsigned sumIndex) 333 | { 334 | AlignedDataBuffer& sum = LaneSums[laneIndex][sumIndex]; 335 | if (sum.Data) 336 | return sum.Data; 337 | 338 | const unsigned symbolBytes = Window.SymbolBytes; 339 | if (!sum.Allocate(symbolBytes)) 340 | return nullptr; 341 | 342 | memset(sum.Data, 0, symbolBytes); 343 | 344 | const unsigned inputEnd = Window.InputCount - 1; 345 | if (sumIndex == 0) 346 | { 347 | XORSummer summer; 348 | summer.Initialize(sum.Data, symbolBytes); 349 | 350 | // For each input column: 351 | for (unsigned column = laneIndex; column < inputEnd; column += kColumnLaneCount) 352 | { 353 | const uint8_t* data = Window.OriginalData[column].Data; 354 | if (data) 355 | summer.Add(data); 356 | } 357 | if (inputEnd % kColumnLaneCount == laneIndex) 358 | { 359 | const uint8_t* data = Window.OriginalData[inputEnd].Data; 360 | if (data) 361 | gf256_add_mem(sum.Data, data, 
Window.FinalBytes); 362 | } 363 | 364 | summer.Finalize(); 365 | return sum.Data; 366 | } 367 | 368 | // For each input column: 369 | for (unsigned column = laneIndex; column < inputEnd; column += kColumnLaneCount) 370 | { 371 | const uint8_t* data = Window.OriginalData[column].Data; 372 | if (!data) 373 | continue; 374 | 375 | uint8_t CX_or_CX2 = GetColumnValue(column); 376 | if (sumIndex == 2) 377 | CX_or_CX2 = gf256_sqr(CX_or_CX2); 378 | 379 | gf256_muladd_mem(sum.Data, CX_or_CX2, data, symbolBytes); 380 | } 381 | if (inputEnd % kColumnLaneCount == laneIndex) 382 | { 383 | const uint8_t* data = Window.OriginalData[inputEnd].Data; 384 | if (data) 385 | { 386 | uint8_t CX_or_CX2 = GetColumnValue(inputEnd); 387 | if (sumIndex == 2) 388 | CX_or_CX2 = gf256_sqr(CX_or_CX2); 389 | 390 | gf256_muladd_mem(sum.Data, CX_or_CX2, data, Window.FinalBytes); 391 | } 392 | } 393 | 394 | return sum.Data; 395 | 396 | static_assert(kColumnSumCount == 3, "Update this"); 397 | } 398 | 399 | void Decoder::MultiplyLowerTriangle() 400 | { 401 | const unsigned columns = static_cast(RecoveryMatrix.Columns.size()); 402 | const unsigned srcBytes = Window.SymbolBytes; 403 | 404 | // Multiply lower triangle following solution order from left to right: 405 | for (unsigned col_i = 0; col_i < columns - 1; ++col_i) 406 | { 407 | const unsigned matrixRowIndex_i = RecoveryMatrix.Pivots[col_i]; 408 | const uint8_t* srcData = Window.RecoveryData[matrixRowIndex_i].Data; 409 | FECAL_DEBUG_ASSERT(srcData && srcBytes > 0); 410 | 411 | for (unsigned col_j = col_i + 1; col_j < columns; ++col_j) 412 | { 413 | const unsigned matrixRowIndex_j = RecoveryMatrix.Pivots[col_j]; 414 | const uint8_t y = RecoveryMatrix.Matrix.Get(matrixRowIndex_j, col_i); 415 | 416 | if (y == 0) 417 | continue; 418 | 419 | uint8_t* destData = Window.RecoveryData[matrixRowIndex_j].Data; 420 | gf256_muladd_mem(destData, y, srcData, srcBytes); 421 | } 422 | } 423 | } 424 | 425 | FecalResult Decoder::BackSubstitution() 426 | { 427 | 
const unsigned columns = static_cast(RecoveryMatrix.Columns.size()); 428 | const unsigned srcBytes = Window.SymbolBytes; 429 | 430 | RecoveredData.resize(columns); 431 | 432 | // For each column starting with the right-most column: 433 | for (int col_i = columns - 1; col_i >= 0; --col_i) 434 | { 435 | const unsigned matrixRowIndex = RecoveryMatrix.Pivots[col_i]; 436 | uint8_t* recovery = Window.RecoveryData[matrixRowIndex].Data; 437 | const uint8_t y = RecoveryMatrix.Matrix.Get(matrixRowIndex, col_i); 438 | FECAL_DEBUG_ASSERT(y != 0); 439 | const unsigned originalColumn = RecoveryMatrix.Columns[col_i].Column; 440 | const unsigned originalBytes = Window.GetColumnBytes(originalColumn); 441 | 442 | gf256_div_mem(recovery, recovery, y, originalBytes); 443 | 444 | Window.OriginalData[originalColumn].Data = recovery; 445 | 446 | // Write recovered packet data 447 | RecoveredData[col_i].Data = recovery; 448 | RecoveredData[col_i].Bytes = originalBytes; 449 | RecoveredData[col_i].Index = originalColumn; 450 | 451 | // Eliminate from all other pivot rows above it: 452 | for (unsigned col_j = 0; col_j < (unsigned)col_i; ++col_j) 453 | { 454 | unsigned pivot_j = RecoveryMatrix.Pivots[col_j]; 455 | const uint8_t x = RecoveryMatrix.Matrix.Get(pivot_j, col_i); 456 | 457 | if (x == 0) 458 | continue; 459 | 460 | gf256_muladd_mem(Window.RecoveryData[pivot_j].Data, x, recovery, originalBytes); 461 | } 462 | } 463 | 464 | return Fecal_Success; 465 | } 466 | 467 | 468 | //------------------------------------------------------------------------------ 469 | // RecoveryMatrixState 470 | 471 | void RecoveryMatrixState::PopulateColumns(const unsigned columns) 472 | { 473 | Columns.resize(columns); 474 | 475 | unsigned nextSearchColumn = 0; 476 | for (unsigned matrixColumn = 0; matrixColumn < columns; ++matrixColumn) 477 | { 478 | unsigned lostColumn = Window->FindNextLostElement(nextSearchColumn); 479 | if (lostColumn >= Window->InputCount) 480 | { 481 | FECAL_DEBUG_BREAK; // Should never 
happen 482 | break; 483 | } 484 | nextSearchColumn = lostColumn + 1; 485 | 486 | ColumnInfo& columnInfo = Columns[matrixColumn]; 487 | columnInfo.Column = lostColumn; 488 | columnInfo.CX = GetColumnValue(lostColumn); 489 | 490 | Window->OriginalData[lostColumn].RecoveryMatrixColumn = matrixColumn; 491 | } 492 | } 493 | 494 | bool RecoveryMatrixState::GenerateMatrix() 495 | { 496 | const unsigned input_count = Window->InputCount; 497 | const unsigned columns = input_count - Window->OriginalGotCount; 498 | const unsigned rows = static_cast(Window->RecoveryData.size()); 499 | FECAL_DEBUG_ASSERT(rows >= columns); 500 | 501 | // If column count changed: 502 | if (columns != (unsigned)Columns.size()) 503 | { 504 | PopulateColumns(columns); 505 | 506 | // Reset everything 507 | Pivots.clear(); 508 | GEResumePivot = 0; 509 | FilledRows = 0; 510 | 511 | if (!Matrix.Initialize(rows, columns)) 512 | return false; 513 | } 514 | else 515 | { 516 | // Otherwise we just added rows 517 | FECAL_DEBUG_ASSERT(FilledRows < rows); 518 | if (!Matrix.Resize(rows, columns)) 519 | return false; 520 | } 521 | 522 | const unsigned stride = Matrix.AllocatedColumns; 523 | uint8_t* rowData = Matrix.Data + FilledRows * stride; 524 | 525 | // For each row to fill: 526 | for (unsigned ii = FilledRows; ii < rows; ++ii, rowData += stride) 527 | { 528 | const unsigned row = Window->RecoveryData[ii].Row; 529 | 530 | // Calculate row multiplier RX 531 | const uint8_t RX = GetRowValue(row); 532 | 533 | // Fill columns from left for new rows: 534 | for (unsigned j = 0; j < columns; ++j) 535 | { 536 | const unsigned column = Columns[j].Column; 537 | 538 | // Generate opcode and parameters 539 | const uint8_t CX = Columns[j].CX; 540 | const uint8_t CX2 = gf256_sqr(CX); 541 | const unsigned lane = column % kColumnLaneCount; 542 | const unsigned opcode = GetRowOpcode(lane, row); 543 | 544 | unsigned value = opcode & 1; 545 | if (opcode & 2) 546 | value ^= CX; 547 | if (opcode & 4) 548 | value ^= CX2; 549 | 
if (opcode & 8) 550 | value ^= RX; 551 | if (opcode & 16) 552 | value ^= gf256_mul(CX, RX); 553 | if (opcode & 32) 554 | value ^= gf256_mul(CX2, RX); 555 | rowData[j] = (uint8_t)value; 556 | } 557 | 558 | PCGRandom prng; 559 | prng.Seed(row, input_count); 560 | 561 | const unsigned pairCount = (input_count + kPairAddRate - 1) / kPairAddRate; 562 | 563 | for (unsigned k = 0; k < pairCount; ++k) 564 | { 565 | const unsigned element1 = prng.Next() % input_count; 566 | if (!Window->OriginalData[element1].Data) 567 | { 568 | const unsigned matrixColumn = Window->OriginalData[element1].RecoveryMatrixColumn; 569 | rowData[matrixColumn] ^= 1; 570 | } 571 | 572 | const unsigned elementRX = prng.Next() % input_count; 573 | if (!Window->OriginalData[elementRX].Data) 574 | { 575 | const unsigned matrixColumn = Window->OriginalData[elementRX].RecoveryMatrixColumn; 576 | rowData[matrixColumn] ^= RX; 577 | } 578 | } // for each pair of random columns 579 | } // for each recovery row 580 | 581 | // Fill in revealed column pivots with their own value 582 | Pivots.resize(rows); 583 | for (unsigned i = FilledRows; i < rows; ++i) 584 | Pivots[i] = i; 585 | 586 | // If we have already performed some GE, then we need to eliminate new 587 | // row data and we need to carry on elimination for new columns 588 | if (GEResumePivot > 0) 589 | ResumeGE(FilledRows, rows); 590 | 591 | FilledRows = rows; 592 | 593 | return true; 594 | } 595 | 596 | void RecoveryMatrixState::ResumeGE(const unsigned oldRows, const unsigned rows) 597 | { 598 | // If we did not add any new rows: 599 | if (oldRows >= rows) 600 | { 601 | FECAL_DEBUG_ASSERT(oldRows == rows); 602 | return; 603 | } 604 | 605 | const unsigned stride = Matrix.AllocatedColumns; 606 | const unsigned columns = Matrix.Columns; 607 | 608 | // For each pivot we have determined already: 609 | for (unsigned pivot_i = 0; pivot_i < GEResumePivot; ++pivot_i) 610 | { 611 | // Get the row for that pivot 612 | const unsigned matrixRowIndex_i = 
Pivots[pivot_i]; 613 | const uint8_t* ge_row = Matrix.Data + stride * matrixRowIndex_i; 614 | const uint8_t val_i = ge_row[pivot_i]; 615 | FECAL_DEBUG_ASSERT(val_i != 0); 616 | 617 | uint8_t* rem_row = Matrix.Data + stride * oldRows; 618 | 619 | // For each new row that was added: 620 | for (unsigned newRowIndex = oldRows; newRowIndex < rows; ++newRowIndex, rem_row += stride) 621 | { 622 | EliminateRow(ge_row, rem_row, pivot_i, columns, val_i); 623 | 624 | FECAL_DEBUG_ASSERT(Pivots[newRowIndex] == newRowIndex); 625 | } 626 | } 627 | } 628 | 629 | bool RecoveryMatrixState::GaussianElimination() 630 | { 631 | // Attempt to solve as much of the matrix as possible without using a pivots array 632 | // since that requires extra memory operations. Since the matrix will be dense we 633 | // have a good chance of going pretty far before we hit a zero 634 | 635 | if (GEResumePivot > 0) 636 | return PivotedGaussianElimination(GEResumePivot); 637 | 638 | const unsigned columns = Matrix.Columns; 639 | const unsigned stride = Matrix.AllocatedColumns; 640 | const unsigned rows = Matrix.Rows; 641 | uint8_t* ge_row = Matrix.Data; 642 | 643 | for (unsigned pivot_i = 0; pivot_i < columns; ++pivot_i, ge_row += stride) 644 | { 645 | const uint8_t val_i = ge_row[pivot_i]; 646 | if (val_i == 0) 647 | return PivotedGaussianElimination(pivot_i); 648 | 649 | RecoveryInfo& rowInfo = Window->RecoveryData[pivot_i]; 650 | rowInfo.UsedForSolution = true; 651 | 652 | uint8_t* rem_row = ge_row; 653 | 654 | // For each remaining row: 655 | for (unsigned pivot_j = pivot_i + 1; pivot_j < rows; ++pivot_j) 656 | { 657 | rem_row += stride; 658 | EliminateRow(ge_row, rem_row, pivot_i, columns, val_i); 659 | } 660 | } 661 | 662 | return true; 663 | } 664 | 665 | bool RecoveryMatrixState::PivotedGaussianElimination(unsigned pivot_i) 666 | { 667 | const unsigned columns = Matrix.Columns; 668 | const unsigned stride = Matrix.AllocatedColumns; 669 | const unsigned rows = Matrix.Rows; 670 | 671 | // Resume 
from next row down... 672 | // Note: This is designed to be called by the non-pivoted version 673 | unsigned pivot_j = pivot_i + 1; 674 | goto UsePivoting; 675 | 676 | // For each pivot to determine: 677 | for (; pivot_i < columns; ++pivot_i) 678 | { 679 | pivot_j = pivot_i; 680 | UsePivoting: 681 | for (; pivot_j < rows; ++pivot_j) 682 | { 683 | const unsigned matrixRowIndex_j = Pivots[pivot_j]; 684 | const uint8_t* ge_row = Matrix.Data + stride * matrixRowIndex_j; 685 | const uint8_t val_i = ge_row[pivot_i]; 686 | if (val_i == 0) 687 | continue; 688 | 689 | // Swap out the pivot index for this one 690 | if (pivot_i != pivot_j) 691 | { 692 | const unsigned temp = Pivots[pivot_i]; 693 | Pivots[pivot_i] = Pivots[pivot_j]; 694 | Pivots[pivot_j] = temp; 695 | } 696 | 697 | RecoveryInfo& rowInfo = Window->RecoveryData[matrixRowIndex_j]; 698 | rowInfo.UsedForSolution = true; 699 | 700 | // Skip eliminating extra rows in the case that we just solved the matrix 701 | if (pivot_i >= columns - 1) 702 | return true; 703 | 704 | // For each remaining row: 705 | for (unsigned pivot_k = pivot_i + 1; pivot_k < rows; ++pivot_k) 706 | { 707 | const unsigned matrixRowIndex_k = Pivots[pivot_k]; 708 | uint8_t* rem_row = Matrix.Data + stride * matrixRowIndex_k; 709 | 710 | EliminateRow(ge_row, rem_row, pivot_i, columns, val_i); 711 | } 712 | 713 | goto NextPivot; 714 | } 715 | 716 | // Remember where we failed last time 717 | GEResumePivot = pivot_i; 718 | 719 | return false; 720 | NextPivot:; 721 | } 722 | 723 | return true; 724 | } 725 | 726 | 727 | } // namespace fecal 728 | -------------------------------------------------------------------------------- /FecalDecoder.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #pragma once 30 | 31 | /* 32 | Siamese Decoder Data Recovery Process 33 | 34 | (1) Collect data: 35 | 36 | This collects original data packets and recovery packets, until a solution 37 | may be possible (recovery is possible about 99.9% of the time). 38 | 39 | (2) Generate recovery matrix: 40 | 41 | The recovery matrix is a square GF(2^^8) where the width of the matrix is 42 | the number of losses we are trying to recover. 
The recovery matrix elements 43 | are sampled from a larger matrix that is implicit (not actually constructed), 44 | where the columns correspond to original data and the rows correspond to 45 | recovery packets. 46 | 47 | (3) Solve recovery matrix: 48 | 49 | We experimentally perform Gaussian elimination on the matrix to put it in 50 | upper triangular form. If this is successful, then recovery can proceed. 51 | Note that we have done no operations on the original data yet, so this step 52 | is fairly inexpensive. 53 | 54 | To speed up this step with the density of the matrix in mind, we attempt 55 | GE without pivoting first and then switch to a pivoting algorithm as zeroes 56 | are encountered. 57 | 58 | If this fails we attempt to build a larger recovery matrix involving more 59 | received recovery packets, which may also involve more lost original data. 60 | If recovery is not possible with the received data, then we wait for more. 61 | 62 | (4) Eliminate received data: 63 | 64 | This step involves most of the received data and takes the most time. 65 | Its complexity is slightly less than that of the encoder. As a result, 66 | any improvement in encoder performance will translate to a faster decoder. 67 | 68 | For each recovery packet involved in solution we need to eliminate original 69 | data that is outside of the recovery matrix, so that the recovery matrix can 70 | be applied to recover the lost data. 71 | 72 | We construct the sums of received original data for each row as in the encoder, 73 | and roll the sums up as the left side is eliminated from later recovery packets. 74 | The sums are reused on multiple rows to eliminate data faster. 75 | 76 | (5) Recover original data: 77 | 78 | The same operations performed to arrive at the GE solution earlier are now 79 | performed on the recovery data packets. We then multiply by the upper 80 | triangle of the recovery matrix in back substitution order.
Finally the 81 | diagonal is eliminated by dividing each recovery packet by the diagonal. 82 | The recovery packets now contain original data. 83 | 84 | The original data are prefixed by a length field so that the original data 85 | length can be recovered, since we support variable length input data. 86 | */ 87 | 88 | #include "FecalCommon.h" 89 | 90 | #include 91 | 92 | namespace fecal { 93 | 94 | 95 | //------------------------------------------------------------------------------ 96 | // DecoderAppDataWindow 97 | 98 | struct RecoveryInfo 99 | { 100 | uint8_t* Data = nullptr; 101 | unsigned Row = 0; 102 | bool UsedForSolution = false; 103 | }; 104 | 105 | struct OriginalInfo 106 | { 107 | uint8_t* Data = nullptr; 108 | unsigned RecoveryMatrixColumn = 0; 109 | }; 110 | 111 | // Keep this number of columns in each subwindow 112 | static const unsigned kSubwindowSize = kColumnLaneCount * 8; 113 | 114 | struct Subwindow 115 | { 116 | CustomBitSet Got; 117 | unsigned GotCount = 0; 118 | }; 119 | 120 | // Decoder-specialized app data window 121 | struct DecoderAppDataWindow : AppDataWindow 122 | { 123 | // Received original data 124 | std::vector OriginalData; 125 | 126 | // Received recovery data 127 | std::vector RecoveryData; 128 | 129 | // Track which entries are filled in 130 | unsigned SubwindowCount = 0; 131 | std::vector Subwindows; 132 | 133 | // Number of unique originals received so far 134 | unsigned OriginalGotCount = 0; 135 | 136 | // Check if row has been seen yet 137 | std::unordered_set RowSet; 138 | 139 | 140 | // Allocate originals 141 | void AllocateOriginals(); 142 | 143 | // Add symbol data 144 | // Returns false if we already have the data 145 | bool AddRecovery(uint8_t* data, unsigned row); 146 | 147 | // Add original data 148 | // Returns false if we already have the data 149 | bool AddOriginal(unsigned column, uint8_t* data); 150 | 151 | // Mark that we got an element 152 | void MarkGotElement(unsigned element); 153 | 154 | // Returns Count 
if no more elements were lost
    // Otherwise returns the next element that was lost at or after the given one
    unsigned FindNextLostElement(unsigned elementStart);
};


//------------------------------------------------------------------------------
// RecoveryMatrixState

/*
    We maintain a GF(2^^8) byte matrix that can grow a little in rows and
    columns to reattempt solving with a larger matrix that includes more
    lost columns and received recovery data, in the case that recovery fails.
    It is expected that recovery fails around 1% of the time.

    The matrix is also a bit oversized to allow us to prefetch the next row,
    and to align memory addresses with cache line boundaries for speed.
*/

// State for the recovery matrix and the Gaussian elimination (GE) run on it.
// The two inline helpers at the bottom implement the per-row elimination step
// shared by the GE member functions declared here.
class RecoveryMatrixState
{
public:
    // Decoder app data window this state refers to (null until assigned)
    DecoderAppDataWindow* Window = nullptr;

    struct ColumnInfo
    {
        // Column number for the missing data
        unsigned Column = 0;

        // Column multiplier
        uint8_t CX = 0;
    };
    // NOTE(review): the vector's element type is missing in this copy of the
    // file -- presumably ColumnInfo (declared just above); confirm.
    std::vector Columns;

    // Recovery matrix
    GrowingAlignedByteMatrix Matrix;

    // Array of pivots used for when rows need to be swapped
    // This allows us to swap indices rather than swap whole rows to reduce memory accesses
    // NOTE(review): the vector's element type is missing in this copy of the
    // file -- presumably an unsigned row index; confirm.
    std::vector Pivots;

    // Pivot to resume at when we get more data
    unsigned GEResumePivot = 0;

    // Number of matrix rows we already filled
    unsigned FilledRows = 0;


    // Populate Rows and Columns arrays
    void PopulateColumns(const unsigned columns);

    // Generate the matrix
    bool GenerateMatrix();

    // Attempt to put the matrix in upper-triangular form
    bool GaussianElimination();

protected:
    // Resume GE from a previous failure point
    void ResumeGE(const unsigned oldRows, const unsigned rows);

    // Run GE with pivots after a column is found to be zero
    bool PivotedGaussianElimination(unsigned pivot_i);

    // rem_row[] += ge_row[] * y
    // Applies to the columns in [columnStart, columnEnd).
    GF256_FORCE_INLINE void MulAddRows(
        const uint8_t* ge_row, uint8_t* rem_row, unsigned columnStart,
        const unsigned columnEnd, uint8_t y)
    {
#ifdef GF256_ALIGNED_ACCESSES
        // Do unaligned operations first
        // Note: Each row starts at an aligned address
        // Bytes before the next aligned offset are handled one at a time,
        // clamped so we never run past columnEnd.
        unsigned unalignedEnd = NextAlignedOffset(columnStart);
        if (unalignedEnd > columnEnd)
            unalignedEnd = columnEnd;
        for (; columnStart < unalignedEnd; ++columnStart)
            rem_row[columnStart] ^= gf256_mul(ge_row[columnStart], y);
        // Nothing left for the bulk operation
        if (columnStart >= columnEnd)
            return;
#endif

        // Bulk multiply-add over the remaining columns
        gf256_muladd_mem(rem_row + columnStart, y, ge_row + columnStart, columnEnd - columnStart);
    }

    // Internal function common to both GE functions, used to eliminate a row of data
    //   ge_row    : pivot row (read only)
    //   rem_row   : row being eliminated (modified in place)
    //   pivot_i   : column of the pivot
    //   columnEnd : one past the last column to update
    //   val_i     : pivot row value used as the divisor
    GF256_FORCE_INLINE void EliminateRow(
        const uint8_t* ge_row, uint8_t* rem_row, const unsigned pivot_i,
        const unsigned columnEnd, const uint8_t val_i)
    {
        // Skip if the element j,i is already zero
        const uint8_t val_j = rem_row[pivot_i];
        if (val_j == 0)
            return;

        // Calculate element j,i elimination constant based on pivot row value
        const uint8_t y = gf256_div(val_j, val_i);

        // Remember what value was used to zero element j,i
        rem_row[pivot_i] = y;

        // Only the columns after the pivot are updated; the pivot column now
        // holds the multiplier stored above.
        MulAddRows(ge_row, rem_row, pivot_i + 1, columnEnd, y);
    }
};


//------------------------------------------------------------------------------
// Decoder

// Decoder codec object: accepts original and recovery symbols, attempts to
// decode, and hands original data back to the caller.
class Decoder : public ICodec
{
public:
    virtual ~Decoder() {}

    // Initialize the decoder
    FecalResult Initialize(unsigned input_count, uint64_t total_bytes);

    // Add original data
    FecalResult AddOriginal(const FecalSymbol& symbol);

    // Add recovery data
    FecalResult AddRecovery(const FecalSymbol& symbol);

    // Try to decode
    FecalResult Decode(RecoveredSymbols& symbols);

    // Get original data
    FecalResult GetOriginal(unsigned column, FecalSymbol& symbol);

protected:
    // Window of original data
    DecoderAppDataWindow Window;

    // Matrix containing recovery packets that may admit a solution
    RecoveryMatrixState RecoveryMatrix;

    // Has recovery been attempted with the latest inputs?
    bool RecoveryAttempted = false;

    // Recovered data array returned to application
    // NOTE(review): the vector's element type is missing in this copy of the
    // file; restore it from the repository.
    std::vector RecoveredData;

    // Sums for each lane
    AlignedDataBuffer LaneSums[kColumnLaneCount][kColumnSumCount];

    // Output workspace
    AlignedDataBuffer ProductWorkspace;


    // Recovery step: Eliminate original data that was successfully received
    FecalResult EliminateOriginalData();

    // Get lane sum for original data we have
    const uint8_t* GetLaneSum(unsigned laneIndex, unsigned sumIndex);

    // Recovery step: Multiply lower triangle following solution order
    void MultiplyLowerTriangle();

    // Recovery step: Back-substitute upper triangle to reveal original data
    FecalResult BackSubstitution();
};


} // namespace fecal
--------------------------------------------------------------------------------
/FecalEncoder.cpp:
--------------------------------------------------------------------------------
/*
    Copyright (c) 2017 Christopher A. Taylor. All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.
9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #include "FecalEncoder.h" 30 | 31 | namespace fecal { 32 | 33 | 34 | //------------------------------------------------------------------------------ 35 | // EncoderAppDataWindow 36 | 37 | void EncoderAppDataWindow::AllocateOriginals() 38 | { 39 | OriginalData.resize(InputCount); 40 | } 41 | 42 | void EncoderAppDataWindow::SetEncoderInput(void* const * const input_data) 43 | { 44 | FECAL_DEBUG_ASSERT(InputCount > 0); // SetParameters() must be called first 45 | 46 | for (unsigned ii = 0, count = InputCount; ii < count; ++ii) 47 | OriginalData[ii] = reinterpret_cast(input_data[ii]); 48 | } 49 | 50 | 51 | //------------------------------------------------------------------------------ 52 | // Encoder 53 | 54 | // This optimization speeds up encoding by about 5% 55 | #ifdef FECAL_ADD2_OPT 56 | #define FECAL_ADD2_ENC_SETUP_OPT 57 | #endif 58 | 59 | FecalResult Encoder::Initialize(unsigned input_count, void* const * const input_data, uint64_t total_bytes) 60 | { 61 | // Validate input and set parameters 62 | if (!Window.SetParameters(input_count, total_bytes)) 63 | { 64 | FECAL_DEBUG_BREAK; // Invalid input 65 | return Fecal_InvalidInput; 66 | } 67 | Window.AllocateOriginals(); 68 | Window.SetEncoderInput(input_data); 69 | 70 | const unsigned symbolBytes = Window.SymbolBytes; 71 | 72 | // Allocate lane sums 73 | for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex) 74 | { 75 | for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex) 76 | { 77 | if (!LaneSums[laneIndex][sumIndex].Allocate(symbolBytes)) 78 | return Fecal_OutOfMemory; 79 | 80 | // Clear memory in each lane sum 81 | memset(LaneSums[laneIndex][sumIndex].Data, 0, symbolBytes); 82 | } 83 | } 84 | 85 | // Allocate workspace 86 | if (!ProductWorkspace.Allocate(symbolBytes)) 87 | return Fecal_OutOfMemory; 88 | 89 | // TBD: Unroll first set of 8 lanes to avoid the extra memset above? 90 | // TBD: Use GetLaneSum() approach do to minimal work for small output? 
91 | 92 | #ifdef FECAL_ADD2_ENC_SETUP_OPT 93 | for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex) 94 | { 95 | // Sum[0] += Data 96 | XORSummer sum; 97 | sum.Initialize(LaneSums[laneIndex][0].Data, symbolBytes); 98 | 99 | const unsigned columnEnd = input_count - 1; 100 | 101 | for (unsigned column = laneIndex; column < columnEnd; column += kColumnLaneCount) 102 | { 103 | const uint8_t* columnData = reinterpret_cast(input_data[column]); 104 | sum.Add(columnData); 105 | } 106 | 107 | if ((columnEnd % kColumnLaneCount) == laneIndex) 108 | { 109 | const uint8_t* columnData = reinterpret_cast(input_data[columnEnd]); 110 | gf256_add_mem(LaneSums[laneIndex][0].Data, columnData, Window.FinalBytes); 111 | } 112 | 113 | sum.Finalize(); 114 | } 115 | #endif 116 | 117 | // For each input column: 118 | for (unsigned column = 0; column < input_count; ++column) 119 | { 120 | const uint8_t* columnData = reinterpret_cast(input_data[column]); 121 | const unsigned columnBytes = Window.GetColumnBytes(column); 122 | const unsigned laneIndex = column % kColumnLaneCount; 123 | const uint8_t CX = GetColumnValue(column); 124 | const uint8_t CX2 = gf256_sqr(CX); 125 | 126 | #ifndef FECAL_ADD2_ENC_SETUP_OPT 127 | // Sum[0] += Data 128 | gf256_add_mem(LaneSums[laneIndex][0].Data, columnData, columnBytes); 129 | #endif 130 | 131 | // Sum[1] += CX * Data 132 | gf256_muladd_mem(LaneSums[laneIndex][1].Data, CX, columnData, columnBytes); 133 | 134 | // Sum[2] += CX^2 * Data 135 | gf256_muladd_mem(LaneSums[laneIndex][2].Data, CX2, columnData, columnBytes); 136 | } 137 | 138 | return Fecal_Success; 139 | 140 | static_assert(kColumnSumCount == 3, "Update this"); 141 | } 142 | 143 | FecalResult Encoder::Encode(FecalSymbol& symbol) 144 | { 145 | // If encoder is not initialized: 146 | if (!ProductWorkspace.Data) 147 | return Fecal_InvalidInput; 148 | 149 | const unsigned symbolBytes = Window.SymbolBytes; 150 | if (symbol.Bytes != symbolBytes) 151 | return Fecal_InvalidInput; 152 | 
153 | // Load parameters 154 | const unsigned count = Window.InputCount; 155 | uint8_t* outputSum = reinterpret_cast( symbol.Data ); 156 | uint8_t* outputProduct = ProductWorkspace.Data; 157 | 158 | const unsigned row = symbol.Index; 159 | 160 | // Initialize LDPC 161 | PCGRandom prng; 162 | prng.Seed(row, count); 163 | 164 | // Accumulate original data into the two sums 165 | const unsigned pairCount = (Window.InputCount + kPairAddRate - 1) / kPairAddRate; 166 | // Unrolled first loop: 167 | { 168 | const unsigned element1 = prng.Next() % count; 169 | const uint8_t* original1 = Window.OriginalData[element1]; 170 | 171 | const unsigned elementRX = prng.Next() % count; 172 | const uint8_t* originalRX = Window.OriginalData[elementRX]; 173 | 174 | // Sum = Original[element1] 175 | if (Window.IsFinalColumn(element1)) 176 | { 177 | memcpy(outputSum, original1, Window.FinalBytes); 178 | memset(outputSum + Window.FinalBytes, 0, symbolBytes - Window.FinalBytes); 179 | } 180 | else 181 | memcpy(outputSum, original1, symbolBytes); 182 | 183 | // Product = Original[elementRX] 184 | if (Window.IsFinalColumn(elementRX)) 185 | { 186 | memcpy(outputProduct, originalRX, Window.FinalBytes); 187 | memset(outputProduct + Window.FinalBytes, 0, symbolBytes - Window.FinalBytes); 188 | } 189 | else 190 | memcpy(outputProduct, originalRX, symbolBytes); 191 | } 192 | 193 | XORSummer sum; 194 | sum.Initialize(outputSum, symbolBytes); 195 | XORSummer prod; 196 | prod.Initialize(outputProduct, symbolBytes); 197 | 198 | for (unsigned i = 1; i < pairCount; ++i) 199 | { 200 | const unsigned element1 = prng.Next() % count; 201 | const uint8_t* original1 = Window.OriginalData[element1]; 202 | 203 | const unsigned elementRX = prng.Next() % count; 204 | const uint8_t* originalRX = Window.OriginalData[elementRX]; 205 | 206 | // Sum += Original[element1] 207 | if (Window.IsFinalColumn(element1)) 208 | gf256_add_mem(outputSum, original1, Window.FinalBytes); 209 | else 210 | sum.Add(original1); 211 | 
212 | // Product += Original[elementRX] 213 | if (Window.IsFinalColumn(elementRX)) 214 | gf256_add_mem(outputProduct, originalRX, Window.FinalBytes); 215 | else 216 | prod.Add(originalRX); 217 | } 218 | 219 | // For each lane: 220 | for (unsigned laneIndex = 0; laneIndex < kColumnLaneCount; ++laneIndex) 221 | { 222 | // Compute the operations to run for this lane and row 223 | unsigned opcode = GetRowOpcode(laneIndex, row); 224 | 225 | // Sum += Random Lanes 226 | unsigned mask = 1; 227 | for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex, mask <<= 1) 228 | if (opcode & mask) 229 | sum.Add(LaneSums[laneIndex][sumIndex].Data); 230 | 231 | // Product += Random Lanes 232 | for (unsigned sumIndex = 0; sumIndex < kColumnSumCount; ++sumIndex, mask <<= 1) 233 | if (opcode & mask) 234 | prod.Add(LaneSums[laneIndex][sumIndex].Data); 235 | } 236 | 237 | sum.Finalize(); 238 | prod.Finalize(); 239 | 240 | // Sum += RX * Product 241 | gf256_muladd_mem(outputSum, GetRowValue(row), outputProduct, symbolBytes); 242 | 243 | return Fecal_Success; 244 | } 245 | 246 | 247 | } // namespace fecal 248 | -------------------------------------------------------------------------------- /FecalEncoder.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 
12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 27 | */ 28 | 29 | #pragma once 30 | 31 | /* 32 | Encoder 33 | 34 | The encoder builds up sums of input data on Initialize(). 35 | 36 | When Encode() is called it will combine these sums in a deterministic way. 37 | 38 | Encode returns a pointer to the Sum workspace. 
39 | */ 40 | 41 | #include "FecalCommon.h" 42 | 43 | namespace fecal { 44 | 45 | 46 | //------------------------------------------------------------------------------ 47 | // EncoderAppDataWindow 48 | 49 | // Encoder-specialized app data window 50 | struct EncoderAppDataWindow : AppDataWindow 51 | { 52 | // Original data 53 | std::vector OriginalData; 54 | 55 | 56 | // Set encoder input 57 | // Returns false if input is invalid 58 | void SetEncoderInput(void* const * const input_data); 59 | 60 | // Allocate originals 61 | void AllocateOriginals(); 62 | }; 63 | 64 | 65 | //------------------------------------------------------------------------------ 66 | // Encoder 67 | 68 | class Encoder : public ICodec 69 | { 70 | public: 71 | virtual ~Encoder() {} 72 | 73 | // Initialize the encoder 74 | FecalResult Initialize(unsigned input_count, void* const * const input_data, uint64_t total_bytes); 75 | 76 | // Generate the next recovery packet for the data 77 | FecalResult Encode(FecalSymbol& symbol); 78 | 79 | protected: 80 | // Application data set 81 | EncoderAppDataWindow Window; 82 | 83 | // Sums for each lane 84 | AlignedDataBuffer LaneSums[kColumnLaneCount][kColumnSumCount]; 85 | 86 | // Output workspace 87 | AlignedDataBuffer ProductWorkspace; 88 | }; 89 | 90 | 91 | } // namespace fecal 92 | -------------------------------------------------------------------------------- /License.md: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Christopher A. Taylor 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FEC-AL 2 | ## Forward Error Correction at the Application Layer in C 3 | 4 | FEC-AL is a simple, portable, fast library for Forward Error Correction. 5 | From a block of equally sized original data pieces, it generates recovery 6 | symbols that can be used to recover lost original data. 7 | 8 | * It requires that data pieces are all a fixed size. 9 | * It can take as input an unlimited number of input blocks. 10 | * It can generate an unlimited stream of recovery symbols used for decoding. 
11 | * It has a small (about 1%) chance of failing to recover, so it is not an MDS code. 12 | 13 | The main limitation of the software is that it gets slower as O(N^^2) in 14 | the number of inputs or outputs. In trade, the encoder overhead is unusually 15 | low, and the decoder is extremely efficient when recovering from a small number 16 | of losses. It may be the best choice based on practical evaluation. 17 | 18 | FEC-AL is a block codec derived from the [Siamese](https://github.com/catid/siamese) streaming FEC library. 19 | 20 | 21 | #### Why fecal matters: 22 | 23 | It supports an unlimited number of inputs and outputs, similar to a Fountain Code, 24 | but it is designed as a Convolutional Code. This means that it does not perform 25 | well with a large number of losses. It is faster than existing erasure correction 26 | code (ECC) software when the loss count is expected to be small. 27 | 28 | 29 | #### Encoder API: 30 | 31 | ``` 32 | #include "fecal.h" 33 | ``` 34 | 35 | For full documentation please read `fecal.h`. 36 | 37 | + `fecal_init()` : Initialize library. 38 | + `fecal_encoder_create()`: Create encoder object. 39 | + `fecal_encode()`: Encode a recovery symbol. 40 | + `fecal_free()`: Free encoder object. 41 | 42 | 43 | #### Decoder API: 44 | 45 | ``` 46 | #include "fecal.h" 47 | ``` 48 | 49 | For full documentation please read `fecal.h`. 50 | 51 | + `fecal_init()` : Initialize library. 52 | + `fecal_decoder_create()`: Create a decoder object. 53 | + `fecal_decoder_add_original()`: Add original data to the decoder. 54 | + `fecal_decoder_add_recovery()`: Add recovery data to the decoder. 55 | + `fecal_decode()`: Attempt to decode with what has been added so far, returning recovered data. 56 | + `fecal_decoder_get()`: Read back original data after decode. 57 | + `fecal_free()`: Free decoder object. 
58 | 59 | 60 | #### Benchmarks: 61 | 62 | For random losses in 2 MB of data split into 1000 equal-sized 2000 byte pieces: 63 | 64 | ``` 65 | Encoder(2 MB in 1000 pieces, 1 losses): Input=6968.64 MB/s, Output=6.96864 MB/s, (Encode create: 7225.69 MB/s) 66 | Decoder(2 MB in 1000 pieces, 1 losses): Input=9083.06 MB/s, Output=9.08307 MB/s, (Overhead = 0 pieces) 67 | 68 | Encoder(2 MB in 1000 pieces, 2 losses): Input=7181.33 MB/s, Output=14.5063 MB/s, (Encode create: 7663.72 MB/s) 69 | Decoder(2 MB in 1000 pieces, 2 losses): Input=7365.13 MB/s, Output=14.7303 MB/s, (Overhead = 0.02 pieces) 70 | 71 | Encoder(2 MB in 1000 pieces, 3 losses): Input=6805.5 MB/s, Output=20.4165 MB/s, (Encode create: 7526.72 MB/s) 72 | Decoder(2 MB in 1000 pieces, 3 losses): Input=6312.93 MB/s, Output=18.9388 MB/s, (Overhead = 0 pieces) 73 | 74 | Encoder(2 MB in 1000 pieces, 4 losses): Input=6751.28 MB/s, Output=27.0726 MB/s, (Encode create: 7645.84 MB/s) 75 | Decoder(2 MB in 1000 pieces, 4 losses): Input=6387.12 MB/s, Output=25.5485 MB/s, (Overhead = 0.0100002 pieces) 76 | 77 | Encoder(2 MB in 1000 pieces, 5 losses): Input=6502.16 MB/s, Output=32.5108 MB/s, (Encode create: 7645.55 MB/s) 78 | Decoder(2 MB in 1000 pieces, 5 losses): Input=5982.11 MB/s, Output=29.9106 MB/s, (Overhead = 0 pieces) 79 | 80 | Encoder(2 MB in 1000 pieces, 6 losses): Input=6014.13 MB/s, Output=36.3855 MB/s, (Encode create: 7238.51 MB/s) 81 | Decoder(2 MB in 1000 pieces, 6 losses): Input=5520.74 MB/s, Output=33.1245 MB/s, (Overhead = 0.0500002 pieces) 82 | 83 | Encoder(2 MB in 1000 pieces, 7 losses): Input=6284.56 MB/s, Output=44.1176 MB/s, (Encode create: 7764.88 MB/s) 84 | Decoder(2 MB in 1000 pieces, 7 losses): Input=5601.61 MB/s, Output=39.2113 MB/s, (Overhead = 0.02 pieces) 85 | 86 | Encoder(2 MB in 1000 pieces, 8 losses): Input=5854.97 MB/s, Output=46.8398 MB/s, (Encode create: 7388.25 MB/s) 87 | Decoder(2 MB in 1000 pieces, 8 losses): Input=5492.54 MB/s, Output=43.9403 MB/s, (Overhead = 0 pieces) 88 | 89 | 
Encoder(2 MB in 1000 pieces, 9 losses): Input=5843.34 MB/s, Output=52.6485 MB/s, (Encode create: 7645.84 MB/s) 90 | Decoder(2 MB in 1000 pieces, 9 losses): Input=5221.11 MB/s, Output=46.99 MB/s, (Overhead = 0.0100002 pieces) 91 | 92 | Encoder(2 MB in 1000 pieces, 10 losses): Input=5728.53 MB/s, Output=57.3998 MB/s, (Encode create: 7610.06 MB/s) 93 | Decoder(2 MB in 1000 pieces, 10 losses): Input=5172.24 MB/s, Output=51.7224 MB/s, (Overhead = 0.0200005 pieces) 94 | 95 | Encoder(2 MB in 1000 pieces, 11 losses): Input=5590.65 MB/s, Output=61.4972 MB/s, (Encode create: 7667.83 MB/s) 96 | Decoder(2 MB in 1000 pieces, 11 losses): Input=5012.53 MB/s, Output=55.1378 MB/s, (Overhead = 0 pieces) 97 | 98 | Encoder(2 MB in 1000 pieces, 13 losses): Input=5382.13 MB/s, Output=70.0753 MB/s, (Encode create: 7687.28 MB/s) 99 | Decoder(2 MB in 1000 pieces, 13 losses): Input=4790.53 MB/s, Output=62.2769 MB/s, (Overhead = 0.0200005 pieces) 100 | 101 | Encoder(2 MB in 1000 pieces, 15 losses): Input=5065.47 MB/s, Output=76.0327 MB/s, (Encode create: 7556.01 MB/s) 102 | Decoder(2 MB in 1000 pieces, 15 losses): Input=4490.45 MB/s, Output=67.3567 MB/s, (Overhead = 0.0100002 pieces) 103 | 104 | Encoder(2 MB in 1000 pieces, 16 losses): Input=4874.6 MB/s, Output=77.9936 MB/s, (Encode create: 7390.71 MB/s) 105 | Decoder(2 MB in 1000 pieces, 16 losses): Input=4279.45 MB/s, Output=68.4712 MB/s, (Overhead = 0 pieces) 106 | 107 | Encoder(2 MB in 1000 pieces, 18 losses): Input=4707.99 MB/s, Output=84.7438 MB/s, (Encode create: 7515.69 MB/s) 108 | Decoder(2 MB in 1000 pieces, 18 losses): Input=4008.9 MB/s, Output=72.1602 MB/s, (Overhead = 0 pieces) 109 | 110 | Encoder(2 MB in 1000 pieces, 20 losses): Input=4619.15 MB/s, Output=92.4754 MB/s, (Encode create: 7679.31 MB/s) 111 | Decoder(2 MB in 1000 pieces, 20 losses): Input=3858.4 MB/s, Output=77.1679 MB/s, (Overhead = 0.0200005 pieces) 112 | 113 | Encoder(2 MB in 1000 pieces, 25 losses): Input=4176.24 MB/s, Output=104.448 MB/s, (Encode create: 
7576.33 MB/s) 114 | Decoder(2 MB in 1000 pieces, 25 losses): Input=3374.22 MB/s, Output=84.3554 MB/s, (Overhead = 0.0100002 pieces) 115 | 116 | Encoder(2 MB in 1000 pieces, 30 losses): Input=3731.27 MB/s, Output=111.976 MB/s, (Encode create: 7418.12 MB/s) 117 | Decoder(2 MB in 1000 pieces, 30 losses): Input=2950.2 MB/s, Output=88.506 MB/s, (Overhead = 0.0100002 pieces) 118 | 119 | Encoder(2 MB in 1000 pieces, 35 losses): Input=3542.46 MB/s, Output=124.021 MB/s, (Encode create: 7610.64 MB/s) 120 | Decoder(2 MB in 1000 pieces, 35 losses): Input=2702.99 MB/s, Output=94.6048 MB/s, (Overhead = 0.00999832 pieces) 121 | 122 | Encoder(2 MB in 1000 pieces, 40 losses): Input=3365.53 MB/s, Output=134.621 MB/s, (Encode create: 7846.52 MB/s) 123 | Decoder(2 MB in 1000 pieces, 40 losses): Input=2410.42 MB/s, Output=96.4169 MB/s, (Overhead = 0 pieces) 124 | 125 | Encoder(2 MB in 1000 pieces, 50 losses): Input=2658.13 MB/s, Output=132.933 MB/s, (Encode create: 6889.42 MB/s) 126 | Decoder(2 MB in 1000 pieces, 50 losses): Input=1917.88 MB/s, Output=95.8938 MB/s, (Overhead = 0.00999832 pieces) 127 | 128 | Encoder(2 MB in 1000 pieces, 60 losses): Input=2573.04 MB/s, Output=154.408 MB/s, (Encode create: 7578.92 MB/s) 129 | Decoder(2 MB in 1000 pieces, 60 losses): Input=1612.62 MB/s, Output=96.757 MB/s, (Overhead = 0.00999832 pieces) 130 | 131 | Encoder(2 MB in 1000 pieces, 70 losses): Input=2141.83 MB/s, Output=149.95 MB/s, (Encode create: 6861.77 MB/s) 132 | Decoder(2 MB in 1000 pieces, 70 losses): Input=1325.65 MB/s, Output=92.7957 MB/s, (Overhead = 0.0100021 pieces) 133 | 134 | Encoder(2 MB in 1000 pieces, 80 losses): Input=2052.65 MB/s, Output=164.212 MB/s, (Encode create: 7454.34 MB/s) 135 | Decoder(2 MB in 1000 pieces, 80 losses): Input=1112 MB/s, Output=88.9601 MB/s, (Overhead = 0 pieces) 136 | 137 | Encoder(2 MB in 1000 pieces, 90 losses): Input=1926.69 MB/s, Output=173.402 MB/s, (Encode create: 7593.01 MB/s) 138 | Decoder(2 MB in 1000 pieces, 90 losses): Input=972.81 MB/s, 
Output=87.5529 MB/s, (Overhead = 0 pieces) 139 | 140 | Encoder(2 MB in 1000 pieces, 100 losses): Input=1814.67 MB/s, Output=181.467 MB/s, (Encode create: 7866.27 MB/s) 141 | Decoder(2 MB in 1000 pieces, 100 losses): Input=861.668 MB/s, Output=86.1668 MB/s, (Overhead = 0 pieces) 142 | 143 | Encoder(2 MB in 1000 pieces, 110 losses): Input=1617.09 MB/s, Output=177.88 MB/s, (Encode create: 7514.28 MB/s) 144 | Decoder(2 MB in 1000 pieces, 110 losses): Input=740.198 MB/s, Output=81.4218 MB/s, (Overhead = 0 pieces) 145 | 146 | Encoder(2 MB in 1000 pieces, 120 losses): Input=1485.21 MB/s, Output=178.225 MB/s, (Encode create: 7274.05 MB/s) 147 | Decoder(2 MB in 1000 pieces, 120 losses): Input=645.417 MB/s, Output=77.4501 MB/s, (Overhead = 0 pieces) 148 | ``` 149 | 150 | 151 | #### Comparisons: 152 | 153 | Comparing with `wh256`, which is [Wirehair](https://github.com/catid/wirehair) using the GF256 library instead of the old library so it runs faster: 154 | 155 | For the same data sizes and about 100 losses: 156 | 157 | ``` 158 | >> wirehair_encode(N = 1000) in 2174.33 usec, 919.825 MB/s after 98.992 avg losses 159 | << wirehair_decode(N = 1000) average overhead = 0.023 blocks, average reconstruct time = 1519.61 usec, 1316.13 MB/s 160 | ``` 161 | 162 | Wirehair is asymptotically O(N) in speed, but for smaller input or output data it can be beaten by other codecs. 163 | In this case the Fecal encoder is twice as fast as Wirehair. Wirehair is almost twice as fast to decode, 164 | but it takes the same time regardless of the number of losses, so Fecal is much faster for small loss counts. 165 | 166 | For the same data sizes and about 30 losses: 167 | 168 | ``` 169 | >> wirehair_encode(N = 1000) in 2281.65 usec, 876.559 MB/s after 30.931 avg losses 170 | << wirehair_decode(N = 1000) average overhead = 0.02 blocks, average reconstruct time = 1462.48 usec, 1367.54 MB/s 171 | ``` 172 | 173 | Now Wirehair is 4x slower to encode and 2x slower to decode. 
There is definitely a large, useful region of operation 174 | where the Fecal algorithm is preferred. 175 | 176 | 177 | #### Smaller input benchmark: 178 | 179 | For random losses in 0.2 MB of data split into 100 equal-sized 2000 byte pieces: 180 | 181 | ``` 182 | Encoder(0.2 MB in 100 pieces, 1 losses): Input=5899.71 MB/s, Output=58.9971 MB/s, (Encode create: 6251.95 MB/s) 183 | Decoder(0.2 MB in 100 pieces, 1 losses): Input=8257.64 MB/s, Output=82.5764 MB/s, (Overhead = 0 pieces) 184 | 185 | Encoder(0.2 MB in 100 pieces, 2 losses): Input=6040.47 MB/s, Output=122.018 MB/s, (Encode create: 6680.03 MB/s) 186 | Decoder(0.2 MB in 100 pieces, 2 losses): Input=6572.46 MB/s, Output=131.449 MB/s, (Overhead = 0.02 pieces) 187 | 188 | Encoder(0.2 MB in 100 pieces, 3 losses): Input=5474.95 MB/s, Output=165.344 MB/s, (Encode create: 6391.82 MB/s) 189 | Decoder(0.2 MB in 100 pieces, 3 losses): Input=5274.26 MB/s, Output=158.228 MB/s, (Overhead = 0.02 pieces) 190 | 191 | Encoder(0.2 MB in 100 pieces, 4 losses): Input=5298.01 MB/s, Output=212.98 MB/s, (Encode create: 6504.06 MB/s) 192 | Decoder(0.2 MB in 100 pieces, 4 losses): Input=5055.61 MB/s, Output=202.224 MB/s, (Overhead = 0.02 pieces) 193 | 194 | Encoder(0.2 MB in 100 pieces, 5 losses): Input=5289.61 MB/s, Output=264.48 MB/s, (Encode create: 6768.19 MB/s) 195 | Decoder(0.2 MB in 100 pieces, 5 losses): Input=4785.83 MB/s, Output=239.292 MB/s, (Overhead = 0 pieces) 196 | 197 | Encoder(0.2 MB in 100 pieces, 6 losses): Input=4945.6 MB/s, Output=297.23 MB/s, (Encode create: 6648.94 MB/s) 198 | Decoder(0.2 MB in 100 pieces, 6 losses): Input=4356.35 MB/s, Output=261.381 MB/s, (Overhead = 0.0100002 pieces) 199 | 200 | Encoder(0.2 MB in 100 pieces, 7 losses): Input=4621.07 MB/s, Output=324.399 MB/s, (Encode create: 6466.21 MB/s) 201 | Decoder(0.2 MB in 100 pieces, 7 losses): Input=4024.95 MB/s, Output=281.747 MB/s, (Overhead = 0.02 pieces) 202 | 203 | Encoder(0.2 MB in 100 pieces, 8 losses): Input=4338.4 MB/s, Output=347.072 MB/s, 
(Encode create: 6287.33 MB/s) 204 | Decoder(0.2 MB in 100 pieces, 8 losses): Input=3762.94 MB/s, Output=301.035 MB/s, (Overhead = 0 pieces) 205 | 206 | Encoder(0.2 MB in 100 pieces, 9 losses): Input=4346.88 MB/s, Output=391.654 MB/s, (Encode create: 6548.79 MB/s) 207 | Decoder(0.2 MB in 100 pieces, 9 losses): Input=3592.6 MB/s, Output=323.334 MB/s, (Overhead = 0.0100002 pieces) 208 | 209 | Encoder(0.2 MB in 100 pieces, 10 losses): Input=4168.4 MB/s, Output=417.257 MB/s, (Encode create: 6553.08 MB/s) 210 | Decoder(0.2 MB in 100 pieces, 10 losses): Input=3413.55 MB/s, Output=341.355 MB/s, (Overhead = 0.0100002 pieces) 211 | ``` 212 | 213 | 214 | #### Comparisons: 215 | 216 | Comparing with `cm256`, which is a Cauchy Reed-Solomon erasure code library using GF256: 217 | 218 | ``` 219 | Encoder: 2000 bytes k = 100 m = 1 : 7.69775 usec, 25981.6 MBps 220 | Decoder: 2000 bytes k = 100 m = 1 : 15.0289 usec, 13307.7 MBps 221 | Encoder: 2000 bytes k = 100 m = 2 : 37.7556 usec, 5297.23 MBps 222 | Decoder: 2000 bytes k = 100 m = 2 : 36.2894 usec, 5511.25 MBps 223 | Encoder: 2000 bytes k = 100 m = 3 : 69.2797 usec, 2886.85 MBps 224 | Decoder: 2000 bytes k = 100 m = 3 : 43.9871 usec, 4546.78 MBps 225 | Encoder: 2000 bytes k = 100 m = 4 : 56.8167 usec, 3520.09 MBps 226 | Decoder: 2000 bytes k = 100 m = 4 : 74.4116 usec, 2687.75 MBps 227 | Encoder: 2000 bytes k = 100 m = 5 : 107.402 usec, 1862.16 MBps 228 | Decoder: 2000 bytes k = 100 m = 5 : 102.637 usec, 1948.62 MBps 229 | Encoder: 2000 bytes k = 100 m = 6 : 271.987 usec, 735.329 MBps 230 | Decoder: 2000 bytes k = 100 m = 6 : 300.945 usec, 664.573 MBps 231 | Encoder: 2000 bytes k = 100 m = 7 : 371.691 usec, 538.081 MBps 232 | Decoder: 2000 bytes k = 100 m = 7 : 336.135 usec, 594.999 MBps 233 | Encoder: 2000 bytes k = 100 m = 8 : 244.129 usec, 819.241 MBps 234 | Decoder: 2000 bytes k = 100 m = 8 : 251.093 usec, 796.517 MBps 235 | Encoder: 2000 bytes k = 100 m = 9 : 282.251 usec, 708.59 MBps 236 | Decoder: 2000 bytes k = 100 m = 9 
: 282.984 usec, 706.754 MBps 237 | Encoder: 2000 bytes k = 100 m = 10 : 307.543 usec, 650.315 MBps 238 | Decoder: 2000 bytes k = 100 m = 10 : 313.775 usec, 637.4 MBps 239 | ``` 240 | 241 | Fecal is only slower for the special single loss case where `cm256` uses XOR, 242 | in all other cases the new library is much faster. For 10 losses, it is 6x faster. 243 | Note that `cm256` is also limited to 255 inputs or outputs. 244 | 245 | 246 | #### How fecal works: 247 | 248 | The library uses Siamese Codes for a structured convolutional matrix. 249 | This matrix has a fast matrix-vector product involving mostly XOR operations. 250 | This allows Siamese Codes to encode and decode much faster than other 251 | convolutional codes built on Cauchy or Vandermonde matrices. 252 | Let's call this the Siamese Matrix Structure or something similar. 253 | 254 | To produce an output packet, some preprocessing is performed. 255 | 256 | The input data is first split into 8 "lanes" where every 8th symbol {e.g. 0, 8, 16, 24, ...} is summed together. 257 | The second "lane" starts from input symbol 1 and contains every 8th symbol after that {e.g. 1, 9, 17, 25, ...}. 258 | 259 | For each "lane" there are three running "sums": 260 | 261 | + Sum 0: Simple XOR between all inputs in that lane. 262 | + Sum 1: Each input is multiplied by a coefficient provided by `GetColumnValue`, and then XORed into the sum. 263 | + Sum 2: Each input is multiplied by the same coefficient squared, and then XORed into the sum. 264 | 265 | This means there are 24 running sums, each with symbol_bytes bytes of data. 266 | 267 | When an output is being produced (encoded), two running sums are formed temporarily. Both are generated 268 | through the same process, and the result of one sum is multiplied by a row coefficient produced by the 269 | `GetRowValue` function and added to the other sum to produce the output. 270 | 271 | To produce each of the two sums, a formula is followed. 
272 | For each lane, the `GetRowOpcode` function returns which sums should be used. 273 | Sums 0, 1, and 2 are incorporated based on the function output. 274 | And then 1/16 of the input data are selected at random and XORed into each sum. 275 | 276 | The Siamese codec and the Fecal decoder both will compute lane sums only when they are needed. 277 | Since some of the 24 sums (about 50%) are unneeded, the number of operations will vary for each row. 278 | 279 | The final random XOR is similar to an LDPC code and allows the recovery properties of the code to perform well 280 | on a larger scale above about 32 input symbols. The GF(2^^8) multiplies dominate the recovery properties for smaller 281 | losses and input symbols. The specific code used was selected by experimenting with different parameters until a 282 | desired failure rate was achieved with good performance characteristics. 283 | 284 | As a result the Siamese Codes mainly use XORs, so they can run a lot faster than straight GF(2^^8) multiply-add operations. 285 | Since they are still Convolutional Codes, the Siamese Codes also lend themselves to streaming use cases. 286 | 287 | When AVX2 and SSSE3 are unavailable, Siamese takes 4x longer to decode 288 | and 2.6x longer to encode. Encoding requires a lot more simple XOR ops 289 | so it is still pretty fast. Decoding is usually really quick because 290 | average loss rates are low, but when needed it requires a lot more 291 | GF multiplies requiring table lookups which is slower. 292 | 293 | 294 | #### Credits 295 | 296 | Software by Christopher A. Taylor, making shit happen. 297 | 298 | Please reach out if you need support or would like to collaborate on a project. 299 | -------------------------------------------------------------------------------- /fecal.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #include "fecal.h" 30 | #include "gf256.h" 31 | #include "FecalEncoder.h" 32 | #include "FecalDecoder.h" 33 | 34 | extern "C" { 35 | 36 | 37 | //------------------------------------------------------------------------------ 38 | // Initialization API 39 | 40 | static bool m_Initialized = false; 41 | 42 | FECAL_EXPORT int fecal_init_(int version) 43 | { 44 | if (version != FECAL_VERSION) 45 | return Fecal_InvalidInput; 46 | 47 | if (0 != gf256_init()) 48 | return Fecal_Platform; 49 | 50 | m_Initialized = true; 51 | return Fecal_Success; 52 | } 53 | 54 | 55 | //------------------------------------------------------------------------------ 56 | // Encoder API 57 | 58 | FECAL_EXPORT FecalEncoder fecal_encoder_create(unsigned input_count, void* const * const input_data, uint64_t total_bytes) 59 | { 60 | if (input_count <= 0 || !input_data || total_bytes < input_count) 61 | { 62 | FECAL_DEBUG_BREAK; // Invalid input 63 | return nullptr; 64 | } 65 | 66 | FECAL_DEBUG_ASSERT(m_Initialized); // Must call fecal_init() first 67 | if (!m_Initialized) 68 | return nullptr; 69 | 70 | fecal::Encoder* encoder = new(std::nothrow) fecal::Encoder; 71 | if (!encoder) 72 | { 73 | FECAL_DEBUG_BREAK; // Out of memory 74 | return nullptr; 75 | } 76 | 77 | if (Fecal_Success != encoder->Initialize(input_count, input_data, total_bytes)) 78 | { 79 | delete encoder; 80 | return nullptr; 81 | } 82 | 83 | return reinterpret_cast( encoder ); 84 | } 85 | 86 | FECAL_EXPORT int fecal_encode(FecalEncoder encoder_v, FecalSymbol* symbol) 87 | { 88 | fecal::Encoder* encoder = reinterpret_cast( encoder_v ); 89 | if (!encoder || !symbol) 90 | return Fecal_InvalidInput; 91 | 92 | return encoder->Encode(*symbol); 93 | } 94 | 95 | FECAL_EXPORT void fecal_free(void* codec_v) 96 | { 97 | if (codec_v) 98 | { 99 | fecal::ICodec* icodec = reinterpret_cast( codec_v ); 100 | delete icodec; 101 | } 102 | } 103 | 104 | 105 | 
//------------------------------------------------------------------------------ 106 | // Decoder API 107 | 108 | FECAL_EXPORT FecalDecoder fecal_decoder_create(unsigned input_count, uint64_t total_bytes) 109 | { 110 | if (input_count <= 0 || total_bytes < input_count) 111 | { 112 | FECAL_DEBUG_BREAK; // Invalid input 113 | return nullptr; 114 | } 115 | 116 | FECAL_DEBUG_ASSERT(m_Initialized); // Must call fecal_init() first 117 | if (!m_Initialized) 118 | return nullptr; 119 | 120 | fecal::Decoder* decoder = new(std::nothrow) fecal::Decoder; 121 | if (!decoder) 122 | { 123 | FECAL_DEBUG_BREAK; // Out of memory 124 | return nullptr; 125 | } 126 | 127 | if (Fecal_Success != decoder->Initialize(input_count, total_bytes)) 128 | { 129 | delete decoder; 130 | return nullptr; 131 | } 132 | 133 | return reinterpret_cast( decoder ); 134 | } 135 | 136 | FECAL_EXPORT int fecal_decoder_add_original(FecalDecoder decoder_v, const FecalSymbol* symbol) 137 | { 138 | fecal::Decoder* decoder = reinterpret_cast( decoder_v ); 139 | if (!decoder || !symbol) 140 | return Fecal_InvalidInput; 141 | 142 | return decoder->AddOriginal(*symbol); 143 | } 144 | 145 | FECAL_EXPORT int fecal_decoder_add_recovery(FecalDecoder decoder_v, const FecalSymbol* symbol) 146 | { 147 | fecal::Decoder* decoder = reinterpret_cast( decoder_v ); 148 | if (!decoder || !symbol) 149 | return Fecal_InvalidInput; 150 | 151 | return decoder->AddRecovery(*symbol); 152 | } 153 | 154 | FECAL_EXPORT int fecal_decode(FecalDecoder decoder_v, RecoveredSymbols* symbols) 155 | { 156 | fecal::Decoder* decoder = reinterpret_cast( decoder_v ); 157 | if (!decoder || !symbols) 158 | return Fecal_InvalidInput; 159 | 160 | return decoder->Decode(*symbols); 161 | } 162 | 163 | FECAL_EXPORT int fecal_decoder_get(FecalDecoder decoder_v, unsigned input_index, FecalSymbol* symbol) 164 | { 165 | fecal::Decoder* decoder = reinterpret_cast( decoder_v ); 166 | if (!decoder || !symbol) 167 | return Fecal_InvalidInput; 168 | 169 | return 
decoder->GetOriginal(input_index, *symbol); 170 | } 171 | 172 | 173 | } // extern "C" 174 | -------------------------------------------------------------------------------- /fecal.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #ifndef CAT_FECAL_H 30 | #define CAT_FECAL_H 31 | 32 | /* 33 | FEC-AL: Forward Error Correction at the Application Layer 34 | Block erasure code based on math from the Siamese library. 35 | */ 36 | 37 | // Library version 38 | #define FECAL_VERSION 2 39 | 40 | // Tweak if the functions are exported or statically linked 41 | //#define FECAL_DLL /* Defined when building/linking as DLL */ 42 | //#define FECAL_BUILDING /* Defined by the library makefile */ 43 | 44 | #if defined(FECAL_BUILDING) 45 | # if defined(FECAL_DLL) 46 | #define FECAL_EXPORT __declspec(dllexport) 47 | # else 48 | #define FECAL_EXPORT 49 | # endif 50 | #else 51 | # if defined(FECAL_DLL) 52 | #define FECAL_EXPORT __declspec(dllimport) 53 | # else 54 | #define FECAL_EXPORT extern 55 | # endif 56 | #endif 57 | 58 | #include 59 | 60 | 61 | #ifdef __cplusplus 62 | extern "C" { 63 | #endif 64 | 65 | 66 | //------------------------------------------------------------------------------ 67 | // Initialization API 68 | // 69 | // Perform static initialization for the library, verifying that the platform 70 | // is supported. 71 | // 72 | // Returns 0 on success and other values on failure. 73 | 74 | FECAL_EXPORT int fecal_init_(int version); 75 | #define fecal_init() fecal_init_(FECAL_VERSION) 76 | 77 | 78 | //------------------------------------------------------------------------------ 79 | // Shared Constants / Datatypes 80 | 81 | // Results 82 | typedef enum FecalResultT 83 | { 84 | Fecal_NeedMoreData = 1, // More data is needed for this operation to succeed 85 | 86 | Fecal_Success = 0, 87 | 88 | Fecal_InvalidInput = -1, // A function parameter was invalid 89 | Fecal_Platform = -2, // Platform is unsupported 90 | Fecal_OutOfMemory = -3, // Out of memory error occurred 91 | Fecal_Unexpected = -4, // Unexpected error - Software bug? 
92 | } FecalResult; 93 | 94 | // Encoder and Decoder object types 95 | typedef struct FecalEncoderImpl { int impl; }*FecalEncoder; 96 | typedef struct FecalDecoderImpl { int impl; }*FecalDecoder; 97 | 98 | // Data or Recovery symbol 99 | typedef struct FecalSymbolT 100 | { 101 | // User-provided data pointer allocated by application. 102 | void* Data; 103 | 104 | // User-provided number of bytes in the data buffer, for validation. 105 | unsigned Bytes; 106 | 107 | // Zero-based index in the data array, 108 | // or a larger number for recovery data. 109 | unsigned Index; 110 | } FecalSymbol; 111 | 112 | // Recovered data 113 | typedef struct RecoveredSymbolsT 114 | { 115 | // Array of symbols 116 | FecalSymbol* Symbols; 117 | 118 | // Number of symbols in the array 119 | unsigned Count; 120 | } RecoveredSymbols; 121 | 122 | 123 | //------------------------------------------------------------------------------ 124 | // Encoder API 125 | 126 | /* 127 | fecal_encoder_create() 128 | 129 | Create an encoder and set the input data. 130 | 131 | input_count: Number of input_data[] buffers provided. 132 | input_data: Array of pointers to input data. 133 | total_bytes: Sum of the total bytes in all buffers. 134 | 135 | Buffer data must be available until the decoder is freed with fecal_free(). 136 | Buffer data does not need to be aligned. 137 | Buffer data will not be modified, only read. 138 | 139 | Each buffer should have the same number of bytes except for the last one, 140 | which can be shorter. 
141 | 142 | Let symbol_bytes = The number of bytes in each input_data buffer: 143 | 144 | input_count = static_cast( 145 | (total_bytes + symbol_bytes - 1) / symbol_bytes); 146 | 147 | Or if the number of pieces is known: 148 | 149 | symbol_bytes = static_cast( 150 | (total_bytes + input_count - 1) / input_count); 151 | 152 | Let final_bytes = The final piece of input data size in bytes: 153 | 154 | final_bytes = static_cast(total_bytes % symbol_bytes); 155 | if (final_bytes <= 0) 156 | final_bytes = symbol_bytes; 157 | 158 | Returns NULL on failure. 159 | */ 160 | FECAL_EXPORT FecalEncoder fecal_encoder_create(unsigned input_count, void* const * const input_data, uint64_t total_bytes); 161 | 162 | /* 163 | fecal_encode() 164 | 165 | Generate a recovery symbol. 166 | 167 | encoder: Encoder from fecal_encoder_create(). 168 | symbol->Index: Application provided recovery symbol index starting from 0. 169 | symbol->Data: Application provided buffer to write the symbol to. 170 | symbol->Bytes: Application provided number of bytes in the symbol buffer. 171 | 172 | Given total_bytes and input_count from fecal_encoder_create(): 173 | 174 | symbol->Bytes = static_cast( 175 | (total_bytes + input_count - 1) / input_count); 176 | 177 | Returns Fecal_Success on success. 178 | Returns Fecal_InvalidInput if the symbol parameter was invalid, or the 179 | codec is not initialized yet. 180 | */ 181 | FECAL_EXPORT int fecal_encode(FecalEncoder encoder, FecalSymbol* symbol); 182 | 183 | /* 184 | fecal_free() 185 | 186 | Free memory associated with the created encoder or decoder. 187 | 188 | codec: Pointer returned by fecal_encoder_create() or fecal_decoder_create() 189 | */ 190 | FECAL_EXPORT void fecal_free(void* codec); 191 | 192 | 193 | //------------------------------------------------------------------------------ 194 | // Decoder API 195 | 196 | /* 197 | fecal_decoder_create() 198 | 199 | Create a decoder and set the input_count and total_bytes. 
200 | 201 | input_count: Number of input_data[] buffers provided to fecal_encoder_create(). 202 | total_bytes: Sum of the total bytes in all buffers. 203 | 204 | See documentation for fecal_encoder_create() above. 205 | 206 | Returns NULL on failure. 207 | */ 208 | FECAL_EXPORT FecalDecoder fecal_decoder_create(unsigned input_count, uint64_t total_bytes); 209 | 210 | /* 211 | fecal_decoder_add_original() 212 | 213 | Adds an original symbol to the decoder. 214 | 215 | decoder: Decoder from fecal_decoder_create(). 216 | symbol->Index: Input data index from 0..(input_count-1). 217 | symbol->Data: Application provided buffer to read the symbol from. 218 | symbol->Bytes: Application provided number of bytes in the symbol buffer. 219 | 220 | Buffer data must be available until the decoder is freed with fecal_free(). 221 | Buffer data does not need to be aligned. 222 | Buffer data will not be modified, only read. 223 | 224 | Given total_bytes and input_count from fecal_encoder_create(): 225 | 226 | // Calculate the number of bytes in each symbol 227 | unsigned symbol_bytes = static_cast( 228 | (total_bytes + input_count - 1) / input_count); 229 | 230 | // If it is the final symbol: 231 | if (symbol->Index == input_count - 1) 232 | symbol->Bytes = final_bytes; 233 | else 234 | symbol->Bytes = symbol_bytes; 235 | 236 | Returns Fecal_Success on success. 237 | Returns Fecal_InvalidInput if the symbol parameter was invalid, or the 238 | codec is not initialized yet. 239 | */ 240 | FECAL_EXPORT int fecal_decoder_add_original(FecalDecoder decoder, const FecalSymbol* symbol); 241 | 242 | /* 243 | fecal_decoder_add_recovery() 244 | 245 | Adds a recovery symbol to the decoder. 246 | 247 | decoder: Decoder from fecal_decoder_create(). 248 | symbol->Index: Application provided recovery symbol index starting from 0. 249 | symbol->Data: Application provided buffer to read the symbol from. 250 | symbol->Bytes: Application provided number of bytes in the symbol buffer. 
251 | 252 | Buffer data must be available until the decoder is freed with fecal_free(). 253 | Buffer data does not need to be aligned. 254 | Buffer data WILL BE MODIFIED. 255 | 256 | Given total_bytes and input_count from fecal_encoder_create(): 257 | 258 | symbol->Bytes = static_cast( 259 | (total_bytes + input_count - 1) / input_count); 260 | 261 | Returns Fecal_Success on success. 262 | Returns Fecal_InvalidInput if the symbol parameter was invalid, or the 263 | codec is not initialized yet. 264 | */ 265 | FECAL_EXPORT int fecal_decoder_add_recovery(FecalDecoder decoder, const FecalSymbol* symbol); 266 | 267 | /* 268 | fecal_decode() 269 | 270 | Decode data if possible. 271 | 272 | decoder: Decoder from fecal_decoder_create(). 273 | symbols: Returned array of recovered input symbols. 274 | 275 | The returned data pointers are valid until fecal_free() is called. 276 | Note that the final symbol size can be different from the rest. 277 | The returned data pointers are taken from recovery symbols previously submitted. 278 | 279 | After decoding completes, the decoder object should be passed to fecal_free(). 280 | 281 | Returns Fecal_Success if decode was successful. `symbols` will contain results, 282 | and fecal_decoder_get() can be used to request specific pieces. 283 | Returns Fecal_NeedMoreData if more pieces must be added before decoding can proceed. 284 | Returns Fecal_InvalidInput if the parameters are invalid. 285 | */ 286 | FECAL_EXPORT int fecal_decode(FecalDecoder decoder, RecoveredSymbols* symbols); 287 | 288 | /* 289 | fecal_decoder_get() 290 | 291 | Get original data. 292 | 293 | decoder: Decoder from fecal_decoder_create(). 294 | input_index: Input data index from 0..(input_count-1). 295 | symbol: Returned original symbol data. 296 | 297 | The returned data pointers are taken from original or recovery symbols previously submitted. 298 | The returned data pointers are valid until fecal_free() is called. 
299 | Note that the final symbol size can be different from the rest. 300 | 301 | After decoding completes, the decoder object should be passed to fecal_free(). 302 | 303 | Returns Fecal_Success on success. 304 | Returns Fecal_NeedMoreData if the data is unavailable. 305 | Returns Fecal_InvalidInput if the parameters are invalid. 306 | */ 307 | FECAL_EXPORT int fecal_decoder_get(FecalDecoder decoder, unsigned input_index, FecalSymbol* symbol); 308 | 309 | 310 | #ifdef __cplusplus 311 | } 312 | #endif 313 | 314 | 315 | #endif // CAT_FECAL_H 316 | -------------------------------------------------------------------------------- /gf256.h: -------------------------------------------------------------------------------- 1 | /** \file 2 | \brief GF(256) Main C API Header 3 | \copyright Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | * Neither the name of GF256 nor the names of its contributors may be 14 | used to endorse or promote products derived from this software without 15 | specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 21 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 | POSSIBILITY OF SUCH DAMAGE. 28 | */ 29 | 30 | #ifndef CAT_GF256_H 31 | #define CAT_GF256_H 32 | 33 | /** \page GF256 GF(256) Math Module 34 | 35 | This module provides efficient implementations of bulk 36 | GF(2^^8) math operations over memory buffers. 37 | 38 | Addition is done over the base field in GF(2) meaning 39 | that addition is XOR between memory buffers. 40 | 41 | Multiplication is performed using table lookups via 42 | SIMD instructions. This is somewhat slower than XOR, 43 | but fast enough to not become a major bottleneck when 44 | used sparingly. 
45 | */ 46 | 47 | #include // uint32_t etc 48 | #include // memcpy, memset 49 | 50 | /// Library header version 51 | #define GF256_VERSION 2 52 | 53 | //------------------------------------------------------------------------------ 54 | // Platform/Architecture 55 | 56 | #if defined(ANDROID) || defined(IOS) || defined(LINUX_ARM) || defined(__powerpc__) || defined(__s390__) 57 | #define GF256_TARGET_MOBILE 58 | #endif // ANDROID 59 | 60 | #if defined(__AVX2__) || (defined (_MSC_VER) && _MSC_VER >= 1900) 61 | #define GF256_TRY_AVX2 /* 256-bit */ 62 | #include 63 | #define GF256_ALIGN_BYTES 32 64 | #else // __AVX2__ 65 | #define GF256_ALIGN_BYTES 16 66 | #endif // __AVX2__ 67 | 68 | #if !defined(GF256_TARGET_MOBILE) 69 | // Note: MSVC currently only supports SSSE3 but not AVX2 70 | #include // SSSE3: _mm_shuffle_epi8 71 | #include // SSE2 72 | #endif // GF256_TARGET_MOBILE 73 | 74 | #if defined(HAVE_ARM_NEON_H) 75 | #include 76 | #endif // HAVE_ARM_NEON_H 77 | 78 | #if defined(GF256_TARGET_MOBILE) 79 | 80 | #define GF256_ALIGNED_ACCESSES /* Inputs must be aligned to GF256_ALIGN_BYTES */ 81 | 82 | # if defined(HAVE_ARM_NEON_H) 83 | // Compiler-specific 128-bit SIMD register keyword 84 | #define GF256_M128 uint8x16_t 85 | #define GF256_TRY_NEON 86 | #else 87 | #define GF256_M128 uint64_t 88 | # endif 89 | 90 | #else // GF256_TARGET_MOBILE 91 | 92 | // Compiler-specific 128-bit SIMD register keyword 93 | #define GF256_M128 __m128i 94 | 95 | #endif // GF256_TARGET_MOBILE 96 | 97 | #ifdef GF256_TRY_AVX2 98 | // Compiler-specific 256-bit SIMD register keyword 99 | #define GF256_M256 __m256i 100 | #endif 101 | 102 | // Compiler-specific C++11 restrict keyword 103 | #define GF256_RESTRICT __restrict 104 | 105 | // Compiler-specific force inline keyword 106 | #ifdef _MSC_VER 107 | #define GF256_FORCE_INLINE inline __forceinline 108 | #else 109 | #define GF256_FORCE_INLINE inline __attribute__((always_inline)) 110 | #endif 111 | 112 | // Compiler-specific alignment keyword 113 
| // Note: Alignment only matters for ARM NEON where it should be 16 114 | #ifdef _MSC_VER 115 | #define GF256_ALIGNED __declspec(align(GF256_ALIGN_BYTES)) 116 | #else // _MSC_VER 117 | #define GF256_ALIGNED __attribute__((aligned(GF256_ALIGN_BYTES))) 118 | #endif // _MSC_VER 119 | 120 | #ifdef __cplusplus 121 | extern "C" { 122 | #endif // __cplusplus 123 | 124 | 125 | //------------------------------------------------------------------------------ 126 | // Portability 127 | 128 | /// Swap two memory buffers in-place 129 | extern void gf256_memswap(void * GF256_RESTRICT vx, void * GF256_RESTRICT vy, int bytes); 130 | 131 | 132 | //------------------------------------------------------------------------------ 133 | // GF(256) Context 134 | 135 | #ifdef _MSC_VER 136 | #pragma warning(push) 137 | #pragma warning(disable: 4324) // warning C4324: 'gf256_ctx' : structure was padded due to __declspec(align()) 138 | #endif // _MSC_VER 139 | 140 | /// The context object stores tables required to perform library calculations 141 | struct gf256_ctx 142 | { 143 | /// We require memory to be aligned since the SIMD instructions benefit from 144 | /// or require aligned accesses to the table data. 
145 | struct 146 | { 147 | GF256_ALIGNED GF256_M128 TABLE_LO_Y[256]; 148 | GF256_ALIGNED GF256_M128 TABLE_HI_Y[256]; 149 | } MM128; 150 | #ifdef GF256_TRY_AVX2 151 | struct 152 | { 153 | GF256_ALIGNED GF256_M256 TABLE_LO_Y[256]; 154 | GF256_ALIGNED GF256_M256 TABLE_HI_Y[256]; 155 | } MM256; 156 | #endif // GF256_TRY_AVX2 157 | 158 | /// Mul/Div/Inv/Sqr tables 159 | uint8_t GF256_MUL_TABLE[256 * 256]; 160 | uint8_t GF256_DIV_TABLE[256 * 256]; 161 | uint8_t GF256_INV_TABLE[256]; 162 | uint8_t GF256_SQR_TABLE[256]; 163 | 164 | /// Log/Exp tables 165 | uint16_t GF256_LOG_TABLE[256]; 166 | uint8_t GF256_EXP_TABLE[512 * 2 + 1]; 167 | 168 | /// Polynomial used 169 | unsigned Polynomial; 170 | }; 171 | 172 | #ifdef _MSC_VER 173 | #pragma warning(pop) 174 | #endif // _MSC_VER 175 | 176 | extern gf256_ctx GF256Ctx; 177 | 178 | 179 | //------------------------------------------------------------------------------ 180 | // Initialization 181 | 182 | /** 183 | Initialize a context, filling in the tables. 184 | 185 | Thread-safety / Usage Notes: 186 | 187 | It is perfectly safe and encouraged to use a gf256_ctx object from multiple 188 | threads. The gf256_init() is relatively expensive and should only be done 189 | once, though it will take less than a millisecond. 190 | 191 | The gf256_ctx object must be aligned to 16 byte boundary. 192 | Simply tag the object with GF256_ALIGNED to achieve this. 193 | 194 | Example: 195 | static GF256_ALIGNED gf256_ctx TheGF256Context; 196 | gf256_init(&TheGF256Context, 0); 197 | 198 | Returns 0 on success and other values on failure. 
199 | */ 200 | extern int gf256_init_(int version); 201 | #define gf256_init() gf256_init_(GF256_VERSION) 202 | 203 | 204 | //------------------------------------------------------------------------------ 205 | // Math Operations 206 | 207 | /// return x + y 208 | static GF256_FORCE_INLINE uint8_t gf256_add(uint8_t x, uint8_t y) 209 | { 210 | return (uint8_t)(x ^ y); 211 | } 212 | 213 | /// return x * y 214 | /// For repeated multiplication by a constant, it is faster to put the constant in y. 215 | static GF256_FORCE_INLINE uint8_t gf256_mul(uint8_t x, uint8_t y) 216 | { 217 | return GF256Ctx.GF256_MUL_TABLE[((unsigned)y << 8) + x]; 218 | } 219 | 220 | /// return x / y 221 | /// Memory-access optimized for constant divisors in y. 222 | static GF256_FORCE_INLINE uint8_t gf256_div(uint8_t x, uint8_t y) 223 | { 224 | return GF256Ctx.GF256_DIV_TABLE[((unsigned)y << 8) + x]; 225 | } 226 | 227 | /// return 1 / x 228 | static GF256_FORCE_INLINE uint8_t gf256_inv(uint8_t x) 229 | { 230 | return GF256Ctx.GF256_INV_TABLE[x]; 231 | } 232 | 233 | /// return x * x 234 | static GF256_FORCE_INLINE uint8_t gf256_sqr(uint8_t x) 235 | { 236 | return GF256Ctx.GF256_SQR_TABLE[x]; 237 | } 238 | 239 | 240 | //------------------------------------------------------------------------------ 241 | // Bulk Memory Math Operations 242 | 243 | /// Performs "x[] += y[]" bulk memory XOR operation 244 | extern void gf256_add_mem(void * GF256_RESTRICT vx, 245 | const void * GF256_RESTRICT vy, int bytes); 246 | 247 | /// Performs "z[] += x[] + y[]" bulk memory operation 248 | extern void gf256_add2_mem(void * GF256_RESTRICT vz, const void * GF256_RESTRICT vx, 249 | const void * GF256_RESTRICT vy, int bytes); 250 | 251 | /// Performs "z[] = x[] + y[]" bulk memory operation 252 | extern void gf256_addset_mem(void * GF256_RESTRICT vz, const void * GF256_RESTRICT vx, 253 | const void * GF256_RESTRICT vy, int bytes); 254 | 255 | /// Performs "z[] = x[] * y" bulk memory operation 256 | extern void 
gf256_mul_mem(void * GF256_RESTRICT vz, 257 | const void * GF256_RESTRICT vx, uint8_t y, int bytes); 258 | 259 | /// Performs "z[] += x[] * y" bulk memory operation 260 | extern void gf256_muladd_mem(void * GF256_RESTRICT vz, uint8_t y, 261 | const void * GF256_RESTRICT vx, int bytes); 262 | 263 | /// Performs "x[] /= y" bulk memory operation 264 | static GF256_FORCE_INLINE void gf256_div_mem(void * GF256_RESTRICT vz, 265 | const void * GF256_RESTRICT vx, uint8_t y, int bytes) 266 | { 267 | // Multiply by inverse 268 | gf256_mul_mem(vz, vx, y == 1 ? (uint8_t)1 : GF256Ctx.GF256_INV_TABLE[y], bytes); 269 | } 270 | 271 | 272 | //------------------------------------------------------------------------------ 273 | // Misc Operations 274 | 275 | /// Swap two memory buffers in-place 276 | extern void gf256_memswap(void * GF256_RESTRICT vx, void * GF256_RESTRICT vy, int bytes); 277 | 278 | 279 | #ifdef __cplusplus 280 | } 281 | #endif // __cplusplus 282 | 283 | #endif // CAT_GF256_H 284 | -------------------------------------------------------------------------------- /proj/msvc/Fecal.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 14 4 | VisualStudioVersion = 14.0.25420.1 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Fecal", "LibFecal.vcxproj", "{FF5912EF-7424-4974-B877-62B03D5046C6}" 7 | EndProject 8 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "FecalBenchmark", "..\..\tests\msvc\FecalBenchmark.vcxproj", "{32176592-2F30-4BD5-B645-EB11C8D3453E}" 9 | ProjectSection(ProjectDependencies) = postProject 10 | {FF5912EF-7424-4974-B877-62B03D5046C6} = {FF5912EF-7424-4974-B877-62B03D5046C6} 11 | EndProjectSection 12 | EndProject 13 | Global 14 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 15 | Debug|Win32 = Debug|Win32 16 | Debug|x64 = Debug|x64 17 | Release|Win32 = 
Release|Win32 18 | Release|x64 = Release|x64 19 | EndGlobalSection 20 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 21 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|Win32.ActiveCfg = Debug|Win32 22 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|Win32.Build.0 = Debug|Win32 23 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|x64.ActiveCfg = Debug|x64 24 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Debug|x64.Build.0 = Debug|x64 25 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Release|Win32.ActiveCfg = Release|Win32 26 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Release|Win32.Build.0 = Release|Win32 27 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Release|x64.ActiveCfg = Release|x64 28 | {FF5912EF-7424-4974-B877-62B03D5046C6}.Release|x64.Build.0 = Release|x64 29 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|Win32.ActiveCfg = Debug|Win32 30 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|Win32.Build.0 = Debug|Win32 31 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|x64.ActiveCfg = Debug|x64 32 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Debug|x64.Build.0 = Debug|x64 33 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|Win32.ActiveCfg = Release|Win32 34 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|Win32.Build.0 = Release|Win32 35 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|x64.ActiveCfg = Release|x64 36 | {E28D52C7-2A45-4E7E-86B5-75EA2F579C3E}.Release|x64.Build.0 = Release|x64 37 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|Win32.ActiveCfg = Debug|Win32 38 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|Win32.Build.0 = Debug|Win32 39 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|x64.ActiveCfg = Debug|x64 40 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Debug|x64.Build.0 = Debug|x64 41 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|Win32.ActiveCfg = Release|Win32 42 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|Win32.Build.0 = Release|Win32 43 | {32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|x64.ActiveCfg = Release|x64 44 | 
{32176592-2F30-4BD5-B645-EB11C8D3453E}.Release|x64.Build.0 = Release|x64 45 | EndGlobalSection 46 | GlobalSection(SolutionProperties) = preSolution 47 | HideSolutionNode = FALSE 48 | EndGlobalSection 49 | EndGlobal 50 | -------------------------------------------------------------------------------- /proj/msvc/LibFecal.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {FF5912EF-7424-4974-B877-62B03D5046C6} 23 | LibFecal 24 | Fecal 25 | 26 | 27 | 28 | StaticLibrary 29 | true 30 | v140 31 | MultiByte 32 | 33 | 34 | StaticLibrary 35 | true 36 | v140 37 | MultiByte 38 | 39 | 40 | StaticLibrary 41 | false 42 | v140 43 | true 44 | MultiByte 45 | 46 | 47 | StaticLibrary 48 | false 49 | v140 50 | true 51 | MultiByte 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 71 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 72 | 73 | 74 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 75 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 76 | 77 | 78 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 79 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 80 | 81 | 82 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 83 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 84 | 85 | 86 | 87 | Level4 88 | Disabled 89 | true 90 | SIAMESE_BUILDING;%(PreprocessorDefinitions) 91 | true 92 | MultiThreadedDebug 93 | 94 | 95 | true 96 | $(OutDir)$(TargetName)$(TargetExt) 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | Level4 106 | Disabled 107 | true 108 | SIAMESE_BUILDING;%(PreprocessorDefinitions) 109 | true 110 | MultiThreadedDebug 111 | 112 | 113 | true 114 | $(OutDir)$(TargetName)$(TargetExt) 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | Level4 124 | Full 125 | true 126 | true 127 | 
true 128 | MultiThreaded 129 | false 130 | AnySuitable 131 | Speed 132 | true 133 | SIAMESE_BUILDING;%(PreprocessorDefinitions) 134 | true 135 | true 136 | false 137 | 138 | 139 | true 140 | true 141 | true 142 | $(OutDir)$(TargetName)$(TargetExt) 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | Level4 152 | Full 153 | true 154 | true 155 | true 156 | MultiThreaded 157 | false 158 | AnySuitable 159 | Speed 160 | true 161 | SIAMESE_BUILDING;%(PreprocessorDefinitions) 162 | true 163 | true 164 | false 165 | 166 | 167 | true 168 | true 169 | true 170 | $(OutDir)$(TargetName)$(TargetExt) 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | -------------------------------------------------------------------------------- /proj/msvc/LibFecal.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | Source Files 23 | 24 | 25 | Source Files 26 | 27 | 28 | Source Files 29 | 30 | 31 | Source Files 32 | 33 | 34 | 35 | 36 | Source Files 37 | 38 | 39 | Header Files 40 | 41 | 42 | Source Files 43 | 44 | 45 | Source Files 46 | 47 | 48 | Source Files 49 | 50 | 51 | 52 | 53 | Header Files 54 | 55 | 56 | -------------------------------------------------------------------------------- /tests/benchmark.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2017 Christopher A. Taylor. All rights reserved. 
3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | * Neither the name of Fecal nor the names of its contributors may be 13 | used to endorse or promote products derived from this software without 14 | specific prior written permission. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 20 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 | POSSIBILITY OF SUCH DAMAGE. 
27 | */ 28 | 29 | #include "../FecalCommon.h" 30 | #include "../fecal.h" 31 | 32 | #include 33 | #include 34 | #include 35 | #include 36 | using namespace std; 37 | 38 | //#define TEST_DATA_ALL_SAME 39 | //#define TEST_LOSE_FIRST_K_PACKETS 40 | 41 | 42 | //------------------------------------------------------------------------------ 43 | // Windows 44 | 45 | #ifdef _WIN32 46 | #define WIN32_LEAN_AND_MEAN 47 | 48 | #ifndef _WINSOCKAPI_ 49 | #define DID_DEFINE_WINSOCKAPI 50 | #define _WINSOCKAPI_ 51 | #endif 52 | #ifndef NOMINMAX 53 | #define NOMINMAX 54 | #endif 55 | #ifndef _WIN32_WINNT 56 | #define _WIN32_WINNT 0x0601 /* Windows 7+ */ 57 | #endif 58 | 59 | #include 60 | #endif 61 | 62 | #ifdef DID_DEFINE_WINSOCKAPI 63 | #undef _WINSOCKAPI_ 64 | #undef DID_DEFINE_WINSOCKAPI 65 | #endif 66 | 67 | 68 | //------------------------------------------------------------------------------ 69 | // Threads 70 | 71 | static bool SetCurrentThreadPriority() 72 | { 73 | #ifdef _WIN32 74 | return 0 != ::SetThreadPriority(::GetCurrentThread(), THREAD_PRIORITY_ABOVE_NORMAL); 75 | #else 76 | return -1 != nice(2); 77 | #endif 78 | } 79 | 80 | 81 | //------------------------------------------------------------------------------ 82 | // Timing 83 | 84 | static uint64_t GetTimeUsec() 85 | { 86 | #ifdef _WIN32 87 | LARGE_INTEGER timeStamp = {}; 88 | if (!::QueryPerformanceCounter(&timeStamp)) 89 | return 0; 90 | static double PerfFrequencyInverse = 0.; 91 | if (PerfFrequencyInverse == 0.) 92 | { 93 | LARGE_INTEGER freq = {}; 94 | if (!::QueryPerformanceFrequency(&freq) || freq.QuadPart == 0) 95 | return 0; 96 | PerfFrequencyInverse = 1000000. 
/ (double)freq.QuadPart; 97 | } 98 | return (uint64_t)(PerfFrequencyInverse * timeStamp.QuadPart); 99 | #else 100 | struct timeval tv; 101 | gettimeofday(&tv, nullptr); 102 | return 1000000 * tv.tv_sec + tv.tv_usec; 103 | #endif // _WIN32 104 | } 105 | 106 | 107 | //------------------------------------------------------------------------------ 108 | // Self-Checking Packet 109 | 110 | static void WriteRandomSelfCheckingPacket(fecal::PCGRandom& prng, void* packet, unsigned bytes) 111 | { 112 | uint8_t* buffer = (uint8_t*)packet; 113 | #ifdef TEST_DATA_ALL_SAME 114 | if (bytes != 0) 115 | #else 116 | if (bytes < 16) 117 | #endif 118 | { 119 | FECAL_DEBUG_ASSERT(bytes >= 2); 120 | buffer[0] = (uint8_t)prng.Next(); 121 | for (unsigned i = 1; i < bytes; ++i) 122 | { 123 | buffer[i] = buffer[0]; 124 | } 125 | } 126 | else 127 | { 128 | uint32_t crc = bytes; 129 | *(uint32_t*)(buffer + 4) = bytes; 130 | for (unsigned i = 8; i < bytes; ++i) 131 | { 132 | uint8_t v = (uint8_t)prng.Next(); 133 | buffer[i] = v; 134 | crc = (crc << 3) | (crc >> (32 - 3)); 135 | crc += v; 136 | } 137 | *(uint32_t*)buffer = crc; 138 | } 139 | } 140 | 141 | static bool CheckPacket(const void* packet, unsigned bytes) 142 | { 143 | uint8_t* buffer = (uint8_t*)packet; 144 | #ifdef TEST_DATA_ALL_SAME 145 | if (bytes != 0) 146 | #else 147 | if (bytes < 16) 148 | #endif 149 | { 150 | if (bytes < 2) 151 | return false; 152 | 153 | uint8_t v = buffer[0]; 154 | for (unsigned i = 1; i < bytes; ++i) 155 | { 156 | if (buffer[i] != v) 157 | return false; 158 | } 159 | } 160 | else 161 | { 162 | uint32_t crc = bytes; 163 | uint32_t readBytes = *(uint32_t*)(buffer + 4); 164 | if (readBytes != bytes) 165 | return false; 166 | for (unsigned i = 8; i < bytes; ++i) 167 | { 168 | uint8_t v = buffer[i]; 169 | crc = (crc << 3) | (crc >> (32 - 3)); 170 | crc += v; 171 | } 172 | uint32_t readCRC = *(uint32_t*)buffer; 173 | if (readCRC != crc) 174 | return false; 175 | } 176 | return true; 177 | } 178 | 179 | 180 | 
//------------------------------------------------------------------------------ 181 | // FunctionTimer 182 | 183 | class FunctionTimer 184 | { 185 | public: 186 | FunctionTimer(const std::string& name) 187 | { 188 | FunctionName = name; 189 | } 190 | void BeginCall() 191 | { 192 | FECAL_DEBUG_ASSERT(t0 == 0); 193 | t0 = GetTimeUsec(); 194 | } 195 | void EndCall() 196 | { 197 | FECAL_DEBUG_ASSERT(t0 != 0); 198 | uint64_t t1 = GetTimeUsec(); 199 | ++Invokations; 200 | TotalUsec += t1 - t0; 201 | t0 = 0; 202 | } 203 | void Reset() 204 | { 205 | FECAL_DEBUG_ASSERT(t0 == 0); 206 | t0 = 0; 207 | Invokations = 0; 208 | TotalUsec = 0; 209 | } 210 | void Print(unsigned trials) 211 | { 212 | cout << FunctionName << " called " << Invokations / (float)trials << " times per trial (avg). " << TotalUsec / (double)Invokations << " usec avg for all invokations. " << TotalUsec / (float)trials << " usec (avg) of " << trials << " trials" << endl; 213 | } 214 | 215 | uint64_t t0 = 0; 216 | uint64_t Invokations = 0; 217 | uint64_t TotalUsec = 0; 218 | std::string FunctionName; 219 | }; 220 | 221 | 222 | //------------------------------------------------------------------------------ 223 | // Utility: Deck Shuffling function 224 | 225 | /* 226 | Given a PRNG, generate a deck of cards in a random order. 227 | The deck will contain elements with values between 0 and count - 1. 
228 | */ 229 | 230 | static void ShuffleDeck16(fecal::PCGRandom &prng, uint16_t * GF256_RESTRICT deck, uint32_t count) 231 | { 232 | deck[0] = 0; 233 | 234 | // If we can unroll 4 times, 235 | if (count <= 256) 236 | { 237 | for (uint32_t ii = 1;;) 238 | { 239 | uint32_t jj, rv = prng.Next(); 240 | 241 | // 8-bit unroll 242 | switch (count - ii) 243 | { 244 | default: 245 | jj = (uint8_t)rv % ii; 246 | deck[ii] = deck[jj]; 247 | deck[jj] = ii; 248 | ++ii; 249 | jj = (uint8_t)(rv >> 8) % ii; 250 | deck[ii] = deck[jj]; 251 | deck[jj] = ii; 252 | ++ii; 253 | jj = (uint8_t)(rv >> 16) % ii; 254 | deck[ii] = deck[jj]; 255 | deck[jj] = ii; 256 | ++ii; 257 | jj = (uint8_t)(rv >> 24) % ii; 258 | deck[ii] = deck[jj]; 259 | deck[jj] = ii; 260 | ++ii; 261 | break; 262 | 263 | case 3: 264 | jj = (uint8_t)rv % ii; 265 | deck[ii] = deck[jj]; 266 | deck[jj] = ii; 267 | ++ii; 268 | case 2: 269 | jj = (uint8_t)(rv >> 8) % ii; 270 | deck[ii] = deck[jj]; 271 | deck[jj] = ii; 272 | ++ii; 273 | case 1: 274 | jj = (uint8_t)(rv >> 16) % ii; 275 | deck[ii] = deck[jj]; 276 | deck[jj] = ii; 277 | case 0: 278 | return; 279 | } 280 | } 281 | } 282 | else 283 | { 284 | // For each deck entry, 285 | for (uint32_t ii = 1;;) 286 | { 287 | uint32_t jj, rv = prng.Next(); 288 | 289 | // 16-bit unroll 290 | switch (count - ii) 291 | { 292 | default: 293 | jj = (uint16_t)rv % ii; 294 | deck[ii] = deck[jj]; 295 | deck[jj] = ii; 296 | ++ii; 297 | jj = (uint16_t)(rv >> 16) % ii; 298 | deck[ii] = deck[jj]; 299 | deck[jj] = ii; 300 | ++ii; 301 | break; 302 | 303 | case 1: 304 | jj = (uint16_t)rv % ii; 305 | deck[ii] = deck[jj]; 306 | deck[jj] = ii; 307 | case 0: 308 | return; 309 | } 310 | } 311 | } 312 | } 313 | 314 | 315 | //------------------------------------------------------------------------------ 316 | // Tests 317 | 318 | static void BasicTest(unsigned input_count, unsigned symbol_bytes, unsigned seed = 0) 319 | { 320 | cout << "Testing performance for input_count=" << input_count << " and 
symbol_bytes=" << symbol_bytes << endl; 321 | 322 | static const unsigned final_bytes = symbol_bytes; 323 | 324 | for (unsigned lossCount = 1; lossCount <= input_count; ++lossCount) 325 | { 326 | const uint64_t total_bytes = (input_count - 1) * symbol_bytes + final_bytes; 327 | 328 | FunctionTimer t_fecal_encoder_create("fecal_encoder_create"); 329 | FunctionTimer t_fecal_decoder_create("fecal_decoder_create"); 330 | FunctionTimer t_fecal_encode("fecal_encode"); 331 | FunctionTimer t_fecal_decoder_add_original("fecal_decoder_add_original"); 332 | FunctionTimer t_fecal_decoder_add_recovery("fecal_decoder_add_recovery"); 333 | FunctionTimer t_fecal_decode("fecal_decode"); 334 | 335 | static const unsigned kTrials = 100; 336 | 337 | uint64_t recoveryRequired = 0; 338 | 339 | for (unsigned trial = 0; trial < kTrials; ++trial) 340 | { 341 | fecal::PCGRandom prng; 342 | prng.Seed(seed, lossCount * kTrials + trial); 343 | 344 | std::vector OriginalData((size_t)total_bytes + 1); 345 | OriginalData[total_bytes] = 0xfe; 346 | std::vector input_data(input_count); 347 | 348 | uint8_t* data_buffer = &OriginalData[0]; 349 | for (unsigned ii = 0; ii < input_count - 1; ++ii) 350 | { 351 | input_data[ii] = data_buffer; 352 | WriteRandomSelfCheckingPacket(prng, data_buffer, symbol_bytes); 353 | data_buffer += symbol_bytes; 354 | } 355 | input_data[input_count - 1] = data_buffer; 356 | WriteRandomSelfCheckingPacket(prng, data_buffer, final_bytes); 357 | 358 | t_fecal_encoder_create.BeginCall(); 359 | FecalEncoder encoder = fecal_encoder_create(input_count, &input_data[0], total_bytes); 360 | t_fecal_encoder_create.EndCall(); 361 | 362 | if (!encoder) 363 | { 364 | cout << "Error: Unable to create encoder" << endl; 365 | FECAL_DEBUG_BREAK; 366 | return; 367 | } 368 | 369 | t_fecal_decoder_create.BeginCall(); 370 | FecalDecoder decoder = fecal_decoder_create(input_count, total_bytes); 371 | t_fecal_decoder_create.EndCall(); 372 | 373 | if (!decoder) 374 | { 375 | cout << "Error: Unable 
to create decoder" << endl; 376 | FECAL_DEBUG_BREAK; 377 | return; 378 | } 379 | 380 | #ifndef TEST_LOSE_FIRST_K_PACKETS 381 | FECAL_DEBUG_ASSERT(input_count <= 65536); 382 | std::vector deck(input_count); 383 | ShuffleDeck16(prng, &deck[0], input_count); 384 | #endif 385 | 386 | for (unsigned i = 0; i < input_count; ++i) 387 | { 388 | bool isLost = false; 389 | #ifdef TEST_LOSE_FIRST_K_PACKETS 390 | if (i < lossCount) 391 | isLost = true; 392 | #else 393 | for (unsigned k = 0; k < lossCount; ++k) 394 | { 395 | if (i == deck[k]) 396 | { 397 | isLost = true; 398 | break; 399 | } 400 | } 401 | #endif 402 | if (isLost) 403 | continue; 404 | 405 | FecalSymbol original; 406 | original.Data = input_data[i]; 407 | original.Bytes = symbol_bytes; 408 | if (i == input_count - 1) 409 | original.Bytes = final_bytes; 410 | original.Index = i; 411 | 412 | t_fecal_decoder_add_original.BeginCall(); 413 | int result = fecal_decoder_add_original(decoder, &original); 414 | t_fecal_decoder_add_original.EndCall(); 415 | 416 | if (result) 417 | { 418 | cout << "Error: Unable to add original data to decoder. error=" << result << endl; 419 | FECAL_DEBUG_BREAK; 420 | return; 421 | } 422 | } 423 | 424 | typedef std::shared_ptr< std::vector > vecptr_t; 425 | std::list recoveryData; 426 | 427 | for (unsigned recoveryIndex = 0;; ++recoveryIndex) 428 | { 429 | vecptr_t data = std::make_shared< std::vector >(symbol_bytes); 430 | recoveryData.push_back(data); 431 | 432 | FecalSymbol recovery; 433 | recovery.Index = recoveryIndex; 434 | recovery.Data = &data->at(0); 435 | recovery.Bytes = symbol_bytes; 436 | 437 | { 438 | t_fecal_encode.BeginCall(); 439 | int result = fecal_encode(encoder, &recovery); 440 | t_fecal_encode.EndCall(); 441 | 442 | if (result) 443 | { 444 | FECAL_DEBUG_BREAK; 445 | cout << "Error: Unable to generate encoded data. 
error=" << result << endl; 446 | return; 447 | } 448 | } 449 | 450 | ++recoveryRequired; 451 | 452 | { 453 | t_fecal_decoder_add_recovery.BeginCall(); 454 | int result = fecal_decoder_add_recovery(decoder, &recovery); 455 | t_fecal_decoder_add_recovery.EndCall(); 456 | if (result) 457 | { 458 | cout << "Error: Unable to add recovery data to decoder. error=" << result << endl; 459 | FECAL_DEBUG_BREAK; 460 | return; 461 | } 462 | } 463 | 464 | RecoveredSymbols recovered; 465 | 466 | t_fecal_decode.BeginCall(); 467 | int decodeResult = fecal_decode(decoder, &recovered); 468 | t_fecal_decode.EndCall(); 469 | 470 | if (decodeResult == Fecal_Success) 471 | { 472 | for (unsigned i = 0; i < recovered.Count; ++i) 473 | { 474 | if (!CheckPacket( 475 | recovered.Symbols[i].Data, 476 | recovered.Symbols[i].Bytes)) 477 | { 478 | cout << "Error: Packet check failed for " << i << endl; 479 | FECAL_DEBUG_BREAK; 480 | return; 481 | } 482 | } 483 | 484 | // Decode success! 485 | break; 486 | } 487 | else if (decodeResult == Fecal_NeedMoreData) 488 | { 489 | //cout << "Needed more data to decode"); 490 | } 491 | else 492 | { 493 | cout << "Error: Decode returned " << decodeResult << endl; 494 | FECAL_DEBUG_BREAK; 495 | return; 496 | } 497 | } 498 | 499 | // Decode success! 
500 | 501 | fecal_free(encoder); 502 | fecal_free(decoder); 503 | 504 | if (OriginalData[total_bytes] != 0xfe) 505 | { 506 | cout << "Error: Corruption after final symbol" << endl; 507 | FECAL_DEBUG_BREAK; 508 | return; 509 | } 510 | } 511 | 512 | float avgRecoveryRequired = recoveryRequired / (float)kTrials; 513 | 514 | #ifdef TEST_PRINT_API_TIMINGS 515 | t_fecal_encoder_create.Print(kTrials); 516 | t_fecal_encode.Print(kTrials); 517 | t_fecal_decoder_create.Print(kTrials); 518 | t_fecal_decoder_add_original.Print(kTrials); 519 | t_fecal_decoder_add_recovery.Print(kTrials); 520 | t_fecal_decode.Print(kTrials); 521 | #endif 522 | 523 | float encode_input_MBPS = total_bytes * kTrials / (float)(t_fecal_encoder_create.TotalUsec + t_fecal_encode.TotalUsec); 524 | float encode_setup_MBPS = total_bytes * kTrials / (float)t_fecal_encoder_create.TotalUsec; 525 | float encode_output_MBPS = avgRecoveryRequired * symbol_bytes * kTrials / (float)(t_fecal_encoder_create.TotalUsec + t_fecal_encode.TotalUsec); 526 | float decode_input_MBPS = total_bytes * kTrials / (float)(t_fecal_decode.TotalUsec); 527 | float decode_output_MBPS = lossCount * symbol_bytes * kTrials / (float)(t_fecal_decode.TotalUsec); 528 | 529 | //cout << "Using " << avgRecoveryRequired << " average recovery packets for " << lossCount << " losses of " << input_count << " original packets:" << endl; 530 | cout << "Encoder(" << total_bytes / 1000000.f << " MB in " << input_count << " pieces, " << lossCount << " losses): Input=" << encode_input_MBPS << " MB/s, Output=" << encode_output_MBPS << " MB/s, (Encode create: " << encode_setup_MBPS << " MB/s)" << endl; 531 | cout << "Decoder(" << total_bytes / 1000000.f << " MB in " << input_count << " pieces, " << lossCount << " losses): Input=" << decode_input_MBPS << " MB/s, Output=" << decode_output_MBPS << " MB/s, (Overhead = " << avgRecoveryRequired - lossCount << " pieces)" << endl << endl; 532 | } 533 | } 534 | 535 | 536 | 
//------------------------------------------------------------------------------ 537 | // Entrypoint 538 | 539 | int main(int argc, char **argv) 540 | { 541 | SetCurrentThreadPriority(); 542 | 543 | FunctionTimer t_fecal_init("fecal_init"); 544 | 545 | t_fecal_init.BeginCall(); 546 | if (0 != fecal_init()) 547 | { 548 | cout << "Failed to initialize" << endl; 549 | return -1; 550 | } 551 | t_fecal_init.EndCall(); 552 | t_fecal_init.Print(1); 553 | 554 | unsigned input_count = 200; 555 | #ifdef FECAL_DEBUG 556 | unsigned symbol_bytes = 20; 557 | #else 558 | unsigned symbol_bytes = 1300; 559 | #endif 560 | 561 | if (argc >= 2) 562 | input_count = atoi(argv[1]); 563 | if (argc >= 3) 564 | symbol_bytes = atoi(argv[2]); 565 | 566 | BasicTest(input_count, symbol_bytes); 567 | 568 | getchar(); 569 | 570 | return 0; 571 | } 572 | -------------------------------------------------------------------------------- /tests/msvc/FecalBenchmark.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | Win32 7 | 8 | 9 | Debug 10 | x64 11 | 12 | 13 | Release 14 | Win32 15 | 16 | 17 | Release 18 | x64 19 | 20 | 21 | 22 | {32176592-2F30-4BD5-B645-EB11C8D3453E} 23 | Fecal 24 | FecalBenchmark 25 | 26 | 27 | 28 | Application 29 | true 30 | MultiByte 31 | v140 32 | 33 | 34 | Application 35 | true 36 | MultiByte 37 | v140 38 | 39 | 40 | Application 41 | false 42 | true 43 | MultiByte 44 | v140 45 | 46 | 47 | Application 48 | false 49 | true 50 | MultiByte 51 | v140 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 71 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 72 | 73 | 74 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 75 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 76 | 77 | 78 | Output/$(ProjectName)/$(Configuration)/$(Platform)/ 79 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 80 | 81 | 82 | 
Output/$(ProjectName)/$(Configuration)/$(Platform)/ 83 | Obj/$(ProjectName)/$(Configuration)/$(Platform)/ 84 | 85 | 86 | 87 | Level3 88 | Disabled 89 | true 90 | MultiThreadedDebug 91 | _MBCS;%(PreprocessorDefinitions) 92 | 93 | 94 | true 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | Level3 106 | Disabled 107 | true 108 | MultiThreadedDebug 109 | _MBCS;%(PreprocessorDefinitions) 110 | 111 | 112 | true 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | Level3 124 | MaxSpeed 125 | true 126 | true 127 | true 128 | AnySuitable 129 | Speed 130 | false 131 | MultiThreaded 132 | true 133 | _MBCS;%(PreprocessorDefinitions) 134 | 135 | 136 | true 137 | true 138 | true 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | Level3 150 | MaxSpeed 151 | true 152 | true 153 | true 154 | AnySuitable 155 | Speed 156 | false 157 | MultiThreaded 158 | true 159 | _MBCS;%(PreprocessorDefinitions) 160 | 161 | 162 | true 163 | true 164 | true 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | {ff5912ef-7424-4974-b877-62b03d5046c6} 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /tests/msvc/FecalBenchmark.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;hm;inl;inc;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | Source Files 20 | 21 | 22 | --------------------------------------------------------------------------------