├── .gitignore ├── LICENSE.txt ├── Lokad.FasterMath.sln ├── ReadMe.md ├── bench └── Lokad.FasterMath.Bench │ ├── AllBench.cs │ ├── Lokad.FasterMath.Bench.csproj │ ├── Program.cs │ └── ReadMe.md ├── lokad.png ├── src ├── Lokad.FasterMath.Alt │ ├── Digamma.cs │ ├── Log2.cs │ ├── LogGamma.cs │ ├── Lokad.FasterMath.Alt.csproj │ └── ReadMe.md └── Lokad.FasterMath │ ├── Digamma.cs │ ├── Exp.cs │ ├── FloatIntUnion.cs │ ├── Log.cs │ ├── Log2.cs │ ├── LogGamma.cs │ ├── Lokad.FasterMath.csproj │ ├── ReadMe.md │ ├── SpanExtensions.cs │ └── SpanExtensions.tt └── test └── Lokad.FasterMath.Tests ├── DigammaTests.cs ├── ExpTests.cs ├── Log2Tests.cs ├── LogGammaTests.cs ├── LogTests.cs ├── Lokad.FasterMath.Tests.csproj └── NumericErrorExtension.cs /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/macos,linux,windows,visualstudio,visualstudiocode 3 | # Edit at https://www.gitignore.io/?templates=macos,linux,windows,visualstudio,visualstudiocode 4 | 5 | ### Linux ### 6 | *~ 7 | 8 | # temporary files which can be created if a process still has a handle open of a deleted file 9 | .fuse_hidden* 10 | 11 | # KDE directory preferences 12 | .directory 13 | 14 | # Linux trash folder which might appear on any partition or disk 15 | .Trash-* 16 | 17 | # .nfs files are created when an open file is removed but is still being accessed 18 | .nfs* 19 | 20 | ### macOS ### 21 | # General 22 | .DS_Store 23 | .AppleDouble 24 | .LSOverride 25 | 26 | # Icon must end with two \r 27 | Icon 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | ### 
VisualStudioCode ### 49 | .vscode/* 50 | !.vscode/settings.json 51 | !.vscode/tasks.json 52 | !.vscode/launch.json 53 | !.vscode/extensions.json 54 | 55 | ### VisualStudioCode Patch ### 56 | # Ignore all local history of files 57 | .history 58 | 59 | ### Windows ### 60 | # Windows thumbnail cache files 61 | Thumbs.db 62 | ehthumbs.db 63 | ehthumbs_vista.db 64 | 65 | # Dump file 66 | *.stackdump 67 | 68 | # Folder config file 69 | [Dd]esktop.ini 70 | 71 | # Recycle Bin used on file shares 72 | $RECYCLE.BIN/ 73 | 74 | # Windows Installer files 75 | *.cab 76 | *.msi 77 | *.msix 78 | *.msm 79 | *.msp 80 | 81 | # Windows shortcuts 82 | *.lnk 83 | 84 | ### VisualStudio ### 85 | ## Ignore Visual Studio temporary files, build results, and 86 | ## files generated by popular Visual Studio add-ons. 87 | ## 88 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 89 | 90 | # User-specific files 91 | *.rsuser 92 | *.suo 93 | *.user 94 | *.userosscache 95 | *.sln.docstates 96 | 97 | # User-specific files (MonoDevelop/Xamarin Studio) 98 | *.userprefs 99 | 100 | # Build results 101 | [Dd]ebug/ 102 | [Dd]ebugPublic/ 103 | [Rr]elease/ 104 | [Rr]eleases/ 105 | x64/ 106 | x86/ 107 | [Aa][Rr][Mm]/ 108 | [Aa][Rr][Mm]64/ 109 | bld/ 110 | [Bb]in/ 111 | [Oo]bj/ 112 | [Ll]og/ 113 | 114 | # Visual Studio 2015/2017 cache/options directory 115 | .vs/ 116 | # Uncomment if you have tasks that create the project's static files in wwwroot 117 | #wwwroot/ 118 | 119 | # Visual Studio 2017 auto generated files 120 | Generated\ Files/ 121 | 122 | # MSTest test Results 123 | [Tt]est[Rr]esult*/ 124 | [Bb]uild[Ll]og.* 125 | 126 | # NUNIT 127 | *.VisualState.xml 128 | TestResult.xml 129 | 130 | # Build Results of an ATL Project 131 | [Dd]ebugPS/ 132 | [Rr]eleasePS/ 133 | dlldata.c 134 | 135 | # Benchmark Results 136 | BenchmarkDotNet.Artifacts/ 137 | 138 | # .NET Core 139 | project.lock.json 140 | project.fragment.lock.json 141 | artifacts/ 142 | 143 | # StyleCop 144 
| StyleCopReport.xml 145 | 146 | # Files built by Visual Studio 147 | *_i.c 148 | *_p.c 149 | *_h.h 150 | *.ilk 151 | *.meta 152 | *.obj 153 | *.iobj 154 | *.pch 155 | *.pdb 156 | *.ipdb 157 | *.pgc 158 | *.pgd 159 | *.rsp 160 | *.sbr 161 | *.tlb 162 | *.tli 163 | *.tlh 164 | *.tmp 165 | *.tmp_proj 166 | *_wpftmp.csproj 167 | *.log 168 | *.vspscc 169 | *.vssscc 170 | .builds 171 | *.pidb 172 | *.svclog 173 | *.scc 174 | 175 | # Chutzpah Test files 176 | _Chutzpah* 177 | 178 | # Visual C++ cache files 179 | ipch/ 180 | *.aps 181 | *.ncb 182 | *.opendb 183 | *.opensdf 184 | *.sdf 185 | *.cachefile 186 | *.VC.db 187 | *.VC.VC.opendb 188 | 189 | # Visual Studio profiler 190 | *.psess 191 | *.vsp 192 | *.vspx 193 | *.sap 194 | 195 | # Visual Studio Trace Files 196 | *.e2e 197 | 198 | # TFS 2012 Local Workspace 199 | $tf/ 200 | 201 | # Guidance Automation Toolkit 202 | *.gpState 203 | 204 | # ReSharper is a .NET coding add-in 205 | _ReSharper*/ 206 | *.[Rr]e[Ss]harper 207 | *.DotSettings.user 208 | 209 | # JustCode is a .NET coding add-in 210 | .JustCode 211 | 212 | # TeamCity is a build add-in 213 | _TeamCity* 214 | 215 | # DotCover is a Code Coverage Tool 216 | *.dotCover 217 | 218 | # AxoCover is a Code Coverage Tool 219 | .axoCover/* 220 | !.axoCover/settings.json 221 | 222 | # Visual Studio code coverage results 223 | *.coverage 224 | *.coveragexml 225 | 226 | # NCrunch 227 | _NCrunch_* 228 | .*crunch*.local.xml 229 | nCrunchTemp_* 230 | 231 | # MightyMoose 232 | *.mm.* 233 | AutoTest.Net/ 234 | 235 | # Web workbench (sass) 236 | .sass-cache/ 237 | 238 | # Installshield output folder 239 | [Ee]xpress/ 240 | 241 | # DocProject is a documentation generator add-in 242 | DocProject/buildhelp/ 243 | DocProject/Help/*.HxT 244 | DocProject/Help/*.HxC 245 | DocProject/Help/*.hhc 246 | DocProject/Help/*.hhk 247 | DocProject/Help/*.hhp 248 | DocProject/Help/Html2 249 | DocProject/Help/html 250 | 251 | # Click-Once directory 252 | publish/ 253 | 254 | # Publish Web Output 255 
| *.[Pp]ublish.xml 256 | *.azurePubxml 257 | # Note: Comment the next line if you want to checkin your web deploy settings, 258 | # but database connection strings (with potential passwords) will be unencrypted 259 | *.pubxml 260 | *.publishproj 261 | 262 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 263 | # checkin your Azure Web App publish settings, but sensitive information contained 264 | # in these scripts will be unencrypted 265 | PublishScripts/ 266 | 267 | # NuGet Packages 268 | *.nupkg 269 | # The packages folder can be ignored because of Package Restore 270 | **/[Pp]ackages/* 271 | # except build/, which is used as an MSBuild target. 272 | !**/[Pp]ackages/build/ 273 | # Uncomment if necessary however generally it will be regenerated when needed 274 | #!**/[Pp]ackages/repositories.config 275 | # NuGet v3's project.json files produces more ignorable files 276 | *.nuget.props 277 | *.nuget.targets 278 | 279 | # Microsoft Azure Build Output 280 | csx/ 281 | *.build.csdef 282 | 283 | # Microsoft Azure Emulator 284 | ecf/ 285 | rcf/ 286 | 287 | # Windows Store app package directories and files 288 | AppPackages/ 289 | BundleArtifacts/ 290 | Package.StoreAssociation.xml 291 | _pkginfo.txt 292 | *.appx 293 | 294 | # Visual Studio cache files 295 | # files ending in .cache can be ignored 296 | *.[Cc]ache 297 | # but keep track of directories ending in .cache 298 | !?*.[Cc]ache/ 299 | 300 | # Others 301 | ClientBin/ 302 | ~$* 303 | *.dbmdl 304 | *.dbproj.schemaview 305 | *.jfm 306 | *.pfx 307 | *.publishsettings 308 | orleans.codegen.cs 309 | 310 | # Including strong name files can present a security risk 311 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 312 | #*.snk 313 | 314 | # Since there are multiple workflows, uncomment next line to ignore bower_components 315 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 316 | #bower_components/ 317 | # ASP.NET Core default setup: bower 
directory is configured as wwwroot/lib/ and bower restore is true 318 | **/wwwroot/lib/ 319 | 320 | # RIA/Silverlight projects 321 | Generated_Code/ 322 | 323 | # Backup & report files from converting an old project file 324 | # to a newer Visual Studio version. Backup files are not needed, 325 | # because we have git ;-) 326 | _UpgradeReport_Files/ 327 | Backup*/ 328 | UpgradeLog*.XML 329 | UpgradeLog*.htm 330 | ServiceFabricBackup/ 331 | *.rptproj.bak 332 | 333 | # SQL Server files 334 | *.mdf 335 | *.ldf 336 | *.ndf 337 | 338 | # Business Intelligence projects 339 | *.rdl.data 340 | *.bim.layout 341 | *.bim_*.settings 342 | *.rptproj.rsuser 343 | *- Backup*.rdl 344 | 345 | # Microsoft Fakes 346 | FakesAssemblies/ 347 | 348 | # GhostDoc plugin setting file 349 | *.GhostDoc.xml 350 | 351 | # Node.js Tools for Visual Studio 352 | .ntvs_analysis.dat 353 | node_modules/ 354 | 355 | # Visual Studio 6 build log 356 | *.plg 357 | 358 | # Visual Studio 6 workspace options file 359 | *.opt 360 | 361 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
362 | *.vbw 363 | 364 | # Visual Studio LightSwitch build output 365 | **/*.HTMLClient/GeneratedArtifacts 366 | **/*.DesktopClient/GeneratedArtifacts 367 | **/*.DesktopClient/ModelManifest.xml 368 | **/*.Server/GeneratedArtifacts 369 | **/*.Server/ModelManifest.xml 370 | _Pvt_Extensions 371 | 372 | # Paket dependency manager 373 | .paket/paket.exe 374 | paket-files/ 375 | 376 | # FAKE - F# Make 377 | .fake/ 378 | 379 | # JetBrains Rider 380 | .idea/ 381 | *.sln.iml 382 | 383 | # CodeRush personal settings 384 | .cr/personal 385 | 386 | # Python Tools for Visual Studio (PTVS) 387 | __pycache__/ 388 | *.pyc 389 | 390 | # Cake - Uncomment if you are using it 391 | # tools/** 392 | # !tools/packages.config 393 | 394 | # Tabs Studio 395 | *.tss 396 | 397 | # Telerik's JustMock configuration file 398 | *.jmconfig 399 | 400 | # BizTalk build output 401 | *.btp.cs 402 | *.btm.cs 403 | *.odx.cs 404 | *.xsd.cs 405 | 406 | # OpenCover UI analysis results 407 | OpenCover/ 408 | 409 | # Azure Stream Analytics local run output 410 | ASALocalRun/ 411 | 412 | # MSBuild Binary and Structured Log 413 | *.binlog 414 | 415 | # NVidia Nsight GPU debugger configuration file 416 | *.nvuser 417 | 418 | # MFractors (Xamarin productivity tool) working folder 419 | .mfractor/ 420 | 421 | # Local History for Visual Studio 422 | .localhistory/ 423 | 424 | # BeatPulse healthcheck temp database 425 | healthchecksdb 426 | 427 | # End of https://www.gitignore.io/api/macos,linux,windows,visualstudio,visualstudiocode 428 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Lokad 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, 
copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /Lokad.FasterMath.sln: -------------------------------------------------------------------------------- 1 |  2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 16 4 | VisualStudioVersion = 16.0.29411.108 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lokad.FasterMath", "src\Lokad.FasterMath\Lokad.FasterMath.csproj", "{2F2FD74A-07F3-42EF-9B2F-A92715C8347B}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lokad.FasterMath.Tests", "test\Lokad.FasterMath.Tests\Lokad.FasterMath.Tests.csproj", "{22ADF60A-2CD5-4681-B2C3-F63A8DEC1FB6}" 9 | EndProject 10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Lokad.FasterMath.Bench", "bench\Lokad.FasterMath.Bench\Lokad.FasterMath.Bench.csproj", "{D61C9F40-647F-4849-ABD7-7C5125BEA89E}" 11 | EndProject 12 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Lokad.FasterMath.Alt", "src\Lokad.FasterMath.Alt\Lokad.FasterMath.Alt.csproj", "{F0EACB1A-638F-4E62-9F2F-070C4FF720D4}" 13 | EndProject 14 | 
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{BC85A4E5-CA98-4134-B332-65B4778746E9}" 15 | ProjectSection(SolutionItems) = preProject 16 | LICENSE.txt = LICENSE.txt 17 | ReadMe.md = ReadMe.md 18 | EndProjectSection 19 | EndProject 20 | Global 21 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 22 | Debug|Any CPU = Debug|Any CPU 23 | Release|Any CPU = Release|Any CPU 24 | EndGlobalSection 25 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 26 | {2F2FD74A-07F3-42EF-9B2F-A92715C8347B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 27 | {2F2FD74A-07F3-42EF-9B2F-A92715C8347B}.Debug|Any CPU.Build.0 = Debug|Any CPU 28 | {2F2FD74A-07F3-42EF-9B2F-A92715C8347B}.Release|Any CPU.ActiveCfg = Release|Any CPU 29 | {2F2FD74A-07F3-42EF-9B2F-A92715C8347B}.Release|Any CPU.Build.0 = Release|Any CPU 30 | {22ADF60A-2CD5-4681-B2C3-F63A8DEC1FB6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 31 | {22ADF60A-2CD5-4681-B2C3-F63A8DEC1FB6}.Debug|Any CPU.Build.0 = Debug|Any CPU 32 | {22ADF60A-2CD5-4681-B2C3-F63A8DEC1FB6}.Release|Any CPU.ActiveCfg = Release|Any CPU 33 | {22ADF60A-2CD5-4681-B2C3-F63A8DEC1FB6}.Release|Any CPU.Build.0 = Release|Any CPU 34 | {D61C9F40-647F-4849-ABD7-7C5125BEA89E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 35 | {D61C9F40-647F-4849-ABD7-7C5125BEA89E}.Debug|Any CPU.Build.0 = Debug|Any CPU 36 | {D61C9F40-647F-4849-ABD7-7C5125BEA89E}.Release|Any CPU.ActiveCfg = Release|Any CPU 37 | {D61C9F40-647F-4849-ABD7-7C5125BEA89E}.Release|Any CPU.Build.0 = Release|Any CPU 38 | {F0EACB1A-638F-4E62-9F2F-070C4FF720D4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 39 | {F0EACB1A-638F-4E62-9F2F-070C4FF720D4}.Debug|Any CPU.Build.0 = Debug|Any CPU 40 | {F0EACB1A-638F-4E62-9F2F-070C4FF720D4}.Release|Any CPU.ActiveCfg = Release|Any CPU 41 | {F0EACB1A-638F-4E62-9F2F-070C4FF720D4}.Release|Any CPU.Build.0 = Release|Any CPU 42 | EndGlobalSection 43 | GlobalSection(SolutionProperties) = preSolution 44 | HideSolutionNode = FALSE 45 | EndGlobalSection 
46 | GlobalSection(ExtensibilityGlobals) = postSolution 47 | SolutionGuid = {1F78905C-CE1B-4A3F-AAB6-3E2718840CE8} 48 | EndGlobalSection 49 | EndGlobal 50 | -------------------------------------------------------------------------------- /ReadMe.md: -------------------------------------------------------------------------------- 1 | # FasterMath by Lokad 2 | 3 | > Author: Joannes Vermorel (Lokad, j.vermorel@lokad.com) 4 | 5 | This library collects a short series of faster, but approximate, mathematical 6 | functions leveraging hardware intrinsics in .NET. The library maintains a 7 | relative precision of 1e-3 for the accelerated functions. 8 | 9 | The library has _no dependency_ and does _not_ rely on memoization techniques. 10 | The goal is to make the most of the superscalar capabilities of modern CPUs, 11 | without burdening the cache or the garbage collector. 12 | 13 | _This library is licensed under the MIT license._ 14 | 15 | ## Requirements 16 | 17 | * .NET Core 3.0+ 18 | * Modern CPU with AVX2 19 | 20 | ## Getting started 21 | 22 | To install with NuGet: 23 | 24 | Install-Package Lokad.FasterMath 25 | 26 | Usage: 27 | 28 | ```csharp 29 | using Lokad.Numerics; 30 | 31 | var x = FxMath.Log(123f); // scalar 32 | ReadOnlySpan myInputs = .. ; 33 | Span myResults = .. 
; 34 | FxMath.Log(myInputs, myResults); // SIMD-accelerated 35 | ``` 36 | 37 | ## Performance results 38 | 39 | _The `S8` suffix indicates a SIMD implementation with `Vector256`._ 40 | 41 | 42 | | Method | Mean | Error | StdDev | 43 | |--------------------- |-----------:|----------:|----------:| 44 | | Digamma_FxMath | 8.1156 ns | 0.1911 ns | 0.2679 ns | 45 | | Digamma_FxMath_S8 | 18.4599 ns | 0.3741 ns | 0.3674 ns | 46 | | Exp_MathF | 3.4134 ns | 0.0962 ns | 0.2210 ns | 47 | | Exp_Math | 14.5137 ns | 0.2499 ns | 0.2338 ns | 48 | | Exp_FxMath | 2.0275 ns | 0.0706 ns | 0.0918 ns | 49 | | Exp_FxMath_S8 | 4.5711 ns | 0.1207 ns | 0.2297 ns | 50 | | Log_MathF | 3.9548 ns | 0.1100 ns | 0.2528 ns | 51 | | Log_Math | 11.2905 ns | 0.1388 ns | 0.1159 ns | 52 | | Log_FxMath | 2.7824 ns | 0.0856 ns | 0.1521 ns | 53 | | Log_FxMath_S8 | 6.4883 ns | 0.1236 ns | 0.1563 ns | 54 | | Log2_MathF | 13.0202 ns | 0.2853 ns | 0.3172 ns | 55 | | Log2_Math | 17.1575 ns | 0.3730 ns | 0.7187 ns | 56 | | Log2_FxMath | 0.0444 ns | 0.0252 ns | 0.0447 ns | 57 | | LogGamma_FxMath | 15.8183 ns | 0.3346 ns | 0.3436 ns | 58 | | LogGamma_FxMath_S8 | 28.8896 ns | 0.6744 ns | 1.0891 ns | 59 | 60 | ```BenchmarkDotNet=v0.12.0, OS=Windows 10.0.18362 61 | Intel Core i9-8950HK CPU 2.90GHz (Coffee Lake), 1 CPU, 12 logical and 6 physical cores 62 | .NET Core SDK=3.0.100 63 | [Host] : .NET Core 3.0.0 (CoreCLR 4.700.19.46205, CoreFX 4.700.19.46214), X64 RyuJIT 64 | DefaultJob : .NET Core 3.0.0 (CoreCLR 4.700.19.46205, CoreFX 4.700.19.46214), X64 RyuJIT``` 65 | 66 | -------------------------------------------------------------------------------- /bench/Lokad.FasterMath.Bench/AllBench.cs: -------------------------------------------------------------------------------- 1 | using BenchmarkDotNet.Attributes; 2 | using System; 3 | using System.Runtime.Intrinsics; 4 | 5 | namespace Lokad.Numerics.Bench 6 | { 7 | [RPlotExporter] 8 | public class AllBench 9 | { 10 | public float DiGX = 4f; 11 | 12 | public Vector256 
DigGX8 = Vector256.Create(4f); 13 | 14 | [Benchmark] 15 | public float Digamma_FxMath() => FxMath.Digamma(DiGX); 16 | 17 | [Benchmark] 18 | public Vector256 Digamma_FxMath_S8() => FxMath.Digamma(DigGX8); 19 | 20 | // --- Exp: MathF and Math baselines, then the FxMath scalar and Vector256 variants --- 21 | 22 | public float ExpX = 0.1f; 23 | 24 | public Vector256 X8 = Vector256.Create(0.1f); 25 | 26 | [Benchmark] 27 | public float Exp_MathF() => MathF.Exp(ExpX); 28 | 29 | [Benchmark] 30 | public float Exp_Math() => (float)Math.Exp(ExpX); 31 | 32 | [Benchmark] 33 | public float Exp_FxMath() => FxMath.Exp(ExpX); 34 | 35 | [Benchmark] 36 | public Vector256 Exp_FxMath_S8() => FxMath.Exp(X8); 37 | 38 | // --- Log: MathF and Math baselines, then the FxMath scalar and Vector256 variants --- 39 | public float LogX = 0.1f; 40 | 41 | public Vector256 LogX8 = Vector256.Create(0.1f); 42 | 43 | [Benchmark] 44 | public float Log_MathF() => MathF.Log(LogX); 45 | 46 | [Benchmark] 47 | public float Log_Math() => (float)Math.Log(LogX); 48 | 49 | [Benchmark] 50 | public float Log_FxMath() => FxMath.Log(LogX); 51 | 52 | [Benchmark] 53 | public Vector256 Log_FxMath_S8() => FxMath.Log(LogX8); 54 | 55 | // --- Log2: base-2 logarithms from MathF / Math cast to uint, then FxMath.Log2 --- 56 | public float Log2X = 123; 57 | 58 | [Benchmark] 59 | public uint Log2_MathF() => (uint)MathF.Log(Log2X, 2.0f); 60 | 61 | [Benchmark] 62 | public uint Log2_Math() => (uint)Math.Log(Log2X, 2.0); 63 | // NOTE(review): the call below passes the literal 123 rather than the Log2X field, so the JIT may fold it to a constant; the 0.0444 ns reported for Log2_FxMath in the ReadMe is consistent with that - confirm and feed it from a field 64 | [Benchmark] 65 | public uint Log2_FxMath() => FxMath.Log2(123); 66 | 67 | // --- LogGamma: FxMath scalar and Vector256 variants --- 68 | 69 | public float LogGX = 0.1f; 70 | 71 | public Vector256 LogGX8 = Vector256.Create(0.1f); 72 | 73 | [Benchmark] 74 | public float LogGamma_FxMath() => FxMath.LogGamma(LogGX); 75 | 76 | [Benchmark] 77 | public Vector256 LogGamma_FxMath_S8() => FxMath.LogGamma(LogGX8); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /bench/Lokad.FasterMath.Bench/Lokad.FasterMath.Bench.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | 4 | Exe 5 | netcoreapp3.0 6 | Lokad.Numerics.Bench 7 | 8 | Lokad 9 | Copyright © Lokad 2019 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 
-------------------------------------------------------------------------------- /bench/Lokad.FasterMath.Bench/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Runtime.Intrinsics; 3 | using BenchmarkDotNet.Attributes; 4 | using BenchmarkDotNet.Running; 5 | 6 | namespace Lokad.Numerics.Bench 7 | { 8 | public class Program 9 | { 10 | public static void Main(string[] args) 11 | { 12 | //var summary = BenchmarkRunner.Run(); 13 | //var summary = BenchmarkRunner.Run(); 14 | //var summary = BenchmarkRunner.Run(); 15 | //var summary = BenchmarkRunner.Run(); 16 | //var summary = BenchmarkRunner.Run(); 17 | // NOTE(review): only the call below is active, the runs above are commented out; no type argument is visible on any of them in this listing - confirm which benchmark class each one targets 18 | var summary = BenchmarkRunner.Run(); 19 | } 20 | } 21 | // Digamma: FxMath scalar and Vector256 variants, plus AltMath.Digamma 22 | [RPlotExporter] 23 | public class DigammaBench 24 | { 25 | public float DiGX = 4f; 26 | 27 | public Vector256 DigGX8 = Vector256.Create(4f); 28 | 29 | [Benchmark] 30 | public float Digamma_FxMath() => FxMath.Digamma(DiGX); 31 | 32 | [Benchmark] 33 | public Vector256 Digamma_FxMath_S8() => FxMath.Digamma(DigGX8); 34 | 35 | [Benchmark] 36 | public double Digamma_AltMath() => AltMath.Digamma(DiGX); 37 | } 38 | // Exp: MathF and Math baselines, then the FxMath scalar and Vector256 variants 39 | [RPlotExporter] 40 | public class ExpBench 41 | { 42 | public float ExpX = 0.1f; 43 | 44 | public Vector256 X8 = Vector256.Create(0.1f); 45 | 46 | [Benchmark] 47 | public float Exp_MathF() => MathF.Exp(ExpX); 48 | 49 | [Benchmark] 50 | public float Exp_Math() => (float)Math.Exp(ExpX); 51 | 52 | [Benchmark] 53 | public float Exp_FxMath() => FxMath.Exp(ExpX); 54 | 55 | [Benchmark] 56 | public Vector256 Exp_FxMath_S8() => FxMath.Exp(X8); 57 | } 58 | // Log: MathF and Math baselines, then the FxMath scalar and Vector256 variants 59 | [RPlotExporter] 60 | public class LogBench 61 | { 62 | public float LogX = 0.1f; 63 | 64 | public Vector256 LogX8 = Vector256.Create(0.1f); 65 | 66 | [Benchmark] 67 | public float Log_MathF() => MathF.Log(LogX); 68 | 69 | [Benchmark] 70 | public float Log_Math() => (float)Math.Log(LogX); 71 | 72 | [Benchmark] 73 | public float Log_FxMath() => FxMath.Log(LogX); 74 | 75 | 
[Benchmark] 76 | public Vector256 Log_FxMath_S8() => FxMath.Log(LogX8); 77 | } 78 | // Log2: base-2 logarithms from MathF / Math cast to uint, then FxMath.Log2 and AltMath.Log2 79 | [RPlotExporter] 80 | public class Log2Bench 81 | { 82 | public float Log2X = 123; 83 | 84 | [Benchmark] 85 | public uint Log2_MathF() => (uint)MathF.Log(Log2X, 2.0f); 86 | 87 | [Benchmark] 88 | public uint Log2_Math() => (uint)Math.Log(Log2X, 2.0); 89 | // NOTE(review): the two calls below pass the literal 123 instead of the Log2X field, so the JIT may constant-fold them and the timings may not reflect real work - confirm and read the argument from a field 90 | [Benchmark] 91 | public uint Log2_FxMath() => FxMath.Log2(123); 92 | 93 | [Benchmark] 94 | public uint Log2_AltMath_WithLookup() => AltMath.Log2(123); 95 | } 96 | // LogGamma: FxMath scalar and Vector256 variants, plus AltMath.LogGamma 97 | [RPlotExporter] 98 | public class LogGammaBench 99 | { 100 | public float LogGX = 0.1f; 101 | 102 | public Vector256 LogGX8 = Vector256.Create(0.1f); 103 | 104 | [Benchmark] 105 | public float LogGamma_FxMath() => FxMath.LogGamma(LogGX); 106 | 107 | [Benchmark] 108 | public Vector256 LogGamma_FxMath_S8() => FxMath.LogGamma(LogGX8); 109 | 110 | [Benchmark] 111 | public double LogGamma_AltMath() => AltMath.LogGamma(LogGX); 112 | } 113 | } -------------------------------------------------------------------------------- /bench/Lokad.FasterMath.Bench/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Lokad.FasterMath.Bench 2 | 3 | Utility console app intended to collect performance metrics. 
4 | 5 | See https://benchmarkdotnet.org/ 6 | -------------------------------------------------------------------------------- /lokad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lokad/FasterMath/00370396d8099107e1ba3a4b0882fcd87bc1e004/lokad.png -------------------------------------------------------------------------------- /src/Lokad.FasterMath.Alt/Digamma.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Lokad.Numerics 4 | { 5 | public partial class AltMath 6 | { 7 | // Source: 8 | // https://github.com/mathnet/mathnet-numerics/blob/master/src/Numerics/SpecialFunctions/Gamma.cs 9 | 10 | /// 11 | /// Computes the Digamma function which is mathematically defined as the derivative of the logarithm of the gamma function. 12 | /// This implementation is based on 13 | /// Jose Bernardo 14 | /// Algorithm AS 103: 15 | /// Psi ( Digamma ) Function, 16 | /// Applied Statistics, 17 | /// Volume 25, Number 3, 1976, pages 315-317. 18 | /// Using the modifications as in Tom Minka's lightspeed toolbox. 19 | /// 20 | /// 21 | /// Intended as a reference implementation, precision-wise. 
22 | /// Returns NaN when x is NaN, zero or negative. 23 | public static double Digamma(double x) 24 | { 25 | const double c = 12.0; // arguments below c are shifted upward before the asymptotic series is applied 26 | const double d1 = -0.57721566490153286; // -(Euler-Mascheroni constant), i.e. digamma(1) 27 | const double d2 = 1.6449340668482264365; // pi^2 / 6 28 | const double s = 1e-6; // at or below s the three-term small-x expansion is used instead 29 | const double s3 = 1.0 / 12.0; // s3..s7: coefficients of the asymptotic series in 1/x^2 30 | const double s4 = 1.0 / 120.0; 31 | const double s5 = 1.0 / 252.0; 32 | const double s6 = 1.0 / 240.0; 33 | const double s7 = 1.0 / 132.0; 34 | 35 | if (!(x > 0)) // rejects x <= 0 and NaN alike; a NaN would otherwise fail every comparison below and come back as 0 36 | return double.NaN; 37 | // tiny x: digamma(x) ~ -gamma - 1/x + (pi^2 / 6) * x 38 | if (x <= s) 39 | { 40 | return d1 - (1 / x) + (d2 * x); 41 | } 42 | // recurrence digamma(x) = digamma(x + 1) - 1/x, applied until x >= c 43 | double result = 0; 44 | while (x < c) 45 | { 46 | result -= 1 / x; 47 | x++; 48 | } 49 | // always true at this point: x is not NaN and the loop above exits only once x >= c 50 | if (x >= c) 51 | { 52 | var r = 1 / x; 53 | result += Math.Log(x) - (0.5 * r); 54 | r *= r; 55 | // remaining series terms, evaluated in nested (Horner) form with r = 1/x^2 56 | result -= r * (s3 - (r * (s4 - (r * (s5 - (r * (s6 - (r * s7)))))))); 57 | } 58 | 59 | return result; 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath.Alt/Log2.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics; 3 | 4 | namespace Lokad.Numerics 5 | { 6 | public partial class AltMath 7 | { 8 | /// Log2 on zero is undefined. 
9 | static readonly uint[] Log2Tab = { // 64 entries, indexed by the top 6 bits of the product computed in Log2 10 | uint.MaxValue, 0, 58, 1, 59, 47, 53, 2, 11 | 60, 39, 48, 27, 54, 33, 42, 3, 12 | 61, 51, 37, 40, 49, 18, 28, 20, 13 | 55, 30, 34, 11, 43, 14, 22, 4, 14 | 62, 57, 46, 52, 38, 26, 32, 41, 15 | 50, 36, 17, 19, 29, 10, 13, 21, 16 | 56, 45, 25, 31, 35, 16, 9, 12, 17 | 44, 24, 15, 8, 23, 7, 6, 5}; 18 | /// Returns the index of the highest set bit of value, i.e. floor(log2(value)). A zero value trips the assertion; when the assertion is compiled out it yields uint.MaxValue from Log2Tab[0]. 19 | public static uint Log2(ulong value) 20 | { 21 | Debug.Assert(value > 0); 22 | 23 | // TODO: [vermorel] probably to be moved to Lokad.Numerics with an intrinsic 24 | // https://stackoverflow.com/questions/11376288/fast-computing-of-log2-for-64-bit-integers 25 | // smear the highest set bit into every lower position 26 | value |= value >> 1; 27 | value |= value >> 2; 28 | value |= value >> 4; 29 | value |= value >> 8; 30 | value |= value >> 16; 31 | value |= value >> 32; // value - (value >> 1) below keeps only the highest set bit; the multiply and >> 58 turn it into a 6-bit table index 32 | return Log2Tab[(ulong)((value - (value >> 1)) * 0x07EDD5E59A4E28C2) >> 58]; 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath.Alt/LogGamma.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | 3 | namespace Lokad.Numerics 4 | { 5 | public partial class AltMath 6 | { 7 | // Source: 8 | // https://github.com/mathnet/mathnet-numerics/blob/master/src/Numerics/SpecialFunctions/Gamma.cs 9 | 10 | /// 11 | /// The order of the approximation. 12 | /// 13 | const int GammaN = 10; 14 | 15 | /// 16 | /// Auxiliary variable when evaluating the function. 17 | /// 18 | const double GammaR = 10.900511; 19 | 20 | /// The number log[e](pi) 21 | public const double LnPi = 1.1447298858494001741434273513530587116472948129153d; 22 | 23 | /// The number log(2 * sqrt(e / pi)) 24 | public const double LogTwoSqrtEOverPi = 0.6207822376352452223455184457816472122518527279025978; 25 | 26 | /// 27 | /// Polynomial coefficients for the approximation. 
28 | /// GammaN + 1 entries: index 0 seeds the sum in LogGamma, indices 1..GammaN are each divided by a shifted argument. 29 | static readonly double[] GammaDk = 30 | { 31 | 2.48574089138753565546e-5, 32 | 1.05142378581721974210, 33 | -3.45687097222016235469, 34 | 4.51227709466894823700, 35 | -2.98285225323576655721, 36 | 1.05639711577126713077, 37 | -1.95428773191645869583e-1, 38 | 1.70970543404441224307e-2, 39 | -5.71926117404305781283e-4, 40 | 4.63399473359905636708e-6, 41 | -2.71994908488607703910e-9 42 | }; 43 | 44 | /// 45 | /// Computes the logarithm of the Gamma function. 46 | /// For z below 0.5 the value comes from the reflection formula, which takes Math.Log(Math.Sin(Math.PI * z)); the result is therefore NaN wherever that sine is negative (e.g. for -1 < z < 0). 47 | /// The argument of the gamma function. 48 | /// The logarithm of the gamma function. 49 | /// 50 | /// This implementation of the computation of the gamma and logarithm of the gamma function follows the derivation in 51 | /// "An Analysis Of The Lanczos Gamma Approximation", Glendon Ralph Pugh, 2004. 52 | /// We use the implementation listed on p. 116 which achieves an accuracy of 16 floating point digits. Although 16 digit accuracy 53 | /// should be sufficient for double values, improving accuracy is possible (see p. 126 in Pugh). 54 | /// Our unit tests suggest that the accuracy of the Gamma function is correct up to 14 floating point digits. 
55 | /// 56 | public static double LogGamma(double z) 57 | { 58 | if (z < 0.5) 59 | { 60 | double s = GammaDk[0]; 61 | for (int i = 1; i <= GammaN; i++) 62 | { 63 | s += GammaDk[i] / (i - z); 64 | } 65 | 66 | return LnPi 67 | - Math.Log(Math.Sin(Math.PI * z)) 68 | - Math.Log(s) 69 | - LogTwoSqrtEOverPi 70 | - ((0.5 - z) * Math.Log((0.5 - z + GammaR) / Math.E)); 71 | } 72 | else 73 | { 74 | double s = GammaDk[0]; 75 | for (int i = 1; i <= GammaN; i++) 76 | { 77 | s += GammaDk[i] / (z + i - 1.0); 78 | } 79 | 80 | return Math.Log(s) 81 | + LogTwoSqrtEOverPi 82 | + ((z - 0.5) * Math.Log((z - 0.5 + GammaR) / Math.E)); 83 | } 84 | } 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath.Alt/Lokad.FasterMath.Alt.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netcoreapp3.0 5 | Lokad.Numerics 6 | 7 | Lokad 8 | Copyright © Lokad 2019 9 | 10 | 11 | 12 | AnyCPU 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath.Alt/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Lokad.FasterMath.Alt 2 | 3 | Alternative implementations, typically slower than hardware-accelerated ones. 4 | This library is introduced as a container for numerical recipes that are of 5 | interest, but kept separate to avoid any confusion between high-performance 6 | implementations and their variants (notably reference implementations). 
-------------------------------------------------------------------------------- /src/Lokad.FasterMath/Digamma.cs: -------------------------------------------------------------------------------- 1 | using System.Diagnostics; 2 | using System.Runtime.Intrinsics; 3 | using System.Runtime.Intrinsics.X86; 4 | 5 | namespace Lokad.Numerics 6 | { 7 | public partial class FxMath 8 | { 9 | // Source: 10 | // https://github.com/mathnet/mathnet-numerics/blob/master/src/Numerics/SpecialFunctions/Gamma.cs 11 | 12 | /// 13 | /// Computes the Digamma function which is mathematically defined as the derivative 14 | /// of the logarithm of the gamma function. 15 | /// x must be positive; this is checked by a debug assertion only. 16 | public static float Digamma(float x) 17 | { 18 | Debug.Assert(x > 0); 19 | 20 | const float s3 = 1.0f / 12.0f; 21 | const float s4 = 1.0f / 120.0f; 22 | const float s5 = 1.0f / 252.0f; 23 | const float s6 = 1.0f / 240.0f; 24 | const float s7 = 1.0f / 132.0f; 25 | 26 | // branch-free, to allow SIMD-variants 27 | // shift the argument up by 3 with digamma(x) = digamma(x + 1) - 1/x, then apply the asymptotic series at x + 3 28 | float result = 0; 29 | for(var i = 0; i < 3; i++) // 3 tuned as a tradeoff performance-vs-precision 30 | { 31 | result -= 1 / x; 32 | x++; 33 | } 34 | // digamma(x) ~ ln(x) - 1/(2x) - (series in r = 1/x^2, nested form below) 35 | var r = 1 / x; 36 | result += Log(x) - (0.5f * r); 37 | r *= r; 38 | 39 | result -= r * (s3 - (r * (s4 - (r * (s5 - (r * (s6 - (r * s7)))))))); 40 | 41 | return result; 42 | } 43 | /// Vector variant of the scalar Digamma above: the same three-step shift and series, applied per lane. No positivity check is made on the lanes. 44 | public static Vector256 Digamma(Vector256 x) 45 | { 46 | const float s3 = 1.0f / 12.0f; 47 | const float s4 = 1.0f / 120.0f; 48 | const float s5 = 1.0f / 252.0f; 49 | const float s6 = 1.0f / 240.0f; 50 | const float s7 = 1.0f / 132.0f; 51 | const float half = 0.5f; 52 | const float one = 1f; 53 | 54 | var vone = Vector256.Create(one); 55 | 56 | // note: 'Reciprocal' does not yield the same numerical results than 'Divide(1f, x)' 57 | 58 | var result = Vector256.Zero; 59 | for(var i = 0; i < 3; i++) 60 | { 61 | result = Avx.Subtract(result, Avx.Divide(vone, x)); 62 | x = Avx.Add(x, vone); // float addition is declared on Avx; every other operation in this method already goes through Avx 63 | } 64 | 65 | var r = Avx.Divide(vone, x); 66 | result = Avx.Add(result, 
Avx.Subtract(Log(x), Avx.Multiply(Vector256.Create(half), r))); 67 | r = Avx.Multiply(r, r); 68 | 69 | result = Avx.Subtract(result, 70 | Avx.Multiply(r, 71 | Avx.Subtract(Vector256.Create(s3), 72 | Avx.Multiply(r, 73 | Avx.Subtract(Vector256.Create(s4), 74 | Avx.Multiply(r, 75 | Avx.Subtract(Vector256.Create(s5), 76 | Avx.Multiply(r, 77 | Avx.Subtract(Vector256.Create(s6), 78 | Avx.Multiply(r, Vector256.Create(s7))))))))))); 79 | 80 | return result; 81 | } 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/Exp.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Runtime.Intrinsics; 3 | using System.Runtime.Intrinsics.X86; 4 | 5 | namespace Lokad.Numerics 6 | { 7 | public partial class FxMath 8 | { 9 | // Source: 10 | // https://stackoverflow.com/questions/47025373/fastest-implementation-of-exponential-function-using-sse 11 | 12 | ///* max. rel. error <= 1.72886892e-3 on [-87.33654, 88.72283] */ 13 | //__m128 fast_exp_sse(__m128 x) 14 | //{ 15 | // __m128 f, p, r; 16 | // __m128i t, j; 17 | // const __m128 a = _mm_set1_ps(12102203.0f); /* (1 << 23) / log(2) */ 18 | // const __m128i m = _mm_set1_epi32(0xff800000); /* mask for integer bits */ 19 | // const __m128 ttm23 = _mm_set1_ps(1.1920929e-7f); /* exp2(-23) */ 20 | // const __m128 c0 = _mm_set1_ps(0.3371894346f); 21 | // const __m128 c1 = _mm_set1_ps(0.657636276f); 22 | // const __m128 c2 = _mm_set1_ps(1.00172476f); 23 | 24 | // t = _mm_cvtps_epi32(_mm_mul_ps(a, x)); 25 | // j = _mm_and_si128(t, m); /* j = (int)(floor (x/log(2))) << 23 */ 26 | // t = _mm_sub_epi32(t, j); 27 | // f = _mm_mul_ps(ttm23, _mm_cvtepi32_ps(t)); /* f = (x/log(2)) - floor (x/log(2)) */ 28 | // p = c0; /* c0 */ 29 | // p = _mm_mul_ps(p, f); /* c0 * f */ 30 | // p = _mm_add_ps(p, c1); /* c0 * f + c1 */ 31 | // p = _mm_mul_ps(p, f); /* (c0 * f + c1) * f */ 32 | // p = _mm_add_ps(p, c2); /* p = (c0 * f + 
c1) * f + c2 ~= 2^f */ 33 | // r = _mm_castsi128_ps(_mm_add_epi32(j, _mm_castps_si128(p))); /* r = p * 2^i*/ 34 | // return r; 35 | //} 36 | 37 | /// 38 | /// Max. relative error <= 1.72886892e-3 on [-87.33654, 88.72283]. 39 | /// 40 | public static float Exp(float x) 41 | { 42 | float f, p; 43 | int t, j; 44 | FloatIntUnion r; 45 | 46 | const float a = 12102203.0f; /* (1 << 23) / log(2) */ 47 | const int m = unchecked((int)0xff800000); /* mask for integer bits */ 48 | const float ttm23 = 1.1920929e-7f; /* exp2(-23) */ 49 | const float c0 = 0.3371894346f; 50 | const float c1 = 0.657636276f; 51 | const float c2 = 1.00172476f; 52 | 53 | t = (int)(a * x); 54 | j = t & m; /* j = (int)(floor (x/log(2))) << 23 */ 55 | t = t - j; 56 | f = ttm23 * t; /* f = (x/log(2)) - floor (x/log(2)) */ 57 | p = c0; /* c0 */ 58 | p = p * f; /* c0 * f */ 59 | p = p + c1; /* c0 * f + c1 */ 60 | p = p * f; /* (c0 * f + c1) * f */ 61 | p = p + c2; /* p = (c0 * f + c1) * f + c2 ~= 2^f */ 62 | 63 | r.Int = 0; // HACK: work-around compiler error 64 | r.Float = p; 65 | r.Int = j + r.Int; /* r = p * 2^i*/ 66 | return r.Float; 67 | } 68 | 69 | /// 70 | /// Max. relative error <= 1.72886892e-3 on [-87.33654, 88.72283]. 71 | /// 72 | /// 73 | /// | Method | Mean | Error | StdDev | 74 | /// |----------------- |----------:|----------:|----------:| 75 | /// | Exp_System_MathF | 3.369 ns | 0.1301 ns | 0.1866 ns | 76 | /// | Exp_System_Math | 14.355 ns | 0.2615 ns | 0.2183 ns | 77 | /// | Exp_FxMath | 4.104 ns | 0.1024 ns | 0.0957 ns | 78 | /// 79 | public static Vector256 Exp(Vector256 x) 80 | { 81 | Vector256 f, p, r; 82 | Vector256 t, j; 83 | 84 | // According to BenchmarkDotNet, isolating all the constants up-front 85 | // yield nearly 10% speed-up. 
86 | 87 | const float a = 12102203.0f; /* (1 << 23) / log(2) */ 88 | const int m = unchecked((int)0xff800000); /* mask for integer bits */ 89 | const float ttm23 = 1.1920929e-7f; /* exp2(-23) */ 90 | const float c0 = 0.3371894346f; 91 | const float c1 = 0.657636276f; 92 | const float c2 = 1.00172476f; 93 | 94 | t = Avx2.ConvertToVector256Int32(Avx2.Multiply(Vector256.Create(a), x)); 95 | j = Avx2.And(t, Vector256.Create(m)); /* j = (int)(floor (x/log(2))) << 23 */ 96 | t = Avx2.Subtract(t, j); 97 | f = Avx2.Multiply(Vector256.Create(ttm23), Avx2.ConvertToVector256Single(t)); /* f = (x/log(2)) - floor (x/log(2)) */ 98 | p = Vector256.Create(c0); /* c0 */ 99 | p = Avx2.Multiply(p, f); /* c0 * f */ 100 | p = Avx2.Add(p, Vector256.Create(c1)); /* c0 * f + c1 */ 101 | p = Avx2.Multiply(p, f); /* (c0 * f + c1) * f */ 102 | p = Avx2.Add(p, Vector256.Create(c2)); /* p = (c0 * f + c1) * f + c2 ~= 2^f */ 103 | 104 | r = Avx2.Add(j, p.As()).As(); /* r = p * 2^i*/ 105 | return r; 106 | } 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/FloatIntUnion.cs: -------------------------------------------------------------------------------- 1 | using System.Runtime.InteropServices; 2 | 3 | namespace Lokad.Numerics 4 | { 5 | /// Helper intended for quick-cast float into int (and vice-versa). 
6 | [StructLayout(LayoutKind.Explicit, Size = 4)] 7 | internal struct FloatIntUnion 8 | { 9 | [FieldOffset(0)] 10 | public float Float; 11 | 12 | [FieldOffset(0)] 13 | public int Int; 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/Log.cs: -------------------------------------------------------------------------------- 1 | using System.Runtime.Intrinsics; 2 | using System.Runtime.Intrinsics.X86; 3 | 4 | namespace Lokad.Numerics 5 | { 6 | public partial class FxMath 7 | { 8 | // Source: 9 | // https://github.com/jhjourdan/SIMD-math-prims/blob/master/simd_math_prims.h 10 | 11 | ///* Absolute error bounded by 1e-6 for normalized inputs 12 | // Returns a finite number for +inf input 13 | // Returns -inf for nan and <= 0 inputs. 14 | // Continuous error. */ 15 | //inline float logapprox(float val) { 16 | // union { float f; int32_t i; } valu; 17 | // float exp, addcst, x; 18 | // valu.f = val; 19 | // exp = valu.i >> 23; 20 | // /* -89.970756366f = -127 * log(2) + constant term of polynomial below. */ 21 | // addcst = val > 0 ? -89.970756366f : -(float)INFINITY; 22 | // valu.i = (valu.i & 0x7FFFFF) | 0x3F800000; 23 | // x = valu.f; 24 | 25 | 26 | // /* Generated in Sollya using: 27 | // > f = remez(log(x)-(x-1)*log(2), 28 | // [|1,(x-1)*(x-2), (x-1)*(x-2)*x, (x-1)*(x-2)*x*x, 29 | // (x-1)*(x-2)*x*x*x|], [1,2], 1, 1e-8); 30 | // > plot(f+(x-1)*log(2)-log(x), [1,2]); 31 | // > f+(x-1)*log(2) 32 | // */ 33 | // return 34 | // x * (3.529304993f + x * (-2.461222105f + x * (1.130626167f + 35 | // x * (-0.288739945f + x * 3.110401639e-2f)))) 36 | // + (addcst + 0.6931471805f*exp); 37 | //} 38 | 39 | public static float Log(float x) 40 | { 41 | float exp, addcst, val; 42 | FloatIntUnion valu; 43 | 44 | valu.Int = 0; // HACK: work-around compiler error 45 | valu.Float = x; 46 | exp = valu.Int >> 23; 47 | 48 | addcst = x > 0 ? 
-89.970756366f : float.NaN; 49 | 50 | valu.Int = (valu.Int & 0x7FFFFF) | 0x3F800000; 51 | val = valu.Float; 52 | 53 | return val * (3.529304993f + 54 | val * (-2.461222105f + 55 | val * (1.130626167f + 56 | val * (-0.288739945f + 57 | val * 3.110401639e-2f)))) 58 | + (addcst + 0.6931471805f * exp); 59 | } 60 | 61 | /// 62 | /// Absolute error bounded by 1e-4. 63 | /// 64 | public static Vector256 Log(Vector256 x) 65 | { 66 | Vector256 exp, addcst, val; 67 | 68 | exp = Avx2.ConvertToVector256Single(Avx2.ShiftRightArithmetic(x.As(), 23)); 69 | 70 | // According to BenchmarkDotNet, isolating all the constants up-front 71 | // yield nearly 10% speed-up. 72 | 73 | const float bf0 = -89.970756366f; 74 | const float bf1 = float.NaN; // behavior of MathF.Log() on negative numbers 75 | const float bf2 = 3.529304993f; 76 | const float bf3 = -2.461222105f; 77 | const float bf4 = 1.130626167f; 78 | const float bf5 = -0.288739945f; 79 | const float bf6 = 3.110401639e-2f; 80 | const float bf7 = 0.6931471805f; 81 | 82 | const int bi0 = 0x7FFFFF; 83 | const int bi1 = 0x3F800000; 84 | 85 | //addcst = val > 0 ? 
-89.970756366f : -(float)INFINITY; 86 | 87 | addcst = Avx.BlendVariable(Vector256.Create(bf0), 88 | Vector256.Create(bf1), 89 | Avx.Compare(x, Vector256.Zero, FloatComparisonMode.OrderedLessThanNonSignaling)); 90 | 91 | val = Avx2.Or(Avx2.And( 92 | x.As(), 93 | Vector256.Create(bi0)), 94 | Vector256.Create(bi1)).As(); 95 | 96 | /* x * (3.529304993f + 97 | x * (-2.461222105f + 98 | x * (1.130626167f + 99 | x * (-0.288739945f + 100 | x * 3.110401639e-2f)))) 101 | + (addcst + 0.6931471805f*exp); */ 102 | 103 | return Avx2.Add( 104 | Avx2.Multiply(val, Avx2.Add(Vector256.Create(bf2), 105 | Avx2.Multiply(val, Avx2.Add(Vector256.Create(bf3), 106 | Avx2.Multiply(val, Avx2.Add(Vector256.Create(bf4), 107 | Avx2.Multiply(val, Avx2.Add(Vector256.Create(bf5), 108 | Avx2.Multiply(val, Vector256.Create(bf6)))))))))), 109 | Avx.Add(addcst, 110 | Avx2.Multiply(Vector256.Create(bf7), exp))); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/Log2.cs: -------------------------------------------------------------------------------- 1 | using System.Diagnostics; 2 | using System.Runtime.Intrinsics.X86; 3 | 4 | namespace Lokad.Numerics 5 | { 6 | public partial class FxMath 7 | { 8 | public static uint Log2(uint value) 9 | { 10 | Debug.Assert(value > 0); 11 | 12 | return 32 - 1 - Lzcnt.LeadingZeroCount(value); 13 | } 14 | 15 | public static ulong Log2(ulong value) 16 | { 17 | Debug.Assert(value > 0); 18 | 19 | return 64 - 1 - Lzcnt.X64.LeadingZeroCount(value); 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/LogGamma.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics; 3 | using System.Runtime.Intrinsics; 4 | using System.Runtime.Intrinsics.X86; 5 | 6 | namespace Lokad.Numerics 7 | { 8 | public partial class FxMath 9 | { 10 | private static readonly 
double Ln2Pi_2 = Math.Log(2.0 * Math.PI) / 2.0; 11 | 12 | /// 13 | /// Log Gama Function continued fractions NIST Handbook of Mathematical Functions see: 14 | /// https://univ.jeanpaulcalvi.com/Posters/ConfAuchWeb/abramovitz2.pdf 15 | /// 16 | public static float LogGamma(float x) 17 | { 18 | Debug.Assert(x > 0); 19 | 20 | // Numeric shift to improve accuracy (cost +1 'Log'). 21 | // logGamma(x) = logGamma(x + 1) - log(x) 22 | var result = -Log(x); 23 | x += 1f; 24 | 25 | const float Ln2Pi_2 = 0.91893853320467274178032f; 26 | 27 | // A & S eq. 6.1.48 (continuing fraction) 28 | const float a0 = (float)(1.0 / 12); 29 | const float a1 = (float)(1.0 / 30); 30 | const float a2 = (float)(53.0 / 210); 31 | const float a3 = (float)(195.0 / 371); 32 | const float a4 = (float)(22999.0 / 22737); 33 | const float a5 = (float)(29944523.0 / 19733142); 34 | const float a6 = (float)(109535241009.0 / 48264275462); 35 | 36 | var t6 = a6 / x; 37 | var t5 = a5 / (x + t6); 38 | var t4 = a4 / (x + t5); 39 | var t3 = a3 / (x + t4); 40 | var t2 = a2 / (x + t3); 41 | var t1 = a1 / (x + t2); 42 | var t0 = a0 / (x + t1); 43 | 44 | result += t0 - x + (x - 0.5f) * Log(x) + Ln2Pi_2; 45 | 46 | return result; 47 | } 48 | 49 | public static Vector256 LogGamma(Vector256 x) 50 | { 51 | const float one = 1f; 52 | const float half = 0.5f; 53 | 54 | // Numeric shift to improve accuracy (cost +1 'Log'). 55 | // logGamma(x) = logGamma(x + 1) - log(x) 56 | var result = Log(x); 57 | x = Avx.Add(x, Vector256.Create(one)); 58 | 59 | const float Ln2Pi_2 = 0.91893853320467274178032f; 60 | 61 | // A & S eq. 
6.1.48 (continuing fraction) 62 | const float a0 = (float)(1.0 / 12); 63 | const float a1 = (float)(1.0 / 30); 64 | const float a2 = (float)(53.0 / 210); 65 | const float a3 = (float)(195.0 / 371); 66 | const float a4 = (float)(22999.0 / 22737); 67 | const float a5 = (float)(29944523.0 / 19733142); 68 | const float a6 = (float)(109535241009.0 / 48264275462); 69 | 70 | var t6 = Avx.Divide(Vector256.Create(a6), x); 71 | var t5 = Avx.Divide(Vector256.Create(a5), Avx.Add(x, t6)); 72 | var t4 = Avx.Divide(Vector256.Create(a4), Avx.Add(x, t5)); 73 | var t3 = Avx.Divide(Vector256.Create(a3), Avx.Add(x, t4)); 74 | var t2 = Avx.Divide(Vector256.Create(a2), Avx.Add(x, t3)); 75 | var t1 = Avx.Divide(Vector256.Create(a1), Avx.Add(x, t2)); 76 | var t0 = Avx.Divide(Vector256.Create(a0), Avx.Add(x, t1)); 77 | 78 | result = Avx.Subtract( 79 | Avx.Add( 80 | Avx.Add(Avx.Subtract(t0, x), Avx.Multiply(Avx.Subtract(x, Vector256.Create(half)), Log(x))), 81 | Vector256.Create(Ln2Pi_2)), 82 | result); 83 | 84 | return result; 85 | } 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/Lokad.FasterMath.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | netcoreapp3.0 5 | Lokad.Numerics 6 | 7 | Lokad 8 | Copyright © Lokad 2019 9 | 10 | 0.1.0.0 11 | 0.1.0.0 12 | Lokad.FasterMath 13 | 0.1.0.0 14 | true 15 | Lokad 16 | Intrinsics accelerated math functions for .NET Core - trading accuracy for performance. 
17 | https://github.com/Lokad/FasterMath/blob/master/LICENSE.txt 18 | https://github.com/Lokad/FasterMath 19 | https://raw.githubusercontent.com/Lokad/FasterMath/master/lokad.png 20 | 0.1.0 21 | 22 | 23 | 24 | false 25 | false 26 | AnyCPU 27 | 28 | 29 | 30 | false 31 | AnyCPU 32 | 33 | 34 | 35 | 36 | True 37 | True 38 | SpanExtensions.tt 39 | 40 | 41 | 42 | 43 | 44 | TextTemplatingFileGenerator 45 | SpanExtensions.cs 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | True 56 | True 57 | SpanExtensions.tt 58 | 59 | 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/Lokad.FasterMath/ReadMe.md: -------------------------------------------------------------------------------- 1 | # Lokad.FasterMath 2 | 3 | High-performance low-precision mathematical operations in .NET 4 | leveraging hardware intrinsics. 5 | 6 | ## Design considerations 7 | 8 | The scalar and super-scalar variants must be numerically 9 | identical. Indeed, when considering hybrid scenarios, client 10 | code should not be "accidentally" correct by leveraging the 11 | scalar flavor over the super-scalar one. 

--------------------------------------------------------------------------------
/src/Lokad.FasterMath/SpanExtensions.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Lokad.Numerics
{
    // NOTE: this file mirrors SpanExtensions.tt; keep both in sync when editing.
    //
    // Every method below follows the same pattern:
    //  - both spans are reinterpreted as spans of Vector256<float> (8 floats per element);
    //  - whole vectors are processed with the Vector256 overload;
    //  - the remaining tail (fewer than 8 elements) is processed with the scalar overload.
    // The loops are driven by 'results', so 'values' must hold at least
    // 'results.Length' elements; extra input elements are ignored.
    public partial class FxMath
    {
        /// <summary>Applies <see cref="Digamma(float)"/> element-wise, from <paramref name="values"/> into <paramref name="results"/>.</summary>
        public static void Digamma(ReadOnlySpan<float> values, Span<float> results)
        {
            var v = MemoryMarshal.Cast<float, Vector256<float>>(values);
            var r = MemoryMarshal.Cast<float, Vector256<float>>(results);

            for (var i = 0; i < r.Length; i++)
                r[i] = Digamma(v[i]);

            for (var i = r.Length * Vector256<float>.Count; i < results.Length; i++)
                results[i] = Digamma(values[i]);
        }

        /// <summary>Applies <see cref="Exp(float)"/> element-wise, from <paramref name="values"/> into <paramref name="results"/>.</summary>
        public static void Exp(ReadOnlySpan<float> values, Span<float> results)
        {
            var v = MemoryMarshal.Cast<float, Vector256<float>>(values);
            var r = MemoryMarshal.Cast<float, Vector256<float>>(results);

            for (var i = 0; i < r.Length; i++)
                r[i] = Exp(v[i]);

            for (var i = r.Length * Vector256<float>.Count; i < results.Length; i++)
                results[i] = Exp(values[i]);
        }

        /// <summary>Applies <see cref="Log(float)"/> element-wise, from <paramref name="values"/> into <paramref name="results"/>.</summary>
        public static void Log(ReadOnlySpan<float> values, Span<float> results)
        {
            var v = MemoryMarshal.Cast<float, Vector256<float>>(values);
            var r = MemoryMarshal.Cast<float, Vector256<float>>(results);

            for (var i = 0; i < r.Length; i++)
                r[i] = Log(v[i]);

            for (var i = r.Length * Vector256<float>.Count; i < results.Length; i++)
                results[i] = Log(values[i]);
        }

        /// <summary>Applies <see cref="LogGamma(float)"/> element-wise, from <paramref name="values"/> into <paramref name="results"/>.</summary>
        public static void LogGamma(ReadOnlySpan<float> values, Span<float> results)
        {
            var v = MemoryMarshal.Cast<float, Vector256<float>>(values);
            var r = MemoryMarshal.Cast<float, Vector256<float>>(results);

            for (var i = 0; i < r.Length; i++)
                r[i] = LogGamma(v[i]);

            for (var i = r.Length * Vector256<float>.Count; i < results.Length; i++)
                results[i] = LogGamma(values[i]);
        }

    }
}
--------------------------------------------------------------------------------
/src/Lokad.FasterMath/SpanExtensions.tt:
--------------------------------------------------------------------------------
<#@ template debug="true" hostspecific="true" language="C#" #>
<#@ output extension=".cs" #>
using System;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Lokad.Numerics
{
    public partial class FxMath
    {
<#
    // One span overload is emitted per function listed here.
    var functions = new [] { "Digamma", "Exp", "Log", "LogGamma" };
    foreach(var f in functions)
    {
#>
        public static void <#= f #>(ReadOnlySpan<float> values, Span<float> results)
        {
            var v = MemoryMarshal.Cast<float, Vector256<float>>(values);
            var r = MemoryMarshal.Cast<float, Vector256<float>>(results);

            for (var i = 0; i < r.Length; i++)
                r[i] = <#= f #>(v[i]);

            for (var i = r.Length * Vector256<float>.Count; i < results.Length; i++)
                results[i] = <#= f #>(values[i]);
        }

<#
    }
#>
    }
}
--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/DigammaTests.cs:
--------------------------------------------------------------------------------
using System.Runtime.Intrinsics;
using Xunit;

namespace Lokad.Numerics.Tests
{
    public class DigammaTests
    {
        [Fact]
        public void Digamma_float_limited()
        {
            // Source: https://www.wolframalpha.com/ (2019-02-11)
            var results = new[] {
                (0.1f, -10.4237549f),
                (0.8f, -0.965008567f),
                (1.5f, 0.03648997f),
                (15f, 2.67434666166f),
                (150f, 5.0072982570f)
            };

            foreach (var (val, expected) in results)
            {
                var r = FxMath.Digamma(val);
                Assert.True(expected.RelError(r) < 1e-3f);
            }
        }

        [Fact]
        public void Digamma_float()
        {
            // Scalar implementation against the reference implementation.
            for (var i = 0.001; i < 10000d; i *= 1.2)
            {
                var expected = (float)AltMath.Digamma(i);
                var r = FxMath.Digamma((float)i);
                Assert.True(expected.RelError(r) < 1e-3f);
            }
        }

        [Fact]
        public void Digamma_Vector256()
        {
            // Vector implementation must match the scalar one exactly, on every lane.
            for (var i = 0.001; i < 10000d; i *= 1.2)
            {
                var r = FxMath.Digamma(Vector256.Create((float)i));
                var expected = FxMath.Digamma((float)i);

                for (var k = 0; k < 8; k++)
                {
                    Assert.Equal(expected, r.GetElement(k));
                }
            }
        }
    }
}

--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/ExpTests.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.Intrinsics;
using Xunit;

namespace Lokad.Numerics.Tests
{
    public class ExpTests
    {
        [Fact]
        public void Exp_float()
        {
            for (float i = -85; i < 85; i += 0.1f)
            {
                var r = FxMath.Exp(i);
                Assert.True(MathF.Exp(i).RelError(r) <= 1.72886892e-3f);
            }
        }

        [Fact]
        public void Exp_Vector256()
        {
            for (float i = -85; i < 85; i += 0.1f)
            {
                var r = FxMath.Exp(Vector256.Create((float)i));
                var expected = FxMath.Exp((float)i);

                for (var k = 0; k < 8; k++)
                {
                    // HACK: [vermore] can't reproduce perfect numerical identity
                    // between scalar and SIMD variants (very close though)
                    Assert.True(expected.AbsError(r.GetElement(k)) <= 1e-6f);
                    //Assert.Equal(expected, r.GetElement(k));
                }
            }
        }
    }
}

--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/Log2Tests.cs:
--------------------------------------------------------------------------------
using System;
using Xunit;

namespace Lokad.Numerics.Tests
{
    public class Log2Tests
    {
        [Fact]
        public void Log2_uint()
        {
            for (var i = 1.0; i < uint.MaxValue; i *= 1.3)
            {
                var n = (uint)i;
                Assert.Equal((uint)Math.Log(n, 2.0), FxMath.Log2(n));
            }
        }

        [Fact]
        public void Log2_ulong()
        {
            for (var i = 1.0; i < uint.MaxValue; i *= 1.3)
            {
                var n = (ulong)i;
                Assert.Equal((ulong)Math.Log(n, 2.0), FxMath.Log2(n));
            }
        }

        [Fact]
        public void Log2_WithLookup()
        {
            for (var i = 1.0; i < uint.MaxValue; i *= 1.3)
            {
                var n = (ulong)i;
                Assert.Equal((uint)Math.Log(n, 2.0), AltMath.Log2(n));
            }
        }
    }
}

--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/LogGammaTests.cs:
--------------------------------------------------------------------------------
using System.Runtime.Intrinsics;
using Xunit;

namespace Lokad.Numerics.Tests
{
    public class LogGammaTests
    {
        [Fact]
        public void LogGamma_float_limited()
        {
            // Source: https://www.wolframalpha.com/ (2019-02-11)
            var results = new[] {
                (0.1f, 2.252712651f),
                (0.8f, 0.152060f),
                (1.5f, -0.120782f),
                (15f, 25.191221f),
                (150f, 600.0094705f)
            };

            foreach (var (val, expected) in results)
            {
                var r = FxMath.LogGamma(val);
                Assert.True(expected.RelError(r) < 1e-4f);
            }
        }

        [Fact]
        public void LogGamma_float()
        {
            // Scalar implementation against the reference implementation.
            for (var i = 0.001; i < 10000d; i *= 1.2)
            {
                var expected = (float)AltMath.LogGamma(i);
                var r = FxMath.LogGamma((float)i);
                Assert.True(expected.RelError(r) < 1e-3f);
            }
        }

        [Fact]
        public void LogGamma_Vector256()
        {
            // Vector implementation must match the scalar one exactly, on every lane.
            for (var i = 0.001; i < 10000d; i *= 1.2)
            {
                var r = FxMath.LogGamma(Vector256.Create((float)i));
                var expected = FxMath.LogGamma((float)i);

                for (var k = 0; k < 8; k++)
                {
                    Assert.Equal(expected, r.GetElement(k));
                }
            }
        }
    }
}

--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/LogTests.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Linq;
using System.Runtime.Intrinsics;
using Xunit;

namespace Lokad.Numerics.Tests
{
    public class LogTests
    {
        [Fact]
        public void Log_float()
        {
            for (float i = -1; i < 85; i += 0.1f)
            {
                var r = FxMath.Log(i);
                Assert.True(MathF.Log(i).AbsError(r) <= 1e-4f);
            }
        }

        [Fact]
        public void Log_Vector256()
        {
            for (float i = -1; i < 85; i += 0.1f)
            {
                var r = FxMath.Log(Vector256.Create((float)i));
                var expected = FxMath.Log((float)i);

                for (var k = 0; k < 8; k++)
                {
                    Assert.Equal(expected, r.GetElement(k));
                }
            }
        }

        [Fact]
        public void Log_Span()
        {
            // The span overload must give the same values as the scalar overload.
            var pairs = new List<(float, float)>();
            for (float i = -1; i < 85; i += 0.1f)
            {
                pairs.Add((i, FxMath.Log(i)));
            }

            var inputs = pairs.Select(tu => tu.Item1).ToArray();
            var results = new float[pairs.Count];

            FxMath.Log(inputs, results);

            for (var i = 0; i < inputs.Length; i++)
            {
                Assert.Equal(pairs[i].Item2, results[i]);
            }
        }
    }
}

--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/Lokad.FasterMath.Tests.csproj:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | netcoreapp3.0 5 | 6 | Exe 7 | 8 | Lokad.Numerics.Tests 9 | 10 | Lokad 11 | Copyright © Lokad 2019 12 | 13 | 14 | 15 | false 16 | AnyCPU 17 | 18 | 19 | 20 | false 21 | AnyCPU 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 |
--------------------------------------------------------------------------------
/test/Lokad.FasterMath.Tests/NumericErrorExtension.cs:
--------------------------------------------------------------------------------
using System;

namespace Lokad.Numerics.Tests
{
    /// <summary>Error metrics used by the accuracy tests.</summary>
    public static class NumericErrorExtension
    {
        /// <summary>
        /// Relative error of <paramref name="other"/> with respect to <paramref name="baseline"/>:
        /// |baseline - other| / |baseline|. Two NaN values are considered identical (error 0).
        /// </summary>
        public static float RelError(this float baseline, float other)
        {
            if (float.IsNaN(baseline) && float.IsNaN(other))
                return 0f;

            // Divide by the magnitude of the baseline: dividing by the signed value made
            // the error negative for negative baselines, so any '< tolerance' check passed.
            return Math.Abs(baseline - other) / Math.Abs(baseline);
        }

        /// <summary>
        /// Absolute error |baseline - other|. Two NaN values are considered identical (error 0).
        /// </summary>
        public static float AbsError(this float baseline, float other)
        {
            if (float.IsNaN(baseline) && float.IsNaN(other))
                return 0f;

            return Math.Abs(baseline - other);
        }
    }
}
--------------------------------------------------------------------------------