├── .gitattributes ├── .gitignore ├── .paket ├── paket.bootstrapper.exe └── paket.targets ├── Angara.Statistics.sln ├── LICENSE.txt ├── README.md ├── RELEASE_NOTES.md ├── SECURITY.md ├── paket.dependencies ├── paket.lock ├── src └── Angara.Statistics │ ├── Angara.Statistics.fsproj │ ├── AssemblyInfo.fs │ ├── Filzbach.fs │ ├── Statistics.fs │ ├── paket.references │ └── paket.template └── tests └── Angara.Statistics.Tests ├── Angara.Statistics.Tests.fsproj ├── App.config ├── FilzbachTests.fs ├── SerializationTests.fs ├── Tests.fs └── paket.references /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp text=auto eol=lf 6 | *.vb diff=csharp text=auto eol=lf 7 | *.fs diff=csharp text=auto eol=lf 8 | *.fsi diff=csharp text=auto eol=lf 9 | *.fsx diff=csharp text=auto eol=lf 10 | *.sln text eol=crlf merge=union 11 | *.csproj merge=union 12 | *.vbproj merge=union 13 | *.fsproj merge=union 14 | *.dbproj merge=union 15 | 16 | # Standard to msysgit 17 | *.doc diff=astextplain 18 | *.DOC diff=astextplain 19 | *.docx diff=astextplain 20 | *.DOCX diff=astextplain 21 | *.dot diff=astextplain 22 | *.DOT diff=astextplain 23 | *.pdf diff=astextplain 24 | *.PDF diff=astextplain 25 | *.rtf diff=astextplain 26 | *.RTF diff=astextplain 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 3 | 4 | # User-specific files 5 | *.suo 6 | *.user 7 | *.sln.docstates 8 | 9 | # Xamarin Studio / monodevelop user-specific 10 | *.userprefs 11 | *.dll.mdb 12 | *.exe.mdb 13 | 14 | # Build results 15 | 16 | [Dd]ebug/ 17 | [Rr]elease/ 18 | x64/ 19 | build/ 20 | [Bb]in/ 21 | [Oo]bj/ 22 | 23 | # MSTest test Results 24 | [Tt]est[Rr]esult*/ 25 | [Bb]uild[Ll]og.* 26 | 27 | *_i.c 28 | *_p.c 29 | *.ilk 30 | *.meta 31 | *.obj 32 | *.pch 33 | *.pdb 34 | *.pgc 35 | *.pgd 36 | *.rsp 37 | *.sbr 38 | *.tlb 39 | *.tli 40 | *.tlh 41 | *.tmp 42 | *.tmp_proj 43 | *.log 44 | *.vspscc 45 | *.vssscc 46 | .builds 47 | *.pidb 48 | *.log 49 | *.scc 50 | 51 | # Visual C++ cache files 52 | ipch/ 53 | *.aps 54 | *.ncb 55 | *.opensdf 56 | *.sdf 57 | *.cachefile 58 | 59 | # Visual Studio profiler 60 | *.psess 61 | *.vsp 62 | *.vspx 63 | 64 | # Other Visual Studio data 65 | .vs/ 66 | 67 | # Guidance Automation Toolkit 68 | *.gpState 69 | 70 | # ReSharper is a .NET coding add-in 71 | _ReSharper*/ 72 | *.[Rr]e[Ss]harper 73 | 74 | # TeamCity is a build add-in 75 | _TeamCity* 76 | 77 | # DotCover is a Code Coverage Tool 78 | *.dotCover 79 | 80 | # NCrunch 81 | *.ncrunch* 82 | .*crunch*.local.xml 83 | 84 | # Installshield output folder 85 | [Ee]xpress/ 86 | 87 | # DocProject is a documentation generator add-in 88 | DocProject/buildhelp/ 89 | DocProject/Help/*.HxT 90 | DocProject/Help/*.HxC 91 | DocProject/Help/*.hhc 92 | DocProject/Help/*.hhk 93 | DocProject/Help/*.hhp 94 | DocProject/Help/Html2 95 | DocProject/Help/html 96 | 97 | # Click-Once directory 98 | publish/ 99 | 100 | # Publish Web Output 101 | *.Publish.xml 102 | 103 | # Enable nuget.exe in the .nuget folder (though normally executables are not tracked) 104 | !.nuget/NuGet.exe 105 | 106 | # Windows Azure Build Output 107 | csx 108 | *.build.csdef 109 | 110 | # Windows Store app package directory 111 | AppPackages/ 112 | 113 | # Others 114 | sql/ 115 | *.Cache 116 | ClientBin/ 117 | [Ss]tyle[Cc]op.* 118 | ~$* 119 | *~ 120 | *.dbmdl 121 | *.[Pp]ublish.xml 122 | *.pfx 123 | *.publishsettings 124 | 125 | # RIA/Silverlight projects 126 | Generated_Code/ 127 | 128 | # Backup & report files from converting an old project file to a newer 129 | # Visual Studio version. Backup files are not needed, because we have git ;-) 130 | _UpgradeReport_Files/ 131 | Backup*/ 132 | UpgradeLog*.XML 133 | UpgradeLog*.htm 134 | 135 | # SQL Server files 136 | App_Data/*.mdf 137 | App_Data/*.ldf 138 | 139 | 140 | #LightSwitch generated files 141 | GeneratedArtifacts/ 142 | _Pvt_Extensions/ 143 | ModelManifest.xml 144 | 145 | # ========================= 146 | # Windows detritus 147 | # ========================= 148 | 149 | # Windows image file caches 150 | Thumbs.db 151 | ehthumbs.db 152 | 153 | # Folder config file 154 | Desktop.ini 155 | 156 | # Recycle Bin used on file shares 157 | $RECYCLE.BIN/ 158 | 159 | # Mac desktop service store files 160 | .DS_Store 161 | 162 | # =================================================== 163 | # Exclude F# project specific directories and files 164 | # =================================================== 165 | 166 | # NuGet Packages Directory 167 | packages/ 168 | 169 | # Generated documentation folder 170 | docs/output/ 171 | 172 | # Temp folder used for publishing docs 173 | temp/ 174 | 175 | # Test results produced by build 176 | TestResults.xml 177 | 178 | # Nuget outputs 179 | nuget/*.nupkg 180 | release.cmd 181 | release.sh 182 | localpackages/ 183 | paket-files 184 | *.orig 185 | .paket/paket.exe 186 | docs/content/license.md 187 | docs/content/release-notes.md 188 | .fake 189 | docs/tools/FSharp.Formatting.svclog 190 | -------------------------------------------------------------------------------- /.paket/paket.bootstrapper.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Angara.Statistics/e6b8194efcdb9e7ca3fe2ff5c5335f4b83176f9a/.paket/paket.bootstrapper.exe -------------------------------------------------------------------------------- /.paket/paket.targets: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | true 6 | 7 | true 8 | $(MSBuildThisFileDirectory) 9 | $(MSBuildThisFileDirectory)..\ 10 | 11 | 12 | 13 | $(PaketToolsPath)paket.exe 14 | $(PaketToolsPath)paket.bootstrapper.exe 15 | "$(PaketExePath)" 16 | mono --runtime=v4.0.30319 "$(PaketExePath)" 17 | "$(PaketBootStrapperExePath)" 18 | mono --runtime=v4.0.30319 $(PaketBootStrapperExePath) 19 | 20 | $(PaketCommand) restore 21 | $(PaketBootStrapperCommand) 22 | 23 | RestorePackages; $(BuildDependsOn); 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /Angara.Statistics.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 12.00 2 | # Visual Studio 14 3 | VisualStudioVersion = 14.0.24720.0 4 | MinimumVisualStudioVersion = 10.0.40219.1 5 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = ".paket", ".paket", "{63297B98-5CED-492C-A5B7-A5B4F73CF142}" 6 | ProjectSection(SolutionItems) = preProject 7 | paket.dependencies = paket.dependencies 8 | paket.lock = paket.lock 9 | EndProjectSection 10 | EndProject 11 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Angara.Statistics", "src\Angara.Statistics\Angara.Statistics.fsproj", "{5161430D-44B4-441C-BE95-89C02F215D38}" 12 | EndProject 13 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "project", "project", "{BF60BC93-E09B-4E5F-9D85-95A519479D54}" 14 | ProjectSection(SolutionItems) = preProject 15 | build.fsx = build.fsx 16 | README.md = README.md 17 | RELEASE_NOTES.md = RELEASE_NOTES.md 18 | EndProjectSection 19 | EndProject 20 | Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tests", "tests", "{ED8079DD-2B06-4030-9F0F-DC548F98E1C4}" 21 | EndProject 22 | Project("{F2A71F9B-5D33-465A-A702-920D77279786}") = "Angara.Statistics.Tests", "tests\Angara.Statistics.Tests\Angara.Statistics.Tests.fsproj", "{D5620A55-AA34-4DE7-A970-B2168A06D28D}" 23 | EndProject 24 | Global 25 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 26 | Debug|Any CPU = Debug|Any CPU 27 | Release|Any CPU = Release|Any CPU 28 | EndGlobalSection 29 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 30 | {5161430D-44B4-441C-BE95-89C02F215D38}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 31 | {5161430D-44B4-441C-BE95-89C02F215D38}.Debug|Any CPU.Build.0 = Debug|Any CPU 32 | {5161430D-44B4-441C-BE95-89C02F215D38}.Release|Any CPU.ActiveCfg = Release|Any CPU 33 | {5161430D-44B4-441C-BE95-89C02F215D38}.Release|Any CPU.Build.0 = Release|Any CPU 34 | {D5620A55-AA34-4DE7-A970-B2168A06D28D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 35 | {D5620A55-AA34-4DE7-A970-B2168A06D28D}.Debug|Any CPU.Build.0 = Debug|Any CPU 36 | {D5620A55-AA34-4DE7-A970-B2168A06D28D}.Release|Any CPU.ActiveCfg = Release|Any CPU 37 | {D5620A55-AA34-4DE7-A970-B2168A06D28D}.Release|Any CPU.Build.0 = Release|Any CPU 38 | EndGlobalSection 39 | GlobalSection(SolutionProperties) = preSolution 40 | HideSolutionNode = FALSE 41 | EndGlobalSection 42 | GlobalSection(NestedProjects) = preSolution 43 | {D5620A55-AA34-4DE7-A970-B2168A06D28D} = {ED8079DD-2B06-4030-9F0F-DC548F98E1C4} 44 | EndGlobalSection 45 | EndGlobal 46 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Angara.Statistics 2 | 3 | Copyright (c) 2016 Microsoft Corporation 4 | 5 | All rights reserved. 6 | 7 | MIT License 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in 17 | all copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 25 | THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Issue Stats](http://issuestats.com/github/microsoft/Angara.Statistics/badge/issue)](http://issuestats.com/github/microsoft/Angara.Statistics) 2 | [![Issue Stats](http://issuestats.com/github/microsoft/Angara.Statistics/badge/pr)](http://issuestats.com/github/microsoft/Angara.Statistics) 3 | 4 | Angara.Statistics 5 | ================= 6 | 7 | A collection of essential algorithms for Bayesian data constrained modelling. 8 | Includes Mersenne twister random number generator, common probability distributions, 9 | sampling statistics and quantiles, a kernel density estimator and 10 | a resumable Metropolis-Hastings MCMC sampler. 11 | 12 | Documentation: http://predictionmachines.github.io/Angara.Statistics 13 | 14 | 15 | ## Build Status 16 | 17 | Mono | .NET 18 | ---- | ---- 19 | [![Mono CI Build Status](https://img.shields.io/travis/predictionmachines/Angara.Statistics/master.svg)](https://travis-ci.org/predictionmachines/Angara.Statistics) | [![.NET Build Status](https://img.shields.io/appveyor/ci/vassilyl/angara-statistics/master.svg)](https://ci.appveyor.com/project/vassilyl/angara-statistics) 20 | 21 | ## Maintainer(s) 22 | 23 | - [@vassilyl](https://github.com/vassilyl) 24 | - [@msdvoits](https://github.com/msdvoits) 25 | 26 | -------------------------------------------------------------------------------- /RELEASE_NOTES.md: -------------------------------------------------------------------------------- 1 | ### 0.1.5 2 | 3 | * Fixed a bug in Gamma random number generator. 4 | * Changed parameterization of Exponential distribution. 5 | * Added logit and logistic functions. 6 | * Added fromPiecewise helper method to build Mixture distribution 7 | * Renamed SamplerResult to SamplerCheckpoint and added burnInTrace to the structure 8 | * Changed signature of continuemcmc 9 | 10 | ### 0.1.4 - 2016-04-20 11 | 12 | * Sampler state can be serialized to continue unfinished computation. 13 | * Fixed Mersenne twister copy constructor 14 | 15 | ### 0.1.3 - 2016-04-12 16 | 17 | * Change layout of the repository to match [ProjectScaffold](http://fsprojects.github.io/ProjectScaffold/) recommendation. 18 | * Fix bisection algorithm of `ridders`. 19 | * Add documentation 20 | * Switch back to .NET Framework 4.5.2 21 | 22 | ### 0.1.2 - 2016-03-11 23 | 24 | * `prior` field of `ParameterDefinition` structure is a function instead of a Distribution. 25 | * Redesign of `Parameters.Add` overloads. 26 | * Switch to .NET Framework 4.6.1 27 | 28 | ### 0.1.1 - 2016-02-17 29 | 30 | Initial NuGet release. 31 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://aka.ms/opensource/security/definition), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://aka.ms/opensource/security/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://aka.ms/opensource/security/pgpkey). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://aka.ms/opensource/security/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://aka.ms/opensource/security/bounty) page for more details about our active programs. 32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://aka.ms/opensource/security/cvd). 40 | 41 | 42 | -------------------------------------------------------------------------------- /paket.dependencies: -------------------------------------------------------------------------------- 1 | source https://nuget.org/api/v2 2 | nuget Angara.Serialization 3 | 4 | group Test 5 | source https://nuget.org/api/v2 6 | 7 | nuget Angara.Serialization.Json 8 | nuget NUnit ~> 2 9 | nuget NUnit.Runners ~> 2 10 | nuget FsUnit 11 | nuget unquote -------------------------------------------------------------------------------- /paket.lock: -------------------------------------------------------------------------------- 1 | NUGET 2 | remote: https://www.nuget.org/api/v2 3 | specs: 4 | Angara.Serialization (0.2) 5 | 6 | GROUP Test 7 | NUGET 8 | remote: https://www.nuget.org/api/v2 9 | specs: 10 | Angara.Serialization (0.2) 11 | Angara.Serialization.Json (0.2) 12 | Angara.Serialization (>= 0.2) 13 | Newtonsoft.Json (>= 8.0.3) 14 | FSharp.Core (4.0.0.1) 15 | FsUnit (1.4.1) 16 | FSharp.Core (>= 3.1.2.5) 17 | NUnit (2.6.4) 18 | Newtonsoft.Json (8.0.3) 19 | NUnit (2.6.4) 20 | NUnit.Runners (2.6.4) 21 | Unquote (3.1.1) 22 | -------------------------------------------------------------------------------- /src/Angara.Statistics/Angara.Statistics.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | 2.0 8 | 5161430d-44b4-441c-be95-89c02f215d38 9 | Library 10 | Angara.Statistics 11 | Angara.Statistics 12 | v4.5.2 13 | 4.4.0.0 14 | Angara.Statistics 15 | 16 | 17 | 18 | true 19 | full 20 | false 21 | false 22 | .\bin\Debug 23 | DEBUG;TRACE 24 | 3 25 | --warnon:1182 26 | 27 | 28 | pdbonly 29 | true 30 | true 31 | .\bin\Release 32 | TRACE 33 | 3 34 | .\bin\Release\Angara.Statistics.xml 35 | --warnon:1182 36 | 37 | 38 | 39 | 40 | True 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 11 55 | 56 | 57 | 58 | 59 | $(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets 60 | 61 | 62 | 63 | 64 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets 65 | 66 | 67 | 68 | 69 | 76 | 77 | 78 | 79 | 80 | 81 | ..\..\packages\Angara.Serialization\lib\net452\Angara.Serialization.dll 82 | True 83 | True 84 | 85 | 86 | 87 | 88 | -------------------------------------------------------------------------------- /src/Angara.Statistics/AssemblyInfo.fs: -------------------------------------------------------------------------------- 1 | namespace System 2 | open System.Reflection 3 | 4 | [] 5 | [] 6 | [] 7 | [] 8 | [] 9 | do () 10 | 11 | module internal AssemblyVersionInformation = 12 | let [] Version = "0.1.3" 13 | let [] InformationalVersion = "0.1.3" 14 | -------------------------------------------------------------------------------- /src/Angara.Statistics/Filzbach.fs: -------------------------------------------------------------------------------- 1 | module Angara.Filzbach 2 | open Angara.Statistics 3 | 4 | [][] 5 | type ParameterDefinition = 6 | { 7 | /// An index of the parameter in a parameter values array. 8 | index: int 9 | /// A number of values. For vector parameters 'size>1` 10 | size: int 11 | /// A lower bound of parameter values. 12 | lower: float 13 | /// An upper bound of parameter values. 14 | upper: float 15 | /// Prior probability distribution of the parameter; all elements of a vector parameter reuse the same prior. 16 | prior: Distribution 17 | /// When `isLog=true` the sampler transforms the parameter to logarithmic space. 18 | isLog: bool 19 | /// A preset log_pdf of prior distribution; if isLog, then the first argument of prior is log-parameter. 20 | /// This field automatically gets value from `prior`. 21 | log_priordf: float -> float 22 | /// If `delay<1`, the sampler initializes the parameter value with a random number. 23 | /// If `delay=1`, the sampler starts with the value from the definition record. 24 | /// If `delay>1`, the sampler doesn't change the parameter value for the first 'delay' iterations. 25 | delay: int 26 | } 27 | member x.isFixed = x.delay = System.Int32.MaxValue 28 | override x.Equals other = 29 | match other with 30 | | :? ParameterDefinition as y -> 31 | y.index = x.index && y.size = x.size && y.lower = x.lower && x.upper = x.upper 32 | && y.isLog = x.isLog && y.delay = x.delay && y.prior = x.prior 33 | | _ -> false 34 | override x.GetHashCode() = 35 | x.index.GetHashCode() ^^^ x.size.GetHashCode() ^^^ x.lower.GetHashCode() ^^^ x.upper.GetHashCode() 36 | ^^^ x.isLog.GetHashCode() ^^^ x.delay.GetHashCode() ^^^ x.prior.GetHashCode() 37 | 38 | type IParameters = System.Collections.Generic.IReadOnlyDictionary 39 | 40 | /// A container for model parameters. 41 | type Parameters private (pdefs: Map, pvalues: float[]) = 42 | static let defaultLog isLog = defaultArg isLog false 43 | static let defaultDelay delay = defaultArg delay 0 44 | static let uniformPrior _ = 0. 45 | static let log_priordf is_log prior = 46 | match is_log, prior with 47 | | false, Uniform(_,_) 48 | | true, LogUniform(_,_) -> uniformPrior 49 | | false, Normal(m,s) -> fun x -> let d = (x-m)/s in 0.5*d*d 50 | | true, LogNormal(m,s) -> let log_m = log m in fun x -> let d = (x-log_m)/s in 0.5*d*d 51 | | false, _ -> log_pdf prior 52 | | true, _ -> fun x -> log_pdf prior (exp x) 53 | let avalue pdef = Array.sub pvalues pdef.index pdef.size 54 | let all = pdefs |> Seq.map (fun kv -> 55 | System.Collections.Generic.KeyValuePair(kv.Key, avalue kv.Value)) 56 | do 57 | assert(Array.length pvalues = Map.fold (fun sum _ def -> sum + def.size) 0 pdefs) 58 | static member Empty = Parameters(Map.empty,[||]) 59 | member internal x.definitions = pdefs 60 | member internal x.values = pvalues 61 | 62 | /// Adds a parameter to the container. 63 | /// Parameter name. 64 | /// An array of one or more parameter values. 65 | /// Lower bound of parameter values. 66 | /// Upper bound of parameter values. 67 | /// Sampler behaviour: <1 (default) -- initialize values with random numbers; 68 | /// =1 -- start with the values supplied in the call; 69 | /// >1 -- release the parameter values after 'delay' iterations. 70 | /// If true, the sampler will use logarithmic transform for the parameter. 71 | /// The default is false. 72 | /// Prior knowledge. The default is a non-informative prior. 73 | member x.Add(name, values, lower, upper, ?delay, ?isLog, ?prior) = 74 | if name = null || name = "" || Map.containsKey name pdefs then 75 | invalidArg "name" "parameters must have unique non-empty names." 76 | if Array.length values < 1 then 77 | invalidArg "values" "empty values array, each parameter must have at least one value." 78 | let prior' = defaultArg prior (Uniform(lower, upper)) 79 | let lower' = match prior' with Uniform(a,_)|LogUniform(a,_) -> max a lower | _ -> lower 80 | let upper' = match prior' with Uniform(_,b)|LogUniform(_,b) -> min b upper | _ -> upper 81 | if lower' > upper' then 82 | invalidArg "lower" (sprintf "lower %g must not be greater than upper %g." lower' upper') 83 | values |> Array.iteri (fun i v -> 84 | if v < lower' || v > upper' then 85 | invalidArg "values" (sprintf "values[%d] is out of [lower..upper] range" i)) 86 | let isLog' = defaultLog isLog 87 | if isLog' && lower' <= 0. then 88 | invalidArg "lower" "lower must be >0 because isLog=true." 89 | Parameters( 90 | pdefs |> Map.add name { 91 | index = Array.length pvalues 92 | size = Array.length values 93 | lower = lower' 94 | upper = upper' 95 | isLog = isLog' 96 | delay = (if lower=upper then System.Int32.MaxValue else defaultDelay delay) 97 | prior = prior' 98 | log_priordf = log_priordf isLog' prior' 99 | }, 100 | Array.append pvalues values) 101 | /// Add a fixed scalar parameter. 102 | member x.Add(name, value) = x.Add(name, [|value|], value, value, System.Int32.MaxValue) 103 | /// Add a fixed vector parameter. 104 | member x.Add(name, values) = x.Add(name, values, Array.min values, Array.max values, System.Int32.MaxValue) 105 | /// Add a parameter with a prior 106 | member x.Add(name, prior, ?size) = 107 | let theSize = defaultArg size 1 108 | if theSize<1 then invalidArg "size" "vector parameter cannot have size < 1" 109 | match prior with 110 | | Uniform(lower, upper) -> 111 | if upper // isLog = true 115 | if lower <= 0. then invalidArg "prior" "in LogUniform prior lower must be > 0" else 116 | if upper 120 | // the below [lower, upper] interval contain 0.99999 of prior probability 121 | if sigma <= 0. then invalidArg "prior" "in Normal prior sigma must be > 0" else 122 | x.Add(name, Array.create theSize mu, mu - 4.417 * sigma, mu + 4.417 * sigma, 0, false, prior) 123 | | LogNormal(mu, sigma) -> // isLog = true 124 | // the below [lower, upper] interval contain 0.99999 of prior probability 125 | if mu <= 0. then invalidArg "prior" "in Normal prior mean must be > 0" else 126 | if sigma <= 0. then invalidArg "prior" "in Normal prior sigma must be > 0" else 127 | let logMu = log mu 128 | x.Add(name, Array.create theSize mu, exp(logMu - 4.417 * sigma), exp(logMu + 4.417 * sigma), 0, true, prior) 129 | | _ -> invalidArg "prior" "this method overload accepts only Uniform, LogUniform, Normal and LogNormal priors" 130 | 131 | /// Add a parameter with a prior and starting values 132 | member x.Add(name, values, prior) = 133 | if Array.length values < 1 then invalidArg "values" "vector parameter cannot have size < 1" 134 | match prior with 135 | | Uniform(lower, upper) -> 136 | if upper // isLog = true 140 | if lower <= 0. then invalidArg "prior" "in LogUniform prior lower must be > 0" else 141 | if upper 145 | // the below [lower, upper] interval contain 0.99999 of prior probability 146 | if sigma <= 0. then invalidArg "prior" "in Normal prior sigma must be > 0" else 147 | x.Add(name, values, min (mu - 4.417 * sigma) (Array.min values), max (mu + 4.417 * sigma) (Array.max values), 1, false, prior) 148 | | LogNormal(mu, sigma) -> 149 | // the below [lower, upper] interval contain 0.99999 of prior probability 150 | if mu <= 0. then invalidArg "prior" "in Normal prior mean must be > 0" else 151 | if sigma <= 0. then invalidArg "prior" "in Normal prior sigma must be > 0" else 152 | let logMu = log mu 153 | x.Add(name, values, min (exp(logMu - 4.417 * sigma)) (Array.min values), max (exp(logMu + 4.417 * sigma)) (Array.max values), 1, true, prior) 154 | | _ -> invalidArg "prior" "this method overload accepts only Uniform, LogUniform, Normal and LogNormal priors" 155 | 156 | /// Add a parameter. 157 | /// This signature is compatible with `parameter_create` and `parameter_create_vector` functions 158 | /// described in [Filzbach User Guide](http://research.microsoft.com/en-us/um/cambridge/groups/science/tools/filzbach/filzbach%20user%20gude%20v.1.1.pdf). 159 | /// The `dsply` argument is not used here. 160 | member x.Add(name, lb:float, ub:float, ``val``:float, ``type``, ``fixed``, dsply:int, ?number) = 161 | ignore dsply // prevent 'not used' warning 162 | let size = defaultArg number 1 163 | if ``type`` = 0 then 164 | x.Add(name, Array.create size ``val``, lb, ub, (if ``fixed`` = 0 then 0 else System.Int32.MaxValue), false, Uniform(lb, ub)) 165 | else 166 | x.Add(name, Array.create size ``val``, lb, ub, (if ``fixed`` = 0 then 0 else System.Int32.MaxValue), true, LogUniform(lb,ub)) 167 | 168 | /// Replaces all parameter values. 169 | /// For a parameter `"p"` the parameter values are at index `x.GetDefinition("p").index` in the `values` array. 170 | member x.SetValues (values:float[]) = 171 | if Array.length values <> Array.length pvalues then 172 | invalidArg "values" "invalid length of the array." 173 | Parameters(pdefs, values) 174 | 175 | /// Fast access to parameter values. See for explanation of indices. 176 | member x.GetValue idx = pvalues.[idx] 177 | 178 | /// Get a value of a scalar parameter. 179 | member x.GetValue name = 180 | let d = pdefs.[name] 181 | if d.size>1 then invalidOp ("use getValue(name,index) for the vector parameter "+name+".") 182 | pvalues.[d.index] 183 | 184 | /// Get a value of a vector parameter. 185 | member x.GetValue(name,idx) = 186 | let d = pdefs.[name] 187 | if idx<0 || idx >= d.size then raise(System.IndexOutOfRangeException()) 188 | pvalues.[d.index + idx] 189 | 190 | /// Get an array of all values of a parameter. 191 | member x.GetValues name = avalue pdefs.[name] 192 | 193 | /// Get all values of all parameters. 194 | /// For a parameter `"p"` the parameter values are at index `x.GetDefinition("p").index` in the `values` array. 195 | member x.AllValues = Seq.ofArray pvalues 196 | 197 | /// Total number of all parameter values. 198 | member x.CountValues = Array.length pvalues 199 | 200 | /// Get a parameter definition. 201 | member x.GetDefinition name = pdefs.[name] 202 | 203 | /// Get a parameter name by value index. 204 | member x.GetName idx = 205 | if idx < 0 || idx >= Array.length pvalues then raise(System.IndexOutOfRangeException()) 206 | Map.findKey (fun _ d -> idx >= d.index && idx < d.index+d.size) pdefs 207 | 208 | interface IParameters with 209 | member x.Count = pdefs.Count 210 | member x.ContainsKey key = pdefs.ContainsKey key 211 | member x.Keys = pdefs |> Seq.map (fun kv -> kv.Key) 212 | member x.Values = all |> Seq.map (fun kv -> kv.Value) 213 | member x.get_Item key = avalue pdefs.[key] 214 | member x.TryGetValue(key, value: byref) = 215 | if pdefs.ContainsKey key then 216 | value <- avalue pdefs.[key] 217 | true 218 | else false 219 | interface System.Collections.Generic.IEnumerable> with 220 | member x.GetEnumerator() = all.GetEnumerator() 221 | interface System.Collections.IEnumerable with 222 | member x.GetEnumerator() = (all:>System.Collections.IEnumerable).GetEnumerator() 223 | override x.Equals other = 224 | match other with 225 | | :? Parameters as y -> y.definitions = x.definitions && y.values = x.values 226 | | _ -> false 227 | override x.GetHashCode() = pdefs.GetHashCode() ^^^ pvalues.GetHashCode() 228 | 229 | type Sample = {values:float[]; logLikelihood:float; logPrior:float} 230 | 231 | type SamplerCheckpoint = 232 | { 233 | burnedIn:Sampler 234 | burnInTrace: (float*float) list 235 | final:Sampler 236 | samples:Sample list 237 | thinning: int 238 | acceptanceRate: float 239 | } 240 | 241 | /// An immutable state of Filzbach MCMC sampler. 242 | and Sampler private (// utilities 243 | pall: ParameterDefinition[], 244 | // variables 245 | metr_k: int, 246 | rng: MT19937, 247 | pp:Parameters, 248 | deltas:float[], 249 | ltotold: float, 250 | ptotold:float, 251 | accept: bool, // the probe has been accepted 252 | runalt:int[], // number of alterations of individual parameters 253 | runacc:int[] // number of accepted alterations of individual parameters 254 | ) = 255 | static let log_prior pall values = Array.fold2 (fun sum d v -> sum + d.log_priordf v) 0. pall values 256 | 257 | new(copy:Sampler) = 258 | let metr_k, (seed:uint32[]), (pp:Parameters), deltas, ltotold, ptotold, accept, runalt, runacc = copy.State 259 | let pall = Array.init pp.CountValues (fun i -> pp.GetName i |> pp.GetDefinition) 260 | Sampler(pall, metr_k, MT19937 seed, pp, deltas, ltotold, ptotold, accept, runalt, runacc) 261 | 262 | static member internal Restore(metr_k, rng, (pp:Parameters), deltas, ltotold, ptotold, accept, runalt, runacc) = 263 | let pall = Array.init pp.CountValues (fun i -> pp.GetName i |> pp.GetDefinition) 264 | Sampler(pall, metr_k, rng, pp, deltas, ltotold, ptotold, accept, runalt, runacc) 265 | 266 | static member Create(pp: Parameters, rng: MT19937, logl: Parameters -> float) = 267 | // init_chains 268 | let paramcount = pp.CountValues 269 | let pall = Array.init paramcount (fun i -> pp.GetName i |> pp.GetDefinition) 270 | // initRandomValues 271 | let values = pall |> Array.mapi (fun i def -> 272 | if def.delay<1 273 | then Uniform(def.lower, def.upper) |> draw rng 274 | else pp.values.[i]) 275 | // initStepSizes 276 | let deltas = pall |> Array.map (fun def -> 277 | if def.isLog 278 | then 0.50*(log def.upper - log def.lower) 279 | else 0.50*(def.upper - def.lower)) 280 | let runalt = Array.create paramcount 0 281 | let runacc = Array.create paramcount 0 282 | // init_likelihood 283 | let ltotold = pp.SetValues values |> logl 284 | let ptotold = log_prior pall values 285 | // initialize iteration number 286 | Sampler.Restore(1, rng, pp.SetValues values, deltas, ltotold, ptotold, false, runalt, runacc) 287 | 288 | /// Advance one iteration of either burn-in or sampling 289 | member x.Probe(isBurnIn:bool, logl: Parameters -> float) = 290 | let paramcount = pall.Length 291 | let rng = MT19937(rng) 292 | let values = Array.copy pp.values 293 | let deltas = Array.copy deltas 294 | let runalt = Array.copy runalt 295 | let runacc = Array.copy runacc 296 | 297 | // Select parameters to change. Most of the time change only one, two or three params. 298 | let alterable = [for i in 0..paramcount-1 do if pall.[i].delay < metr_k then yield i] 299 | let freeparamcount = alterable.Length 300 | if freeparamcount=0 then 301 | Sampler(pall, metr_k+1, rng, pp, deltas, ltotold, ptotold, accept, runalt, runacc) 302 | else 303 | let alt = // chain_params[i].alt=1 ~ alt |> List.any (fun item -> item=i) 304 | if freeparamcount=1 then alterable // one parameter always alters 305 | elif rng.uniform_float64() < 0.670 then 306 | // choose one param to alter 307 | let rnd = alterable.[rng.uniform_int(freeparamcount-1)] 308 | //alter parameters close by? 309 | [ 310 | if (rnd-1 >= 0) && (pall.[rnd-1].delay < metr_k) && (rng.uniform_float64() < 0.5) 311 | then yield rnd-1 312 | yield rnd 313 | if (rnd+1 < paramcount) && (pall.[rnd+1].delay < metr_k) && (rng.uniform_float64() < 0.5) 314 | then yield rnd+1 315 | ] 316 | else 317 | // change many parameters at once 318 | // draw prob change for this iteration 319 | let palt = min 0.99 (3.0/(float freeparamcount) * exp(4.0*(rng.uniform_float64() - 0.50))) 320 | // number of parameters to choose 321 | let nalt = max 1 (Binomial(freeparamcount, palt) |> draw rng |> int) 322 | // Knuth shuffle 323 | let shuffle = Array.create freeparamcount 0 324 | for i in 1..freeparamcount-1 do 325 | let j = rng.uniform_int i 326 | if j < i then shuffle.[i] <- shuffle.[j] 327 | shuffle.[j] <- i 328 | [for i in 0..nalt-1 -> alterable.[shuffle.[i]]] 329 | 330 | // change parameter values, i.e. make the 'jump' 331 | // VL: This should be compatible with original Filzbach as of v1.2 during the burn-in phase 332 | // and properly accounts for parameter bounds during the sampling phase 333 | for i in alt do 334 | let old = values.[i] 335 | let mutable more = true 336 | while more do 337 | let add = rng.normal() * deltas.[i] 338 | if pall.[i].isLog then 339 | values.[i] <- old * exp(add) 340 | else 341 | values.[i] <- old + add 342 | more <- isBurnIn && (values.[i]pall.[i].upper) 343 | // in burn-in phase we cannot jump out of bounds 344 | let inbounds = isBurnIn || (alt |> List.forall (fun i -> 345 | values.[i]>=pall.[i].lower && values.[i]<=pall.[i].upper)) 346 | 347 | // calc new lnlike 348 | let ltotnew = if inbounds then pp.SetValues values |> logl else ltotold 349 | let ptotnew = if inbounds then log_prior pall values else ptotold 350 | 351 | // compare new to old and accept or reject -- METROPOLIS CRITERION IS IN HERE 352 | let accept = 353 | inbounds && 354 | let dlik = (ltotnew+ptotnew) - (ltotold+ptotold) 355 | dlik >= 0. || 356 | (let rndnum = max 0.00000010 (min 0.99999990 (rng.uniform_float64())) 357 | log rndnum < dlik) 358 | 359 | // act on acceptance 360 | for i in alt do 361 | runalt.[i] <- runalt.[i]+1 362 | if accept then runacc.[i] <- runacc.[i]+1 363 | 364 | if isBurnIn then 365 | // adjust jump steps 366 | for ii in 0..paramcount-1 do 367 | if runalt.[ii]=20 then 368 | let dmin, dmax = 369 | let def = pall.[ii] 370 | if def.isLog then 0.010, 10.0 //TODO: ???? 371 | else let full = def.upper-def.lower in 0.0010*full, 0.50*full 372 | if runacc.[ii] < 4 then 373 | // decrease temperature by 20% 374 | deltas.[ii] <- max dmin (min dmax (deltas.[ii] * 0.80)) 375 | elif runacc.[ii] < 5 then 376 | // VL: decrease temperature by 10% 377 | deltas.[ii] <- max dmin (min dmax (deltas.[ii] * 0.90)) 378 | elif runacc.[ii] > 6 then 379 | // increase temperature by 20% 380 | deltas.[ii] <- max dmin (min dmax (deltas.[ii] * 1.20)) 381 | elif runacc.[ii] > 5 then 382 | // VL: increase temperature by 10% 383 | deltas.[ii] <- max dmin (min dmax (deltas.[ii] * 1.10)) 384 | runalt.[ii] <- 0 385 | runacc.[ii] <- 0 386 | 387 | if (accept) then 388 | Sampler(pall, metr_k+1, rng, pp.SetValues values, deltas, ltotnew, ptotnew, accept, runalt, runacc) 389 | else 390 | Sampler(pall, metr_k+1, rng, pp, deltas, ltotold, ptotold, accept, runalt, runacc) 391 | 392 | member x.Iteration = metr_k 393 | member x.Parameters = pp 394 | member x.LogLikelihood = ltotold 395 | member x.LogPrior = ptotold 396 | member x.SamplingWidths = Array.copy deltas 397 | member x.IsAccepted = accept 398 | member internal x.State = metr_k, rng.get_seed(), pp, Array.copy deltas, ltotold, ptotold, accept, Array.copy runalt, Array.copy runacc 399 | 400 | /// Complete sampling procedure that does `burnCount` burn-in iterations 401 | /// followed by collecting `sampleCount` samples from posterior. 402 | /// Total number of iterations is `burnCount + thinning * sampleCount`. 403 | static member runmcmc(pp, logl, burnCount, sampleCount, ?thinning, ?rng) = 404 | let thinning = defaultArg thinning 100 405 | if thinning<1 then invalidArg "thinning" "must be > 0." 406 | let rng = defaultArg rng (MT19937()) 407 | let sampler = Sampler.Create(pp, rng, logl) 408 | let checkpoint = {burnedIn=sampler; final=sampler; burnInTrace=[]; 409 | samples=[]; acceptanceRate=0.; thinning=thinning} 410 | Sampler.continuemcmc(checkpoint, logl, burnCount, sampleCount) 411 | 412 | /// Continuation of sampling procedure after incomplete burn-in. It does `burnCount` additional burn-in iterations 413 | /// followed by collecting `sampleCount` samples from posterior. 414 | /// Total number of iterations is `burnCount + thinning * sampleCount`. 415 | static member continuemcmc(checkpoint:SamplerCheckpoint, logl, burnCount, sampleCount) = 416 | let thinning = checkpoint.thinning 417 | if thinning<1 then invalidArg "thinning" "must be > 0." 418 | // initialize sampler 419 | let mutable sampler = checkpoint.burnedIn 420 | let checkpointBurnCount = sampler.Iteration 421 | let checkpointSamples, checkpointRate = 422 | if burnCount>checkpointBurnCount then [], 1. 423 | else checkpoint.samples, checkpoint.acceptanceRate 424 | // do burn-in iterations 425 | let trace = 426 | [ 427 | for _ in checkpointBurnCount..burnCount -> 428 | sampler <- sampler.Probe(true, logl) 429 | sampler.LogLikelihood, sampler.LogPrior 430 | ] 431 | let burnedIn = Sampler(sampler) // saved copy 432 | // collect sampleCount samples 433 | let checkpointSamplesCount = checkpointSamples.Length 434 | let mutable countAccepted = 0 435 | let samples = 436 | [ 437 | for _ in checkpointSamplesCount+1..sampleCount -> 438 | for _ in 1..thinning do 439 | sampler <- sampler.Probe(false, logl) 440 | if sampler.IsAccepted then countAccepted <- countAccepted + 1 441 | {values=Array.copy sampler.Parameters.values; logLikelihood = sampler.LogLikelihood; logPrior = sampler.LogPrior} 442 | ] 443 | { 444 | burnedIn=burnedIn 445 | burnInTrace = checkpoint.burnInTrace @ trace 446 | final = sampler 447 | samples = checkpointSamples @ samples 448 | acceptanceRate = 449 | ((float countAccepted + checkpointRate * float checkpointSamplesCount * float thinning) 450 | / (float sampleCount * float thinning)) 451 | thinning = thinning 452 | } 453 | 454 | /// Prints summary of results from `runmcmc` or `continuemcmc`. 455 | static member print {final=sampler; samples=samples; acceptanceRate = acceptanceRate} = 456 | printfn "Samples max log likelihood*prior = %g, acceptance rate at sampling = %5.3f" 457 | (samples |> Seq.map (fun {logLikelihood=logl; logPrior=logp} -> logl+logp) |> Seq.max) 458 | acceptanceRate 459 | printfn "------------+------------+------------+------------+------------+------------+------------+------------+" 460 | printfn " name | lower | lower 95%% | lower 68%% | median | upper 68%% | upper 95%% | upper | isLog" 461 | printfn "------------+------------+------------+------------+------------+------------+------------+------------+" 462 | for idx in 0..sampler.Parameters.CountValues-1 do 463 | let q = qsummary (samples |> Seq.map (fun {values=sample} -> sample.[idx])) 464 | let name = sampler.Parameters.GetName idx 465 | let pdef = sampler.Parameters.GetDefinition name 466 | let fullname = if pdef.size=1 then name else sprintf "%s[%d]" name (idx-pdef.index) 467 | printfn " %10s | %10g | %10g | %10g | %10g | %10g | %10g | %10g | %A" 468 | (fullname.Substring(0,min 10 fullname.Length)) 469 | pdef.lower 470 | q.lb95 q.lb68 q.median q.ub68 q.ub95 471 | pdef.upper pdef.isLog 472 | for off in 10..10..fullname.Length do 473 | printfn " %10s" (fullname.Substring(off,min 10 (fullname.Length-off))) 474 | 475 | module Serialization = 476 | open Angara.Serialization 477 | open Angara.Statistics.Serialization 478 | 479 | let invalidInfoSet() = invalidArg "is" "invalid InfoSet" 480 | 481 | let serializeParameters (p:Parameters) = 482 | let pd = 483 | p.definitions |> Seq.toArray 484 | |> Array.sortBy (fun kv -> kv.Value.index) 485 | |> Array.map (fun kv -> 486 | Seq [ 487 | String kv.Key 488 | Int kv.Value.size 489 | Double kv.Value.lower 490 | Double kv.Value.upper 491 | Int kv.Value.delay 492 | Bool kv.Value.isLog 493 | serializeDistribution kv.Value.prior 494 | ]) 495 | InfoSet.EmptyMap 496 | .AddInfoSet("v", DoubleArray(p.values)) 497 | .AddInfoSet("p", Seq(pd)) 498 | 499 | let deserializeParameters (is:InfoSet) = 500 | match is with 501 | | Map dict -> 502 | if dict.ContainsKey "v" && dict.ContainsKey "p" then 503 | match dict.["v"], dict.["p"] with 504 | | (DoubleArray values), (Seq pd) -> 505 | let values' = Array.ofSeq values 506 | pd |> Seq.fold (fun (p:Parameters, index) is_args -> 507 | match is_args with 508 | | Seq args -> 509 | match List.ofSeq args with 510 | | [ 511 | String name 512 | Int size 513 | Double lower 514 | Double upper 515 | Int delay 516 | Bool isLog 517 | is_prior 518 | ] -> 519 | p.Add(name, Array.sub values' index size, lower, upper, delay, isLog, deserializeDistribution is_prior), index+size 520 | | _ -> invalidInfoSet() 521 | | _ -> invalidInfoSet() 522 | ) (Parameters.Empty, 0) 523 | |> fst 524 | | _ -> invalidInfoSet() 525 | else invalidInfoSet() 526 | | _ -> invalidInfoSet() 527 | 528 | type ParametersSerializer() = 529 | interface ISerializer with 530 | member x.TypeId = "FilzbachP" 531 | member x.Serialize _ p = serializeParameters p 532 | member x.Deserialize _ is = deserializeParameters is 533 | 534 | let serializeSampler (s:Sampler) = 535 | let metr_k, seed, pp, deltas, ltotold, ptotold, accept, runalt, runacc = s.State 536 | Seq [Int metr_k; UIntArray seed; serializeParameters pp; DoubleArray deltas; Double ltotold; Double ptotold; Bool accept; IntArray runalt; IntArray runacc] 537 | 538 | let deserializeSampler is = 539 | match is with 540 | | Seq is_fields -> 541 | match is_fields |> List.ofSeq with 542 | | [Int metr_k; UIntArray seed; is_pp; DoubleArray deltas; Double ltotold; Double ptotold; Bool accept; IntArray runalt; IntArray runacc] -> 543 | let pp = deserializeParameters is_pp 544 | Sampler.Restore(metr_k, MT19937(Array.ofSeq seed), pp, Array.ofSeq deltas, ltotold, ptotold, accept, Array.ofSeq runalt, Array.ofSeq runacc) 545 | | _ -> invalidInfoSet() 546 | | _ -> invalidInfoSet() 547 | 548 | type SamplerSerializer() = 549 | interface ISerializer with 550 | member x.TypeId = "FilzbachS" 551 | member x.Serialize _ s = serializeSampler s 552 | member x.Deserialize _ is = deserializeSampler is 553 | 554 | let Register (libraries:ISerializerLibrary seq) = 555 | libraries |> Seq.iter (fun lib -> 556 | lib.Register(Angara.Statistics.Serialization.MersenneTwisterSerializer()) 557 | lib.Register(Angara.Statistics.Serialization.DistributionSerializer()) 558 | lib.Register(ParametersSerializer()) 559 | lib.Register(SamplerSerializer())) 560 | -------------------------------------------------------------------------------- /src/Angara.Statistics/Statistics.fs: -------------------------------------------------------------------------------- 1 | module Angara.Statistics 2 | 3 | // 4 | // Statistical library 5 | // 6 | 7 | type Distribution = 8 | /// A uniform distribution over [`lower_bound`, `upper_bound`) range. 9 | | Uniform of float*float // lower bound, upper bound (> lower bound) -> continuous [lower bound to upper bound) 10 | /// A uniform distribution in log space. 11 | | LogUniform of float*float // lower bound (> 0), upper bound (> lower bound) -> continuous [lower bound to upper bound) 12 | /// A linearly changing distribution over a [`lower_bound`, `upper_bound`) range. 13 | | Linear of lower_bound:float * upper_bound:float * density_at_lower_bound:float 14 | /// Normal distribution of `mean`, `standard_deviation`. 15 | | Normal of float*float // mean, standard deviation (> 0) -> continuous (-infinity to infinity) 16 | /// Normal distribution in log space of `mean`, `standard_deviation_of_log`. 17 | | LogNormal of float*float // log mean, standard deviation of logarithm (> 0) -> continuous (0.0 to infinity) 18 | /// Distribution of a yes/no experiment (1 or 0) which yields success with probability `p`. 19 | | Bernoulli of float // fraction of success [1e-16 to 1.0-1e-16] -> success/failure outcome, 1 or 0 20 | /// A number of successes in a sequence of `n` independent yes/no experiments, each of which yields success with probability `p`. 21 | | Binomial of int*float // number of trials, probability of success -> number of successes, [0 to max_int] 22 | /// A number of successes before a given number of failures `r` in a sequence of yes/no experiments, each of which yields success with probability `p = mean/(mean+r)`. 23 | | NegativeBinomial of mean:float * r:float // mean (0 to inf), number of failures or 'shape' for fractional values (0 to inf) -> number of successes, [0 to max_int] 24 | /// A number of events occuring in a fixed interval of time if these events occur with a known average rate = `mean`. 25 | | Poisson of mean:float // mean a.k.a. lambda [0, maxint] -> number of events [0 to maxint] 26 | /// A family of distributions of positive values. The parameters alpha and beta are sometimes called shape and rate. 27 | | Gamma of float*float // alpha (>0), beta (>0) -> continuous (0 to infinity) 28 | /// Time between events in a process in which events occur continuously and independently at a constant average `rate = 1/mean`. 29 | | Exponential of mean:float // rate lambda (>0) -> continuous [0 to infinity) 30 | /// A weighted mixture of distributions. 31 | | Mixture of (float*Distribution) list 32 | 33 | /// Make a distribution which density matches a piecewise linear curve. 34 | /// Abscissas must be in increasing order. 35 | /// Oridnates must be positive values. The function scales the density so that its norm = 1. 36 | static member fromPiecewise (density: (float * float) seq) = 37 | let x, y = density |> Array.ofSeq |> Array.unzip 38 | let n = Array.length x 39 | if seq {for i in 0..n-2 -> x.[i] >= x.[i+1]} |> Seq.reduce (||) then invalidOp "Abscissas must be in increasing order." 40 | if Array.exists (fun v -> v < 0.) y then invalidOp "Oridnates must be positive values." 41 | let cc = [for i in 0..n-2 -> 0.5*(y.[i+1]+y.[i])*(x.[i+1]-x.[i])] 42 | let norm = List.sum cc 43 | Mixture (cc |> List.mapi (fun i c -> c/norm, Linear(x.[i], x.[i+1], y.[i]/c))) 44 | 45 | /// The smallest positive normalized `float` value 46 | let improbable = 2.2250738585072014E-308 // 2^(-1022) 47 | /// Logarithm of `improbable` 48 | let log_improbable = log(improbable) // -708 49 | 50 | /// `1.0 - tolerance < 1.0 && 1.0 - 0.5*tolerance = 1.0` 51 | let tolerance = 1.1102230246251565E-16 // 2^(-53) 52 | /// Logarithm of `tolerance` 53 | let log_tolerance = log(tolerance) // -36.7 54 | 55 | /// Maximum exact integer `maxint+1.0 = maxint && maxint-1.0 < maxint` 56 | let maxint = 1.0/tolerance; // 9e15 -- 6 orders of magnitude alrger than int.maxvalue 57 | 58 | /// π 59 | let pi = 3.14159265358979323846264338327950288 60 | 61 | /// 2π 62 | let pi2 = 6.283185307179586476925286 63 | 64 | /// natural logarithm base 65 | let e = 2.71828182845904523536028747135266250 66 | 67 | /// sqrt 2π 68 | let sqrt2pi = sqrt(pi2) 69 | 70 | /// 1/2 * log 2π 71 | let log2pi = 0.5*log(pi2) 72 | 73 | let private isNan = System.Double.IsNaN 74 | let private isInf = System.Double.IsInfinity 75 | 76 | /// Sigmoidal function that maps [-infinity,infinity] interval onto [0,1] 77 | let logistic x = 78 | if x > 1.-log_tolerance then 1. else 79 | let ex = exp x 80 | ex / (1. + ex) 81 | 82 | /// Inverse logistic transform 83 | let logit p = 84 | if p > 1. || p < 0. then nan 85 | elif p = 1. then infinity 86 | elif p = 0. then -infinity 87 | else log(p/(1.-p)) 88 | 89 | type summaryType = 90 | {count:int; min:float; max:float; mean:float; variance:float} 91 | override me.ToString() = sprintf "%A" me 92 | 93 | /// Produces cumulant summary of the data using fast one-pass algorithm. 94 | let summary data = 95 | let folder summary d = 96 | if isNan(d) || isInf(d) then 97 | summary 98 | else 99 | let delta = d - summary.mean 100 | let n = summary.count + 1 101 | let mean = summary.mean + delta/float n 102 | { 103 | count = n 104 | min = (min d summary.min) 105 | max = (max d summary.max) 106 | mean = mean 107 | variance = summary.variance + delta*(d-mean) 108 | } 109 | let pass = 110 | Seq.fold folder { 111 | count=0 112 | min=System.Double.PositiveInfinity 113 | max=System.Double.NegativeInfinity 114 | mean=0.0 115 | variance=0.0 116 | } data 117 | if pass.count<2 then 118 | pass 119 | else 120 | let pass = {pass with variance=pass.variance/(float(pass.count-1))} 121 | pass 122 | 123 | type qsummaryType = 124 | {min:float; lb95:float; lb68:float; median:float; ub68:float; ub95:float; max:float} 125 | override me.ToString() = sprintf "%A" me 126 | 127 | /// Produces quantile summary of the data. 128 | let qsummary data = 129 | let a = data |> Seq.filter(fun d -> not (System.Double.IsNaN(d) || System.Double.IsInfinity(d))) |> Seq.toArray 130 | Array.sortInPlace a 131 | let n = a.Length 132 | if n<1 then {min=nan; lb95=nan; lb68=nan; median=nan; ub68=nan; ub95=nan; max=nan} 133 | else 134 | let q p = 135 | // Definition 8 from Hyndman, R. J. and Fan, Y. (1996) Sample quantiles in statistical packages, American Statistician 50, 361–365. 136 | // This is the same as stats.quantile(...,type=8) from R 137 | let h = p*(float n + 1./3.)-2./3. 138 | if h <= 0.0 then a.[0] 139 | elif h >= float (n-1) then a.[n-1] 140 | else 141 | let fh = floor h 142 | a.[int fh]*(1.0-h+fh) + a.[int fh + 1]*(h - fh) 143 | {min=a.[0]; lb95=q(0.025); lb68=q(0.16); median=q(0.5); ub68=q(0.84); ub95=q(0.975); max=a.[n-1]} 144 | 145 | // adopted from Numerical Recipes: The Art of Scientific Computing, Third Edition (2007), p.257 146 | let private log_gamma x = 147 | let cof=[|57.1562356658629235; -59.5979603554754912; 14.1360979747417471; -0.491913816097620199; 0.339946499848118887e-4; 0.465236289270485756e-4; -0.983744753048795646e-4; 0.158088703224912494e-3; -0.210264441724104883e-3; 0.217439618115212643e-3; -0.164318106536763890e-3; 0.844182239838527433e-4; -0.261908384015814087e-4; 0.368991826595316234e-5|] 148 | if x<=0.0 then nan else 149 | let t = x + 5.24218750000000000 // Rational 671/128. 150 | let t = (x+0.5)*log(t)-t 151 | let ser,_ = cof |> Seq.fold (fun (ser,x) c -> let y=x+1.0 in ser+c/y,y) (0.999999999999997092,x) 152 | t+log(2.5066282746310005*ser/x) 153 | 154 | // adopted from "Fast and Accurate Computation of Binomial Probabilities", C. Loader, 2000 155 | let private sfe = 156 | Seq.unfold (fun (n, lognf) -> 157 | if n=0 then 158 | Some(0.0, (1,0.0)) 159 | elif n<16 then 160 | let logn = log(float n) 161 | Some(lognf+float(n)-log2pi-(float(n)-0.5)*logn, (n+1, lognf+logn)) 162 | else None) (0, 0.0) |> Array.ofSeq 163 | 164 | let private stirlerr n = 165 | if (n<16) then sfe.[n] 166 | else 167 | let S0 = 1.0/12.0 168 | let S1 = 1.0/360.0 169 | let S2 = 1.0/1260.0 170 | let S3 = 1.0/1680.0 171 | let S4 = 1.0/1188.0 172 | let n1 = 1.0/float(n) 173 | let n2 = n1*n1 174 | if (n>500) then ((S0-S1*n2)*n1) 175 | elif (n>80) then ((S0-(S1-S2*n2)*n2)*n1) 176 | elif (n>35) then ((S0-(S1-(S2-S3*n2)*n2)*n2)*n1) 177 | else ((S0-(S1-(S2-(S3-S4*n2)*n2)*n2)*n2)*n1) 178 | 179 | let private bd0 (x: float, np: float) = 180 | if (abs(x-np) < 0.1*(x+np)) then 181 | let v = (x-np)/(x+np) 182 | let rec next j ej s = 183 | let s1 = s + ej/float(2*j+1) 184 | if s1=s then s1 185 | else next (j+1) (ej*v*v) s1 186 | next 1 (2.0*x*v*v*v) ((x-np)*v) 187 | else x*log(x/np)+np-x 188 | 189 | let private dbinom (x: int, n: int, p: float) = 190 | // assert((p>=0.0) && (p<=1.0)) 191 | // assert(n>=0) 192 | // assert((x>=0) && (x<=n)) 193 | if (p=0.0) then if x=0 then 1.0 else 0.0 194 | elif (p=1.0) then if x=n then 1.0 else 0.0 195 | elif (x=0) then exp(float n*log(1.0-p)) 196 | elif (x=n) then exp(float n*log(p)) 197 | else 198 | let lc = stirlerr(n) - stirlerr(x) - stirlerr(n-x) - bd0(float x, float(n)*p) - bd0(float(n-x), float(n)*(1.0-p)) 199 | exp(lc)*sqrt(float(n)/(pi2*float(x)*float(n-x))); 200 | 201 | let private dpois(x: int, lb: float) = 202 | if (lb=0.0) then if x=0 then 1.0 else 0.0 203 | elif (x=0) then exp(-lb) 204 | else exp(-stirlerr(x)-bd0(float x,lb))/sqrt(pi2*float(x)); 205 | 206 | /// Logarithm of a Probability Distribution Function 207 | let rec log_pdf d v = 208 | if System.Double.IsNaN(v) then log_improbable 209 | else 210 | let result = 211 | match d with 212 | | Normal(mean,stdev) -> let dev = (mean-v)/stdev in -0.5*dev*dev - log(sqrt2pi*stdev) 213 | | LogNormal(mean,stdev) -> let dev = (log mean-log v)/stdev in -0.5*dev*dev - log(sqrt2pi*stdev*v) 214 | | Uniform(lb,ub) -> 215 | if (vub || lb>=ub) then log_improbable 216 | else -log(ub-lb) 217 | | LogUniform(lb,ub) -> 218 | if (vub || lb>=ub) then log_improbable 219 | else -log(log ub - log lb) - log v 220 | | Linear(x1,x2,density) -> 221 | if v < min x1 x2 || v > max x1 x2 then log_improbable 222 | elif x1=x2 then infinity else 223 | let h = 2./abs(x2-x1) 224 | let p1 = if density h then h else density // p1 = 2*a*x1+b 225 | let p2 = h-p1 // 0.5*(p1+p2)*abs(x2-x1) == 1; p2 = 2*a*x2+b 226 | log(p1+(v-x1)*(p2-p1)/(x2-x1)) 227 | | Exponential(mean) -> 228 | if mean<=0.0 || mean = infinity then log_improbable else 229 | -log(mean) - v/mean 230 | | Gamma(a,b) -> 231 | a*log(b) - log_gamma(a) + (a-1.0)*log(v) - b*v 232 | | Bernoulli(fraction) -> 233 | if (fraction1.0-tolerance) then log_improbable 234 | elif v>0.5 then log(fraction) 235 | else log(1.0-fraction) 236 | | Binomial(n, p) -> 237 | // log(dbinom(int v, int n, p)) 238 | if (p<0.0) || (p>1.0) || (n<0) || (v<0.0) || (v>float n) then log_improbable 239 | else log(dbinom(int v, int n, p)) 240 | | NegativeBinomial(mean, r) -> 241 | if mean<=0.0 || r<=0.0 || v<0.0 || v>maxint then log_improbable else 242 | let k = round v 243 | r*log(r/(mean+r))+k*log(mean/(mean+r))+log_gamma(r+k)-log_gamma(k+1.0)-log_gamma(r) 244 | | Poisson(lambda) -> 245 | if (lambda<0.0 || lambda>float System.Int32.MaxValue) then log_improbable 246 | // log(dpois(int v, lambda)) 247 | elif (lambda=0.0) then if v=0.0 then 0.0 else log_improbable 248 | elif (v=0.0) then -lambda 249 | else let x = int v in - stirlerr(x) - bd0(float x,lambda) - 0.5*log(pi2*float(x)); 250 | | Mixture(components) -> log(components |> List.fold (fun s (w,d) -> s+w*exp(log_pdf d v)) 0.0) 251 | //| _ -> raise (System.NotImplementedException()) 252 | if System.Double.IsNaN(result) || System.Double.IsInfinity(result) then 253 | log_improbable 254 | else 255 | result 256 | 257 | // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c 258 | // 259 | // Adopted from a C-program for MT19937, with initialization improved 2002/1/26. 260 | // Coded by Takuji Nishimura and Makoto Matsumoto. 261 | // 262 | // Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, 263 | // All rights reserved. 264 | // 265 | // Redistribution and use in source and binary forms, with or without 266 | // modification, are permitted provided that the following conditions 267 | // are met: 268 | // 269 | // 1. Redistributions of source code must retain the above copyright 270 | // notice, this list of conditions and the following disclaimer. 271 | // 272 | // 2. Redistributions in binary form must reproduce the above copyright 273 | // notice, this list of conditions and the following disclaimer in the 274 | // documentation and/or other materials provided with the distribution. 275 | // 276 | // 3. The names of its contributors may not be used to endorse or promote 277 | // products derived from this software without specific prior written 278 | // permission. 279 | // 280 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 281 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 282 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 283 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR 284 | // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 285 | // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 286 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 287 | // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 288 | // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 289 | // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 290 | // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 291 | // 292 | // 293 | // Any feedback is very welcome. 294 | // http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html 295 | // email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) 296 | 297 | type MT19937 private ( 298 | mt:uint32[], // the array for the state vector 299 | idx:int // index of the next word from the state (0..N) 300 | ) = 301 | // Period parameters 302 | [] static let N = 624 303 | [] static let M = 397 304 | [] static let MATRIX_A = 0x9908b0dfu // constant vector a 305 | [] static let UPPER_MASK = 0x80000000u // most significant w-r bits 306 | [] static let LOWER_MASK = 0x7fffffffu // least significant r bits 307 | 308 | let mutable mti = idx // mti==N+1 means mt[N] is not initialized 309 | 310 | // initializes mt[N] with a seed 311 | static let init_genrand s = 312 | let mt:uint32[] = Array.zeroCreate N // the array for the state vector 313 | mt.[0] <- s &&& 0xffffffffu 314 | for mti = 1 to N-1 do 315 | mt.[mti] <- 316 | (1812433253u * (mt.[mti-1] ^^^ (mt.[mti-1] >>> 30)) + uint32 mti) 317 | // See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. 318 | // In the previous versions, MSBs of the seed affect 319 | // only MSBs of the array mt[]. 320 | // 2002/01/09 modified by Makoto Matsumoto 321 | mt.[mti] <- mt.[mti] &&& 0xffffffffu 322 | // for >32 bit machines 323 | mt 324 | 325 | static let init_by_array (init_key:uint32[]) = 326 | let mt = init_genrand(19650218u) 327 | let mutable i = 1 328 | let mutable j = 0 329 | let key_length = Array.length init_key 330 | for k = max key_length N downto 1 do 331 | mt.[i] <- (mt.[i] ^^^ ((mt.[i-1] ^^^ (mt.[i-1] >>> 30)) * 1664525u)) + init_key.[j] + uint32 j // non linear 332 | mt.[i] <- mt.[i] &&& 0xffffffffu // for WORDSIZE > 32 machines 333 | i <- i + 1 334 | j <- j + 1 335 | if i >= N then 336 | mt.[0] <- mt.[N-1] 337 | i <- 1 338 | if j >= key_length then j <- 0 339 | for k = N-1 downto 1 do 340 | mt.[i] <- (mt.[i] ^^^ ((mt.[i-1] ^^^ (mt.[i-1] >>> 30)) * 1566083941u)) - uint32 i; // non linear 341 | mt.[i] <- mt.[i] &&& 0xffffffffu // for WORDSIZE > 32 machines 342 | i <- i + 1 343 | if i >= N then 344 | mt.[0] <- mt.[N-1] 345 | i <- 1 346 | 347 | mt.[0] <- 0x80000000u //* MSB is 1; assuring non-zero initial array */ 348 | mt 349 | 350 | // generates a random number on [0,0xffffffff]-interval 351 | let genrand_int32() : uint32 = 352 | let mutable y = 0u 353 | let mag01 = [|0x0u; MATRIX_A|] 354 | // mag01[x] = x * MATRIX_A for x=0,1 355 | 356 | if (mti >= N) then // generate N words at one time 357 | for kk=0 to N-M-1 do 358 | y <- (mt.[kk] &&& UPPER_MASK) ||| (mt.[kk+1] &&& LOWER_MASK) 359 | mt.[kk] <- mt.[kk+M] ^^^ (y >>> 1) ^^^ mag01.[int(y &&& 0x1u)] 360 | for kk = N-M to N-2 do 361 | y <- (mt.[kk] &&& UPPER_MASK) ||| (mt.[kk+1] &&& LOWER_MASK) 362 | mt.[kk] <- mt.[kk+(M-N)] ^^^ (y >>> 1) ^^^ mag01.[int(y &&& 0x1u)] 363 | y <- (mt.[N-1] &&& UPPER_MASK) ||| (mt.[0] &&& LOWER_MASK) 364 | mt.[N-1] <- mt.[M-1] ^^^ (y >>> 1) ^^^ mag01.[int(y &&& 0x1u)]; 365 | 366 | mti <- 0 367 | 368 | y <- mt.[mti] 369 | mti <- mti + 1 370 | 371 | // Tempering 372 | y <- y ^^^ (y >>> 11) 373 | y <- y ^^^ ((y <<< 7) &&& 0x9d2c5680u) 374 | y <- y ^^^ ((y <<< 15) &&& 0xefc60000u) 375 | y <- y ^^^ (y >>> 18) 376 | 377 | y 378 | 379 | // generates a random number on [0,1)-real-interval 380 | let genrand_float() = 381 | float(genrand_int32())*(1.0/4294967296.0) 382 | // divided by 2^32 383 | 384 | // tables for ziggurat algorithm http://www.boost.org/doc/libs/1_60_0/boost/random/normal_distribution.hpp 385 | static let table_x = [| 386 | 3.7130862467403632609; 3.4426198558966521214; 3.2230849845786185446; 3.0832288582142137009; 387 | 2.9786962526450169606; 2.8943440070186706210; 2.8231253505459664379; 2.7611693723841538514; 388 | 2.7061135731187223371; 2.6564064112581924999; 2.6109722484286132035; 2.5690336259216391328; 389 | 2.5300096723854666170; 2.4934545220919507609; 2.4590181774083500943; 2.4264206455302115930; 390 | 2.3954342780074673425; 2.3658713701139875435; 2.3375752413355307354; 2.3104136836950021558; 391 | 2.2842740596736568056; 2.2590595738653295251; 2.2346863955870569803; 2.2110814088747278106; 392 | 2.1881804320720206093; 2.1659267937448407377; 2.1442701823562613518; 2.1231657086697899595; 393 | 2.1025731351849988838; 2.0824562379877246441; 2.0627822745039633575; 2.0435215366506694976; 394 | 2.0246469733729338782; 2.0061338699589668403; 1.9879595741230607243; 1.9701032608497132242; 395 | 1.9525457295488889058; 1.9352692282919002011; 1.9182573008597320303; 1.9014946531003176140; 396 | 1.8849670357028692380; 1.8686611409895420085; 1.8525645117230870617; 1.8366654602533840447; 397 | 1.8209529965910050740; 1.8054167642140487420; 1.7900469825946189862; 1.7748343955807692457; 398 | 1.7597702248942318749; 1.7448461281083765085; 1.7300541605582435350; 1.7153867407081165482; 399 | 1.7008366185643009437; 1.6863968467734863258; 1.6720607540918522072; 1.6578219209482075462; 400 | 1.6436741568569826489; 1.6296114794646783962; 1.6156280950371329644; 1.6017183802152770587; 401 | 1.5878768648844007019; 1.5740982160167497219; 1.5603772223598406870; 1.5467087798535034608; 402 | 1.5330878776675560787; 1.5195095847593707806; 1.5059690368565502602; 1.4924614237746154081; 403 | 1.4789819769830978546; 1.4655259573357946276; 1.4520886428822164926; 1.4386653166774613138; 404 | 1.4252512545068615734; 1.4118417124397602509; 1.3984319141236063517; 1.3850170377251486449; 405 | 1.3715922024197322698; 1.3581524543224228739; 1.3446927517457130432; 1.3312079496576765017; 406 | 1.3176927832013429910; 1.3041418501204215390; 1.2905495919178731508; 1.2769102735516997175; 407 | 1.2632179614460282310; 1.2494664995643337480; 1.2356494832544811749; 1.2217602305309625678; 408 | 1.2077917504067576028; 1.1937367078237721994; 1.1795873846544607035; 1.1653356361550469083; 409 | 1.1509728421389760651; 1.1364898520030755352; 1.1218769225722540661; 1.1071236475235353980; 410 | 1.0922188768965537614; 1.0771506248819376573; 1.0619059636836193998; 1.0464709007525802629; 411 | 1.0308302360564555907; 1.0149673952392994716; 0.99886423348064351303; 0.98250080350276038481; 412 | 0.96585507938813059489; 0.94890262549791195381; 0.93161619660135381056; 0.91396525100880177644; 413 | 0.89591535256623852894; 0.87742742909771569142; 0.85845684317805086354; 0.83895221428120745572; 414 | 0.81885390668331772331; 0.79809206062627480454; 0.77658398787614838598; 0.75423066443451007146; 415 | 0.73091191062188128150; 0.70647961131360803456; 0.68074791864590421664; 0.65347863871504238702; 416 | 0.62435859730908822111; 0.59296294244197797913; 0.55869217837551797140; 0.52065603872514491759; 417 | 0.47743783725378787681; 0.42654798630330512490; 0.36287143102841830424; 0.27232086470466385065; 418 | 0. 419 | |] 420 | 421 | static let table_y = [| 422 | 0.; 0.0026696290839025035092; 0.0055489952208164705392; 0.0086244844129304709682; 423 | 0.011839478657982313715; 0.015167298010672042468; 0.018592102737165812650; 0.022103304616111592615; 424 | 0.025693291936149616572; 0.029356317440253829618; 0.033087886146505155566; 0.036884388786968774128; 425 | 0.040742868074790604632; 0.044660862200872429800; 0.048636295860284051878; 0.052667401903503169793; 426 | 0.056752663481538584188; 0.060890770348566375972; 0.065080585213631873753; 0.069321117394180252601; 427 | 0.073611501884754893389; 0.077950982514654714188; 0.082338898242957408243; 0.086774671895542968998; 428 | 0.091257800827634710201; 0.09578784912257815216; 0.10036444102954554013; 0.10498725541035453978; 429 | 0.10965602101581776100; 0.11437051244988827452; 0.11913054670871858767; 0.12393598020398174246; 430 | 0.12878670619710396109; 0.13368265258464764118; 0.13862377998585103702; 0.14361008009193299469; 431 | 0.14864157424369696566; 0.15371831220958657066; 0.15884037114093507813; 0.16400785468492774791; 432 | 0.16922089223892475176; 0.17447963833240232295; 0.17978427212496211424; 0.18513499701071343216; 433 | 0.19053204032091372112; 0.19597565311811041399; 0.20146611007620324118; 0.20700370944187380064; 434 | 0.21258877307373610060; 0.21822164655637059599; 0.22390269938713388747; 0.22963232523430270355; 435 | 0.23541094226572765600; 0.24123899354775131610; 0.24711694751469673582; 0.25304529850976585934; 436 | 0.25902456739871074263; 0.26505530225816194029; 0.27113807914102527343; 0.27727350292189771153; 437 | 0.28346220822601251779; 0.28970486044581049771; 0.29600215684985583659; 0.30235482778947976274; 438 | 0.30876363800925192282; 0.31522938806815752222; 0.32175291587920862031; 0.32833509837615239609; 439 | 0.33497685331697116147; 0.34167914123501368412; 0.34844296754987246935; 0.35526938485154714435; 440 | 0.36215949537303321162; 0.36911445366827513952; 0.37613546951445442947; 0.38322381105988364587; 441 | 0.39038080824138948916; 0.39760785649804255208; 0.40490642081148835099; 0.41227804010702462062; 442 | 0.41972433205403823467; 0.42724699830956239880; 0.43484783025466189638; 0.44252871528024661483; 443 | 0.45029164368692696086; 0.45813871627287196483; 0.46607215269457097924; 0.47409430069824960453; 444 | 0.48220764633483869062; 0.49041482528932163741; 0.49871863547658432422; 0.50712205108130458951; 445 | 0.51562823824987205196; 0.52424057267899279809; 0.53296265938998758838; 0.54179835503172412311; 446 | 0.55075179312105527738; 0.55982741271069481791; 0.56902999107472161225; 0.57836468112670231279; 447 | 0.58783705444182052571; 0.59745315095181228217; 0.60721953663260488551; 0.61714337082656248870; 448 | 0.62723248525781456578; 0.63749547734314487428; 0.64794182111855080873; 0.65858200005865368016; 449 | 0.66942766735770616891; 0.68049184100641433355; 0.69178914344603585279; 0.70333609902581741633; 450 | 0.71515150742047704368; 0.72725691835450587793; 0.73967724368333814856; 0.75244155918570380145; 451 | 0.76558417390923599480; 0.77914608594170316563; 0.79317701178385921053; 0.80773829469612111340; 452 | 0.82290721139526200050; 0.83878360531064722379; 0.85550060788506428418; 0.87324304892685358879; 453 | 0.89228165080230272301; 0.91304364799203805999; 0.93628268170837107547; 0.96359969315576759960; 454 | 1. 455 | |] 456 | 457 | /// generates a sample from standard normal distribution N(0,1) using ziggurat algorithm. 458 | let znorm() = 459 | let tail() = 460 | let exponential() = -log(1.0-genrand_float()) 461 | let tail_start = table_x.[1] 462 | let mutable r = System.Double.PositiveInfinity 463 | while System.Double.IsPositiveInfinity r do 464 | let x = exponential() / tail_start 465 | let y = exponential() 466 | if 2.0*y > x*x then r <- x+tail_start 467 | r 468 | 469 | let mutable r = System.Double.PositiveInfinity 470 | while System.Double.IsPositiveInfinity r do 471 | let digit = int(genrand_int32() &&& 255u) 472 | let sign = if digit &&& 1 = 0 then -1.0 else 1.0 // float(int(digit &&& 1)*2-1) 473 | let i = digit >>> 1 474 | let x = genrand_float()*table_x.[i] 475 | if x=1.0) do 500 | rnorm_1 <- genrand_float()*2.0-1.0 501 | rnorm_2 <- genrand_float()*2.0-1.0 502 | s <- rnorm_1*rnorm_1 + rnorm_2*rnorm_2 503 | rnorm_f <- sqrt(-2.0*log(s)/s) 504 | rnorm_1*rnorm_f 505 | #endif 506 | 507 | do 508 | if mt.Length <> N then failwith (sprintf "State must be an array of length %d" N) 509 | 510 | new (?seed:uint32) = 511 | let state = init_genrand (defaultArg seed 5489u) 512 | MT19937(state, N) 513 | 514 | new (seed:uint32[]) = 515 | if Array.length seed = N+1 && seed.[N] < 2u + uint32 N then 516 | let state = Array.init N (fun i -> seed.[i]) 517 | let idx = int (seed.[N]) 518 | MT19937(state, idx) 519 | else 520 | let state = init_by_array(seed) 521 | MT19937(state, N) 522 | member private x.getMt = Array.copy mt 523 | member private x.getIdx = mti 524 | new(copy:MT19937) = 525 | MT19937(copy.getMt, copy.getIdx) 526 | 527 | /// returns an array that allows to exactly restore the state of the generator. 528 | member x.get_seed() = [| yield! mt; yield uint32 mti|] 529 | 530 | /// generates a random number on [0,0xffffffff]-interval 531 | member __.uniform_uint32() = genrand_int32() 532 | 533 | /// generates a random number on [0,1)-real-interval 534 | member __.uniform_float64() = genrand_float() 535 | 536 | /// generates a random number on [0,max]-int-interval 537 | member __.uniform_int (max:int) = 538 | if max < 0 then invalidArg "max" "The value cannot be negative" 539 | elif max = 0 then 0 540 | // if typeof were uint32: 541 | //elif max = System.UInt32.MaxValue then x.genrand_int32() 542 | else 543 | let umax = uint32 max 544 | let bucket_size = // (System.UInt32.MaxValue+1)/(max+1) 545 | let bs = System.UInt32.MaxValue / (umax + 1u) 546 | if System.UInt32.MaxValue % (umax + 1u) = umax then bs + 1u else bs 547 | // rejection algorithm 548 | let mutable r = genrand_int32() / bucket_size 549 | while r > umax do r <- genrand_int32() / bucket_size 550 | int r 551 | 552 | /// generates 'true' with probability 'p' or 'false' with probability '1-p' 553 | member __.bernoulli(p) = 554 | if p <= 0.0 then false 555 | elif p >= 1.0 then true 556 | else float(genrand_int32()) <= p*float(System.UInt32.MaxValue) 557 | 558 | /// generates a sample from standard normal distribution N(0,1) using ziggurat algorithm. 559 | member __.normal() = znorm() 560 | 561 | #if BOX_MULLER 562 | /// generates a sample from standard normal distribution N(0,1) using Box-Muller algorithm. 563 | member __.normal_bm() = rnorm() 564 | #endif 565 | 566 | 567 | let rec draw (gen:MT19937) d = // random number generator 568 | let rng_norm(mean, stdev) = mean + stdev * gen.normal() 569 | let rng_unif(lower, upper) = lower + gen.uniform_float64()*(upper-lower) 570 | let rng_poisson lambda = 571 | if lambda>30.0 then 572 | rng_norm(lambda,sqrt(lambda)) 573 | else 574 | let ell = exp(-lambda) 575 | let rec step p k = if p0.0 then u else rand_positive() 578 | let rng_exp mean = -log(rand_positive()) * mean 579 | let rng_gamma a b = 580 | let p = e/(a+e) 581 | let s = sqrt(2.0*a-1.0) 582 | let rec rand_positive () = let u = gen.uniform_float64() in if u>0.0 then u else rand_positive() 583 | if a<1.0 then 584 | // small values of alpha 585 | // from Knuth 586 | let rec iter() = 587 | // generate and reject 588 | let u = gen.uniform_float64() 589 | let v = rand_positive() 590 | let x,q = if u < p then let x=exp(log(v)/a) in x,exp(-x) else let x = 1.0 - log v in x, exp((a-1.0)*log x) 591 | if gen.uniform_float64() rand_positive()} |> Seq.reduce (*) 603 | -log(product)/b 604 | else 605 | // no shortcuts 606 | let rec iter () = 607 | let y = tan(pi*gen.uniform_float64()) 608 | let x = s*y+a-1.0 609 | if 0.0 < x && gen.uniform_float64() <= (1.0+y*y)*exp((a-1.0)*log(x/(a-1.0))-s*y) then 610 | x/b 611 | else iter() 612 | iter() 613 | match d with 614 | | Normal(mean,stdev) -> rng_norm(mean,stdev) 615 | | LogNormal(mean,stdev) -> exp(rng_norm(log mean,stdev)) 616 | | Gamma(a,b) -> rng_gamma a b 617 | | Exponential(mean) -> rng_exp mean 618 | | Uniform(lower, upper) -> rng_unif(lower, upper) 619 | | LogUniform(lower, upper) -> exp(rng_unif(log lower, log upper)) 620 | | Linear(x1,x2,density) -> 621 | if x1=x2 then x1 else 622 | let h = 2./(x2-x1) 623 | let p, pmin, pmax = if h>improbable then density, improbable, h else -density, h, -improbable 624 | let p1 = if ppmax then pmax else p // p1 = 2*a*x1+b 625 | let p2 = h-p1 // 0.5*(p1+p2)*(x2-x1) == 1; p2 = 2*a*x2+b 626 | let a4 = (p2-p1)*h // 4*a 627 | let b = p1 - 0.5*a4*x1 628 | let y = gen.uniform_float64() 629 | let absd = sqrt(p1*p1 + a4*y) 630 | let d = if h>0. then absd else -absd 631 | (d - b) * 2. / a4 632 | | Bernoulli(fraction) -> if gen.uniform_float64() rng_poisson lambda 634 | | Binomial(n,p) -> 635 | let rec step n k = if n<=0 then k else step (n-1) (if gen.uniform_float64()

638 | // adopted from VC++2012u3 639 | let v = rng_gamma r (r/mean) 640 | rng_poisson(v) 641 | | Mixture(components) -> 642 | let rec oneof f c = 643 | match c with 644 | | [] -> failwith "empty mixture!" 645 | | (w,d)::tail -> 646 | if f <= w then draw gen d else oneof (f-w) tail 647 | oneof (gen.uniform_float64()) components 648 | //| _ -> raise (System.NotImplementedException()) 649 | 650 | 651 | // Computes Pearson's correlation coefficient for two float arrays 652 | // The Pearson correlation is defined only if both of the standard deviations are finite and both of them are nonzero. 653 | // Returns NaN, otherwise. 654 | let correlation (x:float[]) (y:float[]) = 655 | if x.Length <> y.Length then invalidOp "Different lengths of arrays" 656 | let filtered = Seq.zip x y |> Seq.filter (fun(u,v) -> not (isNan(u) || isNan(v) || isInf(u) || isInf(v))) |> Array.ofSeq; 657 | let n = filtered.Length 658 | if n <= 1 then System.Double.NaN else 659 | let _x, _y = Array.map fst filtered, Array.map snd filtered 660 | let sx, sy = summary _x, summary _y 661 | let stdx, stdy = sqrt sx.variance, sqrt sy.variance 662 | if stdx = 0.0 || stdy = 0.0 || isInf(stdx) || isInf(stdy) then System.Double.NaN else 663 | let d1 = float(n) * sx.mean * sy.mean 664 | let d2 = float(n-1) * stdx * stdy 665 | ((filtered |> Array.map (fun(s,t)->s*t) |> Array.sum) - d1)/d2 666 | 667 | // KDE 668 | 669 | // adopted from MathNet.Numerics 670 | // https://github.com/mathnet/mathnet-numerics/blob/v3.9.0/src/Numerics/IntegralTransforms/Fourier.RadixN.cs 671 | 672 | open System.Numerics 673 | 674 | let private InverseScaleByOptions(samples:Complex[]) = 675 | let scalingFactor = 1.0/(float samples.Length) 676 | for i in 0..samples.Length-1 do 677 | samples.[i] <- samples.[i] * Complex(scalingFactor, 0.) 678 | 679 | let private ForwardScaleByOptions(samples:Complex[]) = 680 | let scalingFactor = sqrt(1.0/(float samples.Length)) 681 | for i in 0..samples.Length-1 do 682 | samples.[i] <- samples.[i] * Complex(scalingFactor, 0.) 683 | 684 | 685 | let private Radix2Reorder(samples:'T[]) = 686 | let mutable j = 0 687 | for i in 0..samples.Length - 2 do 688 | if (i < j) then 689 | let temp = samples.[i] 690 | samples.[i] <- samples.[j] 691 | samples.[j] <- temp 692 | 693 | let mutable m = samples.Length 694 | let mutable cont = true 695 | while cont do 696 | m <- m >>> 1; 697 | j <- j ^^^ m; 698 | cont <- (j &&& m) = 0 699 | 700 | let private Radix2Step(samples:Complex[], exponentSign:int, levelSize:int, k:int) = 701 | // Twiddle Factor 702 | let exponent = (float exponentSign*float k)*pi/float levelSize 703 | let w = Complex(cos(exponent), sin(exponent)) 704 | let step = levelSize <<< 1 705 | for i in k..step..samples.Length-1 do 706 | let ai = samples.[i] 707 | let t = w*samples.[i + levelSize] 708 | samples.[i] <- ai + t 709 | samples.[i + levelSize] <- ai - t 710 | 711 | let private Radix2(samples:Complex[], exponentSign:int) = 712 | let rec is_power_two x p = 713 | if x = p then true 714 | elif x < p then false 715 | else is_power_two x (2*p) 716 | if not <| is_power_two samples.Length 1 then invalidArg "samples" "The array length must be a power of 2." 717 | 718 | Radix2Reorder(samples) 719 | let mutable levelSize = 1 720 | while levelSize < samples.Length do 721 | for k = 0 to levelSize-1 do Radix2Step(samples, exponentSign, levelSize, k) 722 | levelSize <- levelSize * 2 723 | 724 | 725 | 726 | let private fi x = float(x) 727 | 728 | /// Inverse Fast Fourier Transform. 729 | let ifft (xs:Complex[]) = 730 | let samples = Array.copy xs 731 | Radix2(samples,1) 732 | let scalingFactor = 1.0/(float samples.Length) 733 | for i in 0..samples.Length-1 do 734 | let v = samples.[i] 735 | samples.[i] <- Complex(scalingFactor * v.Real, scalingFactor * v.Imaginary) 736 | samples 737 | 738 | /// Fast Fourier transform. 739 | let fft (xs:Complex[]) = 740 | let samples = Array.copy xs 741 | Radix2(samples,-1) 742 | samples 743 | 744 | let private (|Even|Odd|) input = if input % 2 = 0 then Even else Odd 745 | 746 | /// Descrete cosine transform. 747 | let dct (rxs:float[]) = 748 | let xs = Array.map (fun x -> Complex(x,0.0)) rxs 749 | let len = xs.Length 750 | let n = fi len 751 | let weights = 752 | let myseq = Seq.init (len-1) (fun x -> Complex(2.0, 0.0) * Complex.Exp(Complex(0.0, fi (x+1) * pi / (2.0*n)))) 753 | seq { yield Complex(2.0, 0.0); yield! myseq } |> Array.ofSeq 754 | let backpermute (arr:Complex[]) ind = ind |> Seq.map (fun i -> arr.[i]) 755 | let interleaved = 756 | let en = Seq.init ((len+1)/2) (fun i -> i * 2) 757 | let en2 = Seq.init (len/2) (fun i -> len - (i*2) - 1) 758 | backpermute xs (seq{ yield! en; yield! en2 }) |> Array.ofSeq 759 | 760 | Array.map2 (fun (a:Complex) (b:Complex) -> (a*b).Real) weights (fft interleaved) 761 | 762 | 763 | /// Inverse discrete cosine transform. 764 | // idct :: U.Vector CD -> U.Vector Double 765 | // http://hackage.haskell.org/package/statistics-0.10.0.0/docs/src/Statistics-Transform.html 766 | let idct (rxs:float[]) = 767 | let xs = Seq.map (fun x -> Complex(x,0.0)) rxs 768 | let len = rxs.Length 769 | let weights = 770 | let n = fi len 771 | let coeff k = Complex(2.0 * n, 0.0) * Complex.Exp(Complex(0.0, fi (k+1) * pi / (2.0*n))) 772 | seq { yield Complex(n,0.0); for i in 0..len-2 do yield coeff i } 773 | let vals = (Array.map (fun (c:Complex) -> c.Real) << ifft) (Seq.map2 (*) weights xs |> Array.ofSeq) 774 | let interleave z = 775 | let hz = z >>> 1 776 | match z with 777 | | Even _ -> vals.[hz] 778 | | Odd _ -> vals.[len - hz - 1] 779 | [| for i in 0..len-1 do yield interleave(i) |] 780 | 781 | // http://hackage.haskell.org/package/statistics-0.10.5.0/docs/src/Statistics-Function.html#nextHighestPowerOfTwo 782 | // 783 | // Efficiently compute the next highest power of two for a 784 | // non-negative integer. If the given value is already a power of 785 | // two, it is returned unchanged. If negative, zero is returned. 786 | let private nextHighestPowerOfTwo n = 787 | let i0 = n - 1 788 | let i1 = i0 ||| (i0 >>> 1) 789 | let i2 = i1 ||| (i1 >>> 2) 790 | let i4 = i2 ||| (i2 >>> 4) 791 | let i8 = i4 ||| (i4 >>> 8) 792 | let i16 = i8 ||| (i8 >>> 16) 793 | let _i32 = i16 ||| (i16 >>> 32) 794 | 1 + _i32 795 | 796 | let histogram_ n xmin xmax xs = 797 | if n < 1 then invalidArg "n" "must be > 0" 798 | let isNotFinite x = System.Double.IsNaN x || System.Double.IsInfinity x 799 | if isNotFinite xmin then invalidArg "xmin" (sprintf "is %g" xmin) 800 | if isNotFinite xmax then invalidArg "xmax" (sprintf "is %g" xmax) 801 | if xmin >= xmax then invalidOp "xmin should be less than xmax" 802 | let h = Array.zeroCreate n 803 | let step = (xmax - xmin) / float n 804 | let add x = 805 | if not(isNan x) && x >= xmin && x <= xmax then 806 | let idx = min (n-1) (int((x-xmin)/step)) 807 | h.[idx] <- h.[idx] + 1 808 | xs |> Seq.iter add 809 | h 810 | 811 | /// Approximate comparison of two double values. 812 | /// Tolerance `ulps` is in units of least precision. 813 | let within (ulps:uint32) a b = 814 | // See e.g. "Comparing Floating Point Numbers, 2012 Edition" by Bruce Dawson 815 | // https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ 816 | let ai = System.BitConverter.DoubleToInt64Bits a 817 | let bi = System.BitConverter.DoubleToInt64Bits b 818 | let cmp ai bi = if ai<=bi then bi-ai <= int64 ulps else ai-bi <= int64 ulps 819 | if ai<0L && bi>=0L then cmp (System.Int64.MinValue-ai) bi 820 | elif ai>=0L && bi<0L then cmp ai (System.Int64.MinValue-bi) 821 | else cmp ai bi 822 | 823 | /// Root of a function using Ridders method. 824 | // Ridders, C.F.J. (1979) A new algorithm for computing a single 825 | // root of a real continuous function. 826 | // /IEEE Transactions on Circuits and Systems/ 26:979--980. 827 | let ridders tolerance (lb, ub) (f : float->float) = 828 | // The function must have opposite signs when evaluated at the lower 829 | // and upper bounds of the search (i.e. the root must be bracketed). 830 | 831 | let rec iter a fa b fb i = 832 | if 100 <= i then None // Too many iterations performed. Fail 833 | else 834 | if within 1u a b then Some a // Root is bracketed within 1 ulp. No improvement could be made 835 | else 836 | let d = abs(b-a) 837 | let dm = (b-a) * 0.5 838 | let m = a + dm 839 | let fm = f m 840 | if 0.0 = fm then Some m else 841 | let dn = float(sign(fb - fa)) * dm * fm / sqrt(fm*fm - fa*fb) 842 | let n = m - float(sign dn) * min (abs dn) (abs dm - 0.5 * tolerance) 843 | if d < tolerance then Some n else 844 | if n=a || n=b then 845 | // Ridder's approximation coincide with one of old bounds. Revert to bisection 846 | if 0 > sign fm * sign fa then iter a fa m fm (i+1) 847 | else iter m fm b fb (i+1) 848 | else 849 | let fn = f n 850 | if 0.0 = fn then Some n 851 | elif 0.0 > fn*fm then iter n fn m fm (i+1) 852 | elif 0.0 > fn*fa then iter a fa n fn (i+1) 853 | else iter n fn b fb (i+1) 854 | 855 | if not (tolerance>=0.0) then invalidArg "tolerance" "must be greater than 0.0" 856 | let flb = f lb 857 | if 0.0 = flb then Some lb else 858 | let fub = f ub 859 | if 0.0 = fub then Some ub 860 | elif 0.0 < fub*flb then None // root is not bracketed 861 | else iter lb flb ub fub 0 862 | 863 | // from http://hackage.haskell.org/package/statistics-0.10.5.0/docs/src/Statistics-Sample-KernelDensity.html#kde 864 | // 865 | /// Gaussian kernel density estimator for one-dimensional data, using 866 | /// the method of Botev et al. 867 | // 868 | // Botev. Z.I., Grotowski J.F., Kroese D.P. (2010). Kernel density estimation via diffusion. 869 | // /Annals of Statistics/ 38(5):2916-2957. 870 | // 871 | // The result is a pair of vectors, containing: 872 | // 873 | // * The coordinates of each mesh point. 874 | // 875 | // * Density estimates at each mesh point. 876 | // 877 | // n0 The number of mesh points to use in the uniform discretization 878 | // of the interval @(min,max)@. If this value is not a power of 879 | // two, then it is rounded up to the next power of two. 880 | // 881 | // min Lower bound (@min@) of the mesh range. 882 | // max Upper bound (@max@) of the mesh range. 883 | // NaN in the sample are ignored. 884 | let kde2 n0 min max (sample:float seq) = 885 | // check kde2 arguments 886 | if sample = null then invalidArg "sample" "cannot be null" 887 | else if(n0 = 1) then invalidArg "n0" "cannot be 1" 888 | else 889 | let xs = Seq.filter (System.Double.IsNaN >> not) sample |> Array.ofSeq 890 | if Array.isEmpty xs then invalidArg "sample" "doesn't contain numeric values" 891 | let m_sqrt_2_pi = sqrt (2.0*pi) 892 | let m_sqrt_pi = sqrt pi 893 | let r = max - min 894 | let len = fi xs.Length 895 | let ni = nextHighestPowerOfTwo n0 896 | let n = fi ni 897 | let sqr a = a*a 898 | let mesh = 899 | let d = r/(n - 1.0) 900 | Array.init ni (fun z -> min + d * fi z) 901 | 902 | let density = 903 | let a = 904 | let h = Seq.map (fun x -> float(x) / len) (histogram_ ni min max xs) |> Array.ofSeq 905 | let sh = Array.sum h 906 | (dct << Array.map (fun p -> p/sh)) h 907 | 908 | let iv = [| for i in 1..ni-1 do yield sqr(fi i) |] 909 | let a2v = a |> Seq.skip(1) |> Seq.map (fun q -> sqr(q*0.5)) |> Array.ofSeq 910 | let t_star = 911 | let rec f q t = 912 | let g i a2 = i ** q * a2 * exp ((-i) * sqr(pi) * t) 913 | 2.0 * pi ** (q*2.0) * Seq.sum (Seq.map2 g iv a2v) 914 | let rec go s h : float = 915 | let si = fi s 916 | let k0 = 917 | let enum = seq{ for i in 1 .. s do yield 2*i - 1 } 918 | fi(Seq.fold (*) 1 enum) / m_sqrt_2_pi 919 | let _const = (1.0 + 0.5 ** (si+0.5)) / 3.0 920 | let time = (2.0 * _const * k0 / len / h) ** (2.0 / (3.0 + 2.0 * si)) 921 | if s=1 then h else go (s-1) (f si time) 922 | 923 | let eq x = x - (len * (2.0 * m_sqrt_pi) * go 6 (f 7.0 x)) ** (-0.4) 924 | match ridders 1e-14 (0.0,0.1) eq with Some root -> root | None -> (0.28 * len ** (-0.4)) 925 | 926 | let f2 b z = b * exp (sqr z * sqr pi * t_star * (-0.5)) 927 | let a2 = Seq.map2 f2 a [| for i in 0..ni-1 do yield fi i |] |> Array.ofSeq 928 | let a1 = idct a2 929 | let a0 = Array.map (fun x -> x / (2.0*r)) a1 930 | a0 931 | (mesh, density) 932 | 933 | /// Gaussian kernel density estimator for one-dimensional data, using 934 | /// the method of Botev et al. 935 | // 936 | // The result is a pair of vectors, containing: 937 | // 938 | // * The coordinates of each mesh point. The mesh interval is chosen 939 | // to be 20% larger than the range of the sample. (To specify the 940 | // mesh interval, use 'kde2'.) 941 | // 942 | // * Density estimates at each mesh point. 943 | // 944 | // n0 The number of mesh points to use in the uniform discretization 945 | // of the interval @(min,max)@. If this value is not a power of 946 | // two, then it is rounded up to the next power of two. 947 | let kde n0 (xs:float seq) = 948 | if(xs = null) then invalidArg "sample" "cannot be empty" 949 | else 950 | let mutable max = System.Double.MinValue 951 | let mutable min = System.Double.MaxValue 952 | let range = 953 | if Seq.isEmpty xs then 954 | min <- 0.0 955 | max <- 0.0 956 | 1.0 // unreasonable guess 957 | else 958 | xs |> Seq.iter (fun xsi -> 959 | if max < (xsi) then 960 | max <- xsi 961 | if min > (xsi) then 962 | min <- xsi) 963 | if min >= max then 1.0 else max - min 964 | kde2 n0 (min - range/10.0) (max + range/10.0) xs 965 | 966 | module Serialization = 967 | open Angara.Serialization 968 | 969 | let rec serializeDistribution (d:Distribution) = 970 | let islist = 971 | match d with 972 | | Uniform(lower,upper) -> [String "U"; Double lower; Double upper] 973 | | LogUniform(lower,upper) -> [String "LU"; Double lower; Double upper] 974 | | Linear(lower,upper,density) -> [String "L"; Double lower; Double upper; Double density] 975 | | Normal(mean, stdev) -> [String "N"; Double mean; Double stdev] 976 | | LogNormal(mean, stdev) -> [String "LN"; Double mean; Double stdev] 977 | | Gamma(a, b) -> [String "G"; Double a; Double b] 978 | | Binomial(n, p) -> [String "B"; Int n; Double p] 979 | | NegativeBinomial(mean, r) -> [String "NB"; Double mean; Double r] 980 | | Bernoulli(p) -> [String "C"; Double p] 981 | | Exponential(mean) -> [String "E"; Double mean] 982 | | Poisson(mean) -> [String "P"; Double mean] 983 | | Mixture(components) -> 984 | let cc = components |> List.map (fun (weight, c) -> 985 | Seq [Double weight; serializeDistribution c]) 986 | [String "M"; Seq(cc)] 987 | Seq islist 988 | 989 | let rec deserializeDistribution (is:InfoSet) = 990 | let invalidInfoSet() = invalidArg "is" "invalid InfoSet" 991 | match is with 992 | | Seq content -> 993 | match content |> List.ofSeq with 994 | | tag::args -> 995 | match tag.ToStringValue() with 996 | | "U" -> match args with [Double lower; Double upper] -> Uniform(lower, upper) | _ -> invalidInfoSet() 997 | | "LU" -> match args with [Double lower; Double upper]-> LogUniform(lower, upper) | _ -> invalidInfoSet() 998 | | "L" -> match args with [Double lower; Double upper; Double density]-> Linear(lower, upper, density) | _ -> invalidInfoSet() 999 | | "N" -> match args with [Double mean; Double stdev]-> Normal(mean, stdev) | _ -> invalidInfoSet() 1000 | | "LN" -> match args with [Double mean; Double stdev]-> LogNormal(mean, stdev) | _ -> invalidInfoSet() 1001 | | "G" -> match args with [Double a; Double b]-> Gamma(a, b) | _ -> invalidInfoSet() 1002 | | "NB" -> match args with [Double mean; Double r]-> NegativeBinomial(mean, r) | _ -> invalidInfoSet() 1003 | | "B" -> match args with [Int n; Double p]-> Binomial(n, p) | _ -> invalidInfoSet() 1004 | | "C" -> match args with [Double p]-> Bernoulli(p) | _ -> invalidInfoSet() 1005 | | "E" -> match args with [Double p]-> Exponential(p) | _ -> invalidInfoSet() 1006 | | "P" -> match args with [Double p]-> Poisson(p) | _ -> invalidInfoSet() 1007 | | "M" -> 1008 | match args with 1009 | | [Seq is_components] -> 1010 | let components = 1011 | is_components 1012 | |> Seq.map (function 1013 | | (Seq is_c) -> 1014 | match List.ofSeq is_c with [Double weight; is_d] -> weight, deserializeDistribution is_d | _ -> invalidInfoSet() 1015 | | _ -> invalidInfoSet()) 1016 | |> List.ofSeq 1017 | Mixture components 1018 | | _ -> invalidInfoSet() 1019 | | _ -> invalidInfoSet() 1020 | | _ -> invalidInfoSet() 1021 | | _ -> invalidInfoSet() 1022 | 1023 | let serializeMersenneTwister (mt:MT19937) = 1024 | let seed = mt.get_seed() 1025 | use buffer = new System.IO.MemoryStream (seed.Length*4) 1026 | use writer = new System.IO.BinaryWriter(buffer) 1027 | seed |> Array.iter writer.Write 1028 | ByteArray(buffer.GetBuffer()) 1029 | 1030 | let deserializeMersenneTwister is = 1031 | match is with 1032 | | ByteArray byteseq -> 1033 | let buffer = Array.ofSeq byteseq 1034 | use reader = new System.IO.BinaryReader(new System.IO.MemoryStream (buffer)) 1035 | let seed = Array.init (buffer.Length/4) (fun _ -> reader.ReadUInt32()) 1036 | MT19937 seed 1037 | | _ -> invalidArg "is" "Invalid InfoSet" 1038 | 1039 | type DistributionSerializer() = 1040 | interface ISerializer with 1041 | member x.TypeId = "ProbDist" 1042 | member x.Serialize _ d = serializeDistribution d 1043 | member x.Deserialize _ is = deserializeDistribution is 1044 | 1045 | type MersenneTwisterSerializer() = 1046 | interface ISerializer with 1047 | member x.TypeId = "MeresenneTwister" 1048 | member x.Serialize _ mt = serializeMersenneTwister mt 1049 | member x.Deserialize _ is = deserializeMersenneTwister is 1050 | -------------------------------------------------------------------------------- /src/Angara.Statistics/paket.references: -------------------------------------------------------------------------------- 1 | Angara.Serialization -------------------------------------------------------------------------------- /src/Angara.Statistics/paket.template: -------------------------------------------------------------------------------- 1 | type project 2 | title 3 | ##ProjectName## 4 | owners 5 | Vassily Lyutsarev 6 | authors 7 | Vassily Lyutsarev 8 | projectUrl 9 | http://github.com/microsoft/Angara.Statistics 10 | iconUrl 11 | https://raw.githubusercontent.com/microsoft/Angara.Statistics/master/docs/files/img/logo.png 12 | licenseUrl 13 | http://github.com/microsoft/Angara.Statistics/blob/master/LICENSE.txt 14 | requireLicenseAcceptance 15 | false 16 | copyright 17 | Copyright 2015 18 | tags 19 | statistics probability fsharp 20 | summary 21 | A collection of statistics algorithms from Mersenne twister generator to MCMC sampling. 22 | description 23 | Includes Mersenne twister random number generator, common probability distributions, statistics and quantiles, kernel density estimator, Metropolis-Hastings MCMC sampler. 24 | 25 | 26 | -------------------------------------------------------------------------------- /tests/Angara.Statistics.Tests/Angara.Statistics.Tests.fsproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | AnyCPU 7 | 2.0 8 | d5620a55-aa34-4de7-a970-b2168a06d28d 9 | Library 10 | Angara.Statistics.Tests 11 | Angara.Statistics.Tests 12 | v4.5.2 13 | 4.4.0.0 14 | Angara.Statistics.Tests 15 | 16 | ..\..\ 17 | 18 | 19 | true 20 | full 21 | false 22 | false 23 | bin\Debug\ 24 | DEBUG;TRACE 25 | 3 26 | Project 27 | 28 | 29 | 30 | 31 | 32 | 33 | pdbonly 34 | true 35 | true 36 | bin\Release\ 37 | TRACE 38 | 3 39 | 40 | 41 | 11 42 | 43 | 44 | 45 | 46 | $(MSBuildExtensionsPath32)\..\Microsoft SDKs\F#\3.0\Framework\v4.0\Microsoft.FSharp.Targets 47 | 48 | 49 | 50 | 51 | $(MSBuildExtensionsPath32)\Microsoft\VisualStudio\v$(VisualStudioVersion)\FSharp\Microsoft.FSharp.Targets 52 | 53 | 54 | 55 | 56 | 57 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | True 76 | 77 | 78 | 79 | 80 | 81 | Angara.Statistics 82 | {5161430d-44b4-441c-be95-89c02f215d38} 83 | True 84 | 85 | 86 | 87 | 88 | 89 | 90 | ..\..\packages\Angara.Serialization\lib\net452\Angara.Serialization.dll 91 | True 92 | True 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | ..\..\packages\test\Angara.Serialization.Json\lib\net452\Angara.Serialization.Json.dll 102 | True 103 | True 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | ..\..\packages\test\FsUnit\lib\net45\FsUnit.NUnit.dll 113 | True 114 | True 115 | 116 | 117 | ..\..\packages\test\FsUnit\lib\net45\NHamcrest.dll 118 | True 119 | True 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | ..\..\packages\test\Newtonsoft.Json\lib\net35\Newtonsoft.Json.dll 129 | True 130 | True 131 | 132 | 133 | 134 | 135 | 136 | 137 | ..\..\packages\test\Newtonsoft.Json\lib\net20\Newtonsoft.Json.dll 138 | True 139 | True 140 | 141 | 142 | 143 | 144 | 145 | 146 | ..\..\packages\test\Newtonsoft.Json\lib\net40\Newtonsoft.Json.dll 147 | True 148 | True 149 | 150 | 151 | 152 | 153 | 154 | 155 | ..\..\packages\test\Newtonsoft.Json\lib\net45\Newtonsoft.Json.dll 156 | True 157 | True 158 | 159 | 160 | 161 | 162 | 163 | 164 | ..\..\packages\test\Newtonsoft.Json\lib\portable-net45+wp80+win8+wpa81+dnxcore50\Newtonsoft.Json.dll 165 | True 166 | True 167 | 168 | 169 | 170 | 171 | 172 | 173 | ..\..\packages\test\Newtonsoft.Json\lib\portable-net40+sl5+wp80+win8+wpa81\Newtonsoft.Json.dll 174 | True 175 | True 176 | 177 | 178 | 179 | 180 | 181 | 182 | ..\..\packages\test\NUnit\lib\nunit.framework.dll 183 | True 184 | True 185 | 186 | 187 | 188 | 189 | 190 | 191 | ..\..\packages\test\Unquote\lib\net40\Unquote.dll 192 | True 193 | True 194 | 195 | 196 | 197 | 198 | 199 | 200 | ..\..\packages\test\Unquote\lib\net45\Unquote.dll 201 | True 202 | True 203 | 204 | 205 | 206 | 207 | 208 | 209 | ..\..\packages\test\Unquote\lib\portable-net45+netcore45+wpa81+wp8+MonoAndroid1+MonoTouch1\Unquote.dll 210 | True 211 | True 212 | 213 | 214 | 215 | 216 | -------------------------------------------------------------------------------- /tests/Angara.Statistics.Tests/App.config: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/Angara.Statistics.Tests/FilzbachTests.fs: -------------------------------------------------------------------------------- 1 | module FilzbachTests 2 | open NUnit.Framework 3 | open FsUnit 4 | open Swensen.Unquote 5 | 6 | open Angara.Filzbach 7 | 8 | let uniform = Angara.Statistics.Uniform(0.,1.) 9 | 10 | [] 11 | let ParametersTests() = 12 | let p0 = Parameters.Empty 13 | p0.CountValues |> should equal 0 14 | p0.AllValues |> should equal [||] 15 | (fun () -> p0.GetValue 0 |> ignore) |> should throw typeof 16 | (fun () -> p0.GetValue "unknown" |> ignore) |> should throw typeof 17 | (fun () -> p0.GetValue("unknown",0) |> ignore) |> should throw typeof 18 | (fun () -> p0.GetDefinition "unknown" |> ignore) |> should throw typeof 19 | (p0:>IParameters).Count |> should equal 0 20 | 21 | // scalar 22 | (fun () -> p0.Add(null, [|0.5|],0.,1.) |> ignore) |> should throw typeof // empty name 23 | (fun () -> p0.Add("", [|0.5|],0.,1.) |> ignore) |> should throw typeof // empty name 24 | (fun () -> p0.Add("s", [||],0.,1.) |> ignore) |> should throw typeof // empty values 25 | (fun () -> p0.Add("s", [|0.5|],1.,0.) |> ignore) |> should throw typeof // lower>upper 26 | (fun () -> p0.Add("s", [|0.|],0.5,1.) |> ignore) |> should throw typeof // value out of range 27 | (fun () -> p0.Add("s", [|0.5|],0.,1.,isLog=true) |> ignore) |> should throw typeof // lower for log par 28 | let p1 = p0.Add("s", [|0.5|],0.,1.) 29 | p0.Add("s", [|0.5|],0.,1.) |> should equal p1 30 | (fun () -> p1.Add("s", [|0.5|],0.,1.) |> ignore) |> should throw typeof // duplicate name 31 | p1.CountValues |> should equal 1 32 | p1.AllValues |> List.ofSeq |> should equal [0.5] 33 | p1.GetValue 0 |> should equal 0.5 34 | (fun () -> p1.GetValue 1 |> ignore) |> should throw typeof 35 | (fun () -> p1.GetValue -1 |> ignore) |> should throw typeof 36 | p1.GetValue "s" |> should equal 0.5 37 | p1.GetValue("s", 0) |> should equal 0.5 38 | (fun () -> p1.GetValue("s", 1) |> ignore) |> should throw typeof 39 | p1.GetDefinition "s" |> should equal {index=0; size=1; lower=0.; upper=1.; delay=0; prior=uniform; isLog=false; log_priordf=id} 40 | (p1:>IParameters).Count |> should equal 1 41 | (p1:>IParameters).ContainsKey "s" |> should be True 42 | (p1:>IParameters).ContainsKey "a" |> should be False 43 | (p1:>IParameters).["s"] |> should equal [|0.5|] 44 | let v = ref [||] 45 | (p1:>IParameters).TryGetValue("s", v) |> should be True 46 | !v |> should equal [|0.5|] 47 | (p1:>IParameters).Keys |> Seq.toList |> should equal ["s"] 48 | (p1:>IParameters).Values |> Seq.toList |> should equal [[|0.5|]] 49 | 50 | // vector 51 | (fun () -> p0.Add("v", [|0.5; -0.5|],0.,1.) |> ignore) |> should throw typeof // value out of range 52 | let p2 = p0.Add("v", [|0.6;0.7|],0.,1.) 53 | p2.CountValues |> should equal 2 54 | p2.AllValues |> List.ofSeq |> should equal [0.6;0.7] 55 | p2.GetValue 0 |> should equal 0.6 56 | p2.GetValue 1 |> should equal 0.7 57 | (fun () -> p2.GetValue "v" |> ignore) |> should throw typeof // vector syntax 58 | p2.GetValue("v", 0) |> should equal 0.6 59 | p2.GetValue("v", 1) |> should equal 0.7 60 | p2.GetDefinition "v" |> should equal {index=0; size=2; lower=0.; upper=1.; delay=0; prior=uniform; isLog=false; log_priordf=id} 61 | (p2:>IParameters).Count |> should equal 1 62 | (p2:>IParameters).ContainsKey "v" |> should be True 63 | (p2:>IParameters).ContainsKey "a" |> should be False 64 | (p2:>IParameters).["v"] |> should equal [|0.6;0.7|] 65 | (p2:>IParameters).TryGetValue("v", v) |> should be True 66 | !v |> should equal [|0.6;0.7|] 67 | (p2:>IParameters).Keys |> Seq.toList |> should equal ["v"] 68 | (p2:>IParameters).Values |> Seq.toList |> should equal [[|0.6;0.7|]] 69 | 70 | // second scalar 71 | let p3 = p2.Add("s", [|0.5|],0.,1.) 72 | p2.Add("s", [|0.5|],0.,1.) |> should equal p3 73 | (fun () -> p1.Add("s", [|0.5|],0.,1.) |> ignore) |> should throw typeof // duplicate name 74 | p3.CountValues |> should equal 3 75 | p3.AllValues |> List.ofSeq |> should equal [0.6;0.7;0.5] 76 | p3.GetValue 0 |> should equal 0.6 77 | p3.GetValue 1 |> should equal 0.7 78 | p3.GetValue 2 |> should equal 0.5 79 | p3.GetValue("v", 0) |> should equal 0.6 80 | p3.GetValue("v", 1) |> should equal 0.7 81 | p3.GetValue "s" |> should equal 0.5 82 | p3.GetValue("s", 0) |> should equal 0.5 83 | p3.GetDefinition "s" |> should equal {index=2; size=1; lower=0.; upper=1.; delay=0; prior=uniform; isLog=false; log_priordf=id} 84 | p3.GetDefinition "v" |> should equal {index=0; size=2; lower=0.; upper=1.; delay=0; prior=uniform; isLog=false; log_priordf=id} 85 | (p3:>IParameters).Count |> should equal 2 86 | (p3:>IParameters).ContainsKey "s" |> should be True 87 | (p3:>IParameters).ContainsKey "v" |> should be True 88 | (p3:>IParameters).ContainsKey "a" |> should be False 89 | (p3:>IParameters).["s"] |> should equal [|0.5|] 90 | (p3:>IParameters).["v"] |> should equal [|0.6;0.7|] 91 | (p3:>IParameters).TryGetValue("s", v) |> should be True 92 | !v |> should equal [|0.5|] 93 | (p3:>IParameters).TryGetValue("v", v) |> should be True 94 | !v |> should equal [|0.6;0.7|] 95 | (p3:>IParameters).Keys |> Seq.toList |> should equal ["s";"v"] 96 | (p3:>IParameters).Values |> Seq.toList |> should equal [[|0.5|]; [|0.6;0.7|]] 97 | 98 | // second vector 99 | let p4 = p1.Add("v", [|0.6;0.7|],0.,1.) 100 | p1.Add("v", [|0.6;0.7|],0.,1.) |> should equal p4 101 | p4.CountValues |> should equal 3 102 | p4.AllValues |> List.ofSeq |> should equal [0.5;0.6;0.7] 103 | p4.GetValue 0 |> should equal 0.5 104 | p4.GetValue 1 |> should equal 0.6 105 | p4.GetValue 2 |> should equal 0.7 106 | p4.GetValue("v", 0) |> should equal 0.6 107 | p4.GetValue("v", 1) |> should equal 0.7 108 | p4.GetValue "s" |> should equal 0.5 109 | p4.GetValue("s", 0) |> should equal 0.5 110 | p4.GetDefinition "s" |> should equal {index=0; size=1; lower=0.; upper=1.; delay=0; prior=uniform; isLog=false; log_priordf=id} 111 | p4.GetDefinition "v" |> should equal {index=1; size=2; lower=0.; upper=1.; delay=0; prior=uniform; isLog=false; log_priordf=id} 112 | (p4:>IParameters).Count |> should equal 2 113 | (p4:>IParameters).ContainsKey "s" |> should be True 114 | (p4:>IParameters).ContainsKey "v" |> should be True 115 | (p4:>IParameters).ContainsKey "a" |> should be False 116 | (p4:>IParameters).["s"] |> should equal [|0.5|] 117 | (p4:>IParameters).["v"] |> should equal [|0.6;0.7|] 118 | (p4:>IParameters).TryGetValue("s", v) |> should be True 119 | !v |> should equal [|0.5|] 120 | (p4:>IParameters).TryGetValue("v", v) |> should be True 121 | !v |> should equal [|0.6;0.7|] 122 | (p4:>IParameters).Keys |> Seq.toList |> should equal ["s";"v"] 123 | (p4:>IParameters).Values |> Seq.toList |> should equal [[|0.5|]; [|0.6;0.7|]] 124 | 125 | [] 126 | let SamplerTests() = 127 | let assertfail() : 'a = raise (AssertionException(null)) 128 | let mt = Angara.Statistics.MT19937() 129 | let logl (p:Parameters) = 130 | let s = p.AllValues |> Seq.sum 131 | - log (1. + exp(-s)) 132 | let sample = Sampler.Create(Parameters.Empty, mt, logl) 133 | test <@ Seq.isEmpty sample.Parameters.AllValues @> 134 | test <@ Seq.isEmpty (sample.Probe(true, logl).Parameters.AllValues) @> 135 | let s2 = Sampler.Create(Parameters.Empty.Add("a",1.), mt, logl) 136 | test <@ s2.Parameters.AllValues |> Seq.toList = [1.] @> 137 | test <@ s2.Probe(true, logl).Parameters.AllValues |> Seq.toList = [1.] @> 138 | let s3 = Sampler.Create(Parameters.Empty.Add("a",Angara.Statistics.Uniform(1.,2.)), mt, logl) 139 | let v3 = match s3.Parameters.AllValues |> Seq.toList with [v] -> v | _ -> assertfail() 140 | test <@ v3 > 1. && v3 < 2. @> 141 | let s3' = s3 |> Seq.unfold (fun s -> if s.IsAccepted then None else let s' = s.Probe(false,logl) in Some (s',s')) |> Seq.last 142 | let v3' = match s3'.Parameters.AllValues |> Seq.toList with [v] -> v | _ -> assertfail() 143 | test <@ s3'.IsAccepted && (v3 <> v3') @> 144 | let s3'' = s3 |> Seq.unfold (fun s -> if s.IsAccepted then None else let s' = s.Probe(true,logl) in Some (s',s')) |> Seq.last 145 | let v3'' = match s3''.Parameters.AllValues |> Seq.toList with [v] -> v | _ -> assertfail() 146 | test <@ s3''.IsAccepted && (v3 <> v3'') @> 147 | 148 | let s4 = Sampler.Create(Parameters.Empty.Add("a",Angara.Statistics.Uniform(1.,2.)).Add("b",3.), mt, logl) 149 | let v4, v41 = match s4.Parameters.AllValues |> Seq.toList with [v;v'] -> v,v' | _ -> assertfail() 150 | test <@ v41 = 3. && v4 > 1. && v4 < 2. @> 151 | let s4' = s4 |> Seq.unfold (fun s -> if s.IsAccepted then None else let s' = s.Probe(false,logl) in Some (s',s')) |> Seq.last 152 | let v4', v41' = match s4'.Parameters.AllValues |> Seq.toList with [v;v'] -> v,v' | _ -> assertfail() 153 | test <@ v41' = 3. && s4'.IsAccepted && (v4 <> v4') @> 154 | let s4'' = s4 |> Seq.unfold (fun s -> if s.IsAccepted then None else let s' = s.Probe(true,logl) in Some (s',s')) |> Seq.last 155 | let v4'', v41'' = match s4''.Parameters.AllValues |> Seq.toList with [v;v'] -> v,v' | _ -> assertfail() 156 | test <@ v41'' = 3. && s4''.IsAccepted && (v4 <> v4'') @> 157 | 158 | let s5 = Sampler.Create(Parameters.Empty.Add("b",[|3.;3.1|]).Add("a",Angara.Statistics.Uniform(1.,2.)), mt, logl) 159 | let v51, v52, v5 = match s5.Parameters.AllValues |> Seq.toList with [v;v';v''] -> v,v',v'' | _ -> assertfail() 160 | test <@ v51 = 3. && v52 = 3.1 && v5 > 1. && v5 < 2. @> 161 | let s5' = s5 |> Seq.unfold (fun s -> if s.IsAccepted then None else let s' = s.Probe(false,logl) in Some (s',s')) |> Seq.last 162 | let v51', v52', v5' = match s5'.Parameters.AllValues |> Seq.toList with [v;v';v''] -> v,v',v'' | _ -> assertfail() 163 | test <@ v51' = 3. && v52' = 3.1 && s5'.IsAccepted && (v5 <> v5') @> 164 | let s5'' = s5 |> Seq.unfold (fun s -> if s.IsAccepted then None else let s' = s.Probe(true,logl) in Some (s',s')) |> Seq.last 165 | let v51'', v52'', v5'' = match s5''.Parameters.AllValues |> Seq.toList with [v;v';v''] -> v,v',v'' | _ -> assertfail() 166 | test <@ v51'' = 3. && v52'' = 3.1 && s5''.IsAccepted && (v5 <> v5'') @> 167 | 168 | [] 169 | let ContinueationTest() = 170 | let logl (p:Parameters) = 171 | let s = p.AllValues |> Seq.sum 172 | - log (1. + exp(-s)) 173 | let pp = 174 | Parameters.Empty 175 | .Add("b", Angara.Statistics.Uniform(1.,2.)) 176 | .Add("a",Angara.Statistics.Normal(3.,4.),2) 177 | .Add("a b",Angara.Statistics.Uniform(5.,6.)) 178 | let r = Sampler.runmcmc(pp, logl, 100, 100, 1) 179 | 100 =! (r.samples |> Seq.length) 180 | let r1' = Sampler.runmcmc(pp, logl, 50, 100, 1) 181 | 100 =! (r1'.samples |> Seq.length) 182 | r.acceptanceRate <>! r1'.acceptanceRate 183 | r.samples <>! r1'.samples 184 | let r1 = Sampler.continuemcmc(r1', logl, 100, 100) 185 | r.acceptanceRate =! r1.acceptanceRate 186 | r.samples =! r1.samples -------------------------------------------------------------------------------- /tests/Angara.Statistics.Tests/SerializationTests.fs: -------------------------------------------------------------------------------- 1 | module SerializationTests 2 | 3 | open NUnit.Framework 4 | open Swensen.Unquote 5 | 6 | open Angara.Serialization 7 | open Angara.Statistics 8 | open Angara.Filzbach 9 | open Angara.Statistics.Serialization 10 | open Angara.Filzbach.Serialization 11 | 12 | [] 13 | let DistributionSerialization() = 14 | let lib = SerializerLibrary.CreateDefault() 15 | lib.Register(DistributionSerializer()) 16 | let check d = 17 | let is = serializeDistribution d 18 | d =! deserializeDistribution is 19 | let json = Json.FromObject(lib,d).ToString() 20 | d =! Json.ToObject(Newtonsoft.Json.Linq.JObject.Parse json,lib) 21 | Uniform(1., 2.) |> check 22 | LogUniform(1.,2.) |> check 23 | Normal(1., 2.) |> check 24 | LogNormal(1.,2.) |> check 25 | Gamma(1., 2.) |> check 26 | NegativeBinomial(1., 2.) |> check 27 | Binomial(2, 1.) |> check 28 | Bernoulli(0.7) |> check 29 | Exponential(2.) |> check 30 | Poisson(2.) |> check 31 | Mixture[0.4,LogNormal(1.,2.); 0.6,Normal(3.,4.)] |> check 32 | // 33 | raises <@ deserializeDistribution InfoSet.EmptyMap @> 34 | raises <@ deserializeDistribution (InfoSet.Seq[]) @> 35 | raises <@ deserializeDistribution (InfoSet.String "") @> 36 | raises <@ deserializeDistribution (InfoSet.Double 1.) @> 37 | 38 | [] 39 | let MersenneTwisterSerialization() = 40 | let lib = SerializerLibrary.CreateDefault() 41 | lib.Register(MersenneTwisterSerializer()) 42 | let check2 (mt1:MT19937) (mt2:MT19937) = 43 | mt1.uniform_uint32() =! mt2.uniform_uint32() 44 | mt1.uniform_float64() =! mt2.uniform_float64() 45 | mt1.normal() =! mt2.normal() 46 | mt1.uniform_uint32() =! mt2.uniform_uint32() 47 | let check mt = 48 | let is = serializeMersenneTwister mt 49 | let json = Json.FromObject(lib,mt).ToString() 50 | check2 (MT19937(mt)) (deserializeMersenneTwister is) 51 | check2 (MT19937(mt)) (Json.ToObject(Newtonsoft.Json.Linq.JObject.Parse json,lib)) 52 | let mutable mt = MT19937() 53 | for _ in 1..4 do 54 | let seed = mt.get_seed() 55 | check2 mt (MT19937(seed)) 56 | check (MT19937(seed)) 57 | mt <- MT19937(seed) 58 | mt.uniform_uint32() |> ignore 59 | 60 | [] 61 | let ParametersSerialization() = 62 | let lib = SerializerLibrary.CreateDefault() 63 | lib.Register(ParametersSerializer()) 64 | let check p = 65 | let is = serializeParameters p 66 | p =! deserializeParameters is 67 | let json = Json.FromObject(lib,p).ToString() 68 | p =! Json.ToObject(Newtonsoft.Json.Linq.JObject.Parse json,lib) 69 | Parameters.Empty |> check 70 | Parameters.Empty.Add("a", 1.) |> check 71 | Parameters.Empty.Add("a", 1.).Add("b", [|2.; 3.|]) |> check 72 | Parameters.Empty.Add("b", [|2.; 3.|]).Add("a", 1.) |> check 73 | Parameters.Empty 74 | .Add("a", Uniform(1.,2.)) 75 | .Add("z y w", LogUniform(3.,4.), 2) 76 | .Add("k", Normal(5.,6.), 3) 77 | .Add("l", LogNormal(7.,8.),2) |> check 78 | 79 | raises <@ deserializeParameters InfoSet.EmptyMap @> 80 | raises <@ deserializeParameters (InfoSet.Seq[]) @> 81 | raises <@ deserializeParameters (InfoSet.String "") @> 82 | raises <@ deserializeParameters (InfoSet.Double 1.) @> 83 | 84 | [] 85 | let SamplerSerialization() = 86 | let lib = SerializerLibrary.CreateDefault() 87 | Register([lib]) 88 | let logl (p:Parameters) = 89 | let s = p.AllValues |> Seq.sum 90 | - log (1. + exp(-s)) 91 | let s = 92 | Sampler.Create(Parameters.Empty.Add("b", Uniform(1.,2.)).Add("a",Normal(3.,4.),2).Add("a b",Uniform(5.,6.)), MT19937(), logl) 93 | |> Seq.unfold (fun s -> if s.Iteration>100 then None else let s' = s.Probe(true,logl) in Some (s',s')) 94 | |> Seq.last 95 | let json = Json.FromObject(lib, s).ToString() 96 | let s2 = Json.ToObject(Newtonsoft.Json.Linq.JObject.Parse json,lib) 97 | test <@ (s.Parameters.AllValues |> Seq.toArray) = (s2.Parameters.AllValues |> Seq.toArray) @> 98 | 99 | let s' = s |> Seq.unfold (fun s -> 100 | if s.Iteration>200 then None else let s' = s.Probe(true,logl) in Some (s',s')) |> Seq.last 101 | let s2' = s2 |> Seq.unfold (fun s -> 102 | if s.Iteration>200 then None else let s' = s.Probe(true,logl) in Some (s',s')) |> Seq.last 103 | test <@ (s'.Parameters.AllValues |> Seq.toArray) = (s2'.Parameters.AllValues |> Seq.toArray) @> 104 | -------------------------------------------------------------------------------- /tests/Angara.Statistics.Tests/Tests.fs: -------------------------------------------------------------------------------- 1 | module statistics.Tests 2 | open NUnit.Framework 3 | open FsUnit 4 | open Swensen.Unquote.Assertions 5 | type Complex = System.Numerics.Complex 6 | 7 | open Angara.Statistics 8 | 9 | 10 | [] 11 | let QSummary() = 12 | let qnan = {min=nan;lb95=nan;lb68=nan;median=nan;ub68=nan;ub95=nan;max=nan} 13 | qsummary Seq.empty |> should equal qnan 14 | // qsummary filters out nan and infinity values 15 | qsummary [nan] |> should equal qnan 16 | qsummary [infinity] |> should equal qnan 17 | qsummary [-infinity] |> should equal qnan 18 | for one in [-System.Double.MaxValue; -1.; 0.; 1.; System.Double.MaxValue] do 19 | qsummary [one] |> should equal {min=one;lb95=one;lb68=one;median=one;ub68=one;ub95=one;max=one} 20 | qsummary [1.;2.] |> should equal {min=1. ;lb95=1. ;lb68=1. ;median=1.5 ;ub68=2. ;ub95=2. ;max=2.} 21 | // > quantile(1:27 * 300,c(0.025,0.16,0.5,0.84,0.975),type=8) 22 | // 2.5% 16% 50% 84% 97.5% 23 | // 305 1412 4200 6988 8095 24 | let q = qsummary [300. .. 300. .. 8100.] 25 | q.min |> should equal 300. 26 | q.max |> should equal 8100. 27 | within 2u q.lb95 305. |> should be True 28 | within 2u q.lb68 1412. |> should be True 29 | within 2u q.median 4200. |> should be True 30 | within 2u q.ub68 6988. |> should be True 31 | within 2u q.ub95 8095. |> should be True 32 | 33 | 34 | 35 | [] 36 | let Radix2TransformsRealSineCorrectly () = 37 | let n = 16 38 | let step = pi2/float n 39 | let samples = Array.init 16 (fun i -> let x = step * float i in Complex(sin x, 0.)) 40 | let spectrum = fft samples 41 | spectrum |> should haveLength 16 42 | spectrum |> Array.iteri (fun i c -> 43 | c.Real |> should (equalWithin 1.e-12) 0.0) 44 | spectrum |> Array.iteri (fun i c -> 45 | c.Imaginary |> should (equalWithin 1.e-12) (match i with 1 -> -8.0 | 15 -> 8.0 | _ -> 0.)) 46 | let restore = ifft spectrum 47 | Seq.zip restore samples |> Seq.iteri (fun i (r,s) -> 48 | r.Real |> should (equalWithin 1.e-12) s.Real 49 | r.Imaginary |> should (equalWithin 1.e-12) s.Imaginary) 50 | 51 | [] 52 | let dct_1() = 53 | let result = dct [|1.0|] 54 | result |> should (equalWithin 1.e-12) [|2.0|] 55 | 56 | [] 57 | let dct_2a() = 58 | let result = dct [|1.0; 0.0|] 59 | result |> should (equalWithin 1.e-12) [|2.0; 2. * cos(pi*0.25)|] 60 | 61 | [] 62 | let dct_2b() = 63 | let result = dct [|0.0; 1.0|] 64 | result |> should (equalWithin 1.e-12) [|2.0; 2. * cos(pi*0.75)|] 65 | 66 | [] 67 | let dct_4() = 68 | let result = dct [|1.0; 0.0; 0.0; 0.0|] 69 | result |> should (equalWithin 1.e-12) [|2.0; 2.*cos(pi*1./8.); 2.*cos(pi*2./8.); 2.*cos(pi*3./8.)|] 70 | 71 | [] 72 | let within_tests() = 73 | let omicron = System.Double.Epsilon // smallest positive float 74 | let rec find_eps d = if 0.5*d + 1.0 = 1.0 then 1.0+d else find_eps (0.5*d) 75 | let epsilon = find_eps 1.0 // smallest float greater than one 76 | // minus 0 -- the second representation of zero 77 | let minus0 = -omicron / 2.0 78 | minus0 |> should equal 0.0 79 | let bits_0 = System.BitConverter.DoubleToInt64Bits 0.0 80 | let bits_minus0 = System.BitConverter.DoubleToInt64Bits minus0 81 | bits_minus0 |> should not' (equal bits_0) 82 | // 83 | // equality 84 | within 0u 1. 1. |> should be True 85 | within 0u -1. -1. |> should be True 86 | within 0u 0. 0. |> should be True 87 | within 0u minus0 minus0 |> should be True 88 | within 0u 0. minus0 |> should be True 89 | within 0u minus0 0. |> should be True 90 | // adjacent 91 | within 1u 0. omicron |> should be True 92 | within 0u 0. omicron |> should be False 93 | within 1u minus0 omicron |> should be True 94 | within 0u minus0 omicron |> should be False 95 | within 1u 0. -omicron |> should be True 96 | within 0u 0. -omicron |> should be False 97 | within 1u minus0 -omicron |> should be True 98 | within 0u minus0 -omicron |> should be False 99 | within 0u 1. epsilon |> should be False 100 | within 1u 1. epsilon |> should be True 101 | within 2u 1. epsilon |> should be True 102 | within System.UInt32.MaxValue 1. epsilon |> should be True 103 | within 0u epsilon 1. |> should be False 104 | within 1u epsilon 1. |> should be True 105 | within 2u epsilon 1. |> should be True 106 | within System.UInt32.MaxValue epsilon 1. |> should be True 107 | // two steps apart 108 | within 2u 0. (2.*omicron) |> should be True 109 | within 1u 0. (2.*omicron) |> should be False 110 | within 0u 0. (2.*omicron) |> should be False 111 | within 1u omicron -omicron |> should be False 112 | within 2u omicron -omicron |> should be True 113 | within 1u -omicron omicron |> should be False 114 | within 2u -omicron omicron |> should be True 115 | // 116 | for one in [1.; -1.] do 117 | let some = seq {1..100} |> Seq.scan (fun f i -> f*epsilon) (one*epsilon) 118 | some |> Seq.mapi (fun i f -> within (uint32 i) one f) |> should not' (contain true) 119 | some |> Seq.mapi (fun i f -> within (uint32 i + 1u) one f) |> should not' (contain false) 120 | 121 | [] 122 | let ridders_tests() = 123 | ridders 0. (-1.0, -2.0) (fun x -> x) =! None 124 | ridders 0. (1.0, 2.0) (fun x -> x) =! None 125 | ridders 0. (-2.0, -1.0) (fun x -> x) =! None 126 | ridders 0. (2.0, 1.0) (fun x -> x) =! None 127 | ridders 0. (1.0, 0.0) (fun x -> x) =! (Some 0.0) 128 | ridders 0. (0.0, 1.0) (fun x -> x) =! (Some 0.0) 129 | ridders 0. (-1.0, 1.0) (fun x -> x) =! (Some 0.0) 130 | ridders 0. (1.0, -1.0) (fun x -> x) =! (Some 0.0) 131 | // for linear function the solution is exact on the first iteration 132 | ridders 0. (-1.0, 2.0) (fun x -> x) =! (Some 0.0) 133 | ridders 1e-5 (-1.0, 2.0) (fun x -> x) =! (Some 0.0) 134 | ridders 0. (1.0, -2.0) (fun x -> x) =! (Some 0.0) 135 | ridders 1e-5 (1.0, -2.0) (fun x -> x) =! (Some 0.0) 136 | // quadratic function is well-behaved 137 | ridders 0. (0.5, 2.0) (fun x -> x * (x-1.)) =! (Some 1.0) 138 | ridders 1e-5 (-1.0, 2.0) (fun x -> x) =! (Some 0.0) 139 | let t1 = ridders 1e-3 (0.5, 2.0) (fun x -> x * (x-1.)) 140 | test <@ t1.IsSome && abs(t1.Value - 1.0) < 1e-3 @> 141 | let t2 = ridders 1e-15 (0.5, 2.0) (fun x -> x * (x-1.)) 142 | test <@ t2.IsSome && abs(t2.Value - 1.0) < 1e-15 @> 143 | // check exit by within 1u 144 | let t3 = ridders 0.0 (0.5, 2.0) (fun x -> if x < 1. then -1.0 else 1.0) 145 | test <@ t3.IsSome && t3.Value <> 1.0 && within 1u t3.Value 1. @> 146 | // check bisection branch when discriminant vanishes to 0 147 | let f4 x = (sqrt System.Double.Epsilon) * (x-1.) 148 | let t4 = ridders 0.0 (0.5, 2.0) f4 149 | test <@ t4.IsSome && within 1u t4.Value 1. @> 150 | 151 | 152 | [] 153 | let KernelDensityEstimation_2() = 154 | let x,y = kde 2 [| 0.0; 1.0 |] 155 | x |> should (equalWithin 1.e-12) [| -0.1; 1.1 |] 156 | y |> should (equalWithin 1.e-12) [| 1.0/1.2; 1.0/1.2 |] 157 | 158 | [] 159 | let KernelDensityEstimation_Normal() = 160 | let data = [| 0.204644865259654; -0.144545587125715; -0.118956445994713; -0.0469338391365766; 0.03745006601189; 0.0474434648487626; 161 | 0.0489947779088937; -0.023541123128428; -0.141367168805941; 0.0209207976156952; 0.124715845091793; -0.0255870975000263; 0.0146939287935733; 162 | -0.0125076454183801; -0.0451149615797648; 0.0844723611892559; 0.122250651498093; 0.0404912275709768; -0.237762060020886; -0.062608563178955; 163 | 0.0011964951706472; 0.251393099849191; 0.100797742833248; -0.00924513612403402; 0.00391431437480729; -0.00179251318410974; -0.0496055274851082; 164 | 0.0139763147460233; -0.022857038101599; -0.0156334781559978; -0.117729953886438; -0.0145329856473145; 0.168665446368054; -0.125197371233141; 165 | 0.140664523630094; -0.151865856740158; -0.0512219960203086; -0.0782859725775293; 0.051599570148176; -0.150777387039718; -0.125869365367987; 166 | -0.0701060122655738; -0.0362676987446099; -0.110384234156303; -0.0560945580954171; -0.0799446580772691; 0.0691093208986571; 0.0484885695433568; 167 | 0.0340971746323898; -0.06710993 |] 168 | 169 | let x,y = kde 16 data 170 | 171 | // Computed by Haskell's "kde": 172 | let xs = [| -0.28667757600789373; -0.24754516321828757; -0.2084127504286814; -0.16928033763907527; -0.1301479248494691; -9.101551205986294e-2; -5.1883099270256805e-2; -1.2750686480650641e-2; 2.6381726308955522e-2; 6.551413909856169e-2; 0.10464655188816785; 0.143778964677774; 0.18291137746738012; 0.22204379025698634; 0.26117620304659245; 0.30030861583619867 |] 173 | x |> should (equalWithin 1.e-12) xs 174 | 175 | let ys = [| 0.3379500767022503; 0.5684242002094704; 1.0274500185402244; 1.6464757237918854; 2.308063063786562; 2.9326163272608494; 3.42003145533083; 3.5678322580123796; 3.2496570321332054; 2.6063628556585825; 1.9158662533355955; 1.352070206249444; 0.938051703727766; 0.6397244704517325; 0.43125358539892195; 0.3160521681770156 |] 176 | y |> should (equalWithin 0.5) ys 177 | 178 | // from previous version of Angara.Math.KDE 179 | let ys' = [| 180 | 0.35517311013031783 181 | 0.64705323814149784 182 | 1.1707958297235739 183 | 1.7912613969845474 184 | 2.3510114101830766 185 | 2.7648724576259442 186 | 3.0276889436729015 187 | 3.1184456454369429 188 | 2.9667295647726752 189 | 2.5660398727253959 190 | 2.0422444692266968 191 | 1.5465126340059054 192 | 1.1396742615948274 193 | 0.81015114105654906 194 | 0.55401464652080867 195 | 0.40621277696505254 196 | |] 197 | y |> should (equalWithin 1.e-12) ys' 198 | [] 199 | let const_tests() = 200 | test <@ maxint+1.0 = maxint && maxint-1.0 < maxint @> 201 | test <@ 1.0 - tolerance < 1.0 && 1.0 - 0.5*tolerance = 1.0 @> 202 | 203 | [] 204 | let Mersenne_twister_copy_constructor() = 205 | let mt = MT19937() 206 | mt.normal() |> ignore 207 | let mt_copy = MT19937(mt) 208 | test <@ mt.uniform_uint32() = mt_copy.uniform_uint32() @> 209 | 210 | [] 211 | let logistic_tests() = 212 | test <@ logistic 0. = 0.5 @> 213 | test <@ logistic -infinity = 0. @> 214 | test <@ logistic infinity = 1. @> 215 | test <@ within 1u (logistic -log_tolerance) 1. @> 216 | test <@ logistic (log System.Double.Epsilon - 1.) = 0. @> 217 | test <@ logit 0.5 = 0. @> 218 | test <@ logit 0.4 < 0. @> 219 | test <@ logit 0. = -infinity @> 220 | test <@ logit 0.6 > 0. @> 221 | test <@ logit 1. = infinity @> 222 | test <@ within 1u -(logit 0.4) (logit 0.6) @> 223 | test <@ within 1u ((logit >> logistic) 0.6) 0.6 @> 224 | test <@ within 1u ((logit >> logistic) 0.4) 0.4 @> 225 | test <@ within 2u ((logistic >> logit) 0.6) 0.6 @> 226 | test <@ within 2u ((logistic >> logit) 0.4) 0.4 @> 227 | -------------------------------------------------------------------------------- /tests/Angara.Statistics.Tests/paket.references: -------------------------------------------------------------------------------- 1 | Angara.Serialization 2 | group Test 3 | NUnit 4 | NUnit.Runners 5 | FsUnit 6 | Unquote 7 | Angara.Serialization.Json --------------------------------------------------------------------------------