├── .github └── workflows │ └── erlang.yml ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── rebar.config ├── rebar3 └── src ├── basho_stats.app.src ├── basho_stats_histogram.erl ├── basho_stats_rv.erl ├── basho_stats_sample.erl └── basho_stats_utils.erl /.github/workflows/erlang.yml: -------------------------------------------------------------------------------- 1 | name: Erlang CI 2 | 3 | on: 4 | push: 5 | branches: [ develop ] 6 | pull_request: 7 | branches: [ develop ] 8 | 9 | 10 | jobs: 11 | 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | otp: 20 | - "25.1" 21 | - "24.3" 22 | - "22.3" 23 | 24 | container: 25 | image: erlang:${{ matrix.otp }} 26 | 27 | steps: 28 | - uses: lukka/get-cmake@latest 29 | - uses: actions/checkout@v2 30 | - name: Compile 31 | run: ./rebar3 compile 32 | - name: Run xref and dialyzer 33 | run: ./rebar3 do xref, dialyzer 34 | - name: Run eunit 35 | run: ./rebar3 as gha do eunit 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # rebar 2 | *.crashdump 3 | /.cache/ 4 | /.eqc* 5 | /.eunit/ 6 | /.rebar/ 7 | /.rebar3/ 8 | /_build/ 9 | /_checkouts 10 | /deps 11 | /rebar.lock 12 | 13 | # work environments 14 | *.bak 15 | *.dump 16 | *.iml 17 | *.plt 18 | *.sublime-project 19 | *.sublime-workspace 20 | *.tmp 21 | *.txt 22 | *_plt 23 | *~ 24 | .DS_Store 25 | .idea/ 26 | .project 27 | .settings/ 28 | .tm_properties 29 | erln8.config 30 | tmp/ 31 | 32 | # Erlang build/test artifacts 33 | *.app 34 | *.beam 35 | *.eqc 36 | /doc/ 37 | /ebin/ 38 | log/ 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, 
REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: compile rel cover test dialyzer eqc 2 | REBAR=./rebar3 3 | 4 | compile: 5 | $(REBAR) compile 6 | 7 | clean: 8 | $(REBAR) clean 9 | 10 | cover: test 11 | $(REBAR) cover 12 | 13 | test: compile 14 | $(REBAR) as test do eunit 15 | 16 | dialyzer: 17 | $(REBAR) dialyzer 18 | 19 | xref: 20 | $(REBAR) xref 21 | 22 | # As test, because the TEST macro is used 23 | eqc: 24 | $(REBAR) as test eqc 25 | 26 | check: test dialyzer xref 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # basho_stats 2 | 3 | [![Erlang CI Actions Status](https://github.com/basho/basho_stats/workflows/Erlang%20CI/badge.svg)](https://github.com/basho/basho_stats/actions) 4 | 5 | ```shell 6 | $ rebar3 as prod compile 7 | $ rebar3 as check xref 8 | $ rebar3 as check dialyzer 9 | $ rebar3 eunit 10 | $ rebar3 edoc 11 | ``` 12 | 
-------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*- 2 | %% ------------------------------------------------------------------- 3 | %% 4 | %% Copyright (c) 2010-2017 Basho Technologies, Inc. 5 | %% 6 | %% This file is provided to you under the Apache License, 7 | %% Version 2.0 (the "License"); you may not use this file 8 | %% except in compliance with the License. You may obtain 9 | %% a copy of the License at 10 | %% 11 | %% http://www.apache.org/licenses/LICENSE-2.0 12 | %% 13 | %% Unless required by applicable law or agreed to in writing, 14 | %% software distributed under the License is distributed on an 15 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | %% KIND, either express or implied. See the License for the 17 | %% specific language governing permissions and limitations 18 | %% under the License. 19 | %% 20 | %% ------------------------------------------------------------------- 21 | 22 | {erl_opts, [ 23 | warn_bif_clash, 24 | warn_export_all, 25 | warn_export_vars, 26 | warn_obsolete_guard, 27 | warn_unused_import, 28 | warnings_as_errors 29 | ]}. 30 | 31 | {profiles, [ 32 | {gha, [{erl_opts, [{d, 'GITHUBEXCLUDE'}]}]}, 33 | {test, [ 34 | {cover_enabled, true}, 35 | {erl_opts, [ 36 | debug_info, 37 | nowarn_deprecated_function, 38 | nowarn_export_all, 39 | nowarn_unused_function, 40 | nowarn_unused_import, 41 | warnings_as_errors, 42 | {d, 'BASHO_TEST'} 43 | ]} 44 | ]} 45 | 46 | ]}. 47 | 48 | {plugins, [ 49 | {eqc_rebar, {git, "https://github.com/Quviq/eqc-rebar", {branch, "master"}}} 50 | ]}. 51 | 52 | {xref_checks,[undefined_function_calls,undefined_functions,locals_not_used, 53 | deprecated_function_calls, deprecated_functions]}. 
54 | 55 | -------------------------------------------------------------------------------- /rebar3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/basho/basho_stats/3b33ac15e11cc88e2fc676a1e2d31b80bd02bea6/rebar3 -------------------------------------------------------------------------------- /src/basho_stats.app.src: -------------------------------------------------------------------------------- 1 | {application, basho_stats, [ 2 | {description, "Basic Erlang statistics library"}, 3 | {vsn, git}, 4 | {modules, []}, 5 | {registered, []}, 6 | {applications, [kernel, stdlib, sasl]}, 7 | {env, []}, 8 | {maintainers, ["Basho Technologies, Inc.", "Heinz N. Gies"]}, 9 | {licenses, ["Apache 2.0"]}, 10 | {links, [{"Github", "https://github.com/basho/basho_stats"}]} 11 | ]}. 12 | -------------------------------------------------------------------------------- /src/basho_stats_histogram.erl: -------------------------------------------------------------------------------- 1 | %% ------------------------------------------------------------------- 2 | %% 3 | %% Copyright (c) 2011-2017 Basho Technologies, Inc. 4 | %% Copyright (c) 2009 Dave Smith (dizzyd@dizzyd.com) 5 | %% 6 | %% This file is provided to you under the Apache License, 7 | %% Version 2.0 (the "License"); you may not use this file 8 | %% except in compliance with the License. You may obtain 9 | %% a copy of the License at 10 | %% 11 | %% http://www.apache.org/licenses/LICENSE-2.0 12 | %% 13 | %% Unless required by applicable law or agreed to in writing, 14 | %% software distributed under the License is distributed on an 15 | %% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 16 | %% KIND, either express or implied. See the License for the 17 | %% specific language governing permissions and limitations 18 | %% under the License. 19 | %% 20 | %% ------------------------------------------------------------------- 21 | 22 | %% @doc Histograms. 
-module(basho_stats_histogram).

-export([
         counts/1,
         new/3,
         observations/1,
         quantile/2,
         summary_stats/1,
         update/2,
         update_all/2
        ]).

-ifdef(TEST).
-ifdef(EQC).
-export([
         prop_count/0,
         prop_quantile/0
        ]).
-include_lib("eqc/include/eqc.hrl").
-endif. % EQC
-include_lib("eunit/include/eunit.hrl").
-endif. % TEST

-define(FMT(Str, Args), lists:flatten(io_lib:format(Str, Args))).

-record(hist, { n = 0,
                min,
                max,
                bin_scale,
                bin_step,
                bins,
                capacity,
                stats }).

%% ===================================================================
%% Public API
%% ===================================================================

%%
%% Create an empty histogram covering MinVal..MaxVal, split into NumBins
%% equally wide bins. Bin counters are kept in a gb_tree keyed by bin
%% index; running summary statistics are delegated to basho_stats_sample.
%%
new(MinVal, MaxVal, NumBins) ->
    Range = MaxVal - MinVal,
    #hist { min = MinVal,
            max = MaxVal,
            bin_scale = NumBins / Range,   % bins per unit of value
            bin_step = Range / NumBins,    % width of a single bin
            bins = gb_trees:empty(),
            capacity = NumBins,
            stats = basho_stats_sample:new() }.


%%
%% Update the histogram with a new observation.
%%
%% NOTE: update/2 caps values within #hist.min and #hist.max;
%% if you provide a value outside those boundaries the first or last
%% bin, respectively, get updated and the histogram is consequently
%% skewed.
%%
update(Value, Hist) ->
    Bin = which_bin(Value, Hist),
    Bins = Hist#hist.bins,
    %% A bin that has never been hit has no entry in the tree yet.
    Count = case gb_trees:lookup(Bin, Bins) of
                none ->
                    1;
                {value, Seen} ->
                    Seen + 1
            end,
    Hist#hist { n = Hist#hist.n + 1,
                bins = gb_trees:enter(Bin, Count, Bins),
                stats = basho_stats_sample:update(Value, Hist#hist.stats) }.

%%
%% Fold a list of observations into the histogram, in list order.
%%
update_all(Values, Hist) ->
    lists:foldl(fun(Value, Acc) -> update(Value, Acc) end, Hist, Values).

%%
%% Estimate the quantile from the histogram. Quantile should be a value
%% between 0 and 1. Returns 'NaN' if the histogram is currently empty.
%%
%% For a non-empty histogram the quantile must lie in the closed range
%% [0, 1]; any other value fails with a function_clause error.
%%
quantile(_Quantile, #hist { n = 0 }) ->
    'NaN';
quantile(Quantile, Hist)
  when Quantile >= 0, Quantile =< 1 ->
    %% Sort out how many (possibly fractional) samples we need to satisfy
    %% the requested quantile
    MaxSamples = Quantile * Hist#hist.n,

    %% Now iterate over the bins, in ascending order, until we have gathered
    %% enough samples to satisfy the request. The resulting bin is an estimate.
    Itr = gb_trees:iterator(Hist#hist.bins),
    case quantile_itr(gb_trees:next(Itr), 0, MaxSamples) of
        max ->
            %% Ran out of bins before reaching the requested sample count
            Hist#hist.max;
        EstBin ->
            %% We have an estimated (fractional) bin -- convert it back to
            %% a value on the histogram's scale
            Hist#hist.min + (EstBin / Hist#hist.bin_scale)
    end.

%%
%% Get the counts for each bin in the histogram, as a list ordered from
%% the first bin to the last; bins never hit are reported as 0.
%%
counts(Hist) ->
    [bin_count(I, Hist) || I <- lists:seq(0, Hist#hist.capacity - 1)].


%%
%% Number of observations that are present in this histogram
%%
observations(Hist) ->
    Hist#hist.n.

%%
%% Return basic summary stats for this histogram, as produced by
%% basho_stats_sample:summary/1
%%
summary_stats(Hist) ->
    basho_stats_sample:summary(Hist#hist.stats).


%% ===================================================================
%% Internal functions
%% ===================================================================

%%
%% Map a value onto a bin index in 0..capacity-1.
%%
%% The index is first estimated with trunc/1 and then corrected against
%% the bin's actual bounds: a value sitting exactly on a bin's lower
%% bound is moved to the previous bin, and a value above the upper bound
%% is moved to the next one. The result is always clamped to the valid
%% range, so values below min land in the first bin and values at or
%% above max land in the last bin.
%%
which_bin(Value, Hist) ->
    Min = Hist#hist.min,
    Step = Hist#hist.bin_step,
    Bin = trunc((Value - Min) * Hist#hist.bin_scale),
    Lower = Min + (Bin * Step),
    Upper = Min + ((Bin + 1) * Step),
    Candidate = if
                    Value > Upper ->
                        Bin + 1;
                    Value =< Lower ->
                        Bin - 1;
                    true ->
                        Bin
                end,
    %% Clamp on every path; previously only some branches were capped, so
    %% values beyond max could create bins outside the histogram.
    erlang:max(0, erlang:min(Candidate, Hist#hist.capacity - 1)).
156 | 157 | 158 | quantile_itr(none, _Samples, _MaxSamples) -> 159 | max; 160 | quantile_itr({Bin, Counter, Itr2}, Samples, MaxSamples) -> 161 | Samples2 = Samples + Counter, 162 | if 163 | Samples2 < MaxSamples -> 164 | %% Not done yet, move to next bin 165 | quantile_itr(gb_trees:next(Itr2), Samples2, MaxSamples); 166 | true -> 167 | %% We only need some of the samples in this bin; we make 168 | %% the assumption that values within the bin are uniformly 169 | %% distributed. 170 | Bin + ((MaxSamples - Samples) / Counter) 171 | end. 172 | 173 | 174 | bin_count(Bin, Hist) -> 175 | case gb_trees:lookup(Bin, Hist#hist.bins) of 176 | {value, Count} -> 177 | Count; 178 | none -> 179 | 0 180 | end. 181 | 182 | %% =================================================================== 183 | %% Unit Tests 184 | %% =================================================================== 185 | 186 | -ifdef(TEST). 187 | 188 | simple_test() -> 189 | %% Pre-calculated tests 190 | [7,0] = counts(update_all([10,10,10,10,10,10,14], new(10,18,2))). 191 | 192 | -ifdef(EQC). 193 | 194 | qc_count_check(Min, Max, Bins, Xs) -> 195 | LCounts = counts(update_all(Xs, new(Min, Max, Bins))), 196 | RCounts = basho_stats_utils:r_run(Xs, 197 | ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts", 198 | [Min, Max, Bins+1])), 199 | case LCounts == RCounts of 200 | true -> 201 | true; 202 | _ -> 203 | io:format("LCounts ~p, RCounts ~p~n", [LCounts, RCounts]), 204 | false 205 | end. 


%% Prints an R snippet that reproduces a failing case by hand.
qc_print_r_repro(Xs, Command) ->
    Joined = string:join([integer_to_list(X) || X <- Xs], ","),
    io:format("x <- c(~s)\n", [Joined]),
    io:format("write(~s, ncolumns=1, file=stdout())\n", [Command]).

%% Property: bin counts agree with R's hist() for random integer data.
prop_count() ->
    ?FORALL({Min, Bins, Xlen}, {choose(0, 99), choose(2, 20), choose(2, 100)},
            ?LET(Max, choose(Min+1, 100),
                 ?LET(Xs, vector(Xlen, choose(Min, Max)),
                      ?WHENFAIL(
                         begin
                             io:format("Min ~p, Max ~p, Bins ~p, Xs ~w~n",
                                       [Min, Max, Bins, Xs]),
                             qc_print_r_repro(
                               Xs,
                               ?FMT("hist(x, seq(~w,~w,length.out=~w), plot=FALSE)$counts",
                                    [Min, Max, Bins+1]))
                         end,
                         qc_count_check(Min, Max, Bins, Xs))))).

%% Requires a working R installation; fails early when R is missing.
qc_count_test() ->
    ?assertEqual(ok, basho_stats_utils:r_check()),
    ?assertEqual(true, eqc:quickcheck(prop_count())).

%% Compare our quantile estimate for Q percent against R's type-4
%% quantile; the two must differ by less than 1.
qc_quantile_check(Q, Min, Max, Bins, Xs) ->
    Fraction = Q * 0.01,
    Lq = quantile(Fraction, update_all(Xs, new(Min, Max, Bins))),
    [Rq] = basho_stats_utils:r_run(Xs, ?FMT("quantile(x, ~4.2f, type=4)", [Fraction])),
    case abs(Lq - Rq) < 1 of
        false ->
            ?debugMsg("----\n"),
            ?debugFmt("Q: ~p Min: ~p Max: ~p Bins: ~p\n", [Q, Min, Max, Bins]),
            ?debugFmt("Lq: ~p != Rq: ~p\n", [Lq, Rq]),
            ?debugFmt("Xs: ~w\n", [Xs]),
            false;
        true ->
            true
    end.

prop_quantile() ->
    %% Loose checking of the quantile estimation against R's more precise method.
    %%
    %% To ensure a minimal level of accuracy, we ensure that we have between 50-200 bins
    %% and between 100-500 data points.
    %%
    %% TODO: Need to nail down the exact error bounds
    %%
    %% XXX since we try to generate the quantile from the histogram, not the
    %% original data, our results and R's don't always agree and this means the
    %% test will occasionally fail. There's not an easy way to fix this.
    ?SOMETIMES(3,
               %% as the comment above states, this is
               %% non-deterministic, but it should _never_ fail 3
               %% times of 3
               ?FORALL({Min, Bins, Xlen, Q},
                       {choose(1, 99), choose(50, 200), choose(100, 500), choose(0,100)},
                       ?LET(Max, choose(Min+1, 100),
                            ?LET(Xs, vector(Xlen, choose(Min, Max)),
                                 ?WHENFAIL(
                                    begin
                                        io:format("Min ~p, Max ~p, Bins ~p, Q ~p, Xs ~w~n",
                                                  [Min, Max, Bins, Q, Xs]),
                                        qc_print_r_repro(
                                          Xs,
                                          ?FMT("quantile(x, ~4.2f, type=4)", [Q * 0.01]))
                                    end,
                                    qc_quantile_check(Q, Min, Max, Bins, Xs)))))).

-endif. % EQC
-endif. % TEST

--------------------------------------------------------------------------------
/src/basho_stats_rv.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2010-2017 Basho Technologies, Inc.
%% Copyright (c) 2009 Dave Smith (dizzyd@dizzyd.com)
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Random values.
-module(basho_stats_rv).

-export([
         exponential/1,
         normal/2,
         poisson/1,
         uniform/0
        ]).

%% ====================================================================
%% Public API
%% ====================================================================

%%
%% @doc Generates a uniformly-distributed random float.
%%
-ifdef(NO_RAND_MODULE).
uniform() ->
    % Make sure the PRNG in this process is seeded.
    % Alas, it *could* have been previously seeded with the default, but we
    % can't tell that, and if someone took care seeding it with good entropy
    % we don't want to throw that away on them.
    case erlang:get(random_seed) of
        undefined ->
            {A, B, C} = os:timestamp(),
            _ = random:seed(
                  erlang:phash2({A, erlang:make_ref()}, 1 bsl 32),
                  erlang:phash2({B, erlang:self()}, 1 bsl 32), C),
            ok;
        _ ->
            ok
    end,
    random:uniform().
-else.
-compile({inline, [uniform/0]}).
uniform() ->
    rand:uniform().
-endif.

%%
%% @doc Generates an exponential-distributed random variable, using inverse function.
%%
%% The uniform sample U may be exactly 0.0, for which math:log/1 raises
%% badarith, so the logarithm is taken of 1 - U instead (as normal/2 does).
%% 1 - U is uniformly distributed as well, so the result keeps the same
%% distribution.
%%
exponential(Lambda) ->
    -math:log(1 - uniform()) / Lambda.

%%
%% @doc Generates a Poisson-distributed random variable by summing exponential rvs.
%%
%% Warning: This may be slow!!
%%
%% The summing loop does not run at all when Lambda is not positive and
%% would then report its initial counter of -1; the result is clamped so
%% that a count below 0 is never returned.
%%
poisson(Lambda) ->
    erlang:max(0, poisson_rv_loop(Lambda, 0.0, -1)).

%%
%% @doc Generates a Normal-distributed random variable, using Box-Muller method.
%%
normal(Mean, Sigma) ->
    Rv1 = uniform(),
    Rv2 = uniform(),
    %% 1-Rv2 keeps the argument of math:log/1 strictly positive
    Rho = math:sqrt(-2 * math:log(1-Rv2)),
    Rho * math:cos(2 * math:pi() * Rv1) * Sigma + Mean.


%% ====================================================================
%% Internal functions
%% ====================================================================

%%
%% Keeps adding unit-rate exponential samples to Sum until it reaches
%% Lambda, counting the additions in N; N is returned once Sum >= Lambda.
%%
%% The logarithm is taken of 1 - U rather than U because the uniform
%% sample U may be exactly 0.0, for which math:log/1 raises badarith.
%%
poisson_rv_loop(Lambda, Sum, N) when Sum < Lambda ->
    poisson_rv_loop(Lambda, Sum - math:log(1 - uniform()), N+1);
poisson_rv_loop(_Lambda, _Sum, N) ->
    N.

--------------------------------------------------------------------------------
/src/basho_stats_sample.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2011-2017 Basho Technologies, Inc.
%% Copyright (c) 2009 Dave Smith (dizzyd@dizzyd.com)
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Statistics Suite for Erlang.
-module(basho_stats_sample).

-export([
         count/1,
         max/1,
         mean/1,
         min/1,
         new/0,
         sdev/1,
         summary/1,
         update/2,
         update_all/2,
         variance/1
        ]).

-ifdef(TEST).
-ifdef(EQC).
-export([prop_main/0]).
-include_lib("eqc/include/eqc.hrl").
-endif. % EQC
-include_lib("eunit/include/eunit.hrl").
-endif.
% TEST

-record(state, { n = 0,
                 min = 'NaN',
                 max = 'NaN',
                 sum = 0,
                 sum2 = 0 }).


%% ===================================================================
%% Public API
%% ===================================================================

%%
%% Create an empty sample: no observations, min and max are 'NaN'.
%%
new() ->
    #state{}.

%%
%% Add one observation, updating the count, the extremes, the running
%% sum and the running sum of squares.
%%
update(Value, State) ->
    State#state {
      n   = State#state.n + 1,
      min = nan_min(Value, State#state.min),
      max = nan_max(Value, State#state.max),
      sum = State#state.sum + Value,
      sum2= State#state.sum2 + (Value * Value)}.


%%
%% Add every observation in Values, in list order.
%%
update_all(Values, State) ->
    lists:foldl(fun(Value, S) -> update(Value, S) end,
                State, Values).

%%
%% Number of observations seen so far.
%%
count(State) ->
    State#state.n.

%%
%% Smallest observation, or 'NaN' when the sample is empty.
%%
min(State) ->
    State#state.min.

%%
%% Arithmetic mean, or 'NaN' when the sample is empty.
%%
mean(#state{n = 0}) ->
    'NaN';
mean(State) ->
    State#state.sum / State#state.n.

%%
%% Largest observation, or 'NaN' when the sample is empty.
%%
max(State) ->
    State#state.max.

%%
%% Sample variance (n - 1 denominator), or 'NaN' with fewer than two
%% observations.
%%
%% The value is derived from the running sum and sum of squares, so
%% floating-point cancellation can push a (near-)zero variance slightly
%% below zero. It is clamped at 0.0 so that callers, and sdev/1 in
%% particular, never see a negative variance.
%%
variance(#state { n = N }) when N < 2 ->
    'NaN';
variance(State) ->
    SumSq = State#state.sum * State#state.sum,
    Raw = (State#state.sum2 - (SumSq / State#state.n)) / (State#state.n - 1),
    erlang:max(0.0, Raw).


%%
%% Sample standard deviation, or 'NaN' when the variance is undefined.
%%
sdev(State) ->
    case variance(State) of
        'NaN' ->
            'NaN';
        Value ->
            math:sqrt(Value)
    end.

%%
%% All summary statistics at once, as {Min, Mean, Max, Variance, SDev}.
%%
summary(State) ->
    {min(State), mean(State), max(State), variance(State), sdev(State)}.


%% ===================================================================
%% Internal functions
%% ===================================================================

%% min/max that treat 'NaN' as "no value yet" instead of comparing it
%% as an atom.
nan_min(V1, 'NaN') -> V1;
nan_min('NaN', V1) -> V1;
nan_min(V1, V2)    -> erlang:min(V1, V2).

nan_max(V1, 'NaN') -> V1;
nan_max('NaN', V1) -> V1;
nan_max(V1, V2)    -> erlang:max(V1, V2).
%% ===================================================================
%% Unit Tests
%% ===================================================================

-ifdef(TEST).

%% Summaries of two small integer samples against hand-checked values.
simple_test() ->
    %% A few hand-checked values
    OneToFive = update_all([1,2,3,4,5], new()),
    {1,3.0,5,2.5,1.5811388300841898} = summary(OneToFive),
    EveryThird = update_all(lists:seq(1,10,3), new()),
    {1,5.5,10,15.0,3.872983346207417} = summary(EveryThird).

%% An empty sample reports 'NaN' for every statistic.
empty_test() ->
    Empty = new(),
    {'NaN','NaN','NaN','NaN','NaN'} = summary(Empty).


-ifdef(EQC).

%% True when both lists have the same length and each pair of elements
%% differs by less than 0.01.
lists_equal([], []) ->
    true;
lists_equal([V1 | R1], [V2 | R2]) ->
    abs(V1-V2) < 0.01 andalso lists_equal(R1, R2).

%% Property: for random integer vectors of length 2..100 the summary
%% computed here agrees with the values R reports for the same data.
prop_main() ->
    ?FORALL(Xlen, choose(2, 100),
            ?LET(Xs, vector(Xlen, int()),
                 begin
                     FromR = basho_stats_utils:r_run(Xs,"c(min(x), mean(x), max(x), var(x), sd(x))"),
                     FromErlang = tuple_to_list(summary(update_all(Xs, new()))),
                     lists_equal(FromR, FromErlang)
                 end)).

-endif. % EQC
-endif. % TEST

--------------------------------------------------------------------------------
/src/basho_stats_utils.erl:
--------------------------------------------------------------------------------
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2011-2017 Basho Technologies, Inc.
%% Copyright (c) 2009 Dave Smith (dizzyd@dizzyd.com)
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.
%% See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Unit Test Helpers.
%% The functions in this module are only visible when running tests.
-module(basho_stats_utils).

-ifdef(TEST).
-export([
    r_check/0,
    r_run/2
]).
-include_lib("eunit/include/eunit.hrl").

%% Process-dictionary keys for the cached R executable path and R port.
-define(R_KEY, {?MODULE, 'R_exe'}).
-define(P_KEY, {?MODULE, 'R_port'}).
-define(R_ERR, {error, missing_R_executable}).
-define(FMT(Str, Args), lists:flatten(io_lib:format(Str, Args))).

%% ===================================================================
%% Unit Test Helpers
%% ===================================================================

-spec r_check() -> ok | ?R_ERR.
%%
%% Checks for the presence of the R executable.
%% Returns ok when r_exe/0 yields a path and the error tuple otherwise.
%%
r_check() ->
    case r_exe() of
        {error, _} = Err ->
            Err;
        _Exe ->
            ok
    end.

-spec r_run(Input :: [integer()], Command :: string()) ->
        [number()] | {error, term()}.
%%
%% Runs R Command with Input.
%%
%% Input is bound to the R variable `x'; Command is then evaluated and its
%% result written one value per line, followed by an empty line that marks
%% the end of the output for r_simple_read_loop/2. Returns the parsed
%% numbers, or the {error, Reason} tuple from r_port/0 when no usable R
%% port is available. Exits with {error, Lines} when R prints a line
%% starting with "Error".
%%
r_run(Input, Command) ->
    case r_port() of
        {ok, Port} ->
            InputStr = [integer_to_list(I) || I <- Input],
            port_command(Port, ?FMT("x <- c(~s)\n", [string:join(InputStr, ",")])),
            port_command(Port, ?FMT("write(~s, ncolumns=1, file=stdout())\n", [Command])),
            %% Empty line = end-of-output marker for the reader.
            port_command(Port, "write('', file=stdout())\n"),
            r_simple_read_loop(Port, []);
        {error, _} = Err ->
            Err
    end.

%% ===================================================================
%% Internal
%% ===================================================================

-spec r_exe() -> string() | ?R_ERR.
%% Locates the R executable on the path, caching the result (path or
%% error tuple) in the process dictionary so the lookup happens only once
%% per process.
r_exe() ->
    case erlang:get(?R_KEY) of
        undefined ->
            R = case os:find_executable("R") of
                    false ->
                        ?R_ERR;
                    File ->
                        File
                end,
            _ = erlang:put(?R_KEY, R),
            R;
        Cached ->
            Cached
    end.

-spec r_port() -> {ok, port()} | {error, term()}.
%% Returns a responsive port to an R process, opening one on first use and
%% caching it in the process dictionary. A cached port that fails its
%% health check is forgotten and closed, and the error is returned.
r_port() ->
    case erlang:get(?P_KEY) of
        undefined ->
            r_port_open(r_exe());
        Prev ->
            r_port_reuse(Prev)
    end.

-spec r_port_open(string() | {error, term()}) -> {ok, port()} | {error, term()}.
%% Starts R on a new port and caches the port once it answers the health
%% check; a port that fails the check is closed instead of being leaked.
r_port_open({error, _} = Err) ->
    Err;
r_port_open(Exe) ->
    Port = erlang:open_port({spawn_executable, Exe}, [
        {args, ["--vanilla", "--slave"]}, {line, 16384},
        use_stdio, exit_status, stderr_to_stdout, hide]),
    case r_port_(Port) of
        ok ->
            _ = erlang:put(?P_KEY, Port),
            {ok, Port};
        {error, _} = PErr ->
            ok = r_port_close(Port),
            PErr
    end.

-spec r_port_reuse(port()) -> {ok, port()} | {error, term()}.
%% Re-validates a previously cached port.
r_port_reuse(Port) ->
    case r_port_(Port) of
        ok ->
            {ok, Port};
        {error, _} = PErr ->
            _ = erlang:erase(?P_KEY),
            ok = r_port_close(Port),
            PErr
    end.

-spec r_port_close(port()) -> ok.
%% Closes Port; a port that is already closed is not an error.
r_port_close(Port) ->
    try erlang:port_close(Port) of
        true ->
            ok
    catch
        error:badarg ->
            ok
    end.

%% Upper bound, in milliseconds, on the wait for R to answer a health check.
-define(R_REPLY_TIMEOUT, 30000).

-spec r_port_(Port :: port()) -> ok | {error, term()}.
%% Check the status of the port
%% Asks R to print an empty line and waits for it. Any other line is
%% returned as the error reason. An exited R process or a missing answer
%% yields an error instead of blocking the caller forever.
r_port_(Port) ->
    try erlang:port_command(Port, "write('', file=stdout())\n") of
        _ ->
            receive
                {Port, {data, {eol, []}}} ->
                    ok;
                {Port, {data, {eol, Other}}} ->
                    {error, Other};
                {Port, {exit_status, Status}} ->
                    {error, {exit_status, Status}}
            after ?R_REPLY_TIMEOUT ->
                    {error, timeout}
            end
    catch
        error:badarg ->
            {error, port_closed}
    end.

-spec r_simple_read_loop(port(), [number()]) -> [number()].
%% Collects one number per output line until an empty line or the exit of
%% the R process. A line starting with "Error" aborts the calling process
%% with exit({error, Lines}), where Lines is the error output gathered by
%% get_error/2.
r_simple_read_loop(Port, Acc) ->
    receive
        {Port, {data, {eol, []}}} ->
            lists:reverse(Acc);
        {Port, {data, {eol, "Error" ++ _ = Line}}} ->
            exit({error, get_error(Port, [Line])});
        {Port, {data, {eol, Line}}} ->
            r_simple_read_loop(Port, [to_number(Line) | Acc]);
        {Port, {exit_status, _}} ->
            lists:reverse(Acc)
    end.

-spec get_error(port(), [string()]) -> [string()].
%% Gathers the remaining lines of an R error message, in output order,
%% up to the next empty line or the exit of the R process.
get_error(Port, Acc) ->
    receive
        {Port, {data, {eol, []}}} ->
            lists:reverse(Acc);
        {Port, {data, {eol, Line}}} ->
            get_error(Port, [Line | Acc]);
        {Port, {exit_status, _}} ->
            lists:reverse(Acc)
    end.
-spec to_number(string()) -> number().
%% Parses one line of R output as an integer when possible and as a float
%% otherwise. Fails with badarg when Str is neither.
to_number(Str) ->
    try
        list_to_integer(Str)
    catch
        error:badarg ->
            to_float(Str)
    end.

-spec to_float(string()) -> float().
%% Parses Str as a float. list_to_float/1 insists on a fractional part, so
%% exponent notation without one (e.g. "1e+05") is given a ".0" mantissa
%% suffix before parsing.
to_float(Str) ->
    try
        list_to_float(Str)
    catch
        error:badarg ->
            case string:split(Str, "e") of
                [Mantissa, Exponent] ->
                    list_to_float(Mantissa ++ ".0e" ++ Exponent);
                _ ->
                    erlang:error(badarg, [Str])
            end
    end.

-endif. % TEST

--------------------------------------------------------------------------------