├── LICENSE ├── README.md ├── rebar ├── rebar.config ├── src ├── bear.app.src └── bear.erl └── test └── bear_test.erl /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### bear : a set of statistics functions for erlang 2 | 3 | Currently bear is focused on use inside the Folsom Erlang metrics library but all of these functions are generic and useful in other situations. 4 | 5 | Pull requests accepted! 6 | 7 | #### Available under the Apache 2.0 License 8 | -------------------------------------------------------------------------------- /rebar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/boundary/bear/0717c73bdedd27055211f9234a6efc5588a1299c/rebar -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | {deps, []}. 2 | {erl_opts, [debug_info]}. 3 | {cover_enabled, true}. 
4 | -------------------------------------------------------------------------------- /src/bear.app.src: -------------------------------------------------------------------------------- 1 | {application, bear, 2 | [ 3 | {description, ""}, 4 | {vsn, git}, 5 | {registered, []}, 6 | {applications, []}, 7 | {env, []}, 8 | {modules, []} 9 | ]}. 10 | -------------------------------------------------------------------------------- /src/bear.erl: -------------------------------------------------------------------------------- 1 | %%% 2 | %%% Copyright 2011, Boundary 3 | %%% 4 | %%% Licensed under the Apache License, Version 2.0 (the "License"); 5 | %%% you may not use this file except in compliance with the License. 6 | %%% You may obtain a copy of the License at 7 | %%% 8 | %%% http://www.apache.org/licenses/LICENSE-2.0 9 | %%% 10 | %%% Unless required by applicable law or agreed to in writing, software 11 | %%% distributed under the License is distributed on an "AS IS" BASIS, 12 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %%% See the License for the specific language governing permissions and 14 | %%% limitations under the License. 15 | %%% 16 | 17 | 18 | %%%------------------------------------------------------------------- 19 | %%% File: bear.erl 20 | %%% @author joe williams 21 | %%% @doc 22 | %%% statistics functions for calucating based on id and a list of values 23 | %%% @end 24 | %%%------------------------------------------------------------------ 25 | 26 | -module(bear). 27 | 28 | -compile([export_all]). 29 | 30 | -export([ 31 | get_statistics/1, 32 | get_statistics/2 33 | ]). 34 | 35 | -define(HIST_BINS, 10). 36 | 37 | -define(STATS_MIN, 5). 38 | 39 | -record(scan_result, {n=0, sumX=0, sumXX=0, sumInv=0, sumLog, max, min}). 40 | -record(scan_result2, {x2=0, x3=0, x4=0}). 41 | 42 | -compile([native]). 
%% Full descriptive statistics for a sample with at least ?STATS_MIN
%% (five) elements. Returns a proplist: min, max, means, median,
%% variance, standard deviation, skewness, kurtosis, percentiles,
%% histogram, and the sample size n.
get_statistics([_,_,_,_,_|_] = Values) ->
    Scan_res = scan_values(Values),
    Scan_res2 = scan_values2(Values, Scan_res),
    SortedValues = lists:sort(Values),
    %% All percentile lookups share the sorted list and the scan result.
    Percentiles = [{Tag, percentile(SortedValues, Scan_res, Frac)}
                   || {Tag, Frac} <- [{50, 0.50}, {75, 0.75}, {90, 0.90},
                                      {95, 0.95}, {99, 0.99}, {999, 0.999}]],
    [{min, Scan_res#scan_result.min},
     {max, Scan_res#scan_result.max},
     {arithmetic_mean, arithmetic_mean(Scan_res)},
     {geometric_mean, geometric_mean(Scan_res)},
     {harmonic_mean, harmonic_mean(Scan_res)},
     {median, percentile(SortedValues, Scan_res, 0.5)},
     {variance, variance(Scan_res, Scan_res2)},
     {standard_deviation, std_deviation(Scan_res, Scan_res2)},
     {skewness, skewness(Scan_res, Scan_res2)},
     {kurtosis, kurtosis(Scan_res, Scan_res2)},
     {percentile, Percentiles},
     {histogram, get_histogram(Values, Scan_res, Scan_res2)},
     {n, Scan_res#scan_result.n}];
get_statistics(Values) when is_list(Values) ->
    %% Undersized sample (< ?STATS_MIN elements): same keys, zeroed values.
    ZeroPercentiles = [{Tag, 0.0} || Tag <- [50, 75, 90, 95, 99, 999]],
    [{min, 0.0},
     {max, 0.0},
     {arithmetic_mean, 0.0},
     {geometric_mean, 0.0},
     {harmonic_mean, 0.0},
     {median, 0.0},
     {variance, 0.0},
     {standard_deviation, 0.0},
     {skewness, 0.0},
     {kurtosis, 0.0},
     {percentile, ZeroPercentiles},
     {histogram, [{0, 0}]},
     {n, 0}].
%% Compute only the statistics named in Items for Values; an Item is an
%% atom such as min, variance, n, or {percentile, [50, 99, ...]}.
%% Undersized samples (< ?STATS_MIN elements) get zeroed answers.
get_statistics_subset([_,_,_,_,_|_] = Values, Items) ->
    Length = length(Values),
    SortedValues = lists:sort(Values),
    %% Run only the scan passes the requested items actually need.
    Steps = calc_steps(Items),
    Scan_res = case Steps > 1 of
                   true  -> scan_values(Values);
                   false -> []
               end,
    Scan_res2 = case Steps > 2 of
                    true  -> scan_values2(Values, Scan_res);
                    false -> []
                end,
    report_subset(Items, Length, SortedValues, Scan_res, Scan_res2);
get_statistics_subset(Values, Items) when is_list(Values) ->
    get_null_statistics_subset(Items, []).

%% Zeroed-out answers mirroring the shape of the requested Items.
get_null_statistics_subset([{percentile, Ps} | Items], Acc) ->
    Zeroed = [{P, 0.0} || P <- Ps],
    get_null_statistics_subset(Items, [{percentile, Zeroed} | Acc]);
get_null_statistics_subset([I | Items], Acc) ->
    get_null_statistics_subset(Items, [{I, 0.0} | Acc]);
get_null_statistics_subset([], Acc) ->
    lists:reverse(Acc).

%% Highest scan "level" any requested item needs:
%% 1 = sorted list only, 2 = first scan pass, 3 = both passes.
calc_steps(Items) ->
    ItemLevel = fun({I, _}) -> level(I);
                   (I)      -> level(I)
                end,
    lists:foldl(fun(Item, Acc) -> erlang:max(ItemLevel(Item), Acc) end,
                1, Items).

%% Scan passes required per statistic.
level(standard_deviation) -> 3;
level(variance)           -> 3;
level(skewness)           -> 3;
level(kurtosis)           -> 3;
level(histogram)          -> 3;
level(arithmetic_mean)    -> 2;
level(geometric_mean)     -> 2;
level(harmonic_mean)      -> 2;
level(_)                  -> 1.
%% Build the result proplist for exactly the requested Items, in order.
report_subset(Items, N, SortedValues, Scan_res, Scan_res2) ->
    Report =
        fun(min)             -> {min, hd(SortedValues)};
           (max)             -> {max, lists:last(SortedValues)};
           (arithmetic_mean) -> {arithmetic_mean, arithmetic_mean(Scan_res)};
           (harmonic_mean)   -> {harmonic_mean, harmonic_mean(Scan_res)};
           (geometric_mean)  -> {geometric_mean, geometric_mean(Scan_res)};
           (median)          -> {median, percentile(SortedValues,
                                                    #scan_result{n = N}, 0.5)};
           (variance)        -> {variance, variance(Scan_res, Scan_res2)};
           (standard_deviation = I) ->
                {I, std_deviation(Scan_res, Scan_res2)};
           (skewness)        -> {skewness, skewness(Scan_res, Scan_res2)};
           (kurtosis)        -> {kurtosis, kurtosis(Scan_res, Scan_res2)};
           ({percentile, Ps}) ->
                {percentile, percentiles(Ps, N, SortedValues)};
           (histogram)       ->
                {histogram, get_histogram(SortedValues, Scan_res, Scan_res2)};
           (n)               -> {n, N}
        end,
    [Report(I) || I <- Items].

%% Pairwise statistics for two samples. Returns 0.0 (not a proplist)
%% when either sample is undersized or the lengths differ.
get_statistics(Values, _) when length(Values) < ?STATS_MIN ->
    0.0;
get_statistics(_, Values) when length(Values) < ?STATS_MIN ->
    0.0;
get_statistics(Values1, Values2) when length(Values1) /= length(Values2) ->
    0.0;
get_statistics(Values1, Values2) ->
    [{covariance, get_covariance(Values1, Values2)},
     {tau,        get_kendall_correlation(Values1, Values2)},
     {rho,        get_pearson_correlation(Values1, Values2)},
     {r,          get_spearman_correlation(Values1, Values2)}].

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Seed the single-pass scan with the first element, then accumulate.
scan_values([X | Values]) ->
    Seed = #scan_result{n = 1, sumX = X, sumXX = X * X,
                        sumLog = math_log(X),
                        max = X, min = X, sumInv = inverse(X)},
    scan_values(Values, Seed).
%% First pass: count, sum, sum of squares, log-sum, extremes, and sum of
%% inverses, accumulated in a single traversal.
scan_values([X | Values],
            #scan_result{n = N, sumX = SumX, sumXX = SumXX, sumLog = SumLog,
                         max = Max, min = Min, sumInv = SumInv} = Acc) ->
    Next = Acc#scan_result{n      = N + 1,
                           sumX   = SumX + X,
                           sumXX  = SumXX + X * X,
                           sumLog = SumLog + math_log(X),
                           max    = max(X, Max),
                           min    = min(X, Min),
                           sumInv = SumInv + inverse(X)},
    scan_values(Values, Next);
scan_values([], Acc) ->
    Acc.

%% Second pass: sums of the 2nd, 3rd, and 4th powers of deviations from
%% the mean, used for variance, skewness, and kurtosis.
scan_values2(Values, #scan_result{n = N, sumX = SumX}) ->
    scan_values2(Values, SumX / N, #scan_result2{}).

scan_values2([X | Values], Mean,
             #scan_result2{x2 = X2, x3 = X3, x4 = X4} = Acc) ->
    D = X - Mean,
    D2 = D * D,
    scan_values2(Values, Mean,
                 Acc#scan_result2{x2 = X2 + D2,
                                  x3 = X3 + D2 * D,
                                  x4 = X4 + D2 * D2});
scan_values2([], _Mean, Acc) ->
    Acc.


arithmetic_mean(#scan_result{n = N, sumX = Sum}) ->
    Sum / N.

geometric_mean(#scan_result{n = N, sumLog = SumLog}) ->
    math:exp(SumLog / N).

%% An all-zero sample leaves sumInv at 0; avoid dividing by it.
harmonic_mean(#scan_result{sumInv = Zero}) when Zero =:= 0 orelse
                                                Zero =:= 0.0 ->
    0;
harmonic_mean(#scan_result{n = N, sumInv = Sum}) ->
    N / Sum.

%% Nearest-rank percentile over an already sorted list.
percentile(SortedValues, #scan_result{n = N}, Percentile)
  when is_list(SortedValues) ->
    lists:nth(round(Percentile * N), SortedValues).

%% Two pass variance
%% Results match those given by the 'var' function in R
variance(#scan_result{n = N}, #scan_result2{x2 = X2}) ->
    X2 / (N - 1).

std_deviation(Scan_res, Scan_res2) ->
    math:sqrt(variance(Scan_res, Scan_res2)).
%% http://en.wikipedia.org/wiki/Skewness
%%
%% skewness results should match this R function:
%% skewness <- function(x) {
%%    m3 <- mean((x - mean(x))^3)
%%    skew <- m3 / (sd(x)^3)
%%    skew
%% }
%% Returns 0.0 for a constant sample (zero standard deviation) instead
%% of dividing by zero.
skewness(#scan_result{n = N} = Scan_res, #scan_result2{x3 = X3} = Scan_res2) ->
    case math:pow(std_deviation(Scan_res, Scan_res2), 3) of
        0.0 ->
            0.0; %% Is this really the correct thing to do here?
        Denom ->
            (X3 / N) / Denom
    end.

%% http://en.wikipedia.org/wiki/Kurtosis
%%
%% results should match this R function (excess kurtosis):
%% kurtosis <- function(x) {
%%    m4 <- mean((x - mean(x))^4)
%%    kurt <- m4 / (sd(x)^4) - 3
%%    kurt
%% }
%% Returns 0.0 for a constant sample (zero standard deviation).
kurtosis(#scan_result{n = N} = Scan_res, #scan_result2{x4 = X4} = Scan_res2) ->
    case math:pow(std_deviation(Scan_res, Scan_res2), 4) of
        0.0 ->
            0.0; %% Is this really the correct thing to do here?
        Denom ->
            ((X4 / N) / Denom) - 3
    end.

%% Histogram as a sorted [{UpperBound, Count}] list. Bin upper bounds
%% are derived from the data's range and spread (see get_hist_bins/4).
get_histogram(Values, Scan_res, Scan_res2) ->
    Bins = get_hist_bins(Scan_res#scan_result.min,
                         Scan_res#scan_result.max,
                         std_deviation(Scan_res, Scan_res2),
                         length(Values)),
    Dict = lists:foldl(fun(Value, Dict) ->
                               update_bin(Value, Bins, Dict)
                       end,
                       dict:from_list([{Bin, 0} || Bin <- Bins]),
                       Values),
    lists:sort(dict:to_list(Dict)).

%% Count Value into the first bin whose upper bound is >= Value.
update_bin(Value, [Bin | _Bins], Dict) when Value =< Bin ->
    dict:update_counter(Bin, 1, Dict);
update_bin(Value, [_Bin | Bins], Dict) ->
    update_bin(Value, Bins, Dict);
update_bin(_Value, [], Dict) ->
    %% Defensive: a value beyond the last bin edge (e.g. from float
    %% rounding in bin construction) previously crashed with a
    %% function_clause error; drop it instead.
    Dict.
%% two pass covariance
%% (http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance)
%% matches results given by excel's 'covar' function
%% Returns 0.0 for undersized or length-mismatched samples.
get_covariance(Values, _) when length(Values) < ?STATS_MIN ->
    0.0;
get_covariance(_, Values) when length(Values) < ?STATS_MIN ->
    0.0;
get_covariance(Values1, Values2) when length(Values1) /= length(Values2) ->
    0.0;
get_covariance(Values1, Values2) ->
    %% First pass: means of both samples.
    {SumX, SumY, N} =
        foldl2(fun(X, Y, {SX, SY, Count}) -> {SX + X, SY + Y, Count + 1} end,
               {0, 0, 0}, Values1, Values2),
    MeanX = SumX / N,
    MeanY = SumY / N,
    %% Second pass: sum of products of deviations from the means.
    SumProd = foldl2(fun(X, Y, Sum) -> Sum + (X - MeanX) * (Y - MeanY) end,
                     0, Values1, Values2),
    SumProd / N.

%% Kendall's tau; guards mirror get_covariance/2.
get_kendall_correlation(Values, _) when length(Values) < ?STATS_MIN ->
    0.0;
get_kendall_correlation(_, Values) when length(Values) < ?STATS_MIN ->
    0.0;
get_kendall_correlation(Values1, Values2) when length(Values1) /= length(Values2) ->
    0.0;
get_kendall_correlation(Values1, Values2) ->
    bear:kendall_correlation(Values1, Values2).

%% Spearman's rank correlation coefficient over tied ranks.
get_spearman_correlation(Values, _) when length(Values) < ?STATS_MIN ->
    0.0;
get_spearman_correlation(_, Values) when length(Values) < ?STATS_MIN ->
    0.0;
get_spearman_correlation(Values1, Values2) when length(Values1) /= length(Values2) ->
    0.0;
get_spearman_correlation(Values1, Values2) ->
    Ranks1 = ranks_of(Values1),
    Ranks2 = ranks_of(Values2),
    %% Sum of squared rank differences.
    SumSqDiff = foldl2(fun(R1, R2, Acc) ->
                               D = R1 - R2,
                               Acc + D * D
                       end, 0, Ranks1, Ranks2),
    N = length(Values1),
    Numerator = 6 * SumSqDiff,
    Denominator = math:pow(N, 3) - N,
    1 - (Numerator / Denominator).
%% Fractional ranks of Values (largest value gets rank 1; ties get the
%% average of their positions), returned in the original list order.
ranks_of(Values) when is_list(Values) ->
    [Top | Rest] = revsort(Values),
    RankPairs = ranks_of(Rest, [], 2, Top, 1),
    Tree = gb_trees:from_orddict(RankPairs),
    [gb_trees:get(V, Tree) || V <- Values].

%% Walk the descending list, emitting {Value, AveragedRank} pairs; the
%% accumulator ends up ordered ascending by value (for from_orddict).
ranks_of([E | Es], Acc, N, P, S) ->
    ranks_of(Es, [{P, (S + N - 1) / 2} | Acc], N + 1, E, N);
ranks_of([], Acc, N, P, S) ->
    [{P, (S + N - 1) / 2} | Acc].


%% Pearson product-moment correlation; single fused pass over both lists.
%% Returns 0.0 for undersized, mismatched, or degenerate (constant) input.
get_pearson_correlation(Values, _) when length(Values) < ?STATS_MIN ->
    0.0;
get_pearson_correlation(_, Values) when length(Values) < ?STATS_MIN ->
    0.0;
get_pearson_correlation(Values1, Values2) when length(Values1) /= length(Values2) ->
    0.0;
get_pearson_correlation(Values1, Values2) ->
    {SumX, SumY, SumXX, SumYY, SumXY, N} =
        foldl2(fun(X, Y, {SX, SY, SXX, SYY, SXY, Count}) ->
                       {SX + X, SY + Y, SXX + X * X, SYY + Y * Y,
                        SXY + X * Y, Count + 1}
               end, {0, 0, 0, 0, 0, 0}, Values1, Values2),
    Numer = (N * SumXY) - (SumX * SumY),
    case math:sqrt(((N * SumXX) - (SumX * SumX)) * ((N * SumYY) - (SumY * SumY))) of
        0.0 ->
            0.0; %% Is this really the correct thing to do here?
        Denom ->
            Numer / Denom
    end.

%% Sort descending.
revsort(L) ->
    lists:reverse(lists:sort(L)).

%% Foldl over two lists of equal length, in lockstep.
foldl2(F, Acc, [I1 | L1], [I2 | L2]) when is_function(F, 3) ->
    foldl2(F, F(I1, I2, Acc), L1, L2);
foldl2(_F, Acc, [], []) ->
    Acc.

%% wrapper for math:log/1 to avoid dividing by zero; zero and negative
%% inputs get fixed stand-in values (used for the geometric mean's
%% log-sum).
math_log(0) ->
    1;
math_log(0.0) ->
    1.0;
math_log(X) when X < 0 ->
    0; % it's not possible to take a log of a negative number, return 0
math_log(X) ->
    math:log(X).

%% wrapper for calculating inverse to avoid dividing by zero; zero maps
%% to zero.
inverse(0) ->
    0;
inverse(0.0) ->
    0.0;
inverse(X) ->
    1 / X.
%% Histogram bin upper bounds for the range [Min, Max] given the
%% sample's spread and size. Falls back to a single bin at Max when the
%% computed edges degenerate to one value or none.
get_hist_bins(Min, Max, StdDev, Count) ->
    BinWidth = get_bin_width(StdDev, Count),
    BinCount = get_bin_count(Min, Max, BinWidth),
    case get_bin_list(BinWidth, BinCount, []) of
        List when length(List) =< 1 ->
            [Max];
        Bins ->
            %% Shift the zero-based edges so they start at Min.
            [Bin + Min || Bin <- Bins]
    end.

%% Build Bins edges at successive multiples of Width, each rounded to a
%% "nice" value; duplicates collapse via usort.
get_bin_list(Width, Bins, Acc) when Bins > length(Acc) ->
    Edge = (length(Acc) + 1) * Width,
    get_bin_list(Width, Bins, [round_bin(Edge) | Acc]);
get_bin_list(_Width, _Bins, Acc) ->
    lists:usort(Acc).

%% Round an edge up to a multiple of the power of ten one order of
%% magnitude below it (minimum base 1).
round_bin(Bin) ->
    Base = case erlang:trunc(math:pow(10, round(math:log10(Bin) - 1))) of
               0 -> 1;
               B -> B
           end,
    round_bin(Bin, Base).

round_bin(Bin, Base) when Bin rem Base == 0 ->
    Bin;
round_bin(Bin, Base) ->
    Bin + Base - (Bin rem Base).

% The bin count/width heuristics below are up for debate; they seem to
% work *well enough* in practice.

% bin width based on Sturges
% http://www.jstor.org/pss/2965501
% (never less than 1)
get_bin_width(StdDev, Count) ->
    case round((3.5 * StdDev) / math:pow(Count, 0.3333333)) of
        0 -> 1;
        Width -> Width
    end.

% based on the simple ceiling function at
% http://en.wikipedia.org/wiki/Histograms#Number_of_bins_and_width
% with a modification to attempt to get one bin beyond the max value
get_bin_count(Min, Max, Width) ->
    round((Max - Min) / Width) + 1.
%% taken from http://crunchyd.com/scutil/
%% All code here is MIT Licensed
%% http://scutil.com/license.html

% seems to match the value returned by the 'cor' (method="kendall") R function
% http://en.wikipedia.org/wiki/Kendall_tau_rank_correlation_coefficient
kendall_correlation(List1, List2) when is_list(List1), is_list(List2) ->
    {RA, _} = lists:unzip(tied_ordered_ranking(List1)),
    {RB, _} = lists:unzip(tied_ordered_ranking(List2)),
    %% Order the second ranking by the first, then count concordances.
    {_, OrdB} = lists:unzip(lists:keysort(1, lists:zip(RA, RB))),
    N = length(List1),
    P = lists:sum(kendall_right_of(OrdB, [])),
    -(((4 * P) / (N * (N - 1))) - 1).

%% Positions 1..N paired with the values sorted in descending order.
simple_ranking(List) when is_list(List) ->
    Descending = lists:reverse(lists:sort(List)),
    lists:zip(lists:seq(1, length(List)), Descending).

%% Ranking where tied values share the average of their positions.
tied_ranking(List) ->
    tied_rank_worker(simple_ranking(List), [], no_prev_value).

%% Tied ranking re-ordered to follow the original list order.
tied_ordered_ranking(List) when is_list(List) ->
    tied_ordered_ranking(List, tied_ranking(List), []).

tied_ordered_ranking([], [], Work) ->
    lists:reverse(Work);
tied_ordered_ranking([Front | Rem], Ranks, Work) ->
    {value, {IRank, Front} = Item} = lists:keysearch(Front, 2, Ranks),
    tied_ordered_ranking(Rem, Ranks -- [Item], [{IRank, Front} | Work]).

%% For each element, count how many elements to its right are smaller.
kendall_right_of([], Work) ->
    lists:reverse(Work);
kendall_right_of([F | R], Work) ->
    kendall_right_of(R, [kendall_right_of_item(F, R) | Work]).

kendall_right_of_item(B, Rem) ->
    length([R || R <- Rem, R < B]).

%% Flush a pending tie group onto Work: one entry per member, each
%% carrying the group's averaged rank.
tied_add_prev(Work, {FoundAt, NewValue}) ->
    AvgRank = lists:sum(FoundAt) / length(FoundAt),
    lists:duplicate(length(FoundAt), {AvgRank, NewValue}) ++ Work.
%% Collapse runs of equal values in a simple ranking into tie groups,
%% emitting each group via tied_add_prev/2 with its averaged rank.
tied_rank_worker([], Work, PrevValue) ->
    lists:reverse(tied_add_prev(Work, PrevValue));
tied_rank_worker([Item | Remainder], Work, PrevValue) ->
    case {PrevValue, Item} of
        {no_prev_value, {BaseRank, BaseVal}} ->
            %% First element opens the first tie group.
            tied_rank_worker(Remainder, Work, {[BaseRank], BaseVal});
        {{FoundAt, OldVal}, {Id, OldVal}} ->
            %% Same value as the open group: extend it.
            tied_rank_worker(Remainder, Work, {[Id | FoundAt], OldVal});
        {{_, _} = Prev, {Id, NewVal}} ->
            %% Value changed: flush the open group and start a new one.
            tied_rank_worker(Remainder, tied_add_prev(Work, Prev),
                             {[Id], NewVal})
    end.


%% Nearest-rank values for each requested percentile in Ps, using a
%% single walk over the sorted Values of length N.
percentiles(Ps, N, Values) ->
    Targets = [{P, perc(P, N)} || P <- Ps],
    pick_items(Values, 1, Targets).

%% Walk the list once, emitting {Tag, Value} whenever the current
%% position matches the next requested index (indexes must ascend;
%% repeated indexes reuse the same element). Unreachable indexes map to
%% undefined.
pick_items([H | _] = L, P, [{Tag, P} | Ps]) ->
    [{Tag, H} | pick_items(L, P, Ps)];
pick_items([_ | T], P, Ps) ->
    pick_items(T, P + 1, Ps);
pick_items([], _, Ps) ->
    [{Tag, undefined} || {Tag, _} <- Ps].

%% Map a percentile spec to a 1-based index into a list of length Len.
%% Integer 0..100 is a percent; integer 100..1000 is a permille
%% (e.g. 999 -> 99.9%); float 0..1 is a fraction. Never less than 1.
perc(P, Len) when is_integer(P), 0 =< P, P =< 100 ->
    erlang:max(1, round(P * Len / 100));
perc(P, Len) when is_integer(P), 100 =< P, P =< 1000 ->
    erlang:max(1, round(P * Len / 1000));
perc(P, Len) when is_float(P), 0 =< P, P =< 1 ->
    erlang:max(1, round(P * Len)).
--------------------------------------------------------------------------------
/test/bear_test.erl:
--------------------------------------------------------------------------------
%%%
%%% Copyright 2013, Rodolphe Quiedeville
%%%
%%% Licensed under the Apache License, Version 2.0 (the "License");
%%% you may not use this file except in compliance with the License.
6 | %%% You may obtain a copy of the License at 7 | %%% 8 | %%% http://www.apache.org/licenses/LICENSE-2.0 9 | %%% 10 | %%% Unless required by applicable law or agreed to in writing, software 11 | %%% distributed under the License is distributed on an "AS IS" BASIS, 12 | %%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | %%% See the License for the specific language governing permissions and 14 | %%% limitations under the License. 15 | %%% 16 | 17 | %%% ==================================================================== 18 | %%% file : bear_test.erl 19 | %%% @author : Rodolphe Quiedeville 20 | %%% @doc 21 | %%% Unit test for functions defined in bear.erl 22 | %%% @end 23 | %%% ==================================================================== 24 | -module(bear_test). 25 | 26 | -compile(export_all). 27 | 28 | -record(scan_result, {n=0, sumX=0, sumXX=0, sumInv=0, sumLog, max, min}). 29 | -record(scan_result2, {x2=0, x3=0, x4=0}). 30 | 31 | -include_lib("eunit/include/eunit.hrl"). 32 | 33 | -define(PRECISION_DIGIT, 6). 
%% get_statistics/1 on an empty value list: every statistic must collapse
%% to its zero default. The expectations are table-driven; the assertions
%% run in the same order as the keys below.
get_statistics_1_empty_test() ->
    Stats = bear:get_statistics([]),
    Percentile = [{50, 0.0}, {75, 0.0}, {90, 0.0}, {95, 0.0}, {99, 0.0}, {999, 0.0}],
    Expected = [{min, 0.0},
                {max, 0.0},
                {arithmetic_mean, 0.0},
                {geometric_mean, 0.0},
                {harmonic_mean, 0.0},
                {median, 0.0},
                {variance, 0.0},
                {standard_deviation, 0.0},
                {skewness, 0.0},
                {kurtosis, 0.0},
                {percentile, Percentile},
                {histogram, [{0,0}]},
                {n, 0}],
    lists:foreach(
      fun({Key, Want}) ->
              ?assertEqual({Key, Want}, lists:keyfind(Key, 1, Stats))
      end, Expected).
%% get_statistics/1 on a non-empty sample (the 50 values of sample1/0).
%% Exact statistics are matched with ?assertEqual; floating-point ones go
%% through approx/2 (precision ?PRECISION_DIGIT digits).
get_statistics_1_regular_test() ->
    Percentile = [{50, -10},{75, 23},{90, 43},{95, 46},{99, 50},{999, 50}],
    Stats = bear:get_statistics(sample1()),

    {geometric_mean, Geometric} = lists:keyfind(geometric_mean, 1, Stats),
    {harmonic_mean, Harmonic} = lists:keyfind(harmonic_mean, 1, Stats),
    {variance, Variance} = lists:keyfind(variance, 1, Stats),
    {standard_deviation, StandardDeviation} = lists:keyfind(standard_deviation, 1, Stats),
    {kurtosis, Kurtosis} = lists:keyfind(kurtosis, 1, Stats),
    {skewness, Skewness} = lists:keyfind(skewness, 1, Stats),

    ?assertEqual({min, -49}, lists:keyfind(min, 1, Stats)),
    ?assertEqual({max, 50}, lists:keyfind(max, 1, Stats)),
    ?assertEqual({arithmetic_mean, -1.66}, lists:keyfind(arithmetic_mean, 1, Stats)),
    ?assertEqual(true, approx(4.08326, Geometric)),
    ?assertEqual(true, approx(54.255629738, Harmonic)),
    ?assertEqual({median, -10}, lists:keyfind(median, 1, Stats)),
    ?assertEqual(true, approx(921.0453061, Variance)),
    ?assertEqual(true, approx(30.348728, StandardDeviation)),
    ?assertEqual(true, approx(0.148722, Skewness)),
    ?assertEqual(true, approx(-1.2651687, Kurtosis)),
    ?assertEqual({percentile, Percentile}, lists:keyfind(percentile, 1, Stats)),
    ?assertEqual({histogram, [{-20,16},{11,16},{41,12},{71,6}]}, lists:keyfind(histogram, 1, Stats)),
    ?assertEqual({n, 50}, lists:keyfind(n, 1, Stats)).

%% get_statistics/2 when the SECOND value list is empty: the pair cannot
%% be correlated and the implementation returns the scalar 0.0.
%% (Previous comment had the two "empty set" cases swapped.)
get_statistics_2_1_test() ->
    Stats = bear:get_statistics(lists:seq(1,10), []),
    ?assertEqual(0.0, Stats).

%% get_statistics/2 when the FIRST value list is empty: same 0.0 sentinel.
get_statistics_3_test() ->
    Stats = bear:get_statistics([], lists:seq(1,10)),
    ?assertEqual(0.0, Stats).
%% get_statistics/2 with two sets of values of different sizes: not
%% computable, signalled by the scalar 0.0.
get_statistics_4_test() ->
    Stats = bear:get_statistics(lists:seq(1,10),lists:seq(1,20)),
    ?assertEqual(0.0, Stats).

%% get_statistics/2 with two valid, equally sized sets. Here Y = 2*X + 4,
%% so every correlation coefficient (tau, rho, r) is exactly 1.0.
get_statistics_5_test() ->
    Stats = bear:get_statistics(lists:seq(0,10),lists:seq(4,24,2)),
    ?assertEqual({covariance, 20.0}, lists:keyfind(covariance, 1, Stats)),
    ?assertEqual({tau, 1.0}, lists:keyfind(tau, 1, Stats)),
    ?assertEqual({rho, 1.0}, lists:keyfind(rho, 1, Stats)),
    ?assertEqual({r, 1.0}, lists:keyfind(r, 1, Stats)).

%% scan_values/1,2: one-pass accumulation of n, sum, sum of squares, sum of
%% inverses, sum of logs, max and min into #scan_result{}. An empty input
%% returns the accumulator unchanged.
scan_values_test() ->
    ?assertEqual(#scan_result{n=8}, bear:scan_values([], #scan_result{n=8})),
    ?assertEqual(#scan_result{n=1,sumX=1,sumXX=1,sumInv=1.0,sumLog=0.0,max=1,min=1}, bear:scan_values([1])),
    ?assertEqual(#scan_result{n=4,sumX=10,sumXX=30,sumInv=2.083333333333333,sumLog=3.1780538303479453,max=4,min=1},
                 bear:scan_values([1,3,2,4])).

%% scan_values2/2,3: second pass accumulating powers of the deviation from
%% the mean (sumX/n = 42/8 = 5.25 here) into #scan_result2{x2, x3, x4}.
%% An empty input returns the accumulator unchanged.
scan_values2_test() ->
    ?assertEqual(#scan_result{n=8}, bear:scan_values2([], 3, #scan_result{n=8})),
    ?assertEqual(#scan_result2{x2=6.6875,x3=-13.359375,x4=28.07421875}, bear:scan_values2([4,3,5], #scan_result{n=8,sumX=42})).

%% revsort/1: sort in descending order.
revsort_test() ->
    ?assertEqual([], bear:revsort([])),
    ?assertEqual([4,3,2], bear:revsort([3,2,4])).

%% arithmetic mean = sumX / n (40 / 4 = 10.0).
arithmetic_mean_test() ->
    ?assertEqual(10.0, bear:arithmetic_mean(#scan_result{n=4, sumX=40})).

%% geometric mean = exp(sumLog / n) (exp(13/4)).
geometric_mean_test() ->
    ?assertEqual(25.790339917193062, bear:geometric_mean(#scan_result{n=4, sumLog=13})).

%% harmonic mean = n / sumInv, with 0 as the guard value when sumInv is 0.
harmonic_mean_test() ->
    ?assertEqual(0, bear:harmonic_mean(#scan_result{n=100, sumInv=0})),
    ?assertEqual(10.0, bear:harmonic_mean(#scan_result{n=100, sumInv=10})).
%% percentile/3: sorted values, scan result (for n), and a float fraction;
%% returns the element at that fraction of the list.
percentile_test() ->
    ?assertEqual(3, bear:percentile([1,2,3,4,5], #scan_result{n=5},0.5)),
    ?assertEqual(5, bear:percentile([1,2,3,4,5], #scan_result{n=5},0.95)).

%% variance: 42 / (7 - 1) = 7.0.
variance_test() ->
    ?assertEqual(7.0, bear:variance(#scan_result{n=7},#scan_result2{x2=42})).

%% standard deviation: sqrt(81 / (10 - 1)) = 3.0.
std_deviation_test() ->
    ?assertEqual(3.0, bear:std_deviation(#scan_result{n=10},#scan_result2{x2=81})).

%% skewness is 0.0 when there is no spread (x2 = 0).
skewness_test() ->
    ?assertEqual(0.0, bear:skewness(#scan_result{n=10},#scan_result2{x2=0,x3=81})),
    ?assertEqual(3.0, bear:skewness(#scan_result{n=10},#scan_result2{x2=81,x3=810})).

%% kurtosis is 0.0 when there is no spread (x2 = 0).
kurtosis_test() ->
    ?assertEqual(0.0, bear:kurtosis(#scan_result{n=10},#scan_result2{x2=0,x4=81})),
    ?assertEqual(-2.0, bear:kurtosis(#scan_result{n=10},#scan_result2{x2=81,x4=810})).

%% update_bin/3 starting from an empty dict: the bin is created with a
%% count of 1.
update_bin_1_test() ->
    Dict = dict:new(),
    C = bear:update_bin(4, [4], Dict),
    ?assertEqual(1, dict:fetch(4, C)).

%% get_covariance/2 returns 0.0 for any pair of lists it cannot use.
get_covariance_exceptions_test() ->
    %% First list is too short
    ?assertEqual(0.0, bear:get_covariance([], [2,1,2,3,4,5,6])),
    %% Second list is too short
    ?assertEqual(0.0, bear:get_covariance([1,2,3,4,5,6], [])),
    %% Different list lengths
    ?assertEqual(0.0, bear:get_covariance([1,2,3,4,5,6], [1,2,3,4,5,6,7])).

%% Usual case. The result is not the same as R computes; R uses an
%% unbiased estimate, see
%% http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Covariance
get_covariance_regular_test() ->
    ?assertEqual(true, approx(170.813599, bear:get_covariance(sample1(),sample2()))).

%% ranks_of/1: rank of each element within the list; the largest element
%% (15) gets rank 1.0.
ranks_of_test() ->
    ?assertEqual([4.0,3.0,1.0,2.0], bear:ranks_of([3,4,15,6])).
%% get_pearson_correlation/2: 0.0 for unusable input (non-lists, different
%% lengths) and exactly 1.0 for perfectly linearly related series.
get_pearson_correlation_exceptions_test() ->
    ?assertEqual(0.0, bear:get_pearson_correlation([], 42)),
    ?assertEqual(0.0, bear:get_pearson_correlation(42, [])),
    ?assertEqual(0.0, bear:get_pearson_correlation(lists:seq(1,10), lists:seq(1,11))),
    ?assertEqual(1.0, bear:get_pearson_correlation(lists:seq(1,10), lists:seq(1,10))),
    ?assertEqual(1.0, bear:get_pearson_correlation(lists:seq(0,10), lists:seq(5,15))).

%% Expected value calculated with R.
get_pearson_correlation_regular_test() ->
    ?assertEqual(true, approx(0.2068785, bear:get_pearson_correlation(sample1(), sample2()))).

%% The two series do not correlate (B is symmetric around the middle of A).
get_pearson_correlation_nullresult_test() ->
    A = [-1,-0.5,0,0.5,1],
    B = [1,0.25,0,0.25,1],
    ?assertEqual(0.0, bear:get_pearson_correlation(A, B)).

%% round_bin/2 rounds the value up to the next multiple of the bin width
%% (15 -> 42 with width 42; 42 -> 45 with width 15). round_bin/1
%% presumably applies a default width -- the case shown only fixes
%% round_bin(10) =:= 10.
round_bin_test() ->
    ?assertEqual(10, bear:round_bin(10)),
    ?assertEqual(10, bear:round_bin(10, 5)),
    ?assertEqual(42, bear:round_bin(15, 42)),
    ?assertEqual(45, bear:round_bin(42, 15)).

%% get_bin_width/2: a zero first argument yields the minimum width of 1.
get_bin_width_test() ->
    ?assertEqual(1, bear:get_bin_width(0, 10)),
    ?assertEqual(22, bear:get_bin_width(10.0, 4.0)).

%% get_bin_count/3: number of bins needed to cover [Min, Max] with the
%% given width.
get_bin_count_test() ->
    ?assertEqual(3, bear:get_bin_count(9, 15, 3)),
    ?assertEqual(4, bear:get_bin_count(10.2, 20.2, 4)).

%% get_kendall_correlation/2: 0.0 for empty or different-length inputs.
get_kendall_correlation_exceptions_test()->
    ?assertEqual(0.0, bear:get_kendall_correlation([], [])),
    ?assertEqual(0.0, bear:get_kendall_correlation([], [1,2,3,4,5,6,7])),
    ?assertEqual(0.0, bear:get_kendall_correlation([1,2,3,4,5,6,7],[])),
    ?assertEqual(0.0, bear:get_kendall_correlation(lists:seq(1,10),lists:seq(1,11))).

%% Kendall's tau on the sorted samples; expected value calculated with R.
get_kendall_correlation_regular_test()->
    Kendall = bear:get_kendall_correlation(sample1(order), sample2(order)),
    ?assertEqual(true, approx(0.9787755, Kendall)).
%% kendall_correlation/2 on the same sorted samples must agree with the
%% get_kendall_correlation/2 coefficient above.
kendall_correlation_test()->
    Kendall = bear:kendall_correlation(sample1(order), sample2(order)),
    ?assertEqual(true, approx(0.9787755, Kendall)).

%% get_spearman_correlation/2: 0.0 for empty or different-length inputs.
get_spearman_correlation_exceptions_test()->
    ?assertEqual(0.0, bear:get_spearman_correlation([], [])),
    ?assertEqual(0.0, bear:get_spearman_correlation([], [1,2,3,4,5,6,7])),
    ?assertEqual(0.0, bear:get_spearman_correlation([1,2,3,4,5,6,7],[])),
    ?assertEqual(0.0, bear:get_spearman_correlation(lists:seq(1,10),lists:seq(1,11))).

%% Spearman's rho on the sorted samples.
get_spearman_correlation_regular_test()->
    ?assertEqual(true, approx(0.997888, bear:get_spearman_correlation(sample1(order), sample2(order)))).

%% math_log/1 guards against log(0): integer 0 -> 1, float 0.0 -> 1.0;
%% anything else is the natural logarithm.
math_log_test() ->
    ?assertEqual(1, bear:math_log(0)),
    ?assertEqual(1.0, bear:math_log(0.0)),
    ?assertEqual(true, approx(3.737669618283368, bear:math_log(42))).

%% inverse/1: 1/X, with 0 and 0.0 passed through to avoid dividing by zero.
inverse_test() ->
    ?assertEqual(0, bear:inverse(0)),
    ?assertEqual(0.0, bear:inverse(0.0)),
    ?assertEqual(0.5, bear:inverse(2)).

%% get_hist_bins/4: bin boundaries for the given min/max/width/count.
get_hist_bins_test() ->
    ?assertEqual([4], bear:get_hist_bins(1, 4, 5, 10)).

%% tied_ordered_ranking/3: with no remaining values, the accumulated
%% ranking is returned reversed.
tied_ordered_ranking_test() ->
    ?assertEqual([3,2,1], bear:tied_ordered_ranking([], [], [1,2,3])).

%% NOTE(review): this exercises bear:kendall_right_of/2; the test name
%% spells it "off". Renaming would change the eunit test set, so the name
%% is left as-is.
kendall_right_off_test() ->
    %% Empty first list
    ?assertEqual("654321", bear:kendall_right_of([],"123456")).

%% tied_add_prev/2 flushes a run of tied values: each of the four ids gets
%% the averaged rank (1+2+3+4)/4 = 2.5, prepended to the work list.
tied_add_prev_test() ->
    ?assertEqual([{2.5,5},{2.5,5},{2.5,5},{2.5,5},{2,3}], bear:tied_add_prev([{2, 3}], {[1,2,3,4], 5})).

%% tied_rank_worker/3: ids 1,2,3 share the averaged rank (1+2+3)/3 = 2.0;
%% further tied items extend the run before it is flushed.
tied_rank_worker_test() ->
    ?assertEqual([{2.0,5},{2.0,5},{2.0,5},{2.0,5}], bear:tied_rank_worker([], [{2.0,5}], {[1,2,3], 5})),
    ?assertEqual([{2.0,5},{2.0,5},{2.0,5},{2.0,5},{2.0,5},{2.0,5}],
                 bear:tied_rank_worker([{2.0,5},{2.0,5}], [{2.0,5}], {[1,2,3], 5})).

%% perc/2 accepts an integer percent (36% of 40 -> index 14), an integer
%% permille (900 of 5 -> 5) and a float fraction (0.9 of 5 -> 5).
perc_test() ->
    ?assertEqual(14, bear:perc(36, 40)),
    ?assertEqual(5, bear:perc(900, 5)),
    ?assertEqual(5, bear:perc(0.9, 5)).
%% get_statistics_subset/2: not-enough-values case returns an empty list.
get_statistics_subset_nev_test() ->
    ?assertEqual([], bear:get_statistics_subset([1,2], [])).

%% Regular case: only the requested statistics come back, in request order.
get_statistics_subset_regular_test() ->
    ?assertEqual([{max, 50},{min, -49}], bear:get_statistics_subset(sample1(), [max,min])).

%% Every statistic computed by get_statistics/1 must be reproducible one
%% at a time through get_statistics_subset/2.
subset_test() ->
    Stats = bear:get_statistics(test_values()),
    match_values(Stats).

%% ...and all at once.
full_subset_test() ->
    Stats = bear:get_statistics(test_values()),
    match_values2(Stats).

%% Make sure things don't blow up with a negative value in the sample.
negative_test() ->
    Values = [1,-1,-2,3,3,4,5,6,7],
    [{min, -2}] = bear:get_statistics_subset(Values, [min]).

%% Same, with an all-negative sample.
negative2_test() ->
    Values = [-1,-1,-2,-2,-3,-5,-6,-10],
    [{min, -10}] = bear:get_statistics_subset(Values, [min]).

%% For each computed {Key, Value} statistic, request just that statistic
%% from get_statistics_subset/2 and pattern-match that the identical pair
%% round-trips.
match_values([H|T]) ->
    Res = bear:get_statistics_subset(test_values(), [mk_item(H)]),
    Res = [H],
    match_values(T);
match_values([]) ->
    ok.

%% Convert a computed statistic into the request form understood by
%% get_statistics_subset/2: percentile keeps its list of P values,
%% everything else is requested by bare key.
mk_item({percentile, Ps}) ->
    {percentile, [P || {P,_} <- Ps]};
mk_item({K, _}) ->
    K.

%% Request every statistic at once and pattern-match that the full
%% proplist round-trips unchanged.
match_values2(Stats) ->
    Items = [mk_item(I) || I <- Stats],
    Stats = bear:get_statistics_subset(test_values(), Items),
    ok.

%% Fixed positive sample (values 1..9, skewed towards the high end) used
%% by the subset tests.
test_values() ->
    [1,1,1,1,1,1,1,
     2,2,2,2,2,2,2,
     3,3,3,3,3,3,3,3,3,3,3,3,3,3,
     4,4,4,4,4,4,4,4,4,4,4,4,4,4,
     5,5,5,5,5,5,5,5,5,5,5,5,5,5,
     6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
     7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
     8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
     9,9,9,9,9,9,9].
%% All-negative counterpart of test_values/0. No caller in this file --
%% kept as a fixture.
negative_values() ->
    [-1,-1,-1,-1,-1,-1,-1,
     -2,-2,-2,-2,-2,-2,-2,
     -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
     -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
     -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
     -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
     -7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,-7,
     -8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,-8,
     -9,-9,-9,-9,-9,-9,-9].

%% Inclusive range check.
between(Value, Low, High) ->
    (Value >= Low) and (Value =< High).

%% approx/2: true when Value lies strictly within 10^-?PRECISION_DIGIT of
%% Target. On a mismatch it deliberately returns Value itself (not false)
%% so that a failing ?assertEqual(true, approx(...)) shows the actual
%% number in the eunit report.
approx(Target, Value) ->
    High = Target + math:pow(10, - ?PRECISION_DIGIT),
    Low = Target - math:pow(10, - ?PRECISION_DIGIT),
    case (Value > Low) and (Value < High) of
        true -> true;
        _ -> Value
    end.

%% Guard against editing mistakes in the sample fixtures: both columns
%% must keep exactly 50 values, sorted or not.
check_sample_test() ->
    ?assertEqual(50, length(sample1())),
    ?assertEqual(50, length(sample1(order))),
    ?assertEqual(50, length(sample2())),
    ?assertEqual(50, length(sample2(order))).

%% sample1(order) / sample2(order): the same samples sorted ascending
%% (the rank-correlation tests take ordered input).
sample1(X) when X == order ->
    lists:sort(sample1()).

sample2(X) when X == order ->
    lists:sort(sample2()).

%% Data from file bear/samples/data.csv, first column (X).
sample1() ->
    [-16,-18,-47,22,-18,36,25,49,-24,15,36,-10,-21,43,-35,1,-24,10,33,-21,-18,-36,-36,-43,-37,-10,23,50,31,-49,43,46,22,-43,12,-47,15,-14,6,-31,46,-8,0,-46,-16,-22,6,10,38,-11].

%% Data from file bear/samples/data.csv, second column (Y).
sample2() ->
    [33,20,-35,16,-19,8,25,3,4,10,36,-20,-41,43,28,39,-30,3,-47,-23,17,-6,-50,16,-26,-49,8,-31,24,16,32,27,-19,-32,-17,1,-37,25,-50,-32,-42,-22,25,18,-34,-37,7,-13,16,10].
--------------------------------------------------------------------------------