├── .gitignore ├── Classes ├── SBFullStatistics.m ├── SBStatistics.h └── SBStatistics.m ├── Readme.md └── Tests ├── Errors.m ├── Full.m ├── Stream.m └── Tests.h /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | build/ 3 | DerivedData/ 4 | *.mode1v3 5 | *.pbxuser 6 | xcuserdata 7 | project.xcworkspace 8 | 9 | 10 | .loadpath 11 | .project 12 | 13 | -------------------------------------------------------------------------------- /Classes/SBFullStatistics.m: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2008, Stig Brautaset. All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the author nor the names of its contributors may be 15 | used to endorse or promote products derived from this software without 16 | specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#import "SBStatistics.h"

/// Adds one to the tally stored under @p key in @p dict. A key without an
/// entry yet ends up with a tally of 1. Tallies are boxed ints.
static void incrementValueForKey(NSMutableDictionary *dict, NSNumber *key)
{
    NSNumber *tally = dict[key];

    int next = 1;
    if (tally)
        next = [tally intValue] + 1;

    dict[key] = @(next);
}


/// Private storage: the raw data points in insertion order, plus a cache
/// of the same points in ascending order.
@interface SBFullStatistics ()

@property (nonatomic, strong) NSMutableArray *mutableData;
@property (nonatomic, copy) NSArray *sortedData;

@end



/// Unlike its superclass, this class holds on to every data point it is
/// given, which lets it answer questions (median, mode, percentiles, ...)
/// that need the whole data set. Memory use grows with the amount of data.
/// @see SBStatistics
@implementation SBFullStatistics

#pragma mark Creation and deletion

/// Creates an empty statistics object with its data storage in place.
- (id)init {
    self = [super init];
    if (!self)
        return nil;

    self.mutableData = [NSMutableArray array];
    return self;
}

/// This can be used to calculate the truncated (trimmed) mean,
/// or any other trimmed value you might care to know.
/// @see http://en.wikipedia.org/wiki/Truncated_mean
/// @param l should be a real number such that 0 <= l < 1.
/// @param h should be a real number such that 0 <= h < 1.
- (id)statisticsDiscardingLowOutliers:(double)l high:(double)h
{
    NSArray *kept = [self sortedDataDiscardingLowOutliers:l high:h];

    // Use the receiver's own class so subclasses get an instance of themselves.
    id trimmed = [[[self class] alloc] init];
    [trimmed addDataFromArray:kept];
    return trimmed;
}

#pragma mark Adding data

/// Records the data point in addition to letting the superclass update
/// its running figures. Any cached sorted array is dropped, since it no
/// longer reflects the full data set.
- (void)addDouble:(double)d
{
    [super addDouble:d];

    NSNumber *boxed = [NSNumber numberWithDouble:d];
    [self.mutableData addObject:boxed];

    self.sortedData = nil;
}

#pragma mark Returning data

/// Returns a copy of the data points in the order they were added.
- (NSArray *)data {
    NSArray *snapshot = [self.mutableData copy];
    return snapshot;
}

/// Returns the data points in ascending order. The sorted array is built
/// on first use and then reused until addDouble: discards it.
- (NSArray*)sortedData
{
    // Cache hit: hand out what we already have.
    if (_sortedData != nil)
        return [_sortedData copy];

    // Cache miss: sort once and remember the result.
    NSArray *ascending = [self.mutableData sortedArrayUsingSelector:@selector(compare:)];
    self.sortedData = ascending;

    return [_sortedData copy];
}

/// The parameters l=0.05 and h=0.1 means discarding the lower 5% and
/// upper 10% of the data. An exception is thrown if the parameters
/// are not in the expected range.
/// @param l should be a real number such that 0 <= l < 1.
/// @param h should be a real number such that 0 <= h < 1.
- (NSArray*)sortedDataDiscardingLowOutliers:(double)l high:(double)h
{
    NSAssert1(l >= 0 && l < 1.0, @"Low bound must be 0 <= x < 1, was %f", l);
    NSAssert1(h >= 0 && h < 1.0, @"High bound must be 0 <= x < 1, was %f", h);

    // lo is the first index kept, hi is one past the last index kept.
    NSUInteger lo = l * count;
    NSUInteger hi = ceil(count - h * count);

    // When l + h >= 1 the two cut-offs meet or cross. Both are unsigned,
    // so hi - lo would wrap around to a huge length and
    // subarrayWithRange: would raise. Nothing survives such a trim.
    if (hi <= lo)
        return [NSArray array];

    return [[self sortedData] subarrayWithRange:NSMakeRange(lo, hi - lo)];
}

#pragma mark Statistics

/// Returns the most frequently occurring data point, or nan if all the
/// data points are unique (which includes the case of no data at all).
/// If there are multiple candidates it is undefined which one is returned.
/// @see http://en.wikipedia.org/wiki/Mode_(statistics)
- (double)mode
{
    // Tally how many times each distinct value occurs.
    id freq = [NSMutableDictionary dictionaryWithCapacity:count];
    for (NSNumber *x in self.mutableData)
        incrementValueForKey(freq, x);

    // No mode exists if all the numbers are unique
    if ([freq count] == count)
        return nan(0);

    // Keys come back ordered by ascending tally, so the last one is the mode.
    return [[[freq keysSortedByValueUsingSelector:@selector(compare:)] lastObject] doubleValue];
}

/// Returns the middle value of the sorted data: the centre element for an
/// odd number of data points, the average of the two centre elements for
/// an even number. Returns nan if no data has been added.
/// @see http://en.wikipedia.org/wiki/Median
- (double)median
{
    if (!count)
        return nan(0);

    NSArray *sorted = [self sortedData];
    NSUInteger mid = count / 2;

    // Odd count: with zero-based indices the centre element is count / 2
    // (e.g. index 1 of 3), not count / 2 - 1.
    if (count & 1)
        return [[sorted objectAtIndex:mid] doubleValue];

    double below = [[sorted objectAtIndex:mid - 1] doubleValue];
    double above = [[sorted objectAtIndex:mid] doubleValue];
    return (below + above) / 2;
}

/// @param x should be a real number such that 0 <= x <= 1. An
/// exception is thrown if it is not in the right range.
/// @see http://en.wikipedia.org/wiki/Percentile
/// @return the sorted data point at index floor((count - 1) * x), so
/// x = 0 gives the smallest and x = 1 the largest data point. Returns nan
/// if no data has been added.
- (double)percentile:(double)x
{
    NSAssert1(x >= 0 && x <= 1, @"Percentile must be 0 <= x <= 1, but was %f", x);

    // count is unsigned: with no data, count - 1 would wrap around and the
    // lookup below would index far outside the (empty) array.
    if (!count)
        return nan(0);

    NSUInteger i = (count-1) * x;
    return [[[self sortedData] objectAtIndex:i] doubleValue];
}

/// The harmonic mean is undefined if any of the data points are zero,
/// and this method will return nan in that case. With no data the
/// result is 0 / 0, which is also nan.
/// @see http://en.wikipedia.org/wiki/Harmonic_mean
- (double)harmonicMean
{
    // Sum of reciprocals; the harmonic mean is count divided by that sum.
    long double sum = 0.0;
    for (NSNumber *n in self.mutableData) {
        double d = [n doubleValue];
        if (d == 0)
            return nan(0);
        sum += 1 / d;
    }
    return count / sum;
}

/// The geometric mean is undefined if any data point is less than
/// zero, and this method returns nan in that case. Also returns nan()
/// if called before any data has been added.
///
/// The mean is computed as exp(mean(log(d))) rather than as the count-th
/// root of the product of all data points: the running product leaves the
/// representable range (overflowing to infinity or underflowing to zero)
/// after relatively few large or small values, while the sum of
/// logarithms does not.
/// @see http://en.wikipedia.org/wiki/Geometric_mean
- (double)geometricMean
{
    if (!count)
        return nan(0);

    long double logSum = 0;
    for (NSNumber *n in self.mutableData) {
        double d = [n doubleValue];
        if (d < 0)
            return nan(0);
        // log(0) is -infinity, which makes the final result 0 - the same
        // answer a product containing a zero factor gives.
        logSum += logl(d);
    }
    return (double)expl(logSum / count);
}

/// Returns a dictionary of frequency distributions for the given
/// buckets. The returned dictionary has a key for each of the values
/// in @p theBuckets. The associated value is the count of data points
/// that are less than or equal to the key, but greater than any
/// smaller buckets if @p cumulative is false, or simply less than or
/// equal to the key otherwise.
/// @param theBuckets An array of buckets to partition the data into.
/// Each element must respond to -doubleValue. Must not be empty.
/// @param cumulative Whether to return the cumulative frequency distribution
/// @return A dictionary keyed by bucket (as NSNumber). Data points larger
/// than the largest bucket are not counted anywhere.
/// @see http://en.wikipedia.org/wiki/Frequency_distribution
/// @see bucketsWithCount:
/// @see bucketsWithInterval:
- (NSDictionary*)frequencyDistributionWithBuckets:(NSArray*)theBuckets cumulative:(BOOL)cumulative
{
    NSAssert([theBuckets count], @"No buckets given");

    // Buckets must be NSNumbers
    NSMutableArray *numericBuckets = [NSMutableArray arrayWithCapacity:[theBuckets count]];
    for (id b in theBuckets)
        [numericBuckets addObject:[NSNumber numberWithDouble:[b doubleValue]]];

    // Create dictionary to hold frequency distribution and initialise each bucket
    id freq = [NSMutableDictionary dictionaryWithCapacity:[numericBuckets count]];
    for (NSNumber *bucket in numericBuckets)
        [freq setObject:[NSNumber numberWithInt:0] forKey:bucket];

    // Make sure the buckets are sorted, and prepare an iterator for them
    NSArray *buckets = [numericBuckets sortedArrayUsingSelector:@selector(compare:)];
    NSEnumerator *biter = [buckets objectEnumerator];
    NSNumber *b = [biter nextObject];

    // Determine the frequency for each bucket. Data and buckets are both
    // in ascending order, so one forward pass over each is enough.
    for (NSNumber *n in [self sortedData]) {
        // Move on to the first bucket that is large enough to hold n.
        while (b && [n compare:b] > 0)
            b = [biter nextObject];

        // Out of buckets: n, and every data point after it, is larger than
        // the largest bucket. Stop here rather than compare against (or
        // use as a dictionary key) a nil bucket.
        if (!b)
            break;

        incrementValueForKey(freq, b);
    }

    if (cumulative) {
        // Replace each bucket's own tally with the running total up to and
        // including that bucket.
        NSUInteger total = 0;
        id cfreq = [NSMutableDictionary dictionaryWithCapacity:[buckets count]];
        for (id key in buckets) {
            total += [[freq objectForKey:key] unsignedIntValue];
            [cfreq setObject:[NSNumber numberWithUnsignedInteger:total] forKey:key];
        }
        freq = cfreq;
    }

    return freq;
}

#pragma mark Buckets

/// Returns @p x buckets of even size that cover the entire range.
/// The highest bucket will be equal to the max of the population.
///
/// Each bucket is computed directly from the maximum (max - i * range / x)
/// instead of by repeatedly subtracting the interval: accumulated rounding
/// error in the repeated subtraction can leave a tiny remainder above the
/// minimum and produce one bucket too many.
/// @param x The number of buckets wanted.
/// @return An array of exactly @p x NSNumbers in descending order, or nil
/// if @p x is zero, no data has been added, or all data points are equal.
/// @see frequencyDistributionWithBuckets:cumulative:
- (NSArray*)bucketsWithCount:(NSUInteger)x
{
    double span = self.range;

    // !(span > 0) also catches a nan range.
    if (!count || !x || !(span > 0))
        return nil;

    NSMutableArray *buckets = [NSMutableArray arrayWithCapacity:x];
    for (NSUInteger i = 0; i < x; i++)
        [buckets addObject:[NSNumber numberWithDouble:self.max - (span * i) / x]];
    return buckets;
}

/// Returns a number of even-sized buckets, each with a range of
/// @p interval, that cover the entire range. The highest bucket will be
/// equal to the max of the population; further buckets follow in
/// descending order for as long as they are greater than the min.
/// @param interval The size of each bucket.
/// @return An array of NSNumbers, or nil if no data has been added or
/// @p interval is not greater than zero.
/// @see frequencyDistributionWithBuckets:cumulative:
- (NSArray*)bucketsWithInterval:(double)interval
{
    if (!count || interval <= 0)
        return nil;

    id buckets = [NSMutableArray arrayWithObject:[NSNumber numberWithDouble:self.max]];

    double bucket;
    for (bucket = self.max - interval; bucket > self.min; bucket -= interval)
        [buckets addObject:[NSNumber numberWithDouble:bucket]];
    return buckets;
}


@end
--------------------------------------------------------------------------------
/Classes/SBStatistics.h:
--------------------------------------------------------------------------------
/*
Copyright (c) 2008, Stig Brautaset. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

  * Neither the name of the author nor the names of its contributors may be
    used to endorse or promote products derived from this software without
    specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// The target of this import was lost from the text; Foundation declares
// everything this header uses (NSObject, NSUInteger, NSArray, NSDictionary).
#import <Foundation/Foundation.h>

/// Sparse statistics class. Keeps running figures only; the data points
/// themselves are not stored.
@interface SBStatistics : NSObject {
    NSUInteger count;
    NSUInteger mindex;
    NSUInteger maxdex;

    double min;
    double max;
    double mean;

@private
    double pseudoVariance;
}


/// Custom init methods.
/// Creates an object and adds a single data point.
/// @param x must respond to -doubleValue.
- (id)initWithData:(id)x;

/// Creates an object and adds every element of @p array as a data point.
/// @param array elements must respond to -doubleValue.
- (id)initWithArray:(NSArray*)array;

/// Add a data point.
- (void)addDouble:(double)d;

/// Add a data point from an object.
/// @param x must respond to -doubleValue.
- (void)addData:(id)x;

/// Add data points from the given array.
/// @param x elements must respond to -doubleValue.
- (void)addDataFromArray:(NSArray*)x;

/// Count of data points
@property(readonly) NSUInteger count;

/// Index of smallest data point (zero-based, in the order data was added)
@property(readonly) NSUInteger mindex;

/// Index of largest data point (zero-based, in the order data was added)
@property(readonly) NSUInteger maxdex;

/// Value of smallest data point
@property(readonly) double min;

/// Value of largest data point
@property(readonly) double max;

/// The arithmetic mean. @see http://en.wikipedia.org/wiki/Arithmetic_mean
@property(readonly) double mean;

/// Max - min
- (double)range;

/// Variance of sample (division by N-1)
- (double)variance;

/// Variance of population (division by N)
- (double)biasedVariance;

/// Standard deviation of sample (division by N-1)
- (double)standardDeviation;

/// Standard deviation of population (division by N)
- (double)biasedStandardDeviation;

@end

/// Full statistics class. Keeps every data point, so it can compute
/// statistics that need the whole data set.
@interface SBFullStatistics : SBStatistics

/// Most frequently occurring data point
- (double)mode;

/// Middle value in a list of sorted data points
- (double)median;

/// Find the largest data point less than a certain percentage
/// @param x a fraction such that 0 <= x <= 1.
- (double)percentile:(double)x;

/// @see http://en.wikipedia.org/wiki/Harmonic_mean
- (double)harmonicMean;

/// @see http://en.wikipedia.org/wiki/Geometric_mean
- (double)geometricMean;

/// Returns an (optionally cumulative) frequency distribution.
/// @param x the buckets to partition the data into.
/// @param y whether to return the cumulative distribution.
- (NSDictionary*)frequencyDistributionWithBuckets:(NSArray*)x cumulative:(BOOL)y;

/// Returns x equally-sized buckets covering the range of data.
- (NSArray*)bucketsWithCount:(NSUInteger)x;

/// Returns N buckets of size x covering the range of data.
- (NSArray*)bucketsWithInterval:(double)x;

/// Returns the data in the order it was added.
//- (NSArray*)data;

/// Returns the data in sorted order.
- (NSArray*)sortedData;

/// Returns the data sans low and high outliers.
/// @param l fraction of the lowest data points to drop, 0 <= l < 1.
/// @param h fraction of the highest data points to drop, 0 <= h < 1.
- (NSArray*)sortedDataDiscardingLowOutliers:(double)l high:(double)h;

/// Returns a new statistics object, with outliers removed from the data.
/// @param l fraction of the lowest data points to drop, 0 <= l < 1.
/// @param h fraction of the highest data points to drop, 0 <= h < 1.
- (id)statisticsDiscardingLowOutliers:(double)l high:(double)h;

@end
--------------------------------------------------------------------------------
/Classes/SBStatistics.m:
--------------------------------------------------------------------------------
/*
Copyright (c) 2008, Stig Brautaset. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright notice,
    this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

  * Neither the name of the author nor the names of its contributors may be
    used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#import "SBStatistics.h"


/// This class doesn't keep any of the data points, so uses hardly any
/// memory. On the other hand it isn't able to compute the more
/// complex statistics.
/// @see SBFullStatistics
@implementation SBStatistics

@synthesize count;
@synthesize mindex;
@synthesize maxdex;

@synthesize min;
@synthesize max;
@synthesize mean;

#pragma mark Initialisation

/// Creates an empty statistics object. Until the first data point is
/// added, min, max and mean are nan.
- (id)init
{
    self = [super init];
    if (self) {
        min = max = mean = nan(0);
    }
    return self;
}

// Init objects with data structure(s).

/// Creates a statistics object holding a single data point.
/// @param x must respond to -doubleValue.
- (id)initWithData:(id)x
{
    // Go through -init (dispatched on self, so a subclass override runs
    // too) rather than straight to the superclass: -init is where this
    // class and its subclasses set up their initial state.
    self = [self init];
    if (self) {
        NSAssert([x respondsToSelector:@selector(doubleValue)], @"Data must respond to -doubleValue");
        [self addDouble:[x doubleValue]];
    }
    return self;
}

/// Creates a statistics object holding every element of @p array.
/// @param array elements must respond to -doubleValue.
- (id)initWithArray:(NSArray*)array
{
    // See initWithData: for why this calls [self init].
    self = [self init];
    if (self) {
        for (id x in array)
            [self addData:x];
    }
    return self;
}

#pragma mark Adding data

/// Adds each element of @p array as a data point, in order.
/// @see addData:
- (void)addDataFromArray:(NSArray*)array
{
    for (id x in array)
        [self addData:x];
}

/// Add a datapoint (any NSObject responding to -doubleValue)
/// @param x must respond to -doubleValue.
- (void)addData:(id)x
{
    NSAssert([x respondsToSelector:@selector(doubleValue)], @"Data must respond to -doubleValue");
    [self addDouble:[x doubleValue]];
}

/// Add a datapoint.
/// This method does most of the work.
/// @param d a double-precision data point to add.
- (void)addDouble:(double)d {
    // First data point: replace the placeholder values with neutral
    // starting points for the comparisons and the running mean below.
    if (count == 0) {
        mean = 0;
        min = INFINITY;
        max = -INFINITY;
    }

    // The new point's index is the number of points seen before it.
    NSUInteger index = count;

    if (d < min) {
        min = d;
        mindex = index;
    }
    if (d > max) {
        max = d;
        maxdex = index;
    }

    count = index + 1;

    // Running update of the mean and of the sum of squared deviations,
    // using the mean from before and after this point was included.
    double previousMean = mean;
    mean = previousMean + (d - previousMean) / count;
    pseudoVariance = pseudoVariance + (d - mean) * (d - previousMean);
}

#pragma mark Methods

/// Difference between the largest and the smallest data point.
- (double)range
{
    double highest = max;
    double lowest = min;
    return highest - lowest;
}

/// Sample variance (division by N-1); nan with fewer than two data points.
/// @see http://en.wikipedia.org/wiki/Variance
- (double)variance
{
    return (count > 1) ? pseudoVariance / (count - 1) : nan(0);
}

/// Population variance (division by N); nan with fewer than two data points.
/// @see variance
- (double)biasedVariance
{
    return (count > 1) ? pseudoVariance / count : nan(0);
}

/// Square root of the sample variance.
/// @see http://en.wikipedia.org/wiki/Standard_deviation
- (double)standardDeviation
{
    double sampleVariance = [self variance];
    return sqrt(sampleVariance);
}

/// Square root of the population variance.
/// @see standardDeviation
- (double)biasedStandardDeviation
{
    double populationVariance = [self biasedVariance];
    return sqrt(populationVariance);
}

@end
--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
# Statistics

**I've abandoned this project, and will not be supporting it.** Though
it might still work for you!

Like its inspiration (Perl's
Statistics::Descriptive), it consists of two main classes.

**SBStatistics** calculates a range of statistical measurements on the
fly as you add data points. The data points are immediately discarded,
giving it a low memory footprint.

**SBFullStatistics** in turn subclasses SBStatistics and keeps each
data point. It is therefore able to provide more advanced statistical
functions.
The trade-off is that it can consume a lot of memory if you 17 | are collecting a lot of data. 18 | 19 | ## Examples 20 | 21 | ``` objective-c 22 | // Create statistics object 23 | SBStatistics *stat = [SBStatistics new]; 24 | 25 | // Add some random data points 26 | for (int i = 0; i < 1000; i++) 27 | [stat addData:[NSNumber numberWithInt:random()/1000.0]]; 28 | 29 | // Format report 30 | id fmt = [NSMutableArray array]; 31 | [fmt addObject:@"Data set consists of %u data points."]; 32 | [fmt addObject:@" * Min: %f"]; 33 | [fmt addObject:@" * Max: %f"]; 34 | [fmt addObject:@" * Mean: %f"]; 35 | [fmt addObject:@" * Variance: %f"]; 36 | [fmt addObject:@" * StdDev: %f"]; 37 | 38 | // Print it 39 | NSLog([fmt componentsJoinedByString:@"\n"], 40 | stat.count, 41 | stat.min, 42 | stat.max, 43 | stat.mean, 44 | [stat variance], 45 | [stat standardDeviation] 46 | ); 47 | ``` 48 | 49 | The SBFullStatistics class can do more interesting stuff: 50 | 51 | ``` objective-c 52 | // Create statistics object 53 | SBFullStatistics *stat = [SBFullStatistics new]; 54 | 55 | // Add some random data. 56 | for (int i = 0; i < 1e4; i++) 57 | [stat addData:[NSNumber numberWithDouble:random()]]; 58 | 59 | // Produce 10 equally-sized buckets covering the entire range 60 | id buckets = [stat bucketsWithCount:10]; 61 | 62 | // Calculate frequency distributions. 63 | id freq = [stat frequencyDistributionWithBuckets:buckets 64 | cumulative:NO]; 65 | id cfreq = [stat frequencyDistributionWithBuckets:buckets 66 | cumulative:YES]; 67 | 68 | // Iterate over the buckets and output the values 69 | for (id bucket in buckets) 70 | NSLog(@"%@ => %@ => %@", 71 | bucket, 72 | [freq objectForKey:bucket], 73 | [cfreq objectForKey:bucket]); 74 | ``` 75 | 76 | ## Download 77 | 78 | * [Trunk branch](http://github.com/stig/Statistics/zipball/trunk) 79 | 80 | ## License 81 | 82 | Copyright (c) 2008-2013, Stig Brautaset. All rights reserved. 
83 | 84 | Redistribution and use in source and binary forms, with or without 85 | modification, are permitted provided that the following conditions are 86 | met: 87 | 88 | * Redistributions of source code must retain the above copyright 89 | notice, this list of conditions and the following disclaimer. 90 | * Redistributions in binary form must reproduce the above copyright 91 | notice, this list of conditions and the following disclaimer in the 92 | documentation and/or other materials provided with the distribution. 93 | * Neither the name of the author nor the names of its contributors may 94 | be used to endorse or promote products derived from this software 95 | without specific prior written permission. 96 | 97 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 98 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 99 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 100 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 101 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 102 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 103 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 104 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 105 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 106 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 107 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 108 | 109 | ## Author 110 | 111 | * [Stig Brautaset](stig@brautaset.org) 112 | -------------------------------------------------------------------------------- /Tests/Errors.m: -------------------------------------------------------------------------------- 1 | // 2 | // Errors.m 3 | // Statistics 4 | // 5 | // Created by Stig Brautaset on 17/02/2008. 6 | // Copyright 2008 Stig Brautaset. All rights reserved. 
//

#import "Tests.h"
// NOTE(review): the target of the following import is missing from this
// text - presumably a framework header; restore it from the original file.
#import


/// Edge-case tests: what each statistic returns before any data has been
/// added and right after the first few data points.
@implementation Errors

#pragma mark Setup / Teardown

/// Starts every test with a fresh, empty statistics object.
- (void)setUp {
    stat = [SBFullStatistics new];
}

- (void)tearDown {
}

#pragma mark SBStatistics

/// min is nan while empty and a number once a data point exists.
- (void)testMin {
    XCTAssertTrue(isnan(stat.min));
    [stat addData:@"2"];
    XCTAssertFalse(isnan(stat.min));
}

/// max is nan while empty and a number once a data point exists.
- (void)testMax {
    XCTAssertTrue(isnan(stat.max));
    [stat addData:@"2"];
    XCTAssertFalse(isnan(stat.max));
}

/// mean is nan while empty and a number once a data point exists.
- (void)testMean {
    XCTAssertTrue(isnan(stat.mean));
    [stat addData:@"2"];
    XCTAssertFalse(isnan(stat.mean));
}

/// range is nan while empty and a number once a data point exists.
- (void)testRange {
    XCTAssertTrue(isnan([stat range]));
    [stat addData:@"2"];
    XCTAssertFalse(isnan(stat.range));
}

/// Both variances are nan with zero data points and still nan with one.
- (void)testVariance {
    XCTAssertTrue(isnan([stat variance]));
    XCTAssertTrue(isnan([stat biasedVariance]));

    [stat addData:@"2"];
    XCTAssertTrue(isnan([stat variance]));
    XCTAssertTrue(isnan([stat biasedVariance]));
}

/// Both standard deviations are nan with zero data points and still nan
/// with one.
- (void)testStandardDeviation {
    XCTAssertTrue(isnan([stat standardDeviation]));
    XCTAssertTrue(isnan([stat biasedStandardDeviation]));

    [stat addData:@"2"];
    XCTAssertTrue(isnan([stat standardDeviation]));
    XCTAssertTrue(isnan([stat biasedStandardDeviation]));
}

#pragma mark SBFullStatistics

/// mode is nan as long as every data point is unique, and becomes a
/// number as soon as one value repeats.
- (void)testMode {
    XCTAssertTrue(isnan([stat mode]));
    [stat addData:@"1"];
    XCTAssertTrue(isnan([stat mode]));
    [stat addData:@"-1"];
    XCTAssertTrue(isnan([stat mode]));
    [stat addData:@"-1"];
    XCTAssertFalse(isnan([stat mode]));
}

/// median is nan while empty and a number once a data point exists.
- (void)testMedian {
    XCTAssertTrue(isnan([stat median]));
    [stat addData:@"1"];
    XCTAssertFalse(isnan([stat median]));
}

/// With no data every bucket has a frequency of zero; the buckets may be
/// given as strings. A nil or empty bucket array is rejected.
- (void)testFrequencyDistributionWithBuckets {
    id buckets = [@"1 20" componentsSeparatedByString:@" "];
    // dictionaryWithObjectsAndKeys: takes value, key pairs: both buckets
    // (1 and 20) map to a frequency of 0.
    id expected = [NSDictionary dictionaryWithObjectsAndKeys:
                   [NSNumber numberWithInt:0], [NSNumber numberWithInt:1],
                   [NSNumber numberWithInt:0], [NSNumber numberWithInt:20],
                   nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:buckets cumulative:NO], expected);

    XCTAssertThrows([stat frequencyDistributionWithBuckets:nil cumulative:NO]);
    XCTAssertThrows([stat frequencyDistributionWithBuckets:[NSArray array] cumulative:NO]);
}

/// harmonicMean is nan while empty, a number for non-zero data, and nan
/// again once a zero has been added.
- (void)testHarmonicMean {
    XCTAssertTrue(isnan([stat harmonicMean]));

    [stat addData:@"1"];
    XCTAssertFalse(isnan([stat harmonicMean]));

    [stat addData:@"0"];
    XCTAssertTrue(isnan([stat harmonicMean]));
}

/// geometricMean is nan while empty, tolerates a zero, and is nan once a
/// negative data point has been added.
- (void)testGeometricMean {
    XCTAssertTrue(isnan([stat geometricMean]));

    [stat addData:@"1"];
    XCTAssertFalse(isnan([stat geometricMean]));

    [stat addData:@"0"];
    XCTAssertFalse(isnan([stat geometricMean]));

    [stat addData:@"-1"];
    XCTAssertTrue(isnan([stat geometricMean]));
}

/// Without data there is no range to divide into buckets.
- (void)testBuckets {
    XCTAssertNil([stat bucketsWithCount:1]);
}

@end
--------------------------------------------------------------------------------
/Tests/Full.m:
--------------------------------------------------------------------------------
//
// Full.m
// Statistics
//
// Created by Stig Brautaset on 10/02/2008.
// Copyright 2008 Stig Brautaset. All rights reserved.
//

#import "Tests.h"
// NOTE(review): the target of the following import is missing from this
// text - presumably a framework header; restore it from the original file.
#import

// Boxes a double as an NSNumber.
#define num(n) [NSNumber numberWithDouble:n]
// Expands to "value, key" - the argument order dictionaryWithObjectsAndKeys:
// expects - so expectations can be written as keyval(bucket, frequency).
#define keyval(k, v) [NSNumber numberWithDouble:v], [NSNumber numberWithDouble:k]


/// Tests of the statistics that need the full data set.
@implementation Full

#pragma mark Setup / Teardown

/// Starts every test with a fresh, empty statistics object.
- (void)setUp {
    stat = [SBFullStatistics new];
}

- (void)tearDown {
}


#pragma mark Tests

/// The mode follows whichever value currently occurs most often, whether
/// the data was added as strings or as numbers.
- (void)testMode {
    [stat addDataFromArray:[@"9 3.3 1 2 2" componentsSeparatedByString:@" "]];
    XCTAssertEqualWithAccuracy([stat mode], 2.0, 1e-6);

    [stat addDataFromArray:[@"4 4 4" componentsSeparatedByString:@" "]];
    XCTAssertEqualWithAccuracy([stat mode], 4.0, 1e-6);

    // Ensure string/number agnosticism
    [stat addData:[NSNumber numberWithInt:2]];
    [stat addData:[NSNumber numberWithInt:2]];
    XCTAssertEqualWithAccuracy([stat mode], 2.0, 1e-6);
}

/// Median of five, six and eight data points.
- (void)testMedian {
    // Sorted: 1 2 2 3.3 9
    [stat addDataFromArray:[@"9 3.3 1 2 2" componentsSeparatedByString:@" "]];
    XCTAssertEqualWithAccuracy([stat median], 2.0, 1e-6);

    // Sorted: 1 2 2 3.3 4 9 - average of the two middle values
    [stat addData:@"4"];
    XCTAssertEqualWithAccuracy([stat median], (3.3 + 2)/2, 1e-6);

    // Sorted: -4 -4 1 2 2 3.3 4 9
    [stat addData:@"-4"];
    [stat addData:@"-4"];
    XCTAssertEqualWithAccuracy([stat median], 2.0, 1e-6);
}

/// Percentiles of ten data points, including both ends of the scale.
- (void)testPercentile {
    [stat addDataFromArray:[@"6 7 8 16 0 1 2 3 4 5" componentsSeparatedByString:@" "]];
    XCTAssertEqualWithAccuracy([stat percentile:0.0], stat.min, 1e-6);
    XCTAssertEqualWithAccuracy([stat percentile:1.0], stat.max, 1e-6);

    XCTAssertEqualWithAccuracy([stat percentile:0.05], stat.min, 1e-6);
    XCTAssertEqualWithAccuracy([stat percentile:0.25], 2.0, 1e-6);

    XCTAssertEqualWithAccuracy([stat percentile:0.95], 8.0, 1e-6);
    XCTAssertEqualWithAccuracy([stat percentile:0.975], 8.0, 1e-6);
}

/// Harmonic mean of 8, 9 and 10.
- (void)testHarmonicMean {
    [stat addDataFromArray:[@"8 9 10" componentsSeparatedByString:@" "]];
    XCTAssertEqualWithAccuracy([stat harmonicMean], 8.926, 1e-3);
}

/// Geometric mean of 1, 0.5 and 0.25.
- (void)testGeometricMean {
    [stat addDataFromArray:[@"1 0.5 0.25" componentsSeparatedByString:@" "]];
    XCTAssertEqualWithAccuracy([stat geometricMean], 0.5, 1e-6);
}

/// bucketsWithCount: returns the requested number of buckets, highest
/// first, starting at the maximum.
- (void)testFrequencyDistributionBuckets {
    // min 1, max 9
    [stat addDataFromArray:[@"9 3.3 1 5 2" componentsSeparatedByString:@" "]];
    id expect = [NSArray arrayWithObjects:num(9), num(5), nil];
    XCTAssertEqualObjects([stat bucketsWithCount:2], expect);

    id expect2 = [NSArray arrayWithObjects:num(9), num(7), num(5), num(3), nil];
    XCTAssertEqualObjects([stat bucketsWithCount:4], expect2);

    // Now add a negative number.
    [stat addDataFromArray:[@"-9" componentsSeparatedByString:@" "]];
    id expect3 = [NSArray arrayWithObjects:num(9), num(4.5), num(0), num(-4.5), nil];
    XCTAssertEqualObjects([stat bucketsWithCount:4], expect3);
}

/// Non-cumulative distribution: each bucket counts only the data points
/// that fall between it and the next smaller bucket.
- (void)testFrequencyDistribution {
    [stat addDataFromArray:[@"9 3.3 1 5 2" componentsSeparatedByString:@" "]];
    id expect = [NSDictionary dictionaryWithObjectsAndKeys:
                 keyval(9, 1),
                 keyval(5, 4),
                 nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:[stat bucketsWithCount:2] cumulative:NO], expect);

    id expect2 = [NSDictionary dictionaryWithObjectsAndKeys:
                  keyval(9, 1),
                  keyval(7, 0),
                  keyval(5, 2),
                  keyval(3, 2),
                  nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:[stat bucketsWithCount:4] cumulative:NO], expect2);

    // Now add a negative number.
    [stat addDataFromArray:[@"-9" componentsSeparatedByString:@" "]];
    id expect3 = [NSDictionary dictionaryWithObjectsAndKeys:
                  keyval(9, 2),
                  keyval(4.5, 3),
                  keyval(0, 0),
                  keyval(-4.5, 1),
                  nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:[stat bucketsWithCount:4] cumulative:NO], expect3);
}

/// Cumulative distribution: each bucket counts every data point less than
/// or equal to it.
- (void)testFrequencyDistributionCumulative {
    [stat addDataFromArray:[@"9 3.3 1 5 2" componentsSeparatedByString:@" "]];
    id expect = [NSDictionary dictionaryWithObjectsAndKeys:
                 keyval(9, 5),
                 keyval(5, 4),
                 nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:[stat bucketsWithCount:2] cumulative:YES], expect);

    id expect2 = [NSDictionary dictionaryWithObjectsAndKeys:
                  keyval(9, 5),
                  keyval(7, 4),
                  keyval(5, 4),
                  keyval(3, 2),
                  nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:[stat bucketsWithCount:4] cumulative:YES], expect2);

    // Now add a negative number.
    [stat addDataFromArray:[@"-9" componentsSeparatedByString:@" "]];
    id expect3 = [NSDictionary dictionaryWithObjectsAndKeys:
                  keyval(9, 6),
                  keyval(4.5, 4),
                  keyval(0, 1),
                  keyval(-4.5, 1),
                  nil];
    XCTAssertEqualObjects([stat frequencyDistributionWithBuckets:[stat bucketsWithCount:4] cumulative:YES], expect3);
}

/// Distributing 1e5 random data points over n/100 buckets must finish
/// within three seconds.
- (void)testFrequencyDistributionPerformance {
    int n = 1e5;
    int i;
    for (i = 0; i < n; i++)
        [stat addData:[NSNumber numberWithLong:random()]];

    // Only the distribution itself is timed, not the data generation above.
    id start = [NSDate date];
    [stat frequencyDistributionWithBuckets:[stat bucketsWithCount:n/100] cumulative:NO];
    XCTAssertTrue(-[start timeIntervalSinceNow] < 3.0, @"Should be quick");
}

/// Trimming the sorted data from both ends leaves the expected number of
/// data points, and the expected first and last values.
- (void)testSortedDataDiscarding {
    [stat addDataFromArray:[@"6 7 8 9 0 1 2 3 4 5" componentsSeparatedByString:@" "]];

    XCTAssertEqual([[stat sortedDataDiscardingLowOutliers:0.05 high:0.05] count], (NSUInteger)10);
    XCTAssertEqual([[stat sortedDataDiscardingLowOutliers:0.1 high:0.1] count], (NSUInteger)8);
    XCTAssertEqual([[stat sortedDataDiscardingLowOutliers:0.2 high:0.2] count], (NSUInteger)6);

    // Dropping the lowest 30% and highest 40% of 0..9 leaves 3, 4, 5.
    NSArray *sub = [stat sortedDataDiscardingLowOutliers:0.3 high:0.4];
    XCTAssertEqual([[sub objectAtIndex:0] intValue], (int)3);
    XCTAssertEqual([[sub lastObject] intValue], (int)5);
}

#pragma mark Derived Statistics

/// The mean of a trimmed copy reflects only the data points that were kept.
- (void)testTrimmedMean {
    [stat addDataFromArray:[@"0 15 15 15 35" componentsSeparatedByString:@" "]];
    SBFullStatistics *s;

    // Keeps 0 15 15
    s = [stat statisticsDiscardingLowOutliers:0.0 high:0.4];
    XCTAssertEqualWithAccuracy([s mean], 10.0, 1e-6);

    // Keeps 15 15 15 35
    s = [stat statisticsDiscardingLowOutliers:0.2 high:0.0];
    XCTAssertEqualWithAccuracy([s mean], 20.0, 1e-6);

    // Keeps 15 15 15
    s = [stat statisticsDiscardingLowOutliers:0.2 high:0.2];
    XCTAssertEqualWithAccuracy([s mean], 15.0, 1e-6);
}

@end
| --------------------------------------------------------------------------------
/Tests/Stream.m:
--------------------------------------------------------------------------------
1 | //
2 | // Statistics.m
3 | // Statistics
4 | //
5 | // Created by Stig Brautaset on 09/02/2008.
6 | // Copyright 2008 Stig Brautaset. All rights reserved.
7 | //
8 |
9 | #import "Tests.h"
10 | // NOTE(review): the target of this import was missing from the copy of the
11 | // file this was recovered from. SBStatistics.h is the header the library's own
12 | // sources import; confirm the spelling against the project's header paths.
13 | #import "SBStatistics.h"
14 | #import <math.h>
15 |
16 |
17 | /// Exercises the running statistics SBStatistics maintains as data points are
18 | /// added: count, min/max and their indexes, mean, range, variance and deviation.
19 | @implementation Stream
20 |
21 | #pragma mark Setup / Teardown
22 |
23 | /// Gives every test a fresh, empty SBStatistics instance.
24 | - (void)setUp {
25 |     [super setUp];
26 |     stat = [[SBStatistics alloc] init];
27 | }
28 |
29 | /// Drops the per-test instance before handing teardown back to XCTestCase.
30 | - (void)tearDown {
31 |     stat = nil;
32 |     [super tearDown];
33 | }
34 |
35 | #pragma mark Tests
36 |
37 | /// The count starts at zero and grows by one per data point, duplicates included.
38 | - (void)testCount {
39 |     XCTAssertEqual(stat.count, (NSUInteger)0);
40 |
41 |     [stat addData:@"1"];
42 |     [stat addData:@"2"];
43 |     XCTAssertEqual(stat.count, (NSUInteger)2);
44 |
45 |     [stat addData:@"2"];
46 |     XCTAssertEqual(stat.count, (NSUInteger)3);
47 | }
48 |
49 | /// The minimum is lowered when a smaller value arrives.
50 | - (void)testMin {
51 |     [stat addData:@"1"];
52 |     [stat addData:@"2"];
53 |     XCTAssertEqualWithAccuracy(stat.min, 1.0, 1e-6);
54 |
55 |     [stat addData:@"-2"];
56 |     XCTAssertEqualWithAccuracy(stat.min, -2.0, 1e-6);
57 | }
58 |
59 | /// mindex is the zero-based insertion position of the current minimum: 0 while
60 | /// the first point is the smallest, 2 once the third point undercuts it.
61 | - (void)testMindex {
62 |     [stat addData:@"1"];
63 |     [stat addData:@"2"];
64 |     XCTAssertEqual(stat.mindex, (NSUInteger)0);
65 |
66 |     [stat addData:@"-2"];
67 |     XCTAssertEqual(stat.mindex, (NSUInteger)2);
68 | }
69 |
70 | /// The maximum is unaffected by a later, smaller value.
71 | - (void)testMax {
72 |     [stat addData:@"1"];
73 |     [stat addData:@"2"];
74 |     XCTAssertEqualWithAccuracy(stat.max, 2.0, 1e-6);
75 |
76 |     [stat addData:@"-3"];
77 |     XCTAssertEqualWithAccuracy(stat.max, 2.0, 1e-6);
78 | }
79 |
80 | /// maxdex stays at the insertion position of the maximum when a smaller value follows.
81 | - (void)testMaxdex {
82 |     [stat addData:@"1"];
83 |     [stat addData:@"2"];
84 |     XCTAssertEqual(stat.maxdex, (NSUInteger)1);
85 |
86 |     [stat addData:@"-3"];
87 |     XCTAssertEqual(stat.maxdex, (NSUInteger)1);
88 | }
89 |
90 | /// The mean is updated with each data point: (1 + 2) / 2, then (1 + 2 - 2) / 3.
91 | - (void)testMean {
92 |     [stat addData:@"1"];
93 |     [stat addData:@"2"];
94 |     XCTAssertEqualWithAccuracy(stat.mean, 3/2.0, 1e-6);
95 |
96 |     [stat addData:@"-2"];
97 |     XCTAssertEqualWithAccuracy(stat.mean, 1/3.0, 1e-6);
98 | }
99 |
100 | /// The range is max - min: zero for a single point, then widening as new
101 | /// extremes arrive. Each step reads it through both message and dot syntax.
102 | - (void)testRange {
103 |     [stat addData:@"19"];
104 |     XCTAssertEqualWithAccuracy([stat range], 0.0, 1e-6);
105 |     XCTAssertEqualWithAccuracy(stat.range, 0.0, 1e-6);
106 |
107 |     [stat addData:@"2"];
108 |     XCTAssertEqualWithAccuracy([stat range], 17.0, 1e-6);
109 |     XCTAssertEqualWithAccuracy(stat.range, 17.0, 1e-6);
110 |
111 |     [stat addData:@"-2"];
112 |     XCTAssertEqualWithAccuracy([stat range], 21.0, 1e-6);
113 |     XCTAssertEqualWithAccuracy(stat.range, 21.0, 1e-6);
114 | }
115 |
116 | /// For {1, 2, 3, 4} the squared deviations from the mean sum to 5; the
117 | /// variance divides that by n - 1 = 3.
118 | - (void)testVariance {
119 |     [stat addDataFromArray:[@"1 2 3 4" componentsSeparatedByString:@" "]];
120 |     XCTAssertEqualWithAccuracy([stat variance], 5/3.0, 1e-6);
121 | }
122 |
123 | /// The standard deviation is the square root of the variance (5/3 here).
124 | - (void)testStandardDeviation {
125 |     [stat addDataFromArray:[@"1 2 3 4" componentsSeparatedByString:@" "]];
126 |     XCTAssertEqualWithAccuracy([stat standardDeviation], sqrt(5/3.0), 1e-6);
127 | }
128 |
129 | /// The biased variance divides the same sum of squared deviations (5) by
130 | /// n = 4 instead of n - 1.
131 | - (void)testBiasedVariance {
132 |     [stat addDataFromArray:[@"1 2 3 4" componentsSeparatedByString:@" "]];
133 |     XCTAssertEqualWithAccuracy([stat biasedVariance], 5/4.0, 1e-6);
134 | }
135 |
136 | /// The biased standard deviation is the square root of the biased variance (5/4 here).
137 | - (void)testBiasedStandardDeviation {
138 |     [stat addDataFromArray:[@"1 2 3 4" componentsSeparatedByString:@" "]];
139 |     XCTAssertEqualWithAccuracy([stat biasedStandardDeviation], sqrt(5/4.0), 1e-6);
140 | }
141 |
142 | @end
143 |
--------------------------------------------------------------------------------
/Tests/Tests.h:
--------------------------------------------------------------------------------
1 | //
2 | // Statistics.h
3 | // Statistics
4 | //
5 | // Created by Stig Brautaset on 09/02/2008.
6 | // Copyright 2008 Stig Brautaset. All rights reserved.
7 | //
8 |
9 | #import <XCTest/XCTest.h>
10 |
11 | // Forward declarations only; the implementation files import the real header.
12 | @class SBStatistics;
13 | @class SBFullStatistics;
14 |
15 | /// Test case for SBStatistics. `stat` is the instance under test and is
16 | /// assigned in -setUp.
17 | @interface Stream : XCTestCase {
18 |     SBStatistics *stat;
19 | }
20 | @end
21 |
22 | /// Test case for SBFullStatistics. `stat` is the instance under test and is
23 | /// assigned in -setUp.
24 | @interface Full : XCTestCase {
25 |     SBFullStatistics *stat;
26 | }
27 | @end
28 |
29 | /// Test case with a SBFullStatistics fixture; presumably implemented in
30 | /// Tests/Errors.m (per the repository listing) -- confirm there.
31 | @interface Errors : XCTestCase {
32 |     SBFullStatistics *stat;
33 | }
34 | @end
35 |
36 |
--------------------------------------------------------------------------------