├── .gitignore ├── README.md ├── python ├── LICENSE ├── pykalibera │ ├── __init__.py │ ├── data.py │ └── graphs.py └── tests │ ├── support.py │ ├── test_data.py │ ├── test_graphs.py │ └── test_misc.py ├── ruby ├── .ruby-version ├── Gemfile ├── LICENSE.txt ├── Rakefile ├── kalibera.gemspec ├── lib │ ├── kalibera.rb │ └── kalibera │ │ └── data.rb └── test │ └── test_data.rb └── shared_metadata.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.pyc 3 | *.gem 4 | doc 5 | ruby/Gemfile.lock 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # libkalibera 2 | 3 | Implementations of The Kalibera Method. 4 | 5 | For more information, see: http://soft-dev.org/src/libkalibera/ 6 | -------------------------------------------------------------------------------- /python/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) King's College London, created by Edd Barrett and Carl 2 | Friedrich Bolz 3 | 4 | Permission is hereby granted, free of charge, to any person obtaining a copy 5 | of this software and associated documentation files (the "Software"), to deal 6 | in the Software without restriction, including without limitation the rights 7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the Software is 9 | furnished to do so, subject to the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be included in 12 | all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /python/pykalibera/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/softdevteam/libkalibera/46d972aa2b55799156052154b205c25e29369050/python/pykalibera/__init__.py -------------------------------------------------------------------------------- /python/pykalibera/data.py: -------------------------------------------------------------------------------- 1 | import math, itertools, random 2 | 3 | import bz2 4 | 5 | from functools import wraps 6 | 7 | constants = bz2.decompress("""\ 8 | QlpoOTFBWSZTWbTS4VUAC9bYAEAQAAF/4GAOGZ3e40HH2YJERUKomGbCNMAAtMBaAkCOP9U0/R+q 9 | qNCqfjAqVGOY3+qk96qmmIp+CCVNDD/1VGjfqkBJpIElG6uN92vE/PP+5IxhMIIgAbOxEMKLMVSq 10 | VWtZmZaEklAAAttoAAAAAAAAAAAAEklAAEklABttkksklkkknVu2dX1vW9yWrkuXJJJJJJJJJJKK 11 | JWsS5dq7k3RRRbu2222227oAAFQqFCAjkB0w7eMpKWy3bVI42225QlbQAAAAAAlbQbbUqkolE7JZ 12 | jjjmS5LluZkuZmZmZmZmZmZvZhOYnktttsskiaSSToAAA5znOZmMzGTSSJJJ1JO+7gLbR067u48V 13 | bZIAABJCSSjElG436ySek9f1/X3vZ72+7wPk5bbJG0kYTYA2+fHiolu7u8S62JEpjmZ3YS40AEt3 14 | mb8lzXwEpar+9P3s9vAq1o23mt3oaJmZvJAPQu6AlL3s9ojg6rRBmOQaKRb+zbOaL0FMxZKBTm9O 15 | vLmUJuqwVc+KevulFMM/JOzWTMN5Aa7cO5hmZuioHbboGzxzZLFATHYvXg5SUqCWxmre6As43wzV 16 | 30514PDn2m7ema93M9u9199F6QCSfsxJ7wA5R3bTsglUQaJLy4wKYu895byRoTJb7vXsGwZzhPZ0 17 | xOdgtMncj5PGCPeKFPCgenS83zcvnQwGfm3prLnb6bcxKJABZeOrvfNAUNTTobmLQ+fOHAjxo2WE 18 | JaevegHIDVvW+kRAD2TpoeJWFQDKtubzWOr6EFU3xs3rojhW98aghZQmIWXe9sUXKEXKvWvk6bTH 19 | GURStAQ1M7OzF07ui6Q2DYl1NojMzlvrwcO6+uY7V3ZFerzz3sIqJsGzcJN2EAAew/vvqqvvvvi7 20 | xXjhGH3nGNKv2u+Bt8k4USU+SaoLuU6HNmQoYyFTN3huLP721dwHIqQzrqVhjz2+UQw0ezok7gQl 21 | wyZ2YM0hgPVaZaOLK9q3TtGiaO3Br4xGyy7HfAWw72nvLmaGPeSz2c/FkuN7Qj1guqtgUU1NHry2 22 | 5h7KvWgs2jglhCZpYpa8qbl3PrrEDL1Jg/1VrZ8IthQhNKLznYMPozi9arWla2BODhV6yuIKmzsa 23 | zhOb3kxyjcD0ExuXvdys3WRxxYEQszLy8jxqTPZB7UQJ2xbk3YGV2QcdPN2HYuoVkWxUhtErw9u3 24 | 0mdw5HiO0WVtRUCEyxEAOdIHV1sWmbReT4iMTzRsB7Q36e72rpwePnrPggpSxjlZ9Lm8YJrgXDzJ 25 | /30MSDPwzV8s+g4Rcpy3a8c7Y1jxgHJQs8+MyLsudmYSFySWm3OrSn5p3qb++m8fvHUGfCfNCbol 26 | RSZ6wp+ZM14k8S+SKwqES7PQ72DFK4PTiMCA6LbvuSSSJ1R3iJAF10sQYlhpp2GSzWBw3ty+HjLj 27 | HCDTxku3yHPrNvTXekcBSOuzMfOvy3dybchXeLxvXN3vKTN/BdbwUlqXY+g4sWMoHTQT61MeXIMf 28 | PhgYq8KhOEbqeMqoyhWQp03eOOpV/LVvXl2X71ztaX7tMZJ5gBCshDGQCskDme9zu9b1dcgB1khU 29 | mmEk2yTySG2QPmEJp3m/jM+93nYSoe7YEPmExITTpITut87rehm+UgF13IG0nUk52+95Z+9wg49Y 30 | SUraiKIYo3UOvdtq6bVDDmbPTmhtyLfS1LCPXQmYLD7c9lu5ZfdaWSGn1m82kCd4xhYOuVUH33zB 31 | Kh5IsOsxNe+yB7XNd77Xc05kD5h1Jpk0hnLJpnrzXe9xdXpOJfrA4kzdhvLB1tzn3e6OqyaeM8m9 32 | 2HWH2m59jnvrO2w+9TTFDibQffe7880+cfu08zjLw/Mbx4faLWcMbzQ8vDWj6uDmr75CuG9hzAOl 33 | 1Wk0mWKqglrLcmu/uw/IVcPCtGw3hY3TgkN0PqENShQhpj5ZN7dzethJScvIGNEPPE7lcJTwYM8t 34 | 7zB5zMNkYZmHc1cbY1RirWMmuHzEFi7P04mPluFvMqnoirRUEEB3taRpio2svFVXtMcub+PuTmqL 35 | vlSOqbSO996bd/e0AoLJ1hV97AmbtfxIsAkWBILJAUgAoEiySCwgsICwDqAZlkiyEWBFhBSCwqSc 36 | 9zeoAskUBYRYRYb3rmeHWXZOMgsFgLAWBq2RQWRcSVIpFikWCwWF3mAoKKCgpr9TE21BYqy8zDbW 37 | LFFiixRRXLpcoooovmqiirm/rmlRVWl57xynNqqo8tVVVy1VVyrRVVVFb39rSovrvKitpVR/Woo5 38 | So32dxukUUUz2YY1FFLbF1u91TbbZUWNsqVrM3336515OpjWP1DMaFZ5ufsDOXTHLBSsrN85f1/G 39 | Z97s999hpF0nwOBV8gYfoGPnQqiKzPLcnpOky/b652qCQ9ti4PbvcjqmneMEtaV17cnt6NKZYybS 40 | TwHdBK34b2wy3CJ1qqi8qpigCKsVSvFUFMUMtVTFPjBoq+K5AGXzuffdyXtm0+ebv5HdMVnN0mMe 41 | ++473+/HTWnzd0OuWnHE20ZtC7oaZvN/jvn9efa9UHKC++prtL9ZWDu7c73vvaOTiKbTmUPJ7Pv2 42 | jEFDnO6Xe/deOG0+v7Cn6z8zO2VH9TMse/fvt67+w77n7QaQffsxOJfqGteOa/HdYe1Tm6LFOpUz 43 | VMR/aPvadm0zXsnMppiffYG27ZXfslV2hAJrPGmKsVfe9fSO8vVnru7tbzSU1a9cGv0qsQEdhHK7 44 | rJBfbPMSKZc3wmij3ULrhE9nIwoDMp4WAK2GkIKIqrHAK0Bjvo7sA2VZ941ggrwIsfGLZTHvGSZR 45 | 8UGKDKFAAcC8U45fTlKQKM8fnx+IAr3rmwtVbfFhj4VZqQviRXhavLu9zOQWISS0w9PxFYCEfK1l 46 | 9GK0GhrKxr5CwCveB4XDEsPYWKwfHDgrBnZT4XW5dlE2tW7FAR8RGW0XMy1MQoDwyQ+Hnmvet5I/ 47 | HrTVYQJbJ1e3y6B7LoCh5qyXWO03X5WbxWT0UvY55cyRbhmB8ib6lkhRo5USRAoLFA4WELV93ZV/ 48 | DKh2MIhnIWCPBLEh3FUTBSxJC7h4Z15qTFPTRmpe1Ldj1rlkVnAKHDySryior3OheiTPKZY2GaQ6 49 | N2YyvJh9wuO75VOarCWLEUdLavAs2RShYOntLrMVabUAyDnTJIQ4deJa92pAWd6KBz+F3JFOFCQt 50 | NLhVQA==""".decode("base64")) 51 | 52 | constants = [float(x) for x in constants.split()] 53 | 54 | def student_t_quantile95(ndeg): 55 | """Look up the 95% quantile from constant table.""" 56 | index = ndeg - 1 57 | if index >= len(constants): 58 | index = -1 # the quantile converges, we just take the last value 59 | return constants[index] 60 | 61 | 62 | class ConfRange(tuple): 63 | def __new__(cls, *args): 64 | assert len(args) == 3 65 | return tuple.__new__(cls, args) 66 | 67 | @property 68 | def lower(self): 69 | return self[0] 70 | 71 | @property 72 | def upper(self): 73 | return self[2] 74 | 75 | @property 76 | def median(self): 77 | return self[1] 78 | 79 | @property 80 | def error(self): 81 | return _mean([self.upper - self.median, self.median - self.lower]) 82 | 83 | 84 | def confidence_slice(means, confidence="0.95"): 85 | """Returns a tuples (lower, median, upper), where: 86 | lower: lower bound of 95% confidence interval 87 | median: the median value of the data 88 | upper: upper bound of 95% confidence interval 89 | 90 | Arguments: 91 | means -- the list of means (need not be sorted). 92 | """ 93 | 94 | means = sorted(means) 95 | # There may be >1 median indicies, i.e. data is even-sized. 96 | lower, middle_indicies, upper = _confidence_slice_indicies(len(means), confidence) 97 | median = _mean([means[i] for i in middle_indicies]) 98 | return ConfRange(means[lower], median, means[upper - 1]) # upper is *exclusive* 99 | 100 | def memoize(func): 101 | """ The @memoize decorator """ 102 | attr = "%s_%s" % (func.func_name, id(func)) 103 | @wraps(func) 104 | def memoized(self, *args, **kwargs): 105 | d = self._memoization_values 106 | key = attr, args 107 | try: 108 | return d[key] 109 | except KeyError: 110 | res = d[key] = func(self, *args, **kwargs) 111 | return res 112 | return memoized 113 | 114 | # Used for index calculation to not get weird float effects. 115 | # We actually saw some of these effects in our exerimentation. 116 | from decimal import Decimal, ROUND_UP, ROUND_DOWN 117 | 118 | def _confidence_slice_indicies(length, confidence_level=Decimal('0.95')): 119 | """Returns a triple (lower, mean_indicies, upper) so that l[lower:upper] 120 | gives confidence_level of all samples. Mean_indicies is a tuple of one or 121 | two indicies that correspond to the mean position 122 | 123 | Keyword arguments: 124 | confidence_level -- desired level of confidence as a Decimal instance. 125 | """ 126 | 127 | assert not isinstance(confidence_level, float) 128 | confidence_level = Decimal(confidence_level) 129 | assert isinstance(confidence_level, Decimal) 130 | exclude = (1 - confidence_level) / 2 131 | 132 | if length % 2 == 0: 133 | mean_indicies = (length // 2 - 1, length // 2) 134 | else: 135 | mean_indicies = (length // 2, ) 136 | 137 | lower_index = int( 138 | (exclude * length).quantize(Decimal('1.'), rounding=ROUND_DOWN) 139 | ) 140 | 141 | upper_index = int( 142 | ((1 - exclude) * length).quantize(Decimal('1.'), rounding=ROUND_UP) 143 | ) 144 | 145 | return lower_index, mean_indicies, upper_index 146 | 147 | def _mean(l): 148 | return math.fsum(l) / float(len(l)) 149 | 150 | def _geomean(l): 151 | res = 1.0 152 | for element in l: 153 | res *= element 154 | return res ** (1.0 / len(l)) 155 | 156 | # --- 157 | 158 | class Data(object): 159 | def __init__(self, data, reps): 160 | """Instances of this class store measurements (corresponding to 161 | the Y_... in the papers). 162 | 163 | Arguments: 164 | data -- Dict mapping tuples of all but the last index to lists of values. 165 | reps -- List of reps for each level, high to low. 166 | """ 167 | 168 | self.data = data 169 | self.reps = reps 170 | 171 | self._memoization_values = {} 172 | # check that all data is there 173 | for index in itertools.product(*[range(i) for i in reps]): 174 | self[index] # does not crash 175 | 176 | def __getitem__(self, indicies): 177 | assert len(indicies) == len(self.reps) 178 | return self.data[indicies[:-1]][indicies[-1]] 179 | 180 | def index_iterator(self, start=0, stop=None): 181 | """Computes a list of all possible data indcies gievn that 182 | start <= index <= stop are fixed.""" 183 | 184 | if stop is None: 185 | stop = self.n 186 | 187 | maximum_indicies = self.reps[start:stop] 188 | remaining_indicies = [range(maximum) for maximum in maximum_indicies] 189 | return itertools.product(*remaining_indicies) 190 | 191 | @property 192 | def n(self): 193 | """The number of levels in the experiment.""" 194 | return len(self.reps) 195 | 196 | def r(self, i): 197 | """The number of repetitions for level i. 198 | 199 | Arguments: 200 | i -- mathematical index. 201 | """ 202 | assert 1 <= i <= self.n 203 | index = self.n - i 204 | return self.reps[index] 205 | 206 | @memoize 207 | def mean(self, indicies=()): 208 | """Compute the mean across a number of values. 209 | 210 | Keyword arguments: 211 | indicies -- tuple of fixed indicies over which to compute the mean, 212 | given from left to right. The remaining indicies are variable.""" 213 | 214 | remaining_indicies_cross_product = \ 215 | self.index_iterator(start=len(indicies)) 216 | alldata = [self[indicies + remaining] \ 217 | for remaining in remaining_indicies_cross_product] 218 | return _mean(alldata) 219 | 220 | @memoize 221 | def Si2(self, i): 222 | """Biased estimator S_i^2. 223 | 224 | Arguments: 225 | i -- the mathematical index of the level from which to compute S_i^2 226 | """ 227 | assert 1 <= i <= self.n 228 | # self.reps is indexed from the left to right 229 | index = self.n - i 230 | factor = 1.0 231 | 232 | # We compute this iteratively leveraging the fact that 233 | # 1 / (a * b) = (1 / a) / b 234 | for rep in self.reps[:index]: 235 | factor /= rep 236 | # Then at this point we have: 237 | # factor * (1 / (r_i - 1)) = factor / (r_i - 1) 238 | factor /= self.reps[index] - 1 239 | 240 | # Second line of the above definition, the lines are multiplied. 241 | indicies = self.index_iterator(stop=index+1) 242 | sum = 0.0 243 | for index in indicies: 244 | a = self.mean(index) 245 | b = self.mean(index[:-1]) 246 | sum += (a - b) ** 2 247 | return factor * sum 248 | 249 | @memoize 250 | def Ti2(self, i): 251 | """Compute the unbiased T_i^2 variance estimator. 252 | 253 | Arguments: 254 | i -- the mathematical index from which to compute T_i^2. 255 | """ 256 | 257 | assert 1 <= i <= self.n 258 | if i == 1: 259 | return self.Si2(1) 260 | # Note: in the "Rigorous benchmarking in reasonable time" paper, the 261 | # expression belown was incorrectly shown as being equivalent to: 262 | # return self.Si2(i) - self.Ti2(i - 1) / self.r(i - 1) 263 | # This has since been corrected in a revised version of the paper, and 264 | # we use the revised version below. 265 | return self.Si2(i) - self.Si2(i - 1) / self.r(i - 1) 266 | 267 | @memoize 268 | def optimalreps(self, i, costs, round=True): 269 | """Computes the optimal number of repetitions for a given level. 270 | 271 | Arguments: 272 | i -- the mathematical level of which to compute optimal reps. 273 | costs -- A list of costs for each level, *high* to *low*. 274 | round -- When True, the result is rounded (up) to an integral number 275 | of repetitions. 276 | """ 277 | 278 | costs = [ float(x) for x in costs ] 279 | assert 1 <= i < self.n 280 | index = self.n - i 281 | res_f = (costs[index - 1] / costs[index] * \ 282 | self.Ti2(i) / self.Ti2(i + 1)) ** 0.5 283 | return int(math.ceil(res_f)) if round else res_f 284 | 285 | def confidence95(self): 286 | """Compute the 95% confidence interval.""" 287 | 288 | degfreedom = self.reps[0] - 1 289 | return student_t_quantile95(degfreedom) * \ 290 | (self.Si2(self.n) / self.reps[0]) ** 0.5 291 | 292 | def bootstrap_means(self, iterations=1000): 293 | """Compute a list of simulated means from bootstrap resampling. 294 | 295 | Note that, resampling occurs with replacement. 296 | 297 | Keyword arguments: 298 | iterations -- Number of resamples (and thus means) generated. 299 | """ 300 | means = [] 301 | for i in range(iterations): 302 | values = self._bootstrap_sample() 303 | means.append(_mean(values)) 304 | means.sort() 305 | return means 306 | 307 | def bootstrap_confidence_interval(self, iterations=10000, confidence="0.95"): 308 | """Compute a confidence interval via bootstrap method. 309 | 310 | Keyword arguments: 311 | iterations -- Number of resamplings to base result upon. Default is 10000. 312 | confidence -- The required confidence. Default is "0.95" (95%). 313 | """ 314 | 315 | means = self.bootstrap_means(iterations) 316 | return confidence_slice(means, confidence) 317 | 318 | def _bootstrap_sample(self): 319 | # Uses a closure to mimic the abritrary nested loop depth construct 320 | # shown in the paper "Quantifying performance changes with effect 321 | # size confidence intervals". 322 | def _random_measurement_sample(index=()): 323 | if len(index) == self.n: 324 | yield self[index] 325 | else: 326 | indicies = [random.randrange(self.reps[len(index)]) \ 327 | for i in range(self.reps[len(index)])] 328 | for single_index in indicies: 329 | newindex = index + (single_index, ) 330 | for value in _random_measurement_sample(newindex): 331 | yield value 332 | return list(_random_measurement_sample()) 333 | 334 | def bootstrap_quotient(self, other, iterations=10000, confidence='0.95'): 335 | ratios = [] 336 | for _ in range(iterations): 337 | ra = self._bootstrap_sample() 338 | rb = other._bootstrap_sample() 339 | mean_ra = _mean(ra) 340 | mean_rb = _mean(rb) 341 | 342 | if mean_rb == 0: # protect against divide by zero 343 | ratios.append(float("inf")) 344 | else: 345 | ratios.append(mean_ra / mean_rb) 346 | ratios.sort() 347 | return confidence_slice(ratios, confidence) 348 | 349 | def bootstrap_geomean(l_data_a, l_data_b, iterations=10000, confidence='0.95'): 350 | if len(l_data_a) != len(l_data_b): 351 | raise ValueError("lists need to match") 352 | geomeans = [] 353 | for _ in range(iterations): 354 | ratios = [] 355 | for a, b in zip(l_data_a, l_data_b): 356 | ra = a._bootstrap_sample() 357 | rb = b._bootstrap_sample() 358 | mean_ra = _mean(ra) 359 | mean_rb = _mean(rb) 360 | ratios.append(mean_ra / mean_rb) 361 | geomeans.append(_geomean(ratios)) 362 | geomeans.sort() 363 | return confidence_slice(geomeans, confidence) 364 | -------------------------------------------------------------------------------- /python/pykalibera/graphs.py: -------------------------------------------------------------------------------- 1 | class GraphError(Exception): pass 2 | 3 | import matplotlib.pyplot as plt 4 | import matplotlib.mlab as mlab 5 | 6 | def run_sequence_plot(data, title="Run sequence plot", filename=None, 7 | xlabel="Run #", ylabel="Time(s)"): 8 | """Plots a run sequence graph. 9 | 10 | Arguments: 11 | data -- list of data points 12 | 13 | Keyword arguments: 14 | title -- graph title 15 | filename -- filename to write graph to (None plots to screen) 16 | xlabel -- label on x-axis" 17 | ylabel -- label on y-axis" 18 | """ 19 | xs = range(len(data)) 20 | 21 | plt.cla() 22 | p = plt.plot(xs, data) 23 | plt.title(title) 24 | plt.xlabel(xlabel) 25 | plt.ylabel(ylabel) 26 | 27 | if filename is not None: 28 | plt.savefig(filename) 29 | else: 30 | plt.show() 31 | 32 | def lag_plot(data, lag=5, filename=None, 33 | title=None, xlabel="Lag time(s)", ylabel="Time(s)"): 34 | """Generates a lag plot. 35 | 36 | Arguments: 37 | data -- list of data points 38 | 39 | Keyword arguments: 40 | lag -- which lag to plot 41 | filename -- filename to write graph to (None plots to screen) 42 | title -- graph title (if None, then "Lag %d plot" % lag is used) 43 | xlabel -- label on x-axis 44 | ylabel -- label on y-axis 45 | """ 46 | 47 | if title is None: 48 | title = "Lag %d plot" % lag 49 | 50 | # Python's index operator allows correct wrapping if lag index 51 | # is less than zero. 52 | xs = [ data[x-lag] for x in range(len(data)) ] 53 | 54 | plt.cla() 55 | p = plt.plot(xs, data, 'rx') 56 | plt.title(title) 57 | plt.ylabel(ylabel) 58 | plt.xlabel(xlabel) 59 | 60 | if filename is not None: 61 | plt.savefig(filename) 62 | else: 63 | plt.show() 64 | 65 | def acr_plot(data, filename=None, title="ACR Plot", 66 | xlabel="Lag #", ylabel="Correlation"): 67 | """Generates an ACF plot, demeaned and normalised. 68 | 69 | Arguments: 70 | data -- list of data points 71 | 72 | Keyword arguments: 73 | filename -- filename to write graph to (None plots to screen) 74 | title -- graph title 75 | xlabel -- label on x-axis 76 | ylabel -- label on y-axis 77 | """ 78 | 79 | plt.cla() 80 | plt.acorr(data, detrend=mlab.detrend_mean, usevlines=True, 81 | maxlags=None, normed=True, lw=2) 82 | 83 | plt.title(title) 84 | plt.xlabel(xlabel) 85 | plt.ylabel(ylabel) 86 | 87 | if filename is not None: 88 | plt.savefig(filename) 89 | else: 90 | plt.show() 91 | -------------------------------------------------------------------------------- /python/tests/support.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import sys 3 | 4 | 5 | # Allow to run out of source dir 6 | def setup_paths(): 7 | HERE = os.path.abspath(os.path.dirname(__file__)) 8 | PARENT = os.path.join(HERE, "..") 9 | sys.path.append(PARENT) 10 | -------------------------------------------------------------------------------- /python/tests/test_data.py: -------------------------------------------------------------------------------- 1 | import sys, os.path, random, math 2 | import pytest 3 | 4 | import support 5 | 6 | support.setup_paths() 7 | 8 | from pykalibera.data import Data, _confidence_slice_indicies, _mean 9 | from pykalibera.data import confidence_slice, _geomean, bootstrap_geomean 10 | 11 | # ---------------------------------- 12 | # HELPER FIXTURES 13 | # ---------------------------------- 14 | 15 | @pytest.fixture 16 | def rdata(): 17 | """ Returns some random data """ 18 | return random.sample(xrange(1000), 1000) 19 | 20 | # ---------------------------------- 21 | # TESTS BEGIN 22 | # ---------------------------------- 23 | 24 | def test_indicies(): 25 | d = Data({ 26 | (0, 0) : [1, 2, 3, 4, 5], 27 | (0, 1) : [3, 4, 5, 6, 7] 28 | }, [1, 2, 5]) 29 | 30 | assert d[0, 0, 0] == 1 31 | assert d[0, 0, 4] == 5 32 | assert d[0, 1, 2] == 5 33 | 34 | def test_rep_levels(): 35 | d = Data({ 36 | (0, 0) : [1, 2, 3, 4, 5], 37 | (0, 1) : [3, 4, 5, 6, 7] 38 | }, [1, 2, 5]) 39 | 40 | assert d.r(1) == 5 # lowest level, i.e. arity of the lists in the map 41 | assert d.r(2) == 2 42 | assert d.r(3) == 1 43 | 44 | # indexs are one based, so 0 or less is invalid 45 | with pytest.raises(AssertionError): 46 | d.r(0) 47 | with pytest.raises(AssertionError): 48 | d.r(-1337) 49 | 50 | # Since we have 3 levels here, levels 4 and above are bogus 51 | with pytest.raises(AssertionError): 52 | d.r(4) 53 | with pytest.raises(AssertionError): 54 | d.r(666) 55 | 56 | def test_index_iter(): 57 | d = Data({ 58 | (0, 0) : [1, 2, 3, 4, 5], 59 | (0, 1) : [3, 4, 5, 6, 7] 60 | }, [1, 2, 5]) 61 | 62 | assert list(d.index_iterator()) == [ 63 | (0, 0, 0), (0, 0, 1), (0, 0, 2), (0, 0, 3), (0, 0, 4), 64 | (0, 1, 0), (0, 1, 1), (0, 1, 2), (0, 1, 3), (0, 1, 4), 65 | ] 66 | assert list(d.index_iterator(start=1)) == [ 67 | (0, 0), (0, 1), (0, 2), (0, 3), (0, 4), 68 | (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), 69 | ] 70 | assert list(d.index_iterator(start=0, stop=1)) == [(0, )] 71 | assert list(d.index_iterator(start=1, stop=2)) == [(0, ), (1, )] 72 | 73 | def test_index_means(): 74 | d = Data({ 75 | (0, 0) : [0, 2] 76 | }, [1, 1, 2]) 77 | 78 | assert d.mean(()) == 1 79 | assert d.mean((0, 0)) == 1 80 | assert d.mean((0, 0, 0)) == d[0, 0, 0] 81 | assert d.mean((0, 0, 1)) == d[0, 0, 1] 82 | 83 | def test_index_means2(): 84 | # Suppose we have three levels, so n = 3. 85 | # For the sake of example, level 1 is repetitions, level 2 is executions, 86 | # and level 3 is compilations. Now suppose we repeat level 3 twice, 87 | # level 2 twice and level 3 five times. 88 | # 89 | # This would be a valid data set: 90 | # Note that the indicies are off-by-one due to python indicies starting 91 | # from 0. 92 | d = Data({ (0, 0) : [ 3, 4, 4, 1, 2 ], # times for compile 1, execution 1 93 | (0, 1) : [ 3, 3, 3, 3, 3 ], # compile 1, execution 2 94 | (1, 0) : [ 1, 2, 3, 4, 5 ], # compile 2, execution 1 95 | (1, 1) : [ 1, 1, 4, 4, 1 ], # compile 2, execution 2 96 | }, [2, 2, 5]) # counts for each level (highest to lowest) 97 | 98 | # By calling mean with an empty tuple we compute the mean at all levels 99 | # i.e. the mean of all times: 100 | x = [3, 4, 4, 1, 2, 3, 3, 3, 3, 3, 1, 2, 3, 4, 5, 1, 1, 4, 4, 1] 101 | expect = sum(x)/float(len(x)) 102 | assert expect == d.mean(()) 103 | 104 | # By calling with a singleton tuple we compute the mean for a given 105 | #compilation. E.g. compilation 2 106 | x = [1, 2, 3, 4, 5, 1, 1, 4, 4, 1] 107 | expect = sum(x) / float(len(x)) 108 | assert expect == d.mean((1,)) 109 | 110 | # By calling with a pair we compute the mean for a given compile 111 | # and execution combo. 112 | # E.g. compile 1, execution 2, which is obviously a mean of 3. 113 | assert d.mean((0, 1)) == 3 114 | 115 | def test_si2(): 116 | d = Data({ 117 | (0, 0) : [0, 0] 118 | }, [1, 1, 2]) 119 | 120 | assert d.Si2(1) == 0 121 | 122 | def test_si2_bigger_example(): 123 | # Let's compute S_1^2 for the following data 124 | d = Data({ 125 | (0, 0) : [3,4,3], 126 | (0, 1) : [1.2, 3.1, 3], 127 | (1, 0) : [0.2, 1, 1.5], 128 | (1, 1) : [1, 2, 3] 129 | }, [2, 2, 3]) 130 | 131 | # So we have n = 3, r = (2, 2, 3) 132 | # By my reckoning we should get something close to 0.72667 (working below) 133 | # XXX Explanation from whiteboard need to go here XXX 134 | 135 | assert abs(d.Si2(1)-0.72667) <= 0.0001 136 | 137 | def test_ti2(): 138 | # To verify this, consider the following data: 139 | d = Data({ 140 | (0, 0) : [3,4,3], 141 | (0, 1) : [1.2, 3.1, 3], 142 | (1, 0) : [0.2, 1, 1.5], 143 | (1, 1) : [1, 2, 3] 144 | }, [2, 2, 3]) 145 | 146 | # Let's manually look at S_i^2 where 1 <= i <= n: 147 | #si_vec = [ d.Si2(i) for i in range(1, 4) ] 148 | #print(si_vec) 149 | 150 | ti_vec = [ d.Ti2(i) for i in range (1, 4) ] 151 | expect = [ 0.7266667, 0.262777778, 0.7747 ] 152 | 153 | for i in range(len(expect)): 154 | assert abs(ti_vec[i] - expect[i]) <= 0.0001 155 | 156 | def test_optimal_reps_no_rounding(): 157 | d = Data({ 158 | (0, 0) : [3,4,3], 159 | (0, 1) : [1.2, 3.1, 3], 160 | (1, 0) : [0.2, 1, 1.5], 161 | (1, 1) : [1, 2, 3] 162 | }, [2, 2, 3]) 163 | 164 | #ti_vec = [ d.Ti2(i) for i in range (1, 4) ] 165 | #print(ti_vec) 166 | 167 | # And suppose the costs (high level to low) are 100, 20 and 3 (seconds) 168 | # By my reckoning, the optimal repetition counts should be r_1 = 5, r_2 = 2 169 | # XXX show working XXX 170 | got = [d.optimalreps(i, (100, 20, 3), round=False) for i in [1,2]] 171 | expect = [4.2937, 1.3023] 172 | 173 | for i in range(len(got)): 174 | assert abs(got[i] - expect[i]) <= 0.001 175 | 176 | for i in got: 177 | assert type(i) == float 178 | 179 | def test_optimal_reps_with_rounding(): 180 | """ Same as test_optimal_reps_no_rounding() just with rounding.""" 181 | 182 | d = Data({ 183 | (0, 0) : [3,4,3], 184 | (0, 1) : [1.2, 3.1, 3], 185 | (1, 0) : [0.2, 1, 1.5], 186 | (1, 1) : [1, 2, 3] 187 | }, [2, 2, 3]) 188 | 189 | got = [d.optimalreps(i, (100, 20, 3)) for i in [1,2]] 190 | expect = [5, 2] 191 | 192 | for i in range(len(got)): 193 | assert got[i] == expect[i] 194 | 195 | for i in got: 196 | assert type(i) == int 197 | 198 | def test_worked_example_3_level(): 199 | # three level experiment 200 | # This is the worked example from the paper. 201 | data = Data({ 202 | (0, 0): [9., 5.], (0, 1): [8., 3.], 203 | (1, 0): [10., 6.], (1, 1): [7., 11.], 204 | (2, 0): [1., 12.], (2, 1): [2., 4.], 205 | }, [3, 2, 2]) 206 | 207 | correct = { 208 | (0, 0): 7.0, 209 | (0, 1): 5.5, 210 | (1, 0): 8.0, 211 | (1, 1): 9.0, 212 | (2, 0): 6.5, 213 | (2, 1): 3.0, 214 | } 215 | 216 | for index in data.index_iterator(stop=2): 217 | assert data.mean(index) == correct[index] 218 | 219 | assert data.mean() == 6.5 220 | 221 | assert round(data.Si2(1), 1) == 16.5 222 | assert round(data.Si2(2), 1) == 2.6 223 | assert round(data.Si2(3), 1) == 3.6 224 | assert round(data.Ti2(1), 1) == 16.5 225 | assert round(data.Ti2(2), 1) == -5.7 226 | assert round(data.Ti2(3), 1) == 2.3 227 | 228 | def test_worked_example_2_level(): 229 | data = Data({ 230 | (0, ): [9., 5., 8., 3.], 231 | (1, ): [10., 6., 7., 11.], 232 | (2, ): [1., 12., 2., 4.], 233 | }, [3, 4]) 234 | 235 | correct = {(0, ): 6.3, 236 | (1, ): 8.5, 237 | (2, ): 4.8, 238 | } 239 | for index in data.index_iterator(stop=1): 240 | assert round(data.mean(index), 1) == correct[index] 241 | 242 | assert data.mean() == 6.5 243 | 244 | assert round(data.Si2(1), 1) == 12.7 245 | assert round(data.Si2(2), 1) == 3.6 246 | 247 | assert round(data.Ti2(1), 1) == 12.7 248 | assert round(data.Ti2(2), 1) == 0.4 249 | 250 | def test_bootstrap(): 251 | # XXX needs info on how expected val was computed 252 | data = Data({ 253 | (0, ) : [ 2.5, 3.1, 2.7 ], 254 | (1, ) : [ 5.1, 1.1, 2.3 ], 255 | (2, ) : [ 4.7, 5.5, 7.1 ], 256 | }, [3, 3]) 257 | random.seed(1) 258 | 259 | expect = 4.8111111111 260 | got = data.bootstrap_means(1) # one iteration 261 | 262 | assert abs(got[0] - expect) <= 0.0001 263 | 264 | def test_confidence_slice_indicies(): 265 | assert _confidence_slice_indicies(10, '0.8') == (1, (4, 5), 9) 266 | assert _confidence_slice_indicies(11, '0.8') == (1, (5, ), 10) 267 | assert _confidence_slice_indicies(1000) == (25, (499, 500), 975) 268 | 269 | def test_confidence_slice(): 270 | # Suppose we get back the means: 271 | means = [ x + 15 for x in range(1000) ] # already sorted 272 | 273 | # For a data set of size 1000, we expect alpha/2 to be 25 274 | # (for a 95% confidence interval) 275 | alpha_over_two = len(means) * 0.025 276 | assert(alpha_over_two) == 25 277 | 278 | # Therefore we lose 25 items off each end of the means list. 279 | # The first 25 indicies are 0, ..., 24, so lower bound should be index 25. 280 | # The last 25 indicies are -1, ..., -25, so upper bound is index -26 281 | # Put differently, the last 25 indicies are 999, ..., 975 282 | 283 | lower_index = int(math.floor(alpha_over_two)) 284 | upper_index = int(-math.ceil(alpha_over_two) - 1) 285 | (lobo, hibo) = (means[lower_index], means[upper_index]) 286 | 287 | # Since the data is the index plus 15, we should get an 288 | # interval: [25+15, 974+15] 289 | expect = (25+15, 974+15) 290 | assert (lobo, hibo) == expect 291 | 292 | # There is strictly speaking no median of 1000 items. 293 | # We take the mean of the two middle items items 500 and 501 at indicies 294 | # 499 and 500. Since the data is the index + 15, the middle values are 295 | # 514 and 515, the mean of which is 514.5 296 | median = 514.5 297 | 298 | # Check the implementation. 299 | confrange = confidence_slice(means) 300 | (got_lobo, got_median, got_hibo) = confrange 301 | assert confrange.lower == got_lobo 302 | assert confrange.median == got_median 303 | assert confrange.upper == got_hibo 304 | 305 | assert got_lobo == lobo 306 | assert got_hibo == hibo 307 | assert median == got_median 308 | 309 | assert confrange.error == _mean([median - lobo, hibo - median]) 310 | 311 | 312 | def test_confidence_slice_pass_confidence_level(): 313 | means = [float(x) for x in range(10)] 314 | low, mean, high = confidence_slice(means, '0.8') 315 | assert mean == (4 + 5) / 2. 316 | assert low == 1 317 | assert high == 8 318 | 319 | 320 | means = [float(x) for x in range(11)] 321 | low, mean, high = confidence_slice(means, '0.8') 322 | assert mean == 5 323 | assert low == 1 324 | assert high == 9 325 | 326 | 327 | def test_confidence_quotient(): 328 | data1 = Data({ 329 | (0, ) : [ 2.5, 3.1, 2.7 ], 330 | (1, ) : [ 5.1, 1.1, 2.3 ], 331 | (2, ) : [ 4.7, 5.5, 7.1 ], 332 | }, [3, 3]) 333 | data2 = Data({ 334 | (0, ) : [ 3.5, 4.1, 3.7 ], 335 | (1, ) : [ 6.1, 2.1, 3.3 ], 336 | (2, ) : [ 5.7, 6.5, 8.1 ], 337 | }, [3, 3]) 338 | 339 | random.seed(1) 340 | a = data1._bootstrap_sample() 341 | b = data2._bootstrap_sample() 342 | 343 | random.seed(1) 344 | (_, mean, _) = data1.bootstrap_quotient(data2, iterations=1) 345 | assert mean == _mean(a) / _mean(b) 346 | 347 | def test_confidence_quotient_div_zero(): 348 | data1 = Data({ 349 | (0, ) : [ 2.5, 3.1, 2.7 ], 350 | (1, ) : [ 5.1, 1.1, 2.3 ], 351 | (2, ) : [ 4.7, 5.5, 7.1 ], 352 | }, [3, 3]) 353 | data2 = Data({ # This has a mean of zero 354 | (0, ) : [ 0, 0, 0], 355 | (1, ) : [ 0, 0, 0], 356 | (2, ) : [ 0, 0, 0], 357 | }, [3, 3]) 358 | 359 | # Since all ratios will be +inf, the median should also be +inf 360 | (_, median, _) = data1.bootstrap_quotient(data2, iterations=1) 361 | assert median== float("inf") 362 | 363 | def test_geomean(): 364 | assert _geomean([1]) == 1 365 | assert _geomean([10, 0.1]) == 1 366 | 367 | def test_geomean_data(): 368 | data1 = Data({ 369 | (0, ) : [ 2.9, 3.1, 3.0 ], 370 | (1, ) : [ 3.1, 2.6, 3.3 ], 371 | (2, ) : [ 3.2, 3.0, 2.9 ], 372 | }, [3, 3]) 373 | data2 = Data({ 374 | (0, ) : [ 3.9, 4.1, 4.0 ], 375 | (1, ) : [ 4.1, 3.6, 4.3 ], 376 | (2, ) : [ 4.2, 4.0, 3.9 ], 377 | }, [3, 3]) 378 | 379 | (_, mean1, _) = data1.bootstrap_quotient(data2) 380 | (_, mean2, _) = bootstrap_geomean([data1], [data2]) 381 | assert round(mean1, 3) == round(mean2, 3) 382 | 383 | (_, mean, _) = bootstrap_geomean([data1, data2], [data2, data1]) 384 | assert round(mean, 5) == 1.0 385 | -------------------------------------------------------------------------------- /python/tests/test_graphs.py: -------------------------------------------------------------------------------- 1 | import sys, os.path, random 2 | import pytest 3 | 4 | import support 5 | 6 | support.setup_paths() 7 | 8 | from pykalibera.graphs import run_sequence_plot, lag_plot, acr_plot 9 | 10 | # ---------------------------------- 11 | # HELPER FIXTURES 12 | # ---------------------------------- 13 | 14 | @pytest.fixture 15 | def rdata(): 16 | """ Returns some random data """ 17 | return random.sample(xrange(1000), 1000) 18 | 19 | # XXX Need to delete graphs 20 | @pytest.fixture 21 | def gpath(): 22 | """ Returns a temp filename for graph storage """ 23 | return pytest.ensuretemp("graphs").join("graph.png").strpath 24 | 25 | # ---------------------------------- 26 | # TESTS BEGIN 27 | # ---------------------------------- 28 | 29 | def test_run_sequence(gpath, rdata): 30 | # Does not crash 31 | run_sequence_plot(rdata, filename=gpath) 32 | 33 | def test_lag(gpath, rdata): 34 | # Does not crash 35 | lag_plot(rdata, filename=gpath) 36 | 37 | def test_acr(gpath, rdata): 38 | # Does not crash 39 | acr_plot(rdata, filename=gpath) 40 | -------------------------------------------------------------------------------- /python/tests/test_misc.py: -------------------------------------------------------------------------------- 1 | import support 2 | 3 | support.setup_paths() 4 | 5 | from pykalibera.data import Data 6 | 7 | 8 | def test_memoise_decor_docstrs(): 9 | prefix = "Computes the optimal number of repetitions" 10 | assert Data.optimalreps.__doc__.startswith(prefix) 11 | -------------------------------------------------------------------------------- /ruby/.ruby-version: -------------------------------------------------------------------------------- 1 | 2.5.9 2 | -------------------------------------------------------------------------------- /ruby/Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | gemspec -------------------------------------------------------------------------------- /ruby/LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) King's College London, created by Edd Barrett and Carl 2 | Friedrich Bolz 3 | 4 | Ruby transliteration (C) Chris Seaton 2014 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /ruby/Rakefile: -------------------------------------------------------------------------------- 1 | require "rake/testtask" 2 | 3 | Rake::TestTask.new do |t| 4 | t.libs << 'test' 5 | end 6 | 7 | desc "Run tests" 8 | task :default => :test 9 | -------------------------------------------------------------------------------- /ruby/kalibera.gemspec: -------------------------------------------------------------------------------- 1 | require 'json' 2 | 3 | metadata = JSON.parse(IO.read(File.expand_path('../../shared_metadata.json', __FILE__))) 4 | 5 | Gem::Specification.new do |spec| 6 | spec.name = "kalibera" 7 | spec.version = metadata["metadata"]["version"].to_s + ".2" 8 | spec.authors = ["Edd Barrett", "Carl Friedrich Bolz", "Chris Seaton"] 9 | spec.email = ["chris@chrisseaton.com"] 10 | spec.summary = metadata["metadata"]["short_descr"] 11 | spec.description = metadata["metadata"]["long_descr"] 12 | spec.homepage = metadata["metadata"]["url"] 13 | spec.license = "MIT" 14 | 15 | spec.files = `git ls-files -z`.split("\x0") 16 | spec.test_files = spec.files.grep(%r{^test/}) 17 | spec.require_paths = ["lib"] 18 | 19 | spec.add_development_dependency "rake", "~> 13.0" 20 | spec.add_development_dependency "test-unit", "~> 3.4" 21 | 22 | spec.add_dependency "rbzip2", "~> 0.3" 23 | spec.add_dependency "memoist", "~> 0.16" 24 | end 25 | -------------------------------------------------------------------------------- /ruby/lib/kalibera.rb: -------------------------------------------------------------------------------- 1 | require "kalibera/data" 2 | -------------------------------------------------------------------------------- /ruby/lib/kalibera/data.rb: -------------------------------------------------------------------------------- 1 | require "stringio" 2 | require "base64" 3 | require "rbzip2" 4 | require "bigdecimal" 5 | require "memoist" 6 | 7 | module Kalibera 8 | 9 | CONSTANTS = RBzip2.default_adapter::Decompressor.new(StringIO.new(Base64.decode64("""\ 10 | QlpoOTFBWSZTWbTS4VUAC9bYAEAQAAF/4GAOGZ3e40HH2YJERUKomGbCNMAAtMBaAkCOP9U0/R+q 11 | qNCqfjAqVGOY3+qk96qmmIp+CCVNDD/1VGjfqkBJpIElG6uN92vE/PP+5IxhMIIgAbOxEMKLMVSq 12 | VWtZmZaEklAAAttoAAAAAAAAAAAAEklAAEklABttkksklkkknVu2dX1vW9yWrkuXJJJJJJJJJJKK 13 | JWsS5dq7k3RRRbu2222227oAAFQqFCAjkB0w7eMpKWy3bVI42225QlbQAAAAAAlbQbbUqkolE7JZ 14 | jjjmS5LluZkuZmZmZmZmZmZvZhOYnktttsskiaSSToAAA5znOZmMzGTSSJJJ1JO+7gLbR067u48V 15 | bZIAABJCSSjElG436ySek9f1/X3vZ72+7wPk5bbJG0kYTYA2+fHiolu7u8S62JEpjmZ3YS40AEt3 16 | mb8lzXwEpar+9P3s9vAq1o23mt3oaJmZvJAPQu6AlL3s9ojg6rRBmOQaKRb+zbOaL0FMxZKBTm9O 17 | vLmUJuqwVc+KevulFMM/JOzWTMN5Aa7cO5hmZuioHbboGzxzZLFATHYvXg5SUqCWxmre6As43wzV 18 | 30514PDn2m7ema93M9u9199F6QCSfsxJ7wA5R3bTsglUQaJLy4wKYu895byRoTJb7vXsGwZzhPZ0 19 | xOdgtMncj5PGCPeKFPCgenS83zcvnQwGfm3prLnb6bcxKJABZeOrvfNAUNTTobmLQ+fOHAjxo2WE 20 | JaevegHIDVvW+kRAD2TpoeJWFQDKtubzWOr6EFU3xs3rojhW98aghZQmIWXe9sUXKEXKvWvk6bTH 21 | GURStAQ1M7OzF07ui6Q2DYl1NojMzlvrwcO6+uY7V3ZFerzz3sIqJsGzcJN2EAAew/vvqqvvvvi7 22 | xXjhGH3nGNKv2u+Bt8k4USU+SaoLuU6HNmQoYyFTN3huLP721dwHIqQzrqVhjz2+UQw0ezok7gQl 23 | wyZ2YM0hgPVaZaOLK9q3TtGiaO3Br4xGyy7HfAWw72nvLmaGPeSz2c/FkuN7Qj1guqtgUU1NHry2 24 | 5h7KvWgs2jglhCZpYpa8qbl3PrrEDL1Jg/1VrZ8IthQhNKLznYMPozi9arWla2BODhV6yuIKmzsa 25 | zhOb3kxyjcD0ExuXvdys3WRxxYEQszLy8jxqTPZB7UQJ2xbk3YGV2QcdPN2HYuoVkWxUhtErw9u3 26 | 0mdw5HiO0WVtRUCEyxEAOdIHV1sWmbReT4iMTzRsB7Q36e72rpwePnrPggpSxjlZ9Lm8YJrgXDzJ 27 | /30MSDPwzV8s+g4Rcpy3a8c7Y1jxgHJQs8+MyLsudmYSFySWm3OrSn5p3qb++m8fvHUGfCfNCbol 28 | RSZ6wp+ZM14k8S+SKwqES7PQ72DFK4PTiMCA6LbvuSSSJ1R3iJAF10sQYlhpp2GSzWBw3ty+HjLj 29 | HCDTxku3yHPrNvTXekcBSOuzMfOvy3dybchXeLxvXN3vKTN/BdbwUlqXY+g4sWMoHTQT61MeXIMf 30 | PhgYq8KhOEbqeMqoyhWQp03eOOpV/LVvXl2X71ztaX7tMZJ5gBCshDGQCskDme9zu9b1dcgB1khU 31 | mmEk2yTySG2QPmEJp3m/jM+93nYSoe7YEPmExITTpITut87rehm+UgF13IG0nUk52+95Z+9wg49Y 32 | SUraiKIYo3UOvdtq6bVDDmbPTmhtyLfS1LCPXQmYLD7c9lu5ZfdaWSGn1m82kCd4xhYOuVUH33zB 33 | Kh5IsOsxNe+yB7XNd77Xc05kD5h1Jpk0hnLJpnrzXe9xdXpOJfrA4kzdhvLB1tzn3e6OqyaeM8m9 34 | 2HWH2m59jnvrO2w+9TTFDibQffe7880+cfu08zjLw/Mbx4faLWcMbzQ8vDWj6uDmr75CuG9hzAOl 35 | 1Wk0mWKqglrLcmu/uw/IVcPCtGw3hY3TgkN0PqENShQhpj5ZN7dzethJScvIGNEPPE7lcJTwYM8t 36 | 7zB5zMNkYZmHc1cbY1RirWMmuHzEFi7P04mPluFvMqnoirRUEEB3taRpio2svFVXtMcub+PuTmqL 37 | vlSOqbSO996bd/e0AoLJ1hV97AmbtfxIsAkWBILJAUgAoEiySCwgsICwDqAZlkiyEWBFhBSCwqSc 38 | 9zeoAskUBYRYRYb3rmeHWXZOMgsFgLAWBq2RQWRcSVIpFikWCwWF3mAoKKCgpr9TE21BYqy8zDbW 39 | LFFiixRRXLpcoooovmqiirm/rmlRVWl57xynNqqo8tVVVy1VVyrRVVVFb39rSovrvKitpVR/Woo5 40 | So32dxukUUUz2YY1FFLbF1u91TbbZUWNsqVrM3336515OpjWP1DMaFZ5ufsDOXTHLBSsrN85f1/G 41 | Z97s999hpF0nwOBV8gYfoGPnQqiKzPLcnpOky/b652qCQ9ti4PbvcjqmneMEtaV17cnt6NKZYybS 42 | TwHdBK34b2wy3CJ1qqi8qpigCKsVSvFUFMUMtVTFPjBoq+K5AGXzuffdyXtm0+ebv5HdMVnN0mMe 43 | ++473+/HTWnzd0OuWnHE20ZtC7oaZvN/jvn9efa9UHKC++prtL9ZWDu7c73vvaOTiKbTmUPJ7Pv2 44 | jEFDnO6Xe/deOG0+v7Cn6z8zO2VH9TMse/fvt67+w77n7QaQffsxOJfqGteOa/HdYe1Tm6LFOpUz 45 | VMR/aPvadm0zXsnMppiffYG27ZXfslV2hAJrPGmKsVfe9fSO8vVnru7tbzSU1a9cGv0qsQEdhHK7 46 | rJBfbPMSKZc3wmij3ULrhE9nIwoDMp4WAK2GkIKIqrHAK0Bjvo7sA2VZ941ggrwIsfGLZTHvGSZR 47 | 8UGKDKFAAcC8U45fTlKQKM8fnx+IAr3rmwtVbfFhj4VZqQviRXhavLu9zOQWISS0w9PxFYCEfK1l 48 | 9GK0GhrKxr5CwCveB4XDEsPYWKwfHDgrBnZT4XW5dlE2tW7FAR8RGW0XMy1MQoDwyQ+Hnmvet5I/ 49 | HrTVYQJbJ1e3y6B7LoCh5qyXWO03X5WbxWT0UvY55cyRbhmB8ib6lkhRo5USRAoLFA4WELV93ZV/ 50 | DKh2MIhnIWCPBLEh3FUTBSxJC7h4Z15qTFPTRmpe1Ldj1rlkVnAKHDySryior3OheiTPKZY2GaQ6 51 | N2YyvJh9wuO75VOarCWLEUdLavAs2RShYOntLrMVabUAyDnTJIQ4deJa92pAWd6KBz+F3JFOFCQt 52 | NLhVQA=="""))).read.split().map { |x| Float(x) } 53 | 54 | # Look up the 95% quantile from constant table. 55 | def self.student_t_quantile95(ndeg) 56 | index = ndeg - 1 57 | if index >= CONSTANTS.size 58 | index = -1 # the quantile converges, we just take the last value 59 | end 60 | CONSTANTS[index] 61 | end 62 | 63 | ConfRange = Struct.new(:lower, :median, :upper) do 64 | def error 65 | Kalibera.mean([upper - median, median - lower]) 66 | end 67 | end 68 | 69 | # Returns a tuples (lower, median, upper), where: 70 | # lower: lower bound of 95% confidence interval 71 | # median: the median value of the data 72 | # upper: upper bound of 95% confidence interval 73 | # 74 | # Arguments: 75 | # means -- the list of means (need not be sorted). 76 | def self.confidence_slice(means, confidence="0.95") 77 | means = means.sort 78 | # There may be >1 median indicies, i.e. data is even-sized. 79 | lower, middle_indicies, upper = confidence_slice_indicies(means.size, confidence) 80 | median = mean(middle_indicies.map { |i| means[i] }) 81 | ConfRange.new(means[lower], median, means[upper - 1]) # upper is *exclusive* 82 | end 83 | 84 | # Returns a triple (lower, mean_indicies, upper) so that l[lower:upper] 85 | # gives confidence_level of all samples. Mean_indicies is a tuple of one or 86 | # two indicies that correspond to the mean position 87 | # 88 | # Keyword arguments: 89 | # confidence_level -- desired level of confidence as a Decimal instance. 90 | def self.confidence_slice_indicies(length, confidence_level=BigDecimal('0.95')) 91 | raise unless !confidence_level.instance_of?(Float) 92 | confidence_level = BigDecimal(confidence_level) 93 | raise unless confidence_level.instance_of?(BigDecimal) 94 | exclude = (1 - confidence_level) / 2 95 | 96 | if length % 2 == 0 97 | mean_indicies = [length / 2 - 1, length / 2] # TRANSLITERATION: was // 98 | else 99 | mean_indicies = [length / 2] # TRANSLITERATION: was // 100 | end 101 | 102 | lower_index = Integer( 103 | (exclude * length).round(0, BigDecimal::ROUND_DOWN) # TRANSLITERATION: was quantize 1. 104 | ) 105 | 106 | upper_index = Integer( 107 | ((1 - exclude) * length).round(0, BigDecimal::ROUND_UP) # TRANSLITERATION: was quantize 1. 108 | ) 109 | 110 | [lower_index, mean_indicies, upper_index] 111 | end 112 | 113 | def self.mean(l) 114 | l.inject(0, :+) / Float(l.size) 115 | end 116 | 117 | def self.geomean(l) 118 | l.inject(1, :*) ** (1.0 / Float(l.size)) 119 | end 120 | 121 | class Data 122 | 123 | extend Memoist 124 | 125 | # Instances of this class store measurements (corresponding to 126 | # the Y_... in the papers). 127 | # 128 | # Arguments: 129 | # data -- Dict mapping tuples of all but the last index to lists of values. 130 | # reps -- List of reps for each level, high to low. 131 | def initialize(data, reps) 132 | @data = data 133 | @reps = reps 134 | 135 | # check that all data is there 136 | 137 | array = reps.map { |i| (0...i).to_a } 138 | array[0].product(*array.drop(1)).each do |index| 139 | self[*index] # does not crash 140 | end 141 | end 142 | 143 | def [](*indicies) 144 | raise unless indicies.size == @reps.size 145 | x = @data[indicies[0...indicies.size-1]] 146 | raise unless !x.nil? 147 | x[indicies[-1]] 148 | end 149 | 150 | # Computes a list of all possible data indcies gievn that 151 | # start <= index <= stop are fixed. 152 | def index_iterator(start=0, stop=nil) 153 | if stop.nil? 154 | stop = n 155 | end 156 | 157 | maximum_indicies = @reps[start...stop] 158 | remaining_indicies = maximum_indicies.map { |maximum| (0...maximum).to_a } 159 | return [[]] if remaining_indicies.empty? 160 | remaining_indicies[0].product(*remaining_indicies.drop(1)) 161 | end 162 | 163 | # The number of levels in the experiment. 164 | def n 165 | @reps.size 166 | end 167 | 168 | # The number of repetitions for level i. 169 | # 170 | # Arguments: 171 | # i -- mathematical index. 172 | def r(i) 173 | raise unless 1 <= i 174 | raise unless i <= n 175 | index = n - i 176 | @reps[index] 177 | end 178 | 179 | # Compute the mean across a number of values. 180 | # 181 | # Keyword arguments: 182 | # indicies -- tuple of fixed indicies over which to compute the mean, 183 | # given from left to right. The remaining indicies are variable. 184 | def mean(indicies=[]) 185 | remaining_indicies_cross_product = 186 | index_iterator(start=indicies.size) 187 | alldata = remaining_indicies_cross_product.map { |remaining| self[*(indicies + remaining)] } 188 | Kalibera.mean(alldata) 189 | end 190 | 191 | memoize :mean 192 | 193 | # Biased estimator S_i^2. 194 | # 195 | # Arguments: 196 | # i -- the mathematical index of the level from which to compute S_i^2 197 | def Si2(i) 198 | raise unless 1 <= i 199 | raise unless i <= n 200 | # @reps is indexed from the left to right 201 | index = n - i 202 | factor = 1.0 203 | 204 | # We compute this iteratively leveraging the fact that 205 | # 1 / (a * b) = (1 / a) / b 206 | for rep in @reps[0, index] 207 | factor /= rep 208 | end 209 | # Then at this point we have: 210 | # factor * (1 / (r_i - 1)) = factor / (r_i - 1) 211 | factor /= @reps[index] - 1 212 | 213 | # Second line of the above definition, the lines are multiplied. 214 | indicies = index_iterator(0, index+1) 215 | sum = 0.0 216 | for index in indicies 217 | a = mean(index) 218 | b = mean(index[0,index.size-1]) 219 | sum += (a - b) ** 2 220 | end 221 | factor * sum 222 | end 223 | 224 | memoize :Si2 225 | 226 | # Compute the unbiased T_i^2 variance estimator. 227 | # 228 | # Arguments: 229 | # i -- the mathematical index from which to compute T_i^2. 230 | def Ti2(i) 231 | # This is the broken implementation of T_i^2 shown in the pubslished 232 | # version of "Rigorous benchmarking in reasonable time". Tomas has 233 | # since fixed this in local versions of the paper. 234 | #@memoize 235 | #def broken_Ti2(self, i) 236 | # """ Compute the unbiased T_i^2 variance estimator. 237 | # 238 | # Arguments: 239 | # i -- the mathematical index from which to compute T_i^2. 240 | # """ 241 | # 242 | # raise unless 1 <= i <= n 243 | # if i == 1: 244 | # return self.Si2(1) 245 | # return self.Si2(i) - self.Ti2(i - 1) / self.r(i - 1) 246 | 247 | # This is the correct definition of T_i^2 248 | 249 | raise unless 1 <= i 250 | raise unless i <= n 251 | if i == 1 252 | return Si2(1) 253 | end 254 | Si2(i) - Si2(i - 1) / r(i - 1) 255 | end 256 | 257 | memoize :Ti2 258 | 259 | # Computes the optimal number of repetitions for a given level. 260 | # 261 | # Note that the resulting number of reps is not rounded. 262 | # 263 | # Arguments: 264 | # i -- the mathematical level of which to compute optimal reps. 265 | # costs -- A list of costs for each level, *high* to *low*. 266 | def optimalreps(i, costs) 267 | # NOTE: Does not round 268 | costs = costs.map { |x| Float(x) } 269 | raise unless 1 <= i 270 | raise unless i < n 271 | index = n - i 272 | return (costs[index - 1] / costs[index] * 273 | Ti2(i) / Ti2(i + 1)) ** 0.5 274 | end 275 | 276 | memoize :optimalreps 277 | 278 | # Compute the 95% confidence interval. 279 | def confidence95 280 | degfreedom = @reps[0] - 1 281 | student_t_quantile95(degfreedom) * 282 | (Si2(n) / @reps[0]) ** 0.5 283 | end 284 | 285 | # Compute a list of simulated means from bootstrap resampling. 286 | # 287 | # Note that, resampling occurs with replacement. 288 | # 289 | # Keyword arguments: 290 | # iterations -- Number of resamples (and thus means) generated. 291 | def bootstrap_means(iterations=1000) 292 | means = [] 293 | for _ in 0...iterations 294 | values = bootstrap_sample() 295 | means.push(Kalibera.mean(values)) 296 | end 297 | means.sort() 298 | means 299 | end 300 | 301 | # Compute a confidence interval via bootstrap method. 302 | # 303 | # Keyword arguments: 304 | # iterations -- Number of resamplings to base result upon. Default is 10000. 305 | # confidence -- The required confidence. Default is "0.95" (95%). 306 | def bootstrap_confidence_interval(iterations=10000, confidence="0.95") 307 | means = bootstrap_means(iterations) 308 | Kalibera.confidence_slice(means, confidence) 309 | end 310 | 311 | def random_measurement_sample(index=[]) 312 | results = [] 313 | if index.size == n 314 | results.push self[*index] 315 | else 316 | indicies = (0...@reps[index.size]).map { |i| rand(@reps[index.size]) } 317 | for single_index in indicies 318 | newindex = index + [single_index] 319 | for value in random_measurement_sample(newindex) 320 | results.push value 321 | end 322 | end 323 | end 324 | results 325 | end 326 | 327 | def bootstrap_sample 328 | random_measurement_sample 329 | end 330 | 331 | def bootstrap_quotient(other, iterations=10000, confidence='0.95') 332 | ratios = [] 333 | for _ in 0...iterations 334 | ra = bootstrap_sample() 335 | rb = other.bootstrap_sample() 336 | mean_ra = Kalibera.mean(ra) 337 | mean_rb = Kalibera.mean(rb) 338 | 339 | if mean_rb == 0 # protect against divide by zero 340 | ratios.push(Float::INFINITY) 341 | else 342 | ratios.push(mean_ra / mean_rb) 343 | end 344 | end 345 | ratios.sort! 346 | Kalibera.confidence_slice(ratios, confidence).values 347 | end 348 | 349 | end 350 | 351 | def self.bootstrap_geomean(l_data_a, l_data_b, iterations=10000, confidence='0.95') 352 | raise "lists need to match" unless l_data_a.size == l_data_b.size 353 | geomeans = [] 354 | iterations.times do 355 | ratios = [] 356 | l_data_a.zip(l_data_b).each do |a, b| 357 | ra = a.bootstrap_sample 358 | rb = b.bootstrap_sample 359 | mean_ra = mean(ra) 360 | mean_rb = mean(rb) 361 | ratios << mean_ra / mean_rb 362 | end 363 | geomeans << geomean(ratios) 364 | end 365 | geomeans.sort! 366 | confidence_slice(geomeans, confidence) 367 | end 368 | 369 | end 370 | -------------------------------------------------------------------------------- /ruby/test/test_data.rb: -------------------------------------------------------------------------------- 1 | require "test/unit" 2 | 3 | require "kalibera" 4 | 5 | # We need to match Python's random numbers when testing 6 | 7 | class TestData < Kalibera::Data 8 | 9 | RAND = [0, 2, 2, 0, 1, 1, 1, 2, 0, 0, 2, 1, 2, 0, 1, 2, 0, 2, 2, 0, 0, 1, 10 | 2, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 2, 1, 1, 0, 2, 2, 0, 0] 11 | 12 | def initialize(data, reps) 13 | super 14 | @rand_counter = 0 15 | end 16 | 17 | def reset_local_rand 18 | @rand_counter = 0 19 | end 20 | 21 | def rand(r) 22 | raise "mock rand designed for range=3" unless r == 3 23 | raise "mock rand out of data" unless @rand_counter < RAND.size 24 | 25 | n = RAND[@rand_counter] 26 | @rand_counter += 1 27 | n 28 | end 29 | 30 | end 31 | 32 | class TestKaliberaData < Test::Unit::TestCase 33 | 34 | def test_indicies 35 | d = TestData.new({ 36 | [0, 0] => [1, 2, 3, 4, 5], 37 | [0, 1] => [3, 4, 5, 6, 7] 38 | }, [1, 2, 5]) 39 | 40 | assert_equal 1, d[0, 0, 0] 41 | assert_equal 5, d[0, 0, 4] 42 | assert_equal 5, d[0, 1, 2] 43 | end 44 | 45 | def test_rep_levels 46 | d = TestData.new({ 47 | [0, 0] => [1, 2, 3, 4, 5], 48 | [0, 1] => [3, 4, 5, 6, 7] 49 | }, [1, 2, 5]) 50 | 51 | assert_equal 5, d.r(1) # lowest level, i.e. arity of the lists in the map 52 | assert_equal 2, d.r(2) 53 | assert_equal 1, d.r(3) 54 | 55 | # indexs are one based, so 0 or less is invalid 56 | assert_raise RuntimeError do 57 | d.r(0) 58 | end 59 | 60 | assert_raise RuntimeError do 61 | d.r(-1337) 62 | end 63 | 64 | # Since we have 3 levels here, levels 4 and above are bogus 65 | assert_raise RuntimeError do 66 | d.r(4) 67 | end 68 | 69 | assert_raise RuntimeError do 70 | d.r(666) 71 | end 72 | end 73 | 74 | def test_index_iter 75 | d = TestData.new({ 76 | [0, 0] => [1, 2, 3, 4, 5], 77 | [0, 1] => [3, 4, 5, 6, 7] 78 | }, [1, 2, 5]) 79 | 80 | assert_equal [ 81 | [0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 3], [0, 0, 4], 82 | [0, 1, 0], [0, 1, 1], [0, 1, 2], [0, 1, 3], [0, 1, 4], 83 | ], d.index_iterator() 84 | assert_equal [ 85 | [0, 0], [0, 1], [0, 2], [0, 3], [0, 4], 86 | [1, 0], [1, 1], [1, 2], [1, 3], [1, 4], 87 | ], d.index_iterator(start=1) 88 | assert_equal [[0]], d.index_iterator(start=0, stop=1) 89 | assert_equal [[0], [1]], d.index_iterator(start=1, stop=2) 90 | end 91 | 92 | def test_index_means 93 | d = TestData.new({ 94 | [0, 0] => [0, 2] 95 | }, [1, 1, 2]) 96 | 97 | assert_equal 1, d.mean([]) 98 | assert_equal 1, d.mean([0, 0]) 99 | assert_equal d[0, 0, 0], d.mean([0, 0, 0]) 100 | assert_equal d[0, 0, 1], d.mean([0, 0, 1]) 101 | end 102 | 103 | def test_index_means2 104 | # Suppose we have three levels, so n = 3. 105 | # For the sake of example, level 1 is repetitions, level 2 is executions, 106 | # and level 3 is compilations. Now suppose we repeat level 3 twice, 107 | # level 2 twice and level 3 five times. 108 | # 109 | # This would be a valid data set: 110 | # Note that the indicies are off-by-one due to python indicies starting 111 | # from 0. 112 | d = TestData.new({ [0, 0] => [ 3, 4, 4, 1, 2 ], # times for compile 1, execution 1 113 | [0, 1] => [ 3, 3, 3, 3, 3 ], # compile 1, execution 2 114 | [1, 0] => [ 1, 2, 3, 4, 5 ], # compile 2, execution 1 115 | [1, 1] => [ 1, 1, 4, 4, 1 ], # compile 2, execution 2 116 | }, [2, 2, 5]) # counts for each level (highest to lowest) 117 | 118 | # By calling mean with an empty tuple we compute the mean at all levels 119 | # i.e. the mean of all times: 120 | x = [3, 4, 4, 1, 2, 3, 3, 3, 3, 3, 1, 2, 3, 4, 5, 1, 1, 4, 4, 1] 121 | expect = x.inject(0, :+)/Float(x.size) 122 | assert_equal d.mean([]), expect 123 | 124 | # By calling with a singleton tuple we compute the mean for a given 125 | #compilation. E.g. compilation 2 126 | x = [1, 2, 3, 4, 5, 1, 1, 4, 4, 1] 127 | expect = x.inject(0, :+) / Float(x.size) 128 | assert_equal d.mean([1]), expect 129 | 130 | # By calling with a pair we compute the mean for a given compile 131 | # and execution combo. 132 | # E.g. compile 1, execution 2, which is obviously a mean of 3. 133 | assert_equal 3, d.mean([0, 1]) 134 | end 135 | 136 | def test_si2 137 | d = TestData.new({ 138 | [0, 0] => [0, 0] 139 | }, [1, 1, 2]) 140 | 141 | assert_equal 0, d.Si2(1) 142 | end 143 | 144 | def test_si2_bigger_example 145 | # Let's compute S_1^2 for the following data 146 | d = TestData.new({ 147 | [0, 0] => [3,4,3], 148 | [0, 1] => [1.2, 3.1, 3], 149 | [1, 0] => [0.2, 1, 1.5], 150 | [1, 1] => [1, 2, 3] 151 | }, [2, 2, 3]) 152 | 153 | # So we have n = 3, r = (2, 2, 3) 154 | # By my reckoning we should get something close to 0.72667 (working below) 155 | # XXX Explanation from whiteboard need to go here XXX 156 | 157 | assert_less_equal (d.Si2(1)-0.72667).abs, 0.0001 158 | end 159 | 160 | def test_ti2 161 | # To verify this, consider the following data: 162 | d = TestData.new({ 163 | [0, 0] => [3,4,3], 164 | [0, 1] => [1.2, 3.1, 3], 165 | [1, 0] => [0.2, 1, 1.5], 166 | [1, 1] => [1, 2, 3] 167 | }, [2, 2, 3]) 168 | 169 | # Let's manually look at S_i^2 where 1 <= i <= n: 170 | #si_vec = [ d.Si2(i) for i in range(1, 4) ] 171 | #print(si_vec) 172 | 173 | ti_vec = (1...4).map { |i| d.Ti2(i) } 174 | expect = [ 0.7266667, 0.262777778, 0.7747 ] 175 | 176 | (0...expect.size).each do |i| 177 | assert (ti_vec[i] - expect[i]).abs <= 0.0001, "#{} <= 0.0001" 178 | end 179 | end 180 | 181 | def test_optimal_reps 182 | d = TestData.new({ 183 | [0, 0] => [3,4,3], 184 | [0, 1] => [1.2, 3.1, 3], 185 | [1, 0] => [0.2, 1, 1.5], 186 | [1, 1] => [1, 2, 3] 187 | }, [2, 2, 3]) 188 | 189 | #ti_vec = [ d.Ti2(i) for i in range (1, 4) ] 190 | #print(ti_vec) 191 | 192 | # And suppose the costs (high level to low) are 100, 20 and 3 (seconds) 193 | # By my reckoning, the optimal repetition counts should be r_1 = 5, r_2 = 2 194 | # XXX show working XXX 195 | got = [1,2].map { |i|d.optimalreps(i, [100, 20, 3]) } 196 | expect = [4.2937, 1.3023] 197 | 198 | (0...got.size).each do |i| 199 | assert_less_equal (got[i] - expect[i]).abs, 0.001 200 | end 201 | end 202 | 203 | def test_worked_example_3_level 204 | # three level experiment 205 | # This is the worked example from the paper. 206 | data = TestData.new({ 207 | [0, 0] => [9.0, 5.0], [0, 1] => [8.0, 3.0], 208 | [1, 0] => [10.0, 6.0], [1, 1] => [7.0, 11.0], 209 | [2, 0] => [1.0, 12.0], [2, 1] => [2.0, 4.0], 210 | }, [3, 2, 2]) 211 | 212 | correct = { 213 | [0, 0] => 7.0, 214 | [0, 1] => 5.5, 215 | [1, 0] => 8.0, 216 | [1, 1] => 9.0, 217 | [2, 0] => 6.5, 218 | [2, 1] => 3.0, 219 | } 220 | 221 | data.index_iterator(0, 2).each do |index| 222 | assert data.mean(index) == correct[index] 223 | end 224 | 225 | assert_equal 6.5, data.mean() 226 | 227 | assert_equal 16.5, data.Si2(1).round(1) 228 | assert_equal 2.6, data.Si2(2).round(1) 229 | assert_equal 3.6, data.Si2(3).round(1) 230 | assert_equal 16.5, data.Ti2(1).round(1) 231 | assert_equal (-5.7), data.Ti2(2).round(1) 232 | assert_equal 2.3, data.Ti2(3).round(1) 233 | end 234 | 235 | def test_worked_example_2_level 236 | data = TestData.new({ 237 | [0] => [9.0, 5.0, 8.0, 3.0], 238 | [1] => [10.0, 6.0, 7.0, 11.0], 239 | [2] => [1.0, 12.0, 2.0, 4.0], 240 | }, [3, 4]) 241 | 242 | correct = {[0] => 6.3, 243 | [1] => 8.5, 244 | [2] => 4.8, 245 | } 246 | data.index_iterator(0, 1).each do |index| 247 | assert data.mean(index).round(1) == correct[index] 248 | end 249 | 250 | assert_equal 6.5, data.mean() 251 | 252 | assert_equal 12.7, data.Si2(1).round(1) 253 | assert_equal 3.6, data.Si2(2).round(1) 254 | 255 | assert_equal 12.7, data.Ti2(1).round(1) 256 | assert_equal 0.4, data.Ti2(2).round(1) 257 | end 258 | 259 | def test_bootstrap 260 | # XXX needs info on how expected val was computed 261 | data = TestData.new({ 262 | [0] => [ 2.5, 3.1, 2.7 ], 263 | [1] => [ 5.1, 1.1, 2.3 ], 264 | [2] => [ 4.7, 5.5, 7.1 ], 265 | }, [3, 3]) 266 | data.reset_local_rand 267 | 268 | expect = 4.8111111111 269 | got = data.bootstrap_means(1) # one iteration 270 | 271 | assert_less_equal (got[0] - expect).abs, 0.0001 272 | end 273 | 274 | def test_confidence_slice_indicies 275 | assert_equal [1, [4, 5], 9], Kalibera.confidence_slice_indicies(10, '0.8') 276 | assert_equal [1, [5], 10], Kalibera.confidence_slice_indicies(11, '0.8') 277 | assert_equal [25, [499, 500], 975], Kalibera.confidence_slice_indicies(1000) 278 | end 279 | 280 | def test_confidence_slice 281 | # Suppose we get back the means: 282 | means = (0...1000).map { |x| x + 15 } # already sorted 283 | 284 | # For a data set of size 1000, we expect alpha/2 to be 25 285 | # (for a 95% confidence interval) 286 | alpha_over_two = means.size * 0.025 287 | assert_equal alpha_over_two, 25 288 | 289 | # Therefore we lose 25 items off each end of the means list. 290 | # The first 25 indicies are 0, ...0, 24, so lower bound should be index 25. 291 | # The last 25 indicies are -1, ...0, -25, so upper bound is index -26 292 | # Put differently, the last 25 indicies are 999, ...0, 975 293 | 294 | lower_index = Integer(alpha_over_two.floor) 295 | upper_index = Integer(-alpha_over_two.ceil - 1) 296 | lobo, hibo = [means[lower_index], means[upper_index]] 297 | 298 | # Since the data is the index plus 15, we should get an 299 | # interval: [25+15, 974+15] 300 | expect = [25+15, 974+15] 301 | assert_equal expect, [lobo, hibo] 302 | 303 | # There is strictly speaking no median of 1000 items. 304 | # We take the mean of the two middle items items 500 and 501 at indicies 305 | # 499 and 500. Since the data is the index + 15, the middle values are 306 | # 514 and 515, the mean of which is 514.5 307 | median = 514.5 308 | 309 | # Check the implementation. 310 | confrange = Kalibera.confidence_slice(means) 311 | got_lobo, got_median, got_hibo = confrange.values 312 | assert_equal got_lobo, confrange.lower 313 | assert_equal got_median, confrange.median 314 | assert_equal got_hibo, confrange.upper 315 | 316 | assert_equal lobo, got_lobo 317 | assert_equal hibo, got_hibo 318 | assert_equal got_median, median 319 | 320 | assert_equal Kalibera.mean([median - lobo, hibo - median]), confrange.error 321 | end 322 | 323 | def test_confidence_slice_pass_confidence_level 324 | means = (0...10).map { |x| Float(x) } 325 | low, mean, high = Kalibera.confidence_slice(means, '0.8').values 326 | assert_equal (4 + 5) / 2.0, mean 327 | assert_equal 1, low 328 | assert_equal 8, high 329 | 330 | 331 | means = (0...11).map { |x| Float(x) } 332 | low, mean, high = Kalibera.confidence_slice(means, '0.8').values 333 | assert_equal 5, mean 334 | assert_equal 1, low 335 | assert_equal 9, high 336 | end 337 | 338 | def test_confidence_quotient 339 | data1 = TestData.new({ 340 | [0] => [ 2.5, 3.1, 2.7 ], 341 | [1] => [ 5.1, 1.1, 2.3 ], 342 | [2] => [ 4.7, 5.5, 7.1 ], 343 | }, [3, 3]) 344 | data2 = TestData.new({ 345 | [0] => [ 3.5, 4.1, 3.7 ], 346 | [1] => [ 6.1, 2.1, 3.3 ], 347 | [2] => [ 5.7, 6.5, 8.1 ], 348 | }, [3, 3]) 349 | 350 | data1.reset_local_rand 351 | data2.reset_local_rand 352 | a = data1.bootstrap_sample 353 | b = data2.bootstrap_sample 354 | 355 | data1.reset_local_rand 356 | data2.reset_local_rand 357 | _, mean, _ = data1.bootstrap_quotient(data2, iterations=1) 358 | assert_equal Kalibera.mean(a) / Kalibera.mean(b), mean 359 | end 360 | 361 | def test_confidence_quotient_div_zero 362 | data1 = TestData.new({ 363 | [0] => [ 2.5, 3.1, 2.7 ], 364 | [1] => [ 5.1, 1.1, 2.3 ], 365 | [2] => [ 4.7, 5.5, 7.1 ], 366 | }, [3, 3]) 367 | data2 = TestData.new({ # This has a mean of zero 368 | [0] => [ 0, 0, 0], 369 | [1] => [ 0, 0, 0], 370 | [2] => [ 0, 0, 0], 371 | }, [3, 3]) 372 | 373 | # Since all ratios will be +inf, the median should also be +inf 374 | _, median, _ = data1.bootstrap_quotient(data2, iterations=1) 375 | assert_equal Float::INFINITY, median 376 | end 377 | 378 | def test_geomean 379 | assert_equal 1, Kalibera.geomean([10, 0.1]) 380 | assert_equal 1, Kalibera.geomean([1]) 381 | end 382 | 383 | # This requires a very large volume of random data, which we can't easily 384 | # just store in the mock random generator above. 385 | 386 | #def test_geomean_data 387 | # data1 = TestData.new({ 388 | # [0] => [ 2.9, 3.1, 3.0 ], 389 | # [1] => [ 3.1, 2.6, 3.3 ], 390 | # [2] => [ 3.2, 3.0, 2.9 ], 391 | # }, [3, 3]) 392 | # data2 = TestData.new({ 393 | # [0] => [ 3.9, 4.1, 4.0 ], 394 | # [1] => [ 4.1, 3.6, 4.3 ], 395 | # [2] => [ 4.2, 4.0, 3.9 ], 396 | # }, [3, 3]) 397 | # 398 | # _, mean1, _ = data1.bootstrap_quotient(data2) 399 | # _, mean2, _ = Kalibera.bootstrap_geomean([data1], [data2]) 400 | # assert_equal mean2.round(3), mean1.round(3) 401 | # 402 | # (_, mean, _) = Kalibera.bootstrap_geomean([data1, data2], [data2, data1]) 403 | # assert_equal 1.0, mean.round(5) 404 | #end 405 | 406 | def assert_less_equal(x, y) 407 | assert x <= y, "#{x.inspect} <= #{y.inspect}" 408 | end 409 | 410 | end 411 | -------------------------------------------------------------------------------- /shared_metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "_comment": "This file stores metadata that can be shared between package managers, e.g. pip (for Python) and gem (for Ruby).", 3 | 4 | 5 | "metadata" : { 6 | "short_descr": "An implementation of Tomas Kalibera's statistically rigorous benchmarking method.", 7 | "long_descr": "libkalibera contains reimplementations of the statistical computations for benchmarking evaluation from the following papers by Tomas Kalibera and Richard Jones: 'Rigorous benchmarking in reasonable time'; 'Quantifying performance changes with effect size confidence intervals'.", 8 | "version": 0.1, 9 | "url": "http://soft-dev.org/src/libkalibera/" 10 | } 11 | } 12 | --------------------------------------------------------------------------------