├── .gitignore ├── LICENSE ├── analysis ├── Aria Benchmark.ipynb ├── Cascades Benchmark.ipynb ├── Cheby Precision Loss.ipynb ├── Cross Benchmark.ipynb ├── DiscreteOutlier Micro Tests [Paper].ipynb ├── Druid Plot.ipynb ├── Lesion Results.ipynb ├── Log Lesion.ipynb ├── Merge Benchmark.ipynb ├── Parallel Merge Benchmark.ipynb ├── Perf Micro Plots.ipynb ├── Point Benchmark.ipynb ├── Precision Benchmark.ipynb ├── Sliding Windows Benchmark.ipynb └── SysML Paper.ipynb ├── cvx_estimator ├── cvx_estimator.py ├── cvxbench.py ├── estimator.py ├── exponential.py ├── gauss_estimator.py ├── hepmass.py ├── lesion_study.py └── milan.py ├── javamsketch ├── accBench.sh ├── confs │ ├── cross_exp.json │ ├── cross_hepmass.json │ ├── cross_milan.json │ ├── discrete_bench.json │ ├── find_exp.json │ ├── find_hepmass_d.json │ ├── find_hepmass_r.json │ ├── find_milan_ahist.json │ ├── find_milan_d.json │ ├── find_milan_r.json │ ├── gk │ │ ├── find_exp_gk.json │ │ ├── find_hepmass_gk.json │ │ ├── find_milan_gk.json │ │ ├── merge_aria_gk.json │ │ ├── merge_exp_gk.json │ │ ├── merge_expb_gk.json │ │ ├── merge_gauss_gk.json │ │ ├── merge_hepmass_gk.json │ │ ├── merge_hepmassb_gk.json │ │ ├── merge_milan_gk.json │ │ ├── merge_milanb_gk.json │ │ ├── point_exp_gk.json │ │ ├── point_hepmass_gk.json │ │ ├── point_milan_gk.json │ │ ├── point_occupancy_gk.json │ │ ├── point_power_gk.json │ │ ├── point_retail_gk.json │ │ ├── strong_scaling_hepmass_gk.json │ │ ├── strong_scaling_milan_gk.json │ │ ├── weak_scaling_hepmass_gk.json │ │ └── weak_scaling_milan_gk.json │ ├── lesion_solve.json │ ├── log_lesion.json │ ├── merge_aria.json │ ├── merge_exp.json │ ├── merge_exp_m.json │ ├── merge_expb.json │ ├── merge_gauss.json │ ├── merge_hepmass.json │ ├── merge_hepmassb.json │ ├── merge_milan.json │ ├── merge_milan_ahist.json │ ├── merge_milan_m.json │ ├── merge_milanb.json │ ├── outlier_bench.json │ ├── point_exp.json │ ├── point_exp_m.json │ ├── point_hepmass.json │ ├── point_hepmass_m.json │ ├── 
point_milan.json │ ├── point_milan_m.json │ ├── point_occupancy.json │ ├── point_occupancy_m.json │ ├── point_power.json │ ├── point_power_m.json │ ├── point_retail.json │ ├── point_retail_m.json │ ├── random │ │ ├── find_hepmass_r.json │ │ ├── find_milan_r.json │ │ ├── merge_exp_r.json │ │ ├── merge_gauss_r.json │ │ ├── merge_milan_r.json │ │ ├── point_exp_r.json │ │ ├── point_hepmass_r.json │ │ ├── point_milan_r.json │ │ ├── point_occupancy_r.json │ │ ├── point_power_r.json │ │ └── point_retail_r.json │ ├── sort_milan.json │ ├── sort_milan_approx.json │ ├── strong_scaling_hepmass.json │ ├── strong_scaling_milan.json │ ├── weak_scaling_hepmass.json │ └── weak_scaling_milan.json ├── discretebench.sh ├── genCP.sh ├── lesionSolve.sh ├── loglesion.sh ├── mRuntimeBench.sh ├── mergeBench.sh ├── msolver │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ └── msolver │ │ │ ├── BoundSolver.java │ │ │ ├── ChebyshevMomentSolver.java │ │ │ ├── ChebyshevMomentSolver2.java │ │ │ ├── GFunction.java │ │ │ ├── MathUtil.java │ │ │ ├── MaxEntFunction.java │ │ │ ├── MaxEntFunction2.java │ │ │ ├── MaxEntPotential.java │ │ │ ├── MaxEntPotential2.java │ │ │ ├── MnatSolver.java │ │ │ ├── SimpleBoundSolver.java │ │ │ ├── SolveBasisSelector.java │ │ │ ├── chebyshev │ │ │ ├── ChebyshevPolynomial.java │ │ │ ├── CosScaledFunction.java │ │ │ └── QuadraticCosFunction.java │ │ │ ├── data │ │ │ ├── ExponentialData.java │ │ │ ├── GaussianData.java │ │ │ ├── HepData.java │ │ │ ├── MilanData.java │ │ │ ├── MomentData.java │ │ │ ├── OccupancyData.java │ │ │ ├── RetailData.java │ │ │ ├── RetailQuantityData.java │ │ │ ├── RetailQuantityLogData.java │ │ │ ├── ShuttleData.java │ │ │ └── UniformData.java │ │ │ ├── optimizer │ │ │ ├── BFGSOptimizer.java │ │ │ ├── FunctionWithHessian.java │ │ │ ├── GenericOptimizer.java │ │ │ ├── NewtonOptimizer.java │ │ │ └── QuadraticPotential.java │ │ │ ├── struct │ │ │ └── MomentStruct.java │ │ │ └── thresholds │ │ │ ├── MarkovThreshold.java │ │ │ ├── 
MomentThreshold.java │ │ │ ├── RTTThreshold.java │ │ │ └── ThresholdCascade.java │ │ └── test │ │ └── java │ │ └── msolver │ │ ├── BoundSolverTest.java │ │ ├── ChebyshevMomentSolver2Test.java │ │ ├── ChebyshevMomentSolverTest.java │ │ ├── ChebyshevPolynomialTest.java │ │ ├── MathUtilTest.java │ │ ├── MaxEntFunction2Test.java │ │ ├── MaxEntFunctionTest.java │ │ ├── MaxEntPotential2Test.java │ │ ├── MaxEntPotentialTest.java │ │ ├── MnatSolverTest.java │ │ ├── SimpleBoundSolverTest.java │ │ ├── SolveBasisSelectorTest.java │ │ ├── optimizer │ │ ├── BFGSOptimizerTest.java │ │ ├── NewtonOptimizerTest.java │ │ └── QuadraticPotentialTest.java │ │ └── thresholds │ │ ├── MarkovThresholdTest.java │ │ ├── RTTThresholdTest.java │ │ └── ThresholdCascadeTest.java ├── outlierbench.sh ├── parallelMergeBench.sh ├── pom.xml ├── quantilebench │ ├── pom.xml │ └── src │ │ ├── main │ │ └── java │ │ │ ├── AccuracyBench.java │ │ │ ├── BoundRuntimeBench.java │ │ │ ├── DiscreteBench.java │ │ │ ├── LogMomentsLesion.java │ │ │ ├── MSketchBench.java │ │ │ ├── MergeBench.java │ │ │ ├── OutlierBench.java │ │ │ ├── ParallelMergeBench.java │ │ │ ├── RunConfig.java │ │ │ ├── SamplingBench.java │ │ │ ├── SolveLesionBench.java │ │ │ ├── SortBench.java │ │ │ ├── gk │ │ │ └── GKSketch.java │ │ │ ├── histogram │ │ │ ├── ApproximateHistogram.java │ │ │ └── ArrayUtils.java │ │ │ ├── io │ │ │ ├── CSVDataSource.java │ │ │ ├── CSVOutput.java │ │ │ ├── DataGrouper.java │ │ │ ├── DataSource.java │ │ │ ├── GroupedCSVDataSource.java │ │ │ ├── SeqDataGrouper.java │ │ │ └── SimpleCSVDataSource.java │ │ │ ├── sampling │ │ │ ├── PreambleUtil.java │ │ │ ├── ReservoirItemsSketch.java │ │ │ ├── ReservoirItemsUnion.java │ │ │ ├── ReservoirSize.java │ │ │ ├── SampleSubsetSummary.java │ │ │ └── SamplingUtil.java │ │ │ ├── sketches │ │ │ ├── ApproximateHistogramSketch.java │ │ │ ├── CMomentSketch.java │ │ │ ├── GKAdaptiveSketch.java │ │ │ ├── HistogramSketch.java │ │ │ ├── HybridMomentSketch.java │ │ │ ├── MomentSketch.java 
│ │ │ ├── QuantileSketch.java │ │ │ ├── QuantileUtil.java │ │ │ ├── RandomSketch.java │ │ │ ├── ReservoirSamplingSketch.java │ │ │ ├── SamplingSketch.java │ │ │ ├── SketchLoader.java │ │ │ ├── SparkGKSketch.java │ │ │ ├── TDigestSketch.java │ │ │ └── YahooSketch.java │ │ │ ├── tdigest │ │ │ ├── AVLGroupTree.java │ │ │ ├── AVLTreeDigest.java │ │ │ ├── AbstractTDigest.java │ │ │ ├── Centroid.java │ │ │ ├── IntAVLTree.java │ │ │ ├── MergingDigest.java │ │ │ └── TDigest.java │ │ │ └── yahoo │ │ │ ├── CompactDoublesSketch.java │ │ │ ├── DirectCompactDoublesSketch.java │ │ │ ├── DirectDoublesSketchAccessor.java │ │ │ ├── DirectUpdateDoublesSketch.java │ │ │ ├── DirectUpdateDoublesSketchR.java │ │ │ ├── DoublesArrayAccessor.java │ │ │ ├── DoublesAuxiliary.java │ │ │ ├── DoublesBufferAccessor.java │ │ │ ├── DoublesByteArrayImpl.java │ │ │ ├── DoublesMergeImpl.java │ │ │ ├── DoublesPmfCdfImpl.java │ │ │ ├── DoublesSketch.java │ │ │ ├── DoublesSketchAccessor.java │ │ │ ├── DoublesSketchBuilder.java │ │ │ ├── DoublesUnion.java │ │ │ ├── DoublesUnionBuilder.java │ │ │ ├── DoublesUnionImpl.java │ │ │ ├── DoublesUnionImplR.java │ │ │ ├── DoublesUpdateImpl.java │ │ │ ├── DoublesUtil.java │ │ │ ├── HeapCompactDoublesSketch.java │ │ │ ├── HeapDoublesSketchAccessor.java │ │ │ ├── HeapUpdateDoublesSketch.java │ │ │ ├── PreambleUtil.java │ │ │ ├── QuantilesHelper.java │ │ │ ├── UpdateDoublesSketch.java │ │ │ └── Util.java │ │ └── test │ │ ├── java │ │ ├── AccuracyBenchTest.java │ │ ├── RunConfigTest.java │ │ ├── data │ │ │ └── TestDataSource.java │ │ ├── gk │ │ │ └── GKSketchTest.java │ │ ├── histogram │ │ │ └── ApproximateHistogramTest.java │ │ ├── io │ │ │ ├── CSVDataSourceTest.java │ │ │ ├── GroupedCSVDataSourceTest.java │ │ │ └── SeqDataGrouperTest.java │ │ └── sketches │ │ │ ├── ApproximateHistogramSketchTest.java │ │ │ ├── CMomentSketchTest.java │ │ │ ├── HistogramSketchTest.java │ │ │ ├── HybridMomentSketchTest.java │ │ │ ├── MomentSketchTest.java │ │ │ ├── 
QuantileUtilTest.java │ │ │ ├── RandomSketchTest.java │ │ │ ├── ReservoirSamplingSketchTest.java │ │ │ ├── SamplingSketchTest.java │ │ │ ├── SparkGKSketchTest.java │ │ │ ├── TDigestSketchTest.java │ │ │ └── YahooSketchTest.java │ │ └── resources │ │ ├── acc_bench.json │ │ ├── grouped.csv │ │ ├── test.csv │ │ └── tiny_conf.json ├── readme.md └── results │ ├── cascade_milan.csv │ ├── info.txt │ ├── merge_aria.csv │ ├── precision_merge_hepmass.csv │ ├── precision_merge_milan.csv │ ├── sliding_milan.csv │ ├── strong_scaling_hepmass.csv │ ├── strong_scaling_milan.csv │ ├── weak_scaling_hepmass.csv │ └── weak_scaling_milan.csv ├── math ├── Bounds.ipynb ├── Chebyshev.ipynb ├── MomentConversion.ipynb ├── Quadrature.ipynb ├── combined.lyx └── notes.lyx ├── readme.md └── testdata └── gauss_2.csv /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | CMakeFiles/ 3 | *.cmake 4 | *.svg 5 | *.dylib 6 | mlib/main 7 | mlib/rtest 8 | javamsketch/cp.txt 9 | CMakeCache.txt 10 | *.pdf 11 | *.csv 12 | *.lyx~ 13 | mlib/Makefile 14 | mscript/results/report.html 15 | *.iml 16 | *.cbp 17 | *.json 18 | math/*.ipynb 19 | 20 | # Byte-compiled / optimized / DLL files 21 | __pycache__/ 22 | *.py[cod] 23 | *$py.class 24 | 25 | # C extensions 26 | *.so 27 | 28 | # Distribution / packaging 29 | .Python 30 | env/ 31 | build/ 32 | develop-eggs/ 33 | dist/ 34 | downloads/ 35 | eggs/ 36 | .eggs/ 37 | lib64/ 38 | parts/ 39 | sdist/ 40 | var/ 41 | wheels/ 42 | *.egg-info/ 43 | .installed.cfg 44 | *.egg 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | .hypothesis/ 66 | 67 | # Translations 68 | *.mo 69 | *.pot 70 | 71 | # Django stuff: 72 | *.log 73 | local_settings.py 74 | 75 | # Flask stuff: 76 | instance/ 77 | .webassets-cache 78 | 79 | # Scrapy stuff: 80 | .scrapy 81 | 82 | # Sphinx documentation 83 | docs/_build/ 84 | 85 | # PyBuilder 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # pyenv 92 | .python-version 93 | 94 | # celery beat schedule file 95 | celerybeat-schedule 96 | 97 | # SageMath parsed files 98 | *.sage.py 99 | 100 | # dotenv 101 | .env 102 | 103 | # virtualenv 104 | .venv 105 | venv/ 106 | ENV/ 107 | 108 | # Spyder project settings 109 | .spyderproject 110 | .spyproject 111 | 112 | # Rope project settings 113 | .ropeproject 114 | 115 | # mkdocs documentation 116 | /site 117 | 118 | # mypy 119 | .mypy_cache/ 120 | -------------------------------------------------------------------------------- /cvx_estimator/cvx_estimator.py: -------------------------------------------------------------------------------- 1 | import cvxpy as cvx 2 | import numpy as np 3 | 4 | import estimator 5 | 6 | 7 | class CvxEstimator(estimator.Estimator): 8 | def __init__( 9 | self, 10 | k:int, 11 | resolution=1000, 12 | solver="maxent" 13 | ): 14 | super().__init__(k) 15 | self.resolution = resolution 16 | self.locs = np.linspace(0, 1, self.resolution) 17 | m_list = [] 18 | for i in range(k): 19 | m_list.append( 20 | self.locs**i 21 | ) 22 | self.loc_moments = np.array(m_list) 23 | self.solver = solver 24 | self.values = None 25 | 26 | def solve(self): 27 | if self.solver == "lp": 28 | xsol = np.linalg.lstsq( 29 | self.loc_moments, 30 | self.moments 31 | )[0] 32 | self.values = xsol 33 | else: 34 | # Moment values of the 
class Estimator:
    """Base class for quantile estimators driven by summary statistics.

    An instance holds the data range (``a_min``, ``a_max``) and a collection
    of ``k`` moments.  Subclasses implement :meth:`solve` to fit a
    distribution to those statistics and :meth:`estimate` to read a
    quantile off the fitted distribution.
    """

    def __init__(self, k):
        """Create an estimator that works with ``k`` moments.

        Args:
            k: Number of moments the estimator uses.
        """
        self.a_min = 0.0
        self.a_max = 0.0
        self.k = k
        # Placeholder until set_statistics() supplies real values.
        self.moments = np.zeros(k)

    def set_statistics(
            self,
            a_min: float,
            a_max: float,
            moments: "Sequence[float]",
    ):
        """Store the summary statistics that solve()/estimate() operate on.

        Args:
            a_min: Lower end of the data range.
            a_max: Upper end of the data range.
            moments: Sequence of moment values, stored as given (no copy or
                conversion).  NOTE(review): presumably moments of the data
                rescaled to the unit interval -- confirm against callers.
        """
        self.a_min = a_min
        self.a_max = a_max
        self.moments = moments

    def solve(self):
        """Fit the estimator to the stored statistics.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        # NotImplemented is a comparison sentinel, not an exception; calling
        # it raised TypeError.  NotImplementedError is the intended signal.
        raise NotImplementedError()

    def estimate(self, p: float):
        """Return the estimated ``p``-quantile.

        Args:
            p: Quantile fraction to estimate.

        Raises:
            NotImplementedError: Always; subclasses must override.
        """
        raise NotImplementedError()
class GaussEstimator(estimator.Estimator):
    """Quantile estimator that fits a normal distribution to the moments.

    Only the entries at index 1 and 2 of ``self.moments`` are used: the
    first is taken as the mean and the second as the raw second moment.
    """

    def __init__(self, k):
        """Initialise with ``k`` moments and a standard-normal default fit."""
        super().__init__(k)
        self.mu = 0
        self.std = 1

    def solve(self):
        """Derive ``mu`` and ``std`` from the stored moments.

        Returns nothing; the fitted parameters are kept on the instance.
        """
        mean = self.moments[1]
        second_moment = self.moments[2]
        self.mu = mean
        # Var[X] = E[X^2] - E[X]^2
        variance = second_moment - mean * mean
        self.std = math.sqrt(variance)

    def estimate(self, p: float):
        """Return the ``p``-quantile of the fitted normal, mapped to the data range.

        The normal quantile is computed in the space of the moments and then
        rescaled linearly using ``a_min`` and ``a_max``.
        """
        unit_quantile = scipy.stats.norm.ppf(p, loc=self.mu, scale=self.std)
        span = self.a_max - self.a_min
        return unit_quantile * span + self.a_min
def main():
    """Run the lesion study and write ``lesion_results.csv``.

    For every dataset and every solver: load the dataset's range and first
    ``k`` moments into the solver, time repeated ``solve()`` calls, then
    record the solver's quantile estimate at each probed quantile.
    """
    # 21 evenly spaced quantiles, with the endpoints pulled in to 1% / 99%.
    quantiles = numpy.linspace(0, 1, 21)
    quantiles[0] = 0.01
    quantiles[-1] = 0.99
    k = 7

    datasets = {
        "milan": milan.data,
        "exponential": exponential.data,
        "hepmass": hepmass.data
    }
    # Datasets flagged True use the log-domain range and moments.
    isLog = {
        "milan": True,
        "exponential": False,
        "hepmass": False
    }
    solvers = {
        "lp": cvx_estimator.CvxEstimator(k, 1000, solver="lp"),
        "maxent": cvx_estimator.CvxEstimator(k, 1000, solver="maxent"),
        "mindensity": cvx_estimator.CvxEstimator(k, 1000, solver="mindensity"),
        "gaussian": gauss_estimator.GaussEstimator(k),
    }
    # Number of timed solve() repetitions per solver.
    num_trials = {
        "lp": 500,
        "maxent": 10,
        "mindensity": 50,
        "gaussian": 1000,
    }

    results = []
    for dname, data in datasets.items():
        print(dname)
        log_scale = isLog[dname]
        if log_scale:
            lo_idx, hi_idx, moment_key = 2, 3, "sLogMoments"
        else:
            lo_idx, hi_idx, moment_key = 0, 1, "sMoments"

        # Solved distributions per solver; retained for ad-hoc plotting.
        distributions = {}
        for sname, e in solvers.items():
            print(sname)
            ranges = data["ranges"]
            e.set_statistics(
                ranges[lo_idx],
                ranges[hi_idx],
                data[moment_key][:k]
            )
            distributions[sname] = e.solve()

            trials = num_trials[sname]
            start_time = time.time()
            for _ in range(trials):
                e.solve()
            end_time = time.time()
            # Mean wall-clock time per solve(), seconds scaled by 1e9.
            query_time = ((end_time - start_time) / trials) * 1e9

            for p in quantiles:
                q_est = e.estimate(p)
                if log_scale:
                    # Estimates for log datasets come back in log space.
                    q_est = math.exp(q_est)
                results.append({
                    "dataset": dname,
                    "size_param": k,
                    "sketch": sname,
                    "query_time": query_time,
                    "q": "{0:.3g}".format(p),
                    "quantile_estimate": q_est
                })

    pd.DataFrame(results).to_csv("lesion_results.csv", index=False)


if __name__ == "__main__":
    main()
-301156754002264.44, 11 | 3606454337590192.0, 12 | -30859127373541904.0, 13 | 3.4368887948251686e+17, 14 | -3.4172450245762365e+18, 15 | 3.8032805624376271e+19, 16 | -4.1088294003414683e+20, 17 | 4.6743717140718132e+21, 18 | -5.3111237684754928e+22, 19 | 6.1969914256246635e+23, 20 | -7.2792735817422283e+24], 21 | 'powerSums': [81109777.0, 22 | 2982688169.6811647, 23 | 978439974358.73865, 24 | 871718166698519.75, 25 | 1.3802820224027354e+18, 26 | 3.1721196216284679e+21, 27 | 9.4920224710331096e+24, 28 | 3.4822245172502998e+28, 29 | 1.5113747392231059e+32, 30 | 7.5487997054766176e+35, 31 | 4.2236053442107682e+39, 32 | 2.5794131452332432e+43, 33 | 1.6810991725372064e+47, 34 | 1.1485760824967742e+51, 35 | 8.1186456852298384e+54, 36 | 5.8811422826709824e+58, 37 | 4.3371573032664479e+62, 38 | 3.2410617072895531e+66, 39 | 2.4461380713938232e+70, 40 | 1.8602581958318741e+74], 41 | 'ranges': [2.3314976995293306e-06, 42 | 7936.2653798841566, 43 | -12.968999707389781, 44 | 8.9791980884111684], 45 | 'sLogMoments': [1.0, 46 | 0.61333487750323645, 47 | 0.40063179991927766, 48 | 0.27453173426455313, 49 | 0.19502432705501629, 50 | 0.14233442579727892, 51 | 0.10602004766100606, 52 | 0.080221796729629868, 53 | 0.061462957528847437, 54 | 0.047575254996741687, 55 | 0.037147332790898123, 56 | 0.029227356882084689, 57 | 0.023154751016763224, 58 | 0.018460507742974343, 59 | 0.014805560178125263, 60 | 0.01194121507705594, 61 | 0.0096829127542519666, 62 | 0.0078923483112435695, 63 | 0.0064650172819627847, 64 | 0.0053213826228772198], 65 | 'sMoments': [1.0, 66 | 0.0046335988796396103, 67 | 0.00019152638468741077, 68 | 2.1500786230811892e-05, 69 | 4.2897293791083479e-06, 70 | 1.2422111949367304e-06, 71 | 4.6836934091972981e-07, 72 | 2.1650617554231916e-07, 73 | 1.1840485146229515e-07, 74 | 7.4517634271396796e-08, 75 | 5.2534947875489019e-08, 76 | 4.0426831294724278e-08, 77 | 3.319907214581448e-08, 78 | 2.8580919620340124e-08, 79 | 2.5455629231168828e-08, 80 | 2.323516458777822e-08, 81 | 
2.1591014984505603e-08, 82 | 2.0330076044202664e-08, 83 | 1.9333767367484097e-08, 84 | 1.8526465267492987e-08]} -------------------------------------------------------------------------------- /javamsketch/accBench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) AccuracyBench $@ 3 | -------------------------------------------------------------------------------- /javamsketch/confs/cross_exp.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "cross-exp", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "cellFractions": [0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1.0], 7 | "numTrials": 10, 8 | "numSolveTrials": 3000, 9 | "methods": { 10 | "cmoments": [11.0], 11 | "yahoo": [32.0], 12 | "random": [40.0], 13 | "spark_gk": [50.0], 14 | "gk_adaptive": [40.0] 15 | }, 16 | "quantiles": [ 17 | 0.01, 0.05, 0.1, 0.15, 18 | 0.2, 0.25 ,0.3, 0.35, 19 | 0.4, 0.45, 0.5, 0.55, 20 | 0.6, 0.65, 0.7, 0.75, 21 | 0.8, 0.85, 0.9, 0.95, 22 | 0.99 23 | ], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/cross_hepmass.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "cross-hepmass", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "cellFractions": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1.0], 7 | "numTrials": 10, 8 | "numSolveTrials": 3000, 9 | "methods": { 10 | "cmoments": [11.0], 11 | "yahoo": [32.0], 12 | "random": [40.0], 13 | "spark_gk": [50.0], 14 | "gk_adaptive": [40.0] 15 | }, 16 | "quantiles": [ 17 | 0.01, 0.05, 0.1, 0.15, 18 | 0.2, 0.25 ,0.3, 
0.35, 19 | 0.4, 0.45, 0.5, 0.55, 20 | 0.6, 0.65, 0.7, 0.75, 21 | 0.8, 0.85, 0.9, 0.95, 22 | 0.99 23 | ], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/cross_milan.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "cross-milan", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "cellFractions": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1.0], 7 | "numTrials": 10, 8 | "numSolveTrials": 3000, 9 | "methods": { 10 | "cmoments": [11.0], 11 | "yahoo": [32.0], 12 | "random": [40.0], 13 | "spark_gk": [50.0], 14 | "gk_adaptive": [60.0] 15 | }, 16 | "quantiles": [ 17 | 0.01, 0.05, 0.1, 0.15, 18 | 0.2, 0.25 ,0.3, 0.35, 19 | 0.4, 0.45, 0.5, 0.55, 20 | 0.6, 0.65, 0.7, 0.75, 21 | 0.8, 0.85, 0.9, 0.95, 22 | 0.99 23 | ], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/discrete_bench.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "discrete_bench", 3 | "verbose": false, 4 | 5 | "methods": { 6 | "cmoments": [5.0, 11.0], 7 | "yahoo": [32.0], 8 | "random": [40.0], 9 | "spark_gk": [50.0] 10 | }, 11 | "numSolveTrials": 1, 12 | "totalSize": 100000, 13 | "cardinalities": [6, 10, 30, 100, 300, 1000], 14 | "quantiles": [ 15 | 0.01, 0.05, 0.1, 0.15, 16 | 0.2, 0.25 ,0.3, 0.35, 17 | 0.4, 0.45, 0.5, 0.55, 18 | 0.6, 0.65, 0.7, 0.75, 19 | 0.8, 0.85, 0.9, 0.95, 20 | 0.99] 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/find_exp.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_exp", 3 | "fileName": 
"../../datasets/exponential_10M.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "tdigest": [1.0, 1.5, 2.0, 2.5, 3.0, 4.0], 9 | "yahoo": [8.0, 16.0, 32.0], 10 | "sampling": [100.0, 250.0, 400.0], 11 | "spark_gk": [20.0, 30.0, 40.0, 50.0], 12 | "approx_histogram": [100.0, 200.0, 400.0, 1000.0], 13 | "histogram": [20.0, 30.0, 40.0, 50.0, 60.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/find_hepmass_d.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_hepmass_d", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 1, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 9.0, 13.0], 9 | "approx_histogram": [20.0, 40.0, 80.0, 100.0], 10 | "tdigest": [1.0, 1.3, 1.5, 1.7, 2.0], 11 | "histogram": [5.0, 7.0, 10.0, 15.0, 20.0], 12 | "spark_gk": [20.0, 30.0, 40.0, 50.0] 13 | }, 14 | "quantiles": [ 15 | 0.01, 0.05, 0.1, 0.15, 16 | 0.2, 0.25 ,0.3, 0.35, 17 | 0.4, 0.45, 0.5, 0.55, 18 | 0.6, 0.65, 0.7, 0.75, 19 | 0.8, 0.85, 0.9, 0.95, 20 | 0.99], 21 | 22 | "verbose": false, 23 | "calcError": true, 24 | "appendTimeStamp": false 25 | } -------------------------------------------------------------------------------- /javamsketch/confs/find_hepmass_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_hepmass_r", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 20, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "sampling": [500.0, 800.0, 1000.0], 9 | "yahoo": [8.0, 16.0, 32.0] 10 | }, 11 | "quantiles": [ 12 | 0.01, 
0.05, 0.1, 0.15, 13 | 0.2, 0.25 ,0.3, 0.35, 14 | 0.4, 0.45, 0.5, 0.55, 15 | 0.6, 0.65, 0.7, 0.75, 16 | 0.8, 0.85, 0.9, 0.95, 17 | 0.99], 18 | 19 | "verbose": false, 20 | "calcError": true, 21 | "appendTimeStamp": false 22 | } -------------------------------------------------------------------------------- /javamsketch/confs/find_milan_ahist.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_milan", 3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv", 4 | "columnIdx": 0, 5 | "numTrials": 1, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "approx_histogram": [10000.0, 20000.0, 40000.0], 9 | "histogram": [10000.0, 20000.0, 40000.0] 10 | }, 11 | "quantiles": [ 12 | 0.01, 0.05, 0.1, 0.15, 13 | 0.2, 0.25 ,0.3, 0.35, 14 | 0.4, 0.45, 0.5, 0.55, 15 | 0.6, 0.65, 0.7, 0.75, 16 | 0.8, 0.85, 0.9, 0.95, 17 | 0.99], 18 | 19 | "verbose": false, 20 | "calcError": true, 21 | "appendTimeStamp": false 22 | } -------------------------------------------------------------------------------- /javamsketch/confs/find_milan_d.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_milan_d", 3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv", 4 | "columnIdx": 0, 5 | "numTrials": 1, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "approx_histogram": [50.0, 100.0, 200.0], 9 | "histogram": [50.0, 100.0, 200.0], 10 | "tdigest": [4.0, 5.0, 6.0, 7.0, 8.0], 11 | "spark_gk": [20.0, 30.0, 40.0, 50.0], 12 | "cmoments": [8.0, 9.0, 10.0] 13 | }, 14 | "quantiles": [ 15 | 0.01, 0.05, 0.1, 0.15, 16 | 0.2, 0.25 ,0.3, 0.35, 17 | 0.4, 0.45, 0.5, 0.55, 18 | 0.6, 0.65, 0.7, 0.75, 19 | 0.8, 0.85, 0.9, 0.95, 20 | 0.99], 21 | 22 | "verbose": false, 23 | "calcError": true, 24 | "appendTimeStamp": false 25 | } -------------------------------------------------------------------------------- /javamsketch/confs/find_milan_r.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_milan_r", 3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "yahoo": [8.0, 16.0, 32.0], 9 | "sampling": [800.0, 1000.0, 1300.0, 1600.0, 2000.0] 10 | }, 11 | "quantiles": [ 12 | 0.01, 0.05, 0.1, 0.15, 13 | 0.2, 0.25 ,0.3, 0.35, 14 | 0.4, 0.45, 0.5, 0.55, 15 | 0.6, 0.65, 0.7, 0.75, 16 | 0.8, 0.85, 0.9, 0.95, 17 | 0.99], 18 | 19 | "verbose": false, 20 | "calcError": true, 21 | "appendTimeStamp": false 22 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/find_exp_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_exp_gk", 3 | "fileName": "../../datasets/exponential_10M.csv", 4 | "columnIdx": 0, 5 | "numTrials": 2, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 50.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/find_hepmass_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_hepmass_gk", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 2, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 50.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": 
false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/find_milan_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_milan_gk", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 2, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_aria_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_aria_gk", 3 | "fileName": "/lfs/1/jding/datasets/aria-grouped.csv", 4 | "pregrouped": true, 5 | "numTrials": 5, 6 | "methods": { 7 | "gk_adaptive": [5.0, 10.0, 20.0, 50.0, 100.0] 8 | }, 9 | "quantiles": [ 10 | 0.01, 0.05, 0.1, 0.15, 11 | 0.2, 0.25 ,0.3, 0.35, 12 | 0.4, 0.45, 0.5, 0.55, 13 | 0.6, 0.65, 0.7, 0.75, 14 | 0.8, 0.85, 0.9, 0.95, 15 | 0.99], 16 | 17 | "verbose": false, 18 | "calcError": false, 19 | "appendTimeStamp": false 20 | } 21 | -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_exp_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_exp_gk", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 
16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_expb_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_expb_gk", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 2000, 6 | "numTrials": 20, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_gauss_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_gauss_gk", 3 | "fileName": "gauss", 4 | "columnIdx": 0, 5 | "cellSize": 10000, 6 | "numTrials": 10, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_hepmass_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_hepmass_gk", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 
,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_hepmassb_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_hepmassb_gk", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 2000, 6 | "numTrials": 20, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_milan_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milan_gk", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/merge_milanb_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milanb_gk", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 2000, 6 | "numTrials": 20, 7 | "methods": { 8 
| "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/point_exp_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_exp_gk", 3 | "fileName": "../../datasets/exponential_10M.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 300, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/point_hepmass_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_hepmass_gk", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/point_milan_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_milan_gk", 3 | 
"fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/point_occupancy_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_occupancy_gk", 3 | "fileName": "../../datasets/occupancy_2.csv", 4 | "columnIdx": 3, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/point_power_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_power_gk", 3 | "fileName": "../../datasets/household_power.csv", 4 | "columnIdx": 2, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- 
/javamsketch/confs/gk/point_retail_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_retail_gk", 3 | "fileName": "../../datasets/retail_quantity_pos.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 300, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/gk/strong_scaling_hepmass_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "strong_scaling_hepmass_gk", 3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 20, 7 | "methods": { 8 | "gk_adaptive": [40.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | "numMergeThreads": [ 18 | 1, 2, 4, 8, 12, 16, 24, 32], 19 | "numDuplications": 8, 20 | 21 | "verbose": false, 22 | "calcError": false, 23 | "appendTimeStamp": false 24 | } 25 | -------------------------------------------------------------------------------- /javamsketch/confs/gk/strong_scaling_milan_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "strong_scaling_milan_gk", 3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "gk_adaptive": [60.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 
0.85, 0.9, 0.95, 16 | 0.99], 17 | "numMergeThreads": [ 18 | 1, 2, 4, 8, 12, 16, 24, 32], 19 | "numDuplications": 1, 20 | 21 | "verbose": false, 22 | "calcError": false, 23 | "appendTimeStamp": false 24 | } 25 | -------------------------------------------------------------------------------- /javamsketch/confs/gk/weak_scaling_hepmass_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "weak_scaling_hepmass_gk", 3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 20, 7 | "methods": { 8 | "gk_adaptive": [40.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | "numMergeThreads": [ 18 | 1, 2, 4, 8, 12, 16, 24, 32], 19 | "numDuplications": 8, 20 | "weakScaling": true, 21 | 22 | "verbose": false, 23 | "calcError": false, 24 | "appendTimeStamp": false 25 | } 26 | -------------------------------------------------------------------------------- /javamsketch/confs/gk/weak_scaling_milan_gk.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "weak_scaling_milan_gk", 3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 5, 7 | "methods": { 8 | "gk_adaptive": [60.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | "numMergeThreads": [ 18 | 1, 2, 4, 8, 12, 16, 24, 32], 19 | "numDuplications": 1, 20 | "weakScaling": true, 21 | 22 | "verbose": false, 23 | "calcError": false, 24 | "appendTimeStamp": false 25 | } 26 | -------------------------------------------------------------------------------- /javamsketch/confs/lesion_solve.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "testName": "lesion_solve", 3 | "k": 11, 4 | "verbose": false 5 | } -------------------------------------------------------------------------------- /javamsketch/confs/log_lesion.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "log_lesion", 3 | "ks": [3, 5, 7, 9, 11, 13, 15], 4 | "numSolveTrials": 2, 5 | "verbose": false 6 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_aria.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_aria", 3 | "fileName": "/lfs/1/jding/datasets/aria-grouped.csv", 4 | "pregrouped": true, 5 | "numTrials": 5, 6 | "methods": { 7 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0], 8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0], 14 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 15 | }, 16 | "quantiles": [ 17 | 0.01, 0.05, 0.1, 0.15, 18 | 0.2, 0.25 ,0.3, 0.35, 19 | 0.4, 0.45, 0.5, 0.55, 20 | 0.6, 0.65, 0.7, 0.75, 21 | 0.8, 0.85, 0.9, 0.95, 22 | 0.99], 23 | 24 | "verbose": false, 25 | "calcError": false, 26 | "appendTimeStamp": true 27 | } 28 | -------------------------------------------------------------------------------- /javamsketch/confs/merge_exp.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_exp", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 
8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": false, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_exp_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_exp_m", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_expb.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_expb", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 2000, 6 | "numTrials": 20, 7 | "methods": { 8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "random": [5.0, 10.0, 20.0, 40.0, 100.0], 11 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 12 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 13 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 14 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0], 15 | "cmoments": [2.0, 3.0, 
5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_gauss.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_gauss", 3 | "fileName": "gauss", 4 | "columnIdx": 0, 5 | "cellSize": 10000, 6 | "numTrials": 5, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0], 9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0], 12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_hepmass.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_hepmass", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 5, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0], 9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0], 12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 
2000.0], 13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_hepmassb.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_hepmassb", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 2000, 6 | "numTrials": 20, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0], 9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0], 12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_milan.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milan", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 5, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0], 9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 
4.0, 5.0, 8.0, 16.0], 10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 11 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 12 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 13 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 14 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 15 | }, 16 | "quantiles": [ 17 | 0.01, 0.05, 0.1, 0.15, 18 | 0.2, 0.25 ,0.3, 0.35, 19 | 0.4, 0.45, 0.5, 0.55, 20 | 0.6, 0.65, 0.7, 0.75, 21 | 0.8, 0.85, 0.9, 0.95, 22 | 0.99], 23 | 24 | "verbose": false, 25 | "calcError": false, 26 | "appendTimeStamp": false 27 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_milan_ahist.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milan_ahist", 3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv", 4 | "columnIdx": 0, 5 | "cellSize": 100, 6 | "numTrials": 10, 7 | "methods": { 8 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/merge_milan_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milan_m", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } 
-------------------------------------------------------------------------------- /javamsketch/confs/merge_milanb.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milanb", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 2000, 6 | "numTrials": 20, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0], 9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0], 12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | 25 | "verbose": false, 26 | "calcError": false, 27 | "appendTimeStamp": false 28 | } -------------------------------------------------------------------------------- /javamsketch/confs/outlier_bench.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "outlier_bench", 3 | "fileName": "../../datasets/gauss_10M.csv", 4 | "columnIdx": 0, 5 | "verbose": true, 6 | 7 | "methods": { 8 | "cmoments": [11.0], 9 | "histogram": [20.0, 100.0], 10 | "yahoo": [32.0], 11 | "random": [40.0], 12 | "spark_gk": [50.0] 13 | }, 14 | "numSolveTrials": 1, 15 | "distances": [5.0, 10.0, 50.0, 100.0, 500.0, 1000.0], 16 | "fractions": [0.01, 0.01, 0.01, 0.01, 0.01, 0.01], 17 | "scaleFactor": 0.1, 18 | "quantiles": [ 19 | 0.01, 0.05, 0.1, 0.15, 20 | 0.2, 0.25 ,0.3, 0.35, 21 | 0.4, 0.45, 0.5, 0.55, 22 | 0.6, 0.65, 0.7, 0.75, 23 | 0.8, 0.85, 0.9, 0.95, 24 | 0.99] 25 | } -------------------------------------------------------------------------------- 
/javamsketch/confs/point_exp.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_exp", 3 | "fileName": "../../datasets/exponential_10M.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0], 13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_exp_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_exp_m", 3 | "fileName": "../../datasets/exponential_10M.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 300, 7 | "methods": { 8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 15.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_hepmass.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_hepmass", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 
4.0, 5.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_hepmass_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_hepmass_m", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_milan.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_milan", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0], 13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0] 14 | }, 15 | 
"quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_milan_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_milan_m", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_occupancy.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_occupancy", 3 | "fileName": "../../datasets/occupancy_2.csv", 4 | "columnIdx": 3, 5 | "numTrials": 10, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0], 13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } 
-------------------------------------------------------------------------------- /javamsketch/confs/point_occupancy_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_occupancy_m", 3 | "fileName": "../../datasets/occupancy_2.csv", 4 | "columnIdx": 3, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 15.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_power.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_power", 3 | "fileName": "../../datasets/household_power.csv", 4 | "columnIdx": 2, 5 | "numTrials": 10, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0], 13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_power_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_power_m", 3 | "fileName": "../../datasets/household_power.csv", 4 | "columnIdx": 2, 5 
| "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 15.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_retail.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_retail", 3 | "fileName": "../../datasets/retail_quantity_pos.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0], 9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0], 10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0], 11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0], 12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0], 13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0] 14 | }, 15 | "quantiles": [ 16 | 0.01, 0.05, 0.1, 0.15, 17 | 0.2, 0.25 ,0.3, 0.35, 18 | 0.4, 0.45, 0.5, 0.55, 19 | 0.6, 0.65, 0.7, 0.75, 20 | 0.8, 0.85, 0.9, 0.95, 21 | 0.99], 22 | 23 | "verbose": false, 24 | "calcError": true, 25 | "appendTimeStamp": false 26 | } -------------------------------------------------------------------------------- /javamsketch/confs/point_retail_m.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_retail_m", 3 | "fileName": "../../datasets/retail_quantity_pos.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 300, 7 | "methods": { 8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 
| "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/find_hepmass_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_hepmass_rand", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 1, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 50.0, 100.0], 9 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 10 | }, 11 | "quantiles": [ 12 | 0.01, 0.05, 0.1, 0.15, 13 | 0.2, 0.25 ,0.3, 0.35, 14 | 0.4, 0.45, 0.5, 0.55, 15 | 0.6, 0.65, 0.7, 0.75, 16 | 0.8, 0.85, 0.9, 0.95, 17 | 0.99], 18 | 19 | "verbose": false, 20 | "calcError": true, 21 | "appendTimeStamp": false 22 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/find_milan_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "find_milan_rand", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "numSolveTrials": 1, 7 | "methods": { 8 | "gk_adaptive": [5.0, 10.0, 20.0, 50.0, 100.0], 9 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 10 | }, 11 | "quantiles": [ 12 | 0.01, 0.05, 0.1, 0.15, 13 | 0.2, 0.25 ,0.3, 0.35, 14 | 0.4, 0.45, 0.5, 0.55, 15 | 0.6, 0.65, 0.7, 0.75, 16 | 0.8, 0.85, 0.9, 0.95, 17 | 0.99], 18 | 19 | "verbose": false, 20 | "calcError": true, 21 | "appendTimeStamp": false 22 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/merge_exp_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_exp_r", 3 | "fileName": "../../datasets/exponential_100M.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | 
}, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/merge_gauss_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_gauss_r", 3 | "fileName": "gauss", 4 | "columnIdx": 0, 5 | "cellSize": 10000, 6 | "numTrials": 5, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/merge_milan_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "merge_milan_r", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/point_exp_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_exp_r", 3 | "fileName": "../../datasets/exponential_10M.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 300, 7 | "methods": { 8 
| "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/point_hepmass_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_hepmass_r", 3 | "fileName": "../../datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/point_milan_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_milan_r", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/point_occupancy_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_occupancy_r", 3 | "fileName": 
"../../datasets/occupancy_2.csv", 4 | "columnIdx": 3, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": false, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/point_power_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_power_r", 3 | "fileName": "../../datasets/household_power.csv", 4 | "columnIdx": 2, 5 | "numTrials": 5, 6 | "numSolveTrials": 1000, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/random/point_retail_r.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "point_retail_r", 3 | "fileName": "../../datasets/retail_quantity_pos.csv", 4 | "columnIdx": 0, 5 | "numTrials": 5, 6 | "numSolveTrials": 300, 7 | "methods": { 8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0] 9 | }, 10 | "quantiles": [ 11 | 0.01, 0.05, 0.1, 0.15, 12 | 0.2, 0.25 ,0.3, 0.35, 13 | 0.4, 0.45, 0.5, 0.55, 14 | 0.6, 0.65, 0.7, 0.75, 15 | 0.8, 0.85, 0.9, 0.95, 16 | 0.99], 17 | 18 | "verbose": false, 19 | "calcError": true, 20 | "appendTimeStamp": false 21 | } -------------------------------------------------------------------------------- /javamsketch/confs/sort_milan.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "testName": "sort_milan", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "methods": ["sort", "select", "yahoo", "random"], 7 | "sizeParams": [1.0, 1.0, 32.0, 40.0], 8 | 9 | "verbose": true 10 | } -------------------------------------------------------------------------------- /javamsketch/confs/sort_milan_approx.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "sort_milan_approx", 3 | "fileName": "../../datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "numTrials": 10, 6 | "methods": ["yahoo", "random"], 7 | "sizeParams": [32.0, 40.0], 8 | 9 | "verbose": true 10 | } -------------------------------------------------------------------------------- /javamsketch/confs/strong_scaling_hepmass.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "strong_scaling_hepmass", 3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 20, 7 | "methods": { 8 | "cmoments": [4.0], 9 | "tdigest": [1.5], 10 | "yahoo": [32.0], 11 | "sampling": [1000.0], 12 | "spark_gk": [50.0], 13 | "histogram": [15.0], 14 | "approx_histogram": [100.0], 15 | "random": [40.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | "numMergeThreads": [ 25 | 1, 2, 4, 8, 12, 16, 24, 32], 26 | "numDuplications": 8, 27 | 28 | "verbose": false, 29 | "calcError": false, 30 | "appendTimeStamp": true 31 | } 32 | -------------------------------------------------------------------------------- /javamsketch/confs/strong_scaling_milan.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "strong_scaling_milan", 3 | 
"fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 10, 7 | "methods": { 8 | "cmoments": [11.0], 9 | "tdigest": [5.0], 10 | "yahoo": [32.0], 11 | "sampling": [1000.0], 12 | "spark_gk": [50.0], 13 | "histogram": [100.0], 14 | "approx_histogram": [100.0], 15 | "random": [40.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | "numMergeThreads": [ 25 | 1, 2, 4, 8, 12, 16, 24, 32], 26 | "numDuplications": 1, 27 | 28 | "verbose": false, 29 | "calcError": false, 30 | "appendTimeStamp": true 31 | } 32 | -------------------------------------------------------------------------------- /javamsketch/confs/weak_scaling_hepmass.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "weak_scaling_hepmass", 3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv", 4 | "columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 20, 7 | "methods": { 8 | "cmoments": [4.0], 9 | "tdigest": [1.5], 10 | "yahoo": [32.0], 11 | "sampling": [1000.0], 12 | "spark_gk": [50.0], 13 | "histogram": [15.0], 14 | "approx_histogram": [100.0], 15 | "random": [40.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | "numMergeThreads": [ 25 | 1, 2, 4, 8, 12, 16, 24, 32], 26 | "numDuplications": 8, 27 | "weakScaling": true, 28 | 29 | "verbose": false, 30 | "calcError": false, 31 | "appendTimeStamp": true 32 | } 33 | -------------------------------------------------------------------------------- /javamsketch/confs/weak_scaling_milan.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "weak_scaling_milan", 3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv", 4 | 
"columnIdx": 0, 5 | "cellSize": 200, 6 | "numTrials": 5, 7 | "methods": { 8 | "cmoments": [11.0], 9 | "tdigest": [5.0], 10 | "yahoo": [32.0], 11 | "sampling": [1000.0], 12 | "spark_gk": [50.0], 13 | "histogram": [100.0], 14 | "approx_histogram": [100.0], 15 | "random": [40.0] 16 | }, 17 | "quantiles": [ 18 | 0.01, 0.05, 0.1, 0.15, 19 | 0.2, 0.25 ,0.3, 0.35, 20 | 0.4, 0.45, 0.5, 0.55, 21 | 0.6, 0.65, 0.7, 0.75, 22 | 0.8, 0.85, 0.9, 0.95, 23 | 0.99], 24 | "numMergeThreads": [ 25 | 1, 2, 4, 8, 12, 16, 24, 32], 26 | "numDuplications": 1, 27 | "weakScaling": true, 28 | 29 | "verbose": false, 30 | "calcError": false, 31 | "appendTimeStamp": true 32 | } 33 | -------------------------------------------------------------------------------- /javamsketch/discretebench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | DiscreteBench $@ 4 | -------------------------------------------------------------------------------- /javamsketch/genCP.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | mvn dependency:build-classpath -Dmdep.outputFile=cp.txt 3 | 4 | -------------------------------------------------------------------------------- /javamsketch/lesionSolve.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | SolveLesionBench $@ -------------------------------------------------------------------------------- /javamsketch/loglesion.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | LogMomentsLesion $@ 4 | 
-------------------------------------------------------------------------------- /javamsketch/mRuntimeBench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | MSketchBench 4 | -------------------------------------------------------------------------------- /javamsketch/mergeBench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | MergeBench $@ 4 | -------------------------------------------------------------------------------- /javamsketch/msolver/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | java-msketch 7 | futuredata 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | msolver 13 | 14 | 15 | 16 | org.apache.commons 17 | commons-math3 18 | 3.6.1 19 | 20 | 21 | com.github.vinhkhuc 22 | lbfgs4j 23 | 0.2.1 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/ChebyshevMomentSolver.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.chebyshev.ChebyshevPolynomial; 4 | import msolver.optimizer.NewtonOptimizer; 5 | import org.apache.commons.math3.analysis.solvers.BrentSolver; 6 | import org.apache.commons.math3.analysis.solvers.UnivariateSolver; 7 | 8 | import java.util.Arrays; 9 | 10 | public class ChebyshevMomentSolver { 11 | private double[] d_mus; 12 | private boolean verbose = false; 13 | 14 | private double[] lambdas; 15 | private ChebyshevPolynomial approxCDF; 16 | private boolean isConverged; 17 | 18 | private NewtonOptimizer optimizer; 19 | private int cumFuncEvals; 20 | 21 | public 
ChebyshevMomentSolver(double[] chebyshev_moments) { 22 | d_mus = chebyshev_moments; 23 | } 24 | 25 | public static ChebyshevMomentSolver fromPowerSums( 26 | double min, double max, double[] powerSums 27 | ) { 28 | double[] scaledChebyMoments = MathUtil.powerSumsToChebyMoments( 29 | min, max, powerSums 30 | ); 31 | return new ChebyshevMomentSolver(scaledChebyMoments); 32 | } 33 | 34 | public void setVerbose(boolean flag) { 35 | this.verbose = flag; 36 | } 37 | 38 | public int solve(double tol) { 39 | double[] l_initial = new double[d_mus.length]; 40 | return solve(l_initial, tol); 41 | } 42 | 43 | public int solve(double[] l_initial, double tol) { 44 | MaxEntPotential potential = new MaxEntPotential(d_mus); 45 | optimizer = new NewtonOptimizer(potential); 46 | optimizer.setVerbose(verbose); 47 | lambdas = optimizer.solve(l_initial, tol); 48 | isConverged = optimizer.isConverged(); 49 | if (verbose) { 50 | System.out.println("Final Polynomial: " + Arrays.toString(lambdas)); 51 | } 52 | cumFuncEvals = potential.getCumFuncEvals(); 53 | 54 | approxCDF = ChebyshevPolynomial.fit(new MaxEntFunction(lambdas), tol).integralPoly(); 55 | return optimizer.getStepCount(); 56 | } 57 | 58 | public double[] estimateQuantiles(double[] ps, double min, double max) { 59 | UnivariateSolver bSolver = new BrentSolver(1e-6); 60 | int n = ps.length; 61 | double c = (max + min) / 2; 62 | double r = (max - min) / 2; 63 | double[] quantiles = new double[n]; 64 | 65 | for (int i = 0; i < n; i++) { 66 | double p = ps[i]; 67 | double q; 68 | if (p <= 0.0) { 69 | q = -1; 70 | } else if (p >= 1.0) { 71 | q = 1; 72 | } else { 73 | q = bSolver.solve( 74 | 100, 75 | (x) -> approxCDF.value(x) - p, 76 | -1, 77 | 1, 78 | 0 79 | ); 80 | } 81 | quantiles[i] = q*r+c; 82 | } 83 | return quantiles; 84 | } 85 | 86 | public double estimateCDF(double x) { 87 | return approxCDF.value(x); 88 | } 89 | 90 | public double[] getLambdas() { 91 | return lambdas; 92 | } 93 | 94 | public NewtonOptimizer getOptimizer() { 
95 | return optimizer; 96 | } 97 | public int getCumFuncEvals() { 98 | return cumFuncEvals; 99 | } 100 | public double[] getChebyshevMoments() { return d_mus; } 101 | 102 | public boolean isConverged() { 103 | return isConverged; 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/GFunction.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.chebyshev.ChebyshevPolynomial; 4 | import org.apache.commons.math3.analysis.UnivariateFunction; 5 | 6 | class GFunction implements UnivariateFunction { 7 | private boolean useStandardBasis; 8 | private double aCenter, aScale, bCenter, bScale; 9 | private ChebyshevPolynomial cBasis; 10 | 11 | public GFunction( 12 | int k, boolean useStandardBasis, 13 | double aCenter, double aScale, 14 | double bCenter, double bScale 15 | ) { 16 | this.cBasis = ChebyshevPolynomial.basis(k); 17 | this.useStandardBasis = useStandardBasis; 18 | this.aCenter = aCenter; 19 | this.aScale = aScale; 20 | this.bCenter = bCenter; 21 | this.bScale = bScale; 22 | } 23 | 24 | @Override 25 | public double value(double y) { 26 | double x = y * aScale + aCenter; 27 | double gX; 28 | if (useStandardBasis) { 29 | gX = Math.log(x); 30 | } else { 31 | gX = Math.exp(x); 32 | } 33 | double scaledBGX = (gX - bCenter) / bScale; 34 | return cBasis.value(scaledBGX); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/MaxEntFunction.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.chebyshev.ChebyshevPolynomial; 4 | import org.apache.commons.math3.analysis.UnivariateFunction; 5 | import org.apache.commons.math3.util.FastMath; 6 | 7 | /** 8 | * Solutions to the maximum entropy moment problem have the form exp(-poly(x)). 
9 | * It is useful to express the polynomial in a chebyshev basis for better conditioning. 10 | */ 11 | public class MaxEntFunction implements UnivariateFunction{ 12 | private ChebyshevPolynomial p; 13 | private ChebyshevPolynomial p_approx; 14 | private int funcEvals; 15 | 16 | public int getFuncEvals() { 17 | return funcEvals; 18 | } 19 | 20 | public MaxEntFunction(double[] coeffs) { 21 | this.p = new ChebyshevPolynomial(coeffs); 22 | this.funcEvals = 0; 23 | } 24 | 25 | @Override 26 | public double value(double v) { 27 | return FastMath.exp(-p.value(v)); 28 | } 29 | 30 | public double[] moments(int mu_k, double tol) { 31 | p_approx = ChebyshevPolynomial.fit(this, tol); 32 | funcEvals += p_approx.getNumFitEvals(); 33 | double[] out_moments = new double[mu_k]; 34 | for (int i = 0; i < mu_k; i++) { 35 | ChebyshevPolynomial p_times_moment = p_approx.multiplyByBasis(i); 36 | out_moments[i] = p_times_moment.integrate(); 37 | } 38 | return out_moments; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/MaxEntPotential.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.optimizer.FunctionWithHessian; 4 | 5 | /** 6 | * Minimizing this function yields a maxent pdf which matches the the empirical 7 | * moments of a dataset. The function is convex with symmetric positive definite 8 | * hessian and has a global stationary minimum (gradient = 0) at the solution. 
9 | */ 10 | public class MaxEntPotential implements FunctionWithHessian { 11 | protected double[] d_mus; 12 | 13 | private int cumFuncEvals = 0; 14 | protected double[] lambd; 15 | protected double[] mus; 16 | protected double[] grad; 17 | protected double[][] hess; 18 | 19 | public MaxEntPotential( 20 | double[] d_mus 21 | ) { 22 | this.d_mus = d_mus; 23 | 24 | this.cumFuncEvals = 0; 25 | 26 | int k = d_mus.length; 27 | this.mus = new double[k]; 28 | this.grad = new double[k]; 29 | this.hess = new double[k][k]; 30 | } 31 | 32 | @Override 33 | public int dim() { 34 | return d_mus.length; 35 | } 36 | 37 | @Override 38 | public void computeOnlyValue(double[] point, double tol) { 39 | computeAll(point, tol); 40 | } 41 | 42 | @Override 43 | public void computeAll(double[] lambd, double tol) { 44 | this.lambd = lambd; 45 | int k = lambd.length; 46 | MaxEntFunction f = new MaxEntFunction(lambd); 47 | this.mus = f.moments(k*2, tol); 48 | this.cumFuncEvals += f.getFuncEvals(); 49 | 50 | for (int i = 0; i < k; i++) { 51 | this.grad[i] = d_mus[i] - mus[i]; 52 | } 53 | for (int i=0; i < k; i++) { 54 | for (int j=0; j <= i; j++) { 55 | this.hess[i][j] = (mus[i+j] + mus[i-j])/2; 56 | } 57 | } 58 | for (int i=0; i < k; i++) { 59 | for (int j=i+1; j < k; j++) { 60 | this.hess[i][j] = hess[j][i]; 61 | } 62 | } 63 | } 64 | 65 | @Override 66 | public double getValue() { 67 | double sum = 0.0; 68 | int k = d_mus.length; 69 | for (int i = 0; i < k; i++) { 70 | sum += lambd[i] * d_mus[i]; 71 | } 72 | return this.mus[0] + sum; 73 | } 74 | 75 | @Override 76 | public double[] getGradient() { 77 | return grad; 78 | } 79 | 80 | @Override 81 | public double[][] getHessian() { 82 | return hess; 83 | } 84 | 85 | public int getCumFuncEvals() { 86 | return cumFuncEvals; 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/MnatSolver.java: 
-------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | 6 | /** 7 | * https://www.sciencedirect.com/science/article/pii/S0167715208000539 8 | */ 9 | public class MnatSolver { 10 | // values expected to lie in [0,1] 11 | public static double[] estimatePDF( 12 | double[] moments 13 | ) { 14 | int a = moments.length - 1; 15 | double[] pdf = new double[a+1]; 16 | long[][] binoms = MathUtil.getBinomials(a); 17 | 18 | for (int k = 0; k <= a; k++) { 19 | double sum = 0; 20 | for (int j = k; j <= a; j++) { 21 | double curTerm = (double)binoms[a][j]*(double)binoms[j][k]*moments[j]; 22 | if ((j - k) % 2 != 0) { 23 | curTerm *= -1; 24 | } 25 | sum += curTerm; 26 | } 27 | pdf[k] = sum; 28 | } 29 | return pdf; 30 | } 31 | 32 | public static double[] estimateCDF( 33 | double[] moments 34 | ) { 35 | double[] pdf = estimatePDF(moments); 36 | double[] cdf = new double[pdf.length]; 37 | cdf[0] = pdf[0]; 38 | for (int i = 1; i < pdf.length; i++) { 39 | cdf[i] = cdf[i-1] + pdf[i]; 40 | } 41 | return cdf; 42 | } 43 | 44 | public static double[] estimateQuantiles( 45 | double min, 46 | double max, 47 | double[] powerSums, 48 | List ps 49 | ) { 50 | double[] moments = MathUtil.powerSumsToPosMoments(powerSums, min, max); 51 | double[] cdf = estimateCDF(moments); 52 | 53 | int n = ps.size(); 54 | double[] qs = new double[n]; 55 | int a = powerSums.length - 1; 56 | for (int i = 0; i < n; i++) { 57 | double p = ps.get(i); 58 | int idx = Arrays.binarySearch(cdf, p); 59 | if (idx < 0) { 60 | idx = -(idx + 1); 61 | } 62 | double fracIdx = 0.0; 63 | if (idx > 0) { 64 | // fracIdx = (idx-1.0)*1.0 + (p - cdf[idx - 1]) / (cdf[idx] - cdf[idx-1]); 65 | fracIdx = idx; 66 | } 67 | qs[i] = fracIdx/a * (max - min) + min; 68 | } 69 | return qs; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- 
/javamsketch/msolver/src/main/java/msolver/chebyshev/CosScaledFunction.java: -------------------------------------------------------------------------------- 1 | package msolver.chebyshev; 2 | 3 | public interface CosScaledFunction { 4 | int numFuncs(); 5 | double[][] calc(int N); 6 | } 7 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/chebyshev/QuadraticCosFunction.java: -------------------------------------------------------------------------------- 1 | package msolver.chebyshev; 2 | 3 | import org.apache.commons.math3.util.FastMath; 4 | 5 | public class QuadraticCosFunction implements CosScaledFunction { 6 | private int k; 7 | public QuadraticCosFunction(int k) { 8 | this.k = k; 9 | } 10 | 11 | @Override 12 | public int numFuncs() { 13 | return k; 14 | } 15 | 16 | @Override 17 | public double[][] calc(int N) { 18 | double[] cosValues = new double[N+1]; 19 | for (int j = 0; j <= N; j++) { 20 | cosValues[j] = FastMath.cos(j * Math.PI / N); 21 | } 22 | 23 | double[][] values = new double[k][N+1]; 24 | for (int i = 0; i < k; i++) { 25 | for (int j = 0; j <= N; j++) { 26 | values[i][j] = (i+1)*cosValues[j]*cosValues[j]; 27 | } 28 | } 29 | return values; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/ExponentialData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class ExponentialData extends MomentData { 4 | private final double[] ranges = { 5 | 5.0366845333122605e-07, 6 | 15.619130152703306, 7 | -14.501347616032602, 8 | 2.7484964548137394 9 | }; 10 | private final double[] powerSums = { 11 | 1000000.0, 12 | 998677.78490783856, 13 | 1991896.3142772249, 14 | 5947596.181134684, 15 | 23651528.771513801, 16 | 117830113.75301118, 17 | 710308609.25475609, 18 | 5073006703.31318, 19 | 42241259442.990211, 20 | 
403047330063.94159, 21 | 4314998960419.9683, 22 | 50641655821410.031, 23 | 637282955148883.38 24 | }; 25 | private final double[] logSums = { 26 | 1000000.0, 27 | -578739.68503790628, 28 | 1983294.7107239107, 29 | -5476131.5807481054, 30 | 23699783.595155567, 31 | -118308097.22723247, 32 | 712931526.56406581, 33 | -4933389056.9205084, 34 | 38572700905.764816, 35 | -334201709511.9444, 36 | 3161470899883.4248, 37 | -32231476977189.141, 38 | 350149425729588.94 39 | }; 40 | 41 | @Override 42 | public double[] getPowerSums() { 43 | return powerSums; 44 | } 45 | 46 | @Override 47 | public double getMin() { 48 | return ranges[0]; 49 | } 50 | 51 | @Override 52 | public double getMax() { 53 | return ranges[1]; 54 | } 55 | 56 | @Override 57 | public double[] getLogSums() { 58 | return logSums; 59 | } 60 | 61 | @Override 62 | public double getLogMin() { 63 | return ranges[2]; 64 | } 65 | 66 | @Override 67 | public double getLogMax() { 68 | return ranges[3]; 69 | } 70 | } 71 | 72 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/GaussianData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class GaussianData extends MomentData { 4 | private final double[] ranges = { 5 | -5.4426943895132043, 5.0389474541327557, 6 | 0,1 7 | }; 8 | private final double[] powerSums = { 9 | 10000000.0, 10 | 480.5458937862864, 11 | 9993547.2435967941, 12 | 309.37531648859698, 13 | 29973127.286717705, 14 | -9041.491442481145, 15 | 150000244.00952214, 16 | -444367.57808807673, 17 | 1053293551.5678303, 18 | -13936938.368433975, 19 | 9539780309.9934578, 20 | -386976102.34488362, 21 | 105935145427.16087, 22 | -10292879169.101974, 23 | 1391938729272.7761, 24 | -270802772130.85544, 25 | 21042777928938.496, 26 | -7160080883740.7246, 27 | 357414134285331.12, 28 | -191665951342369.09 29 | }; 30 | private final double[] logSums = { 31 | 10000000.0 32 | 
}; 33 | 34 | @Override 35 | public double[] getPowerSums() { 36 | return powerSums; 37 | } 38 | 39 | @Override 40 | public double getMin() { 41 | return ranges[0]; 42 | } 43 | 44 | @Override 45 | public double getMax() { 46 | return ranges[1]; 47 | } 48 | 49 | @Override 50 | public double[] getLogSums() { 51 | return logSums; 52 | } 53 | 54 | @Override 55 | public double getLogMin() { 56 | return ranges[2]; 57 | } 58 | 59 | @Override 60 | public double getLogMax() { 61 | return ranges[3]; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/HepData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class HepData extends MomentData { 4 | private final double[] range = { 5 | -1.960548996925354, 6 | 4.378281593322753 7 | }; 8 | private final double[] powerSums = { 9 | 10500000.0, 10 | 171590.10344086567, 11 | 10600692.901931819, 12 | 3656024.1204772266, 13 | 27999983.535580769, 14 | 26085580.4679562, 15 | 112571037.57246359, 16 | 178035035.71434641, 17 | 597052564.3216269, 18 | 1272051754.8467662, 19 | 3862170673.2579589, 20 | 9736800905.3333244, 21 | 28892120151.48772, 22 | 80400830561.547516, 23 | 241656673866.27899, 24 | 717077710760.47144, 25 | 2216395701960.5981, 26 | 6900059165428.8955, 27 | 22051018347144.91, 28 | 71474655616939.391 29 | }; 30 | private final double[] logSums = {1.0}; 31 | 32 | @Override 33 | public double[] getPowerSums() { 34 | return powerSums; 35 | } 36 | 37 | @Override 38 | public double getMin() { 39 | return range[0]; 40 | } 41 | 42 | @Override 43 | public double getMax() { 44 | return range[1]; 45 | } 46 | 47 | @Override 48 | public double[] getLogSums() { 49 | return logSums; 50 | } 51 | 52 | @Override 53 | public double getLogMin() { 54 | return 0.0; 55 | } 56 | 57 | @Override 58 | public double getLogMax() { 59 | return 0.0; 60 | } 61 | } 62 | 63 | 
-------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/MilanData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | // 2013-11 4 | public class MilanData extends MomentData { 5 | private final double[] ranges = { 6 | 2.3314976995293306e-06, 7 | 7936.2653798841566, 8 | -12.968999707389781, 9 | 8.9791980884111684 10 | }; 11 | private final double[] powerSums = { 12 | 81109777.0, 13 | 2982688169.6811647, 14 | 978439974358.73865, 15 | 871718166698519.75, 16 | 1.3802820224027354e+18, 17 | 3.1721196216284679e+21, 18 | 9.4920224710331096e+24, 19 | 3.4822245172502998e+28, 20 | 1.5113747392231059e+32, 21 | 7.5487997054766176e+35, 22 | 4.2236053442107682e+39, 23 | 2.5794131452332432e+43, 24 | 1.6810991725372064e+47, 25 | 1.1485760824967742e+51, 26 | 8.1186456852298384e+54, 27 | 5.8811422826709824e+58, 28 | 4.3371573032664479e+62, 29 | 3.2410617072895531e+66, 30 | 2.4461380713938232e+70, 31 | 1.8602581958318741e+74 32 | }; 33 | private final double[] logSums = { 34 | 81109777.0, 35 | 39954311.084389985, 36 | 975086507.65943003, 37 | 405866981.35082442, 38 | 22518788911.620308, 39 | -22190675853.522003, 40 | 840220861538.83716, 41 | -3008231730689.7461, 42 | 47081672847213.734, 43 | -301156754002264.44, 44 | 3606454337590192.0, 45 | -30859127373541904.0, 46 | 3.4368887948251686e+17, 47 | -3.4172450245762365e+18, 48 | 3.8032805624376271e+19, 49 | -4.1088294003414683e+20, 50 | 4.6743717140718132e+21, 51 | -5.3111237684754928e+22, 52 | 6.1969914256246635e+23, 53 | -7.2792735817422283e+24 54 | }; 55 | 56 | @Override 57 | public double[] getPowerSums() { 58 | return powerSums; 59 | } 60 | 61 | @Override 62 | public double getMin() { 63 | return ranges[0]; 64 | } 65 | 66 | @Override 67 | public double getMax() { 68 | return ranges[1]; 69 | } 70 | 71 | @Override 72 | public double[] getLogSums() { 73 | return logSums; 74 | } 75 | 76 | 
@Override 77 | public double getLogMin() { 78 | return ranges[2]; 79 | } 80 | 81 | @Override 82 | public double getLogMax() { 83 | return ranges[3]; 84 | } 85 | } 86 | 87 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/MomentData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | import java.util.Arrays; 4 | 5 | public abstract class MomentData { 6 | abstract public double[] getPowerSums(); 7 | abstract public double getMin(); 8 | abstract public double getMax(); 9 | 10 | public double[] getLogSums() { 11 | double[] results = new double[1]; 12 | results[0] = 1.0; 13 | return results; 14 | } 15 | public double getLogMin() { 16 | return 0.0; 17 | } 18 | public double getLogMax() { 19 | return 0.0; 20 | } 21 | 22 | public double[] getPowerSums(int k) { 23 | return Arrays.copyOf(getPowerSums(), k); 24 | } 25 | public double[] getLogSums(int k) { 26 | return Arrays.copyOf(getLogSums(), k); 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/OccupancyData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class OccupancyData extends MomentData { 4 | private final double min = 412.75; 5 | private final double max = 2076.5; 6 | private final double logMin = 6.0228420828002376; 7 | private final double logMax = 7.6384390630708081; 8 | private final double[] powerSums = { 9 | 20560.0, 10 | 14197775.359523809, 11 | 11795382081.900866, 12 | 11920150330935.938, 13 | 14243310876969824.0, 14 | 1.9248869180998238e+19, 15 | 2.8335762132634282e+22, 16 | 4.431640701816542e+25, 17 | 7.2509584910158713e+28, 18 | 1.2290081330972746e+32, 19 | 2.1433360706825834e+35, 20 | 3.8263457725342386e+38, 21 | 6.9641284233810108e+41, 22 | 1.287891117361348e+45, 23 | 
2.4132657512596994e+48, 24 | 4.5712141086232246e+51, 25 | 8.7361384845196883e+54, 26 | 1.6818212554569329e+58, 27 | 3.2572457284172447e+61, 28 | 6.3398052560875453e+64 29 | }; 30 | private final double[] logSums = { 31 | 20560.0, 32 | 132778.81355561133, 33 | 860423.75561972987, 34 | 5595528.9043199299, 35 | 36524059.16578535, 36 | 239323723.78677931, 37 | 1574401576.9855776, 38 | 10399585507.478024, 39 | 68980678228.532593, 40 | 459495821550.01648, 41 | 3073979747643.9238, 42 | 20653745268445.156, 43 | 139372854449999.69, 44 | 944566287701071.0, 45 | 6429026416774866.0, 46 | 43943128435886808.0, 47 | 3.0160302130365139e+17, 48 | 2.0784407797638454e+18, 49 | 1.4379655766584013e+19, 50 | 9.9865203720404238e+19 51 | }; 52 | 53 | @Override 54 | public double[] getPowerSums() { 55 | return powerSums; 56 | } 57 | 58 | @Override 59 | public double getMin() { 60 | return min; 61 | } 62 | 63 | @Override 64 | public double getMax() { 65 | return max; 66 | } 67 | 68 | @Override 69 | public double[] getLogSums() { 70 | return logSums; 71 | } 72 | 73 | @Override 74 | public double getLogMin() { 75 | return logMin; 76 | } 77 | 78 | @Override 79 | public double getLogMax() { 80 | return logMax; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/RetailData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class RetailData { 4 | public static double[] moments = { 5 | 1.0, 6 | -0.99803244599458163, 7 | 0.99242770820196713, 8 | -0.98346992011358814, 9 | 0.97167161929459778, 10 | -0.95732586721724033, 11 | 0.94080698235117055, 12 | -0.92238731864676526, 13 | 0.90236473106015347, 14 | -0.88094882137844899, 15 | 0.85841937756560194, 16 | -0.83491133917007543, 17 | 0.81068359928271649, 18 | -0.78582604637928322, 19 | 0.76061221132889334, 20 | -0.73507756422695447, 21 | 0.70940149326508817, 22 | 
-0.68358226554192769, 23 | 0.65781293088596959, 24 | -0.63212643646674627, 25 | 0.60669960200873319, 26 | -0.58153118648992819 27 | }; 28 | 29 | public static double[] powerSums = { 30 | 541909.0, 31 | 5872552.0, 32 | 2028897322.0, 33 | 10327002658180.0, 34 | 87690166564334848.0, 35 | 8.2085567205202107e+20, 36 | 7.9010897350141134e+24, 37 | 7.685072127365144e+28, 38 | 7.510941100173536e+32, 39 | 7.3600614962632969e+36, 40 | 7.2243545948138156e+40 41 | }; 42 | 43 | public static double min = 1; 44 | public static double max = 10000; 45 | } 46 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/RetailQuantityData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class RetailQuantityData extends MomentData{ 4 | public static final double[] powerSums = { 5 | 531285.0, 6 | 5660981.0, 7 | 13127647799.0, 8 | 943385744203541.0, 9 | 7.3401290527335825e+19, 10 | 5.7374634895753686e+24, 11 | 4.4941878460622702e+29, 12 | 3.5267533869172936e+34, 13 | 2.7724146420399472e+39, 14 | 2.1831198887574202e+44, 15 | 1.7219100191391005e+49, 16 | 1.3602936186335558e+54, 17 | 1.0762640535486096e+59, 18 | 8.5279096877163735e+63, 19 | 6.7666981261637846e+68, 20 | 5.3764281895692519e+73, 21 | 4.2772622266819037e+78, 22 | 3.4069441013233427e+83, 23 | 2.7168368273312946e+88, 24 | 2.1688733753934956e+93 25 | }; 26 | 27 | public static final double[] logSums = { 28 | 531285.0, 29 | 733706.08385088702, 30 | 1803377.9327264477, 31 | 5313341.6192785092, 32 | 18079041.058607381, 33 | 69790609.377532199, 34 | 302287816.89519858, 35 | 1456494992.1214058, 36 | 7765406800.8637161, 37 | 45788298431.40226, 38 | 299590569873.87885, 39 | 2181621707739.7278, 40 | 17627303232908.797, 41 | 156164602838914.75, 42 | 1489418691939729.2, 43 | 14997093766426690.0, 44 | 1.5677925320389693e+17, 45 | 1.6804693592838444e+18, 46 | 1.831003035492703e+19, 47 | 
2.0164791544383e+20 48 | }; 49 | 50 | @Override 51 | public double[] getPowerSums() { 52 | return powerSums; 53 | } 54 | 55 | @Override 56 | public double[] getLogSums() { 57 | return logSums; 58 | } 59 | 60 | @Override 61 | public double getMin() { 62 | return 1.0; 63 | } 64 | 65 | @Override 66 | public double getMax() { 67 | return 80995.0; 68 | } 69 | 70 | @Override 71 | public double getLogMin() { 72 | return 0.0; 73 | } 74 | 75 | @Override 76 | public double getLogMax() { 77 | return Math.log(80995.0); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/RetailQuantityLogData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class RetailQuantityLogData extends MomentData { 4 | public static final double[] powerSums = { 5 | 531285.0, 6 | 733706.08385088702, 7 | 1803377.9327264477, 8 | 5313341.6192785092, 9 | 18079041.058607381, 10 | 69790609.377532199, 11 | 302287816.89519858, 12 | 1456494992.1214058, 13 | 7765406800.8637161, 14 | 45788298431.40226, 15 | 299590569873.87885, 16 | 2181621707739.7278, 17 | 17627303232908.797, 18 | 156164602838914.75, 19 | 1489418691939729.2 20 | }; 21 | 22 | @Override 23 | public double[] getPowerSums() { 24 | return powerSums; 25 | } 26 | 27 | @Override 28 | public double getMin() { 29 | return 0; 30 | } 31 | 32 | @Override 33 | public double getMax() { 34 | return 11.302142703354239; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/ShuttleData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class ShuttleData { 4 | public static double[] moments = { 5 | 1.0, 6 | -0.57071217926390339, 7 | -0.22603817813570512, 8 | 0.68115364159641145, 9 | -0.60778341324143959, 10 | 0.17844626060954086, 
11 | 0.2201808190269598, 12 | -0.29730967690787835, 13 | 0.14621881025862984, 14 | -0.011587023082648803, 15 | 0.062701300094490348, 16 | -0.19322750394339511, 17 | 0.075545786808184023, 18 | 0.22135866677749558, 19 | -0.31192502106061021, 20 | 0.16682708244373193, 21 | 0.023534352707310534, 22 | -0.15061754686789203, 23 | 0.12689914341612776, 24 | -0.02445402425925142, 25 | 0.026327570711651923, 26 | -0.13152321634595951 27 | }; 28 | 29 | public static double[] powerSums = { 30 | 43500.0, 31 | 2098864.0, 32 | 107800016.0, 33 | 6005831428.0, 34 | 369924186368.0, 35 | 25475420299084.0, 36 | 1954803892405976.0, 37 | 1.6430830896853082e+17, 38 | 1.4790926986227182e+19, 39 | 1.3971091486866444e+21, 40 | 1.3638448459556252e+23 41 | }; 42 | 43 | public static double min = 27; 44 | public static double max = 126; 45 | } 46 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/data/UniformData.java: -------------------------------------------------------------------------------- 1 | package msolver.data; 2 | 3 | public class UniformData extends MomentData{ 4 | public static final int N = 1000000; 5 | public static final int k = 100; 6 | 7 | @Override 8 | public double[] getPowerSums() { 9 | double[] powerSums = new double[k]; 10 | for (int i = 0; i < k; i++) { 11 | powerSums[i] = N*1.0 / (i+1); 12 | } 13 | return powerSums; 14 | } 15 | 16 | @Override 17 | public double getMin() { 18 | return 0.0; 19 | } 20 | 21 | @Override 22 | public double getMax() { 23 | return 1.0; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/optimizer/FunctionWithHessian.java: -------------------------------------------------------------------------------- 1 | package msolver.optimizer; 2 | 3 | /** 4 | * Describes a function which can be optimized using Newton's method. 
5 | */ 6 | public interface FunctionWithHessian { 7 | void computeOnlyValue(double[] point, double tol); 8 | void computeAll(double[] point, double tol); 9 | int dim(); 10 | double getValue(); 11 | double[] getGradient(); 12 | // Returns in row-major order 13 | double[][] getHessian(); 14 | } 15 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/optimizer/GenericOptimizer.java: -------------------------------------------------------------------------------- 1 | package msolver.optimizer; 2 | 3 | public interface GenericOptimizer { 4 | void setVerbose(boolean flag); 5 | 6 | void setMaxIter(int maxIter); 7 | 8 | boolean isConverged(); 9 | 10 | int getStepCount(); 11 | 12 | FunctionWithHessian getP(); 13 | 14 | double[] solve(double[] start, double gradTol); 15 | } 16 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/optimizer/QuadraticPotential.java: -------------------------------------------------------------------------------- 1 | package msolver.optimizer; 2 | 3 | /** 4 | * Simple quadratic function for use in tests. 
5 | */ 6 | public class QuadraticPotential implements FunctionWithHessian { 7 | private int k; 8 | private double Pval; 9 | private double[] Pgrad; 10 | private double[][] Phess; 11 | 12 | public QuadraticPotential(int k) { 13 | this.k = k; 14 | Pgrad = new double[k]; 15 | Phess = new double[k][k]; 16 | } 17 | 18 | @Override 19 | public void computeOnlyValue(double[] point, double tol) { 20 | double sum = 0; 21 | for (int i = 0; i < point.length; i++) { 22 | sum += point[i] * point[i]; 23 | } 24 | Pval = sum; 25 | } 26 | 27 | @Override 28 | public void computeAll(double[] point, double tol) { 29 | double sum = 0; 30 | for (int i = 0; i < point.length; i++) { 31 | sum += point[i] * point[i]; 32 | } 33 | Pval = sum; 34 | 35 | for (int i = 0; i < point.length; i++) { 36 | Pgrad[i] = 2*point[i]; 37 | for (int j = 0; j < point.length; j++) { 38 | if (j == i) { 39 | Phess[i][j] = 2; 40 | } else { 41 | Phess[i][j] = 0.0; 42 | } 43 | } 44 | } 45 | } 46 | 47 | @Override 48 | public int dim() { 49 | return this.k; 50 | } 51 | 52 | @Override 53 | public double getValue() { 54 | return Pval; 55 | } 56 | 57 | @Override 58 | public double[] getGradient() { 59 | return Pgrad; 60 | } 61 | 62 | @Override 63 | public double[][] getHessian() { 64 | return Phess; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/struct/MomentStruct.java: -------------------------------------------------------------------------------- 1 | package msolver.struct; 2 | 3 | public class MomentStruct { 4 | public double min, max, logMin, logMax; 5 | public double[] powerSums; 6 | public double[] logSums; 7 | } 8 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/thresholds/MarkovThreshold.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | import msolver.MathUtil; 4 | 
import msolver.struct.MomentStruct; 5 | 6 | public class MarkovThreshold implements MomentThreshold { 7 | private MomentStruct ms; 8 | 9 | public MarkovThreshold( 10 | MomentStruct ms 11 | ) { 12 | this.ms = ms; 13 | } 14 | 15 | @Override 16 | public double[] bound(double cutoff) { 17 | double[] outlierRateBounds = new double[2]; 18 | outlierRateBounds[0] = 0.0; 19 | outlierRateBounds[1] = 1.0; 20 | 21 | int ka = ms.powerSums.length; 22 | int kb = ms.logSums.length; 23 | double n = ms.powerSums[0]; 24 | if (ka > 1) { 25 | double[] xMinusMinMoments = MathUtil.shiftPowerSum(ms.powerSums, 1, ms.min); 26 | double[] maxMinusXMoments = MathUtil.shiftPowerSum(ms.powerSums, -1, ms.max); 27 | for (int i = 1; i < ka; i++) { 28 | double outlierRateUpperBound = (xMinusMinMoments[i] / n) / Math.pow(cutoff - ms.min, i); 29 | double outlierRateLowerBound = 1.0 - (maxMinusXMoments[i] / n) / Math.pow(ms.max - cutoff, i); 30 | outlierRateBounds[0] = Math.max(outlierRateBounds[0], outlierRateLowerBound); 31 | outlierRateBounds[1] = Math.min(outlierRateBounds[1], outlierRateUpperBound); 32 | } 33 | } 34 | 35 | double nl = ms.logSums[0]; 36 | if (kb > 1 && nl != 0) { 37 | double logCutoff = Math.log(cutoff); 38 | double fracIncluded = nl / n; 39 | double[] xMinusMinLogMoments = MathUtil.shiftPowerSum(ms.logSums, 1, ms.logMin); 40 | double[] maxMinusXLogMoments = MathUtil.shiftPowerSum(ms.logSums, -1, ms.logMax); 41 | for (int i = 1; i < kb; i++) { 42 | double outlierRateUpperBound = ( 43 | (1.0 - fracIncluded) + 44 | fracIncluded * (xMinusMinLogMoments[i] / nl) / Math.pow(logCutoff - ms.logMin, i) 45 | ); 46 | double outlierRateLowerBound = ( 47 | 1.0 - 48 | fracIncluded * (maxMinusXLogMoments[i] / nl) / Math.pow(ms.logMax - logCutoff, i) 49 | ); 50 | outlierRateBounds[0] = Math.max(outlierRateBounds[0], outlierRateLowerBound); 51 | outlierRateBounds[1] = Math.min(outlierRateBounds[1], outlierRateUpperBound); 52 | } 53 | } 54 | 55 | return outlierRateBounds; 56 | } 57 | } 58 | 
-------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/thresholds/MomentThreshold.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | public interface MomentThreshold { 4 | double[] bound(double cutoff); 5 | } 6 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/thresholds/RTTThreshold.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | import msolver.MathUtil; 4 | import msolver.SimpleBoundSolver; 5 | import msolver.struct.MomentStruct; 6 | 7 | public class RTTThreshold implements MomentThreshold { 8 | private MomentStruct ms; 9 | 10 | public RTTThreshold(MomentStruct ms) { 11 | this.ms = ms; 12 | } 13 | 14 | public double[] bound(double x) { 15 | double[] xs = new double[]{x}; 16 | double[] gttBounds = new double[]{0.0, 1.0}; 17 | double[] moments; 18 | SimpleBoundSolver boundSolver; 19 | double[] boundSizes; 20 | 21 | int ka = ms.powerSums.length; 22 | int kb = ms.logSums.length; 23 | 24 | // Standard basis 25 | moments = MathUtil.powerSumsToMoments(ms.powerSums); 26 | boundSolver = new SimpleBoundSolver(ka); 27 | try { 28 | boundSizes = boundSolver.solveBounds(moments, xs); 29 | double[] standardBounds = boundSolver.getBoundEndpoints(moments, x, boundSizes[0]); 30 | if (1.0 - standardBounds[1] > gttBounds[0]) { 31 | gttBounds[0] = 1.0 - standardBounds[1]; 32 | } 33 | if (1.0 - standardBounds[0] < gttBounds[1]) { 34 | gttBounds[1] = 1.0 - standardBounds[0]; 35 | } 36 | } catch (Exception e) {} 37 | 38 | // Log basis 39 | double[] logXs = new double[]{Math.log(x)}; 40 | moments = MathUtil.powerSumsToMoments(ms.logSums); 41 | try { 42 | boundSolver = new SimpleBoundSolver(kb); 43 | boundSizes = boundSolver.solveBounds(moments, logXs); 44 | double[] logBounds = 
boundSolver.getBoundEndpoints(moments, Math.log(x), boundSizes[0]); 45 | if (1.0 - logBounds[1] > gttBounds[0]) { 46 | gttBounds[0] = 1.0 - logBounds[1]; 47 | } 48 | if (1.0 - logBounds[0] < gttBounds[1]) { 49 | gttBounds[1] = 1.0 - logBounds[0]; 50 | } 51 | } catch (Exception e) {} 52 | 53 | return gttBounds; 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/main/java/msolver/thresholds/ThresholdCascade.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | import msolver.ChebyshevMomentSolver2; 4 | import msolver.struct.MomentStruct; 5 | 6 | public class ThresholdCascade { 7 | private MomentStruct ms; 8 | private MomentThreshold[] cascade; 9 | private ChebyshevMomentSolver2 solver; 10 | 11 | public ThresholdCascade(MomentStruct ms) { 12 | this.ms = ms; 13 | this.cascade = new MomentThreshold[2]; 14 | this.cascade[0] = new MarkovThreshold(ms); 15 | this.cascade[1] = new RTTThreshold(ms); 16 | } 17 | 18 | // Are there phi fraction above x, aka is CDF(x) < 1 - phi? 
19 | public boolean threshold(double x, double phi) { 20 | int ka = ms.powerSums.length; 21 | if (ka > 0) { 22 | if (ms.min == ms.max) { 23 | return x > ms.min; 24 | } 25 | } else { 26 | if (ms.logMin == ms.logMax) { 27 | return x > Math.exp(ms.logMin); 28 | } 29 | } 30 | 31 | if (x < ms.min) { 32 | return true; 33 | } 34 | if (x > ms.max) { 35 | return false; 36 | } 37 | 38 | for (int i = 0; i < cascade.length; i++) { 39 | MomentThreshold mt = cascade[i]; 40 | double[] bounds = mt.bound(x); 41 | if (bounds[0] > phi) { 42 | return true; 43 | } 44 | if (bounds[1] < phi) { 45 | return false; 46 | } 47 | } 48 | 49 | solver = ChebyshevMomentSolver2.fromPowerSums( 50 | ms.min, ms.max, 51 | ms.powerSums, 52 | ms.logMin, ms.logMax, 53 | ms.logSums 54 | ); 55 | solver.solve(1e-9); 56 | double cdfValue = solver.estimateCDF(x); 57 | if (cdfValue < 1 - phi) { 58 | return true; 59 | } else { 60 | return false; 61 | } 62 | } 63 | 64 | public ChebyshevMomentSolver2 getSolver() { 65 | return solver; 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/ChebyshevMomentSolverTest.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.data.RetailData; 4 | import msolver.data.ShuttleData; 5 | import msolver.optimizer.NewtonOptimizer; 6 | import org.apache.commons.math3.util.FastMath; 7 | import org.junit.Test; 8 | 9 | import static org.junit.Assert.*; 10 | 11 | public class ChebyshevMomentSolverTest { 12 | @Test 13 | public void testUniform() { 14 | double m_values[] = {1.0, 0, -1.0/3, 0, -1.0/15, 0, -1.0/35}; 15 | double tol = 1e-10; 16 | ChebyshevMomentSolver solver = new ChebyshevMomentSolver(m_values); 17 | solver.solve(tol); 18 | 19 | double[] coeffs = solver.getLambdas(); 20 | 21 | assertEquals(FastMath.log(2), coeffs[0], 1e-10); 22 | for (int i = 1; i < coeffs.length; i++) { 23 | assertEquals(0.0, coeffs[i], 
1e-10); 24 | } 25 | NewtonOptimizer opt = solver.getOptimizer(); 26 | assertTrue(opt.getStepCount() < 20); 27 | assertEquals(0, opt.getDampedStepCount()); 28 | } 29 | 30 | @Test 31 | public void testShuttle() { 32 | int k = 11; 33 | double[] m_values = new double[k]; 34 | for (int i = 0; i < k; i++) { 35 | m_values[i] = ShuttleData.moments[i]; 36 | } 37 | double tol = 1e-10; 38 | ChebyshevMomentSolver solver = new ChebyshevMomentSolver(m_values); 39 | solver.solve(tol); 40 | 41 | double[] coeffs = solver.getLambdas(); 42 | MaxEntFunction f = new MaxEntFunction(coeffs); 43 | double[] f_mus = f.moments(k, tol); 44 | for (int i = 0; i < k; i++) { 45 | assertEquals(m_values[i], f_mus[i], 10*tol); 46 | } 47 | NewtonOptimizer opt = solver.getOptimizer(); 48 | assertTrue(opt.getStepCount() < 20); 49 | 50 | double[] ps = {.5}; 51 | double[] expectedQs = {-0.602}; 52 | assertArrayEquals( 53 | expectedQs, 54 | solver.estimateQuantiles(ps, -1, 1), 55 | 1e-3 56 | ); 57 | 58 | expectedQs[0] = 45.0; 59 | assertArrayEquals( 60 | expectedQs, 61 | solver.estimateQuantiles(ps, 27, 126), 62 | 5.0 63 | ); 64 | } 65 | 66 | @Test 67 | public void testRetail() { 68 | int k = 11; 69 | double[] m_values = new double[k]; 70 | for (int i = 0; i < k; i++) { 71 | m_values[i] = RetailData.moments[i]; 72 | } 73 | double tol = 1e-10; 74 | ChebyshevMomentSolver solver = new ChebyshevMomentSolver(m_values); 75 | solver.solve(tol); 76 | 77 | double[] coeffs = solver.getLambdas(); 78 | MaxEntFunction f = new MaxEntFunction(coeffs); 79 | double[] f_mus = f.moments(k, tol); 80 | for (int i = 0; i < k; i++) { 81 | assertEquals(m_values[i], f_mus[i], 10*tol); 82 | } 83 | 84 | NewtonOptimizer opt = solver.getOptimizer(); 85 | assertTrue(opt.getStepCount() < 100); 86 | assertTrue(opt.getDampedStepCount() > 0); 87 | } 88 | 89 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/ChebyshevPolynomialTest.java: 
-------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.chebyshev.ChebyshevPolynomial; 4 | import msolver.chebyshev.QuadraticCosFunction; 5 | import org.junit.Test; 6 | 7 | import static org.junit.Assert.assertEquals; 8 | 9 | public class ChebyshevPolynomialTest { 10 | @Test 11 | public void testSimple() { 12 | double[] coeff = {1.0, 2.0, 3.0}; 13 | ChebyshevPolynomial cp = new ChebyshevPolynomial(coeff); 14 | assertEquals(2.34, cp.value(.7), 1e-10); 15 | assertEquals(cp.value(.7), cp.value2(.7), 1e-10); 16 | 17 | ChebyshevPolynomial cb = ChebyshevPolynomial.basis(2); 18 | assertEquals(-0.02, cb.value(.7), 1e-10); 19 | 20 | double[] coeff2 = {2.0, 1.0, 3.0}; 21 | cp = new ChebyshevPolynomial(coeff2); 22 | assertEquals(2.0, cp.integrate(), 1e-10); 23 | 24 | assertEquals(1.848, cp.multiplyByBasis(1).value(.7), 1e-10); 25 | } 26 | 27 | @Test 28 | public void testFitMulti() { 29 | QuadraticCosFunction multiFunction = new QuadraticCosFunction(3); 30 | ChebyshevPolynomial[] cfit = ChebyshevPolynomial.fitMulti(multiFunction, 1e-10); 31 | for (int i = 0; i < cfit.length; i++) { 32 | assertEquals((i+1)*.25, cfit[i].value(.5), 1e-10); 33 | } 34 | } 35 | 36 | @Test 37 | public void testFit() { 38 | double[] coeff = {1.0, 2.0, 3.0}; 39 | ChebyshevPolynomial cp = new ChebyshevPolynomial(coeff); 40 | ChebyshevPolynomial cfit = ChebyshevPolynomial.fit(cp, 1e-10); 41 | 42 | for (int i = 0; i < coeff.length; i++) { 43 | assertEquals(coeff[i], cfit.coeffs()[i], 1e-10); 44 | } 45 | } 46 | 47 | @Test 48 | public void testIntegrate() { 49 | double[] coeff = {2.0, 1.0, 3.0}; 50 | ChebyshevPolynomial cp = new ChebyshevPolynomial(coeff); 51 | assertEquals(cp.integrate(), cp.integralPoly().value(1), 1e-10); 52 | } 53 | 54 | @Test 55 | public void testMultiply() { 56 | double[] c1 = {1.0, 2.0, 3.0, 4.0}; 57 | double[] c2 = {.5, .6, .7, .8, .9}; 58 | ChebyshevPolynomial cp1 = new ChebyshevPolynomial(c1); 59 | 
ChebyshevPolynomial cp2 = new ChebyshevPolynomial(c2); 60 | 61 | ChebyshevPolynomial product = cp1.multiply(cp2); 62 | assertEquals( 63 | cp1.value(.5)*cp2.value(.5), 64 | product.value(.5), 65 | 1e-10 66 | ); 67 | 68 | product = cp1.multiply(cp1); 69 | assertEquals( 70 | cp1.value(.7)*cp1.value(.7), 71 | product.value(.7), 72 | 1e-10 73 | ); 74 | } 75 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/MathUtilTest.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.data.HepData; 4 | import msolver.data.MomentData; 5 | import msolver.data.OccupancyData; 6 | import org.junit.Test; 7 | 8 | import java.util.Arrays; 9 | 10 | import static org.junit.Assert.assertArrayEquals; 11 | import static org.junit.Assert.assertEquals; 12 | 13 | public class MathUtilTest { 14 | @Test 15 | public void testBinomial() { 16 | long[][] binoms = MathUtil.getBinomials(5); 17 | assertEquals(binoms[5][2], 10L); 18 | } 19 | 20 | @Test 21 | public void testChebyCoefficient() { 22 | int[][] cCoeffs = MathUtil.getChebyCoefficients(5); 23 | int[] expected = {0, -3, 0, 4, 0, 0}; 24 | assertArrayEquals(expected, cCoeffs[3]); 25 | } 26 | 27 | @Test 28 | public void testConvertMoments() { 29 | // integers from 0...1000 30 | double[] uniformPowerSums = {1001,500500,333833500,250500250000L}; 31 | double[] convertedChebyshevMoments = MathUtil.powerSumsToChebyMoments(0, 1000, uniformPowerSums); 32 | 33 | double[] expectedChebyshevMoments = {1.0, 0, -.332, 0}; 34 | assertArrayEquals(expectedChebyshevMoments, convertedChebyshevMoments, 1e-14); 35 | } 36 | 37 | @Test 38 | public void testChebyAccuracy() { 39 | MomentData data = new OccupancyData(); 40 | double[] chebys = MathUtil.powerSumsToChebyMoments( 41 | data.getMin(), data.getMax(), 42 | data.getPowerSums(40) 43 | ); 44 | System.out.println(Arrays.toString(chebys)); 45 | // chebys = 
MathUtil.powerSumsToChebyMoments( 46 | // data.getLogMin(), data.getLogMax(), 47 | // data.getLogSums(20) 48 | // ); 49 | data = new HepData(); 50 | chebys = MathUtil.powerSumsToChebyMoments( 51 | data.getMin(), data.getMax(), 52 | data.getPowerSums(40) 53 | ); 54 | System.out.println(Arrays.toString(chebys)); 55 | } 56 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/MaxEntFunction2Test.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | 7 | public class MaxEntFunction2Test { 8 | @Test 9 | public void testSimple() { 10 | double[] aCoeffs = {0, -1}; 11 | double[] bCoeffs = {0, 1}; 12 | MaxEntFunction2 f = new MaxEntFunction2( 13 | true, 14 | aCoeffs, 15 | bCoeffs, 16 | 5.05, 17 | 4.95, 18 | 2.220446049250313e-16, 19 | 2.302585092994046 20 | ); 21 | assertEquals(3.73002156214, f.zerothMoment(1e-8), 1e-8); 22 | 23 | double[][] pairwiseMoments = f.getPairwiseMoments(1e-8); 24 | assertEquals(3.73002156214, pairwiseMoments[0][0], 1e-8); 25 | assertEquals(0.4078218803, pairwiseMoments[1][3], 1e-8); 26 | assertEquals(1.12095675177, pairwiseMoments[1][1], 1e-8); 27 | 28 | double[][] hess = f.getHessian(1e-7); 29 | assertEquals(3.73002156214, hess[0][0], 1e-8); 30 | assertEquals(0.4078218803, hess[1][2], 1e-8); 31 | assertEquals(1.12095675177, hess[1][1], 1e-8); 32 | } 33 | 34 | @Test 35 | public void testCompareNumpy(){ 36 | double[] aCoeffs = {-1495.2106196044201, 63797.93868346012, -830014.2179376424, -296736.79198347515, -56032.24104079366, -6481.702314031079, -394.9593472527941}; 37 | double[] bCoeffs = {0, 280793.4251573418, 887242.8040778289, -41598.46235869913, 725.9088731130822, 223.10996965225195, -35.8575946806040}; 38 | MaxEntFunction2 f = new MaxEntFunction2( 39 | false, 40 | aCoeffs, 41 | bCoeffs, 42 | 6.830640572935523, 0.8077984901352853, 
1244.625000, 831.875000 43 | ); 44 | assertEquals(1.23, f.value(-.8), 0.01); 45 | } 46 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/MaxEntFunctionTest.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | public class MaxEntFunctionTest { 9 | @Test 10 | public void testMoments() { 11 | double[] coeff = {1.0, 2.0, 3.0}; 12 | MaxEntFunction f = new MaxEntFunction(coeff); 13 | double[] moments = f.moments(8, 1e-9); 14 | double[] expectedMoments = { 15 | 6.303954641290793, -1.0395877292934701, 16 | -4.9297352972133845, 2.0119170973456093, 17 | 2.458369282294647, -1.5127916121976486, 18 | -0.84272224125321182, 0.73491729283435847 19 | }; 20 | for (int i = 0; i < moments.length; i++) { 21 | assertEquals(expectedMoments[i], moments[i], 1e-10); 22 | } 23 | assertTrue(f.getFuncEvals() < 1000); 24 | } 25 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/MaxEntPotential2Test.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import msolver.optimizer.GenericOptimizer; 4 | import msolver.optimizer.NewtonOptimizer; 5 | import org.junit.Test; 6 | 7 | import static org.junit.Assert.assertArrayEquals; 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class MaxEntPotential2Test { 11 | @Test 12 | public void testSimple() { 13 | double[] lambdas = {0, -1, 1}; 14 | double[] d_mus = {3.730021562141137, -0.45542618913430216, 2.0002734064304235}; 15 | MaxEntPotential2 P = new MaxEntPotential2( 16 | true, 17 | 2, 18 | d_mus, 19 | 5.05, 20 | 4.95, 21 | 2.220446049250313e-16, 22 | 2.302585092994046 23 | ); 24 | P.computeAll(lambdas, 1e-8); 25 | 
assertEquals(0.40782188035828565, P.getHessian()[1][2], 1e-8); 26 | assertEquals(0, P.getGradient()[1], 1e-8); 27 | 28 | double[] l0 = {0, 0, 0}; 29 | GenericOptimizer optimizer = new NewtonOptimizer(P); 30 | l0 = optimizer.solve(l0, 1e-6); 31 | assertArrayEquals(lambdas, l0, 1e-6); 32 | } 33 | 34 | @Test 35 | public void testSimpleExp() { 36 | double[] lambdas = {0, 1, -1}; 37 | double[] d_mus = {3.730021562141138, 0.2702518442892859, -2.1854477512754396}; 38 | MaxEntPotential2 P = new MaxEntPotential2( 39 | false, 40 | 2, 41 | d_mus, 42 | 2.220446049250313e-16, 43 | 2.302585092994046, 44 | 5.05, 45 | 4.95 46 | ); 47 | P.computeAll(lambdas, 1e-8); 48 | double[] l0 = {0, 0, 0}; 49 | GenericOptimizer optimizer = new NewtonOptimizer(P); 50 | l0 = optimizer.solve(l0, 1e-6); 51 | assertArrayEquals(lambdas, l0, 1e-6); 52 | } 53 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/MaxEntPotentialTest.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | 7 | public class MaxEntPotentialTest { 8 | @Test 9 | public void testTrivial() { 10 | double m_values[] = {1.0, 0, -1.0/3, 0, -1.0/15, 0, -1.0/35}; 11 | double l_values[] = {0.0, 0, 0, 0, 0, 0, 0}; 12 | double tol = 1e-10; 13 | MaxEntPotential P = new MaxEntPotential(m_values); 14 | P.computeAll(l_values, tol); 15 | assertEquals(m_values.length, P.getGradient().length); 16 | } 17 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/MnatSolverTest.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Arrays; 6 | 7 | import static org.junit.Assert.assertArrayEquals; 8 | 9 | public class MnatSolverTest { 10 | @Test 11 | public 
void testUniform() { 12 | double[] m_values = {1.0, 1.0/2, 1.0/3, 1.0/4, 1.0/5, 1.0/6, 1.0/7}; 13 | 14 | double[] cdf = MnatSolver.estimateCDF(m_values); 15 | double[] qs = MnatSolver.estimateQuantiles(0, 1, m_values, Arrays.asList(.2, .5, .8)); 16 | double[] expectedQs = {.2, .5, .8}; 17 | assertArrayEquals(expectedQs, qs, .1); 18 | } 19 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/SolveBasisSelectorTest.java: -------------------------------------------------------------------------------- 1 | package msolver; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | 7 | public class SolveBasisSelectorTest { 8 | @Test 9 | public void testMilan() { 10 | double[] linscales = {-1.9949008094893061,10.974098897900475,3968.1326911078277,3968.13268877633}; 11 | SolveBasisSelector sel = new SolveBasisSelector(); 12 | sel.select( 13 | false, new double[7], new double[7], 14 | linscales[0], linscales[1], linscales[2], linscales[3] 15 | ); 16 | assertEquals(2, sel.getKb()); 17 | assertEquals(7, sel.getKa()); 18 | } 19 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/optimizer/BFGSOptimizerTest.java: -------------------------------------------------------------------------------- 1 | package msolver.optimizer; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.Arrays; 6 | 7 | import static org.junit.Assert.*; 8 | 9 | public class BFGSOptimizerTest { 10 | @Test 11 | public void testQuadratic() { 12 | QuadraticPotential qp = new QuadraticPotential(2); 13 | BFGSOptimizer opt = new BFGSOptimizer(qp); 14 | opt.setVerbose(false); 15 | double[] start = {1.0, 2.0}; 16 | double[] solution = opt.solve(start, 1e-10); 17 | for (int i = 0; i < start.length; i++) { 18 | assertEquals(0.0, solution[i], 1e-10); 19 | } 20 | } 21 | } 
-------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/optimizer/NewtonOptimizerTest.java: -------------------------------------------------------------------------------- 1 | package msolver.optimizer; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | import static org.junit.Assert.assertTrue; 7 | 8 | public class NewtonOptimizerTest { 9 | @Test 10 | public void testQuadratic() { 11 | QuadraticPotential qp = new QuadraticPotential(2); 12 | NewtonOptimizer opt = new NewtonOptimizer(qp); 13 | double[] start = {1.0, 2.0}; 14 | double[] solution = opt.solve(start, 1e-10); 15 | for (int i = 0; i < start.length; i++) { 16 | assertEquals(0.0, solution[i], 1e-10); 17 | } 18 | assertEquals(1, opt.getStepCount()); 19 | assertEquals(0, opt.getDampedStepCount()); 20 | assertTrue(opt.isConverged()); 21 | } 22 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/optimizer/QuadraticPotentialTest.java: -------------------------------------------------------------------------------- 1 | package msolver.optimizer; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | 7 | public class QuadraticPotentialTest { 8 | @Test 9 | public void testSimple() { 10 | QuadraticPotential qp = new QuadraticPotential(2); 11 | double[] x = {1.0, 2.0}; 12 | qp.computeAll(x, 0.0); 13 | assertEquals(5, qp.getValue(), 1e-10); 14 | 15 | double[] xMin = {0.0, 0.0}; 16 | qp.computeAll(xMin, 0.0); 17 | assertEquals(0, qp.getGradient()[1], 1e-10); 18 | } 19 | 20 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/thresholds/MarkovThresholdTest.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | import msolver.data.ExponentialData; 4 | import 
msolver.data.GaussianData; 5 | import msolver.data.MomentData; 6 | import msolver.struct.MomentStruct; 7 | import org.junit.Test; 8 | 9 | import java.util.Arrays; 10 | 11 | import static org.junit.Assert.*; 12 | 13 | public class MarkovThresholdTest { 14 | @Test 15 | public void testSimple() { 16 | MomentData data = new ExponentialData(); 17 | MomentStruct m = new MomentStruct(); 18 | m.min = data.getMin(); 19 | m.max = data.getMax(); 20 | m.logMin = data.getLogMin(); 21 | m.logMax = data.getLogMax(); 22 | m.powerSums = data.getPowerSums(10); 23 | m.logSums = data.getLogSums(10); 24 | 25 | MarkovThreshold mt = new MarkovThreshold(m); 26 | double[] bounds; 27 | bounds = mt.bound(.1); 28 | assertTrue(bounds[0] > 0); 29 | assertTrue(bounds[0] < .9); 30 | assertTrue(bounds[1] > .9); 31 | assertTrue(bounds[1] <= 1); 32 | 33 | bounds = mt.bound(5); 34 | assertTrue(bounds[0] < 0.01); 35 | assertTrue(bounds[1] > 0.01); 36 | assertTrue(bounds[1] <= 0.5); 37 | } 38 | 39 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/thresholds/RTTThresholdTest.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | import msolver.data.ExponentialData; 4 | import msolver.data.MomentData; 5 | import msolver.struct.MomentStruct; 6 | import org.junit.Test; 7 | 8 | import java.util.Arrays; 9 | 10 | import static org.junit.Assert.*; 11 | 12 | public class RTTThresholdTest { 13 | @Test 14 | public void testSimple() { 15 | MomentData data = new ExponentialData(); 16 | MomentStruct m = new MomentStruct(); 17 | m.min = data.getMin(); 18 | m.max = data.getMax(); 19 | m.logMin = data.getLogMin(); 20 | m.logMax = data.getLogMax(); 21 | m.powerSums = data.getPowerSums(10); 22 | m.logSums = data.getLogSums(10); 23 | 24 | MomentThreshold mt = new RTTThreshold(m); 25 | double[] bounds, bounds2; 26 | bounds = mt.bound(.1); 27 | assertTrue(bounds[0] > 0); 28 | 
assertTrue(bounds[0] < .9); 29 | assertTrue(bounds[1] > .9); 30 | assertTrue(bounds[1] <= 1); 31 | 32 | MomentThreshold markov = new MarkovThreshold(m); 33 | bounds = mt.bound(5); 34 | bounds2 = markov.bound(5); 35 | assertTrue(bounds[0] > bounds2[0]); 36 | assertTrue(bounds[1] < bounds2[1]); 37 | assertTrue(bounds[0] < 0.01); 38 | assertTrue(bounds[1] > 0.01); 39 | assertTrue(bounds[1] <= 0.5); 40 | } 41 | 42 | } -------------------------------------------------------------------------------- /javamsketch/msolver/src/test/java/msolver/thresholds/ThresholdCascadeTest.java: -------------------------------------------------------------------------------- 1 | package msolver.thresholds; 2 | 3 | import msolver.data.ExponentialData; 4 | import msolver.data.MomentData; 5 | import msolver.struct.MomentStruct; 6 | import org.junit.Test; 7 | 8 | import static org.junit.Assert.*; 9 | 10 | public class ThresholdCascadeTest { 11 | @Test 12 | public void testSimple() { 13 | MomentData data = new ExponentialData(); 14 | MomentStruct m = new MomentStruct(); 15 | m.min = data.getMin(); 16 | m.max = data.getMax(); 17 | m.logMin = data.getLogMin(); 18 | m.logMax = data.getLogMax(); 19 | m.powerSums = data.getPowerSums(10); 20 | m.logSums = data.getLogSums(10); 21 | 22 | ThresholdCascade tc = new ThresholdCascade(m); 23 | boolean flag = tc.threshold(2, .01); 24 | assertTrue(flag); 25 | 26 | flag = tc.threshold(4, .01); 27 | assertTrue(flag); 28 | } 29 | 30 | } -------------------------------------------------------------------------------- /javamsketch/outlierbench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx10g -Xms10g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | OutlierBench $@ 4 | -------------------------------------------------------------------------------- /javamsketch/parallelMergeBench.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | java -Xmx50g -Xms50g -cp quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt) \ 3 | ParallelMergeBench $@ 4 | -------------------------------------------------------------------------------- /javamsketch/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | futuredata 8 | java-msketch 9 | pom 10 | 1.0-SNAPSHOT 11 | 12 | 13 | 14 | 15 | org.apache.maven.plugins 16 | maven-compiler-plugin 17 | 3.3 18 | 19 | 1.8 20 | 1.8 21 | 22 | 23 | 24 | 25 | 26 | msolver 27 | quantilebench 28 | 29 | 30 | 31 | 32 | junit 33 | junit 34 | 4.12 35 | test 36 | 37 | 38 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | java-msketch 7 | futuredata 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | quantile-bench 13 | 14 | 15 | 16 | futuredata 17 | msolver 18 | 1.0-SNAPSHOT 19 | 20 | 21 | org.apache.commons 22 | commons-csv 23 | 1.5 24 | 25 | 26 | com.tdunning 27 | t-digest 28 | 3.2 29 | 30 | 31 | com.yahoo.datasketches 32 | sketches-core 33 | 0.10.3 34 | 35 | 36 | org.apache.spark 37 | spark-catalyst_2.11 38 | 2.2.1 39 | 40 | 41 | com.fasterxml.jackson.core 42 | jackson-databind 43 | 2.9.3 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/BoundRuntimeBench.java: -------------------------------------------------------------------------------- 1 | import msolver.BoundSolver; 2 | import msolver.data.ShuttleData; 3 | 4 | public class BoundRuntimeBench { 5 | private static int numIters = 1000000; 6 | private static BoundSolver boundSolver; 7 | 8 | public static void main(String[] args) throws Exception { 9 | int k = 11; 10 | double[] powerSums = new 
double[k]; 11 | for (int i = 0; i < powerSums.length; i++) { 12 | powerSums[i] = ShuttleData.powerSums[i]; 13 | } 14 | 15 | // Check bounds match 16 | boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max); 17 | double boundLindsay = boundSolver.boundSizeLindsay(45); 18 | double boundRacz = boundSolver.boundSizeRacz(45); 19 | if (Math.abs(boundLindsay - boundRacz) > 1e-4) { 20 | System.out.format("Lindsay bound and Racz bound do not match: %f %f\n", boundLindsay, boundRacz); 21 | } 22 | 23 | // Warm start 24 | for (int i = 0; i < numIters / 2; i++) { 25 | boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max); 26 | boundSolver.boundSizeLindsay(45); 27 | boundSolver.boundSizeRacz(45); 28 | } 29 | 30 | bench(1); 31 | bench(10); 32 | bench(100); 33 | } 34 | 35 | public static void bench(int queriesPerSolver) { 36 | long startTime; 37 | long elapsed; 38 | 39 | startTime = System.nanoTime(); 40 | for (int i = 0; i < numIters / queriesPerSolver; i++) { 41 | boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max); 42 | for (int j = 0; j < queriesPerSolver; j++) { 43 | boundSolver.boundSizeRacz(45); 44 | } 45 | } 46 | elapsed = System.nanoTime() - startTime; 47 | double secondsPerRacz = elapsed / (1.0e9 * numIters); 48 | System.out.format("Time Per Solve @%d queries per solver (Racz): %g\n", queriesPerSolver, secondsPerRacz); 49 | 50 | startTime = System.nanoTime(); 51 | for (int i = 0; i < numIters / queriesPerSolver; i++) { 52 | boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max); 53 | for (int j = 0; j < queriesPerSolver; j++) { 54 | boundSolver.boundSizeLindsay(45); 55 | } 56 | } 57 | elapsed = System.nanoTime() - startTime; 58 | double secondsPerLindsay = elapsed / (1.0e9 * numIters); 59 | System.out.format("Time Per Solve @%d queries per solver (Lindsay): %g\n", queriesPerSolver, secondsPerLindsay); 60 | 61 | System.out.println("Speedup by 
using Lindsay (higher is better): "+secondsPerRacz/secondsPerLindsay); 62 | System.out.println(""); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/RunConfig.java: -------------------------------------------------------------------------------- 1 | import com.fasterxml.jackson.core.type.TypeReference; 2 | import com.fasterxml.jackson.databind.ObjectMapper; 3 | 4 | import java.io.BufferedReader; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.util.Map; 8 | 9 | public class RunConfig { 10 | private Map values; 11 | 12 | public RunConfig(Map values) { 13 | this.values = values; 14 | } 15 | 16 | public static RunConfig fromJsonFile(String file) throws IOException { 17 | BufferedReader r = new BufferedReader(new FileReader(file)); 18 | ObjectMapper mapper = new ObjectMapper(); 19 | Map map = mapper.readValue( 20 | r, 21 | new TypeReference>() {} 22 | ); 23 | return new RunConfig(map); 24 | } 25 | 26 | public static RunConfig fromJsonString(String json) throws IOException { 27 | ObjectMapper mapper = new ObjectMapper(); 28 | Map map = mapper.readValue( 29 | json, 30 | new TypeReference>() {} 31 | ); 32 | return new RunConfig(map); 33 | } 34 | 35 | @SuppressWarnings("unchecked") 36 | public T get(String key) { 37 | return (T) values.get(key); 38 | } 39 | 40 | @SuppressWarnings("unchecked") 41 | public T get(String key, T defaultValue) { 42 | return (T) values.getOrDefault(key, defaultValue); 43 | } 44 | 45 | public Map getValues() { 46 | return values; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/histogram/ArrayUtils.java: -------------------------------------------------------------------------------- 1 | package histogram; 2 | 3 | /* 4 | * Licensed to Metamarkets Group Inc. (Metamarkets) under one 5 | * or more contributor license agreements. 
See the NOTICE file 6 | * distributed with this work for additional information 7 | * regarding copyright ownership. Metamarkets licenses this file 8 | * to you under the Apache License, Version 2.0 (the 9 | * "License"); you may not use this file except in compliance 10 | * with the License. You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, 15 | * software distributed under the License is distributed on an 16 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 | * KIND, either express or implied. See the License for the 18 | * specific language governing permissions and limitations 19 | * under the License. 20 | */ 21 | 22 | public class ArrayUtils 23 | { 24 | public static int hashCode(long[] a, int fromIndex, int toIndex) 25 | { 26 | int hashCode = 1; 27 | int i = fromIndex; 28 | while (i < toIndex) { 29 | long v = a[i]; 30 | hashCode = 31 * hashCode + (int) (v ^ (v >>> 32)); 31 | ++i; 32 | } 33 | return hashCode; 34 | } 35 | 36 | public static int hashCode(float[] a, int fromIndex, int toIndex) 37 | { 38 | int hashCode = 1; 39 | int i = fromIndex; 40 | while (i < toIndex) { 41 | hashCode = 31 * hashCode + Float.floatToIntBits(a[i]); 42 | ++i; 43 | } 44 | return hashCode; 45 | } 46 | 47 | public static int hashCode(double[] a, int fromIndex, int toIndex) 48 | { 49 | int hashCode = 1; 50 | int i = fromIndex; 51 | while (i < toIndex) { 52 | long v = Double.doubleToLongBits(a[i]); 53 | hashCode = 31 * hashCode + (int) (v ^ (v >>> 32)); 54 | ++i; 55 | } 56 | return hashCode; 57 | } 58 | } 59 | 60 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/CSVDataSource.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import org.apache.commons.csv.CSVFormat; 4 | import org.apache.commons.csv.CSVRecord; 5 | 6 | 
import java.io.FileReader; 7 | import java.io.IOException; 8 | import java.io.Reader; 9 | import java.util.ArrayList; 10 | 11 | public class CSVDataSource implements DataSource { 12 | public String fileName; 13 | public int column; 14 | public int limit = Integer.MAX_VALUE; 15 | public boolean hasHeader = true; 16 | 17 | public CSVDataSource(String fileName, int column) { 18 | this.fileName = fileName; 19 | this.column = column; 20 | } 21 | 22 | public void setHasHeader(boolean flag) { 23 | this.hasHeader = flag; 24 | } 25 | 26 | public void setLimit(int limit) { 27 | this.limit = limit; 28 | } 29 | // Parses the configured column of each CSV record as a double, stopping once limit rows have been read. 30 | @Override 31 | public double[] get() throws IOException { 32 | CSVFormat format = CSVFormat.RFC4180; 33 | if (hasHeader) { 34 | format = format.withFirstRecordAsHeader(); 35 | } 36 | ArrayList results = new ArrayList<>(); 37 | // try-with-resources closes the reader even when a cell fails to parse 38 | try (Reader in = new FileReader(fileName)) { 39 | Iterable records = format.parse(in); 40 | int rowCount = 0; 41 | for (CSVRecord row : records) { 42 | results.add(Double.parseDouble(row.get(column))); 43 | rowCount++; 44 | if (rowCount >= limit) { 45 | break; 46 | } 47 | } 48 | } 49 | 50 | double[] resultArr = new double[results.size()]; 51 | for (int i = 0; i < resultArr.length; i++) { 52 | resultArr[i] = results.get(i); 53 | } 54 | 55 | return resultArr; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/CSVOutput.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import java.io.BufferedWriter; 4 | import java.io.FileWriter; 5 | import java.io.PrintWriter; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | import java.util.Map; 9 | 10 | public class CSVOutput { 11 | private String baseDir = "results"; 12 | private boolean addTimeStamp = true; 13 | 14 | public CSVOutput() {} 15 | 16 | public void writeAllResults( 17 | List> results, 18 | String fileName 19 | ) throws 
Exception { 20 | long seconds = System.currentTimeMillis() / 1000; 21 | String fName = ""; 22 | if (addTimeStamp) { 23 | fName = String.format( 24 | "%s/%s_%d.csv", 25 | baseDir, 26 | fileName, 27 | seconds 28 | ); 29 | } else { 30 | fName = String.format( 31 | "%s/%s.csv", 32 | baseDir, 33 | fileName 34 | ); 35 | } 36 | try (PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(fName)))) { // closed (and flushed) even if a row lookup throws 37 | // Header row: the key set of the first result fixes the column order for every row. 38 | List keys = new ArrayList<>(results.get(0).keySet()); 39 | out.println(String.join(",", keys)); 40 | for (Map row : results) { 41 | List vals = new ArrayList<>(keys.size()); 42 | for (String key : keys) { 43 | vals.add(row.get(key)); 44 | } 45 | out.println(String.join(",", vals)); 46 | } 47 | } 48 | } 49 | 50 | public void setAddTimeStamp(boolean addTimeStamp) { 51 | this.addTimeStamp = addTimeStamp; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/DataGrouper.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import java.util.ArrayList; 4 | 5 | public interface DataGrouper { 6 | public ArrayList group(double[] data); 7 | } 8 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/DataSource.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import java.io.IOException; 4 | 5 | public interface DataSource { 6 | double[] get() throws IOException; 7 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/GroupedCSVDataSource.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | 8 | public class GroupedCSVDataSource { 
9 | public String fileName; 10 | public int limit = Integer.MAX_VALUE; 11 | public boolean hasHeader = true; 12 | 13 | public GroupedCSVDataSource(String fileName) { 14 | this.fileName = fileName; 15 | } 16 | 17 | public void setHasHeader(boolean flag) { 18 | this.hasHeader = flag; 19 | } 20 | 21 | public void setLimit(int limit) { 22 | this.limit = limit; 23 | } 24 | // Reads up to limit lines and parses the comma-separated values between '[' and ']' on each line into one double[] group. 25 | public ArrayList get() throws IOException { 26 | ArrayList vals = new ArrayList<>(); 27 | try (BufferedReader bf = new BufferedReader(new FileReader(fileName))) { // closed even if a value fails to parse 28 | if (hasHeader) { 29 | bf.readLine(); 30 | } 31 | for (int i = 0; i < limit; i++) { 32 | String curLine = bf.readLine(); 33 | if (curLine == null) { 34 | break; 35 | } 36 | String[] rawGroup = curLine.substring(curLine.indexOf('[')+1, curLine.lastIndexOf(']')).split(","); 37 | double[] group = new double[rawGroup.length]; 38 | for (int j = 0; j < group.length; j++) { 39 | group[j] = Double.parseDouble(rawGroup[j]); 40 | } 41 | vals.add(group); 42 | } 43 | } 44 | return vals; 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/SeqDataGrouper.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import java.util.ArrayList; 4 | 5 | public class SeqDataGrouper implements DataGrouper { 6 | private int cellSize; 7 | 8 | public SeqDataGrouper(int cellSize) { 9 | this.cellSize = cellSize; 10 | } 11 | 12 | @Override 13 | public ArrayList group(double[] data) { 14 | int n = data.length; 15 | int numCells = (int)Math.ceil(n*1.0/cellSize); 16 | ArrayList cells = new ArrayList<>(numCells); 17 | for (int i = 0; i < numCells; i++) { 18 | int startIdx = i*cellSize; 19 | int endIdx = Math.min( 20 | (i+1)*cellSize, n 21 | ); 22 | double[] curCell = new double[endIdx - startIdx]; 23 | for (int j = 0; j < curCell.length; j++) { 24 | curCell[j] = data[startIdx+j]; 25 | } 26 | cells.add(curCell); 27 | } 28 | return 
cells; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/io/SimpleCSVDataSource.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.FileReader; 5 | import java.io.IOException; 6 | import java.util.ArrayList; 7 | 8 | public class SimpleCSVDataSource implements DataSource{ 9 | public String fileName; 10 | public int column; 11 | public int limit = Integer.MAX_VALUE; 12 | public boolean hasHeader = true; 13 | 14 | public SimpleCSVDataSource(String fileName, int column) { 15 | this.fileName = fileName; 16 | this.column = column; 17 | } 18 | 19 | public void setHasHeader(boolean flag) { 20 | this.hasHeader = flag; 21 | } 22 | 23 | public void setLimit(int limit) { 24 | this.limit = limit; 25 | } 26 | // Reads up to limit lines and parses the comma-separated field at index column (the last field on shorter lines) as a double. 27 | @Override 28 | public double[] get() throws IOException { 29 | ArrayList vals = new ArrayList<>(); 30 | try (BufferedReader bf = new BufferedReader(new FileReader(fileName))) { // closed even if a field fails to parse 31 | if (hasHeader) { 32 | bf.readLine(); 33 | } 34 | for (int i = 0; i < limit; i++) { 35 | String curLine = bf.readLine(); 36 | if (curLine == null) { 37 | break; 38 | } 39 | int colCount = 0; 40 | int startIdx = 0; 41 | int nextIdx = -1; 42 | while (colCount <= column) { 43 | startIdx = nextIdx+1; 44 | nextIdx = curLine.indexOf(',', startIdx); 45 | colCount++; 46 | if (nextIdx == -1) { 47 | nextIdx = curLine.length(); 48 | break; 49 | } 50 | } 51 | vals.add(Double.parseDouble(curLine.substring(startIdx, nextIdx))); 52 | } 53 | } 54 | double[] uVals = new double[vals.size()]; 55 | for (int i = 0; i < uVals.length; i++) { 56 | uVals[i] = vals.get(i); 57 | } 58 | return uVals; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sampling/SampleSubsetSummary.java: 
-------------------------------------------------------------------------------- 1 | package sampling; 2 | 3 | /** 4 | * A simple object to capture the results of a subset sum query on a sampling sketch. 5 | * It holds the estimate together with its lower and upper bounds and the total sketch weight. 6 | * @author Jon Malkin 7 | */ 8 | public class SampleSubsetSummary { 9 | private double lowerBound; 10 | private double estimate; 11 | private double upperBound; 12 | private double totalSketchWeight; 13 | 14 | SampleSubsetSummary(final double lowerBound, 15 | final double estimate, 16 | final double upperBound, 17 | final double totalSketchWeight) { 18 | this.lowerBound = lowerBound; 19 | this.estimate = estimate; 20 | this.upperBound = upperBound; 21 | this.totalSketchWeight = totalSketchWeight; 22 | } 23 | 24 | public double getLowerBound() { 25 | return lowerBound; 26 | } 27 | 28 | public double getTotalSketchWeight() { 29 | return totalSketchWeight; 30 | } 31 | 32 | public double getUpperBound() { 33 | return upperBound; 34 | } 35 | 36 | public double getEstimate() { 37 | return estimate; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sampling/SamplingUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2016-17, Yahoo! Inc. 3 | * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. 4 | */ 5 | 6 | package sampling; 7 | 8 | import static com.yahoo.sketches.BoundsOnBinomialProportions.approximateLowerBoundOnP; 9 | import static com.yahoo.sketches.BoundsOnBinomialProportions.approximateUpperBoundOnP; 10 | 11 | import java.util.Random; 12 | 13 | /** 14 | * Common utility functions for the sampling family of sketches. 
15 | * 16 | * @author Jon Malkin 17 | */ 18 | final class SamplingUtil { 19 | 20 | /** 21 | * Number of standard deviations to use for subset sum error bounds 22 | */ 23 | private static final double DEFAULT_KAPPA = 2.0; 24 | 25 | private SamplingUtil() {} 26 | 27 | /** 28 | * Checks if target sampling allocation is more than 50% of max sampling size. If so, returns 29 | * max sampling size, otherwise passes through the target size. 30 | * 31 | * @param maxSize Maximum allowed reservoir size, as from getK() 32 | * @param resizeTarget Next size based on a pure ResizeFactor scaling 33 | * @return (reservoirSize_ < 2*resizeTarget ? reservoirSize_ : resizeTarget) 34 | */ 35 | static int getAdjustedSize(final int maxSize, final int resizeTarget) { 36 | if (maxSize - (resizeTarget << 1) < 0L) { 37 | return maxSize; 38 | } 39 | return resizeTarget; 40 | } 41 | 42 | // static double nextDoubleExcludeZero() { 43 | // double r = rand.nextDouble(); 44 | // while (r == 0.0) { 45 | // r = rand.nextDouble(); 46 | // } 47 | // return r; 48 | // } 49 | 50 | static int startingSubMultiple(final int lgTarget, final int lgRf, final int lgMin) { 51 | return (lgTarget <= lgMin) 52 | ? lgMin : (lgRf == 0) ? 
lgTarget 53 | : (lgTarget - lgMin) % lgRf + lgMin; 54 | } 55 | 56 | static double pseudoHypergeometricUBonP(final long n, final int k, final double samplingRate) { 57 | final double adjustedKappa = DEFAULT_KAPPA * Math.sqrt(1 - samplingRate); 58 | return approximateUpperBoundOnP(n, k, adjustedKappa); 59 | } 60 | 61 | static double pseudoHypergeometricLBonP(final long n, final int k, final double samplingRate) { 62 | final double adjustedKappa = DEFAULT_KAPPA * Math.sqrt(1 - samplingRate); 63 | return approximateLowerBoundOnP(n, k, adjustedKappa); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/ApproximateHistogramSketch.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import histogram.ApproximateHistogram; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | public class ApproximateHistogramSketch implements QuantileSketch{ 9 | private int size; 10 | private ApproximateHistogram hist; 11 | private boolean calcError; 12 | 13 | private double[] errors; 14 | 15 | @Override 16 | public String getName() { 17 | return "approx_histogram"; 18 | } 19 | 20 | @Override 21 | public int getSize() { 22 | return hist.getMinStorageSize(); 23 | } 24 | 25 | @Override 26 | public double getSizeParam() { 27 | return size; 28 | } 29 | 30 | @Override 31 | public void setSizeParam(double sizeParam) { 32 | this.size = (int)sizeParam; 33 | } 34 | 35 | @Override 36 | public void setCalcError(boolean flag) { 37 | this.calcError = calcError; 38 | } 39 | 40 | @Override 41 | public void initialize() { 42 | this.hist = new ApproximateHistogram(size); 43 | } 44 | 45 | @Override 46 | public void add(double[] data) { 47 | for (double x : data) { 48 | this.hist.offer((float)x); 49 | } 50 | } 51 | 52 | @Override 53 | public QuantileSketch merge(List sketches, int startIndex, int endIndex) { 54 | for (int i = 
startIndex; i < endIndex; i++) { 55 | ApproximateHistogramSketch s = (ApproximateHistogramSketch) sketches.get(i); 56 | this.hist.foldFast(s.hist); 57 | } 58 | return this; 59 | } 60 | 61 | @Override 62 | public double[] getQuantiles(List ps) throws Exception { 63 | float[] psArray = new float[ps.size()]; 64 | for (int i = 0; i < ps.size(); i++) { 65 | psArray[i] = ps.get(i).floatValue(); 66 | } 67 | float[] qsFloat = this.hist.getQuantiles(psArray); 68 | double[] qsDouble = new double[qsFloat.length]; 69 | errors = new double[qsFloat.length]; 70 | for (int i = 0; i < qsFloat.length; i++) { 71 | qsDouble[i] = qsFloat[i]; 72 | } 73 | return qsDouble; 74 | } 75 | 76 | @Override 77 | public double[] getErrors() { 78 | return errors; 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/GKAdaptiveSketch.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import gk.GKSketch; 4 | 5 | import java.util.ArrayList; 6 | import java.util.Arrays; 7 | import java.util.List; 8 | 9 | public class GKAdaptiveSketch implements QuantileSketch{ 10 | private GKSketch summary; 11 | private double sizeParam = 100.0; 12 | private int bufferSize = 100; 13 | private double[] buffer; 14 | private double[] errors; 15 | 16 | @Override 17 | public String getName() { 18 | return "gk_adaptive"; 19 | } 20 | 21 | @Override 22 | public int getSize() { 23 | return summary.getTuples().size() * (Double.BYTES + 2*Integer.BYTES); 24 | } 25 | 26 | @Override 27 | public double getSizeParam() { 28 | return sizeParam; 29 | } 30 | 31 | @Override 32 | public void setSizeParam(double sizeParam) { 33 | this.sizeParam = sizeParam; 34 | } 35 | 36 | @Override 37 | public void setCalcError(boolean flag) { 38 | return; 39 | } 40 | 41 | @Override 42 | public void initialize() { 43 | this.buffer = new double[bufferSize]; 44 | this.summary = new GKSketch( 45 | 
1.0/sizeParam 46 | ); 47 | } 48 | 49 | @Override 50 | public void add(double[] data) { 51 | int iBuff = 0; 52 | for (double x : data) { 53 | buffer[iBuff] = x; 54 | iBuff++; 55 | if (iBuff == bufferSize) { 56 | this.summary.add(buffer); 57 | iBuff = 0; 58 | } 59 | } 60 | this.summary.add(Arrays.copyOf(buffer, iBuff)); 61 | } 62 | 63 | @Override 64 | public QuantileSketch merge(List sketches, int startIndex, int endIndex) { 65 | GKSketch newSumm = this.summary; 66 | for (int i = startIndex; i < endIndex; i++) { 67 | GKAdaptiveSketch gks = (GKAdaptiveSketch) sketches.get(i); 68 | newSumm.merge(gks.summary); 69 | } 70 | return this; 71 | } 72 | 73 | @Override 74 | public double[] getQuantiles(List ps) throws Exception { 75 | int m = ps.size(); 76 | double[] quantiles = new double[m]; 77 | for (int i = 0; i < m; i++) { 78 | quantiles[i] = summary.quantile(ps.get(i)); 79 | } 80 | 81 | errors = new double[m]; 82 | for (int i = 0; i < m; i++) { 83 | errors[i] = 1.0/sizeParam; 84 | } 85 | return quantiles; 86 | } 87 | 88 | @Override 89 | public double[] getErrors() { 90 | return errors; 91 | } 92 | 93 | public String getDebugString() { 94 | ArrayList parts = new ArrayList<>(); 95 | for (GKSketch.Tuple t : summary.getTuples()) { 96 | parts.add(t.toString()); 97 | } 98 | return String.join(",", parts); 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/QuantileUtil.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import org.apache.commons.math3.stat.descriptive.rank.Percentile; 4 | 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import java.util.function.Supplier; 8 | 9 | public class QuantileUtil { 10 | public static double[] getTrueQuantiles(List ps, double[] data) { 11 | double[] expectedQs = new double[ps.size()]; 12 | Percentile p = new 
Percentile().withEstimationType(Percentile.EstimationType.R_1); 13 | p.setData(data); 14 | for (int i = 0; i < ps.size(); i++) { 15 | expectedQs[i] = p.evaluate(ps.get(i)*100); 16 | } 17 | return expectedQs; 18 | } 19 | 20 | public static QuantileSketch trainAndMerge( 21 | Supplier sFactory, 22 | ArrayList cellData 23 | ) throws Exception { 24 | int n = cellData.size(); 25 | ArrayList sketches = new ArrayList<>(n); 26 | for (int i = 0; i < n; i++) { 27 | QuantileSketch curSketch = sFactory.get(); 28 | curSketch.initialize(); 29 | curSketch.add(cellData.get(i)); 30 | sketches.add(curSketch); 31 | } 32 | QuantileSketch mergedSketch = sFactory.get(); 33 | mergedSketch.initialize(); 34 | mergedSketch.merge(sketches); 35 | return mergedSketch; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/SamplingSketch.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import sampling.ReservoirItemsSketch; 4 | import sampling.ReservoirItemsUnion; 5 | import org.apache.commons.math3.util.FastMath; 6 | 7 | import java.util.List; 8 | 9 | public class SamplingSketch implements QuantileSketch { 10 | private int size; 11 | private ReservoirItemsSketch reservoir; 12 | private double[] errors; 13 | private boolean calcError = true; 14 | 15 | public SamplingSketch() {} 16 | 17 | @Override 18 | public String getName() { 19 | return "sampling"; 20 | } 21 | 22 | @Override 23 | public int getSize() { 24 | return (Long.BYTES + this.size * Double.BYTES); 25 | } 26 | 27 | @Override 28 | public double getSizeParam() { 29 | return size; 30 | } 31 | 32 | @Override 33 | public void setSizeParam(double sizeParam) { 34 | this.size = (int)sizeParam; 35 | } 36 | 37 | @Override 38 | public void setCalcError(boolean flag) { 39 | this.calcError = flag; 40 | } 41 | 42 | @Override 43 | public void initialize() { 44 | this.reservoir = 
ReservoirItemsSketch.newInstance(size); 45 | } 46 | 47 | @Override 48 | public void add(double[] data) { 49 | for (double x : data) { 50 | this.reservoir.update(x); 51 | } 52 | } 53 | 54 | 55 | @Override 56 | public QuantileSketch merge(List sketches, int startIndex, int endIndex) { 57 | ReservoirItemsUnion newUnion = ReservoirItemsUnion.newInstance(this.size); 58 | newUnion.update(this.reservoir); 59 | for (int i = startIndex; i < endIndex; i++) { 60 | SamplingSketch ss = (SamplingSketch) sketches.get(i); 61 | newUnion.update(ss.reservoir); 62 | } 63 | this.reservoir = newUnion.getResult(); 64 | return this; 65 | } 66 | 67 | @Override 68 | public double[] getQuantiles(List ps) throws Exception { 69 | Double[] samples = this.reservoir.getSamples(); 70 | double[] data = new double[samples.length]; 71 | for (int i = 0; i < data.length; i++) { 72 | data[i] = samples[i]; 73 | } 74 | 75 | int m = ps.size(); 76 | double[] quantiles = QuantileUtil.getTrueQuantiles(ps, data); 77 | 78 | errors = new double[m]; 79 | if (calcError) { 80 | for (int i = 0; i < m; i++) { 81 | double p = ps.get(i); 82 | errors[i] = 2.5 * FastMath.sqrt(p * (1 - p) / this.size); 83 | } 84 | } 85 | return quantiles; 86 | } 87 | 88 | @Override 89 | public double[] getErrors() { 90 | return errors; 91 | } 92 | 93 | } 94 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/SketchLoader.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import java.io.IOException; 4 | 5 | public class SketchLoader { 6 | public static QuantileSketch load( 7 | String sketchName 8 | ) throws IOException { 9 | if (sketchName.startsWith("cmoment")) { 10 | return new CMomentSketch(1e-9); 11 | } else if (sketchName.startsWith("tdigest")) { 12 | return new TDigestSketch(); 13 | } else if (sketchName.startsWith("yahoo")) { 14 | return new YahooSketch(); 15 | } else if 
(sketchName.startsWith("spark_gk")) { 16 | return new SparkGKSketch(); 17 | } else if (sketchName.startsWith("sampling")) { 18 | return new SamplingSketch(); 19 | } else if (sketchName.startsWith("reservoir_sampling")) { 20 | return new ReservoirSamplingSketch(); 21 | } else if (sketchName.startsWith("histogram")) { 22 | return new HistogramSketch(); 23 | } else if (sketchName.startsWith("moment")) { 24 | return new MomentSketch(1e-9); 25 | } else if (sketchName.startsWith("hmoment")) { 26 | return new HybridMomentSketch(1e-9); 27 | } else if (sketchName.startsWith("approx_histogram")) { 28 | return new ApproximateHistogramSketch(); 29 | } else if (sketchName.startsWith("random")) { 30 | return new RandomSketch(); 31 | } else if (sketchName.startsWith("gk_adaptive")) { 32 | return new GKAdaptiveSketch(); 33 | } 34 | throw new IOException("Invalid Sketch"); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/SparkGKSketch.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import org.apache.spark.sql.catalyst.util.QuantileSummaries; 4 | import scala.Option; 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | public class SparkGKSketch implements QuantileSketch{ 10 | private QuantileSummaries summary; 11 | private double sizeParam = 100.0; 12 | private double[] errors; 13 | 14 | @Override 15 | public String getName() { 16 | return "spark_gk"; 17 | } 18 | 19 | @Override 20 | public int getSize() { 21 | return summary.sampled().length * (Double.BYTES + 2*Integer.BYTES); 22 | } 23 | 24 | @Override 25 | public double getSizeParam() { 26 | return sizeParam; 27 | } 28 | 29 | @Override 30 | public void setSizeParam(double sizeParam) { 31 | this.sizeParam = sizeParam; 32 | } 33 | 34 | @Override 35 | public void setCalcError(boolean flag) { 36 | return; 37 | } 38 | 39 | @Override 40 | public void 
initialize() { 41 | this.summary = new QuantileSummaries( 42 | QuantileSummaries.defaultCompressThreshold(), 43 | 1.0/sizeParam, 44 | new QuantileSummaries.Stats[0], 45 | 0 46 | ); 47 | } 48 | 49 | @Override 50 | public void add(double[] data) { 51 | QuantileSummaries curSummary = this.summary; 52 | for (double d : data) { 53 | curSummary = curSummary.insert(d); 54 | } 55 | this.summary = curSummary.compress(); 56 | } 57 | 58 | @Override 59 | public QuantileSketch merge(List sketches, int startIndex, int endIndex) { 60 | QuantileSummaries newSumm = this.summary; 61 | for (int i = startIndex; i < endIndex; i++) { 62 | SparkGKSketch gks = (SparkGKSketch) sketches.get(i); 63 | newSumm = newSumm.merge(gks.summary); 64 | } 65 | this.summary = newSumm.compress(); 66 | return this; 67 | } 68 | 69 | @Override 70 | public double[] getQuantiles(List ps) throws Exception { 71 | int m = ps.size(); 72 | double[] quantiles = new double[m]; 73 | for (int i = 0; i < m; i++) { 74 | Option res = summary.query(ps.get(i)); 75 | if (res.isEmpty()) { 76 | throw new Exception("Bad GK Query"); 77 | } else { 78 | quantiles[i] = (Double)res.get(); 79 | } 80 | } 81 | 82 | errors = new double[m]; 83 | for (int i = 0; i < m; i++) { 84 | errors[i] = 1.0/sizeParam; 85 | } 86 | return quantiles; 87 | } 88 | 89 | @Override 90 | public double[] getErrors() { 91 | return errors; 92 | } 93 | 94 | public String getDebugString() { 95 | StringBuilder b = new StringBuilder(); 96 | for (QuantileSummaries.Stats s : summary.sampled()) { 97 | b.append(s.value()); 98 | b.append(":"); 99 | b.append(s.delta()+"/"+s.g()); 100 | b.append(","); 101 | } 102 | return b.toString(); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/sketches/YahooSketch.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | //import com.yahoo.sketches.quantiles.DoublesSketch; 4 | 
//import com.yahoo.sketches.quantiles.DoublesUnion; 5 | //import com.yahoo.sketches.quantiles.UpdateDoublesSketch; 6 | 7 | import yahoo.DoublesSketch; 8 | import yahoo.DoublesUnion; 9 | import yahoo.UpdateDoublesSketch; 10 | 11 | import java.util.ArrayList; 12 | import java.util.Arrays; 13 | import java.util.List; 14 | import java.util.concurrent.CountDownLatch; 15 | 16 | public class YahooSketch implements QuantileSketch { 17 | private int k; 18 | private UpdateDoublesSketch sketch; 19 | private boolean calcError = true; 20 | 21 | private double[] errors; 22 | 23 | public YahooSketch() {} 24 | 25 | protected YahooSketch(UpdateDoublesSketch s) { 26 | this.k = s.getK(); 27 | this.sketch = s; 28 | } 29 | 30 | @Override 31 | public String getName() { 32 | return "yahoo"; 33 | } 34 | 35 | @Override 36 | public int getSize() { 37 | return sketch.getStorageBytes(); 38 | } 39 | 40 | @Override 41 | public double getSizeParam() { 42 | return k; 43 | } 44 | 45 | @Override 46 | public void setSizeParam(double sizeParam) { 47 | this.k = (int)sizeParam; 48 | } 49 | 50 | @Override 51 | public void setCalcError(boolean flag) { 52 | calcError = flag; 53 | } 54 | 55 | @Override 56 | public void initialize() { 57 | sketch = DoublesSketch.builder().setK(this.k).build(); 58 | } 59 | 60 | @Override 61 | public void add(double[] data) { 62 | for (double x : data) { 63 | sketch.update(x); 64 | } 65 | } 66 | 67 | @Override 68 | public QuantileSketch merge(List sketches, int startIndex, int endIndex) { 69 | DoublesUnion union = DoublesUnion.builder().setMaxK(k).build(); 70 | union.update(this.sketch); 71 | for (int i = startIndex; i < endIndex; i++) { 72 | YahooSketch ys = (YahooSketch) sketches.get(i); 73 | union.update(ys.sketch); 74 | } 75 | this.sketch = union.getResult(); 76 | return this; 77 | } 78 | 79 | @Override 80 | public double[] getQuantiles(List ps) throws Exception { 81 | int m = ps.size(); 82 | double[] psArray = new double[m]; 83 | for (int i = 0; i < m; i++) { 84 | 
psArray[i] = ps.get(i); 85 | } 86 | double[] quantiles = sketch.getQuantiles(psArray); 87 | 88 | errors = new double[m]; 89 | if (calcError) { 90 | double errorVal = sketch.getNormalizedRankError(); 91 | for (int i = 0; i < m; i++) { 92 | errors[i] = errorVal; 93 | } 94 | } 95 | return quantiles; 96 | } 97 | 98 | @Override 99 | public double[] getErrors() { 100 | return errors; 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/yahoo/CompactDoublesSketch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017, Yahoo! Inc. Licensed under the terms of the 3 | * Apache License 2.0. See LICENSE file at the project root for terms. 4 | */ 5 | 6 | package yahoo; 7 | 8 | import com.yahoo.memory.Memory; 9 | import com.yahoo.memory.WritableMemory; 10 | 11 | /** 12 | * @author Jon Malkin 13 | */ 14 | public abstract class CompactDoublesSketch extends DoublesSketch { 15 | CompactDoublesSketch(final int k) { 16 | super(k); 17 | } 18 | 19 | public static CompactDoublesSketch heapify(final Memory srcMem) { 20 | return HeapCompactDoublesSketch.heapifyInstance(srcMem); 21 | } 22 | 23 | @Override 24 | boolean isCompact() { 25 | return true; 26 | } 27 | 28 | /** 29 | * Gets the Memory if it exists, otherwise returns null. 30 | * @return the Memory if it exists, otherwise returns null. 31 | */ 32 | @Override 33 | abstract WritableMemory getMemory(); 34 | } 35 | -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/yahoo/DirectDoublesSketchAccessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017, Yahoo! Inc. Licensed under the terms of the 3 | * Apache License 2.0. See LICENSE file at the project root for terms. 
4 | */ 5 | 6 | package yahoo; 7 | 8 | import java.util.Arrays; 9 | 10 | import com.yahoo.memory.WritableMemory; 11 | 12 | /** 13 | * @author Jon Malkin 14 | */ 15 | class DirectDoublesSketchAccessor extends DoublesSketchAccessor { 16 | DirectDoublesSketchAccessor(final DoublesSketch ds, 17 | final boolean forceSize, 18 | final int level) { 19 | super(ds, forceSize, level); 20 | assert ds.isDirect(); 21 | } 22 | 23 | @Override 24 | DoublesSketchAccessor copyAndSetLevel(final int level) { 25 | return new DirectDoublesSketchAccessor(ds_, forceSize_, level); 26 | } 27 | 28 | @Override 29 | double get(final int index) { 30 | assert index >= 0 && index < numItems_; 31 | assert n_ == ds_.getN(); 32 | 33 | final int idxOffset = offset_ + (index << 3); 34 | return ds_.getMemory().getDouble(idxOffset); 35 | } 36 | 37 | @Override 38 | double set(final int index, final double value) { 39 | assert index >= 0 && index < numItems_; 40 | assert n_ == ds_.getN(); 41 | assert !ds_.isCompact(); // can't write to a compact sketch 42 | 43 | final int idxOffset = offset_ + (index << 3); 44 | final WritableMemory mem = ds_.getMemory(); 45 | final double oldVal = mem.getDouble(idxOffset); 46 | mem.putDouble(idxOffset, value); 47 | return oldVal; 48 | } 49 | 50 | @Override 51 | double[] getArray(final int fromIdx, final int numItems) { 52 | final double[] dstArray = new double[numItems]; 53 | final int offsetBytes = offset_ + (fromIdx << 3); 54 | ds_.getMemory().getDoubleArray(offsetBytes, dstArray, 0, numItems); 55 | return dstArray; 56 | } 57 | 58 | @Override 59 | void putArray(final double[] srcArray, final int srcIndex, 60 | final int dstIndex, final int numItems) { 61 | assert !ds_.isCompact(); // can't write to compact sketch 62 | final int offsetBytes = offset_ + (dstIndex << 3); 63 | ds_.getMemory().putDoubleArray(offsetBytes, srcArray, srcIndex, numItems); 64 | } 65 | 66 | @Override 67 | void sort() { 68 | assert currLvl_ == BB_LVL_IDX; 69 | 70 | final double[] tmpBuffer = new 
double[numItems_]; 71 | final WritableMemory mem = ds_.getMemory(); 72 | mem.getDoubleArray(offset_, tmpBuffer, 0, numItems_); 73 | Arrays.sort(tmpBuffer, 0, numItems_); 74 | mem.putDoubleArray(offset_, tmpBuffer, 0, numItems_); 75 | } 76 | 77 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/yahoo/DoublesArrayAccessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017, Yahoo! Inc. Licensed under the terms of the 3 | * Apache License 2.0. See LICENSE file at the project root for terms. 4 | */ 5 | 6 | package yahoo; 7 | 8 | import java.util.Arrays; 9 | 10 | /** 11 | * @author Jon Malkin 12 | */ 13 | final class DoublesArrayAccessor extends DoublesBufferAccessor { 14 | private int numItems_; 15 | private double[] buffer_; 16 | 17 | private DoublesArrayAccessor(final double[] buffer) { 18 | numItems_ = buffer.length; 19 | buffer_ = buffer; 20 | } 21 | 22 | static DoublesArrayAccessor wrap(final double[] buffer) { 23 | return new DoublesArrayAccessor(buffer); 24 | } 25 | 26 | static DoublesArrayAccessor initialize(final int numItems) { 27 | return new DoublesArrayAccessor(new double[numItems]); 28 | } 29 | 30 | @Override 31 | double get(final int index) { 32 | assert index >= 0 && index < numItems_; 33 | return buffer_[index]; 34 | } 35 | 36 | @Override 37 | double set(final int index, final double value) { 38 | assert index >= 0 && index < numItems_; 39 | 40 | final double retVal = buffer_[index]; 41 | buffer_[index] = value; 42 | return retVal; 43 | } 44 | 45 | @Override 46 | int numItems() { 47 | return numItems_; 48 | } 49 | 50 | @Override 51 | double[] getArray(final int fromIdx, final int numItems) { 52 | return Arrays.copyOfRange(buffer_, fromIdx, fromIdx + numItems); 53 | } 54 | 55 | @Override 56 | void putArray(final double[] srcArray, final int srcIndex, 57 | final int dstIndex, final int numItems) { 58 | 
System.arraycopy(srcArray, srcIndex, buffer_, dstIndex, numItems); 59 | } 60 | 61 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/yahoo/DoublesBufferAccessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017, Yahoo! Inc. Licensed under the terms of the 3 | * Apache License 2.0. See LICENSE file at the project root for terms. 4 | */ 5 | 6 | package yahoo; 7 | 8 | /** 9 | * @author Jon Malkin 10 | */ 11 | abstract class DoublesBufferAccessor { 12 | abstract double get(final int index); 13 | 14 | abstract double set(final int index, final double value); 15 | 16 | abstract int numItems(); 17 | 18 | abstract double[] getArray(int fromIdx, int numItems); 19 | 20 | abstract void putArray(double[] srcArray, int srcIndex, 21 | int dstIndex, int numItems); 22 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/yahoo/DoublesSketchBuilder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2015-16, Yahoo! Inc. 3 | * Licensed under the terms of the Apache License 2.0. See LICENSE file at the project root for terms. 4 | */ 5 | 6 | package yahoo; 7 | 8 | import static yahoo.Util.LS; 9 | import static yahoo.Util.TAB; 10 | 11 | import com.yahoo.memory.WritableMemory; 12 | 13 | /** 14 | * For building a new quantiles DoublesSketch. 15 | * 16 | * @author Lee Rhodes 17 | */ 18 | public class DoublesSketchBuilder { 19 | private int bK = PreambleUtil.DEFAULT_K; 20 | 21 | /** 22 | * Constructor for a new DoublesSketchBuilder. The default configuration is 23 | *
<ul> 24 | * <li>k: 128. This produces a normalized rank error of about 1.7%</li> 25 | * <li>Memory: null</li> 26 | * </ul>
27 | */ 28 | public DoublesSketchBuilder() {} 29 | 30 | /** 31 | * Sets the parameter k that determines the accuracy and size of the sketch. 32 | * @param k determines the accuracy and size of the sketch. 33 | * It is recommended that k be a power of 2 to enable unioning of sketches with 34 | * different values of k. It is only possible to union from 35 | * larger values of k to smaller values. 36 | * @return this builder 37 | */ 38 | public DoublesSketchBuilder setK(final int k) { 39 | Util.checkK(k); 40 | bK = k; 41 | return this; 42 | } 43 | 44 | /** 45 | * Gets the current configured value of k 46 | * @return the current configured value of k 47 | */ 48 | public int getK() { 49 | return bK; 50 | } 51 | 52 | /** 53 | * Returns an UpdateDoublesSketch with the current configuration of this Builder. 54 | * @return a UpdateDoublesSketch 55 | */ 56 | public UpdateDoublesSketch build() { 57 | return HeapUpdateDoublesSketch.newInstance(bK); 58 | } 59 | 60 | /** 61 | * Returns a quantiles UpdateDoublesSketch with the current configuration of this builder 62 | * and the specified backing destination Memory store. 63 | * @param dstMem destination memory for use by the sketch 64 | * @return an UpdateDoublesSketch 65 | */ 66 | public UpdateDoublesSketch build(final WritableMemory dstMem) { 67 | return DirectUpdateDoublesSketch.newInstance(bK, dstMem); 68 | } 69 | 70 | /** 71 | * Creates a human readable string that describes the current configuration of this builder. 
72 | */ 73 | @Override 74 | public String toString() { 75 | final StringBuilder sb = new StringBuilder(); 76 | sb.append("QuantileSketchBuilder configuration:").append(LS); 77 | sb.append("K : ").append(TAB).append(bK).append(LS); 78 | return sb.toString(); 79 | } 80 | 81 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/main/java/yahoo/HeapDoublesSketchAccessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2017, Yahoo! Inc. Licensed under the terms of the 3 | * Apache License 2.0. See LICENSE file at the project root for terms. 4 | */ 5 | 6 | package yahoo; 7 | 8 | import java.util.Arrays; 9 | 10 | /** 11 | * @author Jon Malkin 12 | */ 13 | class HeapDoublesSketchAccessor extends DoublesSketchAccessor { 14 | HeapDoublesSketchAccessor(final DoublesSketch ds, 15 | final boolean forceSize, 16 | final int level) { 17 | super(ds, forceSize, level); 18 | assert !ds.isDirect(); 19 | } 20 | 21 | @Override 22 | DoublesSketchAccessor copyAndSetLevel(final int level) { 23 | return new HeapDoublesSketchAccessor(ds_, forceSize_, level); 24 | } 25 | 26 | @Override 27 | double get(final int index) { 28 | assert index >= 0 && index < numItems_; 29 | assert n_ == ds_.getN(); 30 | 31 | return ds_.getCombinedBuffer()[offset_ + index]; 32 | } 33 | 34 | @Override 35 | double set(final int index, final double value) { 36 | assert index >= 0 && index < numItems_; 37 | assert n_ == ds_.getN(); 38 | 39 | final int idxOffset = offset_ + index; 40 | final double oldVal = ds_.getCombinedBuffer()[idxOffset]; 41 | ds_.getCombinedBuffer()[idxOffset] = value; 42 | 43 | return oldVal; 44 | } 45 | 46 | @Override 47 | double[] getArray(final int fromIdx, final int numItems) { 48 | final int stIdx = offset_ + fromIdx; 49 | return Arrays.copyOfRange(ds_.getCombinedBuffer(), stIdx, stIdx + numItems); 50 | } 51 | 52 | @Override 53 | void putArray(final double[] srcArray, 
/**
 * Static helpers shared by the quantiles sketches.
 */
public class QuantilesHelper {

  /**
   * Rewrites {@code array} in place so that each slot holds the sum of the
   * weights that came before it.
   * @param array of weights
   * @return total weight
   */
  public static long convertToPrecedingCummulative(final long[] array) {
    long preceding = 0;
    for (int idx = 0; idx < array.length; idx++) {
      final long weight = array[idx];
      array[idx] = preceding;
      preceding += weight;
    }
    return preceding;
  }

  /**
   * Maps a fractional position to a zero-based index into a hypothetical
   * sorted stream of n values.
   * @param phi the fractional position where: 0 &le; &phi; &le; 1.0.
   * @param n the size of the stream
   * @return floor(phi * n), except that a result equal to n is clamped to n-1.
   */
  public static long posOfPhi(final double phi, final long n) {
    final long floored = (long) Math.floor(phi * n);
    if (floored == n) {
      return n - 1;
    }
    return floored;
  }

  /**
   * Finds the chunk that holds a position. Takes a plain array so that it is
   * easy to test.
   * @param arr preceding-cumulative weights with one "extra" trailing slot
   * holding the total
   * @param pos the position
   * @return the index of the chunk containing the position
   */
  public static int chunkContainingPos(final long[] arr, final long pos) {
    final int lastIdx = arr.length - 1; /* the final slot is the "extra" one */
    assert lastIdx > 0;
    final long total = arr[lastIdx];
    assert 0 <= pos;
    assert pos < total;
    final int lo = 0;
    final int hi = lastIdx;
    // These three checks establish the invariants the binary search relies on,
    // so they are worth keeping at the entry point.
    assert lo < hi;
    assert arr[lo] <= pos;
    assert pos < arr[hi];
    return searchForChunkContainingPos(arr, pos, lo, hi);
  }

  // Write m_i for the smallest position of the full length-n sorted sequence
  // that slot i of the chunked sequence represents (m_i is arr[i]).
  //
  // The answer to a positional query 0 <= q < n is the l, 0 <= l < len, with
  //   A)  m_l <= q
  //   B)  q < m_r
  //   C)  l+1 = r
  //
  // A) and B) are the binary-search invariants; they hold for the starting
  // values l = 0 and r = len.
  private static int searchForChunkContainingPos(final long[] arr, final long pos, final int l, final int r) {
    // Re-checked on every step; redundant once they hold at the start.
    assert l < r;
    assert arr[l] <= pos;
    assert pos < arr[r];
    final boolean adjacent = (l + 1 == r);
    if (adjacent) {
      return l;
    }
    final int mid = l + (r - l) / 2;
    return (arr[mid] <= pos)
        ? searchForChunkContainingPos(arr, pos, mid, r)
        : searchForChunkContainingPos(arr, pos, l, mid);
  }

}
/**
 * Small deterministic and random data generators used by the unit tests.
 */
public class TestDataSource {
    /**
     * Returns {@code length} evenly spaced values starting at {@code start},
     * with a step of {@code (end - start) / (length - 1)}.
     *
     * @param start  first value
     * @param end    target for the last value when {@code length > 1}
     * @param length number of values to generate
     * @return the generated values; {@code [start]} when {@code length} is 1
     */
    public static double[] getUniform(double start, double end, int length) {
        double[] vals = new double[length];
        if (length == 1) {
            // A single point has no step. The general formula would divide by
            // zero and produce NaN (0 * Infinity), so return the start value.
            vals[0] = start;
            return vals;
        }
        double stepSize = (end - start) / (length - 1);
        for (int i = 0; i < length; i++) {
            vals[i] = start + i * stepSize;
        }
        return vals;
    }

    /**
     * Returns the values {@code 0, 1, ..., n-1} as doubles.
     */
    public static double[] getUniform(int n) {
        double[] vals = new double[n];
        for (int i = 0; i < n; i++) {
            vals[i] = i;
        }
        return vals;
    }

    /**
     * Returns {@code n} successive {@code nextGaussian()} draws from {@code r}.
     */
    public static double[] getGaussian(Random r, int n) {
        double[] vals = new double[n];
        for (int i = 0; i < n; i++) {
            vals[i] = r.nextGaussian();
        }
        return vals;
    }

    /**
     * Shuffles {@code ar} in place, walking from the last index down and
     * swapping each element with one at a random index no greater than its own.
     */
    public static void shuffleArray(double[] ar)
    {
        Random rnd = ThreadLocalRandom.current();
        for (int i = ar.length - 1; i > 0; i--)
        {
            int index = rnd.nextInt(i + 1);
            // Simple swap
            double a = ar[index];
            ar[index] = ar[i];
            ar[i] = a;
        }
    }
}
11 | 12 | public class GKSketchTest { 13 | @Test 14 | public void testSimple() { 15 | double[] xs = TestDataSource.getUniform(100); 16 | GKSketch s = new GKSketch(.05); 17 | s.add(xs); 18 | s.add(xs); 19 | // System.out.println(s.toString()); 20 | // System.out.println(s.quantile(.5)); 21 | } 22 | 23 | @Test 24 | public void mergeTest() { 25 | ArrayList cellData = new ArrayList<>(); 26 | int numCells = 50; 27 | int nPerCell = 1000; 28 | double[] totalData = new double[numCells * nPerCell]; 29 | int totalIdx = 0; 30 | 31 | double eps = 0.01; 32 | GKSketch globalSummary = new GKSketch(eps); 33 | for (int cellIdx = 0; cellIdx < numCells; cellIdx++) { 34 | double[] curData = new double[nPerCell]; 35 | for (int i = 0; i < nPerCell; i++) { 36 | double curValue; 37 | if (cellIdx % 2 == 0) { 38 | curValue = cellIdx * 200 + i; 39 | } else { 40 | curValue = i*2000 + cellIdx; 41 | } 42 | curData[i] = curValue; 43 | totalData[totalIdx] = curValue; 44 | totalIdx++; 45 | } 46 | globalSummary.add(curData); 47 | cellData.add(curData); 48 | } 49 | 50 | Percentile p = new Percentile(); 51 | double targetP = .45; 52 | double q = p.evaluate(totalData, targetP*100); 53 | double qll = p.evaluate(totalData, (targetP-2*eps)*100); 54 | double ql = p.evaluate(totalData, (targetP-eps)*100); 55 | double qh = p.evaluate(totalData, (targetP+eps)*100); 56 | double qhh = p.evaluate(totalData, (targetP+2*eps)*100); 57 | System.out.println(qll+","+ql+",["+q+"],"+qh+","+qhh); 58 | 59 | // System.out.println("Global Size: "+globalSummary.getTuples().size()); 60 | // System.out.println("Global Quantile: "+globalSummary.quantile(targetP)); 61 | 62 | ArrayList summaries = new ArrayList<>(); 63 | for (double[] curData : cellData) { 64 | GKSketch s = new GKSketch(eps); 65 | s.add(curData); 66 | summaries.add(s); 67 | } 68 | 69 | GKSketch merged = new GKSketch(eps); 70 | for (GKSketch toMerge : summaries) { 71 | merged.merge(toMerge); 72 | } 73 | 74 | // System.out.println("Merged Size: 
"+merged.getTuples().size()); 75 | // System.out.println("Merged Quantile: "+merged.quantile(targetP)); 76 | // System.out.println(merged.toString()); 77 | } 78 | 79 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/histogram/ApproximateHistogramTest.java: -------------------------------------------------------------------------------- 1 | package histogram; 2 | 3 | import data.TestDataSource; 4 | import org.junit.Test; 5 | 6 | import java.util.Arrays; 7 | 8 | public class ApproximateHistogramTest { 9 | @Test 10 | public void testSimple() { 11 | ApproximateHistogram h = new ApproximateHistogram( 12 | 100 13 | ); 14 | double[] data = TestDataSource.getUniform(10001); 15 | for (double x : data) { 16 | h.offer((float)x); 17 | } 18 | float[] ps = {.1f, .5f, .9f}; 19 | float[] qs = h.getQuantiles(ps); 20 | System.out.println(Arrays.toString(qs)); 21 | } 22 | 23 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/io/CSVDataSourceTest.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import org.junit.Test; 4 | 5 | import static org.junit.Assert.assertEquals; 6 | 7 | public class CSVDataSourceTest { 8 | @Test 9 | public void testLoadCSV() throws Exception { 10 | CSVDataSource s = new CSVDataSource("src/test/resources/test.csv", 0); 11 | s.setHasHeader(true); 12 | double[] col = s.get(); 13 | assertEquals(1.0, col[0], 0.0); 14 | } 15 | 16 | @Test 17 | public void testLoadCSV2() throws Exception { 18 | SimpleCSVDataSource s = new SimpleCSVDataSource("src/test/resources/test.csv", 1); 19 | s.setHasHeader(true); 20 | double[] col = s.get(); 21 | assertEquals(9.0, col[1], 0.0); 22 | } 23 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/io/GroupedCSVDataSourceTest.java: 
-------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import org.junit.Test; 4 | 5 | import java.util.ArrayList; 6 | 7 | import static org.junit.Assert.assertArrayEquals; 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class GroupedCSVDataSourceTest { 11 | @Test 12 | public void testLoadCSV() throws Exception { 13 | GroupedCSVDataSource s = new GroupedCSVDataSource("src/test/resources/grouped.csv"); 14 | s.setHasHeader(true); 15 | ArrayList groups = s.get(); 16 | assertEquals(2, groups.size()); 17 | assertArrayEquals(new double[]{2.0}, groups.get(0), 0.0); 18 | assertArrayEquals(new double[]{3.0, 4.0}, groups.get(1), 0.0); 19 | } 20 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/io/SeqDataGrouperTest.java: -------------------------------------------------------------------------------- 1 | package io; 2 | 3 | import data.TestDataSource; 4 | import org.junit.Test; 5 | 6 | import java.util.ArrayList; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class SeqDataGrouperTest { 11 | @Test 12 | public void testSimple() { 13 | double[] data = TestDataSource.getUniform(-2, 3, 1000); 14 | SeqDataGrouper g = new SeqDataGrouper(90); 15 | ArrayList cells = g.group(data); 16 | assertEquals(12, cells.size()); 17 | assertEquals(10, cells.get(11).length); 18 | assertEquals(3.0, cells.get(11)[9], 0.0); 19 | } 20 | 21 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/ApproximateHistogramSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import 
static org.junit.Assert.assertArrayEquals; 13 | 14 | public class ApproximateHistogramSketchTest { 15 | @Test 16 | public void testUniform() throws Exception { 17 | ApproximateHistogramSketch s = new ApproximateHistogramSketch(); 18 | s.setSizeParam(1000); 19 | s.initialize(); 20 | 21 | double[] data = TestDataSource.getUniform(10001); 22 | s.add(data); 23 | 24 | List ps = Arrays.asList(.1, .5, .9); 25 | double[] qs = s.getQuantiles(ps); 26 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 27 | assertArrayEquals(expectedQs, qs, 100.0); 28 | 29 | DataGrouper grouper = new SeqDataGrouper(60); 30 | ArrayList cellData = grouper.group(data); 31 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 32 | () -> { 33 | QuantileSketch newSketch = new ApproximateHistogramSketch(); 34 | newSketch.setSizeParam(1000.0); 35 | return newSketch; 36 | }, 37 | cellData 38 | ); 39 | double[] qs2 = mergedSketch.getQuantiles(ps); 40 | assertArrayEquals(expectedQs, qs2, 10.0); 41 | System.out.println(Arrays.toString(qs2)); 42 | } 43 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/HistogramSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static org.junit.Assert.assertArrayEquals; 13 | import static org.junit.Assert.assertTrue; 14 | 15 | public class HistogramSketchTest { 16 | @Test 17 | public void testSimple() throws Exception { 18 | HistogramSketch hs = new HistogramSketch(); 19 | hs.setSizeParam(10); 20 | hs.initialize(); 21 | 22 | double[] data = TestDataSource.getUniform(10001); 23 | hs.add(data); 24 | 25 | List ps = Arrays.asList(.1, .5, .9); 26 | double[] qs = hs.getQuantiles(ps); 
27 | 28 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 29 | assertArrayEquals(expectedQs, qs, 200.0); 30 | 31 | double[] errors = hs.getErrors(); 32 | for (double e : errors) { 33 | assertTrue(e < .15); 34 | } 35 | } 36 | 37 | @Test 38 | public void testMerge() throws Exception { 39 | HistogramSketch hs1 = new HistogramSketch(); 40 | hs1.setSizeParam(10); 41 | hs1.initialize(); 42 | double[] data = TestDataSource.getUniform(10001); 43 | hs1.add(data); 44 | 45 | DataGrouper grouper = new SeqDataGrouper(100); 46 | ArrayList cellData = grouper.group(data); 47 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 48 | () -> { 49 | QuantileSketch newSketch = new HistogramSketch(); 50 | newSketch.setSizeParam(10); 51 | return newSketch; 52 | }, 53 | cellData 54 | ); 55 | 56 | List ps = Arrays.asList(.1, .5, .9); 57 | double[] origQuantiles = hs1.getQuantiles(ps); 58 | double[] mergedQuantiles = mergedSketch.getQuantiles(ps); 59 | assertArrayEquals(origQuantiles, mergedQuantiles, 0.0); 60 | 61 | } 62 | 63 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/MomentSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import msolver.MathUtil; 7 | import org.junit.Test; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Arrays; 11 | import java.util.List; 12 | 13 | import static org.junit.Assert.assertArrayEquals; 14 | 15 | public class MomentSketchTest { 16 | @Test 17 | public void testUniform() throws Exception { 18 | MomentSketch ms = new MomentSketch(1e-10); 19 | ms.setSizeParam(7); 20 | ms.initialize(); 21 | 22 | double[] data = TestDataSource.getUniform(0,1,10001); 23 | ms.add(data); 24 | 25 | List ps = Arrays.asList(.1, .5, .9); 26 | double[] qs = ms.getQuantiles(ps); 27 | double[] expectedQs = 
QuantileUtil.getTrueQuantiles(ps, data); 28 | assertArrayEquals(expectedQs, qs, 1.0); 29 | 30 | DataGrouper grouper = new SeqDataGrouper(60); 31 | ArrayList cellData = grouper.group(data); 32 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 33 | () -> { 34 | MomentSketch newMs = new MomentSketch(1e-10); 35 | newMs.setSizeParam(7); 36 | return newMs; 37 | }, 38 | cellData 39 | ); 40 | MomentSketch mmSketch = (MomentSketch)mergedSketch; 41 | double[] qs2 = mmSketch.getQuantiles(ps); 42 | 43 | assertArrayEquals(qs, qs2, 1e-7); 44 | } 45 | 46 | @Test 47 | public void testLogOccupancy() throws Exception { 48 | double[] powerSums = { 49 | 20560.0, 132778.81355561252, 860423.7556197477, 5595528.904319964, 50 | 3.652405916578557E7, 2.3932372378677437E8, 1.5744015769855406E9, 1.0399585507478048E10, 51 | 6.898067822853244E10, 4.59495821550009E11, 3.073979747643975E12 52 | }; 53 | double min = 6.022842082800238; 54 | double max = 7.638439063070808; 55 | 56 | double[] chebyMoments = MathUtil.powerSumsToChebyMoments(min, max, powerSums); 57 | // System.out.println(Arrays.toString(chebyMoments)); 58 | // MomentSketch ms = new MomentSketch(1e-9); 59 | // ms.setStats(powerSums, min, max); 60 | // 61 | // List ps = Arrays.asList(.1, .5, .9); 62 | // double[] qs = ms.getQuantiles(ps); 63 | } 64 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/QuantileUtilTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import org.junit.Test; 5 | 6 | import java.util.Arrays; 7 | 8 | import static org.junit.Assert.assertEquals; 9 | 10 | public class QuantileUtilTest { 11 | @Test 12 | public void testTrueQuantiles() { 13 | double[] data = TestDataSource.getUniform(1001); 14 | double[] qs = QuantileUtil.getTrueQuantiles(Arrays.asList(.1, .5), data); 15 | assertEquals(100.0, qs[0], 0.0); 16 | 
assertEquals(500.0, qs[1], 0.0); 17 | } 18 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/ReservoirSamplingSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static org.junit.Assert.assertArrayEquals; 13 | 14 | public class ReservoirSamplingSketchTest { 15 | @Test 16 | public void testSimple() throws Exception { 17 | ReservoirSamplingSketch sketch = new ReservoirSamplingSketch(); 18 | sketch.setSizeParam(1000); 19 | sketch.initialize(); 20 | 21 | double[] data = TestDataSource.getUniform(10001); 22 | sketch.add(data); 23 | 24 | List ps = Arrays.asList(.1, .5, .9); 25 | double[] qs = sketch.getQuantiles(ps); 26 | 27 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 28 | assertArrayEquals(expectedQs, qs, 800.0); 29 | 30 | DataGrouper grouper = new SeqDataGrouper(60); 31 | ArrayList cellData = grouper.group(data); 32 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 33 | () -> { 34 | QuantileSketch newSketch = new ReservoirSamplingSketch(); 35 | newSketch.setSizeParam(1000); 36 | return newSketch; 37 | }, 38 | cellData 39 | ); 40 | double[] qs2 = mergedSketch.getQuantiles(ps); 41 | 42 | assertArrayEquals(expectedQs, qs, 800.0); 43 | assertArrayEquals(expectedQs, qs2, 800.0); 44 | } 45 | 46 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/SamplingSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import 
org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static org.junit.Assert.assertArrayEquals; 13 | 14 | public class SamplingSketchTest { 15 | @Test 16 | public void testSimple() throws Exception { 17 | SamplingSketch sketch = new SamplingSketch(); 18 | sketch.setSizeParam(1000); 19 | sketch.initialize(); 20 | 21 | double[] data = TestDataSource.getUniform(10001); 22 | sketch.add(data); 23 | 24 | List ps = Arrays.asList(.1, .5, .9); 25 | double[] qs = sketch.getQuantiles(ps); 26 | 27 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 28 | assertArrayEquals(expectedQs, qs, 1000.0); 29 | 30 | DataGrouper grouper = new SeqDataGrouper(60); 31 | ArrayList cellData = grouper.group(data); 32 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 33 | () -> { 34 | QuantileSketch newSketch = new SamplingSketch(); 35 | newSketch.setSizeParam(1000); 36 | return newSketch; 37 | }, 38 | cellData 39 | ); 40 | double[] qs2 = mergedSketch.getQuantiles(ps); 41 | 42 | assertArrayEquals(expectedQs, qs2, 1000.0); 43 | } 44 | 45 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/SparkGKSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static org.junit.Assert.assertArrayEquals; 13 | 14 | public class SparkGKSketchTest { 15 | @Test 16 | public void testSimple() throws Exception { 17 | SparkGKSketch sketch = new SparkGKSketch(); 18 | int size = 100; 19 | sketch.setSizeParam(100); 20 | sketch.initialize(); 21 | 22 | int n = 20000; 23 | double[] data = TestDataSource.getUniform(n+1); 24 | sketch.add(data); 25 | 26 | List 
ps = Arrays.asList(.1, .5, .9); 27 | double[] qs = sketch.getQuantiles(ps); 28 | 29 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 30 | 31 | assertArrayEquals(expectedQs, qs, n/size); 32 | 33 | DataGrouper grouper = new SeqDataGrouper(60); 34 | ArrayList cellData = grouper.group(data); 35 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 36 | () -> { 37 | SparkGKSketch newSketch = new SparkGKSketch(); 38 | newSketch.setSizeParam(100); 39 | return newSketch; 40 | }, 41 | cellData 42 | ); 43 | double[] qs2 = mergedSketch.getQuantiles(ps); 44 | 45 | assertArrayEquals(expectedQs, qs2, n/size); 46 | } 47 | 48 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/TDigestSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static org.junit.Assert.assertArrayEquals; 13 | 14 | public class TDigestSketchTest { 15 | @Test 16 | public void testUniform() throws Exception { 17 | TDigestSketch ts = new TDigestSketch(); 18 | ts.setSizeParam(20); 19 | ts.initialize(); 20 | 21 | double[] data = TestDataSource.getUniform(10001); 22 | ts.add(data); 23 | 24 | List ps = Arrays.asList(.1, .5, .9); 25 | double[] qs = ts.getQuantiles(ps); 26 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 27 | assertArrayEquals(expectedQs, qs, 100.0); 28 | 29 | DataGrouper grouper = new SeqDataGrouper(60); 30 | ArrayList cellData = grouper.group(data); 31 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 32 | () -> { 33 | QuantileSketch newSketch = new TDigestSketch(); 34 | newSketch.setSizeParam(20); 35 | return newSketch; 36 | }, 37 | cellData 38 | ); 39 | double[] qs2 = 
mergedSketch.getQuantiles(ps); 40 | assertArrayEquals(expectedQs, qs2, 200.0); 41 | } 42 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/java/sketches/YahooSketchTest.java: -------------------------------------------------------------------------------- 1 | package sketches; 2 | 3 | import data.TestDataSource; 4 | import io.DataGrouper; 5 | import io.SeqDataGrouper; 6 | import org.junit.Test; 7 | 8 | import java.util.ArrayList; 9 | import java.util.Arrays; 10 | import java.util.List; 11 | 12 | import static org.junit.Assert.assertArrayEquals; 13 | 14 | public class YahooSketchTest { 15 | @Test 16 | public void testUniform() throws Exception { 17 | YahooSketch s = new YahooSketch(); 18 | s.setSizeParam(1024.0); 19 | s.initialize(); 20 | 21 | double[] data = TestDataSource.getUniform(10001); 22 | s.add(data); 23 | 24 | List ps = Arrays.asList(.1, .5, .9); 25 | double[] qs = s.getQuantiles(ps); 26 | double[] expectedQs = QuantileUtil.getTrueQuantiles(ps, data); 27 | assertArrayEquals(expectedQs, qs, 10.0); 28 | 29 | DataGrouper grouper = new SeqDataGrouper(60); 30 | ArrayList cellData = grouper.group(data); 31 | QuantileSketch mergedSketch = QuantileUtil.trainAndMerge( 32 | () -> { 33 | QuantileSketch newSketch = new YahooSketch(); 34 | newSketch.setSizeParam(1024.0); 35 | return newSketch; 36 | }, 37 | cellData 38 | ); 39 | double[] qs2 = mergedSketch.getQuantiles(ps); 40 | assertArrayEquals(expectedQs, qs2, 10.0); 41 | } 42 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/resources/acc_bench.json: -------------------------------------------------------------------------------- 1 | { 2 | "testName": "acc_bench_test", 3 | "fileName": "src/test/resources/test.csv", 4 | "columnIdx": 0, 5 | "numTrials": 2, 6 | "numSolveTrials": 10, 7 | "methods": { 8 | "moments": [3.0, 9.0], 9 | "tdigest": [5.0, 40.0] 10 | }, 11 | "quantiles": 
[0.1, 0.5, 0.9], 12 | 13 | "calcError": true, 14 | "verbose": false 15 | } -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/resources/grouped.csv: -------------------------------------------------------------------------------- 1 | data 2 | [2.0] 3 | "[3.0, 4.0]" -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/resources/test.csv: -------------------------------------------------------------------------------- 1 | x,y,z 2 | 1,10,5 3 | 2,9,5 4 | 3,8,5 5 | 4,7,5 6 | 5,6,5 7 | 6,5,5 8 | 7,4,5 9 | 8,3,5 10 | 9,2,5 11 | 10,1,5 -------------------------------------------------------------------------------- /javamsketch/quantilebench/src/test/resources/tiny_conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "flag": true, 3 | "vals": [1.0, 2.0] 4 | } -------------------------------------------------------------------------------- /javamsketch/readme.md: -------------------------------------------------------------------------------- 1 | # Moments Sketch Code 2 | 3 | This repository contains an implementation of the moments sketch 4 | maximum entropy solver, as well as benchmark implementations of a variety 5 | of other quantile summaries. 6 | 7 | The core moment sketch solver code is in the msolver package, while 8 | the benchmarking and reference implementations are in the quantilebench 9 | package. 10 | 11 | ### Compiling and Running 12 | 13 | First, generate an up-to-date classpath. This only needs to be done once. 14 | ``` 15 | mvn install 16 | ./genCP.sh 17 | ``` 18 | 19 | Install the solver code: 20 | ``` 21 | cd msolver && mvn install && cd .. 22 | ``` 23 | 24 | Build the benchmark code: 25 | ``` 26 | cd quantilebench && mvn package && cd .. 
27 | ``` 28 | 29 | Run an example accuracy benchmark with the moments sketch: 30 | ``` 31 | ./accBench.sh confs/test_gauss_2.json 32 | ``` 33 | Results for the example workload are saved in `results/test_gauss_2.csv`. 34 | 35 | Merge benchmarks can be run using `./mergeBench.sh` 36 | 37 | ### Important configuration parameters 38 | 39 | - fileName: input csv file 40 | - columnIdx: which column contains the metric of interest 41 | - methods: list of quantile summaries one would like to benchmark, 42 | parameterized by sketch size 43 | - `"cmoments": [11.0]` means to run a moments sketch 44 | with up to order 10 moments 45 | 46 | 47 | ### Moments Sketch Internals 48 | 49 | To call the Java code for the solver directly once you have collected the 50 | statistics, you can use the `ChebyshevMomentSolver2` class. See the 51 | `ChebyshevMomentSolver2Test` for example usage. -------------------------------------------------------------------------------- /javamsketch/results/cascade_milan.csv: -------------------------------------------------------------------------------- 1 | ,avg_querytime,markov_hit,maxent_hit,racz_throughput,simple,min_support,type,avg_mergetime,containment,racz,markov_time,simple_throughput,simple_time,markov_throughput,cube,avg_runtime,avg_apltime,min_globalratio,maxent_time,racz_hit,maxent_throughput,racz_time,simple_hit,markov,overall_throughput 2 | 0,39.977365,10341.0,10341.0,Infinity,False,0.02,cascade,2.4685610000000002,True,False,0.0,2356385.740414,0.00011,Infinity,/lfs/0/jding/datasets/milan_1month_4H_cubed.csv,50.022214,43.875806,30.0,0.99989,10341.0,258.75527400000004,0.0,10341.0,False,258.726863 3 | 1,3.905733,1448.0,1448.0,Infinity,True,0.02,cascade,2.359812,True,False,0.0,11651555.287143,0.000227,Infinity,/lfs/0/jding/datasets/milan_1month_4H_cubed.csv,15.000379999999998,7.6043449999999995,30.0,0.9997729999999999,1448.0,371.245134,0.0,10341.0,False,2650.671662 4 | 
2,0.369856,1448.0,192.0,Infinity,True,0.02,cascade,2.3185729999999998,True,False,0.008217,13946660.306909,0.002029,482136.075915,/lfs/0/jding/datasets/milan_1month_4H_cubed.csv,10.814832000000001,4.045584,30.0,0.9897540000000001,192.0,530.747296,0.0,10341.0,True,28292.838691 5 | 3,0.156844,1448.0,72.0,36460.410672,True,0.02,cascade,2.309679,True,True,0.019204,14296267.247064002,0.00474,494114.475541,/lfs/0/jding/datasets/milan_1month_4H_cubed.csv,10.37232,3.7853019999999997,30.0,0.9415479999999999,192.0,501.11196900000004,0.034508,10341.0,True,67765.27547200001 6 | 4,0.015715,,,,,,yahoo1,19.620108,,,,,,,,39.845493,20.310234,,,,,,,, 7 | 5,3.19227,,,,,,yahoo2,6.107064,,,,,,,,31.05183,9.6342,,,,,,,, 8 | -------------------------------------------------------------------------------- /javamsketch/results/info.txt: -------------------------------------------------------------------------------- 1 | results from benchmarks here -------------------------------------------------------------------------------- /javamsketch/results/sliding_milan.csv: -------------------------------------------------------------------------------- 1 | ,avg_runtime,avg_querytime,markov_hit,maxent_hit,racz_throughput,avg_createtime,simple,maxent_time,racz_hit,maxent_throughput,avg_mergetime,racz_time,racz,markov_time,simple_throughput,simple_time,simple_hit,markov_throughput,markov,overall_throughput 2 | 0,9.247726,6.282106,4297.0,4297.0,Infinity,2.8585279999999997,False,0.999949,4297.0,684.0976400000001,0.021755,0.0,False,0.0,13514157.089328,5.1e-05,4297.0,Infinity,False,684.063012 3 | 1,8.143391000000001,5.2364,3630.0,3630.0,Infinity,2.799337,True,0.999928,3630.0,693.3267639999999,0.023368,0.0,False,0.0,11441801.451327,7.2e-05,4297.0,Infinity,False,820.664303 4 | 2,2.9440939999999998,0.054083000000000006,3630.0,34.0,Infinity,2.799916,True,0.891086,34.0,708.911119,0.021121,0.0,False,0.105066,20746040.843746,0.003848,4297.0,641912.376460,True,79835.788037 5 | 
3,2.973519,0.013609999999999999,3630.0,4.0,30629.331085,2.872201,True,0.45723100000000005,34.0,656.093737,0.021431,0.08325,True,0.443208,19756949.043812,0.016311000000000003,4297.0,614243.744359,True,322260.431527 6 | 4,3.8058970000000003,0.038357999999999996,,,,3.2525150000000003,,,,,0.44237600000000005,,,,,,,,, 7 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Moment-based quantile sketches for aggregations 2 | 3 | This repository contains code for reproducing the experiments in the Moment-based quantile sketches [paper](http://www.vldb.org/pvldb/vol11/p1647-gan.pdf). Please see the [*companion repository*](https://github.com/stanford-futuredata/momentsketch) for a more readable implementation of the sketch suitable for experimentation and development. 4 | 5 | The maximum entropy solver and benchmarks can be found in the `javamsketch/` directory; see the `readme.md` there for instructions on how to build and run the code. 6 | 7 | The other directories contain supporting files for the paper. 8 | --------------------------------------------------------------------------------