├── .gitignore
├── LICENSE
├── analysis
├── Aria Benchmark.ipynb
├── Cascades Benchmark.ipynb
├── Cheby Precision Loss.ipynb
├── Cross Benchmark.ipynb
├── DiscreteOutlier Micro Tests [Paper].ipynb
├── Druid Plot.ipynb
├── Lesion Results.ipynb
├── Log Lesion.ipynb
├── Merge Benchmark.ipynb
├── Parallel Merge Benchmark.ipynb
├── Perf Micro Plots.ipynb
├── Point Benchmark.ipynb
├── Precision Benchmark.ipynb
├── Sliding Windows Benchmark.ipynb
└── SysML Paper.ipynb
├── cvx_estimator
├── cvx_estimator.py
├── cvxbench.py
├── estimator.py
├── exponential.py
├── gauss_estimator.py
├── hepmass.py
├── lesion_study.py
└── milan.py
├── javamsketch
├── accBench.sh
├── confs
│ ├── cross_exp.json
│ ├── cross_hepmass.json
│ ├── cross_milan.json
│ ├── discrete_bench.json
│ ├── find_exp.json
│ ├── find_hepmass_d.json
│ ├── find_hepmass_r.json
│ ├── find_milan_ahist.json
│ ├── find_milan_d.json
│ ├── find_milan_r.json
│ ├── gk
│ │ ├── find_exp_gk.json
│ │ ├── find_hepmass_gk.json
│ │ ├── find_milan_gk.json
│ │ ├── merge_aria_gk.json
│ │ ├── merge_exp_gk.json
│ │ ├── merge_expb_gk.json
│ │ ├── merge_gauss_gk.json
│ │ ├── merge_hepmass_gk.json
│ │ ├── merge_hepmassb_gk.json
│ │ ├── merge_milan_gk.json
│ │ ├── merge_milanb_gk.json
│ │ ├── point_exp_gk.json
│ │ ├── point_hepmass_gk.json
│ │ ├── point_milan_gk.json
│ │ ├── point_occupancy_gk.json
│ │ ├── point_power_gk.json
│ │ ├── point_retail_gk.json
│ │ ├── strong_scaling_hepmass_gk.json
│ │ ├── strong_scaling_milan_gk.json
│ │ ├── weak_scaling_hepmass_gk.json
│ │ └── weak_scaling_milan_gk.json
│ ├── lesion_solve.json
│ ├── log_lesion.json
│ ├── merge_aria.json
│ ├── merge_exp.json
│ ├── merge_exp_m.json
│ ├── merge_expb.json
│ ├── merge_gauss.json
│ ├── merge_hepmass.json
│ ├── merge_hepmassb.json
│ ├── merge_milan.json
│ ├── merge_milan_ahist.json
│ ├── merge_milan_m.json
│ ├── merge_milanb.json
│ ├── outlier_bench.json
│ ├── point_exp.json
│ ├── point_exp_m.json
│ ├── point_hepmass.json
│ ├── point_hepmass_m.json
│ ├── point_milan.json
│ ├── point_milan_m.json
│ ├── point_occupancy.json
│ ├── point_occupancy_m.json
│ ├── point_power.json
│ ├── point_power_m.json
│ ├── point_retail.json
│ ├── point_retail_m.json
│ ├── random
│ │ ├── find_hepmass_r.json
│ │ ├── find_milan_r.json
│ │ ├── merge_exp_r.json
│ │ ├── merge_gauss_r.json
│ │ ├── merge_milan_r.json
│ │ ├── point_exp_r.json
│ │ ├── point_hepmass_r.json
│ │ ├── point_milan_r.json
│ │ ├── point_occupancy_r.json
│ │ ├── point_power_r.json
│ │ └── point_retail_r.json
│ ├── sort_milan.json
│ ├── sort_milan_approx.json
│ ├── strong_scaling_hepmass.json
│ ├── strong_scaling_milan.json
│ ├── weak_scaling_hepmass.json
│ └── weak_scaling_milan.json
├── discretebench.sh
├── genCP.sh
├── lesionSolve.sh
├── loglesion.sh
├── mRuntimeBench.sh
├── mergeBench.sh
├── msolver
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ └── java
│ │ │ └── msolver
│ │ │ ├── BoundSolver.java
│ │ │ ├── ChebyshevMomentSolver.java
│ │ │ ├── ChebyshevMomentSolver2.java
│ │ │ ├── GFunction.java
│ │ │ ├── MathUtil.java
│ │ │ ├── MaxEntFunction.java
│ │ │ ├── MaxEntFunction2.java
│ │ │ ├── MaxEntPotential.java
│ │ │ ├── MaxEntPotential2.java
│ │ │ ├── MnatSolver.java
│ │ │ ├── SimpleBoundSolver.java
│ │ │ ├── SolveBasisSelector.java
│ │ │ ├── chebyshev
│ │ │ ├── ChebyshevPolynomial.java
│ │ │ ├── CosScaledFunction.java
│ │ │ └── QuadraticCosFunction.java
│ │ │ ├── data
│ │ │ ├── ExponentialData.java
│ │ │ ├── GaussianData.java
│ │ │ ├── HepData.java
│ │ │ ├── MilanData.java
│ │ │ ├── MomentData.java
│ │ │ ├── OccupancyData.java
│ │ │ ├── RetailData.java
│ │ │ ├── RetailQuantityData.java
│ │ │ ├── RetailQuantityLogData.java
│ │ │ ├── ShuttleData.java
│ │ │ └── UniformData.java
│ │ │ ├── optimizer
│ │ │ ├── BFGSOptimizer.java
│ │ │ ├── FunctionWithHessian.java
│ │ │ ├── GenericOptimizer.java
│ │ │ ├── NewtonOptimizer.java
│ │ │ └── QuadraticPotential.java
│ │ │ ├── struct
│ │ │ └── MomentStruct.java
│ │ │ └── thresholds
│ │ │ ├── MarkovThreshold.java
│ │ │ ├── MomentThreshold.java
│ │ │ ├── RTTThreshold.java
│ │ │ └── ThresholdCascade.java
│ │ └── test
│ │ └── java
│ │ └── msolver
│ │ ├── BoundSolverTest.java
│ │ ├── ChebyshevMomentSolver2Test.java
│ │ ├── ChebyshevMomentSolverTest.java
│ │ ├── ChebyshevPolynomialTest.java
│ │ ├── MathUtilTest.java
│ │ ├── MaxEntFunction2Test.java
│ │ ├── MaxEntFunctionTest.java
│ │ ├── MaxEntPotential2Test.java
│ │ ├── MaxEntPotentialTest.java
│ │ ├── MnatSolverTest.java
│ │ ├── SimpleBoundSolverTest.java
│ │ ├── SolveBasisSelectorTest.java
│ │ ├── optimizer
│ │ ├── BFGSOptimizerTest.java
│ │ ├── NewtonOptimizerTest.java
│ │ └── QuadraticPotentialTest.java
│ │ └── thresholds
│ │ ├── MarkovThresholdTest.java
│ │ ├── RTTThresholdTest.java
│ │ └── ThresholdCascadeTest.java
├── outlierbench.sh
├── parallelMergeBench.sh
├── pom.xml
├── quantilebench
│ ├── pom.xml
│ └── src
│ │ ├── main
│ │ └── java
│ │ │ ├── AccuracyBench.java
│ │ │ ├── BoundRuntimeBench.java
│ │ │ ├── DiscreteBench.java
│ │ │ ├── LogMomentsLesion.java
│ │ │ ├── MSketchBench.java
│ │ │ ├── MergeBench.java
│ │ │ ├── OutlierBench.java
│ │ │ ├── ParallelMergeBench.java
│ │ │ ├── RunConfig.java
│ │ │ ├── SamplingBench.java
│ │ │ ├── SolveLesionBench.java
│ │ │ ├── SortBench.java
│ │ │ ├── gk
│ │ │ └── GKSketch.java
│ │ │ ├── histogram
│ │ │ ├── ApproximateHistogram.java
│ │ │ └── ArrayUtils.java
│ │ │ ├── io
│ │ │ ├── CSVDataSource.java
│ │ │ ├── CSVOutput.java
│ │ │ ├── DataGrouper.java
│ │ │ ├── DataSource.java
│ │ │ ├── GroupedCSVDataSource.java
│ │ │ ├── SeqDataGrouper.java
│ │ │ └── SimpleCSVDataSource.java
│ │ │ ├── sampling
│ │ │ ├── PreambleUtil.java
│ │ │ ├── ReservoirItemsSketch.java
│ │ │ ├── ReservoirItemsUnion.java
│ │ │ ├── ReservoirSize.java
│ │ │ ├── SampleSubsetSummary.java
│ │ │ └── SamplingUtil.java
│ │ │ ├── sketches
│ │ │ ├── ApproximateHistogramSketch.java
│ │ │ ├── CMomentSketch.java
│ │ │ ├── GKAdaptiveSketch.java
│ │ │ ├── HistogramSketch.java
│ │ │ ├── HybridMomentSketch.java
│ │ │ ├── MomentSketch.java
│ │ │ ├── QuantileSketch.java
│ │ │ ├── QuantileUtil.java
│ │ │ ├── RandomSketch.java
│ │ │ ├── ReservoirSamplingSketch.java
│ │ │ ├── SamplingSketch.java
│ │ │ ├── SketchLoader.java
│ │ │ ├── SparkGKSketch.java
│ │ │ ├── TDigestSketch.java
│ │ │ └── YahooSketch.java
│ │ │ ├── tdigest
│ │ │ ├── AVLGroupTree.java
│ │ │ ├── AVLTreeDigest.java
│ │ │ ├── AbstractTDigest.java
│ │ │ ├── Centroid.java
│ │ │ ├── IntAVLTree.java
│ │ │ ├── MergingDigest.java
│ │ │ └── TDigest.java
│ │ │ └── yahoo
│ │ │ ├── CompactDoublesSketch.java
│ │ │ ├── DirectCompactDoublesSketch.java
│ │ │ ├── DirectDoublesSketchAccessor.java
│ │ │ ├── DirectUpdateDoublesSketch.java
│ │ │ ├── DirectUpdateDoublesSketchR.java
│ │ │ ├── DoublesArrayAccessor.java
│ │ │ ├── DoublesAuxiliary.java
│ │ │ ├── DoublesBufferAccessor.java
│ │ │ ├── DoublesByteArrayImpl.java
│ │ │ ├── DoublesMergeImpl.java
│ │ │ ├── DoublesPmfCdfImpl.java
│ │ │ ├── DoublesSketch.java
│ │ │ ├── DoublesSketchAccessor.java
│ │ │ ├── DoublesSketchBuilder.java
│ │ │ ├── DoublesUnion.java
│ │ │ ├── DoublesUnionBuilder.java
│ │ │ ├── DoublesUnionImpl.java
│ │ │ ├── DoublesUnionImplR.java
│ │ │ ├── DoublesUpdateImpl.java
│ │ │ ├── DoublesUtil.java
│ │ │ ├── HeapCompactDoublesSketch.java
│ │ │ ├── HeapDoublesSketchAccessor.java
│ │ │ ├── HeapUpdateDoublesSketch.java
│ │ │ ├── PreambleUtil.java
│ │ │ ├── QuantilesHelper.java
│ │ │ ├── UpdateDoublesSketch.java
│ │ │ └── Util.java
│ │ └── test
│ │ ├── java
│ │ ├── AccuracyBenchTest.java
│ │ ├── RunConfigTest.java
│ │ ├── data
│ │ │ └── TestDataSource.java
│ │ ├── gk
│ │ │ └── GKSketchTest.java
│ │ ├── histogram
│ │ │ └── ApproximateHistogramTest.java
│ │ ├── io
│ │ │ ├── CSVDataSourceTest.java
│ │ │ ├── GroupedCSVDataSourceTest.java
│ │ │ └── SeqDataGrouperTest.java
│ │ └── sketches
│ │ │ ├── ApproximateHistogramSketchTest.java
│ │ │ ├── CMomentSketchTest.java
│ │ │ ├── HistogramSketchTest.java
│ │ │ ├── HybridMomentSketchTest.java
│ │ │ ├── MomentSketchTest.java
│ │ │ ├── QuantileUtilTest.java
│ │ │ ├── RandomSketchTest.java
│ │ │ ├── ReservoirSamplingSketchTest.java
│ │ │ ├── SamplingSketchTest.java
│ │ │ ├── SparkGKSketchTest.java
│ │ │ ├── TDigestSketchTest.java
│ │ │ └── YahooSketchTest.java
│ │ └── resources
│ │ ├── acc_bench.json
│ │ ├── grouped.csv
│ │ ├── test.csv
│ │ └── tiny_conf.json
├── readme.md
└── results
│ ├── cascade_milan.csv
│ ├── info.txt
│ ├── merge_aria.csv
│ ├── precision_merge_hepmass.csv
│ ├── precision_merge_milan.csv
│ ├── sliding_milan.csv
│ ├── strong_scaling_hepmass.csv
│ ├── strong_scaling_milan.csv
│ ├── weak_scaling_hepmass.csv
│ └── weak_scaling_milan.csv
├── math
├── Bounds.ipynb
├── Chebyshev.ipynb
├── MomentConversion.ipynb
├── Quadrature.ipynb
├── combined.lyx
└── notes.lyx
├── readme.md
└── testdata
└── gauss_2.csv
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | CMakeFiles/
3 | *.cmake
4 | *.svg
5 | *.dylib
6 | mlib/main
7 | mlib/rtest
8 | javamsketch/cp.txt
9 | CMakeCache.txt
10 | *.pdf
11 | *.csv
12 | *.lyx~
13 | mlib/Makefile
14 | mscript/results/report.html
15 | *.iml
16 | *.cbp
17 | *.json
18 | math/*.ipynb
19 |
20 | # Byte-compiled / optimized / DLL files
21 | __pycache__/
22 | *.py[cod]
23 | *$py.class
24 |
25 | # C extensions
26 | *.so
27 |
28 | # Distribution / packaging
29 | .Python
30 | env/
31 | build/
32 | develop-eggs/
33 | dist/
34 | downloads/
35 | eggs/
36 | .eggs/
37 | lib64/
38 | parts/
39 | sdist/
40 | var/
41 | wheels/
42 | *.egg-info/
43 | .installed.cfg
44 | *.egg
45 |
46 | # PyInstaller
47 | # Usually these files are written by a python script from a template
48 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
49 | *.manifest
50 | *.spec
51 |
52 | # Installer logs
53 | pip-log.txt
54 | pip-delete-this-directory.txt
55 |
56 | # Unit test / coverage reports
57 | htmlcov/
58 | .tox/
59 | .coverage
60 | .coverage.*
61 | .cache
62 | nosetests.xml
63 | coverage.xml
64 | *.cover
65 | .hypothesis/
66 |
67 | # Translations
68 | *.mo
69 | *.pot
70 |
71 | # Django stuff:
72 | *.log
73 | local_settings.py
74 |
75 | # Flask stuff:
76 | instance/
77 | .webassets-cache
78 |
79 | # Scrapy stuff:
80 | .scrapy
81 |
82 | # Sphinx documentation
83 | docs/_build/
84 |
85 | # PyBuilder
86 | target/
87 |
88 | # Jupyter Notebook
89 | .ipynb_checkpoints
90 |
91 | # pyenv
92 | .python-version
93 |
94 | # celery beat schedule file
95 | celerybeat-schedule
96 |
97 | # SageMath parsed files
98 | *.sage.py
99 |
100 | # dotenv
101 | .env
102 |
103 | # virtualenv
104 | .venv
105 | venv/
106 | ENV/
107 |
108 | # Spyder project settings
109 | .spyderproject
110 | .spyproject
111 |
112 | # Rope project settings
113 | .ropeproject
114 |
115 | # mkdocs documentation
116 | /site
117 |
118 | # mypy
119 | .mypy_cache/
120 |
--------------------------------------------------------------------------------
/cvx_estimator/cvx_estimator.py:
--------------------------------------------------------------------------------
1 | import cvxpy as cvx
2 | import numpy as np
3 |
4 | import estimator
5 |
6 |
class CvxEstimator(estimator.Estimator):
    """Quantile estimator that fits a discretized density on [0, 1] to moments.

    The unit interval is sampled at ``resolution`` evenly spaced locations and
    one non-negative weight per location is solved for so that the weighted
    powers of the locations reproduce the moments stored on the estimator.

    ``solver`` selects how the weights are chosen:

    * ``"lp"``         -- least-squares solution of the moment equations
      (despite the name, this calls ``np.linalg.lstsq``).
    * ``"mindensity"`` -- convex program minimizing the largest weight.
    * anything else    -- convex program maximizing the summed entropy term
      ``cvx.entr`` of the weights (the default, ``"maxent"``).
    """

    def __init__(
            self,
            k: int,
            resolution=1000,
            solver="maxent"
    ):
        """
        :param k: number of moments (powers 0 .. k-1) used as constraints.
        :param resolution: number of grid locations on [0, 1].
        :param solver: one of "lp", "mindensity" or "maxent" (see class doc).
        """
        super().__init__(k)
        self.resolution = resolution
        self.locs = np.linspace(0, 1, self.resolution)
        # Row i holds locs**i, so loc_moments has shape (k, resolution).
        self.loc_moments = np.array([self.locs ** i for i in range(k)])
        self.solver = solver
        self.values = None

    def solve(self):
        """Solve for the per-location weights and store them in ``self.values``.

        :return: the weights multiplied by ``self.resolution``.
        :raises RuntimeError: if the convex solver returns no solution.
        """
        if self.solver == "lp":
            self.values = np.linalg.lstsq(
                self.loc_moments,
                self.moments
            )[0]
        else:
            Xs = cvx.Variable(self.resolution)
            constraints = [
                Xs >= 0,
                Xs <= 1.0,
                self.loc_moments * Xs == self.moments
            ]
            if self.solver == "mindensity":
                objective = cvx.Minimize(cvx.max_entries(Xs))
            else:
                objective = cvx.Maximize(cvx.sum_entries(cvx.entr(Xs)))
            prob = cvx.Problem(objective, constraints)
            prob.solve(solver=cvx.ECOS)
            if Xs.value is None:
                # Without this check the failure shows up later as an opaque
                # "NoneType * int" TypeError.
                raise RuntimeError(
                    "convex solver produced no solution (status: {})".format(
                        prob.status
                    )
                )
            self.values = Xs.value
        # Scale by the grid size actually in use rather than a fixed 1000.
        return self.values * self.resolution

    def estimate(self, p: float):
        """Estimate the ``p``-quantile from the solved weights.

        Walks the weights in grid order until their running sum reaches ``p``,
        blends the two neighbouring grid locations, and maps the result from
        [0, 1] back to [a_min, a_max]. ``solve()`` must have been called first.

        :param p: target cumulative fraction.
        :return: the estimated quantile on the original data scale.
        """
        running_sum = 0
        excess_fraction = 0.5
        i = 0
        for i, val in enumerate(self.values):
            val = float(val)
            running_sum += val
            if running_sum >= p:
                # A zero-weight bin cannot be interpolated within; keep the
                # default midpoint instead of dividing by zero.
                if val != 0:
                    excess_fraction = (running_sum - p) / val
                break

        # The crossing bin may be the last one (or the mass may never reach p),
        # in which case there is no following grid point: reuse the last one.
        upper = min(i + 1, len(self.locs) - 1)
        # NOTE(review): excess_fraction grows the further the running sum
        # overshoots p, yet it weights the *following* grid point -- confirm
        # this interpolation direction is intended.
        best_est = (
            (1 - excess_fraction) * self.locs[i]
            + excess_fraction * self.locs[upper]
        )
        return best_est * (self.a_max - self.a_min) + self.a_min
62 |
--------------------------------------------------------------------------------
/cvx_estimator/estimator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
class Estimator:
    """Base class for quantile estimators driven by summary statistics.

    Holds the value range (``a_min``, ``a_max``) and a collection of moments.
    Subclasses provide ``solve()`` and ``estimate()``.
    """

    def __init__(self, k):
        """
        :param k: number of moments the estimator works with; ``moments`` is
            initialised to ``k`` zeros until ``set_statistics`` is called.
        """
        self.a_min = 0.0
        self.a_max = 0.0
        self.k = k
        self.moments = np.zeros(k)

    def set_statistics(
            self,
            a_min: float,
            a_max: float,
            moments,
    ):
        """Store the statistics that ``solve()`` and ``estimate()`` read.

        :param a_min: lower end of the value range.
        :param a_max: upper end of the value range.
        :param moments: sequence of moment values; stored as given, not copied.
        """
        self.a_min = a_min
        self.a_max = a_max
        self.moments = moments

    def solve(self):
        """Fit the estimator to the stored statistics (subclass hook).

        :raises NotImplementedError: always, in the base class.
        """
        # NotImplemented is a comparison sentinel, not an exception; calling it
        # raised a confusing TypeError. NotImplementedError is the right signal.
        raise NotImplementedError

    def estimate(self, p: float):
        """Return the estimated ``p``-quantile (subclass hook).

        :param p: target cumulative fraction.
        :raises NotImplementedError: always, in the base class.
        """
        raise NotImplementedError
--------------------------------------------------------------------------------
/cvx_estimator/exponential.py:
--------------------------------------------------------------------------------
1 | data = {'logSums': [1000000.0,
2 | -578739.68503790628,
3 | 1983294.7107239107,
4 | -5476131.5807481054,
5 | 23699783.595155567,
6 | -118308097.22723247,
7 | 712931526.56406581,
8 | -4933389056.9205084,
9 | 38572700905.764816],
10 | 'powerSums': [1000000.0,
11 | 998677.78490783856,
12 | 1991896.3142772249,
13 | 5947596.181134684,
14 | 23651528.771513801,
15 | 117830113.75301118,
16 | 710308609.25475609,
17 | 5073006703.31318,
18 | 42241259442.990211],
19 | 'ranges': [5.0366845333122605e-07,
20 | 15.619130152703306,
21 | -14.501347616032602,
22 | 2.7484964548137394],
23 | 'sLogMoments': [1.0,
24 | 0.80711500195674524,
25 | 0.656974252137142,
26 | 0.53872445396440571,
27 | 0.4446626223157536,
28 | 0.36919408574024815,
29 | 0.30818132453630431,
30 | 0.25851852122086599,
31 | 0.21784462174490385],
32 | 'sMoments': [1.0,
33 | 0.063939368177348854,
34 | 0.0081649372041997393,
35 | 0.0015608844878646378,
36 | 0.00039740349129520455,
37 | 0.00012675699302677922,
38 | 4.8922189070036127e-05,
39 | 2.2370073592382382e-05,
40 | 1.1925648630801986e-05]}
--------------------------------------------------------------------------------
/cvx_estimator/gauss_estimator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | import scipy.stats
4 |
5 | import estimator
6 |
class GaussEstimator(estimator.Estimator):
    """Estimator that models the data as a normal distribution.

    The mean and standard deviation are derived from the stored moments, and
    quantiles come from the normal inverse CDF rescaled to [a_min, a_max].
    """

    def __init__(self, k):
        """
        :param k: number of moments, forwarded to the base class.
        """
        super().__init__(k)
        self.mu = 0
        self.std = 1

    def solve(self):
        """Derive ``mu`` and ``std`` from moments[1] and moments[2]."""
        self.mu = self.moments[1]
        variance = self.moments[2] - self.mu * self.mu
        self.std = math.sqrt(variance)

    def estimate(self, p: float):
        """Return the ``p``-quantile of the fitted normal on the data scale.

        :param p: target cumulative fraction.
        """
        span = self.a_max - self.a_min
        unit_quantile = scipy.stats.norm.ppf(p, loc=self.mu, scale=self.std)
        return unit_quantile * span + self.a_min
--------------------------------------------------------------------------------
/cvx_estimator/hepmass.py:
--------------------------------------------------------------------------------
1 | data = {'logSums': [],
2 | 'powerSums': [
3 | 10500000.0,
4 | 171590.10344086567,
5 | 10600692.901931819,
6 | 3656024.1204772266,
7 | 27999983.535580769,
8 | 26085580.4679562,
9 | 112571037.57246359,
10 | 178035035.71434641,
11 | 597052564.3216269,
12 | 1272051754.8467662,
13 | 3862170673.2579589,
14 | 9736800905.3333244,
15 | 28892120151.48772,
16 | 80400830561.547516,
17 | 241656673866.27899,
18 | 717077710760.47144,
19 | 2216395701960.5981,
20 | 6900059165428.8955,
21 | 22051018347144.91,
22 | 71474655616939.391],
23 | 'ranges': [
24 | -1.960548996925354,
25 | 4.378281593322753,
26 | 0,
27 | 1
28 | ],
29 | 'sLogMoments': [],
30 | 'sMoments': [
31 | 1.0,
32 | 0.31186997087129814,
33 | 0.12238242373563772,
34 | 0.055008244827096542,
35 | 0.027220887553311052,
36 | 0.014487304468093964,
37 | 0.0081629650675412203,
38 | 0.0048149951778547577,
39 | 0.0029486583576583472,
40 | 0.0018630257219128987,
41 | 0.0012086628166143047,
42 | 0.00080219348365761188,
43 | 0.00054310728720349658,
44 | 0.00037422477800937956,
45 | 0.00026195604863845853,
46 | 0.0001860103093390545,
47 | 0.00013382691163873778,
48 | 9.746061695181729e-05,
49 | 7.1788126925935228e-05,
50 | 5.3448399906880567e-05
51 | ]}
--------------------------------------------------------------------------------
/cvx_estimator/lesion_study.py:
--------------------------------------------------------------------------------
1 | import milan
2 | import exponential
3 | import hepmass
4 | import estimator
5 | import cvx_estimator
6 | import gauss_estimator
7 | import time
8 | import pandas as pd
9 | import numpy
10 | import math
11 |
def main():
    """Benchmark every solver on every dataset and write ``lesion_results.csv``.

    For each (dataset, solver) pair the solver is loaded with the dataset's
    range and first ``k`` scaled moments, solved once, then solved repeatedly
    to measure the mean solve time. One result row is emitted per quantile.
    """
    ps = numpy.linspace(0, 1, 21)
    # Replace the exact 0 and 1 endpoints with 0.01 and 0.99.
    ps[0] = 0.01
    ps[-1] = 0.99
    k = 7
    datasets = {
        "milan": milan.data,
        "exponential": exponential.data,
        "hepmass": hepmass.data
    }
    # Datasets flagged True use the log-moment statistics and log-space range.
    isLog = {
        "milan": True,
        "exponential": False,
        "hepmass": False
    }
    solvers = {
        "lp": cvx_estimator.CvxEstimator(k, 1000, solver="lp"),
        "maxent": cvx_estimator.CvxEstimator(k, 1000, solver="maxent"),
        "mindensity": cvx_estimator.CvxEstimator(k, 1000, solver="mindensity"),
        "gaussian": gauss_estimator.GaussEstimator(k),
    }
    # Timed repetitions per solver. For a quick smoke run, lower these
    # (e.g. lp: 10 and 1 for the others).
    num_trials = {
        "lp": 500,
        "maxent": 10,
        "mindensity": 50,
        "gaussian": 1000,
    }
    results = []
    for dname in datasets:
        print(dname)
        data = datasets[dname]
        use_log = isLog[dname]
        distributions = {}

        for sname in solvers:
            print(sname)
            e = solvers[sname]
            if use_log:
                e.set_statistics(
                    data["ranges"][2],
                    data["ranges"][3],
                    data["sLogMoments"][:k]
                )
            else:
                e.set_statistics(
                    data["ranges"][0],
                    data["ranges"][1],
                    data["sMoments"][:k]
                )
            # First solve happens outside the timed region; its result is kept
            # for optional plotting.
            distributions[sname] = e.solve()

            trials = num_trials[sname]
            # perf_counter is monotonic and high resolution, unlike time.time.
            start_time = time.perf_counter()
            for _ in range(trials):
                e.solve()
            end_time = time.perf_counter()
            # Mean time per solve, in nanoseconds.
            query_time = ((end_time - start_time) / trials) * 1e9

            for p in ps:
                q_est = e.estimate(p)
                if use_log:
                    # Estimates were made in log space; map back.
                    q_est = math.exp(q_est)
                results.append({
                    "dataset": dname,
                    "size_param": k,
                    "sketch": sname,
                    "query_time": query_time,
                    "q": "{0:.3g}".format(p),
                    "quantile_estimate": q_est
                })

    pd.DataFrame(results).to_csv("lesion_results.csv", index=False)
87 |
88 | # import matplotlib.pyplot as plt
89 | # import numpy as np
90 | # plt.figure()
91 | # xs = np.linspace(0, 1, 1000)
92 | # for sname in solvers:
93 | # plt.plot(xs, distributions[sname], label=sname)
94 | # plt.legend()
95 | # plt.show()
96 |
97 |
98 | if __name__ == "__main__":
99 | main()
--------------------------------------------------------------------------------
/cvx_estimator/milan.py:
--------------------------------------------------------------------------------
1 | data = {'logSums': [81109777.0,
2 | 39954311.084389985,
3 | 975086507.65943003,
4 | 405866981.35082442,
5 | 22518788911.620308,
6 | -22190675853.522003,
7 | 840220861538.83716,
8 | -3008231730689.7461,
9 | 47081672847213.734,
10 | -301156754002264.44,
11 | 3606454337590192.0,
12 | -30859127373541904.0,
13 | 3.4368887948251686e+17,
14 | -3.4172450245762365e+18,
15 | 3.8032805624376271e+19,
16 | -4.1088294003414683e+20,
17 | 4.6743717140718132e+21,
18 | -5.3111237684754928e+22,
19 | 6.1969914256246635e+23,
20 | -7.2792735817422283e+24],
21 | 'powerSums': [81109777.0,
22 | 2982688169.6811647,
23 | 978439974358.73865,
24 | 871718166698519.75,
25 | 1.3802820224027354e+18,
26 | 3.1721196216284679e+21,
27 | 9.4920224710331096e+24,
28 | 3.4822245172502998e+28,
29 | 1.5113747392231059e+32,
30 | 7.5487997054766176e+35,
31 | 4.2236053442107682e+39,
32 | 2.5794131452332432e+43,
33 | 1.6810991725372064e+47,
34 | 1.1485760824967742e+51,
35 | 8.1186456852298384e+54,
36 | 5.8811422826709824e+58,
37 | 4.3371573032664479e+62,
38 | 3.2410617072895531e+66,
39 | 2.4461380713938232e+70,
40 | 1.8602581958318741e+74],
41 | 'ranges': [2.3314976995293306e-06,
42 | 7936.2653798841566,
43 | -12.968999707389781,
44 | 8.9791980884111684],
45 | 'sLogMoments': [1.0,
46 | 0.61333487750323645,
47 | 0.40063179991927766,
48 | 0.27453173426455313,
49 | 0.19502432705501629,
50 | 0.14233442579727892,
51 | 0.10602004766100606,
52 | 0.080221796729629868,
53 | 0.061462957528847437,
54 | 0.047575254996741687,
55 | 0.037147332790898123,
56 | 0.029227356882084689,
57 | 0.023154751016763224,
58 | 0.018460507742974343,
59 | 0.014805560178125263,
60 | 0.01194121507705594,
61 | 0.0096829127542519666,
62 | 0.0078923483112435695,
63 | 0.0064650172819627847,
64 | 0.0053213826228772198],
65 | 'sMoments': [1.0,
66 | 0.0046335988796396103,
67 | 0.00019152638468741077,
68 | 2.1500786230811892e-05,
69 | 4.2897293791083479e-06,
70 | 1.2422111949367304e-06,
71 | 4.6836934091972981e-07,
72 | 2.1650617554231916e-07,
73 | 1.1840485146229515e-07,
74 | 7.4517634271396796e-08,
75 | 5.2534947875489019e-08,
76 | 4.0426831294724278e-08,
77 | 3.319907214581448e-08,
78 | 2.8580919620340124e-08,
79 | 2.5455629231168828e-08,
80 | 2.323516458777822e-08,
81 | 2.1591014984505603e-08,
82 | 2.0330076044202664e-08,
83 | 1.9333767367484097e-08,
84 | 1.8526465267492987e-08]}
--------------------------------------------------------------------------------
/javamsketch/accBench.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launch AccuracyBench with a fixed 10 GB heap, forwarding all arguments.
# The classpath is the benchmark jar plus the entries listed in
# quantilebench/cp.txt. Quoting keeps the classpath and each forwarded
# argument intact when they contain spaces or glob characters.
java -Xmx10g -Xms10g -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" AccuracyBench "$@"
3 |
--------------------------------------------------------------------------------
/javamsketch/confs/cross_exp.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "cross-exp",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "cellFractions": [0.00005, 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1.0],
7 | "numTrials": 10,
8 | "numSolveTrials": 3000,
9 | "methods": {
10 | "cmoments": [11.0],
11 | "yahoo": [32.0],
12 | "random": [40.0],
13 | "spark_gk": [50.0],
14 | "gk_adaptive": [40.0]
15 | },
16 | "quantiles": [
17 | 0.01, 0.05, 0.1, 0.15,
18 | 0.2, 0.25 ,0.3, 0.35,
19 | 0.4, 0.45, 0.5, 0.55,
20 | 0.6, 0.65, 0.7, 0.75,
21 | 0.8, 0.85, 0.9, 0.95,
22 | 0.99
23 | ],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/cross_hepmass.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "cross-hepmass",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "cellFractions": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1.0],
7 | "numTrials": 10,
8 | "numSolveTrials": 3000,
9 | "methods": {
10 | "cmoments": [11.0],
11 | "yahoo": [32.0],
12 | "random": [40.0],
13 | "spark_gk": [50.0],
14 | "gk_adaptive": [40.0]
15 | },
16 | "quantiles": [
17 | 0.01, 0.05, 0.1, 0.15,
18 | 0.2, 0.25 ,0.3, 0.35,
19 | 0.4, 0.45, 0.5, 0.55,
20 | 0.6, 0.65, 0.7, 0.75,
21 | 0.8, 0.85, 0.9, 0.95,
22 | 0.99
23 | ],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/cross_milan.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "cross-milan",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "cellFractions": [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 1.0],
7 | "numTrials": 10,
8 | "numSolveTrials": 3000,
9 | "methods": {
10 | "cmoments": [11.0],
11 | "yahoo": [32.0],
12 | "random": [40.0],
13 | "spark_gk": [50.0],
14 | "gk_adaptive": [60.0]
15 | },
16 | "quantiles": [
17 | 0.01, 0.05, 0.1, 0.15,
18 | 0.2, 0.25 ,0.3, 0.35,
19 | 0.4, 0.45, 0.5, 0.55,
20 | 0.6, 0.65, 0.7, 0.75,
21 | 0.8, 0.85, 0.9, 0.95,
22 | 0.99
23 | ],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/discrete_bench.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "discrete_bench",
3 | "verbose": false,
4 |
5 | "methods": {
6 | "cmoments": [5.0, 11.0],
7 | "yahoo": [32.0],
8 | "random": [40.0],
9 | "spark_gk": [50.0]
10 | },
11 | "numSolveTrials": 1,
12 | "totalSize": 100000,
13 | "cardinalities": [6, 10, 30, 100, 300, 1000],
14 | "quantiles": [
15 | 0.01, 0.05, 0.1, 0.15,
16 | 0.2, 0.25 ,0.3, 0.35,
17 | 0.4, 0.45, 0.5, 0.55,
18 | 0.6, 0.65, 0.7, 0.75,
19 | 0.8, 0.85, 0.9, 0.95,
20 | 0.99]
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/find_exp.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_exp",
3 | "fileName": "../../datasets/exponential_10M.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "tdigest": [1.0, 1.5, 2.0, 2.5, 3.0, 4.0],
9 | "yahoo": [8.0, 16.0, 32.0],
10 | "sampling": [100.0, 250.0, 400.0],
11 | "spark_gk": [20.0, 30.0, 40.0, 50.0],
12 | "approx_histogram": [100.0, 200.0, 400.0, 1000.0],
13 | "histogram": [20.0, 30.0, 40.0, 50.0, 60.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/find_hepmass_d.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_hepmass_d",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 1,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 9.0, 13.0],
9 | "approx_histogram": [20.0, 40.0, 80.0, 100.0],
10 | "tdigest": [1.0, 1.3, 1.5, 1.7, 2.0],
11 | "histogram": [5.0, 7.0, 10.0, 15.0, 20.0],
12 | "spark_gk": [20.0, 30.0, 40.0, 50.0]
13 | },
14 | "quantiles": [
15 | 0.01, 0.05, 0.1, 0.15,
16 | 0.2, 0.25 ,0.3, 0.35,
17 | 0.4, 0.45, 0.5, 0.55,
18 | 0.6, 0.65, 0.7, 0.75,
19 | 0.8, 0.85, 0.9, 0.95,
20 | 0.99],
21 |
22 | "verbose": false,
23 | "calcError": true,
24 | "appendTimeStamp": false
25 | }
--------------------------------------------------------------------------------
/javamsketch/confs/find_hepmass_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_hepmass_r",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 20,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "sampling": [500.0, 800.0, 1000.0],
9 | "yahoo": [8.0, 16.0, 32.0]
10 | },
11 | "quantiles": [
12 | 0.01, 0.05, 0.1, 0.15,
13 | 0.2, 0.25 ,0.3, 0.35,
14 | 0.4, 0.45, 0.5, 0.55,
15 | 0.6, 0.65, 0.7, 0.75,
16 | 0.8, 0.85, 0.9, 0.95,
17 | 0.99],
18 |
19 | "verbose": false,
20 | "calcError": true,
21 | "appendTimeStamp": false
22 | }
--------------------------------------------------------------------------------
/javamsketch/confs/find_milan_ahist.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_milan",
3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv",
4 | "columnIdx": 0,
5 | "numTrials": 1,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "approx_histogram": [10000.0, 20000.0, 40000.0],
9 | "histogram": [10000.0, 20000.0, 40000.0]
10 | },
11 | "quantiles": [
12 | 0.01, 0.05, 0.1, 0.15,
13 | 0.2, 0.25 ,0.3, 0.35,
14 | 0.4, 0.45, 0.5, 0.55,
15 | 0.6, 0.65, 0.7, 0.75,
16 | 0.8, 0.85, 0.9, 0.95,
17 | 0.99],
18 |
19 | "verbose": false,
20 | "calcError": true,
21 | "appendTimeStamp": false
22 | }
--------------------------------------------------------------------------------
/javamsketch/confs/find_milan_d.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_milan_d",
3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv",
4 | "columnIdx": 0,
5 | "numTrials": 1,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "approx_histogram": [50.0, 100.0, 200.0],
9 | "histogram": [50.0, 100.0, 200.0],
10 | "tdigest": [4.0, 5.0, 6.0, 7.0, 8.0],
11 | "spark_gk": [20.0, 30.0, 40.0, 50.0],
12 | "cmoments": [8.0, 9.0, 10.0]
13 | },
14 | "quantiles": [
15 | 0.01, 0.05, 0.1, 0.15,
16 | 0.2, 0.25 ,0.3, 0.35,
17 | 0.4, 0.45, 0.5, 0.55,
18 | 0.6, 0.65, 0.7, 0.75,
19 | 0.8, 0.85, 0.9, 0.95,
20 | 0.99],
21 |
22 | "verbose": false,
23 | "calcError": true,
24 | "appendTimeStamp": false
25 | }
--------------------------------------------------------------------------------
/javamsketch/confs/find_milan_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_milan_r",
3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "yahoo": [8.0, 16.0, 32.0],
9 | "sampling": [800.0, 1000.0, 1300.0, 1600.0, 2000.0]
10 | },
11 | "quantiles": [
12 | 0.01, 0.05, 0.1, 0.15,
13 | 0.2, 0.25 ,0.3, 0.35,
14 | 0.4, 0.45, 0.5, 0.55,
15 | 0.6, 0.65, 0.7, 0.75,
16 | 0.8, 0.85, 0.9, 0.95,
17 | 0.99],
18 |
19 | "verbose": false,
20 | "calcError": true,
21 | "appendTimeStamp": false
22 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/find_exp_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_exp_gk",
3 | "fileName": "../../datasets/exponential_10M.csv",
4 | "columnIdx": 0,
5 | "numTrials": 2,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 50.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/find_hepmass_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_hepmass_gk",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 2,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 50.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/find_milan_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_milan_gk",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 2,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_aria_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_aria_gk",
3 | "fileName": "/lfs/1/jding/datasets/aria-grouped.csv",
4 | "pregrouped": true,
5 | "numTrials": 5,
6 | "methods": {
7 | "gk_adaptive": [5.0, 10.0, 20.0, 50.0, 100.0]
8 | },
9 | "quantiles": [
10 | 0.01, 0.05, 0.1, 0.15,
11 | 0.2, 0.25 ,0.3, 0.35,
12 | 0.4, 0.45, 0.5, 0.55,
13 | 0.6, 0.65, 0.7, 0.75,
14 | 0.8, 0.85, 0.9, 0.95,
15 | 0.99],
16 |
17 | "verbose": false,
18 | "calcError": false,
19 | "appendTimeStamp": false
20 | }
21 |
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_exp_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_exp_gk",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_expb_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_expb_gk",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 2000,
6 | "numTrials": 20,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_gauss_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_gauss_gk",
3 | "fileName": "gauss",
4 | "columnIdx": 0,
5 | "cellSize": 10000,
6 | "numTrials": 10,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_hepmass_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_hepmass_gk",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_hepmassb_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_hepmassb_gk",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 2000,
6 | "numTrials": 20,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_milan_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milan_gk",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/merge_milanb_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milanb_gk",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 2000,
6 | "numTrials": 20,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/point_exp_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_exp_gk",
3 | "fileName": "../../datasets/exponential_10M.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 300,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/point_hepmass_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_hepmass_gk",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/point_milan_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_milan_gk",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/point_occupancy_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_occupancy_gk",
3 | "fileName": "../../datasets/occupancy_2.csv",
4 | "columnIdx": 3,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/point_power_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_power_gk",
3 | "fileName": "../../datasets/household_power.csv",
4 | "columnIdx": 2,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/point_retail_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_retail_gk",
3 | "fileName": "../../datasets/retail_quantity_pos.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 300,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 40.0, 60.0, 100.0, 200.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/gk/strong_scaling_hepmass_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "strong_scaling_hepmass_gk",
3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 20,
7 | "methods": {
8 | "gk_adaptive": [40.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 | "numMergeThreads": [
18 | 1, 2, 4, 8, 12, 16, 24, 32],
19 | "numDuplications": 8,
20 |
21 | "verbose": false,
22 | "calcError": false,
23 | "appendTimeStamp": false
24 | }
25 |
--------------------------------------------------------------------------------
/javamsketch/confs/gk/strong_scaling_milan_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "strong_scaling_milan_gk",
3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "gk_adaptive": [60.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 | "numMergeThreads": [
18 | 1, 2, 4, 8, 12, 16, 24, 32],
19 | "numDuplications": 1,
20 |
21 | "verbose": false,
22 | "calcError": false,
23 | "appendTimeStamp": false
24 | }
25 |
--------------------------------------------------------------------------------
/javamsketch/confs/gk/weak_scaling_hepmass_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "weak_scaling_hepmass_gk",
3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 20,
7 | "methods": {
8 | "gk_adaptive": [40.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 | "numMergeThreads": [
18 | 1, 2, 4, 8, 12, 16, 24, 32],
19 | "numDuplications": 8,
20 | "weakScaling": true,
21 |
22 | "verbose": false,
23 | "calcError": false,
24 | "appendTimeStamp": false
25 | }
26 |
--------------------------------------------------------------------------------
/javamsketch/confs/gk/weak_scaling_milan_gk.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "weak_scaling_milan_gk",
3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 5,
7 | "methods": {
8 | "gk_adaptive": [60.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 | "numMergeThreads": [
18 | 1, 2, 4, 8, 12, 16, 24, 32],
19 | "numDuplications": 1,
20 | "weakScaling": true,
21 |
22 | "verbose": false,
23 | "calcError": false,
24 | "appendTimeStamp": false
25 | }
26 |
--------------------------------------------------------------------------------
/javamsketch/confs/lesion_solve.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "lesion_solve",
3 | "k": 11,
4 | "verbose": false
5 | }
--------------------------------------------------------------------------------
/javamsketch/confs/log_lesion.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "log_lesion",
3 | "ks": [3, 5, 7, 9, 11, 13, 15],
4 | "numSolveTrials": 2,
5 | "verbose": false
6 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_aria.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_aria",
3 | "fileName": "/lfs/1/jding/datasets/aria-grouped.csv",
4 | "pregrouped": true,
5 | "numTrials": 5,
6 | "methods": {
7 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0],
8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0],
14 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
15 | },
16 | "quantiles": [
17 | 0.01, 0.05, 0.1, 0.15,
18 | 0.2, 0.25 ,0.3, 0.35,
19 | 0.4, 0.45, 0.5, 0.55,
20 | 0.6, 0.65, 0.7, 0.75,
21 | 0.8, 0.85, 0.9, 0.95,
22 | 0.99],
23 |
24 | "verbose": false,
25 | "calcError": false,
26 | "appendTimeStamp": true
27 | }
28 |
--------------------------------------------------------------------------------
/javamsketch/confs/merge_exp.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_exp",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": false,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_exp_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_exp_m",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_expb.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_expb",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 2000,
6 | "numTrials": 20,
7 | "methods": {
8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "random": [5.0, 10.0, 20.0, 40.0, 100.0],
11 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
12 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
13 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
14 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0],
15 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_gauss.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_gauss",
3 | "fileName": "gauss",
4 | "columnIdx": 0,
5 | "cellSize": 10000,
6 | "numTrials": 5,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0],
9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0],
12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_hepmass.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_hepmass",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 5,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0],
9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0],
12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_hepmassb.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_hepmassb",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 2000,
6 | "numTrials": 20,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0],
9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0],
12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_milan.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milan",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 5,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0],
9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
11 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
12 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
13 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
14 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
15 | },
16 | "quantiles": [
17 | 0.01, 0.05, 0.1, 0.15,
18 | 0.2, 0.25 ,0.3, 0.35,
19 | 0.4, 0.45, 0.5, 0.55,
20 | 0.6, 0.65, 0.7, 0.75,
21 | 0.8, 0.85, 0.9, 0.95,
22 | 0.99],
23 |
24 | "verbose": false,
25 | "calcError": false,
26 | "appendTimeStamp": false
27 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_milan_ahist.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milan_ahist",
3 | "fileName": "../../datasets/internet-mi-2013-11-01_09.csv",
4 | "columnIdx": 0,
5 | "cellSize": 100,
6 | "numTrials": 10,
7 | "methods": {
8 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_milan_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milan_m",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/merge_milanb.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milanb",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 2000,
6 | "numTrials": 20,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0],
9 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
10 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
11 | "random": [5.0, 10.0, 20.0, 40.0, 100.0],
12 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
13 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
14 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
15 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 |
25 | "verbose": false,
26 | "calcError": false,
27 | "appendTimeStamp": false
28 | }
--------------------------------------------------------------------------------
/javamsketch/confs/outlier_bench.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "outlier_bench",
3 | "fileName": "../../datasets/gauss_10M.csv",
4 | "columnIdx": 0,
5 | "verbose": true,
6 |
7 | "methods": {
8 | "cmoments": [11.0],
9 | "histogram": [20.0, 100.0],
10 | "yahoo": [32.0],
11 | "random": [40.0],
12 | "spark_gk": [50.0]
13 | },
14 | "numSolveTrials": 1,
15 | "distances": [5.0, 10.0, 50.0, 100.0, 500.0, 1000.0],
16 | "fractions": [0.01, 0.01, 0.01, 0.01, 0.01, 0.01],
17 | "scaleFactor": 0.1,
18 | "quantiles": [
19 | 0.01, 0.05, 0.1, 0.15,
20 | 0.2, 0.25 ,0.3, 0.35,
21 | 0.4, 0.45, 0.5, 0.55,
22 | 0.6, 0.65, 0.7, 0.75,
23 | 0.8, 0.85, 0.9, 0.95,
24 | 0.99]
25 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_exp.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_exp",
3 | "fileName": "../../datasets/exponential_10M.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0],
13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_exp_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_exp_m",
3 | "fileName": "../../datasets/exponential_10M.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 300,
7 | "methods": {
8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 15.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_hepmass.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_hepmass",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_hepmass_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_hepmass_m",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_milan.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_milan",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "tdigest": [1.0, 1.3, 1.5, 2.0, 4.0, 5.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "histogram": [10.0, 15.0, 20.0, 40.0, 100.0, 200.0],
13 | "approx_histogram": [20.0, 50.0, 100.0, 200.0, 500.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_milan_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_milan_m",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "cmoments": [2.0, 3.0, 5.0, 7.0, 9.0, 10.0, 11.0, 13.0, 15.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_occupancy.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_occupancy",
3 | "fileName": "../../datasets/occupancy_2.csv",
4 | "columnIdx": 3,
5 | "numTrials": 10,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0],
13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_occupancy_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_occupancy_m",
3 | "fileName": "../../datasets/occupancy_2.csv",
4 | "columnIdx": 3,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 15.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_power.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_power",
3 | "fileName": "../../datasets/household_power.csv",
4 | "columnIdx": 2,
5 | "numTrials": 10,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0],
13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_power_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_power_m",
3 | "fileName": "../../datasets/household_power.csv",
4 | "columnIdx": 2,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 13.0, 15.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_retail.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_retail",
3 | "fileName": "../../datasets/retail_quantity_pos.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "tdigest": [1.0, 1.5, 2.0, 4.0, 8.0, 16.0],
9 | "yahoo": [2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
10 | "sampling": [10.0, 50.0, 100.0, 400.0, 1000.0, 2000.0],
11 | "spark_gk": [5.0, 10.0, 20.0, 50.0, 100.0],
12 | "approx_histogram": [50.0, 100.0, 200.0, 500.0, 1000.0],
13 | "histogram": [10.0, 20.0, 40.0, 100.0, 200.0]
14 | },
15 | "quantiles": [
16 | 0.01, 0.05, 0.1, 0.15,
17 | 0.2, 0.25 ,0.3, 0.35,
18 | 0.4, 0.45, 0.5, 0.55,
19 | 0.6, 0.65, 0.7, 0.75,
20 | 0.8, 0.85, 0.9, 0.95,
21 | 0.99],
22 |
23 | "verbose": false,
24 | "calcError": true,
25 | "appendTimeStamp": false
26 | }
--------------------------------------------------------------------------------
/javamsketch/confs/point_retail_m.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_retail_m",
3 | "fileName": "../../datasets/retail_quantity_pos.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 300,
7 | "methods": {
8 | "cmoments": [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/find_hepmass_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_hepmass_rand",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 1,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 50.0, 100.0],
9 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
10 | },
11 | "quantiles": [
12 | 0.01, 0.05, 0.1, 0.15,
13 | 0.2, 0.25 ,0.3, 0.35,
14 | 0.4, 0.45, 0.5, 0.55,
15 | 0.6, 0.65, 0.7, 0.75,
16 | 0.8, 0.85, 0.9, 0.95,
17 | 0.99],
18 |
19 | "verbose": false,
20 | "calcError": true,
21 | "appendTimeStamp": false
22 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/find_milan_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "find_milan_rand",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "numSolveTrials": 1,
7 | "methods": {
8 | "gk_adaptive": [5.0, 10.0, 20.0, 50.0, 100.0],
9 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
10 | },
11 | "quantiles": [
12 | 0.01, 0.05, 0.1, 0.15,
13 | 0.2, 0.25 ,0.3, 0.35,
14 | 0.4, 0.45, 0.5, 0.55,
15 | 0.6, 0.65, 0.7, 0.75,
16 | 0.8, 0.85, 0.9, 0.95,
17 | 0.99],
18 |
19 | "verbose": false,
20 | "calcError": true,
21 | "appendTimeStamp": false
22 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/merge_exp_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_exp_r",
3 | "fileName": "../../datasets/exponential_100M.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/merge_gauss_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_gauss_r",
3 | "fileName": "gauss",
4 | "columnIdx": 0,
5 | "cellSize": 10000,
6 | "numTrials": 5,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/merge_milan_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "merge_milan_r",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/point_exp_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_exp_r",
3 | "fileName": "../../datasets/exponential_10M.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 300,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/point_hepmass_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_hepmass_r",
3 | "fileName": "../../datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/point_milan_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_milan_r",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/point_occupancy_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_occupancy_r",
3 | "fileName": "../../datasets/occupancy_2.csv",
4 | "columnIdx": 3,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": false,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/point_power_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_power_r",
3 | "fileName": "../../datasets/household_power.csv",
4 | "columnIdx": 2,
5 | "numTrials": 5,
6 | "numSolveTrials": 1000,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/random/point_retail_r.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "point_retail_r",
3 | "fileName": "../../datasets/retail_quantity_pos.csv",
4 | "columnIdx": 0,
5 | "numTrials": 5,
6 | "numSolveTrials": 300,
7 | "methods": {
8 | "random": [5.0, 10.0, 20.0, 40.0, 100.0]
9 | },
10 | "quantiles": [
11 | 0.01, 0.05, 0.1, 0.15,
12 | 0.2, 0.25 ,0.3, 0.35,
13 | 0.4, 0.45, 0.5, 0.55,
14 | 0.6, 0.65, 0.7, 0.75,
15 | 0.8, 0.85, 0.9, 0.95,
16 | 0.99],
17 |
18 | "verbose": false,
19 | "calcError": true,
20 | "appendTimeStamp": false
21 | }
--------------------------------------------------------------------------------
/javamsketch/confs/sort_milan.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "sort_milan",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "methods": ["sort", "select", "yahoo", "random"],
7 | "sizeParams": [1.0, 1.0, 32.0, 40.0],
8 |
9 | "verbose": true
10 | }
--------------------------------------------------------------------------------
/javamsketch/confs/sort_milan_approx.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "sort_milan_approx",
3 | "fileName": "../../datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "numTrials": 10,
6 | "methods": ["yahoo", "random"],
7 | "sizeParams": [32.0, 40.0],
8 |
9 | "verbose": true
10 | }
--------------------------------------------------------------------------------
/javamsketch/confs/strong_scaling_hepmass.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "strong_scaling_hepmass",
3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 20,
7 | "methods": {
8 | "cmoments": [4.0],
9 | "tdigest": [1.5],
10 | "yahoo": [32.0],
11 | "sampling": [1000.0],
12 | "spark_gk": [50.0],
13 | "histogram": [15.0],
14 | "approx_histogram": [100.0],
15 | "random": [40.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 | "numMergeThreads": [
25 | 1, 2, 4, 8, 12, 16, 24, 32],
26 | "numDuplications": 8,
27 |
28 | "verbose": false,
29 | "calcError": false,
30 | "appendTimeStamp": true
31 | }
32 |
--------------------------------------------------------------------------------
/javamsketch/confs/strong_scaling_milan.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "strong_scaling_milan",
3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 10,
7 | "methods": {
8 | "cmoments": [11.0],
9 | "tdigest": [5.0],
10 | "yahoo": [32.0],
11 | "sampling": [1000.0],
12 | "spark_gk": [50.0],
13 | "histogram": [100.0],
14 | "approx_histogram": [100.0],
15 | "random": [40.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 | "numMergeThreads": [
25 | 1, 2, 4, 8, 12, 16, 24, 32],
26 | "numDuplications": 1,
27 |
28 | "verbose": false,
29 | "calcError": false,
30 | "appendTimeStamp": true
31 | }
32 |
--------------------------------------------------------------------------------
/javamsketch/confs/weak_scaling_hepmass.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "weak_scaling_hepmass",
3 | "fileName": "/lfs/1/egan1/datasets/hepmass_l.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 20,
7 | "methods": {
8 | "cmoments": [4.0],
9 | "tdigest": [1.5],
10 | "yahoo": [32.0],
11 | "sampling": [1000.0],
12 | "spark_gk": [50.0],
13 | "histogram": [15.0],
14 | "approx_histogram": [100.0],
15 | "random": [40.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 | "numMergeThreads": [
25 | 1, 2, 4, 8, 12, 16, 24, 32],
26 | "numDuplications": 8,
27 | "weakScaling": true,
28 |
29 | "verbose": false,
30 | "calcError": false,
31 | "appendTimeStamp": true
32 | }
33 |
--------------------------------------------------------------------------------
/javamsketch/confs/weak_scaling_milan.json:
--------------------------------------------------------------------------------
1 | {
2 | "testName": "weak_scaling_milan",
3 | "fileName": "/lfs/1/egan1/datasets/internet-mi-2013-11.csv",
4 | "columnIdx": 0,
5 | "cellSize": 200,
6 | "numTrials": 5,
7 | "methods": {
8 | "cmoments": [11.0],
9 | "tdigest": [5.0],
10 | "yahoo": [32.0],
11 | "sampling": [1000.0],
12 | "spark_gk": [50.0],
13 | "histogram": [100.0],
14 | "approx_histogram": [100.0],
15 | "random": [40.0]
16 | },
17 | "quantiles": [
18 | 0.01, 0.05, 0.1, 0.15,
19 | 0.2, 0.25 ,0.3, 0.35,
20 | 0.4, 0.45, 0.5, 0.55,
21 | 0.6, 0.65, 0.7, 0.75,
22 | 0.8, 0.85, 0.9, 0.95,
23 | 0.99],
24 | "numMergeThreads": [
25 | 1, 2, 4, 8, 12, 16, 24, 32],
26 | "numDuplications": 1,
27 | "weakScaling": true,
28 |
29 | "verbose": false,
30 | "calcError": false,
31 | "appendTimeStamp": true
32 | }
33 |
--------------------------------------------------------------------------------
/javamsketch/discretebench.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launches the DiscreteBench main class with a fixed 10 GB heap and forwards
# every command-line argument to it.
# The classpath is the benchmark jar followed by the contents of
# quantilebench/cp.txt. It is quoted as one word so that spaces or glob
# characters in dependency paths cannot split it, and "$@" keeps each
# argument intact even when it contains whitespace.
java -Xmx10g -Xms10g \
    -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
    DiscreteBench "$@"
4 |
--------------------------------------------------------------------------------
/javamsketch/genCP.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Has Maven compute the dependency classpath and write it to cp.txt
# (the file name is given through the mdep.outputFile property).
mvn dependency:build-classpath "-Dmdep.outputFile=cp.txt"
3 |
4 |
--------------------------------------------------------------------------------
/javamsketch/lesionSolve.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launches the SolveLesionBench main class with a fixed 10 GB heap and
# forwards every command-line argument to it.
# The classpath (benchmark jar plus the contents of quantilebench/cp.txt) is
# quoted as one word, and "$@" keeps each argument intact even when it
# contains whitespace.
java -Xmx10g -Xms10g \
    -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
    SolveLesionBench "$@"
--------------------------------------------------------------------------------
/javamsketch/loglesion.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launches the LogMomentsLesion main class with a fixed 10 GB heap and
# forwards every command-line argument to it.
# The classpath (benchmark jar plus the contents of quantilebench/cp.txt) is
# quoted as one word, and "$@" keeps each argument intact even when it
# contains whitespace.
java -Xmx10g -Xms10g \
    -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
    LogMomentsLesion "$@"
4 |
--------------------------------------------------------------------------------
/javamsketch/mRuntimeBench.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launches the MSketchBench main class with a fixed 10 GB heap.
# No command-line arguments are forwarded to the benchmark.
# The classpath (benchmark jar plus the contents of quantilebench/cp.txt) is
# quoted as one word so that spaces or glob characters in dependency paths
# cannot split it into several arguments.
java -Xmx10g -Xms10g \
    -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
    MSketchBench
4 |
--------------------------------------------------------------------------------
/javamsketch/mergeBench.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Launches the MergeBench main class with a fixed 10 GB heap and forwards
# every command-line argument to it.
# The classpath (benchmark jar plus the contents of quantilebench/cp.txt) is
# quoted as one word, and "$@" keeps each argument intact even when it
# contains whitespace.
java -Xmx10g -Xms10g \
    -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
    MergeBench "$@"
4 |
--------------------------------------------------------------------------------
/javamsketch/msolver/pom.xml:
--------------------------------------------------------------------------------
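<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <parent>
        <artifactId>java-msketch</artifactId>
        <groupId>futuredata</groupId>
        <version>1.0-SNAPSHOT</version>
    </parent>
    <modelVersion>4.0.0</modelVersion>

    <artifactId>msolver</artifactId>

    <dependencies>
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-math3</artifactId>
            <version>3.6.1</version>
        </dependency>
        <dependency>
            <groupId>com.github.vinhkhuc</groupId>
            <artifactId>lbfgs4j</artifactId>
            <version>0.2.1</version>
        </dependency>
    </dependencies>

</project>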
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/ChebyshevMomentSolver.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.chebyshev.ChebyshevPolynomial;
4 | import msolver.optimizer.NewtonOptimizer;
5 | import org.apache.commons.math3.analysis.solvers.BrentSolver;
6 | import org.apache.commons.math3.analysis.solvers.UnivariateSolver;
7 |
8 | import java.util.Arrays;
9 |
10 | public class ChebyshevMomentSolver {
11 | private double[] d_mus;
12 | private boolean verbose = false;
13 |
14 | private double[] lambdas;
15 | private ChebyshevPolynomial approxCDF;
16 | private boolean isConverged;
17 |
18 | private NewtonOptimizer optimizer;
19 | private int cumFuncEvals;
20 |
21 | public ChebyshevMomentSolver(double[] chebyshev_moments) {
22 | d_mus = chebyshev_moments;
23 | }
24 |
25 | public static ChebyshevMomentSolver fromPowerSums(
26 | double min, double max, double[] powerSums
27 | ) {
28 | double[] scaledChebyMoments = MathUtil.powerSumsToChebyMoments(
29 | min, max, powerSums
30 | );
31 | return new ChebyshevMomentSolver(scaledChebyMoments);
32 | }
33 |
34 | public void setVerbose(boolean flag) {
35 | this.verbose = flag;
36 | }
37 |
38 | public int solve(double tol) {
39 | double[] l_initial = new double[d_mus.length];
40 | return solve(l_initial, tol);
41 | }
42 |
43 | public int solve(double[] l_initial, double tol) {
44 | MaxEntPotential potential = new MaxEntPotential(d_mus);
45 | optimizer = new NewtonOptimizer(potential);
46 | optimizer.setVerbose(verbose);
47 | lambdas = optimizer.solve(l_initial, tol);
48 | isConverged = optimizer.isConverged();
49 | if (verbose) {
50 | System.out.println("Final Polynomial: " + Arrays.toString(lambdas));
51 | }
52 | cumFuncEvals = potential.getCumFuncEvals();
53 |
54 | approxCDF = ChebyshevPolynomial.fit(new MaxEntFunction(lambdas), tol).integralPoly();
55 | return optimizer.getStepCount();
56 | }
57 |
58 | public double[] estimateQuantiles(double[] ps, double min, double max) {
59 | UnivariateSolver bSolver = new BrentSolver(1e-6);
60 | int n = ps.length;
61 | double c = (max + min) / 2;
62 | double r = (max - min) / 2;
63 | double[] quantiles = new double[n];
64 |
65 | for (int i = 0; i < n; i++) {
66 | double p = ps[i];
67 | double q;
68 | if (p <= 0.0) {
69 | q = -1;
70 | } else if (p >= 1.0) {
71 | q = 1;
72 | } else {
73 | q = bSolver.solve(
74 | 100,
75 | (x) -> approxCDF.value(x) - p,
76 | -1,
77 | 1,
78 | 0
79 | );
80 | }
81 | quantiles[i] = q*r+c;
82 | }
83 | return quantiles;
84 | }
85 |
86 | public double estimateCDF(double x) {
87 | return approxCDF.value(x);
88 | }
89 |
90 | public double[] getLambdas() {
91 | return lambdas;
92 | }
93 |
94 | public NewtonOptimizer getOptimizer() {
95 | return optimizer;
96 | }
97 | public int getCumFuncEvals() {
98 | return cumFuncEvals;
99 | }
100 | public double[] getChebyshevMoments() { return d_mus; }
101 |
102 | public boolean isConverged() {
103 | return isConverged;
104 | }
105 | }
106 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/GFunction.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.chebyshev.ChebyshevPolynomial;
4 | import org.apache.commons.math3.analysis.UnivariateFunction;
5 |
6 | class GFunction implements UnivariateFunction {
7 | private boolean useStandardBasis;
8 | private double aCenter, aScale, bCenter, bScale;
9 | private ChebyshevPolynomial cBasis;
10 |
11 | public GFunction(
12 | int k, boolean useStandardBasis,
13 | double aCenter, double aScale,
14 | double bCenter, double bScale
15 | ) {
16 | this.cBasis = ChebyshevPolynomial.basis(k);
17 | this.useStandardBasis = useStandardBasis;
18 | this.aCenter = aCenter;
19 | this.aScale = aScale;
20 | this.bCenter = bCenter;
21 | this.bScale = bScale;
22 | }
23 |
24 | @Override
25 | public double value(double y) {
26 | double x = y * aScale + aCenter;
27 | double gX;
28 | if (useStandardBasis) {
29 | gX = Math.log(x);
30 | } else {
31 | gX = Math.exp(x);
32 | }
33 | double scaledBGX = (gX - bCenter) / bScale;
34 | return cBasis.value(scaledBGX);
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/MaxEntFunction.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.chebyshev.ChebyshevPolynomial;
4 | import org.apache.commons.math3.analysis.UnivariateFunction;
5 | import org.apache.commons.math3.util.FastMath;
6 |
7 | /**
8 | * Solutions to the maximum entropy moment problem have the form exp(-poly(x)).
9 | * It is useful to express the polynomial in a chebyshev basis for better conditioning.
10 | */
11 | public class MaxEntFunction implements UnivariateFunction{
12 | private ChebyshevPolynomial p;
13 | private ChebyshevPolynomial p_approx;
14 | private int funcEvals;
15 |
16 | public int getFuncEvals() {
17 | return funcEvals;
18 | }
19 |
20 | public MaxEntFunction(double[] coeffs) {
21 | this.p = new ChebyshevPolynomial(coeffs);
22 | this.funcEvals = 0;
23 | }
24 |
25 | @Override
26 | public double value(double v) {
27 | return FastMath.exp(-p.value(v));
28 | }
29 |
30 | public double[] moments(int mu_k, double tol) {
31 | p_approx = ChebyshevPolynomial.fit(this, tol);
32 | funcEvals += p_approx.getNumFitEvals();
33 | double[] out_moments = new double[mu_k];
34 | for (int i = 0; i < mu_k; i++) {
35 | ChebyshevPolynomial p_times_moment = p_approx.multiplyByBasis(i);
36 | out_moments[i] = p_times_moment.integrate();
37 | }
38 | return out_moments;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/MaxEntPotential.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.optimizer.FunctionWithHessian;
4 |
5 | /**
6 | * Minimizing this function yields a maxent pdf which matches the the empirical
7 | * moments of a dataset. The function is convex with symmetric positive definite
8 | * hessian and has a global stationary minimum (gradient = 0) at the solution.
9 | */
10 | public class MaxEntPotential implements FunctionWithHessian {
11 | protected double[] d_mus;
12 |
13 | private int cumFuncEvals = 0;
14 | protected double[] lambd;
15 | protected double[] mus;
16 | protected double[] grad;
17 | protected double[][] hess;
18 |
19 | public MaxEntPotential(
20 | double[] d_mus
21 | ) {
22 | this.d_mus = d_mus;
23 |
24 | this.cumFuncEvals = 0;
25 |
26 | int k = d_mus.length;
27 | this.mus = new double[k];
28 | this.grad = new double[k];
29 | this.hess = new double[k][k];
30 | }
31 |
32 | @Override
33 | public int dim() {
34 | return d_mus.length;
35 | }
36 |
37 | @Override
38 | public void computeOnlyValue(double[] point, double tol) {
39 | computeAll(point, tol);
40 | }
41 |
42 | @Override
43 | public void computeAll(double[] lambd, double tol) {
44 | this.lambd = lambd;
45 | int k = lambd.length;
46 | MaxEntFunction f = new MaxEntFunction(lambd);
47 | this.mus = f.moments(k*2, tol);
48 | this.cumFuncEvals += f.getFuncEvals();
49 |
50 | for (int i = 0; i < k; i++) {
51 | this.grad[i] = d_mus[i] - mus[i];
52 | }
53 | for (int i=0; i < k; i++) {
54 | for (int j=0; j <= i; j++) {
55 | this.hess[i][j] = (mus[i+j] + mus[i-j])/2;
56 | }
57 | }
58 | for (int i=0; i < k; i++) {
59 | for (int j=i+1; j < k; j++) {
60 | this.hess[i][j] = hess[j][i];
61 | }
62 | }
63 | }
64 |
65 | @Override
66 | public double getValue() {
67 | double sum = 0.0;
68 | int k = d_mus.length;
69 | for (int i = 0; i < k; i++) {
70 | sum += lambd[i] * d_mus[i];
71 | }
72 | return this.mus[0] + sum;
73 | }
74 |
75 | @Override
76 | public double[] getGradient() {
77 | return grad;
78 | }
79 |
80 | @Override
81 | public double[][] getHessian() {
82 | return hess;
83 | }
84 |
85 | public int getCumFuncEvals() {
86 | return cumFuncEvals;
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/MnatSolver.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import java.util.Arrays;
4 | import java.util.List;
5 |
6 | /**
7 | * https://www.sciencedirect.com/science/article/pii/S0167715208000539
8 | */
9 | public class MnatSolver {
10 | // values expected to lie in [0,1]
11 | public static double[] estimatePDF(
12 | double[] moments
13 | ) {
14 | int a = moments.length - 1;
15 | double[] pdf = new double[a+1];
16 | long[][] binoms = MathUtil.getBinomials(a);
17 |
18 | for (int k = 0; k <= a; k++) {
19 | double sum = 0;
20 | for (int j = k; j <= a; j++) {
21 | double curTerm = (double)binoms[a][j]*(double)binoms[j][k]*moments[j];
22 | if ((j - k) % 2 != 0) {
23 | curTerm *= -1;
24 | }
25 | sum += curTerm;
26 | }
27 | pdf[k] = sum;
28 | }
29 | return pdf;
30 | }
31 |
32 | public static double[] estimateCDF(
33 | double[] moments
34 | ) {
35 | double[] pdf = estimatePDF(moments);
36 | double[] cdf = new double[pdf.length];
37 | cdf[0] = pdf[0];
38 | for (int i = 1; i < pdf.length; i++) {
39 | cdf[i] = cdf[i-1] + pdf[i];
40 | }
41 | return cdf;
42 | }
43 |
44 | public static double[] estimateQuantiles(
45 | double min,
46 | double max,
47 | double[] powerSums,
48 | List ps
49 | ) {
50 | double[] moments = MathUtil.powerSumsToPosMoments(powerSums, min, max);
51 | double[] cdf = estimateCDF(moments);
52 |
53 | int n = ps.size();
54 | double[] qs = new double[n];
55 | int a = powerSums.length - 1;
56 | for (int i = 0; i < n; i++) {
57 | double p = ps.get(i);
58 | int idx = Arrays.binarySearch(cdf, p);
59 | if (idx < 0) {
60 | idx = -(idx + 1);
61 | }
62 | double fracIdx = 0.0;
63 | if (idx > 0) {
64 | // fracIdx = (idx-1.0)*1.0 + (p - cdf[idx - 1]) / (cdf[idx] - cdf[idx-1]);
65 | fracIdx = idx;
66 | }
67 | qs[i] = fracIdx/a * (max - min) + min;
68 | }
69 | return qs;
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/chebyshev/CosScaledFunction.java:
--------------------------------------------------------------------------------
1 | package msolver.chebyshev;
2 |
/**
 * A group of functions that are evaluated together for a given size
 * parameter.
 */
public interface CosScaledFunction {
    /**
     * Evaluates every function in the group.
     *
     * @param N size parameter; the {@code QuadraticCosFunction} implementation
     *          returns {@code N + 1} values per function, sampled at
     *          {@code cos(j * PI / N)} for {@code j = 0..N}
     * @return one row of values per function
     */
    double[][] calc(int N);

    /** @return how many functions this group contains */
    int numFuncs();
}
7 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/chebyshev/QuadraticCosFunction.java:
--------------------------------------------------------------------------------
1 | package msolver.chebyshev;
2 |
3 | import org.apache.commons.math3.util.FastMath;
4 |
5 | public class QuadraticCosFunction implements CosScaledFunction {
6 | private int k;
7 | public QuadraticCosFunction(int k) {
8 | this.k = k;
9 | }
10 |
11 | @Override
12 | public int numFuncs() {
13 | return k;
14 | }
15 |
16 | @Override
17 | public double[][] calc(int N) {
18 | double[] cosValues = new double[N+1];
19 | for (int j = 0; j <= N; j++) {
20 | cosValues[j] = FastMath.cos(j * Math.PI / N);
21 | }
22 |
23 | double[][] values = new double[k][N+1];
24 | for (int i = 0; i < k; i++) {
25 | for (int j = 0; j <= N; j++) {
26 | values[i][j] = (i+1)*cosValues[j]*cosValues[j];
27 | }
28 | }
29 | return values;
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/ExponentialData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class ExponentialData extends MomentData {
4 | private final double[] ranges = {
5 | 5.0366845333122605e-07,
6 | 15.619130152703306,
7 | -14.501347616032602,
8 | 2.7484964548137394
9 | };
10 | private final double[] powerSums = {
11 | 1000000.0,
12 | 998677.78490783856,
13 | 1991896.3142772249,
14 | 5947596.181134684,
15 | 23651528.771513801,
16 | 117830113.75301118,
17 | 710308609.25475609,
18 | 5073006703.31318,
19 | 42241259442.990211,
20 | 403047330063.94159,
21 | 4314998960419.9683,
22 | 50641655821410.031,
23 | 637282955148883.38
24 | };
25 | private final double[] logSums = {
26 | 1000000.0,
27 | -578739.68503790628,
28 | 1983294.7107239107,
29 | -5476131.5807481054,
30 | 23699783.595155567,
31 | -118308097.22723247,
32 | 712931526.56406581,
33 | -4933389056.9205084,
34 | 38572700905.764816,
35 | -334201709511.9444,
36 | 3161470899883.4248,
37 | -32231476977189.141,
38 | 350149425729588.94
39 | };
40 |
41 | @Override
42 | public double[] getPowerSums() {
43 | return powerSums;
44 | }
45 |
46 | @Override
47 | public double getMin() {
48 | return ranges[0];
49 | }
50 |
51 | @Override
52 | public double getMax() {
53 | return ranges[1];
54 | }
55 |
56 | @Override
57 | public double[] getLogSums() {
58 | return logSums;
59 | }
60 |
61 | @Override
62 | public double getLogMin() {
63 | return ranges[2];
64 | }
65 |
66 | @Override
67 | public double getLogMax() {
68 | return ranges[3];
69 | }
70 | }
71 |
72 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/GaussianData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class GaussianData extends MomentData {
4 | private final double[] ranges = {
5 | -5.4426943895132043, 5.0389474541327557,
6 | 0,1
7 | };
8 | private final double[] powerSums = {
9 | 10000000.0,
10 | 480.5458937862864,
11 | 9993547.2435967941,
12 | 309.37531648859698,
13 | 29973127.286717705,
14 | -9041.491442481145,
15 | 150000244.00952214,
16 | -444367.57808807673,
17 | 1053293551.5678303,
18 | -13936938.368433975,
19 | 9539780309.9934578,
20 | -386976102.34488362,
21 | 105935145427.16087,
22 | -10292879169.101974,
23 | 1391938729272.7761,
24 | -270802772130.85544,
25 | 21042777928938.496,
26 | -7160080883740.7246,
27 | 357414134285331.12,
28 | -191665951342369.09
29 | };
30 | private final double[] logSums = {
31 | 10000000.0
32 | };
33 |
34 | @Override
35 | public double[] getPowerSums() {
36 | return powerSums;
37 | }
38 |
39 | @Override
40 | public double getMin() {
41 | return ranges[0];
42 | }
43 |
44 | @Override
45 | public double getMax() {
46 | return ranges[1];
47 | }
48 |
49 | @Override
50 | public double[] getLogSums() {
51 | return logSums;
52 | }
53 |
54 | @Override
55 | public double getLogMin() {
56 | return ranges[2];
57 | }
58 |
59 | @Override
60 | public double getLogMax() {
61 | return ranges[3];
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/HepData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class HepData extends MomentData {
4 | private final double[] range = {
5 | -1.960548996925354,
6 | 4.378281593322753
7 | };
8 | private final double[] powerSums = {
9 | 10500000.0,
10 | 171590.10344086567,
11 | 10600692.901931819,
12 | 3656024.1204772266,
13 | 27999983.535580769,
14 | 26085580.4679562,
15 | 112571037.57246359,
16 | 178035035.71434641,
17 | 597052564.3216269,
18 | 1272051754.8467662,
19 | 3862170673.2579589,
20 | 9736800905.3333244,
21 | 28892120151.48772,
22 | 80400830561.547516,
23 | 241656673866.27899,
24 | 717077710760.47144,
25 | 2216395701960.5981,
26 | 6900059165428.8955,
27 | 22051018347144.91,
28 | 71474655616939.391
29 | };
30 | private final double[] logSums = {1.0};
31 |
32 | @Override
33 | public double[] getPowerSums() {
34 | return powerSums;
35 | }
36 |
37 | @Override
38 | public double getMin() {
39 | return range[0];
40 | }
41 |
42 | @Override
43 | public double getMax() {
44 | return range[1];
45 | }
46 |
47 | @Override
48 | public double[] getLogSums() {
49 | return logSums;
50 | }
51 |
52 | @Override
53 | public double getLogMin() {
54 | return 0.0;
55 | }
56 |
57 | @Override
58 | public double getLogMax() {
59 | return 0.0;
60 | }
61 | }
62 |
63 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/MilanData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | // 2013-11
4 | public class MilanData extends MomentData {
5 | private final double[] ranges = {
6 | 2.3314976995293306e-06,
7 | 7936.2653798841566,
8 | -12.968999707389781,
9 | 8.9791980884111684
10 | };
11 | private final double[] powerSums = {
12 | 81109777.0,
13 | 2982688169.6811647,
14 | 978439974358.73865,
15 | 871718166698519.75,
16 | 1.3802820224027354e+18,
17 | 3.1721196216284679e+21,
18 | 9.4920224710331096e+24,
19 | 3.4822245172502998e+28,
20 | 1.5113747392231059e+32,
21 | 7.5487997054766176e+35,
22 | 4.2236053442107682e+39,
23 | 2.5794131452332432e+43,
24 | 1.6810991725372064e+47,
25 | 1.1485760824967742e+51,
26 | 8.1186456852298384e+54,
27 | 5.8811422826709824e+58,
28 | 4.3371573032664479e+62,
29 | 3.2410617072895531e+66,
30 | 2.4461380713938232e+70,
31 | 1.8602581958318741e+74
32 | };
33 | private final double[] logSums = {
34 | 81109777.0,
35 | 39954311.084389985,
36 | 975086507.65943003,
37 | 405866981.35082442,
38 | 22518788911.620308,
39 | -22190675853.522003,
40 | 840220861538.83716,
41 | -3008231730689.7461,
42 | 47081672847213.734,
43 | -301156754002264.44,
44 | 3606454337590192.0,
45 | -30859127373541904.0,
46 | 3.4368887948251686e+17,
47 | -3.4172450245762365e+18,
48 | 3.8032805624376271e+19,
49 | -4.1088294003414683e+20,
50 | 4.6743717140718132e+21,
51 | -5.3111237684754928e+22,
52 | 6.1969914256246635e+23,
53 | -7.2792735817422283e+24
54 | };
55 |
56 | @Override
57 | public double[] getPowerSums() {
58 | return powerSums;
59 | }
60 |
61 | @Override
62 | public double getMin() {
63 | return ranges[0];
64 | }
65 |
66 | @Override
67 | public double getMax() {
68 | return ranges[1];
69 | }
70 |
71 | @Override
72 | public double[] getLogSums() {
73 | return logSums;
74 | }
75 |
76 | @Override
77 | public double getLogMin() {
78 | return ranges[2];
79 | }
80 |
81 | @Override
82 | public double getLogMax() {
83 | return ranges[3];
84 | }
85 | }
86 |
87 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/MomentData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | import java.util.Arrays;
4 |
/**
 * Base class for hard-coded moment fixtures: power sums of a data set, its
 * value range, and optionally the same quantities for the logarithm of the
 * data.
 *
 * <p>Subclasses that carry no log moments inherit the defaults below: a
 * one-element log-sum array {@code {1.0}} and a log range of [0.0, 0.0].
 */
public abstract class MomentData {
    /** Returns every power sum the fixture stores. */
    public abstract double[] getPowerSums();

    /** Returns the lower end of the value range. */
    public abstract double getMin();

    /** Returns the upper end of the value range. */
    public abstract double getMax();

    /** Default log sums: a freshly allocated {@code {1.0}} on every call. */
    public double[] getLogSums() {
        return new double[] {1.0};
    }

    /** Default lower end of the log range. */
    public double getLogMin() {
        return 0.0;
    }

    /** Default upper end of the log range. */
    public double getLogMax() {
        return 0.0;
    }

    /**
     * Returns a copy of the power sums resized to exactly {@code k} entries.
     * When {@code k} exceeds the stored count the tail is zero-filled
     * (the {@link Arrays#copyOf(double[], int)} contract), not an error.
     *
     * @param k requested length
     * @return a new array of length {@code k}
     */
    public double[] getPowerSums(int k) {
        double[] stored = getPowerSums();
        return Arrays.copyOf(stored, k);
    }

    /**
     * Returns a copy of the log sums resized to exactly {@code k} entries,
     * zero-filled past the stored count.
     *
     * @param k requested length
     * @return a new array of length {@code k}
     */
    public double[] getLogSums(int k) {
        double[] stored = getLogSums();
        return Arrays.copyOf(stored, k);
    }
}
29 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/OccupancyData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class OccupancyData extends MomentData {
4 | private final double min = 412.75;
5 | private final double max = 2076.5;
6 | private final double logMin = 6.0228420828002376;
7 | private final double logMax = 7.6384390630708081;
8 | private final double[] powerSums = {
9 | 20560.0,
10 | 14197775.359523809,
11 | 11795382081.900866,
12 | 11920150330935.938,
13 | 14243310876969824.0,
14 | 1.9248869180998238e+19,
15 | 2.8335762132634282e+22,
16 | 4.431640701816542e+25,
17 | 7.2509584910158713e+28,
18 | 1.2290081330972746e+32,
19 | 2.1433360706825834e+35,
20 | 3.8263457725342386e+38,
21 | 6.9641284233810108e+41,
22 | 1.287891117361348e+45,
23 | 2.4132657512596994e+48,
24 | 4.5712141086232246e+51,
25 | 8.7361384845196883e+54,
26 | 1.6818212554569329e+58,
27 | 3.2572457284172447e+61,
28 | 6.3398052560875453e+64
29 | };
30 | private final double[] logSums = {
31 | 20560.0,
32 | 132778.81355561133,
33 | 860423.75561972987,
34 | 5595528.9043199299,
35 | 36524059.16578535,
36 | 239323723.78677931,
37 | 1574401576.9855776,
38 | 10399585507.478024,
39 | 68980678228.532593,
40 | 459495821550.01648,
41 | 3073979747643.9238,
42 | 20653745268445.156,
43 | 139372854449999.69,
44 | 944566287701071.0,
45 | 6429026416774866.0,
46 | 43943128435886808.0,
47 | 3.0160302130365139e+17,
48 | 2.0784407797638454e+18,
49 | 1.4379655766584013e+19,
50 | 9.9865203720404238e+19
51 | };
52 |
53 | @Override
54 | public double[] getPowerSums() {
55 | return powerSums;
56 | }
57 |
58 | @Override
59 | public double getMin() {
60 | return min;
61 | }
62 |
63 | @Override
64 | public double getMax() {
65 | return max;
66 | }
67 |
68 | @Override
69 | public double[] getLogSums() {
70 | return logSums;
71 | }
72 |
73 | @Override
74 | public double getLogMin() {
75 | return logMin;
76 | }
77 |
78 | @Override
79 | public double getLogMax() {
80 | return logMax;
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/RetailData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
/**
 * Static fixture holding two hard-coded arrays plus a value range.
 *
 * <p>{@code moments} has 22 entries starting at 1.0, all within [-1, 1];
 * {@code powerSums} has 11 entries starting at 541909.0. All fields are
 * public and mutable, so callers share (and can modify) the same arrays.
 */
public class RetailData {
    public static double[] moments = new double[] {
        1.0, -0.99803244599458163,
        0.99242770820196713, -0.98346992011358814,
        0.97167161929459778, -0.95732586721724033,
        0.94080698235117055, -0.92238731864676526,
        0.90236473106015347, -0.88094882137844899,
        0.85841937756560194, -0.83491133917007543,
        0.81068359928271649, -0.78582604637928322,
        0.76061221132889334, -0.73507756422695447,
        0.70940149326508817, -0.68358226554192769,
        0.65781293088596959, -0.63212643646674627,
        0.60669960200873319, -0.58153118648992819
    };

    public static double[] powerSums = new double[] {
        541909.0,
        5872552.0,
        2028897322.0,
        10327002658180.0,
        87690166564334848.0,
        8.2085567205202107e+20,
        7.9010897350141134e+24,
        7.685072127365144e+28,
        7.510941100173536e+32,
        7.3600614962632969e+36,
        7.2243545948138156e+40
    };

    /** Lower end of the value range. */
    public static double min = 1.0;
    /** Upper end of the value range. */
    public static double max = 10000.0;
}
46 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/RetailQuantityData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class RetailQuantityData extends MomentData{
4 | public static final double[] powerSums = {
5 | 531285.0,
6 | 5660981.0,
7 | 13127647799.0,
8 | 943385744203541.0,
9 | 7.3401290527335825e+19,
10 | 5.7374634895753686e+24,
11 | 4.4941878460622702e+29,
12 | 3.5267533869172936e+34,
13 | 2.7724146420399472e+39,
14 | 2.1831198887574202e+44,
15 | 1.7219100191391005e+49,
16 | 1.3602936186335558e+54,
17 | 1.0762640535486096e+59,
18 | 8.5279096877163735e+63,
19 | 6.7666981261637846e+68,
20 | 5.3764281895692519e+73,
21 | 4.2772622266819037e+78,
22 | 3.4069441013233427e+83,
23 | 2.7168368273312946e+88,
24 | 2.1688733753934956e+93
25 | };
26 |
27 | public static final double[] logSums = {
28 | 531285.0,
29 | 733706.08385088702,
30 | 1803377.9327264477,
31 | 5313341.6192785092,
32 | 18079041.058607381,
33 | 69790609.377532199,
34 | 302287816.89519858,
35 | 1456494992.1214058,
36 | 7765406800.8637161,
37 | 45788298431.40226,
38 | 299590569873.87885,
39 | 2181621707739.7278,
40 | 17627303232908.797,
41 | 156164602838914.75,
42 | 1489418691939729.2,
43 | 14997093766426690.0,
44 | 1.5677925320389693e+17,
45 | 1.6804693592838444e+18,
46 | 1.831003035492703e+19,
47 | 2.0164791544383e+20
48 | };
49 |
50 | @Override
51 | public double[] getPowerSums() {
52 | return powerSums;
53 | }
54 |
55 | @Override
56 | public double[] getLogSums() {
57 | return logSums;
58 | }
59 |
60 | @Override
61 | public double getMin() {
62 | return 1.0;
63 | }
64 |
65 | @Override
66 | public double getMax() {
67 | return 80995.0;
68 | }
69 |
70 | @Override
71 | public double getLogMin() {
72 | return 0.0;
73 | }
74 |
75 | @Override
76 | public double getLogMax() {
77 | return Math.log(80995.0);
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/RetailQuantityLogData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class RetailQuantityLogData extends MomentData {
4 | public static final double[] powerSums = {
5 | 531285.0,
6 | 733706.08385088702,
7 | 1803377.9327264477,
8 | 5313341.6192785092,
9 | 18079041.058607381,
10 | 69790609.377532199,
11 | 302287816.89519858,
12 | 1456494992.1214058,
13 | 7765406800.8637161,
14 | 45788298431.40226,
15 | 299590569873.87885,
16 | 2181621707739.7278,
17 | 17627303232908.797,
18 | 156164602838914.75,
19 | 1489418691939729.2
20 | };
21 |
22 | @Override
23 | public double[] getPowerSums() {
24 | return powerSums;
25 | }
26 |
27 | @Override
28 | public double getMin() {
29 | return 0;
30 | }
31 |
32 | @Override
33 | public double getMax() {
34 | return 11.302142703354239;
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/ShuttleData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
/**
 * Static fixture holding two hard-coded arrays plus a value range.
 *
 * <p>{@code moments} has 22 entries starting at 1.0, all within [-1, 1];
 * {@code powerSums} has 11 entries starting at 43500.0. All fields are
 * public and mutable, so callers share (and can modify) the same arrays.
 */
public class ShuttleData {
    public static double[] moments = new double[] {
        1.0, -0.57071217926390339,
        -0.22603817813570512, 0.68115364159641145,
        -0.60778341324143959, 0.17844626060954086,
        0.2201808190269598, -0.29730967690787835,
        0.14621881025862984, -0.011587023082648803,
        0.062701300094490348, -0.19322750394339511,
        0.075545786808184023, 0.22135866677749558,
        -0.31192502106061021, 0.16682708244373193,
        0.023534352707310534, -0.15061754686789203,
        0.12689914341612776, -0.02445402425925142,
        0.026327570711651923, -0.13152321634595951
    };

    public static double[] powerSums = new double[] {
        43500.0,
        2098864.0,
        107800016.0,
        6005831428.0,
        369924186368.0,
        25475420299084.0,
        1954803892405976.0,
        1.6430830896853082e+17,
        1.4790926986227182e+19,
        1.3971091486866444e+21,
        1.3638448459556252e+23
    };

    /** Lower end of the value range. */
    public static double min = 27.0;
    /** Upper end of the value range. */
    public static double max = 126.0;
}
46 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/data/UniformData.java:
--------------------------------------------------------------------------------
1 | package msolver.data;
2 |
3 | public class UniformData extends MomentData{
4 | public static final int N = 1000000;
5 | public static final int k = 100;
6 |
7 | @Override
8 | public double[] getPowerSums() {
9 | double[] powerSums = new double[k];
10 | for (int i = 0; i < k; i++) {
11 | powerSums[i] = N*1.0 / (i+1);
12 | }
13 | return powerSums;
14 | }
15 |
16 | @Override
17 | public double getMin() {
18 | return 0.0;
19 | }
20 |
21 | @Override
22 | public double getMax() {
23 | return 1.0;
24 | }
25 | }
26 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/optimizer/FunctionWithHessian.java:
--------------------------------------------------------------------------------
1 | package msolver.optimizer;
2 |
3 | /**
4 | * Describes a function which can be optimized using Newton's method.
5 | */
// The interface is stateful: the compute* methods evaluate the function at a
// point and the get* methods read back what was last computed (this is how
// QuadraticPotential in this code base implements it).
6 | public interface FunctionWithHessian {
// Evaluates only the function value at `point`; read it with getValue().
// `tol` is presumably a numerical tolerance for implementations that
// approximate the value - TODO confirm; QuadraticPotential ignores it.
7 | void computeOnlyValue(double[] point, double tol);
// Evaluates the value, gradient and Hessian at `point`; read them with
// getValue(), getGradient() and getHessian().
8 | void computeAll(double[] point, double tol);
// Number of variables of the function.
9 | int dim();
// Value stored by the most recent compute* call.
10 | double getValue();
// Gradient stored by the most recent computeAll call.
11 | double[] getGradient();
12 | // Returns in row-major order
13 | double[][] getHessian();
14 | }
15 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/optimizer/GenericOptimizer.java:
--------------------------------------------------------------------------------
1 | package msolver.optimizer;
2 |
// Common surface for optimizers working on a FunctionWithHessian.
// NOTE(review): the notes below are inferred from the method names and
// signatures only; confirm against the implementing classes.
3 | public interface GenericOptimizer {
// Presumably switches diagnostic output on or off.
4 | void setVerbose(boolean flag);
5 |
// Presumably caps the number of iterations a solve may take.
6 | void setMaxIter(int maxIter);
7 |
// Presumably reports whether the last solve reached its tolerance.
8 | boolean isConverged();
9 |
// Presumably the number of steps taken by the last solve.
10 | int getStepCount();
11 |
// Returns the FunctionWithHessian held by the optimizer (presumably the
// objective being optimized).
12 | FunctionWithHessian getP();
13 |
// Runs the optimizer from `start`; `gradTol` is presumably the gradient
// tolerance used as the stopping criterion. Returns a double[]
// (presumably the solution point).
14 | double[] solve(double[] start, double gradTol);
15 | }
16 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/optimizer/QuadraticPotential.java:
--------------------------------------------------------------------------------
1 | package msolver.optimizer;
2 |
3 | /**
4 | * Simple quadratic function for use in tests.
5 | */
6 | public class QuadraticPotential implements FunctionWithHessian {
7 | private int k;
8 | private double Pval;
9 | private double[] Pgrad;
10 | private double[][] Phess;
11 |
12 | public QuadraticPotential(int k) {
13 | this.k = k;
14 | Pgrad = new double[k];
15 | Phess = new double[k][k];
16 | }
17 |
18 | @Override
19 | public void computeOnlyValue(double[] point, double tol) {
20 | double sum = 0;
21 | for (int i = 0; i < point.length; i++) {
22 | sum += point[i] * point[i];
23 | }
24 | Pval = sum;
25 | }
26 |
27 | @Override
28 | public void computeAll(double[] point, double tol) {
29 | double sum = 0;
30 | for (int i = 0; i < point.length; i++) {
31 | sum += point[i] * point[i];
32 | }
33 | Pval = sum;
34 |
35 | for (int i = 0; i < point.length; i++) {
36 | Pgrad[i] = 2*point[i];
37 | for (int j = 0; j < point.length; j++) {
38 | if (j == i) {
39 | Phess[i][j] = 2;
40 | } else {
41 | Phess[i][j] = 0.0;
42 | }
43 | }
44 | }
45 | }
46 |
47 | @Override
48 | public int dim() {
49 | return this.k;
50 | }
51 |
52 | @Override
53 | public double getValue() {
54 | return Pval;
55 | }
56 |
57 | @Override
58 | public double[] getGradient() {
59 | return Pgrad;
60 | }
61 |
62 | @Override
63 | public double[][] getHessian() {
64 | return Phess;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/struct/MomentStruct.java:
--------------------------------------------------------------------------------
1 | package msolver.struct;
2 |
/**
 * Plain mutable holder for the summary consumed by the threshold classes:
 * the value range, the range of the log values, and the two sum arrays.
 * Nothing is initialized here; arrays start out {@code null}.
 */
public class MomentStruct {
    /** Lower end of the value range. */
    public double min;
    /** Upper end of the value range. */
    public double max;
    /** Lower end of the log-value range. */
    public double logMin;
    /** Upper end of the log-value range. */
    public double logMax;

    /** Power sums; the threshold classes read entry 0 as the count. */
    public double[] powerSums;
    /** Sums over the log values; entry 0 is read as the log-side count. */
    public double[] logSums;
}
8 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/thresholds/MarkovThreshold.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
3 | import msolver.MathUtil;
4 | import msolver.struct.MomentStruct;
5 |
6 | public class MarkovThreshold implements MomentThreshold {
7 | private MomentStruct ms;
8 |
9 | public MarkovThreshold(
10 | MomentStruct ms
11 | ) {
12 | this.ms = ms;
13 | }
14 |
15 | @Override
16 | public double[] bound(double cutoff) {
17 | double[] outlierRateBounds = new double[2];
18 | outlierRateBounds[0] = 0.0;
19 | outlierRateBounds[1] = 1.0;
20 |
21 | int ka = ms.powerSums.length;
22 | int kb = ms.logSums.length;
23 | double n = ms.powerSums[0];
24 | if (ka > 1) {
25 | double[] xMinusMinMoments = MathUtil.shiftPowerSum(ms.powerSums, 1, ms.min);
26 | double[] maxMinusXMoments = MathUtil.shiftPowerSum(ms.powerSums, -1, ms.max);
27 | for (int i = 1; i < ka; i++) {
28 | double outlierRateUpperBound = (xMinusMinMoments[i] / n) / Math.pow(cutoff - ms.min, i);
29 | double outlierRateLowerBound = 1.0 - (maxMinusXMoments[i] / n) / Math.pow(ms.max - cutoff, i);
30 | outlierRateBounds[0] = Math.max(outlierRateBounds[0], outlierRateLowerBound);
31 | outlierRateBounds[1] = Math.min(outlierRateBounds[1], outlierRateUpperBound);
32 | }
33 | }
34 |
35 | double nl = ms.logSums[0];
36 | if (kb > 1 && nl != 0) {
37 | double logCutoff = Math.log(cutoff);
38 | double fracIncluded = nl / n;
39 | double[] xMinusMinLogMoments = MathUtil.shiftPowerSum(ms.logSums, 1, ms.logMin);
40 | double[] maxMinusXLogMoments = MathUtil.shiftPowerSum(ms.logSums, -1, ms.logMax);
41 | for (int i = 1; i < kb; i++) {
42 | double outlierRateUpperBound = (
43 | (1.0 - fracIncluded) +
44 | fracIncluded * (xMinusMinLogMoments[i] / nl) / Math.pow(logCutoff - ms.logMin, i)
45 | );
46 | double outlierRateLowerBound = (
47 | 1.0 -
48 | fracIncluded * (maxMinusXLogMoments[i] / nl) / Math.pow(ms.logMax - logCutoff, i)
49 | );
50 | outlierRateBounds[0] = Math.max(outlierRateBounds[0], outlierRateLowerBound);
51 | outlierRateBounds[1] = Math.min(outlierRateBounds[1], outlierRateUpperBound);
52 | }
53 | }
54 |
55 | return outlierRateBounds;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/thresholds/MomentThreshold.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
// A bound on the fraction of points above a cutoff, derived from moments.
3 | public interface MomentThreshold {
// Returns a two-element array {lower, upper}: ThresholdCascade compares
// element 0 and element 1 against the target fraction phi, and
// MarkovThreshold starts from {0.0, 1.0} and tightens both ends.
4 | double[] bound(double cutoff);
5 | }
6 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/thresholds/RTTThreshold.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
3 | import msolver.MathUtil;
4 | import msolver.SimpleBoundSolver;
5 | import msolver.struct.MomentStruct;
6 |
7 | public class RTTThreshold implements MomentThreshold {
8 | private MomentStruct ms;
9 |
10 | public RTTThreshold(MomentStruct ms) {
11 | this.ms = ms;
12 | }
13 |
14 | public double[] bound(double x) {
15 | double[] xs = new double[]{x};
16 | double[] gttBounds = new double[]{0.0, 1.0};
17 | double[] moments;
18 | SimpleBoundSolver boundSolver;
19 | double[] boundSizes;
20 |
21 | int ka = ms.powerSums.length;
22 | int kb = ms.logSums.length;
23 |
24 | // Standard basis
25 | moments = MathUtil.powerSumsToMoments(ms.powerSums);
26 | boundSolver = new SimpleBoundSolver(ka);
27 | try {
28 | boundSizes = boundSolver.solveBounds(moments, xs);
29 | double[] standardBounds = boundSolver.getBoundEndpoints(moments, x, boundSizes[0]);
30 | if (1.0 - standardBounds[1] > gttBounds[0]) {
31 | gttBounds[0] = 1.0 - standardBounds[1];
32 | }
33 | if (1.0 - standardBounds[0] < gttBounds[1]) {
34 | gttBounds[1] = 1.0 - standardBounds[0];
35 | }
36 | } catch (Exception e) {}
37 |
38 | // Log basis
39 | double[] logXs = new double[]{Math.log(x)};
40 | moments = MathUtil.powerSumsToMoments(ms.logSums);
41 | try {
42 | boundSolver = new SimpleBoundSolver(kb);
43 | boundSizes = boundSolver.solveBounds(moments, logXs);
44 | double[] logBounds = boundSolver.getBoundEndpoints(moments, Math.log(x), boundSizes[0]);
45 | if (1.0 - logBounds[1] > gttBounds[0]) {
46 | gttBounds[0] = 1.0 - logBounds[1];
47 | }
48 | if (1.0 - logBounds[0] < gttBounds[1]) {
49 | gttBounds[1] = 1.0 - logBounds[0];
50 | }
51 | } catch (Exception e) {}
52 |
53 | return gttBounds;
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/main/java/msolver/thresholds/ThresholdCascade.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
3 | import msolver.ChebyshevMomentSolver2;
4 | import msolver.struct.MomentStruct;
5 |
6 | public class ThresholdCascade {
7 | private MomentStruct ms;
8 | private MomentThreshold[] cascade;
9 | private ChebyshevMomentSolver2 solver;
10 |
11 | public ThresholdCascade(MomentStruct ms) {
12 | this.ms = ms;
13 | this.cascade = new MomentThreshold[2];
14 | this.cascade[0] = new MarkovThreshold(ms);
15 | this.cascade[1] = new RTTThreshold(ms);
16 | }
17 |
18 | // Are there phi fraction above x, aka is CDF(x) < 1 - phi?
19 | public boolean threshold(double x, double phi) {
20 | int ka = ms.powerSums.length;
21 | if (ka > 0) {
22 | if (ms.min == ms.max) {
23 | return x > ms.min;
24 | }
25 | } else {
26 | if (ms.logMin == ms.logMax) {
27 | return x > Math.exp(ms.logMin);
28 | }
29 | }
30 |
31 | if (x < ms.min) {
32 | return true;
33 | }
34 | if (x > ms.max) {
35 | return false;
36 | }
37 |
38 | for (int i = 0; i < cascade.length; i++) {
39 | MomentThreshold mt = cascade[i];
40 | double[] bounds = mt.bound(x);
41 | if (bounds[0] > phi) {
42 | return true;
43 | }
44 | if (bounds[1] < phi) {
45 | return false;
46 | }
47 | }
48 |
49 | solver = ChebyshevMomentSolver2.fromPowerSums(
50 | ms.min, ms.max,
51 | ms.powerSums,
52 | ms.logMin, ms.logMax,
53 | ms.logSums
54 | );
55 | solver.solve(1e-9);
56 | double cdfValue = solver.estimateCDF(x);
57 | if (cdfValue < 1 - phi) {
58 | return true;
59 | } else {
60 | return false;
61 | }
62 | }
63 |
64 | public ChebyshevMomentSolver2 getSolver() {
65 | return solver;
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/ChebyshevMomentSolverTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.data.RetailData;
4 | import msolver.data.ShuttleData;
5 | import msolver.optimizer.NewtonOptimizer;
6 | import org.apache.commons.math3.util.FastMath;
7 | import org.junit.Test;
8 |
9 | import static org.junit.Assert.*;
10 |
11 | public class ChebyshevMomentSolverTest {
12 | @Test
13 | public void testUniform() {
14 | double m_values[] = {1.0, 0, -1.0/3, 0, -1.0/15, 0, -1.0/35};
15 | double tol = 1e-10;
16 | ChebyshevMomentSolver solver = new ChebyshevMomentSolver(m_values);
17 | solver.solve(tol);
18 |
19 | double[] coeffs = solver.getLambdas();
20 |
21 | assertEquals(FastMath.log(2), coeffs[0], 1e-10);
22 | for (int i = 1; i < coeffs.length; i++) {
23 | assertEquals(0.0, coeffs[i], 1e-10);
24 | }
25 | NewtonOptimizer opt = solver.getOptimizer();
26 | assertTrue(opt.getStepCount() < 20);
27 | assertEquals(0, opt.getDampedStepCount());
28 | }
29 |
30 | @Test
31 | public void testShuttle() {
32 | int k = 11;
33 | double[] m_values = new double[k];
34 | for (int i = 0; i < k; i++) {
35 | m_values[i] = ShuttleData.moments[i];
36 | }
37 | double tol = 1e-10;
38 | ChebyshevMomentSolver solver = new ChebyshevMomentSolver(m_values);
39 | solver.solve(tol);
40 |
41 | double[] coeffs = solver.getLambdas();
42 | MaxEntFunction f = new MaxEntFunction(coeffs);
43 | double[] f_mus = f.moments(k, tol);
44 | for (int i = 0; i < k; i++) {
45 | assertEquals(m_values[i], f_mus[i], 10*tol);
46 | }
47 | NewtonOptimizer opt = solver.getOptimizer();
48 | assertTrue(opt.getStepCount() < 20);
49 |
50 | double[] ps = {.5};
51 | double[] expectedQs = {-0.602};
52 | assertArrayEquals(
53 | expectedQs,
54 | solver.estimateQuantiles(ps, -1, 1),
55 | 1e-3
56 | );
57 |
58 | expectedQs[0] = 45.0;
59 | assertArrayEquals(
60 | expectedQs,
61 | solver.estimateQuantiles(ps, 27, 126),
62 | 5.0
63 | );
64 | }
65 |
66 | @Test
67 | public void testRetail() {
68 | int k = 11;
69 | double[] m_values = new double[k];
70 | for (int i = 0; i < k; i++) {
71 | m_values[i] = RetailData.moments[i];
72 | }
73 | double tol = 1e-10;
74 | ChebyshevMomentSolver solver = new ChebyshevMomentSolver(m_values);
75 | solver.solve(tol);
76 |
77 | double[] coeffs = solver.getLambdas();
78 | MaxEntFunction f = new MaxEntFunction(coeffs);
79 | double[] f_mus = f.moments(k, tol);
80 | for (int i = 0; i < k; i++) {
81 | assertEquals(m_values[i], f_mus[i], 10*tol);
82 | }
83 |
84 | NewtonOptimizer opt = solver.getOptimizer();
85 | assertTrue(opt.getStepCount() < 100);
86 | assertTrue(opt.getDampedStepCount() > 0);
87 | }
88 |
89 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/ChebyshevPolynomialTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.chebyshev.ChebyshevPolynomial;
4 | import msolver.chebyshev.QuadraticCosFunction;
5 | import org.junit.Test;
6 |
7 | import static org.junit.Assert.assertEquals;
8 |
9 | public class ChebyshevPolynomialTest {
10 | @Test
11 | public void testSimple() {
12 | double[] coeff = {1.0, 2.0, 3.0};
13 | ChebyshevPolynomial cp = new ChebyshevPolynomial(coeff);
14 | assertEquals(2.34, cp.value(.7), 1e-10);
15 | assertEquals(cp.value(.7), cp.value2(.7), 1e-10);
16 |
17 | ChebyshevPolynomial cb = ChebyshevPolynomial.basis(2);
18 | assertEquals(-0.02, cb.value(.7), 1e-10);
19 |
20 | double[] coeff2 = {2.0, 1.0, 3.0};
21 | cp = new ChebyshevPolynomial(coeff2);
22 | assertEquals(2.0, cp.integrate(), 1e-10);
23 |
24 | assertEquals(1.848, cp.multiplyByBasis(1).value(.7), 1e-10);
25 | }
26 |
27 | @Test
28 | public void testFitMulti() {
29 | QuadraticCosFunction multiFunction = new QuadraticCosFunction(3);
30 | ChebyshevPolynomial[] cfit = ChebyshevPolynomial.fitMulti(multiFunction, 1e-10);
31 | for (int i = 0; i < cfit.length; i++) {
32 | assertEquals((i+1)*.25, cfit[i].value(.5), 1e-10);
33 | }
34 | }
35 |
36 | @Test
37 | public void testFit() {
38 | double[] coeff = {1.0, 2.0, 3.0};
39 | ChebyshevPolynomial cp = new ChebyshevPolynomial(coeff);
40 | ChebyshevPolynomial cfit = ChebyshevPolynomial.fit(cp, 1e-10);
41 |
42 | for (int i = 0; i < coeff.length; i++) {
43 | assertEquals(coeff[i], cfit.coeffs()[i], 1e-10);
44 | }
45 | }
46 |
47 | @Test
48 | public void testIntegrate() {
49 | double[] coeff = {2.0, 1.0, 3.0};
50 | ChebyshevPolynomial cp = new ChebyshevPolynomial(coeff);
51 | assertEquals(cp.integrate(), cp.integralPoly().value(1), 1e-10);
52 | }
53 |
54 | @Test
55 | public void testMultiply() {
56 | double[] c1 = {1.0, 2.0, 3.0, 4.0};
57 | double[] c2 = {.5, .6, .7, .8, .9};
58 | ChebyshevPolynomial cp1 = new ChebyshevPolynomial(c1);
59 | ChebyshevPolynomial cp2 = new ChebyshevPolynomial(c2);
60 |
61 | ChebyshevPolynomial product = cp1.multiply(cp2);
62 | assertEquals(
63 | cp1.value(.5)*cp2.value(.5),
64 | product.value(.5),
65 | 1e-10
66 | );
67 |
68 | product = cp1.multiply(cp1);
69 | assertEquals(
70 | cp1.value(.7)*cp1.value(.7),
71 | product.value(.7),
72 | 1e-10
73 | );
74 | }
75 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/MathUtilTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.data.HepData;
4 | import msolver.data.MomentData;
5 | import msolver.data.OccupancyData;
6 | import org.junit.Test;
7 |
8 | import java.util.Arrays;
9 |
10 | import static org.junit.Assert.assertArrayEquals;
11 | import static org.junit.Assert.assertEquals;
12 |
13 | public class MathUtilTest {
14 | @Test
15 | public void testBinomial() {
16 | long[][] binoms = MathUtil.getBinomials(5);
17 | assertEquals(binoms[5][2], 10L);
18 | }
19 |
20 | @Test
21 | public void testChebyCoefficient() {
22 | int[][] cCoeffs = MathUtil.getChebyCoefficients(5);
23 | int[] expected = {0, -3, 0, 4, 0, 0};
24 | assertArrayEquals(expected, cCoeffs[3]);
25 | }
26 |
27 | @Test
28 | public void testConvertMoments() {
29 | // integers from 0...1000
30 | double[] uniformPowerSums = {1001,500500,333833500,250500250000L};
31 | double[] convertedChebyshevMoments = MathUtil.powerSumsToChebyMoments(0, 1000, uniformPowerSums);
32 |
33 | double[] expectedChebyshevMoments = {1.0, 0, -.332, 0};
34 | assertArrayEquals(expectedChebyshevMoments, convertedChebyshevMoments, 1e-14);
35 | }
36 |
37 | @Test
38 | public void testChebyAccuracy() {
39 | MomentData data = new OccupancyData();
40 | double[] chebys = MathUtil.powerSumsToChebyMoments(
41 | data.getMin(), data.getMax(),
42 | data.getPowerSums(40)
43 | );
44 | System.out.println(Arrays.toString(chebys));
45 | // chebys = MathUtil.powerSumsToChebyMoments(
46 | // data.getLogMin(), data.getLogMax(),
47 | // data.getLogSums(20)
48 | // );
49 | data = new HepData();
50 | chebys = MathUtil.powerSumsToChebyMoments(
51 | data.getMin(), data.getMax(),
52 | data.getPowerSums(40)
53 | );
54 | System.out.println(Arrays.toString(chebys));
55 | }
56 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/MaxEntFunction2Test.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.assertEquals;
6 |
7 | /** Regression tests for {@link MaxEntFunction2} against precomputed reference values. */
8 | public class MaxEntFunction2Test {
9 |     /** Absolute delta used by every assertion in {@link #testSimple()}. */
10 |     private static final double TOL = 1e-8;
11 |
12 |     /**
13 |      * Checks the zeroth moment, selected pairwise moments and selected Hessian
14 |      * entries of a function built from two-element coefficient arrays.
15 |      */
16 |     @Test
17 |     public void testSimple() {
18 |         MaxEntFunction2 fn = new MaxEntFunction2(
19 |                 true,
20 |                 new double[]{0, -1},
21 |                 new double[]{0, 1},
22 |                 5.05, 4.95,
23 |                 2.220446049250313e-16, 2.302585092994046
24 |         );
25 |         // Reference values; the same three numbers are expected from both the
26 |         // pairwise-moment matrix and the Hessian (at different indices).
27 |         final double expected00 = 3.73002156214;
28 |         final double expectedOffDiag = 0.4078218803;
29 |         final double expected11 = 1.12095675177;
30 |
31 |         assertEquals(expected00, fn.zerothMoment(1e-8), TOL);
32 |
33 |         double[][] pairwise = fn.getPairwiseMoments(1e-8);
34 |         assertEquals(expected00, pairwise[0][0], TOL);
35 |         assertEquals(expectedOffDiag, pairwise[1][3], TOL);
36 |         assertEquals(expected11, pairwise[1][1], TOL);
37 |
38 |         // getHessian is called with 1e-7 rather than the 1e-8 used above.
39 |         double[][] hessian = fn.getHessian(1e-7);
40 |         assertEquals(expected00, hessian[0][0], TOL);
41 |         assertEquals(expectedOffDiag, hessian[1][2], TOL);
42 |         assertEquals(expected11, hessian[1][1], TOL);
43 |     }
44 |
45 |     /**
46 |      * Evaluates a function with seven-element coefficient arrays at -0.8 and
47 |      * compares the result with 1.23 (within 0.01). The test name suggests the
48 |      * reference value came from a NumPy computation.
49 |      */
50 |     @Test
51 |     public void testCompareNumpy() {
52 |         double[] aCoeffs = {
53 |                 -1495.2106196044201, 63797.93868346012, -830014.2179376424,
54 |                 -296736.79198347515, -56032.24104079366, -6481.702314031079,
55 |                 -394.9593472527941
56 |         };
57 |         double[] bCoeffs = {
58 |                 0, 280793.4251573418, 887242.8040778289,
59 |                 -41598.46235869913, 725.9088731130822, 223.10996965225195,
60 |                 -35.8575946806040
61 |         };
62 |         MaxEntFunction2 fn = new MaxEntFunction2(
63 |                 false, aCoeffs, bCoeffs,
64 |                 6.830640572935523, 0.8077984901352853, 1244.625000, 831.875000
65 |         );
66 |         double actual = fn.value(-.8);
67 |         assertEquals(1.23, actual, 0.01);
68 |     }
69 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/MaxEntFunctionTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.assertEquals;
6 | import static org.junit.Assert.assertTrue;
7 |
8 | /** Tests for {@link MaxEntFunction}. */
9 | public class MaxEntFunctionTest {
10 |     /**
11 |      * Calls {@code moments(8, 1e-9)} for coefficients {1, 2, 3}, compares every
12 |      * entry with a precomputed reference value, and checks that fewer than 1000
13 |      * function evaluations were reported.
14 |      */
15 |     @Test
16 |     public void testMoments() {
17 |         double[] coeff = {1.0, 2.0, 3.0};
18 |         MaxEntFunction f = new MaxEntFunction(coeff);
19 |         double[] moments = f.moments(8, 1e-9);
20 |         double[] expectedMoments = {
21 |                 6.303954641290793, -1.0395877292934701,
22 |                 -4.9297352972133845, 2.0119170973456093,
23 |                 2.458369282294647, -1.5127916121976486,
24 |                 -0.84272224125321182, 0.73491729283435847
25 |         };
26 |         // Guard the loop below: without this check a result shorter than
27 |         // expected would pass silently, and a longer one would fail with an
28 |         // ArrayIndexOutOfBoundsException instead of an assertion message.
29 |         assertEquals("number of moments", expectedMoments.length, moments.length);
30 |         for (int i = 0; i < expectedMoments.length; i++) {
31 |             assertEquals("moment " + i, expectedMoments[i], moments[i], 1e-10);
32 |         }
33 |         assertTrue(f.getFuncEvals() < 1000);
34 |     }
35 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/MaxEntPotential2Test.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import msolver.optimizer.GenericOptimizer;
4 | import msolver.optimizer.NewtonOptimizer;
5 | import org.junit.Test;
6 |
7 | import static org.junit.Assert.assertArrayEquals;
8 | import static org.junit.Assert.assertEquals;
9 |
10 | /** Tests that {@link MaxEntPotential2} can be evaluated and minimised by a Newton solver. */
11 | public class MaxEntPotential2Test {
12 |     /** Solves from the all-zero start vector via {@code NewtonOptimizer.solve(start, 1e-6)}. */
13 |     private static double[] solveFromZero(MaxEntPotential2 potential) {
14 |         GenericOptimizer newton = new NewtonOptimizer(potential);
15 |         return newton.solve(new double[3], 1e-6);
16 |     }
17 |
18 |     /**
19 |      * Evaluates the potential at the known solution, checks one Hessian entry and
20 |      * one gradient entry there, then verifies the optimizer recovers the solution.
21 |      */
22 |     @Test
23 |     public void testSimple() {
24 |         double[] expectedLambdas = {0, -1, 1};
25 |         double[] targetMoments = {3.730021562141137, -0.45542618913430216, 2.0002734064304235};
26 |         MaxEntPotential2 potential = new MaxEntPotential2(
27 |                 true, 2, targetMoments,
28 |                 5.05, 4.95,
29 |                 2.220446049250313e-16, 2.302585092994046
30 |         );
31 |
32 |         potential.computeAll(expectedLambdas, 1e-8);
33 |         double[][] hessian = potential.getHessian();
34 |         assertEquals(0.40782188035828565, hessian[1][2], 1e-8);
35 |         double[] gradient = potential.getGradient();
36 |         assertEquals(0, gradient[1], 1e-8);
37 |
38 |         double[] solved = solveFromZero(potential);
39 |         assertArrayEquals(expectedLambdas, solved, 1e-6);
40 |     }
41 |
42 |     /**
43 |      * Same recovery check with the first constructor flag set to {@code false} and
44 |      * the two trailing argument pairs given in the opposite order from
45 |      * {@link #testSimple()}.
46 |      */
47 |     @Test
48 |     public void testSimpleExp() {
49 |         double[] expectedLambdas = {0, 1, -1};
50 |         double[] targetMoments = {3.730021562141138, 0.2702518442892859, -2.1854477512754396};
51 |         MaxEntPotential2 potential = new MaxEntPotential2(
52 |                 false, 2, targetMoments,
53 |                 2.220446049250313e-16, 2.302585092994046,
54 |                 5.05, 4.95
55 |         );
56 |
57 |         // Evaluated once at the expected solution before solving; the outputs
58 |         // of this evaluation are not inspected.
59 |         potential.computeAll(expectedLambdas, 1e-8);
60 |
61 |         double[] solved = solveFromZero(potential);
62 |         assertArrayEquals(expectedLambdas, solved, 1e-6);
63 |     }
64 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/MaxEntPotentialTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.assertEquals;
6 |
7 | /** Smoke test for {@link MaxEntPotential}. */
8 | public class MaxEntPotentialTest {
9 |     /**
10 |      * Evaluates the potential at the all-zero point and checks that the gradient
11 |      * has one entry per supplied moment value.
12 |      */
13 |     @Test
14 |     public void testTrivial() {
15 |         double[] moments = {1.0, 0, -1.0/3, 0, -1.0/15, 0, -1.0/35};
16 |         double[] zeros = new double[moments.length];
17 |         MaxEntPotential potential = new MaxEntPotential(moments);
18 |         potential.computeAll(zeros, 1e-10);
19 |         assertEquals(moments.length, potential.getGradient().length);
20 |     }
21 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/MnatSolverTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Arrays;
6 |
7 | import static org.junit.Assert.assertArrayEquals;
8 |
9 | /** Tests for {@link MnatSolver} quantile estimation. */
10 | public class MnatSolverTest {
11 |     /**
12 |      * Feeds the values 1, 1/2, ..., 1/7 to the solver and expects the 0.2, 0.5 and
13 |      * 0.8 quantiles on [0, 1] to come back within 0.1 of 0.2, 0.5 and 0.8.
14 |      */
15 |     @Test
16 |     public void testUniform() {
17 |         double[] moments = new double[7];
18 |         for (int k = 0; k < moments.length; k++) {
19 |             moments[k] = 1.0 / (k + 1);
20 |         }
21 |
22 |         // The CDF estimate is computed but not inspected; the call only has to
23 |         // complete without throwing.
24 |         MnatSolver.estimateCDF(moments);
25 |
26 |         double[] quantiles = MnatSolver.estimateQuantiles(0, 1, moments, Arrays.asList(.2, .5, .8));
27 |         assertArrayEquals(new double[]{.2, .5, .8}, quantiles, .1);
28 |     }
29 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/SolveBasisSelectorTest.java:
--------------------------------------------------------------------------------
1 | package msolver;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.assertEquals;
6 |
7 | /** Tests for {@link SolveBasisSelector}. */
8 | public class SolveBasisSelectorTest {
9 |     /**
10 |      * Runs a selection over two zero-filled arrays of length 7 with fixed scale
11 |      * arguments and expects Ka = 7 and Kb = 2. The constants presumably come
12 |      * from the "milan" data set named in the test; confirm against its origin.
13 |      */
14 |     @Test
15 |     public void testMilan() {
16 |         SolveBasisSelector selector = new SolveBasisSelector();
17 |         selector.select(
18 |                 false,
19 |                 new double[7],
20 |                 new double[7],
21 |                 -1.9949008094893061,
22 |                 10.974098897900475,
23 |                 3968.1326911078277,
24 |                 3968.13268877633
25 |         );
26 |         assertEquals(2, selector.getKb());
27 |         assertEquals(7, selector.getKa());
28 |     }
29 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/optimizer/BFGSOptimizerTest.java:
--------------------------------------------------------------------------------
1 | package msolver.optimizer;
2 |
3 | import org.junit.Test;
4 |
5 | import java.util.Arrays;
6 |
7 | import static org.junit.Assert.*;
8 |
9 | /** Tests for {@link BFGSOptimizer}. */
10 | public class BFGSOptimizerTest {
11 |     /**
12 |      * Minimises a two-dimensional {@link QuadraticPotential} from (1, 2) and
13 |      * expects every checked coordinate of the solution to be 0 within 1e-10.
14 |      */
15 |     @Test
16 |     public void testQuadratic() {
17 |         BFGSOptimizer optimizer = new BFGSOptimizer(new QuadraticPotential(2));
18 |         optimizer.setVerbose(false);
19 |
20 |         double[] start = {1.0, 2.0};
21 |         double[] minimum = optimizer.solve(start, 1e-10);
22 |
23 |         // Only the first start.length coordinates are checked.
24 |         for (int dim = 0; dim < start.length; dim++) {
25 |             assertEquals(0.0, minimum[dim], 1e-10);
26 |         }
27 |     }
28 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/optimizer/NewtonOptimizerTest.java:
--------------------------------------------------------------------------------
1 | package msolver.optimizer;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.assertEquals;
6 | import static org.junit.Assert.assertTrue;
7 |
8 | /** Tests for {@link NewtonOptimizer}. */
9 | public class NewtonOptimizerTest {
10 |     /**
11 |      * Minimises a two-dimensional {@link QuadraticPotential} from (1, 2). Expects
12 |      * the solution to be 0 in every checked coordinate, exactly one step, no
13 |      * damped steps, and a converged optimizer.
14 |      */
15 |     @Test
16 |     public void testQuadratic() {
17 |         NewtonOptimizer optimizer = new NewtonOptimizer(new QuadraticPotential(2));
18 |
19 |         double[] start = {1.0, 2.0};
20 |         double[] minimum = optimizer.solve(start, 1e-10);
21 |
22 |         // Only the first start.length coordinates are checked.
23 |         for (int dim = 0; dim < start.length; dim++) {
24 |             assertEquals(0.0, minimum[dim], 1e-10);
25 |         }
26 |
27 |         assertEquals(1, optimizer.getStepCount());
28 |         assertEquals(0, optimizer.getDampedStepCount());
29 |         assertTrue(optimizer.isConverged());
30 |     }
31 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/optimizer/QuadraticPotentialTest.java:
--------------------------------------------------------------------------------
1 | package msolver.optimizer;
2 |
3 | import org.junit.Test;
4 |
5 | import static org.junit.Assert.assertEquals;
6 |
7 | /** Tests for {@link QuadraticPotential}. */
8 | public class QuadraticPotentialTest {
9 |     /**
10 |      * Expects the potential's value at (1, 2) to be 5 and the second gradient
11 |      * component at the origin to be 0.
12 |      */
13 |     @Test
14 |     public void testSimple() {
15 |         QuadraticPotential potential = new QuadraticPotential(2);
16 |
17 |         potential.computeAll(new double[]{1.0, 2.0}, 0.0);
18 |         assertEquals(5, potential.getValue(), 1e-10);
19 |
20 |         potential.computeAll(new double[]{0.0, 0.0}, 0.0);
21 |         assertEquals(0, potential.getGradient()[1], 1e-10);
22 |     }
23 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/thresholds/MarkovThresholdTest.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
3 | import msolver.data.ExponentialData;
4 | import msolver.data.GaussianData;
5 | import msolver.data.MomentData;
6 | import msolver.struct.MomentStruct;
7 | import org.junit.Test;
8 |
9 | import java.util.Arrays;
10 |
11 | import static org.junit.Assert.*;
12 |
13 | /** Sanity checks on the bounds returned by {@link MarkovThreshold}. */
14 | public class MarkovThresholdTest {
15 |     /** Copies the range, log range and 10 power/log sums of ExponentialData into a MomentStruct. */
16 |     private static MomentStruct exponentialMoments() {
17 |         MomentData source = new ExponentialData();
18 |         MomentStruct moments = new MomentStruct();
19 |         moments.min = source.getMin();
20 |         moments.max = source.getMax();
21 |         moments.logMin = source.getLogMin();
22 |         moments.logMax = source.getLogMax();
23 |         moments.powerSums = source.getPowerSums(10);
24 |         moments.logSums = source.getLogSums(10);
25 |         return moments;
26 |     }
27 |
28 |     /**
29 |      * Checks that the two-element result of {@code bound} brackets 0.9 for the
30 |      * argument 0.1 and brackets 0.01 for the argument 5.
31 |      */
32 |     @Test
33 |     public void testSimple() {
34 |         MarkovThreshold threshold = new MarkovThreshold(exponentialMoments());
35 |
36 |         double[] lowCutoff = threshold.bound(.1);
37 |         assertTrue(lowCutoff[0] > 0);
38 |         assertTrue(lowCutoff[0] < .9);
39 |         assertTrue(lowCutoff[1] > .9);
40 |         assertTrue(lowCutoff[1] <= 1);
41 |
42 |         double[] highCutoff = threshold.bound(5);
43 |         assertTrue(highCutoff[0] < 0.01);
44 |         assertTrue(highCutoff[1] > 0.01);
45 |         assertTrue(highCutoff[1] <= 0.5);
46 |     }
47 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/thresholds/RTTThresholdTest.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
3 | import msolver.data.ExponentialData;
4 | import msolver.data.MomentData;
5 | import msolver.struct.MomentStruct;
6 | import org.junit.Test;
7 |
8 | import java.util.Arrays;
9 |
10 | import static org.junit.Assert.*;
11 |
12 | /** Sanity checks on {@link RTTThreshold}, including a comparison with {@link MarkovThreshold}. */
13 | public class RTTThresholdTest {
14 |     /** Copies the range, log range and 10 power/log sums of ExponentialData into a MomentStruct. */
15 |     private static MomentStruct exponentialMoments() {
16 |         MomentData source = new ExponentialData();
17 |         MomentStruct moments = new MomentStruct();
18 |         moments.min = source.getMin();
19 |         moments.max = source.getMax();
20 |         moments.logMin = source.getLogMin();
21 |         moments.logMax = source.getLogMax();
22 |         moments.powerSums = source.getPowerSums(10);
23 |         moments.logSums = source.getLogSums(10);
24 |         return moments;
25 |     }
26 |
27 |     /**
28 |      * Checks that the RTT bounds bracket 0.9 for the argument 0.1, and that for
29 |      * the argument 5 they bracket 0.01 while lying strictly inside the Markov
30 |      * bounds computed from the same moments.
31 |      */
32 |     @Test
33 |     public void testSimple() {
34 |         MomentStruct moments = exponentialMoments();
35 |         MomentThreshold rtt = new RTTThreshold(moments);
36 |
37 |         double[] lowCutoff = rtt.bound(.1);
38 |         assertTrue(lowCutoff[0] > 0);
39 |         assertTrue(lowCutoff[0] < .9);
40 |         assertTrue(lowCutoff[1] > .9);
41 |         assertTrue(lowCutoff[1] <= 1);
42 |
43 |         MomentThreshold markov = new MarkovThreshold(moments);
44 |         double[] rttBounds = rtt.bound(5);
45 |         double[] markovBounds = markov.bound(5);
46 |
47 |         // RTT must be strictly tighter than Markov on both sides.
48 |         assertTrue(rttBounds[0] > markovBounds[0]);
49 |         assertTrue(rttBounds[1] < markovBounds[1]);
50 |
51 |         assertTrue(rttBounds[0] < 0.01);
52 |         assertTrue(rttBounds[1] > 0.01);
53 |         assertTrue(rttBounds[1] <= 0.5);
54 |     }
55 | }
--------------------------------------------------------------------------------
/javamsketch/msolver/src/test/java/msolver/thresholds/ThresholdCascadeTest.java:
--------------------------------------------------------------------------------
1 | package msolver.thresholds;
2 |
3 | import msolver.data.ExponentialData;
4 | import msolver.data.MomentData;
5 | import msolver.struct.MomentStruct;
6 | import org.junit.Test;
7 |
8 | import static org.junit.Assert.*;
9 |
10 | /** Tests for {@link ThresholdCascade}. */
11 | public class ThresholdCascadeTest {
12 |     /** Copies the range, log range and 10 power/log sums of ExponentialData into a MomentStruct. */
13 |     private static MomentStruct exponentialMoments() {
14 |         MomentData source = new ExponentialData();
15 |         MomentStruct moments = new MomentStruct();
16 |         moments.min = source.getMin();
17 |         moments.max = source.getMax();
18 |         moments.logMin = source.getLogMin();
19 |         moments.logMax = source.getLogMax();
20 |         moments.powerSums = source.getPowerSums(10);
21 |         moments.logSums = source.getLogSums(10);
22 |         return moments;
23 |     }
24 |
25 |     /** Expects {@code threshold(2, .01)} and {@code threshold(4, .01)} to both return true. */
26 |     @Test
27 |     public void testSimple() {
28 |         ThresholdCascade cascade = new ThresholdCascade(exponentialMoments());
29 |
30 |         boolean atTwo = cascade.threshold(2, .01);
31 |         assertTrue(atTwo);
32 |
33 |         boolean atFour = cascade.threshold(4, .01);
34 |         assertTrue(atFour);
35 |     }
36 | }
--------------------------------------------------------------------------------
/javamsketch/outlierbench.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Runs OutlierBench with a fixed 10 GB heap, forwarding all script arguments.
3 | # The classpath is the benchmark jar plus the contents of quantilebench/cp.txt.
4 | # "$@" and the classpath are quoted so that arguments or paths containing
5 | # spaces or glob characters reach the JVM unchanged.
6 | java -Xmx10g -Xms10g -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
7 |     OutlierBench "$@"
8 |
--------------------------------------------------------------------------------
/javamsketch/parallelMergeBench.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # Runs ParallelMergeBench with a fixed 50 GB heap, forwarding all script arguments.
3 | # The classpath is the benchmark jar plus the contents of quantilebench/cp.txt.
4 | # "$@" and the classpath are quoted so that arguments or paths containing
5 | # spaces or glob characters reach the JVM unchanged.
6 | java -Xmx50g -Xms50g -cp "quantilebench/target/quantile-bench-1.0-SNAPSHOT.jar:$(cat quantilebench/cp.txt)" \
7 |     ParallelMergeBench "$@"
8 |
--------------------------------------------------------------------------------
/javamsketch/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | futuredata
8 | java-msketch
9 | pom
10 | 1.0-SNAPSHOT
11 |
12 |
13 |
14 |
15 | org.apache.maven.plugins
16 | maven-compiler-plugin
17 | 3.3
18 |
19 | 1.8
20 | 1.8
21 |
22 |
23 |
24 |
25 |
26 | msolver
27 | quantilebench
28 |
29 |
30 |
31 |
32 | junit
33 | junit
34 | 4.12
35 | test
36 |
37 |
38 |
--------------------------------------------------------------------------------
/javamsketch/quantilebench/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | java-msketch
7 | futuredata
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | quantile-bench
13 |
14 |
15 |
16 | futuredata
17 | msolver
18 | 1.0-SNAPSHOT
19 |
20 |
21 | org.apache.commons
22 | commons-csv
23 | 1.5
24 |
25 |
26 | com.tdunning
27 | t-digest
28 | 3.2
29 |
30 |
31 | com.yahoo.datasketches
32 | sketches-core
33 | 0.10.3
34 |
35 |
36 | org.apache.spark
37 | spark-catalyst_2.11
38 | 2.2.1
39 |
40 |
41 | com.fasterxml.jackson.core
42 | jackson-databind
43 | 2.9.3
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/javamsketch/quantilebench/src/main/java/BoundRuntimeBench.java:
--------------------------------------------------------------------------------
1 | import msolver.BoundSolver;
2 | import msolver.data.ShuttleData;
3 |
4 | /**
5 |  * Micro-benchmark that times {@code BoundSolver.boundSizeRacz} against
6 |  * {@code BoundSolver.boundSizeLindsay} on the ShuttleData power sums.
7 |  */
8 | public class BoundRuntimeBench {
9 |     /** Number of queries each timed phase is normalised by. */
10 |     private static final int NUM_ITERS = 1000000;
11 |     /** Argument passed to every bound query. */
12 |     private static final int QUERY_POINT = 45;
13 |     /** Solver under test; reassigned every time a new solver is constructed. */
14 |     private static BoundSolver boundSolver;
15 |
16 |     /**
17 |      * Reports a mismatch between the two bounds (if any), warms up, then runs
18 |      * the benchmark with 1, 10 and 100 queries per solver instance.
19 |      *
20 |      * <p>NOTE(review): every solver is built from the full
21 |      * {@code ShuttleData.powerSums} array. An earlier revision copied the first
22 |      * 11 entries into a local array that was never used; that dead copy has been
23 |      * removed. If the benchmark was meant to run on 11 power sums only, pass a
24 |      * truncated array to the solver instead.
25 |      *
26 |      * @param args ignored
27 |      */
28 |     public static void main(String[] args) {
29 |         // Check bounds match
30 |         boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max);
31 |         double boundLindsay = boundSolver.boundSizeLindsay(QUERY_POINT);
32 |         double boundRacz = boundSolver.boundSizeRacz(QUERY_POINT);
33 |         if (Math.abs(boundLindsay - boundRacz) > 1e-4) {
34 |             System.out.format("Lindsay bound and Racz bound do not match: %f %f\n", boundLindsay, boundRacz);
35 |         }
36 |
37 |         // Warm start: exercise both code paths before timing anything.
38 |         for (int i = 0; i < NUM_ITERS / 2; i++) {
39 |             boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max);
40 |             boundSolver.boundSizeLindsay(QUERY_POINT);
41 |             boundSolver.boundSizeRacz(QUERY_POINT);
42 |         }
43 |
44 |         bench(1);
45 |         bench(10);
46 |         bench(100);
47 |     }
48 |
49 |     /**
50 |      * Times the Racz and Lindsay bound queries, constructing a fresh solver for
51 |      * every {@code queriesPerSolver} queries, and prints the per-query time of
52 |      * each plus their ratio.
53 |      *
54 |      * <p>The elapsed time is divided by {@code NUM_ITERS}; this equals the number
55 |      * of queries actually issued only when {@code queriesPerSolver} divides
56 |      * {@code NUM_ITERS} evenly (the solver count uses integer division).
57 |      *
58 |      * @param queriesPerSolver number of queries issued against each solver instance
59 |      */
60 |     public static void bench(int queriesPerSolver) {
61 |         int solverCount = NUM_ITERS / queriesPerSolver;
62 |
63 |         long startTime = System.nanoTime();
64 |         for (int i = 0; i < solverCount; i++) {
65 |             boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max);
66 |             for (int j = 0; j < queriesPerSolver; j++) {
67 |                 boundSolver.boundSizeRacz(QUERY_POINT);
68 |             }
69 |         }
70 |         long elapsed = System.nanoTime() - startTime;
71 |         double secondsPerRacz = elapsed / (1.0e9 * NUM_ITERS);
72 |         System.out.format("Time Per Solve @%d queries per solver (Racz): %g\n", queriesPerSolver, secondsPerRacz);
73 |
74 |         startTime = System.nanoTime();
75 |         for (int i = 0; i < solverCount; i++) {
76 |             boundSolver = new BoundSolver(ShuttleData.powerSums, ShuttleData.min, ShuttleData.max);
77 |             for (int j = 0; j < queriesPerSolver; j++) {
78 |                 boundSolver.boundSizeLindsay(QUERY_POINT);
79 |             }
80 |         }
81 |         elapsed = System.nanoTime() - startTime;
82 |         double secondsPerLindsay = elapsed / (1.0e9 * NUM_ITERS);
83 |         System.out.format("Time Per Solve @%d queries per solver (Lindsay): %g\n", queriesPerSolver, secondsPerLindsay);
84 |
85 |         System.out.println("Speedup by using Lindsay (higher is better): "+secondsPerRacz/secondsPerLindsay);
86 |         System.out.println("");
87 |     }
88 | }
89 |
--------------------------------------------------------------------------------
/javamsketch/quantilebench/src/main/java/RunConfig.java:
--------------------------------------------------------------------------------
1 | import com.fasterxml.jackson.core.type.TypeReference;
2 | import com.fasterxml.jackson.databind.ObjectMapper;
3 |
4 | import java.io.BufferedReader;
5 | import java.io.FileReader;
6 | import java.io.IOException;
7 | import java.util.Map;
8 |
9 | public class RunConfig {
10 | private Map values;
11 |
12 | public RunConfig(Map values) {
13 | this.values = values;
14 | }
15 |
16 | public static RunConfig fromJsonFile(String file) throws IOException {
17 | BufferedReader r = new BufferedReader(new FileReader(file));
18 | ObjectMapper mapper = new ObjectMapper();
19 | Map map = mapper.readValue(
20 | r,
21 | new TypeReference