├── MANIFEST.in ├── README.md ├── pyautoweka ├── __init__.py ├── java │ ├── autoweka.jar │ ├── params │ │ ├── attribselection │ │ │ ├── eval │ │ │ │ ├── weka.attributeSelection.CfsSubsetEval.params │ │ │ │ ├── weka.attributeSelection.CorrelationAttributeEval.params │ │ │ │ ├── weka.attributeSelection.GainRatioAttributeEval.params │ │ │ │ ├── weka.attributeSelection.InfoGainAttributeEval.params │ │ │ │ ├── weka.attributeSelection.OneRAttributeEval.params │ │ │ │ ├── weka.attributeSelection.PrincipalComponents.params │ │ │ │ ├── weka.attributeSelection.ReliefFAttributeEval.params │ │ │ │ └── weka.attributeSelection.SymmetricalUncertAttributeEval.params │ │ │ └── search │ │ │ │ ├── weka.attributeSelection.BestFirst.params │ │ │ │ ├── weka.attributeSelection.GreedyStepwise.params │ │ │ │ └── weka.attributeSelection.Ranker.params │ │ ├── base │ │ │ ├── weka.classifiers.bayes.BayesNet.params │ │ │ ├── weka.classifiers.bayes.NaiveBayes.params │ │ │ ├── weka.classifiers.bayes.NaiveBayesMultinomial.params │ │ │ ├── weka.classifiers.functions.GaussianProcesses.params │ │ │ ├── weka.classifiers.functions.LinearRegression.params │ │ │ ├── weka.classifiers.functions.Logistic.params │ │ │ ├── weka.classifiers.functions.MultilayerPerceptron.params │ │ │ ├── weka.classifiers.functions.SGD.params │ │ │ ├── weka.classifiers.functions.SMO.params │ │ │ ├── weka.classifiers.functions.SMOreg.params │ │ │ ├── weka.classifiers.functions.SimpleLinearRegression.params │ │ │ ├── weka.classifiers.functions.SimpleLogistic.params │ │ │ ├── weka.classifiers.functions.VotedPerceptron.params │ │ │ ├── weka.classifiers.lazy.IBk.params │ │ │ ├── weka.classifiers.lazy.KStar.params │ │ │ ├── weka.classifiers.rules.DecisionTable.params │ │ │ ├── weka.classifiers.rules.JRip.params │ │ │ ├── weka.classifiers.rules.M5Rules.params │ │ │ ├── weka.classifiers.rules.OneR.params │ │ │ ├── weka.classifiers.rules.PART.params │ │ │ ├── weka.classifiers.rules.ZeroR.params │ │ │ ├── weka.classifiers.trees.DecisionStump.params │ │ │ ├── weka.classifiers.trees.J48.params │ │ │ ├── weka.classifiers.trees.LMT.params │ │ │ ├── weka.classifiers.trees.M5P.params │ │ │ ├── weka.classifiers.trees.REPTree.params │ │ │ ├── weka.classifiers.trees.RandomForest.params │ │ │ └── weka.classifiers.trees.RandomTree.params │ │ ├── ensemble │ │ │ ├── weka.classifiers.meta.Stacking.params │ │ │ └── weka.classifiers.meta.Vote.params │ │ └── meta │ │ │ ├── weka.classifiers.lazy.LWL.params │ │ │ ├── weka.classifiers.meta.AdaBoostM1.params │ │ │ ├── weka.classifiers.meta.AdditiveRegression.params │ │ │ ├── weka.classifiers.meta.AttributeSelectedClassifier.params │ │ │ ├── weka.classifiers.meta.Bagging.params │ │ │ ├── weka.classifiers.meta.ClassificationViaRegression.params │ │ │ ├── weka.classifiers.meta.LogitBoost.params │ │ │ ├── weka.classifiers.meta.MultiClassClassifier.params │ │ │ ├── weka.classifiers.meta.RandomCommittee.params │ │ │ └── weka.classifiers.meta.RandomSubSpace.params │ ├── smac-v2.04.01-master-447-patched │ │ ├── DomainInter.jar │ │ ├── Jama-1.0.2.jar │ │ ├── StructureGraphic.jar │ │ ├── aclib-src.jar │ │ ├── aclib.jar │ │ ├── commons-collections-3.2.1-sources.jar │ │ ├── commons-collections-3.2.1.jar │ │ ├── commons-io-2.1.jar │ │ ├── commons-math-2.2.jar │ │ ├── commons-math3-3.0.jar │ │ ├── conf │ │ │ ├── logback-off.xml │ │ │ └── logback.xml │ │ ├── doc │ │ │ ├── faq.pdf │ │ │ ├── manual.pdf │ │ │ ├── options-ref.pdf │ │ │ ├── output.pdf │ │ │ └── quickstart.pdf │ │ ├── fastrf-src.jar │ │ ├── fastrf.jar │ │ ├── git-hashes.tex │ │ ├── git-hashes.txt │ │ ├── jcommander.jar │ │ ├── jmatharray.jar │ │ ├── logback-access-1.0.0.jar │ │ ├── logback-classic-1.0.0.jar │ │ ├── logback-core-1.0.0.jar │ │ ├── numerics4j-1.3.jar │ │ ├── opencsv-2.3.jar │ │ ├── patches │ │ │ └── ca │ │ │ │ └── ubc │ │ │ │ └── cs │ │ │ │ └── beta │ │ │ │ └── aclib │ │ │ │ ├── algorithmrun │ │ │ │ ├── CommandLineAlgorithmRun$1.class │ │ │ │ ├── CommandLineAlgorithmRun$2.class │ │ │ │ └── CommandLineAlgorithmRun.class │ │ │ │ ├── misc │ │ │ │ └── string │ │ │ │ │ └── SplitQuotedString.class │ │ │ │ └── targetalgorithmevaluator │ │ │ │ └── AbstractTargetAlgorithmEvaluator.class │ │ ├── slf4j-api-1.6.4.jar │ │ ├── smac │ │ ├── smac-possible-restores │ │ ├── smac-src.jar │ │ ├── smac-validate │ │ ├── smac.bat │ │ ├── smac.jar │ │ ├── smac.sh │ │ ├── spi-0.2.4.jar │ │ └── util │ │ │ └── bash_autocomplete.sh │ └── weka.jar └── pyautoweka.py └── setup.py /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | recursive-include pyautoweka/java *.jar 3 | recursive-include pyautoweka/java *.params 4 | recursive-include pyautoweka/java *.sh 5 | recursive-include pyautoweka/java *.class 6 | recursive-include pyautoweka/java *.xml 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pyautoweka 2 | ========== 3 | 4 | Description 5 | ----------- 6 | 7 | pyautoweka is a python wrapper for [Auto-WEKA](http://www.cs.ubc.ca/labs/beta/Projects/autoweka/), a Java application for algorithm selection and hyperparameter optimizations, that is build on [WEKA](http://www.cs.waikato.ac.nz/ml/weka/). 8 | 9 | 10 | Installation 11 | ------------ 12 | 13 | Download, go to the project sources and install: 14 | ``` 15 | git clone git@github.com:tdomhan/pyautoweka.git 16 | cd pyautoweka 17 | python setup.py install 18 | ``` 19 | 20 | Running a classification experiment 21 | ----------------------------------- 22 | 23 | AutoWeka for python. 24 | 25 | ```python 26 | import pyautoweka 27 | 28 | #Create an experiment 29 | experiment = pyautoweka.ClassificationExperiment(tuner_timeout=360) 30 | ``` 31 | `tuner_timeout` is the time the optimization will run in seconds. So e.g. 360 seconds = 6 minutes. The longer you run the optimization, the better of course. (Note that the `experiment` object has an interface similar to sklearn classifiers.) 32 | 33 | First we need to load some data. Let's for example the famous [Iris dataset](http://archive.ics.uci.edu/ml/datasets/Iris). Download it using [this link](http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data). 34 | 35 | Let's load it into python: 36 | 37 | ```python 38 | #load the data: 39 | import numpy as np 40 | import random 41 | 42 | X = np.loadtxt("iris.data", delimiter=",", usecols=range(4)) 43 | y = np.loadtxt("iris.data", delimiter=",", usecols=[4], dtype="object") 44 | 45 | #shuffle the data: 46 | indices = range(len(X)) 47 | random.shuffle(indices) 48 | X = X[indices] 49 | y = y[indices] 50 | 51 | #split into train and test set: 52 | X_train = X[0:100] 53 | y_train = y[0:100] 54 | 55 | X_test = X[100:] 56 | y_test = y[100:] 57 | 58 | #now we can fit a model: 59 | experiment.fit(X_train, y_train) 60 | 61 | #and predict the labels of the held out data: 62 | y_predict = experiment.predict(X_test) 63 | 64 | #Let's check what accuracy we get: 65 | num_correct = sum([1 for predicted, correct in zip(y_predict, y_test) if predicted == correct]) 66 | print "Accuracy: %f" % (float(num_correct) / len(y_test)) 67 | ``` 68 | 69 | This should give you an accuracy in the high 90%s. 70 | 71 | Running a regression experiment 72 | ----------------------------------- 73 | 74 | ```python 75 | import pyautoweka 76 | 77 | #Create an experiment 78 | experiment = pyautoweka.RegressionExperiment(tuner_timeout=360) 79 | ``` 80 | 81 | First we need to load some data. Let's for example the [Boston housing dataset](https://archive.ics.uci.edu/ml/datasets/Housing). Download it using [this link](https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data). 82 | 83 | ```python 84 | #load the data: 85 | import numpy as np 86 | import random 87 | 88 | X = np.loadtxt("housing.data.txt", usecols=range(13)) 89 | y = np.loadtxt("housing.data.txt", usecols=[13]) 90 | 91 | #shuffle the data: 92 | indices = range(len(X)) 93 | random.shuffle(indices) 94 | X = X[indices] 95 | y = y[indices] 96 | 97 | #split into train and test set: 98 | X_train = X[0:100] 99 | y_train = y[0:100] 100 | 101 | X_test = X[100:] 102 | y_test = y[100:] 103 | 104 | #now we can fit a model: 105 | experiment.fit(X_train, y_train) 106 | 107 | #and regress on held out test data: 108 | y_predict = experiment.predict(X_test) 109 | 110 | #RMSE of the prediction: 111 | rmse = np.sqrt(((y_predict-y_test)**2).mean()) 112 | ``` 113 | 114 | 115 | Advanced: Selecting specific classifiers 116 | ---------------------------------------- 117 | 118 | When you don't set a specific classifier all available classifiers will be tried. You have the option to limit the search to certain classifiers as follows: 119 | 120 | First of all let's see what classifiers are available: 121 | 122 | ```python 123 | import pyautoweka 124 | print pyautoweka.AVAILABLE_CLASSIFIERS 125 | ``` 126 | 127 | Now let's say we want to just use the Simple Logistic classifier: 128 | ```python 129 | experiment.add_classfier("weka.classifiers.functions.SimpleLogistic") 130 | ``` 131 | 132 | 133 | Advanced: files created 134 | ----------------------- 135 | 136 | When you create a new experiment theres a bunch of files that will be generated before and during the run of AutoWeka. For each experiment there will be a new folder within in the `experiments` folder. The folder will have the name of the experiment, if it was specified in the constructor. Each time you fit data a tempraroy arff file will be created that holds all the data in it. This file will be delete after the `fit` call. 137 | 138 | -------------------------------------------------------------------------------- /pyautoweka/__init__.py: -------------------------------------------------------------------------------- 1 | from pyautoweka import DataSet, CrossValidation, RandomSubSampling 2 | from pyautoweka import ClassificationExperiment, RegressionExperiment, AVAILABLE_CLASSIFIERS 3 | -------------------------------------------------------------------------------- /pyautoweka/java/autoweka.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/autoweka.jar -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.CfsSubsetEval.params: -------------------------------------------------------------------------------- 1 | 0_M {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | 1_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.CorrelationAttributeEval.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/attribselection/eval/weka.attributeSelection.CorrelationAttributeEval.params -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.GainRatioAttributeEval.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/attribselection/eval/weka.attributeSelection.GainRatioAttributeEval.params -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.InfoGainAttributeEval.params: -------------------------------------------------------------------------------- 1 | 0_M {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | 1_B {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | 4 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.OneRAttributeEval.params: -------------------------------------------------------------------------------- 1 | 0_S {0}[0] 2 | 1_F [2,15][10]i 3 | 2_D {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | 3_INT_B [1,64][6]il 5 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.PrincipalComponents.params: -------------------------------------------------------------------------------- 1 | num_HIDDEN {0,1}[0] 2 | 1_INT_A {-1}[-1] 3 | 2_INT_A [1, 1024][32]il 4 | 1_C {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | 2_R [0.5, 1.0][0.95] 6 | 3_O {REMOVED,REMOVE_PREV}[REMOVE_PREV] 7 | 8 | Conditionals: 9 | 1_INT_A |num_HIDDEN in {0} 10 | 2_INT_A |num_HIDDEN in {1} 11 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.ReliefFAttributeEval.params: -------------------------------------------------------------------------------- 1 | 0_INT_K [2,64][10]il 2 | 1_W {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | 2_INT_A [1,8][2]il 4 | 5 | Conditionals: 6 | 2_INT_A | 1_W in {REMOVED} 7 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/eval/weka.attributeSelection.SymmetricalUncertAttributeEval.params: -------------------------------------------------------------------------------- 1 | 0_M {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/search/weka.attributeSelection.BestFirst.params: -------------------------------------------------------------------------------- 1 | 0_D {0, 1, 2}[1] 2 | 1_INT_N [2, 10][5]i 3 | 2_S {0}[0] 4 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/search/weka.attributeSelection.GreedyStepwise.params: -------------------------------------------------------------------------------- 1 | 0_C {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | 1_B {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | 2_R {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | 3_T [0, 20] [1] 5 | 4_INT_N [10,1000][30]il 6 | 7 | Conditionals: 8 | 3_T | 2_R in {REMOVED} 9 | 4_INT_N | 2_R in {REMOVE_PREV} 10 | 11 | -------------------------------------------------------------------------------- /pyautoweka/java/params/attribselection/search/weka.attributeSelection.Ranker.params: -------------------------------------------------------------------------------- 1 | 0_T [0.2,10][1] 2 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.bayes.BayesNet.params: -------------------------------------------------------------------------------- 1 | D {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | Q {weka.classifiers.bayes.net.search.local.K2,weka.classifiers.bayes.net.search.local.HillClimber,weka.classifiers.bayes.net.search.local.LAGDHillClimber,weka.classifiers.bayes.net.search.local.SimulatedAnnealing,weka.classifiers.bayes.net.search.local.TabuSearch,weka.classifiers.bayes.net.search.local.TAN} [weka.classifiers.bayes.net.search.local.K2] 3 | 4 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.bayes.NaiveBayes.params: -------------------------------------------------------------------------------- 1 | K {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | D {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | 4 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.bayes.NaiveBayesMultinomial.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/base/weka.classifiers.bayes.NaiveBayesMultinomial.params -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.GaussianProcesses.params: -------------------------------------------------------------------------------- 1 | L [0.0001, 1] [0.1]l 2 | N {0,1,2} [0] 3 | 4 | K {weka.classifiers.functions.supportVector.NormalizedPolyKernel,weka.classifiers.functions.supportVector.PolyKernel,weka.classifiers.functions.supportVector.Puk,weka.classifiers.functions.supportVector.RBFKernel}[weka.classifiers.functions.supportVector.NormalizedPolyKernel] 5 | #Normalized Poly Kernel 6 | npoly_E [0.2, 5] [1.0] 7 | npoly_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 8 | #Poly Kernel 9 | poly_E [0.2, 5] [1.0] 10 | poly_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 11 | #Puk 12 | puk_S [0.1, 10] [1.0] 13 | puk_O [0.1, 1] [1.0] 14 | #RBF 15 | rbf_C [0.0001,1] [0.01]l 16 | 17 | Conditionals 18 | npoly_E| K in {weka.classifiers.functions.supportVector.NormalizedPolyKernel} 19 | npoly_L| K in {weka.classifiers.functions.supportVector.NormalizedPolyKernel} 20 | poly_E| K in {weka.classifiers.functions.supportVector.PolyKernel} 21 | poly_L| K in {weka.classifiers.functions.supportVector.PolyKernel} 22 | puk_S | K in {weka.classifiers.functions.supportVector.Puk} 23 | puk_O | K in {weka.classifiers.functions.supportVector.Puk} 24 | rbf_C | K in {weka.classifiers.functions.supportVector.RBFKernel} 25 | 26 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.LinearRegression.params: -------------------------------------------------------------------------------- 1 | S {0,1,2} [0] 2 | C {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | R [1e-7, 10] [1e-7]l 4 | 5 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.Logistic.params: -------------------------------------------------------------------------------- 1 | R [1e-12, 10] [1e-7]l 2 | 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.MultilayerPerceptron.params: -------------------------------------------------------------------------------- 1 | L [0.1, 1] [0.3] 2 | M [0.1, 1] [0.2] 3 | B {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | H {a,i,o,t} [a] 5 | C {REMOVED,REMOVE_PREV}[REMOVE_PREV] 6 | R {REMOVED,REMOVE_PREV}[REMOVE_PREV] 7 | D {REMOVED,REMOVE_PREV}[REMOVE_PREV] 8 | S {1}[1] 9 | 10 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.SGD.params: -------------------------------------------------------------------------------- 1 | F {0,1,2}[0] 2 | L [0.00001, 0.1] [0.01]l 3 | R [1e-12, 10] [1e-4]l 4 | N {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | M {REMOVED,REMOVE_PREV}[REMOVE_PREV] 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.SMO.params: -------------------------------------------------------------------------------- 1 | 0_C [0.5,1.5][1.0] 2 | 1_N {0,1,2} [0] 3 | 2_M {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | 3_REG_IGNORE_QUOTE_START_K {weka.classifiers.functions.supportVector.NormalizedPolyKernel,weka.classifiers.functions.supportVector.PolyKernel,weka.classifiers.functions.supportVector.Puk,weka.classifiers.functions.supportVector.RBFKernel}[weka.classifiers.functions.supportVector.NormalizedPolyKernel] 5 | #Normalized Poly Kernel 6 | 4_npoly_E [0.2, 5] [1.0] 7 | 4_npoly_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 8 | #Poly Kernel 9 | 4_poly_E [0.2, 5] [1.0] 10 | 4_poly_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 11 | #Puk 12 | 4_puk_S [0.1, 10] [1.0] 13 | 4_puk_O [0.1, 1] [1.0] 14 | #RBF 15 | 4_rbf_G [0.0001,1] [0.01]l 16 | 5_QUOTE_END {REMOVED} [REMOVED] 17 | 18 | Conditionals: 19 | 4_npoly_E| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.NormalizedPolyKernel} 20 | 4_npoly_L| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.NormalizedPolyKernel} 21 | 4_poly_E| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.PolyKernel} 22 | 4_poly_L| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.PolyKernel} 23 | 4_puk_S | 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.Puk} 24 | 4_puk_O | 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.Puk} 25 | 4_rbf_G | 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.RBFKernel} 26 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.SMOreg.params: -------------------------------------------------------------------------------- 1 | 0_C [0.5,1.5][1.0] 2 | 1_N {0,1,2} [0] 3 | 2_V {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | 3_REG_IGNORE_QUOTE_START_K {weka.classifiers.functions.supportVector.NormalizedPolyKernel,weka.classifiers.functions.supportVector.PolyKernel,weka.classifiers.functions.supportVector.Puk,weka.classifiers.functions.supportVector.RBFKernel}[weka.classifiers.functions.supportVector.NormalizedPolyKernel] 5 | #Normalized Poly Kernel 6 | 4_npoly_E [0.2, 5] [1.0] 7 | 4_npoly_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 8 | #Poly Kernel 9 | 4_poly_E [0.2, 5] [1.0] 10 | 4_poly_L {REMOVED,REMOVE_PREV}[REMOVE_PREV] 11 | #Puk 12 | 4_puk_S [0.1, 10] [1.0] 13 | 4_puk_O [0.1, 1] [1.0] 14 | #RBF 15 | 4_rbf_G [0.0001,1] [0.01]l 16 | 5_QUOTE_END {REMOVED} [REMOVED] 17 | 18 | Conditionals: 19 | 4_npoly_E| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.NormalizedPolyKernel} 20 | 4_npoly_L| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.NormalizedPolyKernel} 21 | 4_poly_E| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.PolyKernel} 22 | 4_poly_L| 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.PolyKernel} 23 | 4_puk_S | 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.Puk} 24 | 4_puk_O | 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.Puk} 25 | 4_rbf_G | 3_REG_IGNORE_QUOTE_START_K in {weka.classifiers.functions.supportVector.RBFKernel} 26 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.SimpleLinearRegression.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/base/weka.classifiers.functions.SimpleLinearRegression.params -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.SimpleLogistic.params: -------------------------------------------------------------------------------- 1 | S {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | W_HIDDEN {0,1}[0] 3 | 1_W {0}[0] 4 | 2_W [0,1][0] 5 | A {REMOVED,REMOVE_PREV}[REMOVE_PREV] 6 | 7 | Conditionals: 8 | 1_W | W_HIDDEN in {0} 9 | 2_W | W_HIDDEN in {1} 10 | 11 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.functions.VotedPerceptron.params: -------------------------------------------------------------------------------- 1 | INT_I [1, 10] [1]i 2 | INT_M [5000, 50000] [10000]il 3 | E [0.2, 5] [1.0] 4 | 5 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.lazy.IBk.params: -------------------------------------------------------------------------------- 1 | E {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | INT_K [1,64] [1]il 3 | X {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | F {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | I {REMOVED,REMOVE_PREV}[REMOVE_PREV] 6 | 7 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.lazy.KStar.params: -------------------------------------------------------------------------------- 1 | INT_B [1,100] [20]i 2 | E {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | M {a,d,m,n} [a] 4 | 5 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.rules.DecisionTable.params: -------------------------------------------------------------------------------- 1 | E {acc,rmse,mae,auc} [acc] 2 | I {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | S {weka.attributeSelection.BestFirst,weka.attributeSelection.GreedyStepwise,weka.attributeSelection.Ranker}[weka.attributeSelection.BestFirst] 4 | X {1,2,3,4} [1] 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.rules.JRip.params: -------------------------------------------------------------------------------- 1 | N [1, 5] [2.0] 2 | E {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | P {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | INT_O [1,5][2]i 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.rules.M5Rules.params: -------------------------------------------------------------------------------- 1 | N {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | INT_M [1,64][4]il 3 | U {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | R {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.rules.OneR.params: -------------------------------------------------------------------------------- 1 | INT_B [1,32][6]il 2 | 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.rules.PART.params: -------------------------------------------------------------------------------- 1 | INT_N [2,5][3]i 2 | INT_M [1,64][2]il 3 | R {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | B {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.rules.ZeroR.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/base/weka.classifiers.rules.ZeroR.params -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.DecisionStump.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/base/weka.classifiers.trees.DecisionStump.params -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.J48.params: -------------------------------------------------------------------------------- 1 | O {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | U {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | B {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | J {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | A {REMOVED,REMOVE_PREV}[REMOVE_PREV] 6 | S {REMOVED,REMOVE_PREV}[REMOVE_PREV] 7 | INT_M [1, 64][2]il 8 | C [0,1][0.25] 9 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.LMT.params: -------------------------------------------------------------------------------- 1 | B {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | R {REMOVED,REMOVE_PREV}[REMOVE_PREV] 3 | C {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | P {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | INT_M [1,64][15]il 6 | W_HIDDEN {0,1}[0] 7 | 1_W {0}[0] 8 | 2_W [0,1][0] 9 | A {REMOVED,REMOVE_PREV}[REMOVE_PREV] 10 | 11 | Conditionals: 12 | 1_W | W_HIDDEN in {0} 13 | 2_W | W_HIDDEN in {1} 14 | 15 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.M5P.params: -------------------------------------------------------------------------------- 1 | N {REMOVED,REMOVE_PREV}[REMOVE_PREV] 2 | INT_M [1,64][4]il 3 | U {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | R {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.REPTree.params: -------------------------------------------------------------------------------- 1 | INT_M [1, 64][2]il 2 | V [1e-5, 1e-1][1e-3]l 3 | depth_HIDDEN {0,1}[0] 4 | 1_INT_L {-1}[-1] 5 | 2_INT_L [2, 20][2]i 6 | P {REMOVED,REMOVE_PREV}[REMOVE_PREV] 7 | 8 | Conditionals: 9 | 1_INT_L | depth_HIDDEN in {0} 10 | 2_INT_L | depth_HIDDEN in {1} 11 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.RandomForest.params: -------------------------------------------------------------------------------- 1 | INT_I [2, 256][10]il 2 | features_HIDDEN {0,1}[0] 3 | 1_INT_K {1}[1] 4 | 2_INT_K [2, 32][2]il 5 | depth_HIDDEN {0,1}[0] 6 | 1_INT_depth {1}[1] 7 | 2_INT_depth [2, 20][2]i 8 | 9 | Conditionals: 10 | 1_INT_K | features_HIDDEN in {0} 11 | 2_INT_K | features_HIDDEN in {1} 12 | 1_INT_depth |depth_HIDDEN in {0} 13 | 2_INT_depth |depth_HIDDEN in {1} 14 | 15 | -------------------------------------------------------------------------------- /pyautoweka/java/params/base/weka.classifiers.trees.RandomTree.params: -------------------------------------------------------------------------------- 1 | INT_M [1, 64][1]il 2 | features_HIDDEN {0,1}[0] 3 | 1_INT_K {0}[0] 4 | 2_INT_K [2, 32][2]il 5 | depth_HIDDEN {0,1}[0] 6 | 1_INT_depth {0}[0] 7 | 2_INT_depth [2, 20][2]i 8 | back_HIDDEN {0,1}[0] 9 | 1_INT_N {0}[0] 10 | 2_INT_N [2, 5][3]i 11 | U {REMOVED,REMOVE_PREV}[REMOVE_PREV] 12 | 13 | Conditionals: 14 | 1_INT_K |features_HIDDEN in {0} 15 | 2_INT_K |features_HIDDEN in {1} 16 | 1_INT_depth |depth_HIDDEN in {0} 17 | 2_INT_depth |depth_HIDDEN in {1} 18 | 1_INT_N |back_HIDDEN in {0} 19 | 2_INT_N |back_HIDDEN in {1} 20 | 21 | -------------------------------------------------------------------------------- /pyautoweka/java/params/ensemble/weka.classifiers.meta.Stacking.params: -------------------------------------------------------------------------------- 1 | X {10}[10] 2 | S {1}[1] 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/ensemble/weka.classifiers.meta.Vote.params: -------------------------------------------------------------------------------- 1 | R {AVG,PROD,MAJ,MIN,MAX} [AVG] 2 | S {1}[1] 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.lazy.LWL.params: -------------------------------------------------------------------------------- 1 | K {-1,10,30,60,90,120} [-1] 2 | U {0,1,2,3,4} [0] 3 | A {weka.core.neighboursearch.LinearNNSearch} [weka.core.neighboursearch.LinearNNSearch] 4 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.AdaBoostM1.params: -------------------------------------------------------------------------------- 1 | p_HIDDEN {0,1}[0] 2 | 1_P {100}[100] 3 | 2_INT_P [50,100][100]i 4 | INT_I [2,128][10]il 5 | Q {REMOVED,REMOVE_PREV}[REMOVE_PREV] 6 | S {1}[1] 7 | 8 | Conditionals: 9 | 1_P | p_HIDDEN in {0} 10 | 2_INT_P | p_HIDDEN in {1} 11 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.AdditiveRegression.params: -------------------------------------------------------------------------------- 1 | s_HIDDEN {0,1}[0] 2 | 1_S {1}[1] 3 | 2_S [0,1.0][1] 4 | INT_I [2,128][10]il 5 | 6 | Conditionals: 7 | 1_S | s_HIDDEN in {0} 8 | 2_S | s_HIDDEN in {1} 9 | 10 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.AttributeSelectedClassifier.params: -------------------------------------------------------------------------------- 1 | S {weka.attributeSelection.BestFirst,weka.attributeSelection.GreedyStepwise,weka.attributeSelection.Ranker}[weka.attributeSelection.BestFirst] 2 | E {weka.attributeSelection.CfsSubsetEval,weka.attributeSelection.WrapperSubsetEval,weka.attributeSelection.OneRAttributeEval,weka.attributeSelection.InfoGainAttributeEval,weka.attributeSelection.HoldOutSubsetEvaluator,weka.attributeSelection.GainRatioAttributeEval}[weka.attributeSelection.CfsSubsetEval] 3 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.Bagging.params: -------------------------------------------------------------------------------- 1 | INT_P [10,200][100]i 2 | INT_I [2,128][10]il 3 | S {1}[1] 4 | O {REMOVED,REMOVE_PREV}[REMOVE_PREV] 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.ClassificationViaRegression.params: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/params/meta/weka.classifiers.meta.ClassificationViaRegression.params -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.LogitBoost.params: -------------------------------------------------------------------------------- 1 | INT_I [2,128][10]il 2 | h_HIDDEN {0,1}[0] 3 | 1_H {1}[1] 4 | 2_H [0,1.0][1] 5 | INT_R [1,5][1]i 6 | f_HIDDEN {0,1}[0] 7 | 1_F {0}[0] 8 | 2_INT_F [1,5][1]i 9 | Q {REMOVED,REMOVE_PREV}[REMOVE_PREV] 10 | p_HIDDEN {0,1}[0] 11 | 1_P {100}[100] 12 | 2_INT_P [50,100][100]i 13 | L{1e50}[1e50] 14 | S{1}[1] 15 | 16 | Conditionals: 17 | 1_H | h_HIDDEN in {0} 18 | 2_H | h_HIDDEN in {1} 19 | 1_F | f_HIDDEN in {0} 20 | 2_INT_F | f_HIDDEN in {1} 21 | 1_P | p_HIDDEN in {0} 22 | 2_INT_P | p_HIDDEN in {1} 23 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.MultiClassClassifier.params: -------------------------------------------------------------------------------- 1 | M {0,1,2,3} [0] 2 | R [0.5,4][2.0] 3 | P {REMOVED,REMOVE_PREV}[REMOVE_PREV] 4 | S {1}[1] 5 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.RandomCommittee.params: -------------------------------------------------------------------------------- 1 | INT_I [2, 64][10]il 2 | S{1}[1] 3 | 4 | -------------------------------------------------------------------------------- /pyautoweka/java/params/meta/weka.classifiers.meta.RandomSubSpace.params: -------------------------------------------------------------------------------- 1 | INT_I [2, 64][10]il 2 | P [0.1,1.0] [0.5] 3 | S{1}[1] 4 | 5 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/DomainInter.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/DomainInter.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/Jama-1.0.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/Jama-1.0.2.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/StructureGraphic.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/StructureGraphic.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/aclib-src.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/aclib-src.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/aclib.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/aclib.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/commons-collections-3.2.1-sources.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/commons-collections-3.2.1-sources.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/commons-collections-3.2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/commons-collections-3.2.1.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/commons-io-2.1.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/commons-io-2.1.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/commons-math-2.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/commons-math-2.2.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/commons-math3-3.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/commons-math3-3.0.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/conf/logback-off.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/conf/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | ${OUTPUTDIR}/${RUNGROUPDIR}/log-run${NUMRUN}.txt 8 | false 9 | 10 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | ${OUTPUTDIR}/${RUNGROUPDIR}/log-warn${NUMRUN}.txt 19 | false 20 | 21 | WARN 22 | 23 | 24 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | ${OUTPUTDIR}/${RUNGROUPDIR}/log-err${NUMRUN}.txt 33 | false 34 | 35 | ERROR 36 | 37 | 38 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 55 | 56 | 57 | 58 | 59 | false 60 | ${OUTPUTDIR}/${RUNGROUPDIR}/runhashes-run${NUMRUN}.txt 61 | 62 | %msg%n 63 | 64 | 65 | 66 | 67 | 68 | 69 | ${STDOUT-LEVEL} 70 | 71 | 72 | 73 | 74 | 75 | [%-5level] %msg%n 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/doc/faq.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/doc/faq.pdf -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/doc/manual.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/doc/manual.pdf -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/doc/options-ref.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/doc/options-ref.pdf -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/doc/output.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/doc/output.pdf -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/doc/quickstart.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/doc/quickstart.pdf -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/fastrf-src.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/fastrf-src.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/fastrf.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/fastrf.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/git-hashes.tex: -------------------------------------------------------------------------------- 1 | fastrf & v1.05.01-master-89 & 37eb5bea2cc01327f8ce7b395e56310b7e0b493a & 0 \\ 2 | \hline 3 | ACLib & v2.04.01-master-429 & 7cdcd3c32a72c0c0f2af2a951b1f5f293eacf7ea & 0 \\ 4 | \hline 5 | SMAC & v2.04.01-master-447 & 7ac3348146a31faf66e573d500ee7ed749c1a504 & 0 \\ 6 | \hline 7 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/git-hashes.txt: -------------------------------------------------------------------------------- 1 | fastrf 37eb5bea2cc01327f8ce7b395e56310b7e0b493a 0 2 | ACLib 7cdcd3c32a72c0c0f2af2a951b1f5f293eacf7ea 0 3 | SMAC 7ac3348146a31faf66e573d500ee7ed749c1a504 0 4 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/jcommander.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/jcommander.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/jmatharray.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/jmatharray.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/logback-access-1.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/logback-access-1.0.0.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/logback-classic-1.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/logback-classic-1.0.0.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/logback-core-1.0.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/logback-core-1.0.0.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/numerics4j-1.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/numerics4j-1.3.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/opencsv-2.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/opencsv-2.3.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/algorithmrun/CommandLineAlgorithmRun$1.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/algorithmrun/CommandLineAlgorithmRun$1.class -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/algorithmrun/CommandLineAlgorithmRun$2.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/algorithmrun/CommandLineAlgorithmRun$2.class -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/algorithmrun/CommandLineAlgorithmRun.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/algorithmrun/CommandLineAlgorithmRun.class -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/misc/string/SplitQuotedString.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/misc/string/SplitQuotedString.class -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/targetalgorithmevaluator/AbstractTargetAlgorithmEvaluator.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/patches/ca/ubc/cs/beta/aclib/targetalgorithmevaluator/AbstractTargetAlgorithmEvaluator.class -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/slf4j-api-1.6.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/slf4j-api-1.6.4.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | SMAC_MEMORY_INPUT=$SMAC_MEMORY 3 | SMACMEM=1024 4 | test "$SMAC_MEMORY_INPUT" -ge 1 2>&- && SMACMEM=$SMAC_MEMORY_INPUT 5 | EXEC=ca.ubc.cs.beta.smac.executors.AutomaticConfigurator 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | echo "Starting with $SMACMEM MB of RAM" 8 | exec java -Xmx"$SMACMEM"m -cp "$DIR/patches:$DIR/smac.jar:$DIR/commons-math-2.2.jar:$DIR/Jama-1.0.2.jar:$DIR/commons-math3-3.0.jar:$DIR/conf/:$DIR/numerics4j-1.3.jar:$DIR/slf4j-api-1.6.4.jar:$DIR/commons-collections-3.2.1.jar:$DIR/commons-io-2.1.jar:$DIR/fastrf.jar:$DIR/jcommander.jar:$DIR/opencsv-2.3.jar:$DIR/aclib.jar:$DIR/truezip-samples-7.4.3-jar-with-dependencies.jar:$DIR/logback-access-1.0.0.jar:$DIR/logback-core-1.0.0.jar:$DIR/logback-classic-1.0.0.jar:$DIR/patches" $EXEC "$@" 9 | 10 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac-possible-restores: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | EXEC=ca.ubc.cs.beta.aclib.state.legacy.LegacyStateDirectoryScanner 3 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 4 | exec java -cp "$DIR/smac.jar:$DIR/commons-math-2.2.jar:$DIR/Jama-1.0.2.jar:$DIR/commons-math3-3.0.jar:$DIR/conf/:$DIR/numerics4j-1.3.jar:$DIR/slf4j-api-1.6.4.jar:$DIR/commons-collections-3.2.1.jar:$DIR/commons-io-2.1.jar:$DIR/fastrf.jar:$DIR/jcommander.jar:$DIR/opencsv-2.3.jar:$DIR/aclib.jar:$DIR/truezip-samples-7.4.3-jar-with-dependencies.jar:$DIR/logback-access-1.0.0.jar:$DIR/logback-core-1.0.0.jar:$DIR/logback-classic-1.0.0.jar" $EXEC "$@" 5 | 6 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac-src.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/smac-src.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac-validate: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | SMAC_MEMORY_INPUT=$SMAC_MEMORY 3 | SMACMEM=1024 4 | test "$SMAC_MEMORY_INPUT" -ge 1 2>&- && SMACMEM=$SMAC_MEMORY_INPUT 5 | EXEC=ca.ubc.cs.beta.smac.executors.ValidatorExecutor 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | exec java -Xmx"$SMACMEM"m -cp "$DIR/smac.jar:$DIR/commons-math-2.2.jar:$DIR/Jama-1.0.2.jar:$DIR/commons-math3-3.0.jar:$DIR/conf/:$DIR/numerics4j-1.3.jar:$DIR/slf4j-api-1.6.4.jar:$DIR/commons-collections-3.2.1.jar:$DIR/commons-io-2.1.jar:$DIR/fastrf.jar:$DIR/jcommander.jar:$DIR/opencsv-2.3.jar:$DIR/aclib.jar:$DIR/truezip-samples-7.4.3-jar-with-dependencies.jar:$DIR/logback-access-1.0.0.jar:$DIR/logback-core-1.0.0.jar:$DIR/logback-classic-1.0.0.jar" $EXEC "$@" 8 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | set SMACMEM=1024 3 | IF NOT "%SMAC_MEMORY%"=="" (set SMACMEM=%SMAC_MEMORY%) 4 | set DIR=%~dp0 5 | set EXEC=ca.ubc.cs.beta.smac.executors.AutomaticConfigurator 6 | set jarconcat= 7 | SETLOCAL ENABLEDELAYEDEXPANSION 8 | for /F "delims=" %%a IN ('dir /b /s "%DIR%\*.jar"') do set jarconcat=%%a;!jarconcat! 9 | echo Starting with %SMACMEM% MB of RAM 10 | @echo on 11 | java -Xmx%SMACMEM%m -cp "%DIR%conf\;%DIR%patches\;%jarconcat%%DIR%patches\ " %EXEC% %* 12 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/smac.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/smac.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | SMAC_MEMORY_INPUT=$SMAC_MEMORY 3 | SMACMEM=1024 4 | test "$SMAC_MEMORY_INPUT" -ge 1 2>&- && SMACMEM=$SMAC_MEMORY_INPUT 5 | EXEC=ca.ubc.cs.beta.smac.executors.AutomaticConfigurator 6 | DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | echo "Starting with $SMACMEM MB of RAM" 8 | exec java -Xmx"$SMACMEM"m -cp "$DIR/patches:$DIR/smac.jar:$DIR/commons-math-2.2.jar:$DIR/Jama-1.0.2.jar:$DIR/commons-math3-3.0.jar:$DIR/conf/:$DIR/numerics4j-1.3.jar:$DIR/slf4j-api-1.6.4.jar:$DIR/commons-collections-3.2.1.jar:$DIR/commons-io-2.1.jar:$DIR/fastrf.jar:$DIR/jcommander.jar:$DIR/opencsv-2.3.jar:$DIR/aclib.jar:$DIR/truezip-samples-7.4.3-jar-with-dependencies.jar:$DIR/logback-access-1.0.0.jar:$DIR/logback-core-1.0.0.jar:$DIR/logback-classic-1.0.0.jar:$DIR/patches" $EXEC "$@" 9 | 10 | -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/spi-0.2.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/smac-v2.04.01-master-447-patched/spi-0.2.4.jar -------------------------------------------------------------------------------- /pyautoweka/java/smac-v2.04.01-master-447-patched/util/bash_autocomplete.sh: -------------------------------------------------------------------------------- 1 | #Taken from http://www.debian-administration.org/article/An_introduction_to_bash_completion_part_2 2 | _smac-validate() 3 | { 4 | local cur prev opts 5 | COMPREPLY=() 6 | cur="${COMP_WORDS[COMP_CWORD]}" 7 | prev="${COMP_WORDS[COMP_CWORD-1]}" 8 | opts="--abortOnCrash --abortOnFirstRunCrash --algo --algoExec --checkInstanceFilesExist --configuration --cutoffLength --cutoffTime --cutoff_length --cutoff_time --deterministic --empericalPerformance --execDir --execdir --experimentDir --feature_file --instanceFeatureFile --instanceFile --instance_file --instance_seed_file --interInstanceObj --inter_instance_obj --intraInstanceObj --intra_instance_obj --leakMemory --leakMemoryAmount --logAllCallStrings --logAllProcessOutput --maxConcurrentAlgoExecs --maxTimestamp --minTimestamp --multFactor --numConcurrentAlgoExecs --numRun --numSeedsPerTestInstance --numTestInstances --numValidationRuns --numberOfConcurrentAlgoExecs --numberOfSeedsPerTestInstance --numberOfTestInstances --numberOfValidationRuns --outdir --outputDirectory --outputFileSuffix --overallObj --overall_obj --paramFile --paramfile --retryTargetAlgorithmRunCount --runHashCodeFile --runObj --run_obj --scenarioFile --seed --tae --taeSP --targetAlgorithmEvaluator --targetAlgorithmEvaluatorSearchPath --testInstanceFile --test_instance_file --test_instance_seed_file --trajectoryFile --tunerOverheadTime --tunerTime --tunerTimeout --useScenarioOutDir --validateOnlyLastIncumbent --validateTestInstances --validationHeaders --validationRoundingMode --verifySAT " 9 | 10 | if [[ ${cur} == -* ]] ; then 11 | COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) 12 | return 0 13 | fi 14 | } 15 | complete -F _smac-validate smac-validate 16 | 17 | #Taken from http://www.debian-administration.org/article/An_introduction_to_bash_completion_part_2 18 | _smac() 19 | { 20 | local cur prev opts 21 | COMPREPLY=() 22 | cur="${COMP_WORDS[COMP_CWORD]}" 23 | prev="${COMP_WORDS[COMP_CWORD-1]}" 24 | opts="--abortOnCrash --abortOnFirstRunCrash --adaptiveCapping --algo --algoExec --capAddSlack --capSlack --checkInstanceFilesExist --cleanOldStateOnSuccess --consoleLogLevel --countSMACTimeAsTunerTime --cutoffLength --cutoffTime --cutoff_length --cutoff_time --defaultConfigRuns --deterministic --doValidation --execDir --execdir --executionMode --expectedImprovementFunction --experimentDir --feature_file --frac_rawruntime --freeMemoryPecentageToSubsample --fullTreeBootstrap --help --ignoreConditionality --imputationIterations --initialChallenge --initialIncumbent --initialIncumbentRuns --initialN --instanceFeatureFile --instanceFile --instance_file --instance_seed_file --intensificationPercentage --interInstanceObj --inter_instance_obj --intraInstanceObj --intra_instance_obj --leakMemory --leakMemoryAmount --logAllCallStrings --logAllProcessOutput --logLevel --logModel --maskInactiveConditionalParametersAsDefaultValue --maxConcurrentAlgoExecs --maxIncumbentRuns --maxRunsForIncumbent --maxTimestamp --minTimestamp --minVariance --modelHashCodeFile --multFactor --nTrees --numChallengers --numConcurrentAlgoExecs --numEIRandomConfigs --numIterations --numPCA --numRandomConfigsInEI --numRun --numRunsLimit --numSeedsPerTestInstance --numTestInstances --numTrees --numValidationRuns --numberOfChallengers --numberOfConcurrentAlgoExecs --numberOfEIRandomConfigs --numberOfIterations --numberOfRandomConfigsInEI --numberOfRunsLimit --numberOfSeedsPerTestInstance --numberOfTestInstances --numberOfTrees --numberOfValidationRuns --optionFile --optionFile2 --outdir --outputDirectory --outputFileSuffix --overallObj --overall_obj --paramFile --paramfile --penalizeImputedValues --preprocessMarginal --ratioFeatures --restoreIteration --restoreStateFrom --restoreStateIteration --retryTargetAlgorithmRunCount --runGroupName --runHashCodeFile --runObj --run_obj --runtimeLimit --saveContext --saveContextWithState --scenarioFile --secondaryOptionsFile --seed --seedOffset --showHiddenParameters --shuffleImputedValues --splitMin --stateDeserializer --stateSerializer --storeDataInLeaves --subsamplePercentage --subsampleValuesWhenLowMemory --subsampleValuesWhenLowOnMemory --tae --taeSP --targetAlgorithmEvaluator --targetAlgorithmEvaluatorSearchPath --testInstanceFile --test_instance_file --test_instance_seed_file --totalNumRunsLimit --treatCensoredDataAsUncensored --tunerTimeout --useBrokenVarianceCalculation --validateOnlyLastIncumbent --validation --validationHeaders --validationRoundingMode --verifySAT --version --wallClockLimit " 25 | 26 | if [[ ${cur} == -* ]] ; then 27 | COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) ) 28 | return 0 29 | fi 30 | } 31 | complete -F _smac smac 32 | 33 | -------------------------------------------------------------------------------- /pyautoweka/java/weka.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/automl/pyautoweka/f21fc244fc79cbdd1cb22846f3eaaf25dc799866/pyautoweka/java/weka.jar -------------------------------------------------------------------------------- /pyautoweka/pyautoweka.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import xml.dom.minidom 3 | from subprocess import call, check_output 4 | from abc import ABCMeta, abstractmethod 5 | import numpy as np 6 | import datetime 7 | import os 8 | import imp 9 | import ast 10 | import tempfile 11 | 12 | from pkg_resources import resource_filename 13 | 14 | 15 | EXPERIMENT_BASE_FOLDER = "experiments" 16 | 17 | def get_available_classifiers(): 18 | """ 19 | Determine the available classifiers by iterating over 20 | all parameter files. 21 | """ 22 | params_dir = resource_filename(__name__, 'java/params') 23 | classifiers = [] 24 | for root, dir, files in os.walk(params_dir): 25 | for file in files: 26 | if file.startswith("weka.classifiers") and file.endswith(".params"): 27 | clf = file[0:-len(".params")] 28 | classifiers.append(clf) 29 | return classifiers 30 | 31 | AVAILABLE_CLASSIFIERS = get_available_classifiers() 32 | 33 | def run_program(cmd, hide_output=False): 34 | if hide_output: 35 | ret = call(cmd, 36 | stdout=open(os.devnull), 37 | stderr=open(os.devnull)) 38 | else: 39 | ret = call(cmd) 40 | return ret 41 | 42 | def arff_write(fout, name, X, y, feature_names=None, unique_labels=None): 43 | """ 44 | Write out an arff file based on X and y. 45 | 46 | nans are treated as missing values, that will be encoded as ? 47 | 48 | unique_labels: the unique labels in y. Set to None if y contains real numbers 49 | """ 50 | nexamples = len(X[0]) 51 | 52 | if feature_names == None: 53 | feature_names = ["feature%d" % i for i in xrange(0,nexamples)] 54 | fout.write("@RELATION %s\n" % name) 55 | for feature_name in feature_names: 56 | fout.write("@ATTRIBUTE %s REAL\n" % feature_name) 57 | if unique_labels is not None: 58 | fout.write("@ATTRIBUTE class {%s}\n" % ", ".join([str(x) for x in unique_labels])) 59 | else: 60 | fout.write("@ATTRIBUTE target REAL\n") 61 | fout.write("@DATA\n") 62 | for row, label in zip(X, y): 63 | for value in row: 64 | if np.isfinite(value): 65 | fout.write(str(value)) 66 | else: 67 | #missing value, encoded as ? 68 | fout.write("?") 69 | fout.write(",") 70 | fout.write(str(label)) 71 | fout.write("\n") 72 | 73 | 74 | def simple_csv_read(fin, skip_header=True): 75 | """ 76 | Read csv file and yield row by row. 77 | Note: escaping is not supported 78 | """ 79 | for line in fin: 80 | yield line.split(",") 81 | 82 | def value_to_literal(value): 83 | """ 84 | Tries to convert a value to either a 85 | float, int or boolean. 86 | """ 87 | try: 88 | return ast.literal_eval(value) 89 | except: 90 | return value 91 | 92 | def read_predictions_from_csv(fin): 93 | rows = simple_csv_read(fin) 94 | header = next(rows) 95 | header_field_to_idx = dict(zip(header, range(len(header)))) 96 | predictions = [] 97 | for row in rows: 98 | prediction = row[header_field_to_idx["predicted"]] 99 | if ":" in prediction: 100 | prediction = prediction.split(":")[1] 101 | predictions.append(value_to_literal(prediction)) 102 | return np.asarray(predictions) 103 | 104 | 105 | class InstanceGenerator(object): 106 | def __init__(self): 107 | self.name = "Default" 108 | self.params = {} 109 | 110 | def get_arg_str(self): 111 | key_value_str = lambda key, value: "%s=%s" % (str(key), str(value)) 112 | return ":".join([key_value_str(key, value) 113 | for key, value in self.params.iteritems()]) 114 | 115 | 116 | class CrossValidation(InstanceGenerator): 117 | """ 118 | Performs k-fold cross validation on the training set. 119 | """ 120 | def __init__(self, seed=0, num_folds=10): 121 | """ 122 | :param seed: The seed to use for randomizing the dataset 123 | :param num_fold./s: The number of folds to generate 124 | """ 125 | super(CrossValidation, self).__init__() 126 | self.name = "autoweka.instancegenerators.CrossValidation" 127 | self.params["seed"] = seed 128 | self.params["numFolds"] = num_folds 129 | 130 | 131 | class RandomSubSampling(InstanceGenerator): 132 | """ 133 | Performs generates an arbitrary number of folds by randomly 134 | making a partition of the training data of a fixed percentage. 135 | """ 136 | def __init__(self, starting_seed=0, num_samples=10, 137 | percent_training=70, bias_to_uniform=None): 138 | """ 139 | 140 | :param starting_seed: The seed to use for randomizing the dataset 141 | :param num_samples: The number of subsamples to generate 142 | :param percent_training: The percent of the training data to use 143 | as 'new training data' 144 | :param bias_to_uniform: The bias towards a uniform class 145 | distribution (optional) 146 | """ 147 | super(RandomSubSampling, self).__init__() 148 | self.name = "autoweka.instancegenerators.RandomSubSampling" 149 | self.params["startingSeed"] = starting_seed 150 | self.params["numSamples"] = num_samples 151 | self.params["percent"] = percent_training 152 | if bias_to_uniform: 153 | self.params["bias"] = bias_to_uniform 154 | 155 | 156 | class DataSet(object): 157 | def __init__(self, train_file, test_file=None, name="data", unique_labels=None): 158 | """ 159 | Dataset. 160 | 161 | :param train_file: ARFF file containing the training data 162 | :param test_file: ARFF file containing the testing data, that will be 163 | used once the experiment completed (optional) 164 | :param name: name of the dataset (optional) 165 | """ 166 | self.train_file = os.path.abspath(train_file) 167 | if test_file: 168 | self.test_file = os.path.abspath(test_file) 169 | else: 170 | self.test_file = None 171 | self.name = name 172 | self.unique_labels = unique_labels 173 | 174 | 175 | class Experiment(object): 176 | 177 | __metaclass__ = ABCMeta 178 | 179 | OPTIMIZATION_METHOD = ["SMAC", "TPE"] 180 | 181 | OPTIMIZATION_METHOD_CONSTRUCTOR = { 182 | "SMAC": "autoweka.smac.SMACExperimentConstructor", 183 | "TPE": "autoweka.tpe.TPEExperimentConstructor"} 184 | 185 | OPTIMIZATION_METHOD_ARGS = { 186 | "SMAC": [ 187 | "-experimentpath", os.path.abspath(EXPERIMENT_BASE_FOLDER), 188 | "-propertyoverride", 189 | ("smacexecutable=%s" % (resource_filename(__name__, 'java/smac-v2.04.01-master-447-patched/smac.sh'))) 190 | ], 191 | #TODO: fix the TPE paths 192 | "TPE": [ 193 | "-experimentpath", os.path.abspath(EXPERIMENT_BASE_FOLDER), 194 | "-propertyoverride", 195 | ("pythonpath=$PYTHONPATH\:~/src/hyperopt\:~/src/hyperopt/external:" 196 | "tperunner=./src/python/tperunner.py:python=/usr/bin/python2") 197 | ] 198 | } 199 | 200 | OPTIMIZATION_METHOD_EXTRA = { 201 | "SMAC": "executionMode=SMAC:initialIncumbent=RANDOM:initialN=1", 202 | "TPE": "" 203 | } 204 | 205 | def __init__( 206 | self, 207 | experiment_name="Experiment", 208 | result_metric=None, 209 | optimization_method=OPTIMIZATION_METHOD[0], 210 | instance_generator=None, 211 | tuner_timeout=180, 212 | train_timeout=120, 213 | attribute_selection=False, 214 | attribute_selection_timeout=100, 215 | memory="3000m" 216 | ): 217 | """ 218 | Create a new experiment. 219 | 220 | :param tuner_timeout: The number of seconds to run the SMBO method. (total timeout) 221 | :param train_timeout: The number of seconds to spend training 222 | a classifier with a set of hyperparameters on a given partition of 223 | the training set. (timeout per parameter setting) 224 | """ 225 | 226 | if optimization_method not in Experiment.OPTIMIZATION_METHOD: 227 | raise ValueError("%s is not a valid optimization method," 228 | " choose one from:" % ( 229 | optimization_method, 230 | ", ".join(Experiment.OPTIMIZATION_METHOD))) 231 | 232 | if (instance_generator 233 | and not isinstance(instance_generator, InstanceGenerator)): 234 | raise ValueError(("instance_generator needs to be" 235 | " an InstanceGenerator or None")) 236 | 237 | if not isinstance(attribute_selection, bool): 238 | raise ValueError("attribute_selection needs to be a boolean") 239 | 240 | self.experiment_name = experiment_name 241 | self.result_metric = result_metric 242 | self.optimization_method = optimization_method 243 | self.instance_generator = instance_generator 244 | self.tuner_timeout = tuner_timeout 245 | self.train_timeout = train_timeout 246 | self.attribute_selection = attribute_selection 247 | self.attribute_selection_timeout = attribute_selection_timeout 248 | self.memory = memory 249 | 250 | self.datasets = [] 251 | self.classifiers = [] 252 | 253 | self.file_name = None 254 | 255 | self.prepared = False 256 | 257 | def _get_xml(self): 258 | """ 259 | Write this experiment as a valid xml that can be read by Auto-WEKA. 260 | """ 261 | 262 | root = ET.Element('experimentBatch') 263 | tree = ET.ElementTree(root) 264 | 265 | experiment = ET.SubElement(root, 'experimentComponent') 266 | 267 | name_node = ET.SubElement(experiment, 'name') 268 | name_node.text = self.experiment_name 269 | 270 | result_metric_node = ET.SubElement(experiment, 'resultMetric') 271 | result_metric_node.text = self.result_metric 272 | 273 | experiment_constructor = ET.SubElement(experiment, 274 | 'experimentConstructor') 275 | experiment_constructor.text = Experiment.OPTIMIZATION_METHOD_CONSTRUCTOR[ 276 | self.optimization_method] 277 | for experiment_arg in Experiment.OPTIMIZATION_METHOD_ARGS[ 278 | self.optimization_method]: 279 | experiment_arg_node = ET.SubElement(experiment, 280 | 'experimentConstructorArgs') 281 | experiment_arg_node.text = experiment_arg 282 | 283 | extra_props_node = ET.SubElement(experiment, 'extraProps') 284 | extra_props_node.text = Experiment.OPTIMIZATION_METHOD_EXTRA[ 285 | self.optimization_method] 286 | 287 | instance_generator_node = ET.SubElement(experiment, 288 | 'instanceGenerator') 289 | if not self.instance_generator: 290 | #Default generator 291 | instance_generator_node.text = "autoweka.instancegenerators.Default" 292 | instance_generator_args_node = ET.SubElement( 293 | experiment, 294 | 'instanceGeneratorArgs') 295 | instance_generator_args_node.text = "" 296 | else: 297 | instance_generator_node.text = self.instance_generator.name 298 | instance_generator_args_node = ET.SubElement( 299 | experiment, 300 | 'instanceGeneratorArgs') 301 | instance_generator_args_node.text = self.instance_generator.get_arg_str() 302 | 303 | tuner_timeout_node = ET.SubElement(experiment, 'tunerTimeout') 304 | tuner_timeout_node.text = str(self.tuner_timeout) 305 | train_timeout_node = ET.SubElement(experiment, 'trainTimeout') 306 | train_timeout_node.text = str(self.train_timeout) 307 | 308 | attribute_selection_node = ET.SubElement(experiment, 'attributeSelection') 309 | if self.attribute_selection: 310 | attribute_selection_node.text = "true" 311 | attr_select_timeout_node = ET.SubElement( 312 | experiment, 'attributeSelectionTimeout') 313 | attr_select_timeout_node.text = str(self.attribute_selection_timeout) 314 | else: 315 | attribute_selection_node.text = "false" 316 | 317 | for classifier in self.classifiers: 318 | classifier_node = ET.SubElement(experiment, 'allowedClassifiers') 319 | classifier_node.text = classifier 320 | 321 | memory_node = ET.SubElement(experiment, 'memory') 322 | memory_node.text = str(self.memory) 323 | 324 | # Write all dataset components: 325 | 326 | for dataset in self.datasets: 327 | dataset_node = ET.SubElement(root, 'datasetComponent') 328 | train_file_node = ET.SubElement(dataset_node, 'trainArff') 329 | train_file_node.text = dataset.train_file 330 | test_file_node = ET.SubElement(dataset_node, 'testArff') 331 | if dataset.test_file: 332 | test_file_node.text = dataset.test_file 333 | else: 334 | #train_file not set, so use the train file again 335 | test_file_node.text = dataset.train_file 336 | name_node = ET.SubElement(dataset_node, 'name') 337 | name_node.text = dataset.name 338 | 339 | return tree 340 | 341 | def __repr__(self): 342 | root = self._get_xml().getroot() 343 | return xml.dom.minidom.parseString(ET.tostring(root)).toprettyxml() 344 | 345 | def _write_xml(self, file_name="experiment.xml"): 346 | tree = self._get_xml() 347 | self.file_name = file_name 348 | tree.write(file_name) 349 | 350 | @abstractmethod 351 | def set_data_set(self, 352 | train_data, 353 | train_labels, 354 | test_data=None, 355 | test_labels=None, 356 | feature_names=None, 357 | name="dataset1"): 358 | pass 359 | 360 | @abstractmethod 361 | def _write_prediction_file(self, prediction_file, X): 362 | pass 363 | 364 | def set_data_set_files(self, train_file, test_file=None, name=None): 365 | """ 366 | Add a dataset to the experiment. 367 | (For now only on dataset per experiment is supported) 368 | 369 | :param train_file: ARFF file containing the training data 370 | :param test_file: ARFF file containing the testing data, that will be 371 | used once the experiment completed (optional) 372 | :param name: name of the dataset (optional) 373 | """ 374 | if not os.path.exists(train_file): 375 | raise Exception("train_file doesn't exist") 376 | if test_file is not None and not os.path.exists(test_file): 377 | raise Exception("test_file doesn't exist") 378 | if name == None: 379 | name = os.path.basename(train_file) 380 | #check there's not other dataset with the same name 381 | for dataset in self.datasets: 382 | if dataset.name == name: 383 | raise ValueError("A dataset with the name '%s', was already added." % name) 384 | self.datasets = [DataSet(train_file, test_file, name)] 385 | 386 | def add_classfier(self, clf): 387 | """ 388 | Restrict the search to a certain classifier. Call multiple times to select more than one. 389 | If not called, all classifiers will be used. 390 | 391 | For a list of available classifiers see: pyautoweka.AVAILABLE_CLASSIFIERS 392 | 393 | :param clf: the classifier 394 | """ 395 | if not clf in AVAILABLE_CLASSIFIERS: 396 | raise ValueError("%s is not one of the AVAILABLE_CLASSIFIERS." % clf) 397 | self.classifiers.append(clf) 398 | self.prepared = False 399 | 400 | def prepare(self, hide_output=True): 401 | """ 402 | Creates the experiment folder. 403 | 404 | """ 405 | if len(self.datasets) == 0: 406 | raise Exception("No datasets added yet, see Experiment.set_data_set") 407 | self._write_xml(self.experiment_name + ".xml") 408 | experiment_constructor = [ "java", 409 | "-cp", 410 | resource_filename(__name__, 'java/autoweka.jar'), 411 | "autoweka.ExperimentConstructor", 412 | self.file_name] 413 | ret = run_program(experiment_constructor, hide_output=hide_output) 414 | if ret == 0: 415 | #TODO: check return type for errors 416 | self.prepared = True 417 | return 418 | else: 419 | self.prepared = False 420 | raise Exception("Could not prepare the experiment") 421 | 422 | def run(self, seeds=[0], hide_output=True): 423 | """ 424 | Run a experiment that was previously created 425 | 426 | :param seeds: a list of seeds for the random number generator 427 | """ 428 | #TODO: run multiple experiments in parallel (maybe one per CPU: multiprocessing.cpu_count()) 429 | # -> let each java process run in the background and wait until all the processes have finished 430 | if not self.prepared: 431 | self.prepare() 432 | print "Running experiments" 433 | print "Time allocated(see Experiment.tuner_timeout): ", str(datetime.timedelta(seconds=self.tuner_timeout)) 434 | for dataset in self.datasets: 435 | print "Running experiment on dataset %s" % dataset.name 436 | experiment_folder = self.get_experiment_folder(dataset) 437 | for seed in seeds: 438 | print "Running for seed %d" % seed 439 | experiment_runner = [ "java", 440 | "-cp", 441 | resource_filename(__name__, 'java/autoweka.jar'), 442 | "autoweka.tools.ExperimentRunner", 443 | experiment_folder, 444 | str(seed)] 445 | run_program(experiment_runner, hide_output=hide_output) 446 | #now let's merge the trajectories 447 | trajectory_merger = ["java", 448 | "-cp", 449 | resource_filename(__name__, 'java/autoweka.jar'), 450 | "autoweka.TrajectoryMerger", 451 | experiment_folder] 452 | print "Merging trajectories" 453 | run_program(trajectory_merger, hide_output=hide_output) 454 | 455 | def get_experiment_folder(self, dataset): 456 | experiment_folder = os.path.join(EXPERIMENT_BASE_FOLDER, 457 | self.experiment_name + "-" + dataset.name) 458 | return experiment_folder 459 | 460 | def get_best_seed_from_trajectories(self, dataset): 461 | experiment_folder = self.get_experiment_folder(dataset) 462 | 463 | trajectories_file = os.path.join(experiment_folder, 464 | self.experiment_name + "-" + dataset.name + ".trajectories") 465 | if not os.path.exists(trajectories_file): 466 | raise Exception("Trajectories file doesn't exist. Did you run the experiment?") 467 | best_trajectory_group = ["java", 468 | "-cp", 469 | resource_filename(__name__, 'java/autoweka.jar'), 470 | "autoweka.tools.GetBestFromTrajectoryGroup", 471 | trajectories_file] 472 | #print " ".join(best_trajectory_group) 473 | program_output = str(check_output(best_trajectory_group)) 474 | seed = -1 475 | for line in program_output.split("\n"): 476 | if line.startswith("Best point seed"): 477 | seed = int(line[len("Best point seed"):]) 478 | if seed < 0: 479 | raise Exception("Failed getting seed") 480 | #print "Best seed: %d" % seed 481 | return seed 482 | 483 | def predict_from_file(self, data_file, predictions_file="out.csv", hide_output=True): 484 | """ 485 | Make predictions on unseen data, using the best parameters. 486 | 487 | The predictions will be written in CSV format into predictions_file. 488 | """ 489 | #TODO: check the experiment has been run already 490 | if len(self.datasets) == 0: 491 | raise Exception("No datasets added yet, see Experiment.set_data_set") 492 | 493 | #TODO: for now we only support a single dataset 494 | dataset = self.datasets[0] 495 | seed = self.get_best_seed_from_trajectories(dataset) 496 | experiment_folder = self.get_experiment_folder(dataset) 497 | 498 | #TODO: what if there's not attribute selection 499 | prediction_runner = ["java", 500 | "-cp", 501 | resource_filename(__name__, 'java/autoweka.jar'), 502 | "autoweka.tools.TrainedModelPredictionMaker", 503 | "-model", 504 | "%s/trained.%d.model" % (experiment_folder, seed)] 505 | attributeselection_file = "%s/trained.%d.attributeselection" % (experiment_folder, seed) 506 | if self.attribute_selection and os.path.exists(attributeselection_file): 507 | prediction_runner.append("-attributeselection") 508 | prediction_runner.append(attributeselection_file) 509 | prediction_runner.extend(["-dataset", 510 | data_file, 511 | "-predictionpath", 512 | predictions_file]) 513 | run_program(prediction_runner, hide_output=hide_output) 514 | 515 | def fit(self, X, y): 516 | """ 517 | Fit a model to the data. 518 | 519 | X: array-like samples x features 520 | y: array-like labels 521 | """ 522 | 523 | self.set_data_set(X, y) 524 | 525 | self.run() 526 | 527 | def fit_arff(self, file_name): 528 | self.set_data_set(file_name) 529 | self.run() 530 | 531 | def predict(self, X): 532 | """ 533 | Make predictions. 534 | """ 535 | temp_dir = tempfile.mkdtemp() 536 | prediction_data_path = os.path.join(temp_dir, "X.arff") 537 | prediction_output_path = os.path.join(temp_dir, "out.csv") 538 | 539 | try: 540 | with open(prediction_data_path, 'w') as prediction_file: 541 | X = np.asarray(X) 542 | assert len(X.shape) == 2, "X needs to be 2d: n_samples x n_features" 543 | 544 | self._write_prediction_file(prediction_file, X) 545 | prediction_file.flush() 546 | 547 | self.predict_from_file(prediction_data_path, 548 | predictions_file=prediction_output_path, 549 | hide_output=True) 550 | 551 | #read the output: 552 | with open(prediction_output_path) as predictions_input: 553 | predictions = read_predictions_from_csv(predictions_input) 554 | return predictions 555 | finally: 556 | if os.path.exists(prediction_data_path): 557 | os.remove(prediction_data_path) 558 | 559 | if os.path.exists(prediction_output_path): 560 | os.remove(prediction_output_path) 561 | 562 | os.rmdir(temp_dir) 563 | 564 | return None 565 | 566 | def score(self, X, y): 567 | pass 568 | 569 | class ClassificationExperiment(Experiment): 570 | 571 | RESULT_METRICS = ["errorRate"] 572 | 573 | def __init__(self, 574 | result_metric=RESULT_METRICS[0], 575 | *args, 576 | **kwargs): 577 | if result_metric not in ClassificationExperiment.RESULT_METRICS: 578 | raise ValueError("%s is not a valid classification result metric," 579 | " choose one from: %s" % ( 580 | result_metric, 581 | ", ".join(ClassificationExperiment.RESULT_METRICS))) 582 | super(ClassificationExperiment, self).__init__(result_metric=result_metric, *args, **kwargs) 583 | 584 | def set_data_set(self, 585 | train_data, 586 | train_labels, 587 | test_data=None, 588 | test_labels=None, 589 | feature_names=None, 590 | name="dataset1"): 591 | """ 592 | Add a dataset that the experiment will be run on. 593 | (For now only one dataset per experiment is supported) 594 | 595 | :param train_data: training data as a 2 dimensional list, n_samples x n_features + 1 (label) 596 | :param test_data: test data as a 2 dimensional list, n_samples x n_features 597 | :param feature_names: the name of each feature 598 | :param name: the name of the dataset 599 | """ 600 | fname_train = name + "_train.arff" 601 | if test_data is not None and test_labels is not None: 602 | fname_test = name + "_test.arff" 603 | #add the labels as the last column to the test data: 604 | test_data = np.asarray(test_data) 605 | test_labels = np.asarray(test_labels) 606 | 607 | assert len(test_data.shape) == 2, "test_data needs to be 2d: n_samples x n_features" 608 | assert len(test_labels.shape) == 1, "test_labels needs to be 1d" 609 | #assert test_labels.dtype == np.int, "the labels need to be integer values" 610 | 611 | #test_combined = np.append(test_data,test_labels[:,None],1) 612 | else: 613 | fname_test = None 614 | 615 | #add the labels as the last column to the train data: 616 | train_data = np.asarray(train_data) 617 | train_labels = np.asarray(train_labels) 618 | #train_combined = np.append(train_data,train_labels[:,None],1) 619 | train_unique_labels = np.unique(train_labels) 620 | 621 | assert len(train_data.shape) == 2, "train_data needs to be 2d: n_samples x n_features + 1 (label)" 622 | assert len(train_labels.shape) == 1, "train_labels needs to be 1d" 623 | #assert train_labels.dtype == np.int, "the labels need to be integer values" 624 | 625 | with open(fname_train, 'w') as fout: 626 | arff_write(fout, name, train_data, train_labels, feature_names, train_unique_labels) 627 | 628 | if fname_test: 629 | with open(fname_test, 'w') as fout: 630 | arff_write(fout, name, test_data, test_labels, feature_names, train_unique_labels) 631 | 632 | self.datasets = [DataSet(fname_train, fname_test, name, train_unique_labels)] 633 | 634 | 635 | def _write_prediction_file(self, prediction_file, X): 636 | pseudo_label = [self.datasets[0].unique_labels[0]] * X.shape[0] 637 | arff_write(prediction_file, 638 | "prediction_data", 639 | X, 640 | pseudo_label, 641 | unique_labels=self.datasets[0].unique_labels) 642 | 643 | 644 | class RegressionExperiment(Experiment): 645 | 646 | RESULT_METRICS = ["rmse", 647 | "rrse",#root relative square error 648 | "meanAbsoluteErrorMetric", 649 | "relativeAbsoluteErrorMetric"] 650 | 651 | def __init__(self, 652 | result_metric=RESULT_METRICS[0], 653 | *args, 654 | **kwargs): 655 | if result_metric not in RegressionExperiment.RESULT_METRICS: 656 | raise ValueError("%s is not a valid regression result metric," 657 | " choose one from: %s" % ( 658 | result_metric, 659 | ", ".join(RegressionExperiment.RESULT_METRICS))) 660 | super(RegressionExperiment, self).__init__(result_metric=result_metric, *args, **kwargs) 661 | 662 | def set_data_set(self, 663 | train_data, 664 | train_labels, 665 | test_data=None, 666 | test_labels=None, 667 | feature_names=None, 668 | name="dataset1"): 669 | """ 670 | Add a dataset that the experiment will be run on. 671 | (For now only one dataset per experiment is supported) 672 | 673 | :param train_data: training data as a 2 dimensional list, n_samples x n_features + 1 (label) 674 | :param test_data: test data as a 2 dimensional list, n_samples x n_features 675 | :param feature_names: the name of each feature 676 | :param name: the name of the dataset 677 | """ 678 | fname_train = name + "_train.arff" 679 | if test_data is not None and test_labels is not None: 680 | fname_test = name + "_test.arff" 681 | #add the labels as the last column to the test data: 682 | test_data = np.asarray(test_data) 683 | test_labels = np.asarray(test_labels) 684 | 685 | assert len(test_data.shape) == 2, "test_data needs to be 2d: n_samples x n_features" 686 | assert len(test_labels.shape) == 1, "test_labels needs to be 1d" 687 | #assert test_labels.dtype == np.int, "the labels need to be integer values" 688 | 689 | #test_combined = np.append(test_data,test_labels[:,None],1) 690 | else: 691 | fname_test = None 692 | 693 | #add the labels as the last column to the train data: 694 | train_data = np.asarray(train_data) 695 | train_labels = np.asarray(train_labels) 696 | 697 | assert len(train_data.shape) == 2, "train_data needs to be 2d: n_samples x n_features + 1 (label)" 698 | assert len(train_labels.shape) == 1, "train_labels needs to be 1d" 699 | 700 | with open(fname_train, 'w') as fout: 701 | arff_write(fout, name, train_data, train_labels, feature_names, unique_labels=None) 702 | 703 | if fname_test: 704 | with open(fname_test, 'w') as fout: 705 | arff_write(fout, name, test_data, test_labels, feature_names, unique_labels=None) 706 | 707 | self.datasets = [DataSet(fname_train, fname_test, name)] 708 | 709 | 710 | def _write_prediction_file(self, prediction_file, X): 711 | pseudo_targets = [1.] * X.shape[0] 712 | arff_write(prediction_file, 713 | "prediction_data", 714 | X, 715 | pseudo_targets, 716 | unique_labels=None) 717 | 718 | 719 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | import os 5 | 6 | def read(fname): 7 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 8 | 9 | setup(name='pyautoweka', 10 | version='.1', 11 | description='AutoWeka for python', 12 | author='Tobias Domhan', 13 | author_email='tdomhan@gmail.com', 14 | url='http://www.cs.ubc.ca/labs/beta/Projects/autoweka/', 15 | packages=['pyautoweka'], 16 | #package_data={"pyautoweka": ["./java/weka.jar"]}, 17 | include_package_data = True, 18 | eager_resources=["pyautoweka/weka.jar"], 19 | long_description=read('README.md'), 20 | requires=[ 21 | 'lxml', 22 | 'numpy' 23 | ], 24 | ) 25 | --------------------------------------------------------------------------------