├── .gitignore ├── COLAB.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASE.md ├── setup.py └── src ├── __init__.py └── wekaexamples ├── __init__.py ├── associations ├── __init__.py ├── apriori_output.py └── associations.py ├── attribute_selection ├── __init__.py ├── attribute_selection.py ├── attribute_selection2.py ├── attribute_selection_cv.py └── attribute_selection_test.py ├── book ├── __init__.py └── message_classifier.py ├── classifiers ├── __init__.py ├── adtree.py ├── bayesnet.py ├── classifiers.py ├── cost_sensitive.py ├── crossvalidation_addprediction.py ├── incremental_classifier.py ├── mp5.py ├── output_class_distribution.py ├── parameter_optimization.py └── train_test_split.py ├── clusterers ├── __init__.py ├── centroids.py ├── classes_to_clusters.py ├── clope.py ├── cluster_data.py ├── clusterers.py └── silhouette_coefficient.py ├── core ├── __init__.py ├── capabilities.py ├── classes.py ├── converters.py ├── database.py ├── dataset.py ├── packages.py ├── random_dataset.py ├── serialization.py └── version.py ├── data ├── airline.arff ├── anneal.arff ├── anneal_excel.csv ├── anneal_weka.csv ├── bodyfat.arff ├── bolts.arff ├── diabetes.arff ├── glass.arff ├── iris-test.arff ├── iris-train.arff ├── iris.arff ├── iris.csv ├── iris_no_class.arff ├── labor.arff ├── reutersTop10Randomized_1perc_shortened.arff ├── supermarket.arff ├── vote.arff └── wine.arff ├── datagenerators ├── __init__.py └── datagenerators.py ├── experiments ├── __init__.py ├── experiments.py └── learning_curve.py ├── filters ├── __init__.py └── filters.py ├── flow ├── __init__.py ├── attribute_selection.py ├── build_classifier_incrementally.py ├── build_clusterer_incrementally.py ├── build_evaluate_classifier.py ├── build_evaluate_clusterer.py ├── build_save_clusterer.py ├── build_save_load_classifier.py ├── classify_data.py ├── cluster_data.py ├── combine_storage.py ├── conversions.py ├── crossvalidate_classifier.py ├── crossvalidate_clusterer.py ├── dataset_matrixplot.py ├── 
dump_instances.py ├── filter_datasets.py ├── for_loop.py ├── generate_dataset.py ├── init_storage_value.py ├── list_file.py ├── load_database.py ├── load_dataset.py ├── load_save_flow.py ├── math_expression.py ├── output_actor_help.py ├── plot_dataset.py ├── stop_flow.py └── update_storage_value.py ├── helper.py └── timeseries ├── __init__.py └── timeseries.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | 56 | # vi 57 | *~ 58 | *.swp 59 | 60 | # IDEs 61 | *.e4p 62 | *.nja 63 | .idea 64 | 65 | 66 | # other 67 | generated.arff 68 | 69 | /bin/ 70 | .classpath 71 | .project 72 | .pydevproject 73 | -------------------------------------------------------------------------------- /COLAB.md: -------------------------------------------------------------------------------- 1 | # COLAB 2 | 3 | python-weka-wrapper3 can be run in Google's COLAB as well. 
4 | 5 | ## Installation 6 | 7 | Use the following minimal instructions to install python-weka-wrapper3 in a COLAB Python 3 environment: 8 | 9 | ``` 10 | !pip install python-weka-wrapper3 11 | ``` 12 | 13 | ## Pitfalls 14 | 15 | * Once the JVM has been stopped, you might have to restart the COLAB runtime 16 | * After installing packages, you need to restart the COLAB runtime, otherwise the packages won't be available to the JVM 17 | 18 | ## Examples 19 | 20 | Below are some example notebooks: 21 | * [associator_hotspot](https://colab.research.google.com/drive/10KTYZ1-1eZajf11706iR4hr0ARaqIaJT?usp=drive_link) 22 | * [experiments](https://colab.research.google.com/drive/1N0cQd1Ax-6EcjAxP8QlBT2Xpj_AKr2MQ?usp=drive_link) 23 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include src *.arff 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # python-weka-wrapper3-examples 2 | 3 | Example code for the [python-weka-wrapper3](https://github.com/fracpete/python-weka-wrapper3) library. 4 | 5 | Some of the examples are modelled after the original Examples for Weka: 6 | 7 | https://git.cms.waikato.ac.nz/weka/weka/-/tree/main/trunk/wekaexamples 8 | 9 | 10 | Requirements: 11 | 12 | * Python 3 13 | * python-weka-wrapper3 (>= 0.3.0) 14 | * JDK 11 or later (OpenJDK 11 recommended) 15 | 16 | You can install the Python libraries either with `pip install <name>` or from 17 | pre-built packages available for your platform. 18 | 19 | ## COLAB 20 | 21 | For some COLAB examples, see the [COLAB document](COLAB.md). 
22 | 23 | 24 | ## Forum 25 | 26 | You can post questions, patches or enhancement requests in the following Google Group: 27 | 28 | https://groups.google.com/forum/#!forum/python-weka-wrapper 29 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | Pypi 2 | ==== 3 | 4 | Preparation: 5 | 6 | * increment version in `setup.py` 7 | 8 | Commands for releasing on pypi.org: 9 | 10 | ``` 11 | find -name "*~" -delete 12 | python setup.py clean 13 | python setup.py sdist upload 14 | ``` 15 | 16 | 17 | Github 18 | ====== 19 | 20 | Steps: 21 | 22 | * start new release (version: `vX.Y.Z`) 23 | * enter release notes, i.e., significant changes since last release 24 | * upload `python-weka-wrapper3-examples-X.Y.Z.tar.gz` previously generated with `setup.py` 25 | * publish 26 | 27 | 28 | Google Group 29 | ============ 30 | 31 | * post release on the Google Group: https://groups.google.com/forum/#!forum/python-weka-wrapper 32 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
# setup.py
# Copyright (C) 2014-2024 Fracpete (pythonwekawrapper at gmail dot com)

from setuptools import setup


# Packaging script for the example collection. The sources live below "src",
# hence the empty-string entry in package_dir.
setup(
    name="python-weka-wrapper3-examples",
    description="Examples for the python-weka-wrapper3 library.",
    long_description=
    "Examples for the python-weka-wrapper3 library. "
    + "Some examples are modelled after the Examples for Weka, located here: "
    + "https://git.cms.waikato.ac.nz/weka/weka/-/tree/main/trunk/wekaexamples",
    url="https://github.com/fracpete/python-weka-wrapper3-examples",
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: GNU General Public License (GPL)',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Programming Language :: Python :: 3',
    ],
    license='GNU General Public License version 3.0 (GPLv3)',
    package_dir={
        '': 'src'
    },
    # Every sub-directory of src/wekaexamples that ships an __init__.py must be
    # listed here, otherwise its example scripts are missing from the installed
    # distribution. (The "data" directory has no __init__.py and is therefore
    # not a package.)
    packages=[
        "wekaexamples",
        "wekaexamples.associations",
        "wekaexamples.attribute_selection",
        "wekaexamples.book",
        "wekaexamples.classifiers",
        "wekaexamples.clusterers",
        "wekaexamples.core",
        "wekaexamples.datagenerators",
        "wekaexamples.experiments",
        "wekaexamples.filters",
        "wekaexamples.flow",
        "wekaexamples.timeseries",
    ],
    version="0.3.0",
    author='Peter "fracpete" Reutemann',
    author_email='pythonwekawrapper at gmail dot com',
    install_requires=[
        "python-weka-wrapper3>=0.3.0",
    ],
)
def main(args):
    """
    Builds an Apriori associator on a dataset and prints the resulting rules twice:
    first through the underlying Java objects, then through the Python wrapper API.
    Uses the vote UCI dataset if no dataset is specified.

    :param args: the commandline arguments (the second element, if present, is the dataset file)
    :type args: list
    """

    # determine the dataset to use and load it
    data_file = args[1] if len(args) > 1 else helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    data = Loader(classname="weka.core.converters.ArffLoader").load_file(data_file)
    data.class_is_last()

    # build Apriori, using last attribute as class attribute
    apriori = Associator(classname="weka.associations.Apriori", options=["-c", "-1"])
    apriori.build_associations(data)
    print(apriori)

    # low-level: walk the Java rule list and call the Java getters by name
    helper.print_info("Rules (low-level)")
    java_getters = (
        (" - consequence support: ", "getConsequenceSupport"),
        (" - premise support: ", "getPremiseSupport"),
        (" - total support: ", "getTotalSupport"),
        (" - total transactions: ", "getTotalTransactions"),
    )
    java_rules = apriori.jobject.getAssociationRules().getRules()
    for number, java_rule in enumerate(java_rules, start=1):
        # wrap the Java object to make its methods accessible
        wrapped = JObject(java_rule)
        print(str(number) + ". " + str(wrapped))
        for label, getter in java_getters:
            print(label + str(getattr(wrapped, getter)()))

    # high-level: use the wrapper's rule objects and their properties
    helper.print_info("Rules (high-level)")
    print("can produce rules? " + str(apriori.can_produce_rules()))
    print("rule metric names: " + str(apriori.rule_metric_names))
    rules = apriori.association_rules()
    if rules is None:
        return

    print("producer: " + rules.producer)
    print("# rules: " + str(len(rules)))
    before_confidence = (
        (" - consequence support: ", "consequence_support"),
        (" - consequence: ", "consequence"),
        (" - premise support: ", "premise_support"),
        (" - premise: ", "premise"),
        (" - total support: ", "total_support"),
        (" - total transactions: ", "total_transactions"),
        (" - metric names: ", "metric_names"),
        (" - metric values: ", "metric_values"),
    )
    after_confidence = (
        (" - primary metric name: ", "primary_metric_name"),
        (" - primary metric value: ", "primary_metric_value"),
    )
    for number, rule in enumerate(rules, start=1):
        print(str(number) + ". " + str(rule))
        for label, attribute in before_confidence:
            print(label + str(getattr(rule, attribute)))
        print(" - metric value 'Confidence': " + str(rule.metric_value('Confidence')))
        for label, attribute in after_confidence:
            print(label + str(getattr(rule, attribute)))
def main():
    """
    Loads the vote dataset from the example data directory, builds an Apriori
    associator on it and prints the associator.
    """

    # locate and load the dataset, last attribute is the class
    dataset_path = os.sep.join([helper.get_data_dir(), "vote.arff"])
    helper.print_info("Loading dataset: " + dataset_path)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(dataset_path)
    dataset.class_is_last()

    # train and output associator
    apriori_options = ["-N", "9", "-I"]
    apriori = Associator(classname="weka.associations.Apriori", options=apriori_options)
    apriori.build_associations(dataset)
    print(str(apriori))
def main():
    """
    Runs three attribute selection setups on the anneal dataset and prints their results:
    subset selection (BestFirst + CfsSubsetEval), ranking with 2-fold cross-validation
    (Ranker + InfoGainAttributeEval) and data transformation (Ranker + PrincipalComponents).
    """

    # load a dataset
    path = helper.get_data_dir() + os.sep + "anneal.arff"
    helper.print_info("Loading dataset: " + path)
    data = Loader(classname="weka.core.converters.ArffLoader").load_file(path)
    data.class_is_last()

    def run_selection(search, evaluator, prepare=None):
        # creates the selection object, applies optional extra settings first and
        # then performs the selection on the loaded data
        selection = AttributeSelection()
        if prepare is not None:
            prepare(selection)
        selection.search(search)
        selection.evaluator(evaluator)
        selection.select_attributes(data)
        return selection

    def enable_ranking_cv(selection):
        # settings used by the ranking example
        selection.ranking(True)
        selection.folds(2)
        selection.crossvalidation(True)
        selection.seed(42)

    # perform attribute selection
    helper.print_title("Attribute selection")
    subset = run_selection(
        ASSearch(classname="weka.attributeSelection.BestFirst", options=["-D", "1", "-N", "5"]),
        ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval", options=["-P", "1", "-E", "1"]))
    print("# attributes: " + str(subset.number_attributes_selected))
    print("attributes (as numpy array): " + str(subset.selected_attributes))
    print("attributes (as list): " + str(list(subset.selected_attributes)))
    print("result string:\n" + subset.results_string)

    # perform ranking
    helper.print_title("Attribute ranking (2-fold CV)")
    ranked = run_selection(
        ASSearch(classname="weka.attributeSelection.Ranker", options=["-N", "-1"]),
        ASEvaluation("weka.attributeSelection.InfoGainAttributeEval"),
        prepare=enable_ranking_cv)
    print("ranked attributes:\n" + str(ranked.ranked_attributes))
    print("result string:\n" + ranked.results_string)

    # transform data
    helper.print_title("Transform data")
    ranker = ASSearch(classname="weka.attributeSelection.Ranker", options=["-N", "-1"])
    pca = ASEvaluation(classname="weka.attributeSelection.PrincipalComponents", options=[])
    run_selection(ranker, pca)
    print("transformed header:\n" + str(pca.transformed_header()))
    print("\ntransformed data:\n" + str(pca.transformed_data(data)))
    first_instance = data.get_instance(0)
    print("\nconvert instance:\n" + str(pca.convert_instance(first_instance)))
def main():
    """
    Performs attribute selection on the anneal dataset, with the search and
    evaluation schemes being instantiated from commandline strings.
    """

    # load a dataset
    path = os.sep.join([helper.get_data_dir(), "anneal.arff"])
    helper.print_info("Loading dataset: " + path)
    data = Loader(classname="weka.core.converters.ArffLoader").load_file(path)
    data.class_is_last()

    # perform attribute selection
    helper.print_title("Attribute selection")

    # instantiate search/evaluation from commandlines
    search_cmd = 'weka.attributeSelection.GreedyStepwise -B -T -1.7976931348623157E308 -N -1 -num-slots 1'
    search = from_commandline(search_cmd, classname=get_classname(ASSearch))
    print("search:", search.to_commandline())
    evaluation_cmd = 'weka.attributeSelection.CfsSubsetEval -P 1 -E 1'
    evaluation = from_commandline(evaluation_cmd, classname=get_classname(ASEvaluation))
    print("evaluation:", evaluation.to_commandline())

    # configure and run the selection
    selection = AttributeSelection()
    selection.search(search)
    selection.evaluator(evaluation)
    selection.select_attributes(data)

    # output the results
    print("# attributes: " + str(selection.number_attributes_selected))
    print("attributes (as numpy array): " + str(selection.selected_attributes))
    print("attributes (as list): " + str(list(selection.selected_attributes)))
    print("result string:\n" + selection.results_string)
def main():
    """
    Runs cross-validated attribute selection on the anneal dataset twice: once with
    subset evaluation (GreedyStepwise + CfsSubsetEval) and once with ranking
    (Ranker + InfoGainAttributeEval). Search and evaluation schemes are
    instantiated from commandline strings.
    """

    # load a dataset
    path = os.sep.join([helper.get_data_dir(), "anneal.arff"])
    helper.print_info("Loading dataset: " + path)
    data = Loader(classname="weka.core.converters.ArffLoader").load_file(path)
    data.class_is_last()

    def cross_validate(title, search_cmd, evaluation_cmd):
        # prints the title, instantiates search/evaluation from the commandlines
        # and returns the selection object after running it on the loaded data
        helper.print_title(title)
        search = from_commandline(search_cmd, classname=get_classname(ASSearch))
        print("search:", search.to_commandline())
        evaluation = from_commandline(evaluation_cmd, classname=get_classname(ASEvaluation))
        print("evaluation:", evaluation.to_commandline())

        selection = AttributeSelection()
        selection.crossvalidation(True)
        selection.search(search)
        selection.evaluator(evaluation)
        selection.select_attributes(data)
        return selection

    # subset evaluation
    subset = cross_validate(
        "Attribute selection (cross-validation - subset evaluation)",
        'weka.attributeSelection.GreedyStepwise -B -T -1.7976931348623157E308 -N -1 -num-slots 1',
        'weka.attributeSelection.CfsSubsetEval -P 1 -E 1')
    print("\nsubset string:" + subset.cv_results)
    print("subset list:\n" + str(subset.subset_results))

    # ranking
    ranking = cross_validate(
        "Attribute selection (cross-validation - ranking)",
        'weka.attributeSelection.Ranker',
        'weka.attributeSelection.InfoGainAttributeEval')
    print("\nrank string:" + ranking.cv_results)
    print("rank dictionary:\n" + str(ranking.rank_results))
print(traceback.format_exc()) 77 | finally: 78 | jvm.stop() 79 | -------------------------------------------------------------------------------- /src/wekaexamples/attribute_selection/attribute_selection_test.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # attribute_selection_test.py 15 | # Copyright (C) 2014-2020 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import sys 19 | import traceback 20 | import weka.core.jvm as jvm 21 | import wekaexamples.helper as helper 22 | from weka.core.converters import Loader 23 | from weka.core.classes import Random 24 | from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection 25 | from weka.classifiers import Classifier, Evaluation, AttributeSelectedClassifier 26 | import weka.filters as wfilters 27 | 28 | 29 | def use_classifier(data): 30 | """ 31 | Uses the meta-classifier AttributeSelectedClassifier for attribute selection. 32 | :param data: the dataset to use 33 | :type data: Instances 34 | """ 35 | print("\n1. 
def use_filter(data):
    """
    Applies the AttributeSelection filter (CfsSubsetEval evaluator, GreedyStepwise
    search with option -B) to the data and prints the filtered data as well as
    the evaluator and search in use.

    :param data: the dataset to use
    :type data: Instances
    """
    print("\n2. Filter")

    # configure the filter through its properties
    selection_filter = wfilters.AttributeSelection()
    selection_filter.evaluator = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    selection_filter.search = ASSearch(classname="weka.attributeSelection.GreedyStepwise", options=["-B"])

    # initialize the filter with the data format, then push the data through
    selection_filter.inputformat(data)
    print(str(selection_filter.filter(data)))

    print("Evaluator:\n", selection_filter.evaluator)
    print("Search:\n", selection_filter.search)
def main(args):
    """
    Loads a dataset (the vote UCI dataset if none is specified), treats the last
    attribute as class attribute and runs the three attribute selection examples
    (meta-classifier, filter, low-level API) on it.

    :param args: the commandline arguments (the second element, if present, is the dataset file)
    :type args: list
    """

    # determine the dataset to use and load it
    data_file = args[1] if len(args) > 1 else helper.get_data_dir() + os.sep + "vote.arff"
    helper.print_info("Loading dataset: " + data_file)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    dataset = arff_loader.load_file(data_file)
    dataset.class_is_last()

    # run the examples in order
    for example in (use_classifier, use_filter, use_low_level):
        example(dataset)
software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # attribute_selection_test.py 15 | # Copyright (C) 2014 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import sys 19 | import traceback 20 | import weka.core.jvm as jvm 21 | import wekaexamples.helper as helper 22 | from weka.core.converters import Loader 23 | from weka.core.classes import Random 24 | from weka.core.dataset import Instances, Instance, Attribute 25 | from weka.classifiers import Classifier, Evaluation 26 | from weka.filters import Filter 27 | 28 | 29 | def create_dataset_header(): 30 | """ 31 | Creates the dataset header. 
32 | :return: the header 33 | :rtype: Instances 34 | """ 35 | att_msg = Attribute.create_string("Message") 36 | att_cls = Attribute.create_nominal("Class", ["miss", "hit"]) 37 | result = Instances.create_instances("MessageClassificationProblem", [att_msg, att_cls], 0) 38 | return result 39 | 40 | 41 | def main(args): 42 | """ 43 | Creates the dataset header for the message classification problem and prints it. 44 | :param args: the commandline arguments 45 | :type args: list 46 | """ 47 | 48 | data = create_dataset_header() 49 | print(str(data)) 50 | 51 | if __name__ == "__main__": 52 | try: 53 | jvm.start() 54 | main(sys.argv) 55 | except Exception as e: 56 | print(traceback.format_exc()) 57 | finally: 58 | jvm.stop() 59 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/classifiers/__init__.py -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/adtree.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see .
13 | 14 | # adtree.py 15 | # Copyright (C) 2022 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | from weka.core.packages import is_installed, install_package 20 | from weka.classifiers import Classifier 21 | 22 | 23 | def main(): 24 | """ 25 | Installs package for ADTree if necessary (and prompts user to restart) 26 | and once available instantiates an ADTree instance. 27 | """ 28 | pkgname = "alternatingDecisionTrees" 29 | if not is_installed(pkgname): 30 | print("Package %s not installed, attempting installation..." % pkgname) 31 | if install_package(pkgname): 32 | print("Package %s installed, please rerun script!" % pkgname) 33 | else: 34 | print("Failed to install package %s!" % pkgname) 35 | return 36 | else: 37 | print("Package already installed: %s" % pkgname) 38 | 39 | cls = Classifier(classname="weka.classifiers.trees.ADTree", options=[]) 40 | print(cls.to_commandline()) 41 | 42 | 43 | if __name__ == "__main__": 44 | try: 45 | jvm.start(packages=True) 46 | main() 47 | except Exception as e: 48 | print(traceback.format_exc()) 49 | finally: 50 | jvm.stop() 51 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/bayesnet.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. 
If not, see . 13 | 14 | # bayesnet.py 15 | # Copyright (C) 2022 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.classes import from_commandline 22 | from weka.core.converters import load_any_file 23 | from weka.plot.graph import plot_xmlbif_graph, xmlbif_to_dot 24 | 25 | 26 | def main(): 27 | """ 28 | Shows how to plot the belief network generated by BayesNet. 29 | """ 30 | 31 | # load a dataset 32 | data_file = helper.get_data_dir() + os.sep + "glass.arff" 33 | helper.print_info("Loading dataset: " + data_file) 34 | data = load_any_file(data_file, class_index="last") 35 | 36 | # classifier 37 | classifier = from_commandline("weka.classifiers.bayes.BayesNet -D -Q weka.classifiers.bayes.net.search.local.K2 -- -P 2 -S BAYES -E weka.classifiers.bayes.net.estimate.SimpleEstimator -- -A 0.5", 38 | classname="weka.classifiers.Classifier") 39 | classifier.build_classifier(data) 40 | xmlbif = classifier.graph 41 | print("\nXML BIF\n\n", xmlbif) 42 | dot = xmlbif_to_dot(xmlbif) 43 | print("\nDOT\n\n", dot) 44 | plot_xmlbif_graph(xmlbif) 45 | 46 | 47 | if __name__ == "__main__": 48 | try: 49 | jvm.start() 50 | main() 51 | except Exception as e: 52 | print(traceback.format_exc()) 53 | finally: 54 | jvm.stop() 55 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/cost_sensitive.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 
5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # cost_sensitive.py 15 | # Copyright (C) 2014 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.classes import Random 22 | from weka.core.converters import Loader 23 | from weka.classifiers import SingleClassifierEnhancer, Classifier, Evaluation 24 | 25 | 26 | def main(): 27 | """ 28 | Shows how to use the CostSensitiveClassifier. 29 | """ 30 | 31 | # load a dataset 32 | data_file = helper.get_data_dir() + os.sep + "diabetes.arff" 33 | helper.print_info("Loading dataset: " + data_file) 34 | loader = Loader(classname="weka.core.converters.ArffLoader") 35 | data = loader.load_file(data_file) 36 | data.class_is_last() 37 | 38 | # classifier 39 | classifier = SingleClassifierEnhancer( 40 | classname="weka.classifiers.meta.CostSensitiveClassifier", 41 | options=["-cost-matrix", "[0 1; 2 0]", "-S", "2"]) 42 | base = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"]) 43 | classifier.classifier = base 44 | 45 | folds = 10 46 | evaluation = Evaluation(data) 47 | evaluation.crossvalidate_model(classifier, data, folds, Random(1)) 48 | 49 | 50 | print("") 51 | print("=== Setup ===") 52 | print("Classifier: " + classifier.to_commandline()) 53 | print("Dataset: " + data.relationname) 54 | print("") 55 | print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ===")) 56 | 57 | if __name__ == "__main__": 58 | try: 59 | jvm.start() 60 | main() 61 | except Exception as e: 62 | print(traceback.format_exc()) 63 | finally: 64 
| jvm.stop() 65 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/crossvalidation_addprediction.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # crossvalidation_addprediction.py 15 | # Copyright (C) 2014 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.classes import Random 22 | from weka.core.converters import Loader 23 | from weka.core.dataset import Instances 24 | from weka.classifiers import Classifier, Evaluation 25 | from weka.filters import Filter 26 | 27 | 28 | def main(): 29 | """ 30 | Just runs some example code. 
31 | """ 32 | 33 | # load a dataset 34 | data_file = helper.get_data_dir() + os.sep + "vote.arff" 35 | helper.print_info("Loading dataset: " + data_file) 36 | loader = Loader(classname="weka.core.converters.ArffLoader") 37 | data = loader.load_file(data_file) 38 | data.class_is_last() 39 | 40 | # classifier 41 | classifier = Classifier(classname="weka.classifiers.trees.J48") 42 | 43 | # randomize data 44 | folds = 10 45 | seed = 1 46 | rnd = Random(seed) 47 | rand_data = Instances.copy_instances(data) 48 | rand_data.randomize(rnd) 49 | if rand_data.class_attribute.is_nominal: 50 | rand_data.stratify(folds) 51 | 52 | # perform cross-validation and add predictions 53 | predicted_data = None 54 | evaluation = Evaluation(rand_data) 55 | for i in range(folds): 56 | train = rand_data.train_cv(folds, i) 57 | # the above code is used by the StratifiedRemoveFolds filter, 58 | # the following code is used by the Explorer/Experimenter 59 | # train = rand_data.train_cv(folds, i, rnd) 60 | test = rand_data.test_cv(folds, i) 61 | 62 | # build and evaluate classifier 63 | cls = Classifier.make_copy(classifier) 64 | cls.build_classifier(train) 65 | evaluation.test_model(cls, test) 66 | 67 | # add predictions 68 | addcls = Filter( 69 | classname="weka.filters.supervised.attribute.AddClassification", 70 | options=["-classification", "-distribution", "-error"]) 71 | # setting the java object directly avoids issues with correct quoting in option array 72 | addcls.set_property("classifier", Classifier.make_copy(classifier)) 73 | addcls.inputformat(train) 74 | addcls.filter(train) # trains the classifier 75 | pred = addcls.filter(test) 76 | if predicted_data is None: 77 | predicted_data = Instances.template_instances(pred, 0) 78 | for n in range(pred.num_instances): 79 | predicted_data.add_instance(pred.get_instance(n)) 80 | 81 | print("") 82 | print("=== Setup ===") 83 | print("Classifier: " + classifier.to_commandline()) 84 | print("Dataset: " + data.relationname) 85 | print("Folds: 
" + str(folds)) 86 | print("Seed: " + str(seed)) 87 | print("") 88 | print(evaluation.summary("=== " + str(folds) + " -fold Cross-Validation ===")) 89 | print("") 90 | print(predicted_data) 91 | 92 | 93 | if __name__ == "__main__": 94 | try: 95 | jvm.start() 96 | main() 97 | except Exception as e: 98 | print(traceback.format_exc()) 99 | finally: 100 | jvm.stop() 101 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/incremental_classifier.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # incremental_classifier.py 15 | # Copyright (C) 2014 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import sys 19 | import traceback 20 | import weka.core.jvm as jvm 21 | import wekaexamples.helper as helper 22 | from weka.core.converters import Loader 23 | from weka.classifiers import Classifier 24 | 25 | 26 | def main(args): 27 | """ 28 | Trains a NaiveBayesUpdateable classifier incrementally on a dataset. The dataset can be supplied as parameter. 
29 | :param args: the commandline arguments 30 | :type args: list 31 | """ 32 | 33 | # load a dataset 34 | if len(args) <= 1: 35 | data_file = helper.get_data_dir() + os.sep + "vote.arff" 36 | else: 37 | data_file = args[1] 38 | helper.print_info("Loading dataset: " + data_file) 39 | loader = Loader(classname="weka.core.converters.ArffLoader") 40 | data = loader.load_file(data_file, incremental=True) 41 | data.class_is_last() 42 | 43 | # classifier 44 | nb = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable") 45 | nb.build_classifier(data) 46 | 47 | # train incrementally 48 | for inst in loader: 49 | nb.update_classifier(inst) 50 | 51 | print(nb) 52 | 53 | 54 | if __name__ == "__main__": 55 | try: 56 | jvm.start() 57 | main(sys.argv) 58 | except Exception as e: 59 | print(traceback.format_exc()) 60 | finally: 61 | jvm.stop() 62 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/mp5.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
13 | 14 | # mp5.py 15 | # Copyright (C) 2017 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import Loader 22 | from weka.classifiers import Classifier 23 | 24 | 25 | def main(): 26 | """ 27 | Just runs some example code. 28 | """ 29 | 30 | # load a dataset 31 | bodyfat_file = helper.get_data_dir() + os.sep + "bodyfat.arff" 32 | helper.print_info("Loading dataset: " + bodyfat_file) 33 | loader = Loader(classname="weka.core.converters.ArffLoader") 34 | bodyfat_data = loader.load_file(bodyfat_file) 35 | bodyfat_data.class_is_last() 36 | 37 | # classifier help 38 | helper.print_title("Creating help string") 39 | classifier = Classifier(classname="weka.classifiers.trees.M5P") 40 | classifier.build_classifier(bodyfat_data) 41 | print(classifier) 42 | 43 | 44 | if __name__ == "__main__": 45 | try: 46 | jvm.start() 47 | main() 48 | except Exception as e: 49 | print(traceback.format_exc()) 50 | finally: 51 | jvm.stop() 52 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/output_class_distribution.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
13 | 14 | # output_class_distribution.py 15 | # Copyright (C) 2014-2021 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import sys 19 | import traceback 20 | import weka.core.jvm as jvm 21 | import wekaexamples.helper as helper 22 | from weka.core.converters import Loader 23 | from weka.classifiers import Classifier 24 | 25 | 26 | def main(args): 27 | """ 28 | Trains a J48 classifier on a training set and outputs the predicted class and class distribution alongside the 29 | actual class from a test set. Class attribute is assumed to be the last attribute. 30 | :param args: the commandline arguments (train and test datasets, optional) 31 | :type args: list 32 | """ 33 | 34 | # load a dataset 35 | if len(args) < 3: 36 | helper.print_info("No custom dataset filenames provided, falling back on default ones") 37 | train_file = helper.get_data_dir() + os.sep + "iris-train.arff" 38 | test_file = helper.get_data_dir() + os.sep + "iris-test.arff" 39 | else: 40 | train_file = args[1] 41 | test_file = args[2] 42 | helper.print_info("Loading train: " + train_file) 43 | loader = Loader(classname="weka.core.converters.ArffLoader") 44 | train = loader.load_file(train_file) 45 | train.class_index = train.num_attributes - 1 46 | helper.print_info("Loading test: " + test_file) 47 | test = loader.load_file(test_file) 48 | test.class_is_last() 49 | 50 | # classifier 51 | cls = Classifier(classname="weka.classifiers.trees.J48") 52 | cls.build_classifier(train) 53 | 54 | # output predictions 55 | print("# - actual - predicted - error - distribution") 56 | for index, inst in enumerate(test): 57 | pred = cls.classify_instance(inst) 58 | dist = cls.distribution_for_instance(inst) 59 | print( 60 | "%d - %s - %s - %s - %s" % 61 | (index+1, 62 | inst.get_string_value(inst.class_index), 63 | inst.class_attribute.value(int(pred)), 64 | "yes" if pred != inst.get_value(inst.class_index) else "no", 65 | str(dist.tolist()))) 66 | 67 | 68 | if __name__ == "__main__": 69 | try: 
70 | jvm.start() 71 | main(sys.argv) 72 | except Exception as e: 73 | print(traceback.format_exc()) 74 | finally: 75 | jvm.stop() 76 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/parameter_optimization.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # parameter_optimization.py 15 | # Copyright (C) 2015-2021 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import Loader 22 | from weka.core.classes import ListParameter, MathParameter 23 | from weka.classifiers import Classifier, GridSearch, MultiSearch 24 | 25 | 26 | def gridsearch(): 27 | """ 28 | Applies GridSearch to a dataset. GridSearch package must not be installed, as the monolithic weka.jar 29 | already contains this package.
30 | """ 31 | 32 | helper.print_title("GridSearch") 33 | 34 | # load a dataset 35 | fname = helper.get_data_dir() + os.sep + "bolts.arff" 36 | helper.print_info("Loading train: " + fname) 37 | loader = Loader(classname="weka.core.converters.ArffLoader") 38 | train = loader.load_file(fname) 39 | train.class_is_last() 40 | 41 | # classifier 42 | grid = GridSearch(options=["-sample-size", "100.0", "-traversal", "ROW-WISE", "-num-slots", "1", "-S", "1"]) 43 | grid.evaluation = "CC" 44 | grid.y = {"property": "kernel.gamma", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"} 45 | grid.x = {"property": "C", "min": -3.0, "max": 3.0, "step": 1.0, "base": 10.0, "expression": "pow(BASE,I)"} 46 | cls = Classifier( 47 | classname="weka.classifiers.functions.SMOreg", 48 | options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"]) 49 | grid.classifier = cls 50 | grid.build_classifier(train) 51 | print("Model:\n" + str(grid)) 52 | print("\nBest setup:\n" + grid.best.to_commandline()) 53 | 54 | 55 | def multisearch(): 56 | """ 57 | Applies MultiSearch to a dataset. "multisearch-weka-package" package must be installed. 
58 | """ 59 | 60 | helper.print_title("MultiSearch") 61 | 62 | # load a dataset 63 | fname = helper.get_data_dir() + os.sep + "bolts.arff" 64 | helper.print_info("Loading train: " + fname) 65 | loader = Loader(classname="weka.core.converters.ArffLoader") 66 | train = loader.load_file(fname) 67 | train.class_is_last() 68 | 69 | # classifier 70 | multi = MultiSearch(options=["-S", "1"]) 71 | multi.evaluation = "CC" 72 | mparam = MathParameter() 73 | mparam.prop = "kernel.gamma" 74 | mparam.minimum = -3.0 75 | mparam.maximum = 3.0 76 | mparam.step = 1.0 77 | mparam.base = 10.0 78 | mparam.expression = "pow(BASE,I)" 79 | lparam = ListParameter() 80 | lparam.prop = "C" 81 | lparam.values = ["-2.0", "-1.0", "0.0", "1.0", "2.0"] 82 | multi.parameters = [mparam, lparam] 83 | cls = Classifier( 84 | classname="weka.classifiers.functions.SMOreg", 85 | options=["-K", "weka.classifiers.functions.supportVector.RBFKernel"]) 86 | multi.classifier = cls 87 | multi.build_classifier(train) 88 | print("Model:\n" + str(multi)) 89 | print("\nBest setup:\n" + multi.best.to_commandline()) 90 | 91 | 92 | def main(): 93 | """ 94 | Calls the parameter optimization method(s). 95 | """ 96 | gridsearch() 97 | multisearch() 98 | 99 | 100 | if __name__ == "__main__": 101 | try: 102 | jvm.start(packages=True) 103 | main() 104 | except Exception as e: 105 | print(traceback.format_exc()) 106 | finally: 107 | jvm.stop() 108 | -------------------------------------------------------------------------------- /src/wekaexamples/classifiers/train_test_split.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 
5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # train_test_split.py 15 | # Copyright (C) 2015-2019 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import sys 19 | import traceback 20 | import weka.core.jvm as jvm 21 | import wekaexamples.helper as helper 22 | from weka.core.classes import Random 23 | from weka.core.converters import Loader 24 | from weka.classifiers import Classifier, Evaluation, PredictionOutput 25 | 26 | 27 | def main(args): 28 | """ 29 | Loads a dataset, shuffles it, splits it into train/test set. Trains J48 with training set and 30 | evaluates the built model on the test set. 31 | The predictions get recorded in two different ways: 32 | 1. in-memory via the test_model method 33 | 2. 
directly to file (more memory efficient), but a separate run of making predictions 34 | 35 | :param args: the commandline arguments (optional, can be dataset filename) 36 | :type args: list 37 | """ 38 | 39 | # load a dataset 40 | if len(args) <= 1: 41 | data_file = helper.get_data_dir() + os.sep + "vote.arff" 42 | else: 43 | data_file = args[1] 44 | helper.print_info("Loading dataset: " + data_file) 45 | loader = Loader(classname="weka.core.converters.ArffLoader") 46 | data = loader.load_file(data_file) 47 | data.class_is_last() 48 | 49 | # generate train/test split of randomized data 50 | train, test = data.train_test_split(66.0, Random(1)) 51 | 52 | # build classifier 53 | cls = Classifier(classname="weka.classifiers.trees.J48") 54 | cls.build_classifier(train) 55 | print(cls) 56 | 57 | # evaluate and record predictions in memory 58 | helper.print_title("recording predictions in-memory") 59 | output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-distribution"]) 60 | evl = Evaluation(train) 61 | evl.test_model(cls, test, output=output) 62 | print(evl.summary()) 63 | helper.print_info("Predictions:") 64 | print(output.buffer_content()) 65 | 66 | # record/output predictions separately 67 | helper.print_title("recording/outputting predictions separately") 68 | outputfile = helper.get_tmp_dir() + "/j48_vote.csv" 69 | output = PredictionOutput(classname="weka.classifiers.evaluation.output.prediction.CSV", options=["-distribution", "-suppress", "-file", outputfile]) 70 | output.header = test 71 | output.print_all(cls, test) 72 | helper.print_info("Predictions stored in:" + outputfile) 73 | # by using "-suppress" we don't store the output in memory, the following statement won't output anything 74 | print(output.buffer_content()) 75 | 76 | 77 | if __name__ == "__main__": 78 | try: 79 | jvm.start() 80 | main(sys.argv) 81 | except Exception as e: 82 | print(traceback.format_exc()) 83 | finally: 84 | jvm.stop() 85 | 
-------------------------------------------------------------------------------- /src/wekaexamples/clusterers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/clusterers/__init__.py -------------------------------------------------------------------------------- /src/wekaexamples/clusterers/centroids.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # centroids.py 15 | # Copyright (C) 2022-2024 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import load_any_file 22 | from weka.core.dataset import Instances 23 | from weka.clusterers import Clusterer 24 | 25 | 26 | def main(): 27 | """ 28 | Just runs some example code. 
29 | """ 30 | 31 | # load a dataset 32 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 33 | helper.print_info("Loading dataset: " + iris_file) 34 | 35 | # delete last attribute 36 | data = load_any_file(iris_file) 37 | data.delete_attribute(data.num_attributes - 1) 38 | 39 | # build SimpleKMeans 40 | cls = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"]) 41 | cls.build_clusterer(data) 42 | 43 | # jwrapper approach to get centroids 44 | print("jwrapper") 45 | centroids = cls.jobject.getClusterCentroids() 46 | for i in range(centroids.numInstances()): 47 | print(centroids.instance(i)) 48 | 49 | # jni/pww approach to get centroids 50 | print("jni/pww") 51 | centroids = Instances(cls.jobject.getClusterCentroids()) 52 | for i in range(centroids.num_instances): 53 | print(centroids.get_instance(i)) 54 | 55 | 56 | if __name__ == "__main__": 57 | try: 58 | jvm.start() 59 | main() 60 | except Exception as e: 61 | print(traceback.format_exc()) 62 | finally: 63 | jvm.stop() 64 | -------------------------------------------------------------------------------- /src/wekaexamples/clusterers/classes_to_clusters.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
13 | 14 | # classes_to_clusters.py 15 | # Copyright (C) 2017 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import Loader, Instances 22 | from weka.clusterers import Clusterer, ClusterEvaluation 23 | 24 | 25 | def main(): 26 | """ 27 | Just runs some example code. 28 | """ 29 | 30 | # load a dataset 31 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 32 | helper.print_info("Loading dataset: " + iris_file) 33 | loader = Loader(classname="weka.core.converters.ArffLoader") 34 | full = loader.load_file(iris_file) 35 | full.class_is_last() 36 | 37 | # remove class attribute 38 | data = Instances.copy_instances(full) 39 | data.no_class() 40 | data.delete_last_attribute() 41 | 42 | # build a clusterer and output model 43 | helper.print_title("Training SimpleKMeans clusterer") 44 | clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"]) 45 | clusterer.build_clusterer(data) 46 | print("done") 47 | 48 | # classes to clusters 49 | evl = ClusterEvaluation() 50 | evl.set_model(clusterer) 51 | evl.test_model(full) 52 | helper.print_title("Cluster results") 53 | print(evl.cluster_results) 54 | helper.print_title("Classes to clusters") 55 | print(evl.classes_to_clusters) 56 | 57 | if __name__ == "__main__": 58 | try: 59 | jvm.start() 60 | main() 61 | except Exception as e: 62 | print(traceback.format_exc()) 63 | finally: 64 | jvm.stop() 65 | -------------------------------------------------------------------------------- /src/wekaexamples/clusterers/clope.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 
5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # clope.py 15 | # Copyright (C) 2019 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | from weka.clusterers import Clusterer 20 | from weka.core.packages import install_package, is_installed 21 | 22 | 23 | def main(): 24 | if not is_installed("CLOPE"): 25 | print("CLOPE is not installed, installing now") 26 | install_package("CLOPE") 27 | print("please restart") 28 | return 29 | 30 | cls = Clusterer(classname="weka.clusterers.CLOPE") 31 | print("CLOPE is installed:", cls.to_commandline()) 32 | 33 | 34 | if __name__ == "__main__": 35 | try: 36 | jvm.start(packages=True) 37 | main() 38 | except Exception as e: 39 | print(traceback.format_exc()) 40 | finally: 41 | jvm.stop() 42 | -------------------------------------------------------------------------------- /src/wekaexamples/clusterers/cluster_data.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. 
If not, see . 13 | 14 | # cluster_data.py 15 | # Copyright (C) 2015-2016 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import Loader 22 | from weka.clusterers import Clusterer, FilteredClusterer, ClusterEvaluation 23 | from weka.filters import Filter 24 | import weka.plot.graph as plg 25 | import weka.plot.clusterers as plc 26 | 27 | 28 | def main(): 29 | """ 30 | Just runs some example code. 31 | """ 32 | 33 | # load a dataset 34 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 35 | helper.print_info("Loading dataset: " + iris_file) 36 | loader = Loader(classname="weka.core.converters.ArffLoader") 37 | data = loader.load_file(iris_file) 38 | 39 | # remove class attribute 40 | data.delete_last_attribute() 41 | 42 | # build a clusterer and output model 43 | helper.print_title("Training SimpleKMeans clusterer") 44 | clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"]) 45 | clusterer.build_clusterer(data) 46 | print(clusterer) 47 | 48 | # cluster data 49 | helper.print_info("Clustering data") 50 | for index, inst in enumerate(data): 51 | cl = clusterer.cluster_instance(inst) 52 | dist = clusterer.distribution_for_instance(inst) 53 | print(str(index+1) + ": cluster=" + str(cl) + ", distribution=" + str(dist)) 54 | 55 | if __name__ == "__main__": 56 | try: 57 | jvm.start() 58 | main() 59 | except Exception as e: 60 | print(traceback.format_exc()) 61 | finally: 62 | jvm.stop() 63 | -------------------------------------------------------------------------------- /src/wekaexamples/clusterers/clusterers.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 
of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # clusterers.py 15 | # Copyright (C) 2014-2017 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import Loader 22 | from weka.clusterers import Clusterer, FilteredClusterer, ClusterEvaluation 23 | from weka.filters import Filter 24 | import weka.plot.graph as plg 25 | import weka.plot.clusterers as plc 26 | 27 | 28 | def main(): 29 | """ 30 | Just runs some example code. 31 | """ 32 | 33 | # load a dataset 34 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 35 | helper.print_info("Loading dataset: " + iris_file) 36 | loader = Loader(classname="weka.core.converters.ArffLoader") 37 | data = loader.load_file(iris_file) 38 | 39 | # remove class attribute 40 | data.delete_last_attribute() 41 | 42 | # build a clusterer and output model 43 | helper.print_title("Training SimpleKMeans clusterer") 44 | clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"]) 45 | clusterer.build_clusterer(data) 46 | print(clusterer) 47 | helper.print_info("Evaluating on data") 48 | evaluation = ClusterEvaluation() 49 | evaluation.set_model(clusterer) 50 | evaluation.test_model(data) 51 | print("# clusters: " + str(evaluation.num_clusters)) 52 | print("log likelihood: " + str(evaluation.log_likelihood)) 53 | print("cluster assignments:\n" + str(evaluation.cluster_assignments)) 54 | plc.plot_cluster_assignments(evaluation, data, inst_no=True) 55 | 56 | # using a 
filtered clusterer 57 | helper.print_title("Filtered clusterer") 58 | loader = Loader(classname="weka.core.converters.ArffLoader") 59 | data = loader.load_file(iris_file) 60 | clusterer = Clusterer(classname="weka.clusterers.SimpleKMeans", options=["-N", "3"]) 61 | remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"]) 62 | fclusterer = FilteredClusterer() 63 | fclusterer.clusterer = clusterer 64 | fclusterer.filter = remove 65 | fclusterer.build_clusterer(data) 66 | print(fclusterer) 67 | 68 | # load a dataset incrementally and build clusterer incrementally 69 | helper.print_title("Incremental clusterer") 70 | loader = Loader(classname="weka.core.converters.ArffLoader") 71 | iris_inc = loader.load_file(iris_file, incremental=True) 72 | clusterer = Clusterer("weka.clusterers.Cobweb") 73 | remove = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"]) 74 | remove.inputformat(iris_inc) 75 | iris_filtered = remove.outputformat() 76 | clusterer.build_clusterer(iris_filtered) 77 | for inst in loader: 78 | remove.input(inst) 79 | inst_filtered = remove.output() 80 | clusterer.update_clusterer(inst_filtered) 81 | clusterer.update_finished() 82 | print(clusterer.to_commandline()) 83 | print(clusterer) 84 | print(clusterer.graph) 85 | plg.plot_dot_graph(clusterer.graph) 86 | 87 | # partial classname 88 | helper.print_title("Creating clusterer from partial classname") 89 | clsname = ".SimpleKMeans" 90 | clusterer = Clusterer(classname=clsname) 91 | print(clsname + " --> " + clusterer.classname) 92 | 93 | 94 | if __name__ == "__main__": 95 | try: 96 | jvm.start() 97 | main() 98 | except Exception as e: 99 | print(traceback.format_exc()) 100 | finally: 101 | jvm.stop() 102 | -------------------------------------------------------------------------------- /src/wekaexamples/clusterers/silhouette_coefficient.py: -------------------------------------------------------------------------------- 1 | # This 
program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # silhouette_coefficient.py 15 | # Copyright (C) 2021 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import load_any_file 22 | from weka.clusterers import Clusterer, avg_silhouette_coefficient 23 | from weka.core.distances import DistanceFunction 24 | from weka.filters import Filter 25 | 26 | 27 | def main(): 28 | """ 29 | Just runs some example code. 
30 | """ 31 | 32 | # load a dataset 33 | helper.print_title("Loading and preparing iris dataset") 34 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 35 | helper.print_info("Loading dataset: " + iris_file) 36 | data = load_any_file(iris_file) 37 | data.delete_last_attribute() 38 | 39 | # filter dataset: standardize the attributes before clustering 40 | std = Filter(classname="weka.filters.unsupervised.attribute.Standardize") 41 | std.inputformat(data) 42 | data = std.filter(data) 43 | 44 | # computing average silhouette coefficient 45 | helper.print_title("Computing average silhouette coefficient") 46 | 47 | # Euclidean distance without normalization (the -D option); reused for both clusterers below 48 | dist_func = DistanceFunction(classname="weka.core.EuclideanDistance", options=["-D"]) 49 | 50 | clusterer = Clusterer("weka.clusterers.EM") 51 | clusterer.build_clusterer(data) 52 | print(clusterer.to_commandline() + "\n--> " + str(avg_silhouette_coefficient(clusterer, dist_func, data))) 53 | 54 | # we need to make sure that SimpleKMeans's distance function doesn't normalize 55 | clusterer = Clusterer("weka.clusterers.SimpleKMeans", options=["-N", "3", "-A", dist_func.to_commandline()]) 56 | clusterer.build_clusterer(data) 57 | print(clusterer.to_commandline() + "\n--> " + str(avg_silhouette_coefficient(clusterer, dist_func, data))) 58 | 59 | 60 | if __name__ == "__main__": 61 | try: 62 | jvm.start() 63 | main() 64 | except Exception as e: 65 | print(traceback.format_exc()) 66 | finally: 67 | jvm.stop() 68 | -------------------------------------------------------------------------------- /src/wekaexamples/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/core/__init__.py -------------------------------------------------------------------------------- /src/wekaexamples/core/capabilities.py: -------------------------------------------------------------------------------- 1 | 
# This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # capabilities.py 15 | # Copyright (C) 2014 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import traceback 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.capabilities import Capability 22 | from weka.classifiers import Classifier 23 | from weka.core.converters import Loader 24 | from weka.core.capabilities import Capabilities 25 | 26 | 27 | def main(): 28 | """ 29 | Just runs some example code. 
30 | """ 31 | 32 | classifier = Classifier(classname="weka.classifiers.trees.J48") 33 | 34 | helper.print_title("Capabilities") 35 | capabilities = classifier.capabilities 36 | print(capabilities) 37 | 38 | # load a dataset 39 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 40 | helper.print_info("Loading dataset: " + iris_file) 41 | loader = Loader(classname="weka.core.converters.ArffLoader") 42 | iris_data = loader.load_file(iris_file) 43 | iris_data.class_is_last() 44 | data_capabilities = Capabilities.for_instances(iris_data) 45 | print(data_capabilities) 46 | print("classifier handles dataset: " + str(capabilities.supports(data_capabilities))) 47 | 48 | # disable/enable 49 | helper.print_title("Disable/Enable") 50 | capability = Capability(member="UNARY_ATTRIBUTES") 51 | capabilities.disable(capability) 52 | capabilities.min_instances = 10 53 | print("Removing: " + str(capability)) 54 | print(capabilities) 55 | 56 | 57 | if __name__ == "__main__": 58 | try: 59 | jvm.start() 60 | main() 61 | except Exception as e: 62 | print(traceback.format_exc()) 63 | finally: 64 | jvm.stop() 65 | -------------------------------------------------------------------------------- /src/wekaexamples/core/classes.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
13 | 14 | # classes.py 15 | # Copyright (C) 2014-2024 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | import wekaexamples.helper as helper 20 | from weka.core.classes import Random, SingleIndex, Range, Tag, JavaObject 21 | 22 | 23 | def main(): 24 | """ 25 | Just runs some example code. 26 | """ 27 | 28 | # generic JavaObject stuff 29 | helper.print_title("Generic stuff using weka.core.SystemInfo") 30 | info = JavaObject(JavaObject.new_instance("weka.core.SystemInfo")) 31 | print("toString() method:") 32 | print(info.jobject.toString()) 33 | 34 | # random 35 | helper.print_title("Random") 36 | rnd = Random(1) 37 | for i in range(10): 38 | print(rnd.next_double()) 39 | for i in range(10): 40 | print(rnd.next_int(100)) 41 | 42 | # single index 43 | helper.print_title("SingleIndex") 44 | si = SingleIndex(index="first") 45 | upper = 10 46 | si.upper(upper) 47 | print(str(si) + " (upper=" + str(upper) + ")\n -> " + str(si.index())) 48 | si.single_index = "3" 49 | si.upper(upper) 50 | print(str(si) + " (upper=" + str(upper) + ")\n -> " + str(si.index())) 51 | si.single_index = "last" 52 | si.upper(upper) 53 | print(str(si) + " (upper=" + str(upper) + ")\n -> " + str(si.index())) 54 | 55 | # range 56 | helper.print_title("Range") 57 | rng = Range(ranges="first") 58 | upper = 10 59 | invert = False 60 | rng.upper(upper) 61 | rng.invert = invert 62 | print(str(rng.ranges) + " (upper=" + str(upper) + ", invert=" + str(invert) + ")\n -> " + str(rng.selection())) 63 | rng.ranges = "3" 64 | rng.upper(upper) 65 | rng.invert = invert 66 | print(str(rng.ranges) + " (upper=" + str(upper) + ", invert=" + str(invert) + ")\n -> " + str(rng.selection())) 67 | rng.ranges = "last" 68 | rng.upper(upper) 69 | rng.invert = invert 70 | print(str(rng.ranges) + " (upper=" + str(upper) + ", invert=" + str(invert) + ")\n -> " + str(rng.selection())) 71 | rng.ranges = "first-last" 72 | rng.upper(upper) 73 | rng.invert = invert 74 | 
print(str(rng.ranges) + " (upper=" + str(upper) + ", invert=" + str(invert) + ")\n -> " + str(rng.selection())) 75 | rng.ranges = "3,4,7-last" 76 | rng.upper(upper) 77 | rng.invert = invert 78 | print(str(rng.ranges) + " (upper=" + str(upper) + ", invert=" + str(invert) + ")\n -> " + str(rng.selection())) 79 | rng.ranges = "3,4,7-last" 80 | rng.upper(upper) 81 | invert = True 82 | rng.invert = invert 83 | print(str(rng.ranges) + " (upper=" + str(upper) + ", invert=" + str(invert) + ")\n -> " + str(rng.selection())) 84 | 85 | # tag 86 | helper.print_title("Tag") 87 | tag = Tag(ident=1, ident_str="one") 88 | print("tag=" + str(tag) + ", ident=" + str(tag.ident) + ", readable=" + tag.readable) 89 | tag.ident = 3 90 | print("tag=" + str(tag) + ", ident=" + str(tag.ident) + ", readable=" + tag.readable) 91 | tag = Tag(ident=2, ident_str="two", readable="2nd tag") 92 | print("tag=" + str(tag) + ", ident=" + str(tag.ident) + ", readable=" + tag.readable) 93 | 94 | 95 | if __name__ == "__main__": 96 | try: 97 | jvm.start() 98 | main() 99 | except Exception as e: 100 | print(traceback.format_exc()) 101 | finally: 102 | jvm.stop() 103 | -------------------------------------------------------------------------------- /src/wekaexamples/core/converters.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
13 | 14 | # converters.py 15 | # Copyright (C) 2015-2024 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import os 19 | import weka.core.jvm as jvm 20 | import wekaexamples.helper as helper 21 | from weka.core.converters import Loader, TextDirectoryLoader, load_csv_file, load_any_file 22 | from simple_range import range_indices 23 | from weka.filters import Filter 24 | 25 | 26 | def main(): 27 | """ 28 | Just runs some example code. 29 | """ 30 | 31 | # load ARFF file 32 | helper.print_title("Loading ARFF file") 33 | loader = Loader(classname="weka.core.converters.ArffLoader") 34 | data = loader.load_file(helper.get_data_dir() + os.sep + "iris.arff") 35 | print(str(data)) 36 | 37 | # load CSV file 38 | helper.print_title("Loading CSV file") 39 | loader = Loader(classname="weka.core.converters.CSVLoader") 40 | data = loader.load_file(helper.get_data_dir() + os.sep + "iris.csv") 41 | print(str(data)) 42 | 43 | # load directory 44 | # changes this to something sensible 45 | text_dir = "/some/where" 46 | if os.path.exists(text_dir) and os.path.isdir(text_dir): 47 | helper.print_title("Loading directory: " + text_dir) 48 | loader = TextDirectoryLoader(options=["-dir", text_dir, "-F", "-charset", "UTF-8"]) 49 | data = loader.load() 50 | print(unicode(data)) 51 | 52 | # load any file 53 | helper.print_title("Loading a CSV file without specifying loader explicitly") 54 | data = load_any_file(helper.get_data_dir() + os.sep + "anneal_weka.csv") 55 | print(str(data)) 56 | 57 | # load CSV file (without using Weka's CSVLoader) 58 | helper.print_title("Loading a CSV file without using Weka's CSVLoader") 59 | num_cols = range_indices("4-5,9,33-35", maximum=39) 60 | data = load_csv_file(helper.get_data_dir() + os.sep + "anneal_excel.csv", num_cols=num_cols) 61 | str_to_nom = Filter(classname="weka.filters.unsupervised.attribute.StringToNominal", options=["-R", "1-3,6-8,10-32,36-39"]) 62 | str_to_nom.inputformat(data) 63 | data = 
str_to_nom.filter(data) 64 | print(str(data)) 65 | 66 | 67 | if __name__ == "__main__": 68 | try: 69 | jvm.start() 70 | main() 71 | except Exception as e: 72 | print(traceback.format_exc()) 73 | finally: 74 | jvm.stop() 75 | -------------------------------------------------------------------------------- /src/wekaexamples/core/database.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # database.py 15 | # Copyright (C) 2015-2016 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | import wekaexamples.helper as helper 20 | from weka.core.database import InstanceQuery 21 | 22 | 23 | def main(): 24 | """ 25 | Just runs some example code. 26 | NB: You have to fill in the following parameters to make it work with MySQL: 27 | - HOSTNAME: the database server IP or hostname (or just 'localhost') 28 | - DBNAME: the name of the database to connect to 29 | - DBUSER: the user for connecting to the database 30 | - DBPW: the password for the database user 31 | - TABLE: the table to retrieve the data from 32 | And also supply the correct path to the MySQL jar in the "main" method below. 
33 | """ 34 | 35 | # retrieve some data 36 | helper.print_title("Loading data from a database") 37 | iquery = InstanceQuery() 38 | iquery.db_url = "jdbc:mysql://HOSTNAME:3306/DBNAME" 39 | iquery.user = "DBUSER" 40 | iquery.password = "DBPW" 41 | iquery.query = "select * from TABLE" 42 | data = iquery.retrieve_instances() 43 | print(data) 44 | 45 | if __name__ == "__main__": 46 | try: 47 | mysql_jar = "/some/where/mysql-connector-java-X.Y.Z-bin.jar" 48 | jvm.start(class_path=[mysql_jar]) 49 | main() 50 | except Exception as e: 51 | print(traceback.format_exc()) 52 | finally: 53 | jvm.stop() 54 | -------------------------------------------------------------------------------- /src/wekaexamples/core/packages.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # packages.py 15 | # Copyright (C) 2021 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | import wekaexamples.helper as helper 20 | import weka.core.packages as pkgs 21 | 22 | 23 | def main(): 24 | """ 25 | Runs some example code. 
26 | """ 27 | helper.print_info("Listing packages") 28 | items = pkgs.all_packages() 29 | for item in items: 30 | print(item) 31 | if item.name == "CLOPE": 32 | print(item.name + " " + item.url) 33 | 34 | helper.print_info("Installing CLOPE") 35 | pkgs.install_package("CLOPE") 36 | items = pkgs.installed_packages() 37 | for item in items: 38 | print(item.name + " " + item.url) 39 | 40 | helper.print_info("Uninstalling CLOPE") 41 | pkgs.uninstall_package("CLOPE") 42 | items = pkgs.installed_packages() 43 | for item in items: 44 | print(item.name + " " + item.url) 45 | 46 | 47 | if __name__ == "__main__": 48 | try: 49 | jvm.start(packages=True) 50 | main() 51 | except Exception as e: 52 | print(traceback.format_exc()) 53 | finally: 54 | jvm.stop() 55 | -------------------------------------------------------------------------------- /src/wekaexamples/core/random_dataset.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # random_dataset.py 15 | # Copyright (C) 2015-2016 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | from weka.core.dataset import Attribute, Instance, Instances 20 | 21 | 22 | def main(): 23 | """ 24 | Creates a dataset from scratch using random data and outputs it. 
25 | """ 26 | 27 | atts = [] 28 | for i in range(5): 29 | atts.append(Attribute.create_numeric("x" + str(i))) 30 | 31 | data = Instances.create_instances("data", atts, 10) 32 | 33 | for n in range(10): 34 | values = [] 35 | for i in range(5): 36 | values.append(n*100 + i) 37 | inst = Instance.create_instance(values) 38 | data.add_instance(inst) 39 | 40 | print(data) 41 | 42 | 43 | if __name__ == "__main__": 44 | try: 45 | jvm.start() 46 | main() 47 | except Exception as e: 48 | print(traceback.format_exc()) 49 | finally: 50 | jvm.stop() 51 | -------------------------------------------------------------------------------- /src/wekaexamples/core/serialization.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 
13 | 14 | # serialization.py 15 | # Copyright (C) 2014-2024 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import pickle 19 | import tempfile 20 | import traceback 21 | import weka.core.jvm as jvm 22 | import wekaexamples.helper as helper 23 | from weka.core.classes import is_instance_of 24 | from weka.core.converters import Loader 25 | from weka.core.dataset import Instances 26 | from weka.classifiers import Classifier 27 | from weka.core.classes import serialization_read, serialization_write, serialization_read_all, serialization_write_all 28 | 29 | 30 | def main(): 31 | """ 32 | Just runs some example code. 33 | """ 34 | 35 | # load a dataset 36 | iris_file = helper.get_data_dir() + os.sep + "iris.arff" 37 | helper.print_info("Loading dataset: " + iris_file) 38 | loader = Loader(classname="weka.core.converters.ArffLoader") 39 | iris_data = loader.load_file(iris_file) 40 | iris_data.class_is_last() 41 | 42 | # train classifier 43 | classifier = Classifier(classname="weka.classifiers.trees.J48") 44 | classifier.build_classifier(iris_data) 45 | 46 | # save and read object 47 | helper.print_title("I/O: model (using serialization module)") 48 | outfile = tempfile.gettempdir() + os.sep + "j48.model" 49 | serialization_write(outfile, classifier) 50 | model = Classifier(jobject=serialization_read(outfile)) 51 | print(model) 52 | 53 | # save classifier and dataset header (multiple objects) 54 | helper.print_title("I/O: model and header (using serialization module)") 55 | serialization_write_all(outfile, [classifier, Instances.template_instances(iris_data)]) 56 | objects = serialization_read_all(outfile) 57 | for i, obj in enumerate(objects): 58 | helper.print_info("Object #" + str(i+1) + ":") 59 | if is_instance_of(obj, "weka.core.Instances"): 60 | obj = Instances(jobject=obj) 61 | elif is_instance_of(obj, "weka.classifiers.Classifier"): 62 | obj = Classifier(jobject=obj) 63 | print(obj) 64 | 65 | # save and read object 66 | 
helper.print_title("I/O: just model (using Classifier class)") 67 | outfile = tempfile.gettempdir() + os.sep + "j48.model" 68 | classifier.serialize(outfile) 69 | model, _ = Classifier.deserialize(outfile) 70 | print(model) 71 | 72 | # save classifier and dataset header (multiple objects) 73 | helper.print_title("I/O: model and header (using Classifier class)") 74 | classifier.serialize(outfile, header=iris_data) 75 | model, header = Classifier.deserialize(outfile) 76 | print(model) 77 | if header is not None: 78 | print(header) 79 | 80 | # using pickle 81 | helper.print_title("I/O: using pickle to save/load model") 82 | classifier = Classifier(classname="weka.classifiers.trees.J48") 83 | classifier.build_classifier(iris_data) 84 | outfile = tempfile.gettempdir() + os.sep + "j48-pickle.model" 85 | with open(outfile, "wb") as of: 86 | pickle.dump(classifier, of) 87 | with open(outfile, "rb") as of: 88 | classifier2 = pickle.load(of) 89 | print(classifier2) 90 | print(classifier2.header) 91 | 92 | 93 | if __name__ == "__main__": 94 | try: 95 | jvm.start() 96 | main() 97 | except Exception as e: 98 | print(traceback.format_exc()) 99 | finally: 100 | jvm.stop() 101 | -------------------------------------------------------------------------------- /src/wekaexamples/core/version.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 
10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # version.py 15 | # Copyright (C) 2014 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | import weka.core.version as version 20 | 21 | 22 | def main(): 23 | """ 24 | Just runs some example code. 25 | """ 26 | 27 | print(version.weka_version()) 28 | 29 | 30 | if __name__ == "__main__": 31 | try: 32 | jvm.start() 33 | main() 34 | except Exception as e: 35 | print(traceback.format_exc()) 36 | finally: 37 | jvm.stop() 38 | -------------------------------------------------------------------------------- /src/wekaexamples/data/airline.arff: -------------------------------------------------------------------------------- 1 | %% Monthly totals of international airline passengers (in thousands) for 2 | %% 1949-1960. 3 | 4 | @relation airline_passengers 5 | @attribute passenger_numbers numeric 6 | @attribute Date date 'yyyy-MM-dd' 7 | 8 | @data 9 | 112,1949-01-01 10 | 118,1949-02-01 11 | 132,1949-03-01 12 | 129,1949-04-01 13 | 121,1949-05-01 14 | 135,1949-06-01 15 | 148,1949-07-01 16 | 148,1949-08-01 17 | 136,1949-09-01 18 | 119,1949-10-01 19 | 104,1949-11-01 20 | 118,1949-12-01 21 | 115,1950-01-01 22 | 126,1950-02-01 23 | 141,1950-03-01 24 | 135,1950-04-01 25 | 125,1950-05-01 26 | 149,1950-06-01 27 | 170,1950-07-01 28 | 170,1950-08-01 29 | 158,1950-09-01 30 | 133,1950-10-01 31 | 114,1950-11-01 32 | 140,1950-12-01 33 | 145,1951-01-01 34 | 150,1951-02-01 35 | 178,1951-03-01 36 | 163,1951-04-01 37 | 172,1951-05-01 38 | 178,1951-06-01 39 | 199,1951-07-01 40 | 199,1951-08-01 41 | 184,1951-09-01 42 | 162,1951-10-01 43 | 146,1951-11-01 44 | 166,1951-12-01 45 | 171,1952-01-01 46 | 180,1952-02-01 47 | 193,1952-03-01 48 | 181,1952-04-01 49 | 183,1952-05-01 50 | 218,1952-06-01 51 | 230,1952-07-01 52 | 242,1952-08-01 53 | 209,1952-09-01 54 | 191,1952-10-01 55 | 172,1952-11-01 56 | 
194,1952-12-01 57 | 196,1953-01-01 58 | 196,1953-02-01 59 | 236,1953-03-01 60 | 235,1953-04-01 61 | 229,1953-05-01 62 | 243,1953-06-01 63 | 264,1953-07-01 64 | 272,1953-08-01 65 | 237,1953-09-01 66 | 211,1953-10-01 67 | 180,1953-11-01 68 | 201,1953-12-01 69 | 204,1954-01-01 70 | 188,1954-02-01 71 | 235,1954-03-01 72 | 227,1954-04-01 73 | 234,1954-05-01 74 | 264,1954-06-01 75 | 302,1954-07-01 76 | 293,1954-08-01 77 | 259,1954-09-01 78 | 229,1954-10-01 79 | 203,1954-11-01 80 | 229,1954-12-01 81 | 242,1955-01-01 82 | 233,1955-02-01 83 | 267,1955-03-01 84 | 269,1955-04-01 85 | 270,1955-05-01 86 | 315,1955-06-01 87 | 364,1955-07-01 88 | 347,1955-08-01 89 | 312,1955-09-01 90 | 274,1955-10-01 91 | 237,1955-11-01 92 | 278,1955-12-01 93 | 284,1956-01-01 94 | 277,1956-02-01 95 | 317,1956-03-01 96 | 313,1956-04-01 97 | 318,1956-05-01 98 | 374,1956-06-01 99 | 413,1956-07-01 100 | 405,1956-08-01 101 | 355,1956-09-01 102 | 306,1956-10-01 103 | 271,1956-11-01 104 | 306,1956-12-01 105 | 315,1957-01-01 106 | 301,1957-02-01 107 | 356,1957-03-01 108 | 348,1957-04-01 109 | 355,1957-05-01 110 | 422,1957-06-01 111 | 465,1957-07-01 112 | 467,1957-08-01 113 | 404,1957-09-01 114 | 347,1957-10-01 115 | 305,1957-11-01 116 | 336,1957-12-01 117 | 340,1958-01-01 118 | 318,1958-02-01 119 | 362,1958-03-01 120 | 348,1958-04-01 121 | 363,1958-05-01 122 | 435,1958-06-01 123 | 491,1958-07-01 124 | 505,1958-08-01 125 | 404,1958-09-01 126 | 359,1958-10-01 127 | 310,1958-11-01 128 | 337,1958-12-01 129 | 360,1959-01-01 130 | 342,1959-02-01 131 | 406,1959-03-01 132 | 396,1959-04-01 133 | 420,1959-05-01 134 | 472,1959-06-01 135 | 548,1959-07-01 136 | 559,1959-08-01 137 | 463,1959-09-01 138 | 407,1959-10-01 139 | 362,1959-11-01 140 | 405,1959-12-01 141 | 417,1960-01-01 142 | 391,1960-02-01 143 | 419,1960-03-01 144 | 461,1960-04-01 145 | 472,1960-05-01 146 | 535,1960-06-01 147 | 622,1960-07-01 148 | 606,1960-08-01 149 | 508,1960-09-01 150 | 461,1960-10-01 151 | 390,1960-11-01 152 | 432,1960-12-01 153 | 
-------------------------------------------------------------------------------- /src/wekaexamples/data/bolts.arff: -------------------------------------------------------------------------------- 1 | % Data from StatLib (ftp stat.cmu.edu/datasets) 2 | % 3 | % SUMMARY: 4 | % 5 | % Data from an experiment on the effects of machine adjustments on 6 | % the time to count bolts. Data appear as the STATS (Issue 10) Challenge. 7 | % 8 | % DATA: 9 | % 10 | % Submitted by W. Robert Stephenson, Iowa State University 11 | % email: wrstephe@iastate.edu 12 | % 13 | % A manufacturer of automotive accessories provides hardware, e.g. nuts, 14 | % bolts, washers and screws, to fasten the accessory to the car or truck. 15 | % Hardware is counted and packaged automatically. Specifically, bolts 16 | % are dumped into a large metal dish. A plate that forms the bottom of 17 | % the dish rotates counterclockwise. This rotation forces bolts to the 18 | % outside of the dish and up along a narrow ledge. Due to the vibration 19 | % of the dish caused by the spinning bottom plate, some bolts fall off 20 | % the ledge and back into the dish. The ledge spirals up to a point 21 | % where the bolts are allowed to drop into a pan on a conveyor belt. 22 | % As a bolt drops, it passes by an electronic eye that counts it. When 23 | % the electronic counter reaches the preset number of bolts, the 24 | % rotation is stopped and the conveyor belt is moved forward. 25 | % 26 | % There are several adjustments on the machine that affect its operation. 27 | % These include: a speed setting that controls the speed of rotation 28 | % (SPEED1) of the plate at the bottom of the dish, a total number of 29 | % bolts (TOTAL) to be counted, a second speed setting (SPEED2) that is 30 | % used to change the speed of rotation (usually slowing it down) for the 31 | % last few bolts, the number of bolts to be counted at this second speed 32 | % (NUMBER2), and the sensitivity of the electronic eye (SENS).
The 33 | % sensitivity setting is to ensure that the correct number of bolts are 34 | % counted. Too few bolts packaged causes customer complaints. Too many 35 | % bolts packaged increases costs. For each run conducted in this 36 | % experiment the correct number of bolts was counted. From an 37 | % engineering standpoint if the correct number of bolts is counted, the 38 | % sensitivity should not affect the time to count bolts. The measured 39 | % response is the time (TIME), in seconds, it takes to count the desired 40 | % number of bolts. In order to put times on an equal footing the 41 | % response to be analyzed is the time to count 20 bolts (T20BOLT). 42 | % Below are the data for 40 combinations of settings. RUN is the order 43 | % in which the data were collected. 44 | % 45 | % Analyze the data. What adjustments have the greatest effect on the 46 | % time to count 20 bolts? How would you adjust the machine to get 47 | % the shortest time to count 20 bolts? Are there any unusual features 48 | % to the data? 49 | % 50 | % The data description and data may be freely used for non-commercial 51 | % purposes and can be freely distributed. Copyright remains with the 52 | % author and STATS Magazine.
53 | % 54 | 55 | @relation bolts 56 | 57 | @attribute RUN integer 58 | @attribute SPEED1 integer 59 | @attribute TOTAL integer 60 | @attribute SPEED2 integer 61 | @attribute NUMBER2 integer 62 | @attribute SENS integer 63 | @attribute TIME real 64 | @attribute T20BOLT real 65 | 66 | @data 67 | 25, 2, 10, 1.5, 0, 6, 5.70, 11.40 68 | 24, 2, 10, 1.5, 0, 10, 17.56, 35.12 69 | 30, 2, 10, 1.5, 2, 6, 11.28, 22.56 70 | 2, 2, 10, 1.5, 2, 10, 8.39, 16.78 71 | 40, 2, 10, 2.5, 0, 6, 16.67, 33.34 72 | 37, 2, 10, 2.5, 0, 10, 12.04, 24.08 73 | 16, 2, 10, 2.5, 2, 6, 9.22, 18.44 74 | 22, 2, 10, 2.5, 2, 10, 3.94, 7.88 75 | 33, 2, 30, 1.5, 0, 6, 27.02, 18.01 76 | 17, 2, 30, 1.5, 0, 10, 19.46, 12.97 77 | 28, 2, 30, 1.5, 2, 6, 18.54, 12.36 78 | 27, 2, 30, 1.5, 2, 10, 25.70, 17.13 79 | 14, 2, 30, 2.5, 0, 6, 19.02, 12.68 80 | 13, 2, 30, 2.5, 0, 10, 22.39, 14.93 81 | 4, 2, 30, 2.5, 2, 6, 23.85, 15.90 82 | 21, 2, 30, 2.5, 2, 10, 30.12, 20.08 83 | 23, 6, 10, 1.5, 0, 6, 13.42, 26.84 84 | 35, 6, 10, 1.5, 0, 10, 34.26, 68.52 85 | 19, 6, 10, 1.5, 2, 6, 39.74, 79.48 86 | 34, 6, 10, 1.5, 2, 10, 10.60, 21.20 87 | 31, 6, 10, 2.5, 0, 6, 28.89, 57.78 88 | 9, 6, 10, 2.5, 0, 10, 35.61, 71.22 89 | 38, 6, 10, 2.5, 2, 6, 17.20, 34.40 90 | 15, 6, 10, 2.5, 2, 10, 6.00, 12.00 91 | 39, 6, 30, 1.5, 0, 6, 129.45, 86.30 92 | 8, 6, 30, 1.5, 0, 0, 107.38, 71.59 93 | 26, 6, 30, 1.5, 2, 6, 111.66, 74.44 94 | 11, 6, 30, 1.5, 2, 0, 109.10, 72.73 95 | 6, 6, 30, 2.5, 0, 6, 100.43, 66.95 96 | 20, 6, 30, 2.5, 0, 0, 109.28, 72.85 97 | 10, 6, 30, 2.5, 2, 6, 106.46, 70.97 98 | 32, 6, 30, 2.5, 2, 0, 134.01, 89.34 99 | 1, 4, 20, 2.0, 1, 8, 10.78, 10.78 100 | 3, 4, 20, 2.0, 1, 8, 9.39, 9.39 101 | 5, 4, 20, 2.0, 1, 8, 9.84, 9.84 102 | 7, 4, 20, 2.0, 1, 8, 13.94, 13.94 103 | 12, 4, 20, 2.0, 1, 8, 12.33, 12.33 104 | 18, 4, 20, 2.0, 1, 8, 7.32, 7.32 105 | 29, 4, 20, 2.0, 1, 8, 7.91, 7.91 106 | 36, 4, 20, 2.0, 1, 8, 15.58, 15.58 107 | -------------------------------------------------------------------------------- 
/src/wekaexamples/data/iris-test.arff: -------------------------------------------------------------------------------- 1 | @relation iris-test 2 | 3 | @attribute sepallength numeric 4 | @attribute sepalwidth numeric 5 | @attribute petallength numeric 6 | @attribute petalwidth numeric 7 | @attribute class {Iris-setosa,Iris-versicolor,Iris-virginica} 8 | 9 | @data 10 | 6.9,3.1,4.9,1.5,Iris-versicolor 11 | 7.4,2.8,6.1,1.9,Iris-virginica 12 | 6.6,2.9,4.6,1.3,Iris-versicolor 13 | 5,3.2,1.2,0.2,Iris-setosa 14 | 6.3,3.3,6,2.5,Iris-virginica 15 | 6.3,2.3,4.4,1.3,Iris-versicolor 16 | 4.5,2.3,1.3,0.3,Iris-setosa 17 | 5,3.4,1.5,0.2,Iris-setosa 18 | 6.2,2.9,4.3,1.3,Iris-versicolor 19 | 6.4,3.2,5.3,2.3,Iris-virginica 20 | 5.7,2.8,4.5,1.3,Iris-versicolor 21 | 6.4,2.8,5.6,2.1,Iris-virginica 22 | 5.2,3.5,1.5,0.2,Iris-setosa 23 | 5.7,2.9,4.2,1.3,Iris-versicolor 24 | 6.1,2.9,4.7,1.4,Iris-versicolor 25 | 6,2.2,5,1.5,Iris-virginica 26 | 6.3,3.4,5.6,2.4,Iris-virginica 27 | 5.8,2.7,4.1,1,Iris-versicolor 28 | 6.2,3.4,5.4,2.3,Iris-virginica 29 | 6.5,3,5.8,2.2,Iris-virginica 30 | 6,3.4,4.5,1.6,Iris-versicolor 31 | 6.5,3,5.2,2,Iris-virginica 32 | 4.9,2.4,3.3,1,Iris-versicolor 33 | 5.7,3,4.2,1.2,Iris-versicolor 34 | 4.4,3.2,1.3,0.2,Iris-setosa 35 | 7.9,3.8,6.4,2,Iris-virginica 36 | 5.2,4.1,1.5,0.1,Iris-setosa 37 | 6,2.9,4.5,1.5,Iris-versicolor 38 | 5.1,3.8,1.9,0.4,Iris-setosa 39 | 4.4,3,1.3,0.2,Iris-setosa 40 | 6.3,3.3,4.7,1.6,Iris-versicolor 41 | 6.9,3.1,5.1,2.3,Iris-virginica 42 | 5.4,3,4.5,1.5,Iris-versicolor 43 | 5.8,4,1.2,0.2,Iris-setosa 44 | 4.8,3.1,1.6,0.2,Iris-setosa 45 | 6.7,3.3,5.7,2.1,Iris-virginica 46 | 5,3.4,1.6,0.4,Iris-setosa 47 | 4.8,3.4,1.6,0.2,Iris-setosa 48 | 6.3,2.8,5.1,1.5,Iris-virginica 49 | 4.9,3.1,1.5,0.1,Iris-setosa 50 | 5.5,2.5,4,1.3,Iris-versicolor 51 | 4.3,3,1.1,0.1,Iris-setosa 52 | 5.8,2.8,5.1,2.4,Iris-virginica 53 | 5.7,2.5,5,2,Iris-virginica 54 | 7,3.2,4.7,1.4,Iris-versicolor 55 | 6.4,2.9,4.3,1.3,Iris-versicolor 56 | 5.5,2.6,4.4,1.2,Iris-versicolor 57 | 
5.1,3.5,1.4,0.2,Iris-setosa 58 | 6.4,2.7,5.3,1.9,Iris-virginica 59 | 6.3,2.5,4.9,1.5,Iris-versicolor 60 | 7.7,3,6.1,2.3,Iris-virginica 61 | -------------------------------------------------------------------------------- /src/wekaexamples/data/iris-train.arff: -------------------------------------------------------------------------------- 1 | @relation iris-train 2 | 3 | @attribute sepallength numeric 4 | @attribute sepalwidth numeric 5 | @attribute petallength numeric 6 | @attribute petalwidth numeric 7 | @attribute class {Iris-setosa,Iris-versicolor,Iris-virginica} 8 | 9 | @data 10 | 6.7,3.1,5.6,2.4,Iris-virginica 11 | 6.7,3,5.2,2.3,Iris-virginica 12 | 5.6,2.9,3.6,1.3,Iris-versicolor 13 | 4.6,3.1,1.5,0.2,Iris-setosa 14 | 5.9,3.2,4.8,1.8,Iris-versicolor 15 | 7.6,3,6.6,2.1,Iris-virginica 16 | 5.5,2.4,3.7,1,Iris-versicolor 17 | 6.4,2.8,5.6,2.2,Iris-virginica 18 | 5.4,3.9,1.7,0.4,Iris-setosa 19 | 6.8,3.2,5.9,2.3,Iris-virginica 20 | 6.7,3,5,1.7,Iris-versicolor 21 | 5.5,3.5,1.3,0.2,Iris-setosa 22 | 4.6,3.4,1.4,0.3,Iris-setosa 23 | 6.2,2.8,4.8,1.8,Iris-virginica 24 | 4.9,3,1.4,0.2,Iris-setosa 25 | 5.7,2.6,3.5,1,Iris-versicolor 26 | 6.5,2.8,4.6,1.5,Iris-versicolor 27 | 4.9,3.1,1.5,0.1,Iris-setosa 28 | 5.4,3.4,1.5,0.4,Iris-setosa 29 | 4.6,3.2,1.4,0.2,Iris-setosa 30 | 6.5,3,5.5,1.8,Iris-virginica 31 | 6.6,3,4.4,1.4,Iris-versicolor 32 | 5.6,3,4.5,1.5,Iris-versicolor 33 | 5,3.5,1.3,0.3,Iris-setosa 34 | 7.1,3,5.9,2.1,Iris-virginica 35 | 5.9,3,5.1,1.8,Iris-virginica 36 | 4.6,3.6,1,0.2,Iris-setosa 37 | 4.8,3,1.4,0.3,Iris-setosa 38 | 5,3.5,1.6,0.6,Iris-setosa 39 | 5.2,3.4,1.4,0.2,Iris-setosa 40 | 7.2,3.2,6,1.8,Iris-virginica 41 | 5.1,2.5,3,1.1,Iris-versicolor 42 | 6,2.7,5.1,1.6,Iris-versicolor 43 | 6.1,2.8,4,1.3,Iris-versicolor 44 | 5.4,3.9,1.3,0.4,Iris-setosa 45 | 5.1,3.7,1.5,0.4,Iris-setosa 46 | 6.7,3.1,4.7,1.5,Iris-versicolor 47 | 5.2,2.7,3.9,1.4,Iris-versicolor 48 | 5.8,2.7,3.9,1.2,Iris-versicolor 49 | 5.8,2.6,4,1.2,Iris-versicolor 50 | 6.2,2.2,4.5,1.5,Iris-versicolor 51 | 
5.6,2.5,3.9,1.1,Iris-versicolor 52 | 6.3,2.7,4.9,1.8,Iris-virginica 53 | 6.1,2.8,4.7,1.2,Iris-versicolor 54 | 5,3.3,1.4,0.2,Iris-setosa 55 | 7.7,2.6,6.9,2.3,Iris-virginica 56 | 5.1,3.3,1.7,0.5,Iris-setosa 57 | 7.3,2.9,6.3,1.8,Iris-virginica 58 | 6.7,3.1,4.4,1.4,Iris-versicolor 59 | 6.8,2.8,4.8,1.4,Iris-versicolor 60 | 5.7,3.8,1.7,0.3,Iris-setosa 61 | 6.1,3,4.9,1.8,Iris-virginica 62 | 6.9,3.2,5.7,2.3,Iris-virginica 63 | 6.1,3,4.6,1.4,Iris-versicolor 64 | 5,3,1.6,0.2,Iris-setosa 65 | 5.5,2.3,4,1.3,Iris-versicolor 66 | 5.8,2.7,5.1,1.9,Iris-virginica 67 | 5.7,2.8,4.1,1.3,Iris-versicolor 68 | 4.9,2.5,4.5,1.7,Iris-virginica 69 | 5.6,2.8,4.9,2,Iris-virginica 70 | 6,3,4.8,1.8,Iris-virginica 71 | 5.1,3.5,1.4,0.3,Iris-setosa 72 | 5.8,2.7,5.1,1.9,Iris-virginica 73 | 4.7,3.2,1.3,0.2,Iris-setosa 74 | 5.1,3.4,1.5,0.2,Iris-setosa 75 | 5.5,4.2,1.4,0.2,Iris-setosa 76 | 6.5,3.2,5.1,2,Iris-virginica 77 | 4.7,3.2,1.6,0.2,Iris-setosa 78 | 6.4,3.2,4.5,1.5,Iris-versicolor 79 | 5.3,3.7,1.5,0.2,Iris-setosa 80 | 6.7,3.3,5.7,2.5,Iris-virginica 81 | 4.9,3.1,1.5,0.1,Iris-setosa 82 | 6.9,3.1,5.4,2.1,Iris-virginica 83 | 4.4,2.9,1.4,0.2,Iris-setosa 84 | 4.8,3,1.4,0.1,Iris-setosa 85 | 5.4,3.7,1.5,0.2,Iris-setosa 86 | 4.8,3.4,1.9,0.2,Iris-setosa 87 | 5.9,3,4.2,1.5,Iris-versicolor 88 | 5.1,3.8,1.6,0.2,Iris-setosa 89 | 5.1,3.8,1.5,0.3,Iris-setosa 90 | 5.4,3.4,1.7,0.2,Iris-setosa 91 | 6.7,2.5,5.8,1.8,Iris-virginica 92 | 5,2,3.5,1,Iris-versicolor 93 | 5.5,2.4,3.8,1.1,Iris-versicolor 94 | 5.6,3,4.1,1.3,Iris-versicolor 95 | 7.2,3,5.8,1.6,Iris-virginica 96 | 7.7,2.8,6.7,2,Iris-virginica 97 | 5,2.3,3.3,1,Iris-versicolor 98 | 6.8,3,5.5,2.1,Iris-virginica 99 | 6.3,2.5,5,1.9,Iris-virginica 100 | 5,3.6,1.4,0.2,Iris-setosa 101 | 6.1,2.6,5.6,1.4,Iris-virginica 102 | 7.2,3.6,6.1,2.5,Iris-virginica 103 | 6,2.2,4,1,Iris-versicolor 104 | 6.4,3.1,5.5,1.8,Iris-virginica 105 | 7.7,3.8,6.7,2.2,Iris-virginica 106 | 6.3,2.9,5.6,1.8,Iris-virginica 107 | 5.6,2.7,4.2,1.3,Iris-versicolor 108 | 5.7,4.4,1.5,0.4,Iris-setosa 109 
| -------------------------------------------------------------------------------- /src/wekaexamples/data/iris.csv: -------------------------------------------------------------------------------- 1 | sepallength,sepalwidth,petallength,petalwidth,class 2 | 5.1,3.5,1.4,0.2,Iris-setosa 3 | 4.9,3,1.4,0.2,Iris-setosa 4 | 4.7,3.2,1.3,0.2,Iris-setosa 5 | 4.6,3.1,1.5,0.2,Iris-setosa 6 | 5,3.6,1.4,0.2,Iris-setosa 7 | 5.4,3.9,1.7,0.4,Iris-setosa 8 | 4.6,3.4,1.4,0.3,Iris-setosa 9 | 5,3.4,1.5,0.2,Iris-setosa 10 | 4.4,2.9,1.4,0.2,Iris-setosa 11 | 4.9,3.1,1.5,0.1,Iris-setosa 12 | 5.4,3.7,1.5,0.2,Iris-setosa 13 | 4.8,3.4,1.6,0.2,Iris-setosa 14 | 4.8,3,1.4,0.1,Iris-setosa 15 | 4.3,3,1.1,0.1,Iris-setosa 16 | 5.8,4,1.2,0.2,Iris-setosa 17 | 5.7,4.4,1.5,0.4,Iris-setosa 18 | 5.4,3.9,1.3,0.4,Iris-setosa 19 | 5.1,3.5,1.4,0.3,Iris-setosa 20 | 5.7,3.8,1.7,0.3,Iris-setosa 21 | 5.1,3.8,1.5,0.3,Iris-setosa 22 | 5.4,3.4,1.7,0.2,Iris-setosa 23 | 5.1,3.7,1.5,0.4,Iris-setosa 24 | 4.6,3.6,1,0.2,Iris-setosa 25 | 5.1,3.3,1.7,0.5,Iris-setosa 26 | 4.8,3.4,1.9,0.2,Iris-setosa 27 | 5,3,1.6,0.2,Iris-setosa 28 | 5,3.4,1.6,0.4,Iris-setosa 29 | 5.2,3.5,1.5,0.2,Iris-setosa 30 | 5.2,3.4,1.4,0.2,Iris-setosa 31 | 4.7,3.2,1.6,0.2,Iris-setosa 32 | 4.8,3.1,1.6,0.2,Iris-setosa 33 | 5.4,3.4,1.5,0.4,Iris-setosa 34 | 5.2,4.1,1.5,0.1,Iris-setosa 35 | 5.5,4.2,1.4,0.2,Iris-setosa 36 | 4.9,3.1,1.5,0.1,Iris-setosa 37 | 5,3.2,1.2,0.2,Iris-setosa 38 | 5.5,3.5,1.3,0.2,Iris-setosa 39 | 4.9,3.1,1.5,0.1,Iris-setosa 40 | 4.4,3,1.3,0.2,Iris-setosa 41 | 5.1,3.4,1.5,0.2,Iris-setosa 42 | 5,3.5,1.3,0.3,Iris-setosa 43 | 4.5,2.3,1.3,0.3,Iris-setosa 44 | 4.4,3.2,1.3,0.2,Iris-setosa 45 | 5,3.5,1.6,0.6,Iris-setosa 46 | 5.1,3.8,1.9,0.4,Iris-setosa 47 | 4.8,3,1.4,0.3,Iris-setosa 48 | 5.1,3.8,1.6,0.2,Iris-setosa 49 | 4.6,3.2,1.4,0.2,Iris-setosa 50 | 5.3,3.7,1.5,0.2,Iris-setosa 51 | 5,3.3,1.4,0.2,Iris-setosa 52 | 7,3.2,4.7,1.4,Iris-versicolor 53 | 6.4,3.2,4.5,1.5,Iris-versicolor 54 | 6.9,3.1,4.9,1.5,Iris-versicolor 55 | 
5.5,2.3,4,1.3,Iris-versicolor 56 | 6.5,2.8,4.6,1.5,Iris-versicolor 57 | 5.7,2.8,4.5,1.3,Iris-versicolor 58 | 6.3,3.3,4.7,1.6,Iris-versicolor 59 | 4.9,2.4,3.3,1,Iris-versicolor 60 | 6.6,2.9,4.6,1.3,Iris-versicolor 61 | 5.2,2.7,3.9,1.4,Iris-versicolor 62 | 5,2,3.5,1,Iris-versicolor 63 | 5.9,3,4.2,1.5,Iris-versicolor 64 | 6,2.2,4,1,Iris-versicolor 65 | 6.1,2.9,4.7,1.4,Iris-versicolor 66 | 5.6,2.9,3.6,1.3,Iris-versicolor 67 | 6.7,3.1,4.4,1.4,Iris-versicolor 68 | 5.6,3,4.5,1.5,Iris-versicolor 69 | 5.8,2.7,4.1,1,Iris-versicolor 70 | 6.2,2.2,4.5,1.5,Iris-versicolor 71 | 5.6,2.5,3.9,1.1,Iris-versicolor 72 | 5.9,3.2,4.8,1.8,Iris-versicolor 73 | 6.1,2.8,4,1.3,Iris-versicolor 74 | 6.3,2.5,4.9,1.5,Iris-versicolor 75 | 6.1,2.8,4.7,1.2,Iris-versicolor 76 | 6.4,2.9,4.3,1.3,Iris-versicolor 77 | 6.6,3,4.4,1.4,Iris-versicolor 78 | 6.8,2.8,4.8,1.4,Iris-versicolor 79 | 6.7,3,5,1.7,Iris-versicolor 80 | 6,2.9,4.5,1.5,Iris-versicolor 81 | 5.7,2.6,3.5,1,Iris-versicolor 82 | 5.5,2.4,3.8,1.1,Iris-versicolor 83 | 5.5,2.4,3.7,1,Iris-versicolor 84 | 5.8,2.7,3.9,1.2,Iris-versicolor 85 | 6,2.7,5.1,1.6,Iris-versicolor 86 | 5.4,3,4.5,1.5,Iris-versicolor 87 | 6,3.4,4.5,1.6,Iris-versicolor 88 | 6.7,3.1,4.7,1.5,Iris-versicolor 89 | 6.3,2.3,4.4,1.3,Iris-versicolor 90 | 5.6,3,4.1,1.3,Iris-versicolor 91 | 5.5,2.5,4,1.3,Iris-versicolor 92 | 5.5,2.6,4.4,1.2,Iris-versicolor 93 | 6.1,3,4.6,1.4,Iris-versicolor 94 | 5.8,2.6,4,1.2,Iris-versicolor 95 | 5,2.3,3.3,1,Iris-versicolor 96 | 5.6,2.7,4.2,1.3,Iris-versicolor 97 | 5.7,3,4.2,1.2,Iris-versicolor 98 | 5.7,2.9,4.2,1.3,Iris-versicolor 99 | 6.2,2.9,4.3,1.3,Iris-versicolor 100 | 5.1,2.5,3,1.1,Iris-versicolor 101 | 5.7,2.8,4.1,1.3,Iris-versicolor 102 | 6.3,3.3,6,2.5,Iris-virginica 103 | 5.8,2.7,5.1,1.9,Iris-virginica 104 | 7.1,3,5.9,2.1,Iris-virginica 105 | 6.3,2.9,5.6,1.8,Iris-virginica 106 | 6.5,3,5.8,2.2,Iris-virginica 107 | 7.6,3,6.6,2.1,Iris-virginica 108 | 4.9,2.5,4.5,1.7,Iris-virginica 109 | 7.3,2.9,6.3,1.8,Iris-virginica 110 | 
6.7,2.5,5.8,1.8,Iris-virginica 111 | 7.2,3.6,6.1,2.5,Iris-virginica 112 | 6.5,3.2,5.1,2,Iris-virginica 113 | 6.4,2.7,5.3,1.9,Iris-virginica 114 | 6.8,3,5.5,2.1,Iris-virginica 115 | 5.7,2.5,5,2,Iris-virginica 116 | 5.8,2.8,5.1,2.4,Iris-virginica 117 | 6.4,3.2,5.3,2.3,Iris-virginica 118 | 6.5,3,5.5,1.8,Iris-virginica 119 | 7.7,3.8,6.7,2.2,Iris-virginica 120 | 7.7,2.6,6.9,2.3,Iris-virginica 121 | 6,2.2,5,1.5,Iris-virginica 122 | 6.9,3.2,5.7,2.3,Iris-virginica 123 | 5.6,2.8,4.9,2,Iris-virginica 124 | 7.7,2.8,6.7,2,Iris-virginica 125 | 6.3,2.7,4.9,1.8,Iris-virginica 126 | 6.7,3.3,5.7,2.1,Iris-virginica 127 | 7.2,3.2,6,1.8,Iris-virginica 128 | 6.2,2.8,4.8,1.8,Iris-virginica 129 | 6.1,3,4.9,1.8,Iris-virginica 130 | 6.4,2.8,5.6,2.1,Iris-virginica 131 | 7.2,3,5.8,1.6,Iris-virginica 132 | 7.4,2.8,6.1,1.9,Iris-virginica 133 | 7.9,3.8,6.4,2,Iris-virginica 134 | 6.4,2.8,5.6,2.2,Iris-virginica 135 | 6.3,2.8,5.1,1.5,Iris-virginica 136 | 6.1,2.6,5.6,1.4,Iris-virginica 137 | 7.7,3,6.1,2.3,Iris-virginica 138 | 6.3,3.4,5.6,2.4,Iris-virginica 139 | 6.4,3.1,5.5,1.8,Iris-virginica 140 | 6,3,4.8,1.8,Iris-virginica 141 | 6.9,3.1,5.4,2.1,Iris-virginica 142 | 6.7,3.1,5.6,2.4,Iris-virginica 143 | 6.9,3.1,5.1,2.3,Iris-virginica 144 | 5.8,2.7,5.1,1.9,Iris-virginica 145 | 6.8,3.2,5.9,2.3,Iris-virginica 146 | 6.7,3.3,5.7,2.5,Iris-virginica 147 | 6.7,3,5.2,2.3,Iris-virginica 148 | 6.3,2.5,5,1.9,Iris-virginica 149 | 6.5,3,5.2,2,Iris-virginica 150 | 6.2,3.4,5.4,2.3,Iris-virginica 151 | 5.9,3,5.1,1.8,Iris-virginica 152 | -------------------------------------------------------------------------------- /src/wekaexamples/data/iris_no_class.arff: -------------------------------------------------------------------------------- 1 | @relation iris_no_class 2 | 3 | @attribute sepallength numeric 4 | @attribute sepalwidth numeric 5 | @attribute petallength numeric 6 | @attribute petalwidth numeric 7 | 8 | @data 9 | 5.1,3.5,1.4,0.2 10 | 4.9,3,1.4,0.2 11 | 4.7,3.2,1.3,0.2 12 | 4.6,3.1,1.5,0.2 13 | 
5,3.6,1.4,0.2 14 | 5.4,3.9,1.7,0.4 15 | 4.6,3.4,1.4,0.3 16 | 5,3.4,1.5,0.2 17 | 4.4,2.9,1.4,0.2 18 | 4.9,3.1,1.5,0.1 19 | 5.4,3.7,1.5,0.2 20 | 4.8,3.4,1.6,0.2 21 | 4.8,3,1.4,0.1 22 | 4.3,3,1.1,0.1 23 | 5.8,4,1.2,0.2 24 | 5.7,4.4,1.5,0.4 25 | 5.4,3.9,1.3,0.4 26 | 5.1,3.5,1.4,0.3 27 | 5.7,3.8,1.7,0.3 28 | 5.1,3.8,1.5,0.3 29 | 5.4,3.4,1.7,0.2 30 | 5.1,3.7,1.5,0.4 31 | 4.6,3.6,1,0.2 32 | 5.1,3.3,1.7,0.5 33 | 4.8,3.4,1.9,0.2 34 | 5,3,1.6,0.2 35 | 5,3.4,1.6,0.4 36 | 5.2,3.5,1.5,0.2 37 | 5.2,3.4,1.4,0.2 38 | 4.7,3.2,1.6,0.2 39 | 4.8,3.1,1.6,0.2 40 | 5.4,3.4,1.5,0.4 41 | 5.2,4.1,1.5,0.1 42 | 5.5,4.2,1.4,0.2 43 | 4.9,3.1,1.5,0.1 44 | 5,3.2,1.2,0.2 45 | 5.5,3.5,1.3,0.2 46 | 4.9,3.1,1.5,0.1 47 | 4.4,3,1.3,0.2 48 | 5.1,3.4,1.5,0.2 49 | 5,3.5,1.3,0.3 50 | 4.5,2.3,1.3,0.3 51 | 4.4,3.2,1.3,0.2 52 | 5,3.5,1.6,0.6 53 | 5.1,3.8,1.9,0.4 54 | 4.8,3,1.4,0.3 55 | 5.1,3.8,1.6,0.2 56 | 4.6,3.2,1.4,0.2 57 | 5.3,3.7,1.5,0.2 58 | 5,3.3,1.4,0.2 59 | 7,3.2,4.7,1.4 60 | 6.4,3.2,4.5,1.5 61 | 6.9,3.1,4.9,1.5 62 | 5.5,2.3,4,1.3 63 | 6.5,2.8,4.6,1.5 64 | 5.7,2.8,4.5,1.3 65 | 6.3,3.3,4.7,1.6 66 | 4.9,2.4,3.3,1 67 | 6.6,2.9,4.6,1.3 68 | 5.2,2.7,3.9,1.4 69 | 5,2,3.5,1 70 | 5.9,3,4.2,1.5 71 | 6,2.2,4,1 72 | 6.1,2.9,4.7,1.4 73 | 5.6,2.9,3.6,1.3 74 | 6.7,3.1,4.4,1.4 75 | 5.6,3,4.5,1.5 76 | 5.8,2.7,4.1,1 77 | 6.2,2.2,4.5,1.5 78 | 5.6,2.5,3.9,1.1 79 | 5.9,3.2,4.8,1.8 80 | 6.1,2.8,4,1.3 81 | 6.3,2.5,4.9,1.5 82 | 6.1,2.8,4.7,1.2 83 | 6.4,2.9,4.3,1.3 84 | 6.6,3,4.4,1.4 85 | 6.8,2.8,4.8,1.4 86 | 6.7,3,5,1.7 87 | 6,2.9,4.5,1.5 88 | 5.7,2.6,3.5,1 89 | 5.5,2.4,3.8,1.1 90 | 5.5,2.4,3.7,1 91 | 5.8,2.7,3.9,1.2 92 | 6,2.7,5.1,1.6 93 | 5.4,3,4.5,1.5 94 | 6,3.4,4.5,1.6 95 | 6.7,3.1,4.7,1.5 96 | 6.3,2.3,4.4,1.3 97 | 5.6,3,4.1,1.3 98 | 5.5,2.5,4,1.3 99 | 5.5,2.6,4.4,1.2 100 | 6.1,3,4.6,1.4 101 | 5.8,2.6,4,1.2 102 | 5,2.3,3.3,1 103 | 5.6,2.7,4.2,1.3 104 | 5.7,3,4.2,1.2 105 | 5.7,2.9,4.2,1.3 106 | 6.2,2.9,4.3,1.3 107 | 5.1,2.5,3,1.1 108 | 5.7,2.8,4.1,1.3 109 | 6.3,3.3,6,2.5 110 | 5.8,2.7,5.1,1.9 111 | 7.1,3,5.9,2.1 112 | 
6.3,2.9,5.6,1.8 113 | 6.5,3,5.8,2.2 114 | 7.6,3,6.6,2.1 115 | 4.9,2.5,4.5,1.7 116 | 7.3,2.9,6.3,1.8 117 | 6.7,2.5,5.8,1.8 118 | 7.2,3.6,6.1,2.5 119 | 6.5,3.2,5.1,2 120 | 6.4,2.7,5.3,1.9 121 | 6.8,3,5.5,2.1 122 | 5.7,2.5,5,2 123 | 5.8,2.8,5.1,2.4 124 | 6.4,3.2,5.3,2.3 125 | 6.5,3,5.5,1.8 126 | 7.7,3.8,6.7,2.2 127 | 7.7,2.6,6.9,2.3 128 | 6,2.2,5,1.5 129 | 6.9,3.2,5.7,2.3 130 | 5.6,2.8,4.9,2 131 | 7.7,2.8,6.7,2 132 | 6.3,2.7,4.9,1.8 133 | 6.7,3.3,5.7,2.1 134 | 7.2,3.2,6,1.8 135 | 6.2,2.8,4.8,1.8 136 | 6.1,3,4.9,1.8 137 | 6.4,2.8,5.6,2.1 138 | 7.2,3,5.8,1.6 139 | 7.4,2.8,6.1,1.9 140 | 7.9,3.8,6.4,2 141 | 6.4,2.8,5.6,2.2 142 | 6.3,2.8,5.1,1.5 143 | 6.1,2.6,5.6,1.4 144 | 7.7,3,6.1,2.3 145 | 6.3,3.4,5.6,2.4 146 | 6.4,3.1,5.5,1.8 147 | 6,3,4.8,1.8 148 | 6.9,3.1,5.4,2.1 149 | 6.7,3.1,5.6,2.4 150 | 6.9,3.1,5.1,2.3 151 | 5.8,2.7,5.1,1.9 152 | 6.8,3.2,5.9,2.3 153 | 6.7,3.3,5.7,2.5 154 | 6.7,3,5.2,2.3 155 | 6.3,2.5,5,1.9 156 | 6.5,3,5.2,2 157 | 6.2,3.4,5.4,2.3 158 | 5.9,3,5.1,1.8 159 | -------------------------------------------------------------------------------- /src/wekaexamples/datagenerators/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/datagenerators/__init__.py -------------------------------------------------------------------------------- /src/wekaexamples/datagenerators/datagenerators.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 
5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see . 13 | 14 | # datagenerators.py 15 | # Copyright (C) 2014-2017 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import traceback 18 | import weka.core.jvm as jvm 19 | import wekaexamples.helper as helper 20 | from weka.datagenerators import DataGenerator 21 | 22 | 23 | def main(): 24 | """ 25 | Just runs some example code. 26 | """ 27 | 28 | helper.print_title("Generate data (Agrawal)") 29 | generator = DataGenerator( 30 | classname="weka.datagenerators.classifiers.classification.Agrawal", 31 | options=["-n", "10", "-r", "agrawal"]) 32 | generator.dataset_format = generator.define_data_format() 33 | print(generator.dataset_format) 34 | if generator.single_mode_flag: 35 | for i in range(generator.num_examples_act): 36 | print(generator.generate_example()) 37 | else: 38 | print(generator.generate_examples()) 39 | 40 | helper.print_title("Generate data (BayesNet)") 41 | generator = DataGenerator( 42 | classname="weka.datagenerators.classifiers.classification.BayesNet", 43 | options=["-S", "2", "-n", "10", "-C", "10"]) 44 | generator.dataset_format = generator.define_data_format() 45 | print(generator.dataset_format) 46 | if generator.single_mode_flag: 47 | for i in range(generator.num_examples_act): 48 | print(generator.generate_example()) 49 | else: 50 | print(generator.generate_examples()) 51 | 52 | # partial classname 53 | helper.print_title("Creating clusterer from partial classname") 54 | clsname = ".Agrawal" 55 | generator = DataGenerator(classname=clsname) 56 | print(clsname + " --> " + generator.classname) 57 | 58 | if __name__ == "__main__": 59 | try: 60 | jvm.start() 61 | 
main() 62 | except Exception as e: 63 | print(traceback.format_exc()) 64 | finally: 65 | jvm.stop() 66 | -------------------------------------------------------------------------------- /src/wekaexamples/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/experiments/__init__.py -------------------------------------------------------------------------------- /src/wekaexamples/experiments/experiments.py: -------------------------------------------------------------------------------- 1 | # This program is free software: you can redistribute it and/or modify 2 | # it under the terms of the GNU General Public License as published by 3 | # the Free Software Foundation, either version 3 of the License, or 4 | # (at your option) any later version. 5 | # 6 | # This program is distributed in the hope that it will be useful, 7 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 8 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 | # GNU General Public License for more details. 10 | # 11 | # You should have received a copy of the GNU General Public License 12 | # along with this program. If not, see https://www.gnu.org/licenses/. 13 | 14 | # experiments.py 15 | # Copyright (C) 2014-2022 Fracpete (pythonwekawrapper at gmail dot com) 16 | 17 | import os 18 | import tempfile 19 | import traceback 20 | import weka.core.jvm as jvm 21 | import weka.core.converters as converters 22 | import wekaexamples.helper as helper 23 | from weka.classifiers import Classifier 24 | from weka.experiments import SimpleCrossValidationExperiment, SimpleRandomSplitExperiment, Tester, ResultMatrix 25 | import weka.plot.experiments as plot_exp 26 | 27 | 28 | def main(): 29 | """ 30 | Runs two experiments and evaluates them: a cross-validation experiment (10 runs, 10 folds) with ZeroR and J48 on iris.arff and anneal.arff, and a random split experiment (10 runs, 66.6 percent) with ZeroR and LinearRegression on bolts.arff and bodyfat.arff. Each results file is written to the temp directory, loaded again and compared with a PairedCorrectedTTester; the result matrix of the random split experiment is plotted at the end.
31 | """ 32 | 33 | print(helper.get_data_dir()) 34 | 35 | # cross-validation + classification 36 | helper.print_title("Experiment: Cross-validation + classification") 37 | datasets = [helper.get_data_dir() + os.sep + "iris.arff", helper.get_data_dir() + os.sep + "anneal.arff"] 38 | classifiers = [Classifier(classname="weka.classifiers.rules.ZeroR"), Classifier(classname="weka.classifiers.trees.J48")] 39 | outfile = tempfile.gettempdir() + os.sep + "results-cv.arff" 40 | exp = SimpleCrossValidationExperiment( 41 | classification=True, 42 | runs=10, 43 | folds=10, 44 | datasets=datasets, 45 | classifiers=classifiers, 46 | result=outfile, 47 | pred_target_column=True, # outputting predictions and ground truth in separate columns (CAUTION: output can get very large!) 48 | class_for_ir_statistics=1) # using 2nd class label for AUC 49 | exp.setup() 50 | exp.run() 51 | 52 | # evaluate: load the results file that was passed to the experiment as "result" 53 | loader = converters.loader_for_file(outfile) 54 | data = loader.load_file(outfile) 55 | matrix = ResultMatrix("weka.experiment.ResultMatrixPlainText") 56 | # comparing datasets 57 | helper.print_info("Comparing datasets") 58 | tester = Tester(classname="weka.experiment.PairedCorrectedTTester") 59 | tester.swap_rows_and_cols = True 60 | tester.resultmatrix = matrix 61 | comparison_col = data.attribute_by_name("Area_under_ROC").index 62 | tester.instances = data 63 | print(tester.header(comparison_col)) 64 | print(tester.multi_resultset_full(0, comparison_col)) 65 | # comparing classifiers (same tester and comparison column, rows and columns no longer swapped) 66 | helper.print_info("Comparing classifiers") 67 | tester.swap_rows_and_cols = False 68 | print(tester.header(comparison_col)) 69 | print(tester.multi_resultset_full(0, comparison_col)) 70 | 71 | # random split + regression 72 | helper.print_title("Experiment: Random split + regression") 73 | datasets = [helper.get_data_dir() + os.sep + "bolts.arff", helper.get_data_dir() + os.sep + "bodyfat.arff"] 74 | classifiers = [ 75 | Classifier(classname="weka.classifiers.rules.ZeroR"), 76 |
Classifier(classname="weka.classifiers.functions.LinearRegression") 77 | ] 78 | outfile = tempfile.gettempdir() + os.sep + "results-rs.arff" 79 | exp = SimpleRandomSplitExperiment( 80 | classification=False, 81 | runs=10, 82 | percentage=66.6, 83 | preserve_order=False, 84 | datasets=datasets, 85 | classifiers=classifiers, 86 | result=outfile) 87 | exp.setup() 88 | exp.run() 89 | 90 | # evaluate: load the results file that was passed to the experiment as "result" 91 | loader = converters.loader_for_file(outfile) 92 | data = loader.load_file(outfile) 93 | matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText", options=["-print-col-names", "-print-row-names"]) 94 | 95 | # comparing classifiers 96 | helper.print_info("Comparing classifiers") 97 | tester = Tester(classname="weka.experiment.PairedCorrectedTTester") 98 | tester.swap_rows_and_cols = False 99 | tester.resultmatrix = matrix 100 | comparison_col = data.attribute_by_name("Correlation_coefficient").index 101 | tester.instances = data 102 | print(tester.header(comparison_col)) 103 | print(tester.multi_resultset_full(0, comparison_col)) 104 | 105 | # comparing datasets (fresh tester on the same result matrix, rows and columns swapped) 106 | helper.print_info("Comparing datasets") 107 | tester = Tester(classname="weka.experiment.PairedCorrectedTTester") 108 | tester.swap_rows_and_cols = True 109 | tester.resultmatrix = matrix 110 | comparison_col = data.attribute_by_name("Correlation_coefficient").index 111 | tester.instances = data 112 | print(tester.header(comparison_col)) 113 | print(tester.multi_resultset_full(0, comparison_col)) 114 | 115 | # plot the result matrix that was assigned to the testers above 116 | plot_exp.plot_experiment(matrix, title="Random split", measure="Correlation coefficient", 117 | key_loc="lower left", bbox_to_anchor=(0, 1, 1, 0), 118 | axes_swapped=True, 119 | show_stdev=True, wait=True) 120 | 121 | 122 | if __name__ == "__main__": 123 | try: 124 | jvm.start() 125 | main() 126 | except Exception as e: 127 | print(traceback.format_exc()) 128 | finally: 129 | jvm.stop() 130 | --------------------------------------------------------------------------------
# /src/wekaexamples/experiments/learning_curve.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# learning_curve.py
# Copyright (C) 2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback
import weka.core.jvm as jvm
from weka.core.converters import load_any_file, save_any_file
from weka.filters import Filter, MultiFilter
from weka.classifiers import Classifier
from weka.experiments import SimpleCrossValidationExperiment, ResultMatrix, Tester
from weka.plot.experiments import plot_experiment
import wekaexamples.helper as helper


def main():
    """
    Generates a learning curve for three classifiers on the anneal dataset.

    Randomized subsets of 10, 30, 50, 70 and 90 percent of the data are
    written to the temp directory, a cross-validation experiment
    (10 runs, 10 folds) is run over them with ZeroR, J48 and RandomForest,
    and the "Percent_correct" comparison is printed and plotted.
    """

    # load data
    source_path = os.path.join(helper.get_data_dir(), "anneal.arff")
    print("Loading %s" % source_path)
    full_data = load_any_file(source_path, class_index="last")

    # generate one dataset file per subset size
    print("Generates learning curves using the experimenter")
    tmp_dir = tempfile.gettempdir()
    sizes = []
    dataset_files = []
    for size in range(10, 110, 20):
        sizes.append(size)
        discard = 100 - size
        print("dataset size: %d%%" % size)
        if discard <= 0:
            # nothing to remove: the original file is used as is
            # (not reached with the current range, which stops at 90)
            dataset_files.append(source_path)
            continue
        steps = [
            Filter(classname="weka.filters.unsupervised.instance.Randomize", options=["-S", "1"]),
            Filter(classname="weka.filters.unsupervised.instance.RemovePercentage",
                   options=["-P", str(discard)]),
        ]
        pipeline = MultiFilter()
        pipeline.filters = steps
        pipeline.inputformat(full_data)
        subset = pipeline.filter(full_data)
        # the relation name is set to the subset size, e.g. "10%"
        subset.relationname = "%d%%" % size
        subset_path = os.path.join(tmp_dir, str(size) + ".arff")
        save_any_file(subset, subset_path)
        dataset_files.append(subset_path)

    # setup experiment
    print("Configuring experiment")
    learners = [
        Classifier(classname="weka.classifiers.rules.ZeroR"),
        Classifier(classname="weka.classifiers.trees.J48"),
        Classifier(classname="weka.classifiers.trees.RandomForest"),
    ]
    results_path = os.path.join(tmp_dir, "results.arff")
    experiment = SimpleCrossValidationExperiment(
        classification=True,
        runs=10,
        folds=10,
        datasets=dataset_files,
        classifiers=learners,
        result=results_path)

    # run experiment
    print("Running experiment")
    experiment.setup()
    experiment.run()

    # evaluate
    print("Evaluating experiment")
    outcome = load_any_file(results_path)
    matrix = ResultMatrix(classname="weka.experiment.ResultMatrixPlainText",
                          options=["-print-row-names", "-print-col-names"])
    # comparing datasets
    tester = Tester(classname="weka.experiment.PairedCorrectedTTester")
    tester.swap_rows_and_cols = True
    tester.resultmatrix = matrix
    accuracy_col = outcome.attribute_by_name("Percent_correct").index
    tester.instances = outcome
    print(tester.header(accuracy_col))
    print(tester.multi_resultset_full(0, accuracy_col))

    # plot
    print("Plotting results")
    size_list = ",".join(map(str, sizes))
    plot_experiment(matrix, title="Learning curve", measure="Percent_correct",
                    key_loc="lower left", bbox_to_anchor=(0, 1, 1, 0),
                    y_label="Accuracy %", x_label="Dataset size: %s%%" % size_list,
                    axes_swapped=True,
                    show_stdev=True, wait=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

# /src/wekaexamples/filters/__init__.py:
# https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/filters/__init__.py
# /src/wekaexamples/filters/filters.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# filters.py
# Copyright (C) 2014-2019 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from weka.core.converters import Loader
from weka.core.stemmers import Stemmer
from weka.core.stopwords import Stopwords
from weka.core.tokenizers import Tokenizer
from weka.filters import Filter, MultiFilter, StringToWordVector


def main():
    """
    Demonstrates several filter setups.

    Applies a single Remove filter and a MultiFilter (Remove + Standardize)
    to iris.arff, a StringToWordVector filter to the Reuters text dataset,
    creates a filter from a partial classname and finally prints the source
    code generated for a ReplaceMissingValues filter built on labor.arff.
    """

    data_dir_sep = os.sep

    # load a dataset
    iris_path = helper.get_data_dir() + data_dir_sep + "iris.arff"
    helper.print_info("Loading dataset: " + iris_path)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    iris_data = arff_loader.load_file(iris_path)

    # remove class attribute
    helper.print_info("Removing class attribute")
    drop_last = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    drop_last.inputformat(iris_data)
    without_class = drop_last.filter(iris_data)

    # use MultiFilter
    helper.print_info("Use MultiFilter")
    drop_first = Filter(classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "first"])
    standardize = Filter(classname="weka.filters.unsupervised.attribute.Standardize")
    chain = MultiFilter()
    chain.filters = [drop_first, standardize]
    chain.inputformat(iris_data)
    chained_output = chain.filter(iris_data)

    # output datasets
    for title, dataset in (
        ("Input", iris_data),
        ("Output", without_class),
        ("Output (MultiFilter)", chained_output),
    ):
        helper.print_title(title)
        print(dataset)

    # load text dataset
    text_path = helper.get_data_dir() + data_dir_sep + "reutersTop10Randomized_1perc_shortened.arff"
    helper.print_info("Loading dataset: " + text_path)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    text_data = arff_loader.load_file(text_path)
    text_data.class_is_last()

    # apply StringToWordVector
    lovins = Stemmer(classname="weka.core.stemmers.IteratedLovinsStemmer")
    rainbow = Stopwords(classname="weka.core.stopwords.Rainbow")
    words = Tokenizer(classname="weka.core.tokenizers.WordTokenizer")
    to_vector = StringToWordVector(options=["-W", "10", "-L", "-C"])
    to_vector.stemmer = lovins
    to_vector.stopwords = rainbow
    to_vector.tokenizer = words
    to_vector.inputformat(text_data)
    vectorized = to_vector.filter(text_data)

    helper.print_title("Input (StringToWordVector)")
    print(text_data)
    helper.print_title("Output (StringToWordVector)")
    print(vectorized)

    # partial classname
    helper.print_title("Creating filter from partial classname")
    partial = ".Standardize"
    resolved = Filter(classname=partial)
    print(partial + " --> " + resolved.classname)

    # source code
    helper.print_info("Generate source code")
    labor_path = helper.get_data_dir() + data_dir_sep + "labor.arff"
    helper.print_info("Loading dataset: " + labor_path)
    arff_loader = Loader(classname="weka.core.converters.ArffLoader")
    labor_data = arff_loader.load_file(labor_path)
    fill_missing = Filter(classname="weka.filters.unsupervised.attribute.ReplaceMissingValues")
    fill_missing.inputformat(labor_data)
    # the filtered data is not used; the filter is run before generating its source
    fill_missing.filter(labor_data)
    print(fill_missing.to_source("MyReplaceMissingValues", labor_data))


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

# /src/wekaexamples/flow/__init__.py:
# https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/flow/__init__.py
# /src/wekaexamples/flow/attribute_selection.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# attribute_selection.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Tee, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.attribute_selection import ASSearch, ASEvaluation
from weka.flow.transformer import LoadDataset, AttributeSelection


def main():
    """
    Builds and runs a flow that performs attribute selection on iris.arff.

    The flow loads the dataset, runs attribute selection (BestFirst search
    with the CfsSubsetEval evaluator) and sends the "Results" and "Reduced"
    container values to two separate console outputs.
    """

    # setup the flow
    helper.print_title("Attribute selection")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris.arff"])

    flow = Flow(name="attribute selection")

    supplier = FileSupplier()
    supplier.config["files"] = [dataset_path]
    flow.actors.append(supplier)

    loader = LoadDataset()
    loader.config["incremental"] = False
    flow.actors.append(loader)

    selection = AttributeSelection()
    selection.config["search"] = ASSearch(classname="weka.attributeSelection.BestFirst")
    selection.config["eval"] = ASEvaluation(classname="weka.attributeSelection.CfsSubsetEval")
    flow.actors.append(selection)

    # one Tee per container value: (tee name, picked value, console prefix)
    branches = (
        ("output results", "Results", "Attribute selection results:"),
        ("reduced dataset", "Reduced", "Reduced dataset:\n\n"),
    )
    for tee_name, picked_value, prefix in branches:
        branch = Tee()
        branch.name = tee_name
        flow.actors.append(branch)

        value_picker = ContainerValuePicker()
        value_picker.config["value"] = picked_value
        value_picker.config["switch"] = True
        branch.actors.append(value_picker)

        output = Console()
        output.config["prefix"] = prefix
        branch.actors.append(output)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/build_classifier_incrementally.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# build_classifier_incrementally.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Tee, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, GetStorageValue
from simflow.transformer import InitStorageValue, UpdateStorageValue
from weka.classifiers import Classifier
from weka.flow.transformer import LoadDataset, ClassSelector, Train


def main():
    """
    Builds and runs a flow that trains NaiveBayesUpdateable incrementally.

    iris.arff is loaded in incremental mode with the last attribute as
    class; a storage value named "counter" is initialized to 0 and updated
    with the expression "{X} + 1". A Tee guarded by the condition
    "@{counter} % 50 == 0" outputs the counter value and the model.
    """

    # setup the flow
    interval = 50
    helper.print_title("build classifier incrementally")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris.arff"])

    flow = Flow(name="build classifier incrementally")

    supplier = FileSupplier()
    supplier.config["files"] = [dataset_path]
    flow.actors.append(supplier)

    counter_init = InitStorageValue()
    counter_init.config["storage_name"] = "counter"
    counter_init.config["value"] = 0
    flow.actors.append(counter_init)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    class_selector = ClassSelector()
    class_selector.config["index"] = "last"
    flow.actors.append(class_selector)

    counter_update = UpdateStorageValue()
    counter_update.config["storage_name"] = "counter"
    counter_update.config["expression"] = "{X} + 1"
    flow.actors.append(counter_update)

    trainer = Train()
    trainer.config["setup"] = Classifier(classname="weka.classifiers.bayes.NaiveBayesUpdateable")
    flow.actors.append(trainer)

    model_picker = ContainerValuePicker()
    model_picker.config["value"] = "Model"
    model_picker.config["switch"] = True
    flow.actors.append(model_picker)

    # conditional branch: name and condition are both derived from the interval
    periodic = Tee(name="output model every %d instances" % interval)
    periodic.config["condition"] = "@{counter} %% %d == 0" % interval
    flow.actors.append(periodic)

    report_count = Trigger(name="output # of instances")
    periodic.actors.append(report_count)

    counter_read = GetStorageValue()
    counter_read.config["storage_name"] = "counter"
    report_count.actors.append(counter_read)

    count_console = Console()
    count_console.config["prefix"] = "# of instances: "
    report_count.actors.append(count_console)

    model_console = Console(name="output model")
    periodic.actors.append(model_console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/build_clusterer_incrementally.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# build_clusterer_incrementally.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import weka.filters as filters
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Tee, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, GetStorageValue
from simflow.transformer import InitStorageValue, UpdateStorageValue
from weka.clusterers import Clusterer
from weka.flow.transformer import LoadDataset, Train, Filter


def main():
    """
    Builds and runs a flow that trains the Cobweb clusterer incrementally.

    iris.arff is loaded in incremental mode and passed through a Remove
    filter configured with "-R last" (the actor is named "remove class
    attribute"); a storage value named "counter" is initialized to 0 and
    updated with the expression "{X} + 1". A Tee guarded by the condition
    "@{counter} % 50 == 0" outputs the counter value and the model.
    """

    # setup the flow
    interval = 50
    helper.print_title("build clusterer incrementally")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris.arff"])

    flow = Flow(name="build clusterer incrementally")

    supplier = FileSupplier()
    supplier.config["files"] = [dataset_path]
    flow.actors.append(supplier)

    counter_init = InitStorageValue()
    counter_init.config["storage_name"] = "counter"
    counter_init.config["value"] = 0
    flow.actors.append(counter_init)

    loader = LoadDataset()
    loader.config["incremental"] = True
    flow.actors.append(loader)

    # flow actor (weka.flow.transformer.Filter) wrapping a weka.filters.Filter setup
    drop_class = Filter(name="remove class attribute")
    drop_class.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flow.actors.append(drop_class)

    counter_update = UpdateStorageValue()
    counter_update.config["storage_name"] = "counter"
    counter_update.config["expression"] = "{X} + 1"
    flow.actors.append(counter_update)

    trainer = Train()
    trainer.config["setup"] = Clusterer(classname="weka.clusterers.Cobweb")
    flow.actors.append(trainer)

    model_picker = ContainerValuePicker()
    model_picker.config["value"] = "Model"
    model_picker.config["switch"] = True
    flow.actors.append(model_picker)

    # conditional branch: name and condition are both derived from the interval
    periodic = Tee(name="output model every %d instances" % interval)
    periodic.config["condition"] = "@{counter} %% %d == 0" % interval
    flow.actors.append(periodic)

    report_count = Trigger(name="output # of instances")
    periodic.actors.append(report_count)

    counter_read = GetStorageValue()
    counter_read.config["storage_name"] = "counter"
    report_count.actors.append(counter_read)

    count_console = Console()
    count_console.config["prefix"] = "# of instances: "
    report_count.actors.append(count_console)

    model_console = Console(name="output model")
    periodic.actors.append(model_console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/build_evaluate_classifier.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# build_evaluate_classifier.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, Start, GetStorageValue
from weka.classifiers import Classifier
from weka.flow.transformer import LoadDataset, ClassSelector, Train, Evaluate, SetStorageValue, EvaluationSummary


def main():
    """
    Builds and runs a flow that trains and then evaluates a J48 classifier.

    The first trigger loads iris.arff, selects the last attribute as class,
    stores the dataset under "data", trains J48 and stores the picked
    "Model" value under "model". The second trigger reads "data" back from
    storage, evaluates with the stored "model" and prints the evaluation
    summary (with "matrix" enabled) on the console.
    """

    # setup the flow
    helper.print_title("build and evaluate classifier")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris.arff"])

    flow = Flow(name="build and evaluate classifier")

    flow.actors.append(Start())

    # --- first trigger: build the classifier, keep data and model in storage
    build_stage = Trigger()
    build_stage.name = "build and store classifier"
    flow.actors.append(build_stage)

    supplier = FileSupplier()
    supplier.config["files"] = [dataset_path]
    build_stage.actors.append(supplier)

    build_stage.actors.append(LoadDataset())

    class_selector = ClassSelector()
    class_selector.config["index"] = "last"
    build_stage.actors.append(class_selector)

    store_data = SetStorageValue()
    store_data.config["storage_name"] = "data"
    build_stage.actors.append(store_data)

    trainer = Train()
    trainer.config["setup"] = Classifier(classname="weka.classifiers.trees.J48")
    build_stage.actors.append(trainer)

    model_picker = ContainerValuePicker()
    model_picker.config["value"] = "Model"
    build_stage.actors.append(model_picker)

    # the model is stored from inside the picker's own actor list
    store_model = SetStorageValue()
    store_model.config["storage_name"] = "model"
    model_picker.actors.append(store_model)

    # --- second trigger: evaluate the stored model on the stored data
    eval_stage = Trigger()
    eval_stage.name = "evaluate classifier"
    flow.actors.append(eval_stage)

    fetch_data = GetStorageValue()
    fetch_data.config["storage_name"] = "data"
    eval_stage.actors.append(fetch_data)

    evaluator = Evaluate()
    evaluator.config["storage_name"] = "model"
    eval_stage.actors.append(evaluator)

    report = EvaluationSummary()
    report.config["matrix"] = True
    eval_stage.actors.append(report)

    eval_stage.actors.append(Console())

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/build_evaluate_clusterer.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# build_evaluate_clusterer.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, Start, GetStorageValue
from weka.clusterers import Clusterer
from weka.flow.transformer import LoadDataset, Train, Evaluate, SetStorageValue, EvaluationSummary


def main():
    """
    Builds and runs a flow that trains and then evaluates a SimpleKMeans clusterer.

    The first trigger loads iris_no_class.arff, stores the dataset under
    "data", trains SimpleKMeans and stores the picked "Model" value under
    "model". The second trigger reads "data" back from storage, evaluates
    with the stored "model" and prints the evaluation summary on the console.

    The title, flow name and trigger names say "clusterer"; they previously
    said "classifier", copied from the classifier example.
    """

    # setup the flow
    helper.print_title("build and evaluate clusterer")
    iris = helper.get_data_dir() + os.sep + "iris_no_class.arff"

    flow = Flow(name="build and evaluate clusterer")

    start = Start()
    flow.actors.append(start)

    # first trigger: build the clusterer, keep data and model in storage
    build_save = Trigger()
    build_save.name = "build and store clusterer"
    flow.actors.append(build_save)

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    build_save.actors.append(filesupplier)

    loaddataset = LoadDataset()
    build_save.actors.append(loaddataset)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "data"
    build_save.actors.append(ssv)

    train = Train()
    train.config["setup"] = Clusterer(classname="weka.clusterers.SimpleKMeans")
    build_save.actors.append(train)

    pick = ContainerValuePicker()
    pick.config["value"] = "Model"
    build_save.actors.append(pick)

    # the model is stored from inside the picker's own actor list
    ssv = SetStorageValue()
    ssv.config["storage_name"] = "model"
    pick.actors.append(ssv)

    # second trigger: evaluate the stored model on the stored data
    evaluate = Trigger()
    evaluate.name = "evaluate clusterer"
    flow.actors.append(evaluate)

    gsv = GetStorageValue()
    gsv.config["storage_name"] = "data"
    evaluate.actors.append(gsv)

    evl = Evaluate()
    evl.config["storage_name"] = "model"
    evaluate.actors.append(evl)

    summary = EvaluationSummary()
    summary.config["matrix"] = True
    evaluate.actors.append(summary)

    console = Console()
    evaluate.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/build_save_clusterer.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# build_save_clusterer.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.clusterers import Clusterer
from weka.flow.sink import ModelWriter
from weka.flow.transformer import LoadDataset, Train


def main():
    """
    Builds and runs a flow that trains SimpleKMeans and saves the model.

    iris_no_class.arff is loaded and used to train the clusterer; the
    picked "Model" value goes to a console and a ModelWriter is configured
    to write to "simplekmeans.model" in the temp directory.
    """

    # setup the flow
    helper.print_title("build and save clusterer")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris_no_class.arff"])

    flow = Flow(name="build and save clusterer")

    supplier = FileSupplier()
    supplier.config["files"] = [dataset_path]
    flow.actors.append(supplier)

    flow.actors.append(LoadDataset())

    trainer = Train()
    trainer.config["setup"] = Clusterer(classname="weka.clusterers.SimpleKMeans")
    flow.actors.append(trainer)

    model_picker = ContainerValuePicker()
    model_picker.config["value"] = "Model"
    flow.actors.append(model_picker)

    # the console is attached to the picker's own actor list
    model_picker.actors.append(Console())

    model_writer = ModelWriter()
    model_writer.config["output"] = os.sep.join([str(tempfile.gettempdir()), "simplekmeans.model"])
    flow.actors.append(model_writer)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/build_save_load_classifier.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# build_save_load_classifier.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, Start
from weka.classifiers import Classifier
from weka.flow.sink import ModelWriter
from weka.flow.transformer import LoadDataset, ClassSelector, Train, ModelReader


def main():
    """
    Builds and runs a flow that trains, saves and reloads a J48 classifier.

    The first trigger trains J48 on iris.arff (last attribute as class),
    prints the picked "Model" value with the prefix "built: " and writes
    the model to "j48.model" in the temp directory. The second trigger
    supplies that file to a ModelReader and prints the picked "Model"
    value with the prefix "loaded: ".
    """

    # setup the flow
    helper.print_title("build, save and load classifier")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris.arff"])
    model_path = os.sep.join([str(tempfile.gettempdir()), "j48.model"])

    flow = Flow(name="build, save and load classifier")

    flow.actors.append(Start())

    # --- first trigger: train the classifier and write it to disk
    build_stage = Trigger()
    build_stage.name = "build and save classifier"
    flow.actors.append(build_stage)

    data_supplier = FileSupplier()
    data_supplier.config["files"] = [dataset_path]
    build_stage.actors.append(data_supplier)

    build_stage.actors.append(LoadDataset())

    class_selector = ClassSelector()
    class_selector.config["index"] = "last"
    build_stage.actors.append(class_selector)

    trainer = Train()
    trainer.config["setup"] = Classifier(classname="weka.classifiers.trees.J48")
    build_stage.actors.append(trainer)

    built_picker = ContainerValuePicker()
    built_picker.config["value"] = "Model"
    build_stage.actors.append(built_picker)

    built_console = Console()
    built_console.config["prefix"] = "built: "
    built_picker.actors.append(built_console)

    model_writer = ModelWriter()
    model_writer.config["output"] = model_path
    build_stage.actors.append(model_writer)

    # --- second trigger: read the model file back in
    load_stage = Trigger()
    load_stage.name = "load classifier"
    flow.actors.append(load_stage)

    model_supplier = FileSupplier()
    model_supplier.config["files"] = [model_path]
    load_stage.actors.append(model_supplier)

    load_stage.actors.append(ModelReader())

    loaded_picker = ContainerValuePicker()
    loaded_picker.config["value"] = "Model"
    load_stage.actors.append(loaded_picker)

    loaded_console = Console()
    loaded_console.config["prefix"] = "loaded: "
    loaded_picker.actors.append(loaded_console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/classify_data.py

# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# classify_data.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, Start
from weka.classifiers import Classifier
from weka.flow.sink import ModelWriter
from weka.flow.transformer import LoadDataset, ClassSelector, Train, SetStorageValue, Predict


def main():
    """
    Builds and runs a flow that trains J48 and makes predictions two ways.

    The first trigger trains J48 on iris.arff (last attribute as class),
    stores the dataset under "data" and the training output under "model",
    prints the picked "Model" value and writes the model to "j48.model" in
    the temp directory. Two further triggers reload iris.arff in incremental
    mode and run Predict: one configured with the model file, the other
    with the storage name "model".
    """

    # setup the flow
    helper.print_title("classify data")
    dataset_path = os.sep.join([helper.get_data_dir(), "iris.arff"])
    model_path = os.sep.join([str(tempfile.gettempdir()), "j48.model"])

    flow = Flow(name="classify data")

    flow.actors.append(Start())

    # --- first trigger: train the classifier, keep it in storage and on disk
    build_stage = Trigger()
    build_stage.name = "build and save classifier"
    flow.actors.append(build_stage)

    data_supplier = FileSupplier()
    data_supplier.config["files"] = [dataset_path]
    build_stage.actors.append(data_supplier)

    build_stage.actors.append(LoadDataset())

    class_selector = ClassSelector()
    class_selector.config["index"] = "last"
    build_stage.actors.append(class_selector)

    store_data = SetStorageValue()
    store_data.config["storage_name"] = "data"
    build_stage.actors.append(store_data)

    trainer = Train()
    trainer.config["setup"] = Classifier(classname="weka.classifiers.trees.J48")
    build_stage.actors.append(trainer)

    store_model = SetStorageValue()
    store_model.config["storage_name"] = "model"
    build_stage.actors.append(store_model)

    built_picker = ContainerValuePicker()
    built_picker.config["value"] = "Model"
    build_stage.actors.append(built_picker)

    built_console = Console()
    built_console.config["prefix"] = "built: "
    built_picker.actors.append(built_console)

    model_writer = ModelWriter()
    model_writer.config["output"] = model_path
    build_stage.actors.append(model_writer)

    # --- prediction triggers: identical pipelines that differ only in where
    # Predict gets its model from: (trigger name, config key, config value, prefix)
    prediction_stages = (
        ("make predictions (serialized model)", "model", model_path, "serialized: "),
        ("make predictions (model from storage)", "storage_name", "model", "storage: "),
    )
    for stage_name, model_key, model_ref, prefix in prediction_stages:
        stage = Trigger()
        stage.name = stage_name
        flow.actors.append(stage)

        stage_supplier = FileSupplier()
        stage_supplier.config["files"] = [dataset_path]
        stage.actors.append(stage_supplier)

        stage_loader = LoadDataset()
        stage_loader.config["incremental"] = True
        stage.actors.append(stage_loader)

        stage_selector = ClassSelector()
        stage_selector.config["index"] = "last"
        stage.actors.append(stage_selector)

        predictor = Predict()
        predictor.config[model_key] = model_ref
        stage.actors.append(predictor)

        stage_console = Console()
        stage_console.config["prefix"] = prefix
        stage.actors.append(stage_console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()
# /src/wekaexamples/flow/cluster_data.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# cluster_data.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, ContainerValuePicker, Trigger, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier, Start
from weka.clusterers import Clusterer
from weka.flow.sink import ModelWriter
from weka.flow.transformer import LoadDataset, Train, SetStorageValue, Predict


def _attach(parent, actor, name=None, options=None):
    """
    Configures an actor and appends it to the parent's list of actors.

    :param parent: the actor handler (flow, trigger, ...) to append to
    :param actor: the actor to configure and append
    :param name: the name to assign to the actor, skipped if None
    :param options: the config entries to set on the actor, skipped if None
    :return: the actor that was appended
    """
    if name is not None:
        actor.name = name
    if options is not None:
        for key, value in options.items():
            actor.config[key] = value
    parent.actors.append(actor)
    return actor


def main():
    """
    Builds, saves and applies a SimpleKMeans clusterer on the iris dataset.

    The flow holds three triggers: the first one trains the clusterer on the
    class-less iris data, stores the result under "model" and writes it to a
    file in the temp directory; the other two make predictions on the same
    dataset, one using the serialized model file and one using the model
    from storage.
    """

    helper.print_title("cluster data")
    dataset_file = os.sep.join((helper.get_data_dir(), "iris_no_class.arff"))
    model_file = os.sep.join((str(tempfile.gettempdir()), "simplekmeans.model"))

    flow = Flow(name="cluster data")
    _attach(flow, Start())

    # trigger 1: train the clusterer and serialize it
    building = _attach(flow, Trigger(), name="build and save clusterer")
    _attach(building, FileSupplier(), options={"files": [dataset_file]})
    _attach(building, LoadDataset())
    _attach(building, SetStorageValue(), options={"storage_name": "data"})
    _attach(building, Train(), options={"setup": Clusterer(classname="weka.clusterers.SimpleKMeans")})
    _attach(building, SetStorageValue(), options={"storage_name": "model"})
    picker = _attach(building, ContainerValuePicker(), options={"value": "Model"})
    _attach(picker, Console(), options={"prefix": "built: "})
    _attach(building, ModelWriter(), options={"output": model_file})

    # trigger 2: predict with the model file written above
    from_file = _attach(flow, Trigger(), name="make predictions (serialized model)")
    _attach(from_file, FileSupplier(), options={"files": [dataset_file]})
    _attach(from_file, LoadDataset(), options={"incremental": True})
    _attach(from_file, Predict(), options={"model": model_file})
    _attach(from_file, Console(), options={"prefix": "serialized: "})

    # trigger 3: predict with the model kept in storage
    from_storage = _attach(flow, Trigger(), name="make predictions (model from storage)")
    _attach(from_storage, FileSupplier(), options={"files": [dataset_file]})
    _attach(from_storage, LoadDataset(), options={"incremental": True})
    _attach(from_storage, Predict(), options={"storage_name": "model"})
    _attach(from_storage, Console(), options={"prefix": "storage: "})

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/combine_storage.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# combine_storage.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
from simflow.control import Flow, Trigger, run_flow
from simflow.sink import Console
from simflow.source import ForLoop, CombineStorage
from simflow.transformer import SetStorageValue


def _attach(parent, actor, name=None, options=None):
    """
    Configures an actor and appends it to the parent's list of actors.

    :param parent: the actor handler (flow, trigger, ...) to append to
    :param actor: the actor to configure and append
    :param name: the name to assign to the actor, skipped if None
    :param options: the config entries to set on the actor, skipped if None
    :return: the actor that was appended
    """
    if name is not None:
        actor.name = name
    if options is not None:
        for key, value in options.items():
            actor.config[key] = value
    parent.actors.append(actor)
    return actor


def main():
    """
    Runs two nested for-loops and prints a string combined from storage.

    The outer loop value is stored as "max" and also serves as upper bound
    of the inner loop; the inner loop value is stored as "inner". Both
    storage values are combined using the format "@{max} / @{inner}".
    """

    flow = Flow(name="combine storage")

    # outer loop, its current value ends up in storage as "max"
    _attach(flow, ForLoop(), name="outer", options={"max": 3})
    _attach(flow, SetStorageValue(), options={"storage_name": "max"})

    # inner loop, its current value ends up in storage as "inner"
    outer_trigger = _attach(flow, Trigger())
    _attach(outer_trigger, ForLoop(), name="inner", options={"max": "@{max}"})
    _attach(outer_trigger, SetStorageValue(), options={"storage_name": "inner"})

    # combine both storage values and output them
    inner_trigger = _attach(outer_trigger, Trigger())
    _attach(inner_trigger, CombineStorage(), options={"format": "@{max} / @{inner}"})
    _attach(inner_trigger, Console())

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/conversions.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# conversions.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.sink import Console
from simflow.source import StringConstants
from simflow.transformer import Convert
from weka.flow.conversion import AnyToCommandline, CommandlineToAny


def main():
    """
    Tests some conversions.

    Sends two classifier class names through a CommandlineToAny conversion
    (using the wrapper weka.classifiers.Classifier), then through an
    AnyToCommandline conversion, and outputs the result on the console with
    the prefix "setup: ".
    """

    # setup the flow
    helper.print_title("conversions")

    flow = Flow(name="conversions")

    strings = StringConstants()
    strings.config["strings"] = ["weka.classifiers.trees.J48", "weka.classifiers.functions.SMO"]
    flow.actors.append(strings)

    # first conversion: command-line string -> wrapped object
    c2a = CommandlineToAny()
    c2a.config["wrapper"] = "weka.classifiers.Classifier"
    convert1 = Convert()
    convert1.config["setup"] = c2a
    flow.actors.append(convert1)

    # second conversion: object -> command-line string
    convert2 = Convert()
    convert2.config["setup"] = AnyToCommandline()
    flow.actors.append(convert2)

    console = Console()
    console.config["prefix"] = "setup: "
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()
# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/crossvalidate_classifier.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# crossvalidate_classifier.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, Branch, Sequence, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.classifiers import Classifier
from weka.flow.sink import ClassifierErrors, ROC, PRC
from weka.flow.transformer import LoadDataset, ClassSelector, CrossValidate, EvaluationSummary


def _attach(parent, actor, name=None, options=None):
    """
    Configures an actor and appends it to the parent's list of actors.

    :param parent: the actor handler (flow, branch, ...) to append to
    :param actor: the actor to configure and append
    :param name: the name to assign to the actor, skipped if None
    :param options: the config entries to set on the actor, skipped if None
    :return: the actor that was appended
    """
    if name is not None:
        actor.name = name
    if options is not None:
        for key, value in options.items():
            actor.config[key] = value
    parent.actors.append(actor)
    return actor


def main():
    """
    Cross-validates J48 on the iris dataset and displays the results.

    The evaluation is forwarded to a branch with four sequences: a textual
    summary on the console, the classifier errors plot, the ROC curves and
    the PRC curves.
    """

    helper.print_title("Cross-validate classifier")
    dataset_file = os.sep.join((helper.get_data_dir(), "iris.arff"))

    flow = Flow(name="cross-validate classifier")

    # load the data and evaluate the classifier
    _attach(flow, FileSupplier(), options={"files": [dataset_file]})
    _attach(flow, LoadDataset())
    _attach(flow, ClassSelector(), options={"index": "last"})
    _attach(flow, CrossValidate(), options={"setup": Classifier(classname="weka.classifiers.trees.J48")})

    branch = _attach(flow, Branch())

    # textual summary
    summary_seq = _attach(branch, Sequence(), name="summary")
    _attach(summary_seq, EvaluationSummary(), options={
        "title": "=== J48/iris ===",
        "complexity": False,
        "matrix": True,
    })
    _attach(summary_seq, Console())

    # classifier errors plot
    errors_seq = _attach(branch, Sequence(), name="errors")
    _attach(errors_seq, ClassifierErrors(), options={"wait": False})

    # ROC curves
    roc_seq = _attach(branch, Sequence(), name="roc")
    _attach(roc_seq, ROC(), options={"wait": False, "class_index": [0, 1, 2]})

    # PRC curves; the only plot that has "wait" enabled
    prc_seq = _attach(branch, Sequence(), name="prc")
    _attach(prc_seq, PRC(), options={"wait": True, "class_index": [0, 1, 2]})

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()
# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/crossvalidate_clusterer.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# crossvalidate_clusterer.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import weka.filters as filters
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.clusterers import Clusterer
from weka.flow.transformer import LoadDataset, Filter, CrossValidate


def main():
    """
    Cross-validates the EM clusterer on the iris dataset.

    The dataset gets loaded, its last attribute (the class) is removed with
    the Remove filter and the result of the cross-validation is output on
    the console with the prefix "Loglikelihood: ".
    """

    # setup the flow
    helper.print_title("Cross-validate clusterer")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="cross-validate clusterer")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.name = "Remove class"
    # the Filter actor takes its filter via the "setup" option, as in the
    # other filter examples; the previous "filter" key was presumably ignored
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flow.actors.append(flter)

    cv = CrossValidate()
    cv.config["setup"] = Clusterer(classname="weka.clusterers.EM")
    flow.actors.append(cv)

    console = Console()
    console.config["prefix"] = "Loglikelihood: "
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/dataset_matrixplot.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# dataset_matrixplot.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.source import FileSupplier
from weka.flow.sink import MatrixPlot
from weka.flow.transformer import LoadDataset


def main():
    """
    Displays a dataset as matrixplot.

    Loads the iris dataset and forwards it to a MatrixPlot sink that has its
    "percent" option set to 50.0.
    """

    # setup the flow
    helper.print_title("Matrix plot")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="matrix plot")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    plot = MatrixPlot()
    plot.config["percent"] = 50.0
    flow.actors.append(plot)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/dump_instances.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# dump_instances.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback

import weka.core.jvm as jvm
import weka.filters as filters
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.source import FileSupplier
from weka.flow.sink import InstanceDumper
from weka.flow.transformer import LoadDataset, Filter, RenameRelation


def main():
    """
    Loads/filters a dataset incrementally and saves it to a new file.

    The iris dataset has its last attribute removed, gets renamed to
    "iris-reduced" and is written to "out.arff" in the temp directory.
    """

    # setup the flow
    helper.print_title("Load/filter/save dataset (incrementally)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="Load/filter/save dataset (incrementally)")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flow.actors.append(flter)

    rename = RenameRelation()
    rename.config["name"] = "iris-reduced"
    flow.actors.append(rename)

    dumper = InstanceDumper()
    dumper.config["output"] = tempfile.gettempdir() + os.sep + "out.arff"
    flow.actors.append(dumper)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()
# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/filter_datasets.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# filter_datasets.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import weka.filters as filters
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.flow.transformer import LoadDataset, Filter


def _filter_datasets(mode, incremental):
    """
    Builds and runs the flow that filters the iris and anneal datasets.

    Both datasets are loaded, passed through the Remove filter (options
    "-R 1", relation name kept) and output on the console.

    :param mode: the text placed in parentheses in the title and flow name
    :param incremental: whether to enable incremental loading of the datasets
    """

    # setup the flow
    helper.print_title("Filter datasets (" + mode + ")")
    iris = helper.get_data_dir() + os.sep + "iris.arff"
    anneal = helper.get_data_dir() + os.sep + "anneal.arff"

    flow = Flow(name="filter datasets (" + mode + ")")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris, anneal]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    if incremental:
        # batch mode leaves the option untouched, i.e., at the actor's default
        loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "1"])
    flter.config["keep_relationname"] = True
    flow.actors.append(flter)

    console = Console()
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


def batch_mode():
    """
    Loads/filters a dataset in batch mode.
    """
    _filter_datasets("batch mode", False)


def incremental():
    """
    Loads/filters a dataset incrementally.
    """
    _filter_datasets("incrementally", True)


def main():
    """
    Runs the batch mode example followed by the incremental one.
    """
    batch_mode()
    incremental()


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/for_loop.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# for_loop.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
from simflow.control import Flow, Trigger, run_flow
from simflow.sink import Console
from simflow.source import ForLoop
from simflow.transformer import SetStorageValue


def main():
    """
    Runs two nested for-loops and prints the values of the inner one.

    The value of the outer loop is stored as "max" and used as the upper
    bound of the inner loop via "@{max}".
    """

    # setup the flow
    flow = Flow(name="example loop")

    outer = ForLoop()
    outer.name = "outer"
    outer.config["max"] = 3
    flow.actors.append(outer)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "max"
    flow.actors.append(ssv)

    trigger = Trigger()
    flow.actors.append(trigger)

    inner = ForLoop()
    inner.name = "inner"
    inner.config["max"] = "@{max}"
    trigger.actors.append(inner)

    console = Console()
    trigger.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/generate_dataset.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# generate_dataset.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import weka.datagenerators as datagen
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.flow.source import DataGenerator
from weka.flow.transformer import LoadDataset
from weka.core.converters import Loader


def _setup_and_execute(flow, print_tree=False):
    """
    Sets up and executes the flow, printing any error message it returns.

    The flow always gets wrapped up and cleaned up afterwards, even when
    the execution raises an exception.

    :param flow: the flow to run
    :param print_tree: whether to print the flow's tree before executing it
    """
    msg = flow.setup()
    try:
        if msg is None:
            if print_tree:
                print("\n" + flow.tree + "\n")
            msg = flow.execute()
            if msg is not None:
                print("Error executing flow:\n" + msg)
        else:
            print("Error setting up flow:\n" + msg)
    finally:
        flow.wrapup()
        flow.cleanup()


def load_batch():
    """
    Loads a dataset in batch mode.
    """

    # setup the flow
    helper.print_title("Load dataset (batch)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    # unlike load_incremental(), the "incremental" option is not enabled
    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


def load_incremental():
    """
    Loads a dataset incrementally.
    """

    # setup the flow
    helper.print_title("Load dataset (incremental)")
    iris = helper.get_data_dir() + os.sep + "iris.arff"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = True
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


def load_custom_loader():
    """
    Loads a dataset using a custom loader.

    The CSV version of the iris dataset is read in batch mode with the
    weka.core.converters.CSVLoader class.
    """

    # setup the flow
    helper.print_title("Load dataset (custom loader)")
    iris = helper.get_data_dir() + os.sep + "iris.csv"

    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    loaddataset.config["incremental"] = False
    loaddataset.config["use_custom_loader"] = True
    loaddataset.config["custom_loader"] = Loader(classname="weka.core.converters.CSVLoader")
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    # run the flow (without printing the tree)
    _setup_and_execute(flow)


def main():
    """
    Generates a dataset with the Agrawal data generator and prints it.
    """
    # setup the flow
    helper.print_title("Generate dataset")

    flow = Flow(name="generate dataset")

    generator = DataGenerator()
    generator.config["setup"] = datagen.DataGenerator(classname="weka.datagenerators.classifiers.classification.Agrawal")
    flow.actors.append(generator)

    console = Console()
    flow.actors.append(console)

    # run the flow
    _setup_and_execute(flow, print_tree=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/init_storage_value.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# init_storage_value.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
from simflow.control import Flow, Trigger, run_flow
from simflow.sink import Console
from simflow.source import ForLoop, Start
from simflow.transformer import InitStorageValue


def _attach(parent, actor, name=None, options=None):
    """
    Configures an actor and appends it to the parent's list of actors.

    :param parent: the actor handler (flow, trigger, ...) to append to
    :param actor: the actor to configure and append
    :param name: the name to assign to the actor, skipped if None
    :param options: the config entries to set on the actor, skipped if None
    :return: the actor that was appended
    """
    if name is not None:
        actor.name = name
    if options is not None:
        for key, value in options.items():
            actor.config[key] = value
    parent.actors.append(actor)
    return actor


def main():
    """
    Initializes a storage value and uses it as upper bound of a for-loop.

    The storage value "max" gets initialized with the expression "int(3)";
    the loop below the trigger refers to it via "@{max}" and outputs its
    values on the console.
    """

    flow = Flow(name="init storage value")
    _attach(flow, Start())

    # initialize the storage value that the loop refers to
    _attach(flow, InitStorageValue(), options={"storage_name": "max", "value": "int(3)"})

    # loop and output
    looping = _attach(flow, Trigger())
    _attach(looping, ForLoop(), name="inner", options={"max": "@{max}"})
    _attach(looping, Console())

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        # example script: report the stack trace instead of crashing
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/list_file.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# list_file.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import tempfile
import traceback

import weka.core.jvm as jvm
from simflow.control import Flow, run_flow
from simflow.sink import Console
from simflow.source import ListFiles


def main():
    """
    Builds and runs the "list files" example flow.

    A ListFiles actor is pointed at the system temp directory (files only,
    no directories, non-recursive, regexp ".*r.*") and is followed by a
    Console actor that uses the prefix "Match: ".
    """

    workflow = Flow(name="list files")

    # options are applied in the listed order
    lister_options = (
        ("dir", str(tempfile.gettempdir())),
        ("list_files", True),
        ("list_dirs", False),
        ("recursive", False),
        ("regexp", ".*r.*"),
    )
    lister = ListFiles()
    for option, value in lister_options:
        lister.config[option] = value
    workflow.actors.append(lister)

    printer = Console()
    printer.config["prefix"] = "Match: "
    workflow.actors.append(printer)

    # execute, printing the flow tree and cleaning up afterwards
    run_flow(workflow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    # the JVM is always stopped again, even if the example fails
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/load_database.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# load_database.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from weka.flow.source import LoadDatabase
from simflow.sink import Console


def main():
    """
    Loads data from a database and outputs it on the console.

    The LoadDatabase actor is configured with placeholder values
    (HOSTNAME, DBNAME, DBUSER, DBPW, TABLE) that must be replaced before
    the example can run against a real database.
    """

    # setup the flow
    helper.print_title("Load from database")

    flow = Flow(name="load from database")

    loaddatabase = LoadDatabase()
    loaddatabase.config["db_url"] = "jdbc:mysql://HOSTNAME:3306/DBNAME"
    loaddatabase.config["user"] = "DBUSER"
    loaddatabase.config["password"] = "DBPW"
    loaddatabase.config["query"] = "select * from TABLE"
    flow.actors.append(loaddatabase)

    console = Console()
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        # placeholder path: the JDBC driver jar has to be on the JVM's classpath
        mysql_jar = "/some/where/mysql-connector-java-X.Y.Z-bin.jar"
        jvm.start(class_path=[mysql_jar])
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        # always shut the JVM down again
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/load_dataset.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# load_dataset.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, run_flow
from simflow.sink import Console
from simflow.source import FileSupplier
from weka.core.converters import Loader
from weka.flow.transformer import LoadDataset


def _build_load_flow(dataset, load_options):
    """
    Assembles the flow shared by all loading examples.

    A FileSupplier (supplying just the given file), a LoadDataset actor and
    a Console actor are appended to the flow in that order.

    :param dataset: the path of the dataset file to supply
    :type dataset: str
    :param load_options: the options to set on the LoadDataset actor, applied in iteration order
    :type load_options: dict
    :return: the assembled flow (not yet set up)
    :rtype: Flow
    """
    flow = Flow(name="load dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [dataset]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    for option, value in load_options.items():
        loaddataset.config[option] = value
    flow.actors.append(loaddataset)

    console = Console()
    flow.actors.append(console)

    return flow


def _setup_and_execute(flow):
    """
    Sets up and executes the flow step by step, printing any error message.

    The flow is always wrapped up and cleaned up afterwards, no matter
    whether setup or execution reported an error.

    :param flow: the flow to run
    :type flow: Flow
    """
    msg = flow.setup()
    if msg is None:
        msg = flow.execute()
        if msg is not None:
            print("Error executing flow:\n" + msg)
    else:
        print("Error setting up flow:\n" + msg)
    flow.wrapup()
    flow.cleanup()


def load_batch():
    """
    Loads a dataset in batch mode.
    """

    # setup the flow
    helper.print_title("Load dataset (batch)")
    iris = os.path.join(helper.get_data_dir(), "iris.arff")
    flow = _build_load_flow(iris, {"incremental": False})

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


def load_incremental():
    """
    Loads a dataset incrementally.
    """

    # setup the flow
    helper.print_title("Load dataset (incremental)")
    iris = os.path.join(helper.get_data_dir(), "iris.arff")
    flow = _build_load_flow(iris, {"incremental": True})

    # run the flow
    _setup_and_execute(flow)


def load_custom_loader():
    """
    Loads a dataset using a custom loader (CSVLoader on the CSV version of iris).
    """

    # setup the flow
    helper.print_title("Load dataset (custom loader)")
    iris = os.path.join(helper.get_data_dir(), "iris.csv")
    flow = _build_load_flow(iris, {
        "incremental": False,
        "use_custom_loader": True,
        "custom_loader": Loader(classname="weka.core.converters.CSVLoader"),
    })

    # run the flow
    _setup_and_execute(flow)


def main():
    """
    Runs the batch, incremental and custom loader examples in turn.
    """
    load_batch()
    load_incremental()
    load_custom_loader()


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        # always shut the JVM down again
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/load_save_flow.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# load_save_flow.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile
import traceback

import simflow.conversion as conversion
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from simflow.control import Flow, Tee, run_flow
from simflow.sink import Console
from simflow.source import ListFiles
from simflow.transformer import Convert
from weka.classifiers import Classifier
from weka.flow.transformer import LoadDataset, CrossValidate, EvaluationSummary


def main():
    """
    Builds a flow, saves it to a JSON file and loads it back again.

    The flow is only set up, never executed: its tree is printed before
    saving and once more for the flow restored from the file.
    """

    # setup the flow
    flow = Flow(name="list files")

    listfiles = ListFiles()
    listfiles.config["dir"] = str(helper.get_data_dir())
    listfiles.config["list_files"] = True
    listfiles.config["list_dirs"] = False
    listfiles.config["recursive"] = False
    # the dot is escaped so that it only matches a literal "." before "arff"
    listfiles.config["regexp"] = r".*\.arff"
    flow.actors.append(listfiles)

    tee = Tee()
    flow.actors.append(tee)

    convert = Convert()
    convert.config["setup"] = conversion.PassThrough()
    tee.actors.append(convert)

    console = Console()
    console.config["prefix"] = "Match: "
    tee.actors.append(console)

    load = LoadDataset()
    load.config["use_custom_loader"] = True
    flow.actors.append(load)

    cross = CrossValidate()
    cross.config["setup"] = Classifier(classname="weka.classifiers.trees.J48", options=["-C", "0.3"])
    flow.actors.append(cross)

    summary = EvaluationSummary()
    summary.config["matrix"] = True
    flow.actors.append(summary)

    # print flow (a setup problem is reported rather than silently dropped)
    msg = flow.setup()
    if msg is not None:
        print("Error setting up flow:\n" + msg)
    print("\n" + flow.tree + "\n")

    # save the flow
    fname = os.path.join(tempfile.gettempdir(), "simpleflow.json")
    Flow.save(flow, fname)

    # load flow
    fl2 = Flow.load(fname)

    # output flow
    msg = fl2.setup()
    if msg is not None:
        print("Error setting up flow:\n" + msg)
    print("\n" + fl2.tree + "\n")


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        # always shut the JVM down again
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/math_expression.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# math_expression.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback
import weka.core.jvm as jvm
from simflow.control import Flow, run_flow
from simflow.source import ForLoop
from simflow.sink import Console
from simflow.transformer import MathExpression


def main():
    """
    Builds and runs the "math expression" example flow.

    The flow holds a ForLoop (max set to 100), a MathExpression actor with
    the expression "math.sqrt({X})" and a Console actor, in that order.
    """

    workflow = Flow(name="math expression")

    # source of the values
    loop = ForLoop()
    loop.config["max"] = 100
    workflow.actors.append(loop)

    # expression actor; {X} is presumably replaced with the incoming value
    sqrt_actor = MathExpression()
    sqrt_actor.config["expression"] = "math.sqrt({X})"
    workflow.actors.append(sqrt_actor)

    # output
    workflow.actors.append(Console())

    # execute, printing the flow tree and cleaning up afterwards
    run_flow(workflow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    # the JVM is always stopped again, even if the example fails
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/output_actor_help.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# output_actor_help.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
import wekaexamples.helper as helper
from weka.classifiers import Classifier
from weka.flow.transformer import CrossValidate


def main():
    """
    Prints the help of a CrossValidate actor.

    No flow is assembled here: a single CrossValidate actor gets a J48
    classifier as its "setup" option and then outputs its help.
    """

    helper.print_title("Output actor help")

    # configure the stand-alone actor
    actor = CrossValidate()
    actor.config["setup"] = Classifier(classname="weka.classifiers.trees.J48")

    # output its help
    actor.print_help()


if __name__ == "__main__":
    # the JVM is always stopped again, even if the example fails
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/plot_dataset.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# plot_dataset.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback

import weka.core.jvm as jvm
import weka.filters as filters
import wekaexamples.helper as helper
from simflow.control import Flow, Branch, Sequence, run_flow
from simflow.source import FileSupplier
from weka.flow.sink import MatrixPlot, LinePlot
from weka.flow.transformer import LoadDataset, Filter, Copy


def main():
    """
    Plots a dataset.

    The iris dataset is loaded and handed to a Branch with two sequences:
    "matrix plot" (a MatrixPlot) and "line plot" (Copy, a Remove filter
    with options "-R last", then a LinePlot).
    """

    # setup the flow
    helper.print_title("Plot dataset")
    iris = os.path.join(helper.get_data_dir(), "iris.arff")

    flow = Flow(name="plot dataset")

    filesupplier = FileSupplier()
    filesupplier.config["files"] = [iris]
    flow.actors.append(filesupplier)

    loaddataset = LoadDataset()
    flow.actors.append(loaddataset)

    branch = Branch()
    flow.actors.append(branch)

    # first branch: matrix plot
    matrix_seq = Sequence(name="matrix plot")
    branch.actors.append(matrix_seq)

    mplot = MatrixPlot()
    mplot.config["percent"] = 50.0
    mplot.config["wait"] = False
    matrix_seq.actors.append(mplot)

    # second branch: line plot of a filtered copy
    line_seq = Sequence(name="line plot")
    branch.actors.append(line_seq)

    copy_actor = Copy()
    line_seq.actors.append(copy_actor)

    flter = Filter()
    flter.config["setup"] = filters.Filter(
        classname="weka.filters.unsupervised.attribute.Remove", options=["-R", "last"])
    flter.config["keep_relationname"] = True
    line_seq.actors.append(flter)

    lplot = LinePlot()
    lplot.config["percent"] = 50.0
    lplot.config["wait"] = True
    line_seq.actors.append(lplot)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        # always shut the JVM down again
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/stop_flow.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# stop_flow.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
from simflow.control import Flow, Tee, Stop, run_flow
from simflow.sink import Console
from simflow.source import ForLoop
from simflow.transformer import SetStorageValue


def main():
    """
    Builds and runs the "stopping the flow" example.

    The flow holds a ForLoop (max set to 10), a SetStorageValue actor with
    storage_name "current", a Tee with the condition "@{current} == 7"
    that contains a Stop actor, and finally a Console actor.
    """

    # setup the flow
    flow = Flow(name="stopping the flow")

    outer = ForLoop()
    outer.config["max"] = 10
    flow.actors.append(outer)

    ssv = SetStorageValue()
    ssv.config["storage_name"] = "current"
    flow.actors.append(ssv)

    tee = Tee()
    tee.config["condition"] = "@{current} == 7"
    flow.actors.append(tee)

    stop = Stop()
    tee.actors.append(stop)

    console = Console()
    flow.actors.append(console)

    # run the flow
    run_flow(flow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        # always shut the JVM down again
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/flow/update_storage_value.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# update_storage_value.py
# Copyright (C) 2015-2023 Fracpete (pythonwekawrapper at gmail dot com)

import traceback

import weka.core.jvm as jvm
from simflow.control import Flow, Trigger, run_flow
from simflow.sink import Console
from simflow.source import ForLoop, Start
from simflow.transformer import InitStorageValue, UpdateStorageValue


def main():
    """
    Builds and runs the "update storage value" example flow.

    Top level: Start, InitStorageValue (storage_name "max", value "int(1)")
    and a Trigger. That Trigger holds a ForLoop named "outer" (max 3), a
    second Trigger and an UpdateStorageValue actor (storage_name "max",
    expression "{X} + 2"). The second Trigger holds a ForLoop named "inner"
    (max "@{max}") and a Console actor.
    """

    workflow = Flow(name="update storage value")
    workflow.actors.append(Start())

    # actor configured with the initial storage name/value pair
    init_max = InitStorageValue()
    init_max.config["storage_name"] = "max"
    init_max.config["value"] = "int(1)"
    workflow.actors.append(init_max)

    # outer level
    outer_trigger = Trigger()
    workflow.actors.append(outer_trigger)

    outer_loop = ForLoop()
    outer_loop.name = "outer"
    outer_loop.config["max"] = 3
    outer_trigger.actors.append(outer_loop)

    # inner level
    inner_trigger = Trigger()
    outer_trigger.actors.append(inner_trigger)

    inner_loop = ForLoop()
    inner_loop.name = "inner"
    inner_loop.config["max"] = "@{max}"  # presumably resolved from storage at runtime
    inner_trigger.actors.append(inner_loop)
    inner_trigger.actors.append(Console())

    # last actor of the outer level
    updater = UpdateStorageValue()
    updater.config["storage_name"] = "max"
    updater.config["expression"] = "{X} + 2"
    outer_trigger.actors.append(updater)

    # execute, printing the flow tree and cleaning up afterwards
    run_flow(workflow, print_tree=True, cleanup=True)


if __name__ == "__main__":
    # the JVM is always stopped again, even if the example fails
    try:
        jvm.start()
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        jvm.stop()

# --------------------------------------------------------------------------------
# /src/wekaexamples/helper.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3
# of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# helper.py
# Copyright (C) 2014-2019 Fracpete (pythonwekawrapper at gmail dot com)

import os
import tempfile


def get_data_dir():
    """
    Returns the data directory, i.e., the "data" directory next to this module.

    :return: the data directory
    :rtype: str
    """
    # os.path.join avoids turning an empty module directory into "/data"
    return os.path.join(os.path.dirname(__file__), "data")


def print_title(title):
    """
    Prints the title underlined.

    :param title: the title to print
    :type title: str
    """

    print("\n" + title)
    print("=" * len(title))


def print_info(info):
    """
    Prints the info, preceded by an empty line.

    :param info: the info to print
    :type info: str
    """

    print("\n" + info)


def get_tmp_dir():
    """
    Returns the tmp directory.

    :return: the tmp directory
    :rtype: str
    """
    return tempfile.gettempdir()

# --------------------------------------------------------------------------------
# /src/wekaexamples/timeseries/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/fracpete/python-weka-wrapper3-examples/5df69b68220c841cc592cc253bc008f696413c65/src/wekaexamples/timeseries/__init__.py
# --------------------------------------------------------------------------------
# /src/wekaexamples/timeseries/timeseries.py:
# --------------------------------------------------------------------------------
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# timeseries.py
# Copyright (C) 2021-2022 Fracpete (pythonwekawrapper at gmail dot com)

import os
import traceback
import weka.core.jvm as jvm
import wekaexamples.helper as helper
from weka.core.converters import Loader
from weka.core.dataset import Instances
from weka.timeseries import TSEvaluation, TSEvalModule, WekaForecaster
from weka.classifiers import Classifier
from weka.core.classes import serialization_write, serialization_read


def _load_airline_data():
    """
    Loads the airline dataset from the data directory, using the last attribute as class.

    :return: the loaded dataset
    """
    airline_file = os.path.join(helper.get_data_dir(), "airline.arff")
    helper.print_info("Loading dataset: " + airline_file)
    loader = Loader(classname="weka.core.converters.ArffLoader")
    airline_data = loader.load_file(airline_file)
    airline_data.class_is_last()
    return airline_data


def _show_evaluation_modules():
    """
    Lists the available evaluation modules and loads the "MAE" one by name.
    """
    helper.print_title("Evaluation modules")
    modules = TSEvalModule.module_list()
    helper.print_info("Available modules")
    for module in modules:
        print("-" + str(module))
    helper.print_info("Loading module by name")
    print(TSEvalModule.module("MAE"))


def _evaluate_forecaster(airline_data):
    """
    Configures a LinearRegression-based forecaster and evaluates it on the data.

    :param airline_data: the dataset to evaluate on
    """
    helper.print_title("Evaluate forecaster")
    forecaster = WekaForecaster()
    forecaster.fields_to_forecast = ["passenger_numbers"]
    forecaster.base_forecaster = Classifier(classname="weka.classifiers.functions.LinearRegression")
    forecaster.tslag_maker.timestamp_field = "Date"
    forecaster.tslag_maker.adjust_for_variance = False
    forecaster.tslag_maker.include_powers_of_time = True
    forecaster.tslag_maker.include_timelag_products = True
    forecaster.tslag_maker.remove_leading_instances_with_unknown_lag_values = False
    forecaster.tslag_maker.add_month_of_year = True
    forecaster.tslag_maker.add_quarter_of_year = True
    print("algorithm name: " + str(forecaster.algorithm_name))
    print("command-line: " + forecaster.to_commandline())
    print("lag maker: " + forecaster.tslag_maker.to_commandline())

    evaluation = TSEvaluation(airline_data, 0.0)
    evaluation.evaluate_on_training_data = False
    evaluation.evaluate_on_test_data = False
    evaluation.prime_window_size = forecaster.tslag_maker.max_lag
    evaluation.prime_for_test_data_with_test_data = True
    evaluation.rebuild_model_after_each_test_forecast_step = False
    evaluation.forecast_future = True
    evaluation.horizon = 20
    evaluation.evaluation_modules = "MAE,RMSE"
    evaluation.evaluate(forecaster)
    print(evaluation)

    # the outputs below depend on the evaluate_on_* flags set above
    if evaluation.evaluate_on_training_data or evaluation.evaluate_on_test_data:
        print(evaluation.summary())
    if evaluation.evaluate_on_training_data:
        # NOTE(review): ".summary" is accessed without a call - confirm that it is a property
        print("Predictions (training data): " + evaluation.predictions_for_training_data(1).summary)
    if evaluation.evaluate_on_test_data:
        print("Predictions (test data): " + evaluation.predictions_for_test_data(1).summary)
        preds = evaluation.predictions_for_test_data(1)
        print("Counts for targets: " + str(preds.counts_for_targets()))
        print("Errors for target 'passenger_numbers': " + str(preds.errors_for_target("passenger_numbers")))
        print("Predictions for all targets: " + str(preds.predictions_for_all_targets()))
    if evaluation.training_data is not None:
        print("Future forecasts (training)\n" + evaluation.print_future_forecast_on_training_data(forecaster))
    if evaluation.test_data is not None:
        print("Future forecasts (test)\n" + evaluation.print_future_forecast_on_test_data(forecaster))
    if evaluation.evaluate_on_training_data:
        print(evaluation.print_predictions_for_training_data("Predictions (training)", "passenger_numbers", 1))
    if evaluation.evaluate_on_test_data:
        print(evaluation.print_predictions_for_test_data("Predictions (test)", "passenger_numbers", 1))


def _build_and_forecast(airline_data):
    """
    Builds a forecaster on a 90% split and prints forecasts for the remainder.

    The forecaster is primed with the last 12 training instances and then
    forecasts as many steps as there are test instances; each forecast is
    printed next to the actual value and their difference.

    :param airline_data: the dataset to split into train and test set
    :return: the built forecaster
    :rtype: WekaForecaster
    """
    helper.print_title("Build/use forecaster")
    airline_train, airline_test = airline_data.train_test_split(90.0)
    forecaster = WekaForecaster()
    forecaster.base_forecaster = Classifier(classname="weka.classifiers.functions.LinearRegression")
    forecaster.fields_to_forecast = "passenger_numbers"
    forecaster.build_forecaster(airline_train)
    num_prime_instances = 12
    airline_prime = Instances.copy_instances(
        airline_train, airline_train.num_instances - num_prime_instances, num_prime_instances)
    forecaster.prime_forecaster(airline_prime)
    num_future_forecasts = airline_test.num_instances
    preds = forecaster.forecast(num_future_forecasts)
    print("Actual,Predicted,Error")
    for i in range(num_future_forecasts):
        actual = airline_test.get_instance(i).get_value(0)
        predicted = preds[i][0].predicted
        error = actual - predicted
        print("%f,%f,%f" % (actual, predicted, error))
    return forecaster


def _serialize_base_model(forecaster):
    """
    Saves and reloads the base model, using generic serialization as fallback.

    :param forecaster: the built forecaster
    :type forecaster: WekaForecaster
    """
    helper.print_title("Serialization")
    model_file = os.path.join(helper.get_tmp_dir(), "base.model")
    if forecaster.base_model_has_serializer:
        forecaster.save_base_model(model_file)
        forecaster2 = WekaForecaster()
        forecaster2.load_base_model(model_file)
        print(forecaster2.to_commandline())
    else:
        print("Base model has no serializer, falling back to generic serialization")
        serialization_write(model_file, forecaster.base_forecaster)
        restored = Classifier(jobject=serialization_read(model_file))
        print(restored.to_commandline())


def _serialize_state(forecaster):
    """
    Saves and reloads the forecaster's state, using generic serialization as fallback.

    :param forecaster: the built forecaster
    :type forecaster: WekaForecaster
    """
    helper.print_title("State")
    model_file = os.path.join(helper.get_tmp_dir(), "state.ser")
    if forecaster.uses_state:
        forecaster.serialize_state(model_file)
        forecaster2 = WekaForecaster()
        forecaster2.load_serialized_state(model_file)
        print(forecaster2.to_commandline())
    else:
        print("Forecaster does not use state, falling back to generic serialization")
        serialization_write(model_file, forecaster)
        forecaster2 = WekaForecaster(jobject=serialization_read(model_file))
        print(forecaster2.to_commandline())


def main():
    """
    Runs the time series examples: evaluation modules, forecaster evaluation,
    building/using a forecaster, base model serialization and state handling.
    """
    airline_data = _load_airline_data()
    _show_evaluation_modules()
    _evaluate_forecaster(airline_data)
    forecaster = _build_and_forecast(airline_data)
    _serialize_base_model(forecaster)
    _serialize_state(forecaster)


if __name__ == "__main__":
    try:
        # packages=True is passed so that Weka packages are available to the JVM
        jvm.start(packages=True)
        main()
    except Exception:
        print(traceback.format_exc())
    finally:
        # always shut the JVM down again
        jvm.stop()

# --------------------------------------------------------------------------------