├── .gitignore ├── LICENSE ├── README.md ├── incal ├── __init__.py ├── cli.py ├── experiments │ ├── __init__.py │ ├── analyze.py │ ├── cli.py │ ├── examples.py │ ├── find_hyperplanes.py │ ├── find_operators.py │ ├── learn.py │ └── prepare.py ├── extra │ ├── __init__.py │ ├── api.py │ ├── combine_results.py │ ├── demo.py │ ├── deploy.py │ ├── experiments.py │ ├── main.py │ ├── migrate.py │ └── smt_scan.py ├── generator.py ├── incremental_learner.py ├── k_cnf_smt_learner.py ├── learn.py ├── learner.py ├── lp │ ├── __init__.py │ ├── examples.py │ └── model.py ├── observe │ ├── __init__.py │ ├── inc_logging.py │ ├── observe.py │ └── plotting.py ├── old_learners │ ├── __init__.py │ ├── dt_learner.py │ ├── k_dnf_greedy_learner.py │ ├── k_dnf_learner.py │ ├── k_dnf_logic_learner.py │ └── k_dnf_smt_learner.py ├── parameter_free_learner.py ├── tests │ ├── examples.py │ ├── test_evaluation.py │ ├── test_generation.py │ ├── test_one_class.py │ └── test_polytope.py ├── util │ ├── __init__.py │ ├── analyze.py │ ├── options.py │ ├── parallel.py │ ├── plot.py │ ├── sampling.py │ └── timeout.py └── violations │ ├── __init__.py │ ├── core.py │ ├── dt_selection.py │ └── virtual_data.py ├── notebooks ├── Learn2Fix.py ├── baldur.py ├── experiments.ipynb ├── experiments.sh ├── gps.py ├── playground.py ├── results.sh └── synthetic.py ├── plotting_commands.txt ├── repairs └── genprog │ ├── run-version-genprog.sh │ ├── test-genprog-incal.py │ └── validate-fix-genprog.sh ├── results ├── Plots.Rmd ├── Plots.pdf ├── accuracy.pdf ├── effort1.pdf ├── effort2.pdf ├── manualTrainingTSsize.csv ├── patchquality.pdf ├── repairability.pdf ├── results-l-10-t-5-g-10-runs.csv ├── results-l-20-t-10-g-10-runs.csv ├── results-l-30-t-10-g-10-runs.csv ├── training.pdf ├── validation.pdf └── validation2.pdf ├── scripts ├── h_combine.sh ├── h_generate.sh ├── h_learn.sh ├── k_combine.sh ├── k_generate.sh ├── k_learn.sh ├── l_combine.sh ├── l_generate.sh ├── l_learn.sh ├── pf_combine.sh ├── pf_generate.sh ├── pf_learn.sh ├── plot.sh ├── samples_combine.sh ├── samples_generate.sh └── samples_learn.sh └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | synthetic/ 2 | res/ 3 | remote_res/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU LESSER GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | 9 | This version of the GNU Lesser General Public License incorporates 10 | the terms and conditions of version 3 of the GNU General Public 11 | License, supplemented by the additional permissions listed below. 12 | 13 | 0. Additional Definitions. 14 | 15 | As used herein, "this License" refers to version 3 of the GNU Lesser 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU 17 | General Public License. 18 | 19 | "The Library" refers to a covered work governed by this License, 20 | other than an Application or a Combined Work as defined below. 21 | 22 | An "Application" is any work that makes use of an interface provided 23 | by the Library, but which is not otherwise based on the Library. 24 | Defining a subclass of a class defined by the Library is deemed a mode 25 | of using an interface provided by the Library. 
26 | 27 | A "Combined Work" is a work produced by combining or linking an 28 | Application with the Library. The particular version of the Library 29 | with which the Combined Work was made is also called the "Linked 30 | Version". 31 | 32 | The "Minimal Corresponding Source" for a Combined Work means the 33 | Corresponding Source for the Combined Work, excluding any source code 34 | for portions of the Combined Work that, considered in isolation, are 35 | based on the Application, and not on the Linked Version. 36 | 37 | The "Corresponding Application Code" for a Combined Work means the 38 | object code and/or source code for the Application, including any data 39 | and utility programs needed for reproducing the Combined Work from the 40 | Application, but excluding the System Libraries of the Combined Work. 41 | 42 | 1. Exception to Section 3 of the GNU GPL. 43 | 44 | You may convey a covered work under sections 3 and 4 of this License 45 | without being bound by section 3 of the GNU GPL. 46 | 47 | 2. Conveying Modified Versions. 48 | 49 | If you modify a copy of the Library, and, in your modifications, a 50 | facility refers to a function or data to be supplied by an Application 51 | that uses the facility (other than as an argument passed when the 52 | facility is invoked), then you may convey a copy of the modified 53 | version: 54 | 55 | a) under this License, provided that you make a good faith effort to 56 | ensure that, in the event an Application does not supply the 57 | function or data, the facility still operates, and performs 58 | whatever part of its purpose remains meaningful, or 59 | 60 | b) under the GNU GPL, with none of the additional permissions of 61 | this License applicable to that copy. 62 | 63 | 3. Object Code Incorporating Material from Library Header Files. 64 | 65 | The object code form of an Application may incorporate material from 66 | a header file that is part of the Library. You may convey such object 67 | code under terms of your choice, provided that, if the incorporated 68 | material is not limited to numerical parameters, data structure 69 | layouts and accessors, or small macros, inline functions and templates 70 | (ten or fewer lines in length), you do both of the following: 71 | 72 | a) Give prominent notice with each copy of the object code that the 73 | Library is used in it and that the Library and its use are 74 | covered by this License. 75 | 76 | b) Accompany the object code with a copy of the GNU GPL and this license 77 | document. 78 | 79 | 4. Combined Works. 80 | 81 | You may convey a Combined Work under terms of your choice that, 82 | taken together, effectively do not restrict modification of the 83 | portions of the Library contained in the Combined Work and reverse 84 | engineering for debugging such modifications, if you also do each of 85 | the following: 86 | 87 | a) Give prominent notice with each copy of the Combined Work that 88 | the Library is used in it and that the Library and its use are 89 | covered by this License. 90 | 91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license 92 | document. 93 | 94 | c) For a Combined Work that displays copyright notices during 95 | execution, include the copyright notice for the Library among 96 | these notices, as well as a reference directing the user to the 97 | copies of the GNU GPL and this license document. 
98 | 99 | d) Do one of the following: 100 | 101 | 0) Convey the Minimal Corresponding Source under the terms of this 102 | License, and the Corresponding Application Code in a form 103 | suitable for, and under terms that permit, the user to 104 | recombine or relink the Application with a modified version of 105 | the Linked Version to produce a modified Combined Work, in the 106 | manner specified by section 6 of the GNU GPL for conveying 107 | Corresponding Source. 108 | 109 | 1) Use a suitable shared library mechanism for linking with the 110 | Library. A suitable mechanism is one that (a) uses at run time 111 | a copy of the Library already present on the user's computer 112 | system, and (b) will operate properly with a modified version 113 | of the Library that is interface-compatible with the Linked 114 | Version. 115 | 116 | e) Provide Installation Information, but only if you would otherwise 117 | be required to provide such information under section 6 of the 118 | GNU GPL, and only to the extent that such information is 119 | necessary to install and execute a modified version of the 120 | Combined Work produced by recombining or relinking the 121 | Application with a modified version of the Linked Version. (If 122 | you use option 4d0, the Installation Information must accompany 123 | the Minimal Corresponding Source and Corresponding Application 124 | Code. If you use option 4d1, you must provide the Installation 125 | Information in the manner specified by section 6 of the GNU GPL 126 | for conveying Corresponding Source.) 127 | 128 | 5. Combined Libraries. 129 | 130 | You may place library facilities that are a work based on the 131 | Library side by side in a single library together with other library 132 | facilities that are not Applications and are not covered by this 133 | License, and convey such a combined library under terms of your 134 | choice, if you do both of the following: 135 | 136 | a) Accompany the combined library with a copy of the same work based 137 | on the Library, uncombined with any other library facilities, 138 | conveyed under the terms of this License. 139 | 140 | b) Give prominent notice with the combined library that part of it 141 | is a work based on the Library, and explaining where to find the 142 | accompanying uncombined form of the same work. 143 | 144 | 6. Revised Versions of the GNU Lesser General Public License. 145 | 146 | The Free Software Foundation may publish revised and/or new versions 147 | of the GNU Lesser General Public License from time to time. Such new 148 | versions will be similar in spirit to the present version, but may 149 | differ in detail to address new problems or concerns. 150 | 151 | Each version is given a distinguishing version number. If the 152 | Library as you received it specifies that a certain numbered version 153 | of the GNU Lesser General Public License "or any later version" 154 | applies to it, you have the option of following the terms and 155 | conditions either of that published version or of any later version 156 | published by the Free Software Foundation. If the Library as you 157 | received it does not specify a version number of the GNU Lesser 158 | General Public License, you may choose any version of the GNU Lesser 159 | General Public License ever published by the Free Software Foundation. 
160 | 161 | If the Library as you received it specifies that a proxy can decide 162 | whether future versions of the GNU Lesser General Public License shall 163 | apply, that proxy's public statement of acceptance of any version is 164 | permanent authorization for you to choose that version for the 165 | Library. 166 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Learn2Fix 2 | Learn2Fix is a human-in-the-loop automatic repair technique for programs that take numeric inputs. Given a test input that reproduces the bug, Learn2Fix uses mutational fuzzing to generate alternative test inputs, and presents some of those to the human to ask whether those test inputs also reproduce the bug. Meanwhile, Learn2Fix uses the [Incal](https://github.com/ML-KULeuven/incal) constraint learning tool to construct a Satisfiability Modulo Linear Real Arithmetic SMT(LRA) constraint that is satisfied only by test inputs labeled as reproducing the bug. SMT provides a natural representation of program semantics and is a fundamental building block of symbolic execution and semantic program repair. The learned SMT constraint serves as an automatic bug oracle that can predict the label of new test inputs. Iteratively, the oracle is trained to predict the user’s responses with increasing accuracy. Using the trained oracle, the user can be asked more strategically. The key challenge that Learn2Fix addresses is to maximize the oracle’s accuracy, given only a limited number of queries to the user. 3 | 4 | * You can find the technical details in our ICST'20 paper: https://arxiv.org/abs/1912.07758 5 | * To cite our paper, you can use the following bibtex entry: 6 | ```bibtex 7 | @inproceedings{learn2fix, 8 | author = {B\"ohme, Marcel and Geethal, Charaka and Pham, Van-Thuan}, 9 | title = {Human-In-The-Loop Automatic Program Repair}, 10 | booktitle = {Proceedings of the 2020 IEEE International Conference on Software Testing, Verification and Validation}, 11 | series = {ICST 2020}, 12 | year = {2020}, 13 | location = {Porto, Portugal}, 14 | pages = {1-12}, 15 | numpages = {12} 16 | } 17 | ``` 18 | Learn2Fix is implemented in Python, quickly set up in a Docker container, and uses the following projects: 19 | * Incal constraint learner: [Paper](https://www.ijcai.org/proceedings/2018/0323.pdf), [Tool](https://github.com/ML-KULeuven/incal) 20 | * GenProg test-driven repair: [Paper](https://web.eecs.umich.edu/~weimerw/p/weimer-tse2012-genprog.pdf), [Tool](https://github.com/squareslab/genprog-code) 21 | * CodeFlaws repair benchmark: [Paper](https://codeflaws.github.io/postercameraready.pdf), [Tool](https://codeflaws.github.io/) 22 | 23 | # How to run Learn2Fix 24 | To facilitate open science and reproducibility, we make our tool (Learn2Fix), data, and scripts available. Following are the concrete instructions to set up and run Learn2Fix on the Codeflaws benchmark to reproduce the results we reported in our paper. 25 | 26 | ## Step-1. 
Install Codeflaws with GenProg 27 | 28 | Set up a Docker container for the GenProg repair tool 29 | ```bash 30 | docker pull squareslab/genprog 31 | docker run -it squareslab/genprog /bin/bash 32 | ``` 33 | 34 | Download and install the required dependencies 35 | ```bash 36 | apt-get update 37 | apt-get -y install git time build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev wget z3 bc 38 | 39 | # Install Python 3.7 40 | pushd /tmp 41 | wget https://www.python.org/ftp/python/3.7.2/Python-3.7.2.tar.xz 42 | tar -xf Python-3.7.2.tar.xz 43 | cd Python-3.7.2 44 | ./configure --enable-optimizations 45 | make -j4 46 | make altinstall 47 | ln -s $(which pip3.7) /usr/bin/pip 48 | mv /usr/bin/python /usr/bin/python.old 49 | ln -s $(which python3.7) /usr/bin/python 50 | popd 51 | ``` 52 | 53 | Download and set up the Codeflaws benchmark inside the container 54 | ```bash 55 | cd /root 56 | git clone https://github.com/codeflaws/codeflaws 57 | cd codeflaws/all-script 58 | wget http://www.comp.nus.edu.sg/~release/codeflaws/codeflaws.tar.gz 59 | tar -zxf codeflaws.tar.gz 60 | ``` 61 | 62 | ## Step-2. Install Learn2Fix 63 | Download and compile Learn2Fix and its dependencies (e.g., Incal) 64 | ```bash 65 | cd /root/codeflaws/all-script 66 | git clone https://github.com/mboehme/learn2fix 67 | export learn2fix="$PWD/learn2fix" 68 | cd $learn2fix 69 | # Install LattE 70 | wget https://github.com/latte-int/latte/releases/download/version_1_7_5/latte-integrale-1.7.5.tar.gz 71 | tar -xvzf latte-integrale-1.7.5.tar.gz 72 | cd latte-integrale-1.7.5 73 | ./configure 74 | make -j4 75 | make install 76 | # Install Incal 77 | cd $learn2fix 78 | python setup.py build 79 | python setup.py install 80 | pip install cvxopt 81 | pip install plotting 82 | pip install seaborn 83 | pip install wmipa 84 | pip install pywmi 85 | pysmt-install --z3 # confirm with [Y]es 86 | ``` 87 | 88 | Export the required environment variables 89 | ```bash 90 | cd $learn2fix 91 | export PATH=/root/.opam/system/bin/:$PATH 92 | export PATH=$PATH:$PWD/latte-integrale-1.7.5/dest/bin/ 93 | cd $learn2fix/notebooks 94 | export PYTHONPATH=$PWD/../incal/experiments 95 | export PYTHONPATH=$PYTHONPATH:$PWD/../incal/extra 96 | export PYTHONPATH=$PYTHONPATH:$PWD/../incal 97 | ``` 98 | 99 | # How to reproduce our results 100 | ## Run Learn2Fix on Codeflaws 101 | Run the following command to execute Learn2Fix.
Learn2Fix produces several CSV files, one for each experimental run (e.g., results_it_1.csv for the first run) 102 | ```bash 103 | cd $learn2fix/notebooks 104 | ./experiments.sh /root/codeflaws/all-script/codeflaws 2>learn2fix.log 105 | ``` 106 | Once the experiment completes, concatenate all CSV files to form a single file containing all results 107 | ```bash 108 | cat results_it_*.csv > results_all.csv 109 | ``` 110 | 111 | ## Run Plot.Rmd on the files that are produced 112 | See Plot.Rmd and our data under the results folder 113 | ```bash 114 | ls $learn2fix/results 115 | ``` 116 | -------------------------------------------------------------------------------- /incal/__init__.py: -------------------------------------------------------------------------------- 1 | from pysmt.shortcuts import Real 2 | from pywmi.domain import Density 3 | 4 | 5 | class Formula(Density): 6 | def __init__(self, domain, support): 7 | super().__init__(domain, support, Real(1)) 8 | 9 | @classmethod 10 | def from_state(cls, state: dict): 11 | density = Density.from_state(state) 12 | return cls(density.domain, density.support) 13 | -------------------------------------------------------------------------------- /incal/cli.py: -------------------------------------------------------------------------------- 1 | from pywmi.smt_print import pretty_print 2 | 3 | from .learn import LearnOptions 4 | 5 | 6 | def main(): 7 | formula, k, h = LearnOptions().execute_from_command_line("Learn SMT(LRA) theories from data") 8 | print("Learned formula (k={k}, h={h}): {f}".format(f=pretty_print(formula), k=k, h=h)) 9 | 10 | -------------------------------------------------------------------------------- /incal/experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/experiments/__init__.py -------------------------------------------------------------------------------- /incal/experiments/analyze.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import os 4 | 5 | from typing import List 6 | 7 | import numpy as np 8 | import pickledb 9 | from pywmi import RejectionEngine, nested_to_smt, import_domain 10 | from pywmi.domain import Density, Domain 11 | 12 | from .prepare import select_benchmark_files, benchmark_filter, get_synthetic_db 13 | from incal.util.options import Experiment 14 | from incal.util import analyze as show 15 | 16 | from .learn import get_experiment 17 | 18 | import pysmt.shortcuts as smt 19 | import pysmt.environment 20 | 21 | 22 | class Properties(object): 23 | bounds = dict() 24 | db = None 25 | 26 | @staticmethod 27 | def to_name(filename): 28 | return filename[filename.find("QF_LRA"):filename.find("smt2")+4] 29 | 30 | @staticmethod 31 | def to_sample_name(filename): 32 | return filename[filename.find("QF_LRA"):] 33 | 34 | @staticmethod 35 | def to_synthetic_name(filename): 36 | parts = os.path.basename(filename).split(".") 37 | return parts[0] 38 | 39 | @staticmethod 40 | def compute(experiments): 41 | Properties.db = pickledb.load('example.db', True) 42 | if Properties.db.exists("bounds"): 43 | Properties.bounds = Properties.db.get("bounds") 44 | else: 45 | used_names = {Properties.to_sample_name(e.parameters.original_values["data"]) for e in experiments} 46 | names_to_bounds = dict() 47 | summary_file = "remote_res/smt_lib_benchmark/qf_lra_summary.pickle" 48 | for name, entry, density_filename 
in select_benchmark_files(benchmark_filter, summary_file): 49 | if "samples" in entry: 50 | for s in entry["samples"]: 51 | name = Properties.to_sample_name(s["samples_filename"]) 52 | if name in used_names: 53 | names_to_bounds[name] = s["bounds"] 54 | Properties.bounds = names_to_bounds 55 | Properties.db.set("bounds", Properties.bounds) 56 | 57 | @staticmethod 58 | def get_bound(experiment): 59 | return Properties.bounds[Properties.to_sample_name(experiment.parameters.original_values["data"])] 60 | 61 | @staticmethod 62 | def get_db_synthetic(experiment): 63 | return get_synthetic_db(os.path.dirname(experiment.parameters.original_values["domain"])) 64 | 65 | @staticmethod 66 | def original_k(experiment): 67 | db = Properties.get_db_synthetic(experiment) 68 | name = Properties.to_synthetic_name(experiment.imported_from_file) 69 | return db.get(name)["generation"]["k"] 70 | 71 | @staticmethod 72 | def original_h(experiment): 73 | db = Properties.get_db_synthetic(experiment) 74 | name = Properties.to_synthetic_name(experiment.imported_from_file) 75 | return db.get(name)["generation"]["h"] 76 | 77 | @staticmethod 78 | def original_l(experiment): 79 | db = Properties.get_db_synthetic(experiment) 80 | name = Properties.to_synthetic_name(experiment.imported_from_file) 81 | return db.get(name)["generation"]["l"] 82 | 83 | @staticmethod 84 | def executed(experiment): 85 | return 1 if experiment.results.duration is not None else 0 86 | 87 | @staticmethod 88 | def positive_ratio(experiment): 89 | labels = np.load(experiment.parameters.original_values["labels"]) 90 | return sum(labels) / len(labels) 91 | 92 | @staticmethod 93 | def accuracy_approx(experiment): 94 | key = "accuracy_approx:{}".format(experiment.imported_from_file) 95 | if Properties.db.exists(key): 96 | return Properties.db.get(key) 97 | else: 98 | pysmt.environment.push_env() 99 | pysmt.environment.get_env().enable_infix_notation = True 100 | if os.path.basename(experiment.imported_from_file).startswith("synthetic"): 101 | db = Properties.get_db_synthetic(experiment) 102 | name = Properties.to_synthetic_name(experiment.imported_from_file) 103 | entry = db.get(name) 104 | domain = import_domain(json.loads(entry["domain"])) 105 | true_formula = nested_to_smt(entry["formula"]) 106 | else: 107 | density = Density.import_from(experiment.parameters.original_values["domain"]) 108 | domain = Domain(density.domain.variables, density.domain.var_types, Properties.get_bound(experiment)) 109 | true_formula = density.support 110 | learned_formula = nested_to_smt(experiment.results.formula) 111 | engine = RejectionEngine(domain, smt.TRUE(), smt.Real(1.0), 100000) 112 | accuracy = engine.compute_probability(smt.Iff(true_formula, learned_formula)) 113 | pysmt.environment.pop_env() 114 | print(accuracy) 115 | Properties.db.set(key, accuracy) 116 | return accuracy 117 | 118 | 119 | def register_derived(experiment): 120 | experiment.register_derived("accuracy_approx", Properties.accuracy_approx) 121 | experiment.register_derived("original_h", Properties.original_h) 122 | experiment.register_derived("original_l", Properties.original_l) 123 | experiment.register_derived("original_k", Properties.original_k) 124 | experiment.register_derived("executed", Properties.executed) 125 | experiment.register_derived("pos_rate", Properties.positive_ratio) 126 | return experiment 127 | 128 | 129 | def analyze(results_directories, res_path, show_args): 130 | experiments = [] # type: List[Experiment] 131 | for results_directory in results_directories: 132 | for 
filename in glob.glob("{}/**/*.result".format(results_directory), recursive=True): 133 | log_file = filename.replace(".result", ".log") 134 | if not os.path.exists(log_file): 135 | log_file = None 136 | experiment = get_experiment(res_path).load(filename) 137 | experiments.append(register_derived(experiment)) 138 | 139 | Properties.compute(experiments) 140 | show.show(experiments, *show_args) 141 | -------------------------------------------------------------------------------- /incal/experiments/cli.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import numpy as np 4 | from pywmi.smt_print import pretty_print 5 | 6 | from .learn import learn_benchmark, get_experiment, learn_synthetic 7 | from .prepare import prepare_smt_lib_benchmark, prepare_ratios, prepare_samples, prepare_synthetic 8 | from incal.learn import LearnOptions 9 | from . import examples 10 | from .analyze import analyze 11 | from incal.util import analyze as show 12 | 13 | 14 | def main(): 15 | smt_lib_name = "smt-lib-benchmark" 16 | synthetic_name = "synthetic" 17 | parser = argparse.ArgumentParser(description="Interface with benchmark or synthetic data for experiments") 18 | 19 | parser.add_argument("source") 20 | parser.add_argument("--sample_size", type=int, default=None) 21 | parser.add_argument("--runs", type=int, default=None) 22 | parser.add_argument("--input_dir", type=str, default=None) 23 | parser.add_argument("--output_dir", type=str, default=None) 24 | parser.add_argument("--processes", type=int, default=None) 25 | parser.add_argument("--time_out", type=int, default=None) 26 | 27 | task_parsers = parser.add_subparsers(dest="task") 28 | prepare_parser = task_parsers.add_parser("prepare") 29 | prepare_parser.add_argument("--reset_samples", type=bool, default=False) 30 | learn_parser = task_parsers.add_parser("learn") 31 | analyze_parser = task_parsers.add_parser("analyze") 32 | analyze_parser.add_argument("--dirs", nargs="+", type=str) 33 | analyze_parser.add_argument("--res_path", type=str, default=None) 34 | 35 | show_parsers = analyze_parser.add_subparsers() 36 | show_parser = show_parsers.add_parser("show") 37 | show.add_arguments(show_parser) 38 | 39 | learn_options = LearnOptions() 40 | learn_options.add_arguments(learn_parser) 41 | 42 | args = parser.parse_args() 43 | if args.task == "prepare": 44 | if args.source == smt_lib_name: 45 | prepare_smt_lib_benchmark() 46 | prepare_ratios() 47 | prepare_samples(args.runs, args.sample_size, args.reset_samples) 48 | elif args.source == synthetic_name: 49 | prepare_synthetic(args.input_dir, args.output_dir, args.runs, args.sample_size) 50 | elif args.task == "learn": 51 | learn_options.parse_arguments(args) 52 | if args.source == smt_lib_name: 53 | learn_benchmark(args.runs, args.sample_size, args.processes, args.time_out, learn_options) 54 | elif args.source == synthetic_name: 55 | learn_synthetic(args.input_dir, args.output_dir, args.runs, args.sample_size, args.processes, args.time_out, learn_options) 56 | elif args.source.startswith("ex"): 57 | example_name = args.source.split(":", 1)[1] 58 | domain, formula = examples.get_by_name(example_name) 59 | np.random.seed(1) 60 | from pywmi.sample import uniform 61 | samples = uniform(domain, args.sample_size) 62 | from pywmi import evaluate 63 | labels = evaluate(domain, formula, samples) 64 | learn_options.set_value("domain", domain, False) 65 | learn_options.set_value("data", samples, False) 66 | learn_options.set_value("labels", labels, False) 67 | 
(formula, k, h), duration = learn_options.call(True) 68 | print("[{:.2f}s] Learned formula (k={}, h={}): {}".format(duration, k, h, pretty_print(formula))) 69 | elif args.task == "analyze": 70 | analyze(args.dirs, args.res_path, show.parse_args(args)) 71 | 72 | 73 | 74 | if __name__ == "__main__": 75 | main() 76 | -------------------------------------------------------------------------------- /incal/experiments/examples.py: -------------------------------------------------------------------------------- 1 | from pywmi import Domain 2 | from pysmt.shortcuts import REAL, Or, And, LE, Real, Symbol, BOOL, GT, Not, Plus, Times, GE 3 | 4 | 5 | def xy_domain(): 6 | variables = ["x", "y"] 7 | var_types = {"x": REAL, "y": REAL} 8 | var_domains = {"x": (0, 1), "y": (0, 1)} 9 | return Domain(variables, var_types, var_domains) 10 | 11 | 12 | def simple_checker_problem(): 13 | theory = Or( 14 | And(LE(Symbol("x", REAL), Real(0.5)), LE(Symbol("y", REAL), Real(0.5))), 15 | And(GT(Symbol("x", REAL), Real(0.5)), GT(Symbol("y", REAL), Real(0.5))) 16 | ) 17 | 18 | return xy_domain(), theory, "simple_checker" 19 | 20 | 21 | def simple_checker_problem_cnf(): 22 | x, y = (Symbol(n, REAL) for n in ["x", "y"]) 23 | theory = ((x <= 0.5) | (y > 0.5)) & ((x > 0.5) | (y <= 0.5)) 24 | return xy_domain(), theory, "simple_cnf_checker" 25 | 26 | 27 | def checker_problem(): 28 | variables = ["x", "y", "a"] 29 | var_types = {"x": REAL, "y": REAL, "a": BOOL} 30 | var_domains = {"x": (0, 1), "y": (0, 1)} 31 | 32 | theory = Or( 33 | And(LE(Symbol("x", REAL), Real(0.5)), LE(Symbol("y", REAL), Real(0.5)), Symbol("a", BOOL)), 34 | And(GT(Symbol("x", REAL), Real(0.5)), GT(Symbol("y", REAL), Real(0.5)), Symbol("a", BOOL)), 35 | And(GT(Symbol("x", REAL), Real(0.5)), LE(Symbol("y", REAL), Real(0.5)), Not(Symbol("a", BOOL))), 36 | And(LE(Symbol("x", REAL), Real(0.5)), GT(Symbol("y", REAL), Real(0.5)), Not(Symbol("a", BOOL))) 37 | ) 38 | 39 | return Domain(variables, var_types, var_domains), theory, "checker" 40 | 41 | 42 | def simple_univariate_problem(): 43 | variables = ["x"] 44 | var_types = {"x": REAL} 45 | var_domains = {"x": (0, 1)} 46 | 47 | theory = LE(Symbol("x", REAL), Real(0.6)) 48 | 49 | return Domain(variables, var_types, var_domains), theory, "one_test" 50 | 51 | 52 | def shared_hyperplane_problem(): 53 | domain = xy_domain() 54 | x, y = (domain.get_symbol(v) for v in ["x", "y"]) 55 | # y <= -x + 1.25 56 | shared1 = LE(y, Plus(Times(Real(-1.0), x), Real(1.25))) 57 | # y >= -x + 0.75 58 | shared2 = GE(y, Plus(Times(Real(-1.0), x), Real(0.75))) 59 | 60 | # y <= x + 0.5 61 | h1 = LE(y, Plus(x, Real(0.5))) 62 | # y >= x + 0.25 63 | h2 = GE(y, Plus(x, Real(0.25))) 64 | 65 | # y <= x - 0.25 66 | h3 = LE(y, Plus(x, Real(-0.25))) 67 | # y >= x - 0.5 68 | h4 = GE(y, Plus(x, Real(-0.5))) 69 | return domain, Or(And(shared1, shared2, h1, h2), And(shared1, shared2, h3, h4)), "shared" 70 | 71 | 72 | def cross_problem(): 73 | domain = xy_domain() 74 | x, y = (domain.get_symbol(v) for v in ["x", "y"]) 75 | top = y <= 0.9 76 | middle_top = y <= 0.7 77 | middle_bottom = y >= 0.5 78 | bottom = y >= 0.1 79 | 80 | left = x >= 0.2 81 | middle_left = x >= 0.4 82 | middle_right = x <= 0.6 83 | right = x <= 0.8 84 | theory = (top & middle_left & middle_right & bottom) | (left & middle_top & middle_bottom & right) 85 | return domain, theory, "cross" 86 | 87 | 88 | def bool_xor_problem(): 89 | variables = ["a", "b"] 90 | var_types = {"a": BOOL, "b": BOOL} 91 | var_domains = dict() 92 | domain = Domain(variables, var_types, var_domains) 93 | 94 | 
a, b = (domain.get_symbol(v) for v in variables) 95 | 96 | theory = (a & ~b) | (~a & b) 97 | return domain, theory, "2xor" 98 | 99 | 100 | def ice_cream_problem(): 101 | variables = ["chocolate", "banana", "weekend"] 102 | chocolate, banana, weekend = variables 103 | var_types = {chocolate: REAL, banana: REAL, weekend: BOOL} 104 | var_domains = {chocolate: (0, 1), banana: (0, 1)} 105 | domain = Domain(variables, var_types, var_domains) 106 | 107 | chocolate, banana, weekend = (domain.get_symbol(v) for v in variables) 108 | theory = (chocolate < 0.650) \ 109 | & (banana < 0.550) \ 110 | & (chocolate + 0.7 * banana <= 0.700) \ 111 | & (chocolate + 1.2 * banana <= 0.750) \ 112 | & (~weekend | (chocolate + 0.7 * banana <= 0.340)) 113 | 114 | return domain, theory, "ice_cream" 115 | 116 | 117 | def get_all(): 118 | return [ 119 | simple_checker_problem(), 120 | simple_checker_problem_cnf(), 121 | checker_problem(), 122 | simple_univariate_problem(), 123 | shared_hyperplane_problem(), 124 | cross_problem(), 125 | bool_xor_problem(), 126 | ice_cream_problem(), 127 | ] 128 | 129 | 130 | def get_by_name(name): 131 | for t in get_all(): 132 | if t[2] == name: 133 | return t[0], t[1] 134 | -------------------------------------------------------------------------------- /incal/experiments/find_hyperplanes.py: -------------------------------------------------------------------------------- 1 | from pywmi import SmtWalker, smt_to_nested 2 | 3 | 4 | class HalfSpaceWalker(SmtWalker): 5 | def __init__(self): 6 | self.half_spaces = set() 7 | 8 | def walk_and(self, args): 9 | self.walk_smt_multiple(args) 10 | 11 | def walk_or(self, args): 12 | self.walk_smt_multiple(args) 13 | 14 | def walk_plus(self, args): 15 | self.walk_smt_multiple(args) 16 | 17 | def walk_minus(self, left, right): 18 | self.walk_smt_multiple([left, right]) 19 | 20 | def walk_times(self, args): 21 | self.walk_smt_multiple(args) 22 | 23 | def walk_not(self, argument): 24 | self.walk_smt_multiple([argument]) 25 | 26 | def walk_ite(self, if_arg, then_arg, else_arg): 27 | self.walk_smt_multiple([if_arg, then_arg, else_arg]) 28 | 29 | def walk_pow(self, base, exponent): 30 | self.walk_smt_multiple([base, exponent]) 31 | 32 | def walk_lte(self, left, right): 33 | self.half_spaces.add(smt_to_nested(left <= right)) 34 | 35 | def walk_lt(self, left, right): 36 | self.half_spaces.add(smt_to_nested(left < right)) 37 | 38 | def walk_equals(self, left, right): 39 | self.walk_smt_multiple([left, right]) 40 | 41 | def walk_symbol(self, name, v_type): 42 | pass 43 | 44 | def walk_constant(self, value, v_type): 45 | pass 46 | 47 | def find_half_spaces(self, formula): 48 | self.walk_smt(formula) 49 | return list(self.half_spaces) 50 | -------------------------------------------------------------------------------- /incal/experiments/find_operators.py: -------------------------------------------------------------------------------- 1 | import pywmi 2 | from pywmi import SmtWalker 3 | 4 | 5 | class OperatorWalker(SmtWalker): 6 | def __init__(self): 7 | self.operators = set() 8 | 9 | def walk_and(self, args): 10 | self.operators.add("&") 11 | self.walk_smt_multiple(args) 12 | 13 | def walk_or(self, args): 14 | self.operators.add("|") 15 | self.walk_smt_multiple(args) 16 | 17 | def walk_plus(self, args): 18 | self.operators.add("+") 19 | self.walk_smt_multiple(args) 20 | 21 | def walk_minus(self, left, right): 22 | self.operators.add("-") 23 | self.walk_smt_multiple([left, right]) 24 | 25 | def walk_times(self, args): 26 | self.operators.add("*") 27 | 
self.walk_smt_multiple(args) 28 | 29 | def walk_not(self, argument): 30 | self.operators.add("~") 31 | self.walk_smt_multiple([argument]) 32 | 33 | def walk_ite(self, if_arg, then_arg, else_arg): 34 | self.operators.add("ite") 35 | self.walk_smt_multiple([if_arg, then_arg, else_arg]) 36 | 37 | def walk_pow(self, base, exponent): 38 | self.operators.add("^") 39 | self.walk_smt_multiple([base, exponent]) 40 | 41 | def walk_lte(self, left, right): 42 | self.operators.add("<=") 43 | self.walk_smt_multiple([left, right]) 44 | 45 | def walk_lt(self, left, right): 46 | self.operators.add("<") 47 | self.walk_smt_multiple([left, right]) 48 | 49 | def walk_equals(self, left, right): 50 | self.operators.add("=") 51 | self.walk_smt_multiple([left, right]) 52 | 53 | def walk_symbol(self, name, v_type): 54 | pass 55 | 56 | def walk_constant(self, value, v_type): 57 | pass 58 | 59 | def find_operators(self, formula): 60 | self.walk_smt(formula) 61 | return list(self.operators) 62 | -------------------------------------------------------------------------------- /incal/experiments/learn.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import random 4 | import warnings 5 | 6 | from .prepare import get_synthetic_db 7 | from incal.util.options import Options, Experiment 8 | 9 | from incal.learn import LearnOptions, LearnResults 10 | from incal.util.parallel import run_commands 11 | from .prepare import select_benchmark_files, benchmark_filter, get_benchmark_results_dir 12 | 13 | 14 | def get_bound_volume(bounds): 15 | size = 1 16 | for ub_lb in bounds.values(): 17 | size *= ub_lb[1] - ub_lb[0] 18 | return size 19 | 20 | 21 | def rel_ratio(ratio): 22 | return abs(0.5 - ratio) 23 | 24 | 25 | def learn_synthetic(input_directory, output_directory, runs, sample_size, processes, time_out, learn_options: LearnOptions): 26 | commands = [] 27 | 28 | db = get_synthetic_db(input_directory) 29 | for name in db.getall(): 30 | entry = db.get(name) 31 | matching_samples = [] 32 | for sample in entry["samples"]: 33 | if sample["sample_size"] == sample_size and len(matching_samples) < runs: 34 | matching_samples.append(sample) 35 | if len(matching_samples) != runs: 36 | raise RuntimeError("Insufficient samples available, prepare more samples first") 37 | 38 | for sample in matching_samples: 39 | detail_learn_options = learn_options.copy() 40 | detail_learn_options.domain = os.path.join(input_directory, "{}.density".format(name)) 41 | detail_learn_options.data = os.path.join(input_directory, sample["samples_file"]) 42 | detail_learn_options.labels = os.path.join(input_directory, sample["labels_file"]) 43 | 44 | export_file = "{}{sep}{}.{}.{}.result" \ 45 | .format( output_directory, name, sample_size, sample["seed"], sep=os.path.sep) 46 | log_file = "{}{sep}{}.{}.{}.log" \ 47 | .format(output_directory, name, sample_size, sample["seed"], sep=os.path.sep) 48 | 49 | if not os.path.exists(os.path.dirname(export_file)): 50 | os.makedirs(os.path.dirname(export_file)) 51 | 52 | commands.append("incal-track {} --export {} --log {}" 53 | .format(detail_learn_options.print_arguments(), export_file, log_file)) 54 | 55 | run_commands(commands, processes, time_out) 56 | 57 | 58 | def learn_benchmark(runs, sample_size, processes, time_out, learn_options: LearnOptions): 59 | # def filter1(entry): 60 | # return "real_variables_count" in entry and entry["real_variables_count"] + entry["bool_variables_count"] <= 10 61 | # 62 | # count = 0 63 | # boolean = 0 64 | # for name, 
entry, density_filename in select_benchmark_files(filter1): 65 | # if entry["bool_variables_count"] > 0: 66 | # boolean += 1 67 | # count += 1 68 | # 69 | # print("{} / {}".format(boolean, count)) 70 | # 71 | # count = 0 72 | # boolean = 0 73 | # for name, entry, density_filename in select_benchmark_files(benchmark_filter): 74 | # if entry["bool_variables_count"] > 0: 75 | # boolean += 1 76 | # count += 1 77 | # 78 | # print("{} / {}".format(boolean, count)) 79 | 80 | def learn_filter(_e): 81 | return benchmark_filter(_e) and "samples" in _e 82 | 83 | count = 0 84 | problems_to_learn = [] 85 | for name, entry, density_filename in select_benchmark_files(learn_filter): 86 | if len(entry["bounds"]) > 0: 87 | best_ratio = min(rel_ratio(t[1]) for t in entry["bounds"]) 88 | if best_ratio <= 0.3: 89 | qualifying = [t for t in entry["bounds"] if rel_ratio(t[1]) <= 0.3 and abs(rel_ratio(t[1]) - best_ratio) <= best_ratio / 5] 90 | selected = sorted(qualifying, key=lambda x: get_bound_volume(x[0]))[0] 91 | print(name, "\n", rel_ratio(selected[1]), best_ratio, selected[0], entry["bool_variables_count"]) 92 | count += 1 93 | selected_samples = [s for s in entry["samples"] 94 | if s["bounds"] == selected[0] and s["sample_size"] >= sample_size] 95 | if len(selected_samples) < runs: 96 | raise RuntimeError("Insufficient number of data set available ({} of {})" 97 | .format(len(selected_samples), runs)) 98 | elif len(selected_samples) > runs: 99 | selected_samples = selected_samples[:runs] 100 | for selected_sample in selected_samples: 101 | problems_to_learn.append((name, density_filename, selected_sample)) 102 | 103 | commands = [] 104 | for name, density_filename, selected_sample in problems_to_learn: 105 | detail_learn_options = learn_options.copy() 106 | detail_learn_options.domain = density_filename 107 | detail_learn_options.data = selected_sample["samples_filename"] 108 | detail_learn_options.labels = selected_sample["labels_filename"] 109 | export_file = "{}{sep}{}.{}.{}.result".format(get_benchmark_results_dir(), name, selected_sample["sample_size"], 110 | selected_sample["seed"], sep=os.path.sep) 111 | log_file = "{}{sep}{}.{}.{}.log".format(get_benchmark_results_dir(), name, selected_sample["sample_size"], 112 | selected_sample["seed"], sep=os.path.sep) 113 | if not os.path.exists(os.path.dirname(export_file)): 114 | os.makedirs(os.path.dirname(export_file)) 115 | commands.append("incal-track {} --export {} --log {}" 116 | .format(detail_learn_options.print_arguments(), export_file, log_file)) 117 | 118 | run_commands(commands, processes, time_out) 119 | 120 | 121 | def get_experiment(res_path=None): 122 | def import_handler(parameters_dict, results_dict, config_dict): 123 | for key, entry in parameters_dict.items(): 124 | if isinstance(entry, str): 125 | index = entry.find("res/") 126 | if index >= 0: 127 | parameters_dict[key] = res_path + os.path.sep + entry[index+4:] 128 | 129 | config = Options() 130 | config.add_option("export", str) 131 | return Experiment(LearnOptions(), LearnResults(), config, import_handler if res_path else None) 132 | 133 | 134 | def track(): 135 | with warnings.catch_warnings(): 136 | warnings.simplefilter("ignore") 137 | experiment = get_experiment() 138 | experiment.import_from_command_line() 139 | experiment.save(experiment.config.export) 140 | experiment.execute() 141 | experiment.save(experiment.config.export) 142 | -------------------------------------------------------------------------------- /incal/extra/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/extra/__init__.py -------------------------------------------------------------------------------- /incal/extra/api.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import os 4 | 5 | import math 6 | 7 | 8 | if __name__ == "__main__": 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | subparsers = parser.add_subparsers(dest="mode") 12 | 13 | scan_parser = subparsers.add_parser("scan", help="Scan the directory and load smt problems") 14 | scan_parser.add_argument("-d", "--dir", default=None, help="Specify the directory to load files from") 15 | 16 | learn_parser = subparsers.add_parser("learn", help="Learn SMT formulas") 17 | learn_parser.add_argument("dir", help="Specify the results directory") 18 | learn_parser.add_argument("-s", "--samples", type=int, help="Specify the number of samples for learning") 19 | learn_parser.add_argument("-a", "--all", default=False, action="store_true", 20 | help="If set, learning will not use incremental mode and include all examples") 21 | learn_parser.add_argument("-d", "--dnf", default=False, action="store_true", 22 | help="If set, learning bias is DNF instead of CNF") 23 | 24 | table_parser = subparsers.add_parser("table", help="Types can be: [time, k, h, id, acc, samples, l]") 25 | table_parser.add_argument("row_key", help="Specify the row key type") 26 | table_parser.add_argument("col_key", default=None, help="Specify the col key type") 27 | table_parser.add_argument("value", default=None, help="Specify the value type") 28 | table_parser.add_argument("dirs", nargs="*", help="Specify the directories to load files from, always in pairs:" 29 | "result_dir, data_dir") 30 | 31 | table_subparsers = table_parser.add_subparsers(dest="command") 32 | table_print_parser = table_subparsers.add_parser("print", help="Print the table") 33 | table_print_parser.add_argument("-d", "--delimiter", default="\t", help="Specify the delimiter (default=tab)") 34 | table_print_parser.add_argument("-a", "--aggregate", default=False, action="store_true", 35 | help="Aggregate the rows in the plot") 36 | 37 | table_plot_parser = table_subparsers.add_parser("plot", help="Plot the table") 38 | table_plot_parser.add_argument("-a", "--aggregate", default=False, action="store_true", 39 | help="Aggregate the rows in the plot") 40 | table_plot_parser.add_argument("--y_min", default=None, type=float, help="Minimum value for y") 41 | table_plot_parser.add_argument("--y_max", default=None, type=float, help="Maximum value for y") 42 | table_plot_parser.add_argument("--x_min", default=None, type=float, help="Minimum value for x") 43 | table_plot_parser.add_argument("--x_max", default=None, type=float, help="Maximum value for x") 44 | table_plot_parser.add_argument("--legend_pos", default=None, type=str, help="Legend position") 45 | table_plot_parser.add_argument("-o", "--output", default=None, help="Specify the output file") 46 | 47 | combine_parser = subparsers.add_parser("combine", help="Combine multiple results directories") 48 | combine_parser.add_argument("output_dir", help="The output directory to summarize results in") 49 | combine_parser.add_argument("input_dirs", nargs="*", help="Specify the directories to combine") 50 | combine_parser.add_argument("-b", "--bias", default=None, help="Specify the bias") 51 | 
combine_parser.add_argument("-p", "--prefix", default=None, help="Specify the prefix for input dirs") 52 | 53 | gen_parser = subparsers.add_parser("generate", help="Generate synthetic examples") 54 | gen_parser.add_argument("data_dir") 55 | gen_parser.add_argument("-n", "--data_sets", default=10, type=int) 56 | gen_parser.add_argument("--prefix", default="synthetics") 57 | gen_parser.add_argument("-b", "--bool_count", default=2, type=int) 58 | gen_parser.add_argument("-r", "--real_count", default=2, type=int) 59 | gen_parser.add_argument("--bias", default="cnf") 60 | gen_parser.add_argument("-k", "--k", default=3, type=int) 61 | gen_parser.add_argument("-l", "--literals", default=4, type=int) 62 | gen_parser.add_argument("--half_spaces", default=7, type=int) 63 | gen_parser.add_argument("-s", "--samples", default=1000, type=int) 64 | gen_parser.add_argument("--ratio", default=90, type=int) 65 | gen_parser.add_argument("-p", "--plot_dir", default=None) 66 | gen_parser.add_argument("-e", "--errors", default=0, type=int) 67 | 68 | migration_parser = subparsers.add_parser("migrate", help="Migrate files to newer or extended versions") 69 | migration_subparsers = migration_parser.add_subparsers(dest="type") 70 | 71 | migration_fix_parser = migration_subparsers.add_parser("fix", help="Fix result files") 72 | migration_fix_parser.add_argument("results_dir", help="Specify the result directory") 73 | migration_fix_parser.add_argument("-b", "--bias", default=None, help="Specify the bias") 74 | 75 | migration_acc_parser = migration_subparsers.add_parser("accuracy", help="Add accuracy to result files") 76 | migration_acc_parser.add_argument("results_dir", help="Specify the result directory") 77 | migration_acc_parser.add_argument("-d", "--data_dir", help="Specify the data directory for synthetic problems") 78 | migration_acc_parser.add_argument("-s", "--samples", default=None, help="Specify the number of samples", type=int) 79 | migration_acc_parser.add_argument("-f", "--force", default=False, action="store_true", help="Overwrites existing values") 80 | 81 | migration_ratio_parser = migration_subparsers.add_parser("ratio", help="Add ratio to result files") 82 | migration_ratio_parser.add_argument("results_dir", help="Specify the result directory") 83 | migration_ratio_parser.add_argument("-d", "--data_dir", help="Specify the data directory for synthetic problems") 84 | migration_ratio_parser.add_argument("-s", "--samples", default=None, help="Specify the number of samples", type=int) 85 | migration_ratio_parser.add_argument("-f", "--force", default=False, action="store_true", help="Overwrites existing values") 86 | 87 | args = parser.parse_args() 88 | 89 | if args.mode == "scan": 90 | full_dir = os.path.abspath(args.filename) 91 | root_dir = os.path.dirname(full_dir) 92 | 93 | import smt_scan 94 | smt_scan.scan(full_dir, root_dir) 95 | smt_scan.analyze(root_dir) 96 | smt_scan.ratios() 97 | elif args.mode == "learn": 98 | import smt_scan 99 | smt_scan.learn(args.samples, args.dir, args.all, args.dnf) 100 | elif args.mode == "table": 101 | import smt_scan 102 | table = smt_scan.TableMaker(args.row_key, args.col_key, args.value) 103 | for i in range(int(math.floor(len(args.dirs) / 3))): 104 | table.load_table(args.dirs[3 * i], args.dirs[3 * i + 1], args.dirs[3 * i + 2]) 105 | if args.command == "print": 106 | table.delimiter = args.delimiter 107 | print(table.to_txt(0, args.aggregate)) 108 | elif args.command == "plot": 109 | table.plot_table(args.output, None if args.aggregate else 0, args.y_min, 
args.y_max, args.x_min, args.x_max, args.legend_pos) 110 | else: 111 | print("Error: unknown table command {}".format(args.command)) 112 | elif args.mode == "combine": 113 | import combine_results 114 | combine_results.combine(args.output_dir, args.input_dirs, args.bias, args.prefix) 115 | elif args.mode == "generate": 116 | from generator import generate_random 117 | generate_random(args.data_sets, args.prefix, args.bool_count, args.real_count, args.bias, args.k, 118 | args.literals, args.half_spaces, args.samples, args.ratio, args.errors, args.data_dir, 119 | args.plot_dir) 120 | elif args.mode == "migrate": 121 | import migrate 122 | if args.type == "fix": 123 | migrate.migrate_results(args.results_dir, args.bias) 124 | elif args.type == "accuracy": 125 | migrate.add_accuracy(args.results_dir, args.data_dir, args.samples, args.force) 126 | elif args.type == "ratio": 127 | migrate.add_ratio(args.results_dir, args.data_dir, args.samples, args.force) 128 | else: 129 | print("Error: unknown migration type {}".format(args.type)) 130 | else: 131 | print("Error: unknown mode {}".format(args.mode)) 132 | 133 | 134 | parse_args() 135 | -------------------------------------------------------------------------------- /incal/extra/combine_results.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import filecmp 3 | import fnmatch 4 | import json 5 | 6 | import os 7 | import shutil 8 | 9 | import migrate 10 | 11 | 12 | def combine(output_dir, dirs, bias=None, prefix=None): 13 | if not os.path.exists(output_dir): 14 | os.makedirs(output_dir) 15 | 16 | migrate.migrate_results(output_dir, bias) 17 | 18 | summary = os.path.join(output_dir, "problems.txt") 19 | if not os.path.isfile(summary): 20 | flat = {} 21 | else: 22 | with open(summary, "r") as f: 23 | flat = json.load(f) 24 | 25 | if prefix is not None: 26 | dirs = [str(prefix) + str(directory) for directory in dirs] 27 | 28 | for input_dir in dirs: 29 | migrate.migrate_results(input_dir, bias) 30 | input_summary = os.path.join(input_dir, "problems.txt") 31 | with open(input_summary, "r") as f: 32 | input_flat = json.load(f) 33 | for problem_id in input_flat: 34 | if problem_id not in flat: 35 | flat[problem_id] = {} 36 | for sample_size in input_flat[problem_id]: 37 | if sample_size not in flat[problem_id]: 38 | flat[problem_id][sample_size] = input_flat[problem_id][sample_size] 39 | else: 40 | raise RuntimeError("Attempting to overwrite sample size {} for problem {} from file {}" 41 | .format(sample_size, problem_id, input_summary)) 42 | 43 | for input_dir in dirs: 44 | for input_file in os.listdir(input_dir): 45 | if fnmatch.fnmatch(input_file, '*.learning_log.txt'): 46 | old_file = os.path.join(input_dir, input_file) 47 | new_file = os.path.join(output_dir, input_file) 48 | if not os.path.isfile(new_file): 49 | shutil.copy(old_file, new_file) 50 | else: 51 | if not filecmp.cmp(old_file, new_file): 52 | raise RuntimeError("Attempting to overwrite {} with {}".format(new_file, old_file)) 53 | 54 | with open(summary, "w") as f: 55 | json.dump(flat, f) 56 | 57 | 58 | def parse(): 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument("output_dir") 61 | parser.add_argument("dirs", nargs="*") 62 | parser.add_argument("-b", "--bias", default=None, help="Specify the bias") 63 | parser.add_argument("-p", "--prefix", default=None, help="Specify the prefix for input dirs") 64 | parsed = parser.parse_args() 65 | combine(parsed.output_dir, parsed.dirs, parsed.bias, parsed.prefix) 66 | 67 | 
68 | if __name__ == "__main__": 69 | parse() 70 | -------------------------------------------------------------------------------- /incal/extra/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import argparse 4 | import hashlib 5 | import json 6 | import random 7 | 8 | import os 9 | import tempfile 10 | 11 | import time 12 | 13 | import problem 14 | import generator 15 | import parse 16 | import inc_logging 17 | 18 | from os.path import basename 19 | 20 | import pysmt.shortcuts as smt 21 | 22 | from incremental_learner import RandomViolationsStrategy 23 | from k_cnf_smt_learner import KCnfSmtLearner 24 | from parameter_free_learner import learn_bottom_up 25 | 26 | 27 | def learn(name, domain, h, data, seed): 28 | initial_size = 20 29 | violations_size = 10 30 | log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "demo", "results") 31 | problem_name = hashlib.sha256(name).hexdigest() 32 | 33 | def learn_inc(_data, _k, _h): 34 | violations_strategy = RandomViolationsStrategy(violations_size) 35 | learner = KCnfSmtLearner(_k, _h, violations_strategy) 36 | initial_indices = random.sample(list(range(len(data))), initial_size) 37 | log_file = os.path.join(log_dir, "{}_{}_{}.txt".format(problem_name, _k, _h)) 38 | learner.add_observer(inc_logging.LoggingObserver(log_file, seed, True, violations_strategy)) 39 | learned_theory = learner.learn(domain, data, initial_indices) 40 | # learned_theory = Or(*[And(*planes) for planes in hyperplane_dnf]) 41 | print("Learned theory:\n{}".format(parse.smt_to_nested(learned_theory))) 42 | return learned_theory 43 | 44 | phi, k, h = learn_bottom_up(data, learn_inc, 1, 1, init_h=h, max_h=h) 45 | 46 | with open(os.path.join(log_dir, "problems.txt"), "a") as f: 47 | print(json.dumps({problem_name: name, "k": k, "h": h}), file=f) 48 | 49 | 50 | def main(filename, sample_count): 51 | seed = time.time() 52 | random.seed(seed) 53 | 54 | target_formula = smt.read_smtlib(filename) 55 | 56 | variables = target_formula.get_free_variables() 57 | var_names = [str(v) for v in variables] 58 | var_types = {str(v): v.symbol_type() for v in variables} 59 | var_domains = {str(v): (0, 200) for v in variables} # TODO This is a hack 60 | 61 | domain = problem.Domain(var_names, var_types, var_domains) 62 | name = basename(filename).split(".")[0] 63 | target_problem = problem.Problem(domain, target_formula, name) 64 | 65 | # compute_difference(domain, target_formula, target_formula) 66 | 67 | samples = generator.get_problem_samples(target_problem, sample_count, 1) 68 | 69 | initial_indices = random.sample(list(range(sample_count)), 20) 70 | learner = KCnfSmtLearner(3, 3, RandomViolationsStrategy(5)) 71 | 72 | dir_name = "../output/{}".format(name) 73 | img_name = "{}_{}_{}".format(learner.name, sample_count, seed) 74 | # learner.add_observer(plotting.PlottingObserver(data_set.samples, dir_name, img_name, "r0", "r1")) 75 | with open("log.txt", "w") as f: 76 | learner.add_observer(inc_logging.LoggingObserver(f)) 77 | 78 | print(parse.smt_to_nested(learner.learn(domain, samples, initial_indices))) 79 | 80 | 81 | def compute_difference(domain, target_theory, learned_theory): 82 | query = (target_theory & ~learned_theory) | (~target_theory & learned_theory) 83 | compute_wmi(domain, query, domain.variables) 84 | 85 | 86 | def compute_wmi(domain, query, variables): 87 | # os.environ["PATH"] += os.pathsep + "/Users/samuelkolb/Downloads/latte/dest/bin" 88 | # from sys import path 
89 | # path.insert(0, "/Users/samuelkolb/Documents/PhD/wmi-pa/src") 90 | # from wmi import WMI 91 | 92 | # support = [] 93 | # for v in domain.real_vars: 94 | # lb, ub = domain.var_domains[v] 95 | # sym = domain.get_symbol(v) 96 | # support.append((lb <= sym) & (sym <= ub)) 97 | # 98 | # support = smt.And(*support) 99 | # wmi = WMI() 100 | # total_volume, _ = wmi.compute(support, 1, WMI.MODE_PA) 101 | # query_volume, _ = wmi.compute(support & query, 1, WMI.MODE_PA) 102 | # print(query_volume / total_volume) 103 | 104 | f = tempfile.NamedTemporaryFile(delete=False) 105 | try: 106 | flat = { 107 | "domain": problem.export_domain(domain, to_str=False), 108 | "query": parse.smt_to_nested(query), 109 | "variables": variables 110 | } 111 | json.dump(flat, f) 112 | with open("test.txt", "w") as f2: 113 | json.dump(flat, f2) 114 | f.close() 115 | finally: 116 | os.remove(f.name) 117 | 118 | 119 | 120 | if __name__ == "__main__": 121 | parser = argparse.ArgumentParser() 122 | parser.add_argument("filename") 123 | parser.add_argument("sample_count", type=int) 124 | args = parser.parse_args() 125 | main(args.filename, args.sample_count) 126 | -------------------------------------------------------------------------------- /incal/extra/deploy.py: -------------------------------------------------------------------------------- 1 | import json 2 | import StringIO 3 | import os 4 | from os.path import join, dirname 5 | 6 | import sys 7 | from fabric.api import run, env, execute, cd, local, put, get, prefix, lcd 8 | from fabric.contrib import files 9 | 10 | 11 | def vary_synthetic_parameter(parameter_name, values, fixed_values, learner_settings, time_out=None, samples=None, 12 | exp_name=None, override=False): 13 | default_values = { 14 | "data_sets": 10, 15 | "bool_count": 2, 16 | "real_count": 2, 17 | "bias": "cnf", 18 | "k": 3, 19 | "literals": 4, 20 | "half_spaces": 7, 21 | "samples": 1000, 22 | "ratio": 90, 23 | "errors": 0, 24 | } 25 | for key, value in fixed_values.items(): 26 | if key not in default_values: 27 | raise RuntimeError("Found unknown parameter name {}".format(key)) 28 | default_values[key] = value 29 | 30 | del default_values[parameter_name] 31 | 32 | config = {"fixed": default_values, "vary": parameter_name, "values": values, "learner": learner_settings} 33 | if exp_name is None: 34 | exp_name = "h" + str(hash(json.dumps(config)) + sys.maxsize + 1) 35 | 36 | print(config) 37 | 38 | exp_path = join("synthetic", parameter_name, exp_name) 39 | local_root = dirname(dirname(__file__)) 40 | full_gen = join(local_root, exp_path) 41 | full_out = join(local_root, "output", exp_path) 42 | full_code = join(local_root, "smtlearn") 43 | full_api = join(full_code, "api.py") 44 | full_exp = join(full_code, "experiments.py") 45 | 46 | # Generate 47 | gen_config = join(full_gen, "config.json") 48 | if override or not os.path.exists(gen_config): 49 | local("mkdir -p {}".format(full_gen)) 50 | 51 | with open(gen_config, "w") as f: 52 | json.dump(config, f) 53 | 54 | commands = [] 55 | for value in values: 56 | default_values[parameter_name] = value 57 | options = " ".join("--{} {}".format(name, val) for name, val in default_values.items()) 58 | command = "python {api} generate {input}/{val} {options}" \ 59 | .format(api=full_api, input=full_gen, val=value, options=options) 60 | commands.append(command) 61 | commands.append("wait") 62 | 63 | local(" & ".join(commands)) 64 | 65 | # Learn 66 | out_config = join(full_out, "config.json") 67 | if override or not os.path.exists(out_config): 68 | local("mkdir 
-p {}".format(full_out)) 69 | 70 | with open(out_config, "w") as f: 71 | json.dump(config, f) 72 | 73 | commands = [] 74 | for value in values: 75 | options = " ".join("--{} {}".format(name, val) for name, val in learner_settings.items()) 76 | command = "python {exp} {input}/{val} \"\" {output}/{val} {options}" \ 77 | .format(exp=full_exp, input=full_gen, output=full_out, val=value, options=options) 78 | if time_out is not None: 79 | command += " -t {}".format(time_out) 80 | commands.append(command) 81 | commands.append("wait") 82 | 83 | local(" & ".join(commands)) 84 | 85 | # Combine 86 | if override or not os.path.exists(join(full_out, "summary")): 87 | with lcd(full_gen): 88 | local("mkdir -p all") 89 | for value in values: 90 | local("cp {}/* all/".format(value)) 91 | 92 | local("python {api} combine {output}/summary {values} -p {output}/" 93 | .format(api=full_api, output=full_out, values=" ".join(str(v) for v in values))) 94 | 95 | for migration in ["ratio", "accuracy"]: 96 | command = "python {api} migrate {migration} {output}/summary -d {input}/all" \ 97 | .format(output=full_out, input=full_gen, values=" ".join(str(v) for v in values), api=full_api, 98 | migration=migration) 99 | if samples is not None: 100 | command += " -s {}".format(samples) 101 | local(command) 102 | 103 | 104 | def vary_h(time_out=None, samples=None, override=False): 105 | parameter = "half_spaces" 106 | values = [3, 4, 5, 6, 7, 8, 9, 10] 107 | fixed_values = {"data_sets": 100, "bool_count": 0, "real_count": 2, "k": 2, "literals": 3} 108 | 109 | learner = {"bias": "cnf", "selection": "random"} 110 | vary_synthetic_parameter(parameter, values, fixed_values, learner, time_out, samples, "standard", override) 111 | 112 | learner["selection"] = "dt_weighted" 113 | vary_synthetic_parameter(parameter, values, fixed_values, learner, time_out, samples, "dt", override) 114 | 115 | 116 | def vary_h_simple(time_out=None, samples=None): 117 | parameter_name = "half_spaces" 118 | values = [3, 4, 5, 6, 7, 8] 119 | fixed_values = {"data_sets": 10, "bool_count": 0, "real_count": 2, "k": 2, "literals": 3} 120 | 121 | learner = {"bias": "cnf", "selection": "random"} 122 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_standard") 123 | 124 | learner["selection_size"] = 1 125 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_standard_single") 126 | 127 | learner["selection_size"] = 20 128 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_standard_20") 129 | 130 | learner["selection"] = "dt_weighted" 131 | learner["selection_size"] = 1 132 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_dt_1") 133 | 134 | learner["selection"] = "dt" 135 | learner["selection_size"] = 1 136 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_sdt_1") 137 | 138 | 139 | if __name__ == "__main__": 140 | import authenticate 141 | 142 | authenticate.config() 143 | execute(vary_h_simple, time_out=200, samples=1000) 144 | -------------------------------------------------------------------------------- /incal/extra/experiments.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import json 5 | import random 6 | 7 | import os 8 | 9 | import time 10 | 11 | from generator import import_synthetic_data_files 12 | 
from inc_logging import LoggingObserver 13 | from incremental_learner import AllViolationsStrategy, RandomViolationsStrategy, WeightedRandomViolationsStrategy, \ 14 | MaxViolationsStrategy 15 | from k_cnf_smt_learner import KCnfSmtLearner 16 | from k_dnf_smt_learner import KDnfSmtLearner 17 | from parameter_free_learner import learn_bottom_up 18 | from timeout import timeout 19 | 20 | 21 | class IncrementalConfig(object): 22 | def __init__(self, initial, initial_size, selection, selection_size): 23 | self.initial = initial 24 | self.initial_size = initial_size 25 | self.selection = selection 26 | self.selection_size = selection_size 27 | self.domain = None 28 | self.data = None 29 | self.dt_weights = None 30 | 31 | def set_data(self, data): 32 | self.data = data 33 | self.dt_weights = None 34 | 35 | def get_dt_weights(self): 36 | if self.dt_weights is None: 37 | import dt_selection 38 | self.dt_weights = [min(d.values()) for d in dt_selection.get_distances(self.domain, self.data)] 39 | return self.dt_weights 40 | 41 | def get_initial_indices(self): 42 | if self.initial is None: 43 | return list(range(len(self.data))) 44 | elif self.initial == "random": 45 | return random.sample(range(len(self.data)), self.initial_size) 46 | elif self.initial == "dt_weighted": 47 | import sampling 48 | return sampling.sample_weighted(zip(range(len(self.data)), self.get_dt_weights()), self.initial_size) 49 | else: 50 | raise RuntimeError("Unknown initial type {}".format(self.initial)) 51 | 52 | def get_selection_strategy(self): 53 | if self.selection is None: 54 | return RandomViolationsStrategy(0) 55 | elif self.selection == "random": 56 | return RandomViolationsStrategy(self.selection_size) 57 | elif self.selection == "dt_weighted": 58 | return WeightedRandomViolationsStrategy(self.selection_size, self.get_dt_weights()) 59 | elif self.selection == "dt": 60 | return MaxViolationsStrategy(self.selection_size, self.get_dt_weights()) 61 | else: 62 | raise RuntimeError("Unknown selection type {}".format(self.selection)) 63 | 64 | 65 | def learn_synthetic(input_dir, prefix, results_dir, bias, incremental_config, plot=None, sample_count=None, 66 | time_out=None, parameter_free=False): 67 | 68 | input_dir = os.path.abspath(input_dir) 69 | data_sets = list(import_synthetic_data_files(input_dir, prefix)) 70 | 71 | if not os.path.exists(results_dir): 72 | os.makedirs(results_dir) 73 | overview = os.path.join(results_dir, "problems.txt") 74 | 75 | if not os.path.isfile(overview): 76 | flat = {} 77 | else: 78 | with open(overview, "r") as f: 79 | flat = json.load(f) 80 | 81 | for data_set in data_sets: 82 | synthetic_problem = data_set.synthetic_problem 83 | data = data_set.samples 84 | name = synthetic_problem.theory_problem.name 85 | domain = synthetic_problem.theory_problem.domain 86 | 87 | if name not in flat: 88 | flat[name] = {} 89 | 90 | print(name) 91 | 92 | seed = hash(time.time()) 93 | random.seed(seed) 94 | 95 | if sample_count is not None and sample_count < len(data): 96 | data = random.sample(data, sample_count) 97 | else: 98 | sample_count = len(data) 99 | 100 | incremental_config.set_data(data) 101 | incremental_config.domain = domain 102 | 103 | if not parameter_free: 104 | initial_indices = incremental_config.get_initial_indices() 105 | h = synthetic_problem.half_space_count 106 | k = synthetic_problem.formula_count 107 | 108 | if bias == "cnf" or bias == "dnf": 109 | selection_strategy = incremental_config.get_selection_strategy() 110 | if bias == "cnf": 111 | learner = KCnfSmtLearner(k, h, 
selection_strategy) 112 | elif bias == "dnf": 113 | learner = KDnfSmtLearner(k, h, selection_strategy) 114 | 115 | if plot is not None and plot and synthetic_problem.bool_count == 0 and synthetic_problem.real_count == 2: 116 | import plotting 117 | feats = domain.real_vars 118 | plots_dir = os.path.join(results_dir, name) 119 | exp_id = "{}_{}_{}".format(learner.name, sample_count, seed) 120 | learner.add_observer(plotting.PlottingObserver(data, plots_dir, exp_id, *feats)) 121 | log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, k, h) 122 | learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy)) 123 | else: 124 | raise RuntimeError("Unknown bias {}".format(bias)) 125 | 126 | result = timeout(learner.learn, [domain, data, initial_indices], duration=time_out) 127 | else: 128 | def learn_f(_data, _k, _h): 129 | selection_strategy = incremental_config.get_selection_strategy() 130 | if bias == "cnf": 131 | learner = KCnfSmtLearner(_k, _h, selection_strategy) 132 | elif bias == "dnf": 133 | learner = KDnfSmtLearner(_k, _h, selection_strategy) 134 | initial_indices = incremental_config.get_initial_indices() 135 | log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, _k, _h) 136 | learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy)) 137 | return learner.learn(domain, data, initial_indices) 138 | 139 | result, k, h = learn_bottom_up(data, learn_f, 3, 1) 140 | if result is None: 141 | flat[name][sample_count] = {"k": k, "h": h, "seed": seed, "bias": bias, "time_out": True} 142 | else: 143 | flat[name][sample_count] = {"k": k, "h": h, "seed": seed, "bias": bias, "time_out": False} 144 | if time_out is not None: 145 | flat[name][sample_count]["time_limit"] = time_out 146 | 147 | with open(overview, "w") as f: 148 | json.dump(flat, f) 149 | 150 | 151 | if __name__ == "__main__": 152 | parser = argparse.ArgumentParser() 153 | parser.add_argument("input_dir") 154 | parser.add_argument("prefix") 155 | parser.add_argument("output_dir") 156 | parser.add_argument("--bias", default="cnf") 157 | parser.add_argument("--initial", default="random") 158 | parser.add_argument("--initial_size", default=20, type=int) 159 | parser.add_argument("--selection", default="random") 160 | parser.add_argument("--selection_size", default=10, type=int) 161 | parser.add_argument("-p", "--plot", action="store_true") 162 | parser.add_argument("-s", "--samples", default=None, type=int) 163 | parser.add_argument("-t", "--time_out", default=None, type=int) 164 | parser.add_argument("-a", "--non_incremental", default=False, action="store_true") 165 | parser.add_argument("-f", "--parameter_free", default=False, action="store_true") 166 | parsed = parser.parse_args() 167 | 168 | if parsed.non_incremental: 169 | inc_config = IncrementalConfig(None, None, None, None) 170 | else: 171 | inc_config = IncrementalConfig(parsed.initial, parsed.initial_size, parsed.selection, parsed.selection_size) 172 | 173 | learn_synthetic(parsed.input_dir, parsed.prefix, parsed.output_dir, parsed.bias, inc_config, 174 | parsed.plot, parsed.samples, parsed.time_out, parsed.parameter_free) 175 | -------------------------------------------------------------------------------- /incal/extra/migrate.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import argparse 4 | import json 5 | 6 | import os 7 | import random 8 
| import shutil 9 | 10 | import re 11 | import subprocess 12 | 13 | import time 14 | from bitarray import bitarray 15 | 16 | import generator 17 | import parse 18 | import problem 19 | from smt_check import test 20 | from smt_print import pretty_print 21 | from smt_scan import load_results, get_log_messages, dump_results 22 | import pysmt.shortcuts as smt 23 | 24 | 25 | def migrate_results(directory, bias=None): 26 | summary = os.path.join(directory, "problems.txt") 27 | if os.path.isfile(summary): 28 | with open(summary, "r") as f: 29 | flat = json.load(f) 30 | 31 | for problem_id in flat: 32 | for sample_size in flat[problem_id]: 33 | if "bias" not in flat[problem_id][sample_size]: 34 | flat[problem_id][sample_size]["bias"] = "cnf" if bias is None else bias 35 | 36 | seed, k, h = (flat[problem_id][sample_size][v] for v in ["seed", "k", "h"]) 37 | 38 | pattern = r'{problem_id}_{size}_{seed}_\d+_\d+.txt' \ 39 | .format(problem_id=problem_id, size=sample_size, seed=seed) 40 | for old_file in os.listdir(directory): 41 | if re.match(pattern, old_file): 42 | new_file = old_file[:-4] + ".learning_log.txt" 43 | shutil.move(os.path.join(directory, old_file), os.path.join(directory, new_file)) 44 | 45 | with open(summary, "w") as f: 46 | json.dump(flat, f) 47 | 48 | 49 | def calculate_accuracy(domain, target_formula, learned_formula): 50 | # from sys import path 51 | # path.insert(0, "/Users/samuelkolb/Documents/PhD/wmi-pa/experiments/client") 52 | # from run import compute_wmi 53 | print("Calculate accuracy:") 54 | # print(pretty_print(target_formula)) 55 | # print(pretty_print(learned_formula)) 56 | 57 | # r0, r1 = [smt.Symbol(n, smt.REAL) for n in ["r0", "r1"]] 58 | # b0, b1, b2, b3 = [smt.Symbol(n, smt.BOOL) for n in ["b0", "b1", "b2", "b3"]] 59 | # t1 = (~(1.0 <= 0.427230115861 * r0 + 1.02084935803 * r1) | ~(1.0 <= 1.59402729715 * r0 + 0.309004054118 * r1) | ~b1) 60 | # t2 = (b2 | (1.0 <= 1.59402729715 * r0 + 0.309004054118 * r1) | ~b0) 61 | 62 | # domain = problem.Domain(["x", "y"], {"x": smt.REAL, "y": smt.REAL}, {"x": (0, 1), "y": (0, 1)}) 63 | # x, y = smt.Symbol("x", smt.REAL), smt.Symbol("y", smt.REAL) 64 | # t2 = (1.0 <= 1.5 * x + 0.5 * y) 65 | # t2 = (2 <= 3 * x + y) 66 | # f = (t1 & t2) 67 | 68 | flat = { 69 | "domain": problem.export_domain(domain, False), 70 | "query": parse.smt_to_nested(smt.Iff(target_formula, learned_formula)) 71 | } 72 | 73 | print(domain) 74 | print(pretty_print(target_formula)) 75 | print(pretty_print(learned_formula)) 76 | # accuracy = list(compute_wmi(domain, [smt.Iff(target_formula, learned_formula)]))[0] 77 | 78 | output = str(subprocess.check_output(["/Users/samuelkolb/Documents/PhD/wmi-pa/env/bin/python", 79 | "/Users/samuelkolb/Documents/PhD/wmi-pa/experiments/client/run.py", "-s", 80 | json.dumps(flat)])) 81 | accuracy = float(output.split(": ")[1]) 82 | print(accuracy) 83 | return accuracy 84 | 85 | 86 | def calculate_accuracy_approx(domain, target_formula, learned_formula, samples): 87 | bits_target = bitarray([test(target_formula, sample) for sample in samples]) 88 | bits_learned = bitarray([test(learned_formula, sample) for sample in samples]) 89 | accuracy = ((bits_target & bits_learned) | (~bits_target & ~bits_learned)).count() / len(samples) 90 | print(accuracy) 91 | return accuracy 92 | 93 | 94 | def adapt_domain_multiple(target_problem, new_bounds): 95 | domain = target_problem.domain 96 | adapted_domain = problem.Domain(domain.variables, domain.var_types, new_bounds) 97 | return problem.Problem(adapted_domain, target_problem.theory, 
target_problem.name) 98 | 99 | 100 | def get_problem(data_dir, problem_id): 101 | try: 102 | with open(os.path.join(data_dir, "{}.txt".format(str(problem_id)))) as f: 103 | import generator 104 | s_problem = generator.import_synthetic_data(json.load(f)) 105 | return s_problem.synthetic_problem.theory_problem 106 | except IOError: 107 | with open(os.path.join(data_dir, "problems", "{}.txt".format(str(problem_id)))) as f: 108 | import generator 109 | theory_problem = problem.import_problem(json.load(f)) 110 | 111 | with open(os.path.join(data_dir, "summary.json"), "r") as f: 112 | flat = json.load(f) 113 | ratio_dict = flat["ratios"] 114 | lookup = flat["lookup"] 115 | 116 | adapted_problem = adapt_domain_multiple(theory_problem, ratio_dict[lookup[problem_id]]["bounds"]) 117 | 118 | return adapted_problem 119 | 120 | 121 | def add_accuracy(results_dir, data_dir=None, acc_sample_size=None, recompute=False): 122 | results_flat = load_results(results_dir) 123 | 124 | for problem_id in results_flat: 125 | 126 | if data_dir is not None: 127 | theory_problem = get_problem(data_dir, problem_id) 128 | domain = theory_problem.domain 129 | target_formula = theory_problem.theory 130 | print(problem_id) 131 | print(pretty_print(target_formula)) 132 | else: 133 | raise RuntimeError("Data directory missing") 134 | 135 | for sample_size in results_flat[problem_id]: 136 | config = results_flat[problem_id][sample_size] 137 | timed_out = config.get("time_out", False) 138 | if not timed_out: 139 | learned_formula = None 140 | for message in get_log_messages(results_dir, config, p_id=problem_id, samples=sample_size): 141 | if message["type"] == "update": 142 | learned_formula = parse.nested_to_smt(message["theory"]) 143 | 144 | print(pretty_print(learned_formula)) 145 | print() 146 | 147 | if acc_sample_size is None: 148 | if recompute or "exact_accuracy" not in config: 149 | config["exact_accuracy"] = calculate_accuracy(domain, target_formula, learned_formula) 150 | else: 151 | if recompute or "approx_accuracy" not in config: 152 | config["approx_accuracy"] = dict() 153 | acc_dict = config["approx_accuracy"] 154 | if acc_sample_size not in acc_dict: 155 | acc_dict[acc_sample_size] = [] 156 | if len(acc_dict[acc_sample_size]) < 1: 157 | seed = hash(time.time()) 158 | random.seed(seed) 159 | samples = [generator.get_sample(domain) for _ in range(acc_sample_size)] 160 | acc_dict[acc_sample_size].append({ 161 | "acc": calculate_accuracy_approx(domain, target_formula, learned_formula, samples), 162 | "seed": seed, 163 | }) 164 | 165 | dump_results(results_flat, results_dir) 166 | 167 | 168 | def calculate_ratio(domain, formula): 169 | raise NotImplementedError() 170 | 171 | 172 | def calculate_ratio_approx(formula, samples): 173 | bits = bitarray([test(formula, sample) for sample in samples]) 174 | positives = bits.count() / len(samples) 175 | ratio = max(positives, 1 - positives) 176 | print("Ratio: {}".format(ratio)) 177 | return ratio 178 | 179 | 180 | def add_ratio(results_dir, data_dir=None, ratio_sample_size=None, recompute=False): 181 | results_flat = load_results(results_dir) 182 | 183 | ratio_cache = dict() 184 | 185 | for problem_id in results_flat: 186 | if data_dir is not None: 187 | theory_problem = get_problem(data_dir, problem_id) 188 | domain = theory_problem.domain 189 | formula = theory_problem.theory 190 | else: 191 | raise RuntimeError("Data directory missing") 192 | 193 | seed = hash(time.time()) 194 | random.seed(seed) 195 | samples = [generator.get_sample(domain) for _ in 
range(ratio_sample_size)] 196 | 197 | ratio = calculate_ratio(domain, formula) if ratio_sample_size is None else calculate_ratio_approx(formula, samples) 198 | 199 | for sample_size in results_flat[problem_id]: 200 | config = results_flat[problem_id][sample_size] 201 | 202 | if ratio_sample_size is None: 203 | if recompute or "exact_ratio" not in config: 204 | config["exact_ratio"] = ratio 205 | else: 206 | if recompute or "approx_ratio" not in config: 207 | config["approx_ratio"] = dict() 208 | ratio_dict = config["approx_ratio"] 209 | if ratio_sample_size not in ratio_dict: 210 | ratio_dict[ratio_sample_size] = [] 211 | if len(ratio_dict[ratio_sample_size]) < 1: 212 | ratio_dict[ratio_sample_size].append({ 213 | "ratio": ratio, 214 | "seed": seed, 215 | }) 216 | 217 | dump_results(results_flat, results_dir) 218 | 219 | 220 | if __name__ == "__main__": 221 | x = smt.Symbol("x", smt.REAL) 222 | calculate_accuracy(problem.Domain(["x"], {"x": smt.REAL}, {"x": (0, 1)}), x <= smt.Real(0.5), x <= smt.Real(0.4)) -------------------------------------------------------------------------------- /incal/incremental_learner.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import pysmt.shortcuts as smt 4 | from pysmt.exceptions import InternalSolverError 5 | 6 | from observe import observe 7 | from learner import Learner, NoFormulaFound 8 | 9 | 10 | class IncrementalObserver(observe.SpecializedObserver): 11 | def observe_initial(self, data, labels, initial_indices): 12 | raise NotImplementedError() 13 | 14 | def observe_iteration(self, data, labels, formula, new_active_indices, solving_time, selection_time): 15 | raise NotImplementedError() 16 | 17 | 18 | class IncrementalLearner(Learner): 19 | def __init__(self, name, selection_strategy, smt_solver=True): 20 | """ 21 | Initializes a new incremental learner 22 | :param str name: The learner name 23 | :param SelectionStrategy selection_strategy: The selection strategy 24 | """ 25 | Learner.__init__(self, "incremental_{}".format(name)) 26 | self.selection_strategy = selection_strategy 27 | self.observer = observe.DispatchObserver() 28 | self.smt_solver = smt_solver 29 | 30 | def add_observer(self, observer): 31 | self.observer.add_observer(observer) 32 | 33 | def learn(self, domain, data, labels, initial_indices=None): 34 | if self.smt_solver: 35 | with smt.Solver() as solver: 36 | data, formula, labels = self.incremental_loop(domain, data, labels, initial_indices, solver) 37 | else: 38 | data, formula, labels = self.incremental_loop(domain, data, labels, initial_indices, None) 39 | 40 | return data, labels, formula 41 | 42 | def incremental_loop(self, domain, data, labels, initial_indices, solver): 43 | active_indices = list(range(len(data))) if initial_indices is None else initial_indices 44 | all_active_indices = active_indices 45 | self.observer.observe("initial", data, labels, active_indices) 46 | formula = None 47 | while len(active_indices) > 0: 48 | solving_start = time.time() 49 | try: 50 | formula = self.learn_partial(solver, domain, data, labels, active_indices) 51 | except InternalSolverError: 52 | raise NoFormulaFound(data, labels) 53 | except Exception as e: 54 | if "Z3Exception" in str(type(e)): 55 | raise NoFormulaFound(data, labels) 56 | else: 57 | raise e 58 | 59 | solving_time = time.time() - solving_start 60 | 61 | selection_start = time.time() 62 | data, labels, new_active_indices = \ 63 | self.selection_strategy.select_active(domain, data, labels, formula, 
all_active_indices) 64 | active_indices = list(new_active_indices) 65 | all_active_indices += active_indices 66 | selection_time = time.time() - selection_start 67 | self.observer.observe("iteration", data, labels, formula, active_indices, solving_time, selection_time) 68 | return data, formula, labels 69 | 70 | def learn_partial(self, solver, domain, data, labels, new_active_indices): 71 | raise NotImplementedError() 72 | -------------------------------------------------------------------------------- /incal/k_cnf_smt_learner.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pysmt.shortcuts as smt 5 | from pysmt.fnode import FNode 6 | from pysmt.typing import REAL 7 | from typing import Set, Tuple, List 8 | 9 | from incremental_learner import IncrementalLearner 10 | from pywmi import Domain 11 | 12 | 13 | class KCnfSmtLearner(IncrementalLearner): 14 | def __init__(self, conjunction_count, half_space_count, selection_strategy, symmetries, allow_negations=True): 15 | IncrementalLearner.__init__(self, "cnf_smt", selection_strategy) 16 | self.conjunction_count = conjunction_count 17 | self.half_space_count = half_space_count 18 | self.symmetries = symmetries 19 | self.allow_negations = allow_negations 20 | 21 | def learn_partial(self, solver, domain: Domain, data: np.ndarray, labels: np.ndarray, new_active_indices: Set): 22 | 23 | # Constants 24 | n_b_original = len(domain.bool_vars) 25 | n_b = n_b_original * 2 26 | n_r = len(domain.real_vars) 27 | 28 | n_h_original = self.half_space_count if n_r > 0 else 0 29 | n_h = n_h_original * 2 if self.allow_negations else n_h_original 30 | 31 | n_c = self.conjunction_count 32 | n_d = data.shape[0] 33 | 34 | real_indices = np.array([domain.var_types[v] == smt.REAL for v in domain.variables]) 35 | real_features = data[:, real_indices] 36 | bool_features = data[:, np.logical_not(real_indices)] 37 | 38 | # Variables 39 | a_hr = [[smt.Symbol("a_hr[{}][{}]".format(h, r), REAL) for r in range(n_r)] for h in range(n_h_original)] 40 | b_h = [smt.Symbol("b_h[{}]".format(h), REAL) for h in range(n_h_original)] 41 | s_ch = [[smt.Symbol("s_ch[{}][{}]".format(c, h)) for h in range(n_h)] for c in range(n_c)] 42 | s_cb = [[smt.Symbol("s_cb[{}][{}]".format(c, b)) for b in range(n_b)] for c in range(n_c)] 43 | 44 | # Aux variables 45 | s_ih = [[smt.Symbol("s_ih[{}][{}]".format(i, h)) for h in range(n_h)] for i in range(n_d)] 46 | s_ic = [[smt.Symbol("s_ic[{}][{}]".format(i, c)) for c in range(n_c)] for i in range(n_d)] 47 | 48 | def pair(real: bool, c: int, index: int) -> Tuple[FNode, FNode]: 49 | if real: 50 | return s_ch[c][index], s_ch[c][index + n_h_original] 51 | else: 52 | return s_cb[c][index], s_cb[c][index + n_b_original] 53 | 54 | def order_equal(pair1, pair2): 55 | x_t, x_f, y_t, y_f = pair1 + pair2 56 | return smt.Iff(x_t, y_t) & smt.Iff(x_f, y_f) 57 | 58 | def order_geq(pair1, pair2): 59 | x_t, x_f, y_t, y_f = pair1 + pair2 60 | return x_t | y_f | ((~x_f) & (~y_t)) 61 | 62 | def pairs(c: int) -> List[Tuple[FNode, FNode]]: 63 | return [pair(True, c, i) for i in range(n_h_original)] + [pair(False, c, i) for i in range(n_b_original)] 64 | 65 | def order_geq_lex(c1: int, c2: int): 66 | pairs_c1, pairs_c2 = pairs(c1), pairs(c2) 67 | assert len(pairs_c1) == len(pairs_c2) 68 | constraints = smt.TRUE() 69 | for j in range(len(pairs_c1)): 70 | condition = smt.TRUE() 71 | for i in range(j): 72 | condition &= order_equal(pairs_c1[i], pairs_c2[i]) 73 | 
constraints &= smt.Implies(condition, order_geq(pairs_c1[j], pairs_c2[j])) 74 | return constraints 75 | 76 | # Constraints 77 | for i in new_active_indices: 78 | x_r, x_b, label = [float(val) for val in real_features[i]], bool_features[i], labels[i] 79 | 80 | for h in range(n_h_original): 81 | sum_coefficients = smt.Plus([a_hr[h][r] * smt.Real(x_r[r]) for r in range(n_r)]) 82 | solver.add_assertion(smt.Iff(s_ih[i][h], sum_coefficients <= b_h[h])) 83 | 84 | for h in range(n_h_original, n_h): 85 | solver.add_assertion(smt.Iff(s_ih[i][h], ~s_ih[i][h - n_h_original])) 86 | 87 | for c in range(n_c): 88 | solver.add_assertion(smt.Iff(s_ic[i][c], smt.Or( 89 | [smt.FALSE()] 90 | + [(s_ch[c][h] & s_ih[i][h]) for h in range(n_h)] 91 | + [s_cb[c][b] for b in range(n_b_original) if x_b[b]] 92 | + [s_cb[c][b] for b in range(n_b_original, n_b) if not x_b[b - n_b_original]] 93 | ))) 94 | 95 | # --- [start] symmetry breaking --- 96 | # Mutually exclusive 97 | if "m" in self.symmetries: 98 | for c in range(n_c): 99 | for h in range(n_h_original): 100 | solver.add_assertion(~(s_ch[c][h] & s_ch[c][h + n_h_original])) 101 | for b in range(n_b_original): 102 | solver.add_assertion(~(s_cb[c][b] & s_cb[c][b + n_b_original])) 103 | 104 | # Normalized 105 | if "n" in self.symmetries: 106 | for h in range(n_h_original): 107 | solver.add_assertion(smt.Equals(b_h[h], smt.Real(1.0)) | smt.Equals(b_h[h], smt.Real(0.0))) 108 | 109 | # Vertical symmetries 110 | if "v" in self.symmetries: 111 | for c in range(n_c - 1): 112 | solver.add_assertion(order_geq_lex(c, c + 1)) 113 | 114 | # Horizontal symmetries 115 | if "h" in self.symmetries: 116 | for h in range(n_h_original - 1): 117 | solver.add_assertion(a_hr[h][0] >= a_hr[h + 1][0]) 118 | # --- [end] symmetry breaking --- 119 | 120 | if label: 121 | solver.add_assertion(smt.And([s_ic[i][c] for c in range(n_c)])) 122 | else: 123 | solver.add_assertion(smt.Or([~s_ic[i][c] for c in range(n_c)])) 124 | 125 | solver.solve() 126 | model = solver.get_model() 127 | 128 | x_vars = [domain.get_symbol(domain.real_vars[r]) for r in range(n_r)] 129 | half_spaces = [ 130 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) <= model.get_value(b_h[h]) 131 | for h in range(n_h_original) 132 | ] + [ 133 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) > model.get_value(b_h[h]) 134 | for h in range(n_h - n_h_original) 135 | ] 136 | 137 | b_vars = [domain.get_symbol(domain.bool_vars[b]) for b in range(n_b_original)] 138 | bool_literals = [b_vars[b] for b in range(n_b_original)] 139 | bool_literals += [~b_vars[b] for b in range(n_b - n_b_original)] 140 | 141 | conjunctions = [ 142 | [half_spaces[h] for h in range(n_h) if model.get_py_value(s_ch[c][h])] 143 | + [bool_literals[b] for b in range(n_b) if model.get_py_value(s_cb[c][b])] 144 | for c in range(n_c) 145 | ] 146 | 147 | return smt.And([smt.Or(conjunction) for conjunction in conjunctions]) 148 | -------------------------------------------------------------------------------- /incal/learn.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | from incal.observe.inc_logging import LoggingObserver 5 | from pysmt.fnode import FNode 6 | from pywmi import smt_to_nested 7 | from pywmi.domain import Density, Domain 8 | from typing import Tuple, Optional 9 | 10 | from .parameter_free_learner import learn_bottom_up 11 | from .violations.core import RandomViolationsStrategy 12 | from .violations.dt_selection import 
DecisionTreeSelection 13 | from .k_cnf_smt_learner import KCnfSmtLearner 14 | from .util.options import Options, Results 15 | 16 | 17 | class LearnOptions(Options): 18 | def __init__(self): 19 | super().__init__(learn) 20 | self.add_option("domain", str, None, LearnOptions.domain_extraction) 21 | self.add_option("data", str, None, LearnOptions.np_extraction) 22 | self.add_option("labels", str, None, LearnOptions.np_extraction) 23 | 24 | self.add_option("learner", (str, str), ("cnf", "-"), Options.convert_dict( 25 | cnf=LearnOptions.cnf_factory_wrap 26 | ), arg_name="learner_factory") 27 | self.add_option("initial_strategy", (str, int), ("random", 20), Options.convert_dict( 28 | random=LearnOptions.initial_random 29 | )) 30 | self.add_option("selection_strategy", (str, int), ("random", 10), Options.convert_dict( 31 | random=LearnOptions.select_random, 32 | dt=LearnOptions.select_dt 33 | )) 34 | self.add_option("initial_k", int, 1) 35 | self.add_option("initial_h", int, 0) 36 | self.add_option("weight_k", float, 1) 37 | self.add_option("weight_h", float, 1) 38 | self.add_option("log", str) 39 | # self.add_option("max_k", int, None) 40 | # self.add_option("max_h", int, None) 41 | 42 | @staticmethod 43 | def domain_extraction(filename): 44 | return Density.import_from(filename).domain 45 | 46 | @staticmethod 47 | def np_extraction(filename): 48 | return np.load(filename) 49 | 50 | @staticmethod 51 | def cnf_factory_wrap(symmetries): 52 | def cnf_factory(k, h, selection_strategy): 53 | return KCnfSmtLearner(k, h, selection_strategy, symmetries=symmetries) 54 | return cnf_factory 55 | 56 | @staticmethod 57 | def initial_random(count): 58 | def random_selection(indices): 59 | return random.sample(indices, count) 60 | return random_selection 61 | 62 | @staticmethod 63 | def select_random(count): 64 | return RandomViolationsStrategy(count) 65 | 66 | @staticmethod 67 | def select_dt(count): 68 | return DecisionTreeSelection() 69 | 70 | def make_copy(self): 71 | return LearnOptions() 72 | 73 | 74 | class LearnResults(Results): 75 | def __init__(self): 76 | super().__init__() 77 | self.add_duration() 78 | self.add_result("formula", LearnResults.extract_formula) 79 | self.add_result("k", LearnResults.extract_k) 80 | self.add_result("h", LearnResults.extract_h) 81 | 82 | @staticmethod 83 | def extract_formula(result): 84 | return smt_to_nested(result[0]) 85 | 86 | @staticmethod 87 | def extract_k(result): 88 | return result[1] 89 | 90 | @staticmethod 91 | def extract_h(result): 92 | return result[2] 93 | 94 | 95 | def learn( 96 | domain: Domain, 97 | data: np.ndarray, 98 | labels: np.ndarray, 99 | learner_factory: callable, 100 | initial_strategy: callable, 101 | selection_strategy: object, 102 | initial_k: int, 103 | initial_h: int, 104 | weight_k: float, 105 | weight_h: float, 106 | log: Optional[str]=None 107 | ) -> Tuple[FNode, int, int]: 108 | """ 109 | Learn a formula that separates the positive and negative examples 110 | :return: A tuple containing 1. the learned formula, 2. the number of terms (or clauses) used, 111 | 3. 
the number of hyperplanes used 112 | """ 113 | 114 | # log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "demo", "results") 115 | # problem_name = hashlib.sha256(name).hexdigest() 116 | 117 | def learn_inc(_data, _labels, _i, _k, _h): 118 | learner = learner_factory(_k, _h, selection_strategy) 119 | initial_indices = initial_strategy(list(range(len(_data)))) 120 | # log_file = os.path.join(log_dir, "{}_{}_{}.txt".format(problem_name, _k, _h)) 121 | if log is not None: 122 | learner.add_observer(LoggingObserver(log, _k, _h, None, False, selection_strategy)) 123 | return learner.learn(domain, _data, _labels, initial_indices) 124 | 125 | ((_d, _l, formula), k, h) =\ 126 | learn_bottom_up(data, labels, learn_inc, weight_k, weight_h, initial_k, initial_h, None, None) 127 | return formula, k, h 128 | -------------------------------------------------------------------------------- /incal/learner.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pysmt.shortcuts as smt 3 | from typing import Tuple 4 | 5 | from pysmt.fnode import FNode 6 | from pywmi import Domain 7 | 8 | 9 | class NoFormulaFound(RuntimeError): 10 | def __init__(self, data, labels): 11 | self.data = data 12 | self.labels = labels 13 | 14 | 15 | class Learner(object): 16 | def __init__(self, name): 17 | self.name = name 18 | 19 | def learn(self, domain: Domain, data: np.ndarray, labels: np.ndarray, border_indices)\ 20 | -> Tuple[np.ndarray, np.ndarray, FNode]: 21 | raise NotImplementedError() 22 | 23 | @staticmethod 24 | def _convert(value): 25 | return float(value.constant_value()) 26 | 27 | @staticmethod 28 | def _get_misclassification(data): 29 | true_count = 0 30 | for _, l in data: 31 | if l: 32 | true_count += 1 33 | return min(true_count, len(data) - true_count) 34 | 35 | @staticmethod 36 | def check_example(domain, example_features, dnf_list): 37 | x_vars = [domain.get_symbol(var) for var in domain.real_vars] 38 | b_vars = [domain.get_symbol(var) for var in domain.bool_vars] 39 | 40 | formula = smt.Or([smt.And(hyperplane_conjunct) for hyperplane_conjunct in dnf_list]) 41 | substitution = {var: example_features[str(var)] for var in x_vars + b_vars} 42 | return formula.substitute(substitution).simplify().is_true() 43 | 44 | @staticmethod 45 | def fit_hyperplane(domain, examples): 46 | matrix = examples[:, [domain.is_real(v) for v in domain.variables]] 47 | k = np.ones((len(examples), 1)) 48 | a = np.matrix.dot(np.linalg.inv(matrix), k) 49 | return a, 1 50 | -------------------------------------------------------------------------------- /incal/lp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/lp/__init__.py -------------------------------------------------------------------------------- /incal/lp/examples.py: -------------------------------------------------------------------------------- 1 | from pywmi import Domain 2 | 3 | from lp.model import Model 4 | 5 | 6 | def lp_domain(n, ranges=None): 7 | if ranges is None: 8 | ranges = [(None, None) for i in range(n)] 9 | return Domain.make([], ["x{}".format(i + 1) for i in range(n)], ranges) 10 | 11 | 12 | def lp_2_6() -> Model: 13 | domain = lp_domain(2) 14 | x1, x2 = domain.get_symbols(domain.variables) 15 | return Model( 16 | domain, 17 | 300 * x1 + 200 * x2, 18 | [ 19 | 2 * x1 + x2 <= 100, 20 | x1 + x2 <= 80, 21 | x1 <= 40, 22 | x1 >= 0, 23 | x2 >= 0 
24 | ], 25 | minimize=False, 26 | name="LP_2_6" 27 | ) 28 | 29 | 30 | def lp_2_7() -> Model: 31 | domain = lp_domain(4) 32 | x1, x2, x3, x4 = domain.get_symbols(domain.variables) 33 | return Model( 34 | domain, 35 | 320 * x1 + 400 * x2 + 480 * x3 + 560 * x4, 36 | [ 37 | 0.06 * x1 + 0.03 * x2 + 0.02 * x3 + 0.01 * x4 >= 3.5, 38 | 0.03 * x1 + 0.02 * x2 + 0.05 * x3 + 0.06 * x4 <= 3, 39 | 0.08 * x1 + 0.03 * x2 + 0.02 * x3 + 0.01 * x4 == 4, 40 | x1 + x2 + x3 + x4 == 110, 41 | ] + [x >= 0 for x in domain.get_symbols(domain.variables)], 42 | minimize=False, 43 | name="LP_2_7" 44 | ) 45 | 46 | 47 | def lp_2_8() -> Model: 48 | domain = lp_domain(3) 49 | x1, x2, x3 = domain.get_symbols(domain.variables) 50 | return Model( 51 | domain, 52 | 5 * x1 + 4 * x2 + 3 * x3, 53 | [ 54 | 2 * x1 + 3 * x2 + x3 <= 5, 55 | 4 * x1 + x2 + 2 * x3 <= 11, 56 | 3 * x1 + 4 * x2 + 2 * x3 <= 5, 57 | ] + [x >= 0 for x in domain.get_symbols(domain.variables)], 58 | minimize=False, 59 | name="LP_2_8" 60 | ) 61 | 62 | 63 | def lp_2_9() -> Model: 64 | domain = lp_domain(4) 65 | x1, x2, x3, x4 = domain.get_symbols(domain.variables) 66 | return Model( 67 | domain, 68 | 3 * x1 - x2, 69 | [ 70 | 0 - x1 + 6 * x2 - x3 + x4 >= -3, 71 | 7 * x2 + 2 * x4 == 5, 72 | x1 + x2 + x3 - x4 <= 2, 73 | x1 >= 0, 74 | x3 >= 0, 75 | ], 76 | minimize=True, 77 | name="LP_2_9" 78 | ) 79 | 80 | 81 | -------------------------------------------------------------------------------- /incal/lp/model.py: -------------------------------------------------------------------------------- 1 | from pysmt.fnode import FNode 2 | from pywmi import Domain 3 | from typing import List 4 | 5 | 6 | class Model(object): 7 | def __init__(self, domain: Domain, objective: FNode, constraints: List[FNode], minimize: bool=True, name=None): 8 | self.domain = domain 9 | self.objective = objective 10 | self.constraints = constraints 11 | self.minimize = minimize 12 | self.name = name 13 | -------------------------------------------------------------------------------- /incal/observe/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/observe/__init__.py -------------------------------------------------------------------------------- /incal/observe/inc_logging.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | 3 | import json 4 | 5 | from pywmi import smt_to_nested 6 | from pywmi.smt_print import pretty_print 7 | 8 | from incal.incremental_learner import IncrementalObserver 9 | 10 | 11 | class LoggingObserver(IncrementalObserver): 12 | def __init__(self, filename, k, h, seed=None, verbose=True, violation_counter=None): 13 | self.filename = filename 14 | self.verbose = verbose 15 | self.violation_counter = violation_counter 16 | self.k = k 17 | self.h = h 18 | 19 | if filename is not None: 20 | with open(self.filename, "w") as f: 21 | print("", file=f, end="") 22 | 23 | if seed is not None: 24 | self.log({"type": "seed", "seed": seed, "k": self.k, "h": self.h}) 25 | 26 | def log(self, flat): 27 | if self.filename is not None: 28 | with open(self.filename, "a") as f: 29 | print(json.dumps(flat), file=f) 30 | 31 | def observe_initial(self, data, labels, initial_indices): 32 | flat = {"type": "initial", "indices": initial_indices, "k": self.k, "h": self.h} 33 | if self.verbose: 34 | print("Starting with {} examples".format(len(initial_indices))) 35 | 
self.log(flat) 36 | 37 | def observe_iteration(self, data, labels, formula, new_active_indices, solving_time, selection_time): 38 | flat = { 39 | "type": "update", 40 | "theory": smt_to_nested(formula), 41 | "indices": [int(v) for v in new_active_indices], 42 | "solving_time": solving_time, 43 | "selection_time": selection_time, 44 | "k": self.k, 45 | "h": self.h, 46 | } 47 | if self.violation_counter is not None: 48 | flat["violations"] = [int(v) for v in self.violation_counter.last_violations] 49 | 50 | if self.verbose: 51 | print("Found model after {:.2f}s".format(solving_time)) 52 | print(pretty_print(formula)) 53 | if self.violation_counter is not None: 54 | violation_count = len(self.violation_counter.last_violations) 55 | selected_count = len(new_active_indices) 56 | print("Selected {} of {} violations in {:.2f}s".format(selected_count, violation_count, selection_time)) 57 | self.log(flat) 58 | -------------------------------------------------------------------------------- /incal/observe/observe.py: -------------------------------------------------------------------------------- 1 | class Observer(object): 2 | def observe(self, name, *args, **kwargs): 3 | raise NotImplementedError() 4 | 5 | 6 | class DispatchObserver(Observer): 7 | def __init__(self): 8 | self.observers = [] 9 | 10 | def add_observer(self, observer): 11 | self.observers.append(observer) 12 | 13 | def observe(self, name, *args, **kwargs): 14 | for observer in self.observers: 15 | observer.observe(name, *args, **kwargs) 16 | 17 | 18 | class SpecializedObserver(Observer): 19 | def observe(self, name, *args, **kwargs): 20 | instance_method_ref = getattr(self, "observe_{}".format(name)) 21 | instance_method_ref(*args, **kwargs) 22 | -------------------------------------------------------------------------------- /incal/observe/plotting.py: -------------------------------------------------------------------------------- 1 | import time 2 | 3 | import os 4 | from pywmi import evaluate, smt_to_nested 5 | 6 | from incal.incremental_learner import IncrementalObserver 7 | 8 | from pywmi.plot import plot_combined 9 | 10 | 11 | class PlottingObserver(IncrementalObserver): 12 | def __init__(self, domain, directory, name, feat_x, feat_y, condition=None, auto_clean=False, run_name=None): 13 | self.domain = domain 14 | 15 | if not os.path.exists(directory): 16 | os.makedirs(directory) 17 | 18 | if auto_clean: 19 | run_number = 0 20 | run_dir = None 21 | while run_dir is None or os.path.exists(run_dir): 22 | date_folders = time.strftime("%Y{s}%m{s}%d{s}".format(s=os.path.sep)) 23 | run_name = run_name + " " if run_name is not None else "" 24 | run_dir_name = "run {}{}".format(run_name, time.strftime("%Hh %Mm %Ss")) 25 | run_dir = os.path.join(directory, date_folders, run_dir_name) 26 | if run_number > 0: 27 | run_dir += "_{}".format(run_number) 28 | run_number += 1 29 | os.makedirs(run_dir) 30 | directory = run_dir 31 | 32 | self.directory = directory 33 | 34 | self.name = name 35 | self.all_active = set() 36 | self.feat_x = feat_x 37 | self.feat_y = feat_y 38 | self.iteration = 0 39 | self.condition = condition 40 | 41 | def observe_initial(self, data, labels, initial_indices): 42 | self.all_active = self.all_active.union(initial_indices) 43 | name = "{}{}{}_{}".format(self.directory, os.path.sep, self.name, self.iteration) 44 | plot_combined(self.feat_x, self.feat_y, self.domain, None, (data, labels), None, name, initial_indices, set(), 45 | self.condition) 46 | 47 | def observe_iteration(self, data, labels, formula, 
new_active_indices, solving_time, selection_time): 48 | self.iteration += 1 49 | learned_labels = evaluate(self.domain, formula, data) 50 | name = "{}{}{}_{}".format(self.directory, os.path.sep, self.name, self.iteration) 51 | plot_combined(self.feat_x, self.feat_y, self.domain, formula, (data, labels), learned_labels, name, 52 | self.all_active, new_active_indices, condition=self.condition) 53 | self.all_active = self.all_active.union(new_active_indices) 54 | -------------------------------------------------------------------------------- /incal/old_learners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/old_learners/__init__.py -------------------------------------------------------------------------------- /incal/old_learners/k_dnf_logic_learner.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import itertools 4 | 5 | import numpy as np 6 | from pysmt.shortcuts import Plus, Real, Times, LE, GE 7 | from pysmt.typing import REAL, BOOL 8 | 9 | from learner import Learner 10 | 11 | 12 | class KDNFLogicLearner(Learner): 13 | def __init__(self, k): 14 | Learner.__init__(self) 15 | self.k = k 16 | 17 | def learn(self, domain, data, border_indices): 18 | positive_indices = [i for i in range(len(data)) if data[i][1]] 19 | real_vars = [v for v in domain.variables if domain.var_types[v] == REAL] 20 | bool_vars = [v for v in domain.variables if domain.var_types[v] == BOOL] 21 | d = len(real_vars) 22 | hyperplanes = [] 23 | for indices in itertools.combinations(positive_indices, d): 24 | print(indices) 25 | hyperplanes.append(Learner.fit_hyperplane(domain, [data[i][0] for i in indices])) 26 | boolean_data = [] 27 | for i in range(len(data)): 28 | row = [] 29 | for v in bool_vars: 30 | row.append(data[i][0][v].constant_value()) 31 | boolean_data.append(row) 32 | hyperplanes_smt = [] 33 | for a, c in hyperplanes: 34 | lhs_smt = Plus(Times(Real(float(a[j])), domain.get_symbol(real_vars[j])) for j in range(d)) 35 | hyperplanes_smt.append(LE(lhs_smt, Real(c))) 36 | lhs_smt = Plus(Times(Real(-float(a[j])), domain.get_symbol(real_vars[j])) for j in range(d)) 37 | hyperplanes_smt.append(LE(lhs_smt, Real(-c))) 38 | for i in range(len(data)): 39 | lhs = 0 40 | for j in range(d): 41 | lhs += float(a[j]) * float(data[i][0][real_vars[j]].constant_value()) 42 | boolean_data[i].append(lhs <= c) 43 | boolean_data[i].append(lhs >= c) 44 | print(boolean_data) 45 | # logical_dnf_indices = [[i] for i in range(len(boolean_data[0]))] 46 | logical_dnf_indices = self.learn_logical(boolean_data, [row[1] for row in data]) 47 | logical_dnf = [ 48 | [domain.get_symbol(bool_vars[i]) if i < len(bool_vars) else 49 | hyperplanes_smt[i - len(bool_vars)] for i in conj_indices] 50 | for conj_indices in logical_dnf_indices 51 | ] 52 | print(logical_dnf) 53 | return logical_dnf 54 | 55 | def learn_logical(self, boolean_data, labels): 56 | conjunctions = [] 57 | for k in range(1, self.k + 1): 58 | for features in itertools.combinations(list(range(len(boolean_data))), k): 59 | accept = True 60 | for entry, label in zip(boolean_data, labels): 61 | if not label and all(entry[j] for j in features): 62 | accept = False 63 | break 64 | if accept: 65 | conjunctions.append(features) 66 | return conjunctions 67 | 68 | 69 | class GreedyMaxRuleLearner(KDNFLogicLearner): 70 | def __init__(self, max_literals): 71 | 
KDNFLogicLearner.__init__(self, max_literals) 72 | 73 | def learn_logical(self, boolean_data, labels): 74 | attributes = np.matrix(boolean_data) 75 | examples = attributes.shape[0] 76 | features = attributes.shape[1] 77 | conjunctions = [] 78 | counts = np.sum(attributes, axis=0).A1 79 | print(examples, features, counts.shape) 80 | 81 | return [] 82 | 83 | 84 | class GreedyLogicDNFLearner(KDNFLogicLearner): 85 | def __init__(self, max_terms, max_literals): 86 | KDNFLogicLearner.__init__(self, max_literals) 87 | self.max_terms = max_terms 88 | 89 | @property 90 | def max_literals(self): 91 | return self.k 92 | 93 | def learn_logical(self, boolean_data, labels): 94 | attributes = np.matrix(boolean_data) 95 | examples = attributes.shape[0] 96 | features = attributes.shape[1] 97 | conjunctions = [] 98 | counts = np.sum(attributes, axis=0).A1 99 | print(counts[0]) 100 | 101 | for i in range(self.max_terms): 102 | lb = 0 103 | ub = examples 104 | candidates = [([], examples)] 105 | new_candidates = [] 106 | while len(candidates) > 0: 107 | for pattern, count in candidates: 108 | start_index = 0 if len(pattern) == 0 else max(pattern) + 1 109 | covered = [i for i in range(examples) if all(attributes[i, j] for j in pattern)] 110 | pos_covered = [i for i in covered if labels[i]] 111 | neg_covered = [i for i in covered if not labels[i]] 112 | 113 | for j in range(start_index, self.max_literals): 114 | if counts[j] > lb: 115 | pass 116 | for j in range(self.max_literals): 117 | for features in itertools.combinations(list(range(len(boolean_data))), self.k): 118 | accept = True 119 | for entry, label in zip(boolean_data, labels): 120 | if not label and all(entry[j] for j in features): 121 | accept = False 122 | break 123 | if accept: 124 | conjunctions.append(features) 125 | return conjunctions 126 | -------------------------------------------------------------------------------- /incal/old_learners/k_dnf_smt_learner.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import pysmt.shortcuts as smt 4 | from pysmt.typing import REAL, BOOL 5 | 6 | from incremental_learner import IncrementalLearner 7 | 8 | 9 | class KDnfSmtLearner(IncrementalLearner): 10 | def __init__(self, conjunction_count, half_space_count, selection_strategy, allow_negations=True): 11 | IncrementalLearner.__init__(self, "dnf_smt", selection_strategy) 12 | self.conjunction_count = conjunction_count 13 | self.half_space_count = half_space_count 14 | self.allow_negations = allow_negations 15 | 16 | def learn_partial(self, solver, domain, data, new_active_indices): 17 | # Constants 18 | n_b_original = len(domain.bool_vars) 19 | n_b = n_b_original * 2 20 | n_r = len(domain.real_vars) 21 | 22 | n_h_original = self.half_space_count if n_r > 0 else 0 23 | n_h = n_h_original * 2 if self.allow_negations else n_h_original 24 | 25 | n_c = self.conjunction_count 26 | n_d = len(data) 27 | 28 | real_features = [[row[v] for v in domain.real_vars] for row, _ in data] 29 | bool_features = [[row[v] for v in domain.bool_vars] for row, _ in data] 30 | labels = [row[1] for row in data] 31 | 32 | # Variables 33 | a_hr = [[smt.Symbol("a_hr[{}][{}]".format(h, r), REAL) for r in range(n_r)] for h in range(n_h_original)] 34 | b_h = [smt.Symbol("b_h[{}]".format(h), REAL) for h in range(n_h_original)] 35 | s_ch = [[smt.Symbol("s_ch[{}][{}]".format(c, h)) for h in range(n_h)] for c in range(n_c)] 36 | s_cb = [[smt.Symbol("s_cb[{}][{}]".format(c, b)) for b in range(n_b)] for c in 
range(n_c)] 37 | 38 | # Aux variables 39 | s_ih = [[smt.Symbol("s_ih[{}][{}]".format(i, h)) for h in range(n_h)] for i in range(n_d)] 40 | s_ic = [[smt.Symbol("s_ic[{}][{}]".format(i, c)) for c in range(n_c)] for i in range(n_d)] 41 | 42 | # Constraints 43 | for i in new_active_indices: 44 | x_r, x_b, label = real_features[i], bool_features[i], labels[i] 45 | 46 | for h in range(n_h_original): 47 | sum_coefficients = smt.Plus([a_hr[h][r] * smt.Real(x_r[r]) for r in range(n_r)]) 48 | solver.add_assertion(smt.Iff(s_ih[i][h], sum_coefficients <= b_h[h])) 49 | 50 | for h in range(n_h_original, n_h): 51 | solver.add_assertion(smt.Iff(s_ih[i][h], ~s_ih[i][h - n_h_original])) 52 | 53 | for c in range(n_c): 54 | solver.add_assertion(smt.Iff(s_ic[i][c], smt.And( 55 | [smt.TRUE()] 56 | + [(~s_ch[c][h] | s_ih[i][h]) for h in range(n_h)] 57 | + [~s_cb[c][b] for b in range(n_b_original) if not x_b[b]] 58 | + [~s_cb[c][b] for b in range(n_b_original, n_b) if x_b[b - n_b_original]] 59 | ))) 60 | 61 | if label: 62 | solver.add_assertion(smt.Or([s_ic[i][c] for c in range(n_c)])) 63 | else: 64 | solver.add_assertion(smt.And([~s_ic[i][c] for c in range(n_c)])) 65 | 66 | solver.solve() 67 | model = solver.get_model() 68 | 69 | x_vars = [domain.get_symbol(domain.real_vars[r]) for r in range(n_r)] 70 | half_spaces = [ 71 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) <= model.get_value(b_h[h]) 72 | for h in range(n_h_original) 73 | ] + [ 74 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) > model.get_value(b_h[h]) 75 | for h in range(n_h - n_h_original) 76 | ] 77 | 78 | b_vars = [domain.get_symbol(domain.bool_vars[b]) for b in range(n_b_original)] 79 | bool_literals = [b_vars[b] for b in range(n_b_original)] 80 | bool_literals += [~b_vars[b] for b in range(n_b - n_b_original)] 81 | 82 | conjunctions = [ 83 | [half_spaces[h] for h in range(n_h) if model.get_py_value(s_ch[c][h])] 84 | + [bool_literals[b] for b in range(n_b) if model.get_py_value(s_cb[c][b])] 85 | for c in range(n_c) 86 | ] 87 | 88 | return smt.Or([smt.And(conjunction) for conjunction in conjunctions]) 89 | 90 | 91 | # class KDnfSmtLearner(Learner): 92 | # def __init__(self, max_hyperplanes, max_terms, allow_negations=True): 93 | # Learner.__init__(self) 94 | # self.max_hyperplanes = max_hyperplanes 95 | # self.max_terms = max_terms 96 | # self.allow_negations = allow_negations 97 | # 98 | # def learn(self, domain, data, initial_indices=None): 99 | # # Constants 100 | # n_b_original = len(domain.bool_vars) 101 | # n_b = n_b_original * 2 if self.allow_negations else n_b_original 102 | # 103 | # n_f = len(domain.real_vars) 104 | # n_h_original = self.max_hyperplanes if n_f > 0 else 0 105 | # n_h = n_h_original * 2 if self.allow_negations else n_h_original 106 | # 107 | # n_c = self.max_terms 108 | # n_d = len(data) 109 | # 110 | # real_features = [[Learner._convert(row[v]) for v in domain.real_vars] for row, _ in data] 111 | # bool_features = [[bool(row[v].constant_value()) for v in domain.bool_vars] for row, _ in data] 112 | # labels = [row[1] for row in data] 113 | # 114 | # # Variables 115 | # a_hf = [[smt.Symbol("a_hf[{}][{}]".format(h, f), REAL) for f in range(n_f)] for h in range(n_h_original)] 116 | # b_h = [smt.Symbol("b_h[{}]".format(h), REAL) for h in range(n_h_original)] 117 | # s_ch = [[smt.Symbol("s_ch[{}][{}]".format(c, h)) for h in range(n_h)] for c in range(n_c)] 118 | # s_cb = [[smt.Symbol("s_cb[{}][{}]".format(c, b)) for b in range(n_b)] for c in range(n_c)] 119 | # 120 | # # Aux 
variables 121 | # s_ih = [[smt.Symbol("s_ih[{}][{}]".format(i, h)) for h in range(n_h)] for i in range(n_d)] 122 | # s_ic = [[smt.Symbol("s_ic[{}][{}]".format(i, c)) for c in range(n_c)] for i in range(n_d)] 123 | # 124 | # # Constraints 125 | # start = time.time() 126 | # active_indices = list(range(len(data))) if initial_indices is None else initial_indices 127 | # remaining = list(range(len(data))) # list(sorted(set(range(len(data))) - set(active_indices))) 128 | # 129 | # hyperplane_dnf = [] 130 | # 131 | # def check_model(_x): 132 | # _formula = smt.Or([smt.And(hyperplane_conjunct) for hyperplane_conjunct in hyperplane_dnf]) 133 | # substitution = {_var: _x[str(_var)] for _var in x_vars + b_vars} 134 | # return _formula.substitute(substitution).simplify().is_true() 135 | # 136 | # print("Starting solver with {} examples".format(len(active_indices))) 137 | # 138 | # with smt.Solver() as solver: 139 | # while len(active_indices) > 0: 140 | # remaining = list(sorted(set(remaining) - set(active_indices))) 141 | # for i in active_indices: 142 | # x, x_b, label = real_features[i], bool_features[i], labels[i] 143 | # 144 | # for h in range(n_h_original): 145 | # sum_coefficients = smt.Plus([a_hf[h][f] * smt.Real(x[f]) for f in range(n_f)]) 146 | # solver.add_assertion(smt.Iff(s_ih[i][h], sum_coefficients <= b_h[h])) 147 | # 148 | # for h in range(n_h_original, n_h): 149 | # solver.add_assertion(smt.Iff(s_ih[i][h], ~s_ih[i][h - n_h_original])) 150 | # 151 | # for c in range(n_c): 152 | # solver.add_assertion(smt.Iff(s_ic[i][c], smt.And( 153 | # [(~s_ch[c][h] | s_ih[i][h]) for h in range(n_h)] 154 | # + [~s_cb[c][b] for b in range(n_b_original) if not x_b[b]] 155 | # + [~s_cb[c][b] for b in range(n_b_original, n_b) if x_b[b - n_b_original]] 156 | # ))) 157 | # 158 | # if label: 159 | # solver.add_assertion(smt.Or([s_ic[i][c] for c in range(n_c)])) 160 | # else: 161 | # solver.add_assertion(smt.And([~s_ic[i][c] for c in range(n_c)])) 162 | # 163 | # solver.solve() 164 | # model = solver.get_model() 165 | # 166 | # x_vars = [domain.get_symbol(domain.variables[f]) for f in range(n_f)] 167 | # hyperplanes = [ 168 | # smt.Plus([model.get_value(a_hf[h][f]) * x_vars[f] for f in range(n_f)]) <= model.get_value(b_h[h]) 169 | # for h in range(n_h_original)] 170 | # hyperplanes += [ 171 | # smt.Plus([model.get_value(a_hf[h][f]) * x_vars[f] for f in range(n_f)]) > model.get_value(b_h[h]) 172 | # for h in range(n_h - n_h_original)] 173 | # 174 | # b_vars = [domain.get_symbol(domain.bool_vars[b]) for b in range(n_b_original)] 175 | # bool_literals = [b_vars[b] for b in range(n_b_original)] 176 | # bool_literals += [~b_vars[b - n_b_original] for b in range(n_b_original, n_b)] 177 | # 178 | # hyperplane_dnf = [ 179 | # [hyperplanes[h] for h in range(n_h) if model.get_py_value(s_ch[c][h])] 180 | # + [bool_literals[b] for b in range(n_b) if model.get_py_value(s_cb[c][b])] 181 | # for c in range(n_c) 182 | # ] 183 | # 184 | # active_indices = [i for i in remaining if labels[i] != check_model(data[i][0])] 185 | # print("Found model violating {} examples".format(len(active_indices))) 186 | # 187 | # time_taken = time.time() - start 188 | # print("Took {:.2f}s".format(time_taken)) 189 | # return hyperplane_dnf 190 | # -------------------------------------------------------------------------------- /incal/parameter_free_learner.py: -------------------------------------------------------------------------------- 1 | import heapq 2 | 3 | import time 4 | 5 | from learner import NoFormulaFound 6 | 7 | 8 | class 
ParameterFrontier(object): 9 | def __init__(self, w_k, w_h): 10 | self.c = lambda k, h: w_k * k + w_h * h 11 | self.pq = [] 12 | self.tried = set() 13 | 14 | def push(self, k, h): 15 | if (k, h) not in self.tried: 16 | heapq.heappush(self.pq, (self.c(k, h), k, h)) 17 | self.tried.add((k, h)) 18 | 19 | def pop(self): 20 | c, k, h = heapq.heappop(self.pq) 21 | return k, h 22 | 23 | 24 | def learn_bottom_up(data, labels, learn_f, w_k, w_h, init_k=1, init_h=0, max_k=None, max_h=None): 25 | """ 26 | Learns a CNF(k, h) SMT formula phi using the learner encapsulated in init_learner such that 27 | C(k, h) = w_k * k + w_h * h is minimal. 28 | :param data: List of tuples of assignments and labels 29 | :param labels: Array of labels 30 | :param learn_f: Function called with data, k and h: learn_f(data, k, h) 31 | :param w_k: The weight assigned to k 32 | :param w_h: The weight assigned to h 33 | :param init_k: The minimal value for k 34 | :param init_h: The minimal value for h 35 | :param max_k: The maximal value for k 36 | :param max_h: The maximal value for h 37 | :return: A tuple containing: 1) the CNF(k, h) formula phi with minimal complexity C(k, h); 2) k; and 3) h 38 | """ 39 | solution = None 40 | frontier = ParameterFrontier(w_k, w_h) 41 | frontier.push(init_k, init_h) 42 | i = 0 43 | while solution is None: 44 | i += 1 45 | k, h = frontier.pop() 46 | # print("Attempting to solve with k={} and h={}".format(k, h)) 47 | start = time.time() 48 | try: 49 | solution = learn_f(data, labels, i, k, h) 50 | # print("Found solution after {:.2f}s".format(time.time() - start)) 51 | except NoFormulaFound as e: 52 | data = e.data 53 | labels = e.labels 54 | if max_k is None or k + 1 <= max_k: 55 | frontier.push(k + 1, h) 56 | if max_h is None or h + 1 <= max_h: 57 | frontier.push(k, h + 1) 58 | return solution, k, h 59 | -------------------------------------------------------------------------------- /incal/tests/examples.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | 3 | mpl.use('TkAgg') 4 | import matplotlib.pyplot as plt 5 | 6 | from pysmt.typing import REAL 7 | import pysmt.shortcuts as smt 8 | 9 | from problem import Domain 10 | from visualize import RegionBuilder 11 | 12 | 13 | def xy_domain(): 14 | return Domain(["x", "y"], {"x": REAL, "y": REAL}, {"x": [0, 1], "y": [0, 1]}) 15 | 16 | 17 | def example1(domain): 18 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL) 19 | return domain, (x + y <= 0.5) 20 | 21 | 22 | def example2(domain): 23 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL) 24 | return domain, (((-1.81491574069 < 2.82223533496 * x + -2.86421413834 * y) | ( 25 | 1.74295350642 < 5.75692214636 * x + -5.67797696689 * y)) & ( 26 | 5.75692214636 * x + -5.67797696689 * y <= 1.74295350642)) 27 | 28 | 29 | def example3(domain): 30 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL) 31 | return domain, (((5.03100425089 < 4.72202520763*x + 4.11473198213*y) | (-4.6261635019 < -5.93640712709*x + -5.87100650773*y)) & ((5.03100425089 < 4.72202520763*x + 4.11473198213*y) | (-4.6261635019 < -5.93640712709*x + -5.87100650773*y))) 32 | 33 | 34 | def example4(domain): 35 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL) 36 | return domain, (((106.452209182 < 58.3305562428*x + 162.172448357*y) | (-82.1173457701 < -121.782718841*x + -45.7311195244*y)) & ((58.3305562428*x + 162.172448357*y <= 106.452209182) | (-121.782718841*x + -45.7311195244*y <= -82.1173457701))) 37 | 38 | 39 | def example5(domain): 40 | x, y = 
smt.Symbol("x", REAL), smt.Symbol("y", REAL) 41 | return domain, (((-1.81491574069 < 2.82223533496*x + -2.86421413834*y) | (1.74295350642 < 5.75692214636*x + -5.67797696689*y)) & (5.75692214636*x + -5.67797696689*y <= 1.74295350642)) 42 | 43 | 44 | def example6(domain): 45 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL) 46 | return domain, (((-1.27554738321 < 2.00504448571*x + -2.40276942762*y) | (4.56336137649 < 11.0066321223*x + -9.72098326672*y)) & (11.0066321223*x + -9.72098326672*y <= 4.56336137649)) 47 | 48 | 49 | def visualize(domain, formula): 50 | fig = plt.figure() 51 | ax = fig.add_subplot(1, 1, 1) 52 | RegionBuilder(domain).walk_smt(formula).plot(ax=ax) 53 | plt.show() 54 | 55 | 56 | if __name__ == "__main__": 57 | visualize(*example6(xy_domain())) 58 | -------------------------------------------------------------------------------- /incal/tests/test_evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from incal.experiments.examples import ice_cream_problem 4 | from pywmi import Domain, evaluate 5 | 6 | 7 | def test_example1(): 8 | domain, formula, name = ice_cream_problem() 9 | c, b, w = domain.get_symbols(["chocolate", "banana", "weekend"]) 10 | 11 | c_val = 0.41358769878652346 12 | b_val = 0.04881279380000003 13 | assignment = {"chocolate": c_val, "banana": b_val, "weekend": 1.0} 14 | instance = np.array([assignment[v] for v in domain.variables]) 15 | 16 | h1 = -0.9094061613514598 < (-2.11558444119424*c + -0.7052753601938021*b) 17 | print(-0.9094061613514598, (-2.11558444119424 * c_val + -0.7052753601938021 * b_val)) 18 | h2 = -43.62318633585081 < (-56.41097694745345*c + -50.5657977670196*b) 19 | print(-43.62318633585081, (-56.41097694745345 * c_val + -50.5657977670196 * b_val)) 20 | h3 = -0.9094061613514598 < (-2.11558444119424*c + -0.7052753601938021*b) 21 | print(-0.9094061613514598, (-2.11558444119424 * c_val + -0.7052753601938021 * b_val)) 22 | h4 = 7.792607696237757 < (18.128225098004087*c + 6.043431893671825*b) 23 | print(7.792607696237757, (18.128225098004087 * c_val + 6.043431893671825 * b_val)) 24 | h5 = -0.9094061613514598 < -(2.11558444119424*c + -0.7052753601938021*b) 25 | print(-0.9094061613514598, -(2.11558444119424 * c_val + -0.7052753601938021 * b_val)) 26 | # h1: True, h2: True, h3: True, h4: False, h5: True 27 | 28 | learned = ((h1 | h2) & (h3 | ~w) & (h4 | h5)) 29 | 30 | print(evaluate(domain, formula, instance)) 31 | print(evaluate(domain, learned, instance)) 32 | -------------------------------------------------------------------------------- /incal/tests/test_generation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from incal.generator import generate_half_space_sample 4 | from incal.learner import Learner 5 | from pysmt.typing import REAL, BOOL 6 | from pywmi import Domain 7 | 8 | 9 | def get_xay_domain(): 10 | return Domain(["x", "a", "y"], {"x": REAL, "a": BOOL, "y": REAL}, {"x": (0, 1), "y": (0, 1)}) 11 | 12 | 13 | def test_generate_hyperplane(): 14 | domain = get_xay_domain() 15 | samples = np.array([[0, 1, 0.1], [0.5, 0, 0.5]]) 16 | coefficients, b = Learner.fit_hyperplane(domain, samples) 17 | slope = coefficients[0] / coefficients[1] 18 | assert abs(slope) == 0.4 / 0.5 19 | assert b == 1 20 | 21 | 22 | def test_generate_hyperplane_sample_sanity(): 23 | generate_half_space_sample(get_xay_domain(), 2) 24 | -------------------------------------------------------------------------------- 
/incal/tests/test_one_class.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import time 4 | import numpy as np 5 | 6 | from pywmi.smt_print import pretty_print 7 | 8 | from incal.learn import LearnOptions 9 | from pywmi import evaluate, Domain, smt_to_nested, plot, RejectionEngine 10 | from pywmi.sample import uniform 11 | 12 | from incal.experiments.examples import simple_checker_problem, checker_problem 13 | from incal.violations.core import RandomViolationsStrategy 14 | 15 | from incal.violations.virtual_data import OneClassStrategy 16 | 17 | from incal.k_cnf_smt_learner import KCnfSmtLearner 18 | 19 | from incal.parameter_free_learner import learn_bottom_up 20 | 21 | # from incal.observe.inc_logging import LoggingObserver 22 | from incal.observe.plotting import PlottingObserver 23 | 24 | 25 | def main(): 26 | domain, formula, name = checker_problem() 27 | thresholds = {v: 0.1 for v in domain.real_vars} 28 | data = uniform(domain, 1000) 29 | labels = evaluate(domain, formula, data) 30 | data = data[labels == 1] 31 | labels = labels[labels == 1] 32 | 33 | def learn_inc(_data, _labels, _i, _k, _h): 34 | strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds) 35 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn") 36 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data)))) 37 | # learner.add_observer(LoggingObserver(None, _k, _h, None, True)) 38 | learner.add_observer(PlottingObserver(domain, "test_output/checker", "run_{}_{}_{}".format(_i, _k, _h), 39 | domain.real_vars[0], domain.real_vars[1], None, False)) 40 | return learner.learn(domain, _data, _labels, initial_indices) 41 | 42 | (new_data, new_labels, formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None) 43 | print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(formula))) 44 | print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels))) 45 | 46 | 47 | def background_knowledge_example(): 48 | domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)]) 49 | a, b, x, y = domain.get_symbols(domain.variables) 50 | formula = (a | b) & (~a | ~b) & (x >= 0) & (x <= y) & (y <= 1) 51 | thresholds = {v: 0.1 for v in domain.real_vars} 52 | data = uniform(domain, 10000) 53 | labels = evaluate(domain, formula, data) 54 | data = data[labels == 1] 55 | labels = labels[labels == 1] 56 | 57 | def learn_inc(_data, _labels, _i, _k, _h): 58 | strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds) #, background_knowledge=(a | b) & (~a | ~b)) 59 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn") 60 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data)))) 61 | # learner.add_observer(LoggingObserver(None, _k, _h, None, True)) 62 | learner.add_observer(PlottingObserver(domain, "test_output/bg", "run_{}_{}_{}".format(_i, _k, _h), 63 | domain.real_vars[0], domain.real_vars[1], None, False)) 64 | return learner.learn(domain, _data, _labels, initial_indices) 65 | 66 | (new_data, new_labels, formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None) 67 | print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(formula))) 68 | print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels))) 69 | 70 | 71 | def negative_samples_example(background_knowledge): 72 | domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)]) 73 | a, b, x, y = domain.get_symbols(domain.variables) 74 | formula = (a | b) & (~a | 
~b) & (x <= y) & domain.get_bounds() 75 | background_knowledge = (a | b) & (~a | ~b) if background_knowledge else None 76 | thresholds = {"x": 0.1, "y": 0.2} 77 | data = uniform(domain, 10000) 78 | labels = evaluate(domain, formula, data) 79 | data = data[labels == 1] 80 | labels = labels[labels == 1] 81 | original_sample_count = len(labels) 82 | 83 | start_time = time.time() 84 | 85 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, thresholds, 100, background_knowledge) 86 | print("Created {} negative examples".format(len(labels) - original_sample_count)) 87 | 88 | directory = "test_output{}bg_sampled{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss")) 89 | 90 | def learn_inc(_data, _labels, _i, _k, _h): 91 | strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds, background_knowledge=background_knowledge) 92 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn") 93 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data)))) 94 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h), 95 | domain.real_vars[0], domain.real_vars[1], None, False)) 96 | return learner.learn(domain, _data, _labels, initial_indices) 97 | 98 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None) 99 | if background_knowledge: 100 | learned_formula = learned_formula & background_knowledge 101 | 102 | duration = time.time() - start_time 103 | 104 | print("{}".format(smt_to_nested(learned_formula))) 105 | print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(learned_formula))) 106 | print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels))) 107 | print("Learning took {:.2f}s".format(duration)) 108 | 109 | test_data, labels = OneClassStrategy.add_negatives(domain, data, labels, thresholds, 1000, background_knowledge) 110 | assert all(evaluate(domain, learned_formula, test_data) == labels) 111 | 112 | 113 | def test_negative_samples(): 114 | for label in (True, False): 115 | random.seed(888) 116 | np.random.seed(888) 117 | negative_samples_example(label) 118 | 119 | 120 | def test_adaptive_threshold(): 121 | random.seed(888) 122 | np.random.seed(888) 123 | 124 | domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)]) 125 | x, y = domain.get_symbols(domain.variables) 126 | formula = (x <= y) & (x <= 0.5) & (y <= 0.5) & domain.get_bounds() 127 | thresholds = {"x": 0.1, "y": 0.1} 128 | data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50) 129 | k = 4 130 | nearest_neighbors = [] 131 | for i in range(len(data)): 132 | nearest_neighbors.append([]) 133 | for j in range(len(data)): 134 | if i != j: 135 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables) 136 | if domain.is_bool(v))\ 137 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v)) 138 | if len(nearest_neighbors[i]) < k: 139 | nearest_neighbors[i].append((j, distance)) 140 | else: 141 | index_of_furthest = None 142 | for fi, f in enumerate(nearest_neighbors[i]): 143 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]: 144 | index_of_furthest = fi 145 | if distance < nearest_neighbors[i][index_of_furthest][1]: 146 | nearest_neighbors[i][index_of_furthest] = (j, distance) 147 | print(nearest_neighbors) 148 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * 
(domain.var_domains[v][1] - domain.var_domains[v][0]) for v in domain.real_vars] 149 | for i in range(len(nearest_neighbors))] 150 | t = np.array(t) 151 | print(t) 152 | print(data) 153 | # data = uniform(domain, 400) 154 | labels = evaluate(domain, formula, data) 155 | data = data[labels == 1] 156 | labels = labels[labels == 1] 157 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000) 158 | 159 | directory = "test_output{}adaptive{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss")) 160 | os.makedirs(directory) 161 | 162 | name = os.path.join(directory, "combined.png") 163 | plot.plot_combined("x", "y", domain, formula, (data, labels), None, name, set(), set()) 164 | -------------------------------------------------------------------------------- /incal/tests/test_polytope.py: -------------------------------------------------------------------------------- 1 | import matplotlib as mpl 2 | 3 | mpl.use('TkAgg') 4 | import matplotlib.pyplot as plt 5 | 6 | from unittest import TestCase 7 | 8 | from pysmt.typing import REAL 9 | import pysmt.shortcuts as smt 10 | 11 | from pywmi.domain import Domain 12 | from pywmi.plot import RegionBuilder 13 | 14 | 15 | class TestPolytope(TestCase): 16 | def test_example1(self): 17 | domain = Domain(["x", "y"], {"x": REAL, "y": REAL}, {"x": [0, 1], "y": [0, 1]}) 18 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL) 19 | formula = (x + y <= 0.5) 20 | RegionBuilder(domain).walk_smt(formula).plot() 21 | # plt.show() 22 | 23 | 24 | -------------------------------------------------------------------------------- /incal/util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/util/__init__.py -------------------------------------------------------------------------------- /incal/util/options.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | from typing import Union, Tuple, Any, Dict, List, Optional 5 | 6 | 7 | class Option(object): 8 | def __init__(self, name, from_type=None, default_from=None, converter=None, default=None, arg_name=None): 9 | self.name = name 10 | self.from_type = from_type 11 | self.default_from = default_from 12 | self.converter = converter 13 | self.default = default 14 | self.arg_name = arg_name or name 15 | if converter is not None and default_from is not None and default is None: 16 | self.default = self.convert(default_from) 17 | 18 | def convert(self, value): 19 | if value is None: 20 | return self.default 21 | elif self.converter: 22 | if isinstance(self.from_type, tuple): 23 | parts = value.split(":") 24 | if len(parts) != len(self.from_type): 25 | raise RuntimeError("Could not parse arguments for option {}, got: {}".format(self.name, value)) 26 | value = tuple(t(v) for t, v in zip(self.from_type, parts)) 27 | 28 | if isinstance(value, tuple): 29 | return self.converter(*value) 30 | else: 31 | return self.converter(value) 32 | else: 33 | return value 34 | 35 | 36 | class Options(object): 37 | def __init__(self, callback=None): 38 | self.options = dict() 39 | self.values = dict() 40 | self.original_values = dict() 41 | self.callback = callback 42 | 43 | def add_option(self, name, from_type=None, default_from=None, converter=None, default=None, arg_name=None): 44 | if isinstance(default_from, tuple): 45 | default_from = ":".join(str(e) for e in default_from) 46 | 
self.options[name] = Option(name, from_type, default_from, converter, default, arg_name) 47 | 48 | def set_values(self, convert=True, **kwargs): 49 | for key, value in kwargs.items(): 50 | self.set_value(key, value, convert) 51 | 52 | @staticmethod 53 | def convert_dict(**kwargs): 54 | def convert(*args): 55 | if args[0] in kwargs: 56 | if len(args) > 1: 57 | return kwargs[args[0]](*args[1:]) 58 | return kwargs[args[0]] 59 | raise RuntimeError("Unknown option {}, should be one of: {}".format(args[0], list(kwargs.keys()))) 60 | 61 | return convert 62 | 63 | def set_value(self, name, value, convert=True): 64 | self.original_values[name] = value 65 | if convert: 66 | self.values[name] = self.options[name].convert(value) 67 | else: 68 | self.values[name] = value 69 | 70 | def __setattr__(self, key, value): 71 | if key in ["options", "values", "original_values", "callback"] or key.startswith("__"): 72 | return super().__setattr__(key, value) 73 | self.set_value(key, value) 74 | 75 | def __getattr__(self, item): 76 | if item in ["options", "values", "original_values", "callback"] or item.startswith("__"): 77 | return super().__getattr__(item) 78 | return self.values[item] if item in self.values else self.options[item].default 79 | 80 | def add_arguments(self, parser): 81 | for o_name, option in self.options.items(): 82 | parser.add_argument( 83 | "--{}".format(option.name), 84 | type=option.from_type if not isinstance(option.from_type, tuple) else str, 85 | default=option.default_from 86 | ) 87 | 88 | def parse_arguments(self, args): 89 | for o_name, option in self.options.items(): 90 | self.set_value(option.name, getattr(args, option.name)) 91 | 92 | def print_arguments(self): 93 | return " ".join("--{} {}".format(name, o_value) for name, o_value in self.original_values.items() 94 | if o_value is not None) 95 | 96 | def call(self, timed=False) -> Union[Tuple[Any, float], Any]: 97 | def make_call(): 98 | return self.callback(**{self.options[o_name].arg_name: value for o_name, value in self.values.items()}) 99 | 100 | if timed: 101 | start_time = time.time() 102 | result = make_call() 103 | duration = time.time() - start_time 104 | return result, duration 105 | else: 106 | return make_call() 107 | 108 | def execute_from_command_line(self, description: str=None, timed: bool=False) -> Union[Tuple[Any, float], Any]: 109 | import argparse 110 | parser = argparse.ArgumentParser(description=description) 111 | self.add_arguments(parser) 112 | self.parse_arguments(parser.parse_args()) 113 | return self.call(timed) 114 | 115 | def copy(self): 116 | options = self.make_copy() 117 | options.options = dict(self.options) 118 | options.values = dict(self.values) 119 | options.original_values = dict(self.original_values) 120 | return options 121 | 122 | def make_copy(self): 123 | return Options(self.callback) 124 | 125 | def export_to_dict(self): 126 | return dict(self.original_values) 127 | 128 | def import_from_dict(self, values_dict): 129 | self.set_values(True, **values_dict) 130 | 131 | 132 | class Results(Options): 133 | @staticmethod 134 | def make_converter(converter): 135 | def convert(result, duration): 136 | return converter(result) 137 | return convert 138 | 139 | def add_result(self, name, converter): 140 | self.add_option(name, converter=Results.make_converter(converter)) 141 | 142 | def add_duration(self, name="duration"): 143 | def convert(result, duration): 144 | return duration 145 | self.add_option(name, converter=convert) 146 | 147 | def export_to_dict(self): 148 | return dict(self.values) 
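    # Illustrative (hypothetical) use of the Options machinery defined above, kept
    # as a comment so it does not change this module; the callback and option names
    # are examples only, not part of the original code:
    #
    #   def run(learner="cnf", k=3):
    #       ...
    #
    #   options = Options(callback=run)
    #   options.add_option("learner", str, default="cnf")
    #   options.add_option("k", int, default=3)
    #   options.set_values(learner="cnf", k=4)
    #   result = options.call()                # calls run(learner="cnf", k=4)
    #   result, seconds = options.call(timed=True)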
149 | 150 | def import_from_dict(self, values_dict): 151 | self.set_values(False, **values_dict) 152 | 153 | 154 | class Experiment(object): 155 | def __init__(self, parameters: Options, results: Options, config: Optional[Options]=None, import_handler=None): 156 | self.parameters = parameters 157 | self.results = results 158 | self.config = config 159 | self.import_handler = import_handler 160 | self.derived = dict() 161 | self.imported_from_file = None 162 | 163 | def register_derived(self, name, callback): 164 | self.derived[name] = callback 165 | 166 | def import_from_command_line(self): 167 | import argparse 168 | parser = argparse.ArgumentParser() 169 | self.parameters.add_arguments(parser) 170 | if self.config: 171 | self.config.add_arguments(parser) 172 | args = parser.parse_args() 173 | self.parameters.parse_arguments(args) 174 | if self.config: 175 | self.config.parse_arguments(args) 176 | 177 | def execute_from_command_line(self): 178 | self.import_from_command_line() 179 | self.execute() 180 | 181 | def execute(self): 182 | result = self.parameters.call(timed=True) 183 | for o_name in self.results.options: 184 | self.results.set_value(o_name, result) 185 | 186 | def export_to_dict(self): 187 | return {"parameters": self.parameters.export_to_dict(), "results": self.results.export_to_dict(), 188 | "config": self.config.export_to_dict() if self.config else None} 189 | 190 | def save(self, filename): 191 | with open(filename, "w") as ref: 192 | json.dump(self.export_to_dict(), ref) 193 | 194 | def import_from_dict(self, values_dict): 195 | parameters_dict, results_dict, config_dict = (values_dict[k] for k in ["parameters", "results", "config"]) 196 | if self.import_handler is not None: 197 | self.import_handler(parameters_dict, results_dict, config_dict) 198 | self.parameters.import_from_dict(parameters_dict) 199 | self.results.import_from_dict(results_dict) 200 | if self.config and config_dict: 201 | self.config.import_from_dict(config_dict) 202 | 203 | def load(self, filename): 204 | with open(filename, "r") as ref: 205 | self.import_from_dict(json.load(ref)) 206 | self.imported_from_file = os.path.realpath(filename) 207 | return self 208 | 209 | 210 | 211 | -------------------------------------------------------------------------------- /incal/util/parallel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import signal 3 | import subprocess 4 | from multiprocessing.pool import Pool 5 | from subprocess import TimeoutExpired 6 | 7 | 8 | def run_command(args): 9 | command, time_out = args 10 | with subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, preexec_fn=os.setsid) as process: 11 | try: 12 | process.communicate(timeout=time_out) 13 | except TimeoutExpired: 14 | os.killpg(process.pid, signal.SIGINT) # send signal to the process group 15 | process.communicate() 16 | 17 | 18 | def run_commands(commands, processes=None, time_out=None): 19 | pool = Pool(processes=processes) 20 | commands = [(command, time_out) for command in commands] 21 | pool.map(run_command, commands) 22 | -------------------------------------------------------------------------------- /incal/util/plot.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import platform 3 | 4 | import matplotlib as mpl 5 | 6 | if platform.system() == "Darwin": 7 | mpl.use('TkAgg') 8 | 9 | import matplotlib.markers as mark 10 | import matplotlib.pyplot as plt 11 | import matplotlib.cm as cm 12 | 13 | import 
numpy 14 | 15 | 16 | class ScatterData: 17 | # colors = ["black", "green", "red"] 18 | colors = [] 19 | markers = ["o", "v", "x"] 20 | 21 | def __init__(self, title, plot_options): 22 | self.title = title 23 | self.data = [] 24 | self.limits = None, None 25 | self.plot_options = plot_options 26 | 27 | def add_data(self, name, x_data, y_data, error=None): 28 | self.data.append((name, x_data, y_data, error)) 29 | return self 30 | 31 | @property 32 | def size(self): 33 | return len(self.data) 34 | 35 | def x_lim(self, limits): 36 | self.limits = limits, self.limits[1] 37 | 38 | def y_lim(self, limits): 39 | self.limits = self.limits[0], limits 40 | 41 | def gen_colors(self): 42 | if len(self.data) <= len(self.colors): 43 | return self.colors[:len(self.data)] 44 | iterator = iter(cm.rainbow(numpy.linspace(0, 1, len(self.data)))) 45 | return [next(iterator) for _ in range(len(self.data))] 46 | 47 | def gen_markers(self): 48 | if len(self.data) <= len(self.markers): 49 | return self.markers[:len(self.data)] 50 | iterator = itertools.cycle(mark.MarkerStyle.filled_markers) 51 | return [next(iterator) for _ in range(len(self.data))] 52 | 53 | def render(self, ax, lines=True, log_x=True, log_y=True, label_x=None, label_y=None, legend_pos=None, 54 | x_ticks=None, y_ticks=None): 55 | 56 | plots = [] 57 | colors = self.gen_colors() 58 | markers = self.gen_markers() 59 | 60 | if legend_pos is None: 61 | legend_pos = "lower right" 62 | 63 | plot_diagonal = False 64 | plot_extra = None 65 | plot_format = "scatter" 66 | show_error = True 67 | steps_x = None 68 | steps_y = None 69 | 70 | cache = None 71 | for plot_option in self.plot_options or (): 72 | if cache is None: 73 | if plot_option == "diagonal": 74 | plot_diagonal = True 75 | else: 76 | cache = plot_option 77 | else: 78 | if cache == "format": 79 | plot_format = plot_option 80 | elif cache == "error": 81 | show_error = (int(plot_option) == 1) 82 | elif cache == "legend_pos": 83 | legend_pos = plot_option 84 | elif cache == "lx": 85 | label_x = plot_option 86 | elif cache == "ly": 87 | label_y = plot_option 88 | elif cache == "steps_x": 89 | steps_x = int(plot_option) 90 | elif cache == "steps_y": 91 | steps_y = int(plot_option) 92 | elif cache == "plot_extra": 93 | plot_extra = plot_option 94 | elif cache == "x_lim": 95 | parts = plot_option.split(":") 96 | limits = (float(parts[0]), float(parts[1])) 97 | self.x_lim(limits) 98 | elif cache == "y_lim": 99 | parts = plot_option.split(":") 100 | limits = (float(parts[0]), float(parts[1])) 101 | self.y_lim(limits) 102 | cache = None 103 | 104 | min_x, max_x, min_y, max_y = numpy.infty, -numpy.infty, numpy.infty, -numpy.infty 105 | for i in range(self.size): 106 | name, x_data, y_data, error = self.data[i] 107 | try: 108 | min_x = min(min_x, numpy.min(x_data)) 109 | min_y = min(min_y, numpy.min(y_data)) 110 | max_x = max(max_x, numpy.max(x_data)) 111 | max_y = max(max_y, numpy.max(y_data)) 112 | except TypeError: 113 | pass 114 | 115 | if plot_format == "scatter": 116 | plots.append(ax.scatter(x_data, y_data, color=colors[i], marker=markers[i], s=40)) 117 | if lines: 118 | ax.plot(x_data, y_data, color=colors[i]) 119 | if show_error and error is not None: 120 | ax.fill_between(x_data, y_data - error, y_data + error, color=colors[i], alpha=0.35, 121 | linewidth=0) 122 | # ax.errorbar(x_data, y_data, error, linestyle='None', color=colors[i]) 123 | elif plot_format == "bar": 124 | plots.append(ax.bar(x_data, y_data, color=colors[i])) 125 | else: 126 | raise ValueError("Unknown plot format") 127 | 128 | if 
plot_diagonal: 129 | ax.plot(numpy.array([min_x, max_x]), numpy.array([min_y, max_y]), linestyle="--") 130 | if plot_extra and plot_extra == "1/x": 131 | ax.plot(x_data, 1 / x_data, linestyle="--") 132 | 133 | ax.grid(True) 134 | legend_names = list(t[0] for t in self.data) 135 | # legend_names = ["No mixing - DT", "No mixing - RF", "Mixing - DT", "Mixing - RF"] 136 | # legend_names = ["No formulas", "Formulas"] 137 | # legend_names = [] 138 | if 10 > len(self.data) == len(legend_names): 139 | ax.legend(plots, legend_names, loc=legend_pos) 140 | 141 | if log_x: 142 | ax.set_xscale('log') 143 | if log_y: 144 | ax.set_yscale('log') 145 | 146 | x_lim, y_lim = self.limits 147 | if x_lim is not None: 148 | ax.set_xlim(x_lim) 149 | if y_lim is not None: 150 | ax.set_ylim(y_lim) 151 | 152 | if label_y is not None: 153 | ax.set_ylabel(label_y) 154 | if label_x is not None: 155 | ax.set_xlabel(label_x) 156 | 157 | if steps_x is not None: 158 | x_ticks = numpy.linspace(min_x, max_x, steps_x) 159 | if steps_y is not None: 160 | y_ticks = numpy.linspace(min_y, max_y, steps_y) 161 | # x_ticks = [1, 2, 3] 162 | if x_ticks is not None: 163 | ax.xaxis.set_ticks(x_ticks) 164 | if y_ticks is not None: 165 | ax.yaxis.set_ticks(y_ticks) 166 | 167 | def plot(self, filename=None, size=None, **kwargs): 168 | fig = plt.figure() 169 | if size is not None: 170 | fig.set_size_inches(*size) 171 | self.render(fig.gca(), **kwargs) 172 | if filename is None: 173 | plt.show(block=True) 174 | else: 175 | plt.savefig(filename, format="png", bbox_inches="tight", pad_inches=0.08, dpi=600) 176 | 177 | 178 | def plot(file, *args, **kwargs): 179 | fig = plt.figure() 180 | fig.set_size_inches(12, 12) 181 | 182 | subplots = len(args) 183 | cols = int(numpy.ceil(numpy.sqrt(subplots))) 184 | rows = int(numpy.ceil(subplots / cols)) 185 | 186 | import matplotlib.gridspec as grid_spec 187 | gs = grid_spec.GridSpec(rows, cols) 188 | 189 | axes = [plt.subplot(gs[0, 0]), plt.subplot(gs[0, 1]), plt.subplot(gs[1, :])] 190 | legend_positions = ["lower right", "upper right", "lower left"] 191 | 192 | for i in range(subplots): 193 | legend_pos = legend_positions[i] 194 | args[i].render(axes[i], legend_pos=legend_pos, **kwargs) 195 | 196 | if file is None: 197 | plt.show() 198 | else: 199 | plt.savefig(file, format="pdf") 200 | -------------------------------------------------------------------------------- /incal/util/sampling.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | # Implementation modified from https://stackoverflow.com/a/2149533/253387 5 | 6 | class Node(object): 7 | # Each node in the heap has a weight, original weight, value, and total weight. 8 | # The total weight, self.tw, is self.w plus the weight of any children. 9 | __slots__ = ['w', 'ow', 'v', 'tw'] 10 | 11 | def __init__(self, w, ow, v, tw): 12 | self.w, self.ow, self.v, self.tw = w, ow, v, tw 13 | 14 | 15 | def rws_heap(items): 16 | # h is the heap. It's like a binary tree that lives in an array. 17 | # It has a Node for each pair in `items`. h[1] is the root. Each 18 | # other Node h[i] has a parent at h[i>>1]. Each node has up to 2 19 | # children, h[i<<1] and h[(i<<1)+1]. To get this nice simple 20 | # arithmetic, we have to leave h[0] vacant. 
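    # A small worked illustration (added comment, not part of the original code):
    # for items = [("a", 1.0), ("b", 2.0), ("c", 3.0)] the heap built below is
    #   h[1] = a (w=1, tw=6), h[2] = b (w=2, tw=2), h[3] = c (w=3, tw=3)
    # so h[1].tw always holds the total remaining weight, and
    #   list(sample_weighted([("a", 1.0), ("b", 2.0), ("c", 3.0)], 2))
    # draws two distinct values with probability proportional to their weights.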
21 | h = [None] # leave h[0] vacant 22 | for v, w in items: 23 | h.append(Node(w, w, v, w)) 24 | for i in range(len(h) - 1, 1, -1): # total up the tws 25 | h[i>>1].tw += h[i].tw # add h[i]'s total to its parent 26 | return h 27 | 28 | 29 | def rws_heap_pop(h): 30 | gas = h[1].tw * random.random() # start with a random amount of gas 31 | 32 | i = 1 # start driving at the root 33 | while gas >= h[i].w: # while we have enough gas to get past node i: 34 | gas -= h[i].w # drive past node i 35 | i <<= 1 # move to first child 36 | if gas >= h[i].tw: # if we have enough gas: 37 | gas -= h[i].tw # drive past first child and descendants 38 | i += 1 # move to second child 39 | w = h[i].w # out of gas! h[i] is the selected node. 40 | v = h[i].v 41 | 42 | h[i].w = 0 # make sure this node isn't chosen again 43 | while i: # fix up total weights 44 | h[i].tw -= w 45 | i >>= 1 46 | return v 47 | 48 | 49 | def sample_weighted(items, n): # items are pairs (value, weight) 50 | heap = rws_heap(items) # just make a heap... 51 | for i in range(n): 52 | yield rws_heap_pop(heap) # and pop n items off it. 53 | -------------------------------------------------------------------------------- /incal/util/timeout.py: -------------------------------------------------------------------------------- 1 | def timeout(func, args=None, kwargs=None, duration=1, default=None): 2 | if args is None: 3 | args = () 4 | if kwargs is None: 5 | kwargs = dict() 6 | 7 | if duration is None: 8 | return func(*args, **kwargs) 9 | 10 | import signal 11 | 12 | class TimeoutError(Exception): 13 | pass 14 | 15 | def handler(signum, frame): 16 | raise TimeoutError() 17 | 18 | # set the timeout handler 19 | signal.signal(signal.SIGALRM, handler) 20 | signal.alarm(duration) 21 | 22 | try: 23 | result = func(*args, **kwargs) 24 | except TimeoutError: 25 | result = default 26 | finally: 27 | signal.alarm(0) 28 | 29 | return result 30 | -------------------------------------------------------------------------------- /incal/violations/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/violations/__init__.py -------------------------------------------------------------------------------- /incal/violations/core.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | 5 | from pywmi.smt_check import evaluate 6 | from pywmi.smt_print import pretty_print, pretty_print_instance 7 | from typing import Tuple, List 8 | 9 | 10 | class SelectionStrategy(object): 11 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]: 12 | raise NotImplementedError() 13 | 14 | 15 | class AllViolationsStrategy(SelectionStrategy): 16 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]: 17 | active_set = set(active_indices) 18 | learned_labels = evaluate(domain, formula, data) 19 | differences = np.logical_xor(labels, learned_labels) 20 | difference_set = set(np.where(differences)[0]) 21 | # print(active_set) 22 | # print(difference_set) 23 | # print(pretty_print(formula)) 24 | # for i in active_set & difference_set: 25 | # print(i) 26 | # print(pretty_print_instance(domain, data[i])) 27 | # print(labels[i], learned_labels[i]) 28 | # print() 29 | # assert len(active_set & difference_set) == 0 30 | return data, labels, sorted(difference_set 
- active_set) 31 | 32 | 33 | class RandomViolationsStrategy(AllViolationsStrategy): 34 | def __init__(self, sample_size): 35 | self.sample_size = sample_size 36 | self.last_violations = None 37 | 38 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]: 39 | data, labels, all_violations = AllViolationsStrategy.select_active(self, domain, data, labels, formula, active_indices) 40 | self.last_violations = list(all_violations) 41 | sample_size = min(self.sample_size, len(self.last_violations)) 42 | return data, labels, random.sample(self.last_violations, sample_size) 43 | 44 | 45 | class WeightedRandomViolationsStrategy(AllViolationsStrategy): 46 | def __init__(self, sample_size, weights): 47 | self.sample_size = sample_size 48 | self.last_violations = None 49 | self.weights = weights 50 | 51 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]: 52 | data, labels, all_violations = AllViolationsStrategy.select_active(self, domain, data, labels, formula, active_indices) 53 | self.last_violations = list(all_violations) 54 | sample_size = min(self.sample_size, len(self.last_violations)) 55 | import sampling 56 | return data, labels, sampling.sample_weighted(zip(self.last_violations, [self.weights[i] for i in self.last_violations]), sample_size) 57 | 58 | 59 | class MaxViolationsStrategy(AllViolationsStrategy): 60 | def __init__(self, sample_size, weights): 61 | self.sample_size = sample_size 62 | self.last_violations = None 63 | self.weights = weights 64 | 65 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]: 66 | data, labels, all_violations = AllViolationsStrategy.select_active(self, domain, data, labels, formula, active_indices) 67 | all_violations = list(all_violations) 68 | self.last_violations = all_violations 69 | sample_size = min(self.sample_size, len(all_violations)) 70 | weighted_violations = zip(all_violations, [self.weights[i] for i in all_violations]) 71 | weighted_violations = sorted(weighted_violations, key=lambda t: t[1]) 72 | # noinspection PyTypeChecker 73 | return data, labels, [t[0] for t in weighted_violations[0:sample_size]] 74 | -------------------------------------------------------------------------------- /incal/violations/dt_selection.py: -------------------------------------------------------------------------------- 1 | # Given a number of points: 2 | # - Train a DT (scale points?) 
3 | # - For every point compute distance to the decision boundary 4 | import sklearn.tree as tree 5 | 6 | import pysmt.shortcuts as smt 7 | 8 | from .core import MaxViolationsStrategy 9 | 10 | 11 | class DecisionTreeSelection(MaxViolationsStrategy): 12 | def __init__(self): 13 | super().__init__(1, None) 14 | 15 | def select_active(self, domain, data, labels, formula, active_indices): 16 | if self.weights is None: 17 | self.weights = [min(d.values()) for d in get_distances(domain, data, labels)] 18 | return super().select_active(domain, data, labels, formula, active_indices) 19 | 20 | 21 | def convert(domain, data, labels): 22 | # def _convert(var, val): 23 | # if domain.var_types[var] == smt.BOOL: 24 | # return 1 if val else 0 25 | # elif domain.var_types[var] == smt.REAL: 26 | # return float(val) 27 | 28 | # feature_matrix = [] 29 | # labels = [] 30 | # for instance, label in data: 31 | # feature_matrix.append([_convert(v, instance[v]) for v in domain.variables]) 32 | # labels.append(1 if label else 0) 33 | return data, labels 34 | 35 | 36 | def learn_dt(feature_matrix, labels, **kwargs): 37 | # noinspection PyArgumentList 38 | estimator = tree.DecisionTreeClassifier(**kwargs) 39 | estimator.fit(feature_matrix, labels) 40 | return estimator 41 | 42 | 43 | def export_dt(dt): 44 | import graphviz 45 | dot_data = tree.export_graphviz(dt, out_file=None) 46 | graph = graphviz.Source(dot_data) 47 | graph.render("DT") 48 | 49 | 50 | def get_distances_dt(dt, domain, feature_matrix): 51 | # Include more features than trained with? 52 | 53 | leave_id = dt.apply(feature_matrix) 54 | feature = dt.tree_.feature 55 | threshold = dt.tree_.threshold 56 | node_indicator = dt.decision_path(feature_matrix) 57 | 58 | distances = [] 59 | 60 | for sample_id in range(len(feature_matrix)): 61 | distance = dict() 62 | node_index = node_indicator.indices[node_indicator.indptr[sample_id]: node_indicator.indptr[sample_id + 1]] 63 | for node_id in node_index: 64 | variable = domain.variables[feature[node_id]] 65 | if leave_id[sample_id] != node_id and domain.var_types[variable] == smt.REAL: 66 | new_distance = abs(feature_matrix[sample_id][feature[node_id]] - threshold[node_id]) 67 | if variable not in distance or new_distance < distance[variable]: 68 | distance[variable] = new_distance 69 | distances.append(distance) 70 | 71 | return distances 72 | 73 | 74 | def get_distances(domain, data, labels): 75 | # feature_matrix, labels = convert(domain, data, labels) 76 | dt = learn_dt(data, labels) 77 | return get_distances_dt(dt, domain, data) 78 | 79 | 80 | if __name__ == "__main__": 81 | pass 82 | 83 | -------------------------------------------------------------------------------- /incal/violations/virtual_data.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import scipy 5 | from pysmt.exceptions import InternalSolverError 6 | from pysmt.environment import Environment 7 | from pysmt.shortcuts import TRUE 8 | from pysmt.typing import REAL, BOOL 9 | from pywmi import Domain, evaluate 10 | from pywmi.sample import uniform 11 | from typing import Dict, Any, Tuple, TYPE_CHECKING 12 | 13 | from .core import SelectionStrategy 14 | 15 | if TYPE_CHECKING: 16 | from pysmt.fnode import FNode 17 | 18 | 19 | class OneClassStrategy(SelectionStrategy): 20 | def __init__(self, regular_strategy, thresholds, tight_mode=True, class_label=True, background_knowledge=None): 21 | self.regular_strategy = regular_strategy # type: 
SelectionStrategy 22 | self.thresholds = thresholds 23 | self.tight_mode = tight_mode 24 | assert class_label, "Currently only the positive setting is supported" 25 | self.class_label = class_label 26 | self.environment = Environment() 27 | if background_knowledge is None: 28 | self.background_knowledge = self.environment.formula_manager.TRUE() 29 | else: 30 | self.background_knowledge = self.environment.formula_manager.normalize(background_knowledge) 31 | 32 | def find_violating(self, domain, data, labels, formula): 33 | fm = self.environment.formula_manager 34 | formula = fm.normalize(formula) 35 | real_symbols = {name: fm.Symbol(name, REAL) for name in domain.real_vars} 36 | bool_symbols = {name: fm.Symbol(name, BOOL) for name in domain.bool_vars} 37 | symbols = real_symbols.copy() 38 | symbols.update(bool_symbols) 39 | bounds = domain.get_bounds(fm) 40 | try: 41 | with self.environment.factory.Solver() as solver: 42 | solver.add_assertion(formula) 43 | solver.add_assertion(bounds) 44 | solver.add_assertion(self.background_knowledge) 45 | # equalities = [] 46 | # for row, label in data: 47 | # for real_var in domain.real_vars: 48 | # sym = real_symbols[real_var] 49 | # val = fm.Real(row[real_var]) 50 | # t = fm.Real(self.thresholds[real_var]) 51 | # equalities.append(fm.Ite(sym >= val, fm.Equals(sym - val, t), fm.Equals(val - sym, t))) 52 | # solver.add_assertion(fm.Or(*equalities)) 53 | for i in range(len(labels)): 54 | row = {v: data[i, j].item() for j, v in enumerate(domain.variables)} 55 | label = labels[i] == 1 56 | if label == self.class_label: 57 | constraint = fm.Implies(fm.And( 58 | *[fm.Iff(bool_symbols[bool_var], fm.Bool(row[bool_var] == 1)) for bool_var in domain.bool_vars]), 59 | fm.Or(*[fm.Ite(real_symbols[real_var] >= fm.Real(row[real_var]), 60 | real_symbols[real_var] - fm.Real(row[real_var]) >= fm.Real( 61 | self.thresholds[real_var]), 62 | fm.Real(row[real_var]) - real_symbols[real_var] >= fm.Real( 63 | self.thresholds[real_var])) for real_var in 64 | domain.real_vars])) 65 | elif label == (not self.class_label): 66 | constraint = fm.Implies(fm.And( 67 | *[fm.Iff(bool_symbols[bool_var], fm.Bool(row[bool_var] == 1)) for bool_var in domain.bool_vars]), 68 | fm.Or(*[fm.Ite(real_symbols[real_var] >= fm.Real(row[real_var]), 69 | real_symbols[real_var] - fm.Real(row[real_var]) >= fm.Real( 70 | self.thresholds[real_var]), 71 | fm.Real(row[real_var]) - real_symbols[real_var] >= fm.Real( 72 | self.thresholds[real_var])) for real_var in 73 | domain.real_vars])) 74 | else: 75 | raise ValueError("Unknown label l_{} = {}".format(i, label)) 76 | solver.add_assertion(constraint) 77 | solver.solve() 78 | model = solver.get_model() 79 | example = [float(model.get_value(symbols[var]).constant_value()) for var in domain.variables] 80 | except InternalSolverError: 81 | return None 82 | except Exception as e: 83 | if "Z3Exception" in str(type(e)): 84 | return None 85 | else: 86 | raise e 87 | 88 | return example 89 | 90 | def select_active(self, domain, data, labels, formula, active_indices): 91 | data, labels, selected = self.regular_strategy.select_active(domain, data, labels, formula, active_indices) 92 | if len(selected) > 0: 93 | return data, labels, selected 94 | else: 95 | example = self.find_violating(domain, data, labels, formula) 96 | if example is None: 97 | return data, labels, [] 98 | data = np.vstack([data, example]) 99 | labels = np.append(labels, np.array([0 if self.class_label else 1])) 100 | return data, labels, [len(labels) - 1] 101 | 102 | @staticmethod 103 | def 
add_negatives(domain, data, labels, thresholds, sample_count, background_knowledge=None, distance_measure=None): 104 | # type: (Domain, np.ndarray, np.ndarray, Dict, int, FNode, Any) -> Tuple[np.ndarray, np.ndarray] 105 | 106 | new_data = uniform(domain, sample_count) 107 | background_knowledge = background_knowledge or TRUE() 108 | supported_indices = evaluate(domain, background_knowledge, new_data) 109 | boolean_indices = [i for i, v in enumerate(domain.variables) if domain.is_bool(v)] 110 | real_indices = [i for i, v in enumerate(domain.variables) if domain.is_real(v)] 111 | for j in range(new_data.shape[0]): 112 | valid_negative = True 113 | for i in range(data.shape[0]): 114 | # noinspection PyTypeChecker 115 | if labels[i] and all(data[i, boolean_indices] == new_data[j, boolean_indices]): 116 | in_range = True 117 | for ri, v in zip(real_indices, domain.real_vars): 118 | t = thresholds[v] if isinstance(thresholds, dict) else thresholds[i, ri] 119 | if abs(data[i, ri] - new_data[j, ri]) > t: 120 | in_range = False 121 | break 122 | valid_negative = valid_negative and (not in_range) 123 | if not valid_negative: 124 | break 125 | supported_indices[j] = supported_indices[j] and valid_negative 126 | new_data = new_data[supported_indices == 1, :] 127 | return np.concatenate([data, new_data], axis=0), np.concatenate([labels, np.zeros(new_data.shape[0])]) 128 | 129 | 130 | """ 131 | There is a point e, such that for every example e': d(e, e') > t 132 | AND(OR(d(e, e', r) > t, forall r), forall e) 133 | """ 134 | -------------------------------------------------------------------------------- /notebooks/baldur.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import time 4 | 5 | from pywmi import Domain, RejectionEngine, evaluate, plot 6 | 7 | import numpy as np 8 | 9 | from incal import Formula 10 | from incal.violations.virtual_data import OneClassStrategy 11 | from incal.k_cnf_smt_learner import KCnfSmtLearner 12 | from incal.learn import LearnOptions 13 | from incal.observe.plotting import PlottingObserver 14 | from incal.parameter_free_learner import learn_bottom_up 15 | from incal.violations.dt_selection import DecisionTreeSelection 16 | from incal.violations.core import RandomViolationsStrategy 17 | 18 | 19 | def experiment(): 20 | random.seed(888) 21 | np.random.seed(888) 22 | 23 | start = time.time() 24 | 25 | domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)]) 26 | x, y = domain.get_symbols(domain.variables) 27 | thresholds = {"x": 0.1, "y": 0.1} 28 | # data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50) 29 | filename = "/Users/samuelkolb/Downloads/bg512/AR0206SR.map.scen" 30 | data = np.loadtxt(filename, delimiter="\t", skiprows=1, usecols=[4, 5]) / 512 31 | k = 12 32 | nearest_neighbors = [] 33 | 34 | for i in range(len(data)): 35 | nearest_neighbors.append([]) 36 | for j in range(len(data)): 37 | if i != j: 38 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables) 39 | if domain.is_bool(v))\ 40 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v)) 41 | if len(nearest_neighbors[i]) < k: 42 | nearest_neighbors[i].append((j, distance)) 43 | else: 44 | index_of_furthest = None 45 | for fi, f in enumerate(nearest_neighbors[i]): 46 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]: 47 | index_of_furthest = fi 48 | if 
distance < nearest_neighbors[i][index_of_furthest][1]: 49 | nearest_neighbors[i][index_of_furthest] = (j, distance) 50 | print(nearest_neighbors) 51 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * (domain.var_domains[v][1] - domain.var_domains[v][0]) for v in domain.real_vars] 52 | for i in range(len(nearest_neighbors))] 53 | t = np.array(t) * 4 54 | print(t) 55 | # data = uniform(domain, 400) 56 | labels = np.ones(len(data)) 57 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000) 58 | 59 | directory = "output{}baldur{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss")) 60 | os.makedirs(directory) 61 | 62 | name = os.path.join(directory, "combined.png") 63 | plot.plot_combined("x", "y", domain, None, (data, labels), None, name, set(), set()) 64 | 65 | def learn_inc(_data, _labels, _i, _k, _h): 66 | # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds) 67 | strategy = RandomViolationsStrategy(10) 68 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn") 69 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data)))) 70 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h), 71 | domain.real_vars[0], domain.real_vars[1], None, False)) 72 | return learner.learn(domain, _data, _labels, initial_indices) 73 | 74 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 3, 6, None, None) 75 | duration = time.time() - start 76 | Formula(domain, learned_formula).to_file(os.path.join(directory, "result_{}_{}_{}.json".format(k, h, int(duration)))) 77 | 78 | 79 | if __name__ == '__main__': 80 | experiment() -------------------------------------------------------------------------------- /notebooks/experiments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ $# -ne 1 ]; then 3 | echo "$0 " 1>&2 4 | exit 5 | fi 6 | if ! [ -d "$1" ]; then 7 | echo "Not a directory: $1" 1>&2 8 | exit 9 | fi 10 | 11 | if [ -z "$(which cilly)" ]; then 12 | echo "cilly compiler not found!" 1>&2 13 | exit 14 | fi 15 | 16 | codeflaws_dir=$1 17 | repair_dir=$codeflaws_dir/../ 18 | rm $codeflaws_dir/*/autogen* &> /dev/null 19 | rm $codeflaws_dir/*/incal* &> /dev/null 20 | 21 | if ! [ -e $repair_dir/learn2fix ]; then 22 | echo $repair_dir/learn2fix does not exist. 23 | exit 24 | fi 25 | cp $repair_dir/learn2fix/repairs/genprog/* $repair_dir/ 26 | if [ -e $repair_dir/genprog-run ]; then 27 | echo "[INFO] Saving $repair_dir/genprog-run.." 1>&2 28 | rm -rf $repair_dir/genprog-run.old 2> /dev/null 29 | mv $repair_dir/genprog-run $repair_dir/genprog-run.old 30 | fi 31 | mkdir $repair_dir/genprog-run 32 | 33 | #TODO Where is genprog_allfixes created? 34 | if [ -e $repair_dir/genprog-allfixes ]; then 35 | echo "[INFO] Saving $repair_dir/genprog-allfixes.." 
1>&2 36 | rm -rf $repair_dir/genprog-allfixes.old 2> /dev/null 37 | mv $repair_dir/genprog-allfixes $repair_dir/genprog-allfixes.old 38 | fi 39 | mkdir $repair_dir/genprog-allfixes 40 | 41 | 42 | for s in $(ls -1d $codeflaws_dir/*/); do 43 | found=false; 44 | for f in $(ls -1 $s/*input*); do if [ $(wc -l $f | cut -d" " -f1) -gt 1 ]; then found=true; continue; fi; done; 45 | if [ "$found" = false ]; then 46 | if [ $(cat $s/input-neg1 | grep -x -E '[[:blank:]]*([[:digit:]]+[[:blank:]]*)*' | wc -l) -eq 1 ]; then 47 | #echo $s 48 | subject=$(echo $s | rev | cut -d/ -f2 | rev) 49 | buggy=$(echo $subject | cut -d- -f1,2,4) 50 | golden=$(echo $subject | cut -d- -f1,2,5) 51 | if [ 0 -eq $(grep "$subject" $codeflaws_dir/codeflaws-defect-detail-info.txt | grep "WRONG_ANSWER" | wc -l) ]; then 52 | echo "[INFO] Skipping non-semantic bug $subject" 1>&2 53 | continue 54 | fi 55 | if ! [ -f "$s/$buggy" ]; then 56 | gcc -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm -std=c99 -c $s/$buggy.c -o $s/$buggy.o &> /dev/null 57 | gcc $s/$buggy.o -o $s/$buggy -lm -s -O2 &> /dev/null 58 | fi 59 | if ! [ -f "$s/$golden" ]; then 60 | gcc -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm -std=c99 -c $s/$golden.c -o $s/$golden.o &> /dev/null 61 | gcc $s/$golden.o -o $s/$golden -lm -s -O2 &> /dev/null 62 | fi 63 | cp $repair_dir/learn2fix/repairs/genprog/test-genprog-incal.py $s/ 64 | 65 | for i in $(seq 1 $(nproc --all)); do 66 | ( 67 | autotest=$(timeout 11m ./Learn2Fix.py -t 10 -s $s -i $i) 68 | if [ $? -eq 0 ]; then 69 | manual=$($repair_dir/run-version-genprog.sh $subject $i manual 10m) 70 | autogen=$($repair_dir/run-version-genprog.sh $subject $i autogen 10m) 71 | echo $autotest | tr -d '\n' 72 | echo ,$manual | tr -d '\n' 73 | echo ,$autogen 74 | fi 75 | ) >> results_it_$i.csv & 76 | done 77 | wait 78 | else 79 | echo "[INFO] Skipping non-numeric input subject: $s" 1>&2 80 | fi 81 | else 82 | echo "[INFO] Skipping multi-line input subject: $s" 1>&2 83 | fi 84 | done 85 | 86 | -------------------------------------------------------------------------------- /notebooks/gps.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import time 4 | 5 | from pywmi import Domain, RejectionEngine, evaluate, plot 6 | 7 | import numpy as np 8 | 9 | from incal import Formula 10 | from incal.violations.virtual_data import OneClassStrategy 11 | from incal.k_cnf_smt_learner import KCnfSmtLearner 12 | from incal.learn import LearnOptions 13 | from incal.observe.plotting import PlottingObserver 14 | from incal.parameter_free_learner import learn_bottom_up 15 | from incal.violations.dt_selection import DecisionTreeSelection 16 | from incal.violations.core import RandomViolationsStrategy 17 | 18 | 19 | def experiment(): 20 | random.seed(888) 21 | np.random.seed(888) 22 | 23 | start = time.time() 24 | 25 | domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)]) 26 | x, y = domain.get_symbols(domain.variables) 27 | thresholds = {"x": 0.1, "y": 0.1} 28 | # data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50) 29 | filename = "/Users/samuelkolb/Downloads/gps-trajectory-simpler/cell-0-2.data" 30 | data = np.loadtxt(filename, delimiter=",", skiprows=0, usecols=[0, 1]) 31 | minimum_0, minimum_1 = min(data[:, 0]), min(data[:, 1]) 32 | maximum_0, maximum_1 = max(data[:, 0]), max(data[:, 1]) 33 | data[:, 0] = (data[:, 0] - minimum_0) / (maximum_0 - minimum_0) 34 | data[:, 1] = (data[:, 1] - minimum_1) / (maximum_1 - minimum_1) 35 | k = 3 36 | 
nearest_neighbors = [] 37 | 38 | for i in range(len(data)): 39 | nearest_neighbors.append([]) 40 | for j in range(len(data)): 41 | if i != j: 42 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables) 43 | if domain.is_bool(v))\ 44 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v)) 45 | if len(nearest_neighbors[i]) < k: 46 | nearest_neighbors[i].append((j, distance)) 47 | else: 48 | index_of_furthest = None 49 | for fi, f in enumerate(nearest_neighbors[i]): 50 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]: 51 | index_of_furthest = fi 52 | if distance < nearest_neighbors[i][index_of_furthest][1]: 53 | nearest_neighbors[i][index_of_furthest] = (j, distance) 54 | print(nearest_neighbors) 55 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * (domain.var_domains[v][1] - domain.var_domains[v][0]) for v in domain.real_vars] 56 | for i in range(len(nearest_neighbors))] 57 | t = np.array(t) * 2 58 | print(t) 59 | # data = uniform(domain, 400) 60 | labels = np.ones(len(data)) 61 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000) 62 | 63 | directory = "output{}gps{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss")) 64 | os.makedirs(directory) 65 | 66 | name = os.path.join(directory, "combined.png") 67 | plot.plot_combined("x", "y", domain, None, (data, labels), None, name, set(), set()) 68 | 69 | def learn_inc(_data, _labels, _i, _k, _h): 70 | # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds) 71 | strategy = RandomViolationsStrategy(10) 72 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn") 73 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data)))) 74 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h), 75 | domain.real_vars[0], domain.real_vars[1], None, False)) 76 | return learner.learn(domain, _data, _labels, initial_indices) 77 | 78 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 4, 8, None, None) 79 | duration = time.time() - start 80 | Formula(domain, learned_formula).to_file(os.path.join(directory, "result_{}_{}_{}.json".format(k, h, int(duration)))) 81 | 82 | 83 | if __name__ == '__main__': 84 | experiment() -------------------------------------------------------------------------------- /notebooks/playground.py: -------------------------------------------------------------------------------- 1 | from inspect import signature 2 | 3 | import numpy as np 4 | 5 | from smtlearn.examples import ice_cream_problem 6 | from pywmi.plot import plot_data, plot_formula 7 | from pywmi.sample import uniform 8 | from pywmi.smt_check import evaluate 9 | import random 10 | from smtlearn.violations.core import RandomViolationsStrategy 11 | from smtlearn.k_cnf_smt_learner import KCnfSmtLearner 12 | from pywmi.smt_print import pretty_print 13 | 14 | random.seed(666) 15 | np.random.seed(666) 16 | 17 | domain, formula, name = ice_cream_problem() 18 | # plot_formula(None, domain, formula) 19 | 20 | data = uniform(domain, 100) 21 | labels = evaluate(domain, formula, data) 22 | 23 | learner = KCnfSmtLearner(3, 3, RandomViolationsStrategy(10)) 24 | initial_indices = random.sample(range(data.shape[0]), 20) 25 | 26 | learned_theory = learner.learn(domain, data, labels, initial_indices) 27 | print(pretty_print(learned_theory)) 28 | 
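playground.py above stops at printing the learned theory. A natural follow-up, sketched here purely as an illustration (it is not part of the script), is to estimate how closely the learned theory agrees with the target formula on fresh samples, reusing the uniform and evaluate helpers already imported there:

test_data = uniform(domain, 1000)
agreement = (evaluate(domain, learned_theory, test_data) == evaluate(domain, formula, test_data)).mean()
print("Agreement with the target formula on 1000 fresh samples: {:.2%}".format(agreement))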
-------------------------------------------------------------------------------- /notebooks/results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ $# -ne 1 ]; then 3 | echo "$0 " 4 | exit 5 | fi 6 | results=$1 7 | 8 | if ! [ -e $results ]; then 9 | echo $results does not exist. 10 | exit 11 | fi 12 | 13 | if [ 0 -eq $(cat $results | wc -l) ]; then 14 | echo $results is empty. 15 | exit 16 | fi 17 | 18 | n_subjects=$(cat $results | cut -d, -f1 | sort | uniq | wc -l) 19 | avg_labeling=$(echo "scale=2; $(cat $results | cut -d, -f4 | awk '{s+=$1} END {print s}') / $(cat $results | wc -l)" | bc) 20 | no_failing=$(cat $results | cut -d, -f-5 | grep ,0$ | cut -d, -f1 | sort | uniq | wc -l) 21 | no_labled_fail=$(cat $results | cut -d, -f-6 | grep ,0$ | cut -d, -f1 | sort | uniq | wc -l) 22 | 23 | labeling_effort=$(echo "scale=2; $(cat $results | cut -d, -f4 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f3 | awk '{s+=$1} END {print s}') * 100" | bc) 24 | 25 | labeled_fail=$(echo "scale=2; $(cat $results | cut -d, -f6 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f4 | awk '{s+=$1} END {print s}') * 100" | bc) 26 | failure_rate=$(echo "scale=2; $(cat $results | cut -d, -f5 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f3 | awk '{s+=$1} END {print s}') * 100" | bc) 27 | improvement=$(echo "scale=2; $labeled_fail / $failure_rate" | bc) 28 | 29 | 30 | accuracy=$(echo "scale=2; $(cat $results | cut -d, -f8 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f7 | awk '{s+=$1} END {print s}') * 100" | bc) 31 | accuracy_fail=$(echo "scale=2; $(cat $results | cut -d, -f10 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f9 | awk '{s+=$1} END {print s}') * 100" | bc) 32 | 33 | 34 | 35 | 36 | echo "n_subjects $n_subjects" 37 | echo "avg_labeling $avg_labeling" 38 | echo "no_failing $no_failing #subjects where none of the generated test cases are failing." 39 | echo "no_labled_fail $no_labled_fail #subjects where none of the labeled generated test cases are failing." 
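# Worked example of the rates printed below (illustrative numbers only): with
# column sums over the results CSV of sum(field 3)=1000, sum(field 4)=100,
# sum(field 5)=120 and sum(field 6)=30, the computations above give
# labeling_effort = 100/1000*100 = 10.00, labeled_fail = 30/100*100 = 30.00,
# failure_rate = 120/1000*100 = 12.00 and improvement = 30.00/12.00 = 2.50.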
40 | 41 | echo "labeling_effort $labeling_effort" 42 | echo "labeled_fail $labeled_fail" 43 | echo "failure_rate $failure_rate" 44 | echo "improvement $improvement" 45 | echo "accuracy $accuracy" 46 | echo "accuracy_fail $accuracy_fail" 47 | -------------------------------------------------------------------------------- /notebooks/synthetic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import time 4 | 5 | from pywmi import Domain, RejectionEngine, evaluate, plot 6 | 7 | import numpy as np 8 | 9 | from incal import Formula 10 | from incal.violations.virtual_data import OneClassStrategy 11 | from incal.k_cnf_smt_learner import KCnfSmtLearner 12 | from incal.learn import LearnOptions 13 | from incal.observe.plotting import PlottingObserver 14 | from incal.parameter_free_learner import learn_bottom_up 15 | from incal.violations.dt_selection import DecisionTreeSelection 16 | from incal.violations.core import RandomViolationsStrategy 17 | 18 | 19 | def experiment(): 20 | random.seed(888) 21 | np.random.seed(888) 22 | 23 | start = time.time() 24 | 25 | domain = Domain.make(["b0", "b1", "b2"], ["x0", "x1"], [(0, 1), (0, 1)]) 26 | # thresholds = {"x": 0.1, "y": 0.1} 27 | # data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50) 28 | filename = "/Users/samuelkolb/Downloads/input-ijcai-rh/ijcai-rh_2_3_2_100_50_4_3.problem_0.train_dataset.data" 29 | data = np.loadtxt(filename, delimiter=",", skiprows=0) 30 | k = 4 31 | nearest_neighbors = [] 32 | 33 | for i in range(len(data)): 34 | nearest_neighbors.append([]) 35 | for j in range(len(data)): 36 | if i != j: 37 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables) 38 | if domain.is_bool(v))\ 39 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v)) 40 | if len(nearest_neighbors[i]) < k: 41 | nearest_neighbors[i].append((j, distance)) 42 | else: 43 | index_of_furthest = None 44 | for fi, f in enumerate(nearest_neighbors[i]): 45 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]: 46 | index_of_furthest = fi 47 | if distance < nearest_neighbors[i][index_of_furthest][1]: 48 | nearest_neighbors[i][index_of_furthest] = (j, distance) 49 | print(nearest_neighbors) 50 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * 51 | (domain.var_domains[v][1] - domain.var_domains[v][0]) if domain.is_real(v) else 0 for v in domain.variables] 52 | for i in range(len(nearest_neighbors))] 53 | t = np.array(t) * 1.5 54 | print(t) 55 | # data = uniform(domain, 400) 56 | labels = np.ones(len(data)) 57 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000) 58 | 59 | directory = "output{}lariat-synthetic{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss")) 60 | os.makedirs(directory) 61 | 62 | name = os.path.join(directory, "combined.png") 63 | plot.plot_combined("x0", "x1", domain, None, (data, labels), None, name, set(), set()) 64 | 65 | def learn_inc(_data, _labels, _i, _k, _h): 66 | # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds) 67 | strategy = DecisionTreeSelection() 68 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn") 69 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data)))) 70 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h), 71 | domain.real_vars[0], 
domain.real_vars[1], None, False)) 72 | return learner.learn(domain, _data, _labels, initial_indices) 73 | 74 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 2, 4, None, None) 75 | duration = time.time() - start 76 | Formula(domain, learned_formula).to_file(os.path.join(directory, "result_{}_{}_{}.json".format(k, h, int(duration)))) 77 | 78 | 79 | if __name__ == '__main__': 80 | experiment() -------------------------------------------------------------------------------- /plotting_commands.txt: -------------------------------------------------------------------------------- 1 | # Effect of symmetries 2 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_m/ remote_res/smt_lib_benchmark/qf_lra_results_n/ remote_res/smt_lib_benchmark/qf_lra_results_v/ remote_res/smt_lib_benchmark/qf_lra_results_mvn/ remote_res/smt_lib_benchmark/qf_lra_results_h --res_path remote_res show -p -t duration -g k learner -o ly "Duration (s)" lx "Number of learned clauses (k)" steps_x 3 error 0 -w ../incal_mlj/figures/symmetries_benchmark.png 3 | 4 | # Counts 5 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_m/ remote_res/smt_lib_benchmark/qf_lra_results_n/ remote_res/smt_lib_benchmark/qf_lra_results_v/ remote_res/smt_lib_benchmark/qf_lra_results_mvn/ remote_res/smt_lib_benchmark/qf_lra_results_h --res_path remote_res show -p -a count -t id -g k learner -o ly "Number of runs" lx "Number of learned clauses (k)" steps_x 3 error 0 6 | 7 | # Learned halfspaces 8 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_m/ remote_res/smt_lib_benchmark/qf_lra_results_n/ remote_res/smt_lib_benchmark/qf_lra_results_v/ remote_res/smt_lib_benchmark/qf_lra_results_mvn/ remote_res/smt_lib_benchmark/qf_lra_results_h --res_path remote_res show -p -t h -g k learner -o error 1 9 | 10 | # Effect of dt 11 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_dt/ remote_res/smt_lib_benchmark/qf_lra_results_mvn remote_res/smt_lib_benchmark/qf_lra_results_dt_mvn --res_path remote_res show -p -t duration -g k selection_strategy learner -o ly "Duration (s)" lx "Number of learned clauses (k)" steps_x 3 error 0 -w ../incal_mlj/figures/dt_benchmark.png 12 | 13 | # --- Exploring H 14 | # Duration over increasing original h 15 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t duration -g original_h selection_strategy learner -o ly "Duration (s)" lx "Number of inequalities in ground truth (h)" -w ../incal_mlj/figures/time_over_oh_synthetic.png 16 | # Learned h over original h 17 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t h -g original_h selection_strategy learner -o ly "Number of learned hyperplanes (h)" lx "Number of inequalities in ground truth (h)" -w ../incal_mlj/figures/h_over_oh_synthetic.png 18 | # Duration over learned h 19 | incal-experiments smt-lib-benchmark 
analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t duration -g h selection_strategy learner -o ly "Duration (s)" lx "Number of learned inequalities (h)" -w ../incal_mlj/figures/time_over_h_synthetic.png 20 | # Accuracy over learned h 21 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t accuracy_approx -g h selection_strategy learner -o ly "Accuracy on test set" lx "Number of learned inequalities (h)" y_lim "0.5:1" legend_pos "lower center" -w ../incal_mlj/figures/acc_over_h_synthetic.png 22 | 23 | # Duration over positive ratio 24 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain --res_path res show -p -t duration -g pos_rate__batch0.1 h -o ly "Duration (s)" lx "Positive rate" -e "k!=2" "h<5" -w ../incal_mlj/figures/time_over_pos_rate.png 25 | 26 | 27 | # --- Exploring L 28 | # Timeouts over increasing original l 29 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/ll_plain --res_path res show -p -t executed -g original_l learner -o error 0 y_lim 0:1 lx "Number of literals in ground truth" ly "Fraction of experiments completed within time limit" steps_x 5 -w ../incal_mlj/figures/ex_over_l_synthetic.png 30 | 31 | 32 | # --- Exploring K 33 | # Timeouts over increasing original l 34 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/kk_plain res/synthetic/output/kk_dt_mvn --res_path res show -p -t executed -g original_k learner -o error 0 y_lim 0:1 lx "Number of clauses in ground truth" ly "Fraction of experiments completed within time limit" steps_x 5 -w ../incal_mlj/figures/ex_over_k_synthetic.png 35 | # Learned k over original k 36 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/kk_plain res/synthetic/output/kk_dt_mvn --res_path res show -p -t k -g original_k selection_strategy learner -o ly "Number of learned clauses (k)" lx "Number of clauses in ground truth (k)" -e "executed=0" -w ../incal_mlj/figures/k_over_original_k_synthetic.png 37 | -------------------------------------------------------------------------------- /repairs/genprog/run-version-genprog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to run spr on subjects in codeflaws directory 3 | #The following variables needs to be changed: 4 | rootdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" #directory of this script 5 | rundir="$rootdir/genprog-run" # directory in which genprog is called from, a temporary output directory where everything will be copied to during the repair 6 | versiondir="$rootdir/codeflaws" #directory where the codeflaws.tar.gz is extracted 7 | version=$1 8 | genprog="/opt/genprog/bin/genprog" # location of the installed genprog 9 | iteration=$2 10 | testsuite=$3 11 | timelimit=$4 12 | 13 | kill_descendant_processes() { 14 | local pid="$1" 15 | local and_self="${2:-false}" 16 | if children="$(pgrep -P "$pid")"; then 17 | for child in $children; do 18 | kill_descendant_processes "$child" true 19 | done 20 | fi 21 | if [[ "$and_self" == true ]]; then 22 | kill -9 "$pid" 23 | fi 24 | } 25 | 26 | print_results(){ 27 | if ! 
[ -z "$4" ]; then 28 | echo "$testsuite,$2,$3,$4" 29 | else 30 | echo "$testsuite,$2,$3,,," 31 | fi 32 | exit 33 | } 34 | 35 | if [[ "$version" == *"-bug-"* ]]; then 36 | if ! grep -q "$version" $rootdir/versions-ignored-all.txt; then 37 | var=$((var+1)) 38 | #get buggy filename from directory name: 39 | contestnum=$(echo $version | cut -d$'-' -f1) 40 | probnum=$(echo $version | cut -d$'-' -f2) 41 | buggyfile=$(echo $version | cut -d$'-' -f4) 42 | cfile=$(echo "$contestnum-$probnum-$buggyfile".c) 43 | cilfile=$(echo "$contestnum-$probnum-$buggyfile".cil.c) 44 | cfixfile=$(echo "$contestnum-$probnum-$buggyfile"-fix.c) 45 | if [[ "$testsuite" = autogen ]]; then 46 | diffc=$(ls -1 $versiondir/$version/autogen-$iteration-n* | wc -l) 47 | posc=$(ls -1 $versiondir/$version/autogen-$iteration-p* | wc -l) 48 | else 49 | diffc=$(grep "Diff Cases" $versiondir/$version/$cfile.revlog | awk '{print $NF}') 50 | posc=$(grep "Positive Cases" $versiondir/$version/$cfile.revlog | awk '{print $NF}') 51 | fi 52 | 53 | echo "[INFO] Repairing $version ($testsuite, iteration $iteration with $posc positive and $diffc negative test cases)" 1>&2 54 | 55 | DIRECTORY="$versiondir/$version" 56 | if [ ! -d "$DIRECTORY" ]; then 57 | echo "[ERROR] FOLDER DOESNT EXIST: $version" 1>&2 58 | exit 1 59 | fi 60 | 61 | cd $rundir/ 62 | rm -rf $rundir/tempworkdir-$version-$iteration-$testsuite 63 | rm -rf $rundir/tempworkdir-$version-$iteration-$testsuite-validation 64 | cp -r $versiondir/$version $rundir/tempworkdir-$version-$iteration-$testsuite 65 | cd $rundir/tempworkdir-$version-$iteration-$testsuite 66 | 67 | cp $rootdir/configuration-default configuration-$version 68 | if [[ "$testsuite" = autogen ]]; then 69 | sed -i "s/test-genprog.sh/test-genprog-incal.py $iteration $version/g" configuration-$version 70 | else 71 | sed -i "s/50s/2s/g" test-genprog.sh #Timeout management 72 | fi 73 | 74 | cp $rootdir/compile.pl compile.pl 75 | echo "$cfile">>bugged-program.txt 76 | echo "--pos-tests $posc">>configuration-$version 77 | echo "--neg-tests $diffc">>configuration-$version 78 | rm -rf preprocessed 79 | rm -rf coverage 80 | mkdir -p preprocessed 81 | make CC="cilly" CFLAGS="--save-temps -std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> initialbuild 82 | if grep -q "Error:" initialbuild; then 83 | if grep -q "Length of array is not" initialbuild; then 84 | printf "[ERROR] %s\t%s\t%s\n" "$version" "MAKE:ARRAY BUG" "0s" 1>&2 85 | else 86 | printf "[ERROR] %s\t%s\t%s\n" "$version" "MAKEFAILED!" "0s" 1>&2 87 | fi 88 | exit 1 89 | fi 90 | cp $cilfile preprocessed/$cfile 91 | cp preprocessed/$cfile $cfile 92 | rm -rf coverage 93 | rm -rf coverage.path.* 94 | rm -rf repair.cache 95 | rm -rf repair.debug.* 96 | #echo "[INFO] RUNNING CMD:$genprog configuration-$version" 1>&2 97 | timeout -k 0 $timelimit $genprog configuration-$version &> $rundir/temp-$version-$iteration-$testsuite.out 98 | timespent=$(grep "TOTAL" "$rundir/temp-$version-$iteration-$testsuite.out" | cut -d'=' -f1 | awk '{print $NF}') 99 | #echo "[INFO] Time Spent: $timespent" 1>&2 100 | if [ -z "${timespent}" ]; then 101 | print_results $version "TIMEOUT" $timelimit 102 | fi 103 | if [ ! 
-f "$rundir/tempworkdir-$version-$iteration-$testsuite/build.log" ]; then 104 | print_results $version "BUILDFAILED:FILE" $timespent 105 | elif grep -q "Failed to make" $rundir/tempworkdir-$version-$iteration-$testsuite/build.log; then 106 | print_results $version "BUILDFAILED" $timespent 107 | elif grep -q "nexpected" "$rundir/temp-$version-$iteration-$testsuite.out"; then 108 | print_results $version "VERIFICATIONFAILED" $timespent 109 | elif grep -q "Timeout" "$rundir/temp-$version-$iteration-$testsuite.out"; then 110 | print_results $version "TIMEOUT" $timelimit 111 | elif grep -q "Repair Found" "$rundir/temp-$version-$iteration-$testsuite.out"; then 112 | contestnum=$(echo "$version" | cut -d$'-' -f1) 113 | probnum=$(echo "$version" | cut -d$'-' -f2) 114 | buggyfile=$(echo "$version" | cut -d$'-' -f4) 115 | cfile=$(echo "$contestnum-$probnum-$buggyfile".c) 116 | cfixfile=$(echo "$version-fix".c) 117 | fixf="$rundir/tempworkdir-$version-$iteration-$testsuite/repair/$cfile" 118 | #for fixing the asm_booo instruction that GenProg introduced 119 | sed -i '/booo/d' "$fixf" 120 | cp $fixf $rootdir/genprog-allfixes/repair-$contestnum-$probnum-$buggyfile-$iteration-$testsuite.c 121 | validity=$($rootdir/validate-fix-genprog.sh "$version" "$rundir/temp-$version-$iteration-$testsuite.out" "$rundir/tempworkdir-$version-$iteration-$testsuite") 122 | print_results $version "REPAIR" $timespent $validity 123 | elif grep -q "no repair" "$rundir/temp-$version-$iteration-$testsuite.out"; then 124 | print_results $version "NOREPAIR" $timespent 125 | elif grep -q "Assertion failed" "$rundir/temp-$version-$iteration-$testsuite.out"; then 126 | print_results $version "COVERAGEFAIL" $timespent 127 | fi 128 | echo "[ERROR] No interpretation:" 1>&2 129 | cat "$rundir/temp-$version-$iteration-$testsuite.out" 1>&2 130 | print_results $version "????" $timespent 131 | else 132 | echo "[INFO] IGNORING:$version" 1>&2 133 | fi 134 | #if test -e "$rundir/tempworkdir-$version-$iteration-manual/bugreport.txt";then 135 | # echo "BUG" 136 | # if grep -q "BOOO" "$rundir/tempworkdir-$version-$iteration-manual/bugreport.txt"; then 137 | # printf "%s\tBOOOBUG\n" "$version" >> "$rootdir/genprog-bugs.log" 138 | # fi 139 | #fi 140 | fi 141 | #kill_descendant_processes $$ 142 | -------------------------------------------------------------------------------- /repairs/genprog/test-genprog-incal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import numpy as np 5 | import subprocess 6 | 7 | # Write: Convert input value(s) into string 8 | def format_input(test_input): 9 | 10 | input_vals="" 11 | if((type(test_input) is np.ndarray) or (type(test_input) is list)): 12 | 13 | for i_val in test_input: 14 | input_vals+=str(int(i_val))+" " 15 | 16 | input_vals=input_vals.strip() 17 | 18 | else: 19 | input_vals=str(int(test_input)) 20 | 21 | 22 | return input_vals 23 | 24 | #return str(int(test_input)) 25 | 26 | # Read: Convert input string into value(s) 27 | def unformat_input(test_input, input_size): 28 | if((type(test_input) is np.ndarray) or (type(test_input) is list)): 29 | if test_input.size != input_size and input_size > 0: 30 | sys.exit("[ERROR "+subject_name+"] Input has variable length") 31 | return test_input 32 | 33 | if input_size > 1: 34 | sys.exit("[ERROR "+subject_name+"] Input has variable length") 35 | return int(test_input) 36 | 37 | # Read: Convert output string into value. 
38 | def unformat_output(test_output): 39 | if test_output == "": return 0 40 | if test_output.strip().lower() == "yes": return 1 41 | if test_output.strip().lower() == "no" : return 0 42 | 43 | try: return int(test_output) 44 | except: 45 | try: return int(round(float(test_output),0)) 46 | except: sys.exit("[ERROR "+subject_name+"] Unknown output") 47 | 48 | # Execute test_input on program and return output value 49 | def run_test(test_input, program): 50 | formatted_input = format_input(test_input) 51 | process = subprocess.Popen(('echo', formatted_input), stdout=subprocess.PIPE) 52 | test_output="" 53 | try: 54 | test_output = subprocess.check_output(["timeout", "-k", "2s", "2s", program], stdin=process.stdout,encoding="utf-8") 55 | except: 56 | pass 57 | 58 | unformatted_output = unformat_output(test_output) 59 | 60 | return unformatted_output 61 | 62 | # Execute test_input on buggy and golden program. Return false (test failure) if output differs 63 | def ask_human(test_input, bug_prog, gold_prog): 64 | actual_output = run_test(test_input, bug_prog) 65 | expected_output = run_test(test_input, gold_prog) 66 | return actual_output == expected_output 67 | 68 | 69 | iteration = sys.argv[1] 70 | subject = sys.argv[2] 71 | test = sys.argv[3] 72 | 73 | bug_dir = subject.rstrip("/") 74 | temp = bug_dir.split("/")[-1].split("-") 75 | bug_prog = "./"+temp[0] + "-" + temp[1] + "-" + temp[3] 76 | gold_prog = "./"+temp[0] + "-" + temp[1] + "-" + temp[4] 77 | subject_name = bug_dir.split("/")[-1] 78 | 79 | test_suite = [] 80 | test_data = np.genfromtxt("autogen-"+iteration+"-"+test) 81 | test_suite.append(unformat_input(test_data, 0)) 82 | test_suite = np.array(test_suite) 83 | 84 | if ask_human(test_suite[0], bug_prog, gold_prog): 85 | sys.exit(0) 86 | else: 87 | sys.exit(1) 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /repairs/genprog/validate-fix-genprog.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #rootdir="/home/ubuntu/codeforces-crawler/CodeforcesSpider" #directory of this script 3 | rootdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" #directory of this script 4 | rundir=$rootdir/genprog-run # directory in which genprog is called from 5 | version="$1" 6 | filen="$2" 7 | repairdir="$3" 8 | 9 | contestnum=$(echo $version | cut -d$'-' -f1) 10 | probnum=$(echo $version | cut -d$'-' -f2) 11 | buggyfile=$(echo $version | cut -d$'-' -f4) 12 | cfile=$(echo "$contestnum-$probnum-$buggyfile".c) 13 | cexefile=$(echo "$contestnum-$probnum-$buggyfile") 14 | correctfile=$(echo $version | cut -d$'-' -f5) 15 | goldenfile=$(echo "$contestnum-$probnum-$correctfile".c) 16 | 17 | validdir=$repairdir-validation 18 | 19 | runalltest(){ 20 | i="$1" 21 | origversion="$2" 22 | goldenpass="$3" 23 | filen=$validdir/test-valid.sh 24 | passt=0 25 | failt=0 26 | totalt=0 27 | alltests=($(grep -E "p[0-9]+\)" $filen | cut -d')' -f 1)) 28 | for t in "${alltests[@]}" 29 | do 30 | timeout -k 2s 2s $filen "$t"&>/dev/null 31 | if [ $? -ne 0 ]; then 32 | failt=$((failt+1)) 33 | else 34 | passt=$((passt+1)) 35 | fi 36 | totalt=$((totalt+1)) 37 | done 38 | if [ ! 
-z "$goldenpass" ]; then 39 | echo "$passt,$goldenpass,$totalt" 40 | else 41 | echo $passt 42 | fi 43 | #pkill $cexefile 44 | #pkill test-valid.sh 45 | } 46 | 47 | 48 | if grep -q "Repair Found" $filen; then 49 | fixf="$repairdir/repair/$cfile" 50 | cp -r $repairdir $validdir 51 | cd $validdir 52 | cp $validdir/preprocessed/$cfile $cfile 53 | make clean &>/dev/null 54 | make CFLAGS="-std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> /dev/null 55 | runalltest "orig-$cfile" "$version" "" > /dev/null 56 | cp "$cfile" "$cfile.orig" 57 | #copy and compile the fix file before running validation tests 58 | cp $goldenfile $cfile 59 | make clean &> /dev/null 60 | make CFLAGS="-std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> /dev/null 61 | 62 | 63 | goldenpasst=$(runalltest "golden-$goldenfile" "$version" "") 64 | #echo "GOLDEN:$goldenpasst" 65 | 66 | cp $fixf $cfile 67 | make clean &> /dev/null 68 | make CFLAGS="-std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> /dev/null 69 | 70 | runalltest "fix-$cfile" "$version" "$goldenpasst" 71 | 72 | #restore the file 73 | cp "$cfile.orig" "$cfile" 74 | #rm -rf "$repairdir" 75 | fi 76 | -------------------------------------------------------------------------------- /results/Plots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/Plots.pdf -------------------------------------------------------------------------------- /results/accuracy.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/accuracy.pdf -------------------------------------------------------------------------------- /results/effort1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/effort1.pdf -------------------------------------------------------------------------------- /results/effort2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/effort2.pdf -------------------------------------------------------------------------------- /results/patchquality.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/patchquality.pdf -------------------------------------------------------------------------------- /results/repairability.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/repairability.pdf -------------------------------------------------------------------------------- /results/training.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/training.pdf -------------------------------------------------------------------------------- /results/validation.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/validation.pdf -------------------------------------------------------------------------------- /results/validation2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/validation2.pdf -------------------------------------------------------------------------------- /scripts/h_combine.sh: -------------------------------------------------------------------------------- 1 | function join_by { local IFS="$1"; shift; echo "$*"; } 2 | 3 | cd ../smtlearn 4 | declare -a options=(3 4 5 6 7 8 9 10) 5 | 6 | mkdir ../synthetic/hh/all 7 | for i in "${options[@]}"; do 8 | cp ../synthetic/hh/$i/* ../synthetic/hh/all/ 9 | done 10 | 11 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/hh ../output/synthetic/hh 12 | 13 | python api.py combine ../output/synthetic/hh/summary $(join_by " " "${options[@]}") -p ../output/synthetic/hh/ 14 | python api.py migrate ratio ../output/synthetic/hh/summary/ -d ../synthetic/hh/all -s 1000 -f 15 | python api.py migrate accuracy ../output/synthetic/hh/summary/ -d ../synthetic/hh/all -s 1000 -f 16 | -------------------------------------------------------------------------------- /scripts/h_generate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(3 4 5 6 7 8 9 10) 6 | 7 | for i in "${options[@]}"; do 8 | python api.py generate ../synthetic/hh/$i -n 100 -b 0 -r 2 -k 2 -l 3 --half_spaces $i 9 | done 10 | wait 11 | 12 | scp -r ../synthetic/hh samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/ -------------------------------------------------------------------------------- /scripts/h_learn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(3 4 5 6 7 8 9 10) 6 | 7 | for i in "${options[@]}"; do 8 | python experiments.py ../synthetic/hh/$i "" ../output/synthetic/hh/$i cnf -t 200 & 9 | done 10 | wait -------------------------------------------------------------------------------- /scripts/k_combine.sh: -------------------------------------------------------------------------------- 1 | function join_by { local IFS="$1"; shift; echo "$*"; } 2 | 3 | cd ../smtlearn 4 | declare -a options=(1 2 3 4 5) 5 | 6 | mkdir ../synthetic/kk/all 7 | for i in "${options[@]}"; do 8 | cp ../synthetic/kk/$i/* ../synthetic/kk/all/ 9 | done 10 | 11 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/kk ../output/synthetic/kk 12 | 13 | python api.py combine ../output/synthetic/kk/summary $(join_by " " "${options[@]}") -p ../output/synthetic/kk/ 14 | python api.py migrate ratio ../output/synthetic/kk/summary/ -d ../synthetic/kk/all -s 1000 -f 15 | python api.py migrate accuracy ../output/synthetic/kk/summary/ -d ../synthetic/kk/all -s 1000 -f 16 | -------------------------------------------------------------------------------- /scripts/k_generate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(1 2 3 4 5 6) 6 | 7 | for i in "${options[@]}"; do 8 | python api.py generate ../synthetic/kk/$i -n 100 -b 6 -r 2 -k $i -l 3 --half_spaces 6 9 | done 10 | wait 11 | 12 | scp -r 
../synthetic/kk samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/ -------------------------------------------------------------------------------- /scripts/k_learn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(1 2 3 4 5) 6 | 7 | for i in "${options[@]}"; do 8 | python experiments.py ../synthetic/kk/$i "" ../output/synthetic/kk/$i cnf -t 200 & 9 | done 10 | wait -------------------------------------------------------------------------------- /scripts/l_combine.sh: -------------------------------------------------------------------------------- 1 | function join_by { local IFS="$1"; shift; echo "$*"; } 2 | 3 | cd ../smtlearn 4 | declare -a options=(1 2 3 4 5) 5 | 6 | mkdir ../synthetic/ll/all 7 | for i in "${options[@]}"; do 8 | cp ../synthetic/ll/$i/* ../synthetic/ll/all/ 9 | done 10 | 11 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/ll ../output/synthetic/ll 12 | 13 | python api.py combine ../output/synthetic/ll/summary $(join_by " " "${options[@]}") -p ../output/synthetic/ll/ 14 | python api.py migrate ratio ../output/synthetic/ll/summary/ -d ../synthetic/ll/all -s 1000 -f 15 | python api.py migrate accuracy ../output/synthetic/ll/summary/ -d ../synthetic/ll/all -s 1000 -f 16 | -------------------------------------------------------------------------------- /scripts/l_generate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(1 2 3 4 5) 6 | 7 | for i in "${options[@]}"; do 8 | python api.py generate ../synthetic/ll/$i -n 100 -b 0 -r 2 -k 3 -l $i --half_spaces 10 9 | done 10 | wait 11 | 12 | scp -r ../synthetic/ll samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/ -------------------------------------------------------------------------------- /scripts/l_learn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(1 2 3 4 5) 6 | 7 | for i in "${options[@]}"; do 8 | python experiments.py ../synthetic/ll/$i "" ../output/synthetic/ll/$i cnf -t 200 & 9 | done 10 | wait -------------------------------------------------------------------------------- /scripts/pf_combine.sh: -------------------------------------------------------------------------------- 1 | function join_by { local IFS="$1"; shift; echo "$*"; } 2 | 3 | cd ../smtlearn 4 | declare -a options=(1 2 3 4 5) 5 | 6 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/pf ../output/synthetic/pf 7 | 8 | python api.py migrate ratio ../output/synthetic/pf -d ../synthetic/pf -s 1000 -f 9 | python api.py migrate accuracy ../output/synthetic/pf -d ../synthetic/pf -s 1000 -f 10 | -------------------------------------------------------------------------------- /scripts/pf_generate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | python api.py generate ../synthetic/pf -n 100 -b 6 -r 2 -k 3 -l 3 --half_spaces 6 6 | 7 | scp -r ../synthetic/pf samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/ -------------------------------------------------------------------------------- /scripts/pf_learn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | 
python experiments.py ../synthetic/pf "" ../output/synthetic/pf cnf -t 200 -f & 6 | wait -------------------------------------------------------------------------------- /scripts/plot.sh: -------------------------------------------------------------------------------- 1 | cd ../smtlearn 2 | 3 | # Plot samples 4 | python api.py table id samples acc ../output/synthetic/ss/summary/ ../synthetic/ss/10000/ INCAL ../output/synthetic/ss/esummary_1000/ ../synthetic/ss/10000/ "non-incremental" plot -a -o ../../ijcai18/figures/s_inc_acc.png 5 | python api.py table id samples time ../output/synthetic/ss/summary/ ../synthetic/ss/10000/ INCAL ../output/synthetic/ss/esummary_1000/ ../synthetic/ss/10000/ "non-incremental" plot -a -o ../../ijcai18/figures/s_inc_time.png --legend_pos "upper right" 6 | python api.py table id samples active ../output/synthetic/ss/summary/ ../synthetic/ss/all/ INCAL plot -a -o ../../ijcai18/figures/s_inc_active.png 7 | python api.py table id samples active_ratio ../output/synthetic/ss/summary/ ../synthetic/ss/all/ "" plot -a -o ../../ijcai18/figures/s_inc_active_ratio.png --legend_pos "upper right" 8 | 9 | python api.py table id samples time ../output/synthetic/ss/summary ../synthetic/ss/all INCAL print -a 10 | python api.py table id samples time ../output/synthetic/ss/esummary ../synthetic/ss/all INCAL print -a 11 | 12 | # Plot k 13 | python api.py table id k acc ../output/synthetic/kk/summary/ ../synthetic/kk/all/ INCAL plot -a -o ../../ijcai18/figures/k_inc_acc.png 14 | python api.py table id k time ../output/synthetic/kk/summary/ ../synthetic/kk/all/ INCAL plot -a -o ../../ijcai18/figures/k_inc_time.png 15 | 16 | # Plot l 17 | python api.py table id l acc ../output/synthetic/ll/summary/ ../synthetic/ll/all/ INCAL plot -a -o ../../ijcai18/figures/l_inc_acc.png 18 | python api.py table id l time ../output/synthetic/ll/summary/ ../synthetic/ll/all/ INCAL plot -a -o ../../ijcai18/figures/l_inc_time.png 19 | 20 | # Plot h 21 | python api.py table id h acc ../output/synthetic/hh/summary/ ../synthetic/hh/all/ INCAL plot -a -o ../../ijcai18/figures/h_inc_acc.png 22 | python api.py table id h time ../output/synthetic/hh/summary/ ../synthetic/hh/all/ INCAL plot -a -o ../../ijcai18/figures/h_inc_time.png 23 | 24 | # Print parameter-free ratio 25 | python api.py table id samples time_ratio ../output/synthetic/pf/ ../synthetic/pf/ INCAL print -a 26 | 27 | # Print benchmark 28 | python api.py table id constant full_time ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a 29 | python api.py table id constant time ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a 30 | python api.py table id constant time_ratio ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a 31 | python api.py table id constant acc ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a 32 | -------------------------------------------------------------------------------- /scripts/samples_combine.sh: -------------------------------------------------------------------------------- 1 | function join_by { local IFS="$1"; shift; echo "$*"; } 2 | 3 | cd ../smtlearn 4 | declare -a options=(25 50 75 100 250 500 750 1000 2500 5000 7500 10000) 5 | declare -a options_e=(25 50 75 100 250 500 750 1000 2500) 6 | 7 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/ss ../output/synthetic/ss 8 | 9 | python api.py combine ../output/synthetic/ss/summary $(join_by " " "${options[@]}") -p ../output/synthetic/ss/ 10 | python api.py combine ../output/synthetic/ss/esummary 
$(join_by " " "${options_e[@]}") -p ../output/synthetic/ss/e 11 | python api.py migrate ratio ../output/synthetic/ss/summary/ -d ../synthetic/ss/10000 -s 1000 -f 12 | python api.py migrate accuracy ../output/synthetic/ss/summary/ -d ../synthetic/ss/10000 -s 1000 -f 13 | python api.py migrate ratio ../output/synthetic/ss/esummary/ -d ../synthetic/ss/10000 -s 1000 -f 14 | python api.py migrate accuracy ../output/synthetic/ss/esummary/ -d ../synthetic/ss/10000 -s 1000 -f 15 | -------------------------------------------------------------------------------- /scripts/samples_generate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | python api.py generate ../synthetic/ss/10000 -n 100 -b 6 -r 2 -k 2 -l 3 --half_spaces 6 -s 10000 6 | scp -r ../synthetic/ss samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/ -------------------------------------------------------------------------------- /scripts/samples_learn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd ../smtlearn 4 | 5 | declare -a options=(25 50 75 100 250 500 750 1000 2500 5000 7500 10000) 6 | declare -a options_e=(25 50 75 100 250 500 750 1000 2500) 7 | 8 | for i in "${options[@]}"; do 9 | python experiments.py ../synthetic/ss/10000/ "" ../output/synthetic/ss/$i cnf -s $i -t 200 & 10 | done 11 | 12 | for i in "${options_e[@]}"; do 13 | python experiments.py ../synthetic/ss/10000/ "" ../output/synthetic/ss/e$i cnf -s $i -t 200 -a & 14 | done 15 | wait 16 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import sys 4 | 5 | from setuptools import setup, find_packages, Command 6 | from os import path 7 | 8 | # To upload: 9 | # pip install --upgrade twine wheel setuptools 10 | # python setup.py upload 11 | 12 | NAME = 'incal' 13 | DESCRIPTION = 'Learning SMT(LRA) formulas' 14 | URL = 'https://github.com/smtlearning/incal' 15 | EMAIL = 'samuel.kolb@me.com' 16 | AUTHOR = 'Samuel Kolb' 17 | REQUIRES_PYTHON = '>=3.5.0' 18 | VERSION = "0.1.1" 19 | 20 | # What packages are required for this module to be executed? 21 | REQUIRED = [ 22 | 'pywmi', 'numpy', 'typing', 'pysmt', 'matplotlib', 'scikit-learn', 'pickledb' 23 | ] 24 | 25 | # What packages are optional? 26 | EXTRAS = { 27 | 'sdd': ["pysdd"] 28 | } 29 | 30 | here = os.path.abspath(os.path.dirname(__file__)) 31 | 32 | with open(path.join(here, "README.md")) as ref: 33 | long_description = ref.read() 34 | 35 | 36 | class UploadCommand(Command): 37 | """Support setup.py upload.""" 38 | 39 | description = 'Build and publish the package.' 
40 | user_options = [] 41 | 42 | @staticmethod 43 | def status(s): 44 | """Prints things in bold.""" 45 | print('\033[1m{0}\033[0m'.format(s)) 46 | 47 | def initialize_options(self): 48 | pass 49 | 50 | def finalize_options(self): 51 | pass 52 | 53 | def run(self): 54 | try: 55 | self.status('Removing previous builds…') 56 | shutil.rmtree(os.path.join(here, 'dist')) 57 | except OSError: 58 | pass 59 | 60 | self.status('Building Source and Wheel (universal) distribution…') 61 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 62 | 63 | self.status('Uploading the package to PyPI via Twine…') 64 | os.system('twine upload dist/*') 65 | 66 | # self.status('Pushing git tags…') 67 | # os.system('git tag v{0}'.format(about['__version__'])) 68 | # os.system('git push --tags') 69 | 70 | sys.exit() 71 | 72 | 73 | setup( 74 | name=NAME, 75 | version=VERSION, 76 | description=DESCRIPTION, 77 | long_description=long_description, 78 | long_description_content_type="text/markdown", 79 | url=URL, 80 | author=AUTHOR, 81 | author_email=EMAIL, 82 | license='MIT', 83 | classifiers=[ 84 | 'License :: OSI Approved :: MIT License', 85 | 'Programming Language :: Python', 86 | 'Programming Language :: Python :: 3', 87 | ], 88 | python_requires=REQUIRES_PYTHON, 89 | packages=find_packages(exclude=('tests',)), 90 | zip_safe=False, 91 | install_requires=REQUIRED, 92 | extras_require=EXTRAS, 93 | setup_requires=['pytest-runner'], 94 | tests_require=["pytest"], 95 | entry_points={ 96 | "console_scripts": [ 97 | "incal-experiments = incal.experiments.cli:main", 98 | "incal-track = incal.experiments.learn:track", 99 | ] 100 | }, 101 | cmdclass={ 102 | 'upload': UploadCommand, 103 | }, 104 | ) 105 | --------------------------------------------------------------------------------
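# --- Editorial note (not part of the repository) -----------------------------
# The console_scripts entry points declared in setup.py above mean that, once
# the package is installed (for example with `pip install -e .` from the
# repository root), the shell commands `incal-experiments` and `incal-track`
# dispatch to the referenced callables. A rough Python equivalent, as a sketch:

from incal.experiments.cli import main as incal_experiments_main    # backs `incal-experiments`
from incal.experiments.learn import track as incal_track            # backs `incal-track`

if __name__ == "__main__":
    incal_experiments_main()   # behaves like running `incal-experiments` with the current sys.argv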