├── .gitignore
├── LICENSE
├── README.md
├── incal
│   ├── __init__.py
│   ├── cli.py
│   ├── experiments
│   │   ├── __init__.py
│   │   ├── analyze.py
│   │   ├── cli.py
│   │   ├── examples.py
│   │   ├── find_hyperplanes.py
│   │   ├── find_operators.py
│   │   ├── learn.py
│   │   └── prepare.py
│   ├── extra
│   │   ├── __init__.py
│   │   ├── api.py
│   │   ├── combine_results.py
│   │   ├── demo.py
│   │   ├── deploy.py
│   │   ├── experiments.py
│   │   ├── main.py
│   │   ├── migrate.py
│   │   └── smt_scan.py
│   ├── generator.py
│   ├── incremental_learner.py
│   ├── k_cnf_smt_learner.py
│   ├── learn.py
│   ├── learner.py
│   ├── lp
│   │   ├── __init__.py
│   │   ├── examples.py
│   │   └── model.py
│   ├── observe
│   │   ├── __init__.py
│   │   ├── inc_logging.py
│   │   ├── observe.py
│   │   └── plotting.py
│   ├── old_learners
│   │   ├── __init__.py
│   │   ├── dt_learner.py
│   │   ├── k_dnf_greedy_learner.py
│   │   ├── k_dnf_learner.py
│   │   ├── k_dnf_logic_learner.py
│   │   └── k_dnf_smt_learner.py
│   ├── parameter_free_learner.py
│   ├── tests
│   │   ├── examples.py
│   │   ├── test_evaluation.py
│   │   ├── test_generation.py
│   │   ├── test_one_class.py
│   │   └── test_polytope.py
│   ├── util
│   │   ├── __init__.py
│   │   ├── analyze.py
│   │   ├── options.py
│   │   ├── parallel.py
│   │   ├── plot.py
│   │   ├── sampling.py
│   │   └── timeout.py
│   └── violations
│       ├── __init__.py
│       ├── core.py
│       ├── dt_selection.py
│       └── virtual_data.py
├── notebooks
│   ├── Learn2Fix.py
│   ├── baldur.py
│   ├── experiments.ipynb
│   ├── experiments.sh
│   ├── gps.py
│   ├── playground.py
│   ├── results.sh
│   └── synthetic.py
├── plotting_commands.txt
├── repairs
│   └── genprog
│       ├── run-version-genprog.sh
│       ├── test-genprog-incal.py
│       └── validate-fix-genprog.sh
├── results
│   ├── Plots.Rmd
│   ├── Plots.pdf
│   ├── accuracy.pdf
│   ├── effort1.pdf
│   ├── effort2.pdf
│   ├── manualTrainingTSsize.csv
│   ├── patchquality.pdf
│   ├── repairability.pdf
│   ├── results-l-10-t-5-g-10-runs.csv
│   ├── results-l-20-t-10-g-10-runs.csv
│   ├── results-l-30-t-10-g-10-runs.csv
│   ├── training.pdf
│   ├── validation.pdf
│   └── validation2.pdf
├── scripts
│   ├── h_combine.sh
│   ├── h_generate.sh
│   ├── h_learn.sh
│   ├── k_combine.sh
│   ├── k_generate.sh
│   ├── k_learn.sh
│   ├── l_combine.sh
│   ├── l_generate.sh
│   ├── l_learn.sh
│   ├── pf_combine.sh
│   ├── pf_generate.sh
│   ├── pf_learn.sh
│   ├── plot.sh
│   ├── samples_combine.sh
│   ├── samples_generate.sh
│   └── samples_learn.sh
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | synthetic/
2 | res/
3 | remote_res/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | GNU LESSER GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | Copyright (C) 2007 Free Software Foundation, Inc.
5 | Everyone is permitted to copy and distribute verbatim copies
6 | of this license document, but changing it is not allowed.
7 |
8 |
9 | This version of the GNU Lesser General Public License incorporates
10 | the terms and conditions of version 3 of the GNU General Public
11 | License, supplemented by the additional permissions listed below.
12 |
13 | 0. Additional Definitions.
14 |
15 | As used herein, "this License" refers to version 3 of the GNU Lesser
16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
17 | General Public License.
18 |
19 | "The Library" refers to a covered work governed by this License,
20 | other than an Application or a Combined Work as defined below.
21 |
22 | An "Application" is any work that makes use of an interface provided
23 | by the Library, but which is not otherwise based on the Library.
24 | Defining a subclass of a class defined by the Library is deemed a mode
25 | of using an interface provided by the Library.
26 |
27 | A "Combined Work" is a work produced by combining or linking an
28 | Application with the Library. The particular version of the Library
29 | with which the Combined Work was made is also called the "Linked
30 | Version".
31 |
32 | The "Minimal Corresponding Source" for a Combined Work means the
33 | Corresponding Source for the Combined Work, excluding any source code
34 | for portions of the Combined Work that, considered in isolation, are
35 | based on the Application, and not on the Linked Version.
36 |
37 | The "Corresponding Application Code" for a Combined Work means the
38 | object code and/or source code for the Application, including any data
39 | and utility programs needed for reproducing the Combined Work from the
40 | Application, but excluding the System Libraries of the Combined Work.
41 |
42 | 1. Exception to Section 3 of the GNU GPL.
43 |
44 | You may convey a covered work under sections 3 and 4 of this License
45 | without being bound by section 3 of the GNU GPL.
46 |
47 | 2. Conveying Modified Versions.
48 |
49 | If you modify a copy of the Library, and, in your modifications, a
50 | facility refers to a function or data to be supplied by an Application
51 | that uses the facility (other than as an argument passed when the
52 | facility is invoked), then you may convey a copy of the modified
53 | version:
54 |
55 | a) under this License, provided that you make a good faith effort to
56 | ensure that, in the event an Application does not supply the
57 | function or data, the facility still operates, and performs
58 | whatever part of its purpose remains meaningful, or
59 |
60 | b) under the GNU GPL, with none of the additional permissions of
61 | this License applicable to that copy.
62 |
63 | 3. Object Code Incorporating Material from Library Header Files.
64 |
65 | The object code form of an Application may incorporate material from
66 | a header file that is part of the Library. You may convey such object
67 | code under terms of your choice, provided that, if the incorporated
68 | material is not limited to numerical parameters, data structure
69 | layouts and accessors, or small macros, inline functions and templates
70 | (ten or fewer lines in length), you do both of the following:
71 |
72 | a) Give prominent notice with each copy of the object code that the
73 | Library is used in it and that the Library and its use are
74 | covered by this License.
75 |
76 | b) Accompany the object code with a copy of the GNU GPL and this license
77 | document.
78 |
79 | 4. Combined Works.
80 |
81 | You may convey a Combined Work under terms of your choice that,
82 | taken together, effectively do not restrict modification of the
83 | portions of the Library contained in the Combined Work and reverse
84 | engineering for debugging such modifications, if you also do each of
85 | the following:
86 |
87 | a) Give prominent notice with each copy of the Combined Work that
88 | the Library is used in it and that the Library and its use are
89 | covered by this License.
90 |
91 | b) Accompany the Combined Work with a copy of the GNU GPL and this license
92 | document.
93 |
94 | c) For a Combined Work that displays copyright notices during
95 | execution, include the copyright notice for the Library among
96 | these notices, as well as a reference directing the user to the
97 | copies of the GNU GPL and this license document.
98 |
99 | d) Do one of the following:
100 |
101 | 0) Convey the Minimal Corresponding Source under the terms of this
102 | License, and the Corresponding Application Code in a form
103 | suitable for, and under terms that permit, the user to
104 | recombine or relink the Application with a modified version of
105 | the Linked Version to produce a modified Combined Work, in the
106 | manner specified by section 6 of the GNU GPL for conveying
107 | Corresponding Source.
108 |
109 | 1) Use a suitable shared library mechanism for linking with the
110 | Library. A suitable mechanism is one that (a) uses at run time
111 | a copy of the Library already present on the user's computer
112 | system, and (b) will operate properly with a modified version
113 | of the Library that is interface-compatible with the Linked
114 | Version.
115 |
116 | e) Provide Installation Information, but only if you would otherwise
117 | be required to provide such information under section 6 of the
118 | GNU GPL, and only to the extent that such information is
119 | necessary to install and execute a modified version of the
120 | Combined Work produced by recombining or relinking the
121 | Application with a modified version of the Linked Version. (If
122 | you use option 4d0, the Installation Information must accompany
123 | the Minimal Corresponding Source and Corresponding Application
124 | Code. If you use option 4d1, you must provide the Installation
125 | Information in the manner specified by section 6 of the GNU GPL
126 | for conveying Corresponding Source.)
127 |
128 | 5. Combined Libraries.
129 |
130 | You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 |
136 | a) Accompany the combined library with a copy of the same work based
137 | on the Library, uncombined with any other library facilities,
138 | conveyed under the terms of this License.
139 |
140 | b) Give prominent notice with the combined library that part of it
141 | is a work based on the Library, and explaining where to find the
142 | accompanying uncombined form of the same work.
143 |
144 | 6. Revised Versions of the GNU Lesser General Public License.
145 |
146 | The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 |
151 | Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 |
161 | If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Learn2Fix
2 | Learn2Fix is a human-in-the-loop automatic repair technique for programs that take numeric inputs. Given a test input that reproduces the bug, Learn2Fix uses mutational fuzzing to generate alternative test inputs and presents some of them to the human, asking whether they also reproduce the bug. Meanwhile, Learn2Fix uses the [Incal](https://github.com/ML-KULeuven/incal) constraint learning tool to construct a Satisfiability Modulo Theories constraint over linear real arithmetic, SMT(LRA), that is satisfied only by test inputs labeled as reproducing the bug. SMT provides a natural representation of program semantics and is a fundamental building block of symbolic execution and semantic program repair. The learned SMT constraint serves as an automatic bug oracle that predicts the label of new test inputs. Iteratively, the oracle is trained to predict the user's responses with increasing accuracy, so the user can be queried more strategically. The key challenge that Learn2Fix addresses is maximizing the oracle's accuracy given only a limited number of queries to the user.
3 |
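For intuition, here is a minimal, purely illustrative sketch (not part of Learn2Fix itself) of how such a learned SMT(LRA) constraint can act as a bug oracle; it assumes only `pysmt` and uses made-up variable names and thresholds:
```python
# Illustrative only: a hand-written stand-in for a constraint Incal might learn.
from pysmt.shortcuts import Symbol, And, LE, GE, Real, substitute
from pysmt.typing import REAL

x, y = Symbol("x", REAL), Symbol("y", REAL)
# Hypothetical learned constraint: inputs satisfying it are predicted to reproduce the bug.
oracle = And(LE(x, Real(0.5)), GE(y, Real(0.3)))

def predicted_label(test_input):
    # Substitute the concrete input values and simplify the formula to a constant.
    assignment = {x: Real(test_input["x"]), y: Real(test_input["y"])}
    return substitute(oracle, assignment).simplify().is_true()

print(predicted_label({"x": 0.2, "y": 0.7}))  # True: predicted bug-reproducing
print(predicted_label({"x": 0.9, "y": 0.1}))  # False: predicted benign
```
In Learn2Fix, such a constraint is learned by Incal from the labeled test inputs rather than written by hand.
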
4 | * You can find the technical details in our ICST'20 paper: https://arxiv.org/abs/1912.07758
5 | * To cite our paper, you can use the following bibtex entry:
6 | ```bibtex
7 | @inproceedings{learn2fix,
8 | author = {B\"ohme, Marcel and Geethal, Charaka and Pham, Van-Thuan},
9 | title = {Human-In-The-Loop Automatic Program Repair},
10 | booktitle = {Proceedings of the 2020 IEEE International Conference on Software Testing, Verification and Validation},
11 | series = {ICST 2020},
12 | year = {2020},
13 | location = {Porto, Portugal},
14 | pages = {1-12},
15 | numpages = {12}
16 | }
17 | ```
18 | Learn2Fix is implemented in Python, can be set up quickly in a Docker container, and builds on the following projects:
19 | * Incal constraint learner: [Paper](https://www.ijcai.org/proceedings/2018/0323.pdf), [Tool](https://github.com/ML-KULeuven/incal)
20 | * GenProg test-driven repair: [Paper](https://web.eecs.umich.edu/~weimerw/p/weimer-tse2012-genprog.pdf), [Tool](https://github.com/squareslab/genprog-code)
21 | * CodeFlaws repair benchmark: [Paper](https://codeflaws.github.io/postercameraready.pdf), [Tool](https://codeflaws.github.io/)
22 |
23 | # How to run Learn2Fix
24 | To facilitate open science and reproducibility, we make our tool (Learn2Fix), data, and scripts available. Following are the concrete instructions to set up and run Learn2Fix on the Codeflaws benchmark to reproduce the results we reported in our paper.
25 |
26 | ## Step-1. Install Codeflaws with GenProg
27 |
28 | Set up a docker container for GenProg repair tool
29 | ```bash
30 | docker pull squareslab/genprog
31 | docker run -it squareslab/genprog /bin/bash
32 | ```
33 |
34 | Download and set up any dependencies
35 | ```bash
36 | apt-get update
37 | apt-get -y install git time build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev libssl-dev libreadline-dev libffi-dev wget z3 bc
38 |
39 | # Install python
40 | pushd /tmp
41 | wget https://www.python.org/ftp/python/3.7.2/Python-3.7.2.tar.xz
42 | tar -xf Python-3.7.2.tar.xz
43 | cd Python-3.7.2
44 | ./configure --enable-optimizations
45 | make -j4
46 | make altinstall
47 | ln -s $(which pip3.7) /usr/bin/pip
48 | mv /usr/bin/python /usr/bin/python.old
49 | ln -s $(which python3.7) /usr/bin/python
50 | popd
51 | ```
52 |
53 | Download and set up the Codeflaws benchmark inside the container
54 | ```bash
55 | cd /root
56 | git clone https://github.com/codeflaws/codeflaws
57 | cd codeflaws/all-script
58 | wget http://www.comp.nus.edu.sg/~release/codeflaws/codeflaws.tar.gz
59 | tar -zxf codeflaws.tar.gz
60 | ```
61 |
62 | ## Step-2. Install Learn2Fix
63 | Download and compile Learn2Fix and its dependencies (e.g., INCAL)
64 | ```bash
65 | cd /root/codeflaws/all-script
66 | git clone https://github.com/mboehme/learn2fix
67 | export learn2fix="$PWD/learn2fix"
68 | cd $learn2fix
69 | # Install LattE
70 | wget https://github.com/latte-int/latte/releases/download/version_1_7_5/latte-integrale-1.7.5.tar.gz
71 | tar -xvzf latte-integrale-1.7.5.tar.gz
72 | cd latte-integrale-1.7.5
73 | ./configure
74 | make -j4
75 | make install
76 | # Install Incal
77 | cd $learn2fix
78 | python setup.py build
79 | python setup.py install
80 | pip install cvxopt
81 | pip install plotting
82 | pip install seaborn
83 | pip install wmipa
84 | pip install pywmi
85 | pysmt-install --z3 #confirm with [Y]es
86 | ```
87 |
88 | Export environment variables
89 | ```bash
90 | cd $learn2fix
91 | export PATH=/root/.opam/system/bin/:$PATH
92 | export PATH=$PATH:$PWD/latte-integrale-1.7.5/dest/bin/
93 | cd $learn2fix/notebooks
94 | export PYTHONPATH=$PWD/../incal/experiments
95 | export PYTHONPATH=$PYTHONPATH:$PWD/../incal/extra
96 | export PYTHONPATH=$PYTHONPATH:$PWD/../incal
97 | ```
98 |
99 | # How to reproduce our results
100 | ## Run Learn2Fix on Codeflaws
101 | Run the following command to execute Learn2Fix. Learn2Fix produces several CSV files, one for each experimental run (e.g., results_it_1.csv for the first run)
102 | ```bash
103 | cd $learn2fix/notebooks
104 | ./experiments.sh /root/codeflaws/all-script/codeflaws 2>learn2fix.log
105 | ```
106 | Once the experiment completes, concatenate all CSV files to form a single file containing all results
107 | ```bash
108 | cat results_it_*.csv > results_all.csv
109 | ```
110 |
111 | ## Run Plots.Rmd on the produced result files
112 | See Plots.Rmd and our data in the results folder
113 | ```bash
114 | ls $learn2fix/results
115 | ```
116 |
--------------------------------------------------------------------------------
/incal/__init__.py:
--------------------------------------------------------------------------------
1 | from pysmt.shortcuts import Real
2 | from pywmi.domain import Density
3 |
4 |
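# A Formula is a pywmi Density with constant weight 1: it carries only a domain
# and a support formula, which is all the learner needs.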
5 | class Formula(Density):
6 | def __init__(self, domain, support):
7 | super().__init__(domain, support, Real(1))
8 |
9 | @classmethod
10 | def from_state(cls, state: dict):
11 | density = Density.from_state(state)
12 | return cls(density.domain, density.support)
13 |
--------------------------------------------------------------------------------
/incal/cli.py:
--------------------------------------------------------------------------------
1 | from pywmi.smt_print import pretty_print
2 |
3 | from .learn import LearnOptions
4 |
5 |
6 | def main():
7 | formula, k, h = LearnOptions().execute_from_command_line("Learn SMT(LRA) theories from data")
8 | print("Learned formula (k={k}, h={h}): {f}".format(f=pretty_print(formula), k=k, h=h))
9 |
10 |
--------------------------------------------------------------------------------
/incal/experiments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/experiments/__init__.py
--------------------------------------------------------------------------------
/incal/experiments/analyze.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import json
3 | import os
4 |
5 | from typing import List
6 |
7 | import numpy as np
8 | import pickledb
9 | from pywmi import RejectionEngine, nested_to_smt, import_domain
10 | from pywmi.domain import Density, Domain
11 |
12 | from .prepare import select_benchmark_files, benchmark_filter, get_synthetic_db
13 | from incal.util.options import Experiment
14 | from incal.util import analyze as show
15 |
16 | from .learn import get_experiment
17 |
18 | import pysmt.shortcuts as smt
19 | import pysmt.environment
20 |
21 |
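# Derived properties of finished experiments (original k/h/l, positive-example ratio,
# approximate accuracy, whether the run executed). Bounds and accuracy values are
# cached in a local pickledb file ('example.db') to avoid recomputation across runs.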
22 | class Properties(object):
23 | bounds = dict()
24 | db = None
25 |
26 | @staticmethod
27 | def to_name(filename):
28 | return filename[filename.find("QF_LRA"):filename.find("smt2")+4]
29 |
30 | @staticmethod
31 | def to_sample_name(filename):
32 | return filename[filename.find("QF_LRA"):]
33 |
34 | @staticmethod
35 | def to_synthetic_name(filename):
36 | parts = os.path.basename(filename).split(".")
37 | return parts[0]
38 |
39 | @staticmethod
40 | def compute(experiments):
41 | Properties.db = pickledb.load('example.db', True)
42 | if Properties.db.exists("bounds"):
43 | Properties.bounds = Properties.db.get("bounds")
44 | else:
45 | used_names = {Properties.to_sample_name(e.parameters.original_values["data"]) for e in experiments}
46 | names_to_bounds = dict()
47 | summary_file = "remote_res/smt_lib_benchmark/qf_lra_summary.pickle"
48 | for name, entry, density_filename in select_benchmark_files(benchmark_filter, summary_file):
49 | if "samples" in entry:
50 | for s in entry["samples"]:
51 | name = Properties.to_sample_name(s["samples_filename"])
52 | if name in used_names:
53 | names_to_bounds[name] = s["bounds"]
54 | Properties.bounds = names_to_bounds
55 | Properties.db.set("bounds", Properties.bounds)
56 |
57 | @staticmethod
58 | def get_bound(experiment):
59 | return Properties.bounds[Properties.to_sample_name(experiment.parameters.original_values["data"])]
60 |
61 | @staticmethod
62 | def get_db_synthetic(experiment):
63 | return get_synthetic_db(os.path.dirname(experiment.parameters.original_values["domain"]))
64 |
65 | @staticmethod
66 | def original_k(experiment):
67 | db = Properties.get_db_synthetic(experiment)
68 | name = Properties.to_synthetic_name(experiment.imported_from_file)
69 | return db.get(name)["generation"]["k"]
70 |
71 | @staticmethod
72 | def original_h(experiment):
73 | db = Properties.get_db_synthetic(experiment)
74 | name = Properties.to_synthetic_name(experiment.imported_from_file)
75 | return db.get(name)["generation"]["h"]
76 |
77 | @staticmethod
78 | def original_l(experiment):
79 | db = Properties.get_db_synthetic(experiment)
80 | name = Properties.to_synthetic_name(experiment.imported_from_file)
81 | return db.get(name)["generation"]["l"]
82 |
83 | @staticmethod
84 | def executed(experiment):
85 | return 1 if experiment.results.duration is not None else 0
86 |
87 | @staticmethod
88 | def positive_ratio(experiment):
89 | labels = np.load(experiment.parameters.original_values["labels"])
90 | return sum(labels) / len(labels)
91 |
92 | @staticmethod
93 | def accuracy_approx(experiment):
94 | key = "accuracy_approx:{}".format(experiment.imported_from_file)
95 | if Properties.db.exists(key):
96 | return Properties.db.get(key)
97 | else:
98 | pysmt.environment.push_env()
99 | pysmt.environment.get_env().enable_infix_notation = True
100 | if os.path.basename(experiment.imported_from_file).startswith("synthetic"):
101 | db = Properties.get_db_synthetic(experiment)
102 | name = Properties.to_synthetic_name(experiment.imported_from_file)
103 | entry = db.get(name)
104 | domain = import_domain(json.loads(entry["domain"]))
105 | true_formula = nested_to_smt(entry["formula"])
106 | else:
107 | density = Density.import_from(experiment.parameters.original_values["domain"])
108 | domain = Domain(density.domain.variables, density.domain.var_types, Properties.get_bound(experiment))
109 | true_formula = density.support
110 | learned_formula = nested_to_smt(experiment.results.formula)
111 | engine = RejectionEngine(domain, smt.TRUE(), smt.Real(1.0), 100000)
112 | accuracy = engine.compute_probability(smt.Iff(true_formula, learned_formula))
113 | pysmt.environment.pop_env()
114 | print(accuracy)
115 | Properties.db.set(key, accuracy)
116 | return accuracy
117 |
118 |
119 | def register_derived(experiment):
120 | experiment.register_derived("accuracy_approx", Properties.accuracy_approx)
121 | experiment.register_derived("original_h", Properties.original_h)
122 | experiment.register_derived("original_l", Properties.original_l)
123 | experiment.register_derived("original_k", Properties.original_k)
124 | experiment.register_derived("executed", Properties.executed)
125 | experiment.register_derived("pos_rate", Properties.positive_ratio)
126 | return experiment
127 |
128 |
129 | def analyze(results_directories, res_path, show_args):
130 | experiments = [] # type: List[Experiment]
131 | for results_directory in results_directories:
132 | for filename in glob.glob("{}/**/*.result".format(results_directory), recursive=True):
133 | log_file = filename.replace(".result", ".log")
134 | if not os.path.exists(log_file):
135 | log_file = None
136 | experiment = get_experiment(res_path).load(filename)
137 | experiments.append(register_derived(experiment))
138 |
139 | Properties.compute(experiments)
140 | show.show(experiments, *show_args)
141 |
--------------------------------------------------------------------------------
/incal/experiments/cli.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import numpy as np
4 | from pywmi.smt_print import pretty_print
5 |
6 | from .learn import learn_benchmark, get_experiment, learn_synthetic
7 | from .prepare import prepare_smt_lib_benchmark, prepare_ratios, prepare_samples, prepare_synthetic
8 | from incal.learn import LearnOptions
9 | from . import examples
10 | from .analyze import analyze
11 | from incal.util import analyze as show
12 |
13 |
14 | def main():
15 | smt_lib_name = "smt-lib-benchmark"
16 | synthetic_name = "synthetic"
17 | parser = argparse.ArgumentParser(description="Interface with benchmark or synthetic data for experiments")
18 |
19 | parser.add_argument("source")
20 | parser.add_argument("--sample_size", type=int, default=None)
21 | parser.add_argument("--runs", type=int, default=None)
22 | parser.add_argument("--input_dir", type=str, default=None)
23 | parser.add_argument("--output_dir", type=str, default=None)
24 | parser.add_argument("--processes", type=int, default=None)
25 | parser.add_argument("--time_out", type=int, default=None)
26 |
27 | task_parsers = parser.add_subparsers(dest="task")
28 | prepare_parser = task_parsers.add_parser("prepare")
29 | prepare_parser.add_argument("--reset_samples", type=bool, default=False)
30 | learn_parser = task_parsers.add_parser("learn")
31 | analyze_parser = task_parsers.add_parser("analyze")
32 | analyze_parser.add_argument("--dirs", nargs="+", type=str)
33 | analyze_parser.add_argument("--res_path", type=str, default=None)
34 |
35 | show_parsers = analyze_parser.add_subparsers()
36 | show_parser = show_parsers.add_parser("show")
37 | show.add_arguments(show_parser)
38 |
39 | learn_options = LearnOptions()
40 | learn_options.add_arguments(learn_parser)
41 |
42 | args = parser.parse_args()
43 | if args.task == "prepare":
44 | if args.source == smt_lib_name:
45 | prepare_smt_lib_benchmark()
46 | prepare_ratios()
47 | prepare_samples(args.runs, args.sample_size, args.reset_samples)
48 | elif args.source == synthetic_name:
49 | prepare_synthetic(args.input_dir, args.output_dir, args.runs, args.sample_size)
50 | elif args.task == "learn":
51 | learn_options.parse_arguments(args)
52 | if args.source == smt_lib_name:
53 | learn_benchmark(args.runs, args.sample_size, args.processes, args.time_out, learn_options)
54 | elif args.source == synthetic_name:
55 | learn_synthetic(args.input_dir, args.output_dir, args.runs, args.sample_size, args.processes, args.time_out, learn_options)
56 | elif args.source.startswith("ex"):
57 | example_name = args.source.split(":", 1)[1]
58 | domain, formula = examples.get_by_name(example_name)
59 | np.random.seed(1)
60 | from pywmi.sample import uniform
61 | samples = uniform(domain, args.sample_size)
62 | from pywmi import evaluate
63 | labels = evaluate(domain, formula, samples)
64 | learn_options.set_value("domain", domain, False)
65 | learn_options.set_value("data", samples, False)
66 | learn_options.set_value("labels", labels, False)
67 | (formula, k, h), duration = learn_options.call(True)
68 | print("[{:.2f}s] Learned formula (k={}, h={}): {}".format(duration, k, h, pretty_print(formula)))
69 | elif args.task == "analyze":
70 | analyze(args.dirs, args.res_path, show.parse_args(args))
71 |
72 |
73 |
74 | if __name__ == "__main__":
75 | main()
76 |
--------------------------------------------------------------------------------
/incal/experiments/examples.py:
--------------------------------------------------------------------------------
1 | from pywmi import Domain
2 | from pysmt.shortcuts import REAL, Or, And, LE, Real, Symbol, BOOL, GT, Not, Plus, Times, GE
3 |
4 |
5 | def xy_domain():
6 | variables = ["x", "y"]
7 | var_types = {"x": REAL, "y": REAL}
8 | var_domains = {"x": (0, 1), "y": (0, 1)}
9 | return Domain(variables, var_types, var_domains)
10 |
11 |
12 | def simple_checker_problem():
13 | theory = Or(
14 | And(LE(Symbol("x", REAL), Real(0.5)), LE(Symbol("y", REAL), Real(0.5))),
15 | And(GT(Symbol("x", REAL), Real(0.5)), GT(Symbol("y", REAL), Real(0.5)))
16 | )
17 |
18 | return xy_domain(), theory, "simple_checker"
19 |
20 |
21 | def simple_checker_problem_cnf():
22 | x, y = (Symbol(n, REAL) for n in ["x", "y"])
23 | theory = ((x <= 0.5) | (y > 0.5)) & ((x > 0.5) | (y <= 0.5))
24 | return xy_domain(), theory, "simple_cnf_checker"
25 |
26 |
27 | def checker_problem():
28 | variables = ["x", "y", "a"]
29 | var_types = {"x": REAL, "y": REAL, "a": BOOL}
30 | var_domains = {"x": (0, 1), "y": (0, 1)}
31 |
32 | theory = Or(
33 | And(LE(Symbol("x", REAL), Real(0.5)), LE(Symbol("y", REAL), Real(0.5)), Symbol("a", BOOL)),
34 | And(GT(Symbol("x", REAL), Real(0.5)), GT(Symbol("y", REAL), Real(0.5)), Symbol("a", BOOL)),
35 | And(GT(Symbol("x", REAL), Real(0.5)), LE(Symbol("y", REAL), Real(0.5)), Not(Symbol("a", BOOL))),
36 | And(LE(Symbol("x", REAL), Real(0.5)), GT(Symbol("y", REAL), Real(0.5)), Not(Symbol("a", BOOL)))
37 | )
38 |
39 | return Domain(variables, var_types, var_domains), theory, "checker"
40 |
41 |
42 | def simple_univariate_problem():
43 | variables = ["x"]
44 | var_types = {"x": REAL}
45 | var_domains = {"x": (0, 1)}
46 |
47 | theory = LE(Symbol("x", REAL), Real(0.6))
48 |
49 | return Domain(variables, var_types, var_domains), theory, "one_test"
50 |
51 |
52 | def shared_hyperplane_problem():
53 | domain = xy_domain()
54 | x, y = (domain.get_symbol(v) for v in ["x", "y"])
55 | # y <= -x + 1.25
56 | shared1 = LE(y, Plus(Times(Real(-1.0), x), Real(1.25)))
57 | # y >= -x + 0.75
58 | shared2 = GE(y, Plus(Times(Real(-1.0), x), Real(0.75)))
59 |
60 | # y <= x + 0.5
61 | h1 = LE(y, Plus(x, Real(0.5)))
62 | # y >= x + 0.25
63 | h2 = GE(y, Plus(x, Real(0.25)))
64 |
65 | # y <= x - 0.25
66 | h3 = LE(y, Plus(x, Real(-0.25)))
67 | # y >= x - 0.5
68 | h4 = GE(y, Plus(x, Real(-0.5)))
69 | return domain, Or(And(shared1, shared2, h1, h2), And(shared1, shared2, h3, h4)), "shared"
70 |
71 |
72 | def cross_problem():
73 | domain = xy_domain()
74 | x, y = (domain.get_symbol(v) for v in ["x", "y"])
75 | top = y <= 0.9
76 | middle_top = y <= 0.7
77 | middle_bottom = y >= 0.5
78 | bottom = y >= 0.1
79 |
80 | left = x >= 0.2
81 | middle_left = x >= 0.4
82 | middle_right = x <= 0.6
83 | right = x <= 0.8
84 | theory = (top & middle_left & middle_right & bottom) | (left & middle_top & middle_bottom & right)
85 | return domain, theory, "cross"
86 |
87 |
88 | def bool_xor_problem():
89 | variables = ["a", "b"]
90 | var_types = {"a": BOOL, "b": BOOL}
91 | var_domains = dict()
92 | domain = Domain(variables, var_types, var_domains)
93 |
94 | a, b = (domain.get_symbol(v) for v in variables)
95 |
96 | theory = (a & ~b) | (~a & b)
97 | return domain, theory, "2xor"
98 |
99 |
100 | def ice_cream_problem():
101 | variables = ["chocolate", "banana", "weekend"]
102 | chocolate, banana, weekend = variables
103 | var_types = {chocolate: REAL, banana: REAL, weekend: BOOL}
104 | var_domains = {chocolate: (0, 1), banana: (0, 1)}
105 | domain = Domain(variables, var_types, var_domains)
106 |
107 | chocolate, banana, weekend = (domain.get_symbol(v) for v in variables)
108 | theory = (chocolate < 0.650) \
109 | & (banana < 0.550) \
110 | & (chocolate + 0.7 * banana <= 0.700) \
111 | & (chocolate + 1.2 * banana <= 0.750) \
112 | & (~weekend | (chocolate + 0.7 * banana <= 0.340))
113 |
114 | return domain, theory, "ice_cream"
115 |
116 |
117 | def get_all():
118 | return [
119 | simple_checker_problem(),
120 | simple_checker_problem_cnf(),
121 | checker_problem(),
122 | simple_univariate_problem(),
123 | shared_hyperplane_problem(),
124 | cross_problem(),
125 | bool_xor_problem(),
126 | ice_cream_problem(),
127 | ]
128 |
129 |
130 | def get_by_name(name):
131 | for t in get_all():
132 | if t[2] == name:
133 | return t[0], t[1]
134 |
--------------------------------------------------------------------------------
/incal/experiments/find_hyperplanes.py:
--------------------------------------------------------------------------------
1 | from pywmi import SmtWalker, smt_to_nested
2 |
3 |
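# Walks an SMT formula and collects every linear inequality (<= and <) it contains,
# serialized in pywmi's nested format, i.e. the set of half-spaces the formula uses.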
4 | class HalfSpaceWalker(SmtWalker):
5 | def __init__(self):
6 | self.half_spaces = set()
7 |
8 | def walk_and(self, args):
9 | self.walk_smt_multiple(args)
10 |
11 | def walk_or(self, args):
12 | self.walk_smt_multiple(args)
13 |
14 | def walk_plus(self, args):
15 | self.walk_smt_multiple(args)
16 |
17 | def walk_minus(self, left, right):
18 | self.walk_smt_multiple([left, right])
19 |
20 | def walk_times(self, args):
21 | self.walk_smt_multiple(args)
22 |
23 | def walk_not(self, argument):
24 | self.walk_smt_multiple([argument])
25 |
26 | def walk_ite(self, if_arg, then_arg, else_arg):
27 | self.walk_smt_multiple([if_arg, then_arg, else_arg])
28 |
29 | def walk_pow(self, base, exponent):
30 | self.walk_smt_multiple([base, exponent])
31 |
32 | def walk_lte(self, left, right):
33 | self.half_spaces.add(smt_to_nested(left <= right))
34 |
35 | def walk_lt(self, left, right):
36 | self.half_spaces.add(smt_to_nested(left < right))
37 |
38 | def walk_equals(self, left, right):
39 | self.walk_smt_multiple([left, right])
40 |
41 | def walk_symbol(self, name, v_type):
42 | pass
43 |
44 | def walk_constant(self, value, v_type):
45 | pass
46 |
47 | def find_half_spaces(self, formula):
48 | self.walk_smt(formula)
49 | return list(self.half_spaces)
50 |
--------------------------------------------------------------------------------
/incal/experiments/find_operators.py:
--------------------------------------------------------------------------------
1 | import pywmi
2 | from pywmi import SmtWalker
3 |
4 |
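# Walks an SMT formula and records which logical and arithmetic operators occur in it.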
5 | class OperatorWalker(SmtWalker):
6 | def __init__(self):
7 | self.operators = set()
8 |
9 | def walk_and(self, args):
10 | self.operators.add("&")
11 | self.walk_smt_multiple(args)
12 |
13 | def walk_or(self, args):
14 | self.operators.add("|")
15 | self.walk_smt_multiple(args)
16 |
17 | def walk_plus(self, args):
18 | self.operators.add("+")
19 | self.walk_smt_multiple(args)
20 |
21 | def walk_minus(self, left, right):
22 | self.operators.add("-")
23 | self.walk_smt_multiple([left, right])
24 |
25 | def walk_times(self, args):
26 | self.operators.add("*")
27 | self.walk_smt_multiple(args)
28 |
29 | def walk_not(self, argument):
30 | self.operators.add("~")
31 | self.walk_smt_multiple([argument])
32 |
33 | def walk_ite(self, if_arg, then_arg, else_arg):
34 | self.operators.add("ite")
35 | self.walk_smt_multiple([if_arg, then_arg, else_arg])
36 |
37 | def walk_pow(self, base, exponent):
38 | self.operators.add("^")
39 | self.walk_smt_multiple([base, exponent])
40 |
41 | def walk_lte(self, left, right):
42 | self.operators.add("<=")
43 | self.walk_smt_multiple([left, right])
44 |
45 | def walk_lt(self, left, right):
46 | self.operators.add("<")
47 | self.walk_smt_multiple([left, right])
48 |
49 | def walk_equals(self, left, right):
50 | self.operators.add("=")
51 | self.walk_smt_multiple([left, right])
52 |
53 | def walk_symbol(self, name, v_type):
54 | pass
55 |
56 | def walk_constant(self, value, v_type):
57 | pass
58 |
59 | def find_operators(self, formula):
60 | self.walk_smt(formula)
61 | return list(self.operators)
62 |
--------------------------------------------------------------------------------
/incal/experiments/learn.py:
--------------------------------------------------------------------------------
1 | import glob
2 | import os
3 | import random
4 | import warnings
5 |
6 | from .prepare import get_synthetic_db
7 | from incal.util.options import Options, Experiment
8 |
9 | from incal.learn import LearnOptions, LearnResults
10 | from incal.util.parallel import run_commands
11 | from .prepare import select_benchmark_files, benchmark_filter, get_benchmark_results_dir
12 |
13 |
14 | def get_bound_volume(bounds):
15 | size = 1
16 | for ub_lb in bounds.values():
17 | size *= ub_lb[1] - ub_lb[0]
18 | return size
19 |
20 |
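# Distance of a positive-example ratio from 0.5; 0 means the data set is perfectly balanced.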
21 | def rel_ratio(ratio):
22 | return abs(0.5 - ratio)
23 |
24 |
25 | def learn_synthetic(input_directory, output_directory, runs, sample_size, processes, time_out, learn_options: LearnOptions):
26 | commands = []
27 |
28 | db = get_synthetic_db(input_directory)
29 | for name in db.getall():
30 | entry = db.get(name)
31 | matching_samples = []
32 | for sample in entry["samples"]:
33 | if sample["sample_size"] == sample_size and len(matching_samples) < runs:
34 | matching_samples.append(sample)
35 | if len(matching_samples) != runs:
36 | raise RuntimeError("Insufficient samples available, prepare more samples first")
37 |
38 | for sample in matching_samples:
39 | detail_learn_options = learn_options.copy()
40 | detail_learn_options.domain = os.path.join(input_directory, "{}.density".format(name))
41 | detail_learn_options.data = os.path.join(input_directory, sample["samples_file"])
42 | detail_learn_options.labels = os.path.join(input_directory, sample["labels_file"])
43 |
44 | export_file = "{}{sep}{}.{}.{}.result" \
45 | .format(output_directory, name, sample_size, sample["seed"], sep=os.path.sep)
46 | log_file = "{}{sep}{}.{}.{}.log" \
47 | .format(output_directory, name, sample_size, sample["seed"], sep=os.path.sep)
48 |
49 | if not os.path.exists(os.path.dirname(export_file)):
50 | os.makedirs(os.path.dirname(export_file))
51 |
52 | commands.append("incal-track {} --export {} --log {}"
53 | .format(detail_learn_options.print_arguments(), export_file, log_file))
54 |
55 | run_commands(commands, processes, time_out)
56 |
57 |
58 | def learn_benchmark(runs, sample_size, processes, time_out, learn_options: LearnOptions):
59 | # def filter1(entry):
60 | # return "real_variables_count" in entry and entry["real_variables_count"] + entry["bool_variables_count"] <= 10
61 | #
62 | # count = 0
63 | # boolean = 0
64 | # for name, entry, density_filename in select_benchmark_files(filter1):
65 | # if entry["bool_variables_count"] > 0:
66 | # boolean += 1
67 | # count += 1
68 | #
69 | # print("{} / {}".format(boolean, count))
70 | #
71 | # count = 0
72 | # boolean = 0
73 | # for name, entry, density_filename in select_benchmark_files(benchmark_filter):
74 | # if entry["bool_variables_count"] > 0:
75 | # boolean += 1
76 | # count += 1
77 | #
78 | # print("{} / {}".format(boolean, count))
79 |
80 | def learn_filter(_e):
81 | return benchmark_filter(_e) and "samples" in _e
82 |
83 | count = 0
84 | problems_to_learn = []
85 | for name, entry, density_filename in select_benchmark_files(learn_filter):
86 | if len(entry["bounds"]) > 0:
87 | best_ratio = min(rel_ratio(t[1]) for t in entry["bounds"])
88 | if best_ratio <= 0.3:
89 | qualifying = [t for t in entry["bounds"] if rel_ratio(t[1]) <= 0.3 and abs(rel_ratio(t[1]) - best_ratio) <= best_ratio / 5]
90 | selected = sorted(qualifying, key=lambda x: get_bound_volume(x[0]))[0]
91 | print(name, "\n", rel_ratio(selected[1]), best_ratio, selected[0], entry["bool_variables_count"])
92 | count += 1
93 | selected_samples = [s for s in entry["samples"]
94 | if s["bounds"] == selected[0] and s["sample_size"] >= sample_size]
95 | if len(selected_samples) < runs:
96 | raise RuntimeError("Insufficient number of data set available ({} of {})"
97 | .format(len(selected_samples), runs))
98 | elif len(selected_samples) > runs:
99 | selected_samples = selected_samples[:runs]
100 | for selected_sample in selected_samples:
101 | problems_to_learn.append((name, density_filename, selected_sample))
102 |
103 | commands = []
104 | for name, density_filename, selected_sample in problems_to_learn:
105 | detail_learn_options = learn_options.copy()
106 | detail_learn_options.domain = density_filename
107 | detail_learn_options.data = selected_sample["samples_filename"]
108 | detail_learn_options.labels = selected_sample["labels_filename"]
109 | export_file = "{}{sep}{}.{}.{}.result".format(get_benchmark_results_dir(), name, selected_sample["sample_size"],
110 | selected_sample["seed"], sep=os.path.sep)
111 | log_file = "{}{sep}{}.{}.{}.log".format(get_benchmark_results_dir(), name, selected_sample["sample_size"],
112 | selected_sample["seed"], sep=os.path.sep)
113 | if not os.path.exists(os.path.dirname(export_file)):
114 | os.makedirs(os.path.dirname(export_file))
115 | commands.append("incal-track {} --export {} --log {}"
116 | .format(detail_learn_options.print_arguments(), export_file, log_file))
117 |
118 | run_commands(commands, processes, time_out)
119 |
120 |
121 | def get_experiment(res_path=None):
122 | def import_handler(parameters_dict, results_dict, config_dict):
123 | for key, entry in parameters_dict.items():
124 | if isinstance(entry, str):
125 | index = entry.find("res/")
126 | if index >= 0:
127 | parameters_dict[key] = res_path + os.path.sep + entry[index+4:]
128 |
129 | config = Options()
130 | config.add_option("export", str)
131 | return Experiment(LearnOptions(), LearnResults(), config, import_handler if res_path else None)
132 |
133 |
134 | def track():
135 | with warnings.catch_warnings():
136 | warnings.simplefilter("ignore")
137 | experiment = get_experiment()
138 | experiment.import_from_command_line()
139 | experiment.save(experiment.config.export)
140 | experiment.execute()
141 | experiment.save(experiment.config.export)
142 |
--------------------------------------------------------------------------------
/incal/extra/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/extra/__init__.py
--------------------------------------------------------------------------------
/incal/extra/api.py:
--------------------------------------------------------------------------------
1 | import argparse
2 |
3 | import os
4 |
5 | import math
6 |
7 |
8 | if __name__ == "__main__":
9 | def parse_args():
10 | parser = argparse.ArgumentParser()
11 | subparsers = parser.add_subparsers(dest="mode")
12 |
13 | scan_parser = subparsers.add_parser("scan", help="Scan the directory and load smt problems")
14 | scan_parser.add_argument("-d", "--dir", default=None, help="Specify the directory to load files from")
15 |
16 | learn_parser = subparsers.add_parser("learn", help="Learn SMT formulas")
17 | learn_parser.add_argument("dir", help="Specify the results directory")
18 | learn_parser.add_argument("-s", "--samples", type=int, help="Specify the number of samples for learning")
19 | learn_parser.add_argument("-a", "--all", default=False, action="store_true",
20 | help="If set, learning will not use incremental mode and include all examples")
21 | learn_parser.add_argument("-d", "--dnf", default=False, action="store_true",
22 | help="If set, learning bias is DNF instead of CNF")
23 |
24 | table_parser = subparsers.add_parser("table", help="Types can be: [time, k, h, id, acc, samples, l]")
25 | table_parser.add_argument("row_key", help="Specify the row key type")
26 | table_parser.add_argument("col_key", default=None, help="Specify the col key type")
27 | table_parser.add_argument("value", default=None, help="Specify the value type")
28 | table_parser.add_argument("dirs", nargs="*", help="Specify the directories to load files from, always in pairs:"
29 | "result_dir, data_dir")
30 |
31 | table_subparsers = table_parser.add_subparsers(dest="command")
32 | table_print_parser = table_subparsers.add_parser("print", help="Print the table")
33 | table_print_parser.add_argument("-d", "--delimiter", default="\t", help="Specify the delimiter (default=tab)")
34 | table_print_parser.add_argument("-a", "--aggregate", default=False, action="store_true",
35 | help="Aggregate the rows in the plot")
36 |
37 | table_plot_parser = table_subparsers.add_parser("plot", help="Plot the table")
38 | table_plot_parser.add_argument("-a", "--aggregate", default=False, action="store_true",
39 | help="Aggregate the rows in the plot")
40 | table_plot_parser.add_argument("--y_min", default=None, type=float, help="Minimum value for y")
41 | table_plot_parser.add_argument("--y_max", default=None, type=float, help="Maximum value for y")
42 | table_plot_parser.add_argument("--x_min", default=None, type=float, help="Minimum value for x")
43 | table_plot_parser.add_argument("--x_max", default=None, type=float, help="Maximum value for x")
44 | table_plot_parser.add_argument("--legend_pos", default=None, type=str, help="Legend position")
45 | table_plot_parser.add_argument("-o", "--output", default=None, help="Specify the output file")
46 |
47 | combine_parser = subparsers.add_parser("combine", help="Combine multiple results directories")
48 | combine_parser.add_argument("output_dir", help="The output directory to summarize results in")
49 | combine_parser.add_argument("input_dirs", nargs="*", help="Specify the directories to combine")
50 | combine_parser.add_argument("-b", "--bias", default=None, help="Specify the bias")
51 | combine_parser.add_argument("-p", "--prefix", default=None, help="Specify the prefix for input dirs")
52 |
53 | gen_parser = subparsers.add_parser("generate", help="Generate synthetic examples")
54 | gen_parser.add_argument("data_dir")
55 | gen_parser.add_argument("-n", "--data_sets", default=10, type=int)
56 | gen_parser.add_argument("--prefix", default="synthetics")
57 | gen_parser.add_argument("-b", "--bool_count", default=2, type=int)
58 | gen_parser.add_argument("-r", "--real_count", default=2, type=int)
59 | gen_parser.add_argument("--bias", default="cnf")
60 | gen_parser.add_argument("-k", "--k", default=3, type=int)
61 | gen_parser.add_argument("-l", "--literals", default=4, type=int)
62 | gen_parser.add_argument("--half_spaces", default=7, type=int)
63 | gen_parser.add_argument("-s", "--samples", default=1000, type=int)
64 | gen_parser.add_argument("--ratio", default=90, type=int)
65 | gen_parser.add_argument("-p", "--plot_dir", default=None)
66 | gen_parser.add_argument("-e", "--errors", default=0, type=int)
67 |
68 | migration_parser = subparsers.add_parser("migrate", help="Migrate files to newer or extended versions")
69 | migration_subparsers = migration_parser.add_subparsers(dest="type")
70 |
71 | migration_fix_parser = migration_subparsers.add_parser("fix", help="Fix result files")
72 | migration_fix_parser.add_argument("results_dir", help="Specify the result directory")
73 | migration_fix_parser.add_argument("-b", "--bias", default=None, help="Specify the bias")
74 |
75 | migration_acc_parser = migration_subparsers.add_parser("accuracy", help="Add accuracy to result files")
76 | migration_acc_parser.add_argument("results_dir", help="Specify the result directory")
77 | migration_acc_parser.add_argument("-d", "--data_dir", help="Specify the data directory for synthetic problems")
78 | migration_acc_parser.add_argument("-s", "--samples", default=None, help="Specify the number of samples", type=int)
79 | migration_acc_parser.add_argument("-f", "--force", default=False, action="store_true", help="Overwrites existing values")
80 |
81 | migration_ratio_parser = migration_subparsers.add_parser("ratio", help="Add ratio to result files")
82 | migration_ratio_parser.add_argument("results_dir", help="Specify the result directory")
83 | migration_ratio_parser.add_argument("-d", "--data_dir", help="Specify the data directory for synthetic problems")
84 | migration_ratio_parser.add_argument("-s", "--samples", default=None, help="Specify the number of samples", type=int)
85 | migration_ratio_parser.add_argument("-f", "--force", default=False, action="store_true", help="Overwrites existing values")
86 |
87 | args = parser.parse_args()
88 |
89 | if args.mode == "scan":
90 | full_dir = os.path.abspath(args.dir)  # the scan sub-command defines --dir, not a filename argument
91 | root_dir = os.path.dirname(full_dir)
92 |
93 | import smt_scan
94 | smt_scan.scan(full_dir, root_dir)
95 | smt_scan.analyze(root_dir)
96 | smt_scan.ratios()
97 | elif args.mode == "learn":
98 | import smt_scan
99 | smt_scan.learn(args.samples, args.dir, args.all, args.dnf)
100 | elif args.mode == "table":
101 | import smt_scan
102 | table = smt_scan.TableMaker(args.row_key, args.col_key, args.value)
103 | for i in range(int(math.floor(len(args.dirs) / 3))):
104 | table.load_table(args.dirs[3 * i], args.dirs[3 * i + 1], args.dirs[3 * i + 2])
105 | if args.command == "print":
106 | table.delimiter = args.delimiter
107 | print(table.to_txt(0, args.aggregate))
108 | elif args.command == "plot":
109 | table.plot_table(args.output, None if args.aggregate else 0, args.y_min, args.y_max, args.x_min, args.x_max, args.legend_pos)
110 | else:
111 | print("Error: unknown table command {}".format(args.command))
112 | elif args.mode == "combine":
113 | import combine_results
114 | combine_results.combine(args.output_dir, args.input_dirs, args.bias, args.prefix)
115 | elif args.mode == "generate":
116 | from generator import generate_random
117 | generate_random(args.data_sets, args.prefix, args.bool_count, args.real_count, args.bias, args.k,
118 | args.literals, args.half_spaces, args.samples, args.ratio, args.errors, args.data_dir,
119 | args.plot_dir)
120 | elif args.mode == "migrate":
121 | import migrate
122 | if args.type == "fix":
123 | migrate.migrate_results(args.results_dir, args.bias)
124 | elif args.type == "accuracy":
125 | migrate.add_accuracy(args.results_dir, args.data_dir, args.samples, args.force)
126 | elif args.type == "ratio":
127 | migrate.add_ratio(args.results_dir, args.data_dir, args.samples, args.force)
128 | else:
129 | print("Error: unknown migration type {}".format(args.type))
130 | else:
131 | print("Error: unknown mode {}".format(args.mode))
132 |
133 |
134 | parse_args()
135 |
--------------------------------------------------------------------------------
/incal/extra/combine_results.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import filecmp
3 | import fnmatch
4 | import json
5 |
6 | import os
7 | import shutil
8 |
9 | import migrate
10 |
11 |
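# Merges the problems.txt summaries and *.learning_log.txt files from several result
# directories into output_dir, raising an error rather than silently overwriting
# conflicting entries.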
12 | def combine(output_dir, dirs, bias=None, prefix=None):
13 | if not os.path.exists(output_dir):
14 | os.makedirs(output_dir)
15 |
16 | migrate.migrate_results(output_dir, bias)
17 |
18 | summary = os.path.join(output_dir, "problems.txt")
19 | if not os.path.isfile(summary):
20 | flat = {}
21 | else:
22 | with open(summary, "r") as f:
23 | flat = json.load(f)
24 |
25 | if prefix is not None:
26 | dirs = [str(prefix) + str(directory) for directory in dirs]
27 |
28 | for input_dir in dirs:
29 | migrate.migrate_results(input_dir, bias)
30 | input_summary = os.path.join(input_dir, "problems.txt")
31 | with open(input_summary, "r") as f:
32 | input_flat = json.load(f)
33 | for problem_id in input_flat:
34 | if problem_id not in flat:
35 | flat[problem_id] = {}
36 | for sample_size in input_flat[problem_id]:
37 | if sample_size not in flat[problem_id]:
38 | flat[problem_id][sample_size] = input_flat[problem_id][sample_size]
39 | else:
40 | raise RuntimeError("Attempting to overwrite sample size {} for problem {} from file {}"
41 | .format(sample_size, problem_id, input_summary))
42 |
43 | for input_dir in dirs:
44 | for input_file in os.listdir(input_dir):
45 | if fnmatch.fnmatch(input_file, '*.learning_log.txt'):
46 | old_file = os.path.join(input_dir, input_file)
47 | new_file = os.path.join(output_dir, input_file)
48 | if not os.path.isfile(new_file):
49 | shutil.copy(old_file, new_file)
50 | else:
51 | if not filecmp.cmp(old_file, new_file):
52 | raise RuntimeError("Attempting to overwrite {} with {}".format(new_file, old_file))
53 |
54 | with open(summary, "w") as f:
55 | json.dump(flat, f)
56 |
57 |
58 | def parse():
59 | parser = argparse.ArgumentParser()
60 | parser.add_argument("output_dir")
61 | parser.add_argument("dirs", nargs="*")
62 | parser.add_argument("-b", "--bias", default=None, help="Specify the bias")
63 | parser.add_argument("-p", "--prefix", default=None, help="Specify the prefix for input dirs")
64 | parsed = parser.parse_args()
65 | combine(parsed.output_dir, parsed.dirs, parsed.bias, parsed.prefix)
66 |
67 |
68 | if __name__ == "__main__":
69 | parse()
70 |
--------------------------------------------------------------------------------
/incal/extra/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 |
3 | import argparse
4 | import hashlib
5 | import json
6 | import random
7 |
8 | import os
9 | import tempfile
10 |
11 | import time
12 |
13 | import problem
14 | import generator
15 | import parse
16 | import inc_logging
17 |
18 | from os.path import basename
19 |
20 | import pysmt.shortcuts as smt
21 |
22 | from incremental_learner import RandomViolationsStrategy
23 | from k_cnf_smt_learner import KCnfSmtLearner
24 | from parameter_free_learner import learn_bottom_up
25 |
26 |
27 | def learn(name, domain, h, data, seed):
28 | initial_size = 20
29 | violations_size = 10
30 | log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "demo", "results")
31 | problem_name = hashlib.sha256(name.encode("utf-8")).hexdigest()  # hashlib requires bytes in Python 3
32 |
33 | def learn_inc(_data, _k, _h):
34 | violations_strategy = RandomViolationsStrategy(violations_size)
35 | learner = KCnfSmtLearner(_k, _h, violations_strategy)
36 | initial_indices = random.sample(list(range(len(data))), initial_size)
37 | log_file = os.path.join(log_dir, "{}_{}_{}.txt".format(problem_name, _k, _h))
38 | learner.add_observer(inc_logging.LoggingObserver(log_file, seed, True, violations_strategy))
39 | learned_theory = learner.learn(domain, data, initial_indices)
40 | # learned_theory = Or(*[And(*planes) for planes in hyperplane_dnf])
41 | print("Learned theory:\n{}".format(parse.smt_to_nested(learned_theory)))
42 | return learned_theory
43 |
44 | phi, k, h = learn_bottom_up(data, learn_inc, 1, 1, init_h=h, max_h=h)
45 |
46 | with open(os.path.join(log_dir, "problems.txt"), "a") as f:
47 | print(json.dumps({problem_name: name, "k": k, "h": h}), file=f)
48 |
49 |
50 | def main(filename, sample_count):
51 | seed = time.time()
52 | random.seed(seed)
53 |
54 | target_formula = smt.read_smtlib(filename)
55 |
56 | variables = target_formula.get_free_variables()
57 | var_names = [str(v) for v in variables]
58 | var_types = {str(v): v.symbol_type() for v in variables}
59 | var_domains = {str(v): (0, 200) for v in variables} # TODO This is a hack
60 |
61 | domain = problem.Domain(var_names, var_types, var_domains)
62 | name = basename(filename).split(".")[0]
63 | target_problem = problem.Problem(domain, target_formula, name)
64 |
65 | # compute_difference(domain, target_formula, target_formula)
66 |
67 | samples = generator.get_problem_samples(target_problem, sample_count, 1)
68 |
69 | initial_indices = random.sample(list(range(sample_count)), 20)
70 | learner = KCnfSmtLearner(3, 3, RandomViolationsStrategy(5))
71 |
72 | dir_name = "../output/{}".format(name)
73 | img_name = "{}_{}_{}".format(learner.name, sample_count, seed)
74 | # learner.add_observer(plotting.PlottingObserver(data_set.samples, dir_name, img_name, "r0", "r1"))
75 | with open("log.txt", "w") as f:
76 | learner.add_observer(inc_logging.LoggingObserver(f))
77 |
78 | print(parse.smt_to_nested(learner.learn(domain, samples, initial_indices)))
79 |
80 |
81 | def compute_difference(domain, target_theory, learned_theory):
82 | query = (target_theory & ~learned_theory) | (~target_theory & learned_theory)
83 | compute_wmi(domain, query, domain.variables)
84 |
85 |
86 | def compute_wmi(domain, query, variables):
87 | # os.environ["PATH"] += os.pathsep + "/Users/samuelkolb/Downloads/latte/dest/bin"
88 | # from sys import path
89 | # path.insert(0, "/Users/samuelkolb/Documents/PhD/wmi-pa/src")
90 | # from wmi import WMI
91 |
92 | # support = []
93 | # for v in domain.real_vars:
94 | # lb, ub = domain.var_domains[v]
95 | # sym = domain.get_symbol(v)
96 | # support.append((lb <= sym) & (sym <= ub))
97 | #
98 | # support = smt.And(*support)
99 | # wmi = WMI()
100 | # total_volume, _ = wmi.compute(support, 1, WMI.MODE_PA)
101 | # query_volume, _ = wmi.compute(support & query, 1, WMI.MODE_PA)
102 | # print(query_volume / total_volume)
103 |
104 | f = tempfile.NamedTemporaryFile(mode="w", delete=False)  # text mode: json.dump writes str
105 | try:
106 | flat = {
107 | "domain": problem.export_domain(domain, to_str=False),
108 | "query": parse.smt_to_nested(query),
109 | "variables": variables
110 | }
111 | json.dump(flat, f)
112 | with open("test.txt", "w") as f2:
113 | json.dump(flat, f2)
114 | f.close()
115 | finally:
116 | os.remove(f.name)
117 |
118 |
119 |
120 | if __name__ == "__main__":
121 | parser = argparse.ArgumentParser()
122 | parser.add_argument("filename")
123 | parser.add_argument("sample_count", type=int)
124 | args = parser.parse_args()
125 | main(args.filename, args.sample_count)
126 |
--------------------------------------------------------------------------------
/incal/extra/deploy.py:
--------------------------------------------------------------------------------
1 | import json
2 | import StringIO
3 | import os
4 | from os.path import join, dirname
5 |
6 | import sys
7 | from fabric.api import run, env, execute, cd, local, put, get, prefix, lcd
8 | from fabric.contrib import files
9 |
10 |
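# Generates, learns and combines synthetic problem sets while varying a single generator
# parameter (e.g. the number of half-spaces); all other generator settings take the
# defaults below unless overridden via fixed_values.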
11 | def vary_synthetic_parameter(parameter_name, values, fixed_values, learner_settings, time_out=None, samples=None,
12 | exp_name=None, override=False):
13 | default_values = {
14 | "data_sets": 10,
15 | "bool_count": 2,
16 | "real_count": 2,
17 | "bias": "cnf",
18 | "k": 3,
19 | "literals": 4,
20 | "half_spaces": 7,
21 | "samples": 1000,
22 | "ratio": 90,
23 | "errors": 0,
24 | }
25 | for key, value in fixed_values.items():
26 | if key not in default_values:
27 | raise RuntimeError("Found unknown parameter name {}".format(key))
28 | default_values[key] = value
29 |
30 | del default_values[parameter_name]
31 |
32 | config = {"fixed": default_values, "vary": parameter_name, "values": values, "learner": learner_settings}
33 | if exp_name is None:
34 | exp_name = "h" + str(hash(json.dumps(config)) + sys.maxsize + 1)
35 |
36 | print(config)
37 |
38 | exp_path = join("synthetic", parameter_name, exp_name)
39 | local_root = dirname(dirname(__file__))
40 | full_gen = join(local_root, exp_path)
41 | full_out = join(local_root, "output", exp_path)
42 | full_code = join(local_root, "smtlearn")
43 | full_api = join(full_code, "api.py")
44 | full_exp = join(full_code, "experiments.py")
45 |
46 | # Generate
47 | gen_config = join(full_gen, "config.json")
48 | if override or not os.path.exists(gen_config):
49 | local("mkdir -p {}".format(full_gen))
50 |
51 | with open(gen_config, "w") as f:
52 | json.dump(config, f)
53 |
54 | commands = []
55 | for value in values:
56 | default_values[parameter_name] = value
57 | options = " ".join("--{} {}".format(name, val) for name, val in default_values.items())
58 | command = "python {api} generate {input}/{val} {options}" \
59 | .format(api=full_api, input=full_gen, val=value, options=options)
60 | commands.append(command)
61 | commands.append("wait")
62 |
63 | local(" & ".join(commands))
64 |
65 | # Learn
66 | out_config = join(full_out, "config.json")
67 | if override or not os.path.exists(out_config):
68 | local("mkdir -p {}".format(full_out))
69 |
70 | with open(out_config, "w") as f:
71 | json.dump(config, f)
72 |
73 | commands = []
74 | for value in values:
75 | options = " ".join("--{} {}".format(name, val) for name, val in learner_settings.items())
76 | command = "python {exp} {input}/{val} \"\" {output}/{val} {options}" \
77 | .format(exp=full_exp, input=full_gen, output=full_out, val=value, options=options)
78 | if time_out is not None:
79 | command += " -t {}".format(time_out)
80 | commands.append(command)
81 | commands.append("wait")
82 |
83 | local(" & ".join(commands))
84 |
85 | # Combine
86 | if override or not os.path.exists(join(full_out, "summary")):
87 | with lcd(full_gen):
88 | local("mkdir -p all")
89 | for value in values:
90 | local("cp {}/* all/".format(value))
91 |
92 | local("python {api} combine {output}/summary {values} -p {output}/"
93 | .format(api=full_api, output=full_out, values=" ".join(str(v) for v in values)))
94 |
95 | for migration in ["ratio", "accuracy"]:
96 | command = "python {api} migrate {migration} {output}/summary -d {input}/all" \
97 | .format(output=full_out, input=full_gen, values=" ".join(str(v) for v in values), api=full_api,
98 | migration=migration)
99 | if samples is not None:
100 | command += " -s {}".format(samples)
101 | local(command)
102 |
103 |
104 | def vary_h(time_out=None, samples=None, override=False):
105 | parameter = "half_spaces"
106 | values = [3, 4, 5, 6, 7, 8, 9, 10]
107 | fixed_values = {"data_sets": 100, "bool_count": 0, "real_count": 2, "k": 2, "literals": 3}
108 |
109 | learner = {"bias": "cnf", "selection": "random"}
110 | vary_synthetic_parameter(parameter, values, fixed_values, learner, time_out, samples, "standard", override)
111 |
112 | learner["selection"] = "dt_weighted"
113 | vary_synthetic_parameter(parameter, values, fixed_values, learner, time_out, samples, "dt", override)
114 |
115 |
116 | def vary_h_simple(time_out=None, samples=None):
117 | parameter_name = "half_spaces"
118 | values = [3, 4, 5, 6, 7, 8]
119 | fixed_values = {"data_sets": 10, "bool_count": 0, "real_count": 2, "k": 2, "literals": 3}
120 |
121 | learner = {"bias": "cnf", "selection": "random"}
122 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_standard")
123 |
124 | learner["selection_size"] = 1
125 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_standard_single")
126 |
127 | learner["selection_size"] = 20
128 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_standard_20")
129 |
130 | learner["selection"] = "dt_weighted"
131 | learner["selection_size"] = 1
132 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_dt_1")
133 |
134 | learner["selection"] = "dt"
135 | learner["selection_size"] = 1
136 | vary_synthetic_parameter(parameter_name, values, fixed_values, learner, time_out, samples, "small_sdt_1")
137 |
138 |
139 | if __name__ == "__main__":
140 | import authenticate
141 |
142 | authenticate.config()
143 | execute(vary_h_simple, time_out=200, samples=1000)
144 |
--------------------------------------------------------------------------------
/incal/extra/experiments.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import argparse
4 | import json
5 | import random
6 |
7 | import os
8 |
9 | import time
10 |
11 | from generator import import_synthetic_data_files
12 | from inc_logging import LoggingObserver
13 | from incremental_learner import AllViolationsStrategy, RandomViolationsStrategy, WeightedRandomViolationsStrategy, \
14 | MaxViolationsStrategy
15 | from k_cnf_smt_learner import KCnfSmtLearner
16 | from k_dnf_smt_learner import KDnfSmtLearner
17 | from parameter_free_learner import learn_bottom_up
18 | from timeout import timeout
19 |
20 |
21 | class IncrementalConfig(object):
22 | def __init__(self, initial, initial_size, selection, selection_size):
23 | self.initial = initial
24 | self.initial_size = initial_size
25 | self.selection = selection
26 | self.selection_size = selection_size
27 | self.domain = None
28 | self.data = None
29 | self.dt_weights = None
30 |
31 | def set_data(self, data):
32 | self.data = data
33 | self.dt_weights = None
34 |
35 | def get_dt_weights(self):
36 | if self.dt_weights is None:
37 | import dt_selection
38 | self.dt_weights = [min(d.values()) for d in dt_selection.get_distances(self.domain, self.data)]
39 | return self.dt_weights
40 |
41 | def get_initial_indices(self):
42 | if self.initial is None:
43 | return list(range(len(self.data)))
44 | elif self.initial == "random":
45 | return random.sample(range(len(self.data)), self.initial_size)
46 | elif self.initial == "dt_weighted":
47 | import sampling
48 | return sampling.sample_weighted(zip(range(len(self.data)), self.get_dt_weights()), self.initial_size)
49 | else:
50 | raise RuntimeError("Unknown initial type {}".format(self.initial))
51 |
52 | def get_selection_strategy(self):
53 | if self.selection is None:
54 | return RandomViolationsStrategy(0)
55 | elif self.selection == "random":
56 | return RandomViolationsStrategy(self.selection_size)
57 | elif self.selection == "dt_weighted":
58 | return WeightedRandomViolationsStrategy(self.selection_size, self.get_dt_weights())
59 | elif self.selection == "dt":
60 | return MaxViolationsStrategy(self.selection_size, self.get_dt_weights())
61 | else:
62 | raise RuntimeError("Unknown selection type {}".format(self.selection))
63 |
64 |
65 | def learn_synthetic(input_dir, prefix, results_dir, bias, incremental_config, plot=None, sample_count=None,
66 | time_out=None, parameter_free=False):
67 |
68 | input_dir = os.path.abspath(input_dir)
69 | data_sets = list(import_synthetic_data_files(input_dir, prefix))
70 |
71 | if not os.path.exists(results_dir):
72 | os.makedirs(results_dir)
73 | overview = os.path.join(results_dir, "problems.txt")
74 |
75 | if not os.path.isfile(overview):
76 | flat = {}
77 | else:
78 | with open(overview, "r") as f:
79 | flat = json.load(f)
80 |
81 | for data_set in data_sets:
82 | synthetic_problem = data_set.synthetic_problem
83 | data = data_set.samples
84 | name = synthetic_problem.theory_problem.name
85 | domain = synthetic_problem.theory_problem.domain
86 |
87 | if name not in flat:
88 | flat[name] = {}
89 |
90 | print(name)
91 |
92 | seed = hash(time.time())
93 | random.seed(seed)
94 |
95 | if sample_count is not None and sample_count < len(data):
96 | data = random.sample(data, sample_count)
97 | else:
98 | sample_count = len(data)
99 |
100 | incremental_config.set_data(data)
101 | incremental_config.domain = domain
102 |
103 | if not parameter_free:
104 | initial_indices = incremental_config.get_initial_indices()
105 | h = synthetic_problem.half_space_count
106 | k = synthetic_problem.formula_count
107 |
108 | if bias == "cnf" or bias == "dnf":
109 | selection_strategy = incremental_config.get_selection_strategy()
110 | if bias == "cnf":
111 | learner = KCnfSmtLearner(k, h, selection_strategy)
112 | elif bias == "dnf":
113 | learner = KDnfSmtLearner(k, h, selection_strategy)
114 |
115 | if plot is not None and plot and synthetic_problem.bool_count == 0 and synthetic_problem.real_count == 2:
116 | import plotting
117 | feats = domain.real_vars
118 | plots_dir = os.path.join(results_dir, name)
119 | exp_id = "{}_{}_{}".format(learner.name, sample_count, seed)
120 | learner.add_observer(plotting.PlottingObserver(data, plots_dir, exp_id, *feats))
121 | log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, k, h)
122 | learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy))
123 | else:
124 | raise RuntimeError("Unknown bias {}".format(bias))
125 |
126 | result = timeout(learner.learn, [domain, data, initial_indices], duration=time_out)
127 | else:
128 | def learn_f(_data, _k, _h):
129 | selection_strategy = incremental_config.get_selection_strategy()
130 | if bias == "cnf":
131 | learner = KCnfSmtLearner(_k, _h, selection_strategy)
132 | elif bias == "dnf":
133 | learner = KDnfSmtLearner(_k, _h, selection_strategy)
134 | initial_indices = incremental_config.get_initial_indices()
135 | log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, _k, _h)
136 | learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy))
137 | return learner.learn(domain, data, initial_indices)
138 |
139 | result, k, h = learn_bottom_up(data, learn_f, 3, 1)
140 | if result is None:
141 | flat[name][sample_count] = {"k": k, "h": h, "seed": seed, "bias": bias, "time_out": True}
142 | else:
143 | flat[name][sample_count] = {"k": k, "h": h, "seed": seed, "bias": bias, "time_out": False}
144 | if time_out is not None:
145 | flat[name][sample_count]["time_limit"] = time_out
146 |
147 | with open(overview, "w") as f:
148 | json.dump(flat, f)
149 |
150 |
151 | if __name__ == "__main__":
152 | parser = argparse.ArgumentParser()
153 | parser.add_argument("input_dir")
154 | parser.add_argument("prefix")
155 | parser.add_argument("output_dir")
156 | parser.add_argument("--bias", default="cnf")
157 | parser.add_argument("--initial", default="random")
158 | parser.add_argument("--initial_size", default=20, type=int)
159 | parser.add_argument("--selection", default="random")
160 | parser.add_argument("--selection_size", default=10, type=int)
161 | parser.add_argument("-p", "--plot", action="store_true")
162 | parser.add_argument("-s", "--samples", default=None, type=int)
163 | parser.add_argument("-t", "--time_out", default=None, type=int)
164 | parser.add_argument("-a", "--non_incremental", default=False, action="store_true")
165 | parser.add_argument("-f", "--parameter_free", default=False, action="store_true")
166 | parsed = parser.parse_args()
167 |
168 | if parsed.non_incremental:
169 | inc_config = IncrementalConfig(None, None, None, None)
170 | else:
171 | inc_config = IncrementalConfig(parsed.initial, parsed.initial_size, parsed.selection, parsed.selection_size)
172 |
173 | learn_synthetic(parsed.input_dir, parsed.prefix, parsed.output_dir, parsed.bias, inc_config,
174 | parsed.plot, parsed.samples, parsed.time_out, parsed.parameter_free)
175 |
--------------------------------------------------------------------------------
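Note: learn_synthetic above records one entry per problem and sample count in the problems.txt overview it writes to the results directory. A minimal sketch of reading such an overview back (the path is a placeholder, not a file from this repository):

    import json

    # Placeholder path; learn_synthetic writes "problems.txt" into its results directory.
    with open("output/problems.txt") as f:
        overview = json.load(f)

    for name, runs in overview.items():
        for sample_count, run in runs.items():
            status = "timed out" if run["time_out"] else "k={} h={}".format(run["k"], run["h"])
            print(name, sample_count, run["bias"], status)
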
/incal/extra/migrate.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 |
3 | import argparse
4 | import json
5 |
6 | import os
7 | import random
8 | import shutil
9 |
10 | import re
11 | import subprocess
12 |
13 | import time
14 | from bitarray import bitarray
15 |
16 | import generator
17 | import parse
18 | import problem
19 | from smt_check import test
20 | from smt_print import pretty_print
21 | from smt_scan import load_results, get_log_messages, dump_results
22 | import pysmt.shortcuts as smt
23 |
24 |
25 | def migrate_results(directory, bias=None):
26 | summary = os.path.join(directory, "problems.txt")
27 | if os.path.isfile(summary):
28 | with open(summary, "r") as f:
29 | flat = json.load(f)
30 |
31 | for problem_id in flat:
32 | for sample_size in flat[problem_id]:
33 | if "bias" not in flat[problem_id][sample_size]:
34 | flat[problem_id][sample_size]["bias"] = "cnf" if bias is None else bias
35 |
36 | seed, k, h = (flat[problem_id][sample_size][v] for v in ["seed", "k", "h"])
37 |
38 | pattern = r'{problem_id}_{size}_{seed}_\d+_\d+.txt' \
39 | .format(problem_id=problem_id, size=sample_size, seed=seed)
40 | for old_file in os.listdir(directory):
41 | if re.match(pattern, old_file):
42 | new_file = old_file[:-4] + ".learning_log.txt"
43 | shutil.move(os.path.join(directory, old_file), os.path.join(directory, new_file))
44 |
45 | with open(summary, "w") as f:
46 | json.dump(flat, f)
47 |
48 |
49 | def calculate_accuracy(domain, target_formula, learned_formula):
50 | # from sys import path
51 | # path.insert(0, "/Users/samuelkolb/Documents/PhD/wmi-pa/experiments/client")
52 | # from run import compute_wmi
53 | print("Calculate accuracy:")
54 | # print(pretty_print(target_formula))
55 | # print(pretty_print(learned_formula))
56 |
57 | # r0, r1 = [smt.Symbol(n, smt.REAL) for n in ["r0", "r1"]]
58 | # b0, b1, b2, b3 = [smt.Symbol(n, smt.BOOL) for n in ["b0", "b1", "b2", "b3"]]
59 | # t1 = (~(1.0 <= 0.427230115861 * r0 + 1.02084935803 * r1) | ~(1.0 <= 1.59402729715 * r0 + 0.309004054118 * r1) | ~b1)
60 | # t2 = (b2 | (1.0 <= 1.59402729715 * r0 + 0.309004054118 * r1) | ~b0)
61 |
62 | # domain = problem.Domain(["x", "y"], {"x": smt.REAL, "y": smt.REAL}, {"x": (0, 1), "y": (0, 1)})
63 | # x, y = smt.Symbol("x", smt.REAL), smt.Symbol("y", smt.REAL)
64 | # t2 = (1.0 <= 1.5 * x + 0.5 * y)
65 | # t2 = (2 <= 3 * x + y)
66 | # f = (t1 & t2)
67 |
68 | flat = {
69 | "domain": problem.export_domain(domain, False),
70 | "query": parse.smt_to_nested(smt.Iff(target_formula, learned_formula))
71 | }
72 |
73 | print(domain)
74 | print(pretty_print(target_formula))
75 | print(pretty_print(learned_formula))
76 | # accuracy = list(compute_wmi(domain, [smt.Iff(target_formula, learned_formula)]))[0]
77 |
78 | output = str(subprocess.check_output(["/Users/samuelkolb/Documents/PhD/wmi-pa/env/bin/python",
79 | "/Users/samuelkolb/Documents/PhD/wmi-pa/experiments/client/run.py", "-s",
80 | json.dumps(flat)]))
81 | accuracy = float(output.split(": ")[1])
82 | print(accuracy)
83 | return accuracy
84 |
85 |
86 | def calculate_accuracy_approx(domain, target_formula, learned_formula, samples):
87 | bits_target = bitarray([test(target_formula, sample) for sample in samples])
88 | bits_learned = bitarray([test(learned_formula, sample) for sample in samples])
89 | accuracy = ((bits_target & bits_learned) | (~bits_target & ~bits_learned)).count() / len(samples)
90 | print(accuracy)
91 | return accuracy
92 |
93 |
94 | def adapt_domain_multiple(target_problem, new_bounds):
95 | domain = target_problem.domain
96 | adapted_domain = problem.Domain(domain.variables, domain.var_types, new_bounds)
97 | return problem.Problem(adapted_domain, target_problem.theory, target_problem.name)
98 |
99 |
100 | def get_problem(data_dir, problem_id):
101 | try:
102 | with open(os.path.join(data_dir, "{}.txt".format(str(problem_id)))) as f:
103 | import generator
104 | s_problem = generator.import_synthetic_data(json.load(f))
105 | return s_problem.synthetic_problem.theory_problem
106 | except IOError:
107 | with open(os.path.join(data_dir, "problems", "{}.txt".format(str(problem_id)))) as f:
108 | import generator
109 | theory_problem = problem.import_problem(json.load(f))
110 |
111 | with open(os.path.join(data_dir, "summary.json"), "r") as f:
112 | flat = json.load(f)
113 | ratio_dict = flat["ratios"]
114 | lookup = flat["lookup"]
115 |
116 | adapted_problem = adapt_domain_multiple(theory_problem, ratio_dict[lookup[problem_id]]["bounds"])
117 |
118 | return adapted_problem
119 |
120 |
121 | def add_accuracy(results_dir, data_dir=None, acc_sample_size=None, recompute=False):
122 | results_flat = load_results(results_dir)
123 |
124 | for problem_id in results_flat:
125 |
126 | if data_dir is not None:
127 | theory_problem = get_problem(data_dir, problem_id)
128 | domain = theory_problem.domain
129 | target_formula = theory_problem.theory
130 | print(problem_id)
131 | print(pretty_print(target_formula))
132 | else:
133 | raise RuntimeError("Data directory missing")
134 |
135 | for sample_size in results_flat[problem_id]:
136 | config = results_flat[problem_id][sample_size]
137 | timed_out = config.get("time_out", False)
138 | if not timed_out:
139 | learned_formula = None
140 | for message in get_log_messages(results_dir, config, p_id=problem_id, samples=sample_size):
141 | if message["type"] == "update":
142 | learned_formula = parse.nested_to_smt(message["theory"])
143 |
144 | print(pretty_print(learned_formula))
145 | print()
146 |
147 | if acc_sample_size is None:
148 | if recompute or "exact_accuracy" not in config:
149 | config["exact_accuracy"] = calculate_accuracy(domain, target_formula, learned_formula)
150 | else:
151 | if recompute or "approx_accuracy" not in config:
152 | config["approx_accuracy"] = dict()
153 | acc_dict = config["approx_accuracy"]
154 | if acc_sample_size not in acc_dict:
155 | acc_dict[acc_sample_size] = []
156 | if len(acc_dict[acc_sample_size]) < 1:
157 | seed = hash(time.time())
158 | random.seed(seed)
159 | samples = [generator.get_sample(domain) for _ in range(acc_sample_size)]
160 | acc_dict[acc_sample_size].append({
161 | "acc": calculate_accuracy_approx(domain, target_formula, learned_formula, samples),
162 | "seed": seed,
163 | })
164 |
165 | dump_results(results_flat, results_dir)
166 |
167 |
168 | def calculate_ratio(domain, formula):
169 | raise NotImplementedError()
170 |
171 |
172 | def calculate_ratio_approx(formula, samples):
173 | bits = bitarray([test(formula, sample) for sample in samples])
174 | positives = bits.count() / len(samples)
175 | ratio = max(positives, 1 - positives)
176 | print("Ratio: {}".format(ratio))
177 | return ratio
178 |
179 |
180 | def add_ratio(results_dir, data_dir=None, ratio_sample_size=None, recompute=False):
181 | results_flat = load_results(results_dir)
182 |
183 | ratio_cache = dict()
184 |
185 | for problem_id in results_flat:
186 | if data_dir is not None:
187 | theory_problem = get_problem(data_dir, problem_id)
188 | domain = theory_problem.domain
189 | formula = theory_problem.theory
190 | else:
191 | raise RuntimeError("Data directory missing")
192 |
193 | seed = hash(time.time())
194 | random.seed(seed)
195 | samples = [generator.get_sample(domain) for _ in range(ratio_sample_size)] if ratio_sample_size is not None else None
196 |
197 | ratio = calculate_ratio(domain, formula) if ratio_sample_size is None else calculate_ratio_approx(formula, samples)
198 |
199 | for sample_size in results_flat[problem_id]:
200 | config = results_flat[problem_id][sample_size]
201 |
202 | if ratio_sample_size is None:
203 | if recompute or "exact_ratio" not in config:
204 | config["exact_ratio"] = ratio
205 | else:
206 | if recompute or "approx_ratio" not in config:
207 | config["approx_ratio"] = dict()
208 | ratio_dict = config["approx_ratio"]
209 | if ratio_sample_size not in ratio_dict:
210 | ratio_dict[ratio_sample_size] = []
211 | if len(ratio_dict[ratio_sample_size]) < 1:
212 | ratio_dict[ratio_sample_size].append({
213 | "ratio": ratio,
214 | "seed": seed,
215 | })
216 |
217 | dump_results(results_flat, results_dir)
218 |
219 |
220 | if __name__ == "__main__":
221 | x = smt.Symbol("x", smt.REAL)
222 | calculate_accuracy(problem.Domain(["x"], {"x": smt.REAL}, {"x": (0, 1)}), x <= smt.Real(0.5), x <= smt.Real(0.4))
--------------------------------------------------------------------------------
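Note: calculate_accuracy_approx above measures accuracy as the fraction of samples on which the target and learned formulas agree. A self-contained sketch of that computation on toy truth values (the bit vectors stand in for per-sample evaluations of the two formulas):

    from bitarray import bitarray

    bits_target = bitarray([True, True, False, False, True])
    bits_learned = bitarray([True, False, False, False, True])

    # Agreement on a sample means both formulas evaluate to the same truth value there.
    agreement = (bits_target & bits_learned) | (~bits_target & ~bits_learned)
    print(agreement.count() / len(bits_target))  # 4 agreements out of 5 samples -> 0.8
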
/incal/incremental_learner.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import pysmt.shortcuts as smt
4 | from pysmt.exceptions import InternalSolverError
5 |
6 | from observe import observe
7 | from learner import Learner, NoFormulaFound
8 |
9 |
10 | class IncrementalObserver(observe.SpecializedObserver):
11 | def observe_initial(self, data, labels, initial_indices):
12 | raise NotImplementedError()
13 |
14 | def observe_iteration(self, data, labels, formula, new_active_indices, solving_time, selection_time):
15 | raise NotImplementedError()
16 |
17 |
18 | class IncrementalLearner(Learner):
19 | def __init__(self, name, selection_strategy, smt_solver=True):
20 | """
21 | Initializes a new incremental learner
22 | :param str name: The learner name
23 | :param SelectionStrategy selection_strategy: The selection strategy
24 | """
25 | Learner.__init__(self, "incremental_{}".format(name))
26 | self.selection_strategy = selection_strategy
27 | self.observer = observe.DispatchObserver()
28 | self.smt_solver = smt_solver
29 |
30 | def add_observer(self, observer):
31 | self.observer.add_observer(observer)
32 |
33 | def learn(self, domain, data, labels, initial_indices=None):
34 | if self.smt_solver:
35 | with smt.Solver() as solver:
36 | data, formula, labels = self.incremental_loop(domain, data, labels, initial_indices, solver)
37 | else:
38 | data, formula, labels = self.incremental_loop(domain, data, labels, initial_indices, None)
39 |
40 | return data, labels, formula
41 |
42 | def incremental_loop(self, domain, data, labels, initial_indices, solver):
43 | active_indices = list(range(len(data))) if initial_indices is None else initial_indices
44 | all_active_indices = list(active_indices)  # copy, so the caller's initial_indices list is not mutated below
45 | self.observer.observe("initial", data, labels, active_indices)
46 | formula = None
47 | while len(active_indices) > 0:
48 | solving_start = time.time()
49 | try:
50 | formula = self.learn_partial(solver, domain, data, labels, active_indices)
51 | except InternalSolverError:
52 | raise NoFormulaFound(data, labels)
53 | except Exception as e:
54 | if "Z3Exception" in str(type(e)):
55 | raise NoFormulaFound(data, labels)
56 | else:
57 | raise e
58 |
59 | solving_time = time.time() - solving_start
60 |
61 | selection_start = time.time()
62 | data, labels, new_active_indices = \
63 | self.selection_strategy.select_active(domain, data, labels, formula, all_active_indices)
64 | active_indices = list(new_active_indices)
65 | all_active_indices += active_indices
66 | selection_time = time.time() - selection_start
67 | self.observer.observe("iteration", data, labels, formula, active_indices, solving_time, selection_time)
68 | return data, formula, labels
69 |
70 | def learn_partial(self, solver, domain, data, labels, new_active_indices):
71 | raise NotImplementedError()
72 |
--------------------------------------------------------------------------------
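Note: the incremental loop only talks to its selection strategy through select_active(domain, data, labels, formula, active_indices), which returns the (possibly updated) data, labels and the indices to activate next. A minimal strategy satisfying that contract, shown purely to illustrate the interface (it is not one of the strategies shipped in incal/violations):

    import random

    from pywmi import evaluate


    class SampleViolationsStrategy(object):
        """Illustrative only: activate a few examples the current formula still misclassifies."""

        def __init__(self, count):
            self.count = count
            self.last_violations = None  # read by LoggingObserver when passed as violation_counter

        def select_active(self, domain, data, labels, formula, active_indices):
            already_active = set(active_indices)
            predicted = evaluate(domain, formula, data)
            violations = [i for i in range(len(labels))
                          if bool(predicted[i]) != bool(labels[i]) and i not in already_active]
            self.last_violations = violations
            selected = random.sample(violations, min(self.count, len(violations)))
            return data, labels, selected
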
/incal/k_cnf_smt_learner.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import pysmt.shortcuts as smt
5 | from pysmt.fnode import FNode
6 | from pysmt.typing import REAL
7 | from typing import Set, Tuple, List
8 |
9 | from incremental_learner import IncrementalLearner
10 | from pywmi import Domain
11 |
12 |
13 | class KCnfSmtLearner(IncrementalLearner):
14 | def __init__(self, conjunction_count, half_space_count, selection_strategy, symmetries, allow_negations=True):
15 | IncrementalLearner.__init__(self, "cnf_smt", selection_strategy)
16 | self.conjunction_count = conjunction_count
17 | self.half_space_count = half_space_count
18 | self.symmetries = symmetries
19 | self.allow_negations = allow_negations
20 |
21 | def learn_partial(self, solver, domain: Domain, data: np.ndarray, labels: np.ndarray, new_active_indices: Set):
22 |
23 | # Constants
24 | n_b_original = len(domain.bool_vars)
25 | n_b = n_b_original * 2
26 | n_r = len(domain.real_vars)
27 |
28 | n_h_original = self.half_space_count if n_r > 0 else 0
29 | n_h = n_h_original * 2 if self.allow_negations else n_h_original
30 |
31 | n_c = self.conjunction_count
32 | n_d = data.shape[0]
33 |
34 | real_indices = np.array([domain.var_types[v] == smt.REAL for v in domain.variables])
35 | real_features = data[:, real_indices]
36 | bool_features = data[:, np.logical_not(real_indices)]
37 |
38 | # Variables
39 | a_hr = [[smt.Symbol("a_hr[{}][{}]".format(h, r), REAL) for r in range(n_r)] for h in range(n_h_original)]
40 | b_h = [smt.Symbol("b_h[{}]".format(h), REAL) for h in range(n_h_original)]
41 | s_ch = [[smt.Symbol("s_ch[{}][{}]".format(c, h)) for h in range(n_h)] for c in range(n_c)]
42 | s_cb = [[smt.Symbol("s_cb[{}][{}]".format(c, b)) for b in range(n_b)] for c in range(n_c)]
43 |
44 | # Aux variables
45 | s_ih = [[smt.Symbol("s_ih[{}][{}]".format(i, h)) for h in range(n_h)] for i in range(n_d)]
46 | s_ic = [[smt.Symbol("s_ic[{}][{}]".format(i, c)) for c in range(n_c)] for i in range(n_d)]
47 |
48 | def pair(real: bool, c: int, index: int) -> Tuple[FNode, FNode]:
49 | if real:
50 | return s_ch[c][index], s_ch[c][index + n_h_original]
51 | else:
52 | return s_cb[c][index], s_cb[c][index + n_b_original]
53 |
54 | def order_equal(pair1, pair2):
55 | x_t, x_f, y_t, y_f = pair1 + pair2
56 | return smt.Iff(x_t, y_t) & smt.Iff(x_f, y_f)
57 |
58 | def order_geq(pair1, pair2):
59 | x_t, x_f, y_t, y_f = pair1 + pair2
60 | return x_t | y_f | ((~x_f) & (~y_t))
61 |
62 | def pairs(c: int) -> List[Tuple[FNode, FNode]]:
63 | return [pair(True, c, i) for i in range(n_h_original)] + [pair(False, c, i) for i in range(n_b_original)]
64 |
65 | def order_geq_lex(c1: int, c2: int):
66 | pairs_c1, pairs_c2 = pairs(c1), pairs(c2)
67 | assert len(pairs_c1) == len(pairs_c2)
68 | constraints = smt.TRUE()
69 | for j in range(len(pairs_c1)):
70 | condition = smt.TRUE()
71 | for i in range(j):
72 | condition &= order_equal(pairs_c1[i], pairs_c2[i])
73 | constraints &= smt.Implies(condition, order_geq(pairs_c1[j], pairs_c2[j]))
74 | return constraints
75 |
76 | # Constraints
77 | for i in new_active_indices:
78 | x_r, x_b, label = [float(val) for val in real_features[i]], bool_features[i], labels[i]
79 |
80 | for h in range(n_h_original):
81 | sum_coefficients = smt.Plus([a_hr[h][r] * smt.Real(x_r[r]) for r in range(n_r)])
82 | solver.add_assertion(smt.Iff(s_ih[i][h], sum_coefficients <= b_h[h]))
83 |
84 | for h in range(n_h_original, n_h):
85 | solver.add_assertion(smt.Iff(s_ih[i][h], ~s_ih[i][h - n_h_original]))
86 |
87 | for c in range(n_c):
88 | solver.add_assertion(smt.Iff(s_ic[i][c], smt.Or(
89 | [smt.FALSE()]
90 | + [(s_ch[c][h] & s_ih[i][h]) for h in range(n_h)]
91 | + [s_cb[c][b] for b in range(n_b_original) if x_b[b]]
92 | + [s_cb[c][b] for b in range(n_b_original, n_b) if not x_b[b - n_b_original]]
93 | )))
94 |
95 | # --- [start] symmetry breaking ---
96 | # Mutually exclusive
97 | if "m" in self.symmetries:
98 | for c in range(n_c):
99 | for h in range(n_h_original):
100 | solver.add_assertion(~(s_ch[c][h] & s_ch[c][h + n_h_original]))
101 | for b in range(n_b_original):
102 | solver.add_assertion(~(s_cb[c][b] & s_cb[c][b + n_b_original]))
103 |
104 | # Normalized
105 | if "n" in self.symmetries:
106 | for h in range(n_h_original):
107 | solver.add_assertion(smt.Equals(b_h[h], smt.Real(1.0)) | smt.Equals(b_h[h], smt.Real(0.0)))
108 |
109 | # Vertical symmetries
110 | if "v" in self.symmetries:
111 | for c in range(n_c - 1):
112 | solver.add_assertion(order_geq_lex(c, c + 1))
113 |
114 | # Horizontal symmetries
115 | if "h" in self.symmetries:
116 | for h in range(n_h_original - 1):
117 | solver.add_assertion(a_hr[h][0] >= a_hr[h + 1][0])
118 | # --- [end] symmetry breaking ---
119 |
120 | if label:
121 | solver.add_assertion(smt.And([s_ic[i][c] for c in range(n_c)]))
122 | else:
123 | solver.add_assertion(smt.Or([~s_ic[i][c] for c in range(n_c)]))
124 |
125 | solver.solve()
126 | model = solver.get_model()
127 |
128 | x_vars = [domain.get_symbol(domain.real_vars[r]) for r in range(n_r)]
129 | half_spaces = [
130 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) <= model.get_value(b_h[h])
131 | for h in range(n_h_original)
132 | ] + [
133 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) > model.get_value(b_h[h])
134 | for h in range(n_h - n_h_original)
135 | ]
136 |
137 | b_vars = [domain.get_symbol(domain.bool_vars[b]) for b in range(n_b_original)]
138 | bool_literals = [b_vars[b] for b in range(n_b_original)]
139 | bool_literals += [~b_vars[b] for b in range(n_b - n_b_original)]
140 |
141 | conjunctions = [
142 | [half_spaces[h] for h in range(n_h) if model.get_py_value(s_ch[c][h])]
143 | + [bool_literals[b] for b in range(n_b) if model.get_py_value(s_cb[c][b])]
144 | for c in range(n_c)
145 | ]
146 |
147 | return smt.And([smt.Or(conjunction) for conjunction in conjunctions])
148 |
--------------------------------------------------------------------------------
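Note: a small end-to-end sketch of how this learner is typically wired up, on a toy two-dimensional concept (assuming both the repository root and the incal directory are on sys.path, since the modules mix flat and package-style imports):

    import numpy as np
    from pywmi import Domain

    from incal.k_cnf_smt_learner import KCnfSmtLearner
    from incal.violations.core import RandomViolationsStrategy

    domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
    data = np.random.random((500, 2))
    labels = data[:, 0] + data[:, 1] <= 1.0  # toy target concept: x + y <= 1

    # k=2 clauses, h=2 half-spaces; "" disables the symmetry-breaking options ("m", "n", "v", "h").
    learner = KCnfSmtLearner(2, 2, RandomViolationsStrategy(10), symmetries="")
    initial_indices = list(range(20))
    data, labels, formula = learner.learn(domain, data, labels, initial_indices)
    print(formula)
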
/incal/learn.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 | from incal.observe.inc_logging import LoggingObserver
5 | from pysmt.fnode import FNode
6 | from pywmi import smt_to_nested
7 | from pywmi.domain import Density, Domain
8 | from typing import Tuple, Optional
9 |
10 | from .parameter_free_learner import learn_bottom_up
11 | from .violations.core import RandomViolationsStrategy
12 | from .violations.dt_selection import DecisionTreeSelection
13 | from .k_cnf_smt_learner import KCnfSmtLearner
14 | from .util.options import Options, Results
15 |
16 |
17 | class LearnOptions(Options):
18 | def __init__(self):
19 | super().__init__(learn)
20 | self.add_option("domain", str, None, LearnOptions.domain_extraction)
21 | self.add_option("data", str, None, LearnOptions.np_extraction)
22 | self.add_option("labels", str, None, LearnOptions.np_extraction)
23 |
24 | self.add_option("learner", (str, str), ("cnf", "-"), Options.convert_dict(
25 | cnf=LearnOptions.cnf_factory_wrap
26 | ), arg_name="learner_factory")
27 | self.add_option("initial_strategy", (str, int), ("random", 20), Options.convert_dict(
28 | random=LearnOptions.initial_random
29 | ))
30 | self.add_option("selection_strategy", (str, int), ("random", 10), Options.convert_dict(
31 | random=LearnOptions.select_random,
32 | dt=LearnOptions.select_dt
33 | ))
34 | self.add_option("initial_k", int, 1)
35 | self.add_option("initial_h", int, 0)
36 | self.add_option("weight_k", float, 1)
37 | self.add_option("weight_h", float, 1)
38 | self.add_option("log", str)
39 | # self.add_option("max_k", int, None)
40 | # self.add_option("max_h", int, None)
41 |
42 | @staticmethod
43 | def domain_extraction(filename):
44 | return Density.import_from(filename).domain
45 |
46 | @staticmethod
47 | def np_extraction(filename):
48 | return np.load(filename)
49 |
50 | @staticmethod
51 | def cnf_factory_wrap(symmetries):
52 | def cnf_factory(k, h, selection_strategy):
53 | return KCnfSmtLearner(k, h, selection_strategy, symmetries=symmetries)
54 | return cnf_factory
55 |
56 | @staticmethod
57 | def initial_random(count):
58 | def random_selection(indices):
59 | return random.sample(indices, count)
60 | return random_selection
61 |
62 | @staticmethod
63 | def select_random(count):
64 | return RandomViolationsStrategy(count)
65 |
66 | @staticmethod
67 | def select_dt(count):
68 | return DecisionTreeSelection()
69 |
70 | def make_copy(self):
71 | return LearnOptions()
72 |
73 |
74 | class LearnResults(Results):
75 | def __init__(self):
76 | super().__init__()
77 | self.add_duration()
78 | self.add_result("formula", LearnResults.extract_formula)
79 | self.add_result("k", LearnResults.extract_k)
80 | self.add_result("h", LearnResults.extract_h)
81 |
82 | @staticmethod
83 | def extract_formula(result):
84 | return smt_to_nested(result[0])
85 |
86 | @staticmethod
87 | def extract_k(result):
88 | return result[1]
89 |
90 | @staticmethod
91 | def extract_h(result):
92 | return result[2]
93 |
94 |
95 | def learn(
96 | domain: Domain,
97 | data: np.ndarray,
98 | labels: np.ndarray,
99 | learner_factory: callable,
100 | initial_strategy: callable,
101 | selection_strategy: object,
102 | initial_k: int,
103 | initial_h: int,
104 | weight_k: float,
105 | weight_h: float,
106 | log: Optional[str]=None
107 | ) -> Tuple[FNode, int, int]:
108 | """
109 | Learn a formula that separates the positive and negative examples
110 | :return: A tuple containing 1. the learned formula, 2. the number of terms (or clauses) used,
111 | 3. the number of hyperplanes used
112 | """
113 |
114 | # log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "demo", "results")
115 | # problem_name = hashlib.sha256(name).hexdigest()
116 |
117 | def learn_inc(_data, _labels, _i, _k, _h):
118 | learner = learner_factory(_k, _h, selection_strategy)
119 | initial_indices = initial_strategy(list(range(len(_data))))
120 | # log_file = os.path.join(log_dir, "{}_{}_{}.txt".format(problem_name, _k, _h))
121 | if log is not None:
122 | learner.add_observer(LoggingObserver(log, _k, _h, None, False, selection_strategy))
123 | return learner.learn(domain, _data, _labels, initial_indices)
124 |
125 | ((_d, _l, formula), k, h) =\
126 | learn_bottom_up(data, labels, learn_inc, weight_k, weight_h, initial_k, initial_h, None, None)
127 | return formula, k, h
128 |
--------------------------------------------------------------------------------
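Note: the same pieces can be driven through the learn() entry point above, which wraps the incremental learner in the parameter-free (k, h) search. A sketch on toy data, using the static factories defined in LearnOptions (same import-path assumptions as the incal modules themselves):

    import numpy as np
    from pywmi import Domain

    from incal.learn import LearnOptions, learn

    domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
    data = np.random.random((500, 2))
    labels = data[:, 0] <= 0.5  # toy target concept

    formula, k, h = learn(
        domain, data, labels,
        learner_factory=LearnOptions.cnf_factory_wrap(""),   # "" = no symmetry breaking
        initial_strategy=LearnOptions.initial_random(20),
        selection_strategy=LearnOptions.select_random(10),
        initial_k=1, initial_h=0, weight_k=1.0, weight_h=1.0,
    )
    print(k, h, formula)
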
/incal/learner.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pysmt.shortcuts as smt
3 | from typing import Tuple
4 |
5 | from pysmt.fnode import FNode
6 | from pywmi import Domain
7 |
8 |
9 | class NoFormulaFound(RuntimeError):
10 | def __init__(self, data, labels):
11 | self.data = data
12 | self.labels = labels
13 |
14 |
15 | class Learner(object):
16 | def __init__(self, name):
17 | self.name = name
18 |
19 | def learn(self, domain: Domain, data: np.ndarray, labels: np.ndarray, border_indices)\
20 | -> Tuple[np.ndarray, np.ndarray, FNode]:
21 | raise NotImplementedError()
22 |
23 | @staticmethod
24 | def _convert(value):
25 | return float(value.constant_value())
26 |
27 | @staticmethod
28 | def _get_misclassification(data):
29 | true_count = 0
30 | for _, l in data:
31 | if l:
32 | true_count += 1
33 | return min(true_count, len(data) - true_count)
34 |
35 | @staticmethod
36 | def check_example(domain, example_features, dnf_list):
37 | x_vars = [domain.get_symbol(var) for var in domain.real_vars]
38 | b_vars = [domain.get_symbol(var) for var in domain.bool_vars]
39 |
40 | formula = smt.Or([smt.And(hyperplane_conjunct) for hyperplane_conjunct in dnf_list])
41 | substitution = {var: example_features[str(var)] for var in x_vars + b_vars}
42 | return formula.substitute(substitution).simplify().is_true()
43 |
44 | @staticmethod
45 | def fit_hyperplane(domain, examples):
46 | matrix = examples[:, [domain.is_real(v) for v in domain.variables]]
47 | k = np.ones((len(examples), 1))
48 | a = np.linalg.solve(matrix, k)  # solve matrix . a = 1 directly instead of inverting
49 | return a, 1
50 |
--------------------------------------------------------------------------------
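Note: fit_hyperplane solves matrix * a = 1 for the coefficient vector a, i.e. it returns the hyperplane a . x = 1 passing through the given real-valued points. A tiny worked example of the underlying computation with numpy only (without the Domain wrapper):

    import numpy as np

    # Two points in 2D determine a line a1*x + a2*y = 1.
    points = np.array([[1.0, 0.0],
                       [0.0, 2.0]])
    a = np.linalg.solve(points, np.ones((2, 1)))
    print(a.ravel())  # [1.  0.5], i.e. x + 0.5*y = 1 passes through both points
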
/incal/lp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/lp/__init__.py
--------------------------------------------------------------------------------
/incal/lp/examples.py:
--------------------------------------------------------------------------------
1 | from pywmi import Domain
2 |
3 | from lp.model import Model
4 |
5 |
6 | def lp_domain(n, ranges=None):
7 | if ranges is None:
8 | ranges = [(None, None) for i in range(n)]
9 | return Domain.make([], ["x{}".format(i + 1) for i in range(n)], ranges)
10 |
11 |
12 | def lp_2_6() -> Model:
13 | domain = lp_domain(2)
14 | x1, x2 = domain.get_symbols(domain.variables)
15 | return Model(
16 | domain,
17 | 300 * x1 + 200 * x2,
18 | [
19 | 2 * x1 + x2 <= 100,
20 | x1 + x2 <= 80,
21 | x1 <= 40,
22 | x1 >= 0,
23 | x2 >= 0
24 | ],
25 | minimize=False,
26 | name="LP_2_6"
27 | )
28 |
29 |
30 | def lp_2_7() -> Model:
31 | domain = lp_domain(4)
32 | x1, x2, x3, x4 = domain.get_symbols(domain.variables)
33 | return Model(
34 | domain,
35 | 320 * x1 + 400 * x2 + 480 * x3 + 560 * x4,
36 | [
37 | 0.06 * x1 + 0.03 * x2 + 0.02 * x3 + 0.01 * x4 >= 3.5,
38 | 0.03 * x1 + 0.02 * x2 + 0.05 * x3 + 0.06 * x4 <= 3,
39 | 0.08 * x1 + 0.03 * x2 + 0.02 * x3 + 0.01 * x4 == 4,
40 | x1 + x2 + x3 + x4 == 110,
41 | ] + [x >= 0 for x in domain.get_symbols(domain.variables)],
42 | minimize=False,
43 | name="LP_2_7"
44 | )
45 |
46 |
47 | def lp_2_8() -> Model:
48 | domain = lp_domain(3)
49 | x1, x2, x3 = domain.get_symbols(domain.variables)
50 | return Model(
51 | domain,
52 | 5 * x1 + 4 * x2 + 3 * x3,
53 | [
54 | 2 * x1 + 3 * x2 + x3 <= 5,
55 | 4 * x1 + x2 + 2 * x3 <= 11,
56 | 3 * x1 + 4 * x2 + 2 * x3 <= 5,
57 | ] + [x >= 0 for x in domain.get_symbols(domain.variables)],
58 | minimize=False,
59 | name="LP_2_8"
60 | )
61 |
62 |
63 | def lp_2_9() -> Model:
64 | domain = lp_domain(4)
65 | x1, x2, x3, x4 = domain.get_symbols(domain.variables)
66 | return Model(
67 | domain,
68 | 3 * x1 - x2,
69 | [
70 | 0 - x1 + 6 * x2 - x3 + x4 >= -3,
71 | 7 * x2 + 2 * x4 == 5,
72 | x1 + x2 + x3 - x4 <= 2,
73 | x1 >= 0,
74 | x3 >= 0,
75 | ],
76 | minimize=True,
77 | name="LP_2_9"
78 | )
79 |
80 |
81 |
--------------------------------------------------------------------------------
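Note: each example returns a Model whose constraints are plain pysmt formulas over the domain's symbols, so they can be handed directly to an SMT solver. A sketch that checks feasibility of LP_2_6 and reads back one satisfying point (a feasibility check only, not the LP optimum; same import-path assumptions as the other incal modules):

    import pysmt.shortcuts as smt

    from incal.lp.examples import lp_2_6

    model = lp_2_6()
    with smt.Solver() as solver:
        for constraint in model.constraints:
            solver.add_assertion(constraint)
        if solver.solve():
            assignment = solver.get_model()
            for v in model.domain.variables:
                print(v, assignment.get_value(model.domain.get_symbol(v)))
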
/incal/lp/model.py:
--------------------------------------------------------------------------------
1 | from pysmt.fnode import FNode
2 | from pywmi import Domain
3 | from typing import List
4 |
5 |
6 | class Model(object):
7 | def __init__(self, domain: Domain, objective: FNode, constraints: List[FNode], minimize: bool=True, name=None):
8 | self.domain = domain
9 | self.objective = objective
10 | self.constraints = constraints
11 | self.minimize = minimize
12 | self.name = name
13 |
--------------------------------------------------------------------------------
/incal/observe/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/observe/__init__.py
--------------------------------------------------------------------------------
/incal/observe/inc_logging.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function, division
2 |
3 | import json
4 |
5 | from pywmi import smt_to_nested
6 | from pywmi.smt_print import pretty_print
7 |
8 | from incal.incremental_learner import IncrementalObserver
9 |
10 |
11 | class LoggingObserver(IncrementalObserver):
12 | def __init__(self, filename, k, h, seed=None, verbose=True, violation_counter=None):
13 | self.filename = filename
14 | self.verbose = verbose
15 | self.violation_counter = violation_counter
16 | self.k = k
17 | self.h = h
18 |
19 | if filename is not None:
20 | with open(self.filename, "w") as f:
21 | print("", file=f, end="")
22 |
23 | if seed is not None:
24 | self.log({"type": "seed", "seed": seed, "k": self.k, "h": self.h})
25 |
26 | def log(self, flat):
27 | if self.filename is not None:
28 | with open(self.filename, "a") as f:
29 | print(json.dumps(flat), file=f)
30 |
31 | def observe_initial(self, data, labels, initial_indices):
32 | flat = {"type": "initial", "indices": initial_indices, "k": self.k, "h": self.h}
33 | if self.verbose:
34 | print("Starting with {} examples".format(len(initial_indices)))
35 | self.log(flat)
36 |
37 | def observe_iteration(self, data, labels, formula, new_active_indices, solving_time, selection_time):
38 | flat = {
39 | "type": "update",
40 | "theory": smt_to_nested(formula),
41 | "indices": [int(v) for v in new_active_indices],
42 | "solving_time": solving_time,
43 | "selection_time": selection_time,
44 | "k": self.k,
45 | "h": self.h,
46 | }
47 | if self.violation_counter is not None:
48 | flat["violations"] = [int(v) for v in self.violation_counter.last_violations]
49 |
50 | if self.verbose:
51 | print("Found model after {:.2f}s".format(solving_time))
52 | print(pretty_print(formula))
53 | if self.violation_counter is not None:
54 | violation_count = len(self.violation_counter.last_violations)
55 | selected_count = len(new_active_indices)
56 | print("Selected {} of {} violations in {:.2f}s".format(selected_count, violation_count, selection_time))
57 | self.log(flat)
58 |
--------------------------------------------------------------------------------
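Note: the observer writes one JSON object per line, so a learning log can be replayed with a few lines of stdlib code. A sketch that pulls the final learned theory out of such a log (the filename is a placeholder; the keys mirror the dictionaries built above):

    import json

    updates = []
    with open("some_run.learning_log.txt") as f:
        for line in f:
            message = json.loads(line)
            if message["type"] == "update":
                updates.append(message)

    if updates:
        last = updates[-1]
        print("iterations:", len(updates))
        print("final k/h:", last["k"], last["h"])
        print("final theory (nested):", last["theory"])
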
/incal/observe/observe.py:
--------------------------------------------------------------------------------
1 | class Observer(object):
2 | def observe(self, name, *args, **kwargs):
3 | raise NotImplementedError()
4 |
5 |
6 | class DispatchObserver(Observer):
7 | def __init__(self):
8 | self.observers = []
9 |
10 | def add_observer(self, observer):
11 | self.observers.append(observer)
12 |
13 | def observe(self, name, *args, **kwargs):
14 | for observer in self.observers:
15 | observer.observe(name, *args, **kwargs)
16 |
17 |
18 | class SpecializedObserver(Observer):
19 | def observe(self, name, *args, **kwargs):
20 | instance_method_ref = getattr(self, "observe_{}".format(name))
21 | instance_method_ref(*args, **kwargs)
22 |
--------------------------------------------------------------------------------
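Note: SpecializedObserver routes observe("name", ...) calls to observe_name(...) via getattr, and DispatchObserver fans a single event out to all registered observers; this is how the learners deliver their "initial" and "iteration" events. A minimal illustration (PrintObserver is hypothetical):

    from incal.observe.observe import DispatchObserver, SpecializedObserver


    class PrintObserver(SpecializedObserver):
        def observe_initial(self, data, labels, initial_indices):
            print("initial:", len(initial_indices), "examples")

        def observe_iteration(self, data, labels, formula, new_active_indices, solving_time, selection_time):
            print("iteration: {} new examples, solved in {:.2f}s".format(len(new_active_indices), solving_time))


    dispatcher = DispatchObserver()
    dispatcher.add_observer(PrintObserver())
    dispatcher.observe("initial", [], [], [0, 1, 2])  # routed to PrintObserver.observe_initial
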
/incal/observe/plotting.py:
--------------------------------------------------------------------------------
1 | import time
2 |
3 | import os
4 | from pywmi import evaluate, smt_to_nested
5 |
6 | from incal.incremental_learner import IncrementalObserver
7 |
8 | from pywmi.plot import plot_combined
9 |
10 |
11 | class PlottingObserver(IncrementalObserver):
12 | def __init__(self, domain, directory, name, feat_x, feat_y, condition=None, auto_clean=False, run_name=None):
13 | self.domain = domain
14 |
15 | if not os.path.exists(directory):
16 | os.makedirs(directory)
17 |
18 | if auto_clean:
19 | run_number = 0
20 | run_dir = None
21 | while run_dir is None or os.path.exists(run_dir):
22 | date_folders = time.strftime("%Y{s}%m{s}%d{s}".format(s=os.path.sep))
23 | run_name = run_name + " " if run_name is not None else ""
24 | run_dir_name = "run {}{}".format(run_name, time.strftime("%Hh %Mm %Ss"))
25 | run_dir = os.path.join(directory, date_folders, run_dir_name)
26 | if run_number > 0:
27 | run_dir += "_{}".format(run_number)
28 | run_number += 1
29 | os.makedirs(run_dir)
30 | directory = run_dir
31 |
32 | self.directory = directory
33 |
34 | self.name = name
35 | self.all_active = set()
36 | self.feat_x = feat_x
37 | self.feat_y = feat_y
38 | self.iteration = 0
39 | self.condition = condition
40 |
41 | def observe_initial(self, data, labels, initial_indices):
42 | self.all_active = self.all_active.union(initial_indices)
43 | name = "{}{}{}_{}".format(self.directory, os.path.sep, self.name, self.iteration)
44 | plot_combined(self.feat_x, self.feat_y, self.domain, None, (data, labels), None, name, initial_indices, set(),
45 | self.condition)
46 |
47 | def observe_iteration(self, data, labels, formula, new_active_indices, solving_time, selection_time):
48 | self.iteration += 1
49 | learned_labels = evaluate(self.domain, formula, data)
50 | name = "{}{}{}_{}".format(self.directory, os.path.sep, self.name, self.iteration)
51 | plot_combined(self.feat_x, self.feat_y, self.domain, formula, (data, labels), learned_labels, name,
52 | self.all_active, new_active_indices, condition=self.condition)
53 | self.all_active = self.all_active.union(new_active_indices)
54 |
--------------------------------------------------------------------------------
/incal/old_learners/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/old_learners/__init__.py
--------------------------------------------------------------------------------
/incal/old_learners/k_dnf_logic_learner.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import itertools
4 |
5 | import numpy as np
6 | from pysmt.shortcuts import Plus, Real, Times, LE, GE
7 | from pysmt.typing import REAL, BOOL
8 |
9 | from learner import Learner
10 |
11 |
12 | class KDNFLogicLearner(Learner):
13 | def __init__(self, k):
14 | Learner.__init__(self, "k_dnf_logic")  # Learner.__init__ requires a name; "k_dnf_logic" is a descriptive choice
15 | self.k = k
16 |
17 | def learn(self, domain, data, border_indices):
18 | positive_indices = [i for i in range(len(data)) if data[i][1]]
19 | real_vars = [v for v in domain.variables if domain.var_types[v] == REAL]
20 | bool_vars = [v for v in domain.variables if domain.var_types[v] == BOOL]
21 | d = len(real_vars)
22 | hyperplanes = []
23 | for indices in itertools.combinations(positive_indices, d):
24 | print(indices)
25 | hyperplanes.append(Learner.fit_hyperplane(domain, [data[i][0] for i in indices]))
26 | boolean_data = []
27 | for i in range(len(data)):
28 | row = []
29 | for v in bool_vars:
30 | row.append(data[i][0][v].constant_value())
31 | boolean_data.append(row)
32 | hyperplanes_smt = []
33 | for a, c in hyperplanes:
34 | lhs_smt = Plus(Times(Real(float(a[j])), domain.get_symbol(real_vars[j])) for j in range(d))
35 | hyperplanes_smt.append(LE(lhs_smt, Real(c)))
36 | lhs_smt = Plus(Times(Real(-float(a[j])), domain.get_symbol(real_vars[j])) for j in range(d))
37 | hyperplanes_smt.append(LE(lhs_smt, Real(-c)))
38 | for i in range(len(data)):
39 | lhs = 0
40 | for j in range(d):
41 | lhs += float(a[j]) * float(data[i][0][real_vars[j]].constant_value())
42 | boolean_data[i].append(lhs <= c)
43 | boolean_data[i].append(lhs >= c)
44 | print(boolean_data)
45 | # logical_dnf_indices = [[i] for i in range(len(boolean_data[0]))]
46 | logical_dnf_indices = self.learn_logical(boolean_data, [row[1] for row in data])
47 | logical_dnf = [
48 | [domain.get_symbol(bool_vars[i]) if i < len(bool_vars) else
49 | hyperplanes_smt[i - len(bool_vars)] for i in conj_indices]
50 | for conj_indices in logical_dnf_indices
51 | ]
52 | print(logical_dnf)
53 | return logical_dnf
54 |
55 | def learn_logical(self, boolean_data, labels):
56 | conjunctions = []
57 | for k in range(1, self.k + 1):
58 | for features in itertools.combinations(list(range(len(boolean_data[0]))), k):  # combinations of feature (column) indices
59 | accept = True
60 | for entry, label in zip(boolean_data, labels):
61 | if not label and all(entry[j] for j in features):
62 | accept = False
63 | break
64 | if accept:
65 | conjunctions.append(features)
66 | return conjunctions
67 |
68 |
69 | class GreedyMaxRuleLearner(KDNFLogicLearner):
70 | def __init__(self, max_literals):
71 | KDNFLogicLearner.__init__(self, max_literals)
72 |
73 | def learn_logical(self, boolean_data, labels):
74 | attributes = np.matrix(boolean_data)
75 | examples = attributes.shape[0]
76 | features = attributes.shape[1]
77 | conjunctions = []
78 | counts = np.sum(attributes, axis=0).A1
79 | print(examples, features, counts.shape)
80 |
81 | return []
82 |
83 |
84 | class GreedyLogicDNFLearner(KDNFLogicLearner):
85 | def __init__(self, max_terms, max_literals):
86 | KDNFLogicLearner.__init__(self, max_literals)
87 | self.max_terms = max_terms
88 |
89 | @property
90 | def max_literals(self):
91 | return self.k
92 |
93 | def learn_logical(self, boolean_data, labels):
94 | attributes = np.matrix(boolean_data)
95 | examples = attributes.shape[0]
96 | features = attributes.shape[1]
97 | conjunctions = []
98 | counts = np.sum(attributes, axis=0).A1
99 | print(counts[0])
100 |
101 | for i in range(self.max_terms):
102 | lb = 0
103 | ub = examples
104 | candidates = [([], examples)]
105 | new_candidates = []
106 | while len(candidates) > 0:
107 | for pattern, count in candidates:
108 | start_index = 0 if len(pattern) == 0 else max(pattern) + 1
109 | covered = [i for i in range(examples) if all(attributes[i, j] for j in pattern)]
110 | pos_covered = [i for i in covered if labels[i]]
111 | neg_covered = [i for i in covered if not labels[i]]
112 |
113 | for j in range(start_index, self.max_literals):
114 | if counts[j] > lb:
115 | pass
116 | for j in range(self.max_literals):
117 | for features in itertools.combinations(list(range(len(boolean_data))), self.k):
118 | accept = True
119 | for entry, label in zip(boolean_data, labels):
120 | if not label and all(entry[j] for j in features):
121 | accept = False
122 | break
123 | if accept:
124 | conjunctions.append(features)
125 | return conjunctions
126 |
--------------------------------------------------------------------------------
/incal/old_learners/k_dnf_smt_learner.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import pysmt.shortcuts as smt
4 | from pysmt.typing import REAL, BOOL
5 |
6 | from incremental_learner import IncrementalLearner
7 |
8 |
9 | class KDnfSmtLearner(IncrementalLearner):
10 | def __init__(self, conjunction_count, half_space_count, selection_strategy, allow_negations=True):
11 | IncrementalLearner.__init__(self, "dnf_smt", selection_strategy)
12 | self.conjunction_count = conjunction_count
13 | self.half_space_count = half_space_count
14 | self.allow_negations = allow_negations
15 |
16 | def learn_partial(self, solver, domain, data, new_active_indices):
17 | # Constants
18 | n_b_original = len(domain.bool_vars)
19 | n_b = n_b_original * 2
20 | n_r = len(domain.real_vars)
21 |
22 | n_h_original = self.half_space_count if n_r > 0 else 0
23 | n_h = n_h_original * 2 if self.allow_negations else n_h_original
24 |
25 | n_c = self.conjunction_count
26 | n_d = len(data)
27 |
28 | real_features = [[row[v] for v in domain.real_vars] for row, _ in data]
29 | bool_features = [[row[v] for v in domain.bool_vars] for row, _ in data]
30 | labels = [row[1] for row in data]
31 |
32 | # Variables
33 | a_hr = [[smt.Symbol("a_hr[{}][{}]".format(h, r), REAL) for r in range(n_r)] for h in range(n_h_original)]
34 | b_h = [smt.Symbol("b_h[{}]".format(h), REAL) for h in range(n_h_original)]
35 | s_ch = [[smt.Symbol("s_ch[{}][{}]".format(c, h)) for h in range(n_h)] for c in range(n_c)]
36 | s_cb = [[smt.Symbol("s_cb[{}][{}]".format(c, b)) for b in range(n_b)] for c in range(n_c)]
37 |
38 | # Aux variables
39 | s_ih = [[smt.Symbol("s_ih[{}][{}]".format(i, h)) for h in range(n_h)] for i in range(n_d)]
40 | s_ic = [[smt.Symbol("s_ic[{}][{}]".format(i, c)) for c in range(n_c)] for i in range(n_d)]
41 |
42 | # Constraints
43 | for i in new_active_indices:
44 | x_r, x_b, label = real_features[i], bool_features[i], labels[i]
45 |
46 | for h in range(n_h_original):
47 | sum_coefficients = smt.Plus([a_hr[h][r] * smt.Real(x_r[r]) for r in range(n_r)])
48 | solver.add_assertion(smt.Iff(s_ih[i][h], sum_coefficients <= b_h[h]))
49 |
50 | for h in range(n_h_original, n_h):
51 | solver.add_assertion(smt.Iff(s_ih[i][h], ~s_ih[i][h - n_h_original]))
52 |
53 | for c in range(n_c):
54 | solver.add_assertion(smt.Iff(s_ic[i][c], smt.And(
55 | [smt.TRUE()]
56 | + [(~s_ch[c][h] | s_ih[i][h]) for h in range(n_h)]
57 | + [~s_cb[c][b] for b in range(n_b_original) if not x_b[b]]
58 | + [~s_cb[c][b] for b in range(n_b_original, n_b) if x_b[b - n_b_original]]
59 | )))
60 |
61 | if label:
62 | solver.add_assertion(smt.Or([s_ic[i][c] for c in range(n_c)]))
63 | else:
64 | solver.add_assertion(smt.And([~s_ic[i][c] for c in range(n_c)]))
65 |
66 | solver.solve()
67 | model = solver.get_model()
68 |
69 | x_vars = [domain.get_symbol(domain.real_vars[r]) for r in range(n_r)]
70 | half_spaces = [
71 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) <= model.get_value(b_h[h])
72 | for h in range(n_h_original)
73 | ] + [
74 | smt.Plus([model.get_value(a_hr[h][r]) * x_vars[r] for r in range(n_r)]) > model.get_value(b_h[h])
75 | for h in range(n_h - n_h_original)
76 | ]
77 |
78 | b_vars = [domain.get_symbol(domain.bool_vars[b]) for b in range(n_b_original)]
79 | bool_literals = [b_vars[b] for b in range(n_b_original)]
80 | bool_literals += [~b_vars[b] for b in range(n_b - n_b_original)]
81 |
82 | conjunctions = [
83 | [half_spaces[h] for h in range(n_h) if model.get_py_value(s_ch[c][h])]
84 | + [bool_literals[b] for b in range(n_b) if model.get_py_value(s_cb[c][b])]
85 | for c in range(n_c)
86 | ]
87 |
88 | return smt.Or([smt.And(conjunction) for conjunction in conjunctions])
89 |
90 |
91 | # class KDnfSmtLearner(Learner):
92 | # def __init__(self, max_hyperplanes, max_terms, allow_negations=True):
93 | # Learner.__init__(self)
94 | # self.max_hyperplanes = max_hyperplanes
95 | # self.max_terms = max_terms
96 | # self.allow_negations = allow_negations
97 | #
98 | # def learn(self, domain, data, initial_indices=None):
99 | # # Constants
100 | # n_b_original = len(domain.bool_vars)
101 | # n_b = n_b_original * 2 if self.allow_negations else n_b_original
102 | #
103 | # n_f = len(domain.real_vars)
104 | # n_h_original = self.max_hyperplanes if n_f > 0 else 0
105 | # n_h = n_h_original * 2 if self.allow_negations else n_h_original
106 | #
107 | # n_c = self.max_terms
108 | # n_d = len(data)
109 | #
110 | # real_features = [[Learner._convert(row[v]) for v in domain.real_vars] for row, _ in data]
111 | # bool_features = [[bool(row[v].constant_value()) for v in domain.bool_vars] for row, _ in data]
112 | # labels = [row[1] for row in data]
113 | #
114 | # # Variables
115 | # a_hf = [[smt.Symbol("a_hf[{}][{}]".format(h, f), REAL) for f in range(n_f)] for h in range(n_h_original)]
116 | # b_h = [smt.Symbol("b_h[{}]".format(h), REAL) for h in range(n_h_original)]
117 | # s_ch = [[smt.Symbol("s_ch[{}][{}]".format(c, h)) for h in range(n_h)] for c in range(n_c)]
118 | # s_cb = [[smt.Symbol("s_cb[{}][{}]".format(c, b)) for b in range(n_b)] for c in range(n_c)]
119 | #
120 | # # Aux variables
121 | # s_ih = [[smt.Symbol("s_ih[{}][{}]".format(i, h)) for h in range(n_h)] for i in range(n_d)]
122 | # s_ic = [[smt.Symbol("s_ic[{}][{}]".format(i, c)) for c in range(n_c)] for i in range(n_d)]
123 | #
124 | # # Constraints
125 | # start = time.time()
126 | # active_indices = list(range(len(data))) if initial_indices is None else initial_indices
127 | # remaining = list(range(len(data))) # list(sorted(set(range(len(data))) - set(active_indices)))
128 | #
129 | # hyperplane_dnf = []
130 | #
131 | # def check_model(_x):
132 | # _formula = smt.Or([smt.And(hyperplane_conjunct) for hyperplane_conjunct in hyperplane_dnf])
133 | # substitution = {_var: _x[str(_var)] for _var in x_vars + b_vars}
134 | # return _formula.substitute(substitution).simplify().is_true()
135 | #
136 | # print("Starting solver with {} examples".format(len(active_indices)))
137 | #
138 | # with smt.Solver() as solver:
139 | # while len(active_indices) > 0:
140 | # remaining = list(sorted(set(remaining) - set(active_indices)))
141 | # for i in active_indices:
142 | # x, x_b, label = real_features[i], bool_features[i], labels[i]
143 | #
144 | # for h in range(n_h_original):
145 | # sum_coefficients = smt.Plus([a_hf[h][f] * smt.Real(x[f]) for f in range(n_f)])
146 | # solver.add_assertion(smt.Iff(s_ih[i][h], sum_coefficients <= b_h[h]))
147 | #
148 | # for h in range(n_h_original, n_h):
149 | # solver.add_assertion(smt.Iff(s_ih[i][h], ~s_ih[i][h - n_h_original]))
150 | #
151 | # for c in range(n_c):
152 | # solver.add_assertion(smt.Iff(s_ic[i][c], smt.And(
153 | # [(~s_ch[c][h] | s_ih[i][h]) for h in range(n_h)]
154 | # + [~s_cb[c][b] for b in range(n_b_original) if not x_b[b]]
155 | # + [~s_cb[c][b] for b in range(n_b_original, n_b) if x_b[b - n_b_original]]
156 | # )))
157 | #
158 | # if label:
159 | # solver.add_assertion(smt.Or([s_ic[i][c] for c in range(n_c)]))
160 | # else:
161 | # solver.add_assertion(smt.And([~s_ic[i][c] for c in range(n_c)]))
162 | #
163 | # solver.solve()
164 | # model = solver.get_model()
165 | #
166 | # x_vars = [domain.get_symbol(domain.variables[f]) for f in range(n_f)]
167 | # hyperplanes = [
168 | # smt.Plus([model.get_value(a_hf[h][f]) * x_vars[f] for f in range(n_f)]) <= model.get_value(b_h[h])
169 | # for h in range(n_h_original)]
170 | # hyperplanes += [
171 | # smt.Plus([model.get_value(a_hf[h][f]) * x_vars[f] for f in range(n_f)]) > model.get_value(b_h[h])
172 | # for h in range(n_h - n_h_original)]
173 | #
174 | # b_vars = [domain.get_symbol(domain.bool_vars[b]) for b in range(n_b_original)]
175 | # bool_literals = [b_vars[b] for b in range(n_b_original)]
176 | # bool_literals += [~b_vars[b - n_b_original] for b in range(n_b_original, n_b)]
177 | #
178 | # hyperplane_dnf = [
179 | # [hyperplanes[h] for h in range(n_h) if model.get_py_value(s_ch[c][h])]
180 | # + [bool_literals[b] for b in range(n_b) if model.get_py_value(s_cb[c][b])]
181 | # for c in range(n_c)
182 | # ]
183 | #
184 | # active_indices = [i for i in remaining if labels[i] != check_model(data[i][0])]
185 | # print("Found model violating {} examples".format(len(active_indices)))
186 | #
187 | # time_taken = time.time() - start
188 | # print("Took {:.2f}s".format(time_taken))
189 | # return hyperplane_dnf
190 | #
--------------------------------------------------------------------------------
/incal/parameter_free_learner.py:
--------------------------------------------------------------------------------
1 | import heapq
2 |
3 | import time
4 |
5 | from incal.learner import NoFormulaFound
6 |
7 |
8 | class ParameterFrontier(object):
9 | def __init__(self, w_k, w_h):
10 | self.c = lambda k, h: w_k * k + w_h * h
11 | self.pq = []
12 | self.tried = set()
13 |
14 | def push(self, k, h):
15 | if (k, h) not in self.tried:
16 | heapq.heappush(self.pq, (self.c(k, h), k, h))
17 | self.tried.add((k, h))
18 |
19 | def pop(self):
20 | c, k, h = heapq.heappop(self.pq)
21 | return k, h
22 |
23 |
24 | def learn_bottom_up(data, labels, learn_f, w_k, w_h, init_k=1, init_h=0, max_k=None, max_h=None):
25 | """
26 |     Learns a CNF(k, h) SMT formula phi using the learning function learn_f such that
27 |     C(k, h) = w_k * k + w_h * h is minimal.
28 |     :param data: Array of examples (one variable assignment per row)
29 |     :param labels: Array of labels
30 |     :param learn_f: Function called as learn_f(data, labels, i, k, h), where i is the attempt number
31 | :param w_k: The weight assigned to k
32 | :param w_h: The weight assigned to h
33 | :param init_k: The minimal value for k
34 | :param init_h: The minimal value for h
35 | :param max_k: The maximal value for k
36 | :param max_h: The maximal value for h
37 | :return: A tuple containing: 1) the CNF(k, h) formula phi with minimal complexity C(k, h); 2) k; and 3) h
38 | """
39 | solution = None
40 | frontier = ParameterFrontier(w_k, w_h)
41 | frontier.push(init_k, init_h)
42 | i = 0
43 | while solution is None:
44 | i += 1
45 | k, h = frontier.pop()
46 | # print("Attempting to solve with k={} and h={}".format(k, h))
47 | start = time.time()
48 | try:
49 | solution = learn_f(data, labels, i, k, h)
50 | # print("Found solution after {:.2f}s".format(time.time() - start))
51 | except NoFormulaFound as e:
52 | data = e.data
53 | labels = e.labels
54 | if max_k is None or k + 1 <= max_k:
55 | frontier.push(k + 1, h)
56 | if max_h is None or h + 1 <= max_h:
57 | frontier.push(k, h + 1)
58 | return solution, k, h
59 |
--------------------------------------------------------------------------------
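A minimal sketch of how learn_bottom_up is driven elsewhere in this repository (cf. incal/tests/test_one_class.py); it assumes pywmi is installed and that KCnfSmtLearner.learn returns the (data, labels, formula) triple that the tests unpack:

    import random

    from pywmi import evaluate
    from pywmi.sample import uniform

    from incal.experiments.examples import checker_problem
    from incal.k_cnf_smt_learner import KCnfSmtLearner
    from incal.parameter_free_learner import learn_bottom_up
    from incal.violations.core import RandomViolationsStrategy


    def learn_checker():
        domain, formula, name = checker_problem()
        data = uniform(domain, 500)                  # 500 uniform samples from the domain
        labels = evaluate(domain, formula, data)     # ground-truth labels

        def learn_f(_data, _labels, _i, _k, _h):
            # One incremental learning attempt for a fixed number of clauses k and hyperplanes h.
            learner = KCnfSmtLearner(_k, _h, RandomViolationsStrategy(10), "mvn")
            initial_indices = random.sample(range(len(_data)), 20)
            return learner.learn(domain, _data, _labels, initial_indices)

        # Search (k, h) in order of increasing cost C(k, h) = 1 * k + 1 * h, starting from (1, 1).
        (_, _, learned), k, h = learn_bottom_up(data, labels, learn_f, 1, 1, 1, 1, None, None)
        print("Learned CNF({}, {}) formula".format(k, h))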
/incal/tests/examples.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 |
3 | mpl.use('TkAgg')
4 | import matplotlib.pyplot as plt
5 |
6 | from pysmt.typing import REAL
7 | import pysmt.shortcuts as smt
8 |
9 | from pywmi import Domain
10 | from pywmi.plot import RegionBuilder
11 |
12 |
13 | def xy_domain():
14 | return Domain(["x", "y"], {"x": REAL, "y": REAL}, {"x": [0, 1], "y": [0, 1]})
15 |
16 |
17 | def example1(domain):
18 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
19 | return domain, (x + y <= 0.5)
20 |
21 |
22 | def example2(domain):
23 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
24 | return domain, (((-1.81491574069 < 2.82223533496 * x + -2.86421413834 * y) | (
25 | 1.74295350642 < 5.75692214636 * x + -5.67797696689 * y)) & (
26 | 5.75692214636 * x + -5.67797696689 * y <= 1.74295350642))
27 |
28 |
29 | def example3(domain):
30 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
31 | return domain, (((5.03100425089 < 4.72202520763*x + 4.11473198213*y) | (-4.6261635019 < -5.93640712709*x + -5.87100650773*y)) & ((5.03100425089 < 4.72202520763*x + 4.11473198213*y) | (-4.6261635019 < -5.93640712709*x + -5.87100650773*y)))
32 |
33 |
34 | def example4(domain):
35 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
36 | return domain, (((106.452209182 < 58.3305562428*x + 162.172448357*y) | (-82.1173457701 < -121.782718841*x + -45.7311195244*y)) & ((58.3305562428*x + 162.172448357*y <= 106.452209182) | (-121.782718841*x + -45.7311195244*y <= -82.1173457701)))
37 |
38 |
39 | def example5(domain):
40 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
41 | return domain, (((-1.81491574069 < 2.82223533496*x + -2.86421413834*y) | (1.74295350642 < 5.75692214636*x + -5.67797696689*y)) & (5.75692214636*x + -5.67797696689*y <= 1.74295350642))
42 |
43 |
44 | def example6(domain):
45 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
46 | return domain, (((-1.27554738321 < 2.00504448571*x + -2.40276942762*y) | (4.56336137649 < 11.0066321223*x + -9.72098326672*y)) & (11.0066321223*x + -9.72098326672*y <= 4.56336137649))
47 |
48 |
49 | def visualize(domain, formula):
50 | fig = plt.figure()
51 | ax = fig.add_subplot(1, 1, 1)
52 | RegionBuilder(domain).walk_smt(formula).plot(ax=ax)
53 | plt.show()
54 |
55 |
56 | if __name__ == "__main__":
57 | visualize(*example6(xy_domain()))
58 |
--------------------------------------------------------------------------------
/incal/tests/test_evaluation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from incal.experiments.examples import ice_cream_problem
4 | from pywmi import Domain, evaluate
5 |
6 |
7 | def test_example1():
8 | domain, formula, name = ice_cream_problem()
9 | c, b, w = domain.get_symbols(["chocolate", "banana", "weekend"])
10 |
11 | c_val = 0.41358769878652346
12 | b_val = 0.04881279380000003
13 | assignment = {"chocolate": c_val, "banana": b_val, "weekend": 1.0}
14 | instance = np.array([assignment[v] for v in domain.variables])
15 |
16 | h1 = -0.9094061613514598 < (-2.11558444119424*c + -0.7052753601938021*b)
17 | print(-0.9094061613514598, (-2.11558444119424 * c_val + -0.7052753601938021 * b_val))
18 | h2 = -43.62318633585081 < (-56.41097694745345*c + -50.5657977670196*b)
19 | print(-43.62318633585081, (-56.41097694745345 * c_val + -50.5657977670196 * b_val))
20 | h3 = -0.9094061613514598 < (-2.11558444119424*c + -0.7052753601938021*b)
21 | print(-0.9094061613514598, (-2.11558444119424 * c_val + -0.7052753601938021 * b_val))
22 | h4 = 7.792607696237757 < (18.128225098004087*c + 6.043431893671825*b)
23 | print(7.792607696237757, (18.128225098004087 * c_val + 6.043431893671825 * b_val))
24 | h5 = -0.9094061613514598 < -(2.11558444119424*c + -0.7052753601938021*b)
25 | print(-0.9094061613514598, -(2.11558444119424 * c_val + -0.7052753601938021 * b_val))
26 | # h1: True, h2: True, h3: True, h4: False, h5: True
27 |
28 | learned = ((h1 | h2) & (h3 | ~w) & (h4 | h5))
29 |
30 | print(evaluate(domain, formula, instance))
31 | print(evaluate(domain, learned, instance))
32 |
--------------------------------------------------------------------------------
/incal/tests/test_generation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from incal.generator import generate_half_space_sample
4 | from incal.learner import Learner
5 | from pysmt.typing import REAL, BOOL
6 | from pywmi import Domain
7 |
8 |
9 | def get_xay_domain():
10 | return Domain(["x", "a", "y"], {"x": REAL, "a": BOOL, "y": REAL}, {"x": (0, 1), "y": (0, 1)})
11 |
12 |
13 | def test_generate_hyperplane():
14 | domain = get_xay_domain()
15 | samples = np.array([[0, 1, 0.1], [0.5, 0, 0.5]])
16 | coefficients, b = Learner.fit_hyperplane(domain, samples)
17 | slope = coefficients[0] / coefficients[1]
18 | assert abs(slope) == 0.4 / 0.5
19 | assert b == 1
20 |
21 |
22 | def test_generate_hyperplane_sample_sanity():
23 | generate_half_space_sample(get_xay_domain(), 2)
24 |
--------------------------------------------------------------------------------
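The expected values in test_generate_hyperplane follow from the geometry of the two samples: ignoring the boolean column "a", the real points are (x, y) = (0, 0.1) and (0.5, 0.5); the line through them is y = 0.8x + 0.1, and normalising the offset to 1 (which the assertion b == 1 suggests Learner.fit_hyperplane does) gives -8x + 10y = 1, so |a_x / a_y| = 0.8 = 0.4 / 0.5. A small self-contained check of that arithmetic, independent of Learner.fit_hyperplane:

    import numpy as np

    p1 = np.array([0.0, 0.1])   # real coordinates (x, y) of the first sample
    p2 = np.array([0.5, 0.5])   # real coordinates (x, y) of the second sample

    slope = (p2[1] - p1[1]) / (p2[0] - p1[0])   # 0.8
    a_x, a_y, b = -slope, 1.0, p1[1]            # -0.8*x + 1.0*y = 0.1 holds for both points
    a_x, a_y, b = a_x / b, a_y / b, 1.0         # rescale so the right-hand side equals 1

    assert np.isclose(abs(a_x / a_y), 0.4 / 0.5)
    assert b == 1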
/incal/tests/test_one_class.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import time
4 | import numpy as np
5 |
6 | from pywmi.smt_print import pretty_print
7 |
8 | from incal.learn import LearnOptions
9 | from pywmi import evaluate, Domain, smt_to_nested, plot, RejectionEngine
10 | from pywmi.sample import uniform
11 |
12 | from incal.experiments.examples import simple_checker_problem, checker_problem
13 | from incal.violations.core import RandomViolationsStrategy
14 |
15 | from incal.violations.virtual_data import OneClassStrategy
16 |
17 | from incal.k_cnf_smt_learner import KCnfSmtLearner
18 |
19 | from incal.parameter_free_learner import learn_bottom_up
20 |
21 | # from incal.observe.inc_logging import LoggingObserver
22 | from incal.observe.plotting import PlottingObserver
23 |
24 |
25 | def main():
26 | domain, formula, name = checker_problem()
27 | thresholds = {v: 0.1 for v in domain.real_vars}
28 | data = uniform(domain, 1000)
29 | labels = evaluate(domain, formula, data)
30 | data = data[labels == 1]
31 | labels = labels[labels == 1]
32 |
33 | def learn_inc(_data, _labels, _i, _k, _h):
34 | strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
35 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
36 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
37 | # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
38 | learner.add_observer(PlottingObserver(domain, "test_output/checker", "run_{}_{}_{}".format(_i, _k, _h),
39 | domain.real_vars[0], domain.real_vars[1], None, False))
40 | return learner.learn(domain, _data, _labels, initial_indices)
41 |
42 | (new_data, new_labels, formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None)
43 | print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(formula)))
44 | print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels)))
45 |
46 |
47 | def background_knowledge_example():
48 | domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
49 | a, b, x, y = domain.get_symbols(domain.variables)
50 | formula = (a | b) & (~a | ~b) & (x >= 0) & (x <= y) & (y <= 1)
51 | thresholds = {v: 0.1 for v in domain.real_vars}
52 | data = uniform(domain, 10000)
53 | labels = evaluate(domain, formula, data)
54 | data = data[labels == 1]
55 | labels = labels[labels == 1]
56 |
57 | def learn_inc(_data, _labels, _i, _k, _h):
58 | strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds) #, background_knowledge=(a | b) & (~a | ~b))
59 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
60 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
61 | # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
62 | learner.add_observer(PlottingObserver(domain, "test_output/bg", "run_{}_{}_{}".format(_i, _k, _h),
63 | domain.real_vars[0], domain.real_vars[1], None, False))
64 | return learner.learn(domain, _data, _labels, initial_indices)
65 |
66 | (new_data, new_labels, formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None)
67 | print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(formula)))
68 | print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels)))
69 |
70 |
71 | def negative_samples_example(background_knowledge):
72 | domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
73 | a, b, x, y = domain.get_symbols(domain.variables)
74 | formula = (a | b) & (~a | ~b) & (x <= y) & domain.get_bounds()
75 | background_knowledge = (a | b) & (~a | ~b) if background_knowledge else None
76 | thresholds = {"x": 0.1, "y": 0.2}
77 | data = uniform(domain, 10000)
78 | labels = evaluate(domain, formula, data)
79 | data = data[labels == 1]
80 | labels = labels[labels == 1]
81 | original_sample_count = len(labels)
82 |
83 | start_time = time.time()
84 |
85 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, thresholds, 100, background_knowledge)
86 | print("Created {} negative examples".format(len(labels) - original_sample_count))
87 |
88 | directory = "test_output{}bg_sampled{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
89 |
90 | def learn_inc(_data, _labels, _i, _k, _h):
91 | strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds, background_knowledge=background_knowledge)
92 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
93 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
94 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h),
95 | domain.real_vars[0], domain.real_vars[1], None, False))
96 | return learner.learn(domain, _data, _labels, initial_indices)
97 |
98 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None, None)
99 | if background_knowledge:
100 | learned_formula = learned_formula & background_knowledge
101 |
102 | duration = time.time() - start_time
103 |
104 | print("{}".format(smt_to_nested(learned_formula)))
105 | print("Learned CNF(k={}, h={}) formula {}".format(k, h, pretty_print(learned_formula)))
106 | print("Data-set grew from {} to {} entries".format(len(labels), len(new_labels)))
107 | print("Learning took {:.2f}s".format(duration))
108 |
109 | test_data, labels = OneClassStrategy.add_negatives(domain, data, labels, thresholds, 1000, background_knowledge)
110 | assert all(evaluate(domain, learned_formula, test_data) == labels)
111 |
112 |
113 | def test_negative_samples():
114 | for label in (True, False):
115 | random.seed(888)
116 | np.random.seed(888)
117 | negative_samples_example(label)
118 |
119 |
120 | def test_adaptive_threshold():
121 | random.seed(888)
122 | np.random.seed(888)
123 |
124 | domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
125 | x, y = domain.get_symbols(domain.variables)
126 | formula = (x <= y) & (x <= 0.5) & (y <= 0.5) & domain.get_bounds()
127 | thresholds = {"x": 0.1, "y": 0.1}
128 | data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50)
129 | k = 4
130 | nearest_neighbors = []
131 | for i in range(len(data)):
132 | nearest_neighbors.append([])
133 | for j in range(len(data)):
134 | if i != j:
135 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables)
136 | if domain.is_bool(v))\
137 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v))
138 | if len(nearest_neighbors[i]) < k:
139 | nearest_neighbors[i].append((j, distance))
140 | else:
141 | index_of_furthest = None
142 | for fi, f in enumerate(nearest_neighbors[i]):
143 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]:
144 | index_of_furthest = fi
145 | if distance < nearest_neighbors[i][index_of_furthest][1]:
146 | nearest_neighbors[i][index_of_furthest] = (j, distance)
147 | print(nearest_neighbors)
148 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * (domain.var_domains[v][1] - domain.var_domains[v][0]) for v in domain.real_vars]
149 | for i in range(len(nearest_neighbors))]
150 | t = np.array(t)
151 | print(t)
152 | print(data)
153 | # data = uniform(domain, 400)
154 | labels = evaluate(domain, formula, data)
155 | data = data[labels == 1]
156 | labels = labels[labels == 1]
157 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000)
158 |
159 | directory = "test_output{}adaptive{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
160 | os.makedirs(directory)
161 |
162 | name = os.path.join(directory, "combined.png")
163 | plot.plot_combined("x", "y", domain, formula, (data, labels), None, name, set(), set())
164 |
--------------------------------------------------------------------------------
/incal/tests/test_polytope.py:
--------------------------------------------------------------------------------
1 | import matplotlib as mpl
2 |
3 | mpl.use('TkAgg')
4 | import matplotlib.pyplot as plt
5 |
6 | from unittest import TestCase
7 |
8 | from pysmt.typing import REAL
9 | import pysmt.shortcuts as smt
10 |
11 | from pywmi.domain import Domain
12 | from pywmi.plot import RegionBuilder
13 |
14 |
15 | class TestPolytope(TestCase):
16 | def test_example1(self):
17 | domain = Domain(["x", "y"], {"x": REAL, "y": REAL}, {"x": [0, 1], "y": [0, 1]})
18 | x, y = smt.Symbol("x", REAL), smt.Symbol("y", REAL)
19 | formula = (x + y <= 0.5)
20 | RegionBuilder(domain).walk_smt(formula).plot()
21 | # plt.show()
22 |
23 |
24 |
--------------------------------------------------------------------------------
/incal/util/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/util/__init__.py
--------------------------------------------------------------------------------
/incal/util/options.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import time
4 | from typing import Union, Tuple, Any, Dict, List, Optional
5 |
6 |
7 | class Option(object):
8 | def __init__(self, name, from_type=None, default_from=None, converter=None, default=None, arg_name=None):
9 | self.name = name
10 | self.from_type = from_type
11 | self.default_from = default_from
12 | self.converter = converter
13 | self.default = default
14 | self.arg_name = arg_name or name
15 | if converter is not None and default_from is not None and default is None:
16 | self.default = self.convert(default_from)
17 |
18 | def convert(self, value):
19 | if value is None:
20 | return self.default
21 | elif self.converter:
22 | if isinstance(self.from_type, tuple):
23 | parts = value.split(":")
24 | if len(parts) != len(self.from_type):
25 | raise RuntimeError("Could not parse arguments for option {}, got: {}".format(self.name, value))
26 | value = tuple(t(v) for t, v in zip(self.from_type, parts))
27 |
28 | if isinstance(value, tuple):
29 | return self.converter(*value)
30 | else:
31 | return self.converter(value)
32 | else:
33 | return value
34 |
35 |
36 | class Options(object):
37 | def __init__(self, callback=None):
38 | self.options = dict()
39 | self.values = dict()
40 | self.original_values = dict()
41 | self.callback = callback
42 |
43 | def add_option(self, name, from_type=None, default_from=None, converter=None, default=None, arg_name=None):
44 | if isinstance(default_from, tuple):
45 | default_from = ":".join(str(e) for e in default_from)
46 | self.options[name] = Option(name, from_type, default_from, converter, default, arg_name)
47 |
48 | def set_values(self, convert=True, **kwargs):
49 | for key, value in kwargs.items():
50 | self.set_value(key, value, convert)
51 |
52 | @staticmethod
53 | def convert_dict(**kwargs):
54 | def convert(*args):
55 | if args[0] in kwargs:
56 | if len(args) > 1:
57 | return kwargs[args[0]](*args[1:])
58 | return kwargs[args[0]]
59 | raise RuntimeError("Unknown option {}, should be one of: {}".format(args[0], list(kwargs.keys())))
60 |
61 | return convert
62 |
63 | def set_value(self, name, value, convert=True):
64 | self.original_values[name] = value
65 | if convert:
66 | self.values[name] = self.options[name].convert(value)
67 | else:
68 | self.values[name] = value
69 |
70 | def __setattr__(self, key, value):
71 | if key in ["options", "values", "original_values", "callback"] or key.startswith("__"):
72 | return super().__setattr__(key, value)
73 | self.set_value(key, value)
74 |
75 | def __getattr__(self, item):
76 | if item in ["options", "values", "original_values", "callback"] or item.startswith("__"):
77 | return super().__getattr__(item)
78 | return self.values[item] if item in self.values else self.options[item].default
79 |
80 | def add_arguments(self, parser):
81 | for o_name, option in self.options.items():
82 | parser.add_argument(
83 | "--{}".format(option.name),
84 | type=option.from_type if not isinstance(option.from_type, tuple) else str,
85 | default=option.default_from
86 | )
87 |
88 | def parse_arguments(self, args):
89 | for o_name, option in self.options.items():
90 | self.set_value(option.name, getattr(args, option.name))
91 |
92 | def print_arguments(self):
93 | return " ".join("--{} {}".format(name, o_value) for name, o_value in self.original_values.items()
94 | if o_value is not None)
95 |
96 | def call(self, timed=False) -> Union[Tuple[Any, float], Any]:
97 | def make_call():
98 | return self.callback(**{self.options[o_name].arg_name: value for o_name, value in self.values.items()})
99 |
100 | if timed:
101 | start_time = time.time()
102 | result = make_call()
103 | duration = time.time() - start_time
104 | return result, duration
105 | else:
106 | return make_call()
107 |
108 | def execute_from_command_line(self, description: str=None, timed: bool=False) -> Union[Tuple[Any, float], Any]:
109 | import argparse
110 | parser = argparse.ArgumentParser(description=description)
111 | self.add_arguments(parser)
112 | self.parse_arguments(parser.parse_args())
113 | return self.call(timed)
114 |
115 | def copy(self):
116 | options = self.make_copy()
117 | options.options = dict(self.options)
118 | options.values = dict(self.values)
119 | options.original_values = dict(self.original_values)
120 | return options
121 |
122 | def make_copy(self):
123 | return Options(self.callback)
124 |
125 | def export_to_dict(self):
126 | return dict(self.original_values)
127 |
128 | def import_from_dict(self, values_dict):
129 | self.set_values(True, **values_dict)
130 |
131 |
132 | class Results(Options):
133 | @staticmethod
134 | def make_converter(converter):
135 | def convert(result, duration):
136 | return converter(result)
137 | return convert
138 |
139 | def add_result(self, name, converter):
140 | self.add_option(name, converter=Results.make_converter(converter))
141 |
142 | def add_duration(self, name="duration"):
143 | def convert(result, duration):
144 | return duration
145 | self.add_option(name, converter=convert)
146 |
147 | def export_to_dict(self):
148 | return dict(self.values)
149 |
150 | def import_from_dict(self, values_dict):
151 | self.set_values(False, **values_dict)
152 |
153 |
154 | class Experiment(object):
155 | def __init__(self, parameters: Options, results: Options, config: Optional[Options]=None, import_handler=None):
156 | self.parameters = parameters
157 | self.results = results
158 | self.config = config
159 | self.import_handler = import_handler
160 | self.derived = dict()
161 | self.imported_from_file = None
162 |
163 | def register_derived(self, name, callback):
164 | self.derived[name] = callback
165 |
166 | def import_from_command_line(self):
167 | import argparse
168 | parser = argparse.ArgumentParser()
169 | self.parameters.add_arguments(parser)
170 | if self.config:
171 | self.config.add_arguments(parser)
172 | args = parser.parse_args()
173 | self.parameters.parse_arguments(args)
174 | if self.config:
175 | self.config.parse_arguments(args)
176 |
177 | def execute_from_command_line(self):
178 | self.import_from_command_line()
179 | self.execute()
180 |
181 | def execute(self):
182 | result = self.parameters.call(timed=True)
183 | for o_name in self.results.options:
184 | self.results.set_value(o_name, result)
185 |
186 | def export_to_dict(self):
187 | return {"parameters": self.parameters.export_to_dict(), "results": self.results.export_to_dict(),
188 | "config": self.config.export_to_dict() if self.config else None}
189 |
190 | def save(self, filename):
191 | with open(filename, "w") as ref:
192 | json.dump(self.export_to_dict(), ref)
193 |
194 | def import_from_dict(self, values_dict):
195 | parameters_dict, results_dict, config_dict = (values_dict[k] for k in ["parameters", "results", "config"])
196 | if self.import_handler is not None:
197 | self.import_handler(parameters_dict, results_dict, config_dict)
198 | self.parameters.import_from_dict(parameters_dict)
199 | self.results.import_from_dict(results_dict)
200 | if self.config and config_dict:
201 | self.config.import_from_dict(config_dict)
202 |
203 | def load(self, filename):
204 | with open(filename, "r") as ref:
205 | self.import_from_dict(json.load(ref))
206 | self.imported_from_file = os.path.realpath(filename)
207 | return self
208 |
209 |
210 |
211 |
--------------------------------------------------------------------------------
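A hypothetical, minimal sketch of the Options API defined above (the callback greet and the option names are made up for illustration):

    from incal.util.options import Options


    def greet(name, times):
        return " ".join(["Hello {}".format(name)] * times)


    options = Options(greet)
    options.add_option("name", from_type=str, default="world")
    options.add_option("times", from_type=int, converter=int, default_from="2")

    options.set_values(name="incal", times="3")    # "3" is converted to 3 by the option's converter
    result, duration = options.call(timed=True)    # invokes greet(name="incal", times=3)
    print(result, duration)

    # Or expose --name and --times as command-line arguments instead:
    # options.execute_from_command_line(description="greeting demo", timed=False)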
/incal/util/parallel.py:
--------------------------------------------------------------------------------
1 | import os
2 | import signal
3 | import subprocess
4 | from multiprocessing.pool import Pool
5 | from subprocess import TimeoutExpired
6 |
7 |
8 | def run_command(args):
9 | command, time_out = args
10 | with subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, preexec_fn=os.setsid) as process:
11 | try:
12 | process.communicate(timeout=time_out)
13 | except TimeoutExpired:
14 | os.killpg(process.pid, signal.SIGINT) # send signal to the process group
15 | process.communicate()
16 |
17 |
18 | def run_commands(commands, processes=None, time_out=None):
19 | pool = Pool(processes=processes)
20 | commands = [(command, time_out) for command in commands]
21 | pool.map(run_command, commands)
22 |
--------------------------------------------------------------------------------
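For example, run_commands can fan shell commands out over a process pool (POSIX only, since run_command relies on os.setsid and os.killpg):

    from incal.util.parallel import run_commands

    # Two worker processes; any command still running after 60 seconds receives SIGINT.
    run_commands(["echo one", "echo two", "sleep 120"], processes=2, time_out=60)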
/incal/util/plot.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import platform
3 |
4 | import matplotlib as mpl
5 |
6 | if platform.system() == "Darwin":
7 | mpl.use('TkAgg')
8 |
9 | import matplotlib.markers as mark
10 | import matplotlib.pyplot as plt
11 | import matplotlib.cm as cm
12 |
13 | import numpy
14 |
15 |
16 | class ScatterData:
17 | # colors = ["black", "green", "red"]
18 | colors = []
19 | markers = ["o", "v", "x"]
20 |
21 | def __init__(self, title, plot_options):
22 | self.title = title
23 | self.data = []
24 | self.limits = None, None
25 | self.plot_options = plot_options
26 |
27 | def add_data(self, name, x_data, y_data, error=None):
28 | self.data.append((name, x_data, y_data, error))
29 | return self
30 |
31 | @property
32 | def size(self):
33 | return len(self.data)
34 |
35 | def x_lim(self, limits):
36 | self.limits = limits, self.limits[1]
37 |
38 | def y_lim(self, limits):
39 | self.limits = self.limits[0], limits
40 |
41 | def gen_colors(self):
42 | if len(self.data) <= len(self.colors):
43 | return self.colors[:len(self.data)]
44 | iterator = iter(cm.rainbow(numpy.linspace(0, 1, len(self.data))))
45 | return [next(iterator) for _ in range(len(self.data))]
46 |
47 | def gen_markers(self):
48 | if len(self.data) <= len(self.markers):
49 | return self.markers[:len(self.data)]
50 | iterator = itertools.cycle(mark.MarkerStyle.filled_markers)
51 | return [next(iterator) for _ in range(len(self.data))]
52 |
53 | def render(self, ax, lines=True, log_x=True, log_y=True, label_x=None, label_y=None, legend_pos=None,
54 | x_ticks=None, y_ticks=None):
55 |
56 | plots = []
57 | colors = self.gen_colors()
58 | markers = self.gen_markers()
59 |
60 | if legend_pos is None:
61 | legend_pos = "lower right"
62 |
63 | plot_diagonal = False
64 | plot_extra = None
65 | plot_format = "scatter"
66 | show_error = True
67 | steps_x = None
68 | steps_y = None
69 |
70 | cache = None
71 | for plot_option in self.plot_options or ():
72 | if cache is None:
73 | if plot_option == "diagonal":
74 | plot_diagonal = True
75 | else:
76 | cache = plot_option
77 | else:
78 | if cache == "format":
79 | plot_format = plot_option
80 | elif cache == "error":
81 | show_error = (int(plot_option) == 1)
82 | elif cache == "legend_pos":
83 | legend_pos = plot_option
84 | elif cache == "lx":
85 | label_x = plot_option
86 | elif cache == "ly":
87 | label_y = plot_option
88 | elif cache == "steps_x":
89 | steps_x = int(plot_option)
90 | elif cache == "steps_y":
91 | steps_y = int(plot_option)
92 | elif cache == "plot_extra":
93 | plot_extra = plot_option
94 | elif cache == "x_lim":
95 | parts = plot_option.split(":")
96 | limits = (float(parts[0]), float(parts[1]))
97 | self.x_lim(limits)
98 | elif cache == "y_lim":
99 | parts = plot_option.split(":")
100 | limits = (float(parts[0]), float(parts[1]))
101 | self.y_lim(limits)
102 | cache = None
103 |
104 | min_x, max_x, min_y, max_y = numpy.infty, -numpy.infty, numpy.infty, -numpy.infty
105 | for i in range(self.size):
106 | name, x_data, y_data, error = self.data[i]
107 | try:
108 | min_x = min(min_x, numpy.min(x_data))
109 | min_y = min(min_y, numpy.min(y_data))
110 | max_x = max(max_x, numpy.max(x_data))
111 | max_y = max(max_y, numpy.max(y_data))
112 | except TypeError:
113 | pass
114 |
115 | if plot_format == "scatter":
116 | plots.append(ax.scatter(x_data, y_data, color=colors[i], marker=markers[i], s=40))
117 | if lines:
118 | ax.plot(x_data, y_data, color=colors[i])
119 | if show_error and error is not None:
120 | ax.fill_between(x_data, y_data - error, y_data + error, color=colors[i], alpha=0.35,
121 | linewidth=0)
122 | # ax.errorbar(x_data, y_data, error, linestyle='None', color=colors[i])
123 | elif plot_format == "bar":
124 | plots.append(ax.bar(x_data, y_data, color=colors[i]))
125 | else:
126 | raise ValueError("Unknown plot format")
127 |
128 | if plot_diagonal:
129 | ax.plot(numpy.array([min_x, max_x]), numpy.array([min_y, max_y]), linestyle="--")
130 | if plot_extra and plot_extra == "1/x":
131 | ax.plot(x_data, 1 / x_data, linestyle="--")
132 |
133 | ax.grid(True)
134 | legend_names = list(t[0] for t in self.data)
135 | # legend_names = ["No mixing - DT", "No mixing - RF", "Mixing - DT", "Mixing - RF"]
136 | # legend_names = ["No formulas", "Formulas"]
137 | # legend_names = []
138 | if 10 > len(self.data) == len(legend_names):
139 | ax.legend(plots, legend_names, loc=legend_pos)
140 |
141 | if log_x:
142 | ax.set_xscale('log')
143 | if log_y:
144 | ax.set_yscale('log')
145 |
146 | x_lim, y_lim = self.limits
147 | if x_lim is not None:
148 | ax.set_xlim(x_lim)
149 | if y_lim is not None:
150 | ax.set_ylim(y_lim)
151 |
152 | if label_y is not None:
153 | ax.set_ylabel(label_y)
154 | if label_x is not None:
155 | ax.set_xlabel(label_x)
156 |
157 | if steps_x is not None:
158 | x_ticks = numpy.linspace(min_x, max_x, steps_x)
159 | if steps_y is not None:
160 | y_ticks = numpy.linspace(min_y, max_y, steps_y)
161 | # x_ticks = [1, 2, 3]
162 | if x_ticks is not None:
163 | ax.xaxis.set_ticks(x_ticks)
164 | if y_ticks is not None:
165 | ax.yaxis.set_ticks(y_ticks)
166 |
167 | def plot(self, filename=None, size=None, **kwargs):
168 | fig = plt.figure()
169 | if size is not None:
170 | fig.set_size_inches(*size)
171 | self.render(fig.gca(), **kwargs)
172 | if filename is None:
173 | plt.show(block=True)
174 | else:
175 | plt.savefig(filename, format="png", bbox_inches="tight", pad_inches=0.08, dpi=600)
176 |
177 |
178 | def plot(file, *args, **kwargs):
179 | fig = plt.figure()
180 | fig.set_size_inches(12, 12)
181 |
182 | subplots = len(args)
183 | cols = int(numpy.ceil(numpy.sqrt(subplots)))
184 | rows = int(numpy.ceil(subplots / cols))
185 |
186 | import matplotlib.gridspec as grid_spec
187 | gs = grid_spec.GridSpec(rows, cols)
188 |
189 | axes = [plt.subplot(gs[0, 0]), plt.subplot(gs[0, 1]), plt.subplot(gs[1, :])]
190 | legend_positions = ["lower right", "upper right", "lower left"]
191 |
192 | for i in range(subplots):
193 | legend_pos = legend_positions[i]
194 | args[i].render(axes[i], legend_pos=legend_pos, **kwargs)
195 |
196 | if file is None:
197 | plt.show()
198 | else:
199 | plt.savefig(file, format="pdf")
200 |
--------------------------------------------------------------------------------
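A small sketch of the ScatterData helper above, with made-up runtime numbers; passing a filename to plot() writes a PNG instead of opening a window:

    import numpy as np

    from incal.util.plot import ScatterData

    x = np.array([1, 10, 100, 1000])
    scatter = ScatterData("runtime comparison", plot_options=None)
    scatter.add_data("baseline", x, np.array([0.5, 2.0, 9.0, 40.0]))
    scatter.add_data("incremental", x, np.array([0.3, 0.9, 2.5, 7.0]), error=np.array([0.1, 0.2, 0.4, 1.0]))

    # Log-log scatter plot with connecting lines and a shaded error band for the second series.
    scatter.plot(filename=None, size=(6, 4), lines=True, log_x=True, log_y=True,
                 label_x="examples", label_y="seconds")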
/incal/util/sampling.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 |
4 | # Implementation modified from https://stackoverflow.com/a/2149533/253387
5 |
6 | class Node(object):
7 | # Each node in the heap has a weight, original weight, value, and total weight.
8 | # The total weight, self.tw, is self.w plus the weight of any children.
9 | __slots__ = ['w', 'ow', 'v', 'tw']
10 |
11 | def __init__(self, w, ow, v, tw):
12 | self.w, self.ow, self.v, self.tw = w, ow, v, tw
13 |
14 |
15 | def rws_heap(items):
16 | # h is the heap. It's like a binary tree that lives in an array.
17 | # It has a Node for each pair in `items`. h[1] is the root. Each
18 | # other Node h[i] has a parent at h[i>>1]. Each node has up to 2
19 | # children, h[i<<1] and h[(i<<1)+1]. To get this nice simple
20 | # arithmetic, we have to leave h[0] vacant.
21 | h = [None] # leave h[0] vacant
22 | for v, w in items:
23 | h.append(Node(w, w, v, w))
24 | for i in range(len(h) - 1, 1, -1): # total up the tws
25 | h[i>>1].tw += h[i].tw # add h[i]'s total to its parent
26 | return h
27 |
28 |
29 | def rws_heap_pop(h):
30 | gas = h[1].tw * random.random() # start with a random amount of gas
31 |
32 | i = 1 # start driving at the root
33 | while gas >= h[i].w: # while we have enough gas to get past node i:
34 | gas -= h[i].w # drive past node i
35 | i <<= 1 # move to first child
36 | if gas >= h[i].tw: # if we have enough gas:
37 | gas -= h[i].tw # drive past first child and descendants
38 | i += 1 # move to second child
39 | w = h[i].w # out of gas! h[i] is the selected node.
40 | v = h[i].v
41 |
42 | h[i].w = 0 # make sure this node isn't chosen again
43 | while i: # fix up total weights
44 | h[i].tw -= w
45 | i >>= 1
46 | return v
47 |
48 |
49 | def sample_weighted(items, n): # items are pairs (value, weight)
50 | heap = rws_heap(items) # just make a heap...
51 | for i in range(n):
52 | yield rws_heap_pop(heap) # and pop n items off it.
53 |
--------------------------------------------------------------------------------
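For example, drawing two items without replacement, with probability proportional to their weights:

    from incal.util.sampling import sample_weighted

    items = [("a", 1.0), ("b", 5.0), ("c", 10.0)]   # (value, weight) pairs
    print(list(sample_weighted(items, 2)))           # e.g. ['c', 'b']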
/incal/util/timeout.py:
--------------------------------------------------------------------------------
1 | def timeout(func, args=None, kwargs=None, duration=1, default=None):
2 | if args is None:
3 | args = ()
4 | if kwargs is None:
5 | kwargs = dict()
6 |
7 | if duration is None:
8 | return func(*args, **kwargs)
9 |
10 | import signal
11 |
12 | class TimeoutError(Exception):
13 | pass
14 |
15 | def handler(signum, frame):
16 | raise TimeoutError()
17 |
18 | # set the timeout handler
19 | signal.signal(signal.SIGALRM, handler)
20 | signal.alarm(duration)
21 |
22 | try:
23 | result = func(*args, **kwargs)
24 | except TimeoutError:
25 | result = default
26 | finally:
27 | signal.alarm(0)
28 |
29 | return result
30 |
--------------------------------------------------------------------------------
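For example (main thread of a POSIX system only, since the helper relies on signal.SIGALRM):

    import time

    from incal.util.timeout import timeout


    def slow(x):
        time.sleep(5)
        return x * 2

    # slow() does not finish within 2 seconds, so the default value 42 is returned.
    print(timeout(slow, args=(21,), duration=2, default=42))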
/incal/violations/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/incal/violations/__init__.py
--------------------------------------------------------------------------------
/incal/violations/core.py:
--------------------------------------------------------------------------------
1 | import random
2 |
3 | import numpy as np
4 |
5 | from pywmi.smt_check import evaluate
6 | from pywmi.smt_print import pretty_print, pretty_print_instance
7 | from typing import Tuple, List
8 |
9 |
10 | class SelectionStrategy(object):
11 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]:
12 | raise NotImplementedError()
13 |
14 |
15 | class AllViolationsStrategy(SelectionStrategy):
16 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]:
17 | active_set = set(active_indices)
18 | learned_labels = evaluate(domain, formula, data)
19 | differences = np.logical_xor(labels, learned_labels)
20 | difference_set = set(np.where(differences)[0])
21 | # print(active_set)
22 | # print(difference_set)
23 | # print(pretty_print(formula))
24 | # for i in active_set & difference_set:
25 | # print(i)
26 | # print(pretty_print_instance(domain, data[i]))
27 | # print(labels[i], learned_labels[i])
28 | # print()
29 | # assert len(active_set & difference_set) == 0
30 | return data, labels, sorted(difference_set - active_set)
31 |
32 |
33 | class RandomViolationsStrategy(AllViolationsStrategy):
34 | def __init__(self, sample_size):
35 | self.sample_size = sample_size
36 | self.last_violations = None
37 |
38 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]:
39 | data, labels, all_violations = AllViolationsStrategy.select_active(self, domain, data, labels, formula, active_indices)
40 | self.last_violations = list(all_violations)
41 | sample_size = min(self.sample_size, len(self.last_violations))
42 | return data, labels, random.sample(self.last_violations, sample_size)
43 |
44 |
45 | class WeightedRandomViolationsStrategy(AllViolationsStrategy):
46 | def __init__(self, sample_size, weights):
47 | self.sample_size = sample_size
48 | self.last_violations = None
49 | self.weights = weights
50 |
51 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]:
52 | data, labels, all_violations = AllViolationsStrategy.select_active(self, domain, data, labels, formula, active_indices)
53 | self.last_violations = list(all_violations)
54 | sample_size = min(self.sample_size, len(self.last_violations))
55 |         from incal.util import sampling
56 |         return data, labels, list(sampling.sample_weighted(zip(self.last_violations, [self.weights[i] for i in self.last_violations]), sample_size))
57 |
58 |
59 | class MaxViolationsStrategy(AllViolationsStrategy):
60 | def __init__(self, sample_size, weights):
61 | self.sample_size = sample_size
62 | self.last_violations = None
63 | self.weights = weights
64 |
65 | def select_active(self, domain, data, labels, formula, active_indices) -> Tuple[np.ndarray, np.ndarray, List[int]]:
66 | data, labels, all_violations = AllViolationsStrategy.select_active(self, domain, data, labels, formula, active_indices)
67 | all_violations = list(all_violations)
68 | self.last_violations = all_violations
69 | sample_size = min(self.sample_size, len(all_violations))
70 | weighted_violations = zip(all_violations, [self.weights[i] for i in all_violations])
71 | weighted_violations = sorted(weighted_violations, key=lambda t: t[1])
72 | # noinspection PyTypeChecker
73 | return data, labels, [t[0] for t in weighted_violations[0:sample_size]]
74 |
--------------------------------------------------------------------------------
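A minimal sketch of the SelectionStrategy contract: given the data, the true labels and a candidate formula (here a made-up partial hypothesis), select_active returns the data, the labels and the indices of newly selected violating examples (assumes pywmi is installed):

    from pywmi import Domain, evaluate
    from pywmi.sample import uniform

    from incal.violations.core import RandomViolationsStrategy

    domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
    x, y = domain.get_symbols(domain.variables)
    data = uniform(domain, 100)
    labels = evaluate(domain, (x <= y), data)    # ground truth: x <= y
    candidate = (x <= 0.5)                       # hypothetical intermediate hypothesis

    # Up to 10 randomly chosen examples on which the candidate disagrees with the labels,
    # excluding the indices that are already active.
    strategy = RandomViolationsStrategy(10)
    _, _, new_indices = strategy.select_active(domain, data, labels, candidate, [0, 1, 2])
    print(new_indices)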
/incal/violations/dt_selection.py:
--------------------------------------------------------------------------------
1 | # Given a number of points:
2 | # - Train a DT (scale points?)
3 | # - For every point compute distance to the decision boundary
4 | import sklearn.tree as tree
5 |
6 | import pysmt.shortcuts as smt
7 |
8 | from .core import MaxViolationsStrategy
9 |
10 |
11 | class DecisionTreeSelection(MaxViolationsStrategy):
12 | def __init__(self):
13 | super().__init__(1, None)
14 |
15 | def select_active(self, domain, data, labels, formula, active_indices):
16 | if self.weights is None:
17 | self.weights = [min(d.values()) for d in get_distances(domain, data, labels)]
18 | return super().select_active(domain, data, labels, formula, active_indices)
19 |
20 |
21 | def convert(domain, data, labels):
22 | # def _convert(var, val):
23 | # if domain.var_types[var] == smt.BOOL:
24 | # return 1 if val else 0
25 | # elif domain.var_types[var] == smt.REAL:
26 | # return float(val)
27 |
28 | # feature_matrix = []
29 | # labels = []
30 | # for instance, label in data:
31 | # feature_matrix.append([_convert(v, instance[v]) for v in domain.variables])
32 | # labels.append(1 if label else 0)
33 | return data, labels
34 |
35 |
36 | def learn_dt(feature_matrix, labels, **kwargs):
37 | # noinspection PyArgumentList
38 | estimator = tree.DecisionTreeClassifier(**kwargs)
39 | estimator.fit(feature_matrix, labels)
40 | return estimator
41 |
42 |
43 | def export_dt(dt):
44 | import graphviz
45 | dot_data = tree.export_graphviz(dt, out_file=None)
46 | graph = graphviz.Source(dot_data)
47 | graph.render("DT")
48 |
49 |
50 | def get_distances_dt(dt, domain, feature_matrix):
51 | # Include more features than trained with?
52 |
53 | leave_id = dt.apply(feature_matrix)
54 | feature = dt.tree_.feature
55 | threshold = dt.tree_.threshold
56 | node_indicator = dt.decision_path(feature_matrix)
57 |
58 | distances = []
59 |
60 | for sample_id in range(len(feature_matrix)):
61 | distance = dict()
62 | node_index = node_indicator.indices[node_indicator.indptr[sample_id]: node_indicator.indptr[sample_id + 1]]
63 | for node_id in node_index:
64 | variable = domain.variables[feature[node_id]]
65 | if leave_id[sample_id] != node_id and domain.var_types[variable] == smt.REAL:
66 | new_distance = abs(feature_matrix[sample_id][feature[node_id]] - threshold[node_id])
67 | if variable not in distance or new_distance < distance[variable]:
68 | distance[variable] = new_distance
69 | distances.append(distance)
70 |
71 | return distances
72 |
73 |
74 | def get_distances(domain, data, labels):
75 | # feature_matrix, labels = convert(domain, data, labels)
76 | dt = learn_dt(data, labels)
77 | return get_distances_dt(dt, domain, data)
78 |
79 |
80 | if __name__ == "__main__":
81 | pass
82 |
83 |
--------------------------------------------------------------------------------
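A toy sketch of get_distances (requires scikit-learn and pywmi; the data and labels are made up, and it assumes Domain.make lists boolean variables before real ones, as the other scripts in this repository do):

    import numpy as np

    from pywmi import Domain

    from incal.violations.dt_selection import get_distances

    domain = Domain.make(["a"], ["x", "y"], [(0, 1), (0, 1)])   # variables: a, x, y
    data = np.array([
        [1, 0.1, 0.2],
        [0, 0.4, 0.9],
        [1, 0.8, 0.3],
        [0, 0.9, 0.7],
    ])
    labels = np.array([1, 1, 0, 0])

    # Fits a decision tree on the raw feature matrix and reports, per example, the distance
    # to the closest real-valued split threshold along that example's decision path.
    distances = get_distances(domain, data, labels)
    print(distances)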
/incal/violations/virtual_data.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 |
3 | import numpy as np
4 | import scipy
5 | from pysmt.exceptions import InternalSolverError
6 | from pysmt.environment import Environment
7 | from pysmt.shortcuts import TRUE
8 | from pysmt.typing import REAL, BOOL
9 | from pywmi import Domain, evaluate
10 | from pywmi.sample import uniform
11 | from typing import Dict, Any, Tuple, TYPE_CHECKING
12 |
13 | from .core import SelectionStrategy
14 |
15 | if TYPE_CHECKING:
16 | from pysmt.fnode import FNode
17 |
18 |
19 | class OneClassStrategy(SelectionStrategy):
20 | def __init__(self, regular_strategy, thresholds, tight_mode=True, class_label=True, background_knowledge=None):
21 | self.regular_strategy = regular_strategy # type: SelectionStrategy
22 | self.thresholds = thresholds
23 | self.tight_mode = tight_mode
24 | assert class_label, "Currently only the positive setting is supported"
25 | self.class_label = class_label
26 | self.environment = Environment()
27 | if background_knowledge is None:
28 | self.background_knowledge = self.environment.formula_manager.TRUE()
29 | else:
30 | self.background_knowledge = self.environment.formula_manager.normalize(background_knowledge)
31 |
32 | def find_violating(self, domain, data, labels, formula):
33 | fm = self.environment.formula_manager
34 | formula = fm.normalize(formula)
35 | real_symbols = {name: fm.Symbol(name, REAL) for name in domain.real_vars}
36 | bool_symbols = {name: fm.Symbol(name, BOOL) for name in domain.bool_vars}
37 | symbols = real_symbols.copy()
38 | symbols.update(bool_symbols)
39 | bounds = domain.get_bounds(fm)
40 | try:
41 | with self.environment.factory.Solver() as solver:
42 | solver.add_assertion(formula)
43 | solver.add_assertion(bounds)
44 | solver.add_assertion(self.background_knowledge)
45 | # equalities = []
46 | # for row, label in data:
47 | # for real_var in domain.real_vars:
48 | # sym = real_symbols[real_var]
49 | # val = fm.Real(row[real_var])
50 | # t = fm.Real(self.thresholds[real_var])
51 | # equalities.append(fm.Ite(sym >= val, fm.Equals(sym - val, t), fm.Equals(val - sym, t)))
52 | # solver.add_assertion(fm.Or(*equalities))
53 | for i in range(len(labels)):
54 | row = {v: data[i, j].item() for j, v in enumerate(domain.variables)}
55 | label = labels[i] == 1
56 | if label == self.class_label:
57 | constraint = fm.Implies(fm.And(
58 | *[fm.Iff(bool_symbols[bool_var], fm.Bool(row[bool_var] == 1)) for bool_var in domain.bool_vars]),
59 | fm.Or(*[fm.Ite(real_symbols[real_var] >= fm.Real(row[real_var]),
60 | real_symbols[real_var] - fm.Real(row[real_var]) >= fm.Real(
61 | self.thresholds[real_var]),
62 | fm.Real(row[real_var]) - real_symbols[real_var] >= fm.Real(
63 | self.thresholds[real_var])) for real_var in
64 | domain.real_vars]))
65 | elif label == (not self.class_label):
66 | constraint = fm.Implies(fm.And(
67 | *[fm.Iff(bool_symbols[bool_var], fm.Bool(row[bool_var] == 1)) for bool_var in domain.bool_vars]),
68 | fm.Or(*[fm.Ite(real_symbols[real_var] >= fm.Real(row[real_var]),
69 | real_symbols[real_var] - fm.Real(row[real_var]) >= fm.Real(
70 | self.thresholds[real_var]),
71 | fm.Real(row[real_var]) - real_symbols[real_var] >= fm.Real(
72 | self.thresholds[real_var])) for real_var in
73 | domain.real_vars]))
74 | else:
75 | raise ValueError("Unknown label l_{} = {}".format(i, label))
76 | solver.add_assertion(constraint)
77 | solver.solve()
78 | model = solver.get_model()
79 | example = [float(model.get_value(symbols[var]).constant_value()) for var in domain.variables]
80 | except InternalSolverError:
81 | return None
82 | except Exception as e:
83 | if "Z3Exception" in str(type(e)):
84 | return None
85 | else:
86 | raise e
87 |
88 | return example
89 |
90 | def select_active(self, domain, data, labels, formula, active_indices):
91 | data, labels, selected = self.regular_strategy.select_active(domain, data, labels, formula, active_indices)
92 | if len(selected) > 0:
93 | return data, labels, selected
94 | else:
95 | example = self.find_violating(domain, data, labels, formula)
96 | if example is None:
97 | return data, labels, []
98 | data = np.vstack([data, example])
99 | labels = np.append(labels, np.array([0 if self.class_label else 1]))
100 | return data, labels, [len(labels) - 1]
101 |
102 | @staticmethod
103 | def add_negatives(domain, data, labels, thresholds, sample_count, background_knowledge=None, distance_measure=None):
104 | # type: (Domain, np.ndarray, np.ndarray, Dict, int, FNode, Any) -> Tuple[np.ndarray, np.ndarray]
105 |
106 | new_data = uniform(domain, sample_count)
107 | background_knowledge = background_knowledge or TRUE()
108 | supported_indices = evaluate(domain, background_knowledge, new_data)
109 | boolean_indices = [i for i, v in enumerate(domain.variables) if domain.is_bool(v)]
110 | real_indices = [i for i, v in enumerate(domain.variables) if domain.is_real(v)]
111 | for j in range(new_data.shape[0]):
112 | valid_negative = True
113 | for i in range(data.shape[0]):
114 | # noinspection PyTypeChecker
115 | if labels[i] and all(data[i, boolean_indices] == new_data[j, boolean_indices]):
116 | in_range = True
117 | for ri, v in zip(real_indices, domain.real_vars):
118 | t = thresholds[v] if isinstance(thresholds, dict) else thresholds[i, ri]
119 | if abs(data[i, ri] - new_data[j, ri]) > t:
120 | in_range = False
121 | break
122 | valid_negative = valid_negative and (not in_range)
123 | if not valid_negative:
124 | break
125 | supported_indices[j] = supported_indices[j] and valid_negative
126 | new_data = new_data[supported_indices == 1, :]
127 | return np.concatenate([data, new_data], axis=0), np.concatenate([labels, np.zeros(new_data.shape[0])])
128 |
129 |
130 | """
131 | Find a point e such that, for every existing example e', the distance exceeds the threshold
132 | in at least one real dimension r: AND_{e'} ( OR_{r} d(e, e', r) >= t_r )
133 | """
134 |
--------------------------------------------------------------------------------
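A minimal sketch of the one-class workflow around OneClassStrategy.add_negatives, mirroring incal/tests/test_one_class.py (assumes pywmi is installed):

    import numpy as np

    from pywmi import Domain, evaluate
    from pywmi.sample import uniform

    from incal.violations.virtual_data import OneClassStrategy

    domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
    x, y = domain.get_symbols(domain.variables)
    formula = (x <= y) & domain.get_bounds()

    # Keep only the positive examples, as in the one-class setting.
    data = uniform(domain, 500)
    labels = evaluate(domain, formula, data)
    data, labels = data[labels == 1], labels[labels == 1]

    # Sample 200 candidate points and keep those that are not within the per-variable thresholds
    # of any positive example; the survivors are appended with label 0.
    thresholds = {"x": 0.1, "y": 0.1}
    data, labels = OneClassStrategy.add_negatives(domain, data, labels, thresholds, 200)
    print("now {} examples, {} of them negative".format(len(labels), int(np.sum(labels == 0))))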
/notebooks/baldur.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import time
4 |
5 | from pywmi import Domain, RejectionEngine, evaluate, plot
6 |
7 | import numpy as np
8 |
9 | from incal import Formula
10 | from incal.violations.virtual_data import OneClassStrategy
11 | from incal.k_cnf_smt_learner import KCnfSmtLearner
12 | from incal.learn import LearnOptions
13 | from incal.observe.plotting import PlottingObserver
14 | from incal.parameter_free_learner import learn_bottom_up
15 | from incal.violations.dt_selection import DecisionTreeSelection
16 | from incal.violations.core import RandomViolationsStrategy
17 |
18 |
19 | def experiment():
20 | random.seed(888)
21 | np.random.seed(888)
22 |
23 | start = time.time()
24 |
25 | domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
26 | x, y = domain.get_symbols(domain.variables)
27 | thresholds = {"x": 0.1, "y": 0.1}
28 | # data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50)
29 | filename = "/Users/samuelkolb/Downloads/bg512/AR0206SR.map.scen"
30 | data = np.loadtxt(filename, delimiter="\t", skiprows=1, usecols=[4, 5]) / 512
31 | k = 12
32 | nearest_neighbors = []
33 |
34 | for i in range(len(data)):
35 | nearest_neighbors.append([])
36 | for j in range(len(data)):
37 | if i != j:
38 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables)
39 | if domain.is_bool(v))\
40 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v))
41 | if len(nearest_neighbors[i]) < k:
42 | nearest_neighbors[i].append((j, distance))
43 | else:
44 | index_of_furthest = None
45 | for fi, f in enumerate(nearest_neighbors[i]):
46 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]:
47 | index_of_furthest = fi
48 | if distance < nearest_neighbors[i][index_of_furthest][1]:
49 | nearest_neighbors[i][index_of_furthest] = (j, distance)
50 | print(nearest_neighbors)
51 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * (domain.var_domains[v][1] - domain.var_domains[v][0]) for v in domain.real_vars]
52 | for i in range(len(nearest_neighbors))]
53 | t = np.array(t) * 4
54 | print(t)
55 | # data = uniform(domain, 400)
56 | labels = np.ones(len(data))
57 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000)
58 |
59 | directory = "output{}baldur{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
60 | os.makedirs(directory)
61 |
62 | name = os.path.join(directory, "combined.png")
63 | plot.plot_combined("x", "y", domain, None, (data, labels), None, name, set(), set())
64 |
65 | def learn_inc(_data, _labels, _i, _k, _h):
66 | # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
67 | strategy = RandomViolationsStrategy(10)
68 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
69 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
70 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h),
71 | domain.real_vars[0], domain.real_vars[1], None, False))
72 | return learner.learn(domain, _data, _labels, initial_indices)
73 |
74 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 3, 6, None, None)
75 | duration = time.time() - start
76 | Formula(domain, learned_formula).to_file(os.path.join(directory, "result_{}_{}_{}.json".format(k, h, int(duration))))
77 |
78 |
79 | if __name__ == '__main__':
80 | experiment()
--------------------------------------------------------------------------------
/notebooks/experiments.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | if [ $# -ne 1 ]; then
3 | echo "$0 " 1>&2
4 | exit
5 | fi
6 | if ! [ -d "$1" ]; then
7 | echo "Not a directory: $1" 1>&2
8 | exit
9 | fi
10 |
11 | if [ -z "$(which cilly)" ]; then
12 | echo "cilly compiler not found!" 1>&2
13 | exit
14 | fi
15 |
16 | codeflaws_dir=$1
17 | repair_dir=$codeflaws_dir/../
18 | rm $codeflaws_dir/*/autogen* &> /dev/null
19 | rm $codeflaws_dir/*/incal* &> /dev/null
20 |
21 | if ! [ -e $repair_dir/learn2fix ]; then
22 | echo $repair_dir/learn2fix does not exist.
23 | exit
24 | fi
25 | cp $repair_dir/learn2fix/repairs/genprog/* $repair_dir/
26 | if [ -e $repair_dir/genprog-run ]; then
27 | echo "[INFO] Saving $repair_dir/genprog-run.." 1>&2
28 | rm -rf $repair_dir/genprog-run.old 2> /dev/null
29 | mv $repair_dir/genprog-run $repair_dir/genprog-run.old
30 | fi
31 | mkdir $repair_dir/genprog-run
32 |
33 | #TODO Where is genprog_allfixes created?
34 | if [ -e $repair_dir/genprog-allfixes ]; then
35 | echo "[INFO] Saving $repair_dir/genprog-allfixes.." 1>&2
36 | rm -rf $repair_dir/genprog-allfixes.old 2> /dev/null
37 | mv $repair_dir/genprog-allfixes $repair_dir/genprog-allfixes.old
38 | fi
39 | mkdir $repair_dir/genprog-allfixes
40 |
41 |
42 | for s in $(ls -1d $codeflaws_dir/*/); do
43 | found=false;
44 | for f in $(ls -1 $s/*input*); do if [ $(wc -l $f | cut -d" " -f1) -gt 1 ]; then found=true; continue; fi; done;
45 | if [ "$found" = false ]; then
46 | if [ $(cat $s/input-neg1 | grep -x -E '[[:blank:]]*([[:digit:]]+[[:blank:]]*)*' | wc -l) -eq 1 ]; then
47 | #echo $s
48 | subject=$(echo $s | rev | cut -d/ -f2 | rev)
49 | buggy=$(echo $subject | cut -d- -f1,2,4)
50 | golden=$(echo $subject | cut -d- -f1,2,5)
51 | if [ 0 -eq $(grep "$subject" $codeflaws_dir/codeflaws-defect-detail-info.txt | grep "WRONG_ANSWER" | wc -l) ]; then
52 | echo "[INFO] Skipping non-semantic bug $subject" 1>&2
53 | continue
54 | fi
55 | if ! [ -f "$s/$buggy" ]; then
56 | gcc -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm -std=c99 -c $s/$buggy.c -o $s/$buggy.o &> /dev/null
57 | gcc $s/$buggy.o -o $s/$buggy -lm -s -O2 &> /dev/null
58 | fi
59 | if ! [ -f "$s/$golden" ]; then
60 | gcc -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm -std=c99 -c $s/$golden.c -o $s/$golden.o &> /dev/null
61 | gcc $s/$golden.o -o $s/$golden -lm -s -O2 &> /dev/null
62 | fi
63 | cp $repair_dir/learn2fix/repairs/genprog/test-genprog-incal.py $s/
64 |
65 | for i in $(seq 1 $(nproc --all)); do
66 | (
67 | autotest=$(timeout 11m ./Learn2Fix.py -t 10 -s $s -i $i)
68 | if [ $? -eq 0 ]; then
69 | manual=$($repair_dir/run-version-genprog.sh $subject $i manual 10m)
70 | autogen=$($repair_dir/run-version-genprog.sh $subject $i autogen 10m)
71 | echo $autotest | tr -d '\n'
72 | echo ,$manual | tr -d '\n'
73 | echo ,$autogen
74 | fi
75 | ) >> results_it_$i.csv &
76 | done
77 | wait
78 | else
79 | echo "[INFO] Skipping non-numeric input subject: $s" 1>&2
80 | fi
81 | else
82 | echo "[INFO] Skipping multi-line input subject: $s" 1>&2
83 | fi
84 | done
85 |
86 |
--------------------------------------------------------------------------------
/notebooks/gps.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import time
4 |
5 | from pywmi import Domain, RejectionEngine, evaluate, plot
6 |
7 | import numpy as np
8 |
9 | from incal import Formula
10 | from incal.violations.virtual_data import OneClassStrategy
11 | from incal.k_cnf_smt_learner import KCnfSmtLearner
12 | from incal.learn import LearnOptions
13 | from incal.observe.plotting import PlottingObserver
14 | from incal.parameter_free_learner import learn_bottom_up
15 | from incal.violations.dt_selection import DecisionTreeSelection
16 | from incal.violations.core import RandomViolationsStrategy
17 |
18 |
19 | def experiment():
20 | random.seed(888)
21 | np.random.seed(888)
22 |
23 | start = time.time()
24 |
25 | domain = Domain.make([], ["x", "y"], [(0, 1), (0, 1)])
26 | x, y = domain.get_symbols(domain.variables)
27 | thresholds = {"x": 0.1, "y": 0.1}
28 | # data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50)
29 | filename = "/Users/samuelkolb/Downloads/gps-trajectory-simpler/cell-0-2.data"
30 | data = np.loadtxt(filename, delimiter=",", skiprows=0, usecols=[0, 1])
31 | minimum_0, minimum_1 = min(data[:, 0]), min(data[:, 1])
32 | maximum_0, maximum_1 = max(data[:, 0]), max(data[:, 1])
33 | data[:, 0] = (data[:, 0] - minimum_0) / (maximum_0 - minimum_0)
34 | data[:, 1] = (data[:, 1] - minimum_1) / (maximum_1 - minimum_1)
35 | k = 3
36 | nearest_neighbors = []
37 |
38 | for i in range(len(data)):
39 | nearest_neighbors.append([])
40 | for j in range(len(data)):
41 | if i != j:
42 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables)
43 | if domain.is_bool(v))\
44 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v))
45 | if len(nearest_neighbors[i]) < k:
46 | nearest_neighbors[i].append((j, distance))
47 | else:
48 | index_of_furthest = None
49 | for fi, f in enumerate(nearest_neighbors[i]):
50 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]:
51 | index_of_furthest = fi
52 | if distance < nearest_neighbors[i][index_of_furthest][1]:
53 | nearest_neighbors[i][index_of_furthest] = (j, distance)
54 | print(nearest_neighbors)
55 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) * (domain.var_domains[v][1] - domain.var_domains[v][0]) for v in domain.real_vars]
56 | for i in range(len(nearest_neighbors))]
57 | t = np.array(t) * 2
58 | print(t)
59 | # data = uniform(domain, 400)
60 | labels = np.ones(len(data))
61 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000)
62 |
63 | directory = "output{}gps{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
64 | os.makedirs(directory)
65 |
66 | name = os.path.join(directory, "combined.png")
67 | plot.plot_combined("x", "y", domain, None, (data, labels), None, name, set(), set())
68 |
69 | def learn_inc(_data, _labels, _i, _k, _h):
70 | # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
71 | strategy = RandomViolationsStrategy(10)
72 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
73 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
74 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h),
75 | domain.real_vars[0], domain.real_vars[1], None, False))
76 | return learner.learn(domain, _data, _labels, initial_indices)
77 |
78 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 4, 8, None, None)
79 | duration = time.time() - start
80 | Formula(domain, learned_formula).to_file(os.path.join(directory, "result_{}_{}_{}.json".format(k, h, int(duration))))
81 |
82 |
83 | if __name__ == '__main__':
84 | experiment()
--------------------------------------------------------------------------------
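The nested loops above (repeated in /notebooks/synthetic.py below) compute, for every sample, the mean distance to its k nearest neighbours under a normalized Chebyshev metric and scale it into a per-point, per-variable threshold for OneClassStrategy.add_negatives. Below is a minimal vectorized sketch of that threshold computation, assuming purely real-valued data already rescaled to [0, 1] (so every variable range is 1); knn_thresholds and factor are illustrative names, not part of the incal API.

# A minimal, vectorized sketch of the per-point threshold computation above,
# assuming purely real-valued data that has already been scaled to [0, 1].
import numpy as np

def knn_thresholds(data, k=3, factor=2.0):
    # Pairwise Chebyshev distances: max over dimensions of |x_i - x_j|.
    diffs = np.abs(data[:, None, :] - data[None, :, :])   # shape (n, n, d)
    dists = diffs.max(axis=2)                              # shape (n, n)
    np.fill_diagonal(dists, np.inf)                        # exclude self-distance
    # Mean distance to the k nearest neighbours of each point.
    knn = np.sort(dists, axis=1)[:, :k]                    # shape (n, k)
    mean_knn = knn.mean(axis=1)                            # shape (n,)
    # One threshold per point and per dimension, scaled by `factor`
    # (gps.py multiplies by 2, synthetic.py by 1.5).
    return np.repeat(mean_knn[:, None], data.shape[1], axis=1) * factor

if __name__ == "__main__":
    rng = np.random.RandomState(888)
    points = rng.rand(10, 2)
    print(knn_thresholds(points, k=3))

With n samples this builds an n-by-n distance matrix, so it trades memory for the quadratic Python loop in the notebook; both produce the same thresholds up to the scaling factor.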
/notebooks/playground.py:
--------------------------------------------------------------------------------
1 | from inspect import signature
2 |
3 | import numpy as np
4 |
5 | from smtlearn.examples import ice_cream_problem
6 | from pywmi.plot import plot_data, plot_formula
7 | from pywmi.sample import uniform
8 | from pywmi.smt_check import evaluate
9 | import random
10 | from incal.violations.core import RandomViolationsStrategy
11 | from incal.k_cnf_smt_learner import KCnfSmtLearner
12 | from pywmi.smt_print import pretty_print
13 |
14 | random.seed(666)
15 | np.random.seed(666)
16 |
17 | domain, formula, name = ice_cream_problem()
18 | # plot_formula(None, domain, formula)
19 |
20 | data = uniform(domain, 100)
21 | labels = evaluate(domain, formula, data)
22 |
23 | learner = KCnfSmtLearner(3, 3, RandomViolationsStrategy(10))
24 | initial_indices = random.sample(range(data.shape[0]), 20)
25 |
26 | learned_theory = learner.learn(domain, data, labels, initial_indices)
27 | print(pretty_print(learned_theory))
28 |
--------------------------------------------------------------------------------
/notebooks/results.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | if [ $# -ne 1 ]; then
3 | echo "$0 "
4 | exit
5 | fi
6 | results=$1
7 |
8 | if ! [ -e $results ]; then
9 | echo $results does not exist.
10 | exit 1
11 | fi
12 |
13 | if [ 0 -eq $(cat $results | wc -l) ]; then
14 | echo $results is empty.
15 | exit 1
16 | fi
17 |
18 | n_subjects=$(cat $results | cut -d, -f1 | sort | uniq | wc -l)
19 | avg_labeling=$(echo "scale=2; $(cat $results | cut -d, -f4 | awk '{s+=$1} END {print s}') / $(cat $results | wc -l)" | bc)
20 | no_failing=$(cat $results | cut -d, -f-5 | grep ,0$ | cut -d, -f1 | sort | uniq | wc -l)
21 | no_labeled_fail=$(cat $results | cut -d, -f-6 | grep ,0$ | cut -d, -f1 | sort | uniq | wc -l)
22 |
23 | labeling_effort=$(echo "scale=2; $(cat $results | cut -d, -f4 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f3 | awk '{s+=$1} END {print s}') * 100" | bc)
24 |
25 | labeled_fail=$(echo "scale=2; $(cat $results | cut -d, -f6 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f4 | awk '{s+=$1} END {print s}') * 100" | bc)
26 | failure_rate=$(echo "scale=2; $(cat $results | cut -d, -f5 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f3 | awk '{s+=$1} END {print s}') * 100" | bc)
27 | improvement=$(echo "scale=2; $labeled_fail / $failure_rate" | bc)
28 |
29 |
30 | accuracy=$(echo "scale=2; $(cat $results | cut -d, -f8 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f7 | awk '{s+=$1} END {print s}') * 100" | bc)
31 | accuracy_fail=$(echo "scale=2; $(cat $results | cut -d, -f10 | awk '{s+=$1} END {print s}') / $(cat $results | cut -d, -f9 | awk '{s+=$1} END {print s}') * 100" | bc)
32 |
33 |
34 |
35 |
36 | echo "n_subjects $n_subjects"
37 | echo "avg_labeling $avg_labeling"
38 | echo "no_failing $no_failing #subjects where none of the generated test cases are failing."
39 | echo "no_labled_fail $no_labled_fail #subjects where none of the labeled generated test cases are failing."
40 |
41 | echo "labeling_effort $labeling_effort"
42 | echo "labeled_fail $labeled_fail"
43 | echo "failure_rate $failure_rate"
44 | echo "improvement $improvement"
45 | echo "accuracy $accuracy"
46 | echo "accuracy_fail $accuracy_fail"
47 |
--------------------------------------------------------------------------------
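For reference, here is a hedged Python equivalent of the aggregation above. The column meanings are assumptions inferred from the 1-based field indices used in the cut/awk pipelines (1 = subject id, 3 = generated tests, 4 = labeled tests, 5 = failing tests, 6 = labeled failing tests, 7/8 and 9/10 = accuracy denominator/numerator overall and on failing tests); summarize is an illustrative helper, not part of the repository.

# A hedged Python sketch of the aggregation performed by results.sh above.
# The column meanings are assumptions inferred from the field indices the
# shell script uses; they are not documented anywhere in the repository.
import csv
import sys

def summarize(path):
    rows = [list(map(str.strip, r)) for r in csv.reader(open(path)) if r]
    col = lambda i: [float(r[i - 1]) for r in rows]     # 1-based numeric column
    pct = lambda num, den: 100.0 * sum(num) / sum(den)
    print("n_subjects", len({r[0] for r in rows}))
    print("avg_labeling", sum(col(4)) / len(rows))
    print("no_failing", len({r[0] for r in rows if float(r[4]) == 0}))
    print("no_labeled_fail", len({r[0] for r in rows if float(r[5]) == 0}))
    print("labeling_effort", pct(col(4), col(3)))
    print("labeled_fail", pct(col(6), col(4)))
    print("failure_rate", pct(col(5), col(3)))
    print("improvement", pct(col(6), col(4)) / pct(col(5), col(3)))
    print("accuracy", pct(col(8), col(7)))
    print("accuracy_fail", pct(col(10), col(9)))

if __name__ == "__main__":
    summarize(sys.argv[1])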
/notebooks/synthetic.py:
--------------------------------------------------------------------------------
1 | import os
2 | import random
3 | import time
4 |
5 | from pywmi import Domain, RejectionEngine, evaluate, plot
6 |
7 | import numpy as np
8 |
9 | from incal import Formula
10 | from incal.violations.virtual_data import OneClassStrategy
11 | from incal.k_cnf_smt_learner import KCnfSmtLearner
12 | from incal.learn import LearnOptions
13 | from incal.observe.plotting import PlottingObserver
14 | from incal.parameter_free_learner import learn_bottom_up
15 | from incal.violations.dt_selection import DecisionTreeSelection
16 | from incal.violations.core import RandomViolationsStrategy
17 |
18 |
19 | def experiment():
20 | random.seed(888)
21 | np.random.seed(888)
22 |
23 | start = time.time()
24 |
25 | domain = Domain.make(["b0", "b1", "b2"], ["x0", "x1"], [(0, 1), (0, 1)])
26 | # thresholds = {"x": 0.1, "y": 0.1}
27 | # data, _ = RejectionEngine(domain, formula, x * x, 100000).get_samples(50)
28 | filename = "/Users/samuelkolb/Downloads/input-ijcai-rh/ijcai-rh_2_3_2_100_50_4_3.problem_0.train_dataset.data"
29 | data = np.loadtxt(filename, delimiter=",", skiprows=0)
30 | k = 4
31 | nearest_neighbors = []
32 |
33 | for i in range(len(data)):
34 | nearest_neighbors.append([])
35 | for j in range(len(data)):
36 | if i != j:
37 | distance = 1 if any(data[i, b] != data[j, b] for b, v in enumerate(domain.variables)
38 | if domain.is_bool(v))\
39 | else max(abs(data[i, r] - data[j, r]) / (domain.var_domains[v][1] - domain.var_domains[v][0]) for r, v in enumerate(domain.variables) if domain.is_real(v))
40 | if len(nearest_neighbors[i]) < k:
41 | nearest_neighbors[i].append((j, distance))
42 | else:
43 | index_of_furthest = None
44 | for fi, f in enumerate(nearest_neighbors[i]):
45 | if index_of_furthest is None or f[1] > nearest_neighbors[i][index_of_furthest][1]:
46 | index_of_furthest = fi
47 | if distance < nearest_neighbors[i][index_of_furthest][1]:
48 | nearest_neighbors[i][index_of_furthest] = (j, distance)
49 | print(nearest_neighbors)
50 | t = [[sum(n[1] for n in nearest_neighbors[i]) / len(nearest_neighbors[i]) *
51 | (domain.var_domains[v][1] - domain.var_domains[v][0]) if domain.is_real(v) else 0 for v in domain.variables]
52 | for i in range(len(nearest_neighbors))]
53 | t = np.array(t) * 1.5
54 | print(t)
55 | # data = uniform(domain, 400)
56 | labels = np.ones(len(data))
57 | data, labels = OneClassStrategy.add_negatives(domain, data, labels, t, 1000)
58 |
59 | directory = "output{}lariat-synthetic{}{}".format(os.path.sep, os.path.sep, time.strftime("%Y-%m-%d %Hh%Mm%Ss"))
60 | os.makedirs(directory)
61 |
62 | name = os.path.join(directory, "combined.png")
63 | plot.plot_combined("x0", "x1", domain, None, (data, labels), None, name, set(), set())
64 |
65 | def learn_inc(_data, _labels, _i, _k, _h):
66 | # strategy = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
67 | strategy = DecisionTreeSelection()
68 | learner = KCnfSmtLearner(_k, _h, strategy, "mvn")
69 | initial_indices = LearnOptions.initial_random(20)(list(range(len(_data))))
70 | learner.add_observer(PlottingObserver(domain, directory, "run_{}_{}_{}".format(_i, _k, _h),
71 | domain.real_vars[0], domain.real_vars[1], None, False))
72 | return learner.learn(domain, _data, _labels, initial_indices)
73 |
74 | (new_data, new_labels, learned_formula), k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 2, 4, None, None)
75 | duration = time.time() - start
76 | Formula(domain, learned_formula).to_file(os.path.join(directory, "result_{}_{}_{}.json".format(k, h, int(duration))))
77 |
78 |
79 | if __name__ == '__main__':
80 | experiment()
--------------------------------------------------------------------------------
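Both notebooks delegate the choice of k (clauses) and h (hyperplanes) to learn_bottom_up, restarting the inner learner with larger values when learning fails. The following self-contained sketch illustrates that bottom-up idea only; it does not reproduce the actual signature or search order of incal.parameter_free_learner, and bottom_up_search, inner_learner and the bounds are stand-ins.

# A simplified illustration of a bottom-up parameter search: try the cheapest
# model first and grow (k, h) until the inner learner reports success.
from itertools import count

def bottom_up_search(inner_learner, start_k=1, start_h=1, max_k=4, max_h=8):
    # Enumerate (k, h) pairs by increasing "cost" k + h, smallest models first.
    for budget in count(start_k + start_h):
        for k in range(start_k, max_k + 1):
            h = budget - k
            if h < start_h or h > max_h:
                continue
            result = inner_learner(k, h)
            if result is not None:          # inner learner succeeded
                return result, k, h
        if budget > max_k + max_h:
            raise RuntimeError("no model found within the (k, h) bounds")

if __name__ == "__main__":
    # Toy inner learner: pretend learning only succeeds once k >= 2 and h >= 3.
    toy = lambda k, h: (k, h) if (k >= 2 and h >= 3) else None
    print(bottom_up_search(toy))

In the toy example the search succeeds at (k, h) = (2, 3), the cheapest pair (by k + h) that the stand-in learner accepts.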
/plotting_commands.txt:
--------------------------------------------------------------------------------
1 | # Effect of symmetries
2 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_m/ remote_res/smt_lib_benchmark/qf_lra_results_n/ remote_res/smt_lib_benchmark/qf_lra_results_v/ remote_res/smt_lib_benchmark/qf_lra_results_mvn/ remote_res/smt_lib_benchmark/qf_lra_results_h --res_path remote_res show -p -t duration -g k learner -o ly "Duration (s)" lx "Number of learned clauses (k)" steps_x 3 error 0 -w ../incal_mlj/figures/symmetries_benchmark.png
3 |
4 | # Counts
5 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_m/ remote_res/smt_lib_benchmark/qf_lra_results_n/ remote_res/smt_lib_benchmark/qf_lra_results_v/ remote_res/smt_lib_benchmark/qf_lra_results_mvn/ remote_res/smt_lib_benchmark/qf_lra_results_h --res_path remote_res show -p -a count -t id -g k learner -o ly "Number of runs" lx "Number of learned clauses (k)" steps_x 3 error 0
6 |
7 | # Learned halfspaces
8 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_m/ remote_res/smt_lib_benchmark/qf_lra_results_n/ remote_res/smt_lib_benchmark/qf_lra_results_v/ remote_res/smt_lib_benchmark/qf_lra_results_mvn/ remote_res/smt_lib_benchmark/qf_lra_results_h --res_path remote_res show -p -t h -g k learner -o error 1
9 |
10 | # Effect of dt
11 | incal-experiments smt-lib-benchmark analyze --dirs remote_res/smt_lib_benchmark/qf_lra_results_plain/ remote_res/smt_lib_benchmark/qf_lra_results_dt/ remote_res/smt_lib_benchmark/qf_lra_results_mvn remote_res/smt_lib_benchmark/qf_lra_results_dt_mvn --res_path remote_res show -p -t duration -g k selection_strategy learner -o ly "Duration (s)" lx "Number of learned clauses (k)" steps_x 3 error 0 -w ../incal_mlj/figures/dt_benchmark.png
12 |
13 | # --- Exploring H
14 | # Duration over increasing original h
15 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t duration -g original_h selection_strategy learner -o ly "Duration (s)" lx "Number of inequalities in ground truth (h)" -w ../incal_mlj/figures/time_over_oh_synthetic.png
16 | # Learned h over original h
17 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t h -g original_h selection_strategy learner -o ly "Number of learned hyperplanes (h)" lx "Number of inequalities in ground truth (h)" -w ../incal_mlj/figures/h_over_oh_synthetic.png
18 | # Duration over learned h
19 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t duration -g h selection_strategy learner -o ly "Duration (s)" lx "Number of learned inequalities (h)" -w ../incal_mlj/figures/time_over_h_synthetic.png
20 | # Accuracy over learned h
21 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain res/synthetic/output/hh_mvn res/synthetic/output/hh_dt res/synthetic/output/hh_dt_mvn res/synthetic/output/hh_dt_mn --res_path res show -p -t accuracy_approx -g h selection_strategy learner -o ly "Accuracy on test set" lx "Number of learned inequalities (h)" y_lim "0.5:1" legend_pos "lower center" -w ../incal_mlj/figures/acc_over_h_synthetic.png
22 |
23 | # Duration over positive ratio
24 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/hh_plain --res_path res show -p -t duration -g pos_rate__batch0.1 h -o ly "Duration (s)" lx "Positive rate" -e "k!=2" "h<5" -w ../incal_mlj/figures/time_over_pos_rate.png
25 |
26 |
27 | # --- Exploring L
28 | # Timeouts over increasing original l
29 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/ll_plain --res_path res show -p -t executed -g original_l learner -o error 0 y_lim 0:1 lx "Number of literals in ground truth" ly "Fraction of experiments completed within time limit" steps_x 5 -w ../incal_mlj/figures/ex_over_l_synthetic.png
30 |
31 |
32 | # --- Exploring K
33 | # Timeouts over increasing original k
34 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/kk_plain res/synthetic/output/kk_dt_mvn --res_path res show -p -t executed -g original_k learner -o error 0 y_lim 0:1 lx "Number of clauses in ground truth" ly "Fraction of experiments completed within time limit" steps_x 5 -w ../incal_mlj/figures/ex_over_k_synthetic.png
35 | # Learned k over original k
36 | incal-experiments smt-lib-benchmark analyze --dirs res/synthetic/output/kk_plain res/synthetic/output/kk_dt_mvn --res_path res show -p -t k -g original_k selection_strategy learner -o ly "Number of learned clauses (k)" lx "Number of clauses in ground truth (k)" -e "executed=0" -w ../incal_mlj/figures/k_over_original_k_synthetic.png
37 |
--------------------------------------------------------------------------------
/repairs/genprog/run-version-genprog.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Script to run GenProg on subjects in the codeflaws directory
3 | # The following variables need to be changed:
4 | rootdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" #directory of this script
5 | rundir="$rootdir/genprog-run" # directory in which genprog is called from, a temporary output directory where everything will be copied to during the repair
6 | versiondir="$rootdir/codeflaws" #directory where the codeflaws.tar.gz is extracted
7 | version=$1
8 | genprog="/opt/genprog/bin/genprog" # location of the installed genprog
9 | iteration=$2
10 | testsuite=$3
11 | timelimit=$4
12 |
13 | kill_descendant_processes() {
14 | local pid="$1"
15 | local and_self="${2:-false}"
16 | if children="$(pgrep -P "$pid")"; then
17 | for child in $children; do
18 | kill_descendant_processes "$child" true
19 | done
20 | fi
21 | if [[ "$and_self" == true ]]; then
22 | kill -9 "$pid"
23 | fi
24 | }
25 |
26 | print_results(){
27 | if ! [ -z "$4" ]; then
28 | echo "$testsuite,$2,$3,$4"
29 | else
30 | echo "$testsuite,$2,$3,,,"
31 | fi
32 | exit
33 | }
34 |
35 | if [[ "$version" == *"-bug-"* ]]; then
36 | if ! grep -q "$version" $rootdir/versions-ignored-all.txt; then
37 | var=$((var+1))
38 | #get buggy filename from directory name:
39 | contestnum=$(echo $version | cut -d$'-' -f1)
40 | probnum=$(echo $version | cut -d$'-' -f2)
41 | buggyfile=$(echo $version | cut -d$'-' -f4)
42 | cfile=$(echo "$contestnum-$probnum-$buggyfile".c)
43 | cilfile=$(echo "$contestnum-$probnum-$buggyfile".cil.c)
44 | cfixfile=$(echo "$contestnum-$probnum-$buggyfile"-fix.c)
45 | if [[ "$testsuite" = autogen ]]; then
46 | diffc=$(ls -1 $versiondir/$version/autogen-$iteration-n* | wc -l)
47 | posc=$(ls -1 $versiondir/$version/autogen-$iteration-p* | wc -l)
48 | else
49 | diffc=$(grep "Diff Cases" $versiondir/$version/$cfile.revlog | awk '{print $NF}')
50 | posc=$(grep "Positive Cases" $versiondir/$version/$cfile.revlog | awk '{print $NF}')
51 | fi
52 |
53 | echo "[INFO] Repairing $version ($testsuite, iteration $iteration with $posc positive and $diffc negative test cases)" 1>&2
54 |
55 | DIRECTORY="$versiondir/$version"
56 | if [ ! -d "$DIRECTORY" ]; then
57 | echo "[ERROR] FOLDER DOESNT EXIST: $version" 1>&2
58 | exit 1
59 | fi
60 |
61 | cd $rundir/
62 | rm -rf $rundir/tempworkdir-$version-$iteration-$testsuite
63 | rm -rf $rundir/tempworkdir-$version-$iteration-$testsuite-validation
64 | cp -r $versiondir/$version $rundir/tempworkdir-$version-$iteration-$testsuite
65 | cd $rundir/tempworkdir-$version-$iteration-$testsuite
66 |
67 | cp $rootdir/configuration-default configuration-$version
68 | if [[ "$testsuite" = autogen ]]; then
69 | sed -i "s/test-genprog.sh/test-genprog-incal.py $iteration $version/g" configuration-$version
70 | else
71 | sed -i "s/50s/2s/g" test-genprog.sh #Timeout management
72 | fi
73 |
74 | cp $rootdir/compile.pl compile.pl
75 | echo "$cfile">>bugged-program.txt
76 | echo "--pos-tests $posc">>configuration-$version
77 | echo "--neg-tests $diffc">>configuration-$version
78 | rm -rf preprocessed
79 | rm -rf coverage
80 | mkdir -p preprocessed
81 | make CC="cilly" CFLAGS="--save-temps -std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> initialbuild
82 | if grep -q "Error:" initialbuild; then
83 | if grep -q "Length of array is not" initialbuild; then
84 | printf "[ERROR] %s\t%s\t%s\n" "$version" "MAKE:ARRAY BUG" "0s" 1>&2
85 | else
86 | printf "[ERROR] %s\t%s\t%s\n" "$version" "MAKEFAILED!" "0s" 1>&2
87 | fi
88 | exit 1
89 | fi
90 | cp $cilfile preprocessed/$cfile
91 | cp preprocessed/$cfile $cfile
92 | rm -rf coverage
93 | rm -rf coverage.path.*
94 | rm -rf repair.cache
95 | rm -rf repair.debug.*
96 | #echo "[INFO] RUNNING CMD:$genprog configuration-$version" 1>&2
97 | timeout -k 0 $timelimit $genprog configuration-$version &> $rundir/temp-$version-$iteration-$testsuite.out
98 | timespent=$(grep "TOTAL" "$rundir/temp-$version-$iteration-$testsuite.out" | cut -d'=' -f1 | awk '{print $NF}')
99 | #echo "[INFO] Time Spent: $timespent" 1>&2
100 | if [ -z "${timespent}" ]; then
101 | print_results $version "TIMEOUT" $timelimit
102 | fi
103 | if [ ! -f "$rundir/tempworkdir-$version-$iteration-$testsuite/build.log" ]; then
104 | print_results $version "BUILDFAILED:FILE" $timespent
105 | elif grep -q "Failed to make" $rundir/tempworkdir-$version-$iteration-$testsuite/build.log; then
106 | print_results $version "BUILDFAILED" $timespent
107 | elif grep -q "nexpected" "$rundir/temp-$version-$iteration-$testsuite.out"; then
108 | print_results $version "VERIFICATIONFAILED" $timespent
109 | elif grep -q "Timeout" "$rundir/temp-$version-$iteration-$testsuite.out"; then
110 | print_results $version "TIMEOUT" $timelimit
111 | elif grep -q "Repair Found" "$rundir/temp-$version-$iteration-$testsuite.out"; then
112 | contestnum=$(echo "$version" | cut -d$'-' -f1)
113 | probnum=$(echo "$version" | cut -d$'-' -f2)
114 | buggyfile=$(echo "$version" | cut -d$'-' -f4)
115 | cfile=$(echo "$contestnum-$probnum-$buggyfile".c)
116 | cfixfile=$(echo "$version-fix".c)
117 | fixf="$rundir/tempworkdir-$version-$iteration-$testsuite/repair/$cfile"
118 | #for fixing the asm_booo instruction that GenProg introduced
119 | sed -i '/booo/d' "$fixf"
120 | cp $fixf $rootdir/genprog-allfixes/repair-$contestnum-$probnum-$buggyfile-$iteration-$testsuite.c
121 | validity=$($rootdir/validate-fix-genprog.sh "$version" "$rundir/temp-$version-$iteration-$testsuite.out" "$rundir/tempworkdir-$version-$iteration-$testsuite")
122 | print_results $version "REPAIR" $timespent $validity
123 | elif grep -q "no repair" "$rundir/temp-$version-$iteration-$testsuite.out"; then
124 | print_results $version "NOREPAIR" $timespent
125 | elif grep -q "Assertion failed" "$rundir/temp-$version-$iteration-$testsuite.out"; then
126 | print_results $version "COVERAGEFAIL" $timespent
127 | fi
128 | echo "[ERROR] No interpretation:" 1>&2
129 | cat "$rundir/temp-$version-$iteration-$testsuite.out" 1>&2
130 | print_results $version "????" $timespent
131 | else
132 | echo "[INFO] IGNORING:$version" 1>&2
133 | fi
134 | #if test -e "$rundir/tempworkdir-$version-$iteration-manual/bugreport.txt";then
135 | # echo "BUG"
136 | # if grep -q "BOOO" "$rundir/tempworkdir-$version-$iteration-manual/bugreport.txt"; then
137 | # printf "%s\tBOOOBUG\n" "$version" >> "$rootdir/genprog-bugs.log"
138 | # fi
139 | #fi
140 | fi
141 | #kill_descendant_processes $$
142 |
--------------------------------------------------------------------------------
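Each repair attempt ends with print_results emitting one CSV row of the form `<testsuite>,<status>,<time>,<validity>`, where `<validity>` is either three empty fields or the `passed,golden_passed,total` triple produced by validate-fix-genprog.sh further below. Here is a hedged parsing sketch; the field names are chosen for illustration, only the field order comes from the scripts.

# A sketch for parsing the rows emitted by print_results above.
import csv
from typing import Optional, NamedTuple

class RepairResult(NamedTuple):
    testsuite: str                 # "manual" or "autogen"
    status: str                    # e.g. REPAIR, NOREPAIR, TIMEOUT, BUILDFAILED
    time: str                      # time spent as reported, or the time limit
    fix_passed: Optional[int]      # held-out tests passed by the repaired program
    golden_passed: Optional[int]   # held-out tests passed by the golden version
    total: Optional[int]           # number of held-out tests

def parse_rows(path):
    to_int = lambda s: int(s) if s.strip() else None
    for row in csv.reader(open(path)):
        if len(row) < 6:
            continue
        testsuite, status, time, passed, golden, total = row[:6]
        yield RepairResult(testsuite, status, time,
                           to_int(passed), to_int(golden), to_int(total))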
/repairs/genprog/test-genprog-incal.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import sys
4 | import numpy as np
5 | import subprocess
6 |
7 | # Write: Convert input value(s) into string
8 | def format_input(test_input):
9 |
10 | input_vals=""
11 | if((type(test_input) is np.ndarray) or (type(test_input) is list)):
12 |
13 | for i_val in test_input:
14 | input_vals+=str(int(i_val))+" "
15 |
16 | input_vals=input_vals.strip()
17 |
18 | else:
19 | input_vals=str(int(test_input))
20 |
21 |
22 | return input_vals
23 |
24 | #return str(int(test_input))
25 |
26 | # Read: Convert input string into value(s)
27 | def unformat_input(test_input, input_size):
28 | if((type(test_input) is np.ndarray) or (type(test_input) is list)):
29 | if test_input.size != input_size and input_size > 0:
30 | sys.exit("[ERROR "+subject_name+"] Input has variable length")
31 | return test_input
32 |
33 | if input_size > 1:
34 | sys.exit("[ERROR "+subject_name+"] Input has variable length")
35 | return int(test_input)
36 |
37 | # Read: Convert output string into value.
38 | def unformat_output(test_output):
39 | if test_output == "": return 0
40 | if test_output.strip().lower() == "yes": return 1
41 | if test_output.strip().lower() == "no" : return 0
42 |
43 | try: return int(test_output)
44 | except:
45 | try: return int(round(float(test_output),0))
46 | except: sys.exit("[ERROR "+subject_name+"] Unknown output")
47 |
48 | # Execute test_input on program and return output value
49 | def run_test(test_input, program):
50 | formatted_input = format_input(test_input)
51 | process = subprocess.Popen(('echo', formatted_input), stdout=subprocess.PIPE)
52 | test_output=""
53 | try:
54 | test_output = subprocess.check_output(["timeout", "-k", "2s", "2s", program], stdin=process.stdout,encoding="utf-8")
55 | except:
56 | pass
57 |
58 | unformatted_output = unformat_output(test_output)
59 |
60 | return unformatted_output
61 |
62 | # Execute test_input on buggy and golden program. Return false (test failure) if output differs
63 | def ask_human(test_input, bug_prog, gold_prog):
64 | actual_output = run_test(test_input, bug_prog)
65 | expected_output = run_test(test_input, gold_prog)
66 | return actual_output == expected_output
67 |
68 |
69 | iteration = sys.argv[1]
70 | subject = sys.argv[2]
71 | test = sys.argv[3]
72 |
73 | bug_dir = subject.rstrip("/")
74 | temp = bug_dir.split("/")[-1].split("-")
75 | bug_prog = "./"+temp[0] + "-" + temp[1] + "-" + temp[3]
76 | gold_prog = "./"+temp[0] + "-" + temp[1] + "-" + temp[4]
77 | subject_name = bug_dir.split("/")[-1]
78 |
79 | test_suite = []
80 | test_data = np.genfromtxt("autogen-"+iteration+"-"+test)
81 | test_suite.append(unformat_input(test_data, 0))
82 | test_suite = np.array(test_suite)
83 |
84 | if ask_human(test_suite[0], bug_prog, gold_prog):
85 | sys.exit(0)
86 | else:
87 | sys.exit(1)
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
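GenProg drives this oracle once per test case: run-version-genprog.sh rewrites the configuration so that each test executes `test-genprog-incal.py <iteration> <subject> <test-id>`, and the exit code (0 = buggy and golden program agree on the generated input, 1 = they differ) is the pass/fail signal. Below is a minimal sketch of that calling convention, assuming test ids that follow the autogen-<iteration>-<id> file naming; the subject path and test ids are hypothetical.

# Minimal sketch: invoking the oracle script per test case and reading its
# exit code. The subject directory and test ids below are hypothetical.
import subprocess

def run_oracle(iteration, subject_dir, test_id):
    completed = subprocess.run(
        ["./test-genprog-incal.py", str(iteration), subject_dir, test_id],
        cwd=subject_dir,   # the script reads autogen-<iteration>-<id> from its cwd
    )
    return completed.returncode == 0   # True: buggy and golden outputs agree

if __name__ == "__main__":
    for test_id in ["p1", "p2", "n1"]:                         # hypothetical ids
        ok = run_oracle(1, "./codeflaws/1-A-bug-1-2", test_id)  # hypothetical path
        print(test_id, "PASS" if ok else "FAIL")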
/repairs/genprog/validate-fix-genprog.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #rootdir="/home/ubuntu/codeforces-crawler/CodeforcesSpider" #directory of this script
3 | rootdir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" #directory of this script
4 | rundir=$rootdir/genprog-run # directory from which genprog is called
5 | version="$1"
6 | filen="$2"
7 | repairdir="$3"
8 |
9 | contestnum=$(echo $version | cut -d$'-' -f1)
10 | probnum=$(echo $version | cut -d$'-' -f2)
11 | buggyfile=$(echo $version | cut -d$'-' -f4)
12 | cfile=$(echo "$contestnum-$probnum-$buggyfile".c)
13 | cexefile=$(echo "$contestnum-$probnum-$buggyfile")
14 | correctfile=$(echo $version | cut -d$'-' -f5)
15 | goldenfile=$(echo "$contestnum-$probnum-$correctfile".c)
16 |
17 | validdir=$repairdir-validation
18 |
19 | runalltest(){
20 | i="$1"
21 | origversion="$2"
22 | goldenpass="$3"
23 | filen=$validdir/test-valid.sh
24 | passt=0
25 | failt=0
26 | totalt=0
27 | alltests=($(grep -E "p[0-9]+\)" $filen | cut -d')' -f 1))
28 | for t in "${alltests[@]}"
29 | do
30 | timeout -k 2s 2s $filen "$t"&>/dev/null
31 | if [ $? -ne 0 ]; then
32 | failt=$((failt+1))
33 | else
34 | passt=$((passt+1))
35 | fi
36 | totalt=$((totalt+1))
37 | done
38 | if [ ! -z "$goldenpass" ]; then
39 | echo "$passt,$goldenpass,$totalt"
40 | else
41 | echo $passt
42 | fi
43 | #pkill $cexefile
44 | #pkill test-valid.sh
45 | }
46 |
47 |
48 | if grep -q "Repair Found" $filen; then
49 | fixf="$repairdir/repair/$cfile"
50 | cp -r $repairdir $validdir
51 | cd $validdir
52 | cp $validdir/preprocessed/$cfile $cfile
53 | make clean &>/dev/null
54 | make CFLAGS="-std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> /dev/null
55 | runalltest "orig-$cfile" "$version" "" > /dev/null
56 | cp "$cfile" "$cfile.orig"
57 | #copy and compile the fix file before running validation tests
58 | cp $goldenfile $cfile
59 | make clean &> /dev/null
60 | make CFLAGS="-std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> /dev/null
61 |
62 |
63 | goldenpasst=$(runalltest "golden-$goldenfile" "$version" "")
64 | #echo "GOLDEN:$goldenpasst"
65 |
66 | cp $fixf $cfile
67 | make clean &> /dev/null
68 | make CFLAGS="-std=c99 -fno-optimize-sibling-calls -fno-strict-aliasing -fno-asm" &> /dev/null
69 |
70 | runalltest "fix-$cfile" "$version" "$goldenpasst"
71 |
72 | #restore the file
73 | cp "$cfile.orig" "$cfile"
74 | #rm -rf "$repairdir"
75 | fi
76 |
--------------------------------------------------------------------------------
/results/Plots.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/Plots.pdf
--------------------------------------------------------------------------------
/results/accuracy.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/accuracy.pdf
--------------------------------------------------------------------------------
/results/effort1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/effort1.pdf
--------------------------------------------------------------------------------
/results/effort2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/effort2.pdf
--------------------------------------------------------------------------------
/results/patchquality.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/patchquality.pdf
--------------------------------------------------------------------------------
/results/repairability.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/repairability.pdf
--------------------------------------------------------------------------------
/results/training.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/training.pdf
--------------------------------------------------------------------------------
/results/validation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/validation.pdf
--------------------------------------------------------------------------------
/results/validation2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mboehme/learn2fix/0589fc3faa295aa155e8ec9c971af4e0fccfa0b8/results/validation2.pdf
--------------------------------------------------------------------------------
/scripts/h_combine.sh:
--------------------------------------------------------------------------------
1 | function join_by { local IFS="$1"; shift; echo "$*"; }
2 |
3 | cd ../smtlearn
4 | declare -a options=(3 4 5 6 7 8 9 10)
5 |
6 | mkdir ../synthetic/hh/all
7 | for i in "${options[@]}"; do
8 | cp ../synthetic/hh/$i/* ../synthetic/hh/all/
9 | done
10 |
11 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/hh ../output/synthetic/hh
12 |
13 | python api.py combine ../output/synthetic/hh/summary $(join_by " " "${options[@]}") -p ../output/synthetic/hh/
14 | python api.py migrate ratio ../output/synthetic/hh/summary/ -d ../synthetic/hh/all -s 1000 -f
15 | python api.py migrate accuracy ../output/synthetic/hh/summary/ -d ../synthetic/hh/all -s 1000 -f
16 |
--------------------------------------------------------------------------------
/scripts/h_generate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(3 4 5 6 7 8 9 10)
6 |
7 | for i in "${options[@]}"; do
8 | python api.py generate ../synthetic/hh/$i -n 100 -b 0 -r 2 -k 2 -l 3 --half_spaces $i
9 | done
10 | wait
11 |
12 | scp -r ../synthetic/hh samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/
--------------------------------------------------------------------------------
/scripts/h_learn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(3 4 5 6 7 8 9 10)
6 |
7 | for i in "${options[@]}"; do
8 | python experiments.py ../synthetic/hh/$i "" ../output/synthetic/hh/$i cnf -t 200 &
9 | done
10 | wait
--------------------------------------------------------------------------------
/scripts/k_combine.sh:
--------------------------------------------------------------------------------
1 | function join_by { local IFS="$1"; shift; echo "$*"; }
2 |
3 | cd ../smtlearn
4 | declare -a options=(1 2 3 4 5)
5 |
6 | mkdir ../synthetic/kk/all
7 | for i in "${options[@]}"; do
8 | cp ../synthetic/kk/$i/* ../synthetic/kk/all/
9 | done
10 |
11 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/kk ../output/synthetic/kk
12 |
13 | python api.py combine ../output/synthetic/kk/summary $(join_by " " "${options[@]}") -p ../output/synthetic/kk/
14 | python api.py migrate ratio ../output/synthetic/kk/summary/ -d ../synthetic/kk/all -s 1000 -f
15 | python api.py migrate accuracy ../output/synthetic/kk/summary/ -d ../synthetic/kk/all -s 1000 -f
16 |
--------------------------------------------------------------------------------
/scripts/k_generate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(1 2 3 4 5 6)
6 |
7 | for i in "${options[@]}"; do
8 | python api.py generate ../synthetic/kk/$i -n 100 -b 6 -r 2 -k $i -l 3 --half_spaces 6
9 | done
10 | wait
11 |
12 | scp -r ../synthetic/kk samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/
--------------------------------------------------------------------------------
/scripts/k_learn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(1 2 3 4 5)
6 |
7 | for i in "${options[@]}"; do
8 | python experiments.py ../synthetic/kk/$i "" ../output/synthetic/kk/$i cnf -t 200 &
9 | done
10 | wait
--------------------------------------------------------------------------------
/scripts/l_combine.sh:
--------------------------------------------------------------------------------
1 | function join_by { local IFS="$1"; shift; echo "$*"; }
2 |
3 | cd ../smtlearn
4 | declare -a options=(1 2 3 4 5)
5 |
6 | mkdir ../synthetic/ll/all
7 | for i in "${options[@]}"; do
8 | cp ../synthetic/ll/$i/* ../synthetic/ll/all/
9 | done
10 |
11 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/ll ../output/synthetic/ll
12 |
13 | python api.py combine ../output/synthetic/ll/summary $(join_by " " "${options[@]}") -p ../output/synthetic/ll/
14 | python api.py migrate ratio ../output/synthetic/ll/summary/ -d ../synthetic/ll/all -s 1000 -f
15 | python api.py migrate accuracy ../output/synthetic/ll/summary/ -d ../synthetic/ll/all -s 1000 -f
16 |
--------------------------------------------------------------------------------
/scripts/l_generate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(1 2 3 4 5)
6 |
7 | for i in "${options[@]}"; do
8 | python api.py generate ../synthetic/ll/$i -n 100 -b 0 -r 2 -k 3 -l $i --half_spaces 10
9 | done
10 | wait
11 |
12 | scp -r ../synthetic/ll samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/
--------------------------------------------------------------------------------
/scripts/l_learn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(1 2 3 4 5)
6 |
7 | for i in "${options[@]}"; do
8 | python experiments.py ../synthetic/ll/$i "" ../output/synthetic/ll/$i cnf -t 200 &
9 | done
10 | wait
--------------------------------------------------------------------------------
/scripts/pf_combine.sh:
--------------------------------------------------------------------------------
1 | function join_by { local IFS="$1"; shift; echo "$*"; }
2 |
3 | cd ../smtlearn
4 | declare -a options=(1 2 3 4 5)
5 |
6 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/pf ../output/synthetic/pf
7 |
8 | python api.py migrate ratio ../output/synthetic/pf -d ../synthetic/pf -s 1000 -f
9 | python api.py migrate accuracy ../output/synthetic/pf -d ../synthetic/pf -s 1000 -f
10 |
--------------------------------------------------------------------------------
/scripts/pf_generate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | python api.py generate ../synthetic/pf -n 100 -b 6 -r 2 -k 3 -l 3 --half_spaces 6
6 |
7 | scp -r ../synthetic/pf samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/
--------------------------------------------------------------------------------
/scripts/pf_learn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | python experiments.py ../synthetic/pf "" ../output/synthetic/pf cnf -t 200 -f &
6 | wait
--------------------------------------------------------------------------------
/scripts/plot.sh:
--------------------------------------------------------------------------------
1 | cd ../smtlearn
2 |
3 | # Plot samples
4 | python api.py table id samples acc ../output/synthetic/ss/summary/ ../synthetic/ss/10000/ INCAL ../output/synthetic/ss/esummary_1000/ ../synthetic/ss/10000/ "non-incremental" plot -a -o ../../ijcai18/figures/s_inc_acc.png
5 | python api.py table id samples time ../output/synthetic/ss/summary/ ../synthetic/ss/10000/ INCAL ../output/synthetic/ss/esummary_1000/ ../synthetic/ss/10000/ "non-incremental" plot -a -o ../../ijcai18/figures/s_inc_time.png --legend_pos "upper right"
6 | python api.py table id samples active ../output/synthetic/ss/summary/ ../synthetic/ss/all/ INCAL plot -a -o ../../ijcai18/figures/s_inc_active.png
7 | python api.py table id samples active_ratio ../output/synthetic/ss/summary/ ../synthetic/ss/all/ "" plot -a -o ../../ijcai18/figures/s_inc_active_ratio.png --legend_pos "upper right"
8 |
9 | python api.py table id samples time ../output/synthetic/ss/summary ../synthetic/ss/all INCAL print -a
10 | python api.py table id samples time ../output/synthetic/ss/esummary ../synthetic/ss/all INCAL print -a
11 |
12 | # Plot k
13 | python api.py table id k acc ../output/synthetic/kk/summary/ ../synthetic/kk/all/ INCAL plot -a -o ../../ijcai18/figures/k_inc_acc.png
14 | python api.py table id k time ../output/synthetic/kk/summary/ ../synthetic/kk/all/ INCAL plot -a -o ../../ijcai18/figures/k_inc_time.png
15 |
16 | # Plot l
17 | python api.py table id l acc ../output/synthetic/ll/summary/ ../synthetic/ll/all/ INCAL plot -a -o ../../ijcai18/figures/l_inc_acc.png
18 | python api.py table id l time ../output/synthetic/ll/summary/ ../synthetic/ll/all/ INCAL plot -a -o ../../ijcai18/figures/l_inc_time.png
19 |
20 | # Plot h
21 | python api.py table id h acc ../output/synthetic/hh/summary/ ../synthetic/hh/all/ INCAL plot -a -o ../../ijcai18/figures/h_inc_acc.png
22 | python api.py table id h time ../output/synthetic/hh/summary/ ../synthetic/hh/all/ INCAL plot -a -o ../../ijcai18/figures/h_inc_time.png
23 |
24 | # Print parameter-free ratio
25 | python api.py table id samples time_ratio ../output/synthetic/pf/ ../synthetic/pf/ INCAL print -a
26 |
27 | # Print benchmark
28 | python api.py table id constant full_time ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a
29 | python api.py table id constant time ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a
30 | python api.py table id constant time_ratio ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a
31 | python api.py table id constant acc ../output/benchmark/pf1000/ ../demo/cache/ INCAL print -a
32 |
--------------------------------------------------------------------------------
/scripts/samples_combine.sh:
--------------------------------------------------------------------------------
1 | function join_by { local IFS="$1"; shift; echo "$*"; }
2 |
3 | cd ../smtlearn
4 | declare -a options=(25 50 75 100 250 500 750 1000 2500 5000 7500 10000)
5 | declare -a options_e=(25 50 75 100 250 500 750 1000 2500)
6 |
7 | scp -r samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/output/synthetic/ss ../output/synthetic/ss
8 |
9 | python api.py combine ../output/synthetic/ss/summary $(join_by " " "${options[@]}") -p ../output/synthetic/ss/
10 | python api.py combine ../output/synthetic/ss/esummary $(join_by " " "${options_e[@]}") -p ../output/synthetic/ss/e
11 | python api.py migrate ratio ../output/synthetic/ss/summary/ -d ../synthetic/ss/10000 -s 1000 -f
12 | python api.py migrate accuracy ../output/synthetic/ss/summary/ -d ../synthetic/ss/10000 -s 1000 -f
13 | python api.py migrate ratio ../output/synthetic/ss/esummary/ -d ../synthetic/ss/10000 -s 1000 -f
14 | python api.py migrate accuracy ../output/synthetic/ss/esummary/ -d ../synthetic/ss/10000 -s 1000 -f
15 |
--------------------------------------------------------------------------------
/scripts/samples_generate.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | python api.py generate ../synthetic/ss/10000 -n 100 -b 6 -r 2 -k 2 -l 3 --half_spaces 6 -s 10000
6 | scp -r ../synthetic/ss samuelk@himec04.cs.kuleuven.be:/home/samuelk/projects/smtlearn/synthetic/
--------------------------------------------------------------------------------
/scripts/samples_learn.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ../smtlearn
4 |
5 | declare -a options=(25 50 75 100 250 500 750 1000 2500 5000 7500 10000)
6 | declare -a options_e=(25 50 75 100 250 500 750 1000 2500)
7 |
8 | for i in "${options[@]}"; do
9 | python experiments.py ../synthetic/ss/10000/ "" ../output/synthetic/ss/$i cnf -s $i -t 200 &
10 | done
11 |
12 | for i in "${options_e[@]}"; do
13 | python experiments.py ../synthetic/ss/10000/ "" ../output/synthetic/ss/e$i cnf -s $i -t 200 -a &
14 | done
15 | wait
16 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import sys
4 |
5 | from setuptools import setup, find_packages, Command
6 | from os import path
7 |
8 | # To upload:
9 | # pip install --upgrade twine wheel setuptools
10 | # python setup.py upload
11 |
12 | NAME = 'incal'
13 | DESCRIPTION = 'Learning SMT(LRA) formulas'
14 | URL = 'https://github.com/smtlearning/incal'
15 | EMAIL = 'samuel.kolb@me.com'
16 | AUTHOR = 'Samuel Kolb'
17 | REQUIRES_PYTHON = '>=3.5.0'
18 | VERSION = "0.1.1"
19 |
20 | # What packages are required for this module to be executed?
21 | REQUIRED = [
22 | 'pywmi', 'numpy', 'typing', 'pysmt', 'matplotlib', 'scikit-learn', 'pickledb'
23 | ]
24 |
25 | # What packages are optional?
26 | EXTRAS = {
27 | 'sdd': ["pysdd"]
28 | }
29 |
30 | here = os.path.abspath(os.path.dirname(__file__))
31 |
32 | with open(path.join(here, "README.md")) as ref:
33 | long_description = ref.read()
34 |
35 |
36 | class UploadCommand(Command):
37 | """Support setup.py upload."""
38 |
39 | description = 'Build and publish the package.'
40 | user_options = []
41 |
42 | @staticmethod
43 | def status(s):
44 | """Prints things in bold."""
45 | print('\033[1m{0}\033[0m'.format(s))
46 |
47 | def initialize_options(self):
48 | pass
49 |
50 | def finalize_options(self):
51 | pass
52 |
53 | def run(self):
54 | try:
55 | self.status('Removing previous builds…')
56 | shutil.rmtree(os.path.join(here, 'dist'))
57 | except OSError:
58 | pass
59 |
60 | self.status('Building Source and Wheel (universal) distribution…')
61 | os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))
62 |
63 | self.status('Uploading the package to PyPI via Twine…')
64 | os.system('twine upload dist/*')
65 |
66 | # self.status('Pushing git tags…')
67 | # os.system('git tag v{0}'.format(about['__version__']))
68 | # os.system('git push --tags')
69 |
70 | sys.exit()
71 |
72 |
73 | setup(
74 | name=NAME,
75 | version=VERSION,
76 | description=DESCRIPTION,
77 | long_description=long_description,
78 | long_description_content_type="text/markdown",
79 | url=URL,
80 | author=AUTHOR,
81 | author_email=EMAIL,
82 | license='LGPLv3',
83 | classifiers=[
84 | 'License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)',
85 | 'Programming Language :: Python',
86 | 'Programming Language :: Python :: 3',
87 | ],
88 | python_requires=REQUIRES_PYTHON,
89 | packages=find_packages(exclude=('tests',)),
90 | zip_safe=False,
91 | install_requires=REQUIRED,
92 | extras_require=EXTRAS,
93 | setup_requires=['pytest-runner'],
94 | tests_require=["pytest"],
95 | entry_points={
96 | "console_scripts": [
97 | "incal-experiments = incal.experiments.cli:main",
98 | "incal-track = incal.experiments.learn:track",
99 | ]
100 | },
101 | cmdclass={
102 | 'upload': UploadCommand,
103 | },
104 | )
105 |
--------------------------------------------------------------------------------
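Once the package is installed (e.g. with `pip install -e .`), the two console_scripts above become wrapper executables that import and call the referenced functions. The following small sketch resolves them at runtime via importlib.metadata; note the group= keyword needs Python 3.10+, a stricter requirement than the REQUIRES_PYTHON declared above.

# Sketch: resolving the console_scripts declared in setup.py at runtime.
from importlib.metadata import entry_points

for ep in entry_points(group="console_scripts"):
    if ep.name in ("incal-experiments", "incal-track"):
        print(f"{ep.name} -> {ep.value}")  # e.g. incal-experiments -> incal.experiments.cli:main
        command = ep.load()                # the callable the generated wrapper script invokes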