├── requirements.txt ├── .gitignore ├── sampler ├── examples │ ├── add │ ├── xtunnel │ ├── division │ ├── snapchat │ ├── add.ini │ ├── xtunnel.ini │ ├── division.ini │ ├── snapchat.ini │ └── expr.ini ├── dune ├── README.md └── modeleval.ml ├── examples ├── bin │ ├── Makefile │ ├── add.c │ └── add.ini └── samples │ ├── example_8bits.json │ ├── example_8_and_16bits.json │ └── example.json ├── scripts ├── utils │ ├── gdbscript.txt │ ├── sample.py │ ├── get_stats.py │ ├── ghidra │ │ └── DumpBlocks.py │ ├── all_from_trace.sh │ └── traceDisassembler.py └── bench │ ├── recompute_stats.py │ └── bench.py ├── test ├── dune ├── bitvector_test.ml ├── heuristic_test.ml └── simplify_test.ml ├── src ├── lib │ ├── dune │ ├── exceptions.ml │ ├── sampler.mli │ ├── utility.mli │ ├── checker.mli │ ├── heuristic.mli │ ├── utility.ml │ ├── tree │ │ ├── simplifier.mli │ │ ├── mutations.mli │ │ ├── tree.mli │ │ └── tree.ml │ ├── oracle.mli │ ├── sampler.ml │ ├── checker.ml │ ├── operators.mli │ ├── distance.mli │ ├── bitvector.ml │ ├── sygus.ml │ ├── oracle.ml │ └── heuristic.ml └── dune ├── .gitlab-ci.yml ├── datasets ├── if_the_else │ ├── merged1 │ ├── merged2 │ ├── merged3 │ ├── merged4 │ └── merged5 ├── complex_handlers │ ├── bp1 │ ├── bp2 │ └── bp3 └── syntia │ └── b1 ├── xyntiasampler.opam ├── xyntia.opam ├── dune-project ├── Makefile └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | plumbum==1.7.2 2 | tqdm==4.64.0 3 | joblib==1.2.0 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | **/.merlin 2 | **/*.pyc 3 | _build/** 4 | results 5 | _opam 6 | -------------------------------------------------------------------------------- /sampler/examples/add: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/binsec/xyntia/HEAD/sampler/examples/add -------------------------------------------------------------------------------- /sampler/examples/xtunnel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binsec/xyntia/HEAD/sampler/examples/xtunnel -------------------------------------------------------------------------------- /sampler/examples/division: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binsec/xyntia/HEAD/sampler/examples/division -------------------------------------------------------------------------------- /sampler/examples/snapchat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/binsec/xyntia/HEAD/sampler/examples/snapchat -------------------------------------------------------------------------------- /examples/bin/Makefile: -------------------------------------------------------------------------------- 1 | all: add 2 | 3 | add: 4 | $(CC) -Wall -Werror -m32 add.c -o add 5 | 6 | clean: 7 | rm -f add 8 | -------------------------------------------------------------------------------- /scripts/utils/gdbscript.txt: -------------------------------------------------------------------------------- 1 | set confirm off 2 | 3 | source scripts/utils/traceDisassembler.py 4 | 5 | trace_disassembler 6 | 7 | quit 8 | -------------------------------------------------------------------------------- /examples/bin/add.c: -------------------------------------------------------------------------------- 1 | 2 | 3 | int add(int x, int y) { 4 | return x + y; 5 | } 6 | 7 | int main() { 8 | add(0, 0); 9 | return 0; 10 | } 11 | -------------------------------------------------------------------------------- /test/dune: -------------------------------------------------------------------------------- 1 | (tests 2 | (names heuristic_test 
bitvector_test simplify_test) 3 | (libraries xyntia_utils ounit2 qcheck binsec) 4 | (flags (:standard -open Binsec)) 5 | ) 6 | -------------------------------------------------------------------------------- /examples/bin/add.ini: -------------------------------------------------------------------------------- 1 | starting from 2 | 3 | set sample output stdout 4 | 5 | explore all 6 | 7 | hook with 8 | sample 100 eax 9 | halt 10 | end 11 | 12 | 13 | -------------------------------------------------------------------------------- /sampler/examples/add.ini: -------------------------------------------------------------------------------- 1 | starting from 0x1196 2 | 3 | set sample output stdout 4 | 5 | explore all 6 | 7 | hook 0x119f with 8 | sample 100 eax 9 | halt 10 | end 11 | 12 | 13 | -------------------------------------------------------------------------------- /sampler/examples/xtunnel.ini: -------------------------------------------------------------------------------- 1 | starting from 0x00490879 2 | 3 | set sample output stdout 4 | 5 | explore all 6 | 7 | hook 0x0049089e with 8 | sample 100 eax, edx 9 | halt 10 | end 11 | 12 | 13 | -------------------------------------------------------------------------------- /sampler/examples/division.ini: -------------------------------------------------------------------------------- 1 | starting from 0x0 2 | 3 | prune constant outputs 4 | 5 | set sample output "/tmp/lala" 6 | set optimal sampling 7 | 8 | explore all 9 | 10 | hook <.raw:size> with 11 | sample 100 12 | halt 13 | end 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/lib/dune: -------------------------------------------------------------------------------- 1 | (include_subdirs unqualified) 2 | 3 | (library 4 | (package xyntia) 5 | (name xyntia_utils) 6 | (preprocess (pps ppx_deriving.eq ppx_deriving.enum ppx_deriving.ord)) 7 | (libraries yojson binsec binsec.smt binsec.sse xyntiasampler fileutils)) 8 | 
-------------------------------------------------------------------------------- /sampler/examples/snapchat.ini: -------------------------------------------------------------------------------- 1 | starting from 0x14 2 | 3 | set sample output stdout 4 | 5 | @[sp+0x1c0, 8] := nondet as tmp 6 | 7 | set domain tmp [0, 999] 8 | 9 | explore all 10 | 11 | hook 0x7c with 12 | sample 100 x8 13 | halt 14 | end 15 | 16 | 17 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (public_name xyntia) 3 | (package xyntia) 4 | (name main) 5 | (libraries 6 | unix 7 | xyntia_utils 8 | binsec 9 | binsec.smt 10 | binsec.armv7decoder 11 | binsec.amd64decoder 12 | binsec.armv8decoder 13 | binsec.ppc64decoder)) 14 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | stages: 2 | - opam 3 | 4 | opam:pin: 5 | tags: 6 | - docker 7 | stage: opam 8 | image: 'ocaml/opam:$OS-ocaml-$OCAML' 9 | parallel: 10 | matrix: 11 | - OS: [ 'debian' ] 12 | OCAML: [ '4.14' ] 13 | script: > 14 | true 15 | && opam pin -n -k path add . 
16 | && opam depext -u -i xyntia 17 | && opam exec -- xyntia -help 18 | # We should place a -version instead of the help but 19 | # we do not have such an option in Xyntia 20 | # Credit : Copied from BINSEC -------------------------------------------------------------------------------- /sampler/examples/expr.ini: -------------------------------------------------------------------------------- 1 | starting from 0x0 2 | 3 | set sample output stdout 4 | 5 | explore all 6 | 7 | v0<32> := nondet 8 | v1<32> := nondet 9 | v2<32> := nondet 10 | v3<32> := nondet 11 | v4<32> := nondet 12 | v5<32> := nondet 13 | v6<32> := nondet 14 | set domain v0 [-50, 50] 15 | set domain v1 [-50, 50] 16 | set domain v2 [-50, 50] 17 | set domain v3 [-50, 50] 18 | set domain v4 [-50, 50] 19 | set domain v5 [-50, 50] 20 | set domain v6 [-50, 50] 21 | 22 | hook 0x0 with 23 | res1<32> := v0 * v1 24 | sample 100 25 | halt 26 | end 27 | 28 | -------------------------------------------------------------------------------- /sampler/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (public_name xyntiasampler) 3 | (name xyntiasampler) 4 | (modules modeleval termsampler) 5 | (flags 6 | (:standard 7 | -open 8 | Binsec 9 | -open 10 | Smt 11 | -open 12 | Libsse 13 | -open 14 | Libterm 15 | -open 16 | Yojson 17 | -w 18 | "-58")) 19 | (libraries binsec.smt binsec.sse binsec.sse.term yojson)) 20 | 21 | (plugin 22 | (package xyntiasampler) 23 | (name xyntiasampler) 24 | (libraries xyntiasampler) 25 | (site 26 | (binsec plugins))) 27 | 28 | (plugin 29 | (package xyntiasampler) 30 | (name sampling) 31 | (libraries xyntiasampler) 32 | (site 33 | (binsec plugins))) 34 | -------------------------------------------------------------------------------- /datasets/if_the_else/merged1: -------------------------------------------------------------------------------- 1 | v2 = 0 ? (v0 + v1) : (v0 - v1) 2 | v2 = 0 ? (v0 + v1) : (v0*v1) 3 | v2 = 0 ? 
(v0 + v1) : (v0 & v1) 4 | v2 = 0 ? (v0 + v1) : (v0 | v1) 5 | v2 = 0 ? (v0 + v1) : (v0 ^ v1) 6 | v2 = 0 ? (v0 - v1) : (v0*v1) 7 | v2 = 0 ? (v0 - v1) : (v0 & v1) 8 | v2 = 0 ? (v0 - v1) : (v0 | v1) 9 | v2 = 0 ? (v0 - v1) : (v0 ^ v1) 10 | v2 = 0 ? (v0*v1) : (v0 & v1) 11 | v2 = 0 ? (v0*v1) : (v0 | v1) 12 | v2 = 0 ? (v0*v1) : (v0 ^ v1) 13 | v2 = 0 ? (v0 & v1) : (v0 | v1) 14 | v2 = 0 ? (v0 & v1) : (v0 ^ v1) 15 | v2 = 0 ? (v0 | v1) : (v0 ^ v1) 16 | v2 = 0 ? (v0 - v1) : (v0 + v1) 17 | v2 = 0 ? (v0*v1) : (v0 + v1) 18 | v2 = 0 ? (v0*v1) : (v0 - v1) 19 | v2 = 0 ? (v0 & v1) : (v0 + v1) 20 | v2 = 0 ? (v0 & v1) : (v0 - v1) 21 | -------------------------------------------------------------------------------- /xyntiasampler.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.2.0" 4 | synopsis: "Sampler based on Binsec for black-box deobfuscation" 5 | description: 6 | "Detects inputs and outputs of a binary code snippet and sample them" 7 | authors: ["Grégoire Menguy" "Cauim de Souza Lima"] 8 | license: "LGPL-2.1-or-later" 9 | depends: [ 10 | "dune" {>= "3.0"} 11 | "binsec" {>= "0.10.0"} 12 | "yojson" 13 | "odoc" {with-doc} 14 | ] 15 | build: [ 16 | ["dune" "subst"] {dev} 17 | [ 18 | "dune" 19 | "build" 20 | "-p" 21 | name 22 | "-j" 23 | jobs 24 | "--promote-install-files=false" 25 | "@install" 26 | "@runtest" {with-test} 27 | "@doc" {with-doc} 28 | ] 29 | ["dune" "install" "-p" name "--create-install-files" name] 30 | ] 31 | -------------------------------------------------------------------------------- /datasets/complex_handlers/bp1: -------------------------------------------------------------------------------- 1 | # Add 2 | v0 + v1 + (-(((v2 - (v0 * v0))) - (v0 * v1))) 3 | (((v2 - (v0 * v0))) - (v0 * v1)) + (-((v1 - (v2 & v0)) * (v1 ^ v0))) 4 | (v1 - (v2 & v0)) * (v1 ^ v0) 5 | 6 | # Sub 7 | v0 - v1 + (-((v0*v2) - (v1 | v2))) 8 | ((v0*v2) - 
(v1 | v2)) + (-((((v1 & v2) + v0) ^ v1) * v0)) 9 | (((v1 & v2) + v0) ^ v1) * v0 10 | 11 | # Mul 12 | v0 * v1 + (-((v0*v2*v2) - (v0*v1))) 13 | ((v0*v2*v2) - (v0*v1)) + (-((v1 ^ v0) - (v2*(v1+v0)))) 14 | ((v1 ^ v0) - (v2*(v1+v0))) 15 | 16 | # And 17 | (v0 & v1) ^ ((v0 & v2)*(v0 & v2) | v1) 18 | ((v0 & v2)*(v0 & v2) | v1) ^ ((v2 * v1) - ((v0 ^ v2) + v1)) 19 | (v2 * v1) - ((v0 ^ v2) + v1) 20 | 21 | # Or 22 | (v0 | v1) + (- (((v1*v2) + (v0)) * ((v1*v2) + (v0)) )) 23 | (((v1*v2) + (v0)) * ((v1*v2) + (v0))) + (-((v2 * v0) ^ (v1 - (v0 & v2 )))) 24 | ((v2 * v0) ^ (v1 - (v0 & v2 ))) 25 | -------------------------------------------------------------------------------- /examples/samples/example_8bits.json: -------------------------------------------------------------------------------- 1 | { 2 | "initial": { 3 | "inputs": { 4 | "0": { 5 | "location": "DL", 6 | "size": "0x8", 7 | "value": "0x03" 8 | } 9 | }, 10 | "outputs": { 11 | "0": { 12 | "location": "AL", 13 | "size": "0x8", 14 | "value": "0x06" 15 | } 16 | } 17 | }, 18 | "sampling": { 19 | "0": { 20 | "inputs": { 21 | "0": { 22 | "location": "DL", 23 | "size": "0x8", 24 | "value": "0x1a" 25 | } 26 | }, 27 | "outputs": { 28 | "0": { 29 | "location": "AL", 30 | "size": "0x8", 31 | "value": "0x34" 32 | } 33 | } 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /datasets/if_the_else/merged2: -------------------------------------------------------------------------------- 1 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v0*v1)) 2 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v0 & v1)) 3 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v0 | v1)) 4 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v0 ^ v1)) 5 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v0 & v1)) 6 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v0 | v1)) 7 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v0 ^ v1)) 8 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 & v1) : (v0 | v1)) 9 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? 
(v0 & v1) : (v0 ^ v1)) 10 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 | v1) : (v0 ^ v1)) 11 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v0 & v1)) 12 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v0 | v1)) 13 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v0 ^ v1)) 14 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0 & v1) : (v0 | v1)) 15 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0 & v1) : (v0 ^ v1)) 16 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0 | v1) : (v0 ^ v1)) 17 | v2 = 0 ? (v0*v1) : (v2 = 1 ? (v0 & v1) : (v0 | v1)) 18 | v2 = 0 ? (v0*v1) : (v2 = 1 ? (v0 & v1) : (v0 ^ v1)) 19 | v2 = 0 ? (v0*v1) : (v2 = 1 ? (v0 | v1) : (v0 ^ v1)) 20 | v2 = 0 ? (v0 & v1) : (v2 = 1 ? (v0 | v1) : (v0 ^ v1)) 21 | -------------------------------------------------------------------------------- /xyntia.opam: -------------------------------------------------------------------------------- 1 | # This file is generated by dune, edit dune-project instead 2 | opam-version: "2.0" 3 | version: "0.2.0" 4 | synopsis: "Xyntia: Search-based Local Black-box Deobfuscator" 5 | description: 6 | "Xyntia is a black-box deobfuscator, relying on S-metaheuristics to synthesize the semantics of highly obfuscated code blocks" 7 | authors: ["Grégoire Menguy" "Cauim de Souza Lima"] 8 | license: "LGPL-2.1-or-later" 9 | tags: ["binary code analysis" "deobfuscation" "program synthesis"] 10 | depends: [ 11 | "dune" {>= "3.0"} 12 | "ocaml" {>= "4.07"} 13 | "zarith" {>= "1.10"} 14 | "yojson" {>= "1.7.0"} 15 | "qcheck" {>= "0.15"} 16 | "unisim_archisec" {>= "0.0.10"} 17 | "fileutils" {>= "0.6.6"} 18 | "bitwuzla" {>= "1.0.5"} 19 | "binsec" {>= "0.10.0"} 20 | "ppx_deriving" {>= "6.1.0"} 21 | "xyntiasampler" {= version} 22 | "odoc" {with-doc} 23 | ] 24 | build: [ 25 | ["dune" "subst"] {dev} 26 | [ 27 | "dune" 28 | "build" 29 | "-p" 30 | name 31 | "-j" 32 | jobs 33 | "--promote-install-files=false" 34 | "@install" 35 | "@runtest" {with-test} 36 | "@doc" {with-doc} 37 | ] 38 | ["dune" "install" "-p" name "--create-install-files" name] 39 | ] 40 | 
-------------------------------------------------------------------------------- /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 3.0) 2 | (generate_opam_files true) 3 | 4 | (using dune_site 0.1) 5 | (name xyntia) 6 | (version "0.2.0") 7 | (authors 8 | "Grégoire Menguy" 9 | "Cauim de Souza Lima" 10 | ) 11 | (license LGPL-2.1-or-later) 12 | 13 | (package 14 | (name xyntia) 15 | (synopsis "Xyntia: Search-based Local Black-box Deobfuscator") 16 | (description "Xyntia is a black-box deobfuscator, relying on S-metaheuristics to synthesize the semantics of highly obfuscated code blocks") 17 | (depends 18 | (ocaml (>= 4.07)) 19 | (zarith (>= 1.10)) 20 | (yojson (>= 1.7.0)) 21 | (qcheck (>= 0.15)) 22 | (unisim_archisec (>= 0.0.10)) 23 | (fileutils (>= 0.6.6)) 24 | (bitwuzla (>= 1.0.5)) 25 | (binsec (>= 0.10.0)) 26 | (ppx_deriving (>= 6.1.0)) 27 | (xyntiasampler (= :version)) 28 | ) 29 | (tags ( 30 | "binary code analysis" 31 | "deobfuscation" 32 | "program synthesis" 33 | )) 34 | ) 35 | 36 | (package 37 | (name xyntiasampler) 38 | (synopsis "Sampler based on Binsec for black-box deobfuscation") 39 | (description "Detects inputs and outputs of a binary code snippet and sample them") 40 | (depends (binsec (>= 0.10.0)) yojson) 41 | ) 42 | -------------------------------------------------------------------------------- /examples/samples/example_8_and_16bits.json: -------------------------------------------------------------------------------- 1 | { 2 | "initial": { 3 | "inputs": { 4 | "0": { 5 | "location": "DL", 6 | "size": "0x8", 7 | "value": "0x03" 8 | }, 9 | "1": { 10 | "location": "BX", 11 | "size": "0x10", 12 | "value": "0x0103" 13 | } 14 | }, 15 | "outputs": { 16 | "0": { 17 | "location": "AL", 18 | "size": "0x8", 19 | "value": "0x0106" 20 | } 21 | } 22 | }, 23 | "sampling": { 24 | "0": { 25 | "inputs": { 26 | "0": { 27 | "location": "DL", 28 | "size": "0x8", 29 | "value": "0x1a" 30 | }, 31 | "1": { 32 | 
"location": "BX", 33 | "size": "0x10", 34 | "value": "0x1b" 35 | } 36 | }, 37 | "outputs": { 38 | "0": { 39 | "location": "AL", 40 | "size": "0x8", 41 | "value": "0x35" 42 | } 43 | } 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /datasets/complex_handlers/bp2: -------------------------------------------------------------------------------- 1 | # Add 2 | (v0 + v1) + ( (( ~ (v0 * (v1 * v1))) ^ (( - v2) | v3)) - (v0 * (v5 & v4))) 3 | (-( (( ~ (v0 * (v1 * v1))) ^ (( - v2) | v3)) - (v0 * (v5 & v4)))) + ( ((~ v0) | (v3 * v5)) & (( v2 - (v1 + (v5))) ^ ((- v4) & (v5 - v0)))) 4 | - ( ((~ v0) | (v3 * v5)) & (( v2 - (v1 + (v5))) ^ ((- v4) & (v5 - v0)))) 5 | 6 | # Sub 7 | (v0 - v1) + (((v1 ^ v4) * (v0 & (- (v4 | (v3 * v3))))) - (v2 + (v3 & (~ v5)))) 8 | (-(((v1 ^ v4) * (v0 & (- (v4 | (v3 * v3))))) - (v2 + (v3 & (~ v5))))) + (((v5 - (v3 | ( ~ v1))) & (v4 + v2)) ^ (v0 * (- v2))) 9 | - (((v5 - (v3 | ( ~ v1))) & (v4 + v2)) ^ (v0 * (- v2))) 10 | 11 | 12 | # Mul 13 | (v0 * v1) ^ (((v1 + (~ v5)) & (v0 * v2)) ^ ( (-v3) | (v4 - v2))) 14 | (((v1 + (~ v5)) & (v0 * v2)) ^ ( (-v3) | (v4 - v2))) ^ (((v5 | (-v3)) ^ ((v0 - v1) * (v0 - v1))) & ((~ v2) + v4)) 15 | (((v5 | (-v3)) ^ ((v0 - v1) * (v0 - v1))) & ((~ v2) + v4)) 16 | 17 | 18 | # And 19 | (v0 & v1) ^ (((v0 + v5) & (v1 * (~ (v3 ^ (- v2))))) - (v4 | v3)) 20 | (((v0 + v5) & (v1 * (~ (v3 ^ (- v2))))) - (v4 | v3)) ^ (((v5 & v3) - ((v1 * v1) | (-v2))) ^ ( v0 + (~ v4))) 21 | (((v5 & v3) - ((v1 * v1) | (-v2))) ^ ( v0 + (~ v4))) 22 | 23 | 24 | # Or 25 | (v0 | v1) + ((((v5 * v4) | (v2 - v3)) ^ (- (v0 + (~ v1)))) & v4) 26 | (-((((v5 * v4) | (v2 - v3)) ^ (- (v0 + (~ v1)))) & v4)) + (((~(v4) - (v0 | (-v3))) ^ ( v1 + (v2 & v1))) * v5) 27 | -(((~(v4) - (v0 | (-v3))) ^ ( v1 + (v2 & v1))) * v5) 28 | 29 | -------------------------------------------------------------------------------- /datasets/if_the_else/merged3: 
-------------------------------------------------------------------------------- 1 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v0 & v1))) 2 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v0 | v1))) 3 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v0 ^ v1))) 4 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v0 | v1))) 5 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v0 ^ v1))) 6 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v0 ^ v1))) 7 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 & v1) : (v0 | v1))) 8 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 & v1) : (v0 ^ v1))) 9 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 | v1) : (v0 ^ v1))) 10 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 & v1) : (v2 = 2 ? (v0 | v1) : (v0 ^ v1))) 11 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 & v1) : (v0 | v1))) 12 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 & v1) : (v0 ^ v1))) 13 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 | v1) : (v0 ^ v1))) 14 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0 & v1) : (v2 = 2 ? (v0 | v1) : (v0 ^ v1))) 15 | v2 = 0 ? (v0*v1) : (v2 = 1 ? (v0 & v1) : (v2 = 2 ? (v0 | v1) : (v0 ^ v1))) 16 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v0*v1))) 17 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v0*v1))) 18 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v0 & v1))) 19 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 ^ v1) : (v0*v1))) 20 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 ^ v1) : (v0 & v1))) 21 | -------------------------------------------------------------------------------- /src/lib/exceptions.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. 
*) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). *) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | exception Halt 23 | exception CEGISHalt 24 | 25 | exception SynthesisNotStarted -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # This file is part of BINSEC. # 3 | # # 4 | # Copyright (C) 2019-2025 # 5 | # CEA (Commissariat à l'énergie atomique et aux énergies # 6 | # alternatives) # 7 | # # 8 | # you can redistribute it and/or modify it under the terms of the GNU # 9 | # Lesser General Public License as published by the Free Software # 10 | # Foundation, version 2.1. # 11 | # # 12 | # It is distributed in the hope that it will be useful, # 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 | # GNU Lesser General Public License for more details. # 16 | # # 17 | # See the GNU Lesser General Public License version 2.1 # 18 | # for more details (enclosed in the file licenses/LGPLv2.1). 
# 19 | # # 20 | ########################################################################## 21 | 22 | .PHONY: test 23 | 24 | all: build test 25 | 26 | test: 27 | @dune runtest 28 | 29 | build: 30 | @dune build @install 31 | 32 | install: 33 | dune install 34 | 35 | clean: 36 | rm -rf ./_build 37 | -------------------------------------------------------------------------------- /src/lib/sampler.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). *) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | val sample : string -> string -> string option -> string option -> unit 23 | 24 | val get_formula : unit -> string 25 | 26 | val get_samples : unit -> string list 27 | 28 | val remove_samples : unit -> unit -------------------------------------------------------------------------------- /datasets/if_the_else/merged4: -------------------------------------------------------------------------------- 1 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v2 = 3 ? (v0 & v1) : (v0 | v1)))) 2 | v2 = 0 ? 
(v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v2 = 3 ? (v0 & v1) : (v0 ^ v1)))) 3 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v2 = 3 ? (v0 | v1) : (v0 ^ v1)))) 4 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 | v1) : (v0 ^ v1)))) 5 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 | v1) : (v0 ^ v1)))) 6 | v2 = 0 ? (v0 - v1) : (v2 = 1 ? (v0*v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 | v1) : (v0 ^ v1)))) 7 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v2 = 3 ? (v0 | v1) : (v0 & v1)))) 8 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v2 = 3 ? (v0 ^ v1) : (v0 & v1)))) 9 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0*v1) : (v2 = 3 ? (v0 ^ v1) : (v0 | v1)))) 10 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0*v1) : (v0 | v1)))) 11 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0*v1) : (v0 ^ v1)))) 12 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 | v1) : (v0*v1)))) 13 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 ^ v1) : (v0*v1)))) 14 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 ^ v1) : (v0 | v1)))) 15 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0*v1) : (v0 & v1)))) 16 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0*v1) : (v0 ^ v1)))) 17 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 & v1) : (v0*v1)))) 18 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 & v1) : (v0 ^ v1)))) 19 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 ^ v1) : (v0*v1)))) 20 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? 
(v0 ^ v1) : (v0 & v1)))) 21 | -------------------------------------------------------------------------------- /src/lib/utility.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). *) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | val fold2 : 23 | f:('acc -> 'b -> 'c -> 'acc) -> init:'acc -> 'b array -> 'c array -> 'acc 24 | 25 | val map3 : 26 | ('a -> 'b -> 'c -> 'd) -> 'a array -> 'b array -> 'c array -> 'd array 27 | 28 | (* Computes the minimum element of the list, returning the Some (element, cost) if it exists and None else *) 29 | val min_list : ('a -> 'b) -> 'a list -> ('a * 'b) option 30 | 31 | val bv_to_smtlib : Bitvector.t -> string 32 | 33 | val cst_to_smtlib : int -> int -> string -------------------------------------------------------------------------------- /sampler/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Plugin Binsec to sample code blocks 3 | 4 | This page explains how to use the Binsec plugin without Xyntia. 
5 | **The recommended way to use the sampler is to go through Xyntia ([see Xyntia documentation](../README.md))** 6 | 7 | ### Build 8 | 9 | Dependencies: 10 | * ocaml 11 | * dune 12 | * Binsec (0.10.0) with its dependencies 13 | 14 | You can compile and install the plugin as follows: 15 | ```bash 16 | dune build 17 | dune install 18 | ``` 19 | 20 | ### Usage 21 | 22 | ```bash 23 | binsec [ -isa ISA ] -sse -sse-script SCRIPT BINARY -sse-quiet -xyntiasampler 24 | ``` 25 | 26 | where: 27 | * ISA is the target binary instruction set (e.g., x86 -- required if working on raw blocks) 28 | * SCRIPT is the binsec script specifying what to sample (see below) 29 | * BINARY is the binary code to sample (can be a full executable or raw binary) 30 | 31 | ##### Scripts 32 | 33 | The plugin defines new declarations and instructions in DBA: 34 | * `sample N [ reg_1, ..., reg_n ]`, which generates N samples for each output. A list of target output registers can be given; in that case only these are sampled, otherwise all detected outputs are. 35 | * `set sample output TARGET`, which specifies where the sampling results are stored. TARGET can be `stdout` or `"path/to/directory"`. 36 | * `set domain VAR [MIN, MAX]`, which specifies the sampling domain for the VAR input. VAR can be a register or any DBA variable but not a memory cell. To specify the domain of a memory cell, see `examples/snapchat.ini` 37 | * `prune constant outputs`, which prunes all the constant outputs (i.e., with no input variables) from the set of sampled outputs 38 | 39 | Examples of scripts can be found in the `examples` directory. 40 | 41 | It is also possible to sample a symbolic expression directly. An example is given in `examples/expr.ini`. To sample it, use: 42 | ``` 43 | # The <() is here to replace the binary path. 
Indeed, in this case we do not need any binary (only an empty file) 45 | binsec -isa x86 -sse -sse-engine sampling -sse-script examples/expr.ini <() -sse-quiet -termsampler 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /src/lib/checker.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). 
*) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | type equiv_answer = YES | NO | UKN 23 | 24 | type counter_example = { 25 | inputs : (Oracle.variable * Bitvector.t) array; 26 | output : Oracle.variable * Bitvector.t; 27 | } 28 | 29 | module type CheckerType = sig 30 | val open_session : unit -> unit 31 | val close_session : unit -> unit 32 | val check : string -> string -> equiv_answer 33 | val get_counter_example : unit -> counter_example 34 | end 35 | 36 | module EquivChecker (O : Oracle.ORACLE) (Solver : Smt.Smt_sig.Solver) : 37 | CheckerType 38 | 39 | val answer_to_string : equiv_answer -> string 40 | val make_checker : (module Oracle.ORACLE) -> string option -> int option -> (module CheckerType) -------------------------------------------------------------------------------- /src/lib/heuristic.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). 
(* Report produced by one run of a search heuristic.
   NOTE(review): field meanings below are inferred from their names and
   types only (the implementation is not visible here) -- confirm:
   - [expression] / [simplified] / [smtlib]: presumably the synthesized
     expression, its simplified form, and an SMT-LIB rendering;
   - [time_synthesis] / [time_simplify]: presumably durations, unit unknown;
   - [nb_mutations]: presumably the number of mutations performed. *)
type summary = {
  success : bool ;
  expression : string ;
  simplified : string ;
  smtlib : string ;
  size : int ;
  time_synthesis : float ;
  time_simplify : float ;
  nb_mutations : int ;
}

(* A search heuristic.
   NOTE(review): the meaning of the [int] argument of [search] (time budget?
   iteration budget?) cannot be determined from this interface -- confirm. *)
module type S = sig
  val search : int -> summary
end

(* Generates a search heuristics.
   The first VECDIST is the guiding distance while the second is the final one.
   The trailing [string] presumably names the heuristic to instantiate
   (TODO confirm the accepted names). *)
val of_string : (module Mutations.MUTATOR) -> (module Distance.VECDIST with type t = Mutations.mutd) -> (module Distance.VECDIST with type t = Mutations.mutd) -> (module Oracle.ORACLE) -> string -> (module S)
open OUnit2

(* A bitvector built from an int32 must report a size of 32. *)
let test_size_of _ =
  let bv = Bitvector.of_int32 5l in
  assert_equal 32 (Bitvector.size_of bv)

(* Property-based tests: signed "greater than" on bitvectors must agree with
   the native comparison of the int32 values they were built from. *)
let qcheck =
  let sgt_matches_int32 (lhs, rhs) =
    let bv_lhs = Bitvector.of_int32 lhs in
    let bv_rhs = Bitvector.of_int32 rhs in
    Bitvector.sgt bv_lhs bv_rhs = (lhs > rhs)
  in
  [
    QCheck.Test.make ~count:1000 ~name:"test_sgt"
      QCheck.(pair int32 int32)
      sgt_matches_int32;
  ]

(* Full suite: the hand-written OUnit cases followed by the QCheck
   properties converted to OUnit tests. *)
let suite =
  let unit_cases = [ "test_sizeof" >:: test_size_of ] in
  let property_cases = List.map QCheck_ounit.to_ounit2_test qcheck in
  "suite" >::: (unit_cases @ property_cases)

let () = run_test_tt_main suite
(v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 | v1) : (v2 = 4 ? (v0*v1) : (v0 ^ v1))))) 10 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 | v1) : (v2 = 4 ? (v0 ^ v1) : (v0*v1))))) 11 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 ^ v1) : (v2 = 4 ? (v0*v1) : (v0 | v1))))) 12 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 & v1) : (v2 = 3 ? (v0 ^ v1) : (v2 = 4 ? (v0 | v1) : (v0*v1))))) 13 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0*v1) : (v2 = 4 ? (v0 & v1) : (v0 ^ v1))))) 14 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0*v1) : (v2 = 4 ? (v0 ^ v1) : (v0 & v1))))) 15 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 & v1) : (v2 = 4 ? (v0*v1) : (v0 ^ v1))))) 16 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 & v1) : (v2 = 4 ? (v0 ^ v1) : (v0*v1))))) 17 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 ^ v1) : (v2 = 4 ? (v0*v1) : (v0 & v1))))) 18 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 | v1) : (v2 = 3 ? (v0 ^ v1) : (v2 = 4 ? (v0 & v1) : (v0*v1))))) 19 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 ^ v1) : (v2 = 3 ? (v0*v1) : (v2 = 4 ? (v0 & v1) : (v0 | v1))))) 20 | v2 = 0 ? (v0 + v1) : (v2 = 1 ? (v0 - v1) : (v2 = 2 ? (v0 ^ v1) : (v2 = 3 ? (v0*v1) : (v2 = 4 ? (v0 | v1) : (v0 & v1))))) 21 | -------------------------------------------------------------------------------- /scripts/utils/sample.py: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # This file is part of BINSEC. 
# 3 | # # 4 | # Copyright (C) 2019-2025 # 5 | # CEA (Commissariat à l'énergie atomique et aux énergies # 6 | # alternatives) # 7 | # # 8 | # you can redistribute it and/or modify it under the terms of the GNU # 9 | # Lesser General Public License as published by the Free Software # 10 | # Foundation, version 2.1. # 11 | # # 12 | # It is distributed in the hope that it will be useful, # 13 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 14 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 15 | # GNU Lesser General Public License for more details. # 16 | # # 17 | # See the GNU Lesser General Public License version 2.1 # 18 | # for more details (enclosed in the file licenses/LGPLv2.1). # 19 | # # 20 | ########################################################################## 21 | 22 | from plumbum import local 23 | import argparse 24 | import tempfile 25 | 26 | 27 | xyntia = local["xyntia"] 28 | 29 | config = """\ 30 | starting from 0x0 31 | 32 | set optimal sampling 33 | 34 | prune constant outputs 35 | explore all 36 | 37 | 38 | hook <.raw:size> with 39 | sample 100 40 | halt 41 | end 42 | """ 43 | 44 | def main(outdir, binary, arch): 45 | with tempfile.NamedTemporaryFile(mode="w") as tmp: 46 | tmp.write(config) 47 | tmp.flush() 48 | xyntia["-isa", arch, 49 | "-sample-only", 50 | "-bin", binary, "-config", tmp.name, 51 | "-sampleout", outdir 52 | ]() 53 | return 54 | 55 | if __name__ == "__main__": 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--bin', required=True, type=str, help="binary to load") 58 | parser.add_argument('--arch', required=True, type=str, help="architecture to give to binsec") 59 | parser.add_argument('--out', required=True, type=str, help="output directory") 60 | args = parser.parse_args() 61 | 62 | main(args.out, args.bin, args.arch) 63 | -------------------------------------------------------------------------------- /src/lib/utility.ml: 
(* [fold2 ~f ~init arr1 arr2] folds [f] over both arrays in lockstep,
   visiting the indices from the LAST one down to 0:
   [f (... (f (f init a.(n-1) b.(n-1)) a.(n-2) b.(n-2)) ...) a.(0) b.(0)].
   Returns [init] when both arrays are empty (the previous version read
   index -1 in that case and raised an out-of-bounds [Invalid_argument]).
   Raises [Invalid_argument] if the arrays have different lengths. *)
let fold2 ~f ~init arr1 arr2 =
  let len = Array.length arr1 in
  if len <> Array.length arr2 then
    invalid_arg "Array don't have the same size"
  else
    let rec loop acc i =
      (* i < 0 means every index has been consumed (or there were none) *)
      if i < 0 then acc else loop (f acc arr1.(i) arr2.(i)) (i - 1)
    in
    loop init (len - 1)

(* [map3 f x y z] builds the array whose i-th cell is [f x.(i) y.(i) z.(i)].
   The three arrays must have the same length; a mismatch is treated as a
   programming error and triggers [assert false]. *)
let map3 f x y z =
  let lx = Array.length x in
  let ly = Array.length y in
  let lz = Array.length z in
  if lx <> ly || lx <> lz then assert false
  else
    Array.init lx (fun i -> f (Array.get x i) (Array.get y i) (Array.get z i))

(* [min_list f l] returns [Some (x, f x)] for an element [x] of [l]
   minimising [f] (polymorphic comparison), or [None] when [l] is empty.
   On ties the earliest element of the list is kept. [f] is applied exactly
   once per element. *)
let min_list f l =
  let foldf acc y =
    match acc with
    | None -> Some (y, f y)
    | Some (x, vx) ->
        let vy = f y in
        if vx <= vy then Some (x, vx) else Some (y, vy)
  in
  List.fold_left foldf None l

(* Renders a bitvector as an SMT-LIB hexadecimal literal by replacing the
   first character of its hex string with '#'.
   NOTE(review): this presumes [Bitvector.to_hexstring] yields a "0x..."
   string, so that the result is "#x..." -- confirm. *)
let bv_to_smtlib bv =
  let s = Bitvector.to_hexstring bv in
  String.mapi (fun i c -> if i = 0 then '#' else c) s

(* Same as [bv_to_smtlib] for the integer [cst] seen as a bitvector of
   [size] bits. *)
let cst_to_smtlib cst size =
  let bv = Bitvector.of_int ~size cst in
  bv_to_smtlib bv
: t -> t 34 | val (~-) : t -> t 35 | 36 | (* Binop *) 37 | val (+) : t -> t -> t 38 | val (-) : t -> t -> t 39 | val ( * ) : t -> t -> t 40 | val ( && ) : t -> t -> t 41 | val ( || ) : t -> t -> t 42 | val (^) : t -> t -> t 43 | val (>>) : t -> t -> t 44 | val (<<) : t -> t -> t 45 | val (>>-) : t -> t -> t 46 | val rot : t -> t -> t 47 | val (/) : t -> t -> t 48 | val (/-) : t -> t -> t 49 | val (%) : t -> t -> t 50 | val (%-) : t -> t -> t 51 | val (>>%) : t -> t -> t 52 | val (<<%) : t -> t -> t 53 | val (>>-%) : t -> t -> t 54 | val umax : t -> t -> t 55 | val umin : t -> t -> t 56 | val smax : t -> t -> t 57 | val smin : t -> t -> t 58 | end 59 | 60 | module TreeSimplifier (C : Tree.TreeConstructor) (O : Oracle.ORACLE) : sig 61 | val simplify : C.data Tree.t -> C.data Tree.t 62 | end -------------------------------------------------------------------------------- /src/lib/oracle.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). 
(* A named variable; [sz] is used elsewhere in the project as the bit width
   of the variable. The type is private: values can be inspected but not
   constructed outside this module. *)
type variable = private { name : string ; sz : int }

(* A named constant together with its bitvector value. *)
type constant = { name : string ; value : Bitvector.t }

(* Abstract type of one sample; use [sample_inputs] / [sample_output]
   from [ORACLE] to inspect it. *)
type sample

(* Access to the sampled behaviour that synthesis works from.
   NOTE(review): the implementation is not visible here; the per-function
   remarks below are inferred from names and types only -- confirm. *)
module type ORACLE = sig
  (* Presumably the number of input variables, constants and samples. *)
  val nvars : unit -> int
  val nconsts : unit -> int
  val nsamples : unit -> int

  (* Presumably one value per sample for the given variable / constant, and
     for the output. *)
  val var_values : variable -> Bitvector.t array
  val const_values : constant -> Bitvector.t array
  val out_values : unit -> Bitvector.t array

  val out_var : unit -> variable
  val random_var : unit -> variable
  val random_const : unit -> constant
  val print : unit -> unit
  val vars : unit -> variable array
  val consts : unit -> constant array

  (* Optional list of operator names (meaning of [None] to be confirmed). *)
  val ops : unit -> string list option

  (* NOTE(review): order of the two [int] arguments (value / size?) is not
     visible from this interface -- confirm before use. *)
  val const_of_int : int -> int -> constant
  val const_of_bitv : Bitvector.t -> constant
  val get_sample : int -> sample
  val sample_output : sample -> variable * Bitvector.t
  (* TODO : Temporary, we should prevent the modification but easier for now *)
  val sample_inputs : sample -> (variable * Bitvector.t) array
  val add_sample : (variable * Bitvector.t) array -> (variable * Bitvector.t) -> unit
  val get_expr_size : unit -> int option
end

(* Builds an oracle from the JSON file [filename].
   NOTE(review): the role of the [int array] argument is not documented in
   this interface -- confirm. *)
val of_json : filename:string -> int array -> (module ORACLE)

val dummyOracle : (module ORACLE)
(* Per-node data attached to the trees handled by a [MUTATOR].
   NOTE(review): field roles are inferred from names and types only --
   confirm against the implementation:
   - [last_mutation_indice]: presumably the index of the last mutation
     applied, if any;
   - [all_choices] / [left_choices]: presumably every candidate operator and
     the ones not tried yet;
   - [vals]: presumably the values of the subtree, one per sample. *)
type mutd = {
  mutable last_mutation_indice : int option;
  all_choices : Tree.op_t array;
  mutable left_choices : Tree.op_t array;
  vals : Bitvector.t array;
}

(* A set of operators usable as mutations, grouped by arity, plus SyGuS
   printers.
   NOTE(review): in the [*_sygus] printers each [string] / [int] pair
   presumably describes one operand (text and bit size) -- confirm. *)
module type MUTATIONS = sig
  val all_mut : unit -> Tree.op_t array
  val unop_mut : unit -> Tree.op_t array
  val binop_mut : unit -> Tree.op_t array
  val triop_mut : unit -> Tree.op_t array
  val unop_sygus : Tree.op_t -> string -> int -> string
  val binop_sygus : Tree.op_t -> string -> int -> string -> int -> string
  val triop_sygus : Tree.op_t -> string -> int -> string -> int -> string -> int -> string
end

(* Tree constructor specialised to [mutd] data, extended with mutation,
   evaluation and printing of expression trees.
   NOTE(review): the meaning of the [int] and [int option] arguments of
   [mutate] / [mutate_mh], and of the [float] returned by [mutate_mh], is
   not visible from this interface -- confirm. *)
module type MUTATOR = sig
  include Tree.TreeConstructor with type data = mutd
  type t := mutd Tree.t

  val singleton : unit -> t
  val mutate : int -> t -> int option -> t
  val mutate_mh : int -> t -> int option -> t * float
  val cut : t -> t
  val eval : t -> Bitvector.t array
  val to_string : t -> string
  val to_smtlib : t -> string
  val get_expr_size : t -> int
  val get_mutations : unit -> (module MUTATIONS)
end

(* Builds a mutation set for the given oracle from a textual description
   (accepted strings to be confirmed against the implementation). *)
val mutations_of_str : (module Oracle.ORACLE) -> string -> (module MUTATIONS)

(* Builds a mutation set for the given oracle from an array of operator
   names. *)
val mutations_of_operators :
  (module Oracle.ORACLE) -> string array -> (module MUTATIONS)

(* Instantiates a mutator from an oracle and a mutation set. *)
module Mk_Mutator (O : Oracle.ORACLE) (M : MUTATIONS) : MUTATOR
from pathlib import Path
import argparse
import json


def mean(l):
    """Return the arithmetic mean of ``l``, or ``None`` when ``l`` is empty."""
    if len(l) > 0:
        return sum(l) / len(l)
    else:
        return None


def display(res):
    """Print the success rate, equivalence rate bounds and mean quality.

    ``res`` is the accumulator built by ``main`` (keys ``nsucc``, ``nequiv``,
    ``nukn``, ``total`` and ``quals``).  When no result was collected the
    rates are undefined, so a short notice is printed instead of dividing
    by zero.
    """
    total = res["total"]
    if total == 0:
        print("No results found")
        return

    print("Success Rate: {}%".format(res["nsucc"] * 100 / total))
    # Lower bound: proven equivalent; upper bound: proven or unknown.
    print("Equiv Rate: {} - {}%".format(
        res["nequiv"] * 100 / total,
        (res["nequiv"] + res["nukn"]) * 100 / total
    ))
    print("Mean quality: {}".format(mean(res["quals"])))


def size(expr):
    """
    Size in term of number of operators, variables and constant values
    (whitespace-separated tokens once parentheses are removed).
    """
    l = [x for x in expr.replace("(", "").replace(")", "").split() if x.strip() != ""]
    return len(l)


def main(resdir, samplesdir):
    """Aggregate every ``<resdir>/*/*.json`` result file and print the stats.

    For each successful result, the matching sample file
    ``<samplesdir>/<subdir>/<name>.json`` provides the reference expression
    size used to compute the quality ratio.
    """
    res = {
        "nsucc": 0,
        "nequiv": 0,
        "nukn": 0,
        "total": 0,
        "quals": [],
    }

    for filepath in resdir.glob("*/*.json"):
        with open(filepath, "r") as f:
            resdata = json.load(f)

        if resdata["success"] == "yes":
            res["nsucc"] += 1

            # The sample file is only needed for the quality ratio, so it is
            # only read for successful results.
            with open(samplesdir / filepath.parent.name / filepath.name, "r") as f:
                sampdata = json.load(f)

            exprsize = sampdata["info"]["exprsize"]
            if exprsize:
                # A reference size of 0 would make the ratio undefined.
                res["quals"].append(size(resdata["simplified"]) / exprsize)

            if resdata["equiv"] == "yes":
                res["nequiv"] += 1
            elif resdata["equiv"] == "ukn":
                res["nukn"] += 1

        res["total"] += 1

    display(res)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--resdir', required=True, type=str, help="results directory")
    parser.add_argument('--sampdir', required=True, type=str, help="samples directory")
    args = parser.parse_args()

    main(Path(args.resdir), Path(args.sampdir))
(* Directory holding the output of the last [sample] run. [None] until
   [sample] has been called, and again after [remove_samples]. *)
let outdir = ref None

(* [sample binary config isa_opt out_opt] configures the BINSEC symbolic
   engine and the Xyntia term sampler, then runs the exploration.
   - [binary]: executable file to load;
   - [config]: script file handed to the engine;
   - [isa_opt]: architecture name; when [None], x86 is set first and the
     architecture is then read from [binary]. An unsupported name fails with
     "unknown ISA";
   - [out_opt]: output directory; defaults to a per-process directory under
     /tmp. *)
let sample binary config isa_opt out_opt =
  Libsse.Options.Engine.set Libterm.Senv.Vanilla;
  Libsse.Options.ScriptFiles.set [ config ];
  Binsec.Kernel_options.ExecFile.set binary;
  Libsse.Options.Logger.quiet ();
  let set_machine machine = Binsec.Kernel_options.Machine.set machine in
  (match isa_opt with
  | None ->
      set_machine Binsec.Machine.x86;
      Binsec.Kernel_functions.Loader.set_arch_from_file ~filename:binary
  | Some "x86" -> set_machine Binsec.Machine.x86
  | Some "amd64" -> set_machine Binsec.Machine.amd64
  | Some "arm32" -> set_machine Binsec.(Machine.armv7 LittleEndian)
  | Some "aarch64" -> set_machine Binsec.(Machine.armv8 LittleEndian)
  | Some "ppc64" -> set_machine Binsec.(Machine.ppc64 BigEndian)
  | Some "riscv" -> set_machine Binsec.(Machine.riscv `x32)
  | Some "riscv64" -> set_machine Binsec.(Machine.riscv `x64)
  | Some _ -> failwith "unknown ISA");

  Xyntiasampler.Termsampler.enable ();
  Xyntiasampler.Termsampler.Mode.set Xyntiasampler.Termsampler.Xyntia;

  (* Remember where the samples go so the accessors below can find them. *)
  let target_dir =
    match out_opt with
    | Some dir -> dir
    | None -> Printf.sprintf "/tmp/xyntiasampling_%d" (Unix.getpid ())
  in
  outdir := Some target_dir;
  Xyntiasampler.Termsampler.OutDir.set target_dir;

  let module Eng = (val Libsse.Options.Engine.get_factory ()) in
  let module R = Libsse.Exec.Run (Eng) (Libsse.Heuristic.Dfs) () in
  R.unit

(* Path of the "formula" file inside the current output directory.
   Raises [Invalid_argument] if no output directory is set. *)
let get_formula () =
  let dir = Option.get !outdir in
  dir ^ "/formula"

(* Paths of every ".json" entry found in the current output directory.
   Raises [Invalid_argument] if no output directory is set. *)
let get_samples () =
  let dir = Option.get !outdir in
  let keep_json entry =
    if Filename.extension entry = ".json" then Some (dir ^ "/" ^ entry)
    else None
  in
  List.filter_map keep_json (Array.to_list (Sys.readdir dir))

(* Recursively deletes the current output directory and forgets it. *)
let remove_samples () =
  let dir = Option.get !outdir in
  FileUtil.rm ~recurse:true [ dir ];
  outdir := None
#!/usr/bin/env python3

from pathlib import Path
import argparse
import json


def mean(l):
    """Return the mean of ``l`` rounded to 6 decimals, or ``None`` if empty."""
    if len(l) == 0:
        return None
    return round(sum(l) / len(l), 6)


def _percent(values):
    """Mean of ``values`` as a percentage, or ``None`` when there is no data."""
    m = mean(values)
    return None if m is None else 100 * m


def display_stats(stats):
    """Print the aggregated statistics.

    Every line is printed even when the corresponding list is empty; the
    value is then shown as ``None`` instead of raising ``TypeError`` on
    ``100 * None``.
    """
    print("Mean success rate: {}%".format(_percent(stats["successes"])))
    print("Mean equiv. rate: {} - {}%".format(_percent(stats["equiv_lower"]), _percent(stats["equiv_upper"])))
    print("Mean quality: {}".format(mean(stats["qualities"])))
    print("Mean Synthesis Time: {}".format(mean(stats["synth_time"])))
    print("Mean Simplification Time: {}".format(mean(stats["simp_time"])))


def main(directory):
    """Recompute the statistics from ``<directory>/synthesized/*.json``."""
    stats = {
        "successes": [],
        "equiv_lower": [],
        "equiv_upper": [],
        "qualities": [],
        "synth_time": [],
        "simp_time": []
    }

    for resfile in (directory / "synthesized").glob("*.json"):
        with open(resfile, "r") as f:
            res = json.load(f)

        stats["successes"].append(res.get("success") == "yes")
        if res.get("success") == "yes":
            # Lower bound: proven equivalent; upper bound: not proven different.
            stats["equiv_lower"].append(res.get("equiv") == "yes")
            stats["equiv_upper"].append(res.get("equiv") != "no")
            if res.get("equiv") != "no" and res.get("quality"):
                stats["qualities"].append(res["quality"])
            # A result file without a synthesis time must not poison the mean
            # with a None entry.
            synth_time = res.get("synthesis_time")
            if synth_time is not None:
                stats["synth_time"].append(synth_time)
            if res.get("simplification_time"):
                stats["simp_time"].append(res.get("simplification_time"))
        else:
            stats["equiv_lower"].append(False)
            stats["equiv_upper"].append(False)

    display_stats(stats)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--dir', required=True, type=str, help="directory where results are stored")
    args = parser.parse_args()
    main(Path(args.dir))
#!/usr/bin/env python3
from ghidra.program.model.block import BasicBlockModel
from ghidra.util.task import TaskMonitor
import binascii


def save(filename, opcodes):
    """Write the instruction bytes in ``opcodes`` to ``filename``.

    Nothing is written unless there are at least two instructions.
    """
    if len(opcodes) <= 1:
        return
    with open(filename, "wb") as out:
        for raw in opcodes:
            out.write(raw)


def isControlFlow(ins):
    """Tell whether ``ins`` ends a straight-line sequence.

    True for calls, jumps, terminal instructions and any instruction whose
    mnemonic contains "REP".
    """
    flow = ins.getFlowType()
    return bool(
        flow.isCall()
        or flow.isJump()
        or flow.isTerminal()
        or "REP" in ins.getMnemonicString()
    )


def dump(block, outdir):
    """Dump the straight-line instruction runs of ``block`` into ``outdir``.

    Each run is stored in a file named after the address it starts at; the
    control-flow instructions that separate the runs are not written.
    """
    # Warning a block may contains calls
    filename = "0x{}.bin".format(block.getFirstStartAddress())
    instructions = currentProgram.getListing().getInstructions(block, True)
    pending = []

    while instructions.hasNext():
        ins = instructions.next()
        if filename is None:
            # First instruction after a split: it names the next output file.
            filename = "0x{}.bin".format(ins.getAddress())

        if not isControlFlow(ins):
            pending.append(ins.getBytes())
            continue

        # Control-flow instruction: flush what was gathered and start over.
        save("{}/{}".format(outdir, filename), pending)
        filename = None
        pending = []

    save("{}/{}".format(outdir, filename), pending)


def main(outdir):
    """Dump every basic block whose first address lies in the .text section."""
    text_section = getMemoryBlock('.text')
    model = BasicBlockModel(currentProgram)
    blocks = model.getCodeBlocks(TaskMonitor.DUMMY)

    block = blocks.next()
    while block:
        if text_section.contains(block.getFirstStartAddress()):
            dump(block, outdir)
        block = blocks.next()


if __name__ == "__main__":
    args = getScriptArgs()
    if len(args) != 1:
        print("[*] Parameters: ")
        exit(-1)
    main(args[0])
(* Symbols a tree node can carry: a variable, a constant, or an operator of
   arity one, two or three. *)
type op_t =
  | OP_Var of Oracle.variable
  | OP_Const of Oracle.constant
  | OP_Unop of Operators.unop_t
  | OP_Binop of Operators.binop_t
  | OP_Triop of Operators.triop_t

(* Shape of a node: a leaf (variable or constant), a size modifier applied
   to one subtree, or an operator applied to one, two or three subtrees. *)
type 'a node =
  | Var of Oracle.variable
  | Const of Oracle.constant
  | SizeMod of Operators.sizemod_t * 'a t
  | Unop of Operators.unop_t * 'a t
  | Binop of Operators.binop_t * 'a t * 'a t
  | Triop of Operators.triop_t * 'a t * 'a t * 'a t

(* An expression tree annotated with user data of type ['a]. *)
and 'a t = {
  node : 'a node;
  (* number of nodes in this subtree, size modifiers do not count *)
  sz : int;
  (* bitsize of the values calculated by this tree *)
  bitsz : int;
  data : 'a;
}

val compare : 'a t -> 'a t -> int
val tree_equal : 'a t -> 'a t -> bool
(** Checks tree equality (with respect to the commutativity of operators) *)

(* Renders a tree as text.
   NOTE(review): how the ['a -> string] printer for the node data is used in
   the output is not visible from this interface -- confirm. *)
val to_string : ('a -> string) -> 'a t -> string

(* Constructors *)
(* In each constructor the first argument is the data stored in the new
   node. *)
val mk_var : 'a -> Oracle.variable -> 'a t
val mk_const : 'a -> Oracle.constant -> 'a t
val mk_const_of_bv : 'a -> Bitvector.t -> 'a t
val mk_sizemod : 'a -> Operators.sizemod_t -> 'a t -> 'a t
val mk_unop : 'a -> Operators.unop_t -> 'a t -> 'a t
val mk_binop : 'a -> Operators.binop_t -> 'a t -> 'a t -> 'a t
val mk_triop : 'a -> Operators.triop_t -> 'a t -> 'a t -> 'a t -> 'a t

(* Constructors specialised to one kind of node data: implementations
   supply the data themselves, so callers only give the tree structure. *)
module type TreeConstructor = sig
  type data
  type t' := data t

  (* Specialized Constructor *)
  val mk_var : Oracle.variable -> t'
  val mk_const : Oracle.constant -> t'
  val mk_const_of_bv : Bitvector.t -> t'
  val mk_sizemod : Operators.sizemod_t -> t' -> t'
  val mk_unop : Operators.unop_t -> t' -> t'
  val mk_binop : Operators.binop_t -> t' -> t' -> t'
  val mk_triop : Operators.triop_t -> t' -> t' -> t' -> t'

  val eq : t' -> t' -> bool
  val compare : t' -> t' -> int

  (* Converts a tree carrying any data into a tree carrying [data].
     NOTE(review): presumably rebuilds the same structure with this
     constructor's own data -- confirm. *)
  val transfer : 'a t -> t'
end

(* Tree constructor whose node data is [unit]. *)
module UnitTreeConstructor : TreeConstructor with type data = unit
(* Answer of an equivalence query: equivalent, not equivalent, or unknown
   (the solver timed out or gave up). *)
type equiv_answer = YES | NO | UKN

(* Lower-case textual form of an [equiv_answer]. *)
let answer_to_string = function
  | YES -> "yes"
  | NO -> "no"
  | UKN -> "ukn"

(* Values read back from the solver model: one bitvector per oracle input
   variable, plus the value of the oracle output variable. *)
type counter_example = {
  inputs : (Oracle.variable * Bitvector.t) array;
  output : Oracle.variable * Bitvector.t;
}

module type CheckerType = sig
  val open_session : unit -> unit
  val close_session : unit -> unit
  val check : string -> string -> equiv_answer
  val get_counter_example : unit -> counter_example
end

(* Equivalence checker between the formula stored in an SMT-LIB file and a
   candidate SMT-LIB expression, for the output variable of oracle [O], using
   the SMT back-end [Solver]. At most one solver session is open at a time. *)
module EquivChecker (O : Oracle.ORACLE) (Solver : Smt.Smt_sig.Solver) = struct
  (* Currently open solver session, if any. *)
  let solver = ref None

  (* Opens the solver session.
     Raises [Failure] if a session is already open. *)
  let open_session () =
    match !solver with
    | Some _ -> failwith "Cannot open a session twice"
    | None -> solver := Some (Solver.open_session ())

  (* Closes the solver session; does nothing when no session is open. *)
  let close_session () =
    match !solver with
    | Some s ->
        Solver.close_session s;
        solver := None
    | None -> ()

  (* Returns the open session. Raises [Failure] when there is none. *)
  let current_session () =
    match !solver with
    | Some s -> s
    | None -> failwith "Session not open"

  (* Bitvector formula term designating the oracle variable [v]. *)
  let term_of_var (v : Oracle.variable) =
    Binsec.(Formula.(mk_bv_var (bv_var v.name v.sz)))

  (* [check formula_file smtexpr] parses the SMT-LIB script [formula_file],
     appends a definition [cegis_checker] whose body is [smtexpr] (with the
     bit size of the oracle output), asserts that the oracle output variable
     and [cegis_checker] are distinct, and asks the solver:
     - SAT   -> [NO]  (a distinguishing model exists),
     - UNSAT -> [YES],
     - TIMEOUT / UNKNOWN -> [UKN].
     Raises [Failure] when no session is open.
     NOTE(review): every call sends the whole script to the same session;
     presumably callers use a fresh session for each check -- confirm. *)
  let check formula_file smtexpr =
    let solv = current_session () in

    let ovar = O.out_var () in
    let smtexpr =
      Format.sprintf "(define-fun cegis_checker () (_ BitVec %d) %s)" ovar.sz
        smtexpr
    in

    let orig_scr =
      Binsec.(
        Parse_utils.read_file ~parser:Smtlib_parser.script
          ~lexer:Smtlib_lexer.token ~filename:formula_file)
    in
    let smtexpr_scr =
      Binsec.(
        Parse_utils.read_string ~parser:Smtlib_parser.script
          ~lexer:Smtlib_lexer.token ~string:smtexpr)
    in

    (* Original script followed by the candidate definition. *)
    let fullscr : Binsec.Smtlib.script =
      {
        script_commands =
          List.append orig_scr.script_commands smtexpr_scr.script_commands;
        script_loc = Binsec.Location.dummy_loc;
      }
    in

    let out_term = term_of_var ovar in
    let cegis_term =
      Binsec.(Formula.(mk_bv_var (bv_var "cegis_checker" ovar.sz)))
    in

    let fm = Binsec.Smtlib_to_formula.script fullscr in
    let fm =
      Binsec.(
        Formula.push_front_assert
          (Formula.mk_bv_distinct out_term cegis_term)
          fm)
    in

    Binsec.Formula.iter_forward (Solver.put solv) fm;
    match Solver.check_sat solv with
    | SAT -> NO
    | UNSAT -> YES
    | TIMEOUT | UNKNOWN -> UKN

  (* Reads from the solver the values of every oracle input variable and of
     the oracle output variable.
     Raises [Failure] when no session is open. *)
  let get_counter_example () =
    let solv = current_session () in
    let inputs =
      Array.map
        (fun (v : Oracle.variable) ->
          (v, Solver.get_bv_value solv (term_of_var v)))
        (O.vars ())
    in
    let ovar = O.out_var () in
    let output = (ovar, Solver.get_bv_value solv (term_of_var ovar)) in
    { inputs; output }
end

(* [make_checker (module O) name timeout] selects the SMT solver called [name]
   (when given; one of "z3", "cvc4", "yices", "boolector", "bitwuzla"), sets
   the solver timeout (when given), then packs an [EquivChecker] over the
   configured solver as a first-class [CheckerType] module.
   Raises [Failure] on an unknown solver name. *)
let make_checker (module O : Oracle.ORACLE) name timeout =
  Option.iter
    (fun name ->
      let solv : Binsec.Formula_options.solver =
        match name with
        | "z3" -> Z3
        | "cvc4" -> CVC4
        | "yices" -> Yices
        | "boolector" -> Boolector
        | "bitwuzla" -> Bitwuzla
        | _ -> failwith "Unknown smt solver"
      in
      Binsec.Formula_options.Solver.set solv)
    name;

  Option.iter (fun t -> Binsec.Formula_options.Solver.Timeout.set t) timeout;

  let module Solver = (val Smt.Smt_solver.get_solver ()) in
  (module EquivChecker (O) (Solver) : CheckerType)
6 | "size": "0x20", 7 | "value": "0x2084fea3" 8 | } 9 | }, 10 | "outputs": { 11 | "0": { 12 | "location": "EAX", 13 | "size": "0x20", 14 | "value": "0x4109fd46" 15 | } 16 | } 17 | }, 18 | "sampling": { 19 | "0": { 20 | "inputs": { 21 | "0": { 22 | "location": "EDI", 23 | "size": "0x20", 24 | "value": "0x3463bf1a" 25 | } 26 | }, 27 | "outputs": { 28 | "0": { 29 | "location": "EAX", 30 | "size": "0x20", 31 | "value": "0x68c77e34" 32 | } 33 | } 34 | }, 35 | "1": { 36 | "inputs": { 37 | "0": { 38 | "location": "EDI", 39 | "size": "0x20", 40 | "value": "0xaae46f3a" 41 | } 42 | }, 43 | "outputs": { 44 | "0": { 45 | "location": "EAX", 46 | "size": "0x20", 47 | "value": "0x55c8de74" 48 | } 49 | } 50 | }, 51 | "2": { 52 | "inputs": { 53 | "0": { 54 | "location": "EDI", 55 | "size": "0x20", 56 | "value": "0x70c42f57" 57 | } 58 | }, 59 | "outputs": { 60 | "0": { 61 | "location": "EAX", 62 | "size": "0x20", 63 | "value": "0xe1885eae" 64 | } 65 | } 66 | }, 67 | "3": { 68 | "inputs": { 69 | "0": { 70 | "location": "EDI", 71 | "size": "0x20", 72 | "value": "0x971b89e" 73 | } 74 | }, 75 | "outputs": { 76 | "0": { 77 | "location": "EAX", 78 | "size": "0x20", 79 | "value": "0x12e3713c" 80 | } 81 | } 82 | }, 83 | "4": { 84 | "inputs": { 85 | "0": { 86 | "location": "EDI", 87 | "size": "0x20", 88 | "value": "0x50827757" 89 | } 90 | }, 91 | "outputs": { 92 | "0": { 93 | "location": "EAX", 94 | "size": "0x20", 95 | "value": "0xa104eeae" 96 | } 97 | } 98 | }, 99 | "5": { 100 | "inputs": { 101 | "0": { 102 | "location": "EDI", 103 | "size": "0x20", 104 | "value": "0x1ce494be" 105 | } 106 | }, 107 | "outputs": { 108 | "0": { 109 | "location": "EAX", 110 | "size": "0x20", 111 | "value": "0x39c9297c" 112 | } 113 | } 114 | }, 115 | "6": { 116 | "inputs": { 117 | "0": { 118 | "location": "EDI", 119 | "size": "0x20", 120 | "value": "0x2190bc03" 121 | } 122 | }, 123 | "outputs": { 124 | "0": { 125 | "location": "EAX", 126 | "size": "0x20", 127 | "value": "0x43217806" 128 | } 129 | } 130 | }, 131 
| "7": { 132 | "inputs": { 133 | "0": { 134 | "location": "EDI", 135 | "size": "0x20", 136 | "value": "0x9da642ca" 137 | } 138 | }, 139 | "outputs": { 140 | "0": { 141 | "location": "EAX", 142 | "size": "0x20", 143 | "value": "0x3b4c8594" 144 | } 145 | } 146 | }, 147 | "8": { 148 | "inputs": { 149 | "0": { 150 | "location": "EDI", 151 | "size": "0x20", 152 | "value": "0xe576516c" 153 | } 154 | }, 155 | "outputs": { 156 | "0": { 157 | "location": "EAX", 158 | "size": "0x20", 159 | "value": "0xcaeca2d8" 160 | } 161 | } 162 | }, 163 | "9": { 164 | "inputs": { 165 | "0": { 166 | "location": "EDI", 167 | "size": "0x20", 168 | "value": "0xf2c64e78" 169 | } 170 | }, 171 | "outputs": { 172 | "0": { 173 | "location": "EAX", 174 | "size": "0x20", 175 | "value": "0xe58c9cf0" 176 | } 177 | } 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/lib/operators.mli: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). 
(* Size modifiers; each carries an integer parameter.
   NOTE(review): whether the integer is a target size or a number of bits is
   not visible from this interface -- confirm in operators.ml. *)
type sizemod_t =
  | Extend of int
  | Sextend of int
  | Reduce of int

(* Unary operators. *)
type unop_t =
  | Minus
  | Not
  | Byteswap
  | Ehad (* >> 1 *)
  | Arba (* >> 4 *)
  | Shesh (* >> 16 *)
  | Smol (* << 1 *)

(* Binary operators. *)
type binop_t =
  | Add
  | Sub
  | Mul
  | And
  | Or
  | Xor
  | RShiftu
  | LShift
  | RShifts
  | RotateRight
  | Div_x86
  | SDiv_x86
  | Mod_x86
  | SMod_x86
  | RShiftu_x86
  | LShift_x86
  | RShifts_x86
  | UMin
  | SMin
  | UMax
  | SMax

(* Ternary operators. *)
type triop_t = Div | SDiv | Mod | SMod | ITE | Im

(* Operations on size modifiers. *)
module SizeMod : sig
  type t := sizemod_t

  val compare : t -> t -> int

  val to_string : t -> string

  (* Applies the size modifier to a bitvector. *)
  val apply : t -> Bitvector.t -> Bitvector.t

  (* SyGuS rendering; presumably takes the operand text and its bit size --
     confirm in operators.ml. *)
  val to_sygus : t -> string -> int -> string

  val eq : t -> t -> bool
end

(* Operations on unary operators. *)
module Unop : sig
  type t := unop_t

  val number : int
  val of_int : int -> t
  val compare : t -> t -> int

  val to_string : t -> string
  val of_string : string -> t option

  (* Applies the operator to a bitvector. *)
  val apply : t -> Bitvector.t -> Bitvector.t

  (* SyGuS rendering; presumably takes the operand text and its bit size --
     confirm in operators.ml. *)
  val to_sygus : t -> string -> int -> string

  val eq : t -> t -> bool

  (* True if the operator is involutive (f (f x) = x), used for simplification.
     The default value is safe but reduces simplification. *)
  val is_involutif : t -> bool
end

(* Operations on binary operators. *)
module Binop : sig
  type t := binop_t

  val number : int
  val of_int : int -> t
  val compare : t -> t -> int

  val to_string : t -> string
  val of_string : string -> t option

  (* Applies the operator to two bitvectors. *)
  val apply : t -> Bitvector.t -> Bitvector.t -> Bitvector.t

  (* SyGuS rendering; presumably takes, for each operand, its text and its
     bit size -- confirm in operators.ml. *)
  val to_sygus : t -> string -> int -> string -> int -> string

  val eq : t -> t -> bool

  (* True if the operator is commutative (f(x,y) = f(y,x)), used for
     simplification.
     The default value is safe but reduces simplifications. *)
  val is_commut : t -> bool

  (* True if the operator is associative (f(x,f(y,z)) = f(f(x,y),z)), used for
     simplification.
     The default value is safe but reduces simplifications. *)
  val is_assoc : t -> bool

  (* True if the operator is idempotent (f(x,x) = x), used for simplification.
     The default value is safe but reduces simplifications. *)
  val is_idempotent : t -> bool

  (* True if the operator is nilpotent (f(x,x) = 0), used for simplification.
     The default value is safe but reduces simplifications. *)
  val is_nilpotent : t -> bool

  (* Checks whether the bitvector is a right neutral element of the operator
     (f(x, bv) = x), used for simplification.
     The default value is safe but reduces simplifications. *)
  val neutral_element : t -> Bitvector.t -> bool

  (* Checks whether the bitvector is a right absorbing element of the operator
     (f(x, bv) = bv), used for simplification.
     The default value is safe but reduces simplifications. *)
  val absorbing_element : t -> Bitvector.t -> bool
end

(* Operations on ternary operators. *)
module Triop : sig
  type t := triop_t

  val number : int
  val of_int : int -> t
  val compare : t -> t -> int

  val to_string : t -> string
  val of_string : string -> t option

  (* Applies the operator to three bitvectors. *)
  val apply : t -> Bitvector.t -> Bitvector.t -> Bitvector.t -> Bitvector.t

  (* SyGuS rendering; presumably takes, for each operand, its text and its
     bit size -- confirm in operators.ml. *)
  val to_sygus : t -> string -> int -> string -> int -> string -> int -> string

  val eq : t -> t -> bool
end

(* [Unop] extended with operations that depend on an oracle.
   NOTE(review): presumably [rand] draws among the operators enabled for [O]
   and [cardinal] counts them -- confirm in operators.ml. *)
module Mk_Unop (O : Oracle.ORACLE) : sig
  include module type of Unop

  val rand : unit -> unop_t
  val cardinal : unit -> int
end

(* [Binop] extended with operations that depend on an oracle (see the review
   note on [Mk_Unop]). *)
module Mk_Binop (O : Oracle.ORACLE) : sig
  include module type of Binop

  val rand : unit -> binop_t
  val cardinal : unit -> int
end

(* [Triop] extended with operations that depend on an oracle (see the review
   note on [Mk_Unop]). *)
module Mk_Triop (O : Oracle.ORACLE) : sig
  include module type of Triop

  val rand : unit -> triop_t
  val cardinal : unit -> int
end
(* A distance between two bitvectors. *)
module type DIST = sig
  (* Name of the distance. *)
  val name : string

  (* Distance between two bitvectors. *)
  val dist : Bitvector.t -> Bitvector.t -> float

  (* Tells whether a distance value counts as zero. *)
  val is_zero : float -> bool
end

(* A distance between two arrays of bitvectors, working on trees whose payload
   has type [t]. *)
module type VECDIST = sig
  type t

  (* Name of the distance. *)
  val name : string

  (* Distance between two arrays of bitvectors. *)
  val vecdist : Bitvector.t array -> Bitvector.t array -> float

  (* Tells whether a distance value counts as zero. *)
  val is_zero : float -> bool

  (* Builds a tree from a tree and two arrays of bitvectors.
     NOTE(review): presumably the arrays are the values computed by the tree
     and the expected outputs, and the result is the tree completed by the
     guiding rule -- confirm in distance.ml. *)
  val extract : t Tree.t -> Bitvector.t array -> Bitvector.t array -> t Tree.t
end

module Arith : DIST
(** Arithmetic distance *)

module Hamming : DIST
(** Hamming distance *)

module Xor : DIST
(** Xor distance *)

module Logarith : DIST
(** Logarithmic distance *)

module Syntia : DIST
(** Distance used by Syntia in the paper "Syntia: Synthesizing the Semantics of Obfuscated Code" by Blazytko et al. *)

val dist_vec : float array -> float
(** Aggregates the distances obtained on each (input, output) sample into a single value *)

val vectorizeDist : (float array -> float) -> (module DIST) -> (module Tree.TreeConstructor with type data = 'a) -> (module VECDIST with type t = 'a)
(** Transforms a bitvector distance into a (in,out) vector distance *)

module GuideModCst (D : DIST) (O : Oracle.ORACLE) (M : Tree.TreeConstructor) : sig
  module NoGuide : VECDIST with type t = M.data
  (** Distance without guiding *)

  module Offset : VECDIST with type t = M.data
  (** Guiding distance for add/sub *)

  module MinOffset : VECDIST with type t = M.data
  (** Guiding distance for add/sub searching the minimum offset instead of the first one *)

  module Xor : VECDIST with type t = M.data
  (** Guiding distance for xor *)

  module MinXor : VECDIST with type t = M.data
  (** Guiding distance for xor searching the minimal distance over all the (output, expected) pairs *)

  module Mask : VECDIST with type t = M.data
  (** Guiding distance for combined 'and' and 'or'.
      The generated masks are the ones that add the most constraints to the output.
  *)

  module MaskOTF : VECDIST with type t = M.data
  (** Guiding distance for combined 'and' and 'or' which computes the mask on-the-fly.
      This produces masks that add the least constraints to the output.
  *)

  module ShiftLeft : VECDIST with type t = M.data
  (** Guiding distance for logical shift left *)

  module MinShiftLeft : VECDIST with type t = M.data
  (** Guiding distance for logical shift left searching the minimal distance over all the possible shifts *)

  module UShiftRight : VECDIST with type t = M.data
  (** Guiding distance for logical shift right *)

  module MinUShiftRight : VECDIST with type t = M.data
  (** Guiding distance for logical shift right searching the minimal distance over all the possible shifts *)

  module Mul : VECDIST with type t = M.data
  (** Guiding distance for multiplication *)

  module MinMul : VECDIST with type t = M.data
  (** Guiding distance for multiplication searching the minimum scalar instead of the first one *)

  module Rotate : VECDIST with type t = M.data
  (** Guiding distance for rotate *)

  module Affine : VECDIST with type t = M.data
  (** Guiding distance for affine functions *)

  module LinearComb () : VECDIST with type t = M.data
  (** Guiding distance for linear combinations of variables and expressions *)

  module Byteswap : VECDIST with type t = M.data
  (** Guiding distance for byteswap *)

  module Poly2 : VECDIST with type t = M.data
  (** Guiding distance for 2nd degree polynomials *)
end

(** Combines multiple vectorial distances into a single one *)
module CombineVecDist (M : sig
  type t
  val combinator : float -> float -> float
  val dists : (module VECDIST with type t = t) list
end) : VECDIST with type t = M.t

(* Builds a list of vector distances from a textual description.
   NOTE(review): the accepted syntax of the string is not visible from this
   interface -- see distance.ml. *)
val infrules_of_str : string -> (module DIST) -> (module Oracle.ORACLE) -> (module Tree.TreeConstructor with type data = 'a) -> (module VECDIST with type t = 'a) list
open Libterm.Sexpr

(* Reimplementation of Libterm.Sexpr.eval to be able to read memory not only char by char but with a given size *)

(* [read_bytes f term addr len dir cache] returns the [len]-byte value stored
   at [addr] as a bitvector of [len * 8] bits.

   Bytes already present in [cache] are taken from it. Every maximal run of
   consecutive addresses missing from the cache is fetched with one call
   [f term run_addr run_len dir], and the bytes of the fetched value are then
   added to [cache].

   The result is assembled in the byte buffer [bits], which is decoded with
   [Z.of_bits]: index 0 holds the least-significant byte. Addresses are always
   walked upwards; the buffer index moves upwards from 0 for [LittleEndian]
   and downwards from [len - 1] for [BigEndian]. *)
let read_bytes =
  (* Splits [value] ([size] bits) into bytes, least-significant byte first.
     Each byte is stored in [cache] at [addr] and in [bits] at index [i];
     [addr] then moves by [succ] and [i] by one. *)
  let rec fill_bytes cache value addr size bits i succ =
    if size = 8 then (
      let byte = Bv.to_char value in
      BiTbl.add cache addr byte;
      Bytes.set bits i byte)
    else
      let byte = Bv.to_char (Bv.extract value { hi = 7; lo = 0 }) in
      BiTbl.add cache addr byte;
      Bytes.set bits i byte;
      fill_bytes cache
        (Bv.extract value { hi = size - 1; lo = 8 })
        (Z.add addr (Z.of_int succ))
        (size - 8) bits (i + 1) succ
  in
  (* Step of the buffer index when the address increases by one. *)
  let succ (dir : Machine.endianness) =
    match dir with LittleEndian -> 1 | BigEndian -> -1
  in
  (* Resolves a run of [len] missing bytes that ends just before [addr]
     (buffer index [i]): fetches the run with [f] starting at [addr - len] and
     hands the value to the byte splitter above, starting at the buffer index
     of the least-significant byte of the run.
     NOTE(review): for BigEndian the cache address starts at the lowest
     address of the run and steps by -1, whereas the least-significant byte
     of a big-endian value belongs to the highest address of the run; this
     looks inconsistent for runs longer than one byte -- confirm. *)
  let fill_bytes f term addr len (dir : Machine.endianness) cache bits i =
    let addr = Bv.add_int addr (-len) in
    let i = match dir with LittleEndian -> i - len | BigEndian -> i + 1 in
    fill_bytes cache (f term addr len dir) (Bv.value_of addr) (len lsl 3) bits i
      (succ dir)
  in
  (* Walks the [len] remaining addresses from [addr] while the previous byte
     was found in the cache. On the first miss, switches to
     [read_bytes_miss] with a run length of 1. *)
  let rec read_bytes_hit f term addr len dir cache bits i =
    if len <> 0 then
      match BiTbl.find cache (Bv.value_of addr) with
      | byte ->
          Bytes.set bits i byte;
          read_bytes_hit f term (Bv.succ addr) (len - 1) dir cache bits
            (i + succ dir)
      | exception Not_found ->
          read_bytes_miss f term (Bv.succ addr) (len - 1) dir cache bits
            (i + succ dir)
            1
  (* Same walk while inside a run of [len'] missing bytes: the run is resolved
     as soon as a cached byte is met or the end of the read is reached. *)
  and read_bytes_miss f term addr len (dir : Machine.endianness) cache bits i
      len' =
    if len = 0 then fill_bytes f term addr len' dir cache bits i
    else
      match BiTbl.find cache (Bv.value_of addr) with
      | byte ->
          Bytes.set bits i byte;
          fill_bytes f term addr len' dir cache bits i;
          read_bytes_hit f term (Bv.succ addr) (len - 1) dir cache bits
            (i + succ dir)
      | exception Not_found ->
          read_bytes_miss f term (Bv.succ addr) (len - 1) dir cache bits
            (i + succ dir)
            (len' + 1)
  in
  fun f term addr len (dir : Machine.endianness) cache ->
    let bits = Bytes.create len in
    read_bytes_hit f term addr len dir cache bits
      (match dir with LittleEndian -> 0 | BigEndian -> len - 1);
    Bv.create (Z.of_bits (Bytes.unsafe_to_string bits)) (len lsl 3)

(* [eval ?symbols ?memory m e] evaluates expression [e] to a bitvector in the
   model [m], a 5-tuple whose first four components are used here as
   [(vars, values, cache, arrays, _)].

   - [symbols] gives the value of a variable; the default builds a bitvector
     from [Expr.hash e] with the size of [e].
   - [memory] gives the value of memory bytes that are not in the cache; the
     default returns zeros of the requested size.

   Results of non-constant expressions are memoised in [values]; every
   variable evaluated is registered in [vars] under its name. *)
let rec eval
    ?(symbols =
      fun e -> Bitvector.create (Z.of_int (Expr.hash e)) (Expr.sizeof e))
    ?(memory = fun _ _ size _ -> Bv.zeros (size lsl 3))
    ((vars, values, _, _, _) as m) = function
  | Expr.Cst bv -> bv
  | e -> (
      try BvTbl.find values e
      with Not_found ->
        let value =
          match e with
          (* constants are handled by the first case of the function *)
          | Expr.Cst _ -> assert false
          | Expr.Var { name; _ } ->
              StTbl.add vars name e;
              symbols e
          | Expr.Load { addr; len; dir; label; _ } ->
              eval_load ~symbols ~memory m
                (eval ~symbols ~memory m addr)
                len dir label
          | Expr.Unary { f; x; _ } ->
              Term.Bv.unary f (eval ~symbols ~memory m x)
          | Expr.Binary { f; x; y; _ } ->
              Term.Bv.binary f
                (eval ~symbols ~memory m x)
                (eval ~symbols ~memory m y)
          | Expr.Ite { c; t; e; _ } ->
              (* the else-branch is taken when the condition equals Bv.zero *)
              if Bv.zero = eval ~symbols ~memory m c then
                eval ~symbols ~memory m e
              else eval ~symbols ~memory m t
        in
        BvTbl.add values e value;
        value)

(* [eval_load ~symbols ~memory m ptr len dir memory_term] evaluates a load of
   [len] bytes at the already-evaluated address [ptr] from [memory_term]:
   - [Root]: bytes are read through the byte cache of the model;
   - [Symbol n]: bytes are read through the byte cache associated with [n] in
     [arrays], which is created on first use;
   - [Layer]: the bytes are selected from the layer's store at offset
     [ptr - addr]; parts not covered by the store become little-endian loads
     from the underlying memory [over]. The resulting term is byte-swapped
     for [BigEndian] and then evaluated. *)
and eval_load ~symbols ~memory ((_, _, cache, arrays, _) as t) ptr len dir
    (memory_term : Memory.t) =
  match memory_term with
  | Root -> read_bytes memory memory_term ptr len dir cache
  | Symbol n ->
      read_bytes memory memory_term ptr len dir
        (try StTbl.find arrays n
         with Not_found ->
           let arr = BiTbl.create 16 in
           StTbl.add arrays n arr;
           arr)
  | Layer { addr; store; over; _ } ->
      let addr = eval ~symbols ~memory t addr in
      let size = Bv.size_of addr in
      let offset = Bv.sub ptr addr in
      (* term for [s] bytes missing from the store at offset [i] *)
      let miss i s =
        Chunk.of_term
          (Expr.load s Expr.LittleEndian
             (Expr.constant (Bv.add addr (Bv.create i size)))
             over)
      in
      let bytes = Chunk.to_term (Store.select miss offset len store) in
      let bytes =
        match dir with LittleEndian -> bytes | BigEndian -> bswap bytes
      in
      eval ~symbols ~memory t bytes
include Binsec.Bitvector

(** [is_even x] is [true] iff bit 0 (the least-significant bit) of [x] is
    clear. *)
let is_even x = not (get_bit x 0)

(** [mod_inverse x] is the multiplicative inverse of [x] modulo [2^n], where
    [n] is the size of [x].

    The inverse is computed by Newton iteration: [(3 * x) xor 2] is correct on
    the 5 low bits for any odd [x], and each refinement step
    [inv <- inv * (1 + err)], [err <- err * err] (with [err = 1 - x * inv]
    initially) doubles the number of correct low bits.

    Supported sizes are 8, 16, 32, 64 and 128 bits.

    @raise Failure if the size of [x] is not supported.
    @raise Assert_failure if the computed value is not an inverse of [x],
    which is the case whenever [x] is even (an even number has no inverse
    modulo a power of two). *)
let mod_inverse x =
  let size = size_of x in
  (* Number of refinement steps needed to reach [size] correct bits when
     starting from 5 correct bits: 5 * 2^steps >= size. *)
  let steps =
    match size with
    | 8 -> 1
    | 16 -> 2
    | 32 -> 3
    | 64 -> 4
    | 128 -> 5
    | _ -> failwith ("Size not supported - " ^ string_of_int size)
  in
  let one = of_int ~size 1 in
  let two = of_int ~size 2 in
  let three = of_int ~size 3 in
  let x0 = logxor (mul three x) two in
  let err0 = sub one (mul x x0) in
  (* [steps] is at least 1, so at least one refinement is always applied. *)
  let rec refine inv err remaining =
    let inv = mul inv (add one err) in
    if remaining <= 1 then inv else refine inv (mul err err) (remaining - 1)
  in
  let inv = refine x0 err0 steps in
  if equal (mul inv x) one then inv else assert false

(** [byteswap bv] reverses the order of the bytes of [bv].

    Bytes are extracted from the least-significant one upwards and each new
    byte is appended after the bytes already collected.
    NOTE(review): assumes the size of [bv] is a positive multiple of 8 and
    that [concat] puts the first element of the list in the most-significant
    position -- confirm. *)
let byteswap (bv : t) : t =
  let n = size_of bv in
  let rec loop i x =
    if i = n then x
    else
      let y = extract bv { lo = i; hi = i + 8 - 1 } in
      match x with
      | None -> loop (i + 8) (Some y)
      | Some z -> loop (i + 8) (Some (concat [ z; y ]))
  in
  loop 0 None |> Option.get

(** [fold f init bv] folds [f] over the bits of [bv], from bit 0 up to the
    last bit. *)
let fold f init bv =
  let len = size_of bv in
  let rec loop acc i =
    if i = len then acc else loop (f acc (get_bit bv i)) (i + 1)
  in
  loop init 0

(** [to_float bv] is the signed value of [bv] converted to a float. *)
let to_float bv = Z.to_float (signed_of bv)

(** [bit_count x] is the number of set bits of [x]. *)
let bit_count x = fold (fun acc b -> if b then acc + 1 else acc) 0 x

(** Number of consecutive set bits of the bitvector, counted from bit 0
    (the least-significant bit).
    For example : trailing_ones 0b00011 = 2
                  trailing_ones 0b11010 = 0
*)
let trailing_ones x =
  let rec loop acc x =
    (* The loop is not infinite as any number shifted enough times is equal to
       zero and thus has its first bit equal to zero *)
    let bit = get_bit x 0 in
    if not bit then acc else loop (acc + 1) (shift_right x 1)
  in
  loop 0 x

(** Number of consecutive unset bits of the bitvector, counted from bit 0
    (the least-significant bit).
    For example : trailing_zeros 0b00011 = 0
                  trailing_zeros 0b11010 = 1
*)
let trailing_zeros x = trailing_ones (lognot x)

(** Number of consecutive set bits of the bitvector, counted from its last
    (most-significant) bit.
    For example : leading_ones 0b00011 = 0
                  leading_ones 0b11010 = 2
*)
let leading_ones x =
  let size = size_of x - 1 in
  let rec loop acc x =
    (* The loop is not infinite as any number shifted enough times is equal to
       zero and thus has its last bit equal to zero *)
    let bit = get_bit x size in
    if not bit then acc else loop (acc + 1) (shift_left x 1)
  in
  loop 0 x

(** Number of consecutive unset bits of the bitvector, counted from its last
    (most-significant) bit.
    For example : leading_zeros 0b00011 = 3
                  leading_zeros 0b11010 = 0
*)
let leading_zeros x = leading_ones (lognot x)
*) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). *) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | open Format 23 | (* let template = "\ 24 | (set-logic BV) 25 | 26 | (synth-fun f ({args}) (_ BitVec {outsize}) 27 | ( 28 | (Start (_ BitVec {outsize})) 29 | {"(EightBytes (_ BitVec 64))" if outsize == 64 else ("(FourBytes (_ BitVec 32))" if outsize == 32 else ("(TwoBytes (_ BitVec 16))" if outsize == 16 else ("(Eightbits (_ BitVec 8))" if outsize == 8 else None)))} 30 | {"(EightBytes (_ BitVec 64))" if len(vars64) > 0 and outsize != 64 else ""} 31 | {"(FourBytes (_ BitVec 32))" if len(vars32) > 0 and outsize != 32 else ""} 32 | {"(TwoBytes (_ BitVec 16))" if len(vars16) > 0 and outsize != 16 else ""} 33 | {"(Eightbits (_ BitVec 8))" if len(vars8) > 0 and outsize != 8 else ""} 34 | ) ( 35 | (Start (_ BitVec {outsize}) ( 36 | {"EightBytes" if outsize == 64 else ("FourBytes" if outsize == 32 else ("TwoBytes" if outsize == 16 else ("Eightbits" if outsize == 8 else None)))} 37 | )) 38 | {get_glue(vars8, vars16, vars32, vars64, outsize)} 39 | {gram64 if len(vars64) > 0 else ""} 40 | {gram32 if len(vars32) > 0 else ""} 41 | {gram16 if len(vars16) > 0 else ""} 42 | {gram8 if len(vars8) > 0 else ""} 43 | )) 44 | 45 | 
{constrs} 46 | 47 | (check-synth)" *) 48 | 49 | 50 | (* 51 | (EightBytes (_ BitVec 64) (#x0000000000000001 {vars64} 52 | *) 53 | 54 | let nonterm_of_size = function 55 | | 8 -> "OneByte" 56 | | 16 -> "TwoBytes" 57 | | 32 -> "FourBytes" 58 | | 64 -> "EightBytes" 59 | | _ -> assert false 60 | 61 | module SyGusPrinter (O: Oracle.ORACLE) (MU : Mutations.MUTATIONS) = struct 62 | 63 | let get_sz_modifiers sz all_sizes = 64 | let res = List.fold_left (fun acc ext_sz -> 65 | if sz == ext_sz then acc 66 | else if sz > ext_sz then 67 | let zext = Format.sprintf "((_ zero_extend %d) %s)" (sz - ext_sz) (nonterm_of_size ext_sz) in 68 | let sext = Format.sprintf "((_ sign_extend %d) %s)" (sz - ext_sz) (nonterm_of_size ext_sz) in 69 | acc ^ zext ^ " " ^ sext ^ " " 70 | else 71 | let red = Format.sprintf "((_ extract %d 0) %s)" (sz-1) (nonterm_of_size ext_sz) in 72 | acc ^ red ^ " " 73 | ) "" all_sizes in 74 | res 75 | 76 | let get_rule sz all_sizes = 77 | let res = sprintf "(%s (_ BitVec %d) (" (nonterm_of_size sz) sz in 78 | let res = Array.fold_left (fun acc (op : Tree.op_t) -> 79 | acc ^ (MU.unop_sygus op (nonterm_of_size sz) sz) ^ " ") res (MU.unop_mut ()) in 80 | let res = Array.fold_left (fun acc (op : Tree.op_t) -> 81 | acc ^ (MU.binop_sygus op (nonterm_of_size sz) sz (nonterm_of_size sz) sz) ^ " ") res (MU.binop_mut ()) in 82 | let res = Array.fold_left (fun acc (op : Tree.op_t) -> 83 | acc ^ (MU.triop_sygus op (nonterm_of_size sz) sz (nonterm_of_size sz) sz (nonterm_of_size sz) sz) ^ " ") res (MU.triop_mut ()) in 84 | 85 | (* Extenstions *) 86 | let res = res ^ get_sz_modifiers sz all_sizes in 87 | res ^ "))\n" 88 | 89 | 90 | let print () = 91 | let nvars = O.nvars () in 92 | let ovar = O.out_var () in 93 | let osize = ovar.sz in 94 | let invars = O.vars () in 95 | assert (Array.length invars == nvars); 96 | 97 | (* Create (synth-fun ...)*) 98 | let synth_fun = (Array.fold_left (fun acc (var : Oracle.variable) -> 99 | let n = sprintf " (%s (_ BitVec %d)) " var.name 
var.sz in 100 | acc ^ n 101 | ) "(synth-fun f (" invars) ^ sprintf ") (_ BitVec %d)\n" osize in 102 | 103 | (* Declare non terminal symbols *) 104 | let decl = 105 | (sprintf "\t((Start (_ BitVec %d))\n" osize) 106 | (* ^ 107 | (sprintf "\t(%s (_ BitVec %d))\n" (nonterm_of_size osize) osize) *) 108 | in 109 | let input_sizes = Array.fold_left (fun acc (var : Oracle.variable) -> 110 | let sz = var.sz in 111 | if List.mem sz acc then acc 112 | else sz::acc 113 | ) [] invars in 114 | let decl = List.fold_left (fun acc sz -> 115 | (* if sz == osize then acc 116 | else *) 117 | acc ^ sprintf "\t(%s (_ BitVec %d))\n" (nonterm_of_size sz) sz 118 | ) decl input_sizes in 119 | let decl = 120 | if List.mem osize input_sizes then decl 121 | else decl ^ sprintf "\t(%s (_ BitVec %d))\n" (nonterm_of_size osize) osize 122 | in 123 | let decl = decl ^ ") (\n" in 124 | 125 | (* Define rules *) 126 | let rules = sprintf "\t(Start (_ BitVec %d) (%s))\n" osize (nonterm_of_size osize) in 127 | let rules = List.fold_left (fun acc sz -> 128 | acc ^ "\t" ^ (get_rule sz input_sizes)) rules input_sizes in 129 | 130 | (* add extensions *) 131 | let glue = if List.mem osize input_sizes then "" 132 | else 133 | sprintf "\t(%s (_ BitVec %d) (%s))\n" (nonterm_of_size osize) osize (get_sz_modifiers osize input_sizes) 134 | in 135 | 136 | let rules = rules ^ glue ^ "))\n" in 137 | 138 | (* Add samples *) 139 | let samples = Array.init (O.nsamples ()) (fun i -> 140 | let s = O.get_sample i in 141 | let inputs = O.sample_inputs s in 142 | let _ovar, obv = O.sample_output s in 143 | let constr = "(constraint (= (f" in 144 | let constr = Array.fold_left (fun acc (_, ibv) -> 145 | let v = (Utility.bv_to_smtlib ibv) in 146 | acc ^ " " ^ v) constr inputs in 147 | constr ^ sprintf ") %s))" (Utility.bv_to_smtlib obv) 148 | ) 149 | in 150 | let samples = (Array.fold_left (fun acc s -> acc ^ "\n" ^ s) "" samples) ^ "\n" in 151 | 152 | let checksynth = "(check-synth)" in 153 | "(set-logic BV)\n" ^ synth_fun ^ 
decl ^ rules ^ samples ^ checksynth 154 | end 155 | 156 | -------------------------------------------------------------------------------- /scripts/utils/all_from_trace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ########################################################################## 3 | # This file is part of BINSEC. # 4 | # # 5 | # Copyright (C) 2019-2025 # 6 | # CEA (Commissariat à l'énergie atomique et aux énergies # 7 | # alternatives) # 8 | # # 9 | # you can redistribute it and/or modify it under the terms of the GNU # 10 | # Lesser General Public License as published by the Free Software # 11 | # Foundation, version 2.1. # 12 | # # 13 | # It is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU Lesser General Public License for more details. # 17 | # # 18 | # See the GNU Lesser General Public License version 2.1 # 19 | # for more details (enclosed in the file licenses/LGPLv2.1). # 20 | # # 21 | ########################################################################## 22 | 23 | set -e # force the script to exit if any command failed 24 | 25 | 26 | GREEN='\033[1;32m' 27 | RED='\033[0;31m' 28 | NC='\033[0m' 29 | 30 | PYTHON=python3 31 | 32 | bin="" 33 | args="" 34 | 35 | all=false 36 | sample=false 37 | synthesis=false 38 | gdb=false 39 | ghidra=false 40 | outdir="" 41 | ARCH="x86" 42 | 43 | manual="""\ 44 | usage: $0 --outdir [--all] [--gdb] [--sample] [--learn] -- binary arg1 arg2 ... 
45 | 46 | ENVIRONEMENT VARIABLES: 47 | XYNTIA : the xyntia command to run 48 | GHIDRA : ghidra directory path 49 | 50 | arguments: 51 | -o / --outdir : path to store results 52 | -a / --all : extract traces/cfg, sample and synthesize (equivalent to -gh -s -l -c) 53 | -g / --gdb : extract traces using GDB 54 | -gh / --ghidra : extract blocks using ghidra (no need to give binary argument) 55 | -s / --sample : sample blocks of code 56 | -l / --learn : synthesize blocks of code 57 | -ar / --arch : the architecture to use among x86, amd64 (default x86) 58 | """ 59 | 60 | POSITIONAL=() 61 | while [[ $# -gt 0 ]];do 62 | key="$1" 63 | 64 | case $key in 65 | -a|--all) 66 | all=true 67 | shift # past argument 68 | ;; 69 | -g|--gdb) # Exrtact blocks using GDB 70 | gdb=true 71 | shift # past argument 72 | ;; 73 | -gh|--ghidra) # Extract blocks using Ghidra (default) 74 | ghidra=true 75 | shift # past argument 76 | ;; 77 | -s|--sample) 78 | sample=true 79 | shift # past argument 80 | ;; 81 | -l|--learn) 82 | synthesis=true 83 | shift # past argument 84 | ;; 85 | -o|--outdir) 86 | outdir="$2" 87 | shift # past argument 88 | shift # past value 89 | ;; 90 | -ar|--arch) 91 | ARCH="$2" 92 | shift # past argument 93 | shift # past value 94 | ;; 95 | -h|--help) 96 | echo -e "$manual" 97 | exit 0 98 | ;; 99 | --) 100 | bin="$2" 101 | shift # past argument 102 | shift # past value 103 | args=$@ 104 | break 105 | ;; 106 | *) # unknown option 107 | POSITIONAL+=("$1") # save it in an array for later 108 | shift # past argument 109 | ;; 110 | esac 111 | done 112 | set -- "${POSITIONAL[@]}" # restore positional parameters 113 | 114 | if [[ "$XYNTIA" = "" ]];then 115 | echo -e "${RED}[arg error] the XYNTIA variable must be set${NC}" 116 | exit 1 117 | fi 118 | 119 | if [[ "$outdir" = "" ]];then 120 | echo -e "${RED}[arg error] option -o/--outdir must be set${NC}" 121 | echo -e "$manual" 122 | exit 1 123 | fi 124 | 125 | if [[ "$bin" = "" ]];then 126 | echo -e "${RED}[arg error] no path to 
binary given${NC}" 127 | echo -e "$manual" 128 | exit 1 129 | fi 130 | 131 | if $all;then 132 | # If $all set to true, then set all other options to true 133 | ghidra=true 134 | sample=true 135 | synthesis=true 136 | fi 137 | 138 | cfgdir="$outdir/$(basename $bin)/cfgs" 139 | samplesdir="$outdir/$(basename $bin)/samples" 140 | resdir="$outdir/$(basename $bin)/synthesized" 141 | 142 | 143 | echo -e "$GREEN[CONFIG] Xyntia command: $XYNTIA$NC" 144 | echo -e "$GREEN[CONFIG] Binary to analyze: $bin $args$NC" 145 | 146 | if [ ! -d "$outdir/$(basename $bin)" ];then 147 | mkdir -p "$outdir/$(basename $bin)" 148 | fi 149 | 150 | if $gdb || $ghidra;then 151 | echo -e "$GREEN[CONFIG] Save CFGs to \"$cfgdir\"$NC" 152 | fi 153 | if $sample;then 154 | echo -e "$GREEN[CONFIG] Save samples to \"$samplesdir\"$NC" 155 | fi 156 | if $synthesis;then 157 | echo -e "$GREEN[CONFIG] Save synthesis results to \"$outdir/$(basename $bin)/synthesized\"$NC" 158 | fi 159 | 160 | if $gdb || $ghidra;then 161 | # Extract blocks of code from execution trace 162 | echo -e "$GREEN[INFO] Extrat CFG graph ($bin $args) to ${cfgdir}${NC}" 163 | mkdir -p "$cfgdir" # -p: a plain mkdir aborts the script (set -e) when the directory is left over from a previous run 164 | if $gdb; then 165 | echo -e "$GREEN[INFO] Run GDB to extract CFG${NC}" 166 | gdb -batch-silent -ex "py args = \"$args\"; outdir = \"$cfgdir\"" -x scripts/utils/gdbscript.txt "$bin" 167 | else 168 | echo -e "$GREEN[INFO] Run Ghidra to extract CFG${NC}" 169 | tmpfile=$(mktemp -d "/tmp/$(basename "$bin")_XXXXXXXXXX") # unique scratch project directory; the former name generator took its width from $1, i.e. from any stray positional argument 170 | # (the directory already exists: mktemp -d created it) 171 | "${GHIDRA}/support/analyzeHeadless" "$tmpfile" empty -import "$bin" -postscript scripts/utils/ghidra/DumpBlocks.py "$cfgdir" 1> /dev/null 172 | rm -rf "$tmpfile" 173 | fi 174 | 175 | fi 176 | 177 | if $sample;then 178 | # Sample each block of code 179 | echo -e "$GREEN\n[INFO] Sample each block$NC" 180 | mkdir -p "$samplesdir" # -p: tolerate an existing samples directory 181 | for block in $(ls $cfgdir/*.bin);do 182 | echo $block 183 | blockfile=$(basename $block) 184 | addr=${blockfile%.*} 185 |
$PYTHON ./scripts/utils/sample.py --bin $block --arch $ARCH --out $samplesdir/$addr 186 | done 187 | fi 188 | 189 | if $synthesis;then 190 | # Synthesize each block of code 191 | echo -e "$GREEN\n[INFO] Synthesize each block$NC" 192 | mkdir -p "$resdir" # -p: a plain mkdir aborts the script (set -e) when results from a previous run exist 193 | for samples in $(ls $samplesdir);do 194 | echo $samplesdir/$samples 195 | mkdir -p "$resdir/$samples" # -p: same reason, one directory per sampled block 196 | for sample in $(ls $samplesdir/$samples/*.json);do 197 | bname=$(basename $sample) 198 | dirname=$(dirname $sample) 199 | $XYNTIA -json $sample -formula $dirname/formula > $resdir/$samples/$bname 200 | done 201 | done 202 | fi 203 | 204 | echo -e "$GREEN\n[INFO] Compute statistics$NC" 205 | $PYTHON ./scripts/utils/get_stats.py --resdir $resdir --sampdir $samplesdir 206 | -------------------------------------------------------------------------------- /test/heuristic_test.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1).
*) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | open OUnit2 23 | open Xyntia_utils 24 | 25 | let test_arith_dist _ = 26 | let module D = Distance.Arith in 27 | 28 | let x, y = Bitvector.of_int32 (Int32.of_int 5), Bitvector.of_int32 (Int32.of_int 2) in 29 | assert_equal 3.0 (D.dist x y); 30 | assert_equal 3.0 (D.dist y x) 31 | 32 | 33 | let test_hamm_dist _ = 34 | let module D = Distance.Hamming in 35 | 36 | let x, y = Bitvector.of_int32 (Int32.of_int 0xffffffff), Bitvector.of_int32 (Int32.of_int 0xffffffff) in 37 | assert_equal 0.0 (D.dist x y); 38 | 39 | let x, y = Bitvector.of_int32 (Int32.of_int 0xffffffff), Bitvector.of_int32 (Int32.of_int 0x7fffffff) in 40 | assert_equal 1.0 (D.dist x y); 41 | 42 | let x, y = Bitvector.of_int32 (Int32.of_int 1000), Bitvector.of_int32 (Int32.of_int 459) in 43 | assert_equal 4.0 (D.dist x y); 44 | assert_equal 4.0 (D.dist y x) 45 | 46 | let test_xor_dist _ = 47 | let module D = Distance.Xor in 48 | 49 | let x, y = Bitvector.of_int32 (Int32.of_int 5), Bitvector.of_int32 (Int32.of_int 2) in 50 | assert_equal 7.0 (D.dist x y); 51 | assert_equal 7.0 (D.dist y x) 52 | 53 | let test_logarith_dist _ = 54 | let module D = Distance.Logarith in 55 | 56 | let x, y = Bitvector.of_int32 (Int32.of_int 5), Bitvector.of_int32 (Int32.of_int 4) in 57 | assert_equal 1.0 (D.dist x y); 58 | assert_equal 1.0 (D.dist y x) 59 | 60 | let qcheck = [ 61 | QCheck.Test.make ~count:1000 ~name:"positive_arith_dist" 62 | QCheck.(pair small_int small_int) 63 | (fun (i1, i2) -> 64 | let module D = Distance.Arith in 65 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 66 | (D.dist bit1 bit2) >= 0.0); 67 | 68 | QCheck.Test.make ~count:1000 ~name:"min_diff_arith_dist" 69 | QCheck.(pair small_int small_int) 70 | (fun (i1, i2) -> 71 | let module D = Distance.Arith in 72 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 
(Int32.of_int i2)) in 73 | if Bitvector.equal bit1 bit2 then 74 | (D.dist bit1 bit2) = 0.0 75 | else 76 | not (D.is_zero (D.dist bit1 bit2))); 77 | 78 | QCheck.Test.make ~count:1000 ~name:"positive_hamming_dist" 79 | QCheck.(pair small_int small_int) 80 | (fun (i1, i2) -> 81 | let module D = Distance.Hamming in 82 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 83 | (D.dist bit1 bit2) >= 0.0); 84 | 85 | QCheck.Test.make ~count:1000 ~name:"min_diff_hamming_dist" 86 | QCheck.(pair small_int small_int) 87 | (fun (i1, i2) -> 88 | let module D = Distance.Hamming in 89 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 90 | if Bitvector.equal bit1 bit2 then 91 | (D.dist bit1 bit2) = 0.0 92 | else 93 | not (D.is_zero (D.dist bit1 bit2))); 94 | 95 | QCheck.Test.make ~count:1000 ~name:"positive_xor_dist" 96 | QCheck.(pair small_int small_int) 97 | (fun (i1, i2) -> 98 | let module D = Distance.Xor in 99 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 100 | (D.dist bit1 bit2) >= 0.0); 101 | 102 | QCheck.Test.make ~count:1000 ~name:"min_diff_xor_dist" 103 | QCheck.(pair small_int small_int) 104 | (fun (i1, i2) -> 105 | let module D = Distance.Xor in 106 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 107 | if Bitvector.equal bit1 bit2 then 108 | (D.dist bit1 bit2) = 0.0 109 | else 110 | not (D.is_zero (D.dist bit1 bit2))); 111 | 112 | QCheck.Test.make ~count:1000 ~name:"positive_logarith_dist" 113 | QCheck.(pair small_int small_int) 114 | (fun (i1, i2) -> 115 | let module D = Distance.Logarith in 116 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 117 | (D.dist bit1 bit2) >= 0.0); 118 | 119 | QCheck.Test.make ~count:1000 ~name:"min_diff_logarith_dist" 120 | QCheck.(pair small_int small_int) 121 | (fun (i1, i2) -> 122 | 
let module D = Distance.Logarith in 123 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 124 | if Bitvector.equal bit1 bit2 then 125 | (D.dist bit1 bit2) = 0.0 126 | else 127 | not (D.is_zero (D.dist bit1 bit2))); 128 | 129 | QCheck.Test.make ~count:1000 ~name:"positive_syntia_dist" 130 | QCheck.(pair small_int small_int) 131 | (fun (i1, i2) -> 132 | let module D = Distance.Syntia in 133 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 134 | (D.dist bit1 bit2) >= 0.0); 135 | 136 | QCheck.Test.make ~count:1000 ~name:"min_diff_syntia_dist" 137 | QCheck.(pair small_int small_int) 138 | (fun (i1, i2) -> 139 | let module D = Distance.Syntia in 140 | let bit1, bit2 = (Bitvector.of_int32 (Int32.of_int i1)), (Bitvector.of_int32 (Int32.of_int i2)) in 141 | if Bitvector.equal bit1 bit2 then 142 | (D.dist bit1 bit2) = 0.0 143 | else 144 | not (D.is_zero (D.dist bit1 bit2))); 145 | ] 146 | 147 | let suite = 148 | let ounit_suite = [ 149 | "test_arith_dist">:: test_arith_dist; 150 | "test_hamming_dist">:: test_hamm_dist; 151 | "test_xor_dist">:: test_xor_dist; 152 | "test_logarith_dist">:: test_logarith_dist; 153 | ] in 154 | let qcheck_suite = List.map QCheck_ounit.to_ounit2_test qcheck in 155 | 156 | "suite">:::(List.append ounit_suite qcheck_suite) 157 | 158 | 159 | let () = 160 | run_test_tt_main suite 161 | -------------------------------------------------------------------------------- /datasets/complex_handlers/bp3: -------------------------------------------------------------------------------- 1 | # Add 2 | (v0 + v1) + (- (-(v0 ^ ((v2 * (- (((~ v3) * (~ (v3 - ((~ (((v5 * (((- (- ((- (v3 * v4)) - ((- ((((((~ ((((- ((((v4 | v3) - v3) & v0) | v0)) ^ (v1 - (- (((((((((~ ((v5 - v3) | (v1 ^ (v0 * (v5 ^ v1))))) | v0) & (v4 | ((((v1 ^ (- (v1 | (((((~ (((((~ (- v2)) | v3) | ((((v2 - v1) * v4) | v3) + v3)) + v4) | v5)) - v2) - v3) ^ v3) & v3)))) | v1) & v3) * v0))) & v2) ^ v1) 
| v0) - v3) ^ v3) - v1)))) + v2) ^ v5)) * v3) ^ v0) + v5) * v5) ^ v3)) & v3)))) + v2) | v1)) + v5) | v0)) | v2)))) + v1))) + v1)))) 3 | 4 | (-(v0 ^ ((v2 * (- (((~ v3) * (~ (v3 - ((~ (((v5 * (((- (- ((- (v3 * v4)) - ((- ((((((~ ((((- ((((v4 | v3) - v3) & v0) | v0)) ^ (v1 - (- (((((((((~ ((v5 - v3) | (v1 ^ (v0 * (v5 ^ v1))))) | v0) & (v4 | ((((v1 ^ (- (v1 | (((((~ (((((~ (- v2)) | v3) | ((((v2 - v1) * v4) | v3) + v3)) + v4) | v5)) - v2) - v3) ^ v3) & v3)))) | v1) & v3) * v0))) & v2) ^ v1) | v0) - v3) ^ v3) - v1)))) + v2) ^ v5)) * v3) ^ v0) + v5) * v5) ^ v3)) & v3)))) + v2) | v1)) + v5) | v0)) | v2)))) + v1))) + v1))) + (- ((v1 ^ v1) + (v3 + (((((((((v3 & v3) | ((((v2 + v5) + v1) - (((((v1 * (v2 - (v0 - ((v3 ^ (((((v4 | v2) * v5) + v4) * (~ ((v1 | (- ((- v3) & v1))) ^ (- v4)))) - ((v0 + (v5 - v5)) * v2))) ^ (((v0 - v5) * ((v5 & ((v2 ^ v3) & v0)) + (- v0))) ^ ((~ ((((v4 - v4) - (((- ((v0 ^ v0) - ((((v0 ^ (v4 | v2)) | v3) & v3) - v0))) - v3) - v2)) * v4) + v5)) ^ v5)))))) * v3) - v1) - v3) + v4)) | v0)) & v3) ^ v5) & v5) | v4) | v4) - v0) ^ v3)))) 5 | 6 | ((v1 ^ v1) + (v3 + (((((((((v3 & v3) | ((((v2 + v5) + v1) - (((((v1 * (v2 - (v0 - ((v3 ^ (((((v4 | v2) * v5) + v4) * (~ ((v1 | (- ((- v3) & v1))) ^ (- v4)))) - ((v0 + (v5 - v5)) * v2))) ^ (((v0 - v5) * ((v5 & ((v2 ^ v3) & v0)) + (- v0))) ^ ((~ ((((v4 - v4) - (((- ((v0 ^ v0) - ((((v0 ^ (v4 | v2)) | v3) & v3) - v0))) - v3) - v2)) * v4) + v5)) ^ v5)))))) * v3) - v1) - v3) + v4)) | v0)) & v3) ^ v5) & v5) | v4) | v4) - v0) ^ v3))) 7 | 8 | 9 | # Sub 10 | (v0 - v1) + (- (~ (((- (~ (~ (~ ((~ (((v2 + v4) ^ ((((v3 + (v3 + ((v4 ^ (v2 ^ v5)) | (v1 * (- ((v2 | (((v3 - ((~ (- ((- v1) - ((v4 * ((~ (v4 ^ (~ v0))) | v2)) ^ (v3 ^ (((- v1) * v0) ^ v2)))))) | v2)) + (((- v5) & (v3 | (((v0 & (v0 | v5)) ^ (v0 ^ v0)) | (((~ v5) ^ (~ v0)) ^ ((~ v5) + v0))))) & (~ ((v3 - v5) * v1)))) & (v0 * v4))) & v3)))))) - v4) + v5) - v5)) | v2)) | v2))))) * v3) | v4))) 11 | 12 | (~ (((- (~ (~ (~ ((~ (((v2 + v4) ^ ((((v3 + (v3 + ((v4 ^ (v2 ^ v5)) | (v1 * 
(- ((v2 | (((v3 - ((~ (- ((- v1) - ((v4 * ((~ (v4 ^ (~ v0))) | v2)) ^ (v3 ^ (((- v1) * v0) ^ v2)))))) | v2)) + (((- v5) & (v3 | (((v0 & (v0 | v5)) ^ (v0 ^ v0)) | (((~ v5) ^ (~ v0)) ^ ((~ v5) + v0))))) & (~ ((v3 - v5) * v1)))) & (v0 * v4))) & v3)))))) - v4) + v5) - v5)) | v2)) | v2))))) * v3) | v4)) + (- (((- ((((((v0 * v0) ^ ((~ v5) ^ (v1 ^ v0))) + ((- ((- ((~ (v3 | (((~ v1) & ((~ ((v1 - v3) * (v3 + v5))) * v3)) ^ v2))) ^ (- v1))) * (v2 | (((v4 | (- ((((((~ ((~ (((~ ((v2 * v3) - (- (((v5 + v3) | v3) | v1)))) & (v4 | v4)) - v3)) & ((v3 ^ (((((v5 + ((v4 ^ v1) | v2)) | v3) - v5) - (v4 * v3)) ^ ((- ((- (v1 + v4)) ^ v3)) ^ v4))) | v1))) ^ v3) + v2) * v1) * v0) + v4))) + v0) & v3)))) & v2)) & v1) ^ v2) + v5)) & v3) | v1)) 13 | 14 | (((- ((((((v0 * v0) ^ ((~ v5) ^ (v1 ^ v0))) + ((- ((- ((~ (v3 | (((~ v1) & ((~ ((v1 - v3) * (v3 + v5))) * v3)) ^ v2))) ^ (- v1))) * (v2 | (((v4 | (- ((((((~ ((~ (((~ ((v2 * v3) - (- (((v5 + v3) | v3) | v1)))) & (v4 | v4)) - v3)) & ((v3 ^ (((((v5 + ((v4 ^ v1) | v2)) | v3) - v5) - (v4 * v3)) ^ ((- ((- (v1 + v4)) ^ v3)) ^ v4))) | v1))) ^ v3) + v2) * v1) * v0) + v4))) + v0) & v3)))) & v2)) & v1) ^ v2) + v5)) & v3) | v1) 15 | 16 | 17 | # Mul 18 | (v0 * v1) ^ (~ (((((~ (v0 * ((- v2) * v2))) | (((v3 ^ (v1 & v2)) & v2) + ((v2 ^ v3) + (((- (v1 * (((v0 ^ v3) - v3) * (~ ((((v1 ^ v4) + (v1 | v3)) | v5) * (v3 | (((((v5 & (- v2)) + (v2 * (v5 - (v3 & v1)))) & v0) - (~ v5)) ^ v1))))))) ^ (((- v2) + (~ (v0 + v1))) * (- (- (v4 | (((v0 - (~ v2)) - v4) - (~ (v2 + (~ ((~ (((~ (- (v4 + v1))) - v0) | v5)) ^ v2)))))))))) & v4)))) * v3) * v1) | v1)) 19 | 20 | (~ (((((~ (v0 * ((- v2) * v2))) | (((v3 ^ (v1 & v2)) & v2) + ((v2 ^ v3) + (((- (v1 * (((v0 ^ v3) - v3) * (~ ((((v1 ^ v4) + (v1 | v3)) | v5) * (v3 | (((((v5 & (- v2)) + (v2 * (v5 - (v3 & v1)))) & v0) - (~ v5)) ^ v1))))))) ^ (((- v2) + (~ (v0 + v1))) * (- (- (v4 | (((v0 - (~ v2)) - v4) - (~ (v2 + (~ ((~ (((~ (- (v4 + v1))) - v0) | v5)) ^ v2)))))))))) & v4)))) * v3) * v1) | v1)) ^ (((((~ (((~ (- (v5 + v5))) | (- 
v5)) * v5)) + (v2 - (v0 * (v1 & ((~ ((v3 * (((v3 ^ v3) - ((((- (((~ v1) ^ v2) & ((v2 - (((((~ ((- v0) + v3)) | ((v1 * v5) ^ (((~ (~ (v1 & (v5 * v4)))) + v0) | v3))) - v4) - v2) ^ ((v4 + v0) ^ v4))) * v1))) + (~ (((((v3 | (v3 & v4)) * v1) | ((v1 | (v1 & v2)) | (v0 * v2))) - (~ v0)) + v2))) + v3) & (v0 | v1))) | v3)) - v4)) & v4))))) ^ v0) + v4) & v0) 21 | 22 | 23 | (((((~ (((~ (- (v5 + v5))) | (- v5)) * v5)) + (v2 - (v0 * (v1 & ((~ ((v3 * (((v3 ^ v3) - ((((- (((~ v1) ^ v2) & ((v2 - (((((~ ((- v0) + v3)) | ((v1 * v5) ^ (((~ (~ (v1 & (v5 * v4)))) + v0) | v3))) - v4) - v2) ^ ((v4 + v0) ^ v4))) * v1))) + (~ (((((v3 | (v3 & v4)) * v1) | ((v1 | (v1 & v2)) | (v0 * v2))) - (~ v0)) + v2))) + v3) & (v0 | v1))) | v3)) - v4)) & v4))))) ^ v0) + v4) & v0) 24 | 25 | 26 | # And 27 | (v0 & v1) ^ (v2 & ((v3 + (v2 & (v3 | (- (((v2 & v0) + v1) ^ (((v3 & (v0 & v4)) * (((- (v3 | (v5 ^ ((v3 | v5) | (((v1 * ((v5 - v1) | v3)) | v4) - (v0 - (- (((v0 ^ ((((((~ v2) | v0) - ((- ((~ v3) & (((v5 - ((((v3 & v2) | ((((- v0) + (v5 | v5)) | v3) | v2)) ^ (v0 | v5)) ^ v2)) + ((((v4 ^ ((((- ((~ (v5 * v4)) | v3)) + v5) ^ v4) ^ v0)) & v0) | v1) | v1)) & v2))) * v5)) & v2) * v2) ^ v2)) - v1) - v4)))))))) * v5) & v3)) ^ v0)))))) ^ v1)) 28 | 29 | (v2 & ((v3 + (v2 & (v3 | (- (((v2 & v0) + v1) ^ (((v3 & (v0 & v4)) * (((- (v3 | (v5 ^ ((v3 | v5) | (((v1 * ((v5 - v1) | v3)) | v4) - (v0 - (- (((v0 ^ ((((((~ v2) | v0) - ((- ((~ v3) & (((v5 - ((((v3 & v2) | ((((- v0) + (v5 | v5)) | v3) | v2)) ^ (v0 | v5)) ^ v2)) + ((((v4 ^ ((((- ((~ (v5 * v4)) | v3)) + v5) ^ v4) ^ v0)) & v0) | v1) | v1)) & v2))) * v5)) & v2) * v2) ^ v2)) - v1) - v4)))))))) * v5) & v3)) ^ v0)))))) ^ v1)) ^ (- ((v1 & ((((((v1 * (v0 + (- (v2 | ((v0 - (v2 & (((v1 ^ (~ v4)) - v0) ^ v1))) | (v0 - (v4 | v4))))))) ^ v2) ^ v3) & (~ v5)) - v4) ^ ((v4 | ((((((v0 | (v2 * ((v5 & v4) + v1))) ^ (- v4)) | (~ v4)) + (- (v5 & (v2 ^ (v4 + v1))))) & ((v0 | ((~ v2) + v1)) + v4)) * (((~ v5) | (((((v1 & (v1 | v1)) | ((~ v5) - (v4 * v4))) | (v2 ^ (v4 & (~ v0)))) - v5) ^ 
v3)) ^ v1))) + v0))) * v3)) 30 | 31 | (- ((v1 & ((((((v1 * (v0 + (- (v2 | ((v0 - (v2 & (((v1 ^ (~ v4)) - v0) ^ v1))) | (v0 - (v4 | v4))))))) ^ v2) ^ v3) & (~ v5)) - v4) ^ ((v4 | ((((((v0 | (v2 * ((v5 & v4) + v1))) ^ (- v4)) | (~ v4)) + (- (v5 & (v2 ^ (v4 + v1))))) & ((v0 | ((~ v2) + v1)) + v4)) * (((~ v5) | (((((v1 & (v1 | v1)) | ((~ v5) - (v4 * v4))) | (v2 ^ (v4 & (~ v0)))) - v5) ^ v3)) ^ v1))) + v0))) * v3)) 32 | 33 | 34 | # Or 35 | (v0 | v1) + ( -((((((v4 & v1) + v3) | (((v4 ^ v1) ^ (- (((~ v1) ^ (~ v2)) - v4))) & (v4 + (((v1 | (~ ((v3 ^ v4) ^ (v1 & ((((v3 ^ ((((v3 + v1) * v4) + ((- (v1 ^ (((((v0 | v3) + v0) & ((~ v0) | (v5 ^ v3))) & (((- (~ (- ((~ (v0 + v5)) & (~ v5))))) + (v2 * v2)) | (- (- v3)))) ^ (v3 ^ v1)))) + (v4 ^ ((((v4 + (((- (- v1)) - v3) & (v5 & v1))) ^ v5) - v5) + v1)))) * v1)) - v4) ^ v1) | v3))))) * v4) ^ v1)))) - v3) - v3) ^ v0)) 36 | 37 | ((((((v4 & v1) + v3) | (((v4 ^ v1) ^ (- (((~ v1) ^ (~ v2)) - v4))) & (v4 + (((v1 | (~ ((v3 ^ v4) ^ (v1 & ((((v3 ^ ((((v3 + v1) * v4) + ((- (v1 ^ (((((v0 | v3) + v0) & ((~ v0) | (v5 ^ v3))) & (((- (~ (- ((~ (v0 + v5)) & (~ v5))))) + (v2 * v2)) | (- (- v3)))) ^ (v3 ^ v1)))) + (v4 ^ ((((v4 + (((- (- v1)) - v3) & (v5 & v1))) ^ v5) - v5) + v1)))) * v1)) - v4) ^ v1) | v3))))) * v4) ^ v1)))) - v3) - v3) ^ v0) + (- ((((- (((v4 ^ (((v5 * v1) ^ (- v5)) ^ (v5 | v3))) - (v0 ^ (~ v3))) * v3)) - (~ (v0 & v1))) ^ (((v4 ^ (((v4 | ((~ (v2 ^ v1)) & (~ (- (((~ ((- v2) ^ v5)) * v4) + v0))))) * v2) | ((((v3 & (((((v0 - ((((((v5 ^ v2) ^ ((v0 | v0) - v2)) - v0) | (~ (v3 + (((- v1) + v4) | v5)))) ^ v2) & (v4 - (((v4 & (v0 - v0)) - (v0 | v3)) * (~ v1))))) * v0) + v4) | v3) & v1)) * v4) ^ v3) - v0))) ^ v5) ^ v0)) & v3)) 38 | 39 | ((((- (((v4 ^ (((v5 * v1) ^ (- v5)) ^ (v5 | v3))) - (v0 ^ (~ v3))) * v3)) - (~ (v0 & v1))) ^ (((v4 ^ (((v4 | ((~ (v2 ^ v1)) & (~ (- (((~ ((- v2) ^ v5)) * v4) + v0))))) * v2) | ((((v3 & (((((v0 - ((((((v5 ^ v2) ^ ((v0 | v0) - v2)) - v0) | (~ (v3 + (((- v1) + v4) | v5)))) ^ v2) & (v4 - (((v4 & (v0 - v0)) - (v0 | 
v3)) * (~ v1))))) * v0) + v4) | v3) & v1)) * v4) ^ v3) - v0))) ^ v5) ^ v0)) & v3) 40 | -------------------------------------------------------------------------------- /scripts/utils/traceDisassembler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | ########################################################################## 3 | # This file is part of BINSEC. # 4 | # # 5 | # Copyright (C) 2019-2025 # 6 | # CEA (Commissariat à l'énergie atomique et aux énergies # 7 | # alternatives) # 8 | # # 9 | # you can redistribute it and/or modify it under the terms of the GNU # 10 | # Lesser General Public License as published by the Free Software # 11 | # Foundation, version 2.1. # 12 | # # 13 | # It is distributed in the hope that it will be useful, # 14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of # 15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # 16 | # GNU Lesser General Public License for more details. # 17 | # # 18 | # See the GNU Lesser General Public License version 2.1 # 19 | # for more details (enclosed in the file licenses/LGPLv2.1). 
# 20 | # # 21 | ########################################################################## 22 | 23 | import argparse 24 | import gdb 25 | import re 26 | import os 27 | import sys 28 | 29 | if (sys.version_info > (3, 0)): 30 | from pathlib import Path 31 | 32 | def toint(string): # convert a gdb value or string to an integer (long on Python 2) 33 | if (sys.version_info > (3, 0)): 34 | return int(string) 35 | else: 36 | return long(string) 37 | 38 | def tobytes(integer): # one-byte bytes object holding integer, which must satisfy 0 <= integer < 256 39 | assert integer < 256 and integer >= 0, "Not a byte: {}".format(integer) 40 | if (sys.version_info > (3, 0)): 41 | hexstring = "%02x"%(integer) 42 | return bytes.fromhex(hexstring) 43 | else: 44 | return bytes(bytearray([integer])) # Python 2: bytes is str and has no fromhex; this replaces the former 'assert False' placeholder 45 | 46 | 47 | def instn_length(addr_expr): # size in bytes of the instruction located at addr_expr 48 | t = gdb.execute('x/2i ' + addr_expr, to_string=True) # output unused: the command is run so that $_ holds the address of the second instruction 49 | return toint(gdb.parse_and_eval('$_')) - toint(gdb.parse_and_eval(addr_expr)) 50 | 51 | def isJumpInstr(addr_expr): # True when the instruction at addr_expr has a j* mnemonic (optionally behind a bnd/notrack prefix) 52 | t = gdb.execute('x/i ' + addr_expr, to_string=True) 53 | t = t.split(":")[1].strip() 54 | if re.match(r"(?:(?:bnd|notrack)\s+)?j", t): # look at the mnemonic only: searching the whole text for a 'j' also matched operands and symbol names 55 | return True 56 | else: 57 | return False 58 | 59 | 60 | def parse_opcodes(string): 61 | """ 62 | Parse bytes values. Format: 63 | addr1: byte1 byte2 ... 64 | addr2: byteX ...
65 | """ 66 | 67 | hexvalues = [] 68 | lines = [ line.strip() for line in string.split("\n") if line.strip() != "" ] 69 | 70 | for line in lines: 71 | for val in line.split(":")[1].strip().split(): 72 | assert val.startswith("0x"), "WTF {}".format(val) 73 | hexvalues.append(int(val, 16)) 74 | assert hexvalues[-1] < 256 and hexvalues[-1] >= 0, "not a byte in disassembly" 75 | return hexvalues 76 | 77 | 78 | 79 | class TraceDisassembler(gdb.Command): 80 | def __init__(self, saveDir, args): 81 | super(TraceDisassembler, self).__init__("trace_disassembler", gdb.COMMAND_USER) 82 | self.dir = saveDir 83 | 84 | # Check if the main symbol exists 85 | info = gdb.execute("p main", True, True) 86 | assert "0x" in info, "No symbol for the main function" 87 | 88 | info = gdb.execute("b *main", True, True) # break at the real start of main (!= b main) 89 | baddr = [ int(i, 16) for i in re.findall(r"0x[a-f0-9]+", info) ] 90 | assert len(baddr) == 1 91 | baddr = baddr[0] 92 | 93 | gdb.execute("r {}".format(args), True, True) # run with arguments 94 | info = gdb.execute("p $pc", True, True) # read the program counter once the run has stopped 95 | gdb_baddr = [ int(i, 16) for i in re.findall(r"0x[a-f0-9]+", info) ] 96 | assert len(gdb_baddr) == 1 97 | gdb_baddr = gdb_baddr[0] 98 | 99 | self.base_addr = gdb_baddr - baddr # presumably the load offset of the binary (runtime pc at the breakpoint minus the pre-run address of main) - TODO confirm 100 | 101 | self.binname = self._getbinname() 102 | self.end = self._get_end() 103 | self.lower, self.upper = self._getbinarymapping() 104 | 105 | def _getbinname(self): 106 | exe = gdb.execute("info proc exe", True, True).split("\n")[1].split("=")[1].strip() 107 | exe = exe[1:-1] # 'lala' -> lala 108 | return exe 109 | 110 | def _getbinarymapping(self): 111 | """ 112 | Returns the (lowest start, highest end) address interval of the mappings whose line mentions the binary name (other addresses are thus libraries that we don't monitor) 113 | """ 114 | lowers, uppers = [], [] 115 | mappings = gdb.execute("info proc mappings", True, True) 116 | lines = mappings.split("\n") 117 | for line in lines: 118 | if self.binname in line: 119 | fields = [
field.strip() for field in line.split() if field.strip() != "" ] 120 | lowers.append(int(fields[0], 16)) 121 | uppers.append(int(fields[1], 16)) 122 | interval = (min(lowers), max(uppers)) 123 | return interval 124 | 125 | def _get_end(self): 126 | """ 127 | Returns the address of the single ret instruction found in the disassembly of the main function 128 | """ 129 | 130 | # Check that the main function symbol exists 131 | info = gdb.execute("p main", True, True) 132 | assert "0x" in info, "No symbol for the main function" 133 | 134 | rets = [ line.strip() for line in gdb.execute("disas main", True, True).split("\n") if "0x" in line and "ret" in line ] 135 | assert len(rets) == 1, "Multiple returns ?!" # also fires when no ret line is found at all 136 | end = int(rets[0].split()[0], 16) 137 | print("end = ", end) 138 | return end 139 | 140 | def invoke(self, args, from_tty): 141 | blockopcodes = [] 142 | nextrip = None 143 | blockaddr = None 144 | addr = None 145 | old_addr = None 146 | instrsize = None 147 | ninstr = 0 148 | 149 | while addr == None or addr != self.end: # single-step until the ret of main is reached 150 | old_addr = addr 151 | addr = toint(gdb.selected_frame().read_register('pc')) 152 | 153 | if blockaddr == None: 154 | blockaddr = addr 155 | 156 | if (nextrip != None and nextrip != addr) or (isJumpInstr(str(old_addr)) if old_addr != None else False): # control did not fall through, or the previous instruction was a jump: the current block ends 157 | assert(instrsize != None) 158 | # Save block (without the bytes of its last instruction) unless it is filtered out 159 | if not self.filter_blocks(blockaddr, blockopcodes[:-instrsize], ninstr): 160 | self._saveOpcodes(blockaddr, blockopcodes[:-instrsize]) 161 | 162 | blockaddr = addr 163 | blockopcodes = [] 164 | ninstr = 0 165 | 166 | instrsize = instn_length(str(addr)) 167 | nextrip = addr + instrsize # address reached if execution simply falls through 168 | 169 | opcodes = parse_opcodes(gdb.execute("x/{}b {}".format(instrsize, addr), True, True)) 170 | 171 | blockopcodes += opcodes 172 | ninstr += 1 173 | 174 | _ = gdb.execute("stepi", True, True) 175 | 176 | assert(instrsize != None) 177 | self._saveOpcodes(blockaddr, blockopcodes[:-instrsize]) # save last opcodes 178 | 179 | def filter_blocks(self, blockaddr, opcodes, ninstr): 180 | """ 181 | Filter blocks we
don't want to synthesize (e.g. because they are too small); returns True when the block must be skipped 182 | """ 183 | if ninstr <= 2: # 2 because the last one is removed by default 184 | return True 185 | 186 | elif len(opcodes) <= 1: 187 | return True 188 | 189 | elif blockaddr < self.lower or blockaddr > self.upper: # block starts outside the address interval of the binary 190 | return True 191 | 192 | else: 193 | return False 194 | 195 | def _saveOpcodes(self, blockaddr, opcodes): # write the raw bytes of a block to <dir>/<hex(blockaddr - base_addr)>.bin 196 | blockfile = "{}/{}.bin".format(self.dir, hex(blockaddr - self.base_addr)) 197 | rawops = b"".join([ tobytes(op) for op in opcodes ]) 198 | 199 | with open(blockfile, "wb") as f: 200 | f.write(rawops) 201 | 202 | TraceDisassembler(outdir, args) # outdir and args are defined in ./all_from_trace.sh when calling gdb 203 | -------------------------------------------------------------------------------- /test/simplify_test.ml: -------------------------------------------------------------------------------- 1 | [@@@warning "-32"] (* Ignore the warning of unused function for the TS module *) 2 | open Xyntia_utils 3 | open OUnit2 4 | 5 | module T = Tree.UnitTreeConstructor 6 | module S = Simplifier.TreeSimplifier (T) 7 | 8 | module TS = struct 9 | include T 10 | include Simplifier.MkShortcut (struct 11 | type t = unit Tree.t 12 | include T 13 | end) 14 | end 15 | 16 | let suite = 17 | let (module O) = Oracle.dummyOracle in 18 | let module S = S (O) in 19 | 20 | (* Setup variable and constants *) 21 | let vars = Array.map TS.mk_var (O.vars ()) in 22 | let x0 = vars.(0) in 23 | let x1 = vars.(1) in 24 | let x2 = vars.(2) in 25 | let x3 = vars.(3) in 26 | let x4 = vars.(4) in 27 | let zero = TS.mk_const_of_bv (Bitvector.zeros 32) in 28 | let one = TS.mk_const_of_bv (Bitvector.ones 32) in 29 | let minusone = TS.mk_const_of_bv (Bitvector.max_ubv 32) in 30 | let minustwo = TS.mk_const_of_bv (Bitvector.of_int ~size:32 (-2)) in 31 | let two = TS.mk_const_of_bv (Bitvector.of_int ~size:32 2) in 32 | let four = TS.mk_const_of_bv (Bitvector.of_int ~size:32 4) in 33 | let six =
TS.mk_const_of_bv (Bitvector.of_int ~size:32 6) in 34 | let sminbv = TS.mk_const_of_bv (Bitvector.min_sbv 32) in 35 | let smaxbv = TS.mk_const_of_bv (Bitvector.max_sbv 32) in 36 | 37 | let simp_test tested expected _ctx = 38 | (* Test that it works *) 39 | assert_equal 40 | ~cmp:Tree.tree_equal 41 | ~printer:(Tree.to_string (fun () -> "")) 42 | expected (S.simplify tested) 43 | in 44 | let (-->) = simp_test in 45 | 46 | let ounit_suite = [ 47 | (* Involutions *) 48 | "involution_neg">:: TS.(~. (~. x0)) --> x0; 49 | "involution_not">:: TS.(- (- x0)) --> x0; 50 | 51 | (* Nilpotence *) 52 | "nilpotent_xor">:: TS.(x0 ^ x0) --> zero; 53 | "nilpotent_sub">:: TS.(x0 - x0) --> zero; 54 | 55 | (* Idempotence *) 56 | "idempotence_and">:: TS.(x0 && x0) --> x0; 57 | "idempotence_or">:: TS.(x0 || x0) --> x0; 58 | "idempotence_umax">:: TS.(umax x0 x0) --> x0; 59 | "idempotence_umin">:: TS.(umin x0 x0) --> x0; 60 | "idempotence_smax">:: TS.(smax x0 x0) --> x0; 61 | "idempotence_smin">:: TS.(smin x0 x0) --> x0; 62 | 63 | (* Neutral elements *) 64 | "neutral_add_right">:: TS.(x0 + zero) --> x0; 65 | "neutral_add_left">:: TS.(zero + x0) --> x0; 66 | 67 | "neutral_sub">:: TS.(x0 - zero) --> x0; 68 | 69 | "neutral_mul_right">:: TS.(x0 * one) --> x0; 70 | "neutral_mul_left">:: TS.(one * x0) --> x0; 71 | 72 | "neutral_and_right">:: TS.(x0 && minusone) --> x0; 73 | "neutral_and_left">:: TS.(minusone && x0) --> x0; 74 | 75 | "neutral_or_right">:: TS.(x0 || zero) --> x0; 76 | "neutral_or_left">:: TS.(zero || x0) --> x0; 77 | 78 | "neutral_xor_right">:: TS.(x0 ^ zero) --> x0; 79 | "neutral_xor_left">:: TS.(zero ^ x0) --> x0; 80 | 81 | "neutral_rshiftu_left">:: TS.(x0 >> zero) --> x0; 82 | "neutral_rshifts_left">:: TS.(x0 >>- zero) --> x0; 83 | "neutral_lshift_left">:: TS.(x0 << zero) --> x0; 84 | "neutral_rshiftux86_left">:: TS.(x0 >>% zero) --> x0; 85 | "neutral_rshiftsx86_left">:: TS.(x0 >>-% zero) --> x0; 86 | "neutral_lshiftx86_left">:: TS.(x0 <<% zero) --> x0; 87 | 88 | 
"neutral_umax_right">:: TS.(umax x0 zero) --> x0; 89 | "neutral_umax_left">:: TS.(umax zero x0) --> x0; 90 | 91 | "neutral_umin_right">:: TS.(umin x0 minusone) --> x0; 92 | "neutral_umin_left">:: TS.(umin minusone x0) --> x0; 93 | 94 | (* Combination of AC + idempotent *) 95 | "ac_idem_and1">:: TS.(( x0 && x1 ) && x0) --> TS.( x0 && x1 ); 96 | "ac_idem_and2">:: TS.(( x1 && x0 ) && x0) --> TS.( x1 && x0 ); 97 | "ac_idem_and3">:: TS.( x0 && ( x0 && x1 )) --> TS.( x0 && x1 ); 98 | "ac_idem_and4">:: TS.( x0 && ( x1 && x0 )) --> TS.( x1 && x0 ); 99 | 100 | (* Combination of AC + nilpotent *) 101 | "ac_nil_xor1">:: TS.(( x0 ^ x1 ) ^ x0) --> x1; 102 | "ac_nil_xor2">:: TS.(( x1 ^ x0 ) ^ x0) --> x1; 103 | "ac_nil_xor3">:: TS.( x0 ^ ( x0 ^ x1 )) --> x1; 104 | "ac_nil_xor4">:: TS.( x0 ^ ( x1 ^ x0 )) --> x1; 105 | 106 | "ac_nil_xor5">:: TS.( (x0 ^ x1) ^ ( x2 ^ x0 )) --> TS.(x1 ^ x2); 107 | "ac_nil_xor6">:: TS.( (x1 ^ x0) ^ ( x2 ^ x0 )) --> TS.(x1 ^ x2); 108 | 109 | (* (x && y) && (x && z) -> x && (y && z) *) 110 | "ac_idem_and5">:: TS.(( x0 && x1 ) && ( x0 && x2 )) --> TS.( x0 && (x1 && x2) ); 111 | "ac_idem_and6">:: TS.(( x0 && x1 ) && ( x2 && x0 )) --> TS.( x0 && (x1 && x2) ); 112 | "ac_idem_and7">:: TS.(( x1 && x0 ) && ( x0 && x2 )) --> TS.( x0 && (x1 && x2) ); 113 | "ac_idem_and8">:: TS.(( x1 && x0 ) && ( x2 && x0 )) --> TS.( x0 && (x1 && x2) ); 114 | (* Hard AC + idem case *) 115 | "ac_idem_and9">:: TS.(( x0 && x1 ) && ( x3 && (x4 && x0) )) --> TS.( ((x4 && x3) && x1) && x0 ); 116 | "ac_idem_and10">:: TS.(( x0 && four ) && ( x3 && (x4 && six) )) --> TS.( ((four && x4) && x3) && x0) ; 117 | 118 | (* Absorbing elements *) 119 | "abs_or1">:: TS.(x0 || minusone) --> minusone; 120 | "abs_or2">:: TS.(minusone || x0) --> minusone; 121 | 122 | "abs_and1">:: TS.(x0 && zero) --> zero; 123 | "abs_and2">:: TS.(zero && x0) --> zero; 124 | 125 | "abs_mul1">:: TS.(x0 * zero) --> zero; 126 | "abs_mul2">:: TS.(zero * x0) --> zero; 127 | 128 | "abs_umin1">:: TS.(umin x0 zero) --> zero; 129 
| "abs_umin2">:: TS.(umin zero x0) --> zero; 130 | 131 | "abs_umax1">:: TS.(umax x0 minusone) --> minusone; 132 | "abs_umax2">:: TS.(umax minusone x0) --> minusone; 133 | 134 | "abs_smin1">:: TS.(smin x0 sminbv) --> sminbv; 135 | "abs_smin2">:: TS.(smin sminbv x0) --> sminbv; 136 | 137 | "abs_smax1">:: TS.(smax x0 smaxbv) --> smaxbv; 138 | "abs_smax2">:: TS.(smax smaxbv x0) --> smaxbv; 139 | 140 | (* Specialized opt *) 141 | "add_xor_and1">:: TS.( (x0 ^ x1) + (x0 && x1) ) --> TS.(x0 || x1); 142 | "add_xor_and2">:: TS.( (x1 ^ x0) + (x0 && x1) ) --> TS.(x0 || x1); 143 | 144 | "add_sub1">:: TS.( (x0 - x1) + x1 ) --> x0; 145 | "add_sub2">:: TS.( x1 + (x0 - x1) ) --> x0; 146 | 147 | (* (- a) + b = b - a *) 148 | "add_minus1">:: TS.(- x0 + x1) --> TS.( x1 - x0 ); 149 | "add_minus2">:: TS.( x1 + (- x0) ) --> TS.( x1 - x0 ); 150 | 151 | (* ~a + a = -1 *) 152 | "add_not1">:: TS.( (~. x0) + x0 ) --> minusone; 153 | "add_not2">:: TS.( x0 + (~. x0) ) --> minusone; 154 | 155 | (* ~a + 1 = -a *) 156 | "add_not3">:: TS.( (~. x0) + one ) --> TS.( - x0); 157 | "add_not4">:: TS.( one + (~. x0) ) --> TS.( - x0); 158 | 159 | (* ~a + (a + b) = b - 1 *) 160 | "add_not_add1">:: TS.( (~. x0) + (x0 + x1) ) --> TS.( x1 - one ); 161 | "add_not_add2">:: TS.( (~. x0) + (x1 + x0) ) --> TS.( x1 - one ); 162 | "add_not_add3">:: TS.( (x1 + x0) + (~. x0) ) --> TS.( x1 - one ); 163 | "add_not_add4">:: TS.( (x0 + x1) + (~. x0) ) --> TS.( x1 - one ); 164 | 165 | (* a + a = 2*a *) 166 | "add_var">:: TS.( x0 + x0 ) --> TS.( two * x0 ); 167 | 168 | (* -a * -b = a * b *) 169 | "mul_uminus_uminus">:: TS.( (-x0) * (-x1)) --> TS.(x0 * x1); 170 | 171 | (* -a * b = - (a * b) *) 172 | "mul_uminus1">:: TS.( (-x0) * x1) --> TS.(- (x0 * x1)); 173 | "mul_uminus2">:: TS.( x0 * (-x1)) --> TS.(- (x0 * x1)); 174 | 175 | (* -a - 1 = ~a *) 176 | "sub_minus1">:: TS.( (- x0) - one ) --> TS.( ~. 
x0 ); 177 | 178 | (* a - (-b) = a + b *) 179 | "sub_minus2">:: TS.( x0 - ( -x1 ) ) --> TS.( x0 + x1 ); 180 | 181 | (* -a - b = - (a + b) *) 182 | "sub_minus3">:: TS.( (- x0) - x1 ) --> TS.( - ( x0 + x1 ) ); 183 | 184 | (* (a ^ b) & (a & b) = 0 *) 185 | "and_xor_and">:: TS.( (x0 ^ x1) && ( x0 && x1 ) ) --> zero; 186 | 187 | (* a & ~a = 0 *) 188 | "and_not">:: TS.( x0 && ( ~. x0 ) ) --> zero; 189 | 190 | (* (a & b) & ~a = 0 *) 191 | "and_and_not">:: TS.( (x0 && x1) && ( ~. x0 ) ) --> zero; 192 | 193 | (* a & (~a & b) = 0 *) 194 | "and_and_not2">:: TS.( x0 && ( (~. x0) && x1) ) --> zero; 195 | 196 | (* (a ^ b) | a = a | b *) 197 | "or_xor">:: TS.( (x0 ^ x1) || x0 ) --> TS.( x0 || x1 ); 198 | 199 | (* a | ~a = -1 *) 200 | "or_not">:: TS.( x0 || (~. x0) ) --> minusone; 201 | 202 | (* (a & b) | a = a *) 203 | "or_and">:: TS.( ( x0 && x1 ) || x0 ) --> x0; 204 | 205 | (* (a | b) | ~a = -1 *) 206 | "or_or_not">:: TS.( ( x0 || x1 ) || ( ~. x0 ) ) --> minusone; 207 | 208 | (* a ^ ~a = -1 *) 209 | "xor_not">:: TS.( x0 ^ ( ~. x0 ) ) --> minusone; 210 | 211 | (* (-(1 | a)) ^ (1 | a) = -2 *) 212 | "xor_spe">:: TS.( (- (one || x0)) ^ (one || x0) ) --> minustwo; 213 | ] 214 | in 215 | 216 | "suite">:::(ounit_suite) 217 | 218 | 219 | let () = 220 | run_test_tt_main suite -------------------------------------------------------------------------------- /scripts/bench/bench.py: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | # This file is part of BINSEC. # 3 | # # 4 | # Copyright (C) 2019-2025 # 5 | # CEA (Commissariat à l'énergie atomique et aux énergies # 6 | # alternatives) # 7 | # # 8 | # you can redistribute it and/or modify it under the terms of the GNU # 9 | # Lesser General Public License as published by the Free Software # 10 | # Foundation, version 2.1. 
def exec_xyntia(inifile, cmdargs, sampledir):
    """Run xyntia once on a sampling configuration and return its result.

    Args:
        inifile: open file object whose ``name`` is the path of the sampling
            configuration passed to ``-config``.
        cmdargs: extra command-line arguments forwarded to xyntia.
        sampledir: ``pathlib.Path`` of the directory passed to ``-sampleout``.

    Returns:
        ``None`` in sample-only mode or when no ``res.json`` was produced
        (the sampled expression was a constant); otherwise a dict, either the
        JSON printed by xyntia or a failure record when xyntia reports that
        it had no time left to start the synthesis.

    The module-level globals ``SAMPLEONLY``, ``xyntia``, ``timeout`` and
    ``cegis_to`` select the mode and the commands to run.
    """
    with tempfile.NamedTemporaryFile(mode="w") as tmp:
        # xyntia is given an empty placeholder file as its "-bin" argument.
        tmp.flush()

        if SAMPLEONLY:
            xyntia[cmdargs][
                "-sampleout", sampledir,
                "-bin", tmp.name,
                "-config", inifile.name,
                "-sample-only",
            ].run()
            return None

        if cegis_to is not None:
            # Fix: the original passed the undefined name `to` here, raising
            # NameError whenever --cegis-to was given.
            # Exit status 124 (reported by `timeout` when the limit is hit)
            # is accepted in addition to 0.
            output = timeout[
                "-s", "ABRT", cegis_to,
                xyntia,
                cmdargs,
                "-json",
                "-sampleout", sampledir,
                "-bin", tmp.name,
                "-config", inifile.name,
            ](retcode=[0, 124])

        else:
            _retcode, output, _stderr = xyntia[cmdargs][
                "-json",
                "-sampleout", sampledir,
                "-bin", tmp.name,
                "-config", inifile.name,
            ].run()

        if not (sampledir / "res.json").exists():
            # the output was a constant value
            os.remove(sampledir / "formula")
            sampledir.rmdir()
            return None

        errormsg = "Not enough time to start synthesis"
        if errormsg in output:
            return {
                "success": "no",
                "equiv": "no",
                "errormsg": errormsg,
            }
        return json.loads(output)


def synthesize(index, expr, nsamples, cmdargs, outdir):
    """Sample and synthesize one expression of the dataset.

    Args:
        index: position of the expression in the dataset; used to name the
            sample directory and the result file.
        expr: expression text inserted into the sampling script.
        nsamples: number of samples requested in the sampling script.
        cmdargs: extra command-line arguments forwarded to xyntia.
        outdir: ``pathlib.Path`` of the output directory, which must already
            contain the ``samples`` and ``synthesized`` sub-directories.

    Returns:
        ``None`` when ``exec_xyntia`` produced no result, otherwise a dict
        with the keys ``success``, ``quality`` and ``equiv``. The full result
        is also written to ``<outdir>/synthesized/<index>.json``.
    """
    # NOTE(review): the digest this file was recovered from does not preserve
    # the whitespace of this template; confirm the indentation inside `hook`.
    ini = """\
starting from 0x0

prune constant outputs

set optimal sampling

explore all

v0<32> := nondet
v1<32> := nondet
v2<32> := nondet
v3<32> := nondet
v4<32> := nondet
v5<32> := nondet
v6<32> := nondet
set domain v0 [-50, 50]
set domain v1 [-50, 50]
set domain v2 [-50, 50]
set domain v3 [-50, 50]
set domain v4 [-50, 50]
set domain v5 [-50, 50]
set domain v6 [-50, 50]

hook 0x0 with
    res<32> := {}
    sample {} res
    halt
end
""".format(expr, nsamples)

    sampledir = outdir / "samples" / f"{index}"
    resfile = outdir / "synthesized" / f"{index}.json"

    with tempfile.NamedTemporaryFile(mode="w") as tmp:
        tmp.write(ini)
        tmp.flush()  # make the script visible to the xyntia process
        res = exec_xyntia(tmp, cmdargs, sampledir)

    if res is None:
        # the expression to sample was a constant value
        return None

    res_stats = {
        "success": res["success"],
        "quality": None,
        "equiv": res["equiv"],
    }

    if res["equiv"] != "no" and res["orig_size"] > 0:
        # if we check equivalence, we compute the quality of expression that are equivalent
        # or at least not proven not equivalent
        res["quality"] = res["synth_size"] / res["orig_size"]
        res_stats["quality"] = res["quality"]

    with open(resfile, "w") as f:
        json.dump(res, f, indent=4, sort_keys=True)

    return res_stats


def _percentage(flags):
    """Return the percentage of 1s in a list of 0/1 flags (0 for an empty list)."""
    return round(100 * sum(flags) / len(flags), 1) if len(flags) != 0 else 0


def synthesize_parallel(njobs, cmdargs, exprs, nsamples, outdir):
    """Run ``synthesize`` on every expression and print aggregate statistics.

    Args:
        njobs: ``n_jobs`` value given to joblib's ``Parallel``.
        cmdargs: extra command-line arguments forwarded to xyntia.
        exprs: list of expressions to process.
        nsamples: number of samples per expression.
        outdir: ``pathlib.Path`` of the output directory.

    Raises:
        AssertionError: when, outside sample-only mode, no expression
            produced a result.
    """
    all_res_stats = Parallel(n_jobs=njobs)(delayed(synthesize)(index, expr, nsamples, cmdargs, outdir)
            for index, expr in enumerate(tqdm(exprs, leave=False)))

    if SAMPLEONLY:
        # Fix: in sample-only mode every job returns None, so the assertion
        # below always failed once sampling was finished. There is nothing to
        # aggregate in that mode.
        return

    all_res_stats = [ v for v in all_res_stats if v is not None ] # Remove constant expressions from stats
    successes = [ 1 if stat["success"] == "yes" else 0 for stat in all_res_stats ]
    qualities = [ stat["quality"] for stat in all_res_stats if stat["quality"] is not None ]
    equiv_proven = [ 1 if stat["equiv"] == "yes" else 0 for stat in all_res_stats ]
    equiv_optim = [ 1 if stat["equiv"] != "no" else 0 for stat in all_res_stats ]

    assert len(successes) != 0 and (len(equiv_proven) != 0 and len(equiv_optim) != 0), "No expression to synthesize"

    print("Success rate : {}%".format(_percentage(successes)))

    # Lower bound: proven equivalent; upper bound: not proven different.
    equivmin = _percentage(equiv_proven)
    equivmax = _percentage(equiv_optim)
    print("Equiv range : {} - {}%".format(equivmin, equivmax))

    mqual = round(sum(qualities) / len(qualities), 2) if len(qualities) != 0 else None
    print("Mean Quality : {}".format(mqual))


def check_dir(directory):
    """Exit with status 1 unless ``directory`` is an existing directory."""
    if not directory.exists():
        # Fix: the original printed "must be a directory" for a missing path.
        print("[ERROR] {} does not exist".format(directory))
        sys.exit(1)
    elif not directory.is_dir():
        print("[ERROR] {} must be a directory".format(directory))
        sys.exit(1)

def check_create(directory):
    """Create ``directory``; exit with status 1 if it already exists."""
    if not directory.exists():
        os.mkdir(directory)
    else:
        print("[ERROR] {} already exists".format(directory))
        sys.exit(1)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--parallel', action="store_true", help="Run synthesis in parallel")
    # Fix: this help text was a copy of the --parallel one.
    parser.add_argument('--sample-only', action="store_true", help="Only sample the expressions, do not synthesize")
    parser.add_argument('--dataset', required=True, type=str, help="benchmark file")
    parser.add_argument('--cegis-to', required=False, type=int, help="timeout of the cegis process")
    parser.add_argument('--nsamples', required=False, type=int, help="number of samples (default: 100)")
    parser.add_argument('--out', required=True, type=str, help="output directory")

    parser.add_argument('cmd', nargs="+", type=str, help="command to bench")
    arguments = parser.parse_args()

    # The output directory must exist; its two sub-directories must not.
    resdir = Path(arguments.out)
    check_dir(resdir)

    sampledir = resdir / "samples"
    check_create(sampledir)

    synthesized_dir = resdir / "synthesized"
    check_create(synthesized_dir)

    njobs = -1 if arguments.parallel else 1
    # Publish the run configuration through the module-level globals read by
    # exec_xyntia and synthesize_parallel.
    xyntia = local[arguments.cmd[0]]
    cmdargs = arguments.cmd[1:]
    cegis_to = arguments.cegis_to
    nsamples = arguments.nsamples if arguments.nsamples is not None else 100
    SAMPLEONLY = arguments.sample_only

    with open(arguments.dataset, "r") as f:
        # Skip blank lines and '#' comment lines of the dataset.
        exprs = [ e for e in f.readlines() if e.strip() != "" and not e.strip().startswith("#") ]

    synthesize_parallel(njobs, cmdargs, exprs, nsamples, resdir)
((~ (- v2)) & v4) 48 | ((- v3) + (~ v1)) 49 | (v3 + v3) 50 | (v1 & (v3 - v4)) 51 | (v1 * v2) 52 | ((v3 + v2) | v3) 53 | (v3 - (- v3)) 54 | (v3 ^ v0) 55 | (v3 * v1) 56 | (v3 - (v1 * v3)) 57 | (v0 - (- v0)) 58 | (v4 | (v2 & v4)) 59 | (v1 | v3) 60 | (v3 + v0) 61 | (v1 + v3) 62 | (v3 ^ v2) 63 | (~ (- v0)) 64 | (~ (- v1)) 65 | ((v4 & v1) * v3) 66 | (v4 ^ v2) 67 | ((v2 + v4) * v2) 68 | (v2 ^ (v2 * v0)) 69 | (v4 | (~ v3)) 70 | (v3 * v0) 71 | (- ((- v0) + v1)) 72 | (- (v3 & v2)) 73 | (~ (- v3)) 74 | (v3 - v2) 75 | (- ((- v2) & v3)) 76 | (v1 ^ (- (~ v1))) 77 | ((~ v3) + (~ v1)) 78 | (v3 * (- v2)) 79 | ((v2 + v2) - v3) 80 | ((v1 ^ v3) + v4) 81 | (v0 & v1) 82 | (v2 - v3) 83 | (v3 | v1) 84 | (v0 & v4) 85 | (v4 ^ v3) 86 | (v0 + v4) 87 | (- (v1 + v3)) 88 | (v0 & v4) 89 | (v0 * v0) 90 | (v4 - (v4 * v2)) 91 | ((v4 ^ v3) | v4) 92 | (v3 + (v3 + v3)) 93 | ((v3 * v3) ^ v0) 94 | ((v4 | v3) | v3) 95 | (v3 | v0) 96 | (v1 * v3) 97 | (v3 + v1) 98 | ((~ v0) | v0) 99 | (v4 & (v4 * v2)) 100 | (~ (- v0)) 101 | ((v0 * v0) * v0) 102 | ((~ (- v1)) + v1) 103 | (~ (v4 ^ v0)) 104 | ((v3 - v1) ^ v3) 105 | ((v4 * v4) & v3) 106 | (~ (v0 | v2)) 107 | ((~ (~ v4)) - v0) 108 | (v1 - v3) 109 | ((- v2) * v0) 110 | (v1 | v3) 111 | (v3 | (~ v1)) 112 | (v3 ^ v0) 113 | (v3 - v0) 114 | ((v4 | v4) ^ v3) 115 | (v1 - v4) 116 | (~ (~ (v2 - v4))) 117 | (v2 - v1) 118 | (v4 & v3) 119 | ((v4 * v3) ^ v4) 120 | (v1 + v0) 121 | ((- v0) | v4) 122 | (v0 - v3) 123 | (v4 | v2) 124 | (v4 + v1) 125 | (v2 + v4) 126 | ((~ v4) & v1) 127 | (v1 & v3) 128 | (v3 * (v0 | v0)) 129 | (v3 * v0) 130 | (v1 & v4) 131 | ((v0 + v3) * v4) 132 | (v0 - v4) 133 | (v4 + v4) 134 | (v2 ^ (- v1)) 135 | ((v2 | v0) + v1) 136 | ((v1 | v2) - v3) 137 | (v1 | (v3 & v4)) 138 | (v1 & (~ (~ v3))) 139 | ((v4 + v3) * v2) 140 | (v4 - (v2 * v3)) 141 | (v0 ^ v3) 142 | (v1 * v2) 143 | (v0 + v0) 144 | (v4 * v0) 145 | (~ (v4 * (- v2))) 146 | (v1 | v4) 147 | (v4 * v1) 148 | (v0 * v3) 149 | (v0 & v2) 150 | (v4 & v3) 151 | (v1 * (v2 - v3)) 152 | (~ (- (v1 + v0))) 153 | (v4 
| v2) 154 | (v2 - (v1 & v3)) 155 | (v0 * v0) 156 | (v1 * v2) 157 | ((v4 * v4) ^ v2) 158 | (v0 ^ v4) 159 | (v3 ^ (v4 ^ v1)) 160 | ((~ v2) + v0) 161 | (v2 & v4) 162 | (v1 | v0) 163 | (- (v3 | v4)) 164 | (v4 * v1) 165 | (v1 | (~ v1)) 166 | ((v3 & v1) & v2) 167 | (v4 & (v1 | v1)) 168 | ((~ (- v0)) * v3) 169 | (v4 ^ (v2 * v2)) 170 | (~ (v0 - v3)) 171 | (v2 + (v2 ^ v3)) 172 | (v2 ^ v4) 173 | ((~ v3) | (~ v1)) 174 | (v2 & v1) 175 | (v3 ^ (- (~ v0))) 176 | (v3 & v1) 177 | (v1 - (v4 ^ v0)) 178 | (v2 & v1) 179 | (- (~ v1)) 180 | (v4 * v0) 181 | ((~ v0) + v0) 182 | (v2 + v0) 183 | (v0 + v3) 184 | ((- v4) * (~ v2)) 185 | ((~ v2) | (~ v1)) 186 | (v1 - (v2 & v1)) 187 | (v2 - v1) 188 | (- (- (~ (- v1)))) 189 | ((v4 * v3) ^ v2) 190 | (v1 + (~ v3)) 191 | (v2 ^ v0) 192 | (v2 | (v3 | v0)) 193 | (v1 + v2) 194 | (- (v2 - (- v3))) 195 | (v3 + (v3 + v1)) 196 | (v0 + (- (~ v4))) 197 | (v4 ^ (v2 * v4)) 198 | (v0 + (v3 | v3)) 199 | (v0 & v1) 200 | (~ (v3 & v3)) 201 | (v0 | (v0 + v2)) 202 | ((v4 | v3) - v1) 203 | (v4 * v1) 204 | (~ (- v4)) 205 | (v2 - (~ v1)) 206 | ((v2 | v3) & v1) 207 | (v1 | v4) 208 | ((- v3) + v1) 209 | (~ (~ (~ v2))) 210 | (v3 * v0) 211 | (~ (v1 - (~ v1))) 212 | ((~ v1) - v1) 213 | (v1 | (- v1)) 214 | (v1 ^ v0) 215 | (v4 + v1) 216 | (v3 - v0) 217 | (- ((~ v2) & v1)) 218 | (v1 ^ (v4 | v1)) 219 | (v0 & v3) 220 | (v0 ^ v1) 221 | (v1 + v4) 222 | ((- v0) ^ (~ v1)) 223 | (v2 & v0) 224 | (v4 + v3) 225 | (~ (- (v0 & v2))) 226 | (v3 ^ v2) 227 | (v4 | v2) 228 | (v4 & (v1 + v3)) 229 | (v0 * v2) 230 | (~ (- (v0 | v0))) 231 | (v3 & (~ (~ v1))) 232 | (v4 | (v3 * v0)) 233 | (~ (v1 - v3)) 234 | ((v3 - v2) | v2) 235 | ((v2 * v1) ^ v2) 236 | (v1 | v2) 237 | ((v1 | v1) + v2) 238 | (- (v3 & (~ v1))) 239 | ((~ v0) + (- v3)) 240 | (v1 * (- v4)) 241 | (v0 | (v2 | v4)) 242 | ((v3 & v2) | v4) 243 | (v4 & (v0 & v2)) 244 | (v4 & (v4 + v4)) 245 | (v3 ^ v4) 246 | (v0 | v2) 247 | (~ (- (v3 ^ v0))) 248 | (v1 | v4) 249 | (v4 + (v3 + v3)) 250 | ((v0 + v2) + v1) 251 | (v0 + (v0 & v3)) 252 | ((v0 * v4) - 
v0) 253 | (v3 | v0) 254 | (~ (v4 - v4)) 255 | ((v0 + v3) | v3) 256 | (v2 + v0) 257 | (~ (- v3)) 258 | ((v4 ^ v3) ^ v1) 259 | (~ (- v0)) 260 | (v1 - (v1 & v0)) 261 | (v3 + (v0 * v1)) 262 | (v0 & v2) 263 | ((v1 ^ v2) + v1) 264 | (v1 | v4) 265 | (v3 * v3) 266 | (v3 + v3) 267 | (v4 - v1) 268 | (v1 & (- v2)) 269 | ((v2 ^ v4) * v3) 270 | (- ((- v2) & v0)) 271 | (- (v3 + v4)) 272 | (v2 - v0) 273 | (v1 + (v2 * v2)) 274 | (v2 + v3) 275 | (v2 + (v0 | v3)) 276 | (v2 | (v2 | v0)) 277 | (~ (~ (v3 * v2))) 278 | (v1 * v3) 279 | (- (v2 - v1)) 280 | (~ (v2 + v3)) 281 | (v3 ^ (- (~ v1))) 282 | (v4 ^ v1) 283 | (v0 + v3) 284 | (v0 * v3) 285 | (v4 ^ v1) 286 | (~ (v3 * v2)) 287 | ((v4 - v2) & v1) 288 | (v3 + (v0 | v0)) 289 | (- (v0 - v1)) 290 | (v0 - v1) 291 | ((- v4) ^ v0) 292 | ((v1 + v0) | v3) 293 | (v0 + (v0 | v0)) 294 | (v0 & (v3 | v4)) 295 | (v3 ^ (v1 & v0)) 296 | ((v4 | v2) * v2) 297 | ((v0 * v2) | v2) 298 | (v2 & v3) 299 | (v4 - v3) 300 | (v4 | (v1 & v2)) 301 | ((- v0) + v1) 302 | (v0 ^ (~ (- v0))) 303 | ((- (~ v3)) * v0) 304 | ((- (~ v3)) | v3) 305 | ((- v4) * (~ v3)) 306 | (~ (v0 + v0)) 307 | (~ (v2 | (- v1))) 308 | (v3 + v1) 309 | (v1 + v1) 310 | ((- v1) & (~ v4)) 311 | ((v4 ^ v3) & v0) 312 | (- (~ v2)) 313 | ((~ v3) | (~ v3)) 314 | (v3 - v1) 315 | (v0 * v1) 316 | (v0 * v1) 317 | (v1 | v0) 318 | (v2 & v0) 319 | (v2 * v3) 320 | (v4 * v4) 321 | (v0 * v3) 322 | (- ((- v3) - v4)) 323 | (v3 & (v1 | v4)) 324 | ((- (~ v4)) + v2) 325 | (v0 - (v0 ^ v1)) 326 | (v4 ^ v3) 327 | (v2 * v0) 328 | (- (v2 * v2)) 329 | (v3 & (~ v2)) 330 | ((v4 + v2) ^ v0) 331 | (v0 - v1) 332 | (v1 & v0) 333 | ((v0 & v3) * v2) 334 | ((v3 * v2) * v0) 335 | (v4 & v3) 336 | (v4 ^ (v0 * v3)) 337 | (v3 + v0) 338 | ((v4 + v2) | v0) 339 | (v3 * v2) 340 | (- ((- v0) * v2)) 341 | ((~ v4) ^ v4) 342 | (~ (v4 ^ v3)) 343 | (v0 | v2) 344 | ((v4 - v2) | v3) 345 | (v4 & (v2 ^ v4)) 346 | (v2 ^ v0) 347 | (~ ((- v2) & v0)) 348 | ((- (~ v1)) + v0) 349 | (v0 & v1) 350 | (- (v1 + v3)) 351 | (v0 - v1) 352 | ((v0 + v2) | v2) 353 | ((- 
v2) | (~ v3)) 354 | ((- v2) - v3) 355 | (v1 - v0) 356 | ((~ (~ v0)) + v3) 357 | (~ (- v0)) 358 | ((- v2) + (- v1)) 359 | (v4 + v4) 360 | (~ (- v1)) 361 | (v1 + (v3 * v0)) 362 | ((v1 + v3) - v0) 363 | (v1 ^ (v0 | v4)) 364 | (~ (- v2)) 365 | (v3 * (v2 | v2)) 366 | (v0 | v2) 367 | ((v2 + v1) + v1) 368 | (v1 + v3) 369 | (v1 & (v0 - v4)) 370 | (v0 | v3) 371 | ((~ v1) | (- v2)) 372 | ((- v0) + v2) 373 | (v0 - v2) 374 | ((v1 + v4) ^ v2) 375 | (v2 + v0) 376 | (v0 * v3) 377 | (v3 * v0) 378 | (- (v4 | v4)) 379 | (v3 & (- v2)) 380 | (v4 | v2) 381 | (v0 | v2) 382 | (v1 ^ v2) 383 | (v1 - v2) 384 | (- (~ v1)) 385 | ((- v0) * v2) 386 | (v2 * v2) 387 | ((- v3) & v1) 388 | (- ((- v2) - v3)) 389 | (v4 ^ (~ (~ v1))) 390 | (- ((~ v2) + v1)) 391 | (v3 * (v0 * v3)) 392 | ((v3 | v2) & v0) 393 | (v1 + (v1 | v4)) 394 | ((v0 ^ v4) & v2) 395 | (v2 * v3) 396 | ((v4 | v0) - v2) 397 | (v1 & v4) 398 | (~ (v1 ^ v1)) 399 | (v3 | v4) 400 | (- (v2 - (- v1))) 401 | ((v3 * v2) & v0) 402 | (v1 | (v4 | v3)) 403 | (v3 * v4) 404 | (v0 & v4) 405 | (v1 & v0) 406 | (v4 - v3) 407 | (v1 - v4) 408 | (v3 ^ v0) 409 | (v4 | v0) 410 | (v1 | (~ v3)) 411 | (v2 - v0) 412 | (v0 * v0) 413 | (~ (- v4)) 414 | ((~ v4) * v0) 415 | (v1 & v4) 416 | (v0 - v2) 417 | (~ ((~ v2) ^ v0)) 418 | (v4 | (v0 ^ v1)) 419 | (v0 & v3) 420 | (v2 + v4) 421 | (v2 * (v2 & v0)) 422 | (- (- (v0 & v3))) 423 | (v4 | v0) 424 | (v4 ^ (v0 * v3)) 425 | (v0 ^ (v2 ^ v1)) 426 | (v2 & (v0 | v3)) 427 | ((v0 + v0) - v1) 428 | ((v2 & v3) & v3) 429 | (v1 & (v1 & v4)) 430 | (v4 & v3) 431 | ((~ (- v1)) ^ v0) 432 | (v1 ^ (- v1)) 433 | (v1 + v0) 434 | ((- v0) - v1) 435 | (~ (- v2)) 436 | ((v3 * v2) - v1) 437 | (v3 + v2) 438 | (~ (v3 + (~ v1))) 439 | (v2 & v0) 440 | (v4 | (v2 ^ v3)) 441 | ((v1 | v0) + v2) 442 | (~ (- v3)) 443 | (v4 & v1) 444 | (- (~ v0)) 445 | (v3 - v2) 446 | (~ ((~ v0) & v2)) 447 | (v2 + v4) 448 | (v0 ^ (v3 - v0)) 449 | (v2 + (v1 ^ v4)) 450 | (v4 + (v4 - v3)) 451 | ((v1 | v4) | v3) 452 | (v4 | v0) 453 | (v4 + (v2 | v0)) 454 | (v0 | v1) 455 | ((v3 
& v4) * v4) 456 | (v1 - v4) 457 | (v1 - v2) 458 | (v2 + (~ v1)) 459 | (v4 + (~ v0)) 460 | (v3 + v4) 461 | (v4 * (v1 | v2)) 462 | (v0 - v3) 463 | ((- v3) - v3) 464 | ((v0 * v0) - v1) 465 | ((v2 * v3) - v1) 466 | (~ (- v0)) 467 | (v3 & (~ v0)) 468 | (v3 - v0) 469 | (v1 * (v0 + v3)) 470 | (~ (v1 * v0)) 471 | ((v3 | v3) ^ v2) 472 | (~ (v1 ^ (- v4))) 473 | (~ (- v4)) 474 | (v1 + v4) 475 | (~ (v4 * (~ v0))) 476 | ((v2 + v4) & v2) 477 | (v2 & (v3 + v0)) 478 | (v4 + (v0 - v3)) 479 | (v1 - (v4 * v3)) 480 | (v2 & (v4 * v4)) 481 | ((v3 | v2) ^ v1) 482 | ((v1 - v3) - v1) 483 | ((- v2) | (- v3)) 484 | (v1 + v1) 485 | (v4 * (- v2)) 486 | (~ (v0 | v2)) 487 | (~ (v4 ^ v4)) 488 | (v4 & v1) 489 | (v2 ^ v4) 490 | ((~ v4) ^ v0) 491 | (- (v4 & v3)) 492 | ((- (~ v0)) & v3) 493 | (v1 + v1) 494 | ((v4 & v1) ^ v1) 495 | (v1 + v3) 496 | (v4 * v2) 497 | ((v1 * v2) ^ v0) 498 | (v0 + v0) 499 | (v1 * v3) 500 | (v1 - v0) 501 | -------------------------------------------------------------------------------- /src/lib/oracle.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). 
(* A named location together with its size, as read from the "size" field of
   the sampling file. *)
type variable = { name : string ; sz : int }

(* A named constant and its bitvector value. *)
type constant = { name : string ; value : Bitvector.t }

(* One observation: the value of every input and the resulting output. *)
type sample = {
  vars : (variable * Bitvector.t) array ;
  res : variable * Bitvector.t ;
}

(* The oracle: input/output variables, available constants, the observed
   samples (which can grow through [add_sample]), an optional operator list
   and an optional size of the original expression. *)
type t = {
  ctx : variable array * variable ;
  consts : constant array ;
  mutable samples : sample array ;
  ops : string list option ;
  orig_size : int option;
}

(** Oracle **)
module type ORACLE = sig
  val nvars : unit -> int
  val nconsts : unit -> int
  val nsamples : unit -> int
  val var_values : variable -> Bitvector.t array
  val const_values : constant -> Bitvector.t array
  val out_values : unit -> Bitvector.t array
  val out_var : unit -> variable
  val random_var : unit -> variable
  val random_const : unit -> constant
  val print : unit -> unit
  val vars : unit -> variable array
  val consts : unit -> constant array
  val ops : unit -> string list option
  val const_of_int : int -> int -> constant
  val const_of_bitv : Bitvector.t -> constant
  val get_sample : int -> sample
  val sample_output : sample -> variable * Bitvector.t
  val sample_inputs : sample -> (variable * Bitvector.t) array
  val add_sample : (variable * Bitvector.t) array -> (variable * Bitvector.t) -> unit
  val get_expr_size : unit -> int option
end

(* Plain accessors on the oracle record. *)
let ops t = t.ops

let vars t = fst t.ctx

let consts t = t.consts

let nvars t = Array.length (fst t.ctx)

let nconsts t = Array.length t.consts

let nsamples t = Array.length t.samples

let output_of_sample { res ; _ } = res
let inputs_of_sample { vars ; _ } = vars

(* Value taken by [var] in each sample, in sample order. The variable is
   looked up by name; [Invalid_argument] is raised if a sample lacks it. *)
let var_values t (var : variable) =
  let lookup (inputs : (variable * Bitvector.t) array) =
    let count = Array.length inputs in
    let rec scan idx =
      if idx = count then invalid_arg "Variable is not present in the oracle"
      else
        let ((candidate : variable), vl) = inputs.(idx) in
        if candidate.name = var.name then vl else scan (idx + 1)
    in
    scan 0
  in
  Array.map (fun (s : sample) -> lookup s.vars) t.samples

(* A constant has the same value in every sample. *)
let const_values t { value ; _ } =
  Array.make (nsamples t) value

let out_values t = Array.map (fun (s : sample) -> snd s.res) t.samples

let out_var t = snd t.ctx

let const_of_int value sz =
  { name = string_of_int value ; value = Bitvector.of_int ~size:sz value }

let const_of_bitv value =
  { name = Bitvector.to_string value ; value }

(** Create a variable **)
let mk_var name sz =
  if sz <= 0 then invalid_arg "Variable size should be positive";
  if String.length name = 0 then
    invalid_arg "Variable name should be non-empty";
  { name ; sz }

(** Get a random variable **)
let random_var t =
  (fst t.ctx).(Random.int (nvars t))

(** Get a random constant value **)
let random_const t =
  t.consts.(Random.int (nconsts t))

(* [hex_to_int] must stay above the shadowing [to_string] below: it parses
   the JSON string as written, while the new [to_string] lowercases it. *)
let hex_to_int json = json |> to_string |> int_of_string
let to_string json = json |> to_string |> String.lowercase_ascii

(** Extract sampled inputs from json file **)
let get_sample_input json =
  let parse_entry (_, entry) =
    let var = {
      name = to_string (member "location" entry) ;
      sz = hex_to_int (member "size" entry);
    }
    in
    let raw = Z.of_string (to_string (member "value" entry)) in
    (var, Bitvector.create raw var.sz)
  in
  json |> to_assoc |> List.map parse_entry |> Array.of_list

(** Extract observed outputs from json file **)
let get_sample_output json =
  (* Only the output stored under key "0" is read. *)
  let entry = member "0" json in
  let var = {
    name = to_string (member "location" entry) ;
    sz = hex_to_int (member "size" entry);
  }
  in
  let raw = Z.of_string (to_string (member "value" entry)) in
  (var, Bitvector.create raw var.sz)

(** Extract sampled inputs and observed outputs from json file **)
let get_sample json = {
  vars = json |> member "inputs" |> get_sample_input ;
  res = json |> member "outputs" |> get_sample_output ;
}

(** Extract all samples from json file **)
let get_samplings json =
  let entries = json |> member "sampling" |> to_assoc in
  entries
  |> List.map (fun (_, entry) -> get_sample entry)
  |> Array.of_list

(** Extract inputs meta information from json file **)
let get_meta_inputs json =
  let describe (_, entry) =
    { name = to_string (member "location" entry) ;
      sz = hex_to_int (member "size" entry) }
  in
  json |> member "initial" |> member "inputs" |> to_assoc
  |> List.map describe
  |> Array.of_list

(** Extract outputs meta information from json file **)
let get_meta_output json =
  let entry = json |> member "initial" |> member "outputs" |> member "0" in
  let name = entry |> member "location" |> to_string in
  let size = entry |> member "size" |> hex_to_int in
  mk_var name size


(** Extract inputs and outputs meta information from json file **)
let get_meta json =
  (get_meta_inputs json, get_meta_output json)

(* Operator list under "ops"; [None] when a [Type_error] is raised while
   reading it. *)
let get_ops json =
  match json |> member "ops" |> to_list |> List.map to_string with
  | names -> Some names
  | exception Type_error (_, _) -> None

(* Size of output "0" as declared in the "initial" section. *)
let get_size json =
  json |> member "initial" |> member "outputs" |> member "0" |> member "size"
  |> hex_to_int

(* Size of the original expression, or -1 when there is no "info" section. *)
let get_expr_size json =
  match member "info" json with
  | `Null -> -1
  | info -> info |> member "exprsize" |> to_int

(** Create constant values between min and max **)
let mk_consts min max sz =
  Array.init (1 + max - min) (fun offset -> const_of_int (min + offset) sz)

(** Pretty printer **)
let print t =
  let (in_vars, (out : variable)) = t.ctx in
  Printf.printf "Variables:\n";
  Array.iter
    (fun (v : variable) -> Printf.printf "\t%s, %d\n" v.name v.sz)
    in_vars;
  Printf.printf "\n\t%s, %d\n" out.name out.sz;
  Printf.printf "Samples:\n";
  Array.iter
    (fun (s : sample) ->
      Array.iter
        (fun ((v : variable), vl) ->
          Printf.printf "\t%s: %s\n" v.name (Bitvector.to_hexstring vl))
        s.vars;
      Printf.printf "\tResult: %s\n\n" (Bitvector.to_hexstring (snd s.res)))
    t.samples

(* Append one observation to the oracle (the sample array is rebuilt). *)
let add_sample oracle inputs output =
  oracle.samples <-
    Array.append oracle.samples [| { vars = inputs ; res = output } |]

(* Wrap an oracle record into a first-class [ORACLE] module whose functions
   all operate on that record. Each definition is a non-recursive [let], so
   the right-hand side refers to the function of the same name defined above
   in this file. *)
let gen_oracle (t : t) : (module ORACLE) =
  (module struct
    let oracle = t
    let nvars () = nvars oracle
    let nconsts () = nconsts oracle
    let nsamples () = nsamples oracle
    let var_values var = var_values oracle var
    let const_values const = const_values oracle const
    let out_values () = out_values oracle
    let out_var () = out_var oracle
    let random_var () = random_var oracle
    let random_const () = random_const oracle
    let print () = print oracle
    let vars () = vars oracle
    let consts () = consts oracle
    let ops () = ops oracle
    let const_of_int = const_of_int
    let const_of_bitv = const_of_bitv
    let get_sample i = oracle.samples.(i)
    let sample_inputs = inputs_of_sample
    let sample_output = output_of_sample
    let add_sample = add_sample oracle
    let get_expr_size () = oracle.orig_size
  end : ORACLE)

(** Create oracle from json file **)
let of_json ~filename (cst: int array): (module ORACLE) =
  let json = Yojson.Basic.from_file filename in
  let sz = get_size json in
  (* User-supplied constants, sized like the output. *)
  let extra = Array.map (fun v -> const_of_int v sz) cst in
  gen_oracle {
    ctx = get_meta json ;
    (* The constant 1 is always available, followed by the user's ones. *)
    consts = Array.append (mk_consts 1 1 sz) extra ;
    samples = get_samplings json ;
    ops = get_ops json ;
    orig_size = Some (get_expr_size json);
  }

(* Oracle with six 32-bit inputs v0..v5, one 32-bit output and no constants
   or samples. *)
let dummyOracle =
  let inputs = Array.init 6 (fun i -> mk_var ("v" ^ string_of_int i) 32) in
  gen_oracle {
    ctx = (inputs, mk_var "output" 32) ;
    consts = [||] ;
    samples = [||] ;
    ops = None ;
    orig_size = Some 32;
  }
*) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | open Operators 23 | 24 | type op_t = 25 | | OP_Var of Oracle.variable 26 | | OP_Const of Oracle.constant 27 | | OP_Unop of unop_t 28 | | OP_Binop of binop_t 29 | | OP_Triop of triop_t 30 | 31 | type 'a node = 32 | | Var of Oracle.variable 33 | | Const of Oracle.constant 34 | | SizeMod of sizemod_t * 'a t 35 | | Unop of unop_t * 'a t 36 | | Binop of binop_t * 'a t * 'a t 37 | | Triop of triop_t * 'a t * 'a t * 'a t 38 | 39 | and 'a t = { 40 | node : 'a node; 41 | (* number of nodes in this subtree, size modifiers do not count *) 42 | sz : int; 43 | (* bitsize of the values calculated by this tree *) 44 | bitsz : int; 45 | data : 'a; 46 | } 47 | 48 | let compare t1 t2 = 49 | let to_int = function 50 | | Const _ -> 0 51 | | Var _ -> 1 52 | | SizeMod _ -> 2 53 | | Unop _ -> 3 54 | | Binop _ -> 4 55 | | Triop _ -> 5 56 | in 57 | let rec aux t1 t2 = 58 | match t1.node, t2.node with 59 | | Var v1, Var v2 -> String.compare v1.name v2.name 60 | | Const c1, Const c2 -> Bitvector.compare c1.value c2.value 61 | | SizeMod (op1,t1'), SizeMod (op2, t2') -> ( 62 | match SizeMod.compare op1 op2 with 63 | | 0 -> aux t1' t2' 64 | | x -> x) 65 | | Unop (op1,t1'), Unop (op2, t2') -> ( 66 | match Unop.compare op1 op2 with 67 | | 0 -> aux t1' t2' 68 | | x -> x) 69 | | Binop (op1, t11, t12), Binop (op2, t21, t22) -> ( 70 | match Binop.compare op1 op2 with 71 | | 0 -> ( 72 | match aux t11 t21 with 73 | | 0 -> aux t12 t22 74 | | x -> x 75 | ) 76 | | x -> x 77 | ) 78 | | Triop (op1, t11, t12, t13), Triop (op2, t21, t22, t23) -> ( 79 | match Triop.compare op1 op2 with 80 | | 0 -> ( 81 | match aux t11 t21 with 82 | | 0 -> ( 83 | match aux t12 t22 with 84 | | 0 -> aux t13 t23 85 | | x -> x 86 | ) 87 | | x -> x 88 | ) 89 | | x -> x 90 | ) 91 | | _, _ -> Int.compare (to_int t1.node) (to_int t2.node) 92 | in 93 | aux t1 t2 94 | 95 | let _fold_topdown (f : 'a t -> 'b -> 'b) (tree : 'a t) (x : 
'b) = 96 | let rec aux t x = 97 | let x = f t x in 98 | match t.node with 99 | | Const _ | Var _ -> x 100 | | SizeMod (_, t1) -> aux t1 x 101 | | Unop (_, t1) -> aux t1 x 102 | | Binop (_, t1, t2) -> aux t2 (aux t1 x) 103 | | Triop (_, t1, t2, t3) -> aux t3 (aux t2 (aux t1 x)) 104 | in 105 | aux tree x 106 | 107 | let _fold_bottomup (f : 'a t -> 'b -> 'b) (tree : 'a t) (x : 'b) = 108 | let rec aux t x = 109 | let y = 110 | match t.node with 111 | | Const _ | Var _ -> x 112 | | SizeMod (_, t1) -> aux t1 x 113 | | Unop (_, t1) -> aux t1 x 114 | | Binop (_, t1, t2) -> aux t2 (aux t1 x) 115 | | Triop (_, t1, t2, t3) -> aux t3 (aux t2 (aux t1 x)) 116 | in 117 | f t y 118 | in 119 | aux tree x 120 | (* Equality of two trees up to the argument order of commutative binary operators; the [data] annotations are ignored. The node counts are compared first as a cheap rejection test. Size modifiers are equal when both the modifier and the operand are equal. *) 121 | let rec tree_equal t1 t2 = 122 | if t1.sz <> t2.sz then false 123 | else ( 124 | match (t1.node, t2.node) with 125 | | Const c1, Const c2 when c1 = c2 -> true 126 | | Var v1, Var v2 when v1 = v2 -> true | SizeMod (op1, t1'), SizeMod (op2, t2') when op1 = op2 -> tree_equal t1' t2' 127 | | Unop (op1, t1'), Unop (op2, t2') when op1 = op2 -> 128 | tree_equal t1' t2' 129 | | Binop (op1, t11', t12'), Binop (op2, t21', t22') when op1 = op2 -> 130 | if Binop.is_commut op1 then 131 | (tree_equal t11' t21' && tree_equal t12' t22') || (tree_equal t11' t22' && tree_equal t12' t21') 132 | else 133 | tree_equal t11' t21' && tree_equal t12' t22' 134 | | Triop (op1, t11', t12', t13'), Triop (op2, t21', t22', t23') when op1 = op2 -> 135 | tree_equal t11' t21' && tree_equal t12' t22' && tree_equal t13' t23' 136 | | _, _ -> false 137 | ) 138 | (* Render a tree as a string. [f] formats the [data] annotation of a node and its result is appended right after the text of that node. The child of a unary node is bound to [t'] so that [t.data] still refers to the node being printed. *) 139 | let to_string f (t : 'a t) = 140 | let open Printf in 141 | let rec aux t = 142 | match t.node with 143 | | Var l -> sprintf "%s<%d>%s" l.name l.sz (f t.data) 144 | | Const c -> sprintf "%s%s" c.name (f t.data) 145 | | SizeMod (op, t') -> sprintf "%s (%s)%s" (SizeMod.to_string op) (aux t') (f t.data) 146 | | Unop (op, t') -> sprintf "%s (%s)%s" (Unop.to_string op) (aux t') (f t.data) 147 | | Binop (op, t1, t2) -> 148 | sprintf "(%s %s %s)%s" (aux t1) (Binop.to_string op) (aux t2) (f t.data) 149 | | Triop (op, t1, t2, t3) -> 
( 150 | match op with 151 | | Div | SDiv | Mod | SMod -> 152 | sprintf "((%s ++ %s) %s %s)%s" (aux t1) (aux t2) 153 | (Triop.to_string op) (aux t3) (f t.data) 154 | | ITE | Im -> 155 | sprintf "%s(%s, %s, %s)%s" (Triop.to_string op) (aux t1) (aux t2) 156 | (aux t3) (f t.data)) 157 | in 158 | aux t 159 | 160 | let mk_var data var = 161 | { 162 | node = Var var; 163 | sz = 1; 164 | bitsz = var.sz; 165 | data 166 | } 167 | 168 | let mk_const data c = 169 | { 170 | node = Const c; 171 | sz = 1; 172 | bitsz = Bitvector.size_of c.value; 173 | data 174 | } 175 | 176 | let mk_const_of_bv data bv = 177 | mk_const data { name = Bitvector.to_string bv; value = bv } 178 | 179 | let mk_sizemod data op t = 180 | let sz, bitsz = 181 | match op with 182 | | Sextend x | Extend x | Reduce x -> (t.sz, x) 183 | in 184 | { 185 | node = SizeMod (op, t); 186 | sz; 187 | bitsz; 188 | data 189 | } 190 | let mk_unop data op t = 191 | let sz, bitsz = (t.sz + 1, t.bitsz) 192 | in 193 | { 194 | node = Unop (op, t); 195 | sz; 196 | bitsz; 197 | data 198 | } 199 | 200 | let mk_binop data op t1 t2 = 201 | { 202 | node = Binop (op, t1, t2); 203 | sz = 1 + t1.sz + t2.sz; 204 | bitsz = t1.bitsz; 205 | data 206 | } 207 | 208 | let mk_triop data op t1 t2 t3 = 209 | { 210 | node = Triop (op, t1, t2, t3); 211 | sz = 1 + t1.sz + t2.sz + t3.sz; 212 | bitsz = t2.bitsz; 213 | (* NB: We should check that t2.bitsz == t3.bitsz in the ITE and Im case *) 214 | data; 215 | } 216 | 217 | module type TreeConstructor = sig 218 | type data 219 | type t' := data t 220 | 221 | (* Specialized Constructor *) 222 | val mk_var : Oracle.variable -> t' 223 | val mk_const : Oracle.constant -> t' 224 | val mk_const_of_bv : Bitvector.t -> t' 225 | val mk_sizemod : sizemod_t -> t' -> t' 226 | val mk_unop : unop_t -> t' -> t' 227 | val mk_binop : binop_t -> t' -> t' -> t' 228 | val mk_triop : triop_t -> t' -> t' -> t' -> t' 229 | 230 | val eq : t' -> t' -> bool 231 | val compare : t' -> t' -> int 232 | 233 | val transfer : 'a t 
-> t' 234 | end 235 | 236 | module UnitTreeConstructor = struct 237 | type data = unit 238 | 239 | module H = Hashtbl 240 | 241 | let table = H.create 251 242 | 243 | let hashcons x = 244 | try H.find table x 245 | with Not_found -> 246 | H.add table x x; 247 | x 248 | 249 | (* A specialized version that handles commutative 250 | operators (which helps to ensure maximal sharing) *) 251 | let hashcons_binop x = 252 | match x.node with 253 | | Binop (op, t1, t2) when Operators.Binop.is_commut op -> ( 254 | try H.find table x 255 | with Not_found -> ( 256 | let x' = mk_binop () op t2 t1 in 257 | try H.find table x' 258 | with Not_found -> 259 | H.add table x x; 260 | x)) 261 | | _ -> hashcons x 262 | 263 | (* Specialized Constructor *) 264 | let mk_var s = hashcons (mk_var () s) 265 | let mk_const c = hashcons (mk_const () c) 266 | let mk_const_of_bv bv = hashcons (mk_const_of_bv () bv) 267 | let mk_sizemod op t = hashcons (mk_sizemod () op t) 268 | let mk_unop op t = hashcons (mk_unop () op t) 269 | let mk_binop op t1 t2 = hashcons_binop (mk_binop () op t1 t2) 270 | let mk_triop op t1 t2 t3 = hashcons (mk_triop () op t1 t2 t3) 271 | 272 | let eq = (==) 273 | 274 | (* Optimized version using physical equality to short-cut 275 | when possible *) 276 | let compare t1 t2 = 277 | let to_int = function 278 | | Const _ -> 0 279 | | Var _ -> 1 280 | | SizeMod _ -> 2 281 | | Unop _ -> 3 282 | | Binop _ -> 4 283 | | Triop _ -> 5 284 | in 285 | let rec aux t1 t2 = 286 | if t1 == t2 then 0 287 | else ( 288 | match t1.node, t2.node with 289 | | Var v1, Var v2 -> String.compare v1.name v2.name 290 | | Const c1, Const c2 -> Bitvector.compare c1.value c2.value 291 | | SizeMod (op1,t1'), SizeMod (op2, t2') -> ( 292 | match SizeMod.compare op1 op2 with 293 | | 0 -> aux t1' t2' 294 | | x -> x) 295 | | Unop (op1,t1'), Unop (op2, t2') -> ( 296 | match Unop.compare op1 op2 with 297 | | 0 -> aux t1' t2' 298 | | x -> x) 299 | | Binop (op1, t11, t12), Binop (op2, t21, t22) -> ( 300 | match 
Binop.compare op1 op2 with 301 | | 0 -> ( 302 | match aux t11 t21 with 303 | | 0 -> aux t12 t22 304 | | x -> x 305 | ) 306 | | x -> x 307 | ) 308 | | Triop (op1, t11, t12, t13), Triop (op2, t21, t22, t23) -> ( 309 | match Triop.compare op1 op2 with 310 | | 0 -> ( 311 | match aux t11 t21 with 312 | | 0 -> ( 313 | match aux t12 t22 with 314 | | 0 -> aux t13 t23 315 | | x -> x 316 | ) 317 | | x -> x 318 | ) 319 | | x -> x 320 | ) 321 | | _, _ -> Int.compare (to_int t1.node) (to_int t2.node) 322 | ) 323 | in 324 | aux t1 t2 325 | 326 | let rec transfer t = 327 | match t.node with 328 | | Var s -> mk_var s 329 | | Const bv -> mk_const bv 330 | | SizeMod (op,t) -> 331 | mk_sizemod op (transfer t) 332 | | Unop (op,t) -> 333 | mk_unop op (transfer t) 334 | | Binop (op,t1,t2) -> 335 | mk_binop op (transfer t1) (transfer t2) 336 | | Triop (op,t1,t2,t3) -> 337 | mk_triop op (transfer t1) (transfer t2) (transfer t3) 338 | end -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Table of Contents 3 | 4 | 1. [Installation](#installation) 5 | 2. [Usage](#usage) 6 | 1. [Synthesizing functions from sampling files](#synthesizing-functions-from-sampling-files) 7 | 2. [Synthesizing functions from a binary](#synthesizing-functions-from-a-binary) 8 | 3. [Grammar abbreviations](#grammar-abbreviations) 9 | 4. [Heuristic abbreviations](#heuristic-abbreviations) 10 | 5. [Inference rules](#inference-rules) 11 | 6. [Generating SyGUS problems](#generating-sygus-problems) 12 | 3. [Experiments](#experiments) 13 | 4. [Synthesize all blocks from a execution trace](#synthesize-all-blocks-from-a-execution-trace) 14 | 5. [References](#references) 15 | 16 | 17 | # Installation 18 | 19 |
Show 20 | 21 | ## System requirements 22 | 23 | On Debian-like systems, run the following command: 24 | ``` 25 | sudo apt install libgmp3-dev gcc-multilib gdb python3 python3-pip python3-venv openjdk-17-jdk libgmp-dev pkg-config opam 26 | ``` 27 | 28 | You must also install [Ghidra](https://ghidra-sre.org/) and set the `GHIDRA` environment variable to the Ghidra installation directory: 29 | ```bash 30 | export GHIDRA=/path/to/ghidra 31 | ``` 32 | 33 | # Installation 34 | 35 | 36 | The easiest way to install xyntia is to create an opam switch. It will automatically install xyntia and its dependencies: 37 | ``` 38 | $ cd /path/to/xyntia 39 | $ opam switch create . 4.14.1 -y # or any version >= 4.14.1 40 | $ eval $(opam env) 41 | ``` 42 |
43 | 44 | # Usage 45 | 46 | The help of xyntia is available through `xyntia -help`. In the following we will explain the two ways for using xyntia. 47 | 48 | ## Synthesizing functions from sampling files 49 | 50 |
Show 51 | 52 | 53 | To synthesize a function from a sampling file, execute the following command: 54 | 55 | ``` 56 | $ xyntia [-ops ] [-time
63 | 64 | ## Synthesizing functions from a binary 65 | 66 |
Show 67 | 68 | 69 | You can let xyntia sample the output from a binary and synthesize them with the 70 | following command: 71 | ``` 72 | $ xyntia [-ops ] [-time
117 | 118 | ## Grammar abbreviations 119 | 120 |
Show 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 |
grammarabbreviation
Mixed Boolean Arithmetic (MBA)mba
MBA+Divisionexpr
MBA+Division+Mod+Shiftfull
MBA+Shiftmba_shift
MBA+If then elsemba_ite
169 | 170 |
171 | 172 | ## Heuristic abbreviations 173 | 174 |
Show 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 |
heuristicabbreviation
Iterated Local Searchils
Hill Climbinghc
Random Walkrw
Simulated Annealingsa
Metropolis-Hastingsmh
222 | 223 |
224 | 225 | ## Inference rules 226 | 227 |
Show 228 | 229 | 230 | ### When to use Inference Rules ? 231 | 232 | 233 | **In Short:** If you expect your target expression to contain constant values. 234 | 235 | **Full answer:** Program synthesis has core limitations, namely the handling of arbitrary constant values and big expressions. To bypass these limitations, Xyntia includes *Inference Rules* to better guide the search and elevate, in one step, a candidate solution into the target expression (possibly with arbitrary constant values). 236 | Hence, if your target expression is likely to contain constant values, use the inference rules. 237 | 238 | To better understand, read our paper [3]: 239 | > **Augmenting Search-based Program Synthesis with Local Inference Rules to Improve Black-box Deobfuscation**, Vidal Attias, Nicolas Bellec, Grégoire Menguy, Sébastien Bardin, Jean-Yves Marion, ACM Conference on Computer and Communications Security 2025 240 | 241 | 242 | ### Usage 243 | 244 | To use inference rules you must set the option 245 | `-infrules RULES` where `RULES` can be: 246 | 1. A list of rules (`r1, r2, ..., rk`) where each rule aims to handle a specific case as described below 247 | 248 | | Inference Rule | Expression kind | 249 | |--------------------------------------------|---------------------------------| 250 | | $\diamond \in \{ +, *, \oplus, >>u, <<, ror \}$ | $e \diamond c$ | 251 | | maskotf | $(e \land c_1) \lor c_2$ | 252 | | affine | $(c_1 * e) + c_2$ | 253 | | poly2 | $(c_1 * e^2) + (c_2 * e) + c_3$ | 254 | 255 | > where `e` is an expression from the grammar and `c, c1, c2, c3` are arbitrary constant values. 256 | 257 | 258 | 259 | 2. A keyword among `mba` (same as: `+, maskotf, <<, *, ^`) and `all` (same as: `+, maskotf, <<, >>u, *, ^, ror, poly2, affine`). 260 | 261 | 262 | ### You want to try it ? 263 | 264 | We provide an example of an obfuscated expression from the snapchat app. 
To apply Xyntia over it, run: 265 | ``` 266 | $ xyntia -bin sampler/examples/snapchat -config sampler/examples/snapchat.ini -infrules all 267 | ``` 268 | 269 |
270 | 271 | ## Generating SyGUS problems 272 | 273 |
Show 274 | 275 | To easily compare other synthesizers with Xyntia or apply them to deobfuscation tasks, we provide a way to extract 276 | the synthesis problem in the standard SyGUS format. To do so, just use the `-sygus` option. 277 | 278 | For instance, to extract the sygus problem from binary code, run: 279 | ``` 280 | $ xyntia -bin -config -sygus 281 | ``` 282 |
284 | 285 | # Experiments 286 | 287 |
Show 288 | 289 | 290 | All datasets and scripts are given to reproduce the experiments presented in [1]. In particular, the repository contains the B1 dataset from the [Syntia](https://github.com/RUB-SysSec/syntia) paper [2] 291 | (Thank you [Tim Blazytko](https://synthesis.to/) for sharing it with us), our B2 dataset and the datasets used to evaluate anti-black-box deobfuscation. 292 | 293 | ## Python dependencies 294 | 295 | To facilitate installation, we also give the `requirements.txt` to easily install the python dependencies. 296 | 297 | To create and activate a python environment, execute the following commands (optional): 298 | ``` 299 | $ python3 -m venv venv # create a virtual environment for python3 300 | $ source venv/bin/activate # activate the virtual environment 301 | ``` 302 | 303 | Then, install dependencies: 304 | ``` 305 | $ pip install -r requirements.txt 306 | ``` 307 | 308 | ## Launch experiments 309 | 310 | Datasets used in [1] can be found in the `./datasets` directory. 311 | To launch Xyntia over a dataset (e.g., B2) with a given timeout (e.g., 1s) execute the following command: 312 | 313 | ``` 314 | $ python3 ./scripts/bench/bench.py --dataset datasets/b2 --out results --parallel -- xyntia -check -time 1 315 | ``` 316 | 317 | The options and their meanings can be found through the `--help` option. 318 | 319 | 320 |
321 | 322 | # Synthesize all blocks from a execution trace 323 | 324 |
Show 325 | 326 | 327 | We also give `./scripts/utils/all_from_trace.sh` which traces code execution with Ghidra or GDB, extracts each code block executed, samples and synthesizes them. 328 | The manual is available through `./scripts/utils/all_from_trace.sh --help` and it can be run as follows: 329 | 330 | ``` 331 | $ XYNTIA="xyntia " # the xyntia command to use 332 | $ ./scripts/utils/all_from_trace.sh --outdir --all -- binary arg1 arg2 ... 333 | ``` 334 | 335 | Here is an example: 336 | 337 | ``` 338 | $ cd examples/bin && make && cd - 339 | $ ./scripts/utils/all_from_trace.sh --outdir --all -- ./examples/bin/add 340 | ``` 341 | 342 |
343 | 344 | # References 345 | 346 | [1] Menguy, G., Bardin, S., Bonichon, R., & Lima, C. D. S. (2021, November). Search-Based Local Black-Box Deobfuscation: Understand, Improve and Mitigate. In Proceedings of the 2021 ACM SIGSAC Conference on Computer and Communications Security. 347 | 348 | [2] Blazytko, T., Contag, M., Aschermann, C., & Holz, T. (2017). Syntia: Synthesizing the semantics of obfuscated code. In 26th USENIX Security Symposium (USENIX Security 17). 349 | 350 | [3] Attias, V., Bellec, N., Menguy, G., Bardin, S., Marion, J. (2025, October). Augmenting Search-based Program Synthesis with Local Inference Rules to Improve Black-box Deobfuscation. In Proceedings of the 2025 ACM SIGSAC Conference on Computer and Communications Security. 351 | -------------------------------------------------------------------------------- /src/lib/heuristic.ml: -------------------------------------------------------------------------------- 1 | (**************************************************************************) 2 | (* This file is part of BINSEC. *) 3 | (* *) 4 | (* Copyright (C) 2019-2025 *) 5 | (* CEA (Commissariat à l'énergie atomique et aux énergies *) 6 | (* alternatives) *) 7 | (* *) 8 | (* you can redistribute it and/or modify it under the terms of the GNU *) 9 | (* Lesser General Public License as published by the Free Software *) 10 | (* Foundation, version 2.1. *) 11 | (* *) 12 | (* It is distributed in the hope that it will be useful, *) 13 | (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) 14 | (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *) 15 | (* GNU Lesser General Public License for more details. *) 16 | (* *) 17 | (* See the GNU Lesser General Public License version 2.1 *) 18 | (* for more details (enclosed in the file licenses/LGPLv2.1). *) 19 | (* *) 20 | (**************************************************************************) 21 | 22 | (** Results summary i.e. 
what is returned by S-metaheuristics **) 23 | type summary = { 24 | success : bool ; 25 | expression : string ; 26 | simplified : string ; 27 | smtlib : string ; 28 | size : int ; 29 | time_synthesis : float ; 30 | time_simplify : float ; 31 | nb_mutations : int ; 32 | } 33 | 34 | module type DIST = Distance.DIST 35 | module type VECDIST = Distance.VECDIST 36 | 37 | module type S = sig 38 | val search : int -> summary 39 | end 40 | 41 | 42 | exception Halt = Exceptions.Halt 43 | exception CEGISHalt = Exceptions.CEGISHalt 44 | exception SynthesisNotStarted = Exceptions.SynthesisNotStarted 45 | 46 | type 'a solution = { tree : 'a Tree.t ; cost : float ; final_cost : float } 47 | 48 | module type BaseSearch = sig 49 | val search : terminated: (Mutations.mutd solution -> bool) -> 50 | gen_sol: (Mutations.mutd Tree.t -> Mutations.mutd solution) -> 51 | best_sol: Mutations.mutd solution ref -> 52 | maxdepth: int -> 53 | unit 54 | end 55 | 56 | module MkSearch (BSF: functor (M : Mutations.MUTATOR) -> BaseSearch) (M : Mutations.MUTATOR) (GD : VECDIST with type t = M.data) (FD : VECDIST with type t = M.data) (O : Oracle.ORACLE) : S = struct 57 | module Simp = Simplifier.TreeSimplifier (M) (O) 58 | module BS = BSF (M) 59 | 60 | let terminated sol = 61 | FD.is_zero sol.final_cost 62 | 63 | (* Compute the cost of an AST i.e. 64 | * the distance between its outputs 65 | * and the observed ones *) 66 | let cost tree = 67 | let actual = M.eval tree in 68 | let expected = O.out_values () in 69 | GD.vecdist actual expected 70 | 71 | let final_cost tree = 72 | let actual = M.eval tree in 73 | let expected = O.out_values () in 74 | FD.vecdist actual expected 75 | 76 | let mutnb = ref 0 77 | 78 | let gen_sol tree = 79 | incr mutnb; 80 | { tree ; cost = cost tree ; final_cost = final_cost tree } 81 | 82 | let search maxdepth = 83 | mutnb := 0; 84 | let time_base = Sys.time() in 85 | 86 | try 87 | let best_sol = ref (gen_sol (M.singleton ())) in (* Initial state: an AST of size 1 i.e. 
a constant or a variable *) 88 | 89 | let finished = 90 | try 91 | BS.search ~terminated ~gen_sol ~best_sol ~maxdepth; 92 | true 93 | with Halt | CEGISHalt -> false 94 | in 95 | 96 | let actual = M.eval !best_sol.tree in 97 | let expected = O.out_values () in 98 | 99 | if finished then ( 100 | (* Convert the resulting solution if we activated top-level constant analysis *) 101 | best_sol := gen_sol (FD.extract !best_sol.tree actual expected); 102 | 103 | (* Checks that the extraction did not broke the solution *) 104 | if (not (terminated !best_sol)) then 105 | failwith (Printf.sprintf "pattern does not zeroify %s\n" (M.to_string !best_sol.tree)) 106 | ); 107 | 108 | let time_synth = Sys.time() in 109 | let simpl = Simp.simplify !best_sol.tree in (* Postprocess to clean the synthesized expression *) 110 | let time_simpl = Sys.time() in 111 | let simpl_sol = gen_sol simpl in 112 | 113 | (* Disable the timeout *) 114 | Sys.set_signal Sys.sigalrm (Signal_handle (fun _ -> ())); 115 | 116 | (* Check that the postprocess did not changed the observable behaviors *) 117 | if finished && (terminated !best_sol) && not (terminated simpl_sol) then 118 | (* Postprocess changed the observable behaviors: there is a bug in the postprocess *) 119 | failwith (Printf.sprintf "%s != %s\n" (M.to_string !best_sol.tree) (M.to_string simpl_sol.tree)); 120 | 121 | { 122 | (* Return the solution *) 123 | success = terminated !best_sol ; 124 | expression = M.to_string !best_sol.tree; 125 | simplified = M.to_string simpl; 126 | smtlib = M.to_smtlib simpl; 127 | size = M.get_expr_size simpl; 128 | time_synthesis = time_synth -. time_base; 129 | time_simplify = time_simpl -. 
time_synth; 130 | nb_mutations = !mutnb; 131 | } 132 | with Halt | CEGISHalt -> raise SynthesisNotStarted 133 | end 134 | 135 | (** Module for the Iterated Local Search S-metaheuristic **) 136 | module Iterated_local_search (M : Mutations.MUTATOR) : BaseSearch = struct 137 | let is_stuck n = n > 100 (** number of mutations where no improvement is observed before doing a perturbation **) 138 | 139 | (* Perturbation step *) 140 | let perturbate sol = 141 | let num_cuts = 2 in (* Number of time to perturbate the AST *) 142 | let rec perturbate_loop n t = 143 | if n = num_cuts then t 144 | else perturbate_loop (n + 1) (M.cut t) 145 | in 146 | perturbate_loop 0 sol.tree 147 | 148 | let search ~terminated ~gen_sol ~best_sol ~maxdepth = 149 | (* Mutate the AST iteratively 150 | * sol: the AST 151 | * n : number of non progressing iterations 152 | * depth : states how many perturbations have been done (use to reset the possible mutation choices) *) 153 | let rec loop sol n depth = 154 | (* Iterative mutations of the AST *) 155 | if is_stuck n || terminated sol then sol 156 | else 157 | let mutated = M.mutate maxdepth sol.tree (Some depth) in 158 | let sol' = gen_sol mutated in 159 | if terminated sol' then sol' 160 | else if sol'.cost < sol.cost then loop sol' 0 depth 161 | else loop sol (n + 1) depth 162 | in 163 | let rec iteration best_sol depth = 164 | (* Iterate until the distance equals 0 or timeout *) 165 | if terminated !best_sol then () 166 | else 167 | let perturbated_sol = gen_sol (perturbate !best_sol) in 168 | let sol' = loop perturbated_sol 0 depth in 169 | best_sol := if (terminated sol') || (sol'.cost < !best_sol.cost) then sol' else !best_sol; 170 | iteration best_sol (depth + 1) 171 | in 172 | iteration best_sol 0 173 | end 174 | 175 | (** Module for the Random Walk S-metaheuristic **) 176 | module Random_walk (M : Mutations.MUTATOR) : BaseSearch = struct 177 | let search ~terminated ~gen_sol ~best_sol ~maxdepth = 178 | let rec loop sol = 179 | if 
terminated !best_sol then () 180 | else 181 | let sol' = gen_sol (M.mutate maxdepth sol.tree None) in 182 | best_sol := if (terminated sol') || (sol'.cost < !best_sol.cost) then sol' else !best_sol; 183 | loop sol' 184 | in 185 | loop !best_sol 186 | end 187 | 188 | (** Module for the Hill Climbing S-metaheuristic: climb from the current solution, restart from a fresh singleton after more than 100 non-improving mutations, and keep the best solution seen so far in [best_sol] **) 189 | module Hill_climbing (M : Mutations.MUTATOR) : BaseSearch = struct 190 | let is_stuck n = n > 100 191 | 192 | let search ~terminated ~gen_sol ~best_sol ~maxdepth = 193 | let rec loop sol n = 194 | if terminated !best_sol then () 195 | else if is_stuck n then loop (gen_sol (M.singleton ())) 0 196 | else (* mutate the current solution, not the global best, so that a restart from a singleton really explores a new region *) 197 | let sol' = gen_sol (M.mutate maxdepth sol.tree None) in 198 | if terminated sol' || (sol'.cost < !best_sol.cost) then 199 | best_sol := sol'; 200 | if sol'.cost < sol.cost then loop sol' 0 201 | else loop sol (n + 1) 202 | in 203 | loop !best_sol 0 204 | end 205 | 206 | (** Module for the Simulated Annealing S-metaheuristic **) 207 | module Simulated_annealing (M : Mutations.MUTATOR) : BaseSearch = struct 208 | (* rate of the geometric rate cooling *) 209 | let alpha = 0.8 210 | 211 | (* factor for determining batch size *) 212 | let factor = 100 213 | 214 | (* we stop if we don't improve the best solution 215 | after this number of batches *) 216 | let is_stuck n = n > 2 217 | 218 | let replace temp sol sol' = 219 | Random.float 1. < exp((sol.cost -. sol'.cost) /. temp) 220 | 221 | let search ~terminated ~gen_sol ~best_sol ~maxdepth = 222 | let t0 = 223 | let acceptance = 0.90 in 224 | let lim = 100 in 225 | let rec loop acc count sol = 226 | if count = lim then acc /. float count 227 | else 228 | let sol' = gen_sol (M.mutate maxdepth sol.tree None) in 229 | let delta = sol'.cost -. sol.cost in 230 | if delta > 0. then loop (acc +. delta) (count + 1) sol' 231 | else loop acc count sol' 232 | in 233 | 234 | let sol = gen_sol (M.singleton ()) in 235 | -.(loop 0. 0 sol) /. log acceptance 236 | in 237 | let t_lim = t0 /. 1000. 
in 238 | 239 | (* batch size should be proportional to the neighborhood size, 240 | which in our case is proportional to the number of nodes in 241 | the tree *) 242 | let batch_size = factor * (1 lsl maxdepth) in 243 | 244 | let rec loop temp sol count = 245 | if terminated !best_sol then () 246 | else if temp < t_lim || is_stuck count then 247 | batch_loop t0 (gen_sol (M.singleton ())) 1 0 248 | else 249 | batch_loop temp sol count 0 250 | 251 | and batch_loop temp sol count n = 252 | if n = batch_size then 253 | loop (temp *. alpha) sol (count + 1) 254 | else 255 | let sol' = gen_sol (M.mutate maxdepth sol.tree None) in 256 | let (best_sol', count) = 257 | if terminated sol' || (sol'.cost < !best_sol.cost) then (sol', 0) 258 | else (!best_sol, count) 259 | in 260 | best_sol := best_sol'; 261 | if replace temp sol sol' then 262 | batch_loop temp sol' count (n + 1) 263 | else 264 | batch_loop temp sol count (n + 1) 265 | in 266 | loop t0 !best_sol 1 267 | end 268 | 269 | 270 | (** Module for the Metropolis Hasting S-metaheuristic **) 271 | module Metropolis_hastings (M : Mutations.MUTATOR) : BaseSearch = struct 272 | (* rate of the geometric rate cooling *) 273 | let alpha = 0.8 274 | 275 | (* factor for determining batch size *) 276 | let factor = 100 277 | 278 | (* we stop if we don't improve the best solution 279 | after this number of batches *) 280 | let is_stuck n = n > 2 281 | 282 | let replace temp sol sol' p = 283 | Random.float 1. < p *. exp((sol.cost -. sol'.cost) /. temp) 284 | 285 | let search ~terminated ~gen_sol ~best_sol ~maxdepth = 286 | let t0 = 287 | let acceptance = 0.90 in 288 | let lim = 100 in 289 | let rec loop acc count sol = 290 | if count = lim then acc /. float count 291 | else 292 | let sol' = gen_sol (M.mutate maxdepth sol.tree None) in 293 | let delta = sol'.cost -. sol.cost in 294 | if delta > 0. then loop (acc +. 
delta) (count + 1) sol' 295 | else loop acc count sol' 296 | in 297 | let sol = gen_sol (M.singleton ()) in 298 | -.(loop 0. 0 sol) /. log acceptance 299 | in 300 | let t_lim = t0 /. 1000. in 301 | (* batch size should be proportional to the neighborhood size, 302 | which in our case is proportional to the number of nodes in 303 | the tree *) 304 | let batch_size = factor * (1 lsl maxdepth) in 305 | (* @TODO (NB): Could be transformed into immutable solutions (I think) *) 306 | let rec loop temp sol count = 307 | if terminated !best_sol then () 308 | else if temp < t_lim || is_stuck count then 309 | batch_loop t0 (gen_sol (M.singleton ())) 1 0 310 | else 311 | batch_loop temp sol count 0 312 | and batch_loop temp sol count n = 313 | if n = batch_size then 314 | loop (temp *. alpha) sol (count + 1) 315 | else 316 | let (t', p) = M.mutate_mh maxdepth sol.tree None in 317 | let sol' = gen_sol t' in 318 | let (best_sol', count) = 319 | if terminated sol' || (sol'.cost < !best_sol.cost) then (sol', 0) 320 | else (!best_sol, count) 321 | in 322 | best_sol := best_sol'; 323 | if replace temp sol sol' p then 324 | batch_loop temp sol' count (n + 1) 325 | else 326 | batch_loop temp sol count (n + 1) 327 | in 328 | loop t0 !best_sol 1 329 | end 330 | 331 | (** Select from a string which S-metaheuristic to use. The abbreviations ils, hc, rw, sa and mh map to Iterated_local_search, Hill_climbing, Random_walk, Simulated_annealing and Metropolis_hastings; the chosen search is instantiated through MkSearch with the given mutator M, guiding distance GD, final distance FD and oracle O, and returned as a first-class module of type S. Any other string raises Invalid_argument. **) 332 | let of_string (module M : Mutations.MUTATOR) (module GD : VECDIST with type t = M.data) (module FD : VECDIST with type t = M.data) (module O : Oracle.ORACLE) = function 333 | | "ils" -> (module MkSearch (Iterated_local_search) (M) (GD) (FD) (O) : S) 334 | | "hc" -> (module MkSearch (Hill_climbing) (M) (GD) (FD) (O) : S) 335 | | "rw" -> (module MkSearch (Random_walk) (M) (GD) (FD) (O) : S) 336 | | "sa" -> (module MkSearch (Simulated_annealing) (M) (GD) (FD) (O) : S) 337 | | "mh" -> (module MkSearch (Metropolis_hastings) (M) (GD) (FD) (O) : S) 338 | | _ -> invalid_arg "Undefined heuristic option" 339 |
--------------------------------------------------------------------------------