├── dune-project ├── src ├── analysis │ ├── domain │ │ ├── null_dom.mli │ │ ├── unit_dom.mli │ │ ├── shape │ │ │ ├── state.mli │ │ │ ├── dune │ │ │ ├── env.ml │ │ │ └── memloc.ml │ │ ├── dune │ │ ├── addr.ml │ │ ├── null_val.mli │ │ ├── array_bounds.mli │ │ ├── oct_array_bounds.mli │ │ ├── itv.mli │ │ ├── octagon.mli │ │ ├── unit_dom.ml │ │ ├── relation.ml │ │ ├── null_val.ml │ │ ├── product.ml │ │ └── abstract.ml │ ├── dune │ ├── query.ml │ ├── dsg.mli │ ├── context.ml │ └── daig.mli ├── syntax │ ├── dune │ ├── declared_fields.ml │ ├── alloc_site.ml │ ├── method_id.ml │ ├── class_hierarchy.ml │ └── ast.ml ├── synthetic_benchmarks │ ├── dune │ ├── random_edits.mli │ └── exec.ml ├── frontend │ ├── dune │ ├── src_file.mli │ ├── text_diff.mli │ ├── src_file.ml │ ├── tree.mli │ ├── tree_diff.mli │ ├── loc_map.mli │ ├── callgraph.mli │ ├── loc_map.ml │ ├── tree.ml │ ├── cfg_parser.mli │ ├── callgraph.ml │ └── text_diff.ml ├── dune ├── import.ml └── experiment_harness.ml ├── test_cases ├── java │ ├── SyntaxError.java │ ├── HelloWorld.java │ ├── Fields.java │ ├── While.java │ ├── While2.java │ ├── While3.java │ ├── HelloWorlds.java │ ├── HelloWorlds2.java │ ├── NestedClasses.java │ ├── Constructors.java │ ├── Conditional3.java │ ├── Conditional.java │ ├── Conditional4.java │ ├── ConditionalAtLoopExit.java │ ├── FieldInitializers.java │ ├── Conditional2.java │ ├── SuperMethodInvocation.java │ ├── BitwiseStuff.java │ ├── ForEach.java │ ├── Srh.java │ ├── TryWithResources.java │ ├── Break.java │ ├── Variadic.java │ ├── InstanceInitializer.java │ ├── NestedLoops.java │ ├── NestedLoops2.java │ ├── Procedures.java │ ├── Exceptions.java │ ├── CibaiExample │ │ └── MiniBag.java │ ├── MethodReferences.java │ ├── Switch.java │ ├── Literals.java │ └── Nullability.java ├── js │ ├── while_syntax.js │ ├── array_oob.js │ ├── arith_syntax.js │ ├── array_syntax.js │ ├── list_append.js │ ├── foo.js │ ├── functions.js │ ├── buckets_swap.js │ ├── buckets_contains.js │ ├── buckets_equals.js │ 
└── buckets_indexof.js ├── varargs.callgraph ├── srh.callgraph ├── diff │ ├── post │ │ └── Srh.java │ └── pre │ │ └── Srh.java ├── nullability.callgraph └── procedures.callgraph ├── experiment_inputs ├── junkdog-artemis-odb-71816517 │ ├── fail │ │ └── main │ └── pass │ │ └── main ├── junkdog-artemis-odb-74488108 │ ├── fail │ │ └── main │ └── pass │ │ └── main ├── mitreid-connect-OpenID-Connect-Java-Spring-Server-358117230 │ ├── fail │ │ └── callgraph │ └── pass │ │ └── callgraph ├── style_artifacts ├── davidmoten-rxjava-jdbc-172208959 │ ├── fail │ │ └── entry │ ├── pass │ │ └── entry │ └── query ├── raphw-byte-buddy-140517154 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-140517155 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-140517156 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-140517158 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-140517159 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-148830162 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-148830163 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-148830165 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── raphw-byte-buddy-148830166 │ ├── fail │ │ ├── entry │ │ └── entry_alt │ └── pass │ │ ├── entry │ │ └── entry_alt ├── SpigotMC-BungeeCord-130330788 │ └── query ├── square-okhttp-95014919 │ └── query ├── fayder-restcountries-146970880 │ ├── fail │ │ ├── entry1 │ │ └── entry2 │ └── pass │ │ ├── entry1 │ │ └── entry2 ├── fayder-restcountries-146970881 │ ├── fail │ │ ├── entry1 │ │ └── entry2 │ └── pass │ │ ├── entry1 │ │ └── entry2 ├── 
fayder-restcountries-207869551 │ ├── fail │ │ ├── entry1 │ │ └── entry2 │ └── pass │ │ ├── entry1 │ │ └── entry2 ├── tananaev-traccar-164537301 │ └── query ├── tananaev-traccar-165995608 │ └── query ├── tananaev-traccar-188473749 │ └── query ├── tananaev-traccar-255051211 │ └── query ├── tananaev-traccar-64783123 │ └── query ├── vkostyukov-la4j-45524419 │ ├── pass │ │ └── .callgraph.swp │ └── query ├── apache-commons-lang-224267191 │ └── query ├── tananaev-traccar-191125671 │ └── query ├── raphw-byte-buddy-234970609 │ ├── query │ ├── fail │ │ └── compile.patch │ └── pass │ │ └── compile.patch ├── query_artifacts ├── raphw-byte-buddy-160374689 │ ├── pass │ │ └── compile.patch │ └── fail │ │ └── compile.patch ├── raphw-byte-buddy-202917180 │ ├── fail │ │ └── compile.patch │ └── pass │ │ └── compile.patch └── artifacts ├── .gitmodules ├── run_diagnostic ├── .gitignore ├── print_as_csv_row ├── run_experiments ├── scripts ├── average_queries ├── to_moving_average_per_edit.py ├── diff_counts.py ├── cdf_ktt.py ├── cdf.py ├── update_old_bugswarm_programs.py ├── cdf_full.py ├── scatter_config_ktt.py ├── scatter_config_small.py ├── scatter_config_full.py ├── scatter_config_large.py ├── bugswarm_filter.py └── artifact │ ├── kick_the_tires.sh │ ├── scalability_experiments_small.sh │ ├── scalability_experiments.sh │ ├── scalability_experiments_large.sh │ └── scalability_experiments_full.sh ├── diffCounts ├── run_synthetic_parallel ├── run_synthetic ├── dai.opam ├── callgraphSize ├── linesOfSourceCode ├── run_null_configs ├── run_callstring_configs ├── run_configs ├── Makefile ├── README.md ├── Dockerfile └── generate_table.py /dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.1) 2 | (allow_approximate_merlin) -------------------------------------------------------------------------------- /src/analysis/domain/null_dom.mli: -------------------------------------------------------------------------------- 1 | include 
Abstract.Dom 2 | -------------------------------------------------------------------------------- /src/analysis/domain/unit_dom.mli: -------------------------------------------------------------------------------- 1 | include Abstract.Dom 2 | -------------------------------------------------------------------------------- /src/analysis/domain/shape/state.mli: -------------------------------------------------------------------------------- 1 | include Domain.Abstract.Dom 2 | -------------------------------------------------------------------------------- /test_cases/java/SyntaxError.java: -------------------------------------------------------------------------------- 1 | \f . (\x . f (x x)) (\x . f (x x)) 2 | -------------------------------------------------------------------------------- /test_cases/js/while_syntax.js: -------------------------------------------------------------------------------- 1 | var x = 0 2 | while (x < 3) { 3 | x = x + 1 4 | } 5 | -------------------------------------------------------------------------------- /experiment_inputs/junkdog-artemis-odb-71816517/fail/main: -------------------------------------------------------------------------------- 1 | com.artemis.cli.CliApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/junkdog-artemis-odb-71816517/pass/main: -------------------------------------------------------------------------------- 1 | com.artemis.cli.CliApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/junkdog-artemis-odb-74488108/fail/main: -------------------------------------------------------------------------------- 1 | com.artemis.cli.CliApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/junkdog-artemis-odb-74488108/pass/main: -------------------------------------------------------------------------------- 1 | 
com.artemis.cli.CliApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/mitreid-connect-OpenID-Connect-Java-Spring-Server-358117230/fail/callgraph: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiment_inputs/mitreid-connect-OpenID-Connect-Java-Spring-Server-358117230/pass/callgraph: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiment_inputs/style_artifacts: -------------------------------------------------------------------------------- 1 | tananaev-traccar-191125671 2 | tananaev-traccar-165995608 3 | -------------------------------------------------------------------------------- /experiment_inputs/davidmoten-rxjava-jdbc-172208959/fail/entry: -------------------------------------------------------------------------------- 1 | com/github/davidmoten/rx/jdbc/Database 2 | -------------------------------------------------------------------------------- /experiment_inputs/davidmoten-rxjava-jdbc-172208959/pass/entry: -------------------------------------------------------------------------------- 1 | com/github/davidmoten/rx/jdbc/Database 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517154/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517154/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- 
/experiment_inputs/raphw-byte-buddy-140517155/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517155/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517156/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517156/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517158/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517158/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517159/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517159/pass/entry: 
-------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830162/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830162/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830163/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830163/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830165/fail/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830165/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830166/fail/entry: -------------------------------------------------------------------------------- 1 | 
net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830166/pass/entry: -------------------------------------------------------------------------------- 1 | net.bytebuddy.build.Plugin$Engine$Default#() 2 | -------------------------------------------------------------------------------- /test_cases/js/array_oob.js: -------------------------------------------------------------------------------- 1 | var a = [4, 8, 15, 16, 23, 42] 2 | 3 | var okay = a[2]; 4 | 5 | var err = a[6]; 6 | -------------------------------------------------------------------------------- /experiment_inputs/SpigotMC-BungeeCord-130330788/query: -------------------------------------------------------------------------------- 1 | net.md_5.bungee.ConnectionThrottle#throttle(InetAddress) 2 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "WALA-callgraph"] 2 | path = WALA-callgraph 3 | url = git@github.com:bennostein/WALA-callgraph.git 4 | -------------------------------------------------------------------------------- /experiment_inputs/square-okhttp-95014919/query: -------------------------------------------------------------------------------- 1 | com.squareup.okhttp.Call#getResponse(com.squareup.okhttp.Request,boolean) 2 | -------------------------------------------------------------------------------- /run_diagnostic: -------------------------------------------------------------------------------- 1 | _build/default/src/exec.exe -unit -diag _bugswarm2/$1/pass -edit _bugswarm2/$1/fail > diagnostic_results/$1 2 | -------------------------------------------------------------------------------- /experiment_inputs/davidmoten-rxjava-jdbc-172208959/query: -------------------------------------------------------------------------------- 1 | 
com.github.davidmoten.rx.jdbc.QueryUpdateOnSubscribe#call(Subscriber) 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970880/fail/entry1: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970880/fail.restcountries.v2.rest.CountryRest 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970880/pass/entry1: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970880/pass.restcountries.v2.rest.CountryRest 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970881/fail/entry1: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970881/fail.restcountries.v2.rest.CountryRest 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970881/pass/entry1: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970881/pass.restcountries.v2.rest.CountryRest 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-207869551/fail/entry1: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-207869551/fail.restcountries.v2.rest.CountryRest 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-207869551/pass/entry1: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-207869551/pass.restcountries.v2.rest.CountryRest 2 | -------------------------------------------------------------------------------- 
/experiment_inputs/tananaev-traccar-164537301/query: -------------------------------------------------------------------------------- 1 | org.traccar.protocol.SmokeyProtocolDecoder#decode(Channel,SocketAddress,Object) 2 | -------------------------------------------------------------------------------- /experiment_inputs/tananaev-traccar-165995608/query: -------------------------------------------------------------------------------- 1 | org.traccar.api.resource.StatisticsResource#get(java.lang.String,java.lang.String) 2 | -------------------------------------------------------------------------------- /experiment_inputs/tananaev-traccar-188473749/query: -------------------------------------------------------------------------------- 1 | org.traccar.protocol.GoSafeProtocolDecoder#decode(Channel,SocketAddress,Object) 2 | -------------------------------------------------------------------------------- /experiment_inputs/tananaev-traccar-255051211/query: -------------------------------------------------------------------------------- 1 | org.traccar.BaseProtocolDecoder#getDeviceSession(Channel,SocketAddress,String[]) 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970880/fail/entry2: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970880/fail.restcountries.servlet.RestApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970880/pass/entry2: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970880/pass.restcountries.servlet.RestApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970881/fail/entry2: -------------------------------------------------------------------------------- 1 | 
eu.fayder-restcountries-146970881/fail.restcountries.servlet.RestApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-146970881/pass/entry2: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-146970881/pass.restcountries.servlet.RestApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-207869551/fail/entry2: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-207869551/fail.restcountries.servlet.RestApplication 2 | -------------------------------------------------------------------------------- /experiment_inputs/fayder-restcountries-207869551/pass/entry2: -------------------------------------------------------------------------------- 1 | eu.fayder-restcountries-207869551/pass.restcountries.servlet.RestApplication 2 | -------------------------------------------------------------------------------- /test_cases/js/arith_syntax.js: -------------------------------------------------------------------------------- 1 | var x = 0 2 | 3 | var b = true 4 | 5 | if (b) { 6 | x += 1 7 | } else { 8 | x -= 1 9 | } 10 | -------------------------------------------------------------------------------- /test_cases/js/array_syntax.js: -------------------------------------------------------------------------------- 1 | var a = [4, 8, 15, 16, 23, 42] 2 | var third = a[2]; 3 | a[3] = a[5] * 2; 4 | var len = a.length; 5 | return a[0]; 6 | -------------------------------------------------------------------------------- /test_cases/js/list_append.js: -------------------------------------------------------------------------------- 1 | if (p == null) {return q;} 2 | r = p; 3 | while (r.next != null) 4 | r = r.next; 5 | r.next = q 6 | return p 7 | 
-------------------------------------------------------------------------------- /experiment_inputs/tananaev-traccar-64783123/query: -------------------------------------------------------------------------------- 1 | org.traccar.protocol.CastelProtocolDecoder#decode(ChannelHandlerContext,Channel,SocketAddress,Object) 2 | -------------------------------------------------------------------------------- /test_cases/java/HelloWorld.java: -------------------------------------------------------------------------------- 1 | class HelloWorld { 2 | public static void main(String[] args) { 3 | System.out.println("Hello World!"); 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /test_cases/varargs.callgraph: -------------------------------------------------------------------------------- 1 | CALLER: static Variadic#main(java.lang.String[]) 2 | CALLEE: static Variadic#sum(int[]) 3 | CALLER: static Variadic#sum(int[]) 4 | -------------------------------------------------------------------------------- /experiment_inputs/vkostyukov-la4j-45524419/pass/.callgraph.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cuplv/dai/HEAD/experiment_inputs/vkostyukov-la4j-45524419/pass/.callgraph.swp -------------------------------------------------------------------------------- /experiment_inputs/apache-commons-lang-224267191/query: -------------------------------------------------------------------------------- 1 | static org.apache.commons.lang3.reflect.MethodUtils#getMethodsWithAnnotation(java.lang.Class,java.lang.Class,boolean,boolean) 2 | -------------------------------------------------------------------------------- /src/syntax/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name syntax) 3 | (libraries dai) 4 | (inline_tests) 5 | (preprocess (pps ppx_inline_test ppx_sexp_conv ppx_sexp_value ppx_compare 
ppx_hash)) 6 | ) -------------------------------------------------------------------------------- /src/synthetic_benchmarks/dune: -------------------------------------------------------------------------------- 1 | (executable 2 | (name exec) 3 | (modules exec random_edits) 4 | (libraries dai frontend analysis domain syntax) 5 | (preprocess (pps ppx_let)) 6 | ) -------------------------------------------------------------------------------- /test_cases/java/Fields.java: -------------------------------------------------------------------------------- 1 | public class Fields { 2 | int x; 3 | 4 | final int y = 1; 5 | 6 | static int z = 2; 7 | 8 | static final int z2 = 3; 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dai 2 | callstrings_dai 3 | _build 4 | **/.merlin 5 | out/* 6 | run_d1a_experiment 7 | **/*.dot 8 | **/*.png 9 | **/*.jar 10 | **/*.class 11 | _bugswarm* 12 | diagnostic* -------------------------------------------------------------------------------- /experiment_inputs/tananaev-traccar-191125671/query: -------------------------------------------------------------------------------- 1 | org.traccar.MainEventHandler#channelDisconnected(org.jboss.netty.channel.ChannelHandlerContext,org.jboss.netty.channel.ChannelStateEvent) 2 | -------------------------------------------------------------------------------- /src/frontend/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name frontend) 3 | (libraries dai syntax tree-sitter-lang.java patience_diff) 4 | (inline_tests) 5 | (preprocess (pps ppx_let ppx_inline_test ppx_sexp_conv))) 6 | -------------------------------------------------------------------------------- /test_cases/srh.callgraph: -------------------------------------------------------------------------------- 1 | CALLER: static 
foo.bar.Srh#main(java.lang.String[]) 2 | CALLEE: static foo.bar.Srh#p(int) 3 | CALLER: static foo.bar.Srh#p(int) 4 | CALLEE: static foo.bar.Srh#p(int) 5 | -------------------------------------------------------------------------------- /src/analysis/domain/shape/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name shape) 3 | (libraries dai domain) 4 | (inline_tests) 5 | (preprocess (pps ppx_let ppx_inline_test ppx_sexp_conv ppx_sexp_value ppx_compare ppx_hash))) -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517154/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517154/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517155/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517155/pass/entry_alt: -------------------------------------------------------------------------------- 1 | 
net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517156/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517156/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517158/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517158/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517159/fail/entry_alt: -------------------------------------------------------------------------------- 1 | 
net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-140517159/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830162/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830162/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830163/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830163/pass/entry_alt: -------------------------------------------------------------------------------- 1 | 
net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830165/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830165/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830166/fail/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-148830166/pass/entry_alt: -------------------------------------------------------------------------------- 1 | net.bytebuddy.dynamic.scaffold.MethodRegistry$Default#prepare(InstrumentedType,Compiler,TypeValidation,VisibilityBridgeStrategy,LatentMatcher) 2 | -------------------------------------------------------------------------------- /src/analysis/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name analysis) 3 | (libraries dai domain frontend) 4 | (inline_tests) 5 | (preprocess (pps ppx_let ppx_inline_test ppx_sexp_conv ppx_sexp_value 
ppx_compare ppx_hash)) 6 | ) -------------------------------------------------------------------------------- /experiment_inputs/vkostyukov-la4j-45524419/query: -------------------------------------------------------------------------------- 1 | org.la4j.matrix.operation.ooplace.OoPlaceMatrixByVectorMultiplication#apply(org.la4j.matrix.sparse.RowMajorSparseMatrix,org.la4j.vector.sparse.SparseVector) 2 | -------------------------------------------------------------------------------- /test_cases/java/While.java: -------------------------------------------------------------------------------- 1 | class While { 2 | public static void main(String[] args) { 3 | int i = 0; 4 | while (i < 10) { 5 | System.out.println(i); 6 | i++; 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /test_cases/java/While2.java: -------------------------------------------------------------------------------- 1 | class While { 2 | public static void main(String[] args) { 3 | int i = 0; 4 | while (i < 10) { 5 | i++; 6 | System.out.println(i); 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /test_cases/java/While3.java: -------------------------------------------------------------------------------- 1 | class While { 2 | public static void main(String[] args) { 3 | int i = 0; 4 | while (i < 20) { 5 | System.out.println(i); 6 | i++; 7 | } 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/analysis/domain/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name domain) 3 | (libraries dai syntax apron apron.boxMPQ apron.octMPQ) 4 | (inline_tests) 5 | (preprocess (pps ppx_let ppx_inline_test ppx_sexp_conv ppx_sexp_value ppx_compare ppx_hash))) -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-234970609/query: 
-------------------------------------------------------------------------------- 1 | static net.bytebuddy.agent.ByteBuddyAgent#attach(java.io.File,net.bytebuddy.agent.ByteBuddyAgent.ProcessProvider,java.lang.String,net.bytebuddy.agent.ByteBuddyAgent.AttachmentProvider) 2 | -------------------------------------------------------------------------------- /test_cases/java/HelloWorlds.java: -------------------------------------------------------------------------------- 1 | class HelloWorld { 2 | public static void main(String[] args) { 3 | System.out.println("Hello World!"); 4 | System.out.println("Hola Mundo!"); 5 | System.out.println("Bonjour Monde!"); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /test_cases/java/HelloWorlds2.java: -------------------------------------------------------------------------------- 1 | class HelloWorld { 2 | public static void main(String[] args) { 3 | System.out.println("Hola Mundo!"); 4 | System.out.println("Hello World!"); 5 | System.out.println("Bonjour Monde!"); 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /src/analysis/domain/addr.ml: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Import 3 | include Syntax.Alloc_site 4 | 5 | module Abstract = struct 6 | include Set [@@deriving sexp] 7 | 8 | let hash = seeded_hash 9 | 10 | let of_alloc_site = singleton 11 | end 12 | -------------------------------------------------------------------------------- /src/analysis/domain/null_val.mli: -------------------------------------------------------------------------------- 1 | (* open Syntax *) 2 | 3 | type nullness = Top | Null | NotNull | Bot 4 | 5 | include Abstract.Val with type t = nullness 6 | 7 | val is_null_or_bot : t -> bool 8 | 9 | val is_null_or_top : t -> bool 10 | -------------------------------------------------------------------------------- 
/test_cases/java/NestedClasses.java: -------------------------------------------------------------------------------- 1 | public class NestedClasses { 2 | class Inner { 3 | static int bar (int x) { 4 | return x + x; 5 | } 6 | } 7 | int foo () { 8 | int y = 7; 9 | return Inner.bar(y); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /print_as_csv_row: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | b=$(tail -n 1 $2/$1.batch | sed 's/.*: //') 4 | d=$(tail -n 1 $2/$1.dd | sed 's/.*: //') 5 | i=$(tail -n 1 $2/$1.incr | sed 's/.*: //') 6 | di=$(tail -n 1 $2/$1.ddincr | sed 's/.*: //') 7 | 8 | echo $1, $b, $d, $i, $di 9 | -------------------------------------------------------------------------------- /run_experiments: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | for i in {1..10}; do 4 | mkdir -p out/run$i 5 | echo "BEGIN RUN $i" 6 | cat experiment_inputs/query_artifacts | xargs -Iartifact ./run_configs artifact out/run$i 7 | echo "END RUN $i" 8 | done 9 | 10 | -------------------------------------------------------------------------------- /src/synthetic_benchmarks/random_edits.mli: -------------------------------------------------------------------------------- 1 | module G : module type of Analysis.Dsg.Make (Domain.Oct_array_bounds) 2 | 3 | val init : unit -> G.t 4 | 5 | val random_edit : G.t -> G.t 6 | 7 | val random_query : G.t -> G.t 8 | 9 | val exit_query : G.t -> G.t 10 | -------------------------------------------------------------------------------- /test_cases/js/foo.js: -------------------------------------------------------------------------------- 1 | var foo = 5; 2 | var bar = 6.2; 3 | var baz = null, 4 | quux = "hello world", 5 | cond = true; 6 | 7 | if (cond){ 8 | foo = foo + bar 9 | } else if (cond) { 10 | bar = foo * bar 11 | } else { 12 | foo += bar 13 | } 14 | 
-------------------------------------------------------------------------------- /test_cases/java/Constructors.java: -------------------------------------------------------------------------------- 1 | class Constructors { 2 | int x; 3 | int y; 4 | public Constructors(int i) { 5 | this.x = i; 6 | this.y = i; 7 | } 8 | public Constructors(int i, int j) { 9 | this.x = i; 10 | this.y = j; 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /test_cases/java/Conditional3.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | class Conditional { 4 | public static void main(String[] args) { 5 | Random r = new Random (); 6 | if (r.nextBoolean()) { 7 | System.out.println("Hello, world!"); 8 | } 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /src/analysis/domain/array_bounds.mli: -------------------------------------------------------------------------------- 1 | open Syntax 2 | 3 | include Abstract.Dom 4 | 5 | val is_safe : string -> Ast.Expr.t -> t -> bool option 6 | 7 | val array_accesses : Ast.Stmt.t -> (Ast.Expr.t * Ast.Expr.t) list 8 | 9 | val of_alist : (string * float * float) list -> t 10 | -------------------------------------------------------------------------------- /test_cases/js/functions.js: -------------------------------------------------------------------------------- 1 | function double(x) { 2 | return x + x; 3 | } 4 | 5 | function square(x) { 6 | var irrelevant_local = 42; 7 | return x * x; 8 | } 9 | 10 | var x = 42 11 | var five = 5; 12 | var twentyfive = square(5); 13 | var fifty = double(twentyfive); 14 | -------------------------------------------------------------------------------- /src/analysis/domain/oct_array_bounds.mli: -------------------------------------------------------------------------------- 1 | open Syntax 2 | 3 | include Abstract.Dom 4 | 5 | val is_safe : string -> 
Ast.Expr.t -> t -> bool option 6 | 7 | val array_accesses : Ast.Stmt.t -> (Ast.Expr.t * Ast.Expr.t) list 8 | 9 | val of_alist : (string * float * float) list -> t 10 | -------------------------------------------------------------------------------- /test_cases/java/Conditional.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | class Conditional { 3 | public static void main(String[] args) { 4 | Random r = new Random (); 5 | if (r.nextBoolean()) { 6 | System.out.println("Hello, world!"); 7 | } else { 8 | System.out.println("Farewell, world!"); 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test_cases/java/Conditional4.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | class Conditional { 3 | public static void main(String[] args) { 4 | Random r = new Random (); 5 | if (r.nextInt(100) > 42) { 6 | System.out.println("Hello, world!"); 7 | } else { 8 | System.out.println("Farewell, world!"); 9 | } 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test_cases/java/ConditionalAtLoopExit.java: -------------------------------------------------------------------------------- 1 | class ConditionalAtLoopExit { 2 | 3 | public static void main(String[] args) { 4 | int x = 10; 5 | while (x > 0) { 6 | x = x - 1; 7 | if (x % 2 == 0) 8 | System.out.println("I'm even!"); 9 | else 10 | System.out.println("I'm odd!"); 11 | } 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /test_cases/java/FieldInitializers.java: -------------------------------------------------------------------------------- 1 | public class FieldInitializers { 2 | int x = 5; 3 | 4 | Object o1 = new Object(); 5 | Object o2; 6 | static Object o3 = new Object(); 7 | static Object o4; 8 | 9 | FieldInitializers () { 10 | o2 = 
new Object(); 11 | } 12 | 13 | static { o4 = new Object(); } 14 | } 15 | -------------------------------------------------------------------------------- /experiment_inputs/query_artifacts: -------------------------------------------------------------------------------- 1 | davidmoten-rxjava-jdbc-172208959 2 | raphw-byte-buddy-234970609 3 | SpigotMC-BungeeCord-130330788 4 | square-okhttp-95014919 5 | tananaev-traccar-164537301 6 | tananaev-traccar-188473749 7 | tananaev-traccar-191125671 8 | tananaev-traccar-255051211 9 | tananaev-traccar-64783123 10 | vkostyukov-la4j-45524419 11 | -------------------------------------------------------------------------------- /test_cases/java/Conditional2.java: -------------------------------------------------------------------------------- 1 | import java.util.Random; 2 | 3 | class Conditional { 4 | public static void main(String[] args) { 5 | Random r = new Random (); 6 | if (r.nextBoolean()) { 7 | System.out.println("Hello, beautiful world!"); 8 | } else { 9 | System.out.println("Farewell, world!"); 10 | } 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /test_cases/java/SuperMethodInvocation.java: -------------------------------------------------------------------------------- 1 | public class SuperMethodInvocation { 2 | 3 | public void callSuper() { 4 | System.out.println(super.hashCode()); 5 | } 6 | 7 | public static void main(String[] args) { 8 | SuperMethodInvocation obj = new SuperMethodInvocation(); 9 | obj.callSuper(); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /test_cases/java/BitwiseStuff.java: -------------------------------------------------------------------------------- 1 | class BitwiseStuff { 2 | public static void main(String[] args) { 3 | int x = 5, 4 | y = -2, 5 | z = 0; 6 | System.out.println("x: " + x); 7 | System.out.println("y: " + y); 8 | System.out.println("z: " + z); 9 | System.out.println("x & 
y: " + (x & y)); 10 | System.out.println("x | y: " + (x | y)); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /scripts/average_queries: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import statistics 4 | import sys 5 | 6 | def grouped(iterator, n): 7 | args=[iterator] * n 8 | return zip(*args) 9 | 10 | with open(sys.argv[1], "r") as infile, open(sys.argv[1] + ".avgs", "w") as outfile: 11 | for group in grouped(infile, 3): 12 | outfile.write(str(statistics.mean([float(x) for x in group])) + "\n") 13 | -------------------------------------------------------------------------------- /diffCounts: -------------------------------------------------------------------------------- 1 | # generate using scripts/diff_counts.py 2 | tananaev-traccar-64783123 : 4 3 | tananaev-traccar-164537301 : 8 4 | tananaev-traccar-188473749 : 4 5 | tananaev-traccar-255051211 : 4 6 | davidmoten-rxjava-jdbc-172208959 : 4 7 | SpigotMC-BungeeCord-130330788 : 4 8 | raphw-byte-buddy-234970609 : 56 9 | tananaev-traccar-191125671 : 8 10 | vkostyukov-la4j-45524419 : 20 11 | square-okhttp-95014919 : 134 12 | -------------------------------------------------------------------------------- /test_cases/java/ForEach.java: -------------------------------------------------------------------------------- 1 | import java.util.ArrayList; 2 | 3 | public class ForEach { 4 | public static void main(String[] args) { 5 | ArrayList myList = new ArrayList(); 6 | myList.add(1); 7 | myList.add(2); 8 | myList.add(3); 9 | for (int num : myList) { 10 | System.out.println(num); 11 | } 12 | System.out.println("Done"); 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /run_synthetic_parallel: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DAI=./_build/default/src/synthetic_benchmarks/exec.exe 4 | 
N=2500 5 | JOBS=6 6 | 7 | # NOTE the following are just for chameleon cloud machines in the setup for TOPLAS revisions... 8 | DAI_ROOT=/home/cc/code/dai/ 9 | LD_LIBRARY_PATH=/home/cc/.opam/dai/share/apron/lib:/home/cc 10 | DAI=/home/cc/dai-synth 11 | 12 | parallel -j $JOBS ::: $DAI "$DAI -d" "$DAI -i" "$DAI -d -i" ::: $N ::: -s ::: $@ 13 | -------------------------------------------------------------------------------- /scripts/to_moving_average_per_edit.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import csv 4 | import sys 5 | import statistics 6 | 7 | qpe = int(sys.argv[1]) 8 | 9 | with sys.stdin as csvfile: 10 | data = csv.reader(csvfile, delimiter='\t') 11 | 12 | raw = [] 13 | 14 | for (idx,row) in enumerate(data): 15 | raw.append(float(row[0])) 16 | if len(raw) % qpe == 0: 17 | print( statistics.mean(raw[-qpe:])) 18 | -------------------------------------------------------------------------------- /src/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name dai) 3 | (public_name dai) 4 | (modules import) 5 | (libraries adapton core graphlib regular) 6 | (preprocess (pps ppx_let)) ; this preprocessor is not actually needed for compilation, but helps dune generate proper `.merlin` info to handle it for the executable stanza below 7 | ) 8 | 9 | (executable 10 | (name exec) 11 | (modules exec experiment_harness) 12 | (libraries dai frontend analysis domain) 13 | (preprocess (pps ppx_let)) 14 | ) -------------------------------------------------------------------------------- /src/frontend/src_file.mli: -------------------------------------------------------------------------------- 1 | type t = Tree_sitter_run.Src_file.t 2 | 3 | val path : t -> string option 4 | 5 | val lines : t -> string array 6 | 7 | val of_file : string -> t 8 | 9 | val line_offsets : t -> int list 10 | (** return the byte offsets to the first character of each line 
in the input program*) 11 | 12 | val read_fn : t -> int -> int -> int -> string option 13 | (** create a read function to be passed to tree-sitter (see TSInput struct of tree-sitter C API) *) 14 | -------------------------------------------------------------------------------- /run_synthetic: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env zsh 2 | 3 | SEED=$1 4 | N=$2 5 | ./_build/default/src/synthetic_benchmarks/exec.exe $N -seed $SEED 6 | ./_build/default/src/synthetic_benchmarks/exec.exe $N -seed $SEED -i 7 | ./_build/default/src/synthetic_benchmarks/exec.exe $N -seed $SEED -d 8 | ./_build/default/src/synthetic_benchmarks/exec.exe $N -seed $SEED -d -i 9 | ./scripts/average_queries out/log/dd_seed$SEED\_n$N\_qpe5.log 10 | ./scripts/average_queries out/log/incr_dd_seed$SEED\_n$N\_qpe5.log 11 | -------------------------------------------------------------------------------- /test_cases/java/Srh.java: -------------------------------------------------------------------------------- 1 | package foo.bar; 2 | // Example adapated from Sagiv,Reps,Horwitz '96 3 | // "Precise interprocedural dataflow analysis with applications to constant propagation" 4 | public class Srh { 5 | static int x = 0; 6 | public static void main(String[] args) { 7 | p(3); 8 | System.out.println(x); 9 | } 10 | static void p(int a) { 11 | if (a > 0) { 12 | a -= 2; 13 | p(a); 14 | a += 2; 15 | } 16 | x = -2 * a + 5; 17 | } 18 | } 19 | 20 | -------------------------------------------------------------------------------- /test_cases/java/TryWithResources.java: -------------------------------------------------------------------------------- 1 | import java.io.FileOutputStream; 2 | import java.io.IOException; 3 | 4 | class TryWithResources { 5 | public static void main(String[] args) { 6 | try(FileOutputStream fos = new FileOutputStream("test_outputfile")) { 7 | fos.write("Hello, world!".getBytes()); 8 | } catch (IOException e) { 9 | 
System.out.println("got an IOException"); 10 | } finally { 11 | System.out.println("in finally block"); 12 | } 13 | 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-160374689/pass/compile.patch: -------------------------------------------------------------------------------- 1 | diff --git a/pom.xml b/pom.xml 2 | index bfec215..7bff1cf 100644 3 | --- a/pom.xml 4 | +++ b/pom.xml 5 | @@ -43,7 +43,7 @@ 6 | byte-buddy-agent 7 | byte-buddy-android 8 | byte-buddy-maven-plugin 9 | - byte-buddy-gradle-plugin 10 | + 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-202917180/fail/compile.patch: -------------------------------------------------------------------------------- 1 | diff --git a/pom.xml b/pom.xml 2 | index 85f1f23..e8d2833 100644 3 | --- a/pom.xml 4 | +++ b/pom.xml 5 | @@ -43,7 +43,7 @@ 6 | byte-buddy-agent 7 | byte-buddy-android 8 | byte-buddy-maven-plugin 9 | - byte-buddy-gradle-plugin 10 | + 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-202917180/pass/compile.patch: -------------------------------------------------------------------------------- 1 | diff --git a/pom.xml b/pom.xml 2 | index 85f1f23..e8d2833 100644 3 | --- a/pom.xml 4 | +++ b/pom.xml 5 | @@ -43,7 +43,7 @@ 6 | byte-buddy-agent 7 | byte-buddy-android 8 | byte-buddy-maven-plugin 9 | - byte-buddy-gradle-plugin 10 | + 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-234970609/fail/compile.patch: -------------------------------------------------------------------------------- 1 | diff --git a/pom.xml b/pom.xml 2 | index 3d1cc29..7a93a9b 100644 3 | --- a/pom.xml 4 | +++ b/pom.xml 5 | @@ -43,7 +43,7 @@ 6 | byte-buddy-agent 7 | byte-buddy-android 8 | 
byte-buddy-maven-plugin 9 | - byte-buddy-gradle-plugin 10 | + 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-234970609/pass/compile.patch: -------------------------------------------------------------------------------- 1 | diff --git a/pom.xml b/pom.xml 2 | index 3d1cc29..7a93a9b 100644 3 | --- a/pom.xml 4 | +++ b/pom.xml 5 | @@ -43,7 +43,7 @@ 6 | byte-buddy-agent 7 | byte-buddy-android 8 | byte-buddy-maven-plugin 9 | - byte-buddy-gradle-plugin 10 | + 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /test_cases/diff/post/Srh.java: -------------------------------------------------------------------------------- 1 | package foo.bar; 2 | // Example adapated from Sagiv,Reps,Horwitz '96 3 | // "Precise interprocedural dataflow analysis with applications to constant propagation" 4 | public class Srh { 5 | static int x = 0; 6 | public static void main(String[] args) { 7 | int n = 5; 8 | p(n); 9 | System.out.println(x); 10 | } 11 | static void p(int a) { 12 | if (a > 0) { 13 | a -= 2; 14 | p(a); 15 | a += 2; 16 | } 17 | x = -2 * a + 5; 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /test_cases/diff/pre/Srh.java: -------------------------------------------------------------------------------- 1 | package foo.bar; 2 | // Example adapated from Sagiv,Reps,Horwitz '96 3 | // "Precise interprocedural dataflow analysis with applications to constant propagation" 4 | public class Srh { 5 | static int x = 0; 6 | public static void main(String[] args) { 7 | int n = 3; 8 | p(n); 9 | System.out.println(x); 10 | } 11 | static void p(int a) { 12 | if (a > 0) { 13 | a -= 2; 14 | p(a); 15 | a += 2; 16 | } 17 | x = -2 * a + 5; 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /test_cases/nullability.callgraph: 
-------------------------------------------------------------------------------- 1 | CALLER: static Nullability#main(java.lang.String[]) 2 | CALLEE: Nullability.MyObj#(Nullability.MyObj,Nullability.MyObj) 3 | CALLEE: static Nullability#foo(Nullability.MyObj) 4 | CALLEE: static Nullability#bar(Nullability.MyObj) 5 | CALLER: static Nullability#foo(Nullability.MyObj) 6 | CALLER: static Nullability#bar(Nullability.MyObj) 7 | CALLEE: Nullability.MyObj#doThing() 8 | CALLER: Nullability.MyObj#(Nullability.MyObj,Nullability.MyObj) 9 | CALLER: Nullability.MyObj#doThing() 10 | -------------------------------------------------------------------------------- /test_cases/java/Break.java: -------------------------------------------------------------------------------- 1 | public class Break { 2 | public static void main(String[] args) { 3 | outer: for (int i = 0; i < 3; i++) { 4 | System.out.println("Outer"); 5 | 6 | for (int j = 0; j < 2; j++) { 7 | System.out.println("Inner"); 8 | break outer; 9 | } 10 | 11 | System.out.println("Outer end"); 12 | } 13 | 14 | System.out.println("Almost Done"); 15 | System.out.println("Done"); 16 | } 17 | } 18 | 19 | -------------------------------------------------------------------------------- /test_cases/java/Variadic.java: -------------------------------------------------------------------------------- 1 | class Variadic { 2 | static int sum(int... 
xs){ 3 | int result = 0; 4 | for (int i = 0; i < xs.length ; i++) 5 | result += xs[i]; 6 | return result; 7 | } 8 | 9 | public static void main(String[] args) { 10 | int zero = sum(); 11 | int one = sum(1); 12 | int three = sum(1,2); 13 | int ten = sum(1,2,3,4); 14 | System.out.println("zero: " + zero); 15 | System.out.println("one: " + one); 16 | System.out.println("three: " + three); 17 | System.out.println("ten: " + ten); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /dai.opam: -------------------------------------------------------------------------------- 1 | opam-version: "2.0" 2 | maintainer: "Benno Stein " 3 | authors: "Benno Stein " 4 | dev-repo: "git://github.com/cuplv/dai.git" 5 | build: [ 6 | ["dune" "build" "-p" name "-j" jobs] 7 | ] 8 | depends: [ 9 | "ocaml" 10 | "core" 11 | "regular" 12 | "graphlib" 13 | "dune" {>= "2.1.2" build} 14 | "yojson" 15 | "apron" 16 | "patience_diff" 17 | "ocamlformat" {= "0.19.0" build} 18 | ] 19 | synopsis: "Demanded Abstract Interpretation" 20 | description: "Demanded Abstract Interpretation" 21 | -------------------------------------------------------------------------------- /test_cases/js/buckets_swap.js: -------------------------------------------------------------------------------- 1 | function swap(array, i, j) { 2 | var temp; 3 | 4 | if (i < 0 || i >= array.length || j < 0 || j >= array.length) { 5 | return false; 6 | } 7 | temp = array[i]; 8 | array[i] = array[j]; 9 | array[j] = temp; 10 | return true; 11 | }; 12 | 13 | var numberArray = [1, 8, 8, 8, 10, 10]; 14 | 15 | // test cases "swap only accepts valid positions" 16 | var test1 = swap(numberArray, 0, 5); 17 | var test2 = swap(numberArray, 0, 6); 18 | var test3 = swap(numberArray, 7, 2); 19 | var test4 = swap(numberArray, -1, 9); 20 | -------------------------------------------------------------------------------- /callgraphSize: 
-------------------------------------------------------------------------------- 1 | # generate with xargs -a experiment_inputs/query_artifacts -Iprog sh -c 'echo prog && sed "s/CALLER:\s*//" experiment_inputs/prog/pass/callgraph | sed "s/\sCALLEE:\s*//" | uniq | wc -l' | paste -s -d' \n' 2 | SpigotMC-BungeeCord-130330788 1599 3 | tananaev-traccar-188473749 8212 4 | tananaev-traccar-255051211 10048 5 | tananaev-traccar-64783123 2559 6 | tananaev-traccar-164537301 7379 7 | square-okhttp-95014919 3866 8 | davidmoten-rxjava-jdbc-172208959 1233 9 | raphw-byte-buddy-234970609 2920 10 | tananaev-traccar-191125671 8260 11 | vkostyukov-la4j-45524419 3558 12 | -------------------------------------------------------------------------------- /test_cases/procedures.callgraph: -------------------------------------------------------------------------------- 1 | CALLER: static Procedures#main(java.lang.String[]) 2 | CALLEE: Procedures#(int) 3 | CALLEE: Procedures#quux(int) 4 | CALLEE: java.io.PrintStream#println(int) 5 | CALLER: Procedures#(int) 6 | CALLEE: java.lang.Object#() 7 | CALLER: Procedures#quux(int) 8 | CALLEE: Procedures#bar() 9 | CALLEE: Procedures#foo(int) 10 | CALLEE: static Procedures#baz() 11 | CALLER: Procedures#bar() 12 | CALLER: Procedures#foo(int) 13 | CALLER: static Procedures#baz() 14 | CALLEE: static Procedures.Inner#baz() 15 | CALLER: static Procedures.Inner#baz() 16 | -------------------------------------------------------------------------------- /linesOfSourceCode: -------------------------------------------------------------------------------- 1 | # generate using xargs -t -a experiment_inputs/query_artifacts -Iprog sloc -f simple -k source _bugswarm/prog/pass | grep "ource" | sed "s/.*ource //" and careful editing 2 | SpigotMC-BungeeCord-130330788 : 15774 3 | tananaev-traccar-188473749 : 39377 4 | tananaev-traccar-255051211 : 63758 5 | tananaev-traccar-64783123 : 23455 6 | tananaev-traccar-164537301 : 36196 7 | square-okhttp-95014919 : 45679 8 | 
davidmoten-rxjava-jdbc-172208959 : 32308 9 | raphw-byte-buddy-234970609 : 147639 10 | tananaev-traccar-191125671 : 39622 11 | vkostyukov-la4j-45524419 : 15214 12 | -------------------------------------------------------------------------------- /src/frontend/text_diff.mli: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 3 | type t = { 4 | prev_start_line : int; 5 | prev_length : int; 6 | next_start_line : int; 7 | new_lines : string array; 8 | } 9 | (** A single contiguous edit, removing [prev_size] lines at line [prev_start_line] and adding [new_lines] in their place. 10 | NOTA BENE: [prev_start_line] and [next_start_line] are 1-indexed per standard intuitions about line-numbering 11 | *) 12 | 13 | val btwn : prev:string array -> next:string array -> t list 14 | (** Compute the text diff between [prev] and [next], at the granularity of lines *) 15 | 16 | val pp : t pp 17 | -------------------------------------------------------------------------------- /test_cases/java/InstanceInitializer.java: -------------------------------------------------------------------------------- 1 | class InstanceInitializer { 2 | // Test for interactions between various ways to initialize a class instance. 3 | // x will be 0 first, then set to 10 by initializer, then to 50 by constructor. 
4 | 5 | int x = 0; // field with default value 6 | 7 | { // instance initializer block 8 | x = x + 10; 9 | } 10 | 11 | InstanceInitializer() { // constructor 12 | x = x * 5; 13 | } 14 | 15 | public static void main(String[] args) { 16 | InstanceInitializer ii = new InstanceInitializer(); 17 | System.out.println("Done with instantiation; ii.x = " + ii.x); 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /experiment_inputs/artifacts: -------------------------------------------------------------------------------- 1 | SpigotMC-BungeeCord-130330788 2 | raphw-byte-buddy-95795966 3 | raphw-byte-buddy-95795967 4 | raphw-byte-buddy-95795968 5 | tananaev-traccar-255051211 6 | tananaev-traccar-64783123 7 | tananaev-traccar-162333127 8 | tananaev-traccar-164537301 9 | tananaev-traccar-165995608 10 | tananaev-traccar-185122214 11 | tananaev-traccar-188473749 12 | tananaev-traccar-191125671 13 | davidmoten-rxjava-jdbc-172208959 14 | square-okhttp-95014919 15 | apache-commons-lang-224267191 16 | raphw-byte-buddy-134720975 17 | raphw-byte-buddy-94017588 18 | raphw-byte-buddy-160374689 19 | raphw-byte-buddy-202917180 20 | raphw-byte-buddy-234970609 21 | -------------------------------------------------------------------------------- /test_cases/java/NestedLoops.java: -------------------------------------------------------------------------------- 1 | public class NestedLoops { 2 | int foo (int x) { 3 | for (int i = 0, result = 0; i < x; i++) 4 | for(int j = 0; j < i; j++) 5 | result += j; 6 | return result; 7 | } 8 | int baz (int x) { 9 | int result = 0, i=0; 10 | while (i < x) { 11 | int j = 0; 12 | while (j < i) { 13 | result += j; 14 | j++; 15 | } 16 | i++; 17 | } 18 | return result; 19 | } 20 | int quux (int x) { 21 | int result = 0, i=0; 22 | do{ 23 | int j = 0; 24 | do { 25 | result += j; 26 | j++; 27 | } while (j < i); 28 | i++; 29 | } while (i < x); 30 | return result; 31 | } 32 | } 33 | 
-------------------------------------------------------------------------------- /test_cases/java/NestedLoops2.java: -------------------------------------------------------------------------------- 1 | public class NestedLoops { 2 | int foo (int x) { 3 | for (int i = 0, result = 0; i < x; i++) 4 | for(int j = i; j > 0; j--) 5 | result += j; 6 | return result; 7 | } 8 | int baz (int x) { 9 | int result = 0, i=0; 10 | while (i < x) { 11 | int j = 0; 12 | while (j < i) { 13 | result += j; 14 | j++; 15 | } 16 | i++; 17 | } 18 | return result; 19 | } 20 | int quux (int x) { 21 | int result = 0, i=0; 22 | do{ 23 | int j = 0; 24 | do { 25 | result += j; 26 | j++; 27 | } while (j < i); 28 | i++; 29 | } while (i < x); 30 | return result; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/frontend/src_file.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | module Ts_src_file = Tree_sitter_run.Src_file 3 | 4 | type t = Ts_src_file.t 5 | 6 | let path = Ts_src_file.info >> function { path; _ } -> path 7 | 8 | let lines = Ts_src_file.lines 9 | 10 | let of_file = Ts_src_file.load_file 11 | 12 | let line_offsets = lines >> Array.map ~f:(String.length >> Int.succ) >> Array.to_list 13 | 14 | let read_fn file _byte_offset row col = 15 | if row < Array.length (lines file) then 16 | let line = Ts_src_file.safe_get_row file row ^ "\n" in 17 | let len = String.length line in 18 | if col < len then Some (String.sub line ~pos:col ~len:(len - col)) else None 19 | else None 20 | -------------------------------------------------------------------------------- /test_cases/java/Procedures.java: -------------------------------------------------------------------------------- 1 | public class Procedures { 2 | 3 | static class Inner { 4 | static int baz () { return 5; } 5 | } 6 | 7 | int n; 8 | 9 | Procedures(int nn) { 10 | n = nn; 11 | } 12 | 13 | int foo(int x) { 14 | return x * x + n; 
// Exception-handling test inputs: a checked user-defined Throwable, a
// conditional throw, try/catch/finally, and try/catch with early returns.
class Exceptions {
  static class MyError extends Throwable {}

  // try/catch/finally: the finally block adds 5 on both the normal path
  // (baz returned) and the exceptional path (catch set x = 100).
  static int foo(int x) {
    try {
      x = baz(x);
    } catch (MyError e) {
      x = 100;
    } finally {
      x = x + 5;
    }
    return x;
  }

  // try/catch where both branches return directly and there is no finally.
  static int bar(int y) {
    try{
      return baz(y);
    }
    catch(MyError e) {
      return 100;
    }
  }

  // Returns z unchanged when 0 <= z < 10; otherwise throws MyError.
  static int baz (int z) throws MyError {
    if (0 <= z && z < 10) return z;
    else throw new MyError();
  }

  public static void main (String[] args) {
    int a = foo(5); // baz(5) = 5, finally adds 5 -> a = 10
    int b = bar(a); // baz(10) throws -> b = 100
    int c = foo(b); // baz(100) throws, catch sets 100, finally -> c = 105
    int d = bar(c); // baz(105) throws -> d = 100
  }
}
open Dai.Import
open Apron
open Syntax

(** Interval abstract domain, backed by APRON's [Box] (non-relational
    intervals). *)
include Abstract.Dom with type t = Box.t Abstract1.t

val get_man : unit -> Box.t Manager.t
(** The APRON manager instance for the box domain. *)

val pp_interval : Interval.t pp

val texpr_of_expr :
  ?fallback:(t -> Ast.Expr.t -> Texpr1.expr option) -> t -> Ast.Expr.t -> Texpr1.expr option
(** Translate an AST expression to an APRON tree expression in the given
    state, if possible; [fallback] is consulted for expression forms not
    handled directly (returns [None] when neither applies). *)

val eval_texpr : t -> Texpr1.expr -> Interval.t
(** Evaluate a tree expression to its interval value in the given state. *)

val meet_with_constraint : ?fallback:(t -> Ast.Expr.t -> Texpr1.expr option) -> t -> Ast.Expr.t -> t
(** Narrow the state by interpreting the expression as a boolean constraint
    (e.g. a branch condition). *)

val filter_env : t -> f:(string -> bool) -> t
(* NOTE(review): presumably restricts the APRON environment to variables whose
   name satisfies [f] -- confirm against the implementation. *)

val assign : t -> Var.t -> Texpr1.expr -> t
(** Assign the value of the tree expression to the variable. *)

val weak_assign : t -> Var.t -> Texpr1.expr -> t
(* NOTE(review): "weak" assignment -- presumably joins the new binding with
   the previous one rather than overwriting; confirm in itv.ml. *)

val lookup : t -> Var.t -> Interval.t
(** The interval bound on a variable in the given state. *)

val forget : Var.t array -> t -> t
(** Drop all information about the given variables. *)

val meet : t -> t -> t
// "MiniBag" example (Cibai): a fixed-capacity bag of ints stored in the
// first `top` slots of a backing array.
class MiniBag {
    private int[] elements; // backing store; indices [0, top) are live
    private int top;        // number of elements currently in the bag

    class BoundError extends Throwable {}

    // Creates an empty bag with capacity `initial`.
    MiniBag(int initial) {
        top = 0;
        elements = new int[initial];
    }

    // Removes and returns the most recently added element.
    // Throws BoundError when the bag is empty.
    int remove () throws BoundError {
        int r;
        if (top > 0) {
            top--;
            r = elements[top];
        } else throw new BoundError();
        return r;
    }

    // Adds i to the bag.
    // Fix: BoundError extends Throwable (checked), so throwing it here
    // requires a `throws` clause, which the original omitted.
    void add(int i) throws BoundError {
        if (top < elements.length) {
            elements[top] = i;
            top++;
        } else throw new BoundError();
    }

    // Removes one occurrence of the minimum element by overwriting it with
    // the last live element and shrinking the bag. (Loop header reconstructed:
    // scan [1, top) for the index of the minimum, then replace it with
    // elements[top - 1].)
    void removeMinimum() {
        if (top > 0) {
            int min = 0, i;
            for (i = 1; i < top; i++)
                if (elements[min] > elements[i])
                    min = i;
            elements[min] = elements[top - 1];
            top--;
        }
    }
}
of_alist_exn = String.Map.of_alist_exn 26 | 27 | let remove = String.Map.remove 28 | 29 | let to_alist = String.Map.to_alist 30 | 31 | let update = String.Map.update 32 | -------------------------------------------------------------------------------- /experiment_inputs/raphw-byte-buddy-160374689/fail/compile.patch: -------------------------------------------------------------------------------- 1 | diff --git a/pom.xml b/pom.xml 2 | index bfec215..0e81682 100644 3 | --- a/pom.xml 4 | +++ b/pom.xml 5 | @@ -43,9 +43,15 @@ 6 | byte-buddy-agent 7 | byte-buddy-android 8 | byte-buddy-maven-plugin 9 | - byte-buddy-gradle-plugin 10 | + 11 | 12 | - 13 | + 14 | + 15 | + alimaven 16 | + aliyun maven 17 | + http://maven.aliyun.com/nexus/content/groups/public/ 18 | + 19 | + 20 | 21 | false 22 | false 23 | -------------------------------------------------------------------------------- /test_cases/java/MethodReferences.java: -------------------------------------------------------------------------------- 1 | import java.util.Arrays; 2 | 3 | public class MethodReferences { 4 | 5 | String fld; 6 | MethodReferences(String s){ 7 | fld = s; 8 | } 9 | 10 | String concat(String s){ 11 | return fld + s; 12 | } 13 | 14 | public static void main(String[] args) { 15 | // Per https://docs.oracle.com/javase/tutorial/java/javaOO/methodreferences.html, 16 | // there are four kinds of method reference: 17 | // 1. reference to a static method 18 | Arrays.stream(args).map(Integer::valueOf); 19 | // 2. reference to an instance method of a particular object 20 | MethodReferences mr = new MethodReferences("foobar"); 21 | Arrays.stream(args).map(mr::concat); 22 | // 3. Reference to an instance method of an arbitrary object of a particular type 23 | Arrays.sort(args, String::compareToIgnoreCase); 24 | // 4. 
open Dai
open Import
open Syntax

(* Trivial "unit" abstract domain: a single abstract state [()] carrying no
   semantic information. Every transfer function is the identity, so running
   an analysis with this domain exercises only the framework machinery. *)

type t = unit

type stmt = Ast.Stmt.t [@@deriving compare, equal, sexp_of]

(* NOTE(review): the sole state is reported as bottom; with one element,
   bottom/top/init all coincide anyway. *)
let is_bot () = true

let widen () () = ()

let join () () = ()

(* The one state trivially implies itself. *)
let implies () () = true

let ( <= ) = implies

(* Statement interpretation: no-op. *)
let interpret _ () = ()

let init () = ()

let bottom () = ()

let top () = ()

let pp fs () = Format.fprintf fs "()"

let sanitize () = ()

let show () = "()"

(* Hashing folds in nothing beyond the seed: all states are identical. *)
let hash seed () = seed

let compare () () = 0

let equal () () = true

let sexp_of_t () = Sexp.Atom "()"

let t_of_sexp _ = ()

let hash_fold_t seed () = seed

(* Interprocedural operations are likewise no-ops: calls, returns, and
   missing callees all produce the unit state. *)
let call ~callee:_ ~callsite:_ ~caller_state:_ ~fields:_ = ()

let return ~callee:_ ~caller:_ ~callsite:_ ~caller_state:_ ~return_state:_ ~fields:_ = ()

let approximate_missing_callee ~caller_state:_ ~callsite:_ = ()
#!/bin/sh
# Run the callstring-variant analysis binary over one BugSwarm artifact in up
# to four configurations (batch, incremental, demand-driven, demand+incremental),
# keeping only EXPERIMENT result lines.
#   usage: ./run_callstring_configs <artifact-image-tag> <output-dir>
# NOTE: the demand-driven command substitutions were garbled in this copy
# ("$( ...); reconstructed to read the query from experiment_inputs/<tag>/query,
# mirroring the intact sibling script run_null_configs.
echo "RUNNING ANALYSIS CONFIGS ON $1 (CALLSTRINGS MODE)"
echo "BATCH"
./callstrings_dai _bugswarm/$1/fail -edit _bugswarm/$1/pass -prev experiment_inputs/$1/fail/callgraph -next experiment_inputs/$1/pass/callgraph | grep EXPERIMENT > $2/$1.cs.batch

echo "INCREMENTAL"
./callstrings_dai _bugswarm/$1/fail -edit _bugswarm/$1/pass -prev experiment_inputs/$1/fail/callgraph -next experiment_inputs/$1/pass/callgraph -incr | grep EXPERIMENT > $2/$1.cs.incr

# Demand-driven configurations require a query of interest for this artifact.
if test -f "experiment_inputs/$1/query"; then
echo "DEMAND-DRIVEN"
./callstrings_dai _bugswarm/$1/fail -edit _bugswarm/$1/pass -prev experiment_inputs/$1/fail/callgraph -next experiment_inputs/$1/pass/callgraph -demand "$(cat experiment_inputs/$1/query)" | grep EXPERIMENT > $2/$1.cs.dd
echo "DEMAND-DRIVEN and INCREMENTAL"
./callstrings_dai _bugswarm/$1/fail -edit _bugswarm/$1/pass -prev experiment_inputs/$1/fail/callgraph -next experiment_inputs/$1/pass/callgraph -demand "$(cat experiment_inputs/$1/query)" -incr | grep EXPERIMENT > $2/$1.cs.ddincr
fi
// buckets.defaultEquals
function equalsFunction(a, b) {
    return a === b;
};

// buckets.arrays.equals, modified to remove dynamic "equalsFunction" equality and rewrite [for] to [while]
// Fix: the while-loop rewrite dropped the negation from the original
//   if (!equalsFunction(array1[i], array2[i])) { return false; }
// so any matching pair caused an early `false` (contradicting the test-case
// comments below). Use !== to restore the intended element comparison.
function equals (array1, array2) {
    var length = array1.length,
        i = 0;
    // Arrays of different lengths are never equal.
    if (array1.length !== array2.length) {
        return false;
    }
    // for (i = 0; i < length; i += 1) {
    //     if (!equalsFunction(array1[i], array2[i])) {
    //         return false;
    //     }
    // }
    while ( i < length ) {
        if (array1[i] !== array2[i]) {
            return false;
        }
        i += 1;
    }
    return true;
};


//test cases "equals returns true for matching number arrays"
var a = [1, 8, 8, 8, 10, 10],
    b = [1, 8, 8, 8, 10, 10];

var test1 = equals(a, a);
var test2 = equals(a, b);

//test cases "equals returns false for non-matching number arrays"
var a = [1, 8, 8, 8, 10, 10],
    c = [1, 8, 5, 8, 10, 10],
    d = [1, 8, 8, 8, 10];

var test3 = equals(a, []);
var test4 = equals(a, c);
var test5 = equals(a, d);
var test6 = equals(a, []);
#!/usr/bin/env python3
import sys
import os

# Queries BugSwarm for Java pass/fail artifact pairs matching the filter below,
# restricts them to the artifacts listed in experiment_inputs/query_artifacts,
# and prints one "<image_tag> : <total>" line per artifact, where <total> is
# the sum of that artifact's `metrics` values (its diff-size counts).
# (The previous header was copied from the artifact-fetch script and wrongly
# claimed this script pulls source trees; it only prints counts.)

if len(sys.argv) < 2:
    print("ERROR: No API token passed via command-line argument.")
    sys.exit(1)

from bugswarm.common.rest_api.database_api import DatabaseAPI

bugswarmapi = DatabaseAPI(token=sys.argv[1])

# Server-side filter, assembled as a raw JSON (Mongo-style) query string.
api_filter = (
    '{"lang":{"$in":["Java"]},' + # JAVA source language
    '"stability":"5/5",' + # non-flaky
    #'"classification.exceptions":["NullPointerException"],' + # NPE error type
    '"classification.code":"Yes",' +
    '"metrics.changes":{"$gt":0,"$lt":500}}' # diff touches at least 1 and at most 500 lines of code
)

artifacts = bugswarmapi.filter_artifacts(api_filter)

# The experiment's artifact allowlist: one image tag per line.
with open("experiment_inputs/query_artifacts") as f:
    progs = [prog.strip() for prog in f.readlines()]

artifacts_of_interest = [art for art in artifacts if art['image_tag'] in progs]

# NOTE(review): assumes every value in art['metrics'] is numeric -- confirm
# against the BugSwarm schema.
changes = [(art['image_tag'], sum(art['metrics'].values())) for art in artifacts_of_interest]

for (prog, change) in changes:
    print(f"{prog} : {change}")
open Dai.Import

(* Interface of a binary-relation domain: pairs of abstract [state]s
   interpreted as pre/post conditions, plus sets of such pairs. *)
module type Sig = sig
  type state

  type t = { pre : state; post : state } [@@deriving compare, sexp_of]

  val implies : t -> t -> bool

  include Comparator.S with type t := t

  module Set : sig
    type rel := t

    type t = (rel, comparator_witness) Set.t

    val empty : t

    val singleton : rel -> t

    val of_list : rel list -> t
  end
end

(** binary relation domain functor over state domains -- to be interpreted as Hoare triples *)
module Make (State : Abstract.Dom) : Sig with type state := State.t = struct
  module T = struct
    type t = { pre : State.t; post : State.t } [@@deriving compare, sexp_of]

    (** Standard Hoare-logic rule of consequence:
        contravariant in the precondition, covariant in the postcondition. *)
    let implies l r = State.implies r.pre l.pre && State.implies l.post r.post
  end

  (* Comparator witness for [T], needed to build Core sets of relations. *)
  module T_comparator = struct
    include Comparator.Make (T)
    include T
  end

  module Set = struct
    (* Re-export Core's [Set] operations while fixing the element type to
       relations below. *)
    include (Set : module type of Set with type ('a, 'cmp) t := ('a, 'cmp) Set.t)

    type t = Set.M(T_comparator).t [@@deriving compare]

    let empty = Set.empty (module T_comparator)

    let singleton = Set.singleton (module T_comparator)

    let of_list = Set.of_list (module T_comparator)
  end

  include T_comparator
end
#!/usr/bin/env python3

# Plot a CDF of analysis latencies for the four analysis configurations.
# expect arguments: <output image> <num observations> <batch file> <incr file> <dd file> <ddincr file>
# where each <... file> is one analysis latency observation per row, <num observations> rows
# (argument list reconstructed -- the angle-bracketed text was garbled in this copy)

import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import csv
import sys
import statistics

# Matplotlib default color cycle, named for readability.
blue = '#1f77b4'
orange = '#ff7f0e'
green = '#2ca02c'
red = '#d62728'

observations = int(sys.argv[2])

# Empirical CDF y-values: k/n for k in [0, n).
ys = np.array(range(observations))/float(observations)

# Sorted latencies per configuration, converted from milliseconds to seconds.
batch = np.sort([float(line) for line in open(sys.argv[3])])/float(1000)
incr = np.sort([float(line) for line in open(sys.argv[4])])/float(1000)
dd = np.sort([float(line) for line in open(sys.argv[5])])/float(1000)
dd_incr = np.sort([float(line) for line in open(sys.argv[6])])/float(1000)

plt.rc('text')
plt.rc('font', family='serif',size=16.0)
plt.rc('legend', edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0)

plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15)
plt.rc('xtick.major',width=1.5)
plt.rc('ytick.major',width=1.5)

plt.rc('xtick',labelsize='large')
plt.rc('ytick',labelsize='large')

# Clip the view to latencies in [0, 1] sec and CDF in [0.5, 1].
plt.axis([0,1,0.5,1])

plt.xlabel(r"Analysis Latency (sec)")

plt.xticks([0,0.5,1])
plt.yticks([0.5,0.6,0.7,0.8,0.9,1.0])

plt.plot(batch,ys,color=blue)
plt.plot(incr,ys,color=orange)
plt.plot(dd,ys,color=green)
plt.plot(dd_incr,ys,color=red)

plt.savefig(sys.argv[1],dpi=400, bbox_inches='tight')
4 | # where each is one analysis latency observation per line, line 5 | 6 | import matplotlib as mpl 7 | mpl.use('Agg') 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import csv 11 | import sys 12 | import statistics 13 | 14 | blue = '#1f77b4' 15 | orange = '#ff7f0e' 16 | green = '#2ca02c' 17 | red = '#d62728' 18 | 19 | observations = int(sys.argv[2]) 20 | 21 | ys = np.array(range(observations))/float(observations) 22 | 23 | batch = np.sort([float(line) for line in open(sys.argv[3])])/float(1000) 24 | incr = np.sort([float(line) for line in open(sys.argv[4])])/float(1000) 25 | dd = np.sort([float(line) for line in open(sys.argv[5])])/float(1000) 26 | dd_incr = np.sort([float(line) for line in open(sys.argv[6])])/float(1000) 27 | 28 | plt.rc('text') 29 | plt.rc('font', family='serif',size=16.0) 30 | plt.rc('legend', edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0) 31 | 32 | plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15) 33 | plt.rc('xtick.major',width=1.5) 34 | plt.rc('ytick.major',width=1.5) 35 | 36 | plt.rc('xtick',labelsize='large') 37 | plt.rc('ytick',labelsize='large') 38 | 39 | plt.axis([0,5,0.5,1]) 40 | 41 | plt.xlabel(r"Analysis Latency (sec)") 42 | 43 | plt.xticks([0,1,2,3,4,5],label=True) 44 | plt.yticks([0.5,0.6,0.7,0.8,0.9,1.0]) 45 | 46 | plt.plot(batch,ys,color=blue) 47 | plt.plot(incr,ys,color=orange) 48 | plt.plot(dd,ys,color=green) 49 | plt.plot(dd_incr,ys,color=red) 50 | 51 | plt.savefig(sys.argv[1],dpi=400, bbox_inches='tight') 52 | 53 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:18.04 2 | 3 | ## Grab apt packages as root, including latest OPAM 4 | RUN apt-get update && \ 5 | apt-get install -y software-properties-common gcc libgmp-dev libmpfr-dev cmake m4 python3 python3-matplotlib && \ 6 | add-apt-repository ppa:avsm/ppa && \ 7 | apt-get update && \ 8 | 
apt-get install -y opam ocamlbuild && \ 9 | apt-get clean 10 | 11 | ## Create a new user 12 | RUN useradd -ms /bin/bash pldi && \ 13 | apt-get install -y sudo && \ 14 | adduser pldi sudo && \ 15 | echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers 16 | USER pldi 17 | WORKDIR /home/pldi 18 | 19 | ## Initialize OPAM, create a switch, and install dependencies (disable sandboxing inside of docker sandbox) 20 | RUN opam init --disable-sandboxing --bare && \ 21 | opam switch create d1a ocaml-variants.4.09.0+flambda && \ 22 | opam switch d1a && \ 23 | opam install -y ocamlfind num ppx_deriving apron core graphlib regular shell yojson && \ 24 | opam clean -a -c -s -r --logs && \ 25 | eval $(opam env) 26 | 27 | ## Pull source for adapton, build, and pin as local opam package 28 | RUN git clone https://github.com/plum-umd/adapton.ocaml.git && \ 29 | make -C adapton.ocaml install 30 | 31 | ## Pull source for d1a, build 32 | ARG D1A_VERSION=unknown 33 | RUN git clone https://github.com/cuplv/d1a_impl.git -b artifact && \ 34 | PATH=/home/pldi/.opam/d1a/bin:$PATH make -C d1a_impl build clean && \ 35 | mkdir -p d1a_impl/out/experiments d1a_impl/out/plots d1a_impl/out/daig d1a_impl/out/cfg d1a_impl/out/log && \ 36 | tar xf d1a_impl/semantic.tar.gz && \ 37 | sudo mv semantic /usr/bin 38 | 39 | RUN opam env >> /home/pldi/.bashrc 40 | 41 | CMD ["/bin/bash"] 42 | -------------------------------------------------------------------------------- /test_cases/java/Literals.java: -------------------------------------------------------------------------------- 1 | class Literals { 2 | public static void main(String[] args) { 3 | // various syntactic forms of literal 4 | // most examples drawn from the JLS, see here: 5 | // https://docs.oracle.com/javase/specs/jls/se7/html/jls-3.html#jls-3.10.4 6 | 7 | // bools 8 | boolean t = true; 9 | boolean f = false; 10 | 11 | // ints/longs 12 | int a = 42; // decimal 13 | int b = 0x2A; // hex 14 | int c = 052; // octal 15 | int d = 0b00101010; // 
#!/usr/bin/env python3
import sys
import os

# Queries BugSwarm for suitable pass/fail program pairs (Java, non-flaky,
# NPE-classified) and, for each matching artifact, UPDATES the existing
# checkouts under
#   ./_bugswarm/<image_tag>/{pass, fail}/
# by git-fetching and checking out the trigger SHAs of the passed/failed jobs.
# NOTE(review): this script does not clone -- the per-artifact pass/fail git
# checkouts must already exist; the mkdir below only creates the parent dir.

if len(sys.argv) < 2:
    print("ERROR: No API token passed via command-line argument.")
    sys.exit(1)

from bugswarm.common.rest_api.database_api import DatabaseAPI

bugswarmapi = DatabaseAPI(token=sys.argv[1])

# Server-side filter, assembled as a raw JSON (Mongo-style) query string.
api_filter = (
    '{"lang":{"$in":["Java"]},' + # JAVA source language
    '"stability":"5/5",' + # non-flaky
    '"classification.exceptions":["NullPointerException"],' + # NPE error type
    '"metrics.changes":{"$lt":1000}}' # diff touches at most 1000 lines of code
)

artifacts = bugswarmapi.filter_artifacts(api_filter)

print("found artifacts: " + str(len(artifacts)))

if not os.path.isdir("_bugswarm/"):
    print("_bugswarm directory does not exist, attempting to create")
    os.mkdir("_bugswarm")

for a in artifacts:
    print("IMAGE TAG: " + a["image_tag"])
    print("fetching+checkouting trigger_sha's ...")
    image_dir = "_bugswarm/" + a["image_tag"]
    # Shallow-fetch exactly the trigger commit for each side, then check it out.
    os.system("cd {}/pass && git fetch --depth 1 origin {} 2> /dev/null && git checkout {} && cd -".format(image_dir, a["passed_job"]["trigger_sha"], a["passed_job"]["trigger_sha"]))
    os.system("cd {}/fail && git fetch --depth 1 origin {} 2> /dev/null && git checkout {} && cd -".format(image_dir, a["failed_job"]["trigger_sha"], a["failed_job"]["trigger_sha"]))
NONNULL ; o2.f -> NULL ; o2.g -> NULL} 29 | 30 | MyObj o3 = new MyObj(o2, o2); 31 | 32 | // should have: { o1 -> NULL ; o2 -> NONNULL ; o2.f -> NULL ; o2.g -> NULL ; o3 -> NONNULL ; o3.f -> NONNULL ; o3.g -> NONNULL} 33 | 34 | bar(o1); 35 | 36 | // should have: bottom 37 | } 38 | 39 | // assuming you ONLY pass the parameters' nullability and not their fields' nullability 40 | // at callsites, should have two summaries: 41 | // pre:{o -> NULL} post:{o -> NULL} 42 | // pre:{o -> NONNULL} post:{o -> NONNULL ; a -> TOP } 43 | // (let's do this for now, can look into passing fields' nullability if needed later) 44 | static void foo(MyObj o) { 45 | if (o != null) { 46 | Object a = o.f; 47 | } else { 48 | /*skip*/; 49 | } 50 | } 51 | 52 | //should have two summaries: 53 | // pre:{o -> NULL} post:bottom 54 | // pre:{o -> NONNULL} post:{o -> NONNULL} 55 | static void bar(MyObj o) { 56 | o.doThing(); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/frontend/tree_diff.mli: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Syntax 3 | open Tree_sitter_java 4 | 5 | type edit = 6 | | Add_function of { 7 | method_id : Method_id.t; 8 | decl : CST.class_body_declaration; 9 | init_info : init_info; 10 | } 11 | | Delete_function of { method_id : Method_id.t } 12 | | Modify_function of { method_id : Method_id.t; new_header : CST.method_header } 13 | | Add_statements of { method_id : Method_id.t; at_loc : Cfg.Loc.t; stmts : CST.statement list } 14 | | Modify_statements of { 15 | method_id : Method_id.t; 16 | from_loc : Cfg.Loc.t; 17 | to_loc : Cfg.Loc.t; 18 | new_stmts : CST.statement list; 19 | } 20 | | Modify_header of { 21 | method_id : Method_id.t; 22 | prev_loc_ctx : Loc_map.loc_ctx; 23 | next_stmt : CST.statement; 24 | loop_body_exit : Cfg.Loc.t option; 25 | } 26 | | Delete_statements of { method_id : Method_id.t; from_loc : Cfg.Loc.t; to_loc : Cfg.Loc.t } 27 | 28 | 
and init_info = { instance_init : CST.program option; field_decls : CST.field_declaration list } 29 | 30 | val method_id_of_edit : edit -> Method_id.t 31 | 32 | type t = edit list 33 | 34 | val pp : t Import.pp 35 | 36 | val btwn : Loc_map.t -> prev:Tree.java_cst -> next:Tree.java_cst -> t 37 | 38 | val apply : 39 | Class_hierarchy.t -> t -> Loc_map.t -> Cfg.t Cfg.Fn.Map.t -> Loc_map.t * Cfg.t Cfg.Fn.Map.t 40 | 41 | type cfg_edit_result = { 42 | cfg : Cfg.t; 43 | new_loc_map : Loc_map.t; 44 | added_edges : Cfg_parser.edge list; 45 | deleted_edges : Cfg.G.edge list; 46 | added_loc : Cfg.Loc.t option; 47 | added_for_loop_backedge : Cfg_parser.edge option; 48 | } 49 | 50 | val apply_edit : edit -> Loc_map.t -> Cfg.t -> ret:Cfg.Loc.t -> exc:Cfg.Loc.t -> cfg_edit_result 51 | -------------------------------------------------------------------------------- /src/frontend/loc_map.mli: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Syntax 3 | open Cfg 4 | 5 | type loc_ctx = { entry : Loc.t; exit : Loc.t; ret : Loc.t; exc : Loc.t } 6 | (** the necessary context to build a CFG region for some code: an entry location, exit location, and containing-method return location and exceptional-exit location *) 7 | 8 | type t 9 | (** a cache containing the CFG-location context for each statement of a parse tree *) 10 | 11 | val empty : t 12 | (** an empty cache *) 13 | 14 | type 'a or_collision = [ `Ok of 'a | `Collision ] 15 | 16 | val add : Method_id.t -> Tree_sitter_java.CST.statement -> loc_ctx -> t -> t or_collision 17 | (** add [loc_ctx] for a [statement] in a method identified by the [Method_id.t]. 18 | If that statement is syntactically identical to another statement in the method, return [`Collision]. 
19 | *) 20 | 21 | val get : Method_id.t -> Tree_sitter_java.CST.statement -> t -> loc_ctx 22 | (** get the [loc_ctx] for some [statement] in a method identified by the [Method_id.t] *) 23 | 24 | val remove : Method_id.t -> Tree_sitter_java.CST.statement -> t -> t 25 | (** remove the [loc_ctx] for some [statement] in a method identified by the [Method_id.t] *) 26 | 27 | val remove_fn : t -> Method_id.t -> t 28 | (** remove the [loc_ctx] for an entire method identified by the [Method_id.t] *) 29 | 30 | val remove_region : Method_id.t -> Syntax.Cfg.Loc.Set.t -> t -> t 31 | (** remove all cached [loc_ctx]'s within the given CFG region (for a method identified by the [Method_id.t]) *) 32 | 33 | val rebase_edges : Method_id.t -> old_src:Loc.t -> new_src:Loc.t -> t -> t 34 | (** for any [loc_ctx] where [entry]==[old_src], change [entry] to [new_src] (for handling statement insertion edits) *) 35 | 36 | val union : t -> t -> t 37 | 38 | val pp : t pp 39 | -------------------------------------------------------------------------------- /src/analysis/domain/shape/memloc.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 3 | module T : sig 4 | type t [@@deriving equal, compare, hash, sexp] 5 | 6 | val fresh : unit -> t 7 | 8 | val pp : t pp 9 | 10 | val null : t 11 | 12 | val of_int : int -> t 13 | 14 | val to_string : t -> string 15 | end = struct 16 | type t = int [@@deriving equal, compare, hash, sexp] 17 | 18 | let next = ref 0 19 | 20 | let fresh () = 21 | let curr = !next in 22 | next := curr + 1; 23 | curr 24 | 25 | let null = -1 26 | 27 | let of_int i = 28 | next := Int.max !next (succ i); 29 | i 30 | 31 | let pp fs a = if equal a null then Format.fprintf fs "null" else Format.fprintf fs "a%i" a 32 | 33 | let to_string a = 34 | Format.fprintf Format.str_formatter "%a" pp a; 35 | Format.flush_str_formatter () 36 | end 37 | 38 | include T 39 | 40 | module T_comp : sig 41 | type t = T.t 42 | 43 | type 
comparator_witness 44 | 45 | val comparator : (t, comparator_witness) Comparator.t 46 | end = struct 47 | include T 48 | include Comparable.Make (T) 49 | end 50 | 51 | module Map = struct 52 | include (Map : module type of Map with type ('key, 'value, 'cmp) t := ('key, 'value, 'cmp) Map.t) 53 | 54 | type 'v t = 'v Map.M(T_comp).t 55 | 56 | let empty = Map.empty (module T_comp) 57 | end 58 | 59 | module Set = struct 60 | include (Set : module type of Set with type ('a, 'cmp) t := ('a, 'cmp) Set.t) 61 | 62 | type t = Set.M(T_comp).t [@@deriving compare] 63 | 64 | let empty = Set.empty (module T_comp) 65 | 66 | let singleton = Set.singleton (module T_comp) 67 | end 68 | 69 | module Labelled_pair = struct 70 | type t = T.t * T.t * string [@@deriving equal, compare, hash, sexp] 71 | 72 | let pp fs (x, y, lbl) = Format.fprintf fs "%s(%a,%a)" lbl T.pp x T.pp y 73 | end 74 | -------------------------------------------------------------------------------- /src/analysis/domain/null_val.ml: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Import 3 | open Syntax 4 | 5 | (* use Make_env_with_heap to get a domain *) 6 | type nullness = Top | Null | NotNull | Bot [@@deriving hash, equal, sexp, compare] 7 | 8 | type t = nullness [@@deriving hash, equal, sexp, compare] 9 | 10 | let is_null_or_bot = function Null | Bot -> true | _ -> false 11 | 12 | let is_null_or_top = function Null | Top -> true | _ -> false 13 | 14 | let join = function 15 | | Top -> fun _ -> Top 16 | | Bot -> fun n2 -> n2 17 | | Null -> ( function Top -> Top | Null -> Null | NotNull -> Top | Bot -> Null) 18 | | NotNull -> ( function Top -> Top | Null -> Top | NotNull -> NotNull | Bot -> NotNull) 19 | 20 | let widen = join 21 | 22 | let implies n1 n2 = 23 | match (n1, n2) with 24 | | Bot, _ -> true 25 | | _, Top -> true 26 | | NotNull, NotNull -> true 27 | | Null, Null -> true 28 | | _, _ -> false 29 | 30 | let ( <= ) = implies 31 | 32 | (* Because after the 
operation, we get a nonnull value, or an exception, which is an error *) 33 | let eval_unop _uop _n = NotNull 34 | 35 | let eval_binop _n1 _binop _n2 = NotNull 36 | 37 | let of_lit = function Ast.Lit.Null -> Null | _ -> NotNull 38 | 39 | let models nullness lit = 40 | match nullness with 41 | | Bot -> false 42 | | Top -> true 43 | | Null -> phys_equal lit Ast.Lit.Null 44 | | NotNull -> not (phys_equal lit Ast.Lit.Null) 45 | 46 | let truthiness = function Null | Bot -> `Neither | _ -> `Either 47 | 48 | (* boilerplate *) 49 | let sanitize = Fn.id 50 | 51 | let pp fs = function 52 | | Top -> Format.pp_print_string fs "Top" 53 | | Bot -> Format.pp_print_string fs "Bot" 54 | | Null -> Format.pp_print_string fs "Null" 55 | | NotNull -> Format.pp_print_string fs "NotNull" 56 | 57 | let show x = 58 | pp Format.str_formatter x; 59 | Format.flush_str_formatter () 60 | 61 | let hash = seeded_hash 62 | -------------------------------------------------------------------------------- /src/analysis/domain/product.ml: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Import 3 | 4 | module Make (L : Abstract.Dom) (R : Abstract.Dom) : Abstract.Dom = struct 5 | type t = L.t * R.t [@@deriving compare, equal, hash, sexp] 6 | 7 | let pp fs (l, r) = Format.fprintf fs "(%a, %a)" L.pp l R.pp r 8 | 9 | let top () = (L.top (), R.top ()) 10 | 11 | let bottom () = (L.bottom (), R.bottom ()) 12 | 13 | let init () = (L.init (), R.init ()) 14 | 15 | let sanitize (l, r) = (L.sanitize l, R.sanitize r) 16 | 17 | let show x = Format.asprintf "%a" pp x 18 | 19 | let hash seed (l, r) = seeded_hash (seeded_hash seed l) r 20 | 21 | let hash_fold_t h x = Ppx_hash_lib.Std.Hash.fold_int h (hash 31 x) 22 | 23 | let interpret stmt (l, r) = 24 | let l = L.interpret stmt l in 25 | let r = R.interpret stmt r in 26 | (l, r) 27 | 28 | let implies (l1, r1) (l2, r2) = L.implies l1 l2 && R.implies r1 r2 29 | 30 | let ( <= ) = implies 31 | 32 | let join (l1, r1) 
(l2, r2) = (L.join l1 l2, R.join r1 r2) 33 | 34 | let widen (l1, r1) (l2, r2) = (L.widen l1 l2, R.widen r1 r2) 35 | 36 | let is_bot (l, r) = L.is_bot l || R.is_bot r 37 | 38 | let call ~callee ~callsite ~caller_state:(l, r) ~fields = 39 | ( L.call ~callee ~callsite ~caller_state:l ~fields, 40 | R.call ~callee ~callsite ~caller_state:r ~fields ) 41 | 42 | let return ~callee ~caller ~callsite ~caller_state ~return_state ~fields = 43 | ( L.return ~callee ~caller ~callsite ~caller_state:(fst caller_state) 44 | ~return_state:(fst return_state) ~fields, 45 | R.return ~callee ~caller ~callsite ~caller_state:(snd caller_state) 46 | ~return_state:(snd return_state) ~fields ) 47 | 48 | let approximate_missing_callee ~caller_state:(l, r) ~callsite = 49 | ( L.approximate_missing_callee ~caller_state:l ~callsite, 50 | R.approximate_missing_callee ~caller_state:r ~callsite ) 51 | end 52 | -------------------------------------------------------------------------------- /src/syntax/declared_fields.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 3 | type fields = { static : String.Set.t; instance : String.Set.t } 4 | 5 | type t = fields String.Map.t 6 | (** for each application class, keep a set of instance field names on that class *) 7 | 8 | let pp_fields fs flds = 9 | let pp_set = Set.pp String.pp in 10 | Format.fprintf fs "static: %a; instance: %a" pp_set flds.static pp_set flds.instance 11 | 12 | let pp : t pp = Map.pp String.pp pp_fields 13 | 14 | let empty = String.Map.empty 15 | 16 | let merge : t -> t -> t = 17 | Map.merge ~f:(fun ~key:_ -> function 18 | | `Right v | `Left v -> Some v | `Both _ -> failwith "class name collision") 19 | 20 | let lookup_cid ~class_id = 21 | flip Map.find class_id >> function 22 | | Some fs -> fs 23 | | None -> { static = String.Set.empty; instance = String.Set.empty } 24 | 25 | let lookup ~package ~class_name = 26 | let class_id = String.(concat ~sep:"." package ^ "." 
^ class_name) in 27 | lookup_cid ~class_id 28 | 29 | let lookup_static ~package ~class_name fs = 30 | lookup ~package ~class_name fs |> fun { static; _ } -> static 31 | 32 | let lookup_instance ~package ~class_name fs = 33 | lookup ~package ~class_name fs |> fun { instance; _ } -> instance 34 | 35 | let pointwise_union fs1 fs2 = 36 | let static = String.Set.union fs1.static fs2.static in 37 | let instance = String.Set.union fs1.instance fs2.instance in 38 | { static; instance } 39 | 40 | let add_cid ~class_id ~fields df = 41 | lookup_cid df ~class_id |> pointwise_union fields |> fun data -> 42 | String.Map.set df ~key:class_id ~data 43 | 44 | let add ~package ~class_name ~fields : t -> t = 45 | let class_id = String.(concat ~sep:"." package ^ "." ^ class_name) in 46 | add_cid ~class_id ~fields 47 | 48 | let current_fields = ref empty 49 | 50 | let set_current_fields (fields : t) = current_fields := fields 51 | 52 | let get_current_fields () = !current_fields 53 | -------------------------------------------------------------------------------- /src/frontend/callgraph.mli: -------------------------------------------------------------------------------- 1 | open Syntax 2 | 3 | type forward_t 4 | (** regular callgraphs, mapping callers to callees *) 5 | 6 | type reverse_t 7 | (** reverse callgraphs, mapping callees to callers -- this can be done with a regular callgraph, but 8 | it is expensive so we hide the operation behind this opaque type to ensure that the callgraph is 9 | reversed once-and-for-all rather than each time callers are needed *) 10 | 11 | type scc 12 | (** strongly-connected components of a callgraph, to detect mutual recursion *) 13 | 14 | type t = { forward : forward_t; reverse : reverse_t; scc : scc option } 15 | 16 | val empty : t 17 | 18 | val add : caller:Cfg.Fn.t -> callee:Cfg.Fn.t -> t -> t 19 | 20 | val deserialize : fns:Cfg.Fn.t list -> Src_file.t -> t 21 | (** deserialize a forward callgraph as in [deserialize_forward], and construct the 
reversed version and strongly-connected components partition*) 22 | 23 | val deserialize_forward : fns:Cfg.Fn.t list -> Src_file.t -> forward_t 24 | (** construct an internal representation of a serialized callgraph generated using github.com/bennostein/WALA-callgraph; 25 | (use the provided pool of [fns] to resolve callee [Method_id]'s to corresponding [Cfg.Fn]'s at deserialization-time rather 26 | than keeping a Method_id -> Method_id callgraph and resolving method_id's to fn's at analysis-time 27 | *) 28 | 29 | val filter : fns:Cfg.Fn.t list -> Src_file.t -> unit 30 | (** write to stdout the provided serialized callgraph, stripped of any edges for which there is no corresponding function in [fns] *) 31 | 32 | val callees : callsite:Ast.Stmt.t -> caller_method:Method_id.t -> cg:forward_t -> Cfg.Fn.t list 33 | 34 | val is_syntactically_compatible : Ast.Stmt.t -> Cfg.Fn.t -> bool 35 | 36 | val is_mutually_recursive : scc option -> Cfg.Fn.t -> Cfg.Fn.t -> bool 37 | 38 | val methods_mutually_recursive : scc option -> Method_id.t -> Method_id.t -> bool 39 | 40 | val reverse : fns:Cfg.Fn.t list -> forward_t -> reverse_t 41 | 42 | val callers : callee_method:Method_id.t -> reverse_cg:reverse_t -> Cfg.Fn.t list 43 | 44 | val dump_dot : filename:string -> forward_t -> unit 45 | -------------------------------------------------------------------------------- /src/analysis/dsg.mli: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Domain 3 | open Frontend 4 | open Syntax 5 | 6 | (** Demanded Summarization Graph : per-procedure DAIGs interoperating to perform summary-based interprocedural analysis *) 7 | module Make (Dom : Abstract.Dom) : sig 8 | module Dom : module type of Abstract.DomWithDataStructures (Dom) 9 | 10 | module D : Daig.Sig with type absstate := Dom.t 11 | (** underlying intra-procedural DAIGs *) 12 | 13 | module Q : Query.Sig with type state := Dom.t 14 | (** summary queries *) 15 | 16 | (*module R : 
Relation.Sig with type state := Dom.t*) 17 | (** relations over abstract states, representing summaries *) 18 | 19 | type t = (Cfg.t * D.t Dom.Map.t) Cfg.Fn.Map.t 20 | 21 | val print_summaries : ?num_summaries:int -> t pp 22 | 23 | val print_stats : t pp 24 | 25 | val apply_edit : cha:Class_hierarchy.t -> diff:Tree_diff.t -> Loc_map.t -> t -> Loc_map.t * t 26 | (** apply a syntactic edit to a DSG, updating a Loc_map in the process *) 27 | 28 | val init : cfgs:Cfg.t Cfg.Fn.Map.t -> t 29 | (** construct the initial DSG for some collection of procedure CFGs *) 30 | 31 | val add_exn : cfgs:Cfg.t Cfg.Fn.Map.t -> t -> t 32 | (** add some new cfgs to this DSG *) 33 | 34 | val fns : t -> Cfg.Fn.t list 35 | 36 | val dump_dot : filename:string -> ?num_daigs:int -> t -> unit 37 | (** dump a DOT representation of a DSG to [filename] *) 38 | 39 | val query : 40 | fn:Cfg.Fn.t -> 41 | entry_state:Dom.t -> 42 | loc:Cfg.Loc.t -> 43 | cg:Callgraph.t -> 44 | fields:Declared_fields.t -> 45 | t -> 46 | Dom.t * t 47 | (** query for the abstract state at some [loc] under some [entry_state] precondition *) 48 | 49 | val loc_only_query : 50 | fn:Cfg.Fn.t -> 51 | loc:Cfg.Loc.t -> 52 | cg:Callgraph.t -> 53 | fields:Declared_fields.t -> 54 | entrypoints:Cfg.Fn.t list -> 55 | t -> 56 | Dom.t list * t 57 | (** query for the abstract state at some [loc] under _any_ reachable [entry_state] precondition, 58 | exploring back to the specified [entrypoints] 59 | *) 60 | 61 | val drop_daigs : t -> t 62 | 63 | val dirty_interproc_deps : ?ctx:Dom.t -> Cfg.Fn.t -> t -> t 64 | 65 | val f0_daigs : t -> D.t list 66 | 67 | val check_deps : t -> unit 68 | end 69 | -------------------------------------------------------------------------------- /src/syntax/alloc_site.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 3 | module T : sig 4 | type t [@@deriving equal, compare, hash, sexp] 5 | 6 | include Adapton.Data.S with type t := t 7 | 8 | val hash : 
int -> t -> int 9 | 10 | val fresh : unit -> t 11 | 12 | val reset : unit -> unit 13 | 14 | val sanitize : t -> t 15 | 16 | val pp : t pp 17 | 18 | val of_int : int -> t 19 | 20 | val of_varargs : Method_id.t -> t 21 | end = struct 22 | type t = int [@@deriving equal, compare, hash, sexp] 23 | 24 | let next = ref 0 25 | 26 | let hash = seeded_hash 27 | 28 | let fresh () = 29 | let curr = !next in 30 | next := curr + 1; 31 | curr 32 | 33 | let pp fs a = Format.fprintf fs "alloc_%i" a 34 | 35 | let reset () = next := 0 36 | 37 | let sanitize x = x 38 | 39 | let show x = 40 | Format.( 41 | pp str_formatter x; 42 | flush_str_formatter ()) 43 | 44 | let of_int = Fn.id 45 | 46 | let varargs_alloc_sites : t Method_id.Map.t ref = ref Method_id.Map.empty 47 | 48 | let of_varargs method_ = 49 | match Map.find !varargs_alloc_sites method_ with 50 | | Some alloc_site -> alloc_site 51 | | None -> 52 | let alloc_site = fresh () in 53 | varargs_alloc_sites := Map.add_exn !varargs_alloc_sites ~key:method_ ~data:alloc_site; 54 | alloc_site 55 | end 56 | 57 | module T_comparator = struct 58 | include T 59 | include Comparator.Make (T) 60 | end 61 | 62 | include T_comparator 63 | 64 | module Set = struct 65 | include (Set : module type of Set with type ('a, 'cmp) t := ('a, 'cmp) Set.t) 66 | 67 | type t = Set.M(T_comparator).t [@@deriving compare, equal, hash, sexp] 68 | 69 | let empty = Set.empty (module T_comparator) 70 | 71 | let singleton = Set.singleton (module T_comparator) 72 | 73 | let sanitize xs = xs 74 | 75 | let pp fs = to_list >> (List.pp ", " ~pre:"{" ~suf:"}" T.pp) fs 76 | 77 | let show x = 78 | pp Format.str_formatter x; 79 | Format.flush_str_formatter () 80 | end 81 | 82 | module Map = struct 83 | include ( 84 | Base.Map : 85 | module type of Base.Map with type ('key, 'value, 'cmp) t := ('key, 'value, 'cmp) Base.Map.t) 86 | 87 | type 'v t = 'v Base.Map.M(T_comparator).t 88 | 89 | let empty = Base.Map.empty (module T_comparator) 90 | end 91 | 
-------------------------------------------------------------------------------- /scripts/cdf_full.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # expect arguments: <output-file> <batch-data> <batch-count> <incr-data> <incr-count> <dd-data> <dd-count> <dd_incr-data> <dd_incr-count>
4 | # where each is a file with one analysis latency observation per line, lines 5 | 6 | import matplotlib as mpl 7 | mpl.use('Agg') 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import csv 11 | import sys 12 | import statistics 13 | 14 | blue = '#1f77b4' 15 | orange = '#ff7f0e' 16 | green = '#2ca02c' 17 | red = '#d62728' 18 | 19 | 20 | batch_observations = int(sys.argv[3]) 21 | batch_ys = np.array(range(batch_observations))/float(batch_observations) 22 | incr_observations = int(sys.argv[5]) 23 | incr_ys = np.array(range(incr_observations))/float(incr_observations) 24 | dd_observations = int(sys.argv[7]) 25 | dd_ys = np.array(range(dd_observations))/float(dd_observations) 26 | dd_incr_observations = int(sys.argv[9]) 27 | dd_incr_ys = np.array(range(dd_incr_observations))/float(dd_incr_observations) 28 | 29 | batch = np.sort([float(line) for line in open(sys.argv[2])])/float(1000) 30 | incr = np.sort([float(line) for line in open(sys.argv[4])])/float(1000) 31 | dd = np.sort([float(line) for line in open(sys.argv[6])])/float(1000) 32 | dd_incr = np.sort([float(line) for line in open(sys.argv[8])])/float(1000) 33 | 34 | plt.rc('text') 35 | plt.rc('font', family='serif',size=16.0) 36 | plt.rc('legend', edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0) 37 | 38 | plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15) 39 | plt.rc('xtick.major',width=1.5) 40 | plt.rc('ytick.major',width=1.5) 41 | 42 | plt.rc('xtick',labelsize='large') 43 | plt.rc('ytick',labelsize='large') 44 | 45 | plt.yticks([0.6,0.7,0.8,0.9,1.0]) 46 | plt.xlabel(r"Analysis Latency (sec)") 47 | 48 | ## LINEAR X AXIS 49 | plt.axis([0,60,0.6,1]) 50 | plt.xticks([0,15,30,45,60]) 51 | plt.plot(batch,batch_ys,color=blue) 52 | plt.plot(incr,incr_ys,color=orange) 53 | plt.plot(dd,dd_ys,color=green) 54 | plt.plot(dd_incr,dd_incr_ys,color=red) 55 | 56 | ## LOG X AXIS 57 | #plt.axis([1.0,100,0.6,1]) 58 | #plt.semilogx(batch,batch_ys,color=blue) 59 | 
#plt.semilogx(incr,incr_ys,color=orange) 60 | #plt.semilogx(dd,dd_ys,color=green) 61 | #plt.semilogx(dd_incr,dd_incr_ys,color=red) 62 | 63 | plt.savefig(sys.argv[1],dpi=400, bbox_inches='tight') 64 | 65 | -------------------------------------------------------------------------------- /scripts/scatter_config_ktt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Expects to run as ./scatter_config_ktt.py 4 | # where: 5 | # - output-file is a .png to output the scatter plot to 6 | # - config is in {batch, dd, incr, dd_incr} and controls the color/label of the plot 7 | # - columns is the number of columns in the input data 8 | # - input data (on stdin) is a csv where each column is the log output by one run of the experimental harness (../run_d1a_experiment) 9 | 10 | import matplotlib as mpl 11 | mpl.use('Agg') 12 | import matplotlib.pyplot as plt 13 | import csv 14 | import sys 15 | import statistics 16 | 17 | blue = '#1f77b4' 18 | orange = '#ff7f0e' 19 | green = '#2ca02c' 20 | red = '#d62728' 21 | 22 | output_file = sys.argv[1] 23 | if sys.argv[2] == "batch": 24 | color = blue 25 | label = "Batch" 26 | elif sys.argv[2] == "dd": 27 | color = green 28 | label = "Demand-Driven" 29 | elif sys.argv[2] == "incr": 30 | color = orange 31 | label = "Incremental" 32 | elif sys.argv[2] == "dd_incr": 33 | color = red 34 | label = "Incremental \& Demand-Driven" 35 | else: 36 | raise Exception("Unknown configuration: " + sys.argv[2]) 37 | runs = int(sys.argv[3]) 38 | 39 | with sys.stdin as csvfile: 40 | data = csv.reader(csvfile, delimiter=',') 41 | x_coords = [] 42 | 43 | raw = [[] for i in range(runs)] 44 | 45 | for idx,row in enumerate(data): 46 | x_coords.append(int(idx)) 47 | for i in range(runs): 48 | try: 49 | raw[i].append(float(row[i])) 50 | except: 51 | raw[i].append(0.0) 52 | 53 | plt.rc('text') 54 | plt.rc('font', family='serif',size=16.0) 55 | plt.rc('legend', 
edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0) 56 | 57 | plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15.0) 58 | plt.rc('xtick.major',width=1.5) 59 | plt.rc('ytick.major',width=1.5) 60 | 61 | plt.rc('xtick',labelsize='large') 62 | plt.rc('ytick',labelsize='large') 63 | 64 | #linear axes 65 | #plt.axis([0,3000,0,30000]) 66 | plt.axis([0,100,0,1000]) 67 | 68 | plt.ylabel(r"Analysis Time (ms)") 69 | plt.xlabel(r"Cumulative Program Edits") 70 | 71 | plt.xticks([0,50,100]) 72 | plt.yticks([0,500,1000]) 73 | 74 | for i in range(runs): 75 | plt.scatter(x_coords, raw[i], s=0.08, alpha=0.3, color=color,marker=",") 76 | 77 | plt.plot([1], [1],color=color, label=label) 78 | 79 | plt.savefig(output_file, dpi=400, bbox_inches='tight') 80 | 81 | -------------------------------------------------------------------------------- /scripts/scatter_config_small.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Expects to run as ./scatter_config.py 4 | # where: 5 | # - output-file is a .png to output the scatter plot to 6 | # - config is in {batch, dd, incr, dd_incr} and controls the color/label of the plot 7 | # - columns is the number of columns in the input data 8 | # - input data (on stdin) is a csv where each column is the log output by one run of the experimental harness (../run_d1a_experiment) 9 | 10 | import matplotlib as mpl 11 | mpl.use('Agg') 12 | import matplotlib.pyplot as plt 13 | import csv 14 | import sys 15 | import statistics 16 | 17 | blue = '#1f77b4' 18 | orange = '#ff7f0e' 19 | green = '#2ca02c' 20 | red = '#d62728' 21 | 22 | output_file = sys.argv[1] 23 | if sys.argv[2] == "batch": 24 | color = blue 25 | label = "Batch" 26 | elif sys.argv[2] == "dd": 27 | color = green 28 | label = "Demand-Driven" 29 | elif sys.argv[2] == "incr": 30 | color = orange 31 | label = "Incremental" 32 | elif sys.argv[2] == "dd_incr": 33 | color = red 34 | label = "Incremental \& 
Demand-Driven" 35 | else: 36 | raise Exception("Unknown configuration: " + sys.argv[2]) 37 | runs = int(sys.argv[3]) 38 | 39 | with sys.stdin as csvfile: 40 | data = csv.reader(csvfile, delimiter=',') 41 | x_coords = [] 42 | 43 | raw = [[] for i in range(runs)] 44 | 45 | for idx,row in enumerate(data): 46 | x_coords.append(int(idx)) 47 | for i in range(runs): 48 | try: 49 | raw[i].append(float(row[i])) 50 | except: 51 | raw[i].append(0.0) 52 | 53 | plt.rc('text') 54 | plt.rc('font', family='serif',size=16.0) 55 | plt.rc('legend', edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0) 56 | 57 | plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15.0) 58 | plt.rc('xtick.major',width=1.5) 59 | plt.rc('ytick.major',width=1.5) 60 | 61 | plt.rc('xtick',labelsize='large') 62 | plt.rc('ytick',labelsize='large') 63 | 64 | #linear axes 65 | #plt.axis([0,3000,0,30000]) 66 | plt.axis([0,500,0,10000]) 67 | 68 | plt.ylabel(r"Analysis Time (ms)") 69 | plt.xlabel(r"Cumulative Program Edits") 70 | 71 | plt.xticks([0,250,500],label=True) 72 | plt.yticks([0,5000,10000],label=True) 73 | 74 | for i in range(runs): 75 | plt.scatter(x_coords, raw[i], s=0.08, alpha=0.3, color=color,marker=",") 76 | 77 | plt.plot([1], [1],color=color, label=label) 78 | 79 | plt.savefig(output_file, dpi=400) 80 | 81 | -------------------------------------------------------------------------------- /scripts/scatter_config_full.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Expects to run as ./scatter_config_full.py 4 | # where: 5 | # - output-file is a .png to output the scatter plot to 6 | # - config is in {batch, dd, incr, dd_incr} and controls the color/label of the plot 7 | # - columns is the number of columns in the input data 8 | # - input data (on stdin) is a csv where each column is the log output by one run of the experimental harness (../run_d1a_experiment) 9 | 10 | import matplotlib as mpl 11 | 
mpl.use('Agg') 12 | import matplotlib.pyplot as plt 13 | import csv 14 | import sys 15 | import statistics 16 | 17 | blue = '#1f77b4' 18 | orange = '#ff7f0e' 19 | green = '#2ca02c' 20 | red = '#d62728' 21 | 22 | output_file = sys.argv[1] 23 | if sys.argv[2] == "batch": 24 | color = blue 25 | label = "Batch" 26 | elif sys.argv[2] == "dd": 27 | color = green 28 | label = "Demand-Driven" 29 | elif sys.argv[2] == "incr": 30 | color = orange 31 | label = "Incremental" 32 | elif sys.argv[2] == "dd_incr": 33 | color = red 34 | label = "Incremental \& Demand-Driven" 35 | else: 36 | raise Exception("Unknown configuration: " + sys.argv[2]) 37 | runs = int(sys.argv[3]) 38 | 39 | with sys.stdin as csvfile: 40 | data = csv.reader(csvfile, delimiter=',') 41 | x_coords = [] 42 | 43 | raw = [[] for i in range(runs)] 44 | 45 | for idx,row in enumerate(data): 46 | x_coords.append(int(idx)) 47 | for i in range(runs): 48 | try: 49 | raw[i].append(float(row[i])) 50 | except: 51 | raw[i].append(0.0) 52 | 53 | plt.rc('text') 54 | plt.rc('font', family='serif',size=16.0) 55 | plt.rc('legend', edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0) 56 | 57 | plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15.0) 58 | plt.rc('xtick.major',width=1.5) 59 | plt.rc('ytick.major',width=1.5) 60 | 61 | plt.rc('xtick',labelsize='large') 62 | plt.rc('ytick',labelsize='large') 63 | 64 | #linear axes 65 | plt.axis([0,3000,0,10000]) 66 | 67 | 68 | plt.ylabel(r"Analysis Time (ms)") 69 | plt.xlabel(r"Cumulative Program Edits") 70 | 71 | #plt.xticks([0,1000,2000,3000]) 72 | #plt.yticks([0,5000,10000,15000,20000,25000,30000]) 73 | 74 | for i in range(runs): 75 | plt.scatter(x_coords, raw[i], s=0.08, alpha=0.2, color=color,marker=",") 76 | 77 | plt.plot([1], [1],color=color, label=label) 78 | 79 | plt.savefig(output_file, dpi=400, bbox_inches='tight') 80 | 81 | -------------------------------------------------------------------------------- /scripts/scatter_config_large.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Expects to run as ./scatter_config.py 4 | # where: 5 | # - output-file is a .png to output the scatter plot to 6 | # - config is in {batch, dd, incr, dd_incr} and controls the color/label of the plot 7 | # - columns is the number of columns in the input data 8 | # - input data (on stdin) is a csv where each column is the log output by one run of the experimental harness (../run_d1a_experiment) 9 | 10 | import matplotlib as mpl 11 | mpl.use('Agg') 12 | import matplotlib.pyplot as plt 13 | import csv 14 | import sys 15 | import statistics 16 | 17 | blue = '#1f77b4' 18 | orange = '#ff7f0e' 19 | green = '#2ca02c' 20 | red = '#d62728' 21 | 22 | output_file = sys.argv[1] 23 | if sys.argv[2] == "batch": 24 | color = blue 25 | label = "Batch" 26 | elif sys.argv[2] == "dd": 27 | color = green 28 | label = "Demand-Driven" 29 | elif sys.argv[2] == "incr": 30 | color = orange 31 | label = "Incremental" 32 | elif sys.argv[2] == "dd_incr": 33 | color = red 34 | label = "Incremental \& Demand-Driven" 35 | else: 36 | raise Exception("Unknown configuration: " + sys.argv[2]) 37 | runs = int(sys.argv[3]) 38 | 39 | with sys.stdin as csvfile: 40 | data = csv.reader(csvfile, delimiter=',') 41 | x_coords = [] 42 | 43 | raw = [[] for i in range(runs)] 44 | 45 | for idx,row in enumerate(data): 46 | x_coords.append(int(idx)) 47 | for i in range(runs): 48 | try: 49 | raw[i].append(float(row[i])) 50 | except: 51 | raw[i].append(0.0) 52 | 53 | plt.rc('text') 54 | plt.rc('font', family='serif',size=16.0) 55 | plt.rc('legend', edgecolor='white',fontsize="x-large",handlelength=0,framealpha=0) 56 | 57 | plt.rc('axes',labelsize='x-large',linewidth=1.5,labelpad=-15.0) 58 | plt.rc('xtick.major',width=1.5) 59 | plt.rc('ytick.major',width=1.5) 60 | 61 | plt.rc('xtick',labelsize='large') 62 | plt.rc('ytick',labelsize='large') 63 | 64 | #linear axes 65 | #plt.axis([0,3000,0,30000]) 66 | 
plt.axis([0,1000,0,30000]) 67 | 68 | plt.ylabel(r"Analysis Time (ms)") 69 | plt.xlabel(r"Cumulative Program Edits") 70 | 71 | plt.xticks([0,500,1000],label=True) 72 | plt.yticks([0,10000,20000,30000],label=True) 73 | 74 | for i in range(runs): 75 | plt.scatter(x_coords, raw[i], s=0.08, alpha=0.3, color=color,marker=",") 76 | 77 | plt.plot([1], [1],color=color, label=label) 78 | 79 | plt.savefig(output_file, dpi=400) 80 | 81 | -------------------------------------------------------------------------------- /src/frontend/loc_map.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Tree_sitter_java 3 | open Syntax 4 | open Cfg 5 | 6 | type loc_ctx = { entry : Loc.t; exit : Loc.t; ret : Loc.t; exc : Loc.t } 7 | 8 | let pp_loc_ctx fs { entry; exit; ret; exc } = 9 | Format.fprintf fs "{%a -> %a; ret=%a; exc=%a}" Loc.pp entry Loc.pp exit Loc.pp ret Loc.pp exc 10 | 11 | type t = loc_ctx Int.Map.t Method_id.Map.t 12 | 13 | let empty = Map.empty (module Method_id) 14 | 15 | type 'a or_collision = [ `Ok of 'a | `Collision ] 16 | 17 | let add method_id stmt loc_ctx lmap = 18 | let stmt_hash = CST.sexp_of_statement stmt |> Sexp.hash in 19 | match Method_id.Map.find lmap method_id with 20 | | None -> 21 | `Ok (Method_id.Map.add_exn lmap ~key:method_id ~data:(Int.Map.singleton stmt_hash loc_ctx)) 22 | | Some stmt_hash_map -> ( 23 | match Int.Map.add stmt_hash_map ~key:stmt_hash ~data:loc_ctx with 24 | | `Duplicate -> `Collision 25 | | `Ok data -> `Ok (Method_id.Map.set lmap ~key:method_id ~data)) 26 | 27 | let remove method_id stmt lmap = 28 | let stmt_hash = CST.sexp_of_statement stmt |> Sexp.hash in 29 | let method_lmap = Method_id.Map.find_exn lmap method_id |> flip Int.Map.remove stmt_hash in 30 | Method_id.Map.set lmap ~key:method_id ~data:method_lmap 31 | 32 | let remove_fn = Method_id.Map.remove 33 | 34 | let remove_region method_id (region : Loc.Set.t) lmap = 35 | let new_method_lmap = 36 | Int.Map.filter 
(Method_id.Map.find_exn lmap method_id) 37 | ~f:(fun { entry; exit; ret = _; exc = _ } -> Loc.Set.(mem region entry && mem region exit)) 38 | in 39 | Method_id.Map.set lmap ~key:method_id ~data:new_method_lmap 40 | 41 | let get method_id stmt lmap = 42 | let stmt_hash = CST.sexp_of_statement stmt |> Sexp.hash in 43 | Method_id.Map.find_exn lmap method_id |> flip Int.Map.find_exn stmt_hash 44 | 45 | let union l r = 46 | Method_id.Map.merge l r ~f:(fun ~key:_ -> function 47 | | `Both (x, y) -> 48 | Int.Map.merge x y ~f:(fun ~key:_ -> function 49 | | `Both _ -> failwith "collision" | `Left x | `Right x -> Some x) 50 | |> Option.return 51 | | `Left x -> Some x 52 | | `Right y -> Some y) 53 | 54 | let rebase_edges method_id ~old_src ~new_src loc_map = 55 | let new_method_locs = 56 | Int.Map.map (Method_id.Map.find_exn loc_map method_id) ~f:(fun { entry; exit; ret; exc } -> 57 | if Loc.equal entry old_src then { entry = new_src; exit; ret; exc } 58 | else { entry; exit; ret; exc }) 59 | in 60 | Method_id.Map.set loc_map ~key:method_id ~data:new_method_locs 61 | 62 | let pp = 63 | let pp_inner = Map.pp Int.pp pp_loc_ctx in 64 | Map.pp Method_id.pp pp_inner 65 | -------------------------------------------------------------------------------- /scripts/bugswarm_filter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import sys 3 | import os 4 | 5 | # This script queries BugSwarm for suitable pass/fail program pairs and pulls their source, into directory structure of the form: 6 | # ./_bugswarm//{pass, fail}/ 7 | # where is a unique identifier of each pass/fail program pair in the BugSwarm DB 8 | 9 | if len(sys.argv) < 2: 10 | print("ERROR: No API token passed via command-line argument.") 11 | sys.exit(1) 12 | 13 | from bugswarm.common.rest_api.database_api import DatabaseAPI 14 | 15 | bugswarmapi = DatabaseAPI(token=sys.argv[1]) 16 | 17 | api_filter = ( 18 | '{"lang":{"$in":["Java"]},' + # JAVA source 
language 19 | '"stability":"5/5",' + # non-flaky 20 | #'"classification.exceptions":["NullPointerException"],' + # NPE error type 21 | '"classification.code":"Yes",' + 22 | '"metrics.changes":{"$gt":0,"$lt":500}}' # diff touches at least 1 and at most 500 lines of code 23 | ) 24 | 25 | artifacts = bugswarmapi.filter_artifacts(api_filter) 26 | 27 | print("found artifacts: " + str(len(artifacts))) 28 | 29 | if not os.path.isdir("_bugswarm/"): 30 | print("_bugswarm directory does not exist, attempting to create") 31 | os.mkdir("_bugswarm") 32 | 33 | #import pprint 34 | #pp = pprint.PrettyPrinter(indent=2,width=120) 35 | 36 | 37 | for a in artifacts: 38 | print("IMAGE TAG: " + a["image_tag"] + "\n") 39 | print("PULLING SOURCES...") 40 | try: 41 | # underscore prefix prevents dune from searching these repos for dune config files 42 | image_dir = "_bugswarm/" + a["image_tag"] 43 | os.mkdir(image_dir) 44 | # Pull a shallow (origin/master@HEAD only) copy of the repo to the `pass` directory 45 | os.system("git clone git@github.com:{}.git {}/pass --depth=1 2> /dev/null".format(a["repo"], image_dir)) 46 | # Copy that to the `fail` directory 47 | os.system("cp -r {}/pass {}/fail".format(image_dir, image_dir)) 48 | # Fetch a shallow copy of the commit at which the `pass` occurred in the `pass` directory 49 | os.system("cd {}/pass && git fetch --depth 1 origin {} 2> /dev/null && git checkout {} && cd -".format(image_dir, a["passed_job"]["trigger_sha"], a["passed_job"]["trigger_sha"])) 50 | # Fetch a shallow copy of the commit at which the `fail` occurred in the `fail` directory 51 | os.system("cd {}/fail && git fetch --depth 1 origin {} 2> /dev/null && git checkout {} && cd -".format(image_dir, a["failed_job"]["trigger_sha"], a["failed_job"]["trigger_sha"])) 52 | 53 | os.system("touch diagnostic/{}.stdout.log diagnostic/{}.stderr.log".format(a["image_tag"],a["image_tag"])) 54 | os.system("./run_diagnostic {} > diagnostic/{}.stdout.log 2>diagnostic/{}.stderr.log 
".format(a["image_tag"],a["image_tag"],a["image_tag"])) 55 | except FileExistsError: 56 | print("SKIPPING IMAGE: sources already pulled") 57 | -------------------------------------------------------------------------------- /src/syntax/method_id.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 3 | module T = struct 4 | type t = { 5 | package : string list; 6 | class_name : string; 7 | method_name : string; 8 | static : bool; 9 | arg_types : string list; 10 | } 11 | [@@deriving compare, equal, hash, sexp_of] 12 | (** NB: 13 | * [class_name] is "Foo$Bar" for inner class "Bar" of outer class "Foo" 14 | * [arg_types] are _unqualified_: always "String", never "java.lang.String" 15 | *) 16 | 17 | let pp fs { package; class_name; method_name; static; arg_types } = 18 | Format.fprintf fs "%a.%s%s%s(%a)" (List.pp "." String.pp) package class_name 19 | (if static then "." else "#") 20 | method_name (List.pp "," String.pp) arg_types 21 | end 22 | 23 | module T_comparator : sig 24 | type t = T.t [@@deriving compare, equal, hash, sexp_of] 25 | 26 | type comparator_witness 27 | 28 | val comparator : (t, comparator_witness) Comparator.t 29 | 30 | val pp : t pp 31 | end = struct 32 | include T 33 | include Comparator.Make (T) 34 | end 35 | 36 | module Set = struct 37 | include (Set : module type of Set with type ('a, 'cmp) t := ('a, 'cmp) Set.t) 38 | 39 | type t = Set.M(T_comparator).t 40 | end 41 | 42 | module Map = struct 43 | include (Map : module type of Map with type ('k, 'v, 'cmp) t := ('k, 'v, 'cmp) Map.t) 44 | 45 | type 'v t = 'v Map.M(T_comparator).t 46 | 47 | let empty = Map.empty (module T_comparator) 48 | end 49 | 50 | include T_comparator 51 | 52 | let deserialize m : t = 53 | let open String in 54 | let static, rest_of_m = 55 | if is_prefix m ~prefix:"static " then (true, drop_prefix m 7) else (false, m) 56 | in 57 | let pkg_and_class_str, rest_of_m = 58 | match split rest_of_m ~on:'#' with 59 | | [ before; 
after ] -> (before, after) 60 | | _ -> failwith ("malformed serialized method: " ^ m) 61 | in 62 | let package = deserialize_package pkg_and_class_str in 63 | let class_name = deserialize_class pkg_and_class_str in 64 | let method_name, arg_types = 65 | match split rest_of_m ~on:'(' with 66 | | [ meth; args_and_close_paren ] -> 67 | let args = 68 | sub args_and_close_paren ~pos:0 ~len:(length args_and_close_paren - 1) 69 | |> split ~on:',' 70 | |> List.filter ~f:(String.is_empty >> not) 71 | in 72 | let arg_types = 73 | List.map args ~f:(fun arg_type -> 74 | let last_dot_idx = rindex arg_type '.' in 75 | match last_dot_idx with 76 | | Some idx -> drop_prefix arg_type (idx + 1) 77 | | None -> arg_type) 78 | in 79 | (meth, arg_types) 80 | | _ -> failwith ("malformed serialized method: " ^ m) 81 | in 82 | { package; class_name; method_name; static; arg_types } 83 | 84 | let current_method_id = ref [] 85 | 86 | let set_current_method_id (method_id : t) = current_method_id := method_id :: !current_method_id 87 | 88 | let clear_current_method_id () = current_method_id := List.tl_exn !current_method_id 89 | 90 | let get_current_method_id () = List.hd !current_method_id 91 | -------------------------------------------------------------------------------- /scripts/artifact/kick_the_tires.sh: -------------------------------------------------------------------------------- 1 | ################################ 2 | ## Run some small experiments ## 3 | ################################ 4 | echo "Running some miniature experiments... this should just take 30 seconds or so." 
5 | for seed in 0 1 2 3; do 6 | for n in 100; do 7 | for qpe in 5; do 8 | echo "dd+incr, $qpe qpe, $n iterations with seed $seed" 9 | ./run_d1a_experiment -d -i -s $seed -q $qpe $n 10 | echo "dd-only, $qpe qpe, $n iterations with seed $seed" 11 | ./run_d1a_experiment -d -s $seed -q $qpe $n 12 | done 13 | echo "incr-only, $n iterations with seed $seed" 14 | ./run_d1a_experiment -i -s $seed $n 15 | echo "batch, $n iterations with seed $seed" 16 | ./run_d1a_experiment -s $seed $n 17 | done 18 | done 19 | 20 | #################### 21 | ## Generate plots ## 22 | #################### 23 | echo "Done with experiments" 24 | echo "" 25 | echo "Generating miniature versions of Fig. 10 plots..." 26 | mkdir -p tmp 27 | touch tmp/tmp # silence "no files match tmp/*" warning in next line 28 | rm tmp/* # clean up from previous runs 29 | for seed in 0 1 2 3; do 30 | # normalize to same numbers of rows per config 31 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_incr_n100_seed$seed.log > tmp/dd_5qpe_incr_n100_seed$seed.log.avgs 32 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_n100_seed$seed.log > tmp/dd_5qpe_n100_seed$seed.log.avgs 33 | # combine all logs for each config (in series) 34 | cat tmp/dd_5qpe_incr_n100_seed$seed.log.avgs >> tmp/dd_incr_all.log 35 | cat tmp/dd_5qpe_n100_seed$seed.log.avgs >> tmp/dd_all.log 36 | cat out/experiments/incr_n100_seed$seed.log >> tmp/incr_all.log 37 | cat out/experiments/n100_seed$seed.log >> tmp/batch_all.log 38 | done 39 | 40 | # combine all logs for each config (in parallel) 41 | paste -d "," tmp/dd_5qpe_incr_n100_seed0.log.avgs tmp/dd_5qpe_incr_n100_seed1.log.avgs tmp/dd_5qpe_incr_n100_seed2.log.avgs tmp/dd_5qpe_incr_n100_seed3.log.avgs > tmp/dd_incr_parallel.log 42 | paste -d "," tmp/dd_5qpe_n100_seed0.log.avgs tmp/dd_5qpe_n100_seed1.log.avgs tmp/dd_5qpe_n100_seed2.log.avgs tmp/dd_5qpe_n100_seed3.log.avgs > tmp/dd_parallel.log 43 | paste -d "," out/experiments/incr_n100_seed0.log 
out/experiments/incr_n100_seed1.log out/experiments/incr_n100_seed2.log out/experiments/incr_n100_seed3.log > tmp/incr_parallel.log 44 | paste -d "," out/experiments/n100_seed0.log out/experiments/n100_seed1.log out/experiments/n100_seed2.log out/experiments/n100_seed3.log > tmp/batch_parallel.log 45 | 46 | # generate scatter plots 47 | echo " ... generating scatter plot: Demand-Driven \& Incremental" 48 | scripts/scatter_config_ktt.py out/plots/dd_incr_scatter.png dd_incr 4 < tmp/dd_incr_parallel.log 49 | echo " ... generating scatter plot: Demand-Driven" 50 | scripts/scatter_config_ktt.py out/plots/dd_scatter.png dd 4 < tmp/dd_parallel.log 51 | echo " ... generating scatter plot: Incremental" 52 | scripts/scatter_config_ktt.py out/plots/incr_scatter.png incr 4 < tmp/incr_parallel.log 53 | echo " ... generating scatter plot: Batch" 54 | scripts/scatter_config_ktt.py out/plots/batch_scatter.png batch 4 < tmp/batch_parallel.log 55 | 56 | # generate CDF at out/plots/cdf.png 57 | echo " ... 
generating CDF" 58 | scripts/cdf_ktt.py out/plots/cdf.png 400 tmp/batch_all.log tmp/incr_all.log tmp/dd_all.log tmp/dd_incr_all.log 59 | -------------------------------------------------------------------------------- /src/analysis/context.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Syntax 3 | open Domain 4 | 5 | (** Get the callee [f] of a callsite [stmt] of the form `y = f(x_1,...,x_k)`*) 6 | let get_callee_unsafe stmt = 7 | match stmt with 8 | | Ast.Stmt.Call { meth; _ } -> meth 9 | | _ -> failwith "can't get callee of non-call statement" 10 | 11 | module type CtxFunctor = functor (Dom : Abstract.Dom) -> sig 12 | include Abstract.CtxSensitiveDom with type t := Dom.t 13 | 14 | type t = Dom.t 15 | end 16 | 17 | module MakeInsensitive (Dom : Abstract.Dom) : sig 18 | include Abstract.CtxSensitiveDom with type t := Dom.t 19 | 20 | type t = Dom.t 21 | end = struct 22 | include Dom 23 | 24 | module Ctx = struct 25 | type dom = t 26 | 27 | type t = unit [@@deriving compare, equal, hash, sexp_of] 28 | 29 | let pp fs () = Format.fprintf fs "()" 30 | 31 | let sanitize () = () 32 | 33 | let show () = "()" 34 | 35 | let hash = seeded_hash 36 | 37 | let init = () 38 | 39 | let callee_ctx ~caller_state:_ ~callsite:_ ~ctx:() = () 40 | 41 | let is_feasible_callchain _ _ = true 42 | end 43 | end 44 | 45 | module Make1CFA (Dom : Abstract.Dom) : sig 46 | include Abstract.CtxSensitiveDom with type t := Dom.t 47 | 48 | type t = Dom.t 49 | end = struct 50 | include Dom 51 | 52 | module Ctx = struct 53 | type dom = t 54 | 55 | type t = Ast.Stmt.t option [@@deriving compare, equal, hash, sexp_of] 56 | 57 | let pp fs = function 58 | | Some caller -> Format.fprintf fs "[%a]" Ast.Stmt.pp caller 59 | | None -> Format.fprintf fs "[]" 60 | 61 | let sanitize x = x 62 | 63 | let show = Format.asprintf "%a" pp 64 | 65 | let hash = seeded_hash 66 | 67 | let init = None 68 | 69 | let callee_ctx ~caller_state:_ ~callsite ~ctx:_ = 
Some callsite 70 | 71 | let is_feasible_callchain ctx chain = 72 | match (ctx, chain) with 73 | | None, [] -> true 74 | | Some ctx_caller, chain_caller :: _ -> Ast.Stmt.equal ctx_caller chain_caller 75 | | _ -> false 76 | end 77 | end 78 | 79 | module Make2CFA (Dom : Abstract.Dom) : sig 80 | include Abstract.CtxSensitiveDom with type t := Dom.t 81 | 82 | type t = Dom.t 83 | end = struct 84 | include Dom 85 | 86 | module Ctx = struct 87 | type dom = t 88 | 89 | type t = Ast.Stmt.t list [@@deriving compare, equal, hash, sexp_of] 90 | 91 | let pp fs = function 92 | | [] -> Format.fprintf fs "[]" 93 | | [ caller ] -> Format.fprintf fs "[%a]" Ast.Stmt.pp caller 94 | | [ caller; callers_caller ] -> 95 | Format.fprintf fs "[%a :: %a]" Ast.Stmt.pp caller Ast.Stmt.pp callers_caller 96 | | _ -> failwith "callstring length capped at 2" 97 | 98 | let sanitize x = x 99 | 100 | let show = Format.asprintf "%a" pp 101 | 102 | let hash = seeded_hash 103 | 104 | let init = [] 105 | 106 | let callee_ctx ~caller_state:_ ~callsite ~ctx = 107 | match ctx with [] -> [ callsite ] | caller :: _ -> [ callsite; caller ] 108 | 109 | let is_feasible_callchain ctx chain = 110 | match (ctx, chain) with 111 | | [], [] -> true 112 | | [ ctx_caller ], [ chain_caller ] -> Ast.Stmt.equal ctx_caller chain_caller 113 | | [ ctx1; ctx2 ], chain1 :: chain2 :: _ -> Ast.Stmt.(equal ctx1 chain1 && equal ctx2 chain2) 114 | | _ -> false 115 | end 116 | end 117 | -------------------------------------------------------------------------------- /scripts/artifact/scalability_experiments_small.sh: -------------------------------------------------------------------------------- 1 | # In order to add, remove, or change random seeds, adjust (1) the values for $seed in both for loops, (2) the names of log files supplied to the `paste` commands, (3) the constant passed to the scatter_config.py script (currently 4, set to the total number of seeds), and (4) the constant passed to the cdf.py script (currently 4000, set to 
seeds * n) 2 | # In order to change the number of edits per experiment, adjust (1) the value of n on line 9, (2) the names of log files throughout, replacing "n500" by "nX" for whatever X you choose, and (3) the constant passed to the cdf.py script (currently 2000, set to seeds * n) 3 | 4 | # NB: if you want to re-generate plots without re-running experiments for any reason, just comment out the first for-loop here. 5 | ########################## 6 | ## Run some experiments ## 7 | ########################## 8 | for seed in 4 5 6 7; do 9 | for n in 500; do 10 | for qpe in 5; do 11 | echo "dd+incr, $qpe qpe, $n iterations with seed $seed" 12 | ./run_d1a_experiment -d -i -s $seed -q $qpe $n 13 | echo "dd-only, $qpe qpe, $n iterations with seed $seed" 14 | ./run_d1a_experiment -d -s $seed -q $qpe $n 15 | done 16 | echo "incr-only, $n iterations with seed $seed" 17 | ./run_d1a_experiment -i -s $seed $n 18 | echo "batch, $n iterations with seed $seed" 19 | ./run_d1a_experiment -s $seed $n 20 | done 21 | done 22 | 23 | #################### 24 | ## Generate plots ## 25 | #################### 26 | echo "\nGenerating miniature versions of Fig. 10 plots..." 
27 | mkdir -p tmp 28 | rm tmp/* 29 | for seed in 4 5 6 7; do 30 | # normalize to same numbers of rows per config 31 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_incr_n500_seed$seed.log > tmp/dd_5qpe_incr_n500_seed$seed.log.avgs 32 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_n500_seed$seed.log > tmp/dd_5qpe_n500_seed$seed.log.avgs 33 | # combine all logs for each config (in series) 34 | cat tmp/dd_5qpe_incr_n500_seed$seed.log.avgs >> tmp/dd_incr_all.log 35 | cat tmp/dd_5qpe_n500_seed$seed.log.avgs >> tmp/dd_all.log 36 | cat out/experiments/incr_n500_seed$seed.log >> tmp/incr_all.log 37 | cat out/experiments/n500_seed$seed.log >> tmp/batch_all.log 38 | done 39 | 40 | # combine all logs for each config (in parallel) 41 | paste -d "," tmp/dd_5qpe_incr_n500_seed4.log.avgs tmp/dd_5qpe_incr_n500_seed5.log.avgs tmp/dd_5qpe_incr_n500_seed6.log.avgs tmp/dd_5qpe_incr_n500_seed7.log.avgs > tmp/dd_incr_parallel.log 42 | paste -d "," tmp/dd_5qpe_n500_seed4.log.avgs tmp/dd_5qpe_n500_seed5.log.avgs tmp/dd_5qpe_n500_seed6.log.avgs tmp/dd_5qpe_n500_seed7.log.avgs > tmp/dd_parallel.log 43 | paste -d "," out/experiments/incr_n500_seed4.log out/experiments/incr_n500_seed5.log out/experiments/incr_n500_seed6.log out/experiments/incr_n500_seed7.log > tmp/incr_parallel.log 44 | paste -d "," out/experiments/n500_seed4.log out/experiments/n500_seed5.log out/experiments/n500_seed6.log out/experiments/n500_seed7.log > tmp/batch_parallel.log 45 | 46 | # generate scatter plots 47 | echo " ... generating scatter plot: Demand-Driven \& Incremental" 48 | scripts/scatter_config_small.py out/plots/dd_incr_scatter_small.png dd_incr 4 < tmp/dd_incr_parallel.log 49 | echo " ... generating scatter plot: Demand-Driven" 50 | scripts/scatter_config_small.py out/plots/dd_scatter_small.png dd 4 < tmp/dd_parallel.log 51 | echo " ... 
generating scatter plot: Incremental" 52 | scripts/scatter_config_small.py out/plots/incr_scatter_small.png incr 4 < tmp/incr_parallel.log 53 | echo " ... generating scatter plot: Batch" 54 | scripts/scatter_config_small.py out/plots/batch_scatter_small.png batch 4 < tmp/batch_parallel.log 55 | 56 | # generate CDF at out/plots/cdf.png 57 | echo " ... generating CDF" 58 | scripts/cdf.py out/plots/cdf_small.png 2000 tmp/batch_all.log tmp/incr_all.log tmp/dd_all.log tmp/dd_incr_all.log 59 | -------------------------------------------------------------------------------- /scripts/artifact/scalability_experiments.sh: -------------------------------------------------------------------------------- 1 | # In order to add, remove, or change random seeds, adjust (1) the values for $seed in both for loops, (2) the names of log files supplied to the `paste` commands, (3) the constant passed to the scatter_config.py script (currently 4, set to the total number of seeds), and (4) the constant passed to the cdf.py script (currently 4000, set to seeds * n) 2 | # In order to change the number of edits per experiment, adjust (1) the value of n on line 9, (2) the names of log files throughout, replacing "n1000" by "nX" for whatever X you choose, and (3) the constant passed to the cdf.py script (currently 4000, set to seeds * n) 3 | 4 | # NB: if you want to re-generate plots without re-running experiments for any reason, just comment out the first for-loop here. 5 | ########################## 6 | ## Run some experiments ## 7 | ########################## 8 | echo "Running some miniature experiments... this should just take 20 seconds or so." 
9 | for seed in 4 5 6 7; do 10 | for n in 1000; do 11 | for qpe in 5; do 12 | echo "dd+incr, $qpe qpe, $n iterations with seed $seed" 13 | ./run_d1a_experiment -d -i -s $seed -q $qpe $n 14 | echo "dd-only, $qpe qpe, $n iterations with seed $seed" 15 | ./run_d1a_experiment -d -s $seed -q $qpe $n 16 | done 17 | echo "incr-only, $n iterations with seed $seed" 18 | ./run_d1a_experiment -i -s $seed $n 19 | echo "batch, $n iterations with seed $seed" 20 | ./run_d1a_experiment -s $seed $n 21 | done 22 | done 23 | 24 | #################### 25 | ## Generate plots ## 26 | #################### 27 | echo "\nGenerating miniature versions of Fig. 10 plots..." 28 | mkdir -p tmp 29 | rm tmp/* 30 | for seed in 4 5 6 7; do 31 | # normalize to same numbers of rows per config 32 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_incr_n1000_seed$seed.log > tmp/dd_5qpe_incr_n1000_seed$seed.log.avgs 33 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_n1000_seed$seed.log > tmp/dd_5qpe_n1000_seed$seed.log.avgs 34 | # combine all logs for each config (in series) 35 | cat tmp/dd_5qpe_incr_n1000_seed$seed.log.avgs >> tmp/dd_incr_all.log 36 | cat tmp/dd_5qpe_n1000_seed$seed.log.avgs >> tmp/dd_all.log 37 | cat out/experiments/incr_n1000_seed$seed.log >> tmp/incr_all.log 38 | cat out/experiments/n1000_seed$seed.log >> tmp/batch_all.log 39 | done 40 | 41 | # combine all logs for each config (in parallel) 42 | paste -d "," tmp/dd_5qpe_incr_n1000_seed4.log.avgs tmp/dd_5qpe_incr_n1000_seed5.log.avgs tmp/dd_5qpe_incr_n1000_seed6.log.avgs tmp/dd_5qpe_incr_n1000_seed7.log.avgs > tmp/dd_incr_parallel.log 43 | paste -d "," tmp/dd_5qpe_n1000_seed4.log.avgs tmp/dd_5qpe_n1000_seed5.log.avgs tmp/dd_5qpe_n1000_seed6.log.avgs tmp/dd_5qpe_n1000_seed7.log.avgs > tmp/dd_parallel.log 44 | paste -d "," out/experiments/incr_n1000_seed4.log out/experiments/incr_n1000_seed5.log out/experiments/incr_n1000_seed6.log out/experiments/incr_n1000_seed7.log > tmp/incr_parallel.log 45 | 
paste -d "," out/experiments/n1000_seed4.log out/experiments/n1000_seed5.log out/experiments/n1000_seed6.log out/experiments/n1000_seed7.log > tmp/batch_parallel.log 46 | 47 | # generate scatter plots 48 | echo " ... generating scatter plot: Demand-Driven \& Incremental" 49 | scripts/scatter_config.py out/plots/dd_incr_scatter.png dd_incr 4 < tmp/dd_incr_parallel.log 50 | echo " ... generating scatter plot: Demand-Driven" 51 | scripts/scatter_config.py out/plots/dd_scatter.png dd 4 < tmp/dd_parallel.log 52 | echo " ... generating scatter plot: Incremental" 53 | scripts/scatter_config.py out/plots/incr_scatter.png incr 4 < tmp/incr_parallel.log 54 | echo " ... generating scatter plot: Batch" 55 | scripts/scatter_config.py out/plots/batch_scatter.png batch 4 < tmp/batch_parallel.log 56 | 57 | # generate CDF at out/plots/cdf.png 58 | echo " ... generating CDF" 59 | scripts/cdf.py out/plots/cdf.png 4000 tmp/batch_all.log tmp/incr_all.log tmp/dd_all.log tmp/dd_incr_all.log 60 | -------------------------------------------------------------------------------- /scripts/artifact/scalability_experiments_large.sh: -------------------------------------------------------------------------------- 1 | # In order to add, remove, or change random seeds, adjust (1) the values for $seed in both for loops, (2) the names of log files supplied to the `paste` commands, (3) the constant passed to the scatter_config.py script (currently 4, set to the total number of seeds), and (4) the constant passed to the cdf.py script (currently 4000, set to seeds * n) 2 | # In order to change the number of edits per experiment, adjust (1) the value of n on line 9, (2) the names of log files throughout, replacing "n1000" by "nX" for whatever X you choose, and (3) the constant passed to the cdf.py script (currently 4000, set to seeds * n) 3 | 4 | # NB: if you want to re-generate plots without re-running experiments for any reason, just comment out the first for-loop here. 
5 | ########################## 6 | ## Run some experiments ## 7 | ########################## 8 | echo "Running some miniature experiments... this should just take 20 seconds or so." 9 | for seed in 4 5 6 7; do 10 | for n in 1000; do 11 | for qpe in 5; do 12 | echo "dd+incr, $qpe qpe, $n iterations with seed $seed" 13 | ./run_d1a_experiment -d -i -s $seed -q $qpe $n 14 | echo "dd-only, $qpe qpe, $n iterations with seed $seed" 15 | ./run_d1a_experiment -d -s $seed -q $qpe $n 16 | done 17 | echo "incr-only, $n iterations with seed $seed" 18 | ./run_d1a_experiment -i -s $seed $n 19 | echo "batch, $n iterations with seed $seed" 20 | ./run_d1a_experiment -s $seed $n 21 | done 22 | done 23 | 24 | #################### 25 | ## Generate plots ## 26 | #################### 27 | echo "\nGenerating miniature versions of Fig. 10 plots..." 28 | mkdir -p tmp 29 | rm tmp/* 30 | for seed in 4 5 6 7; do 31 | # normalize to same numbers of rows per config 32 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_incr_n1000_seed$seed.log > tmp/dd_5qpe_incr_n1000_seed$seed.log.avgs 33 | scripts/to_moving_average_per_edit.py 5 < out/experiments/dd_5qpe_n1000_seed$seed.log > tmp/dd_5qpe_n1000_seed$seed.log.avgs 34 | # combine all logs for each config (in series) 35 | cat tmp/dd_5qpe_incr_n1000_seed$seed.log.avgs >> tmp/dd_incr_all.log 36 | cat tmp/dd_5qpe_n1000_seed$seed.log.avgs >> tmp/dd_all.log 37 | cat out/experiments/incr_n1000_seed$seed.log >> tmp/incr_all.log 38 | cat out/experiments/n1000_seed$seed.log >> tmp/batch_all.log 39 | done 40 | 41 | # combine all logs for each config (in parallel) 42 | paste -d "," tmp/dd_5qpe_incr_n1000_seed4.log.avgs tmp/dd_5qpe_incr_n1000_seed5.log.avgs tmp/dd_5qpe_incr_n1000_seed6.log.avgs tmp/dd_5qpe_incr_n1000_seed7.log.avgs > tmp/dd_incr_parallel.log 43 | paste -d "," tmp/dd_5qpe_n1000_seed4.log.avgs tmp/dd_5qpe_n1000_seed5.log.avgs tmp/dd_5qpe_n1000_seed6.log.avgs tmp/dd_5qpe_n1000_seed7.log.avgs > tmp/dd_parallel.log 44 | paste -d 
"," out/experiments/incr_n1000_seed4.log out/experiments/incr_n1000_seed5.log out/experiments/incr_n1000_seed6.log out/experiments/incr_n1000_seed7.log > tmp/incr_parallel.log 45 | paste -d "," out/experiments/n1000_seed4.log out/experiments/n1000_seed5.log out/experiments/n1000_seed6.log out/experiments/n1000_seed7.log > tmp/batch_parallel.log 46 | 47 | # generate scatter plots 48 | echo " ... generating scatter plot: Demand-Driven \& Incremental" 49 | scripts/scatter_config_large.py out/plots/dd_incr_scatter_large.png dd_incr 4 < tmp/dd_incr_parallel.log 50 | echo " ... generating scatter plot: Demand-Driven" 51 | scripts/scatter_config_large.py out/plots/dd_scatter_large.png dd 4 < tmp/dd_parallel.log 52 | echo " ... generating scatter plot: Incremental" 53 | scripts/scatter_config_large.py out/plots/incr_scatter_large.png incr 4 < tmp/incr_parallel.log 54 | echo " ... generating scatter plot: Batch" 55 | scripts/scatter_config_large.py out/plots/batch_scatter_large.png batch 4 < tmp/batch_parallel.log 56 | 57 | # generate CDF 58 | echo " ... 
generating CDF" 59 | scripts/cdf.py out/plots/cdf_large.png 4000 tmp/batch_all.log tmp/incr_all.log tmp/dd_all.log tmp/dd_incr_all.log 60 | -------------------------------------------------------------------------------- /src/analysis/daig.mli: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Domain 3 | open Frontend 4 | open Syntax 5 | 6 | module type Sig = sig 7 | type absstate 8 | 9 | (** names are opaque from outside a DAIG, except for comparison, hashing, and sexp utilities (for use in hashsets/maps) *) 10 | module Name : sig 11 | type t [@@deriving compare, hash, sexp_of] 12 | 13 | val pp : t pp 14 | end 15 | 16 | module Ref : sig 17 | type t = 18 | | Stmt of { mutable stmt : Ast.Stmt.t; name : Name.t } 19 | | AState of { mutable state : absstate option; name : Name.t } 20 | [@@deriving sexp_of, equal, compare] 21 | 22 | val name : t -> Name.t 23 | 24 | val hash : t -> int 25 | 26 | val pp : t pp 27 | end 28 | 29 | module Comp : sig 30 | type t = [ `Transfer | `Join | `Widen | `Fix | `Transfer_after_fix of Cfg.Loc.t ] 31 | [@@deriving compare, equal, hash, sexp_of] 32 | 33 | val pp : t pp 34 | 35 | val to_string : t -> string 36 | end 37 | 38 | module Opaque_ref : module type of struct 39 | include Regular.Std.Opaque.Make (Ref) 40 | 41 | type t = Ref.t 42 | end 43 | 44 | module G : module type of Graph.Make (Opaque_ref) (Comp) 45 | 46 | type t = G.t 47 | 48 | val of_cfg : entry_state:absstate -> cfg:Cfg.t -> fn:Cfg.Fn.t -> t 49 | (** Construct a DAIG for a procedure with body [cfg] and metadata [fn], with [init_state] at the procedure entry *) 50 | 51 | val apply_edit : 52 | daig:t -> cfg_edit:Tree_diff.cfg_edit_result -> fn:Cfg.Fn.t -> Tree_diff.edit -> t 53 | (** apply the specified [Tree_diff.edit] to the input [daig]; [cfg_edit] and [fn] are passed as additional information needed for certain types of edit *) 54 | 55 | val dirty : Name.t -> t -> t 56 | (** dirty all dependencies of some name 
(including that name itself) *) 57 | 58 | val dump_dot : filename:string -> ?loc_labeller:(Cfg.Loc.t -> string option) -> t -> unit 59 | (** dump a DOT representation of a DAIG to [filename], decorating abstract-state cells according to [loc_labeller] if provided *) 60 | 61 | val is_solved : Cfg.Loc.t -> t -> bool 62 | (** true iff an abstract state is available at the given location *) 63 | 64 | type 'a or_summary_query = 65 | | Result of 'a 66 | | Summ_qry of { callsite : Ast.Stmt.t; caller_state : absstate } 67 | (** sum type representing the possible cases when a query is issued to a DAIG: 68 | (case 1: Result) the result is available or can be computed with no new method summaries 69 | (case 2: Summ_qry) additional method summaries are needed to evaluate some [callsite] in [caller_state] 70 | *) 71 | 72 | type summarizer = callsite:Ast.Stmt.t * Name.t -> absstate -> absstate option 73 | 74 | exception Ref_not_found of [ `By_loc of Cfg.Loc.t | `By_name of Name.t ] 75 | 76 | val get_by_loc : ?summarizer:summarizer -> Cfg.Loc.t -> t -> absstate or_summary_query * t 77 | 78 | val get_by_name : ?summarizer:summarizer -> Name.t -> t -> absstate or_summary_query * t 79 | (** GET functions attempt to compute the requested value, analyzing its backward dependencies *) 80 | 81 | val read_by_loc : Cfg.Loc.t -> t -> absstate option 82 | 83 | val read_by_name : Name.t -> t -> absstate option 84 | (** READ functions return the current contents of the requested cell, performing no analysis computation*) 85 | 86 | val write_by_name : Name.t -> absstate -> t -> t 87 | (** WRITE functions write the given [absstate] to the cell named by the given [Name.t], dirtying any forward dependencies *) 88 | 89 | val pred_state_exn : Name.t -> t -> absstate 90 | (** returns the predecessor absstate of the cell named by the given [Name.t], if there is exactly one *) 91 | 92 | val assert_wf : t -> unit 93 | 94 | val total_astate_refs : t -> int 95 | 96 | val nonempty_astate_refs : t -> int 
97 | end 98 | 99 | module Make (Dom : Abstract.Dom) : Sig with type absstate := Dom.t 100 | -------------------------------------------------------------------------------- /src/analysis/domain/abstract.ml: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Import 3 | open Syntax 4 | 5 | module type Val = sig 6 | type t [@@deriving compare, equal, hash, sexp] 7 | 8 | include Adapton.Data.S with type t := t 9 | 10 | val pp : t pp 11 | 12 | val join : t -> t -> t 13 | 14 | val widen : t -> t -> t 15 | 16 | val implies : t -> t -> bool 17 | 18 | val ( <= ) : t -> t -> bool 19 | 20 | (* infix alias of [implies] *) 21 | 22 | val eval_binop : t -> Ast.Binop.t -> t -> t 23 | 24 | val eval_unop : Ast.Unop.t -> t -> t 25 | 26 | val of_lit : Ast.Lit.t -> t 27 | 28 | val truthiness : t -> [ `Neither | `T | `F | `Either ] 29 | 30 | val models : t -> Ast.Lit.t -> bool 31 | end 32 | 33 | module type Dom = sig 34 | type t [@@deriving compare, equal, hash, sexp] 35 | 36 | include Adapton.Data.S with type t := t 37 | 38 | val pp : t pp 39 | 40 | (* [unit -> t] type allows for lazy apron manager allocation, unlike [t] *) 41 | val init : unit -> t 42 | 43 | val bottom : unit -> t 44 | 45 | val top : unit -> t 46 | 47 | val interpret : Ast.Stmt.t -> t -> t 48 | 49 | val implies : t -> t -> bool 50 | 51 | val ( <= ) : t -> t -> bool 52 | 53 | (* infix alias of [implies] *) 54 | 55 | val join : t -> t -> t 56 | 57 | val widen : t -> t -> t 58 | 59 | val is_bot : t -> bool 60 | 61 | val call : 62 | callee:Cfg.Fn.t -> callsite:Ast.Stmt.t -> caller_state:t -> fields:Declared_fields.t -> t 63 | 64 | val return : 65 | callee:Cfg.Fn.t -> 66 | caller:Cfg.Fn.t -> 67 | callsite:Ast.Stmt.t -> 68 | caller_state:t -> 69 | return_state:t -> 70 | fields:Declared_fields.t -> 71 | t 72 | 73 | val approximate_missing_callee : caller_state:t -> callsite:Ast.Stmt.t -> t 74 | end 75 | 76 | module type CtxSensitiveDom = sig 77 | include Dom 78 | 79 | 
module Ctx : sig 80 | type dom = t 81 | 82 | type t [@@deriving compare, equal, hash, sexp_of] 83 | 84 | include Adapton.Data.S with type t := t 85 | 86 | val pp : t pp 87 | 88 | val init : t 89 | 90 | val callee_ctx : caller_state:dom -> callsite:Ast.Stmt.t -> ctx:t -> t 91 | 92 | (* Given a context and a callchain, returns true if the context MAY be reached via the callchain. This signature allows syntactic context sensitivity policies (e.g. kCFA) to filter out infeasible chains. (e.g. in 1CFA with context f, those not ending in f) 93 | *) 94 | val is_feasible_callchain : t -> Ast.Stmt.t list -> bool 95 | end 96 | end 97 | 98 | module DomWithDataStructures (T : sig 99 | include Dom 100 | 101 | val compare : t -> t -> int 102 | 103 | val sexp_of_t : t -> Ppx_sexp_conv_lib.Sexp.t 104 | end) : sig 105 | include Dom with type t = T.t 106 | 107 | include Comparator.S with type t := T.t 108 | 109 | module Set : sig 110 | type absstate := T.t 111 | 112 | type t = (absstate, comparator_witness) Set.t 113 | 114 | val empty : t 115 | end 116 | 117 | module Map : sig 118 | type absstate := T.t 119 | 120 | type 'v t = (absstate, 'v, comparator_witness) Map.t 121 | 122 | val empty : 'v t 123 | 124 | val singleton : absstate -> 'v -> 'v t 125 | end 126 | end = struct 127 | module T_comparator = struct 128 | include Comparator.Make (T) 129 | include T 130 | end 131 | 132 | include T_comparator 133 | 134 | module Set = struct 135 | include (Set : module type of Set with type ('a, 'cmp) t := ('a, 'cmp) Set.t) 136 | 137 | type t = Set.M(T_comparator).t [@@deriving compare] 138 | 139 | let empty = Set.empty (module T_comparator) 140 | end 141 | 142 | module Map = struct 143 | include (Map : module type of Map with type ('k, 'v, 'cmp) t := ('k, 'v, 'cmp) Map.t) 144 | 145 | type 'v t = 'v Map.M(T_comparator).t 146 | 147 | let empty = Map.empty (module T_comparator) 148 | 149 | let singleton k v = Base.Map.of_alist_exn (module T_comparator) [ (k, v) ] 150 | end 151 | end 152 | 
--------------------------------------------------------------------------------
/scripts/artifact/scalability_experiments_full.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Regenerates the paper's Fig. 10 plots from pre-gathered data in ./data;
# it does NOT re-run the experiments themselves (see the commented-out
# configuration below for how the raw data were produced).
echo "NOTE: This script just generates the figures shown in the paper and does _not_ actually run the experiments."
echo "      The raw data used for the figures can be found in ./data."
echo "      Comments in this script show the seeds/configurations in which the data were gathered, but note that"
echo "      the experiments were run in parallel and spread out across several servers in a cloud."
echo
echo


# EXPERIMENT CONFIGURATIONS:
# This script would (sequentially) run the experiments shown in the paper.
#for seed in 2 4 5 6 7 9 13 15 101; do
#    for n in 3000; do
#        echo "dd+incr, 5 qpe, $n iterations with seed $seed"
#        ./run_d1a_experiment -d -i -s $seed -q 5 $n
#        echo "dd-only, 5 qpe, $n iterations with seed $seed"
#        ./run_d1a_experiment -d -s $seed -q 5 $n
#        echo "incr-only, $n iterations with seed $seed"
#        ./run_d1a_experiment -i -s $seed $n
#        echo "batch, $n iterations with seed $seed"
#        ./run_d1a_experiment -s $seed $n
#    done
#done

####################
## Generate plots ##
####################
echo "Generating Fig. 10 plots..."
28 | mkdir -p tmp 29 | rm tmp/* 30 | for seed in 2 4 5 6 7 9 13 15 101; do 31 | # normalize to same numbers of rows per config 32 | scripts/to_moving_average_per_edit.py 5 < data/dd_5qpe_incr_n3000_seed$seed.log > tmp/dd_5qpe_incr_n3000_seed$seed.log.avgs 33 | scripts/to_moving_average_per_edit.py 5 < data/dd_5qpe_n3000_seed$seed.log > tmp/dd_5qpe_n3000_seed$seed.log.avgs 34 | # combine all logs for each config (in series) 35 | cat tmp/dd_5qpe_incr_n3000_seed$seed.log.avgs >> tmp/dd_incr_all.log 36 | cat tmp/dd_5qpe_n3000_seed$seed.log.avgs >> tmp/dd_all.log 37 | cat data/incr_n3000_seed$seed.log >> tmp/incr_all.log 38 | cat data/n3000_seed$seed.log >> tmp/batch_all.log 39 | done 40 | 41 | # combine all logs for each config (in parallel) 42 | paste -d "," tmp/dd_5qpe_incr_n3000_seed2.log.avgs tmp/dd_5qpe_incr_n3000_seed4.log.avgs tmp/dd_5qpe_incr_n3000_seed5.log.avgs tmp/dd_5qpe_incr_n3000_seed6.log.avgs tmp/dd_5qpe_incr_n3000_seed7.log.avgs tmp/dd_5qpe_incr_n3000_seed9.log.avgs tmp/dd_5qpe_incr_n3000_seed13.log.avgs tmp/dd_5qpe_incr_n3000_seed15.log.avgs tmp/dd_5qpe_incr_n3000_seed101.log.avgs > tmp/dd_incr_parallel.log 43 | 44 | paste -d "," tmp/dd_5qpe_n3000_seed2.log.avgs tmp/dd_5qpe_n3000_seed4.log.avgs tmp/dd_5qpe_n3000_seed5.log.avgs tmp/dd_5qpe_n3000_seed6.log.avgs tmp/dd_5qpe_n3000_seed7.log.avgs tmp/dd_5qpe_n3000_seed9.log.avgs tmp/dd_5qpe_n3000_seed13.log.avgs tmp/dd_5qpe_n3000_seed15.log.avgs tmp/dd_5qpe_n3000_seed101.log.avgs > tmp/dd_parallel.log 45 | 46 | paste -d "," data/incr_n3000_seed2.log data/incr_n3000_seed4.log data/incr_n3000_seed5.log data/incr_n3000_seed6.log data/incr_n3000_seed7.log data/incr_n3000_seed9.log data/incr_n3000_seed13.log data/incr_n3000_seed15.log data/incr_n3000_seed101.log > tmp/incr_parallel.log 47 | paste -d "," data/n3000_seed2.log data/n3000_seed4.log data/n3000_seed5.log data/n3000_seed6.log data/n3000_seed7.log data/n3000_seed9.log data/n3000_seed13.log data/n3000_seed15.log data/n3000_seed101.log > 
tmp/batch_parallel.log 48 | 49 | # generate scatter plots 50 | echo " ... generating scatter plot: Demand-Driven \& Incremental" 51 | scripts/scatter_config_full.py out/plots/dd_incr_scatter.png dd_incr 9 < tmp/dd_incr_parallel.log 52 | echo " ... generating scatter plot: Demand-Driven" 53 | scripts/scatter_config_full.py out/plots/dd_scatter.png dd 9 < tmp/dd_parallel.log 54 | echo " ... generating scatter plot: Incremental" 55 | scripts/scatter_config_full.py out/plots/incr_scatter.png incr 9 < tmp/incr_parallel.log 56 | echo " ... generating scatter plot: Batch" 57 | scripts/scatter_config_full.py out/plots/batch_scatter.png batch 9 < tmp/batch_parallel.log 58 | 59 | # generate CDF at out/plots/cdf.png 60 | echo " ... generating CDF" 61 | scripts/cdf_full.py out/plots/cdf.png 27000 tmp/batch_all.log tmp/incr_all.log tmp/dd_all.log tmp/dd_incr_all.log 62 | -------------------------------------------------------------------------------- /src/synthetic_benchmarks/exec.ml: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Import 3 | open Command 4 | open Command.Let_syntax 5 | 6 | module Mode = struct 7 | type t = Incr_and_dd | Incr_only | Demand_only | Batch 8 | 9 | let pp fs a = 10 | Format.fprintf fs 11 | (match a with 12 | | Incr_and_dd -> "INCREMENTAL & DEMAND-DRIVEN" 13 | | Incr_only -> "INCREMENTAL" 14 | | Demand_only -> "DEMAND-DRIVEN" 15 | | Batch -> "BATCH") 16 | 17 | let pp_short fs a = 18 | Format.fprintf fs 19 | (match a with 20 | | Incr_and_dd -> "incr_dd" 21 | | Incr_only -> "incr" 22 | | Demand_only -> "dd" 23 | | Batch -> "batch") 24 | end 25 | 26 | module RE = Random_edits 27 | 28 | let rec apply_n_times ~n ~init ~f = 29 | if n <= 0 then init else (apply_n_times [@tailcall]) ~n:(pred n) ~init:(f init n) ~f 30 | 31 | let time ~f ~x fs = 32 | let st = systime () in 33 | let res = f x in 34 | let st' = systime () in 35 | Format.fprintf fs "%.3f\n" (1000. *. (st' -. 
st)); 36 | res 37 | 38 | let run = 39 | basic ~summary:"Run scalability experiments with synthetic benchmarks" 40 | [%map_open 41 | let seed = flag "seed" (required int) ~doc:" random number generator seed" 42 | and n = anon ("n" %: int) 43 | and incr = 44 | flag "incremental" no_arg 45 | ~doc:"incremental analysis: reuse results across the program edit where possible" 46 | and dd = 47 | flag "demand" no_arg 48 | ~doc:"demand-driven analysis: analyze only as needed to respond to queries" 49 | and qpe = 50 | flag "qpe" (optional_with_default 3 int) 51 | ~doc:" If \"-demand\" flag is enabled, issue X queries between each edit. Default 3." 52 | in 53 | fun () -> 54 | let mode = 55 | match (incr, dd) with 56 | | true, true -> Mode.Incr_and_dd 57 | | true, false -> Mode.Incr_only 58 | | false, true -> Mode.Demand_only 59 | | false, false -> Mode.Batch 60 | in 61 | Format.printf "[INFO] initializing experiment in %a mode with seed %i\n" Mode.pp mode seed; 62 | Format.print_flush (); 63 | let fs_log = 64 | let logfile = 65 | Format.asprintf "out/log/%a_seed%i_n%i%s.log" Mode.pp_short mode seed n 66 | (if dd then "_qpe" ^ Int.to_string qpe else "") 67 | in 68 | Unix.openfile ~mode:[ Unix.O_WRONLY; Unix.O_CREAT ] (abs_of_rel_path logfile) 69 | |> Unix.out_channel_of_descr |> Format.formatter_of_out_channel 70 | in 71 | 72 | let gc_settings = Gc.get () in 73 | (* disable heap compaction *) 74 | gc_settings.max_overhead <- 1000000; 75 | (* per docs suggestion, switch to "best-fit" allocation when heap compaction is off *) 76 | gc_settings.allocation_policy <- 2; 77 | (* massively increase interval between GC cycles *) 78 | gc_settings.space_overhead <- 8000; 79 | Gc.set gc_settings; 80 | 81 | Random.init seed; 82 | let g = RE.init () in 83 | 84 | let issue_demand_queries init = 85 | apply_n_times ~n:qpe ~init ~f:(fun x _ -> time fs_log ~f:RE.random_query ~x) 86 | in 87 | let flush_mod k ~n = if Int.(equal 0 (n % k)) then Format.pp_print_flush fs_log () in 88 | let f = 89 | 
match mode with 90 | | Mode.Incr_and_dd -> 91 | fun x n -> 92 | let x = RE.random_edit x in 93 | flush_mod 100 ~n; 94 | issue_demand_queries x 95 | | Mode.Incr_only -> 96 | fun x n -> 97 | let x = RE.random_edit x in 98 | flush_mod 100 ~n; 99 | time fs_log ~f:RE.exit_query ~x 100 | | Mode.Demand_only -> 101 | fun x n -> 102 | let x = RE.random_edit (RE.G.drop_daigs x) in 103 | flush_mod 100 ~n; 104 | issue_demand_queries x 105 | | Mode.Batch -> 106 | fun x n -> 107 | let x = RE.random_edit (RE.G.drop_daigs x) in 108 | flush_mod 100 ~n; 109 | time fs_log ~f:RE.exit_query ~x 110 | in 111 | ignore @@ apply_n_times ~n ~init:g ~f] 112 | 113 | let () = Command.run ~version:"0.1" run 114 | -------------------------------------------------------------------------------- /src/import.ml: -------------------------------------------------------------------------------- 1 | let seeded_hash = Hashtbl.seeded_hash 2 | 3 | let systime = Sys.time 4 | 5 | include Core 6 | 7 | type 'a pp = Formatter.t -> 'a -> unit 8 | 9 | let ( >> ) f g = Fn.compose g f 10 | 11 | let flip = Fn.flip 12 | 13 | let ( $> ) x f = 14 | f x; 15 | x 16 | 17 | let fst3 (x, _, _) = x 18 | 19 | let snd3 (_, x, _) = x 20 | 21 | let trd3 (_, _, x) = x 22 | 23 | let uncurry f (x, y) = f x y 24 | 25 | let curry f x y = f (x, y) 26 | 27 | let uncurry3 f (x, y, z) = f x y z 28 | 29 | let curry3 f x y z = f (x, y, z) 30 | 31 | let pair x y = (x, y) 32 | 33 | let range i j = 34 | let rec aux n acc = if n < i then acc else aux (n - 1) (n :: acc) in 35 | aux j [] 36 | 37 | let rec apply_n_times ~n ~init ~f = 38 | if n <= 0 then init else apply_n_times ~n:(pred n) ~init:(f init n) ~f 39 | 40 | let time ~f ~x fs descr = 41 | let st = systime () in 42 | f x $> fun _ -> Format.fprintf fs "%s%.3f\n" descr (1000. *. (systime () -. 
st)) 43 | 44 | let abs_of_rel_path rel_path = 45 | match Sys.getenv "DAI_ROOT" with 46 | | Some prefix -> prefix ^ rel_path 47 | | None -> 48 | failwith 49 | "environment variable DAI_ROOT is unset; either use `make` or set manually to project root" 50 | 51 | (* "com.example.MyClass.Inner" -> ["com" ; "example"] *) 52 | let deserialize_package = 53 | String.split ~on:'.' 54 | >> List.filter ~f:(String.length >> ( <> ) 0) 55 | >> List.take_while ~f:(flip String.get 0 >> Char.is_lowercase) 56 | 57 | (* "com.example.MyClass.Inner" -> "MyClass$Inner" *) 58 | let deserialize_class = 59 | String.split ~on:'.' 60 | >> List.filter ~f:(String.length >> ( <> ) 0) 61 | >> List.drop_while ~f:(flip String.get 0 >> Char.is_lowercase) 62 | >> String.concat ~sep:"$" 63 | 64 | module Option = struct 65 | include Base.Option 66 | 67 | let pp ?(default = "None") pp_elt fs = function 68 | | Some x -> pp_elt fs x 69 | | None -> String.pp fs default 70 | 71 | let cons xo xs = match xo with Some x -> x :: xs | None -> xs 72 | 73 | let merge l r f = 74 | match (l, r) with 75 | | None, None -> None 76 | | Some _, None -> l 77 | | None, Some _ -> r 78 | | Some x, Some y -> Some (f x y) 79 | 80 | let ( let* ) = ( >>= ) 81 | 82 | let ( let+ ) = ( >>| ) 83 | end 84 | 85 | include Option.Monad_infix 86 | 87 | module List = struct 88 | include Base.List 89 | 90 | let rec pp ?pre ?suf sep pp_elt fs = function 91 | | [] -> () 92 | | x :: xs -> 93 | Option.iter pre ~f:(Format.fprintf fs); 94 | pp_elt fs x; 95 | (match xs with [] -> () | xs -> Format.fprintf fs "%( %)%a" sep (pp sep pp_elt) xs); 96 | Option.iter suf ~f:(Format.fprintf fs) 97 | end 98 | 99 | module Result = struct 100 | include Base.Result 101 | 102 | let pp fmt pp_elt fs = function Ok x -> Format.fprintf fs fmt pp_elt x | Error _ -> () 103 | end 104 | 105 | module Set = struct 106 | include Base.Set 107 | 108 | type ('elt, 'cmp) tree = ('elt, 'cmp) Using_comparator.Tree.t 109 | 110 | let equal_m__t (module Elt : Compare_m) = equal 
111 | 112 | let pp pp_elt fs x = Format.fprintf fs "{%a}" (List.pp ~pre:"" ~suf:"" ",@ " pp_elt) (to_list x) 113 | 114 | let disjoint x y = is_empty (inter x y) 115 | 116 | let add_option yo x = Option.fold ~f:add ~init:x yo 117 | 118 | let add_list ys x = List.fold ~f:add ~init:x ys 119 | 120 | let to_tree = Using_comparator.to_tree 121 | 122 | let union x y = 123 | let xy = union x y in 124 | let xy_tree = to_tree xy in 125 | if phys_equal xy_tree (to_tree x) then x else if phys_equal xy_tree (to_tree y) then y else xy 126 | end 127 | 128 | module Array = struct 129 | include Base.Array 130 | 131 | let pp sep pp_elt fs a = List.pp sep pp_elt fs (to_list a) 132 | end 133 | 134 | module Map = struct 135 | include Base.Map 136 | 137 | let pp pp_key pp_data fs m = 138 | Format.fprintf fs "{@["; 139 | iteri m ~f:(fun ~key ~data -> Format.fprintf fs " %a : %a;@," pp_key key pp_data data); 140 | Format.fprintf fs "@]}" 141 | end 142 | 143 | type ('a, 'b) fmt = ('a, Formatter.t, unit, 'b) format4 144 | 145 | module Colors = struct 146 | let red = "\x1b[91m" 147 | 148 | let green = "\x1b[92m" 149 | 150 | let yellow = "\x1b[93m" 151 | 152 | let blue = "\x1b[94m" 153 | 154 | let magenta = "\x1b[95m" 155 | 156 | let cyan = "\x1b[96m" 157 | 158 | let reset = "\x1b[0m" 159 | end 160 | 161 | module Engine = Adapton.Engine.Make (Adapton.Engine.Default_params) 162 | module DefaultArtLib = Engine.ArtLib 163 | 164 | module Name = struct 165 | include Adapton.Name 166 | 167 | let extend nm str = pair nm (of_string str) 168 | end 169 | 170 | module Graph = Graphlib.Std.Graphlib 171 | module Seq = Regular.Std.Seq 172 | 173 | let ( = ) = Stdlib.( = ) 174 | 175 | let ( < ) = Stdlib.( < ) 176 | 177 | let ( > ) = Stdlib.( > ) 178 | 179 | let ( <= ) = Stdlib.( <= ) 180 | 181 | let ( >= ) = Stdlib.( >= ) 182 | -------------------------------------------------------------------------------- /src/syntax/class_hierarchy.ml: 
--------------------------------------------------------------------------------
open Dai.Import

(* nodes are fully-qualified class names; edges carry no label *)
module G = Graph.Make (String) (Unit)

type t = G.t
(** keep a class hierarchy as a directed graph, where an edge from "com.example.Foo" to "com.example.Bar" denotes that Foo extends/implements Bar*)

(* the empty hierarchy *)
let empty = Graph.create (module G) ()

(* fully-qualified class id, e.g. class_id_of ["com"; "example"] "Foo" = "com.example.Foo" *)
let class_id_of package class_name = String.concat ~sep:"." package ^ "." ^ class_name

(* record that [package].[class_name] extends/implements [super_package].[superclass_name] *)
let add ~(package : string list) ~(class_name : string) ~(super_package : string list)
    ~(superclass_name : string) : t -> t =
  let class_id = class_id_of package class_name in
  let superclass_id = class_id_of super_package superclass_name in
  G.Edge.(insert (create class_id superclass_id ()))

(* simple (package-stripped) name of the unique superclass of [package].[class_name], if any;
   fails if the hierarchy records more than one superclass for it *)
let get_superclass_name ~(package : string list) ~(class_name : string) : t -> string option =
  let class_id = class_id_of package class_name in
  G.Node.succs class_id >> Seq.to_list >> function
  | [] -> None
  | [ superclass_id ] ->
      Some
        (match String.rsplit2 superclass_id ~on:'.' with
        | Some (_pkg, cls) -> cls
        | None -> superclass_id)
  | _ ->
      (* only single inheritance in java *)
      failwith "multiple inheritance? in _this_ economy???"
29 | 30 | let ancestors ~(package : string list) ~(class_name : string) cha : string list = 31 | let class_id = class_id_of package class_name in 32 | Graph.fold_reachable (module G) cha class_id ~init:[] ~f:(flip List.cons) 33 | 34 | let compute_closure ~(cha : t) ~(fields : Declared_fields.t) : Declared_fields.t = 35 | Graph.depth_first_search 36 | (module G) 37 | cha 38 | ~enter_edge:(fun _ e acc -> 39 | let parent_cid = G.Edge.dst e in 40 | let child_cid = G.Edge.src e in 41 | Declared_fields.( 42 | add_cid acc ~class_id:child_cid ~fields:(lookup_cid acc ~class_id:parent_cid))) 43 | ~init:fields 44 | 45 | let merge = Graph.union (module G) 46 | 47 | let%test "closure operation" = 48 | let cha = 49 | empty 50 | |> add ~package:[ "foo"; "bar" ] ~class_name:"One" ~super_package:[ "com"; "example" ] 51 | ~superclass_name:"Two" 52 | |> add ~package:[ "com"; "example" ] ~class_name:"Two" ~super_package:[ "foo"; "bar" ] 53 | ~superclass_name:"Three" 54 | |> add ~package:[ "com"; "example" ] ~class_name:"Four" ~super_package:[ "foo"; "bar" ] 55 | ~superclass_name:"Three" 56 | |> add ~package:[ "com"; "example" ] ~class_name:"Five" ~super_package:[ "foo"; "bar" ] 57 | ~superclass_name:"Six" 58 | |> add ~package:[ "com"; "example" ] ~class_name:"Seven" ~super_package:[ "foo"; "bar" ] 59 | ~superclass_name:"Six" 60 | |> add ~package:[ "foo"; "bar" ] ~class_name:"Eight" ~super_package:[ "com"; "example" ] 61 | ~superclass_name:"Seven" 62 | in 63 | let singleton_field name : Declared_fields.fields = 64 | { static = String.Set.empty; instance = String.Set.singleton name } 65 | in 66 | let fields = 67 | Declared_fields.( 68 | empty 69 | |> add ~package:[ "foo"; "bar" ] ~class_name:"One" ~fields:(singleton_field "A") 70 | |> add ~package:[ "com"; "example" ] ~class_name:"Two" ~fields:(singleton_field "B") 71 | |> add ~package:[ "foo"; "bar" ] ~class_name:"Three" ~fields:(singleton_field "C") 72 | |> add ~package:[ "com"; "example" ] ~class_name:"Four" ~fields:(singleton_field 
"D") 73 | |> add ~package:[ "com"; "example" ] ~class_name:"Five" ~fields:(singleton_field "E") 74 | |> add ~package:[ "foo"; "bar" ] ~class_name:"Six" ~fields:(singleton_field "F") 75 | |> add ~package:[ "com"; "example" ] ~class_name:"Seven" ~fields:(singleton_field "G") 76 | |> add ~package:[ "foo"; "bar" ] ~class_name:"Eight" ~fields:(singleton_field "H")) 77 | in 78 | let transitive_fields = compute_closure ~cha ~fields in 79 | Declared_fields.( 80 | ( lookup transitive_fields ~package:[ "foo"; "bar" ] ~class_name:"One" |> fun { instance; _ } -> 81 | String.Set.(equal instance @@ of_list [ "A"; "B"; "C" ]) ) 82 | && ( lookup transitive_fields ~package:[ "com"; "example" ] ~class_name:"Two" 83 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "B"; "C" ]) ) 84 | && ( lookup transitive_fields ~package:[ "foo"; "bar" ] ~class_name:"Three" 85 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "C" ]) ) 86 | && ( lookup transitive_fields ~package:[ "com"; "example" ] ~class_name:"Four" 87 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "C"; "D" ]) ) 88 | && ( lookup transitive_fields ~package:[ "com"; "example" ] ~class_name:"Five" 89 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "E"; "F" ]) ) 90 | && ( lookup transitive_fields ~package:[ "foo"; "bar" ] ~class_name:"Six" 91 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "F" ]) ) 92 | && ( lookup transitive_fields ~package:[ "com"; "example" ] ~class_name:"Seven" 93 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "F"; "G" ]) ) 94 | && lookup transitive_fields ~package:[ "foo"; "bar" ] ~class_name:"Eight" 95 | |> fun { instance; _ } -> String.Set.(equal instance @@ of_list [ "F"; "G"; "H" ])) 96 | -------------------------------------------------------------------------------- /src/frontend/tree.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 
open Tree_sitter_java
module TSB = Tree_sitter_bindings

(* an (untyped) tree-sitter parse tree *)
type t = TSB.Tree_sitter_API.ts_tree

(* a typed concrete syntax tree for a Java compilation unit *)
type java_cst = CST.program

(* Parse [file] with the tree-sitter Java parser, reusing [old_tree] (when
   provided) for incremental re-parsing.  On a parser exception, print a
   backtrace to stdout and re-raise. *)
let parse ~old_tree ~file =
  try
    Result.return
    @@ TSB.Tree_sitter_API.Parser.parse Parse.ts_parser old_tree (Src_file.read_fn file)
  with e ->
    Printexc.print_backtrace Stdlib.stdout;
    raise e

(* todo: fail gracefully *)

(* Convert a raw tree-sitter tree over [src] into a typed Java CST:
   [Ok cst] iff parsing produced a program with no errors, otherwise the
   list of parse errors. *)
let as_java_cst src ts_tree =
  let root = TSB.Tree_sitter_output.of_ts_tree ts_tree in
  match Parse.parse_input_tree { src; root } with
  | { program = Some p; errors = []; stat = _ } -> Ok p
  | { errors; _ } -> Error errors

(* Update a per-line byte-count list to reflect one textual diff hunk:
   keep counts for the unchanged lines before the hunk (recursing with a
   decremented [prev_start_line]), then splice in the byte counts of the
   hunk's new lines (+1 per line for its newline terminator, per
   [String.length >> Int.succ]) and drop the counts of the [prev_length]
   replaced lines. *)
let rec update_offsets (old_offsets : int list) (text_diff : Text_diff.t) =
  match (old_offsets, text_diff) with
  | offsets_hd :: offsets_tl, { prev_start_line; prev_length; next_start_line; new_lines }
    when prev_start_line > 1 ->
      let prev_start_line = prev_start_line - 1 in
      offsets_hd
      :: update_offsets offsets_tl { prev_start_line; prev_length; next_start_line; new_lines }
  | _, { prev_start_line = 1; prev_length; next_start_line = _; new_lines } ->
      let new_offsets = Array.to_list new_lines |> List.map ~f:(String.length >> Int.succ) in
      List.append new_offsets (List.drop old_offsets prev_length)
  | _ -> failwith "malformed text diff"

(* byte offset of the start of line [lines]+1: sum of the first [lines] per-line byte counts *)
let cumulative_offset offsets ~(lines : int) = List.take offsets lines |> List.fold ~init:0 ~f:( + )

(* Push a list of textual diff hunks into tree-sitter's incremental-edit
   API, threading the per-line byte-count [offsets] through [update_offsets]
   and tracking in [line_offset] the cumulative line displacement caused by
   earlier hunks. *)
let apply (diff : Text_diff.t list) ~(offsets : int list) (tree : t) =
  let line_offset = ref 0 in
  List.fold diff ~init:(tree, offsets) ~f:(fun (tree, offsets) d ->
      let new_tree =
        match d with
        | { prev_start_line; prev_length; next_start_line; new_lines } ->
            let prev_start_line = !line_offset + prev_start_line in
            let prev_end_line = prev_start_line + prev_length in
            let next_end_line = next_start_line + Array.length new_lines in
            line_offset := !line_offset - prev_length +
Array.length new_lines; 49 | let start_byte = cumulative_offset offsets ~lines:(prev_start_line - 1) in 50 | let prev_end_byte = 51 | cumulative_offset offsets ~lines:(prev_start_line + prev_length - 1) 52 | in 53 | let next_end_byte = 54 | Array.fold new_lines ~init:start_byte ~f:(fun bytes new_line -> 55 | bytes + 1 + String.length new_line) 56 | in 57 | TSB.Tree_sitter_API.Tree.edit tree start_byte prev_end_byte next_end_byte 58 | next_start_line prev_end_line next_end_line 59 | in 60 | (new_tree, update_offsets offsets d)) 61 | |> fst 62 | 63 | open Result 64 | open Option.Monad_infix 65 | 66 | let%test "initial parse, valid syntax" = 67 | let file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorld.java" in 68 | parse ~old_tree:None ~file |> bind ~f:(as_java_cst file) |> is_ok 69 | 70 | let%test "initial parse, invalid syntax" = 71 | let file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/SyntaxError.java" in 72 | parse ~old_tree:None ~file |> bind ~f:(as_java_cst file) |> is_error 73 | 74 | let%test "incremental parse, valid syntax, one-hunk edit" = 75 | let prev_file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorld.java" in 76 | let next_file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorlds.java" in 77 | let prev_tree = parse ~old_tree:None ~file:prev_file in 78 | assert (bind ~f:(as_java_cst prev_file) prev_tree |> is_ok); 79 | let updated_prev_tree = 80 | prev_tree |> ok 81 | >>| apply 82 | (Text_diff.btwn ~prev:(Src_file.lines prev_file) ~next:(Src_file.lines next_file)) 83 | ~offsets:(Src_file.line_offsets prev_file) 84 | in 85 | parse ~old_tree:updated_prev_tree ~file:next_file |> bind ~f:(as_java_cst next_file) |> is_ok 86 | 87 | let%test "incremental parse, valid syntax, two additions" = 88 | let prev_file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorld.java" in 89 | let next_file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorlds2.java" in 90 | let prev_tree = 
parse ~old_tree:None ~file:prev_file in 91 | assert (bind ~f:(as_java_cst prev_file) prev_tree |> is_ok); 92 | let updated_prev_tree = 93 | prev_tree |> ok 94 | >>| apply 95 | (Text_diff.btwn ~prev:(Src_file.lines prev_file) ~next:(Src_file.lines next_file)) 96 | ~offsets:(Src_file.line_offsets prev_file) 97 | in 98 | parse ~old_tree:updated_prev_tree ~file:next_file |> bind ~f:(as_java_cst next_file) |> is_ok 99 | 100 | let%test "incremental parse, valid syntax, two deletions" = 101 | let prev_file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorlds2.java" in 102 | let next_file = Src_file.of_file @@ abs_of_rel_path "test_cases/java/HelloWorld.java" in 103 | let prev_tree = parse ~old_tree:None ~file:prev_file in 104 | assert (bind ~f:(as_java_cst prev_file) prev_tree |> is_ok); 105 | let diff = Text_diff.btwn ~prev:(Src_file.lines prev_file) ~next:(Src_file.lines next_file) in 106 | let updated_prev_tree = 107 | prev_tree |> ok >>| apply diff ~offsets:(Src_file.line_offsets prev_file) 108 | in 109 | parse ~old_tree:updated_prev_tree ~file:next_file |> bind ~f:(as_java_cst next_file) |> is_ok 110 | -------------------------------------------------------------------------------- /src/frontend/cfg_parser.mli: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Tree_sitter_java 3 | open Syntax 4 | 5 | type edge = Cfg.Loc.t * Cfg.Loc.t * Ast.Stmt.t 6 | 7 | type prgm_parse_result = { 8 | loc_map : Loc_map.t; 9 | cfgs : Cfg.t Cfg.Fn.Map.t; 10 | fields : Declared_fields.t; 11 | cha : Class_hierarchy.t; 12 | } 13 | 14 | val set_parse_result : 15 | ?loc_map:Loc_map.t -> 16 | ?cfgs:Cfg.t Cfg.Fn.Map.t -> 17 | ?fields:Declared_fields.t -> 18 | ?cha:Class_hierarchy.t -> 19 | prgm_parse_result -> 20 | prgm_parse_result 21 | 22 | val empty_parse_result : prgm_parse_result 23 | 24 | val set_diagnostic : bool -> unit 25 | 26 | (* run in [diagnostic] mode if set, gathering information about our compatibility 
with the syntax of the given CST rather than failing fast on incompatible syntactic form*) 27 | val print_diagnostic_results : unit -> unit 28 | (* print information about unimplemented syntax encountered *) 29 | 30 | val of_java_cst : ?acc:prgm_parse_result -> CST.program -> prgm_parse_result 31 | (** Parse each method in a (java) tree-sitter concrete syntax tree to a CFG, adding to an [acc]umulator parse result if provided*) 32 | 33 | val parse_file_exn : ?acc:prgm_parse_result -> string -> prgm_parse_result 34 | (** Parse a (java) source file to CFGs, adding to an [acc]umulator parse result if provided *) 35 | 36 | val parse_files_exn : files:string list -> prgm_parse_result 37 | (** Parse some (java) source [files] to CFGs *) 38 | 39 | val parse_trees_exn : trees:(string * Tree.t) list -> prgm_parse_result 40 | (** Translate the given tree-sitter parse [trees] to CFGs *) 41 | 42 | val expr : 43 | ?exit_loc:Cfg.Loc.t -> 44 | curr_loc:Cfg.Loc.t -> 45 | exc:Cfg.Loc.t -> 46 | CST.expression -> 47 | Ast.Expr.t * (Cfg.Loc.t * edge list) 48 | (** Convert an expression concrete syntax tree to an expression in our IR, along with potentially some preceding statements for any function invocations and assignments therein, and a shifted current program location to accomodate those intermediate statements. 49 | That is, 50 | * if `cst` represents a simple expression with no function invocations or assignments, return value is (, (curr_loc,[])) 51 | * if `cst` contains function invocations f_1 ... f_k and assignments x_1=e_1 ... x_n=e_n, return value is 52 | ( 53 | [ tmp_var_i / f_i][x_i / x_i=e_i], 54 | Some (fresh_loc_k+n, [curr_loc -[tmp_var_1 := f_1]-> fresh_loc_1, ... , fresh_loc_(k-1) -[tmp_var_k := f_k]-> fresh_loc_k] ++ [fresh_loc_k -[x_1=e_1]-> fresh_loc_k+1, ... 
, fresh_loc_(k+n-1) -[x_n=e_n]-> fresh_loc_(k+n)]) 55 | Optional [exit_loc] param is used to special-case the common statement syntax of [Exp_stmt (`Assign_exp _)] and avoid generating extraneous locations and [Skip] edges 56 | 57 | *) 58 | 59 | val package_of_cst : CST.program -> string list 60 | 61 | val imports_of_cst : ?package:string list -> CST.program -> string list String.Map.t 62 | (** best-effort local name resolution: 63 | * For each "import foo.bar.Baz;", [imports] maps "Baz" to ["foo" ; "bar"] 64 | * For each "class Foo { ... }" in this file, also map "Foo" to its [package] declaration 65 | *) 66 | 67 | val of_method_decl : 68 | Loc_map.t -> 69 | ?package:string list -> 70 | class_name:string -> 71 | CST.method_declaration -> 72 | (Loc_map.t * edge list * Cfg.Fn.t) option 73 | (** construct a procedure's CFG from its declaration's concrete syntax tree *) 74 | 75 | val of_constructor_decl : 76 | Loc_map.t -> 77 | ?package:string list -> 78 | class_name:string -> 79 | instance_init:CST.program option -> 80 | field_decls:CST.field_declaration list -> 81 | cha:Class_hierarchy.t -> 82 | CST.constructor_declarator -> 83 | CST.constructor_body -> 84 | Loc_map.t * edge list * Cfg.Fn.t 85 | (** construct a constructor's CFG from its declaration's concrete syntax tree *) 86 | 87 | val instance_field_decls : CST.class_body_declaration list -> CST.field_declaration list 88 | 89 | val of_static_init : 90 | Loc_map.t -> 91 | ?package:string list -> 92 | class_name:string -> 93 | CST.block -> 94 | Loc_map.t * edge list * Cfg.Fn.t 95 | 96 | val types_of_formals : CST.formal_parameters -> string list 97 | (** simpler representation of a formal parameter list, for distinguishing overloading *) 98 | 99 | val edge_list_of_stmt_list : 100 | Method_id.t -> 101 | Loc_map.t -> 102 | entry:Cfg.Loc.t -> 103 | exit:Cfg.Loc.t -> 104 | ret:Cfg.Loc.t -> 105 | exc:Cfg.Loc.t -> 106 | ?brk:Cfg.Loc.t option * Cfg.Loc.t Dai.Import.String.Map.t -> 107 | ?cont:Cfg.Loc.t option * Cfg.Loc.t 
Dai.Import.String.Map.t ->
  CST.program ->
  Loc_map.t * edge list

val for_loop_header :
  Method_id.t ->
  body_entry:Cfg.Loc.t ->
  body_exit:Cfg.Loc.t ->
  entry:Cfg.Loc.t ->
  exit:Cfg.Loc.t ->
  ret:Cfg.Loc.t ->
  exc:Cfg.Loc.t ->
  Loc_map.t ->
  CST.for_statement ->
  Loc_map.t * edge list * edge
(** Return value is composed of:
    (1) updated loc_map,
    (2) all CFG edges for the for-loop excluding its body,
    (3) the back edge from the update back up to the condition.

    This is distinguished to support updates to the loop-header without updating the full loop body.

    Return value (3) is contained in (2), but is useful when constructing the updated DAIG region for an edited loop.

    Analogs to this function are not needed for conditional/while-loop headers because those can easily be identified from the CFG structure.
 *)
--------------------------------------------------------------------------------
/generate_table.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# NOTE: this script uses f-strings (Python 3.6+), so the shebang must
# resolve to a Python 3 interpreter; a bare /usr/bin/python may be
# Python 2 or absent on modern systems (PEP 394).

# Generates table 1 for the paper.
# depends on three supplemental files which describe their creation:
#   'linesOfSourceCode'
#   'diffCounts'
#   'callgraphSize'

# number of statistics fields expected on a stats line of an experiment log
num_stats = 6

def processFile(pathToFile: str):
    """Parse one experiment log file.

    Returns (run_time, stats): run_time is the float in the last column of
    the log's second-to-last line (presumably in milliseconds -- strTime
    below divides by 1000 to render seconds; TODO confirm), and stats is a
    list of ints drawn from the last num_stats comma-terminated columns of
    the final line.  Lines containing "SUMMARIES" are ignored.
    """
    # print(f"processing {pathToFile}")
    with open(pathToFile) as f:
        # incremental runs are identified purely by their filename
        isIncremental = 'incr' in pathToFile
        lines = [line.strip() for line in f.readlines() if "SUMMARIES" not in line]
        run_time = float(lines[-2].split()[-1])
        stats = [int(stat.split(',')[0]) for stat in lines[-1].split()[-num_stats:]]
        # stats is a list of ints, representing |D*|, |Delta|, |unique procedures rho in D*|, total cells, and nonempty cells after analysis, and number of self loops in Delta
        # depending on the incr flag, also add on the nonempty cells before analysis
        if isIncremental:
            before_stats = [int(stat.split(',')[0]) for stat in lines[-3].split()[-num_stats:]]
            stats.append(before_stats[4])
        else:
            # non-incremental runs start from scratch: no pre-analysis nonempty cells
            stats.append(0)
    return run_time, stats


def readFile(pathToFile: str):
    """Read a whitespace-separated file into a {first token: int(last token)} dict.

    The first line (the file-generation line) is skipped.
    """
    with open(pathToFile) as f:
        lines = [line.strip().split() for line in f.readlines()]
        # [1:] to drop the file generation line
        progMap = {line[0]: int(line[-1]) for line in lines[1:]}
    return progMap  # map of program names to some integer data


def percent(mode, file_prefix):
    """Fraction of the batch run's nonempty cells that run mode [mode] computed.

    Computed as (nonempty after analysis - nonempty before analysis) divided
    by the batch configuration's nonempty count; relies on module-level
    `postfixes` (postfixes[0] is the batch configuration).
    """
    _, stats = processFile(file_prefix+mode)
    _, batch_stats = processFile(file_prefix+postfixes[0])
    nonEmpty_pre = stats[-1]
    nonEmpty_post = stats[4]
    nonEmpty_batch = batch_stats[4]
    return (nonEmpty_post - nonEmpty_pre) / nonEmpty_batch

def multirow(cell) -> str:
    """Wrap a cell's contents in a LaTeX \\multirow spanning two rows."""
    return f"\multirow{{2}}{{*}}{{{cell}}}"

def citeProgram(prog: str) -> str:
    """LaTeX citation command for a BugSwarm program, keyed by the id after its last '-'."""
    return f"\citetalias{{bugswarm{prog.split('-')[-1]}}}"


def averageDict(dictionary) -> float:
    """Arithmetic mean of a dict's values."""
    return sum(dictionary.values())/len(dictionary)


def strPercent(percentage) -> str:
    """Render a fraction as a percentage string, clamping small nonzero values to '<0.1'."""
    # NOTE: local `percent` deliberately shadows the module-level function of the same name
    percent = percentage*100
    if percent == 0:
        return "0.0"
    if percent < 0.1:
| return "<0.1" 60 | else: 61 | return f"{percent:.1f}" 62 | 63 | 64 | def strTime(ms_time): 65 | seconds = ms_time/1000.0 66 | if seconds == 0: 67 | return "0.00" 68 | if seconds < 0.01: 69 | return "<0.01" 70 | else: 71 | return f"{seconds:.2f}" 72 | 73 | def filePrefix(analysis_prefix, run_number, program): 74 | return f'out/{analysis_prefix}{run_number}/{program}' 75 | 76 | 77 | def generateDataRow(analysis, program): 78 | row_output = "" 79 | # the abstract work done should be the same for each program, so just use the first file for that 80 | fprefix = filePrefix(analysis, 1, program) 81 | for run_mode in postfixes: 82 | average = 0 83 | for exp_run in range(1,num_runs+1): 84 | time, stats = processFile(filePrefix(analysis, exp_run, program)+run_mode) 85 | average = average+time 86 | average = average/num_runs 87 | total_of_averages[analysis,run_mode] = total_of_averages[analysis,run_mode] + average 88 | total_of_percents[analysis,run_mode] = total_of_percents[analysis,run_mode] + percent(run_mode, fprefix) 89 | num_varphi = stats[4]-stats[-1] 90 | total_of_absStates[analysis,run_mode] = total_of_absStates[analysis,run_mode] + num_varphi 91 | if run_mode == postfixes[0]: 92 | row_output = row_output + f" & {num_varphi}" 93 | else: 94 | row_output = row_output + f" & {strPercent(percent(run_mode, fprefix))}" 95 | row_output = row_output + f' & {strTime(average)}' 96 | return row_output + ' \\\\' 97 | 98 | 99 | def generateAverageRow(analysis): 100 | row_output = "" 101 | for mode in postfixes: 102 | if mode == postfixes[0]: 103 | row_output = row_output + f' & {total_of_absStates[analysis,mode]/num_programs:.0f}' 104 | else: 105 | row_output = row_output + f' & {strPercent(total_of_percents[analysis,mode]/num_programs)}' 106 | row_output = row_output + f' & {strTime(total_of_averages[analysis,mode]/num_programs)}' 107 | return row_output + ' \\\\' 108 | 109 | postfixes = [".batch", ".incr", ".dd", ".ddincr"] 110 | runs = [f"run{i}/" for i in range(1, 11)] 111 
| interval_prefix = "run" 112 | nullability_prefix = "log" 113 | analysis_prefixes = [interval_prefix, nullability_prefix] 114 | num_runs = 10 # 10 115 | excluded_programs = \ 116 | [] 117 | # ["tananaev-traccar-188473749", "tananaev-traccar-255051211", "raphw-byte-buddy-234970609"] 118 | with open('experiment_inputs/query_artifacts') as f: 119 | programs = [line.strip() for line in f.readlines() ] 120 | programs = [program for program in programs if program not in excluded_programs] 121 | num_programs = len(programs) 122 | 123 | locs = readFile('linesOfSourceCode') 124 | edited_locs = readFile('diffCounts') 125 | callgraph_sizes = readFile('callgraphSize') 126 | 127 | output = 'program' 128 | for mode in postfixes: 129 | output = output + ', ' + mode 130 | print(output) 131 | 132 | offset = 6*" " 133 | total_of_averages = {} 134 | total_of_percents = {} 135 | total_of_absStates = {} 136 | for run_mode in postfixes: 137 | for analysis in analysis_prefixes: 138 | total_of_averages[analysis,run_mode] = 0 139 | total_of_percents[analysis,run_mode] = 0 140 | total_of_absStates[analysis,run_mode] = 0 141 | first = True 142 | for program in programs: 143 | if first: 144 | first = False 145 | else: 146 | print(offset+"\\arrayrulecolor{gray}\\hline") 147 | kloc = f'{locs[program]/1000.0:.1f}' 148 | output = offset + f"{multirow(citeProgram(program))} & {multirow(kloc)} & {multirow(edited_locs[program])} & {multirow(callgraph_sizes[program])}" 149 | print(output) 150 | print(offset + 3*" " + "& I" + generateDataRow(interval_prefix, program)) 151 | output = offset + 3*"& " 152 | print(output + "& N" + generateDataRow(nullability_prefix, program)) 153 | 154 | print(offset + '\\arrayrulecolor{black}\\midrule') 155 | 156 | 157 | average_kloc = f'{averageDict(locs)/1000:.1f}' 158 | average_eloc = f'{averageDict(edited_locs):.0f}' 159 | average_cg = f'{averageDict(callgraph_sizes):.0f}' 160 | output = offset + f'{multirow("average")} & {multirow(average_kloc)} & 
{multirow(average_eloc)} & {multirow(average_cg)}' 161 | print(output) 162 | output = offset + 3*" " + "& I" 163 | print(output + generateAverageRow(interval_prefix)) 164 | output = offset + 3*"& " + "& N" 165 | print(output + generateAverageRow(nullability_prefix)) 166 | 167 | # data = [(prog, [(postfix, processFile('out/'+runs[0]+prog+postfix)[1]) for postfix in postfixes]) for prog in programs] 168 | # print(data) 169 | 170 | -------------------------------------------------------------------------------- /src/frontend/callgraph.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Syntax 3 | 4 | type forward_t = Cfg.Fn.t list Method_id.Map.t 5 | 6 | type reverse_t = Cfg.Fn.t list Method_id.Map.t 7 | 8 | type scc = string Graphlib.Std.partition 9 | 10 | type t = { forward : forward_t; reverse : reverse_t; scc : scc option } 11 | 12 | let empty = { forward = Method_id.Map.empty; reverse = Method_id.Map.empty; scc = None } 13 | 14 | let add ~(caller : Cfg.Fn.t) ~(callee : Cfg.Fn.t) cg = 15 | let update map m_id fn = 16 | Map.update map m_id ~f:(function None -> [ fn ] | Some fns -> fn :: fns) 17 | in 18 | { 19 | cg with 20 | forward = update cg.forward caller.method_id callee; 21 | reverse = update cg.reverse callee.method_id caller; 22 | } 23 | 24 | (* a callgraph is a map from caller [Method_id]'s to sets of callee [Cfg.Fn]'s *) 25 | 26 | let is_syntactically_compatible (callsite : Ast.Stmt.t) (callee : Cfg.Fn.t) = 27 | let num_formals = List.length callee.formals in 28 | let compatible_args actuals = 29 | num_formals = List.length actuals 30 | || (* WALA callgraph output doesn't know about varargs, 31 | so we conservatively assume any function whose last argument has array type may be variadic *) 32 | (num_formals > 0 && String.is_suffix ~suffix:"[]" (List.last_exn callee.method_id.arg_types)) 33 | in 34 | match callsite with 35 | | Ast.Stmt.Call { lhs = _; rcvr; meth = ""; actuals; alloc_site = _ } 36 
  (* nameless callsite: a constructor call; match against the callee's
     innermost class name (after '$' for nested classes) *)
  | Ast.Stmt.Exceptional_call { rcvr; meth = ""; actuals } ->
      String.equal rcvr (String.split callee.method_id.class_name ~on:'$' |> List.last_exn)
      && String.equal "" callee.method_id.method_name
      && compatible_args actuals
  | Ast.Stmt.Call { lhs = _; rcvr = _; meth; actuals; alloc_site = _ }
  | Ast.Stmt.Exceptional_call { rcvr = _; meth; actuals } ->
      String.equal meth callee.method_id.method_name && compatible_args actuals
  | _ -> false

(* callees of [callsite] (occurring in [caller_method]) per callgraph [cg],
   filtered down to those syntactically compatible with the callsite *)
let callees ~callsite ~caller_method ~(cg : forward_t) =
  let candidates = match Method_id.Map.find cg caller_method with Some cs -> cs | None -> [] in
  List.filter candidates ~f:(is_syntactically_compatible callsite)

(* serialized format is "(caller_line callee_line^* )^*", where
 * a caller_line is "CALLER: <method_id>\n"
 * a callee_line is "\tCALLEE: <method_id>\n"
 * method-ids can be deserialized by [Method_id.deserialize]
 * there exists a call edge to each CALLEE from the preceding CALLER
 *)
let deserialize_forward ~fns =
  Src_file.lines
  >> Array.fold ~init:(Method_id.Map.empty, None) ~f:(fun (acc_cg, curr_caller) line ->
         if String.is_prefix line ~prefix:"CALLER: " then
           (* CALLER line: becomes the source of the subsequent CALLEE edges *)
           let caller = String.chop_prefix_exn ~prefix:"CALLER: " line |> Method_id.deserialize in
           (acc_cg, Some caller)
         else
           let caller = Option.value_exn curr_caller in
           let callee_method =
             String.chop_prefix_exn ~prefix:"\tCALLEE: " line |> Method_id.deserialize
           in
           (* resolve the callee to a known [Cfg.Fn.t], dropping the edge if unknown *)
           let callee =
             List.find fns ~f:(fun (f : Cfg.Fn.t) -> Method_id.equal f.method_id callee_method)
           in
           match callee with
           | None -> (acc_cg, curr_caller)
           | Some callee ->
               let cg =
                 Method_id.Map.set acc_cg ~key:caller
                   ~data:
                     (match Method_id.Map.find acc_cg caller with
                     | Some callees -> callee :: callees
                     | None -> [ callee ])
               in
               (cg, curr_caller))
  >> fst

(* invert a forward callgraph into a callee-method-id -> caller-fns map;
   callers whose method-id is not among [fns] are dropped *)
let reverse ~(fns : Cfg.Fn.t list) (cg : forward_t) : reverse_t =
  Map.fold cg ~init:Method_id.Map.empty ~f:(fun ~key:caller ~data:callees acc ->
      match List.find fns ~f:(fun (f : Cfg.Fn.t) -> Method_id.equal f.method_id caller) with
      | Some caller_fn ->
          List.fold callees ~init:acc ~f:(fun acc callee ->
              Map.update acc callee.method_id ~f:(function
                | Some callers -> caller_fn :: callers
                | None -> [ caller_fn ]))
      | None -> acc)

(* string-labelled graph, used for SCC computation and dot output *)
module G = Graph.Make (String) (Unit)

(* view the callgraph as a graph over pretty-printed method-ids *)
let to_graph (cg : forward_t) : G.t =
  let edges =
    Map.fold cg ~init:[] ~f:(fun ~key:caller ~data:callees acc ->
        let caller = Format.asprintf "%a" Method_id.pp caller in
        List.fold callees ~init:acc ~f:(fun acc callee ->
            let callee = Format.asprintf "%a" Method_id.pp callee.method_id in
            (caller, callee, ()) :: acc))
  in
  Graph.create (module G) ~edges ()

let strongly_connected_components (cg : forward_t) : scc =
  Graph.strong_components (module G) (to_graph cg)

(* read a full callgraph (forward, reverse, and SCCs) from serialized [file] *)
let deserialize ~fns file =
  let forward = deserialize_forward ~fns file in
  let reverse = reverse ~fns forward in
  let scc = strongly_connected_components forward in
  { forward; reverse; scc = Some scc }

let callers ~callee_method ~reverse_cg =
  match Map.find reverse_cg callee_method with Some callers -> callers | None -> []

(* two methods are mutually recursive iff they lie in the same SCC; with no
   SCC info, fall back to method-id equality (i.e., direct recursion only) *)
let methods_mutually_recursive scc m1 m2 =
  match scc with
  | None -> Method_id.equal m1 m2
  | Some scc ->
      Graphlib.Std.Partition.equiv scc
        (Format.asprintf "%a" Method_id.pp m1)
        (Format.asprintf "%a" Method_id.pp m2)

let is_mutually_recursive scc (fn1 : Cfg.Fn.t) (fn2 : Cfg.Fn.t) =
  methods_mutually_recursive scc fn1.method_id fn2.method_id

(* print to stdout the restriction of serialized callgraph [file] to the
   methods present in [fns]; the fold accumulator tracks the current caller
   (None when the caller itself was filtered out) *)
let filter ~fns file =
  let have_fn m_id = List.exists fns ~f:(fun (f : Cfg.Fn.t) -> Method_id.equal f.method_id m_id) in
  Src_file.lines file
  |> Array.fold ~init:None ~f:(fun curr_caller line ->
         if
String.is_prefix line ~prefix:"CALLER: " then 132 | let caller_method = 133 | String.chop_prefix_exn ~prefix:"CALLER: " line |> Method_id.deserialize 134 | in 135 | if have_fn caller_method then ( 136 | Format.print_string (line ^ "\n"); 137 | Some caller_method) 138 | else None 139 | else 140 | match curr_caller with 141 | | None -> None 142 | | Some _ -> 143 | let callee_method = 144 | String.chop_prefix_exn ~prefix:"\tCALLEE: " line |> Method_id.deserialize 145 | in 146 | if have_fn callee_method then ( 147 | Format.print_string (line ^ "\n"); 148 | curr_caller) 149 | else curr_caller) 150 | |> ignore 151 | 152 | let dump_dot ~filename (cg : forward_t) : unit = 153 | Graph.to_dot (module G) (to_graph cg) ~filename ~string_of_node:(Format.asprintf "\"%s\"") 154 | 155 | let%test "procedures example" = 156 | let src_file = Src_file.of_file (abs_of_rel_path "test_cases/procedures.callgraph") in 157 | let fns = 158 | Cfg_parser.parse_file_exn (abs_of_rel_path "test_cases/java/Procedures.java") 159 | |> fun { cfgs; _ } -> Map.keys cfgs 160 | in 161 | let cg = deserialize_forward ~fns src_file in 162 | List.length (Map.keys cg) |> Int.equal 3 163 | -------------------------------------------------------------------------------- /src/frontend/text_diff.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | 3 | type t = { 4 | prev_start_line : int; 5 | prev_length : int; 6 | next_start_line : int; 7 | new_lines : string array; 8 | } 9 | 10 | let pp fs { prev_start_line; prev_length; next_start_line; new_lines } = 11 | Format.fprintf fs 12 | "text diff: {prev_start_line:%i; prev_length:%i; next_start_line:%i; length(new_lines):%i" 13 | prev_start_line prev_length next_start_line (Array.length new_lines) 14 | 15 | module Patdiff = Patience_diff_lib.Patience_diff.Make (Core.String) 16 | module Hunk = Patience_diff_lib.Patience_diff.Hunk 17 | 18 | (* In patience-diff jargon, a "diff" is a collection of "hunks" and a 
"hunk" is a collection of "ranges", which are each either a "same" for unchanged regions, a "next" for additions, a "prev" for deletions, or a "replace" for modifications *) 19 | 20 | (* The [context] named argument to [get_hunks] controls the maximum size of "same" regions -- e.g. to control the amount of unchanged-code context you want to see around changes in a git diff -- so I set it to 0 because we only care about the changes themselves. *) 21 | 22 | (* The [big_enough] named arg "governs how aggressively we try to clean up spurious matches, by restricting our attention to only matches of length less than [big_enough]". I set it to 3, which is the patdiff default for line diffs *) 23 | 24 | let btwn ~prev ~next = 25 | Patdiff.get_hunks ~transform:(fun x -> x) ~context:0 ~big_enough:3 ~prev ~next 26 | |> List.map ~f:(fun hunk -> 27 | match Hunk.ranges hunk with 28 | | [ Same _ ] -> 29 | { 30 | prev_start_line = Hunk.prev_start hunk; 31 | prev_length = Hunk.prev_size hunk; 32 | next_start_line = Hunk.next_start hunk; 33 | new_lines = [||]; 34 | } 35 | | [ Same [||]; r; Same [||] ] -> 36 | let new_lines = 37 | match r with 38 | | Prev _ -> [||] 39 | | Next new_lines | Replace (_, new_lines) -> new_lines 40 | | Same _ -> failwith "Malformed hunk -- adjacent \"Same\" ranges" 41 | | Unified _ -> failwith "Unrecognized diff type: \"Unified\"" 42 | in 43 | { 44 | prev_start_line = Hunk.prev_start hunk; 45 | prev_length = Hunk.prev_size hunk; 46 | next_start_line = Hunk.next_start hunk; 47 | new_lines; 48 | } 49 | | ranges -> 50 | let string_of_range : 'a Patience_diff_lib.Patience_diff.Range.t -> string = function 51 | | Prev _ -> "Prev" 52 | | Next _ -> "Next" 53 | | Replace _ -> "Replace" 54 | | Same _ -> "Same" 55 | | Unified _ -> "Unified" 56 | in 57 | failwith 58 | (Format.asprintf 59 | "With ~context:0, patdiff should always yield hunks of form [Same] or \ 60 | [Same,r,Same]; got %a instead" 61 | (List.pp ~pre:"[" ~suf:"]" "," String.pp) 62 | (List.map ranges 
~f:string_of_range))) 63 | 64 | let%test "adding adjacent lines" = 65 | let prev = 66 | [| 67 | "class Foo {"; 68 | " //My first program!"; 69 | " public static void main(String[] args){"; 70 | " System.out.println(\"Hello, world!\");"; 71 | " }"; 72 | "}"; 73 | |] 74 | in 75 | let next = 76 | [| 77 | "class Foo {"; 78 | " //My first program!"; 79 | " public static void main(String[] args){"; 80 | " System.out.println(\"Hello, world!\");"; 81 | " System.out.println(\"Hello, world!1\");"; 82 | " System.out.println(\"Hello, world!2\");"; 83 | " System.out.println(\"Hello, world!3\");"; 84 | " System.out.println(\"Hello, world!4\");"; 85 | " }"; 86 | "}"; 87 | |] 88 | in 89 | match btwn ~prev ~next with 90 | | [ { prev_start_line = 5; prev_length = 0; next_start_line = 5; new_lines = [| _; _; _; _ |] } ] 91 | -> 92 | true 93 | | _ -> false 94 | 95 | let%test "adding non-adjacent lines" = 96 | let prev = 97 | [| 98 | "class Foo {"; 99 | " //My first program!"; 100 | " public static void main(String[] args){"; 101 | " System.out.println(\"Hello, world!\");"; 102 | " }"; 103 | "}"; 104 | |] 105 | in 106 | let next = 107 | [| 108 | "class Foo {"; 109 | " //My first program!"; 110 | " public static void main(String[] args){"; 111 | " System.out.println(\"Hello, world!-1\");"; 112 | " System.out.println(\"Hello, world!\");"; 113 | " System.out.println(\"Hello, world!+1\");"; 114 | " }"; 115 | "}"; 116 | |] 117 | in 118 | match btwn ~prev ~next with 119 | | [ 120 | { prev_start_line = 4; prev_length = 0; next_start_line = 4; new_lines = [| _ |] }; 121 | { prev_start_line = 5; prev_length = 0; next_start_line = 6; new_lines = [| _ |] }; 122 | ] -> 123 | true 124 | | _ -> false 125 | 126 | let%test "removing a line" = 127 | let prev = 128 | [| 129 | "class Foo {"; 130 | " //My first program!"; 131 | " public static void main(String[] args){"; 132 | " System.out.println(\"Hello, world!\");"; 133 | " }"; 134 | "}"; 135 | |] 136 | in 137 | let next = 138 | [| 139 | "class Foo {"; 
140 | " public static void main(String[] args){"; 141 | " System.out.println(\"Hello, world!\");"; 142 | " }"; 143 | "}"; 144 | |] 145 | in 146 | match btwn ~prev ~next with 147 | | [ { prev_start_line = 2; prev_length = 1; next_start_line = 2; new_lines = [||] } ] -> true 148 | | _ -> false 149 | 150 | let%test "replacing a line" = 151 | let prev = 152 | [| 153 | "class Foo {"; 154 | " //My first program!"; 155 | " public static void main(String[] args){"; 156 | " System.out.println(\"Hello, world!\");"; 157 | " }"; 158 | "}"; 159 | |] 160 | in 161 | let next = 162 | [| 163 | "class Foo {"; 164 | " /** now it's documented */"; 165 | " public static void main(String[] args){"; 166 | " System.out.println(\"Hello, world!\");"; 167 | " }"; 168 | "}"; 169 | |] 170 | in 171 | match btwn ~prev ~next with 172 | | [ { prev_start_line = 2; prev_length = 1; next_start_line = 2; new_lines = [| _ |] } ] -> true 173 | | _ -> false 174 | 175 | let%test "1 replacement, 1 addition, 1 deletion, all non-adjacent" = 176 | let prev = 177 | [| 178 | "class Foo {"; 179 | " //My first program!"; 180 | " public static void main(String[] args){"; 181 | " System.out.println(\"Hello, world!\");"; 182 | " }"; 183 | "}"; 184 | |] 185 | in 186 | let next = 187 | [| 188 | "class Foo {"; 189 | " /** now it's documented */"; 190 | " public static void main(String[] args){"; 191 | " }"; 192 | "//added line"; 193 | "}"; 194 | |] 195 | in 196 | match btwn ~prev ~next with 197 | | [ 198 | { prev_start_line = 2; prev_length = 1; next_start_line = 2; new_lines = [| _ |] }; 199 | { prev_start_line = 4; prev_length = 1; next_start_line = 4; new_lines = [||] }; 200 | { prev_start_line = 6; prev_length = 0; next_start_line = 5; new_lines = [| _ |] }; 201 | ] -> 202 | true 203 | | _ -> false 204 | -------------------------------------------------------------------------------- /src/syntax/ast.ml: -------------------------------------------------------------------------------- 1 | open Dai 2 | open Import 3 | 
(* identifiers are plain strings *)
type ident = string [@@deriving equal, hash, compare, sexp_of]

(* literal constants *)
module Lit = struct
  module T = struct
    type t =
      | Bool of bool
      | Int of int64
      | Float of float
      | Null
      | String of string
      | Char of string (* store chars as strings, representing (potentially) multi-byte UTF8-encoded java chars *)
    [@@deriving equal, compare, sexp, hash]
  end

  include T
  include Comparable.Make (T)

  let of_int i = Int i

  let of_float f = Float f

  let char_of_string c = Char c

  let of_string s = String s

  (* print a literal; string/char quotes are backslash-escaped *)
  let pp fs = function
    | Bool b -> Bool.pp fs b
    | Int i -> Int64.pp fs i
    | Float f -> Float.pp fs f
    | Null -> Format.pp_print_string fs "null"
    | String s -> Format.pp_print_string fs ("\\\"" ^ s ^ "\\\"")
    | Char c -> Format.pp_print_string fs ("\\'" ^ c ^ "\\'")
end

(* binary operators; covers both Java and JavaScript forms
   (e.g. [SEq] prints as the JS "===") *)
module Binop = struct
  type t =
    | Lt
    | Le
    | Gt
    | Ge
    | Eq
    | NEq
    | SEq
    | Plus
    | Minus
    | Times
    | Divided_by
    | Mod
    | Pow
    | Or
    | And
    | BOr
    | BAnd
    | BXor
    | LShift
    | RShift
    | URShift
    | Instanceof
  [@@deriving equal, hash, compare, sexp_of]

  (* print an operator in its concrete surface syntax *)
  let pp fs =
    let ps = Format.pp_print_string fs in
    function
    | Lt -> ps "<"
    | Le -> ps "<="
    | Gt -> ps ">"
    | Ge -> ps ">="
    | Eq -> ps "=="
    | NEq -> ps "!="
    | SEq -> ps "==="
    | Plus -> ps "+"
    | Minus -> ps "-"
    | Times -> ps "*"
    | Divided_by -> ps "/"
    | Mod -> ps "%"
    | Pow -> ps "**"
    | Or -> ps "||"
    | And -> ps "&&"
    | BOr -> ps "|"
    | BAnd -> ps "&"
    | BXor -> ps "^"
    | LShift -> ps "<<"
    | RShift -> ps ">>"
    | URShift -> ps ">>>"
    | Instanceof -> ps "instanceof"
end

(* unary operators ([Typeof] is JavaScript-only) *)
module Unop = struct
  type t = Plus | Neg | Not | BNot | Incr | Decr | Typeof
  [@@deriving compare, equal, hash, sexp_of]

  let pp fs =
    let ps = Format.pp_print_string fs in
    function
    | Plus -> ps "+"
    | Neg -> ps "-"
    | Not -> ps "!"
    | BNot -> ps "~"
    | Incr -> ps "++"
    | Decr -> ps "--"
    | Typeof -> ps "typeof "
end

(* side-effect-free expressions *)
module Expr = struct
  type t =
    | Var of ident
    | Lit of Lit.t
    | Binop of { l : t; op : Binop.t; r : t }
    | Unop of { op : Unop.t; e : t }
    | Deref of { rcvr : ident; field : ident }
    | Array_access of { rcvr : t; idx : t }
    | Array_literal of { elts : t list; alloc_site : Alloc_site.t }
    | Array_create of { elt_type : string; size : t; alloc_site : Alloc_site.t }
    | Method_ref of { rcvr : string; meth : string }
    | Class_lit of { name : string }
  [@@deriving equal, compare, hash, sexp_of]

  let rec pp fs e =
    match e with
    | Var v -> Format.pp_print_string fs v
    | Lit l -> Lit.pp fs l
    | Binop { l; op; r } -> Format.fprintf fs "%a %a %a" pp l Binop.pp op pp r
    | Unop { op; e } -> Format.fprintf fs "%a(%a)" Unop.pp op pp e
    | Deref { rcvr; field } -> Format.fprintf fs "%s.%s" rcvr field
    | Array_access { rcvr; idx } -> Format.fprintf fs "%a[%a]" pp rcvr pp idx
    | Array_literal { elts; alloc_site } ->
        Format.fprintf fs "%a%@%a" (List.pp ", " ~pre:"{" ~suf:"}" pp) elts Alloc_site.pp alloc_site
    | Array_create { elt_type; size; alloc_site } ->
        Format.fprintf fs "new@%a %a[%a]" Alloc_site.pp alloc_site String.pp elt_type pp size
    | Method_ref { rcvr; meth } -> Format.fprintf fs "%s::%s" rcvr meth
    | Class_lit { name } -> Format.fprintf fs "%s.class" name

  (* the set of variable names read by an expression; note that [Lit],
     [Method_ref], and [Class_lit] fall through to the empty-set default *)
  let rec uses =
    let uses_in_list exprs =
      List.fold exprs ~init:String.Set.empty ~f:(fun a c -> Set.union a (uses c))
    in
    function
    | Var v -> String.Set.singleton v
    | Binop { l; op = _; r } -> Set.union (uses l) (uses r)
    | Unop { op = _; e } -> uses e
    | Deref { rcvr; field = _ } -> String.Set.singleton rcvr
    | Array_literal { elts; alloc_site = _ } -> uses_in_list elts
    | Array_access { rcvr; idx } -> Set.union (uses rcvr) (uses idx)
    | Array_create { elt_type = _; size; alloc_site = _ } -> uses size
    | _ -> String.Set.empty

  (** fold hash as int, rather than as Ppx_hash_lib.Std.Hash.state *)
  let hash_fold_int acc curr =
    let open Ppx_hash_lib.Std in
    hash_fold_t (Hash.fold_int (Hash.alloc ()) acc) curr |> Hash.get_hash_value

  (* smart constructors *)
  let unop op e = Unop { op; e }

  let binop l op r = Binop { l; op; r }
end

(* program statements, in a 3-address-code-like normal form *)
module Stmt = struct
  type t =
    | Array_write of { rcvr : string; idx : Expr.t; rhs : Expr.t }
    | Assign of { lhs : string; rhs : Expr.t }
    | Assume of Expr.t
    | Call of {
        lhs : string option;
        rcvr : string;
        meth : string;
        actuals : Expr.t list;
        alloc_site : Alloc_site.t option;
      }
    | Exceptional_call of { rcvr : string; meth : string; actuals : Expr.t list }
    | Expr of Expr.t
    | Skip
    | Write of { rcvr : string; field : string; rhs : Expr.t }
  [@@deriving compare, equal, hash, sexp_of]

  let pp fs stmt =
    match stmt with
    | Array_write { rcvr; idx; rhs } ->
        Format.fprintf fs "%s[%a] := %a" rcvr Expr.pp idx Expr.pp rhs
    | Assign { lhs; rhs } -> Format.fprintf fs "%s := %a" lhs Expr.pp rhs
    | Assume e -> Format.fprintf fs "assume %a" Expr.pp e
    | Call { lhs = Some lhs; rcvr; meth; actuals; alloc_site = _ } ->
        Format.fprintf fs "%s := %s.%s(%a)" lhs rcvr meth (List.pp ", " Expr.pp) actuals
    | Call { lhs = None; rcvr; meth; actuals; alloc_site = _ } ->
        Format.fprintf fs "%s.%s(%a)" rcvr meth (List.pp ", " Expr.pp) actuals
    | Exceptional_call { rcvr; meth; actuals } ->
        Format.fprintf fs "exc-return %s.%s(%a)" rcvr meth (List.pp ", " Expr.pp) actuals
    | Expr e -> Expr.pp fs e
    | Skip
-> Format.pp_print_string fs "skip" 192 | | Write { rcvr; field; rhs } -> Format.fprintf fs "%s.%s := %a" rcvr field Expr.pp rhs 193 | 194 | let uses = function 195 | | Array_write { rcvr = _; idx; rhs } -> Set.union (Expr.uses idx) (Expr.uses rhs) 196 | | Assign { lhs = _; rhs } -> Expr.uses rhs 197 | | Assume e -> Expr.uses e 198 | | Call { lhs = _; rcvr; meth = _; actuals; alloc_site = _ } 199 | | Exceptional_call { rcvr; meth = _; actuals } -> 200 | List.fold actuals ~init:(String.Set.singleton rcvr) ~f:(fun a c -> 201 | Set.union a (Expr.uses c)) 202 | | Expr e -> Expr.uses e 203 | | Skip -> String.Set.empty 204 | | Write { rcvr; field = _; rhs } -> String.Set.add (Expr.uses rhs) rcvr 205 | 206 | let def = function Assign { lhs; _ } -> Some lhs | Call { lhs; _ } -> lhs | _ -> None 207 | 208 | let to_string stmt : string = 209 | Format.fprintf Format.str_formatter "%a" pp stmt; 210 | Format.flush_str_formatter () 211 | 212 | let sanitize x = x 213 | 214 | let show x = 215 | pp Format.str_formatter x; 216 | Format.flush_str_formatter () 217 | 218 | let hash = seeded_hash 219 | 220 | let skip = Skip 221 | 222 | let is_exc = function Exceptional_call _ -> true | _ -> false 223 | end 224 | -------------------------------------------------------------------------------- /src/experiment_harness.ml: -------------------------------------------------------------------------------- 1 | open Dai.Import 2 | open Frontend 3 | open Analysis 4 | open Domain 5 | open Syntax 6 | 7 | let ( / ) pre post = pre ^ Stdlib.Filename.dir_sep ^ post 8 | 9 | let base_exclusions = 10 | [ "test"; "package-info.java"; "module-info.java"; "annotations"; "annotation"; ".m2" ] 11 | 12 | let experiment_exclusions = [] 13 | (* files containing "@interface" annotations that crash tree sitter *) 14 | 15 | let exclusions = experiment_exclusions @ base_exclusions 16 | 17 | let rec java_srcs dir = 18 | let open Sys in 19 | let open Stdlib.Filename in 20 | match is_directory dir with 21 | | `No | 
  `Unknown ->
      failwith (Format.asprintf "can't get java sources from non-directory file %s" dir)
  | `Yes ->
      (* recursively collect .java files under [dir], skipping [exclusions] *)
      List.bind (ls_dir dir) ~f:(fun f ->
          if List.mem exclusions f ~equal:String.equal then []
          else
            let file = dir / f in
            if is_directory_exn file then java_srcs file
            else if is_file_exn file && String.equal ".java" (extension f) then [ file ]
            else [])

(* java sources of [dir], as paths relative to [dir] *)
let relative_java_srcs dir = List.map (java_srcs dir) ~f:(String.chop_prefix_exn ~prefix:(dir / ""))

(* interface of an analysis-experiment harness over some abstract domain *)
module type S = sig
  type t

  val cg : t -> Callgraph.forward_t

  val init : ?cg:string -> string -> t

  val fns : t -> Cfg.Fn.t list

  val update : ?cg:string -> next_dir:string -> t -> t

  val entrypoints : string option -> t -> Cfg.Fn.t list

  val issue_exit_queries : Cfg.Fn.t list -> t -> t

  val issue_demand_query : qry_loc:string -> Cfg.Fn.t list -> t -> t

  val dump_dot : t -> filename:string -> unit
end

module DSG_wrapper (Dom : Abstract.Dom) : S = struct
  module Dom = Abstract.DomWithDataStructures (Dom)
  module G = Dsg.Make (Dom)
  module D = G.D

  (* parse-time artifacts, retained so later edits can be applied *)
  type parse_info = {
    src_dir : string;
    trees : Tree.t String.Map.t; (* keyed by path relative to [src_dir] *)
    loc_map : Loc_map.t;
    fields : Declared_fields.t;
    cha : Class_hierarchy.t;
  }

  type t = { dsg : G.t; cg : Callgraph.t; parse : parse_info }

  let dump_dot { dsg; _ } = G.dump_dot dsg

  let cg x = x.cg.forward

  let fns x = G.fns x.dsg

  (* Initialize a DSG over src_dir/**/*.java, with the callgraph serialized at [cg] *)
  let init ?cg src_dir =
    (* no callgraph given: read from /dev/null, i.e. start with an empty one *)
    let cg = match cg with Some cg -> cg | None -> "/dev/null" in
    let trees =
      List.fold (java_srcs src_dir) ~init:String.Map.empty ~f:(fun trees src ->
          let file = Src_file.of_file src in
          match Tree.parse ~old_tree:None ~file with
          | Ok tree ->
              let key = String.chop_prefix_exn src ~prefix:(src_dir / "") in
              Map.set trees ~key ~data:tree
          | Error _ -> failwith ("parse error in " ^ src))
    in
    let ({ cfgs; loc_map; fields; cha } : Cfg_parser.prgm_parse_result) =
      let trees = List.map (Map.to_alist trees) ~f:(fun (file, tree) -> (src_dir / file, tree)) in
      Cfg_parser.parse_trees_exn ~trees
    in
    let dsg = G.init ~cfgs in
    let fns = G.fns dsg in
    let cg = Callgraph.deserialize ~fns (Src_file.of_file cg) in
    let () = Declared_fields.set_current_fields fields in
    let parse = { src_dir; trees; loc_map; fields; cha } in
    { dsg; cg; parse }

  (* For the bugswarm experiments, we point the analysis at two side-by-side program versions
   * rather than editing the program in place.
   * This function applies the "edit" between the two program versions to an analysis [state]
   *)
  let update ?(cg = "/dev/null") ~(next_dir : string) (g : t) : t =
    let prev_src_dir = g.parse.src_dir in
    let prev_files = String.Map.keys g.parse.trees |> String.Set.of_list in
    let next_files = relative_java_srcs next_dir |> String.Set.of_list in
    let shared_files, new_files = Set.(partition_tf next_files ~f:(mem prev_files)) in
    (* a shared file is "changed" iff `cmp` reports a byte-level difference *)
    let changed_files =
      Set.filter shared_files ~f:(fun file ->
          (* escape '$' (nested-class filenames) for the shell command *)
          let file = String.substr_replace_all ~pattern:"$" ~with_:"\\$" file in
          Sys.command
            (Format.asprintf "cmp %s %s >/dev/null" (prev_src_dir / file) (next_dir / file))
          |> (Int.equal 0 >> not))
    in
    Format.printf
      "prev_files: %i\n\
       next_files: %i\n\
       shared_files: %i\n\
       new_files: %i\n\
       changed_files: %a\n"
      (Set.length prev_files) (Set.length next_files) (Set.length shared_files)
      (Set.length new_files) (Set.pp String.pp) changed_files;
    (* incrementally re-parse each changed file and apply its tree diff to the DSG *)
    let init = (g.parse.trees, g.dsg, g.parse.loc_map) in
    let trees, dsg, loc_map =
      Set.fold changed_files ~init ~f:(fun (trees, dsg, lm) filename ->
          let prev_tree = String.Map.find_exn g.parse.trees filename in
          let prev_file = Src_file.of_file (prev_src_dir / filename) in
          let next_file = Src_file.of_file (next_dir / filename) in
          Result.Let_syntax.(
            let%bind prev = Tree.as_java_cst prev_file prev_tree in
            let%bind tree = Tree.parse ~old_tree:(Some prev_tree) ~file:next_file in
            let%map next = Tree.as_java_cst next_file tree in
            let diff = Tree_diff.btwn lm ~prev ~next in
            Format.printf "tree diff for %s:\n%a\n" filename Tree_diff.pp diff;
            let lm, dsg = G.apply_edit ~cha:g.parse.cha ~diff lm dsg in
            (Map.set trees ~key:filename ~data:tree, dsg, lm))
          |> function
          | Ok res -> res
          | _ -> failwith ("failed to update file: " ^ filename))
    in
    (* parse brand-new files from scratch and fold them into the parse result *)
    let open Cfg_parser in
    let open Result.Monad_infix in
    let ({ cfgs; loc_map; cha; fields } : prgm_parse_result) =
      let init =
        set_parse_result ~loc_map ~cha:g.parse.cha ~fields:g.parse.fields empty_parse_result
      in
      Set.fold new_files ~init ~f:(fun acc filename ->
          let file = Src_file.of_file (next_dir / filename) in
          Tree.parse ~old_tree:None ~file >>= Tree.as_java_cst file >>| Cfg_parser.of_java_cst ~acc
          |> function
          | Ok res -> res
          | _ -> failwith ("error parsing file: " ^ filename))
    in
    let dsg = G.add_exn ~cfgs dsg in
    let fns = G.fns dsg in
    let cg = Callgraph.deserialize ~fns (Src_file.of_file cg) in
    (* TODO: handle added fields and CHA edges in edited files; add corresponding Tree_diff.edit's
       and expose functions there to use here to apply diffs to our fields/cha structures *)
    let () = Declared_fields.set_current_fields fields in
    let parse = { src_dir = next_dir; trees; loc_map; fields; cha } in
    Format.printf "\n[EXPERIMENT][INFO] dirtying\n";
    G.print_stats Format.std_formatter dsg;
    { dsg; cg; parse }

  (* entrypoints: "main" methods of [entry_class] when given, else all functions *)
  let entrypoints entry_class g =
    let f =
      match entry_class with
      | Some cls ->
168 | let package = deserialize_package cls in 169 | let class_name = deserialize_class cls in 170 | fun (f : Cfg.Fn.t) -> 171 | String.equal class_name f.method_id.class_name 172 | && (List.equal String.equal) package f.method_id.package 173 | && String.equal "main" f.method_id.method_name 174 | | None -> fun (_f : Cfg.Fn.t) -> true 175 | (*String.equal "main" f.method_id.method_name*) 176 | in 177 | List.filter (G.fns g.dsg) ~f 178 | 179 | let issue_exit_queries entrypoints (g : t) = 180 | let st = systime () in 181 | let dsg = 182 | List.fold entrypoints ~init:g.dsg ~f:(fun dsg (fn : Cfg.Fn.t) -> 183 | G.query dsg ~fn ~entry_state:(Dom.init ()) ~loc:fn.exit ~cg:g.cg ~fields:g.parse.fields 184 | |> snd) 185 | in 186 | Format.(fprintf std_formatter) 187 | "\n[EXPERIMENT] exhaustive analysis took: %.3f\n" 188 | (1000. *. (systime () -. st)); 189 | G.print_stats Format.std_formatter dsg; 190 | G.print_summaries Format.std_formatter dsg ~num_summaries:100; 191 | (* G.dump_dot dsg ~filename:(abs_of_rel_path "solved_experiement.dsg.dot") ~num_daigs:100; *) 192 | { dsg; cg = g.cg; parse = g.parse } 193 | 194 | let issue_demand_query ~qry_loc entrypoints (g : t) : t = 195 | let method_id = Method_id.deserialize qry_loc in 196 | match List.find (G.fns g.dsg) ~f:(fun fn -> Method_id.equal method_id fn.method_id) with 197 | | None -> failwith ("no procedure found matching demand query " ^ qry_loc) 198 | | Some fn -> 199 | let st = systime () in 200 | let _res, dsg = 201 | G.loc_only_query g.dsg ~fn ~loc:fn.exit ~cg:g.cg ~fields:g.parse.fields ~entrypoints 202 | in 203 | Format.(fprintf std_formatter) 204 | "\n[EXPERIMENT] demand query took: %.3f\n" 205 | (1000. *. (systime () -. st)); 206 | G.print_stats Format.std_formatter dsg; 207 | { dsg; cg = g.cg; parse = g.parse } 208 | end 209 | --------------------------------------------------------------------------------