├── lib └── .gitkeep ├── tests ├── __init__.py ├── taint-front │ ├── ex3.codeflows │ ├── ex4.codeflows │ ├── global2.codeflows │ ├── indirect4.codeflows │ ├── sanitizer1.nocodeflows │ ├── alias2.tnt │ ├── sanitizer1.tnt │ ├── alias2.json │ ├── ex3.tnt │ ├── indirect1.tnt │ ├── alias1.tnt │ ├── global1.json │ ├── indirect4.tnt │ ├── global2.tnt │ ├── compositional-ctx.json │ ├── README.md │ ├── ex4.tnt │ ├── alias1.json │ ├── global1.tnt │ ├── compositional-ctx2.json │ ├── compositional-ctx4.json │ ├── compositional-ctx4.tnt │ ├── compositional-ctx2.tnt │ ├── indirect1.json │ └── compositional-ctx.tnt ├── bin │ ├── check-static │ ├── check-analysis-options │ ├── sarif_has_code_flows.py │ ├── test │ ├── check-taintfront2 │ ├── check-pcode │ ├── check-ActivityCommunication4 │ └── check-taintfront ├── java │ ├── ArrayTest.java │ └── ArrayTest.json ├── index_nginx.dl ├── field-source.json ├── query_nginx.dl ├── find-field-query.json ├── test_query.json ├── taint_intent.json ├── androidtest-query.json ├── test_ctadlir.py ├── test_sarif_paths.py └── custom_index.dl ├── .envrc ├── src └── ctadl │ ├── VERSION │ ├── models │ ├── squiggli_java │ │ └── default-index.json │ ├── squiggli_php │ │ └── default-index.json │ └── taint-front │ │ ├── default-index.json │ │ └── default-query.json │ ├── util │ ├── __init__.py │ ├── lazy_logging.py │ └── diff.py │ ├── souffle-logic │ ├── analysis │ │ └── index.dl │ ├── graph │ │ ├── slice │ │ │ ├── declarations.dl │ │ │ └── callee.dl │ │ ├── import.dl │ │ ├── export.dl │ │ ├── fieldprop_ref.dl │ │ ├── dataflow_graph.dl │ │ ├── declarations.dl │ │ └── fieldprop.dl │ ├── taint-front │ │ ├── types.dl │ │ ├── macros.dl │ │ ├── importdb.dl │ │ ├── export.dl │ │ ├── import.dl │ │ ├── declarations.dl │ │ ├── taintquery.dl │ │ └── index.dl │ ├── pcode │ │ ├── generictaintquery.dl │ │ ├── types.dl │ │ ├── taintquery.dl │ │ ├── analyze_headless.sh │ │ ├── vtable.dl │ │ ├── analyzeHeadlessBigMem.bat │ │ ├── analyzeHeadlessBigMem │ │ └── index.dl │ 
├── jadx │ │ ├── customquery.dl │ │ ├── importdb.dl │ │ ├── export.dl │ │ ├── index.dl │ │ └── jadx-fact-decls.dl │ ├── ctadl_ir_types.dl │ ├── information-flow │ │ ├── export.dl │ │ ├── java-transfer.dl │ │ └── declarations.dl │ ├── taint_schema.dl │ ├── macros.dl │ ├── graph_schema.dl │ ├── imports.dl │ └── export.dl │ └── vis │ ├── __init__.py │ ├── types.py │ └── richutils.py ├── docs ├── requirements.txt ├── VSCode-SARIF-screenshot.png ├── index.rst ├── Makefile ├── make.bat ├── conf.py ├── intro.rst ├── analysis.rst ├── DevGuide.rst └── usage.rst ├── taint-front ├── .gitignore ├── star4.tnt ├── paper-ex.tnt ├── dune-project ├── ll.tnt ├── recursion2.tnt ├── alias3.tnt ├── alias6.tnt ├── star3.tnt ├── compositional-ctx3.tnt ├── test.tnt ├── indirect3.tnt ├── star-query.tnt ├── slice1.tnt ├── star2.tnt ├── alias4.tnt ├── bslice1.tnt ├── global3.tnt ├── backward.tnt ├── indirect2.tnt ├── star1.tnt ├── bslice2.tnt ├── ex-vtable.tnt ├── dune ├── Lex.mll ├── README.md ├── taint.ml ├── alias-rule.d2 ├── flake.lock ├── flake.nix ├── Parse.mly ├── Syntax.ml └── summarytest.ml ├── plugins ├── ghidra │ ├── src │ │ └── ctadl_ghidra_fact_generator_plugin │ │ │ ├── VERSION │ │ │ └── __init__.py │ ├── MANIFEST.in │ ├── pyproject.toml │ ├── setup.py │ ├── README.md │ └── setup.cfg ├── networkx-export │ ├── src │ │ └── ctadl_networkx_export_plugin │ │ │ ├── VERSION │ │ │ └── __init__.py │ ├── MANIFEST.in │ ├── pyproject.toml │ ├── setup.py │ ├── README.md │ └── setup.cfg └── taint-front │ ├── src │ └── ctadl_taint_front_fact_generator_plugin │ │ ├── VERSION │ │ └── __init__.py │ ├── MANIFEST.in │ ├── pyproject.toml │ ├── setup.py │ ├── setup.cfg │ └── README.md ├── setup.py ├── .gitignore ├── CHANGELOG.md ├── nix ├── sarif-multitool │ ├── default.nix │ └── checksarif.nix ├── taintfront │ ├── taintfront.nix │ └── taintfront-plugin.nix ├── singularity.nix ├── export │ └── networkx.nix ├── ghidra │ └── plugin.nix ├── no-kvm-overlay.nix ├── docker.nix ├── nginx │ ├── nginx.nix │ 
└── nginx-wllvm.nix ├── souffle │ ├── remove-lld.patch │ ├── souffle2.1.nix │ ├── souffle-2.3-sources.patch │ ├── souffle2.2.nix │ ├── souffle-2.4.patch │ ├── souffle2.0.2.nix │ ├── remove-index-warning.patch │ ├── souffle2.3-warning.patch │ ├── souffle2.4.nix │ ├── souffle2.3.nix │ ├── souffle2.4.1.nix │ ├── soufflegit.nix │ ├── packages.nix │ ├── git-and-completion.patch │ ├── remove-cpack.patch │ └── common.nix └── singularity-overlay.nix ├── CONTRIBUTING.md ├── pyproject.toml ├── plugins.nix ├── MANIFEST.in ├── utils ├── merge_summaries.py ├── hashdl.py ├── gen_help_docs.py └── souffle_windows_build.ps1 ├── .readthedocs.yaml ├── setup.cfg ├── .release_scripts └── make_images ├── ctadl.nix └── indexers.nix /lib/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.envrc: -------------------------------------------------------------------------------- 1 | use flake 2 | -------------------------------------------------------------------------------- /src/ctadl/VERSION: -------------------------------------------------------------------------------- 1 | 0.13.0-b3 2 | -------------------------------------------------------------------------------- /tests/taint-front/ex3.codeflows: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/taint-front/ex4.codeflows: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/taint-front/global2.codeflows: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /tests/taint-front/indirect4.codeflows: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | Sphinx==5.1.1 2 | -------------------------------------------------------------------------------- /tests/taint-front/sanitizer1.nocodeflows: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /taint-front/.gitignore: -------------------------------------------------------------------------------- 1 | /facts 2 | /_build 3 | -------------------------------------------------------------------------------- /src/ctadl/models/squiggli_java/default-index.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /src/ctadl/models/squiggli_php/default-index.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /src/ctadl/models/taint-front/default-index.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /src/ctadl/models/taint-front/default-query.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /src/ctadl/util/__init__.py: -------------------------------------------------------------------------------- 1 | from .functions import * 2 | 
-------------------------------------------------------------------------------- /plugins/ghidra/src/ctadl_ghidra_fact_generator_plugin/VERSION: -------------------------------------------------------------------------------- 1 | 0.0.2 2 | -------------------------------------------------------------------------------- /plugins/networkx-export/src/ctadl_networkx_export_plugin/VERSION: -------------------------------------------------------------------------------- 1 | 0.0.1 2 | -------------------------------------------------------------------------------- /plugins/taint-front/src/ctadl_taint_front_fact_generator_plugin/VERSION: -------------------------------------------------------------------------------- 1 | 1.0.0 2 | -------------------------------------------------------------------------------- /plugins/ghidra/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/ctadl_ghidra_fact_generator_plugin/VERSION 2 | -------------------------------------------------------------------------------- /plugins/networkx-export/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/ctadl_networkx_export_plugin/VERSION 2 | -------------------------------------------------------------------------------- /plugins/taint-front/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/ctadl_taint_front_fact_generator_plugin/VERSION 2 | -------------------------------------------------------------------------------- /tests/taint-front/alias2.tnt: -------------------------------------------------------------------------------- 1 | def f(p1, p2, p3) { 2 | z = p1; 3 | w = p3; 4 | z.foo = w.bar; 5 | } 6 | -------------------------------------------------------------------------------- /docs/VSCode-SARIF-screenshot.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/sandialabs/ctadl/HEAD/docs/VSCode-SARIF-screenshot.png -------------------------------------------------------------------------------- /taint-front/star4.tnt: -------------------------------------------------------------------------------- 1 | def A(p) { 2 | p = source(Input); 3 | x.f.g = p; 4 | sink(x.*, Dangerous); 5 | } 6 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | 6 | setup( 7 | # other setup arguments... 8 | ) 9 | -------------------------------------------------------------------------------- /taint-front/paper-ex.tnt: -------------------------------------------------------------------------------- 1 | def F(p, q) { 2 | p.f = q.g; 3 | } 4 | 5 | def Main() { 6 | x.h = source(Network); 7 | r = F(y, x.h); 8 | } 9 | -------------------------------------------------------------------------------- /taint-front/dune-project: -------------------------------------------------------------------------------- 1 | (lang dune 2.9) 2 | (using menhir 2.1) 3 | 4 | (name taintfront) 5 | (package 6 | (name taintfront) 7 | (depends menhir)) 8 | -------------------------------------------------------------------------------- /taint-front/ll.tnt: -------------------------------------------------------------------------------- 1 | def A(p) { 2 | x = p; 3 | // while (x) { 4 | t = x.next; 5 | process(x); 6 | x = t; 7 | // } 8 | } 9 | -------------------------------------------------------------------------------- /tests/taint-front/sanitizer1.tnt: -------------------------------------------------------------------------------- 1 | def F() { 2 | x = source(Network); 3 | y = sanitize(x, Network); 4 | z = y; 5 | sink(z, Network); 6 | } 7 | -------------------------------------------------------------------------------- /plugins/ghidra/pyproject.toml: 
-------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | -------------------------------------------------------------------------------- /tests/bin/check-static: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | top="$(git rev-parse --show-toplevel)" 4 | 5 | pyright 6 | 7 | black --check src bin/ctadl bin/dctadl 8 | -------------------------------------------------------------------------------- /plugins/taint-front/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | -------------------------------------------------------------------------------- /plugins/networkx-export/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | -------------------------------------------------------------------------------- /taint-front/recursion2.tnt: -------------------------------------------------------------------------------- 1 | def F(x) { 2 | r=G(x, y); 3 | return r; 4 | } 5 | 6 | def G(a, b) { 7 | return a; 8 | r = F(b); 9 | return r; 10 | } 11 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/analysis/index.dl: -------------------------------------------------------------------------------- 1 | // Indexes a CTADL IR database regardless of the target language 2 | 3 | #include "ctadl_ir.dl" 4 | #include "ctadl_ir_importdb.dl" 5 | -------------------------------------------------------------------------------- /taint-front/alias3.tnt: 
-------------------------------------------------------------------------------- 1 | // 1 => {p2, p1, x, y} 2 | // 2 => {} 3 | def f(p1, p2) { 4 | p1 = p2; 5 | x = p1; 6 | y = p2; 7 | 8 | a = b; 9 | b = a; 10 | } 11 | -------------------------------------------------------------------------------- /taint-front/alias6.tnt: -------------------------------------------------------------------------------- 1 | def func(a, b) { 2 | sink(b.f, Bad); 3 | } 4 | 5 | def Main() { 6 | x.f = source(File); 7 | func(x, y); 8 | 9 | z = zz; 10 | func(z, zz); 11 | } 12 | -------------------------------------------------------------------------------- /taint-front/star3.tnt: -------------------------------------------------------------------------------- 1 | // summary should flow ret <- p.f.* 2 | // no summary without star abstraction 3 | def F(p) { 4 | a = p; 5 | b = a.f; 6 | c = b.g; 7 | return c; 8 | } 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /tags 2 | __pycache__ 3 | *.db 4 | .DS_Store 5 | /_build 6 | /out*/ 7 | /profiler_html 8 | .direnv 9 | lib/* 10 | !.gitkeep 11 | .cache/ 12 | *.dylib 13 | *~ 14 | *.sarif 15 | *.csv 16 | /docs/_build 17 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph/slice/declarations.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define FORWARD_SLICE_INTRA_EDGE 1 4 | #define FORWARD_SLICE_CALL_EDGE 2 5 | #define FORWARD_SLICE_RETURN_EDGE 3 6 | 7 | #define SLICE_EMPTY_TAG "" 8 | -------------------------------------------------------------------------------- /taint-front/compositional-ctx3.tnt: -------------------------------------------------------------------------------- 1 | F(y, p) { 2 | y.g(p); 3 | } 4 | def Main() { 5 | s = source(Thing); 6 | // pass a field of the thing 7 | x.f.g = 
allocsite; 8 | t = F(x.f, s); 9 | sink(t, Thing); 10 | } 11 | -------------------------------------------------------------------------------- /taint-front/test.tnt: -------------------------------------------------------------------------------- 1 | def foo(x) { 2 | z=bar(x.y); 3 | w = z; 4 | baz(w); 5 | } 6 | 7 | def bar(x) { 8 | x.z = source(Network); 9 | return x; 10 | } 11 | 12 | def baz(x) { 13 | sink(x.z, Network); 14 | } 15 | -------------------------------------------------------------------------------- /taint-front/indirect3.tnt: -------------------------------------------------------------------------------- 1 | def f() { 2 | fret = source(Network); 3 | return fret; 4 | } 5 | 6 | def g(p) { 7 | p.func = f; 8 | } 9 | 10 | def Main() { 11 | g(c); 12 | x = c.func(); 13 | sink(x, Network); 14 | } 15 | -------------------------------------------------------------------------------- /taint-front/star-query.tnt: -------------------------------------------------------------------------------- 1 | // summary should flow ret <- p.f.* 2 | // no summary without star abstraction 3 | def F(p) { 4 | // set up source as p.f.g 5 | a = p; 6 | b = a.f; 7 | c = b.g; 8 | return c; 9 | } 10 | -------------------------------------------------------------------------------- /tests/taint-front/alias2.json: -------------------------------------------------------------------------------- 1 | { 2 | "SummaryFlow": { 3 | "includes": [{ 4 | "m2": "f", 5 | "m1": "f", 6 | "n1": 0, 7 | "p1": ".foo", 8 | "n2": 2, 9 | "p2": ".bar" 10 | }] 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/types.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | .type TaintFrontStatement = symbol 4 | .type TaintFrontMethod = symbol 5 | .type TaintFrontCallRecord = [id: number, inmeth: TaintFrontMethod] 6 | .type TaintFrontAccessPath = symbol 7 | 
-------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # v0.11.2 2 | 3 | - Improved all documentation and hosted on readthedocs. 4 | 5 | # v0.11.1 6 | 7 | - Exported relations for JADX queries. 8 | - Fixed python 3.10 compatibility. 9 | 10 | # v0.11.0 11 | 12 | - First release! 13 | -------------------------------------------------------------------------------- /taint-front/slice1.tnt: -------------------------------------------------------------------------------- 1 | def call_source() { 2 | x = source(File); 3 | y = x; 4 | return x; 5 | } 6 | 7 | def call_sink(x) { 8 | sink(x.data, File); 9 | } 10 | 11 | def Main() { 12 | x.data = call_source(); 13 | call_sink(x); 14 | } 15 | -------------------------------------------------------------------------------- /taint-front/star2.tnt: -------------------------------------------------------------------------------- 1 | // this example uses the backward star rule. 
2 | 3 | // SummaryFlow: ret <- p.f.* 4 | def A(p) { 5 | a = p.f; 6 | return a.g; 7 | } 8 | 9 | def B(p) { 10 | a.g = p; 11 | x.f = a; 12 | return x; 13 | } 14 | -------------------------------------------------------------------------------- /tests/taint-front/ex3.tnt: -------------------------------------------------------------------------------- 1 | def Main() { 2 | x.bar = source(Network); 3 | r = A(x, y); 4 | sink(y.foo, Network); 5 | } 6 | 7 | def A(ain, aout) { 8 | r = B(ain, aout); 9 | } 10 | 11 | def B(bin, bout) { 12 | bout.foo = bin.bar; 13 | } 14 | -------------------------------------------------------------------------------- /tests/taint-front/indirect1.tnt: -------------------------------------------------------------------------------- 1 | def f() { 2 | tmp = source(Network); 3 | return tmp; 4 | } 5 | 6 | def g(c1) { 7 | tmp = c1.func(); 8 | return tmp; 9 | } 10 | 11 | def Main() { 12 | p.func = f; 13 | x = g(p); 14 | sink(x, Network); 15 | } 16 | -------------------------------------------------------------------------------- /tests/taint-front/alias1.tnt: -------------------------------------------------------------------------------- 1 | // This is a common alias pattern. Something aliases the "this" object and a 2 | // parameter flows to it. 
3 | def F(this, p) { 4 | tmp = this; 5 | tmp.field = p; 6 | 7 | tmp2.g = tmp.g; 8 | tmp2.g.h = p.j; 9 | } 10 | -------------------------------------------------------------------------------- /tests/taint-front/global1.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "forward_flow.ReachableVertex": { 4 | "includes": [{ 5 | "v1": "Main/", 6 | "p1": ".g5" 7 | }, 8 | { 9 | "v1": "Main/", 10 | "p1": ".g3" 11 | } 12 | ] 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /tests/taint-front/indirect4.tnt: -------------------------------------------------------------------------------- 1 | def f() { 2 | fret = source(Network); 3 | return fret; 4 | } 5 | 6 | def g(p) { 7 | p.func = f; 8 | r.thing = p; 9 | return r; 10 | } 11 | 12 | def Main() { 13 | r = g(c); 14 | x = r.thing.func(); 15 | sink(x, Network); 16 | } 17 | -------------------------------------------------------------------------------- /taint-front/alias4.tnt: -------------------------------------------------------------------------------- 1 | def f(p1, p2, p3) { 2 | l = p1; 3 | l.foo = p2; 4 | 5 | z = p1; 6 | w = p3; 7 | z.foo = w.bar; 8 | a = z.foo.baz; 9 | m.bar.baz = l.foo; 10 | 11 | p3.manager.thing = nothing; 12 | w.thing = source(Test); 13 | p3.manager = w; 14 | } 15 | -------------------------------------------------------------------------------- /tests/java/ArrayTest.java: -------------------------------------------------------------------------------- 1 | package defpackage; 2 | 3 | class ArrayTest { 4 | public static boolean Foo(String a) { 5 | String[] array = {"hello", "new", "world", a}; 6 | ArrayTest.Bar(array); 7 | return false; 8 | } 9 | 10 | public static void Bar(String[] arr) { 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /tests/taint-front/global2.tnt: -------------------------------------------------------------------------------- 1 | 
var global; 2 | 3 | def id(x) { 4 | return x; 5 | } 6 | 7 | def A(x) { 8 | tmp = id(x); 9 | global = tmp; 10 | } 11 | 12 | def B() { 13 | return global; 14 | } 15 | 16 | def Main() { 17 | s = source(File); 18 | A(s); 19 | t = B(); 20 | sink(t, File); 21 | } 22 | -------------------------------------------------------------------------------- /src/ctadl/util/lazy_logging.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def debug(fmt, arg_fun, varargs=False): 5 | if logging.getLogger().isEnabledFor(logging.DEBUG): 6 | args = arg_fun() 7 | if varargs: 8 | logging.debug(fmt, *args) 9 | else: 10 | logging.debug(fmt, args) 11 | -------------------------------------------------------------------------------- /taint-front/bslice1.tnt: -------------------------------------------------------------------------------- 1 | def process_get(fname, connfd) { 2 | x = fname; 3 | sink(x, File); 4 | } 5 | 6 | def main(p1, p2, p3) { 7 | fd = source(File); 8 | command = source(Network); 9 | fname = source(Network); 10 | 11 | // if command == GET 12 | command = GET; 13 | process_get(fname, fd); 14 | } 15 | -------------------------------------------------------------------------------- /taint-front/global3.tnt: -------------------------------------------------------------------------------- 1 | var g1; 2 | 3 | def A(a) { 4 | g1 = a; 5 | } 6 | 7 | def C(a) { 8 | A(a); 9 | } 10 | 11 | def B() { 12 | return g1; 13 | } 14 | 15 | def D() { 16 | tmp = B(); 17 | return tmp; 18 | } 19 | 20 | def Main() { 21 | s = source(Data); 22 | C(s); 23 | x = D(); 24 | sink(x, Data); 25 | } 26 | -------------------------------------------------------------------------------- /tests/index_nginx.dl: -------------------------------------------------------------------------------- 1 | #include "analysis/pcode/index.dl" 2 | 3 | UnhandledOnPurpose(i) :- 4 | PCODE_MNEMONIC(i, _), 5 | ( i = "ngx_http_destination_charset@10005aa40:10005ab34:224"; 6 | 
i = "ngx_http_destination_charset@10005aa40:10005ab30:222"; 7 | i = "ngx_http_destination_charset@10005aa40:10005aad3:158"). 8 | -------------------------------------------------------------------------------- /nix/sarif-multitool/default.nix: -------------------------------------------------------------------------------- 1 | { buildDotnetGlobalTool, makeWrapper, lib }: 2 | buildDotnetGlobalTool { 3 | pname = "Sarif.Multitool"; 4 | version = "4.5.4"; 5 | 6 | nativeBuildInputs = [ makeWrapper ]; 7 | 8 | executables = [ "sarif" ]; 9 | 10 | nugetSha256 = "sha256-OulbIBGGEMjLAIUTCQ55NJ4ikjex/UOwiCxTKkZedxA="; 11 | } 12 | -------------------------------------------------------------------------------- /nix/taintfront/taintfront.nix: -------------------------------------------------------------------------------- 1 | { ocamlPackages 2 | , nix-gitignore}: 3 | ocamlPackages.buildDunePackage { 4 | useDune2 = true; 5 | 6 | pname = "taintfront"; 7 | version = "0.1.0"; 8 | 9 | src = nix-gitignore.gitignoreSource [] ../../taint-front; 10 | 11 | nativeBuildInputs = [ 12 | ocamlPackages.menhir 13 | ]; 14 | } 15 | -------------------------------------------------------------------------------- /tests/taint-front/compositional-ctx.json: -------------------------------------------------------------------------------- 1 | { 2 | "VirtualAssign": { 3 | "includes": [{ 4 | "insn": "service/call/2", 5 | "v1": "service/second", 6 | "v2": "service/first" 7 | }], 8 | "excludes": [{ 9 | "insn": "service/call/3", 10 | "v1": "service/third", 11 | "v2": "service/first" 12 | }] 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /plugins/ghidra/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | # It is very important that we install as an uncompressed dir structure, not a 6 | # zip (egg). 
Souffle needs access to the entire datalog directory structure in 7 | # order to run. If that dir structure is in a zipfile, we all lose. 8 | setup(zip_safe=False) 9 | 10 | 11 | -------------------------------------------------------------------------------- /plugins/taint-front/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | # It is very important that we install as an uncompressed dir structure, not a 6 | # zip (egg). Souffle needs access to the entire datalog directory structure in 7 | # order to run. If that dir structure is in a zipfile, we all lose. 8 | setup(zip_safe=False) 9 | 10 | 11 | -------------------------------------------------------------------------------- /taint-front/backward.tnt: -------------------------------------------------------------------------------- 1 | def f() { 2 | fret = source(Network); 3 | return fret; 4 | } 5 | 6 | def g() { 7 | gret.func = f; 8 | return gret; 9 | } 10 | 11 | def h(x) { 12 | sink(x, File); 13 | } 14 | 15 | def Main() { 16 | c = g(); 17 | x = c.func(); // calls f 18 | y = x; 19 | //sink(x, Network); 20 | //sink(y, Network); 21 | h(y); 22 | } 23 | -------------------------------------------------------------------------------- /taint-front/indirect2.tnt: -------------------------------------------------------------------------------- 1 | def f() { 2 | fret = source(Network); 3 | return fret; 4 | } 5 | 6 | def g() { 7 | gret.func = f; 8 | return gret; 9 | } 10 | 11 | def h(x) { 12 | sink(x, Network); 13 | } 14 | 15 | def Main() { 16 | c = g(); 17 | x = c.func(); // calls f 18 | y = x; 19 | sink(x, Network); 20 | sink(y, Network); 21 | h(y); 22 | } 23 | -------------------------------------------------------------------------------- /tests/taint-front/README.md: -------------------------------------------------------------------------------- 1 | # Taint-front test runner 2 | 3 | Any .tnt testcases in 
this directory are run. 4 | If there is a corresponding .json file, we test the ctadlir against it with test_ctadlir.py. 5 | If there is a .codeflows file (can be empty), we check for code flows with sarif_has_code_flows.py. 6 | If there is a .nocodeflows file, we check for absence of code flows. 7 | -------------------------------------------------------------------------------- /plugins/networkx-export/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | # It is very important that we install as an uncompressed dir structure, not a 6 | # zip (egg). Souffle needs access to the entire datalog directory structure in 7 | # order to run. If that dir structure is in a zipfile, we all lose. 8 | setup(zip_safe=False) 9 | 10 | 11 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/macros.dl: -------------------------------------------------------------------------------- 1 | #define GlobalsParamName "" 2 | #define TaintFront_MkVar(fid, vin, vout, pout) \ 3 | ( Global(vin, _), vout = cat(fid, "/", GlobalsParamName), fid=fid, pout = cat(".", vin); \ 4 | !Global(vin, _), vout = cat(fid, "/", vin), pout = "" ) 5 | #define TaintFront_MkStmt(fid, ty, id) cat(fid, "/", ty, "/", id) 6 | 7 | 8 | -------------------------------------------------------------------------------- /tests/taint-front/ex4.tnt: -------------------------------------------------------------------------------- 1 | def Main() { 2 | x = new(); 3 | y = new(); 4 | x.bar = source(Network); 5 | r = A(x, y); 6 | sink(y.foo, Network); 7 | } 8 | 9 | def A(ain, aout) { 10 | r1 = B(ain, tmp); 11 | r2 = C(tmp, aout); 12 | } 13 | 14 | def B(bin, bout) { 15 | bout.baz = bin.bar; 16 | } 17 | 18 | def C(cin, cout) { 19 | cout.foo = cin.baz; 20 | } 21 | -------------------------------------------------------------------------------- /CONTRIBUTING.md:
-------------------------------------------------------------------------------- 1 | # Style 2 | 3 | Use `black` to format the code and `isort` to sort the imports. 4 | 5 | See [PEP 8](https://peps.python.org/pep-0008/#documentation-strings) and [PEP 6 | 257](https://peps.python.org/pep-0257/) for conventions to follow. 7 | 8 | # On new feature 9 | 10 | - Add documentation somewhere in the tree about the commit 11 | - Link to issue, if relevant 12 | -------------------------------------------------------------------------------- /tests/taint-front/alias1.json: -------------------------------------------------------------------------------- 1 | { 2 | "SummaryFlow": { 3 | "includes": [{ 4 | "m2": "F", 5 | "m1": "F", 6 | "n1": 0, 7 | "p1": ".field", 8 | "n2": 1, 9 | "p2": "" 10 | }, 11 | { 12 | "m2": "F", 13 | "m1": "F", 14 | "n1": 0, 15 | "p1": ".g.h", 16 | "n2": 1, 17 | "p2": ".j" 18 | }] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /taint-front/star1.tnt: -------------------------------------------------------------------------------- 1 | // this example uses the star summary rule 2 | 3 | // SummaryFlow: p1.g.h <- p2 4 | def A(p1, p2) { 5 | p1.g.h = p2; 6 | } 7 | 8 | // SummaryFlow: p1.d <- p2.g.h 9 | def C(p1, p2) { 10 | p1.d = p2.g.h; 11 | } 12 | 13 | // SummaryFlow: p1.f.* <- p2 14 | // SummaryFlow: p1.d <- p2.f.* 15 | def B(p1, p2) { 16 | A(p1.f, p2); 17 | C(p1, p2.f); 18 | } 19 | -------------------------------------------------------------------------------- /nix/singularity.nix: -------------------------------------------------------------------------------- 1 | { singularity-tools 2 | , name 3 | , ctadl 4 | , souffle 5 | , coreutils 6 | , binutils 7 | , less 8 | , more 9 | , additionalDeps ? 
[] }: 10 | singularity-tools.buildImage { 11 | inherit name; 12 | diskSize = 1024*16; 13 | memSize = 1024*8; 14 | contents = [ coreutils binutils less more souffle ctadl ] ++ additionalDeps; 15 | runScript = "/bin/ctadl"; 16 | } 17 | -------------------------------------------------------------------------------- /taint-front/bslice2.tnt: -------------------------------------------------------------------------------- 1 | def process_get(fname, connfd) { 2 | x = fname; 3 | sink(x, File); 4 | } 5 | 6 | def F() { 7 | process_get(othername, fd); 8 | } 9 | 10 | def main(p1, p2, p3) { 11 | fd = source(File); 12 | command = source(Network); 13 | fname = source(Network); 14 | 15 | // if command == GET 16 | command = GET; 17 | process_get(fname, fd); 18 | F(); 19 | } 20 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/generictaintquery.dl: -------------------------------------------------------------------------------- 1 | // Generic taint query for pcode uses sources and sinks common to C languages. 
2 | 3 | #include "taintquery.dl" 4 | #include "../common/c-src-sink.dl" 5 | 6 | .pragma "suppress-warnings" "TaintSourceMethod,TaintSourceCallArg,LeakingSinkMethod,TaintTransferCallArg,TaintTransferCallArgs,SuspiciousTaintTransferCallArg,TaintSanitizeEdge,CInsn_ModelAssign" 7 | -------------------------------------------------------------------------------- /src/ctadl/vis/__init__.py: -------------------------------------------------------------------------------- 1 | color_reset = "\u001b[0m" 2 | color_fg = [] 3 | color_bg = [] 4 | 5 | for i in range(0, 16): 6 | for j in range(0, 16): 7 | code = str(i * 16 + j) 8 | color_fg.append("\u001b[38;5;" + code + "m") 9 | 10 | for i in range(0, 16): 11 | for j in range(0, 16): 12 | code = str(i * 16 + j) 13 | color_bg.append("\u001b[48;5;" + code + "m") 14 | -------------------------------------------------------------------------------- /taint-front/ex-vtable.tnt: -------------------------------------------------------------------------------- 1 | def m1(this, x1, y1) { 2 | x1.foo = y1; 3 | } 4 | 5 | def m2(this, x2, y2) { 6 | x2.foo = y2; 7 | } 8 | 9 | def Main() { 10 | cls_vtbl.m1 = m1; 11 | cls_vtbl.m2 = m2; 12 | obj.vtbl = cls_vtbl; 13 | a0 = source(Network); 14 | obj.vtbl.m1(obj, a1, a0); 15 | tmp = a1.foo; 16 | obj.vtbl.m2(obj, a2, tmp); 17 | tmp2 = a2.foo; 18 | sink(tmp2, Network); 19 | } 20 | -------------------------------------------------------------------------------- /tests/field-source.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_generators": [ 3 | { 4 | "find": "fields", 5 | "where": [ 6 | { 7 | "constraint": "signature_match", 8 | "names": ["pid", "mimetypes"] 9 | } 10 | ], 11 | "model": { 12 | "sources": [ 13 | { 14 | "kind": "Label" 15 | } 16 | ] 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /tests/query_nginx.dl: 
-------------------------------------------------------------------------------- 1 | TaintSourceVertex(label, v, p) :- 2 | CisFunction(func), 3 | contains("recv", func), 4 | CFunction_FormalParam(func, n, v), 5 | n = 1, 6 | p = "", 7 | label = "SOURCE". 8 | 9 | LeakingSinkVertex(label, v, p) :- 10 | CisFunction(func), 11 | contains("send", func), 12 | CFunction_FormalParam(func, n, v), 13 | n = 1, 14 | p = "", 15 | label = "SINK". 16 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph/import.dl: -------------------------------------------------------------------------------- 1 | .input isComputedAccessPath(CTADL_INPUT_DB_IO) 2 | .input Vertex(CTADL_INPUT_DB_IO) 3 | .input VirtualAssign(CTADL_INPUT_DB_IO) 4 | .input CallEdge(CTADL_INPUT_DB_IO) 5 | .input SummaryFlow(CTADL_INPUT_DB_IO) 6 | .input VirtualAlloc(CTADL_INPUT_DB_IO) 7 | .input SummaryAlloc(CTADL_INPUT_DB_IO) 8 | .input AliasedBy(CTADL_INPUT_DB_IO) 9 | .input IntCInsn_InFunction(CTADL_INPUT_DB_IO) 10 | -------------------------------------------------------------------------------- /tests/find-field-query.json: -------------------------------------------------------------------------------- 1 | { 2 | "model_generators": [ 3 | { 4 | "find": "instructions", 5 | "where": [ 6 | { 7 | "constraint": "uses_field", 8 | "names": ["mAdapter", "pid"] 9 | } 10 | ], 11 | "model": { 12 | "taint": [ 13 | { 14 | "kind": "Uses_mAdapter" 15 | } 16 | ] 17 | } 18 | } 19 | ] 20 | } 21 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph/export.dl: -------------------------------------------------------------------------------- 1 | .output isComputedAccessPath(CTADL_OUTPUT_DB_IO) 2 | .output Vertex(CTADL_OUTPUT_DB_IO) 3 | .output VirtualAssign(CTADL_OUTPUT_DB_IO) 4 | .output CallEdge(CTADL_OUTPUT_DB_IO) 5 | .output SummaryFlow(CTADL_OUTPUT_DB_IO) 6 | .output VirtualAlloc(CTADL_OUTPUT_DB_IO) 7 | .output 
SummaryAlloc(CTADL_OUTPUT_DB_IO) 8 | .output IntCInsn_InFunction(CTADL_OUTPUT_DB_IO) 9 | .output AliasedBy(CTADL_OUTPUT_DB_IO) 10 | -------------------------------------------------------------------------------- /taint-front/dune: -------------------------------------------------------------------------------- 1 | (library 2 | (name taintlang) 3 | (modules parse genFacts syntax lex) 4 | ) 5 | 6 | (executable 7 | (name taint) 8 | (public_name taintfront ) 9 | (libraries unix taintlang) 10 | (modules taint) 11 | ) 12 | 13 | (executable 14 | (name summarytest) 15 | (public_name taintfront-summarytest) 16 | (libraries unix taintlang) 17 | (modules summarytest) 18 | ) 19 | 20 | (ocamllex Lex) 21 | 22 | (menhir (modules Parse)) 23 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/importdb.dl: -------------------------------------------------------------------------------- 1 | .input IndirectCallStmt(CTADL_INPUT_DB_IO) 2 | .input Function(CTADL_INPUT_DB_IO) 3 | .input Global(CTADL_INPUT_DB_IO) 4 | .input FormalParam(CTADL_INPUT_DB_IO) 5 | .input ActualParam(CTADL_INPUT_DB_IO) 6 | .input Assign(CTADL_INPUT_DB_IO) 7 | .input AssignFunction(CTADL_INPUT_DB_IO) 8 | .input DirectCall(CTADL_INPUT_DB_IO) 9 | .input IndirectCall(CTADL_INPUT_DB_IO) 10 | .input TaintSpec(CTADL_INPUT_DB_IO) 11 | 12 | -------------------------------------------------------------------------------- /tests/taint-front/global1.tnt: -------------------------------------------------------------------------------- 1 | var g1; 2 | var g2; 3 | var g3; 4 | var g4; 5 | var g5; 6 | var g6; 7 | 8 | def B() { 9 | g6 = g5; 10 | D(); 11 | } 12 | 13 | 14 | def D() { 15 | p.f = g6; 16 | A(p); 17 | } 18 | 19 | def A(p) { 20 | g3 = p.f; 21 | } 22 | 23 | def Main() { 24 | g5 = source(Net); 25 | B(); 26 | sink(g3, Net); 27 | } 28 | 29 | // With compositional globals: 30 | 31 | // SummaryFlow 32 | // B: 33 | // g6 <- g5 34 | // D: 35 | // g3 <- g6 36 | 
-------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/export.dl: -------------------------------------------------------------------------------- 1 | .output IndirectCallStmt(CTADL_OUTPUT_DB_IO) 2 | .output Function(CTADL_OUTPUT_DB_IO) 3 | .output Global(CTADL_OUTPUT_DB_IO) 4 | .output FormalParam(CTADL_OUTPUT_DB_IO) 5 | .output ActualParam(CTADL_OUTPUT_DB_IO) 6 | .output Assign(CTADL_OUTPUT_DB_IO) 7 | .output AssignFunction(CTADL_OUTPUT_DB_IO) 8 | .output DirectCall(CTADL_OUTPUT_DB_IO) 9 | .output IndirectCall(CTADL_OUTPUT_DB_IO) 10 | .output TaintSpec(CTADL_OUTPUT_DB_IO) 11 | 12 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/jadx/customquery.dl: -------------------------------------------------------------------------------- 1 | // Regular query but with all the JADX facts imported from the database. 2 | 3 | #include "information-flow/query.dl" 4 | #include "jadx-fact-decls.dl" 5 | #include "importdb.dl" 6 | 7 | 8 | // Bringing this in to resolve methods to specific classes 9 | // This is a jadx-only table at the moment! 
10 | .decl Method_DeclaringType(method: CFunction, cls: symbol) 11 | .input Method_DeclaringType(IO=sqlite, filename=QUOTE(CTADL_INPUT_DB)) 12 | 13 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/import.dl: -------------------------------------------------------------------------------- 1 | .input Function(filename="Function.facts") 2 | .input Global(filename="Global.facts") 3 | .input FormalParam(filename="FormalParam.facts") 4 | .input ActualParam(filename="ActualParam.facts") 5 | .input Assign(filename="Assign.facts") 6 | .input AssignFunction(filename="AssignFunction.facts") 7 | .input DirectCall(filename="DirectCall.facts") 8 | .input IndirectCall(filename="IndirectCall.facts") 9 | .input TaintSpec(filename="TaintSpec.facts") 10 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/ctadl_ir_types.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "macros.dl" 4 | 5 | .type CAccessPath = symbol 6 | .type CVar = symbol 7 | .type CFunction = symbol 8 | .type CInsn = symbol 9 | .type CFunctionInvocation = symbol // XXX remove 10 | 11 | // for handling call-return matching 12 | .type FS_ParenMatch = symbol // "call" or "nocall" 13 | 14 | .type SliceLabelType = symbol 15 | 16 | .decl SanitizeAssign( 17 | v1: CVar, p1: CAccessPath, v2: CVar, p2: CAccessPath 18 | ) 19 | -------------------------------------------------------------------------------- /tests/taint-front/compositional-ctx2.json: -------------------------------------------------------------------------------- 1 | { 2 | "VirtualAssign": { 3 | "includes": [{ 4 | "insn": "Main/call/1", 5 | "v1": "Main/second", 6 | "v2": "Main/first" 7 | }], 8 | "excludes": [{ 9 | "insn": "Main/call/1", 10 | "v1": "Main/third", 11 | "v2": "Main/first" 12 | }] 13 | }, 14 | "VirtualAlloc": { 15 | "includes": [{ 16 | "to": "Main/third", 17 
| "to_path": "", 18 | "obj": "allocsite" 19 | }] 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" 7 | 8 | [tool.pyright] 9 | include = [ "bin/ctadl", "src", "tests"] 10 | exclude = [ 11 | "**/__pycache__" 12 | ] 13 | ignore = [] 14 | 15 | reportMissingImports = false 16 | reportMissingTypeStubs = false 17 | 18 | pythonVersion = "3.9" 19 | 20 | executionEnvironments = [ 21 | { root = "src" } 22 | ] 23 | 24 | [tool.isort] 25 | profile = "black" 26 | -------------------------------------------------------------------------------- /tests/bin/check-analysis-options: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -o pipefail 5 | set -x 6 | 7 | top="$(git rev-parse --show-toplevel)" 8 | outdir="$top/ctadl-test-output" 9 | cd $outdir 10 | 11 | ctadl import pcode $(nix build --inputs-from . 
nixpkgs#curl --no-link --json | jq -r ".[0].outputs.bin")/bin/curl -o out/curl-pcode 12 | 13 | # Test some options 14 | for index_opt in --match-star '--cha --no-hybrid-inlining' --star; do 15 | ctadl --dir out/curl-pcode index -f $index_opt 16 | done 17 | -------------------------------------------------------------------------------- /tests/taint-front/compositional-ctx4.json: -------------------------------------------------------------------------------- 1 | { 2 | "VirtualAssign": { 3 | "includes": [{ 4 | "insn": "Main/call/1", 5 | "v1": "Main/tmp", 6 | "v2": "Main/x", 7 | "ctx": "" 8 | }, { 9 | "v1": "H/tmp1", 10 | "v2": "H/x", 11 | "ctx": "G#F#Main#" 12 | },{ 13 | "v1": "H/tmp2", 14 | "v2": "H/tmp1", 15 | "ctx": "G#F#Main#" 16 | },{ 17 | "v1": "H/tmp3", 18 | "v2": "H/tmp2", 19 | "ctx": "" 20 | }], 21 | "excludes": [] 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /nix/export/networkx.nix: -------------------------------------------------------------------------------- 1 | { 2 | python3, 3 | makeWrapper, 4 | lib, 5 | }: 6 | python3.pkgs.buildPythonPackage rec { 7 | pname = "ctadl-networkx-export-plugin"; 8 | version = lib.strings.removeSuffix "\n" (builtins.readFile ../../plugins/networkx-export/src/ctadl_networkx_export_plugin/VERSION); 9 | src = ../../plugins/networkx-export; 10 | 11 | doCheck = false; 12 | 13 | nativeBuildInputs = [] ++ (with python3.pkgs; [setuptools]); 14 | propagatedBuildInputs = with python3.pkgs; [networkx]; 15 | } 16 | -------------------------------------------------------------------------------- /plugins.nix: -------------------------------------------------------------------------------- 1 | { 2 | lib, 3 | callPackage, 4 | llvmPackages, 5 | jadxPlugin, 6 | ghidra, 7 | withPythonWheel, 8 | }: let 9 | plugins = { 10 | taintfront = callPackage ./nix/taintfront/taintfront-plugin.nix {}; 11 | ghidra = callPackage ./nix/ghidra/plugin.nix {inherit ghidra;}; 12 | networkxExport = 
callPackage ./nix/export/networkx.nix {}; 13 | }; 14 | in 15 | {jadx = jadxPlugin;} 16 | // ( 17 | lib.attrsets.mapAttrs (name: pkg: pkg.overrideAttrs withPythonWheel) plugins 18 | ) 19 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/types.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // instruction is an occurrence of a pcode operation 4 | .type PCodeInstruction <: symbol 5 | .type PCodeVarnode <: symbol 6 | .type PCodeHighFunc <: symbol 7 | .type PCodeHighPrototype <: symbol 8 | .type PCodeHighVar <: symbol 9 | .type PCodeHighSymbol <: symbol 10 | .type PCodeType <: symbol 11 | .type PCodeBlockBasic <: symbol 12 | .type PCodeAddress <: number 13 | .type PCodeParameterDefinition <: symbol 14 | .type PCodeGenericCallingConvention <: symbol 15 | -------------------------------------------------------------------------------- /plugins/networkx-export/README.md: -------------------------------------------------------------------------------- 1 | # CTADL networkx graph export 2 | 3 | This project is part of the [CTADL Taint Analyzer](https://github.com/sandialabs/ctadl). 4 | 5 | This project provides a plugin for CTADL to export data flow graph information via networkx. 6 | 7 | # Installation 8 | 9 | Use pip. 10 | 11 | $ pip install ctadl-networkx-export-plugin 12 | 13 | Afterward, if ctadl is installed, you can do: 14 | 15 | $ ctadl export --format gml pcode /path/to/a/binary 16 | 17 | See `ctadl --help` for more details. 
18 | -------------------------------------------------------------------------------- /tests/bin/sarif_has_code_flows.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import json 4 | import sys 5 | 6 | def main(): 7 | with open(sys.argv[1], 'r') as fd: 8 | sarif = json.loads(fd.read()) 9 | try: 10 | for run in sarif.get('runs', []): 11 | for result in run.get('results', []): 12 | if result.get('ruleId') == 'C0001': 13 | return 0 14 | except: 15 | pass 16 | return 1 17 | 18 | if __name__ == '__main__': 19 | exit(main()) 20 | -------------------------------------------------------------------------------- /nix/taintfront/taintfront-plugin.nix: -------------------------------------------------------------------------------- 1 | { 2 | python3, 3 | makeWrapper, 4 | taintfront, 5 | lib, 6 | }: 7 | python3.pkgs.buildPythonPackage rec { 8 | pname = "ctadl-taint-front-fact-generator-plugin"; 9 | version = lib.strings.removeSuffix "\n" (builtins.readFile ../../plugins/taint-front/src/ctadl_taint_front_fact_generator_plugin/VERSION); 10 | src = ../../plugins/taint-front; 11 | 12 | doCheck = false; 13 | 14 | passthru = { 15 | makeWrapperArgs = [ 16 | ''--prefix PATH : ${lib.makeBinPath [taintfront]}'' 17 | ]; 18 | }; 19 | } 20 | -------------------------------------------------------------------------------- /nix/ghidra/plugin.nix: -------------------------------------------------------------------------------- 1 | { 2 | python3, 3 | makeWrapper, 4 | lib, 5 | ghidra, 6 | }: 7 | python3.pkgs.buildPythonPackage rec { 8 | pname = "ctadl-ghidra-fact-generator-plugin"; 9 | version = lib.strings.removeSuffix "\n" (builtins.readFile ../../plugins/ghidra/src/ctadl_ghidra_fact_generator_plugin/VERSION); 10 | src = ../../plugins/ghidra; 11 | 12 | doCheck = false; 13 | 14 | # Set here but useless. ctadl's .withPlugins knows how to use these args to wrap. 
15 | passthru = { 16 | makeWrapperArgs = [ 17 | ''--set GHIDRA_HOME ${ghidra}/lib/ghidra'' 18 | ]; 19 | }; 20 | } 21 | -------------------------------------------------------------------------------- /tests/test_query.json: -------------------------------------------------------------------------------- 1 | { "model_generators": [ 2 | { "find": "methods", 3 | "where": [ 4 | { "constraint": "signature_match", 5 | "names": ["_TestSource_"] } 6 | ], 7 | "model": { 8 | "sources": [ { 9 | "kind": "test", 10 | "port": "Return" 11 | }] 12 | } 13 | }, 14 | { "find": "methods", 15 | "where": [ 16 | { "constraint": "signature_match", 17 | "names": ["_TestSink_"] } 18 | ], 19 | "model": { 20 | "sinks": [{ 21 | "kind": "test", 22 | "port": "Argument(*)" 23 | }] 24 | } 25 | } 26 | ]} 27 | 28 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include src/ctadl/VERSION 2 | include docs/*.md 3 | include docs/dev/*.md 4 | include src/ctadl/models/*.json 5 | include src/ctadl/models/jadx/*.json 6 | include src/ctadl/models/pcode/*.json 7 | include src/ctadl/models/taint-front/*.json 8 | include src/ctadl/souffle-logic/functors.cpp 9 | include src/ctadl/souffle-logic/*.dl 10 | include src/ctadl/souffle-logic/information-flow/*.dl 11 | include src/ctadl/souffle-logic/match/*.dl 12 | include src/ctadl/souffle-logic/graph/*.dl 13 | include src/ctadl/souffle-logic/graph/slice/*.dl 14 | include src/ctadl/souffle-logic/taint-front/*.dl 15 | include src/ctadl/souffle-logic/jadx/*.dl 16 | include src/ctadl/souffle-logic/pcode/* 17 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. CTADL documentation master file, created by 2 | sphinx-quickstart on Sat May 17 11:21:15 2025. 
3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to CTADL's documentation! 7 | ================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | intro 15 | install 16 | usage 17 | workflows 18 | models 19 | SARIF 20 | analysis 21 | star 22 | windows 23 | DevGuide 24 | debugging 25 | 26 | 27 | 28 | Indices and tables 29 | ================== 30 | 31 | * :ref:`genindex` 32 | * :ref:`modindex` 33 | * :ref:`search` 34 | -------------------------------------------------------------------------------- /taint-front/Lex.mll: -------------------------------------------------------------------------------- 1 | { 2 | open Parse 3 | 4 | let kw = function 5 | | "def" -> KWDEF 6 | | "var" -> KWGLOBAL 7 | | "return" -> KWRETURN 8 | | ident -> IDENT ident 9 | } 10 | 11 | rule token = parse 12 | | [' ' '\t']+ { token lexbuf } 13 | | ("\r" | "\n" | "\r\n") { Lexing.new_line lexbuf; token lexbuf } 14 | | "//" [^ '\r' '\n']* { token lexbuf } 15 | | ['a'-'z' 'A'-'Z' '_']['a'-'z' 'A'-'Z' '_' '0'-'9']* as ident { kw ident } 16 | | "*" { STAR } 17 | | "," { COMMA } 18 | | "(" { LPAREN } 19 | | ")" { RPAREN } 20 | | "{" { LCURLY } 21 | | "}" { RCURLY } 22 | | "[" { LSQUARE } 23 | | "]" { RSQUARE } 24 | | "." 
{ DOT } 25 | | "=" { ASSIGN } 26 | | ";" { SEMI } 27 | | eof { EOF } 28 | | _ { raise Error } 29 | -------------------------------------------------------------------------------- /tests/taint-front/compositional-ctx4.tnt: -------------------------------------------------------------------------------- 1 | def id_h(x) { 2 | return x; 3 | } 4 | def id_i(x) { 5 | return x; 6 | } 7 | def id_j(x) { 8 | return x; 9 | } 10 | 11 | def H(f, g, x) { 12 | h = id_j; 13 | // tmp1 <- x (G#F#Main#) 14 | tmp1 = f(x); 15 | // tmp2 <- tmp1 (G#F#Main#) 16 | tmp2 = g(tmp1); 17 | // tmp3 <- tmp2 18 | tmp3 = h(tmp2); 19 | return tmp3; 20 | } 21 | def G(f, g, x) { 22 | // F#Main#: f -> G 23 | // F#Main#: g -> H 24 | par2 = id_i; 25 | tmp = g(f, par2, x); 26 | return tmp; 27 | } 28 | def F(f, x) { 29 | g = H; 30 | par1 = id_h; 31 | tmp = f(par1, g, x); 32 | return tmp; 33 | } 34 | def Main() { 35 | f = G; 36 | // x -> tmp 37 | tmp = F(f, x); 38 | } 39 | -------------------------------------------------------------------------------- /utils/merge_summaries.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | if len(sys.argv) < 2: 5 | print(f'Usage {sys.argv[0]} ...') 6 | sys.exit(1) 7 | out_file = sys.argv[1] 8 | in_files = sys.argv[2:] 9 | if len(in_files) < 2: 10 | print('You need to merge at least two files') 11 | sys.exit(1) 12 | 13 | mega_json_file = None 14 | for fname in in_files: 15 | with open(fname) as f: 16 | jdata = json.load(f) 17 | if mega_json_file is None: 18 | mega_json_file = jdata 19 | else: 20 | mega_json_file['model_generators'].extend(jdata['model_generators']) 21 | with open(out_file, 'w') as f: 22 | json.dump(mega_json_file, f) 23 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/information-flow/export.dl: -------------------------------------------------------------------------------- 1 | // core taint results 2 | .output 
TaintSourceVertex(CTADL_OUTPUT_DB_IO) 3 | .output LeakingSinkVertex(CTADL_OUTPUT_DB_IO) 4 | .output TaintSanitizeEdge(CTADL_OUTPUT_DB_IO) 5 | .output TaintSanitizeVertex(CTADL_OUTPUT_DB_IO) 6 | .output forward_flow.ReachableVertex(CTADL_OUTPUT_DB_IO) 7 | .output forward_flow.ReachableEdge(CTADL_OUTPUT_DB_IO) 8 | .output backward_flow.ReachableVertex(CTADL_OUTPUT_DB_IO) 9 | .output backward_flow.ReachableEdge(CTADL_OUTPUT_DB_IO) 10 | .output isTaintedArgUnmodeled(CTADL_OUTPUT_DB_IO) 11 | .output CTADLStats(CTADL_OUTPUT_DB_IO) 12 | 13 | // program relations that are affected by taint computation 14 | .output IntCInsn_InFunction(CTADL_OUTPUT_DB_IO) 15 | -------------------------------------------------------------------------------- /tests/taint-front/compositional-ctx2.tnt: -------------------------------------------------------------------------------- 1 | def id(x) { 2 | return x; 3 | } 4 | 5 | def Leaf(f, g, x) { 6 | tmp1 = f(x); 7 | tmp2 = g(tmp1); 8 | return tmp2; 9 | } 10 | 11 | def bar1(f, x) { 12 | g = id; 13 | tmp = Leaf(f.func, g, x); 14 | return tmp; 15 | } 16 | 17 | def bar(x) { 18 | f.y.func = id; 19 | tmp = bar1(f.y, x); 20 | return tmp; 21 | } 22 | 23 | def allocsite() {} 24 | def alloc(x) { 25 | tmp = allocsite; 26 | return tmp; 27 | } 28 | def baz(x) { 29 | f.func = alloc; 30 | tmp = bar1(f, x); 31 | return tmp; 32 | } 33 | 34 | def Main() { 35 | first = source(Thing); 36 | // second = first is the desired discovery 37 | second = bar(first); 38 | third = baz(first); 39 | sink(second, Thing); 40 | } 41 | -------------------------------------------------------------------------------- /plugins/ghidra/README.md: -------------------------------------------------------------------------------- 1 | # CTADL Ghidra Pcode Fact Generator Plugin 2 | 3 | This project is part of the [CTADL Taint Analyzer](https://github.com/sandialabs/ctadl). 
4 | 5 | This project provides a plugin for CTADL so that it can perform taint analysis on [Ghidra](https://github.com/NationalSecurityAgency/ghidra) Pcode. 6 | 7 | # Installation 8 | 9 | Use pip. 10 | 11 | $ pip install ctadl-ghidra-fact-generator-plugin 12 | 13 | Afterward, if ctadl is installed, you can do: 14 | 15 | $ ctadl import pcode /path/to/a/binary 16 | 17 | See `ctadl --help` for more details. 18 | 19 | Make sure, when you run ctadl, that [Ghidra](https://ghidra-sre.org) runs in your environment and that `GHIDRA_HOME` is set. 20 | If you forget, it'll remind you. 21 | 22 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph/fieldprop_ref.dl: -------------------------------------------------------------------------------- 1 | // propagate in the direction of assignment, "forward" 2 | VirtualAssign(s, v1, p13, v2, p23, "fp") :- 3 | VirtualAssign(s, v1, p1, v2, p2, _ty), 4 | Vertex(v2, p23), 5 | MatchComputedPrefix(p23, p2, p3), 6 | MatchComputedPrefix(p13, p1, p3), 7 | !CVar_isGlobal(v2), 8 | !CVar_isGlobal(v1), 9 | (v1 != v2; p13 != p23). 10 | 11 | // propagate in reverse direction of assignment, "backward" 12 | VirtualAssign(s, v1, p13, v2, p23, "fp") :- 13 | VirtualAssign(s, v1, p1, v2, p2, _ty), 14 | Vertex(v1, p13), 15 | MatchComputedPrefix(p13, p1, p3), 16 | MatchComputedPrefix(p23, p2, p3), 17 | !CVar_isGlobal(v1), 18 | !CVar_isGlobal(v2), 19 | (v1 != v2; p13 != p23). 
20 | -------------------------------------------------------------------------------- /tests/taint_intent.json: -------------------------------------------------------------------------------- 1 | { 2 | "VirtualAssign": { 3 | "includes": [ 4 | { 5 | "v1": "Ledu/mit/icc_concat_action_string/InFlowActivity;.onCreate:(Landroid/os/Bundle;)V/ssa/r0v0", 6 | "p1": "", 7 | "v2": "Ledu/mit/icc_concat_action_string/OutFlowActivity;.onCreate:(Landroid/os/Bundle;)V/ssa/r0v0", 8 | "p2": "", 9 | "reason": "intent" } 10 | ], 11 | "excludes": [ 12 | { 13 | "v1": "Ledu/mit/icc_concat_action_string/IsolateActivity;.onCreate:(Landroid/os/Bundle;)V/ssa/r0v0", 14 | "p1": "", 15 | "v2": "Ledu/mit/icc_concat_action_string/OutFlowActivity;.onCreate:(Landroid/os/Bundle;)V/ssa/r0v0", 16 | "p2": "", 17 | "reason": "intent" } 18 | ] 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /nix/no-kvm-overlay.nix: -------------------------------------------------------------------------------- 1 | # Building docker images with Nix requires kvm but CI doesn't support that, 2 | # so we disable the kvm requirement with an overlay. 
3 | # See https://github.com/NixOS/nixpkgs/issues/67079 4 | # This is still super hacky because other functions in vmTools will not use the overridden runInLinuxVM 5 | final: prev: 6 | let 7 | runInLinuxVMNoKVM = drv: final.lib.overrideDerivation (final.vmTools.runInLinuxVM drv) (_: { requiredSystemFeatures = []; }); 8 | modifiedVmTools = prev.vmTools // { runInLinuxVM = runInLinuxVMNoKVM; }; 9 | in 10 | { 11 | dockerTools = prev.dockerTools.override { vmTools = modifiedVmTools; }; 12 | singularity-tools = prev.singularity-tools.override { vmTools = modifiedVmTools; }; 13 | } 14 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | SOURCE_DATE_EPOCH = $(shell git log -1 --format=%ct) 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | .PHONY: help Makefile 17 | 18 | # Catch-all target: route all unknown targets to Sphinx using the new 19 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
20 | %: Makefile 21 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 22 | -------------------------------------------------------------------------------- /nix/docker.nix: -------------------------------------------------------------------------------- 1 | { self, name, tag, ctadl, pkgs, system, deps }: 2 | let 3 | stdenv = pkgs.stdenv; 4 | in 5 | pkgs.dockerTools.buildImage { 6 | inherit name tag; 7 | 8 | #fromImage = someBaseImage; 9 | fromImageName = null; 10 | fromImageTag = "latest"; 11 | 12 | copyToRoot = pkgs.buildEnv { 13 | name = "image-root"; 14 | paths = [ ctadl ] ++ deps 15 | ++ (with pkgs.dockerTools; [ usrBinEnv caCertificates fakeNss ]); 16 | pathsToLink = [ "/bin" ]; 17 | }; 18 | 19 | config = { 20 | Entrypoint = [ "/bin/ctadl" ]; 21 | WorkingDir = "/var/empty"; 22 | Volumes = { 23 | "/var" = { }; 24 | "/data" = { }; 25 | "/tmp" = { }; 26 | }; 27 | }; 28 | 29 | diskSize = 2048; 30 | buildVMMemorySize = 8192; 31 | } 32 | -------------------------------------------------------------------------------- /nix/nginx/nginx.nix: -------------------------------------------------------------------------------- 1 | { self 2 | , makeWrapper 3 | , fetchFromGitHub 4 | , fetchzip 5 | , stdenv 6 | , zlib 7 | , libxcrypt 8 | , lib 9 | }: 10 | 11 | stdenv.mkDerivation rec { 12 | pname = "nginx"; 13 | version = "1.24.0"; 14 | 15 | src = fetchzip { 16 | url = "https://nginx.org/download/${pname}-${version}.tar.gz"; 17 | sha256 = "sha256-Alm9XPSARyAeiA+ePXhTlE/gKY4zUP2Wa/+ZdM+G8E8="; 18 | }; 19 | 20 | enableParallelBuilding = true; 21 | dontStrip = true; 22 | 23 | buildInputs = [ zlib libxcrypt ]; 24 | 25 | configureFlags = [ 26 | "--with-debug" 27 | "--with-cc-opt=-gdwarf-4" 28 | "--without-http_gzip_module" 29 | "--without-http_rewrite_module" 30 | ]; 31 | 32 | postFixup = lib.optionalString stdenv.isDarwin '' 33 | dsymutil $out/bin/nginx 34 | ''; 35 | 36 | } 37 | -------------------------------------------------------------------------------- 
/nix/sarif-multitool/checksarif.nix: -------------------------------------------------------------------------------- 1 | { 2 | stdenv, 3 | callPackage, 4 | fetchFromGitHub, 5 | writeShellScriptBin, 6 | }: let 7 | sarif-multitool = callPackage ./default.nix {}; 8 | sarif-spec = stdenv.mkDerivation { 9 | name = "sarif-spec"; 10 | version = "2.1.0"; 11 | src = fetchFromGitHub { 12 | owner = "oasis-tcs"; 13 | repo = "sarif-spec"; 14 | "rev" = "53296faddf08e610230739d7d6a2f061f6e587d8"; 15 | "hash" = "sha256-44UTa4DdXrF4DB2EtZO22wv1b/p1sLMKYKyLEBF0UeA="; 16 | }; 17 | installPhase = '' 18 | mkdir $out 19 | cp -r * $out 20 | ''; 21 | }; 22 | in 23 | writeShellScriptBin "checksarif" '' 24 | export DOTNET_ROLL_FORWARD=Major 25 | exec ${sarif-multitool}/bin/sarif validate -c ${./sarif-validation.xml} -j ${sarif-spec}/Schemata/sarif-schema-2.1.0.json "$@" 26 | '' 27 | -------------------------------------------------------------------------------- /tests/taint-front/indirect1.json: -------------------------------------------------------------------------------- 1 | { 2 | "forward_flow.ReachableVertex": { 3 | "includes": [{ 4 | "v1": "f/", 5 | "p1": "", 6 | "label": "Network" 7 | }, 8 | { 9 | "v1": "f/tmp", 10 | "label": "Network", 11 | "call_state": "free" 12 | }, 13 | { 14 | "v1": "Main/x", 15 | "label": "Network" 16 | }, 17 | { 18 | "v1": "Main/x", 19 | "label": "Network" 20 | }, 21 | { 22 | "v1": "g/tmp", 23 | "label": "Network" 24 | }, 25 | { 26 | "v1": "g/tmp", 27 | "label": "Network" 28 | }, 29 | { 30 | "v1": "f/", 31 | "label": "Network" 32 | }, 33 | { 34 | "v1": "g/", 35 | "label": "Network" 36 | }, 37 | { 38 | "v1": "g/", 39 | "label": "Network" 40 | }] 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /plugins/taint-front/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = ctadl-taint-front-fact-generator_plugin 3 | version = attr: 
ctadl_taint_front_fact_generator_plugin.version 4 | description = CTADL TAINT-FRONT fact generator plugin 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | url = https://github.com/sandialabs/ctadl 8 | project_urls = 9 | Bug Tracker = https://github.com/sandialabs/ctadl/issues 10 | classifiers = 11 | Programming Language :: Python :: 3.9 12 | Environment :: Console 13 | Environment :: Plugins 14 | License :: OSI Approved :: Apache Software License 15 | Operating System :: POSIX 16 | 17 | [options] 18 | packages = find: 19 | package_dir = 20 | = src 21 | include_package_data = True 22 | python_requires = >=3.9 23 | install_requires = 24 | 25 | [options.packages.find] 26 | where = src 27 | -------------------------------------------------------------------------------- /taint-front/README.md: -------------------------------------------------------------------------------- 1 | Simple front-end for taint analysis 2 | =================================== 3 | 4 | Make sure `menhir` is installed: 5 | 6 | ``` 7 | $ opam install menhir 8 | ``` 9 | 10 | Then compile with `dune` 11 | 12 | ``` 13 | $ dune build 14 | ``` 15 | 16 | Facts can then be generated: 17 | 18 | ``` 19 | $ ./_build/default/taintfront.exe test.tnt 20 | ``` 21 | 22 | This will compile `test.tnt` into the `facts` directory expected by `souffle`. 23 | 24 | The language is fairly self explanatory -- see `test.tnt` for an example. 25 | The frontend does not optimize any access paths -- it simply preserves what's in the source. 26 | 27 | Sources, sanitizers, and sinks are specified using function call syntax. 28 | 29 | x = source(label); 30 | y = sanitize(x, label); 31 | sink(y, label); 32 | 33 | `label` is the label for the source/sink. 
34 | 35 | -------------------------------------------------------------------------------- /plugins/networkx-export/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = ctadl-networkx-export-plugin 3 | version = attr: ctadl_networkx_export_plugin.version 4 | description = Exports the CTADL database to networkx format 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | url = https://github.com/sandialabs/ctadl 8 | project_urls = 9 | Bug Tracker = https://github.com/sandialabs/ctadl/issues 10 | classifiers = 11 | Programming Language :: Python :: 3.9 12 | Environment :: Console 13 | Environment :: Plugins 14 | License :: OSI Approved :: Apache Software License 15 | Operating System :: POSIX 16 | 17 | [options] 18 | packages = find: 19 | package_dir = 20 | = src 21 | include_package_data = True 22 | python_requires = >=3.9 23 | install_requires = 24 | networkx >= 2.8.6 25 | 26 | [options.packages.find] 27 | where = src 28 | -------------------------------------------------------------------------------- /nix/souffle/remove-lld.patch: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index b19214120..a8ed5cbe5 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -102,13 +102,6 @@ option(SOUFFLE_BASH_COMPLETION "Enable/Disable bash completion" OFF) 6 | cmake_dependent_option(SOUFFLE_USE_LIBCPP "Link to libc++ instead of libstdc++" ON 7 | "CMAKE_CXX_COMPILER_ID STREQUAL Clang" OFF) 8 | 9 | -# Using Clang? Likely want to use `lld` too. 
10 | -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 11 | - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") 12 | - set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=lld") 13 | - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") 14 | -endif() 15 | - 16 | # Add aditional modules to CMake 17 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) 18 | 19 | -------------------------------------------------------------------------------- /plugins/ghidra/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = ctadl-ghidra-fact-generator_plugin 3 | version = attr: ctadl_ghidra_fact_generator_plugin.version 4 | description = CTADL GHIDRA Pcode fact generator plugin 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | url = https://github.com/sandialabs/ctadl 8 | project_urls = 9 | Bug Tracker = https://github.com/sandialabs/ctadl/issues 10 | classifiers = 11 | Programming Language :: Python :: 3.9 12 | Environment :: Console 13 | Environment :: Plugins 14 | License :: OSI Approved :: Apache Software License 15 | Operating System :: POSIX 16 | license_files = LICENSE.txt 17 | 18 | [options] 19 | packages = find: 20 | package_dir = 21 | = src 22 | include_package_data = True 23 | python_requires = >=3.9 24 | install_requires = 25 | 26 | [options.packages.find] 27 | where = src 28 | -------------------------------------------------------------------------------- /tests/bin/test: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -o pipefail 5 | set -x 6 | 7 | SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 8 | PATH="$SCRIPT_DIR:$PATH" 9 | 10 | 11 | top="$(git rev-parse --show-toplevel)" 12 | outdir="$top/ctadl-test-output" 13 | mkdir -p $outdir 14 | 15 | bash $top/tests/bin/check-static 
16 | 17 | cd $outdir 18 | 19 | bash $top/tests/bin/check-taintfront 20 | bash $top/tests/bin/check-taintfront2 21 | 22 | bash $top/tests/bin/check-ActivityCommunication4 23 | 24 | bash $top/tests/bin/check-pcode 25 | 26 | bash $top/tests/bin/check-analysis-options 27 | 28 | # Checks the sarif logs for validation failures 29 | jq '.runs[].results[] | select(.ruleId | test("JSON.*"))' sarif-validation-log.sarif > sarif-validation-failures.txt 30 | if [ -s "sarif-validation-failures.txt" ]; then 31 | exit 1 32 | fi 33 | 34 | echo "all tests passed" 35 | -------------------------------------------------------------------------------- /taint-front/taint.ml: -------------------------------------------------------------------------------- 1 | open! Taintlang 2 | open! Parse 3 | 4 | let _ = 5 | let out_dir = ref "facts" in 6 | let files = ref [] in 7 | Arg.parse [ 8 | "-o", Arg.Set_string out_dir, " output directory"; 9 | ] (fun fname -> 10 | files := fname :: !files 11 | ) "Sample language for taint analysis"; 12 | 13 | let files = List.rev !files in 14 | 15 | let ast = List.filter_map (fun fname -> 16 | let inch = open_in fname in 17 | let lexbuf = Lexing.from_channel inch in 18 | try 19 | let ast = Parse.top Lex.token lexbuf in 20 | Some ast 21 | with Parse.Error -> 22 | let pos = lexbuf.lex_curr_p in 23 | Format.printf "Parse error %s(%d:%d)@." 
def calculate_sha256_hexdigest(filename):
    """Return the SHA-256 hex digest of the file at *filename*.

    The file is opened in binary mode and fed to the hash in 4 KiB blocks,
    so large files are never held in memory all at once.

    This is a command-line helper: on any failure it writes a diagnostic to
    standard error and terminates the process with exit status 1 (raising
    ``SystemExit``) instead of returning.
    """
    sha256_hash = hashlib.sha256()

    try:
        with open(filename, "rb") as f:
            # Read and update hash string value in blocks of 4K
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
    except FileNotFoundError:
        # Diagnostics go to stderr so that stdout carries only the digest
        # and stays safe to capture in shell pipelines.
        print(f"Error: The file '{filename}' was not found.", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"An error occurred: {e}", file=sys.stderr)
        sys.exit(1)
    return sha256_hash.hexdigest()


if __name__ == "__main__":
    if len(sys.argv) != 2:
        # Name the real script and the expected argument.
        print("Usage: python hashdl.py <filename>", file=sys.stderr)
        sys.exit(1)

    filename = sys.argv[1]
    hexdigest = calculate_sha256_hexdigest(filename)
    print(hexdigest)
#!/usr/bin/env bash
# Exercises ctadl index/query/export option handling on the "global2"
# taint-front example found under $outdir/global2.

top="$(git rev-parse --show-toplevel)"
outdir="$top/ctadl-test-output"
testdir="$top/tests/taint-front"
cd "$outdir" || exit 1

# Runs a command that is REQUIRED to fail and aborts the test if it succeeds.
# A bare `! cmd` statement cannot do this: its status is simply discarded
# (and negated pipelines are exempt from errexit), so the check would never
# fail the script.
expect_failure() {
    if "$@"; then
        echo "expected failure but command succeeded: $*" >&2
        exit 1
    fi
}

# Test --no-compile-analysis
ctadl --dir "$outdir/global2" index -f --no-compile-analysis
ctadl --dir "$outdir/global2" query --no-compile-analysis --format sarif -o global2.sarif
sarif_has_code_flows.py global2.sarif

expect_failure ctadl --dir "$outdir/global2" index # index already exists
expect_failure ctadl index -f facts2 # nonexistent facts dir
rm -f ctadlir.db
expect_failure ctadl query # nonexistent index

# Test without global data flow
ctadl --dir "$outdir/global2" index -vv -f --no-interprocedural-data-flow
ctadl --dir "$outdir/global2" export --format gml -o global2.gml
ctadl --dir "$outdir/global2" query --format sarif -o global2.sarif
# Without interprocedural data flow the SARIF output must contain no code flows.
expect_failure python3 "$top/tests/bin/sarif_has_code_flows.py" "$outdir/global2/global2.sarif"
false 6 | }: 7 | 8 | 9 | let 10 | souffle = callPackage ./common.nix rec { 11 | version = "2.1"; 12 | src = fetchFromGitHub { 13 | owner = "souffle-lang"; 14 | repo = "souffle"; 15 | rev = "${version}"; 16 | sha256 = "11x3v78kciz8j8p1j0fppzcyl2lbm6ib4svj6a9cwi836p9h3fma"; 17 | }; 18 | inherit openmp enableDebug enable64BitDomain python3; 19 | }; 20 | in 21 | souffle.overrideAttrs (attrs: rec { 22 | 23 | patches = [ ./git-and-completion.patch ./remove-cpack.patch ./remove-index-warning.patch ]; 24 | 25 | NIX_CFLAGS_COMPILE = 26 | " -Wno-format-security"; 27 | 28 | postFixup = '' 29 | wrapProgram "$out/bin/souffle-compile" --prefix CXXFLAGS " " "$souffleCompileIncludes $souffleCompileCxxFlags" --prefix LDFLAGS " " "$souffleCompileLdFlags" 30 | ''; 31 | }) 32 | 33 | -------------------------------------------------------------------------------- /nix/souffle/souffle-2.3-sources.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/main.cpp b/src/main.cpp 2 | index 8f4ba085c..b03db7286 100644 3 | --- a/src/main.cpp 4 | +++ b/src/main.cpp 5 | @@ -196,9 +196,9 @@ void compileToBinary(const std::string& command, std::string_view sourceFilename 6 | argv.push_back(std::string(sourceFilename)); 7 | 8 | #if defined(_MSC_VER) 9 | - const char* interpreter = "python"; 10 | + const char* interpreter = "bash"; 11 | #else 12 | - const char* interpreter = "python3"; 13 | + const char* interpreter = "bash"; 14 | #endif 15 | auto exit = execute(interpreter, argv); 16 | if (!exit) throw std::invalid_argument(tfm::format("unable to execute tool ", command)); 17 | @@ -303,7 +303,7 @@ public: 18 | return Stream; 19 | } 20 | 21 | - bool endInput() { 22 | + bool endInput() override { 23 | const int Status = pclose(Stream); 24 | Stream = nullptr; 25 | if (Status == -1) { 26 | -------------------------------------------------------------------------------- /tests/androidtest-query.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "model_generators": [ 3 | { 4 | "find": "methods", 5 | "where": [ 6 | { 7 | "constraint": "signature_match", 8 | "name": "i", 9 | "parent": "Landroid/util/Log;" 10 | } 11 | ], 12 | "model": { 13 | "sinks": [ 14 | { 15 | "kind": "Persistence", 16 | "port": "Argument(*)", 17 | "all_fields": true 18 | } 19 | ] 20 | } 21 | }, 22 | { 23 | "find": "methods", 24 | "where": [ 25 | { 26 | "constraint": "signature_match", 27 | "name": "getDeviceId", 28 | "parent": "Landroid/telephony/TelephonyManager;" 29 | } 30 | ], 31 | "model": { 32 | "sources": [ 33 | { 34 | "kind": "Telephony", 35 | "port": "Return" 36 | } 37 | ] 38 | } 39 | } 40 | ] 41 | } 42 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint_schema.dl: -------------------------------------------------------------------------------- 1 | // Taint analysis outputs a set of reachable vertices and edges. Vertices 2 | // contain the bulk of the information, including a unique id. Edges are 3 | // defined on those ids. 4 | 5 | // This schema is included in a souffle component which produces forward and 6 | // backward slices. So in the database these tables have a forward_flow or 7 | // backward_flow prefix. 8 | 9 | // Note that the same vertex, when reached from forward vs when reached from 10 | // backward, may have two different ids. 11 | .decl ReachableVertex( 12 | id: number, 13 | v1: CVar, p1: CAccessPath, label: SliceLabelType, call_state: FS_ParenMatch, suspicion: number, 14 | ctx: symbol 15 | ) 16 | choice-domain (v1, p1, label, call_state, ctx) 17 | 18 | // An edge in the taint graph corresponds to an instruction. The 'kind' 19 | // argument is either "move", "actual-to-formal", or "formal-to-actual." 
20 | .decl ReachableEdge(vertex_to: number, vertex_from: number, insn: CInsn, kind: symbol) 21 | -------------------------------------------------------------------------------- /taint-front/alias-rule.d2: -------------------------------------------------------------------------------- 1 | prop-alias: { 2 | x -> x_G: .G 3 | x_G: " " 4 | x_G -> x_GF: .F 5 | x_GF: " " { style: { fill: green } } 6 | 7 | y -> y_H: .H 8 | y_H: " " 9 | y_H -> y_HF: .F 10 | 11 | # Alias 12 | x_G -> y_H { style: { stroke: green; stroke-width: 3 } } 13 | y_HF: " " { style: { fill: "linear-gradient(#f69d3c, green)" } } 14 | } 15 | 16 | prop-original: { 17 | x -> x_G: .G 18 | x_G: " " 19 | x_G -> x_GF: .F 20 | x_GF: " " 21 | 22 | y -> y_H: .H 23 | y_H: " " 24 | y_H -> y_HF: .F 25 | y_HF: " " { style: { fill: green } } 26 | 27 | # Alias 28 | x_G -> y_H { style: { stroke: green; stroke-width: 3 } } 29 | x_GF: " " { style: { fill: "linear-gradient(#f69d3c, green)" } } 30 | } 31 | 32 | interproc: { 33 | x -> x_G: .G 34 | x_G: actual*->formal 35 | x_G -> x_GJ: .J 36 | x_GJ: " " 37 | 38 | y -> y_H: .H 39 | y_H: actual*->formal 40 | y_H -> y_HK: .K 41 | y_HK: " " 42 | 43 | # Alias 44 | x_GJ -> y_HK { style: { stroke: green; stroke-width: 3 } } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Read the Docs configuration file for Sphinx projects 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 3 | 4 | # Required 5 | version: 2 6 | 7 | # Set the OS, Python version and other tools you might need 8 | build: 9 | os: ubuntu-22.04 10 | tools: 11 | python: "3.9" 12 | 13 | # Build documentation in the "docs/" directory with Sphinx 14 | sphinx: 15 | configuration: docs/conf.py 16 | # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs 17 | # builder: "dirhtml" 18 | # Fail 
on all warnings to avoid broken references 19 | # fail_on_warning: true 20 | 21 | # Optionally build your docs in additional formats such as PDF and ePub 22 | # formats: 23 | # - pdf 24 | # - epub 25 | 26 | # Optional but recommended, declare the Python requirements required 27 | # to build your documentation 28 | # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html 29 | python: 30 | install: 31 | - requirements: docs/requirements.txt 32 | -------------------------------------------------------------------------------- /plugins/taint-front/README.md: -------------------------------------------------------------------------------- 1 | # CTADL Taint-front Fact Generator Plugin 2 | 3 | This project is part of the [CTADL Taint Analyzer](https://github.com/sandialabs/ctadl). 4 | 5 | This project provides a plugin for CTADL so that it can perform taint analysis on a custom language called taint-front, useful for hand-writing taint analysis examples. 6 | See the taint-front [README](https://github.com/sandialabs/ctadl/blob/main/taint-front/README.md) for more info. 7 | 8 | # Installation 9 | 10 | ## Dependency - taint-front fact generator 11 | 12 | The taint-front fact generator is written in OCaml and built with dune: 13 | 14 | ```sh 15 | cd taint-front 16 | dune build 17 | dune install 18 | ``` 19 | 20 | Ensure that the installed binary, `taintfront`, is on your PATH. 21 | 22 | ## Install plugin 23 | 24 | Use pip. 25 | 26 | ```sh 27 | pip install ctadl-taint-front-fact-generator 28 | ``` 29 | 30 | Afterward, if ctadl is installed, you can do: 31 | 32 | ```sh 33 | ctadl import taint-front example.tnt 34 | ``` 35 | 36 | See `ctadl --help` for more details. 37 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/taintquery.dl: -------------------------------------------------------------------------------- 1 | // Implements a pcode query.
2 | // Here we don't specify sources and sinks; includers do that. We load the 3 | // PCODE facts and the map of vertex PC addresses. We also define macros that 4 | // make it easier for includers to define sources and sinks. 5 | 6 | #include "macros.dl" 7 | // load original pcode facts 8 | #include "pcode/types.dl" 9 | #include "pcode/declarations.dl" 10 | #include "pcode/importdb.dl" 11 | 12 | 13 | #ifndef PCODE_DISABLE_TAINT 14 | 15 | #include "information-flow/query.dl" 16 | 17 | #endif // PCODE_DISABLE_TAINT 18 | 19 | #define ReturnArgIndex (-1) 20 | 21 | #define TaintSourceArg(label, method, n) \ 22 | TaintSourceMethodArg(label, n, fid) :- \ 23 | (f = method; f = cat("_", method)), \ 24 | CFunction_Name(fid, f), \ 25 | CisFunction(fid) 26 | 27 | #define TaintSinkArg(label, method, n) \ 28 | LeakingSinkMethodArg(label, n, fid) :- \ 29 | (f = method; f = cat("_", method)), \ 30 | CFunction_Name(fid, f), \ 31 | CisFunction(fid) 32 | 33 | -------------------------------------------------------------------------------- /nix/souffle/souffle2.2.nix: -------------------------------------------------------------------------------- 1 | { stdenv, callPackage, fetchFromGitHub, lib 2 | , openmp 3 | , python3 4 | , enable64BitDomain ? false 5 | , enableDebug ? 
# Builds nginx 1.24.0 with wllvm as the C compiler and then runs extract-bc
# on the installed binary.
{
  self,
  makeWrapper,
  fetchFromGitHub,
  fetchzip,
  stdenv,
  llvmPackages,
  wllvm,
  zlib,
}:
stdenv.mkDerivation rec {
  pname = "nginx-llvm";
  version = "1.24.0";

  src = fetchzip {
    # The upstream tarball is named nginx-<version>.tar.gz. Do not
    # interpolate pname here: it is "nginx-llvm", which would request a
    # file that does not exist on nginx.org.
    url = "https://nginx.org/download/nginx-${version}.tar.gz";
    sha256 = "sha256-Alm9XPSARyAeiA+ePXhTlE/gKY4zUP2Wa/+ZdM+G8E8=";
  };

  enableParallelBuilding = true;

  buildInputs = [llvmPackages.llvm llvmPackages.clang zlib wllvm];

  # wllvm is configured through the environment: where the underlying
  # compiler lives and which compiler family to drive.
  # NOTE(review): the non-clang branch selects "gcc" -- confirm wllvm accepts
  # that value for LLVM_COMPILER.
  preConfigure = ''
    export LLVM_COMPILER_PATH="${llvmPackages.clang}/bin"
    export LLVM_COMPILER=${
      if stdenv.cc.isClang
      then "clang"
      else "gcc"
    }
  '';

  configureFlags = [
    "--with-debug"
    "--with-cc='${wllvm}/bin/wllvm'"
    "--without-http_gzip_module"
    "--without-http_rewrite_module"
  ];

  # Point at the LLVM tool directory before extracting bitcode from the
  # installed nginx binary.
  postInstall = ''
    export LLVM_COMPILER_PATH="${llvmPackages.llvm}/bin"
    extract-bc $out/sbin/nginx
  '';
}
21 | // When called from bar1, calls polyY 22 | // When called from bar2, calls polyZ 23 | tmp = tx.poly(obj); 24 | return tmp; 25 | } 26 | 27 | def mid(x, obj) { 28 | tmp = foo(x, obj); 29 | return tmp; 30 | } 31 | 32 | def bar1(obj) { 33 | y = source(Alloc31); 34 | y.poly = polyY; 35 | tmp = mid(y, obj); 36 | return tmp; 37 | } 38 | 39 | def bar2(obj) { 40 | z = source(Alloc34); 41 | z.poly = polyZ; 42 | tmp = mid(z, obj); 43 | return tmp; 44 | } 45 | 46 | def serviceFirst() {} 47 | def service() { 48 | first = source(Alloc37); 49 | first = serviceFirst; 50 | second = bar1(first); 51 | third = bar2(first); 52 | // assert(first == second); 53 | sink(second, EqualToAlloc37); 54 | // assert(first != third); 55 | sink(third, NotEqualToAlloc37); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /taint-front/flake.lock: -------------------------------------------------------------------------------- 1 | { 2 | "nodes": { 3 | "flake-utils": { 4 | "locked": { 5 | "lastModified": 1642700792, 6 | "narHash": "sha256-XqHrk7hFb+zBvRg6Ghl+AZDq03ov6OshJLiSWOoX5es=", 7 | "owner": "numtide", 8 | "repo": "flake-utils", 9 | "rev": "846b2ae0fc4cc943637d3d1def4454213e203cba", 10 | "type": "github" 11 | }, 12 | "original": { 13 | "owner": "numtide", 14 | "repo": "flake-utils", 15 | "type": "github" 16 | } 17 | }, 18 | "nixpkgs": { 19 | "locked": { 20 | "lastModified": 1643472818, 21 | "narHash": "sha256-qzVPxKDUubYIxKRSuDl/JgzXWSmGvUVYny9SxFMfPJM=", 22 | "owner": "NixOS", 23 | "repo": "nixpkgs", 24 | "rev": "7f65e4abd5ecaad12d2d26e4380d1a7d8edafea7", 25 | "type": "github" 26 | }, 27 | "original": { 28 | "id": "nixpkgs", 29 | "type": "indirect" 30 | } 31 | }, 32 | "root": { 33 | "inputs": { 34 | "flake-utils": "flake-utils", 35 | "nixpkgs": "nixpkgs" 36 | } 37 | } 38 | }, 39 | "root": "root", 40 | "version": 7 41 | } 42 | -------------------------------------------------------------------------------- /nix/singularity-overlay.nix: 
-------------------------------------------------------------------------------- 1 | self: super: { 2 | # Singularity without suid binaries for testing 3 | singularity = super.singularity.overrideAttrs (attrs: with attrs; { 4 | # Configures --without-suid 5 | postConfigure = '' 6 | cd go/src/github.com/sylabs/singularity 7 | 8 | patchShebangs . 9 | sed -i 's|defaultPath := "[^"]*"|defaultPath := "${super.lib.makeBinPath propagatedBuildInputs}"|' cmd/internal/cli/actions.go 10 | 11 | ./mconfig -V ${version} -p $out --localstatedir=/var --without-suid # added 12 | ''; 13 | # Removes line: #chmod 755 $out/libexec/singularity/bin/starter-suid 14 | installPhase = '' 15 | runHook preInstall 16 | make -C builddir install LOCALSTATEDIR=$out/var 17 | 18 | # Explicitly configure paths in the config file 19 | sed -i 's|^# mksquashfs path =.*$|mksquashfs path = ${super.lib.makeBinPath [self.squashfsTools]}/mksquashfs|' $out/etc/singularity/singularity.conf 20 | sed -i 's|^# cryptsetup path =.*$|cryptsetup path = ${super.lib.makeBinPath [self.cryptsetup]}/cryptsetup|' $out/etc/singularity/singularity.conf 21 | 22 | runHook postInstall 23 | ''; 24 | 25 | }); 26 | } 27 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/declarations.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // used by clients 4 | .decl IndirectCallStmt(stmt: TaintFrontStatement, v: symbol, path: TaintFrontAccessPath) 5 | .decl AssignFunctionStmt(stmt: TaintFrontStatement, v: symbol, path: TaintFrontAccessPath, func: TaintFrontMethod) 6 | 7 | // raw input 8 | .decl Function(fn: TaintFrontMethod, arity: number) 9 | .decl Global(var: symbol, index: number) 10 | .decl FormalParam(index: number, fn: TaintFrontMethod, var: symbol) 11 | .decl ActualParam(index: number, var: symbol, path: TaintFrontAccessPath, info: TaintFrontCallRecord) 12 | .decl Assign(v1: symbol, p1: 
TaintFrontAccessPath, v2: symbol, p2: TaintFrontAccessPath, fn: TaintFrontMethod, id: number) 13 | .decl AssignFunction(v1: symbol, p1: TaintFrontAccessPath, m: TaintFrontMethod, fn: TaintFrontMethod, id: number) 14 | .decl DirectCall(fn: TaintFrontMethod, info: TaintFrontCallRecord) 15 | .decl IndirectCall(var: symbol, path: TaintFrontAccessPath, info: TaintFrontCallRecord) 16 | .decl TaintSpec(source_sink_sanitizer: symbol, v: symbol, p: TaintFrontAccessPath, label: symbol, fn: TaintFrontMethod, id: number, sanitize_from: symbol, sanitize_from_path: TaintFrontAccessPath) 17 | 18 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/information-flow/java-transfer.dl: -------------------------------------------------------------------------------- 1 | // taint models for java language libraries 2 | 3 | TaintTransferCallArg(call, n1, n2) :- 4 | ( 5 | method = "(java.io.InputStream)>"; 6 | method = "(java.io.Reader)>" 7 | ), 8 | CallEdge(call, method, ""), 9 | n2 = 0, 10 | n1 = -2. 11 | 12 | TaintTransferCallArg(call, n1, n2) :- 13 | ( 14 | method = ""; 15 | method = "" 16 | ), 17 | CallEdge(call, method, ""), 18 | n2 = -2, 19 | n1 = -1. 20 | 21 | TaintTransferCallArg(call, n1, n2) :- 22 | ( 23 | method = "(java.lang.String)>"; 24 | method = "(java.io.File)>" 25 | ), 26 | CallEdge(call, method, ""), 27 | n2 = 0, 28 | n1 = -2. 29 | 30 | // taint from this to arg 31 | TaintTransferCallArg(call, n1, n2) :- 32 | ( 33 | method = "" 34 | ), 35 | CallEdge(call, method, ""), 36 | n2 = -2, 37 | n1 = 0. 
38 | -------------------------------------------------------------------------------- /tests/bin/check-ActivityCommunication4: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | set -o pipefail 5 | set -x 6 | 7 | top="$(git rev-parse --show-toplevel)" 8 | outdir="$top/ctadl-test-output" 9 | cd $outdir 10 | 11 | ctadlbench=$outdir/DroidBench 12 | git clone https://github.com/secure-software-engineering/DroidBench.git $outdir/DroidBench || echo "Clone failed, ignoring" 13 | 14 | # Intent test 15 | ctadl import JADX $ctadlbench/apk/InterComponentCommunication/ActivityCommunication4.apk --out ActivityCommunication4_facts -f 16 | ctadl index -f ActivityCommunication4_facts 17 | ctadl query $top/tests/androidtest-query.json --format=sarif > ActivityCommunication4.sarif 18 | rm -rf ActivityCommunication4_facts 19 | 20 | # Test intent assignments are added correctly 21 | python3 $top/tests/test_ctadlir.py $top/tests/taint_intent.json 22 | 23 | # Test intent taint path is found 24 | jq '.runs[].results[].ruleId | select(. 
== "C0001")' ActivityCommunication4.sarif > ActivityCommunication4_success.txt 25 | if [ -s ActivityCommunication4_success.txt ]; then 26 | echo "Intent Test Success" 27 | else 28 | echo "Intent Test failed; exiting" 29 | exit 1 30 | fi 31 | 32 | ctadl export --format gml -o ActivityCommunication4-export.gml 33 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/taintquery.dl: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | // --------------------------------------------------------------------------- 8 | // phase 2 9 | 10 | #ifndef TAINT_FRONT_DISABLE_TAINT 11 | 12 | #include 13 | 14 | TaintSourceVertex(label, v, p) :- 15 | TaintSpec("source", iv, ip, label, fid, _, _, _), 16 | TaintFront_MkVar(fid, iv, v, tp), 17 | p = cat(tp, ip). 18 | 19 | LeakingSinkVertex(label, v, p) :- 20 | TaintSpec("sink", iv, ip, label, fid, _, _, _), 21 | TaintFront_MkVar(fid, iv, v, tp), 22 | p = cat(tp, ip). 23 | 24 | TaintSanitizeEdge(label, v1, p1, v2, p2) :- 25 | TaintSpec("sanitize", iv1, ip1, label, fid, _, iv2, ip2), 26 | TaintFront_MkVar(fid, iv1, v1, tp1), 27 | p1 = cat(tp1, ip1), 28 | TaintFront_MkVar(fid, iv2, v2, tp2), 29 | p2 = cat(tp2, ip2). 
#!/bin/sh
# Exports P-code facts for CTADL by running the headless Ghidra wrapper with
# the ExportPCodeForCTADL.java post-script.
#
# usage: analyze_headless.sh <factsdir> [analyzeHeadless arguments...]
#   <factsdir>  directory that receives the exported facts (created if needed)
#   remaining arguments are passed through unchanged to the headless wrapper

set -e
set -x

usage() { echo "usage: $(basename "$0") <factsdir> [analyzeHeadless arguments...]" >&2; exit 1; }

if [ $# -lt 1 ]; then
  usage
fi
factsdir=$1
shift

this_dir=$(dirname "$(readlink -f "$0")")

# Look up the ghidra launcher on PATH. The lookup sits inside `if`/`||` so a
# miss does not trip `set -e`; otherwise the script would abort here and the
# GHIDRA_HOME fallback below could never be reached.
ghidra_bin=""
if ghidra_path=$(command -v ghidra); then
    ghidra_bin=$(readlink -e "$ghidra_path") || ghidra_bin=""
fi

if [ -n "${GHIDRA_HOME:-}" ]; then
    ghidra_base=$GHIDRA_HOME
elif [ -n "$ghidra_bin" ]; then
    ghidra_base=$(dirname "$ghidra_bin")
else
    echo "Could not find ghidra in path. Add 'ghidra' to path or set GHIDRA_HOME" >&2
    exit 1
fi

# analyzeHeadless is searched for in two layouts relative to the ghidra base
# directory.
if [ -e "$ghidra_base/../lib/ghidra/support/analyzeHeadless" ]; then
    analyze_headless=$ghidra_base/../lib/ghidra/support/analyzeHeadless
elif [ -e "$ghidra_base/support/analyzeHeadless" ]; then
    analyze_headless=$ghidra_base/support/analyzeHeadless
else
    echo "Could not find ghidra analyzeHeadless from ghidra directory $ghidra_base" >&2
    exit 1
fi
echo "Analyze headless path: $analyze_headless" >&2

factsdir_abs=$(readlink -f "$factsdir")
mkdir -p "$factsdir_abs"
# NOTE(review): the wrapper is invoked relative to the current directory, not
# via $this_dir or $analyze_headless -- confirm callers run this script from
# the directory that contains analyzeHeadlessBigMem.
./analyzeHeadlessBigMem "$@" -scriptPath "$this_dir" -postScript ExportPCodeForCTADL.java "$factsdir_abs"
7 | // .output Relation(CTADL_OUTPUT_DB_IO) 8 | #define CTADL_OUTPUT_DB_IO IO=sqlite, filename=QUOTE(CTADL_OUTPUT_DB) 9 | // .input Relation(CTADL_INPUT_DB_IO) 10 | #define CTADL_INPUT_DB_IO IO=sqlite, filename=QUOTE(CTADL_INPUT_DB) 11 | 12 | // At least x or y must be empty 13 | // "one empty" cat() 14 | #define oecat(out, x, y) \ 15 | (x = "", out = y; y = "", out = x) 16 | 17 | // Precondition: 18 | // tainted in ctx c1 19 | // edge in c2 20 | // output: tainted context ctx0 21 | // I wrote this this way because I think it will have more efficient joins. 22 | // Most of the rows will fall under the first three constraints, where we don't 23 | // have to call the functor. 24 | #define call_string_taint_constraint(ctx0, ctx1, ctx2) \ 25 | ( ctx1 = "", ctx2 = "", ctx0 = ""; \ 26 | ctx1 = "", ctx2 != "", ctx0 = ctx2; \ 27 | ctx2 = "", ctx1 != "", ctx0 = ctx1; \ 28 | ctx1 = ctx2, ctx0 = ctx1; \ 29 | ctx1 != ctx2, ctx1 != "", ctx2 != "", 1 = @CallStringUnder(ctx2, ctx1), ctx0 = ctx1) 30 | 31 | #define ContextOut(c, c1, c2, tmp) (tmp = @CheckSubstring(c1, c2), (tmp = 1, c = c1; tmp = 2, c = c2)) 32 | -------------------------------------------------------------------------------- /taint-front/flake.nix: -------------------------------------------------------------------------------- 1 | { 2 | description = "Taint frontend flake"; 3 | 4 | inputs = { 5 | flake-utils.url = "github:numtide/flake-utils"; 6 | }; 7 | 8 | outputs = { self, nixpkgs, flake-utils }: 9 | { } // 10 | flake-utils.lib.eachDefaultSystem (system: 11 | let 12 | pkgs = import nixpkgs { inherit system; }; 13 | # fbinfer = pkgs.callPackage ./fbinfer.nix { }; 14 | in 15 | { 16 | packages = { 17 | # inherit fbinfer; 18 | # inherit (mopsa) devShell ciShell; 19 | taint-front = pkgs.ocamlPackages.buildDunePackage { 20 | useDune2 = true; 21 | 22 | pname = "taint-front"; 23 | version = "0.1.0"; 24 | 25 | src = pkgs.nix-gitignore.gitignoreSource [] ./.; 26 | 27 | nativeBuildInputs = [ 28 | 
pkgs.ocamlPackages.menhir 29 | ]; 30 | }; 31 | default = self.packages.${system}.taint-front; 32 | }; 33 | devShell = pkgs.mkShell { 34 | inputsFrom = [ ]; 35 | packages = with pkgs.ocamlPackages; [ 36 | merlin odoc utop menhir ocamlbuild ocaml dune_2 37 | ]; 38 | # inherit (fbinfer) configureFlags makeFlags preBuild; 39 | }; 40 | }); 41 | 42 | } 43 | -------------------------------------------------------------------------------- /plugins/taint-front/src/ctadl_taint_front_fact_generator_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import importlib.metadata 3 | import importlib.resources as resources 4 | import subprocess 5 | import os 6 | import logging 7 | from pathlib import Path 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def read_version(): 13 | filepath = str(resources.files(__name__) / "VERSION") 14 | with open(filepath) as file: 15 | for line in file: 16 | line = line.strip() 17 | return line.strip(' "') 18 | return "unknown-dev" 19 | 20 | 21 | version = read_version() 22 | language = "TAINT-FRONT" 23 | 24 | 25 | def run(ctadl, args, artifact: str, out: str, **kwargs): 26 | ctadl.status(f"ctadl_taint_front_fact_generator_plugin {version}") 27 | logger.debug("artifact: %s", artifact) 28 | logger.debug("out: %s", out) 29 | ctadl.status(f"exporting program with taintfront to '{out}'") 30 | factsdir = str(Path(out) / "facts") 31 | os.makedirs(factsdir, exist_ok=True) 32 | 33 | command = ["taintfront"] 34 | opts = [["-o", factsdir], [artifact]] 35 | for opt_list in opts: 36 | command.extend(opt_list) 37 | command.extend(kwargs.get("argument_passthrough", [])) 38 | logger.debug("taintfront command: %s", " ".join(command)) 39 | return subprocess.run(command) 40 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/vtable.dl: -------------------------------------------------------------------------------- 1 | 
// the constructor stores a vtable, one pattern: 2 | // tmp = PTRSUB 0, vtable 3 | // STORE spid, , tmp 4 | // 5 | // we turn this into allocs: 6 | // [0] -> func 7 | // [1] -> func 8 | // ... 9 | // for each offset in the vtable pointing to a func 10 | 11 | // instruction that gets a pointer to vtable 12 | .decl AssignVTableInstruction(i: PCodeInstruction, vtable_address: PCodeAddress) 13 | .output AssignVTableInstruction(CTADL_OUTPUT_DB_IO) 14 | 15 | AssignVTableInstruction(i, vtable_address) :- 16 | PCODE_MNEMONIC(i, "PTRSUB"), 17 | PCODE_INPUT(i, 0, zero), 18 | isConstVarnode(zero), 19 | VNODE_OFFSET_N(zero, 0), 20 | PCODE_INPUT(i, 1, v_from), 21 | VNODE_OFFSET_N(v_from, vtable_address), 22 | VTABLE(_, vtable_address, _, _). 23 | 24 | 25 | // create allocs for individual vtable functions 26 | CisAccessPath(ap1), 27 | CisAlloc(var, ap1, func) :- 28 | AssignVTableInstruction(i0, vtable), 29 | PCODE_OUTPUT(i0, dst), 30 | (MaybeCastCopy(dst_copy, dst); dst_copy = dst), 31 | PCODE_INPUT(i1, 2, dst_copy), 32 | Store(i1, var, ap0), 33 | VTABLE(_, vtable, offset, func_address), 34 | // filters invalid vtable entries 35 | HFUNC_EP(func, func_address), 36 | ap1 = cat(ap0, ".[", to_string(offset), "]"). 37 | 38 | 39 | // the indirect call is the "CALLIND" op 40 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/taint-front/index.dl: -------------------------------------------------------------------------------- 1 | #include "taint-front/taint_front_lang.dl" 2 | #include "config.dl" 3 | #ifdef CTADL_IMPORT_LANG_FACTS_FROM_DB 4 | #include "taint-front/importdb.dl" 5 | #else 6 | #include "taint-front/import.dl" 7 | #endif 8 | 9 | // --------------------------------------------------------------------------- 10 | // phase 1 11 | 12 | CTADLConfig("CTADL_ANALYSIS_LANG", "taint-front"). 
13 | 14 | #ifndef CTADL_DISABLE_INDEX_PHASE 15 | 16 | #include "graph/dataflow_graph.dl" 17 | 18 | .init ctadl_taint_front_phase1 = CGraphRules 19 | 20 | #define FS_CallEdgePlan \ 21 | .plan 1: (4, 1, 2, 3, 5, 6), \ 22 | 2: (6, 5, 4, 1, 2, 3) 23 | 24 | #include "graph/slice/callee.dl" 25 | 26 | .init track_assign_func = SliceCallee 27 | track_assign_func.input_.StartVertex(v, p, func) :- 28 | VirtualAlloc(v, p, func, ""), 29 | EnableContextFreeObjectTracking(). 30 | 31 | CallEdge(s, fid, "") :- 32 | track_assign_func.isReachable(v, p, fid), 33 | IndirectCallStmt(s, v, p), 34 | EnableContextFreeObjectTracking(). 35 | 36 | #ifdef ALL_OUTPUTS 37 | .output track_assign_func.ReachableVertex(CTADL_OUTPUT_DB_IO) 38 | #endif 39 | 40 | #endif // CTADL_DISABLE_INDEX_PHASE 41 | 42 | #include 43 | #include 44 | 45 | .pragma "suppress-warnings" "CFunction_ModelAssign,track_assign_func.input_.SanitizeAssign" 46 | -------------------------------------------------------------------------------- /taint-front/Parse.mly: -------------------------------------------------------------------------------- 1 | %token KWDEF "def" 2 | %token KWGLOBAL "var" 3 | %token KWRETURN "return" 4 | %token IDENT 5 | %token COMMA "," 6 | %token LPAREN "(" 7 | %token RPAREN ")" 8 | %token LCURLY "{" 9 | %token RCURLY "}" 10 | %token LSQUARE "[" 11 | %token RSQUARE "]" 12 | %token DOT "." 13 | %token ASSIGN "=" 14 | %token SEMI ";" 15 | %token STAR "*" 16 | %token EOF 17 | 18 | %type top 19 | %{ 20 | exception Error 21 | %} 22 | %start top 23 | %% 24 | 25 | top: defs=list(def) EOF { defs } ; 26 | 27 | def: 28 | "var" global_name=IDENT ";" { 29 | Syntax.(Global { global_name }) 30 | } 31 | | "def" name=IDENT "(" formals=separated_list(",", IDENT) ")" "{" body=list(stmt) "}" { 32 | Syntax.(Fn { name; formals; body }) 33 | } 34 | ; 35 | 36 | p: 37 | | "[" aps=separated_list(",", ap) "]" { Syntax.Array aps } 38 | | "." fld=IDENT { Syntax.Field (fld, false) } 39 | | "." 
STAR { Syntax.Field ("", true) } 40 | ; 41 | 42 | ap: 43 | | base=IDENT path=list(p) 44 | { (base, path) } 45 | ; 46 | 47 | stmt: 48 | | lhs=ap "=" rhs=ap ";" 49 | { Syntax.Assign (lhs, rhs) } 50 | | lhs=ap "=" fn=ap "(" actuals=separated_list(",", ap) ")" ";" 51 | { Syntax.Call (Some lhs, fn, actuals) } 52 | | fn=ap "(" actuals=separated_list(",", ap) ")" ";" 53 | { Syntax.Call (None, fn, actuals) } 54 | | "return" expr=ap ";" 55 | { Syntax.Return expr } 56 | ; 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | # Configuration file for the Sphinx documentation builder. 4 | # 5 | # For the full list of built-in configuration values, see the documentation: 6 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 7 | 8 | # -- Project information ----------------------------------------------------- 9 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 10 | 11 | project = "CTADL" 12 | copyright = "2025 National Technology & Engineering Solutions of Sandia, LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. 
Government retains certain rights in this software" 13 | author = "Sandia National Laboratories" 14 | 15 | with open( 16 | os.path.join(os.path.dirname(__file__), "..", "src", "ctadl", "VERSION"), "r" 17 | ) as file: 18 | release = file.read().strip() 19 | 20 | # -- General configuration --------------------------------------------------- 21 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 22 | 23 | extensions = [] 24 | 25 | templates_path = ["_templates"] 26 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 27 | 28 | 29 | # -- Options for HTML output ------------------------------------------------- 30 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 31 | 32 | html_theme = "sphinxdoc" 33 | html_static_path = ["_static"] 34 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = ctadl 3 | version = attr: ctadl.__version__ 4 | description = CTADL Static Taint Analyzer 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | url = https://github.com/sandialabs/ctadl 8 | project_urls = 9 | Bug Tracker = https://github.com/sandialabs/ctadl/issues 10 | classifiers = 11 | Programming Language :: Python :: 3.9 12 | Environment :: Console 13 | License :: OSI Approved :: Apache Software License 14 | Operating System :: POSIX 15 | 16 | [options] 17 | # It's important that we install as an uncompressed dir structure, not a zip 18 | # (egg). Souffle needs access to the entire datalog directory structure in order 19 | # to run. If that dir structure is in a zipfile, we all lose. 
20 | zip_safe = False 21 | packages = find_namespace: 22 | package_dir = 23 | = src 24 | include_package_data = True 25 | python_requires = >=3.9 26 | install_requires = 27 | json5 28 | scripts = 29 | bin/ctadl 30 | bin/dctadl 31 | 32 | [options.packages.find] 33 | where = src 34 | 35 | [options.extras_require] 36 | jadx-fact-generator = 37 | ctadl-jadx-fact-generator-plugin>=0.6.2 38 | 39 | ghidra-fact-generator = 40 | ctadl-ghidra-fact-generator-plugin>=0.0.2 41 | 42 | taint-front-fact-generator = 43 | ctadl-taint-front-fact-generator-plugin>=0.0.2 44 | 45 | networkx-exporter = 46 | ctadl-networkx-export-plugin>=0.0.1 47 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/information-flow/declarations.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ctadl_ir_types.dl" 4 | 5 | // --------------------------------------------------------------------------- 6 | // inputs 7 | 8 | // tags a vertex as a source 9 | .decl TaintSourceVertex(tag: SliceLabelType, v: CVar, p: CAccessPath) 10 | // report a leak if taint reaches the vertex 11 | .decl LeakingSinkVertex(tag: SliceLabelType, v: CVar, p: CAccessPath) 12 | // if v.p tainted with tag, don't let data flow out 13 | .decl TaintSanitizeVertex(tag: SliceLabelType, v: CVar, p: CAccessPath) 14 | // if v2.p2 is tainted with tag, don't let data flow on edge 15 | .decl TaintSanitizeEdge(tag: SliceLabelType, v1: CVar, p1: CAccessPath, v2: CVar, p2: CAccessPath) 16 | 17 | // prefer to use AllEdge and AllVertex as it gives the most consistent results for users 18 | 19 | // if v2.p2 is tainted, don't let data flow on edge 20 | .decl TaintSanitizeAllEdge(v1: CVar, p1: CAccessPath, v2: CVar, p2: CAccessPath) 21 | // if v.p tainted, don't let any data flows out 22 | .decl TaintSanitizeAllVertex(v: CVar, p: CAccessPath) 23 | 24 | // --------------------------------------------------------------------------- 25 
| // outputs 26 | 27 | // primary outputs are "forward_flow.ReachableVertex", 28 | // "forward_flow.ReachableEdge" and the backward_flow version of each 29 | 30 | // call to unmodeled function with tainted argument 31 | .decl isTaintedArgUnmodeled(tag: SliceLabelType, function: CFunction, index: number, ap: CAccessPath, direction: symbol) 32 | -------------------------------------------------------------------------------- /nix/souffle/souffle-2.4.patch: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index 97502f2f0..b0d115c2d 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -106,13 +106,6 @@ option(SOUFFLE_CUSTOM_GETOPTLONG "Enable/Disable custom getopt_long implementati 6 | cmake_dependent_option(SOUFFLE_USE_LIBCPP "Link to libc++ instead of libstdc++" ON 7 | "CMAKE_CXX_COMPILER_ID STREQUAL Clang" OFF) 8 | 9 | -# Using Clang? Likely want to use `lld` too. 10 | -if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 11 | - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=lld") 12 | - set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=lld") 13 | - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=lld") 14 | -endif() 15 | - 16 | # Add aditional modules to CMake 17 | set(CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) 18 | 19 | diff --git a/src/MainDriver.cpp b/src/MainDriver.cpp 20 | index bb8a1e51b..24bc10f92 100644 21 | --- a/src/MainDriver.cpp 22 | +++ b/src/MainDriver.cpp 23 | @@ -213,9 +213,9 @@ void compileToBinary( 24 | argv.push_back(binary.string()); 25 | 26 | #if defined(_MSC_VER) 27 | - const char* interpreter = "python"; 28 | + const char* interpreter = "bash"; 29 | #else 30 | - const char* interpreter = "python3"; 31 | + const char* interpreter = "bash"; 32 | #endif 33 | auto exit = execute(interpreter, argv); 34 | if (!exit) throw std::invalid_argument(tfm::format("unable to execute tool ", 
command)); 35 | -------------------------------------------------------------------------------- /nix/souffle/souffle2.0.2.nix: -------------------------------------------------------------------------------- 1 | { stdenv, callPackage, lib, fetchFromGitHub 2 | , autoreconfHook269, cmake 3 | , openmp 4 | , enable64BitDomain ? false 5 | , enableDebug ? false 6 | }: 7 | 8 | 9 | let 10 | souffle = callPackage ./common.nix rec { 11 | version = "2.0.2"; 12 | src = fetchFromGitHub { 13 | owner = "souffle-lang"; 14 | repo = "souffle"; 15 | rev = "${version}"; 16 | sha256 = "1fa6yssgndrln8qbbw2j7j199glxp63irfrz1c2y424rq82mm2r5"; 17 | }; 18 | inherit openmp enableDebug enable64BitDomain; 19 | }; 20 | in 21 | souffle.overrideAttrs (attrs: rec { 22 | 23 | # patches = [ ./git-and-completion.patch ./remove-cpack.patch ./remove-index-warning.patch ]; 24 | 25 | nativeBuildInputs = (lib.remove cmake attrs.nativeBuildInputs) ++ [ autoreconfHook269 ]; 26 | configureFlags = [ "--enable-openmp" ] 27 | ++ lib.optional enable64BitDomain "--enable-64bit-domain"; 28 | 29 | # see 565a8e73e80a1bedbb6cc037209c39d631fc393f and parent commits upstream for 30 | # Wno-error fixes 31 | patchPhase = '' 32 | substituteInPlace ./src/Makefile.am \ 33 | --replace '-Werror' '-Werror -Wno-error=deprecated -Wno-error=other' 34 | 35 | substituteInPlace configure.ac \ 36 | --replace "souffle_version=$(git describe --tags --always)" "souffle_version=${attrs.version}" 37 | 38 | substituteInPlace ./src/souffle-config.in \ 39 | --replace 'CXX=@CXX@' 'CXX="@CXX@"' 40 | ''; 41 | 42 | NIX_CFLAGS_COMPILE = 43 | " -Wno-format-security"; 44 | 45 | postFixup = '' 46 | wrapProgram "$out/bin/souffle-compile" --prefix CXXFLAGS " " "$souffleCompileIncludes $souffleCompileCxxFlags" --prefix LDFLAGS " " "$souffleCompileLdFlags" 47 | ''; 48 | }) 49 | 50 | -------------------------------------------------------------------------------- /docs/intro.rst: 
-------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | CTADL is a static taint analysis tool. 5 | 6 | CTADL (pronounced “citadel”) takes as input a program, known as a system 7 | under test (SUT), and allows you to perform taint analysis across 8 | procedures. Taint analysis discovers data flow paths in the SUT between 9 | user-designated sources and sinks. CTADL — which stands for 10 | Compositional Taint Analysis in DataLog — is customizable, performant, 11 | and uses simple heuristics. CTADL supports the languages: 12 | 13 | - Java and Android using JADX, 14 | - Pcode from Ghidra, and 15 | - taint-front, a custom language for hand-writing taint analysis 16 | examples. 17 | 18 | Its primary output format is 19 | `SARIF `__, a results interchange 20 | format that enables VSCode visualization of taint analysis results. 21 | 22 | 23 | Support 24 | ------- 25 | 26 | - File an issue: https://github.com/sandialabs/ctadl/issues 27 | - Ask a question: https://github.com/sandialabs/ctadl/discussions 28 | 29 | Known issues 30 | ------------ 31 | 32 | - If the analyzer you compiled mysteriously crashes, it may be because 33 | the C++ compiler has been updated since the last time Souffle was 34 | installed. If you update the compiler, then souffle needs to be 35 | updated and all our analyses need to be recompiled. After you rebuild 36 | and reinstall Souffle, remove the 37 | ``$XDG_CONFIG_DIR/share/ctadl/analysis`` directory. On Windows the 38 | share directory is instead under ``%APPDATA%``. 39 | 40 | Copyright 41 | --------- 42 | 43 | Copyright 2025 National Technology & Engineering Solutions of Sandia, 44 | LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the 45 | U.S. Government retains certain rights in this software. 
46 | -------------------------------------------------------------------------------- /nix/souffle/remove-index-warning.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/interpreter/Engine.cpp b/src/interpreter/Engine.cpp 2 | index 39c06afbc..02cfb562c 100644 3 | --- a/src/interpreter/Engine.cpp 4 | +++ b/src/interpreter/Engine.cpp 5 | @@ -605,8 +605,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { 6 | try { 7 | sub_str = str.substr(idx, len); 8 | } catch (...) { 9 | - std::cerr << "warning: wrong index position provided by substr(\""; 10 | - std::cerr << str << "\"," << (int32_t)idx << "," << (int32_t)len << ") functor.\n"; 11 | + // std::cerr << "warning: wrong index position provided by substr(\""; 12 | + // std::cerr << str << "\"," << (int32_t)idx << "," << (int32_t)len << ") functor.\n"; 13 | } 14 | return getSymbolTable().encode(sub_str); 15 | } 16 | diff --git a/src/synthesiser/Synthesiser.cpp b/src/synthesiser/Synthesiser.cpp 17 | index 19c27c2f6..1d618da5e 100644 18 | --- a/src/synthesiser/Synthesiser.cpp 19 | +++ b/src/synthesiser/Synthesiser.cpp 20 | @@ -2422,9 +2422,9 @@ void Synthesiser::generateCode(std::ostream& os, const std::string& id, bool& wi 21 | "len) {\n"; 22 | os << " std::string result; \n"; 23 | os << " try { result = str.substr(idx,len); } catch(...) 
{ \n"; 24 | - os << " std::cerr << \"warning: wrong index position provided by substr(\\\"\";\n"; 25 | - os << " std::cerr << str << \"\\\",\" << (int32_t)idx << \",\" << (int32_t)len << \") " 26 | - "functor.\\n\";\n"; 27 | + // os << " std::cerr << \"warning: wrong index position provided by substr(\\\"\";\n"; 28 | + // os << " std::cerr << str << \"\\\",\" << (int32_t)idx << \",\" << (int32_t)len << \") " 29 | + // "functor.\\n\";\n"; 30 | os << " } return result;\n"; 31 | os << "}\n"; 32 | 33 | -------------------------------------------------------------------------------- /nix/souffle/souffle2.3-warning.patch: -------------------------------------------------------------------------------- 1 | diff --git a/src/interpreter/Engine.cpp b/src/interpreter/Engine.cpp 2 | index 2396c4090..c824feac3 100644 3 | --- a/src/interpreter/Engine.cpp 4 | +++ b/src/interpreter/Engine.cpp 5 | @@ -744,8 +744,8 @@ RamDomain Engine::execute(const Node* node, Context& ctxt) { 6 | try { 7 | sub_str = str.substr(idx, len); 8 | } catch (...) { 9 | - std::cerr << "warning: wrong index position provided by substr(\""; 10 | - std::cerr << str << "\"," << (int32_t)idx << "," << (int32_t)len << ") functor.\n"; 11 | + // std::cerr << "warning: wrong index position provided by substr(\""; 12 | + // std::cerr << str << "\"," << (int32_t)idx << "," << (int32_t)len << ") functor.\n"; 13 | } 14 | return getSymbolTable().encode(sub_str); 15 | } 16 | diff --git a/src/synthesiser/Synthesiser.cpp b/src/synthesiser/Synthesiser.cpp 17 | index 563066db1..8a3a16298 100644 18 | --- a/src/synthesiser/Synthesiser.cpp 19 | +++ b/src/synthesiser/Synthesiser.cpp 20 | @@ -2709,9 +2709,9 @@ void Synthesiser::generateCode(std::ostream& sos, const std::string& id, bool& w 21 | "len) {\n"; 22 | os << " std::string result; \n"; 23 | os << " try { result = str.substr(idx,len); } catch(...) 
{ \n"; 24 | - os << " std::cerr << \"warning: wrong index position provided by substr(\\\"\";\n"; 25 | - os << " std::cerr << str << \"\\\",\" << (int32_t)idx << \",\" << (int32_t)len << \") " 26 | - "functor.\\n\";\n"; 27 | + // os << " std::cerr << \"warning: wrong index position provided by substr(\\\"\";\n"; 28 | + // os << " std::cerr << str << \"\\\",\" << (int32_t)idx << \",\" << (int32_t)len << \") " 29 | + // "functor.\\n\";\n"; 30 | os << " } return result;\n"; 31 | os << "}\n"; 32 | 33 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/jadx/importdb.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | .input _CTADLLanguage(CTADL_INPUT_DB_IO) 3 | .input _ActualParam(CTADL_INPUT_DB_IO) 4 | .input _FormalParam(CTADL_INPUT_DB_IO) 5 | .input _ThisParam(CTADL_INPUT_DB_IO) 6 | .input _ReturnStmt(CTADL_INPUT_DB_IO) 7 | .input _Method(CTADL_INPUT_DB_IO) 8 | .input _MethodInvocation(CTADL_INPUT_DB_IO) 9 | .input _MethodInvocationReturn(CTADL_INPUT_DB_IO) 10 | .input _ExternalMethod(CTADL_INPUT_DB_IO) 11 | .input _Move(CTADL_INPUT_DB_IO) 12 | .input _StmtInMethod(CTADL_INPUT_DB_IO) 13 | .input _StmtSourceLine(CTADL_INPUT_DB_IO) 14 | .input _ClassFileName(CTADL_INPUT_DB_IO) 15 | .input _VarInMethod(CTADL_INPUT_DB_IO) 16 | .input _VarHasType(CTADL_INPUT_DB_IO) 17 | .input _VarHasInternalName(CTADL_INPUT_DB_IO) 18 | .input _VarHasName(CTADL_INPUT_DB_IO) 19 | .input _StaticGet(CTADL_INPUT_DB_IO) 20 | .input _StaticPut(CTADL_INPUT_DB_IO) 21 | .input _StaticExternalField(CTADL_INPUT_DB_IO) 22 | .input _StmtInBasicblock(CTADL_INPUT_DB_IO) 23 | .input _IfStmt(CTADL_INPUT_DB_IO) 24 | .input _TernaryStmt(CTADL_INPUT_DB_IO) 25 | .input _TernaryArg(CTADL_INPUT_DB_IO) 26 | .input _SwitchStmt(CTADL_INPUT_DB_IO) 27 | .input _SwitchTarget(CTADL_INPUT_DB_IO) 28 | .input _TypeInstance(CTADL_INPUT_DB_IO) 29 | .input _AGet(CTADL_INPUT_DB_IO) 30 | .input 
_APut(CTADL_INPUT_DB_IO) 31 | .input _PhiAssign(CTADL_INPUT_DB_IO) 32 | .input _VarIsConst(CTADL_INPUT_DB_IO) 33 | .input _FieldIsFinal(CTADL_INPUT_DB_IO) 34 | .input _FieldConstInit(CTADL_INPUT_DB_IO) 35 | .input _DirectSuperclass(CTADL_INPUT_DB_IO) 36 | .input _SuperInterface(CTADL_INPUT_DB_IO) 37 | .input _ClassHasName(CTADL_INPUT_DB_IO) 38 | .input _ClassDefinedIn(CTADL_INPUT_DB_IO) 39 | .input _InterfaceType(CTADL_INPUT_DB_IO) 40 | .input _MethodImplemented(CTADL_INPUT_DB_IO) 41 | .input _ManifestRoot(CTADL_INPUT_DB_IO) 42 | .input _ManifestNode(CTADL_INPUT_DB_IO) 43 | .input _ManifestNodeChild(CTADL_INPUT_DB_IO) 44 | .input _ManifestNodeAttr(CTADL_INPUT_DB_IO) 45 | .input _TopParentClass(CTADL_INPUT_DB_IO) 46 | -------------------------------------------------------------------------------- /.release_scripts/make_images: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | shopt -s expand_aliases 5 | 6 | alias crane="nix shell --inputs-from . nixpkgs#crane -c crane" 7 | alias twine="nix shell --inputs-from . 
nixpkgs#twine -c twine" 8 | 9 | # Uploads docker image to container registry 10 | gzip -d --stdout --force < "$(nix build -L --print-out-paths --no-link .#docker)" > image.tar 11 | crane auth login -u "$CI_DEPLOY_USER" --password-stdin "$CI_REGISTRY" <<<"$CI_DEPLOY_PASSWORD" 12 | crane push image.tar "$CI_REGISTRY_IMAGE:${CI_COMMIT_TAG//+/_}" 13 | rm -f image.tar 14 | 15 | release_version="$CI_COMMIT_TAG" 16 | release_tag="$CI_COMMIT_TAG" 17 | releases_url="$CI_API_V4_URL/projects/$CI_PROJECT_ID/releases" 18 | 19 | read -d '' -r release_template <<'EOF' || : 20 | { 21 | "name": ("ctadl " + $version), 22 | "tag_name": $tag, 23 | "assets": {} 24 | } 25 | EOF 26 | 27 | # Uploads release 28 | jq --null-input \ 29 | --arg version "$release_version" \ 30 | --arg tag "$release_tag" \ 31 | "$release_template" \ 32 | | curl --oauth2-bearer "$CI_JOB_TOKEN" --include --fail-with-body \ 33 | --header 'Content-Type: application/json' --request POST --data '@-' \ 34 | "$releases_url" 35 | 36 | # Uploads python packages to package registry 37 | for pkg in "ctadl" "ctadl-plugins.taintfront" "ctadl-plugins.jadx" "ctadl-plugins.ghidra" "ctadl-plugins.networkxExport"; do 38 | TWINE_USERNAME=gitlab-ci-token TWINE_PASSWORD="${CI_JOB_TOKEN}" twine upload \ 39 | --verbose --repository-url "${CI_API_V4_URL}"/projects/"${CI_PROJECT_ID}"/packages/pypi \ 40 | --skip-existing \ 41 | "$(nix build -L .#ctadlPackages."${pkg}"^whl --no-link --print-out-paths)"/*.whl 42 | done 43 | 44 | 45 | #gzip -d --stdout --force < "$(nix build -L --print-out-paths --no-link .#singularity)" > image.tar 46 | #curl --header "JOB-TOKEN: $CI_JOB_TOKEN" --upload-file "image.tar" "${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/generic/ctadl/$CI_COMMIT_TAG/COYOTE-CTADL-$CI_COMMIT_TAG-SINGULARITY.tar" 47 | -------------------------------------------------------------------------------- /nix/souffle/souffle2.4.nix: -------------------------------------------------------------------------------- 1 | { gccStdenv, 
lib, callPackage, fetchFromGitHub 2 | , python3 3 | , openmp ? null 4 | , enableOpenMP ? true 5 | , enable64BitDomain ? false 6 | , enableDebug ? false 7 | , enableSanitizeMemory ? false 8 | }: 9 | 10 | 11 | let 12 | souffle = callPackage ./common.nix rec { 13 | stdenv = gccStdenv; 14 | version = "2.4"; 15 | src = fetchFromGitHub { 16 | owner = "dbueno"; 17 | repo = "souffle"; 18 | rev = "v${version}-fix-sqlite"; 19 | sha256 = "sha256-BDAwwOCntnV3MJ6ON8zy+x9/toWYx4VQJX+BHi/YGAI="; 20 | }; 21 | inherit openmp enableOpenMP enableDebug enable64BitDomain python3; 22 | }; 23 | toolsPath = lib.makeBinPath [ gccStdenv.cc ]; 24 | in 25 | souffle.overrideAttrs (attrs: rec { 26 | 27 | patches = [ ./souffle-2.4.patch ]; 28 | # ./souffle-2.3-sources.patch ]; 29 | 30 | cmakeFlags = [ "-DSOUFFLE_GIT=0" "-DPACKAGE_VERSION=${attrs.version}" "-DVERBOSE=1" ] 31 | ++ lib.optional enableSanitizeMemory "-DSOUFFLE_SANITISE_MEMORY=ON" 32 | ++ attrs.cmakeFlags; 33 | 34 | postInstall = '' 35 | sed 's#"includes": "#"includes": "-I'$out'/include ${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 36 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 37 | sed 's#"link_options": "#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 38 | ${attrs.postInstallWrap} 39 | wrapProgram "$out/bin/souffle-compile.py" \ 40 | --prefix PATH : "${toolsPath}" 41 | ''; 42 | 43 | postFixup = '' 44 | sed 's#"includes": "#"includes": "${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 45 | sed "s#\"includes\": \"#\"includes\": \"-I$out/include #" -i $out/bin/souffle-compile.py 46 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 47 | sed 's#"link_options": "#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 48 | wrapPythonPrograms 49 | ''; 50 | }) 51 | 
-------------------------------------------------------------------------------- /docs/analysis.rst: -------------------------------------------------------------------------------- 1 | Analysis Internals 2 | ================== 3 | 4 | This page documents the basics of how CTADL's analysis works 5 | internally but glosses over implementation details. 6 | 7 | Global Variables 8 | ------------------------- 9 | 10 | Introduction 11 | ^^^^^^^^^^^^ 12 | 13 | The CTADL IR language supports global variables. They are used to model 14 | variables like ``public static`` variables in Java. Unlike tidy 15 | intraprocedural data flow, globals can flow data from one function to 16 | many others. A typical example is below, where ``a`` flows to ``b`` in 17 | ``main`` because of a global variable ``g``. 18 | 19 | :: 20 | 21 | var g; 22 | main() { 23 | WriteGlobal(a); 24 | b = ReadGlobal(); 25 | } 26 | WriteGlobal(p) { 27 | g = p; 28 | } 29 | ReadGlobal() { 30 | return g; 31 | } 32 | 33 | CTADL uses a threading strategy to model globals. 34 | 35 | Compositional strategy: Threading globals as parameters 36 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 37 | 38 | One way to handle global variables compositionally is by threading them 39 | through each function as a parameter. This way they are handled like 40 | local variables, they can occur in function summaries, be instantiated, 41 | etc. Below is the same program as above, but transformed by removing and 42 | threading the global variable: 43 | 44 | :: 45 | 46 | // var g; is removed 47 | main(globals) { 48 | WriteGlobal(a, globals); 49 | b = ReadGlobal(globals); 50 | } 51 | WriteGlobal(p, globals) { 52 | globals.g = p; 53 | } 54 | ReadGlobal(globals) { 55 | return globals.g; 56 | } 57 | 58 | An in-out parameter ``globals`` is added to each function, each global 59 | access is translated into a corresponding access of the ``globals`` 60 | parameter, and each call site threads the parameter.
For this 61 | instrumentation, one more thing needs to be done, which is not shown: 62 | the set of access paths is augmented with paths for each global. The 63 | path added for this example is ``.g``. 64 | -------------------------------------------------------------------------------- /plugins/networkx-export/src/ctadl_networkx_export_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import importlib.metadata 3 | import importlib.resources as resources 4 | import logging 5 | from pathlib import Path 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | def read_version(): 11 | filepath = str(resources.files(__name__) / "VERSION") 12 | with open(filepath) as file: 13 | for line in file: 14 | line = line.strip() 15 | return line.strip(' "') 16 | return "unknown-dev" 17 | 18 | 19 | version = read_version() 20 | export_formats: list[str] = ["gml"] 21 | 22 | 23 | def run(ctadl, args, format: str, index: Path, out: str, **kwargs) -> int: 24 | import ctadl.vis.model as model 25 | from ctadl.util.functions import writer 26 | 27 | ctadl.status(f"ctadl_networkx_export_plugin {version}") 28 | logger.debug("format is '%s'", format) 29 | logger.debug("index is '%s'", index) 30 | logger.debug("out is '%s'", out) 31 | 32 | g = nx.DiGraph() 33 | counter = 0 34 | ids = dict() 35 | 36 | def get_id(t): 37 | nonlocal counter 38 | if t not in ids: 39 | id = counter 40 | ids[t] = id 41 | counter += 1 42 | return ids[t] 43 | 44 | with model.DB(index) as con: 45 | cur = model.execute( 46 | con, 47 | """ 48 | SELECT * from CVar_Type 49 | """, 50 | ) 51 | for row in cur: 52 | u = get_id((row["var"], row["path"])) 53 | g.add_node(u, variable=row["var"], access=row["path"], type=row["type"]) 54 | cur = model.execute( 55 | con, 56 | """ 57 | SELECT * FROM VirtualAssign 58 | """, 59 | ) 60 | for row in cur: 61 | u = get_id((row["v2"], row["p2"])) 62 | v = get_id((row["v1"], row["p1"])) 63 | g.add_edge(u, v, 
ctx=row["ctx"]) 64 | if format == "gml": 65 | nx.write_gml(g, out) 66 | return 0 67 | return 1 68 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph_schema.dl: -------------------------------------------------------------------------------- 1 | // depends on ctadl_ir_types 2 | 3 | // Our dataflow analysis is compositional and summary-based. 4 | // Graphs for each function are defined on Vertex and VirtualAssign. 5 | 6 | // A vertex in the graph. 7 | .decl Vertex(var: CVar, path: CAccessPath) 8 | 9 | // A possibly-derived data flow edge between two vertices. Each corresponds to 10 | // some instruction from the program. The reason is there to aid debugging. The 11 | // ctx is the context of the flow; if the flow only happens due to some chain 12 | // of function calls, that is recorded in the context. 13 | // VirtualAssign contains all data flow edges implied by field propagation 14 | // starting with CInsn_Move edges. 15 | .decl VirtualAssign( 16 | insn: CInsn, 17 | v1: CVar, p1: CAccessPath, v2: CVar, p2: CAccessPath, 18 | reason: symbol, 19 | ctx: symbol 20 | ) 21 | #ifndef CTADL_DISABLE_CHOICE 22 | choice-domain (v1, p1, v2, p2, ctx) 23 | #endif 24 | 25 | // Derived call edge. 26 | .decl CallEdge(insn: CFunctionInvocation, function: CFunction, ctx: symbol) 27 | 28 | // A generic data flow summary between parameters of functions. Method m2 flows 29 | // parameter n2.p2 to method m1, parameter n1.p1. Summaries are general: one 30 | // method parameter (+ ap) may flow to a distinct method and parameter (+ ap). 31 | // The flow between methods occurs due to globals. 32 | .decl SummaryFlow( 33 | m1: CFunction, n1: number, p1: CAccessPath, 34 | m2: CFunction, n2: number, p2: CAccessPath, 35 | ctx: symbol 36 | ) 37 | 38 | // VirtualAlloc contains all derived vertices that point to obj beginning with 39 | // CisAlloc.
40 | .decl VirtualAlloc(to: CVar, to_path: CAccessPath, obj: symbol, ctx: symbol) 41 | 42 | // An allocation summary. Function f produces object in formal n.p. 43 | .decl SummaryAlloc( 44 | f: CFunction, n: number, p: CAccessPath, obj: symbol, ctx: symbol 45 | ) 46 | 47 | // In the given context, the object flows to the virtual base of the 48 | // indirect_call argument 49 | .decl IndirectCallResolvent(ctx: symbol, indirect_call: CInsn, obj: symbol) 50 | -------------------------------------------------------------------------------- /nix/souffle/souffle2.3.nix: -------------------------------------------------------------------------------- 1 | { stdenv, lib, callPackage, fetchFromGitHub 2 | , bash 3 | , python3 4 | , openmp ? null 5 | , enableOpenMP ? true 6 | , enable64BitDomain ? false 7 | , enableDebug ? false 8 | , enableSanitizeMemory ? false 9 | }: 10 | 11 | 12 | let 13 | souffle = callPackage ./common.nix rec { 14 | inherit stdenv; 15 | version = "2.3"; 16 | src = fetchFromGitHub { 17 | owner = "dbueno"; 18 | repo = "souffle"; 19 | rev = "v${version}-fix-sqlite"; 20 | sha256 = "sha256-d8Nsd0501L4xsl6L15D0OLxJUylcodLpqb2TykSkczA="; 21 | }; 22 | inherit openmp enableOpenMP enableDebug enable64BitDomain python3; 23 | }; 24 | toolsPath = lib.makeBinPath [ stdenv.cc ]; 25 | in 26 | souffle.overrideAttrs (attrs: rec { 27 | 28 | patches = [ ./remove-lld.patch ]; 29 | 30 | preConfigure = '' 31 | substituteInPlace src/main.cpp \ 32 | --replace 'const char* interpreter = "python3";' 'const char* interpreter = "${bash}/bin/bash";' 33 | substituteInPlace src/main.cpp \ 34 | --replace 'bool endInput() {' 'bool endInput() override {' 35 | 36 | substituteInPlace src/CMakeLists.txt \ 37 | --replace '\"source_include_dir\": \"''${CMAKE_CURRENT_SOURCE_DIR}/include\"' \ 38 | '\"source_include_dir\": \"''${CMAKE_INSTALL_PREFIX}/include\"' 39 | 40 | ''; 41 | 42 | cmakeFlags = [ "-DSOUFFLE_GIT=0" "-DPACKAGE_VERSION=${attrs.version}" ] 43 | ++ lib.optional enableSanitizeMemory 
"-DSOUFFLE_SANITISE_MEMORY=ON" 44 | ++ attrs.cmakeFlags; 45 | 46 | postInstall = '' 47 | sed 's#"includes": "#"includes": "-I'$out'/include ${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 48 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 49 | sed 's#"link_options": "#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 50 | ${attrs.postInstallWrap} 51 | wrapProgram "$out/bin/souffle-compile.py" \ 52 | --prefix PATH : "${toolsPath}" 53 | ''; 54 | 55 | postFixup = '' 56 | wrapPythonPrograms 57 | ''; 58 | }) 59 | -------------------------------------------------------------------------------- /plugins/ghidra/src/ctadl_ghidra_fact_generator_plugin/__init__.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import importlib.metadata 3 | import importlib.resources as resources 4 | import subprocess 5 | import os 6 | import logging 7 | from pathlib import Path 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def read_version(): 13 | filepath = str(resources.files(__name__) / "VERSION") 14 | with open(filepath) as file: 15 | for line in file: 16 | line = line.strip() 17 | return line.strip(' "') 18 | return "unknown-dev" 19 | 20 | 21 | version = read_version() 22 | language = "PCODE" 23 | 24 | 25 | def run(ctadl, args, artifact: str, out: str, **kwargs): 26 | ctadl.status(f"ctadl_ghidra_fact_generator_plugin {version}") 27 | logger.debug("artifact: %s", artifact) 28 | logger.debug("out: %s", out) 29 | 30 | pcode_files = resources.files(ctadl) / "souffle-logic/pcode" 31 | ctadl.status(f"exporting program with Ghidra to '{out}'") 32 | factsdir = str(Path(out) / "facts") 33 | os.makedirs(factsdir, exist_ok=True) 34 | with contextlib.ExitStack() as ctx: 35 | analyzer = str( 36 | ctx.enter_context( 37 | resources.as_file(pcode_files / "analyzeHeadlessBigMem") 38 | ).resolve() 39 | ) 40 | command = 
[analyzer] 41 | project_path = args.tmpdir + "/ghidra_headless" 42 | os.makedirs(project_path) 43 | project = "headless" 44 | opts = [ 45 | [project_path], 46 | [project], 47 | ["-import", artifact, "-deleteProject"], 48 | ["-postScript", "ExportPCodeForCTADL.java", factsdir], 49 | [ 50 | "-scriptPath", 51 | str(ctx.enter_context(resources.as_file(pcode_files)).resolve()), 52 | ], 53 | ] 54 | for opt_list in opts: 55 | command.extend(opt_list) 56 | command.extend(kwargs.get("argument_passthrough", [])) 57 | logger.debug("analyzeHeadless command: %s", " ".join(command)) 58 | return subprocess.run(command) 59 | -------------------------------------------------------------------------------- /src/ctadl/vis/types.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from enum import Enum 3 | from typing import Literal, NamedTuple 4 | 5 | from ctadl.util import OrderedSet 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class VertexTy(NamedTuple): 11 | var: str 12 | path: str # Access path 13 | 14 | 15 | VertexId = int 16 | InsnIdStr = str 17 | VarIdStr = str 18 | FuncIdStr = str 19 | LabelSet = OrderedSet 20 | InsnKind = Literal["move", "actual-to-formal", "formal-to-actual"] 21 | 22 | 23 | class IllegalStackStateTransition(Exception): 24 | def __init__(self, reason): 25 | self.reason = reason 26 | 27 | def __str__(self): 28 | return "IllegalStackStateTransition: " + self.reason 29 | 30 | 31 | class CtadlModelError(Exception): 32 | def __init__(self, reason): 33 | self.reason = reason 34 | 35 | def __str__(self): 36 | return "CtadlModelError: " + self.reason 37 | 38 | 39 | class StackState(Enum): 40 | """State of the stack in the taint traversal""" 41 | 42 | free = 1 43 | restricted = 2 44 | 45 | def do_call(self): 46 | if self == StackState.free: 47 | return StackState.restricted 48 | elif self == StackState.restricted: 49 | return StackState.restricted 50 | 51 | def do_return(self): 52 | if self == 
StackState.free: 53 | return StackState.free 54 | elif self == StackState.restricted: 55 | raise IllegalStackStateTransition("return in restricted state") 56 | 57 | 58 | class SliceDirection(Enum): 59 | forward = "f" 60 | backward = "b" 61 | 62 | def is_forward(self): 63 | return self is SliceDirection.forward 64 | 65 | @staticmethod 66 | def from_str(s: str) -> "SliceDirection": 67 | if s not in ["forward", "backward", "fwd", "bwd", "f", "b"]: 68 | raise ValueError(f"SliceDirection from invalid string: '{s}'") 69 | if s.startswith("f"): 70 | return SliceDirection.forward 71 | if s.startswith("b"): 72 | return SliceDirection.backward 73 | raise ValueError("Should be impossible") 74 | -------------------------------------------------------------------------------- /nix/souffle/souffle2.4.1.nix: -------------------------------------------------------------------------------- 1 | { gccStdenv, lib, callPackage, fetchFromGitHub 2 | , python3 3 | , openmp ? null 4 | , enableOpenMP ? true 5 | , enable64BitDomain ? false 6 | , enableDebug ? false 7 | , enableSanitizeMemory ? 
false 8 | }: 9 | 10 | 11 | let 12 | souffle = callPackage ./common.nix rec { 13 | stdenv = gccStdenv; 14 | version = "2.4.1"; 15 | src = fetchFromGitHub { 16 | #owner = "dbueno"; 17 | #repo = "souffle"; 18 | owner = "souffle-lang"; 19 | repo = "souffle"; 20 | "rev" = "01f11777b4b09329b8232466d82376e039ac1ba8"; 21 | "hash" = "sha256-U3/1iNOLFzuXiBsVDAc5AXnK4F982Uifp18jjFNUv2o="; 22 | }; 23 | inherit openmp enableOpenMP enableDebug enable64BitDomain python3; 24 | }; 25 | toolsPath = lib.makeBinPath [ gccStdenv.cc ]; 26 | in 27 | souffle.overrideAttrs (attrs: rec { 28 | 29 | patches = [ ./souffle-2.4.patch ]; 30 | # ./souffle-2.3-sources.patch ]; 31 | 32 | cmakeFlags = [ "-DSOUFFLE_GIT=0" "-DPACKAGE_VERSION=${attrs.version}" "-DVERBOSE=1" ] 33 | ++ lib.optional enableSanitizeMemory "-DSOUFFLE_SANITISE_MEMORY=ON" 34 | ++ attrs.cmakeFlags; 35 | 36 | postInstall = '' 37 | sed 's#"includes": "#"includes": "-I'$out'/include ${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 38 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 39 | sed 's#"link_options": "#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 40 | ${attrs.postInstallWrap} 41 | wrapProgram "$out/bin/souffle-compile.py" \ 42 | --prefix PATH : "${toolsPath}" 43 | ''; 44 | 45 | postFixup = '' 46 | sed 's#"includes": "#"includes": "${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 47 | sed "s#\"includes\": \"#\"includes\": \"-I$out/include #" -i $out/bin/souffle-compile.py 48 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 49 | sed 's#"link_options": "#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 50 | wrapPythonPrograms 51 | ''; 52 | }) 53 | -------------------------------------------------------------------------------- /nix/souffle/soufflegit.nix: 
-------------------------------------------------------------------------------- 1 | { gccStdenv, lib, callPackage, fetchFromGitHub 2 | , python3 3 | , openmp ? null 4 | , enableOpenMP ? true 5 | , enable64BitDomain ? false 6 | , enableDebug ? false 7 | , enableSanitizeMemory ? false 8 | }: 9 | 10 | 11 | let 12 | souffle = callPackage ./common.nix rec { 13 | stdenv = gccStdenv; 14 | version = "e6cc66820e0d3c537f57aab2f5c80b0d54cb5208"; 15 | src = fetchFromGitHub { 16 | #owner = "dbueno"; 17 | #repo = "souffle"; 18 | owner = "souffle-lang"; 19 | repo = "souffle"; 20 | "rev" = "${version}"; 21 | "hash" = "sha256-e+AA7p3ag/RhqZ7iKvJoPRJLGjtjT7eJIUze+fpsP4A="; 22 | }; 23 | inherit openmp enableOpenMP enableDebug enable64BitDomain python3; 24 | }; 25 | toolsPath = lib.makeBinPath [ gccStdenv.cc ]; 26 | in 27 | souffle.overrideAttrs (attrs: rec { 28 | 29 | patches = [ ./souffle-2.4.patch ]; 30 | # ./souffle-2.3-sources.patch ]; 31 | 32 | cmakeFlags = [ "-DSOUFFLE_GIT=0" "-DPACKAGE_VERSION=${attrs.version}" "-DVERBOSE=1" ] 33 | ++ lib.optional enableSanitizeMemory "-DSOUFFLE_SANITISE_MEMORY=ON" 34 | ++ attrs.cmakeFlags; 35 | 36 | postInstall = '' 37 | sed 's#"includes": "#"includes": "-I'$out'/include ${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 38 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 39 | sed 's#"link_options": "#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 40 | ${attrs.postInstallWrap} 41 | wrapProgram "$out/bin/souffle-compile.py" \ 42 | --prefix PATH : "${toolsPath}" 43 | ''; 44 | 45 | postFixup = '' 46 | sed 's#"includes": "#"includes": "${attrs.souffleCompileIncludes} #' -i $out/bin/souffle-compile.py 47 | sed "s#\"includes\": \"#\"includes\": \"-I$out/include #" -i $out/bin/souffle-compile.py 48 | sed 's#"cxx_flags": "#"cxx_flags": " ${attrs.souffleCompileCxxFlags} #' -i $out/bin/souffle-compile.py 49 | sed 's#"link_options": 
"#"link_options": "${attrs.souffleCompileLdFlags} #' -i $out/bin/souffle-compile.py 50 | wrapPythonPrograms 51 | ''; 52 | }) 53 | -------------------------------------------------------------------------------- /nix/souffle/packages.nix: -------------------------------------------------------------------------------- 1 | { 2 | stdenv, 3 | callPackage, 4 | llvmPackages, 5 | clangStdenv, 6 | gccStdenv 7 | }: rec { 8 | souffle_clang = callPackage ./souffle2.1.nix { 9 | stdenv = clangStdenv; 10 | inherit (llvmPackages) openmp; 11 | }; 12 | souffle_gcc = callPackage ./souffle2.1.nix { 13 | stdenv = gccStdenv; 14 | inherit (llvmPackages) openmp; 15 | }; 16 | souffle202 = callPackage ./souffle2.0.2.nix {inherit (llvmPackages) openmp;}; 17 | souffle21 = callPackage ./souffle2.1.nix {inherit (llvmPackages) openmp;}; 18 | souffle22 = callPackage ./souffle2.2.nix {inherit (llvmPackages) openmp;}; 19 | souffle23 = callPackage ./souffle2.3.nix {inherit (llvmPackages) openmp;}; 20 | souffle23_64bit = callPackage ./souffle2.3.nix { 21 | inherit (llvmPackages) openmp; 22 | enable64BitDomain = true; 23 | }; 24 | souffle24 = callPackage ./souffle2.4.nix {inherit (llvmPackages) openmp;}; 25 | souffle24_64bit = callPackage ./souffle2.4.nix { 26 | inherit (llvmPackages) openmp; 27 | enable64BitDomain = true; 28 | }; 29 | souffle23_debug = callPackage ./souffle2.3.nix { 30 | inherit (llvmPackages) openmp; 31 | enableSanitizeMemory = true; 32 | }; 33 | souffle241 = callPackage ./souffle2.4.1.nix {inherit (llvmPackages) openmp;}; 34 | souffle241_64bit = callPackage ./souffle2.4.1.nix { 35 | inherit (llvmPackages) openmp; 36 | enable64BitDomain = true; 37 | }; 38 | 39 | souffle23_no_openmp = callPackage ./souffle2.3.nix {enableOpenMP = false;}; 40 | souffle23_64bit_no_openmp = callPackage ./souffle2.3.nix { 41 | enable64BitDomain = true; 42 | enableOpenMP = false; 43 | }; 44 | souffle24_64bit_no_openmp = callPackage ./souffle2.4.nix { 45 | enable64BitDomain = true; 46 | enableOpenMP = 
false; 47 | }; 48 | souffle241_64bit_no_openmp = callPackage ./souffle2.4.1.nix { 49 | enable64BitDomain = true; 50 | enableOpenMP = false; 51 | }; 52 | soufflegit_64bit_no_openmp = callPackage ./soufflegit.nix { 53 | enable64BitDomain = true; 54 | enableOpenMP = false; 55 | }; 56 | souffle = 57 | if stdenv.system == "aarch64-darwin" 58 | then souffle23_64bit_no_openmp 59 | else souffle23_64bit; 60 | } 61 | -------------------------------------------------------------------------------- /src/ctadl/vis/richutils.py: -------------------------------------------------------------------------------- 1 | # Extracted and modified from https://github.com/tiangolo/typer 2 | # Extracted and modified from https://github.com/ewels/rich-click 3 | 4 | import contextlib 5 | import sys 6 | import textwrap 7 | from functools import singledispatchmethod, wraps 8 | from os import getenv 9 | from typing import Optional, Protocol, Union 10 | 11 | from rich import box, print 12 | from rich.align import Align 13 | from rich.columns import Columns 14 | from rich.console import Console, Group, NewLine, RenderableType, group 15 | from rich.emoji import Emoji 16 | from rich.highlighter import RegexHighlighter 17 | from rich.markdown import Markdown 18 | from rich.padding import Padding 19 | from rich.panel import Panel 20 | from rich.pretty import Pretty 21 | from rich.rule import Rule 22 | from rich.style import Style 23 | from rich.table import Table 24 | from rich.text import Text 25 | from rich.theme import Theme 26 | from rich.tree import Tree 27 | 28 | from ctadl.vis.types import * 29 | 30 | if sys.version_info >= (3, 8): 31 | from typing import Literal 32 | else: 33 | from typing_extensions import Literal 34 | 35 | from . 
import model 36 | 37 | # Default styles 38 | STYLE_VAR = "yellow" 39 | STYLE_FIELD = "blue" 40 | STYLE_TAINT_LABEL = "red" 41 | STYLE_INT_ID = "light_slate_blue" 42 | 43 | _TERMINAL_WIDTH = getenv("TERMINAL_WIDTH") 44 | MAX_WIDTH = int(_TERMINAL_WIDTH) if _TERMINAL_WIDTH else None 45 | COLOR_SYSTEM: Optional[ 46 | Literal["auto", "standard", "256", "truecolor", "windows"] 47 | ] = "truecolor" # Set to None to disable colors 48 | FORCE_TERMINAL = ( 49 | True 50 | if getenv("GITHUB_ACTIONS") or getenv("FORCE_COLOR") or getenv("PY_COLORS") 51 | else None 52 | ) 53 | 54 | 55 | def _get_rich_console(stderr: bool = False) -> Console: 56 | return Console( 57 | theme=Theme( 58 | { 59 | "taint_label": STYLE_TAINT_LABEL, 60 | "var": STYLE_VAR, 61 | "field": STYLE_FIELD, 62 | "leak_graph_id": STYLE_INT_ID, 63 | }, 64 | ), 65 | highlight=True, 66 | color_system=COLOR_SYSTEM, 67 | force_terminal=FORCE_TERMINAL, 68 | width=MAX_WIDTH, 69 | stderr=stderr, 70 | ) 71 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/analyzeHeadlessBigMem.bat: -------------------------------------------------------------------------------- 1 | REM To call this, set GHIDRA_HOME to your path, then invoke it e.g. like this: 2 | REM analyzeHeadlessBigMem.bat project_parent_dir \path\to\project\dir -import \path\to\binary -deleteProject -scriptPath \path\to\ctadl\src\ctadl\souffle-logic\pcode -postScript ExportPCodeForCTADL.java \path\to\facts\export\dir 3 | :: Ghidra Headless Analyzer launch (see analyzeHeadlessREADME.html) 4 | 5 | @echo off 6 | setlocal 7 | 8 | :: Maximum heap memory size. For headless, it is recommended to not use the default value 9 | :: because garbage collection could take too long on systems with a large amount of physical 10 | :: memory. 
11 | set MAXMEM=40G 12 | 13 | :: Launch mode can be changed to one of the following: 14 | :: fg, debug, debug-suspend 15 | set LAUNCH_MODE=fg 16 | 17 | :: Set the debug address to listen on. 18 | :: NOTE: This variable is ignored if not launching in a debugging mode. 19 | set DEBUG_ADDRESS=127.0.0.1:13002 20 | 21 | :: Limit the # of garbage collection and JIT compiler threads in case many headless 22 | :: instances are run in parallel. By default, Java will assign one thread per core 23 | :: which does not scale well on servers with many cores. 24 | set VMARG_LIST=-XX:ParallelGCThreads=4 -XX:CICompilerCount=4 25 | 26 | :: Store current path (%0 gets modified below by SHIFT) 27 | set SCRIPT_DIR=%GHIDRA_HOME%\support\ 28 | 29 | :: Loop through parameters (if there aren't any, just continue) and store 30 | :: in params variable. 31 | 32 | setlocal EnableDelayedExpansion 33 | set params= 34 | 35 | :Loop 36 | if "%~1" == "" goto cont 37 | 38 | :: If -import is found and Windows has not done proper wildcard expansion, force 39 | :: this to happen and save expansion to params variable. 40 | if "%~1" == "-import" ( 41 | set params=!params! -import 42 | for %%f in ("%~2") DO ( 43 | call set params=!params! "%%~ff" 44 | ) 45 | SHIFT 46 | ) else ( 47 | set params=!params! 
"%~1" 48 | ) 49 | 50 | shift 51 | goto Loop 52 | 53 | :cont 54 | 55 | setlocal DisableDelayedExpansion 56 | 57 | call "%SCRIPT_DIR%launch.bat" %LAUNCH_MODE% jdk Ghidra-Headless "%MAXMEM%" "%VMARG_LIST%" ghidra.app.util.headless.AnalyzeHeadless %params% 58 | -------------------------------------------------------------------------------- /nix/souffle/git-and-completion.patch: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index 6f2090ffa..8aac6099a 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -24,36 +24,6 @@ 6 | 7 | cmake_minimum_required(VERSION 3.15) 8 | 9 | -find_package(Git REQUIRED) 10 | - 11 | -# PACKAGE_VERSION is the full tag with git hash 12 | -execute_process(COMMAND ${GIT_EXECUTABLE} describe --tags --always 13 | - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" 14 | - RESULT_VARIABLE GIT_RESULT 15 | - OUTPUT_VARIABLE GIT_PACKAGE_VERSION) 16 | - # FIXME: Use in cmake 3.19 or later 17 | - # COMMAND_ERROR_IS_FATAL ANY) 18 | - 19 | -# Figure out the version number, depends on whether building from the git repo 20 | -if (NOT GIT_RESULT EQUAL 0) 21 | - # Not building from a git clone 22 | - message(WARNING "Unable to find git repository: version number will be incomplete") 23 | - set(PACKAGE_VERSION "UNKOWN") 24 | - set(SOUFFLE_VERSION "") 25 | -else() 26 | - string(REGEX REPLACE "\n$" "" PACKAGE_VERSION "${GIT_PACKAGE_VERSION}") 27 | - message(STATUS "Building souffle version ${PACKAGE_VERSION}") 28 | - 29 | - # SOUFFLE_VERSION only includes the major/minor triplet 30 | - string(REGEX REPLACE "-.*$" "" SOUFFLE_VERSION "${PACKAGE_VERSION}") 31 | - 32 | - # If building from a shallow clone where tag is not available. 
33 | - if (NOT ${SOUFFLE_VERSION} MATCHES "^[0-9.]+$") 34 | - message(WARNING "Cannot find a valid tag: cmake project version will be incomplete") 35 | - set (SOUFFLE_VERSION "") 36 | - endif() 37 | -endif() 38 | - 39 | project(souffle VERSION "${SOUFFLE_VERSION}" 40 | DESCRIPTION "A datalog compiler" 41 | LANGUAGES CXX) 42 | @@ -312,7 +282,7 @@ find_package (bash-completion) 43 | if (BASH_COMPLETION_FOUND) 44 | message(STATUS "Using bash completion dir ${BASH_COMPLETION_COMPLETIONSDIR}") 45 | else() 46 | - set (BASH_COMPLETION_COMPLETIONSDIR "/etc/bash_completion.d") 47 | + set (BASH_COMPLETION_COMPLETIONSDIR "${CMAKE_INSTALL_PREFIX}/share/bash-completion/completions" CACHE PATH "Location of bash_completion.d") 48 | message (STATUS "Using fallback bash completion dir ${BASH_COMPLETION_COMPLETIONSDIR}") 49 | endif() 50 | 51 | 52 | -------------------------------------------------------------------------------- /taint-front/Syntax.ml: -------------------------------------------------------------------------------- 1 | type p = 2 | | Array of ap list 3 | (* field + collapse. 
if collapse is true, field is empty *) 4 | | Field of (string * bool) 5 | 6 | and ap = string * p list 7 | 8 | let rec pp_p ff = function 9 | | Array aps -> 10 | Format.fprintf ff "[%a]" 11 | (Format.pp_print_list ~pp_sep:(fun ff () -> Format.pp_print_string ff ", ") pp_ap) aps 12 | | Field (fld, collapse) -> 13 | Format.fprintf ff ".%s%s" fld (if collapse then "*" else "") 14 | 15 | 16 | and pp_ap ff (base, path) = 17 | Format.fprintf ff "%s" base; 18 | List.iter (fun p -> 19 | Format.fprintf ff "%a" pp_p p 20 | ) path 21 | 22 | type stmt = 23 | | Assign of ap * ap 24 | | Call of ap option * ap * ap list 25 | | Return of ap 26 | 27 | let pp_stmt ff = function 28 | | Assign (lhs, rhs) -> 29 | Format.fprintf ff "%a = %a;" 30 | pp_ap lhs 31 | pp_ap rhs 32 | | Call (None, fn, actuals) -> 33 | Format.fprintf ff "%a(%a);" 34 | pp_ap fn 35 | (Format.pp_print_list ~pp_sep:(fun ff () -> Format.pp_print_string ff ", ") pp_ap) actuals 36 | | Call (Some lhs, fn, actuals) -> 37 | Format.fprintf ff "%a = %a(%a);" 38 | pp_ap lhs 39 | pp_ap fn 40 | (Format.pp_print_list ~pp_sep:(fun ff () -> Format.pp_print_string ff ", ") pp_ap) actuals 41 | | Return ap -> 42 | Format.fprintf ff "return %a;" pp_ap ap 43 | 44 | type fn = { 45 | name: string; 46 | formals: string list; 47 | body: stmt list; 48 | } 49 | 50 | let pp_fn ff fn = 51 | Format.fprintf ff "@[@[def %s(%a) {" 52 | fn.name 53 | (Format.pp_print_list ~pp_sep:(fun ff () -> Format.pp_print_string ff ", ") Format.pp_print_string) fn.formals; 54 | List.iter (fun stmt -> 55 | Format.fprintf ff "@,%a" pp_stmt stmt 56 | ) fn.body; 57 | Format.fprintf ff "@]@,}@]" 58 | 59 | type global = { 60 | global_name: string; 61 | } 62 | 63 | let pp_global ff global = 64 | Format.fprintf ff "global %s;@." 
global.global_name 65 | 66 | type def = Global of global | Fn of fn 67 | 68 | let pp_def ff def = 69 | match def with 70 | | Global g -> pp_global ff g 71 | | Fn fn -> pp_fn ff fn 72 | 73 | type t = def list 74 | 75 | let pp ff fns = 76 | Format.fprintf ff "@["; 77 | List.iter (fun def -> 78 | Format.fprintf ff "%a@," pp_def def 79 | ) fns; 80 | Format.fprintf ff "@]" 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/jadx/export.dl: -------------------------------------------------------------------------------- 1 | .output _ActualParam(CTADL_OUTPUT_DB_IO) 2 | .output _FormalParam(CTADL_OUTPUT_DB_IO) 3 | .output _ThisParam(CTADL_OUTPUT_DB_IO) 4 | .output _ReturnStmt(CTADL_OUTPUT_DB_IO) 5 | .output _Method(CTADL_OUTPUT_DB_IO) 6 | .output _MethodInvocation(CTADL_OUTPUT_DB_IO) 7 | .output _MethodInvocationReturn(CTADL_OUTPUT_DB_IO) 8 | .output _ExternalMethod(CTADL_OUTPUT_DB_IO) 9 | .output _Move(CTADL_OUTPUT_DB_IO) 10 | .output _StmtInMethod(CTADL_OUTPUT_DB_IO) 11 | .output _StmtSourceLine(CTADL_OUTPUT_DB_IO) 12 | .output _ClassFileName(CTADL_OUTPUT_DB_IO) 13 | .output _VarInMethod(CTADL_OUTPUT_DB_IO) 14 | .output _VarHasType(CTADL_OUTPUT_DB_IO) 15 | .output _VarHasInternalName(CTADL_OUTPUT_DB_IO) 16 | .output _VarHasName(CTADL_OUTPUT_DB_IO) 17 | .output _StaticGet(CTADL_OUTPUT_DB_IO) 18 | .output _StaticPut(CTADL_OUTPUT_DB_IO) 19 | .output _StaticExternalField(CTADL_OUTPUT_DB_IO) 20 | .output _InstanceField(CTADL_OUTPUT_DB_IO) 21 | .output _IPut(CTADL_OUTPUT_DB_IO) 22 | .output _IGet(CTADL_OUTPUT_DB_IO) 23 | .output _StmtInBasicblock(CTADL_OUTPUT_DB_IO) 24 | .output _IfStmt(CTADL_OUTPUT_DB_IO) 25 | .output _TernaryStmt(CTADL_OUTPUT_DB_IO) 26 | .output _TernaryArg(CTADL_OUTPUT_DB_IO) 27 | .output _SwitchStmt(CTADL_OUTPUT_DB_IO) 28 | .output _SwitchTarget(CTADL_OUTPUT_DB_IO) 29 | .output _TypeInstance(CTADL_OUTPUT_DB_IO) 30 | .output _AGet(CTADL_OUTPUT_DB_IO) 31 | .output 
_APut(CTADL_OUTPUT_DB_IO) 32 | .output _PhiAssign(CTADL_OUTPUT_DB_IO) 33 | .output _VarIsConst(CTADL_OUTPUT_DB_IO) 34 | .output _FieldIsFinal(CTADL_OUTPUT_DB_IO) 35 | .output _FieldConstInit(CTADL_OUTPUT_DB_IO) 36 | .output _DirectSuperclass(CTADL_OUTPUT_DB_IO) 37 | .output _SuperInterface(CTADL_OUTPUT_DB_IO) 38 | .output _ClassHasName(CTADL_OUTPUT_DB_IO) 39 | .output _ClassDefinedIn(CTADL_OUTPUT_DB_IO) 40 | .output _InterfaceType(CTADL_OUTPUT_DB_IO) 41 | .output _MethodImplemented(CTADL_OUTPUT_DB_IO) 42 | .output _ManifestRoot(CTADL_OUTPUT_DB_IO) 43 | .output _ManifestNode(CTADL_OUTPUT_DB_IO) 44 | .output _ManifestNodeChild(CTADL_OUTPUT_DB_IO) 45 | .output _ManifestNodeAttr(CTADL_OUTPUT_DB_IO) 46 | .output _TopParentClass(CTADL_OUTPUT_DB_IO) 47 | .output _InsnBytecodeLocation(CTADL_OUTPUT_DB_IO) 48 | .output _SARIFByteRegion(CTADL_OUTPUT_DB_IO) 49 | .output _SARIFCharRegion(CTADL_OUTPUT_DB_IO) 50 | .output _SARIFLineRegion(CTADL_OUTPUT_DB_IO) 51 | .output _SARIFArtifactLocation(CTADL_OUTPUT_DB_IO) 52 | .output _DecompilerSourceMap(CTADL_OUTPUT_DB_IO) 53 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/analyzeHeadlessBigMem: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -x 4 | 5 | #---------------------------------------------------------------------- 6 | # Ghidra Headless Analyzer launch (see analyzeHeadlessREADME.html) 7 | #---------------------------------------------------------------------- 8 | 9 | # Maximum heap memory may be changed if default is inadequate. This will generally be up to 1/4 of 10 | # the physical memory available to the OS. Uncomment MAXMEM setting if non-default value is needed. 11 | MAXMEM=40G 12 | 13 | # Launch mode can be changed to one of the following: fg, debug, debug-suspend 14 | LAUNCH_MODE=fg 15 | 16 | # Set the debug address to listen on. 
17 | # NOTE: This variable is ignored if not launching in a debugging mode. 18 | DEBUG_ADDRESS=127.0.0.1:13002 19 | 20 | # Limit the # of garbage collection and JIT compiler threads in case many headless 21 | # instances are run in parallel. By default, Java will assign one thread per core 22 | # which does not scale well on servers with many cores. 23 | VMARG_LIST="-XX:ParallelGCThreads=4 -XX:CICompilerCount=4 " 24 | 25 | # Resolve symbolic link if present and get the directory this script lives in. 26 | # NOTE: "readlink -f" is best but works on Linux only, "readlink" will only work if your PWD 27 | # contains the link you are calling (which is the best we can do on macOS), and the "echo" is the 28 | # fallback, which doesn't attempt to do anything with links. 29 | ghidra_bin=$(readlink -e $(which ghidra)) 30 | if [ ! -z "$GHIDRA_HOME" ]; then 31 | ghidra_base=$GHIDRA_HOME 32 | elif [ ! -z "$ghidra_bin" ]; then 33 | ghidra_base=$(dirname "$ghidra_bin") 34 | else 35 | echo "Could not find ghidra in path. Add 'ghidra' to path or set GHIDRA_HOME" >&2 36 | exit 1 37 | fi 38 | if [ -e "$ghidra_base/../lib/ghidra/support/analyzeHeadless" ]; then 39 | analyze_headless=$ghidra_base/../lib/ghidra/support/analyzeHeadless 40 | elif [ -e "$ghidra_base/support/analyzeHeadless" ]; then 41 | analyze_headless=$ghidra_base/support/analyzeHeadless 42 | else 43 | echo "Could not find ghidra analyzeHeadless from ghidra directory $ghidra_base" >&2 44 | exit 1 45 | fi 46 | echo "Analyze headless path: $analyze_headless" >&2 47 | SCRIPT_DIR="${analyze_headless%/*}" 48 | 49 | # Launch HeadlessAnalyzer. 
50 | # DEBUG_ADDRESS set via environment for launch.sh 51 | DEBUG_ADDRESS=${DEBUG_ADDRESS} "${SCRIPT_DIR}"/launch.sh "${LAUNCH_MODE}" jdk Ghidra-Headless "${MAXMEM}" "${VMARG_LIST}" ghidra.app.util.headless.AnalyzeHeadless "$@" 52 | -------------------------------------------------------------------------------- /tests/bin/check-taintfront: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set -e 4 | # set -o pipefail 5 | set -x 6 | 7 | top="$(git rev-parse --show-toplevel)" 8 | outdir="$top/ctadl-test-output" 9 | testdir="$top/tests/taint-front" 10 | cd $outdir 11 | 12 | any_failed=0 13 | 14 | # Finds .tnt testcases and runs them. 15 | # If there is a corresponding .json file, we test the ctadlir against it 16 | # If there is a .codeflows file, we check for code flows 17 | # If there is a .nocodeflows file, we check for absence of code flows 18 | for f in $(ls -1 "$testdir"/*.tnt); do 19 | testname="$(basename $f .tnt)" 20 | echo "TAINT-FRONT TEST:" "$testname" 21 | sarif_file="$outdir/$testname.sarif" 22 | testcase_dir="$outdir/$testname" 23 | options_file="$testdir/$(basename $f .tnt).options" 24 | ir_file="$testdir/$(basename $f .tnt).json" 25 | flows_file="$testdir/$(basename $f .tnt).codeflows" 26 | noflows_file="$testdir/$(basename $f .tnt).nocodeflows" 27 | mkdir -p "$testcase_dir" 28 | ctadl import taint-front "$f" -o "$testcase_dir" -f 29 | option_string="" 30 | if [ -f "$options_file" ]; then 31 | option_string=$(<"$options_file") 32 | option_string=$(echo "$option_string" | tr -d '\n') 33 | fi 34 | ctadl --dir "$testcase_dir" index $option_string -f 35 | if [ $? -ne 0 ]; then 36 | echo "index failure" 37 | any_failed=1 38 | continue 39 | fi 40 | ctadl --dir "$testcase_dir" query --format sarif -o "$sarif_file" 41 | if [ $? 
-ne 0 ]; then 42 | echo "$testname: query failure" 43 | any_failed=1 44 | continue 45 | fi 46 | if [ -f "$ir_file" ]; then 47 | ( cd "$testcase_dir" && python3 "$top/tests/test_ctadlir.py" "$ir_file" ) 48 | if [ $? -ne 0 ]; then 49 | echo "$testname: test_ctadlir failure" 50 | any_failed=1 51 | continue 52 | fi 53 | fi 54 | if [ -f "$flows_file" ]; then 55 | ( cd "$testcase_dir" && python3 "$top/tests/bin/sarif_has_code_flows.py" "$sarif_file" ) 56 | if [ $? -ne 0 ]; then 57 | echo "$testname: sarif_has_code_flows failure" 58 | any_failed=1 59 | continue 60 | fi 61 | fi 62 | if [ -f "$noflows_file" ]; then 63 | ( cd "$testcase_dir" && ! python3 "$top/tests/bin/sarif_has_code_flows.py" "$sarif_file" ) 64 | if [ $? -ne 0 ]; then 65 | echo "$testname: ! sarif_has_code_flows failure" 66 | any_failed=1 67 | continue 68 | fi 69 | fi 70 | done 71 | 72 | if [ $any_failed -ne 0 ]; then 73 | echo "taint-front test failure, see errors above" 74 | fi 75 | exit $any_failed 76 | -------------------------------------------------------------------------------- /utils/gen_help_docs.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | 3 | 4 | def convert_indented_paragraphs_to_rst_literal(plaintext): 5 | # Split the input text into lines 6 | lines = plaintext.splitlines() 7 | rst_lines = [] 8 | 9 | # Variable to track if we are currently in an indented paragraph 10 | in_indented_paragraph = False 11 | 12 | for line in lines: 13 | # Check if the line is indented (starts with whitespace) 14 | if line.startswith(" "): 15 | # If we are not already in an indented paragraph, add the literal block marker 16 | if not in_indented_paragraph: 17 | rst_lines.append("\n::\n") 18 | in_indented_paragraph = True 19 | 20 | # Add the indented line to the output with additional indentation 21 | rst_lines.append(" " + line) 22 | else: 23 | # If we encounter a non-indented line and we were in an indented paragraph 24 | if in_indented_paragraph: 
def _write_help_section(f, title, argv):
    """Write one reST section: *title*, its underline, then the stdout of *argv*."""
    f.write(f"{title}\n")
    # reST needs the underline to be at least as long as the title text, so
    # derive it from the title instead of hard-coding a dash count.
    f.write("-" * len(title) + "\n")
    result = subprocess.run(argv, capture_output=True, text=True)
    f.write(result.stdout + "\n\n")


def generate_help_docs(commands):
    """Write ``docs/cli_help.rst`` from the ``--help`` output of ``ctadl``.

    The page starts with a fixed heading and a note, followed by one section
    for ``ctadl --help`` and one section per entry of *commands* holding the
    output of ``ctadl <command> --help``.

    Args:
        commands: Iterable of ``ctadl`` subcommand names, in output order.

    The output path is relative to the current working directory, and the
    ``ctadl`` executable must be available on ``PATH``.
    """
    with open("docs/cli_help.rst", "w") as f:
        f.write("Command Line\n")
        f.write("================\n\n")

        f.write(".. note::\n")
        f.write(
            " This page is autogenerated from a CTADL installation with all available plugins\n\n"
        )

        _write_help_section(f, "``ctadl``", ["ctadl", "--help"])

        for command in commands:
            _write_help_section(f, command, ["ctadl", command, "--help"])
12 | .decl AliasedBy_Target(target: CVar) 13 | 14 | // target aliased by alias, i.e., alias is aliased to target, i.e., alias = 15 | // target happened so writes to alias may also write to target. 16 | .decl AliasedBy(targetv: CVar, targetp: CAccessPath, aliasv: CVar, aliasp: CAccessPath, ctx: symbol) 17 | choice-domain (targetv, targetp, aliasv, aliasp) 18 | 19 | .decl IntraObjectAliasedBy(targetv: CVar, f: CAccessPath, g: CAccessPath, ctx: symbol) 20 | choice-domain (targetv, f, g) 21 | .output IntraObjectAliasedBy(CTADL_OUTPUT_DB_IO) 22 | 23 | // Empty call string "" 24 | .functor NewCallString(): symbol stateful 25 | // If two call strings are comparable and s1 is more specific than s2, that is 26 | // everything from s2 applies to s1, then this returns 1. 27 | .functor CallStringLte(s1: symbol, s2: symbol): number 28 | .functor CallStringUnder(s1: symbol, s2: symbol): number 29 | // Returns 1 if call string has at least one frame 30 | .functor CallStringNonEmpty(s: symbol): number 31 | // Returns the number of frames in the call string 32 | .functor CallStringSize(s: symbol): number 33 | // Returns 1 if s1 contains s2; 2 if s2 contains s1; 0 otherwise 34 | .functor CheckSubstring(s1: symbol, s2: symbol): number 35 | // Returns the top frame of the call string. Precondition: there is at least 36 | // one frame in the call string 37 | .functor TopFrame(stack: symbol): symbol stateful 38 | // Pushes a frame onto the call string 39 | .functor PushFrame(func: symbol, stack: symbol): symbol stateful 40 | // Pushes a new frame onto the call string. The resulting call string keeps the 41 | // top K (at most) frames of the new call string. 
42 | .functor PushFrameK(func: symbol, stack: symbol, k: number): symbol stateful 43 | // Pops the top frame from the call string 44 | .functor PopFrame(dst:symbol):symbol stateful 45 | .functor AccessPathCycle(ap:symbol): number 46 | .functor AccessPathSize(ap:symbol): number 47 | 48 | // Specialized functors 49 | .functor AndroidManifestClassId(manifest_name: symbol): symbol stateful 50 | 51 | 52 | .decl IntCInsn_InFunction(insn: symbol, index: number, function: symbol) 53 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/pcode/index.dl: -------------------------------------------------------------------------------- 1 | #include "pcode/pcode_lang.dl" 2 | #ifdef CTADL_IMPORT_LANG_FACTS_FROM_DB 3 | #include "pcode/importdb.dl" 4 | #else 5 | #include "pcode/import.dl" 6 | #endif 7 | 8 | CTADLConfig("CTADL_ANALYSIS_LANG", "PCODE"). 9 | 10 | #ifndef CTADL_DISABLE_INDEX_PHASE 11 | 12 | #ifdef CTADL_PCODE_PRINT 13 | .output PCodePrint 14 | .output PCodePrint2 15 | #endif 16 | 17 | // --------------------------------------------------------------------------- 18 | // phase 1 19 | 20 | #include "graph/dataflow_graph.dl" 21 | 22 | .init ctadl_pcode_phase1 = CGraphRules 23 | 24 | // --------------------------------------------------------------------------- 25 | // tracks flows of known function pointers to resolve indirect calls 26 | 27 | #ifndef CTADL_PCODE_DISABLE_INDIRECT_CALL_RESOLUTION 28 | 29 | #define FS_ReachableCallEdgePlan 30 | #define FS_CalledFormalPlan 31 | #define FS_ReachableReturnEdgePlan 32 | 33 | /* #define FS_VirtualAssignPlan \ */ 34 | /* .plan 1: (2, 1) */ 35 | /* #define FS_ReachableCallEdgePlan \ */ 36 | /* .plan 1: (3, 2, 1) */ 37 | /* #define FS_CalledFormalPlan \ */ 38 | /* .plan 1: (3, 2, 1) */ 39 | /* #define FS_ReachableReturnEdgePlan \ */ 40 | /* .plan 1: (2, 1) */ 41 | 42 | #include "graph/slice/callee.dl" 43 | 44 | #ifdef ALL_OUTPUTS 45 | .output 
track_assign_func.ReachableVertex(CTADL_OUTPUT_DB_IO) 46 | #endif 47 | 48 | .init track_assign_func = SliceCallee 49 | track_assign_func.input_.StartVertex(v, p, func) :- 50 | VirtualAlloc(v, p, func, ""). 51 | 52 | CallEdge(i, fid, "") :- 53 | track_assign_func.isReachable(func_op, "", fid), 54 | CCall_ActualParam(i, PCODE_FUNC_ARG_INDEX, func_op, ""). 55 | 56 | #if 0 57 | .decl CallInSameFuncAsReachable(call: PCodeInstruction, reachable: PCodeVarnode) 58 | .output CallInSameFuncAsReachable(CTADL_OUTPUT_DB_IO) 59 | 60 | CallInSameFuncAsReachable(i, func_op) :- 61 | // something pointing at fid is reachable inside func_op_fid 62 | track_assign_func.isReachable(func_op, "", fid), 63 | VNODE_HFUNC(func_op, func_op_fid), 64 | func_op_fid != fid, 65 | 66 | // call is in func_op_fid, too 67 | CallInstruction(i, call_op), 68 | PCODE_MNEMONIC(i, "CALLIND"), 69 | PCODE_PARENT(i, bb), 70 | BB_HFUNC(bb, func_op_fid). 71 | #endif 72 | 73 | #endif // CTADL_PCODE_DISABLE_INDIRECT_CALL_RESOLUTION 74 | 75 | //#include "pcode/models.dl" 76 | 77 | #endif // CTADL_DISABLE_INDEX_PHASE 78 | 79 | #include 80 | #include 81 | 82 | .pragma "suppress-warnings" "CFunction_ModelAssign,track_assign_func.input_.SanitizeAssign,GEPVarnode_NamedAccessPath,CInsn_ModelAssign" 83 | -------------------------------------------------------------------------------- /nix/souffle/remove-cpack.patch: -------------------------------------------------------------------------------- 1 | diff --git a/CMakeLists.txt b/CMakeLists.txt 2 | index 6f2090f..6963746 100644 3 | --- a/CMakeLists.txt 4 | +++ b/CMakeLists.txt 5 | @@ -340,58 +340,3 @@ function(get_linux_lsb_release_information) 6 | set(LSB_RELEASE_VERSION_SHORT "${LSB_RELEASE_VERSION_SHORT}" PARENT_SCOPE) 7 | set(LSB_RELEASE_CODENAME_SHORT "${LSB_RELEASE_CODENAME_SHORT}" PARENT_SCOPE) 8 | endfunction() 9 | - 10 | -# -------------------------------------------------- 11 | -# CPack configuration 12 | -# -------------------------------------------------- 13 | 
- 14 | -SET(CPACK_PACKAGE_CONTACT "Patrick H.") 15 | -SET(CPACK_PACKAGE_DESCRIPTION "Souffle - A Datalog Compiler") 16 | -SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "A Datalog Compiler") 17 | - 18 | -# Use all available threads (primarily for compression of files) 19 | -SET(CPACK_THREADS 0) 20 | - 21 | -# Make sure changelog, bash-completion and other important files in debian directory also packaged 22 | -SET(CPACK_DEBIAN_PACKAGE_CONTROL_EXTRA "${CMAKE_SOURCE_DIR}/debian/changelog.in" "${CMAKE_SOURCE_DIR}/debian/souffle.bash-completion" "${CMAKE_SOURCE_DIR}/debian/copyright") 23 | - 24 | -# -------------------------------------------------- 25 | -# CPack configuration for Linux 26 | -# -------------------------------------------------- 27 | -if (CMAKE_SYSTEM_NAME MATCHES "Linux") 28 | - get_linux_lsb_release_information() 29 | - if (LSB_RELEASE_ID_SHORT MATCHES "Ubuntu") 30 | - # Generate just DEB 31 | - SET(CPACK_GENERATOR "DEB") 32 | - # -------------------------------------------------- 33 | - # Variables relevent to DEB packages 34 | - # -------------------------------------------------- 35 | - 36 | - # Specifying runtime dependencies 37 | - set(CPACK_DEBIAN_PACKAGE_DEPENDS "g++ (>= 7), libffi-dev, libncurses5-dev, libsqlite3-dev, mcpp, zlib1g-dev") 38 | - 39 | - # Auto-generate any runtime dependencies that are required 40 | - SET(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) 41 | - 42 | - # Architectures are actually auto-detected so no need to set this variable 43 | - # SET(CPACK_DEBIAN_PACKAGE_ARCHITECTURE "i386") 44 | - endif() 45 | - 46 | - if (LSB_RELEASE_ID_SHORT MATCHES "Fedora") 47 | - # Generate both DEB and RPM packages 48 | - SET(CPACK_GENERATOR "RPM") 49 | - 50 | - # -------------------------------------------------- 51 | - # Variables relevent to RPM packages 52 | - # -------------------------------------------------- 53 | - 54 | - # Specifying runtime dependencies 55 | - set(CPACK_RPM_PACKAGE_REQUIRES "g++ >= 7, libffi, libffi-devel, ncurses-devel, 
libsqlite3x, mcpp, zlib-devel") 56 | - 57 | - # Don't auto-detect dependencies and provides 58 | - SET(CPACK_RPM_PACKAGE_AUTOREQPROV "no") 59 | - endif() 60 | -endif() 61 | - 62 | - 63 | -INCLUDE(CPack) 64 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/imports.dl: -------------------------------------------------------------------------------- 1 | .input CCall_ActualParam(CTADL_INPUT_DB_IO) 2 | .input CCall_VirtualBase(CTADL_INPUT_DB_IO) 3 | .input CFunction_Arity(CTADL_INPUT_DB_IO) 4 | .input CFunction_FormalParam(CTADL_INPUT_DB_IO) 5 | .input CFunction_ModelAssign(CTADL_INPUT_DB_IO) 6 | .input CFunction_ModelInfluenceFrom(CTADL_INPUT_DB_IO) 7 | .input CFunction_ModelInfluenceTo(CTADL_INPUT_DB_IO) 8 | .input CFunction_Name(CTADL_INPUT_DB_IO) 9 | .input CFunction_OmitSummaries(CTADL_INPUT_DB_IO) 10 | .input CFunction_Signature(CTADL_INPUT_DB_IO) 11 | .input CFunction_SourceInfo(CTADL_INPUT_DB_IO) 12 | .input CFunction_isFormalParamByRef(CTADL_INPUT_DB_IO) 13 | .input CInsn_Call(CTADL_INPUT_DB_IO) 14 | .input CInsn_InFunction(CTADL_INPUT_DB_IO) 15 | .input CInsn_ModelAssign(CTADL_INPUT_DB_IO) 16 | .input CInsn_Move(CTADL_INPUT_DB_IO) 17 | .input CInsn_SourceInfo(CTADL_INPUT_DB_IO) 18 | .input CInsn_UnsuspiciousCall(CTADL_INPUT_DB_IO) 19 | .input CInsn_Use(CTADL_INPUT_DB_IO) 20 | .input CInsn_isDataflowOnly(CTADL_INPUT_DB_IO) 21 | .input CNamespace_Parent(CTADL_INPUT_DB_IO) 22 | .input CReturnParameter(CTADL_INPUT_DB_IO) 23 | .input CGlobalParameter(CTADL_INPUT_DB_IO) 24 | .input CTADLConfig(CTADL_INPUT_DB_IO) 25 | .input CTADLStats(CTADL_INPUT_DB_IO) 26 | .input CVar_InFunction(CTADL_INPUT_DB_IO) 27 | .input CVar_Name(CTADL_INPUT_DB_IO) 28 | .input CVar_Type(CTADL_INPUT_DB_IO) 29 | .input CType_Subtype(CTADL_INPUT_DB_IO) 30 | .input CVar_SourceInfo(CTADL_INPUT_DB_IO) 31 | .input CVar_isGlobal(CTADL_INPUT_DB_IO) 32 | .input CisField(CTADL_INPUT_DB_IO) 33 | .input CField_Name(CTADL_INPUT_DB_IO) 34 | .input 
CisAccessPath(CTADL_INPUT_DB_IO) 35 | .input CisAlloc(CTADL_INPUT_DB_IO) 36 | .input CisFunction(CTADL_INPUT_DB_IO) 37 | .input CisNamespace(CTADL_INPUT_DB_IO) 38 | .input CSourceInfo_Location(CTADL_INPUT_DB_IO) 39 | .input CSourceInfo_File(CTADL_INPUT_DB_IO) 40 | .input CFile_UriBaseId(CTADL_INPUT_DB_IO) 41 | .input CSourceInfo_LineRegion(CTADL_INPUT_DB_IO) 42 | .input CLineRegion_StartColumn(CTADL_INPUT_DB_IO) 43 | .input CLineRegion_EndLine(CTADL_INPUT_DB_IO) 44 | .input CLineRegion_EndColumn(CTADL_INPUT_DB_IO) 45 | .input CSourceInfo_CharRegion(CTADL_INPUT_DB_IO) 46 | .input CCharRegion_Length(CTADL_INPUT_DB_IO) 47 | .input CSourceInfo_ByteRegion(CTADL_INPUT_DB_IO) 48 | .input CByteRegion_Length(CTADL_INPUT_DB_IO) 49 | .input CSourceInfo_Address(CTADL_INPUT_DB_IO) 50 | .input CAddress_AbsoluteAddress(CTADL_INPUT_DB_IO) 51 | .input CAddress_RelativeAddress(CTADL_INPUT_DB_IO) 52 | .input CAddress_OffsetFromParent(CTADL_INPUT_DB_IO) 53 | .input CAddress_Length(CTADL_INPUT_DB_IO) 54 | .input CAddress_Name(CTADL_INPUT_DB_IO) 55 | .input CAddress_FullyQualifiedName(CTADL_INPUT_DB_IO) 56 | .input CAddress_Kind(CTADL_INPUT_DB_IO) 57 | .input CAddress_Parent(CTADL_INPUT_DB_IO) 58 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/export.dl: -------------------------------------------------------------------------------- 1 | .output CCall_ActualParam(CTADL_OUTPUT_DB_IO) 2 | .output CCall_VirtualBase(CTADL_OUTPUT_DB_IO) 3 | .output CFunction_Arity(CTADL_OUTPUT_DB_IO) 4 | .output CFunction_FormalParam(CTADL_OUTPUT_DB_IO) 5 | .output CFunction_ModelAssign(CTADL_OUTPUT_DB_IO) 6 | .output CFunction_ModelInfluenceFrom(CTADL_OUTPUT_DB_IO) 7 | .output CFunction_ModelInfluenceTo(CTADL_OUTPUT_DB_IO) 8 | .output CFunction_Name(CTADL_OUTPUT_DB_IO) 9 | .output CFunction_OmitSummaries(CTADL_OUTPUT_DB_IO) 10 | .output CFunction_Signature(CTADL_OUTPUT_DB_IO) 11 | .output CFunction_SourceInfo(CTADL_OUTPUT_DB_IO) 12 | .output 
CFunction_isFormalParamByRef(CTADL_OUTPUT_DB_IO) 13 | .output CInsn_Call(CTADL_OUTPUT_DB_IO) 14 | .output CInsn_InFunction(CTADL_OUTPUT_DB_IO) 15 | .output CInsn_ModelAssign(CTADL_OUTPUT_DB_IO) 16 | .output CInsn_Move(CTADL_OUTPUT_DB_IO) 17 | .output CInsn_SourceInfo(CTADL_OUTPUT_DB_IO) 18 | .output CInsn_UnsuspiciousCall(CTADL_OUTPUT_DB_IO) 19 | .output CInsn_Use(CTADL_OUTPUT_DB_IO) 20 | .output CInsn_isDataflowOnly(CTADL_OUTPUT_DB_IO) 21 | .output CNamespace_Parent(CTADL_OUTPUT_DB_IO) 22 | .output CReturnParameter(CTADL_OUTPUT_DB_IO) 23 | .output CGlobalParameter(CTADL_OUTPUT_DB_IO) 24 | .output CTADLConfig(CTADL_OUTPUT_DB_IO) 25 | .output CVar_InFunction(CTADL_OUTPUT_DB_IO) 26 | .output CVar_Name(CTADL_OUTPUT_DB_IO) 27 | .output CVar_Type(CTADL_OUTPUT_DB_IO) 28 | .output CType_Subtype(CTADL_OUTPUT_DB_IO) 29 | .output CVar_SourceInfo(CTADL_OUTPUT_DB_IO) 30 | .output CVar_isGlobal(CTADL_OUTPUT_DB_IO) 31 | .output CisField(CTADL_OUTPUT_DB_IO) 32 | .output CField_Name(CTADL_OUTPUT_DB_IO) 33 | .output CisAccessPath(CTADL_OUTPUT_DB_IO) 34 | .output CisAlloc(CTADL_OUTPUT_DB_IO) 35 | .output CisFunction(CTADL_OUTPUT_DB_IO) 36 | .output CisNamespace(CTADL_OUTPUT_DB_IO) 37 | .output CSourceInfo_Location(CTADL_OUTPUT_DB_IO) 38 | .output CSourceInfo_File(CTADL_OUTPUT_DB_IO) 39 | .output CFile_UriBaseId(CTADL_OUTPUT_DB_IO) 40 | .output CSourceInfo_LineRegion(CTADL_OUTPUT_DB_IO) 41 | .output CLineRegion_StartColumn(CTADL_OUTPUT_DB_IO) 42 | .output CLineRegion_EndLine(CTADL_OUTPUT_DB_IO) 43 | .output CLineRegion_EndColumn(CTADL_OUTPUT_DB_IO) 44 | .output CSourceInfo_CharRegion(CTADL_OUTPUT_DB_IO) 45 | .output CCharRegion_Length(CTADL_OUTPUT_DB_IO) 46 | .output CSourceInfo_ByteRegion(CTADL_OUTPUT_DB_IO) 47 | .output CByteRegion_Length(CTADL_OUTPUT_DB_IO) 48 | .output CSourceInfo_Address(CTADL_OUTPUT_DB_IO) 49 | .output CAddress_AbsoluteAddress(CTADL_OUTPUT_DB_IO) 50 | .output CAddress_RelativeAddress(CTADL_OUTPUT_DB_IO) 51 | .output CAddress_OffsetFromParent(CTADL_OUTPUT_DB_IO) 
class TestDatabase:
    """Tracks which expectations of a testcase file are still unmet.

    ``tests`` maps a table name to a dict that may contain two optional keys:

    * ``"includes"``: testcases that must each match at least one row;
    * ``"excludes"``: testcases that must not match any row.

    Rows are fed in one at a time through :meth:`process_row`; afterwards
    :meth:`check_errors` reports the ``includes`` entries that never matched.
    """

    def __init__(self, tests):
        self.tests = tests

    def process_row(self, table, row):
        """Check one database *row* of *table* against the expectations.

        Every ``includes`` testcase matched by *row* is dropped from the
        pending list. If *row* matches an ``excludes`` testcase, the violation
        is printed and the process exits with status 1.
        """
        spec = self.tests[table]

        pending = spec.get("includes", [])
        if pending:
            spec["includes"] = [
                testcase
                for testcase in pending
                if not testcase_matches_row(testcase, row)
            ]

        # The excludes list is intentionally left in place so that every row
        # of the table, not only the first one, is checked against it.
        for testcase in spec.get("excludes", []):
            if testcase_matches_row(testcase, row):
                print(f"error: excludes violated on table {table}")
                print(f"exclude entry: {testcase}")
                row_fmt = "\n".join(f"{k}: '{row[k]}'" for k in row.keys())
                print(f"violating row:\n{row_fmt}")
                sys.exit(1)

    def check_errors(self) -> bool:
        """Return True when no ``includes`` testcase is left unmatched.

        Each table that still has pending ``includes`` entries is printed. A
        table without an ``"includes"`` key has nothing pending.
        """
        ok = True
        for table, testcases in self.tests.items():
            if testcases.get("includes"):
                print(table, testcases)
                ok = False
        return ok
// Holds for a virtual method invocation whose number of CHA_ResolveCall
// targets equals CTADL_HYBRID_CHA_THRESHOLD. That macro is #defined as 1 at
// the top of this file, which is why the relation is named "exactly one".
.decl hasExactlyOneCHAEdge(insn: CInsn)
hasExactlyOneCHAEdge(stmt) :-
    VirtualMethodInvocation_Base(stmt, _),
    CTADL_HYBRID_CHA_THRESHOLD = count : { CHA_ResolveCall(stmt, _) }.
71 | 72 | #ifdef ALL_OUTPUTS 73 | //.output track_alloc.ReachableVertex(CTADL_OUTPUT_DB_IO) 74 | #endif 75 | 76 | #endif // CTADL_DISABLE_INDEX_PHASE 77 | 78 | // --------------------------------------------------------------------------- 79 | // output 80 | 81 | #include 82 | #include 83 | 84 | // suppress-warnings needs to be given exactly once at the top-level because 85 | // souffle overrides previous pragmas with the last pragma 86 | .pragma "suppress-warnings" "track_intent.tracker.input_.SanitizeAssign,ErrorMethod,track_alloc.input_.SanitizeAssign,ClassObjectToClassName,CFunction_SourceInfo,CVar_SourceInfo" 87 | -------------------------------------------------------------------------------- /docs/DevGuide.rst: -------------------------------------------------------------------------------- 1 | Development Guide 2 | ================= 3 | 4 | There are three primary schemas used in CTADL: 5 | 6 | - The CTADL IR schema, documented in 7 | `ctadl_schema.dl `__ 8 | - The data flow graph schema, documented in 9 | `graph_schema.dl `__ 10 | - The taint slice schema, documented in 11 | `taint_schema.dl `__ 12 | 13 | CTADL Import & Export Plugins 14 | ----------------------------- 15 | 16 | CTADL supports two types of plugins: import and export, which enable 17 | features in the corresponding ``ctadl import`` and ``ctadl export`` 18 | commands, respectively. All plugins must define a few attributes and a 19 | ``run`` function whose characteristics depend on the type of plugin. The 20 | plugin API is currently subject to change without notice. 21 | 22 | Import Plugins 23 | ^^^^^^^^^^^^^^ 24 | 25 | Import plugins are used to define new languages for CTADL to analyze. 26 | Import plugins must define two global attributes: ``language`` and 27 | ``version``. 28 | 29 | CTADL filters plugins based on the ``language`` attribute. 
The Ghidra 30 | import plugin, for example, defines the ``language = "PCODE"`` 31 | attribute, and this is what appears as the first argument to the 32 | ``ctadl import`` command when the plugin is installed. 33 | 34 | The ``run`` function must support the following positional arguments: 35 | 36 | - ``ctadl``: The ``ctadl`` module 37 | - ``args``: The raw command-line arguments from ``argparse`` 38 | - ``artifact``: A ``str`` that points at the artifact the plugin uses 39 | as input. For some plugins, e.g. JADX, this is a directory. For 40 | others, it’s a file. 41 | - ``out``: A ``str`` that denotes a path where output should be stored. 42 | Could be a file or directory depending on the plugin 43 | - Other keyword arguments that the plugin knows how to interpret. 44 | 45 | Export Plugins 46 | ^^^^^^^^^^^^^^ 47 | 48 | Export plugins are used to define external formats to which we can 49 | export CTADL index information. Export plugins must define two global 50 | attributes: ``export_formats`` and ``version``. 51 | 52 | CTADL filters plugins based on the ``export_formats`` attribute. The 53 | networkx export plugin, for example, defines the 54 | ``export_formats = ["gml"]`` attribute because it can export to the GML 55 | format. The format name appears as an option for the 56 | ``ctadl export --format`` flag when the plugin is installed. 57 | 58 | The ``run`` function must support the following positional arguments: 59 | 60 | - ``ctadl``: The ``ctadl`` module 61 | - ``args``: The raw command-line arguments from ``argparse`` 62 | - ``format``: A ``str``, the format chosen by the user 63 | - ``index``: A ``Path`` pointing at the index to export 64 | - ``out``: A ``str`` that denotes a path where output should be stored. 65 | Could be a file or directory depending on the plugin 66 | - Other keyword arguments that the plugin knows how to interpret. 
67 | -------------------------------------------------------------------------------- /nix/souffle/common.nix: -------------------------------------------------------------------------------- 1 | { stdenv, lib, fetchFromGitHub, fetchgit 2 | , perl, ncurses, zlib, sqlite, libffi, libtool 3 | , mcpp, bison, flex, doxygen, graphviz 4 | , makeWrapper, openmp, which, cmake 5 | , python3 6 | , llvmPackages 7 | , src 8 | , version 9 | , macosx-version-min ? "10.14" 10 | , enableOpenMP 11 | , enable64BitDomain 12 | , enableDebug 13 | }: 14 | 15 | 16 | let 17 | toolsPath = lib.makeBinPath [ mcpp stdenv.cc ]; 18 | libsPath = lib.makeLibraryPath ([ stdenv.cc.cc ] ++ lib.optional stdenv.cc.isClang llvmPackages.libcxxabi); 19 | in 20 | stdenv.mkDerivation rec { 21 | pname = "souffle"; 22 | inherit src version; 23 | 24 | enableParallelBuilding = true; 25 | nativeBuildInputs = [ cmake bison flex mcpp makeWrapper libtool which python3.pkgs.wrapPython ]; 26 | buildInputs = [ ncurses zlib sqlite libffi ] 27 | ++ lib.optional (stdenv.cc.isClang && enableOpenMP) openmp; 28 | 29 | postPatch = '' 30 | substituteInPlace CMakeLists.txt \ 31 | --replace "''${SOUFFLE_VERSION}" "${version}" 32 | ''; 33 | 34 | # CXX gets embedded into the software, so we want to expand it to a full path 35 | # macosx-version-min used to ensure aligned allocators are available 36 | # NIX_CFLAGS_COMPILE= [ "-fno-aligned-allocation" ]; # also works 37 | preConfigure = '' 38 | ${ lib.optionalString stdenv.isDarwin "MACOSX_DEPLOYMENT_TARGET=${macosx-version-min}" } 39 | ''; 40 | 41 | cmakeFlags = [] 42 | ++ (if enableOpenMP then [ "-DSOUFFLE_USE_OPENMP=1" ] else [ "-DSOUFFLE_USE_OPENMP=0" ]) 43 | ++ lib.optionals enable64BitDomain [ "-DSOUFFLE_DOMAIN_64BIT=ON" ] 44 | ++ lib.optionals enableDebug [ "-DCMAKE_BUILD_TYPE=Debug" ]; 45 | 46 | CXXFLAGS = '' -Wno-unused-command-line-argument ${lib.optionalString stdenv.isDarwin "-mmacosx-version-min=${macosx-version-min}"}''; 47 | 48 | # to embed the C++ compiler, we also 
class TestPaths(unittest.TestCase):
    """Checks the paths returned by ``find_paths`` on the compositional-ctx2 testcase."""

    @classmethod
    def setUpClass(cls):
        """Build the test database: import the testcase, then index and query it."""
        tnt_source = taintfront_path / "compositional-ctx2.tnt"
        subprocess.check_call(
            [
                "ctadl",
                "import",
                "taint-front",
                str(tnt_source),
                "-o",
                str(import_path),
                "-f",
            ]
        )
        try:
            # index and query locate the import through this variable; it is
            # removed again once both commands have run (or one has failed).
            os.environ["CTADL_DEFAULT_DIRECTORY"] = str(import_path)

            for argv in (
                ["ctadl", "index", "-f"],
                ["ctadl", "query", "--compute-slices", "all"],
            ):
                subprocess.check_call(argv)
        finally:
            del os.environ["CTADL_DEFAULT_DIRECTORY"]

    def test_simple_case(self):
        # First is the source and, due to summaries, connects to the sink in
        # one edge
        with DB(db_path) as conn:
            start = [VertexTy("Main/first", "")]
            found = find_paths(conn, start)
            self.assertTrue(
                any(
                    [VertexTy("Main/first", ""), VertexTy("Main/second", "")]
                    == candidate
                    for candidate in found
                )
            )

    def test_forward_path(self):
        # The expected path must be among those found from Leaf/tmp2.
        with DB(db_path) as conn:
            start = [VertexTy("Leaf/tmp2", "")]
            found = find_paths(conn, start)
            expected = [
                VertexTy(name, "")
                for name in (
                    "Main/first",
                    "bar/x",
                    "bar1/x",
                    "Leaf/x",
                    "Leaf/tmp1",
                    "Leaf/tmp2",
                )
            ]
            self.assertTrue(any(expected == candidate for candidate in found))

    def test_backward_path(self):
        # The expected path must be among those found from bar1/tmp.
        with DB(db_path) as conn:
            start = [VertexTy("bar1/tmp", "")]
            found = find_paths(conn, start)
            expected = [
                VertexTy(name, "")
                for name in (
                    "bar1/tmp",
                    "bar1/",
                    "bar/tmp",
                    "bar/",
                    "Main/second",
                )
            ]
            self.assertTrue(any(expected == candidate for candidate in found))
-------------------------------------------------------------------------------- /src/ctadl/util/diff.py: -------------------------------------------------------------------------------- 1 | import difflib 2 | import sqlite3 3 | import sys 4 | from pathlib import Path 5 | from typing import Optional 6 | 7 | from ctadl import status, warn 8 | 9 | bad_tables = ["CTADLConfig", "CTADLStats"] 10 | 11 | 12 | def print_difference( 13 | *, 14 | ca: int, 15 | cb: int, 16 | aconn: sqlite3.Connection, 17 | bconn: sqlite3.Connection, 18 | table: str, 19 | diff_table: Optional[str], 20 | diff_cols, 21 | ): 22 | rel = "<" if ca < cb else (">" if ca > cb else "=") 23 | print(table, ":", ca, rel, cb) 24 | 25 | def row_to_str(row): 26 | return "|".join(map(str, row)) + "\n" 27 | 28 | if table == diff_table: 29 | a_rows = list( 30 | sorted( 31 | row_to_str(row) 32 | for row in aconn.execute(f'select {diff_cols} from "{table}"') 33 | ) 34 | ) 35 | b_rows = list( 36 | sorted( 37 | row_to_str(row) 38 | for row in bconn.execute(f'select {diff_cols} from "{table}"') 39 | ) 40 | ) 41 | sys.stdout.writelines( 42 | difflib.unified_diff(a_rows, b_rows, fromfile="a", tofile="b") 43 | ) 44 | exit(1) 45 | 46 | 47 | def main( 48 | a: sqlite3.Connection, b: sqlite3.Connection, diff_table: Optional[str], diff_cols 49 | ): 50 | good_tables = [] 51 | exit_code = 0 52 | for (table,) in a.execute("select name from sqlite_master where type = 'view'"): 53 | if table in bad_tables or table.startswith("_"): 54 | continue 55 | other_table_exists = b.execute( 56 | "select name from sqlite_master where name = ?1", (table,) 57 | ).fetchall() 58 | if not other_table_exists: 59 | warn(f"{table} doesn't exist in b, skipping") 60 | continue 61 | ca = a.execute(f'select count(*) from "{table}"').fetchall()[0][0] 62 | cb = b.execute(f'select count(*) from "{table}"').fetchall()[0][0] 63 | if ca != cb: 64 | print_difference( 65 | ca=ca, 66 | cb=cb, 67 | table=table, 68 | diff_table=diff_table, 69 | 
diff_cols=diff_cols, 70 | aconn=a, 71 | bconn=b, 72 | ) 73 | else: 74 | good_tables.append(table) 75 | status(f"good tables: {good_tables}", verb=1) 76 | 77 | 78 | def diff(a: Path, b: Path, diff_table, diff_cols): 79 | print(f"a is '{a}'") 80 | print(f"b is '{b}'") 81 | with sqlite3.connect(a) as con_a: 82 | with sqlite3.connect(b) as con_b: 83 | return main(con_a, con_b, diff_table, diff_cols) 84 | 85 | 86 | if __name__ == "__main__": 87 | with sqlite3.connect(sys.argv[1]) as a: 88 | with sqlite3.connect(sys.argv[2]) as b: 89 | table = None 90 | cols = "*" 91 | try: 92 | table = sys.argv[3] 93 | except IndexError: 94 | pass 95 | try: 96 | cols = sys.argv[4] 97 | except IndexError: 98 | pass 99 | main(a, b, table, cols) 100 | -------------------------------------------------------------------------------- /ctadl.nix: -------------------------------------------------------------------------------- 1 | { 2 | ctadl, 3 | lib, 4 | callPackage, 5 | stdenv, 6 | python3, 7 | souffle, 8 | mcpp, 9 | makeWrapper, 10 | withPythonWheel, 11 | llvmPackages, 12 | jdk ? null, 13 | enableRich ? true, 14 | enableJdk ? false, 15 | enableExternalIndexers ? true, 16 | ... 
17 | }: let 18 | withPlugins = plugins: 19 | python3.pkgs.buildPythonPackage { 20 | pname = "${ctadl.pname}-with-plugins"; 21 | inherit (ctadl) version; 22 | format = "other"; 23 | 24 | dontUnpack = true; 25 | dontBuild = true; 26 | doCheck = false; 27 | 28 | propagatedBuildInputs = 29 | plugins 30 | ++ ctadl.propagatedBuildInputs; 31 | pluginWrapArgs = 32 | lib.lists.concatMap (plugin: plugin.makeWrapperArgs or []) plugins; 33 | 34 | installPhase = '' 35 | runHook preInstall 36 | 37 | makeWrapperArgs+=( 38 | "''${pluginWrapArgs[@]}" 39 | --prefix PYTHONPATH ':' "${ctadl}/${python3.sitePackages}:$PYTHONPATH" 40 | ) 41 | 42 | mkdir -p $out 43 | for file in ctadl; do 44 | makeWrapper "${ctadl}/bin/$file" "$out/bin/$file" \ 45 | ''${makeWrapperArgs[@]} 46 | done 47 | ln -sfv ${ctadl}/lib $out/lib 48 | 49 | runHook postInstall 50 | ''; 51 | 52 | passthru = 53 | ctadl.passthru 54 | // { 55 | withPlugins = morePlugins: withPlugins (morePlugins ++ plugins); 56 | }; 57 | 58 | meta.mainProgram = "ctadl"; 59 | }; 60 | pkg = python3.pkgs.buildPythonPackage rec { 61 | pname = "ctadl"; 62 | version = lib.strings.removeSuffix "\n" (builtins.readFile ./src/ctadl/VERSION); 63 | 64 | src = with builtins; 65 | builtins.path { 66 | path = ./.; 67 | name = "ctadl-${version}"; 68 | filter = path: type: let 69 | ignoreFiles = ["tests/bin/test"]; 70 | ignoreDirs = ["nix" "refimpl" "lib" "talks" "jars" "utils" "benchtest" "release_scripts"]; 71 | in 72 | if 73 | (lib.lists.any (p: (builtins.match (".*" + p) path != null)) ignoreFiles) 74 | || (lib.lists.any (p: p == (baseNameOf path)) ignoreDirs) 75 | then false 76 | else true; 77 | }; 78 | 79 | # disable tests, won't work without index which can't be generated without building 80 | doCheck = false; 81 | 82 | nativeBuildInputs = 83 | [makeWrapper souffle] 84 | ++ (with python3.pkgs; [setuptools]); 85 | propagatedBuildInputs = with python3.pkgs; 86 | [jsonschema psutil json5 mcpp] 87 | ++ lib.optionals enableRich [rich]; 88 | 89 | passthru 
= { 90 | inherit jdk withPlugins python3; 91 | }; 92 | 93 | prePatch = '' 94 | patchShebangs src/ctadl/souffle-logic/pcode 95 | ''; 96 | 97 | makeWrapperArgs = 98 | [ 99 | ''--prefix PATH ':' "${lib.makeBinPath [souffle]}"'' 100 | ] 101 | ++ lib.optionals stdenv.cc.isClang [ 102 | ''--set LDFLAGS "-L${llvmPackages.libcxxabi}/lib"'' 103 | ] 104 | ++ lib.optionals enableJdk [ 105 | ''--set JAVA_HOME "${jdk.home}"'' 106 | ]; 107 | 108 | meta.mainProgram = "ctadl"; 109 | }; 110 | in 111 | pkg.overrideAttrs withPythonWheel 112 | -------------------------------------------------------------------------------- /taint-front/summarytest.ml: -------------------------------------------------------------------------------- 1 | open! Taintlang 2 | open! Syntax 3 | 4 | module Seq = struct 5 | include Seq 6 | 7 | let mapi f s = 8 | Seq.unfold (fun (i, s) -> 9 | let open Seq in 10 | match s () with 11 | | Nil -> None 12 | | Cons (x, rest) -> begin 13 | let elt = f (i, x) in 14 | let next = (i+1, rest) in 15 | Some (elt, next) 16 | end 17 | ) (0, s) 18 | end 19 | 20 | let f ~num_assigns ~num_calls ?(distinct=false) () = 21 | let _ = ignore (distinct) in 22 | let gentup seed ~id = 23 | Seq.unfold (fun n -> 24 | match n with 25 | | 0 -> None 26 | | _ -> Some ( 27 | let elt = ((Format.asprintf "%s%d" id n, []), (Format.asprintf "%s%d" id (n+1), [])) in 28 | let next = n-1 in 29 | (elt, next) 30 | ) 31 | ) seed 32 | in 33 | let _genargs seed ~id = 34 | Seq.unfold (fun n -> 35 | match n with 36 | | 0 -> None 37 | | _ -> Some ( 38 | let elt = (Format.asprintf "%s%d" id n, Format.asprintf "%s%d" id (n+1)) in 39 | let next = n-1 in 40 | (elt, next) 41 | ) 42 | ) seed 43 | in 44 | let genstmt n = 45 | Seq.map (fun (x,y) -> Assign (x,y)) (gentup ~id:"x" n) 46 | in 47 | let gencall n = 48 | Seq.map (fun (x,y) -> 49 | (Call (None, ("f1", []), [x; y])) 50 | ) (gentup ~id:"a" n) 51 | in 52 | let gencalls n = 53 | Seq.mapi (fun (n, (x,y)) -> 54 | (Call (None, (Format.asprintf "f%d" (n+1), []), 
[x; y])) 55 | ) (gentup ~id:"a" n) 56 | in 57 | let genfuncs count = 58 | Seq.unfold (fun n -> 59 | if n = 0 then None 60 | else Some ( 61 | let elt = Fn { 62 | name= Format.asprintf "f%d" n; 63 | formals= ["x1"; Format.asprintf "x%d" (num_assigns+1)]; 64 | body= List.of_seq @@ genstmt num_assigns; 65 | } 66 | in 67 | let next = n-1 in 68 | (elt, next) 69 | ) 70 | ) count 71 | in 72 | (List.of_seq @@ genfuncs (if distinct then num_calls else 1)) @ 73 | [ 74 | Fn { 75 | name= "main"; 76 | formals= []; 77 | body= List.concat [ 78 | [Call (Some (Format.asprintf "a%d" (num_calls+1), []), ("source", []), [("Network", [])])]; 79 | List.of_seq @@ (if not distinct then gencall else gencalls) num_calls; 80 | [Call (None, ("sink", []), [("a1", []); ("Network", [])])]; 81 | ]; 82 | 83 | }; 84 | ] 85 | 86 | let _ = 87 | let num_assigns = ref 1 in 88 | let num_calls = ref 1 in 89 | let distinct = ref false in 90 | let print = ref false in 91 | Arg.parse [ 92 | "--num-assigns", Arg.Set_int num_assigns, " number of assignments in f"; 93 | "--num-calls", Arg.Set_int num_calls, " number of calls to f"; 94 | "--distinct", Arg.Set distinct, " false=all calls to f, else distinct calls"; 95 | "--print", Arg.Set print, " print ast"; 96 | ] (fun _ -> ()) 97 | "Generate function summarization benchmark"; 98 | let ast = 99 | let num_assigns = !num_assigns in 100 | let num_calls = !num_calls in 101 | let distinct = !distinct in 102 | f ~num_assigns ~num_calls ~distinct () 103 | in 104 | let out_dir = ref "facts" in 105 | if !print then Format.printf "%a" pp ast; 106 | GenFacts.process !out_dir ast 107 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/jadx/jadx-fact-decls.dl: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #define JADX_THIS_ARG_INDEX (0) 3 | #define JADX_RET_ARG_INDEX (-1) 4 | #define JADX_UNDEFINED_SOURCE_LINE (-1) 5 | #define JADX_STRING_TYPE "Ljava/lang/String;" 
6 | #define JADX_TRUE "true" 7 | #define JADX_FALSE "false" 8 | 9 | .decl _CTADLLanguage(language:symbol) 10 | .decl _ActualParam(stmt:symbol, argN:number, arg:symbol) 11 | .decl _FormalParam(method:symbol, argN:number, formal:symbol) 12 | .decl _ThisParam(method:symbol, this:symbol) 13 | .decl _ReturnStmt(retStmt:symbol, retVar:symbol) 14 | .decl _Method(method:symbol, simpleName:symbol, declClass:symbol, returnType:symbol, descriptor:symbol, arity:number) 15 | .decl _MethodInvocation(stmt:symbol, callee:symbol, isResolved:symbol, declClass:symbol, simpleName:symbol, descriptor:symbol, invokeType:symbol) 16 | .decl _MethodInvocationReturn(stmt:symbol, retVar:symbol) 17 | .decl _ExternalMethod(externalMethod:symbol) 18 | .decl _Move(stmt:symbol, to:symbol, from:symbol) 19 | .decl _StmtInMethod(stmt:symbol, index:number, method:symbol) 20 | .decl _StmtSourceLine(stmt:symbol, file:symbol, line:number, column:number, offset:number) 21 | .decl _ClassFileName(class:symbol, fileName:symbol) 22 | .decl _VarInMethod(localVar:symbol, method:symbol) 23 | .decl _VarHasType(var:symbol, type:symbol) 24 | .decl _VarHasInternalName(var:symbol, name:symbol) 25 | .decl _VarHasName(var:symbol, name:symbol) 26 | .decl _StaticGet(stmt:symbol, class:symbol, field:symbol, fieldType:symbol, toVar:symbol) 27 | .decl _StaticPut(stmt:symbol, class:symbol, field:symbol, fieldType:symbol, fromVar:symbol) 28 | .decl _StaticExternalField(class:symbol, field:symbol, type:symbol) 29 | .decl _InstanceField(class:symbol, field:symbol, type:symbol) 30 | .decl _IPut(stmt:symbol, class:symbol, field:symbol, toVar:symbol, fromVar:symbol) 31 | .decl _IGet(stmt:symbol, class:symbol, field:symbol, toVar:symbol, fromVar:symbol) 32 | .decl _StmtInBasicblock(stmt:symbol, basic_block:symbol) 33 | .decl _IfStmt(stmt:symbol, ifop:symbol, arg1:symbol, arg2:symbol, if_block:symbol, else_block:symbol) 34 | .decl _TernaryStmt(stmt:symbol, ternary_cond:symbol, if_var:symbol, else_var:symbol) 35 | .decl 
_TernaryArg(stmt:symbol, ternary_arg:symbol) 36 | .decl _SwitchStmt(stmt:symbol, arg:symbol) 37 | .decl _SwitchTarget(stmt:symbol, key:number, target_block:symbol) 38 | .decl _TypeInstance(stmt:symbol, var:symbol, type:symbol) 39 | .decl _AGet(stmt:symbol, index:number, toVar:symbol, fromVar:symbol) 40 | .decl _APut(stmt:symbol, index:number, toVar:symbol, fromVar:symbol) 41 | .decl _PhiAssign(phi_stmt:symbol, ssa_from_var:symbol, src_block:symbol) 42 | .decl _VarIsConst(constVar:symbol, constValue:symbol) 43 | .decl _FieldIsFinal(class:symbol, field:symbol) 44 | .decl _FieldConstInit(class:symbol, field:symbol, constVar:symbol) 45 | .decl _DirectSuperclass(superclassType:symbol, subclassType:symbol) 46 | .decl _SuperInterface(superinterfaceType:symbol, subclassType:symbol) 47 | .decl _ClassHasName(class:symbol, className:symbol) 48 | .decl _ClassDefinedIn(class:symbol, fileName:symbol, fileLine:number) 49 | .decl _InterfaceType(interfaceType:symbol) 50 | .decl _MethodImplemented(class:symbol, mthShortName:symbol, paramList:symbol, mth:symbol) 51 | .decl _ManifestRoot(nodeId:symbol) 52 | .decl _ManifestNode(nodeId:symbol, nodeName:symbol) 53 | .decl _ManifestNodeChild(nodeId:symbol, childId:symbol) 54 | .decl _ManifestNodeAttr(nodeId:symbol, key:symbol, value:symbol) 55 | .decl _TopParentClass(class:symbol, topParentClass:symbol) 56 | .decl _InsnBytecodeLocation(insn:symbol, artifactUri:symbol, artifactUriBaseId:symbol, byteOffset:number, byteLength:number) 57 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph/fieldprop.dl: -------------------------------------------------------------------------------- 1 | // - means path is empty 2 | // * means path is non-empty 3 | // p3 is always nonempty or else the rule is REDUNDANT 4 | // p1 p2 p3 5 | // A - - - REDUNDANT 6 | // B - - * 7 | // C - * - REDUNDANT 8 | // D - * * 9 | // E * - - REDUNDANT 10 | // F * - * 11 | // G * * - REDUNDANT 12 | // H * * * 13 | 
14 | // forces isComputedAccessPath and propagation rules to be mutually recursive 15 | // but shouldn't actually add any tuple. 16 | //isComputedAccessPath(cat(ap, ".NO")) :- 17 | // VirtualAssign(_, _, ap, _, _, _, ""), 18 | // contains("*********", ap). 19 | 20 | // Forward 21 | 22 | VirtualAssign(s, v1, p13, v2, p23, "fp", ctx) :- //B 23 | Vertex(v2, p23), 24 | VirtualAssign(s, v1, p1, v2, p2, _ty, ctx), 25 | p1 = "", 26 | p2 = "", 27 | p13 = p23, 28 | !CVar_isGlobal(v1), 29 | !CVar_isGlobal(v2), 30 | (v1 != v2; p13 != p23). 31 | .plan 1: (2, 1) 32 | VirtualAssign(s, v1, p13, v2, p23, "fp", ctx) :- //D 33 | Vertex(v2, p23), 34 | VirtualAssign(s, v1, p1, v2, p2, _ty, ctx), 35 | p1 = "", 36 | p2 != "", 37 | ComputedAccessPathNonEmptySplit(p23, p2, p3), 38 | p13 = p3, 39 | isComputedAccessPath(p13), 40 | !CVar_isGlobal(v1), 41 | !CVar_isGlobal(v2), 42 | (v1 != v2; p13 != p23). 43 | .plan 1: (2, 1, 3, 4), 2: (3, 1, 2, 4), 3: (4, 3, 1, 2) 44 | VirtualAssign(s, v1, p13, v2, p23, "fp", ctx) :- //F 45 | Vertex(v2, p23), 46 | VirtualAssign(s, v1, p1, v2, p2, _ty, ctx), 47 | p1 != "", 48 | p2 = "", 49 | p23 = p3, 50 | ComputedAccessPathNonEmptySplit(p13, p1, p3), 51 | !CVar_isGlobal(v1), 52 | !CVar_isGlobal(v2), 53 | (v1 != v2; p13 != p23). 54 | .plan 1: (2, 1, 3), 2: (3, 2, 1) 55 | VirtualAssign(s, v1, p13, v2, p23, "fp", ctx) :- //H 56 | Vertex(v2, p23), 57 | ComputedAccessPathNonEmptySplit(p23, p2, p3), 58 | VirtualAssign(s, v1, p1, v2, p2, _ty, ctx), 59 | ComputedAccessPathNonEmptySplit(p13, p1, p3), 60 | !CVar_isGlobal(v1), 61 | !CVar_isGlobal(v2), 62 | (v1 != v2; p13 != p23). 63 | .plan 1: (2, 3, 1, 4), 2: (3, 1, 2, 4), 3: (4, 3, 2, 1) 64 | 65 | 66 | 67 | // Backward 68 | 69 | VirtualAssign(s, v2, p23, v1, p13, "fp", ctx) :- //B 70 | Vertex(v2, p23), 71 | VirtualAssign(s, v2, p2, v1, p1, _ty, ctx), 72 | p1 = "", 73 | p2 = "", 74 | p13 = p23, 75 | !CVar_isGlobal(v1), 76 | !CVar_isGlobal(v2), 77 | (v1 != v2; p13 != p23). 
78 | .plan 1: (2, 1) 79 | VirtualAssign(s, v2, p23, v1, p13, "fp", ctx) :- //D 80 | Vertex(v2, p23), 81 | VirtualAssign(s, v2, p2, v1, p1, _ty, ctx), 82 | p1 = "", 83 | ComputedAccessPathNonEmptySplit(p23, p2, p3), 84 | p13 = p3, 85 | isComputedAccessPath(p13), 86 | !CVar_isGlobal(v1), 87 | !CVar_isGlobal(v2), 88 | (v1 != v2; p13 != p23). 89 | .plan 1: (2, 1, 3, 4), 2: (3, 1, 2, 4), 3: (4, 3, 1, 2) 90 | VirtualAssign(s, v2, p23, v1, p13, "fp", ctx) :- //F 91 | Vertex(v2, p23), 92 | VirtualAssign(s, v2, p2, v1, p1, _ty, ctx), 93 | p2 = "", 94 | p23 = p3, 95 | ComputedAccessPathNonEmptySplit(p13, p1, p3), 96 | !CVar_isGlobal(v1), 97 | !CVar_isGlobal(v2), 98 | (v1 != v2; p13 != p23). 99 | .plan 1: (2, 1, 3), 2: (3, 2, 1) 100 | VirtualAssign(s, v2, p23, v1, p13, "fp", ctx) :- //H 101 | Vertex(v2, p23), 102 | ComputedAccessPathNonEmptySplit(p23, p2, p3), 103 | VirtualAssign(s, v2, p2, v1, p1, _ty, ctx), 104 | ComputedAccessPathNonEmptySplit(p13, p1, p3), 105 | !CVar_isGlobal(v1), 106 | !CVar_isGlobal(v2), 107 | (v1 != v2; p13 != p23). 
108 | .plan 1: (2, 1, 3, 4), 2: (3, 2, 1, 4), 3: (4, 3, 2, 1) 109 | -------------------------------------------------------------------------------- /utils/souffle_windows_build.ps1: -------------------------------------------------------------------------------- 1 | # install visual studio with C++ desktop build stuff so you have the MSVC compiler 2 | # if needed, apply license key with help -> register visual studio 3 | # install vcpkg; make sure to do right click, properties, click unblock and apply in scripts/bootstrap.ps1 and anything else this is required for 4 | # > git clone https://github.com/microsoft/vcpkg.git && cd vcpkg && bootstrap-vcpkg.bat 5 | # then, put vcpkg dir somewhere and add it to your path (vcpkg.exe, built from above, needs to know where the vcpkg dir is, so you can't just put it by itself in System32 for example) 6 | # run this in powershell to download chocolatey 7 | # > [System.Net.ServicePointManager]::SecurityProtocol = [System.Net.ServicePointManager]::SecurityProtocol -bor 3072; iex ((New-Object System.Net.WebClient).DownloadString('https://community.chocolatey.org/install.ps1')) 8 | # install souffle binary dependencies; choco-packages.config is in the root of the souffle source dir 9 | # > choco install choco-packages.config -y --no-progress --installargs 'ADD_CMAKE_TO_PATH=""System""' 10 | # then, with the above done, run the following in the souffle source directory 11 | # > vcpkg new --application 12 | # > vcpkg add port sqlite3 zlib libffi 13 | # then, run this script in that directory 14 | $ErrorActionPreference = "Stop" 15 | echo "Make sure you are running this in the souffle source dir and that you have followed the setup instructions in the comments at the top of this script first" 16 | echo "Also, make sure to set the VS_VERSION variable (commented out below as an example)" 17 | # Help -> About Microsoft Visual Studio 18 | #$VS_VERSION = "Visual Studio 17 2022" 19 | if (-not (Test-Path variable:VS_VERSION)) { 20 | echo 
'Set the VS_VERSION environment variable to your visual studio version (SET VS_VERSION=... in bat or $VS_VERSION = "" in ps)' 21 | Exit 22 | } 23 | 24 | # build with cmake (under powershell, in souffle dir; Visual Studio 16 2019 32-bit and Visual Studio 17 2022 64-bit have both worked) 25 | mkdir build -ErrorAction SilentlyContinue 26 | $env:ChocolateyInstall = Convert-Path "$((Get-Command choco).Path)\..\.." 27 | Import-Module "$env:ChocolateyInstall\helpers\chocolateyProfile.psm1" 28 | refreshenv 29 | # this gets it from the exe in your path, but you can also just set VCPKG_ROOT manually to your vcpkg source directory 30 | $VCPKG_ROOT = (Get-Item (gcm vcpkg).Path).DirectoryName 31 | # unblock vcpkg\scripts\buildsystems\msbuild\applocal.ps1 first (see above) 32 | # I got errors with cmake finding gzip.exe, choco install --force gzip to force reinstall it worked (it downloaded files but didn't run the install script for some reason) 33 | # -DCMAKE_BUILD_TYPE=Release 34 | cmake -S . -B build -G "${VS_VERSION}" -A x64 "-DCMAKE_TOOLCHAIN_FILE=${VCPKG_ROOT}/scripts/buildsystems/vcpkg.cmake" -DCMAKE_BUILD_TYPE=Debug -DCMAKE_CXX_FLAGS=/bigobj -DSOUFFLE_DOMAIN_64BIT=ON -DCMAKE_FIND_LIBRARY_PREFIXES=";lib" -DCMAKE_FIND_LIBRARY_SUFFIXES=".lib;.dll" -DSOUFFLE_USE_CURSES=OFF -DSOUFFLE_USE_ZLIB=ON -DCMAKE_FIND_DEBUG_MODE=FALSE -DSOUFFLE_BASH_COMPLETION=OFF 35 | $nthreads = (Get-CimInstance Win32_ComputerSystem).NumberOfLogicalProcessors 36 | cmake --build build --config Release -j ${nthreads} 37 | # now, find the .exe in build\src\souffle.exe 38 | 39 | # run tests (in souffle source dir build from above); replace visual studio path with your path 40 | # Visual Studio must be in the environment because cl.exe is required for compiled Souffle. 
41 | # these will fail unless you provide mcpp.exe or another supported C preprocessor in your path 42 | # > & "$env:ProgramFiles\Microsoft Visual Studio\2022\Professional\VC\Auxiliary\Build\vcvars64.bat" 43 | # > ctest --output-on-failure --build-config Release --progress -j4 -L interpreted 44 | # > ctest --output-on-failure --build-config Release --progress -j2 -LE interpreted -------------------------------------------------------------------------------- /indexers.nix: -------------------------------------------------------------------------------- 1 | { 2 | ctadl, 3 | lib, 4 | callPackage, 5 | stdenv, 6 | souffle, 7 | ... 8 | }: let 9 | libfunctorsPkg = {souffle}: let 10 | shlibFlags = 11 | if stdenv.isDarwin 12 | then ''-dynamiclib -install_name @executable_path/libfunctors.dylib'' 13 | else ''-shared -Wl,-soname,'$ORIGIN'/libfunctors.so''; 14 | shlibExt = 15 | if stdenv.isDarwin 16 | then "dylib" 17 | else "so"; 18 | in 19 | stdenv.mkDerivation { 20 | pname = "ctadl-libfunctors"; 21 | version = "${ctadl.version}"; 22 | nativeBuildInputs = [souffle]; 23 | dontUnpack = true; 24 | buildPhase = '' 25 | set -x 26 | c++ -O2 --std=c++17 -I ${souffle}/include -I ${souffle}/include/souffle ${./src/ctadl/souffle-logic/functors.cpp} -c -fPIC -o functors.o 27 | c++ ${shlibFlags} -o libfunctors.${shlibExt} functors.o 28 | set +x 29 | ''; 30 | 31 | installPhase = '' 32 | mkdir $out 33 | cp libfunctors.${shlibExt} $out/ 34 | ''; 35 | }; 36 | libfunctors = callPackage libfunctorsPkg {}; 37 | # This package is to avoid datalog recompilation unless it's necessary. 38 | souffleCompileDatalog = { 39 | ctadl, 40 | python3, 41 | mcpp, 42 | datalogSpec, 43 | lib, 44 | souffle, 45 | libfunctors, 46 | souffleMacros ? 
[], 47 | }: let 48 | defaultMacros = [ 49 | "CTADL_VERSION=${ctadl.version}" 50 | "CTADL_OUTPUT_DB=ctadlir.db" 51 | "CTADL_INPUT_DB=ctadlir.db" 52 | ]; 53 | #finalMacros = lib.strings.concatStringsSep " " (defaultMacros ++ (datalogSpec.macros or [])); 54 | finalMacros = lib.strings.concatStringsSep " " (map (m: "-D" + m) (defaultMacros ++ (datalogSpec.macros or []))); 55 | in 56 | stdenv.mkDerivation { 57 | pname = "ctadl-analysis-${datalogSpec.language}"; 58 | version = "${ctadl.version}"; 59 | src = with builtins; 60 | builtins.path { 61 | path = ./src/ctadl/souffle-logic; 62 | name = "${datalogSpec.language}-${ctadl.version}"; 63 | filter = path: type: type != "regular" || lib.strings.hasSuffix ".dl" path; 64 | }; 65 | nativeBuildInputs = [souffle libfunctors python3 ctadl]; 66 | 67 | buildPhase = '' 68 | ctadl dump-analysis --language ${datalogSpec.language} index -o analysis.dl 69 | echo $(python3 ${./utils/hashdl.py} analysis.dl) 70 | souffle -I. -L${libfunctors} -j8 analysis.dl -o "$(python3 ${./utils/hashdl.py} analysis.dl)" 71 | ctadl dump-analysis --language ${datalogSpec.language} query -o taintquery.dl 72 | echo $(python3 ${./utils/hashdl.py} taintquery.dl) 73 | souffle -I. 
-L${libfunctors} -j8 taintquery.dl -o "$(python3 ${./utils/hashdl.py} taintquery.dl)" 74 | ''; 75 | 76 | installPhase = '' 77 | mkdir $out 78 | cp "$(python3 ${./utils/hashdl.py} analysis.dl)" $out 79 | cp "$(python3 ${./utils/hashdl.py} taintquery.dl)" $out 80 | ''; 81 | }; 82 | dlSpecs = [ 83 | {language = "jadx";} 84 | {language = "taint-front";} 85 | {language = "pcode";} 86 | ]; 87 | indexers = map (datalogSpec: callPackage souffleCompileDatalog {inherit libfunctors datalogSpec;}) dlSpecs; 88 | indexersPkg = stdenv.mkDerivation { 89 | pname = "ctadl-analysis"; 90 | version = "${ctadl.version}"; 91 | dontUnpack = true; 92 | dontBuild = true; 93 | installPhase = '' 94 | mkdir -p $out/ctadl/analysis/${ctadl.version} 95 | # Setuptools wants to build things in build/lib 96 | for d in ${lib.strings.concatStringsSep " " indexers}; do 97 | cp -f $d/* $out/ctadl/analysis/${ctadl.version} 98 | done 99 | cp -f ${libfunctors}/* $out/ctadl/analysis/${ctadl.version} 100 | ''; 101 | }; 102 | in 103 | indexersPkg 104 | -------------------------------------------------------------------------------- /src/ctadl/souffle-logic/graph/slice/callee.dl: -------------------------------------------------------------------------------- 1 | // This forward slice is really just half a slice -- it's the part that follows 2 | // "call" edges, i.e., forward from callers to callees. It's intended to be 3 | // used whenever some other rules have already computed summaries and we just 4 | // want to push the summaries forward in execution. 5 | 6 | // --------------------------------------------------------------------------- 7 | // macros for overriding 8 | 9 | // by default, we assume that VirtualAssign is recursive. So the default plan 10 | // will prioritize it for deltas. 
11 | #ifndef FS_VirtualAssignPlan 12 | #define FS_VirtualAssignPlan .plan 1: (2, 1) 13 | #endif 14 | 15 | #ifndef FS_ICPlan 16 | #define FS_ICPlan .plan 1: (4, 2, 1, 3) 17 | #endif 18 | 19 | #ifndef FS_CallFlowEdgePlan 20 | #define FS_CallFlowEdgePlan .plan 1: (2, 1, 3, 4) 21 | #endif 22 | 23 | .comp SliceCallee { 24 | 25 | // --------------------------------------------------------------------------- 26 | // inputs 27 | 28 | .comp Input { 29 | // computes flows from root vertex 30 | .decl StartVertex(v: CVar, p: CAccessPath, label: SliceLabelType) inline 31 | } 32 | 33 | .init input_ = Input 34 | 35 | // --------------------------------------------------------------------------- 36 | // outputs 37 | 38 | .decl isReachable(v: CVar, p: CAccessPath, label: SliceLabelType) inline 39 | isReachable(v, p, label) :- ReachableVertex(_, v, p, label). 40 | 41 | .decl ReachableVertex( 42 | id: number, 43 | v1: CVar, p1: CAccessPath, label: SliceLabelType 44 | ) 45 | choice-domain (v1, p1, label) 46 | 47 | #define ReachableVertexId(v, p, label) \ 48 | ord(cat(v, ":", p, ":", label)) 49 | 50 | // start vertex is reachable 51 | Vertex(v, p), 52 | ReachableVertex(ReachableVertexId(v, p, label), v, p, label) :- 53 | input_.StartVertex(v, p, label). 54 | 55 | // reachable across intraprocedural edge 56 | ReachableVertex(vertex_to, v1, p1, tag) :- 57 | ReachableVertex(_, v2, p2, tag), 58 | VirtualAssign(_, v1, p1, v2, p2, _, _), 59 | vertex_to = ReachableVertexId(v1, p1, tag). 60 | FS_VirtualAssignPlan 61 | 62 | // reachable across call 63 | ReachableVertex(vertex_to, v_to, p_to, label) :- 64 | ReachableVertex(_, v_from, p_from, label), 65 | CallFlowEdge(_, v_to, p_to, v_from, p_from), 66 | vertex_to = ReachableVertexId(v_to, p_to, label). 
67 | .plan 1: (2, 1) 68 | 69 | // --------------------------------------------------------------------------- 70 | // support 71 | 72 | // an edge between the vertices of caller and callee, created because at least 73 | // one vertex is tainted 74 | .decl CallFlowEdge( 75 | insn: CInsn, v_callee: CVar, p_callee: CAccessPath, v_caller: CVar, p_caller: CAccessPath 76 | ) 77 | // call arg n. ++

tainted, potential call flow 78 | .decl IC(call: CInsn, n: number, p: CAccessPath) 79 | 80 | // call is tainted because caller actual is tainted 81 | // the suffix p2 of the n'th actual is tainted 82 | IC(call, n, p2) :- 83 | ReachableVertex(_, v, p12, _label), 84 | CCall_ActualParam(call, n, v, p1), 85 | MatchComputedPrefix(p12, p1, p2), 86 | CallEdge(call, _id, ""). 87 | FS_ICPlan 88 | 89 | // propagate vertex to callee because this increases precision for tainted things. 90 | Vertex(v_callee, p2), 91 | // call flow edge enabled by reachable(actual_param) 92 | // this concatenation is safe because the precondition of IC is that p2 is the tainted suffix of p1 93 | CallFlowEdge(call, v_callee, p2, v_caller, cat(p1, p2)) :- 94 | // the path p12 of the n'th actual is tainted 95 | IC(call, n, p2), 96 | CCall_ActualParam(call, n, v_caller, p1), 97 | CallEdge(call, id, ""), 98 | CFunction_FormalParam(id, n, v_callee). 99 | FS_CallFlowEdgePlan 100 | 101 | #ifdef ALL_OUTPUTS 102 | .output IC(CTADL_OUTPUT_DB_IO) 103 | .output CallFlowEdge(CTADL_OUTPUT_DB_IO) 104 | #endif 105 | 106 | } 107 | -------------------------------------------------------------------------------- /docs/usage.rst: -------------------------------------------------------------------------------- 1 | Usage 2 | ===== 3 | 4 | Import the SUT 5 | ---------------------------- 6 | 7 | A system under test (SUT) must be imported to be used with CTADL. 8 | The general form of the command is: 9 | 10 | .. code-block:: sh 11 | 12 | ctadl import LANGUAGE ARTIFACT -o IMPORT_DIR 13 | 14 | ``ctadl import --help`` lists, among other things, the languages 15 | your installation supports. 16 | The language is the language of the SUT. 17 | Artifacts are specific to the language, as you'll see below. 18 | Importing creates a directory, ``IMPORT_DIR``, with a variety of results. 19 | 20 | - The ``facts`` subdir represents the entire native program in a TSV 21 | (tab-separated values) format, suitable for input to CTADL. 
22 | This format is typically referred to as Datalog "facts." 23 | - Other subdirs, such as ``sources``, contain decompiled output 24 | 25 | Analyze Android APKs and Java bytecode 26 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 27 | 28 | To import ``myapp.apk``, you’d execute: 29 | 30 | .. code-block:: sh 31 | 32 | ctadl import jadx myapp.apk -o IMPORT_DIR 33 | 34 | This creates an ``IMPORT_DIR`` directory with everything needed to run 35 | CTADL for that ``myapp.apk``. It includes decompiled sources (in the 36 | ``sources`` subdir). 37 | 38 | Ghidra PCODE 39 | ^^^^^^^^^^^^ 40 | 41 | To decompile and import ``/usr/bin/ls``: 42 | 43 | .. code-block:: sh 44 | 45 | ctadl import pcode /usr/bin/ls 46 | 47 | .. note:: 48 | 49 | Importing a binary through Ghidra requires that Ghidra is 50 | installed and that the ``GHIDRA_HOME`` environment variable is 51 | set properly, typically to ``GHIDRA/lib/ghidra`` where GHIDRA 52 | is the place where Ghidra was extracted. 53 | 54 | Index the SUT 55 | ---------------- 56 | 57 | Indexing runs our compositional data flow analysis over the entire 58 | SUT. 59 | 60 | Run the CTADL indexer with: 61 | 62 | .. code-block:: sh 63 | 64 | ctadl [--directory IMPORT_DIR] index 65 | 66 | By default, it looks for the import in the current directory, but you 67 | can provide a path, too. The indexing process autodetects the import 68 | language. 69 | 70 | First, CTADL generates an ``index.dl`` containing the Datalog code for 71 | the indexer. CTADL then checks whether it’s compiled an indexer for this 72 | language before. If not, it calls out to Souffle to compile the indexer, 73 | then runs it. 74 | 75 | Next, this command creates an index, a sqlite database file 76 | ``ctadlir.db``. The index contains a data flow graph, a call graph, and 77 | other analysis artifacts. The filename is unfortunately *not* 78 | configurable due to the limitations of the Souffle Datalog engine’s 79 | compiler. 
To optimize indexing, ensure that the index is not being 80 | written to over the network. You can pass ``-j`` to set the number of 81 | cores to use. I’d recommend using as many as you can. 82 | 83 | Indexing can take some time and unfortunately there’s no good way to 84 | measure its progress. We print a live view of resources consumed, 85 | including load average and RAM consumption (if ``psutil`` is installed). 86 | 87 | Query the SUT: Run Taint Analysis 88 | -------------------------------------------- 89 | 90 | Run a CTADL query with the command: 91 | 92 | .. code-block:: sh 93 | 94 | $ ctadl query [models.json] 95 | 96 | CTADL reads the index from ``ctadlir.db`` and performs taint analysis. 97 | It creates a ``query.dl`` file containing the complete Datalog code for 98 | the query. It prints a summary of the paths, sources, sinks, and taint 99 | labels found. CTADL outputs the query results into ``ctadlir.db``. You 100 | can skip the query analysis with ``--skip`` if it’s already cached in 101 | the index. 102 | 103 | Without a ``models.json`` argument, CTADL chooses a default query. The 104 | default query uses a pre-selected, language-specific set of interesting 105 | sources and sinks. 
106 | -------------------------------------------------------------------------------- /tests/custom_index.dl: -------------------------------------------------------------------------------- 1 | 2 | // alternative ways to designate source vertices 3 | 4 | // taint originates in any byref formal of the function 5 | .decl TaintSourceMethod(tag: TaintLabelType, method: CFunction) 6 | // taint originates in parameter index of the function 7 | .decl TaintSourceMethodArg(tag: TaintLabelType, index: number, method: CFunction) 8 | // taint originates at the call statement 9 | .decl TaintSourceCallArg(tag: TaintLabelType, index: number, call: CInsn) 10 | 11 | 12 | // alternative ways to designate sink vertices 13 | 14 | // report a leak if tainted data reaches any argument of the function 15 | .decl LeakingSinkMethod(tag: TaintLabelType, method: CFunction) 16 | // report a leak if tainted data reaches the indexed argument of the function 17 | .decl LeakingSinkMethodArg(tag: TaintLabelType, index: number, method: CFunction) 18 | // report a leak if tainted data reaches the indexed argument of the call 19 | .decl LeakingSinkCallArg(tag: TaintLabelType, index: number, call: CInsn) 20 | 21 | // if n2 is tainted, taint n1 at the call site 22 | .decl TaintTransferCallArg(stmt: CInsn, n1: number, n2: number) 23 | .decl TaintTransferCallArgs(call1: CInsn, n1: number, call2: CInsn, n2: number) 24 | 25 | // alternative ways to designate sanitizer edges 26 | .decl TaintSanitizeFunctionArg(tag: TaintLabelType, function: CFunction, input_index: number, output_index: number) 27 | 28 | // --------------------------------------------------------------------------- 29 | // source rules 30 | 31 | TaintSourceMethodArg(tag, n, mid) :- 32 | TaintSourceMethod(tag, mid), 33 | CFunction_FormalParam(mid, n, _), 34 | CFunction_isFormalParamByRef(mid, n), 35 | !CGlobalParameter(n). 
36 | 37 | /* TaintSourceVertex(tag, v, p) :- */ 38 | /* TaintSourceMethodArg(tag, n, mid), */ 39 | /* forward_flow.input_.CallEdge(call_stmt, mid), */ 40 | /* forward_flow.input_.ActualParam(call_stmt, n, v, p). */ 41 | 42 | TaintSourceVertex(label, param, "") :- 43 | TaintSourceMethodArg(label, n, fid), 44 | CFunction_FormalParam(fid, n, param). 45 | 46 | TaintSourceVertex(tag, v, p) :- 47 | TaintSourceCallArg(tag, n, stmt), 48 | CCall_ActualParam(stmt, n, v, p). 49 | 50 | // --------------------------------------------------------------------------- 51 | // sink rules 52 | 53 | LeakingSinkMethodArg(t, n, sink_mid) :- 54 | LeakingSinkMethod(t, sink_mid), 55 | CFunction_FormalParam(sink_mid, n, _), 56 | !CReturnParameter(n), 57 | !CGlobalParameter(n). 58 | 59 | // i think methodarg should set up the formal as the sink, but keeping this 60 | // here for now in case i'm wrong 61 | /* LeakingSinkVertex(tag, v, p) :- */ 62 | /* LeakingSinkMethodArg(tag, n, mid), */ 63 | /* forward_flow.input_.CallEdge(call, mid), */ 64 | /* forward_flow.input_.ActualParam(call, n, v, p). */ 65 | 66 | // below, we set up the ap suffixes of formals and actuals, respectively, as 67 | // sinks. this is because it seems intuitive that if you want to know if 68 | // tainted data is passed to some function, you don't care whether the taint is 69 | // on a field or on the object. 70 | 71 | // if users desire more fine grained control, they can just populate 72 | // LeakingSinkVertex directly. 73 | 74 | // sets up every ap to be a sink 75 | LeakingSinkVertex(label, param, ap) :- 76 | LeakingSinkMethodArg(label, n, fid), 77 | CFunction_FormalParam(fid, n, param), 78 | Vertex(param, ap). 79 | 80 | // sets up any suffix of the actual ap to be a sink 81 | LeakingSinkVertex(tag, v, taint_ap) :- 82 | LeakingSinkCallArg(tag, index, call), 83 | CCall_ActualParam(call, index, v, actual_ap), 84 | MatchComputedPrefix(taint_ap, actual_ap, _suffix), 85 | Vertex(v, taint_ap). 
86 | 87 | // --------------------------------------------------------------------------- 88 | // sanitizer rules 89 | 90 | TaintSanitizeEdge(label, var_out, ap_out, var_in, ap_in) :- 91 | TaintSanitizeFunctionArg(label, function, input_index, output_index), 92 | CInsn_Call(call, function), 93 | CCall_ActualParam(call, input_index, var_in, ap_in), 94 | CCall_ActualParam(call, output_index, var_out, ap_out). 95 | 96 | --------------------------------------------------------------------------------