├── .github
    └── workflows
    │   └── check.yml
├── .gitignore
├── .ocamlformat
├── Makefile
├── README.md
├── build.sh
├── dune
├── dune-project
├── script
    └── check-format
├── src
    ├── bugDesc.ml
    ├── cmdline.ml
    ├── coverage.ml
    ├── dune
    ├── instrument.ml
    ├── localizer.ml
    ├── logging.ml
    ├── main.ml
    ├── scenario.ml
    ├── utils.ml
    └── visualizer.ml
├── test
    └── simple1
    │   ├── Makefile
    │   ├── bug_desc.json
    │   ├── compile.sh
    │   ├── src
    │       └── bug.c
    │   └── test.sh
└── unival-docker
    ├── Dockerfile
    └── src
        ├── ProcessDataProfile.java
        ├── RFC.R
        └── StructuredDataCollector.java


/.github/workflows/check.yml:
--------------------------------------------------------------------------------
 1 | name: check
 2 | 
 3 | on: [push, pull_request]
 4 | 
 5 | jobs:
 6 |   check:
 7 |     name: check-format
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |       - name: Checkout code
11 |         uses: actions/checkout@v2
12 | 
13 |       - name: Set up OCaml
14 |         uses: ocaml/setup-ocaml@v2
15 |         with:
16 |           ocaml-compiler: 4.13.0
17 | 
18 |       - name: Install dependencies
19 |         run: opam install ocamlformat.0.20.1
20 | 
21 |       - name: Check format
22 |         # Literal block scalar: keeps the two commands on separate lines.
23 |         # A plain multi-line scalar would be folded into one command line.
24 |         run: |
25 |           eval $(opam env)
26 |           script/check-format
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | _build
 2 | localizer
 3 | localizer-out
 4 | sparrow-out
 5 | coverage.xml
 6 | *.gc*
 7 | bug
 8 | *.i
 9 | *.o
10 | *.s
11 | CausalMap.txt
12 | *.dot
13 | *.svg
14 | visualizer
15 | 


--------------------------------------------------------------------------------
/.ocamlformat:
--------------------------------------------------------------------------------
 1 | profile=default
 2 | quiet=false
 3 | max-iters=10
 4 | comment-check=true
 5 | wrap-fun-args=true
 6 | wrap-comments=false
 7 | type-decl-indent=2
 8 | type-decl=compact
 9 | stritem-extension-indent=0
10 | space-around-variants=true
11 | space-around-records=true
12 | space-around-lists=true
13 | space-around-arrays=true
14 | single-case=compact
15 | sequence-style=terminator
16 | sequence-blank-line=preserve-one
17 | parse-docstrings=false
18 | parens-tuple-patterns=multi-line-only
19 | parens-tuple=always
20 | parens-ite=false
21 | ocp-indent-compat=false
22 | nested-match=wrap
23 | module-item-spacing=sparse
24 | max-indent=68
25 | match-indent-nested=never
26 | match-indent=0
27 | margin=80
28 | let-module=compact
29 | let-binding-spacing=compact
30 | let-binding-indent=2
31 | let-and=compact
32 | leading-nested-match-parens=false
33 | infix-precedence=indent
34 | indicate-nested-or-patterns=unsafe-no
35 | indicate-multiline-delimiters=no
36 | indent-after-in=0
37 | if-then-else=compact
38 | function-indent-nested=never
39 | function-indent=2
40 | field-space=loose
41 | extension-indent=2
42 | exp-grouping=parens
43 | dock-collection-brackets=true
44 | doc-comments-tag-only=default
45 | doc-comments-padding=2
46 | doc-comments=after-when-possible
47 | disambiguate-non-breaking-match=false
48 | disable=false
49 | cases-matching-exp-indent=normal
50 | cases-exp-indent=4
51 | break-struct=force
52 | break-string-literals=auto
53 | break-sequences=true
54 | break-separators=after
55 | break-infix-before-func=false
56 | break-infix=wrap
57 | break-fun-sig=wrap
58 | break-fun-decl=wrap
59 | break-collection-expressions=fit-or-vertical
60 | break-cases=fit
61 | break-before-in=fit-or-vertical
62 | assignment-operator=end-line
63 | align-variants-decl=false
64 | align-constructors-decl=false
65 | align-cases=false
66 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | MAKE=@make
 2 | DUNE=@dune
 3 | LN=@ln -sf
 4 | RM=@rm
 5 | EXE=localizer
 6 | VISUALIZER=visualizer
 7 | 
 8 | # None of these targets produces a file of its own name, and `test` also
 9 | # collides with the test/ directory, so declare them phony.
10 | .PHONY: all test clean
11 | 
12 | all:
13 | 	$(DUNE) build src/main.exe
14 | 	$(DUNE) build src/visualizer.exe
15 | 	$(LN) _build/default/src/main.exe $(EXE)
16 | 	$(LN) _build/default/src/visualizer.exe $(VISUALIZER)
17 | 
18 | test: all
19 | 	$(MAKE) -C test
20 | 	$(DUNE) test
21 | 
22 | # Remove both symlinks created by `all`, not only $(EXE).
23 | clean:
24 | 	$(MAKE) -C test clean
25 | 	$(DUNE) clean
26 | 	$(RM) -rf $(EXE) $(VISUALIZER)
27 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bug-localizer
2 | 


--------------------------------------------------------------------------------
/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # Create (or reuse) the project's opam switch, install the dependencies
 4 | # and build the project with make.
 5 | 
 6 | set -e
 7 | 
 8 | export OPAMYES=1
 9 | 
10 | NCPU="$(getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)"
11 | OCAML_VERSION="4.13.0"
12 | OPAM_SWITCH=program-repair-project-$OCAML_VERSION
13 | 
14 | opam init --compiler=$OCAML_VERSION -j $NCPU --no-setup
15 | 
16 | switch_exists=no
17 | for installed_switch in $(opam switch list --short); do
18 |   if [[ "$installed_switch" == "$OPAM_SWITCH" ]]; then
19 |     switch_exists=yes
20 |     break
21 |   fi
22 | done
23 | 
24 | if [ "$switch_exists" = "no" ]; then
25 |   opam switch create $OPAM_SWITCH $OCAML_VERSION
26 | else
27 |   opam switch $OPAM_SWITCH
28 | fi
29 | 
30 | # Load the environment of the switch selected above. The variable must be
31 | # OPAM_SWITCH: no other switch variable is defined in this script.
32 | eval $(SHELL=bash opam config env --switch=$OPAM_SWITCH)
33 | 
34 | opam pin add cil https://github.com/prosyslab/cil.git -n
35 | opam install -j $NCPU dune batteries cil ppx_compare ocamlformat merlin yojson xmlm
36 | 
37 | make
38 | 


--------------------------------------------------------------------------------
/dune:
--------------------------------------------------------------------------------
1 | (env
2 |  (dev
3 |   (flags
4 |    (:standard -warn-error -A))))
5 | 


--------------------------------------------------------------------------------
/dune-project:
--------------------------------------------------------------------------------
1 | (lang dune 2.3)
2 | 


--------------------------------------------------------------------------------
/script/check-format:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | PROJECT_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../ && pwd)"
 6 | 
 7 | # Stop at the first source file that differs from ocamlformat's output.
 8 | while IFS= read -r -d '' file; do
 9 |   echo "Checking $file"
10 |   if ! ocamlformat "$file" | diff "$file" -; then
11 |     echo "Failed: Please check OCaml format"
12 |     echo "See error message after running ocamlformat $file"
13 |     exit 1
14 |   fi
15 | done < <(find "$PROJECT_HOME/src" -name "*.ml" -print0)
16 | 


--------------------------------------------------------------------------------
/src/bugDesc.ml:
--------------------------------------------------------------------------------
 1 | module F = Format
 2 | 
 3 | type t = {
 4 |   program : string;  (* value of the "program" key *)
 5 |   compiler_type : string;  (* value of "compiler" -> "type" *)
 6 |   test_cases : string list;  (* generated names: p1..pN, then n1..nM *)
 7 |   test_time_limit : int;  (* value of "test-harness" -> "time-limit" *)
 8 | }
 9 | 
10 | (* [find name json] returns the value bound to [name] in a JSON object.
11 |    Raises [Not_found] when [json] is not an object or has no such key. *)
12 | let find : string -> Yojson.Safe.t -> Yojson.Safe.t =
13 |  fun name json ->
14 |   match json with
15 |   | `Assoc fields -> List.assoc name fields
16 |   | _ -> raise Not_found
14 | 
15 | (* Unwrap a JSON string; any other value raises [Not_found]. *)
16 | let to_string json = match json with `String s -> s | _ -> raise Not_found
16 | 
17 | (* Unwrap a JSON integer; any other value raises [Not_found]. *)
18 | let to_int json = match json with `Int i -> i | _ -> raise Not_found
18 | 
19 | (* Read the string stored under "compiler" -> "type". *)
20 | let compiler_type_of desc = to_string (find "type" (find "compiler" desc))
20 | 
21 | (* Read the string stored under "program". *)
22 | let program_of desc = to_string (find "program" desc)
22 | 
23 | (* Build the test-case names from the counts under "test-harness":
24 |    p1..pN for the passing tests, followed by n1..nM for the failing ones. *)
25 | let test_cases_of desc =
26 |   let harness = find "test-harness" desc in
27 |   let count key = to_int (find key harness) in
28 |   let names prefix total =
29 |     List.init total (fun i -> prefix ^ string_of_int (i + 1))
30 |   in
31 |   let passing = count "passing" in
32 |   let failing = count "failing" in
33 |   names "p" passing @ names "n" failing
29 | 
30 | (* Read the integer stored under "test-harness" -> "time-limit". *)
31 | let test_limit_of desc =
32 |   to_int (find "time-limit" (find "test-harness" desc))
31 | 
32 | (* Load bug_desc.json from [work_dir], falling back to /bugfixer, log it,
33 |    and decode it into a [t]. Fails when neither file exists. *)
34 | let read work_dir =
35 |   let candidates =
36 |     [
37 |       Filename.concat work_dir "bug_desc.json";
38 |       Filename.concat "/bugfixer" "bug_desc.json";
39 |     ]
40 |   in
41 |   let json =
42 |     match List.find_opt Sys.file_exists candidates with
43 |     | Some path -> Yojson.Safe.from_file path
44 |     | None -> failwith "Bug description not found"
45 |   in
46 |   Logging.log "Bug desc: %a" Yojson.Safe.pp json;
47 |   let program = program_of json in
48 |   let compiler_type = compiler_type_of json in
49 |   let test_cases = test_cases_of json in
50 |   let test_time_limit = test_limit_of json in
51 |   { program; compiler_type; test_cases; test_time_limit }
47 | 
48 | (* Print the names between brackets, each one followed by a comma. *)
49 | let pp_test_cases fmt l =
50 |   let pp_names fmt names =
51 |     List.iter (fun name -> F.fprintf fmt "%s," name) names
52 |   in
53 |   F.fprintf fmt "[%a]" pp_names l
52 | 
53 | (* Print every field of a bug description on a single line. *)
54 | let pp fmt desc =
55 |   let { program; compiler_type; test_cases; test_time_limit } = desc in
56 |   F.fprintf fmt
57 |     "{program: %s, compiler_type: %s, test_cases: %a, test_time_limit: %d}"
58 |     program compiler_type pp_test_cases test_cases test_time_limit
58 | 


--------------------------------------------------------------------------------
/src/cmdline.ml:
--------------------------------------------------------------------------------
 1 | let work_dir : string option ref = ref None
 2 | 
 3 | let out_dir = ref "localizer-out"
 4 | 
 5 | let faulty_func = ref false
 6 | 
 7 | type instrument = DfSan | GSA | Coverage | Nothing
 8 | 
 9 | let instrument = ref Nothing
10 | 
11 | (* Store the instrumentation named by [s]; unknown names abort. *)
12 | let select_instrument s =
13 |   instrument :=
14 |     match s with
15 |     | "dfsan" -> DfSan
16 |     | "gsa" -> GSA
17 |     | "coverage" -> Coverage
18 |     | _ -> failwith "Unknown instrument"
17 | 
18 | let skip_compile = ref false
19 | 
20 | type engine =
21 |   | Tarantula
22 |   | Prophet
23 |   | Jaccard
24 |   | Ochiai
25 |   | Dummy
26 |   | UniVal
27 |   | Coverage
28 |   | All
29 | 
30 | let engine = ref Dummy
31 | 
32 | (* Store the engine named by [s]. "unival" and "coverage" also set the
33 |    instrumentation (GSA and Coverage respectively); unknown names abort. *)
34 | let select_engine s =
35 |   let set_both e i =
36 |     engine := e;
37 |     instrument := i
38 |   in
39 |   match s with
40 |   | "tarantula" -> engine := Tarantula
41 |   | "prophet" -> engine := Prophet
42 |   | "jaccard" -> engine := Jaccard
43 |   | "ochiai" -> engine := Ochiai
44 |   | "dummy" -> engine := Dummy
45 |   | "unival" -> set_both UniVal GSA
46 |   | "coverage" -> set_both Coverage Coverage
47 |   | "all" -> engine := All
48 |   | _ -> failwith "Unknown engine"
47 | 
48 | let jobs = ref 0 (* i.e., #cpus *)
49 | 
50 | let blacklist = ref []
51 | 
52 | let gnu_source = ref false
53 | 
54 | let bic = ref false
55 | 
56 | let no_seg = ref false
57 | 
58 | let gcov = ref false
59 | 
60 | let options = (* Arg specification: (flag, action, help text) triples *)
61 |   [
62 |     ("-outdir", Arg.Set_string out_dir, "Output directory");
63 |     ( "-instrument",
64 |       Arg.String select_instrument,
65 |       "Specify instrument method (default: Nothing)" );
66 |     ("-faulty_func", Arg.Set faulty_func, "Set faulty functions");
67 |     ("-skip_compile", Arg.Set skip_compile, "Skip compilation");
68 |     ( "-engine",
69 |       Arg.String select_engine,
70 |       "Specify localization engine (default: Dummy)" );
71 |     ("-j", Arg.Set_int jobs, "Number of parallel jobs for make (default: -j)");
72 |     ( "-blacklist",
73 |       Arg.String (fun x -> blacklist := x :: !blacklist),
74 |       "Blacklist for instrumentation" );
75 |     ( "-gnu_source",
76 |       Arg.Set gnu_source,
77 |       "Add #define _GNU_SOURCE when instrumentation for some programs (e.g., \
78 |        gimp)" );
79 |     ("-bic", Arg.Set bic, "Select whether using bic or not");
80 |     ( "-no_seg",
81 |       Arg.Set no_seg,
82 |       "Do not instrument fflush after every line if there is no segfault" );
83 |     ("-gcov", Arg.Set gcov, "Use gcov when extracting coverage");
84 |   ]
85 | 
86 | (* Store [x] as the working directory. *)
87 | let parse_arg x = work_dir := Some x
89 | 


--------------------------------------------------------------------------------
/src/coverage.ml:
--------------------------------------------------------------------------------
  1 | module F = Format
  2 | module StrMap = Map.Make (String)
  3 | 
  4 | (* Line-level coverage using gcov *)
  5 | module LineCoverage = struct
  6 |   (* reference: https://github.com/squaresLab/BugZoo/blob/a87f03b2e33c2097c21c0175e613f4e95d9825eb/bugzoo/core/coverage.py#L106 *)
  7 |   type elem = {
  8 |     test : string;
  9 |     coverage : int list StrMap.t;
 10 |     linehistory : (int * int) list;
 11 |   }
 12 | 
 13 |   type t = elem list
 14 | 
 15 |   let empty = []
 16 | 
 17 |   type tree = E of Xmlm.tag * tree list | D of string
 18 | 
 19 |   let elem_of test = { test; coverage = StrMap.empty; linehistory = [] }
 20 | 
 21 |   (* Parse [file] with Xmlm and return the pair produced by
 22 |      [Xmlm.input_doc_tree]; its second component is the element tree.
 23 |      The channel is closed even when parsing raises. *)
 24 |   let read_xml file =
 25 |     let ic = open_in file in
 26 |     Fun.protect
 27 |       ~finally:(fun () -> close_in_noerr ic)
 28 |       (fun () ->
 29 |         let i = Xmlm.make_input (`Channel ic) in
 30 |         let el tag childs = E (tag, childs) in
 31 |         let data d = D d in
 32 |         Xmlm.input_doc_tree ~el ~data i)
 27 | 
 28 |   (* Return the value of the first "filename" attribute in [al]; fail when
 29 |      there is none. *)
 30 |   let find_filename al =
 31 |     let pick = function (_, "filename"), data -> Some data | _ -> None in
 32 |     match List.find_map pick al with
 33 |     | None -> failwith "Unknown filename"
 34 |     | Some data -> data
 36 | 
 37 |   (* Record one <line> element: when some "hits" attribute differs from "0",
 38 |      prepend the value of its first "number" attribute to the lines covered
 39 |      in [filename]. Otherwise [elem] is returned unchanged. *)
 40 |   let elem_of_attr filename elem al =
 41 |     let is_hit = function (_, "hits"), hits -> hits <> "0" | _ -> false in
 42 |     let line_number = function
 43 |       | (_, "number"), data -> Some (int_of_string data)
 44 |       | _ -> None
 45 |     in
 46 |     if not (List.exists is_hit al) then elem
 47 |     else
 48 |       match List.find_map line_number al with
 49 |       | None -> elem
 50 |       | Some line ->
 51 |           let add = function
 52 |             | Some l -> Some (line :: l)
 53 |             | None -> Some [ line ]
 54 |           in
 55 |           { elem with coverage = StrMap.update filename add elem.coverage }
 56 | 
 57 |   (* Walk the XML tree and accumulate covered lines into [elem].
 58 |      [filename] is set when entering a <class> element and handed down to
 59 |      its <lines>/<line> children; the container elements reset it to "". *)
 60 |   let rec elem_of_xml ?(filename = "") elem xml =
 61 |     let fold_children ~filename children =
 62 |       List.fold_left
 63 |         (fun acc child -> elem_of_xml ~filename acc child)
 64 |         elem children
 65 |     in
 66 |     match xml with
 67 |     | E (((_, "coverage"), _), children) ->
 68 |         let is_packages = function
 69 |           | E (((_, "packages"), _), _) -> true
 70 |           | _ -> false
 71 |         in
 72 |         elem_of_xml elem (List.find is_packages children)
 73 |     | E (((_, ("packages" | "package" | "classes")), _), children) ->
 74 |         fold_children ~filename:"" children
 75 |     | E (((_, "class"), al), children) ->
 76 |         fold_children ~filename:(find_filename al) children
 77 |     | E (((_, "lines"), _), children) -> fold_children ~filename children
 78 |     | E (((_, "line"), al), _) -> elem_of_attr filename elem al
 79 |     | _ -> elem
 75 | 
 76 |   (* Printers: line numbers as "n, n, ", one "file: lines" row per file,
 77 |      one record per test, and finally the whole coverage list. *)
 78 |   let pp_lines fmt lines =
 79 |     let pp_line l = F.fprintf fmt "%d, " l in
 80 |     List.iter pp_line lines
 81 | 
 82 |   let pp_coverage fmt cov =
 83 |     let pp_entry file lines = F.fprintf fmt "%s: %a\n" file pp_lines lines in
 84 |     StrMap.iter pp_entry cov
 85 | 
 86 |   let pp_elem fmt (e : elem) =
 87 |     F.fprintf fmt "test: %s\ncoverage:\n%a\n" e.test pp_coverage e.coverage
 88 | 
 89 |   let pp fmt cov = List.iter (pp_elem fmt) cov
 87 | 
 88 |   (* Run gcovr in the current directory and write its XML report to
 89 |      [coverage_data], removing any previous report first. Fails when gcovr
 90 |      does not exit with status 0. *)
 91 |   let compute_coverage coverage_data =
 92 |     if Sys.file_exists coverage_data then Unix.unlink coverage_data;
 93 |     let pid =
 94 |       Unix.create_process "gcovr"
 95 |         [| "gcovr"; "-o"; coverage_data; "-x"; "-d"; "-r"; "." |]
 96 |         Unix.stdin Unix.stdout Unix.stderr
 97 |     in
 98 |     (* Wait for gcovr itself: [Unix.wait] would return whichever child
 99 |        terminates first, which need not be this process. *)
100 |     match Unix.waitpid [] pid |> snd with
101 |     | Unix.WEXITED 0 -> ()
102 |     | Unix.WEXITED n ->
103 |         failwith ("Error " ^ string_of_int n ^ ": coverage failed")
104 |     | _ -> failwith "Coverage failed"
 99 | 
100 |   (* Parse the XML report [coverage_data] into a fresh record for [test]
101 |      and push it in front of [coverage]. *)
102 |   let update_coverage coverage_data test coverage =
103 |     let _, tree = read_xml coverage_data in
104 |     elem_of_xml (elem_of test) tree :: coverage
104 | 
105 |   (* Compile the program (unless -skip_compile is set), then run every test
106 |      case and collect its gcovr coverage. The result holds one record per
107 |      test, most recent test first. *)
108 |   let run work_dir bug_desc =
109 |     let scenario = Scenario.init work_dir in
110 |     Unix.chdir scenario.work_dir;
111 |     if not !Cmdline.skip_compile then (
112 |       Logging.log "Start compile";
113 |       Scenario.compile scenario bug_desc.BugDesc.compiler_type;
114 |       Unix.chdir scenario.work_dir);
115 |     Logging.log "Start test";
116 |     let collect acc test =
117 |       Scenario.run_test scenario.test_script test;
118 |       compute_coverage scenario.coverage_data;
119 |       update_coverage scenario.coverage_data test acc
120 |     in
121 |     List.fold_left collect empty bug_desc.BugDesc.test_cases
119 | end
120 | 
121 | (* Line-level coverage using our own implementation *)
122 | module LineCoverage2 = struct
123 |   include LineCoverage
124 | 
125 |   (* Sets of line numbers, ordered by the usual integer comparison. *)
126 |   module IntSet = Set.Make (Int)
130 | 
131 |   (* reference: https://github.com/squaresLab/BugZoo/blob/a87f03b2e33c2097c21c0175e613f4e95d9825eb/bugzoo/core/coverage.py#L106 *)
132 |   type elem_internal = {
133 |     test : string;
134 |     coverage_set : IntSet.t StrMap.t;
135 |     linehistory : (int * int) list;
136 |   }
137 | 
138 |   let elem_of test = { test; coverage_set = StrMap.empty; linehistory = [] }
139 | 
140 |   (* Convert the set-based record into the list-based [elem]: each line set
141 |      becomes the list of its elements. *)
142 |   let elem_of_internal (internal : elem_internal) =
143 |     {
144 |       test = internal.test;
145 |       coverage = StrMap.map IntSet.elements internal.coverage_set;
146 |       linehistory = internal.linehistory;
147 |     }
142 | 
143 |   (* Run gcovr in the current directory and write its XML report to
144 |      [coverage_data], removing any previous report first. Fails when gcovr
145 |      does not exit with status 0. *)
146 |   let compute_coverage coverage_data =
147 |     if Sys.file_exists coverage_data then Unix.unlink coverage_data;
148 |     let pid =
149 |       Unix.create_process "gcovr"
150 |         [| "gcovr"; "-o"; coverage_data; "-x"; "-d"; "-r"; "." |]
151 |         Unix.stdin Unix.stdout Unix.stderr
152 |     in
153 |     (* Wait for gcovr itself: [Unix.wait] would return whichever child
154 |        terminates first, which need not be this process. *)
155 |     match Unix.waitpid [] pid |> snd with
156 |     | Unix.WEXITED 0 -> ()
157 |     | Unix.WEXITED n ->
158 |         failwith ("Error " ^ string_of_int n ^ ": coverage failed")
159 |     | _ -> failwith "Coverage failed"
154 | 
155 |   (* Return the whole content of [filename]. The channel is closed even
156 |      when reading raises. *)
157 |   let read_whole_file filename =
158 |     let ch = open_in filename in
159 |     Fun.protect
160 |       ~finally:(fun () -> close_in_noerr ch)
161 |       (fun () -> really_input_string ch (in_channel_length ch))
160 | 
161 |   (* Read the trace file [coverage_data], whose useful lines start with
162 |      "file:line", and push the set of covered lines of [test] in front of
163 |      [coverage]. An unreadable file (Sys_error) counts as an empty trace;
164 |      empty lines, execution markers and malformed lines are skipped. *)
165 |   let update_coverage coverage_data test coverage =
166 |     let data =
167 |       try read_whole_file coverage_data |> String.split_on_char '\n'
168 |       with Sys_error _ -> []
169 |     in
170 |     let add_line elem line =
171 |       if List.mem line [ ""; "__START_NEW_EXECUTION__" ] then elem
172 |       else
173 |         (* Match the two leading fields explicitly instead of catching
174 |            every exception raised by List.nth / int_of_string. *)
175 |         match String.split_on_char ':' line with
176 |         | filename :: lineno :: _ -> (
177 |             match int_of_string_opt lineno with
178 |             | None -> elem
179 |             | Some lineno ->
180 |                 {
181 |                   elem with
182 |                   coverage_set =
183 |                     StrMap.update filename
184 |                       (function
185 |                         | Some s -> Some (IntSet.add lineno s)
186 |                         | None -> Some (IntSet.singleton lineno))
187 |                       elem.coverage_set;
188 |                   (* line-history tracking is disabled: always empty *)
189 |                   linehistory = [];
190 |                 })
191 |         | _ -> elem
192 |     in
193 |     let elem = List.fold_left add_line (elem_of test) data in
194 |     elem :: coverage
195 | 
196 |   (* Compile once to obtain the *.i files, instrument them, compile again,
197 |      then run every test and collect the lines written by the instrumented
198 |      program. The result holds one record per test, most recent first. *)
199 |   let run work_dir bug_desc =
200 |     let scenario = Scenario.init ~stdio_only:true work_dir in
201 |     Unix.chdir scenario.work_dir;
202 |     (* first build: extracts the *.i files *)
203 |     Scenario.compile scenario bug_desc.BugDesc.compiler_type;
204 |     let src_dir = Filename.concat scenario.work_dir "src" in
205 |     Instrument.Coverage.run scenario.work_dir src_dir;
206 |     Unix.chdir scenario.Scenario.work_dir;
207 |     (* second build: compiles the instrumented files *)
208 |     Scenario.compile scenario bug_desc.BugDesc.compiler_type;
209 |     Unix.chdir scenario.Scenario.work_dir;
210 |     Logging.log "Start test";
211 |     (* NOTE(review): the shell commands below use absolute /experiment paths
212 |        while this path is relative to the current directory - confirm that
213 |        both denote the same file in the target environment. *)
214 |     let merged_cov_path = Filename.concat "coverage_data" "coverage.txt" in
215 |     let run_one coverage test =
216 |       Scenario.run_test scenario.test_script test;
217 |       (* merge the per-process traces of this test, then drop them *)
218 |       ignore
219 |         (Unix.system
220 |            "cat /experiment/coverage_data/tmp/*.txt > \
221 |             /experiment/coverage_data/coverage.txt");
222 |       ignore (Unix.system "rm -f /experiment/coverage_data/tmp/*.txt");
223 |       update_coverage merged_cov_path test coverage
224 |     in
225 |     bug_desc.BugDesc.test_cases
226 |     |> List.fold_left run_one empty
227 |     |> List.map elem_of_internal
228 | end
229 | 


--------------------------------------------------------------------------------
/src/dune:
--------------------------------------------------------------------------------
 1 | (executable
 2 |  (name main)
 3 |  (link_flags -ccopt -static)
 4 |  (modules
 5 |   main
 6 |   cmdline
 7 |   logging
 8 |   localizer
 9 |   coverage
10 |   bugDesc
11 |   scenario
12 |   instrument
13 |   utils)
14 |  (libraries str cil cil.all-features xmlm unix yojson))
15 | 
16 | (executable
17 |  (name visualizer)
18 |  (link_flags -ccopt -static)
19 |  (modules visualizer)
20 |  (libraries ocamlgraph))
21 | 


--------------------------------------------------------------------------------
/src/instrument.ml:
--------------------------------------------------------------------------------
  1 | module DfSan = struct
  2 |   module NodeInfo = struct
  3 |     type t = Yojson.Safe.t
  4 | 
  5 |     (* Return the first element of the "cmd" list of a node, which must be
  6 |        a string. Any other shape fails with "Invalid format". *)
  7 |     let cmd_of t =
  8 |       match t with
  9 |       | `Assoc l -> (
 10 |           match List.assoc "cmd" l with
 11 |           | `List cmds -> (
 12 |               match List.hd cmds with
 13 |               | `String s -> s
 14 |               | _ -> failwith "Invalid format")
 15 |           | _ -> failwith "Invalid format")
 16 |       | _ -> failwith "Invalid format"
 15 | 
 16 |     (* Return the part of a node's "loc" string that precedes the first
 17 |        ':'. Any other shape fails with "Invalid format". *)
 18 |     let filename_of t =
 19 |       let loc =
 20 |         match t with
 21 |         | `Assoc l -> (
 22 |             match List.assoc "loc" l with
 23 |             | `String s -> s
 24 |             | _ -> failwith "Invalid format")
 25 |         | _ -> failwith "Invalid format"
 26 |       in
 27 |       List.nth (String.split_on_char ':' loc) 0
 22 |   end
 23 | 
 24 |   module NodeInfoMap = struct
 25 |     module M = Map.Make (String)
 26 | 
 27 |     type t = NodeInfo.t M.t
 28 | 
 29 |     let empty = M.empty
 30 | 
 31 |     let add = M.add
 32 | 
 33 |     let find = M.find
 34 |   end
 35 | 
 36 |   module LineSet = Set.Make (String)
 37 |   module FileToEdges = Map.Make (String)
 38 | 
 39 |   (* Load the "nodes" object of the JSON [file] into a map from node name
 40 |      to node information. The channel is closed even when the file is
 41 |      malformed and parsing raises. *)
 42 |   let read_nodes file =
 43 |     let ic = open_in file in
 44 |     Fun.protect
 45 |       ~finally:(fun () -> close_in_noerr ic)
 46 |       (fun () ->
 47 |         Yojson.Safe.from_channel ic
 48 |         |> (function
 49 |              | `Assoc l -> List.assoc "nodes" l
 50 |              | _ -> failwith "Invalid format")
 51 |         |> (function `Assoc l -> l | _ -> failwith "Invalid format")
 52 |         |> List.fold_left
 53 |              (fun map (name, info) -> NodeInfoMap.add name info map)
 54 |              NodeInfoMap.empty)
 51 | 
 52 |   (* Collect the first tab-separated field of every line of [file] into a
 53 |      set. The channel is closed even when reading raises. *)
 54 |   let read_covered_lines file =
 55 |     let ic = open_in file in
 56 |     let rec loop lst =
 57 |       match input_line ic with
 58 |       | line -> (
 59 |           match String.split_on_char '\t' line with
 60 |           | h :: _ -> loop (LineSet.add h lst)
 61 |           | [] -> failwith "Invalid format")
 62 |       | exception End_of_file -> lst
 63 |     in
 64 |     Fun.protect
 65 |       ~finally:(fun () -> close_in_noerr ic)
 66 |       (fun () -> loop LineSet.empty)
 65 | 
 66 |   (* Read tab-separated "src dst" edges from [file] and group them by the
 67 |      file name of the source node, looked up in [nodes]. Raises Not_found
 68 |      for an unknown source node and fails on lines with fewer than two
 69 |      fields; the channel is closed in every case. *)
 70 |   let read_duedges nodes file =
 71 |     let ic = open_in file in
 72 |     let rec loop map =
 73 |       match input_line ic with
 74 |       | line -> (
 75 |           match String.split_on_char '\t' line with
 76 |           | src :: dst :: _ ->
 77 |               let file = NodeInfoMap.find src nodes |> NodeInfo.filename_of in
 78 |               FileToEdges.update file
 79 |                 (function
 80 |                   | None -> Some [ (src, dst) ]
 81 |                   | Some l -> Some ((src, dst) :: l))
 82 |                 map
 83 |               |> loop
 84 |           | _ -> failwith "Invalid format")
 85 |       | exception End_of_file -> map
 86 |     in
 87 |     Fun.protect
 88 |       ~finally:(fun () -> close_in_noerr ic)
 89 |       (fun () -> loop FileToEdges.empty)
 86 | 
 87 |   type dfsan_funs = {
 88 |     create_label : Cil.varinfo;
 89 |     set_label : Cil.varinfo;
 90 |     get_label : Cil.varinfo;
 91 |     has_label : Cil.varinfo;
 92 |   }
 93 | 
 94 |   (* Load the node table, the covered lines and the def-use edges from the
 95 |      sparrow-out and localizer-out directories under [work_dir]. *)
 96 |   let initialize work_dir =
 97 |     let ( / ) = Filename.concat in
 98 |     let sparrow_out_dir = work_dir / "sparrow-out" in
 99 |     let nodes = read_nodes (sparrow_out_dir / "node.json") in
100 |     let lines = read_covered_lines (work_dir / "localizer-out/result.txt") in
101 |     let duedges =
102 |       read_duedges nodes (sparrow_out_dir / "interval/datalog/DUEdge.facts")
103 |     in
104 |     (nodes, lines, duedges)
105 | 
106 |   (* Copy [instrs], adding after every assignment to a plain variable a
107 |      call to the create_label function with the variable's name.
108 |      [results] accumulates the output in reverse order. *)
109 |   let rec instrument_instr dfsan_funs edges instrs results =
110 |     match instrs with
111 |     | [] -> List.rev results
112 |     | (Cil.Set ((Var vi, NoOffset), _, loc) as i) :: tl ->
113 |         let create_label_call =
114 |           Cil.Call
115 |             ( None,
116 |               Cil.Lval (Cil.Var dfsan_funs.create_label, Cil.NoOffset),
117 |               [ Cil.mkString vi.vname; Cil.zero ],
118 |               loc )
119 |         in
120 |         (* pushed above [i] on the reversed list, so emitted after it *)
121 |         instrument_instr dfsan_funs edges tl (create_label_call :: i :: results)
122 |     | i :: tl -> instrument_instr dfsan_funs edges tl (i :: results)
119 | 
120 |   (* Visitor that rewrites every instruction statement in place with
121 |      [instrument_instr] and then descends into the children. *)
122 |   class assignVisitor dfsan_funs edges =
123 |     object
124 |       inherit Cil.nopCilVisitor
125 | 
126 |       method! vstmt s =
127 |         (match s.Cil.skind with
128 |         | Cil.Instr i ->
129 |             s.Cil.skind <- Cil.Instr (instrument_instr dfsan_funs edges i [])
130 |         | _ -> ());
131 |         Cil.DoChildren
132 |     end
131 | 
132 |   (* Parse the preprocessed file [pp_file], insert the label calls and
133 |      write the result back to [pp_file]. [file] is only used for logging;
134 |      the third argument is ignored. *)
135 |   let instrument file pp_file _ edges =
136 |     Logging.log "Instrument %s (%s)" file pp_file;
137 |     let cil = Frontc.parse pp_file () in
138 |     (* find or declare a dfsan function of type void f() in [cil] *)
139 |     let declare name =
140 |       Cil.findOrCreateFunc cil name (Cil.TFun (Cil.voidType, None, false, []))
141 |     in
142 |     let dfsan_funs =
143 |       {
144 |         create_label = declare "dfsan_create_label";
145 |         set_label = declare "dfsan_set_label";
146 |         get_label = declare "dfsan_get_label";
147 |         has_label = declare "dfsan_has_label";
148 |       }
149 |     in
150 |     Cil.visitCilFile (new assignVisitor dfsan_funs edges) cil;
151 |     let oc = open_out pp_file in
152 |     Cil.dumpFile !Cil.printerForMaincil oc "" cil;
153 |     close_out oc
155 | 
156 |   (* Instrument the preprocessed counterpart (<name>.i under [src_dir]) of
157 |      every file that owns a def-use edge. Empty file names are skipped and
158 |      missing .i files are only logged. *)
159 |   let run work_dir src_dir =
160 |     let nodes, _, duedges = initialize work_dir in
161 |     let instrument_file file edges =
162 |       if file <> "" then
163 |         let name = Filename.remove_extension file in
164 |         let pp_file = Filename.concat src_dir (name ^ ".i") in
165 |         if Sys.file_exists pp_file then instrument file pp_file nodes edges
166 |         else Logging.log "%s not found" file
167 |     in
168 |     FileToEdges.iter instrument_file duedges
167 | end
168 | 
169 | (* Build the C text placed in front of an instrumented file: declarations,
170 |    a constructor that opens the output stream and a destructor that closes
171 |    it. In "output" mode the stream is <src_dir>/output.txt; in any other
172 |    mode it is <src_dir>/coverage_data/tmp/<mode>-<pid>.txt. *)
173 | let preamble src_dir mode =
174 |   let header =
175 |     [
176 |       "/* COVERAGE :: INSTRUMENTATION :: START */\n";
177 |       "typedef struct _IO_FILE FILE;\n";
178 |       "struct _IO_FILE *__inst_stream ;\n";
179 |       "extern FILE *fopen(char const   * __restrict  __filename , char \
180 |        const   * __restrict  __modes ) ;\n";
181 |       "extern int fclose(FILE *__stream ) ;\n";
182 |       "static void coverage_ctor (void) __attribute__ ((constructor));\n";
183 |       "static void coverage_ctor (void) {\n";
184 |     ]
185 |   in
186 |   let open_stream =
187 |     if mode = "output" then
188 |       [ "__inst_stream = fopen(\"" ^ src_dir ^ "/output.txt\", \"a\");\n" ]
189 |     else
190 |       [
191 |         "  int pid = getpid();\n";
192 |         "  char filename[64];\n";
193 |         "  sprintf(filename, \"" ^ src_dir ^ "/coverage_data" ^ "/tmp/" ^ mode
194 |         ^ "-%d.txt\", pid);\n";
195 |         "  __inst_stream = fopen(filename, \"a\");\n";
196 |         "  fprintf(__inst_stream, \"__START_NEW_EXECUTION__\\n\");\n";
197 |         "  fflush(__inst_stream);\n";
198 |       ]
199 |   in
200 |   let footer =
201 |     [
202 |       "}\n";
203 |       "static void coverage_dtor (void) __attribute__ ((destructor));\n";
204 |       "static void coverage_dtor (void) {\n";
205 |       "  fclose(__inst_stream);\n";
206 |       "}\n";
207 |       "/* COVERAGE :: INSTRUMENTATION :: END */\n";
208 |     ]
209 |   in
210 |   String.concat "" (header @ open_stream @ footer)
201 | 
202 | let found_type = ref None
203 | 
204 | let found_gvar = ref None
205 | 
206 | (* Visitor that stores in [found_type] the composite type called [name]
207 |    when its definition is met; it never descends below globals. *)
208 | class findTypeVisitor name =
209 |   object
210 |     inherit Cil.nopCilVisitor
211 | 
212 |     method! vglob g =
213 |       (match g with
214 |       | Cil.GCompTag (ci, _) when ci.Cil.cname = name -> found_type := Some ci
215 |       | _ -> ());
216 |       Cil.SkipChildren
217 |   end
217 | 
218 | (* Visitor that stores in [found_gvar] the declared global variable called
219 |    [name] when its declaration is met; it never descends below globals. *)
220 | class findGVarVisitor name =
221 |   object
222 |     inherit Cil.nopCilVisitor
223 | 
224 |     method! vglob g =
225 |       (match g with
226 |       | Cil.GVarDecl (vi, _) when vi.Cil.vname = name -> found_gvar := Some vi
227 |       | _ -> ());
228 |       Cil.SkipChildren
229 |   end
229 | 
230 | (* Prepend the coverage preamble to [filename] unless the file already
231 |    starts with the preamble's first line. The file is read once and both
232 |    channels are closed even when an I/O operation raises. *)
233 | let append_constructor work_dir filename mode =
234 |   let marker = "/* COVERAGE :: INSTRUMENTATION :: START */" in
235 |   let read_whole_file filename =
236 |     let ch = open_in filename in
237 |     Fun.protect
238 |       ~finally:(fun () -> close_in_noerr ch)
239 |       (fun () -> really_input_string ch (in_channel_length ch))
240 |   in
241 |   let code = read_whole_file filename in
242 |   (* derive the prefix length from the marker instead of hard-coding it *)
243 |   let marker_len = String.length marker in
244 |   let already_instrumented =
245 |     String.length code >= marker_len
246 |     && String.equal (String.sub code 0 marker_len) marker
247 |   in
248 |   if not already_instrumented then (
249 |     let instr_c_code = preamble work_dir mode ^ code in
250 |     let oc = open_out filename in
251 |     Fun.protect
252 |       ~finally:(fun () -> close_out_noerr oc)
253 |       (fun () ->
254 |         output_string oc instr_c_code;
255 |         (* explicit close so that a failing flush is still reported *)
256 |         close_out oc))
248 | 
249 | module GSA = struct
250 |   let pred_num = ref (-1)
251 | 
252 |   (* Return a fresh predicate-variable name: the counter is incremented
253 |      first, so the names are OOJAHOOO_PRED_0, OOJAHOOO_PRED_1, ... *)
254 |   let new_pred () =
255 |     incr pred_num;
256 |     "OOJAHOOO_PRED_" ^ string_of_int !pred_num
255 | 
256 |   (* Visitor that rewrites every [if (pred)] found directly in a block into
257 |      [tmp = pred; if (tmp)], where [tmp] is a fresh int local of [f]. *)
258 |   class assignInitializer f =
259 |     (* Return the statements that replace [stmt] in its block. *)
260 |     let add_predicate_var stmt =
261 |       match stmt.Cil.skind with
262 |       | Cil.If (pred, then_branch, else_branch, loc) ->
263 |           let pred_var = new_pred () in
264 |           let vi = Cil.makeLocalVar f pred_var (Cil.TInt (Cil.IInt, [])) in
265 |           let branch =
266 |             Cil.If
267 |               ( Cil.Lval (Cil.Var vi, Cil.NoOffset),
268 |                 then_branch,
269 |                 else_branch,
270 |                 loc )
271 |           in
272 |           let assign =
273 |             Cil.mkStmtOneInstr (Cil.Set ((Cil.Var vi, Cil.NoOffset), pred, loc))
274 |           in
275 |           (* The original statement record receives the assignment and the
276 |              new record receives the branch, so the original record stays
277 |              first (presumably to keep what is attached to it, such as
278 |              labels, in front of the assignment - TODO confirm). *)
279 |           stmt.Cil.skind <- assign.Cil.skind;
280 |           assign.Cil.skind <- branch;
281 |           [ stmt; assign ]
282 |       | _ -> [ stmt ]
283 |     in
284 |     object
285 |       inherit Cil.nopCilVisitor
286 | 
287 |       method! vblock b =
288 |         (* single left-to-right pass instead of repeated list appends *)
289 |         b.Cil.bstmts <- List.concat_map add_predicate_var b.Cil.bstmts;
290 |         Cil.DoChildren
291 |     end
285 | 
286 |   (* Visitor that applies [assignInitializer] to selected function
287 |      definitions. Functions whose name starts with "bugzoo" or "unival" are
288 |      always skipped; when [faulty_func_list] is non-empty, functions that are
289 |      not listed in it are skipped as well. *)
290 |   class predicateVisitor faulty_func_list =
291 |     let is_skipped name =
292 |       String.starts_with ~prefix:"bugzoo" name
293 |       || String.starts_with ~prefix:"unival" name
294 |       || (faulty_func_list <> [] && not (List.mem name faulty_func_list))
295 |     in
296 |     object
297 |       inherit Cil.nopCilVisitor
298 | 
299 |       method! vfunc f =
300 |         let name = f.Cil.svar.Cil.vname in
301 |         if is_skipped name then SkipChildren
302 |         else ChangeTo (Cil.visitCilFunction (new assignInitializer f) f)
303 |     end
300 | 
301 |   (* Parse the C file [pp_file], run [predicateVisitor] over it and overwrite
302 |      [pp_file] with the transformed program. A file that raises
303 |      [Frontc.ParseError] is left untouched. Always returns [pp_file]. *)
304 |   let predicate_transform ?(faulty_func_list = []) pp_file =
305 |     let origin_file = Filename.basename pp_file in
306 |     Logging.log "Predicate transform %s (%s)" origin_file pp_file;
307 |     (match Frontc.parse pp_file () with
308 |     | exception Frontc.ParseError _ -> ()
309 |     | cil ->
310 |         Cil.visitCilFile (new predicateVisitor faulty_func_list) cil;
311 |         let oc = open_out pp_file in
312 |         Cil.dumpFile !Cil.printerForMaincil oc "" cil;
313 |         close_out oc);
314 |     pp_file
315 | 
316 |   module CausalMap = Map.Make (String) (* string-keyed map behind [causal_map] *)
317 |   module VarSet = Set.Make (String) (* sets of "UNIVAL_..." variable names *)
318 |   module VarVerMap = Map.Make (String) (* string-keyed map behind [var_ver] *)
319 |   module VarMap = Map.Make (String) (* C variable name -> its varinfo *)
320 |   (* Dependency map: each key is bound to the list of names recorded as its parents; written out by [print_cm]. *)
321 |   let causal_map = ref CausalMap.empty
322 |   (* Current integer version of every tracked "UNIVAL_..." variable name. *)
323 |   let var_ver = ref VarVerMap.empty
324 | 
325 |   (* Visitor that instruments the assignments of function [f] for value
326 |      tracing. [printf], [flush] and [stream] are the varinfos of the print
327 |      function, the flush function and the output stream used by the inserted
328 |      calls.
329 | 
330 |      For every [Set] instruction, and every [Call] that stores its result,
331 |      whose target is a plain variable (no offset), the visitor updates the
332 |      versions in [var_ver], records in [causal_map] which versioned variables
333 |      the value was computed from, and inserts calls right after the
334 |      instruction that print
335 |      "file,function,line,UNIVAL_<function>_<var>,version,value" to [stream]
336 |      and flush it. All other instructions are kept as they are. *)
337 |   class assignVisitor (printf, flush, stream) f =
338 |     (* Name of a plain-variable lvalue; "" for any other lvalue. *)
339 |     let vname_of lv =
340 |       match lv with Cil.Var vi, Cil.NoOffset -> vi.Cil.vname | _ -> ""
341 |     in
342 |     (* Varinfo of a plain-variable lvalue; a nameless void-typed placeholder
343 |        for any other lvalue. *)
344 |     let varinfo_of lv =
345 |       match lv with
346 |       | Cil.Var vi, Cil.NoOffset -> vi
347 |       | _ -> Cil.makeVarinfo false "" (Cil.TVoid [])
348 |     in
349 |     (* Plain variables occurring in [exp], as a map from name to varinfo.
350 |        Only the expression forms matched below are traversed. *)
351 |     let rec var_names_of exp =
352 |       let result =
353 |         match exp with
354 |         | Cil.Lval lv -> VarMap.singleton (vname_of lv) (varinfo_of lv)
355 |         | Cil.SizeOfE e -> var_names_of e
356 |         | Cil.AlignOfE e -> var_names_of e
357 |         | Cil.UnOp (_, e, _) -> var_names_of e
358 |         | Cil.BinOp (_, e1, e2, _) ->
359 |             VarMap.union
360 |               (fun _ va1 _ -> Some va1)
361 |               (var_names_of e1) (var_names_of e2)
362 |         | Cil.Question (e1, e2, e3, _) ->
363 |             VarMap.union
364 |               (fun _ va1 _ -> Some va1)
365 |               (VarMap.union
366 |                  (fun _ va1 _ -> Some va1)
367 |                  (var_names_of e1) (var_names_of e2))
368 |               (var_names_of e3)
369 |         | Cil.CastE (_, e) -> var_names_of e
370 |         | _ -> VarMap.empty
371 |       in
372 |       (* "" stands for a non-variable lvalue, see [vname_of] *)
373 |       VarMap.remove "" result
374 |     in
375 |     (* NOTE(review): in the [Str] syntax a backslash quotes the special
376 |        character that follows it, so this pattern denotes the literal text
377 |        "OOJAHOOO_PRED_[0-9]+" and not the prefix followed by digits. No name
378 |        produced by [new_pred] matches it, hence the [is_pred] branch of
379 |        [ass2gsa] is never taken and predicate variables go through the
380 |        generic branch. Kept as is, because correcting the pattern would
381 |        change the emitted trace and the causal map - confirm what is
382 |        intended. *)
383 |     let is_pred vname =
384 |       let pred_prefix = Str.regexp "OOJAHOOO_PRED_\\[0-9\\]\\+" in
385 |       Str.string_match pred_prefix vname 0
386 |     in
387 |     (* C-like name of a type, used to pick a conversion in [call_record];
388 |        "NA" for every type that is not listed. *)
389 |     let rec string_of_typ = function
390 |       | Cil.TInt (Cil.IChar, _) -> "char"
391 |       | Cil.TInt (Cil.ISChar, _) -> "signed char"
392 |       | Cil.TInt (Cil.IUChar, _) -> "unsigned char"
393 |       | Cil.TInt (Cil.IInt, _) -> "int"
394 |       | Cil.TInt (Cil.IUInt, _) -> "unsigned int"
395 |       | Cil.TInt (Cil.IShort, _) -> "short"
396 |       | Cil.TInt (Cil.IUShort, _) -> "unsigned short"
397 |       | Cil.TInt (Cil.ILong, _) -> "long"
398 |       | Cil.TInt (Cil.IULong, _) -> "unsigned long"
399 |       | Cil.TFloat (Cil.FFloat, _) -> "float"
400 |       | Cil.TFloat (Cil.FDouble, _) -> "double"
401 |       | Cil.TFloat (Cil.FLongDouble, _) -> "long double"
402 |       | Cil.TPtr (Cil.TInt (Cil.IChar, _), _) -> "string"
403 |       | Cil.TNamed (t, _) -> string_of_typ t.ttype
404 |       | _ -> "NA"
405 |     in
406 |     (* Instructions that print the current value of lvalue [var], reported
407 |        under the name [vname] with version [ver], and then flush [stream]. *)
408 |     let call_record var vname ver loc =
409 |       let call_printf filename funcname line varname version typ var_exp =
410 |         (* Conversion for the value column. "%hu" and "%Lf" are the C
411 |            conversions for unsigned short and long double. For "NA" the
412 |            text NA is printed and the value argument is not consumed. *)
413 |         let fmt =
414 |           match typ with
415 |           | "char" | "signed char" | "unsigned char" -> "%c"
416 |           | "unsigned int" -> "%u"
417 |           | "int" | "short" -> "%d"
418 |           | "unsigned short" -> "%hu"
419 |           | "long" -> "%ld"
420 |           | "unsigned long" -> "%lu"
421 |           | "float" -> "%f"
422 |           | "double" -> "%lf"
423 |           | "long double" -> "%Lf"
424 |           | "string" -> "%s"
425 |           | "NA" -> "NA"
426 |           | _ -> ""
427 |         in
428 |         Cil.Call
429 |           ( None,
430 |             Cil.Lval (Cil.Var printf, Cil.NoOffset),
431 |             [
432 |               Cil.Lval (Cil.Var stream, Cil.NoOffset);
433 |               Cil.Const
434 |                 (Cil.CStr
435 |                    (Printf.sprintf "%s,%s,%d,%s,%d" filename funcname line
436 |                       ("UNIVAL_" ^ funcname ^ "_" ^ varname)
437 |                       version
438 |                    ^ "," ^ fmt ^ "\n"));
439 |               var_exp;
440 |             ],
441 |             loc )
442 |       in
443 |       let call_flush loc =
444 |         Cil.Call
445 |           ( None,
446 |             Cil.Lval (Cil.Var flush, Cil.NoOffset),
447 |             [ Cil.Lval (Cil.Var stream, Cil.NoOffset) ],
448 |             loc )
449 |       in
450 |       let fun_name = f.Cil.svar.vname in
451 |       let t = string_of_typ (Cil.typeOfLval var) in
452 |       (* Predicate variables are reported as 0/1, i.e. [var == 0 ? 0 : 1]. *)
453 |       if
454 |         String.length vname >= 13
455 |         && String.equal (String.sub vname 0 13) "OOJAHOOO_PRED"
456 |       then
457 |         [
458 |           call_printf loc.Cil.file fun_name loc.Cil.line vname ver t
459 |             (Cil.Question
460 |                ( Cil.BinOp (Eq, Cil.Lval var, Cil.zero, Cil.intType),
461 |                  Cil.zero,
462 |                  Cil.one,
463 |                  Cil.intType ));
464 |           call_flush loc;
465 |         ]
466 |         (* printf("%s,%s,%d,%s,%d,%d\n", filename, funcname, line, varname, version, i_val) *)
467 |       else
468 |         [
469 |           call_printf loc.Cil.file fun_name loc.Cil.line vname ver t (Lval var);
470 |           call_flush loc;
471 |         ]
472 |     in
473 |     (* Fold step over the instructions of a statement: append [instr] to
474 |        [result], followed by its recording calls when it assigns to a plain
475 |        variable. Raises [Failure "Not_Found_Var"] when a variable read by
476 |        the instruction has no entry in [var_ver]. *)
477 |     let ass2gsa result instr =
478 |       let gogo, lv, lval, exp_vars, loc =
479 |         match instr with
480 |         | Cil.Set (lv, exp, loc) ->
481 |             let exp_vars = var_names_of exp in
482 |             let lval = vname_of lv in
483 |             (true, lv, lval, exp_vars, loc)
484 |         | Call (lv_opt, _, params, loc) ->
485 |             if Option.is_none lv_opt then
486 |               ( false,
487 |                 (Var (Cil.makeVarinfo false "" (Cil.TVoid [])), Cil.NoOffset),
488 |                 "",
489 |                 VarMap.empty,
490 |                 loc )
491 |             else
492 |               let lv = Option.get lv_opt in
493 |               let exp_vars =
494 |                 List.fold_left
495 |                   (fun ev param ->
496 |                     VarMap.union
497 |                       (fun _ vi1 _ -> Some vi1)
498 |                       ev (var_names_of param))
499 |                   VarMap.empty params
500 |               in
501 |               let lval = vname_of lv in
502 |               (true, lv, lval, exp_vars, loc)
503 |         | _ ->
504 |             ( false,
505 |               (Var (Cil.makeVarinfo false "" (Cil.TVoid [])), Cil.NoOffset),
506 |               "",
507 |               VarMap.empty,
508 |               { line = -1; file = ""; byte = -1 } )
509 |       in
510 |       if (not gogo) || lval = "" then result @ [ instr ]
511 |       else if is_pred lval then (
512 |         (* Predicate branch: every variable read by the predicate gets a new
513 |            version that depends on its previous one. See the note on
514 |            [is_pred]: this branch is not reached at the moment. *)
515 |         let exp_vars_with_ver, exp_vars_with_new_ver =
516 |           VarMap.fold
517 |             (fun ev _ (vs, nvs) ->
518 |               (* for debugging *)
519 |               (* print_endline "a"; *)
520 |               let unival_fn_ev = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ ev in
521 |               let unival_ev = "UNIVAL_" ^ ev in
522 |               if VarVerMap.mem unival_fn_ev !var_ver then
523 |                 let ver = VarVerMap.find unival_fn_ev !var_ver in
524 |                 ( (unival_fn_ev ^ "_" ^ string_of_int ver) :: vs,
525 |                   (unival_fn_ev ^ "_" ^ string_of_int (ver + 1)) :: nvs )
526 |               else if VarVerMap.mem unival_ev !var_ver then
527 |                 (* look up the key that was just tested, not the bare name *)
528 |                 let ver = VarVerMap.find unival_ev !var_ver in
529 |                 ( (unival_ev ^ "_" ^ string_of_int ver) :: vs,
530 |                   (unival_ev ^ "_" ^ string_of_int (ver + 1)) :: nvs )
531 |               else (
532 |                 print_endline unival_fn_ev;
533 |                 raise (Failure "Not_Found_Var")))
534 |             exp_vars ([], [])
535 |         in
536 |         let unival_fn_lval = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ lval in
537 |         causal_map := CausalMap.add unival_fn_lval exp_vars_with_ver !causal_map;
538 |         let unival_fn_exp_vars =
539 |           VarMap.fold
540 |             (fun v _ fevs ->
541 |               VarSet.add ("UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ v) fevs)
542 |             exp_vars VarSet.empty
543 |         in
544 |         let unival_exp_vars =
545 |           VarMap.fold
546 |             (fun v _ fevs -> VarSet.add ("UNIVAL_" ^ v) fevs)
547 |             exp_vars VarSet.empty
548 |         in
549 |         let new_var_ver =
550 |           VarVerMap.mapi
551 |             (fun v ver ->
552 |               if VarSet.mem v unival_fn_exp_vars || VarSet.mem v unival_exp_vars
553 |               then ver + 1
554 |               else ver)
555 |             !var_ver
556 |         in
557 |         List.iter2
558 |           (fun old_ver new_ver ->
559 |             causal_map := CausalMap.add new_ver [ old_ver ] !causal_map)
560 |           exp_vars_with_ver exp_vars_with_new_ver;
561 |         let pred_record = call_record lv lval 0 loc in
562 |         let records =
563 |           VarMap.fold
564 |             (fun vname vi rs ->
565 |               (* for debugging *)
566 |               (* print_endline "b"; *)
567 |               let unival_fn_vname =
568 |                 "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ vname
569 |               in
570 |               let unival_vname = "UNIVAL_" ^ vname in
571 |               call_record (Cil.Var vi, Cil.NoOffset) vname
572 |                 (if VarMap.mem unival_fn_vname new_var_ver then
573 |                  VarMap.find unival_fn_vname new_var_ver
574 |                 else VarMap.find unival_vname new_var_ver)
575 |                 loc
576 |               @ rs)
577 |             exp_vars []
578 |         in
579 |         var_ver := new_var_ver;
580 |         result @ (instr :: (pred_record @ records)))
581 |       else
582 |         (* Generic branch: bump the version of the assigned variable (its
583 |            function-scoped entry first, then its global entry, else a new
584 |            function-scoped entry at version 0) and record which versioned
585 |            variables the new value was computed from. *)
586 |         let unival_fn_lval = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ lval in
587 |         let unival_lval = "UNIVAL_" ^ lval in
588 |         let new_var_ver =
589 |           if VarVerMap.mem unival_fn_lval !var_ver then
590 |             VarVerMap.update unival_fn_lval
591 |               (fun ver -> Some (Option.get ver + 1))
592 |               !var_ver
593 |           else if VarVerMap.mem unival_lval !var_ver then
594 |             VarVerMap.update unival_lval
595 |               (fun ver -> Some (Option.get ver + 1))
596 |               !var_ver
597 |           else VarVerMap.add unival_fn_lval 0 !var_ver
598 |         in
599 |         let exp_vars_with_ver =
600 |           VarMap.fold
601 |             (fun ev _ vs ->
602 |               (* for debugging *)
603 |               (* print_endline ev; *)
604 |               let unival_fn_ev = "UNIVAL_" ^ f.Cil.svar.vname ^ "_" ^ ev in
605 |               let unival_ev = "UNIVAL_" ^ ev in
606 |               if VarVerMap.mem unival_fn_ev !var_ver then
607 |                 let ver = VarVerMap.find unival_fn_ev !var_ver in
608 |                 (unival_fn_ev ^ "_" ^ string_of_int ver) :: vs
609 |               else if VarVerMap.mem unival_ev !var_ver then
610 |                 let ver = VarVerMap.find unival_ev !var_ver in
611 |                 (unival_ev ^ "_" ^ string_of_int ver) :: vs
612 |               else (
613 |                 print_endline unival_fn_ev;
614 |                 raise (Failure "Not_Found_Var")))
615 |             exp_vars []
616 |         in
617 |         (* for debugging *)
618 |         (* print_endline "d"; *)
619 |         let final_lval, ver_of_lval =
620 |           if VarVerMap.mem unival_fn_lval new_var_ver then
621 |             (unival_fn_lval, VarVerMap.find unival_fn_lval new_var_ver)
622 |           else (unival_lval, VarVerMap.find unival_lval new_var_ver)
623 |         in
624 |         let lval_with_ver = final_lval ^ "_" ^ string_of_int ver_of_lval in
625 |         causal_map := CausalMap.add lval_with_ver exp_vars_with_ver !causal_map;
626 |         let lv_record = call_record lv lval ver_of_lval loc in
627 |         var_ver := new_var_ver;
628 |         result @ (instr :: lv_record)
629 |     in
630 |     object
631 |       inherit Cil.nopCilVisitor
632 | 
633 |       method! vstmt s =
634 |         match s.Cil.skind with
635 |         | Instr is ->
636 |             s.Cil.skind <- Instr (List.fold_left ass2gsa [] is);
637 |             DoChildren
638 |         | _ -> DoChildren
639 |     end
593 | 
594 |   (* Visitor that runs [assignVisitor] on selected function definitions.
595 |      Globals whose location file starts with "/usr" are not entered.
596 |      Functions whose name starts with "unival" are skipped, and so are
597 |      functions missing from a non-empty [faulty_func_list]. Before a function
598 |      is instrumented, its formals and then its locals are registered in
599 |      [var_ver] at version 0 under "UNIVAL_<function>_<variable>". *)
600 |   class funAssignVisitor (printf, flush, stream) faulty_func_list =
601 |     object
602 |       inherit Cil.nopCilVisitor
603 | 
604 |       method! vglob g =
605 |         match Cil.get_globalLoc g with
606 |         | { Cil.file; _ } when String.starts_with ~prefix:"/usr" file ->
607 |             SkipChildren
608 |         | _ -> DoChildren
609 | 
610 |       method! vfunc f =
611 |         let fname = f.Cil.svar.Cil.vname in
612 |         let excluded =
613 |           String.starts_with ~prefix:"unival" fname
614 |           || (faulty_func_list <> [] && not (List.mem fname faulty_func_list))
615 |         in
616 |         if excluded then Cil.SkipChildren
617 |         else
618 |           let register vi =
619 |             var_ver :=
620 |               VarVerMap.add ("UNIVAL_" ^ fname ^ "_" ^ vi.Cil.vname) 0 !var_ver
621 |           in
622 |           List.iter register f.Cil.sformals;
623 |           List.iter register f.Cil.slocals;
624 |           ChangeTo
625 |             (Cil.visitCilFunction
626 |                (new assignVisitor (printf, flush, stream) f)
627 |                f)
628 |     end
630 | 
631 |   (* Names carried by the [GVarDecl] and [GVar] entries of [globals], in
632 |      their order of appearance; every other kind of global is ignored. *)
633 |   let extract_gvar globals =
634 |     let name_of_global = function
635 |       | Cil.GVarDecl (vi, _) | Cil.GVar (vi, _, _) -> Some vi.Cil.vname
636 |       | _ -> None
637 |     in
638 |     List.filter_map name_of_global globals
638 | 
639 |   (* Instrument the C program parsed from [pt_file] with [funAssignVisitor]
640 |      and write the result over its original source file.
641 | 
642 |      The source file is "<pt_file minus extension>.c" when that file exists,
643 |      otherwise [origin_file_opt], otherwise the first match returned by
644 |      [Utils.find_file] under [work_dir]. Nothing is written when [pt_file]
645 |      raises [Frontc.ParseError], or when [found_type] or [found_gvar] is
646 |      still empty after looking for "_IO_FILE" and "stderr". The source is
647 |      first copied to "<name>.origin.c"; the instrumented program is then
648 |      dumped to "<name>.c", except for the base names "proc_open.c" and
649 |      "cast.c". For a fixed list of base names [append_constructor] is called
650 |      with mode "output". *)
651 |   let gsa_gen ?(faulty_func_list = []) work_dir origin_file_opt pt_file =
652 |     Cil.resetCIL ();
653 |     Cil.insertImplicitCasts := false;
654 |     match Frontc.parse pt_file () with
655 |     | exception Frontc.ParseError _ -> ()
656 |     | cil -> (
657 |         let origin_file_cand = Filename.remove_extension pt_file ^ ".c" in
658 |         let origin_file =
659 |           if Sys.file_exists origin_file_cand then origin_file_cand
660 |           else
661 |             match origin_file_opt with
662 |             | Some file -> file
663 |             | None ->
664 |                 prerr_endline origin_file_cand;
665 |                 Utils.find_file (Filename.basename origin_file_cand) work_dir
666 |                 |> List.hd
667 |         in
668 |         Logging.log "GSA_Gen %s (%s)" origin_file pt_file;
669 |         Cil.visitCilFile (new findTypeVisitor "_IO_FILE") cil;
670 |         Cil.visitCilFile (new findGVarVisitor "stderr") cil;
671 |         match (!found_type, !found_gvar) with
672 |         | None, _ | _, None -> ()
673 |         | Some io_file, Some _ ->
674 |             let fileptr = Cil.TPtr (Cil.TComp (io_file, []), []) in
675 |             let printf =
676 |               Cil.findOrCreateFunc cil "fprintf"
677 |                 (Cil.TFun
678 |                    ( Cil.voidType,
679 |                      Some
680 |                        [
681 |                          ("stream", fileptr, []);
682 |                          ("format", Cil.charPtrType, []);
683 |                        ],
684 |                      true,
685 |                      [] ))
686 |             in
687 |             let flush =
688 |               Cil.findOrCreateFunc cil "fflush"
689 |                 (Cil.TFun
690 |                    (Cil.voidType, Some [ ("stream", fileptr, []) ], false, []))
691 |             in
692 |             let stream = Cil.makeGlobalVar "__inst_stream" fileptr in
693 |             cil.Cil.globals <-
694 |               Cil.GVarDecl (stream, Cil.locUnknown) :: cil.Cil.globals;
695 |             (* Version table restarts for this file: every global at 0. *)
696 |             var_ver :=
697 |               List.fold_left
698 |                 (fun vv gv -> VarVerMap.add ("UNIVAL_" ^ gv) 0 vv)
699 |                 VarVerMap.empty
700 |                 (extract_gvar cil.Cil.globals);
701 |             Cil.visitCilFile
702 |               (new funAssignVisitor (printf, flush, stream) faulty_func_list)
703 |               cil;
704 |             let stem = Filename.remove_extension origin_file in
705 |             (* Back up the source. Both paths are quoted so that names with
706 |                spaces or shell metacharacters reach cp unharmed. *)
707 |             Unix.system
708 |               (Printf.sprintf "cp %s %s"
709 |                  (Filename.quote origin_file)
710 |                  (Filename.quote (stem ^ ".origin.c")))
711 |             |> ignore;
712 |             let base = Filename.basename origin_file in
713 |             if not (List.mem base [ "proc_open.c"; "cast.c" ]) then (
714 |               let oc = open_out (stem ^ ".c") in
715 |               Cil.dumpFile !Cil.printerForMaincil oc "" cil;
716 |               close_out oc);
717 |             if
718 |               List.mem base
719 |                 [
720 |                   "gzip.c";
721 |                   "tif_unix.c";
722 |                   "http_auth.c";
723 |                   "main.c";
724 |                   "version.c";
725 |                   "grep.c";
726 |                   "readelf.c";
727 |                   "core_shntool.c";
728 |                   "sed.c";
729 |                   "tar.c";
730 |                 ]
731 |             then append_constructor work_dir origin_file "output")
713 | 
714 |   (* Write [causal_map] to "CausalMap.txt" in [work_dir]. Every binding
715 |      yields one entry made of the key followed by its parents, combined by
716 |      [Utils.join] with ","; the entries are combined with "\n". *)
717 |   let print_cm work_dir causal_map =
718 |     let oc = open_out (Filename.concat work_dir "CausalMap.txt") in
719 |     let entry_of var parents acc = Utils.join (var :: parents) "," :: acc in
720 |     let entries = CausalMap.fold entry_of causal_map [] in
721 |     Printf.fprintf oc "%s" (Utils.join entries "\n");
722 |     close_out oc
726 | 
727 |   (* Write the fault candidates to "FaultCandidates.txt" in [work_dir]. A
728 |      candidate is a key of [causal_map] without its last "_"-separated
729 |      segment; each distinct candidate is listed once and the list is combined
730 |      by [Utils.join] with "_1\n".
731 |      NOTE(review): if [Utils.join] only places the separator between
732 |      elements, the last candidate is written without the "_1" suffix -
733 |      confirm against [Utils.join]. *)
734 |   let print_fc work_dir causal_map =
735 |     let oc = open_out (Filename.concat work_dir "FaultCandidates.txt") in
736 |     let drop_last_segment var =
737 |       let kept =
738 |         var |> String.split_on_char '_' |> List.rev |> List.tl |> List.rev
739 |       in
740 |       Utils.join kept "_"
741 |     in
742 |     let collect var _ seen =
743 |       let candidate = drop_last_segment var in
744 |       if List.mem candidate seen then seen else candidate :: seen
745 |     in
746 |     let candidates = CausalMap.fold collect causal_map [] in
747 |     Printf.fprintf oc "%s" (Utils.join candidates "_1\n");
748 |     close_out oc
746 | 
747 |   (* Run the GSA instrumentation on the files that [Utils.traverse_pp_file]
748 |      visits under [src_dir]: each one goes through [predicate_transform] and
749 |      then [gsa_gen]. Afterwards temporary files are removed and the causal
750 |      map and the fault candidates are written to [work_dir].
751 | 
752 |      When [!Cmdline.faulty_func] is set, instrumentation is restricted to the
753 |      function names read, one per line, from "faulty_func.txt" in
754 |      [work_dir]. *)
755 |   let run work_dir src_dir =
756 |     let faulty_func_list =
757 |       if !Cmdline.faulty_func then
758 |         let ic = open_in (Filename.concat work_dir "faulty_func.txt") in
759 |         (* Tail-recursive reader; lines come out last-first, which is
760 |            harmless because the list is only used for membership tests. *)
761 |         let rec read_lines ffs =
762 |           match input_line ic with
763 |           | line -> read_lines (line :: ffs)
764 |           | exception End_of_file -> ffs
765 |         in
766 |         (* the channel is closed whether or not reading succeeds *)
767 |         Fun.protect
768 |           ~finally:(fun () -> close_in_noerr ic)
769 |           (fun () -> read_lines [])
770 |       else []
771 |     in
772 |     Utils.traverse_pp_file
773 |       (fun pp_file ->
774 |         let origin_file_opt = Utils.find_origin_file_opt pp_file in
775 |         pp_file
776 |         |> predicate_transform ~faulty_func_list
777 |         |> gsa_gen ~faulty_func_list work_dir origin_file_opt)
778 |       src_dir;
779 |     Utils.remove_temp_files src_dir;
780 |     print_cm work_dir !causal_map;
781 |     print_fc work_dir !causal_map
771 | end
772 | 
773 | module Coverage = struct
774 |   (* Location stored in an instruction ([Set], [Call] or [Asm]). *)
775 |   let location_of_instr instr =
776 |     match instr with
777 |     | Cil.Set (_, _, loc) -> loc
778 |     | Cil.Call (_, _, _, loc) -> loc
779 |     | Cil.Asm (_, _, _, _, _, loc) -> loc
777 | 
778 |   (* Call instruction [printf (stream, "%s:%d\n", <file of loc>, <line of
779 |      loc>)]; the call itself is located at [loc]. *)
780 |   let printf_of printf stream loc =
781 |     let callee = Cil.Lval (Cil.Var printf, Cil.NoOffset) in
782 |     let args =
783 |       [
784 |         Cil.Lval (Cil.Var stream, Cil.NoOffset);
785 |         Cil.Const (Cil.CStr "%s:%d\n");
786 |         Cil.Const (Cil.CStr loc.Cil.file);
787 |         Cil.integer loc.Cil.line;
788 |       ]
789 |     in
790 |     Cil.Call (None, callee, args, loc)
789 | 
790 |   (* Call instruction [flush (stream)], located at [loc]. *)
791 |   let flush_of flush stream loc =
792 |     let callee = Cil.Lval (Cil.Var flush, Cil.NoOffset) in
793 |     let args = [ Cil.Lval (Cil.Var stream, Cil.NoOffset) ] in
794 |     Cil.Call (None, callee, args, loc)
796 | 
797 |   (* Visitor that puts a location report in front of every instruction and
798 |      every other statement of each visited block. A report is the call built
799 |      by [printf_of], followed by the call built by [flush_of] unless
800 |      [!Cmdline.no_seg] is set. Globals whose location file starts with "/usr"
801 |      and the function "bugzoo_ctor" are not entered. *)
802 |   class instrumentVisitor printf flush stream =
803 |     (* Instructions to run just before something located at [loc]. *)
804 |     let probes loc =
805 |       let report = printf_of printf stream loc in
806 |       if !Cmdline.no_seg then [ report ]
807 |       else [ report; flush_of flush stream loc ]
808 |     in
809 |     object
810 |       inherit Cil.nopCilVisitor
811 | 
812 |       method! vglob g =
813 |         match Cil.get_globalLoc g with
814 |         | { Cil.file; _ } when String.starts_with ~prefix:"/usr" file ->
815 |             SkipChildren
816 |         | _ -> DoChildren
817 | 
818 |       method! vfunc fd =
819 |         if String.equal fd.Cil.svar.Cil.vname "bugzoo_ctor" then SkipChildren
820 |         else DoChildren
821 | 
822 |       method! vblock blk =
823 |         let instrument_stmt s =
824 |           match s.Cil.skind with
825 |           | Cil.Instr insts ->
826 |               (* instruction lists are probed in place, one report each *)
827 |               let probed i = probes (Cil.get_instrLoc i) @ [ i ] in
828 |               s.Cil.skind <- Cil.Instr (List.concat_map probed insts);
829 |               [ s ]
830 |           | skind ->
831 |               (* any other statement gets its report as separate statements *)
832 |               let loc = Cil.get_stmtLoc skind in
833 |               List.map Cil.mkStmtOneInstr (probes loc) @ [ s ]
834 |         in
835 |         blk.Cil.bstmts <- List.concat_map instrument_stmt blk.Cil.bstmts;
836 |         Cil.DoChildren
837 |     end
846 | 
847 |   (* Insert coverage reports into the C program parsed from [pt_file] with
848 |      [instrumentVisitor] and write the result over its original source file.
849 | 
850 |      The source file is "<pt_file minus extension>.c" when that file exists,
851 |      otherwise the contents of [origin_file_opt] ([Option.get] raises when it
852 |      is [None]). Any exception raised while parsing is logged and ends the
853 |      call without writing anything; the same holds when [found_type] or
854 |      [found_gvar] is still empty after looking for "_IO_FILE" and "stderr".
855 |      The source is first copied to "<name>.origin.c"; the instrumented
856 |      program is then dumped over the source, except for the base names
857 |      "proc_open.c" and "cast.c". For a fixed list of absolute paths
858 |      [append_constructor] is called with mode "coverage". *)
859 |   let instrument work_dir origin_file_opt pt_file =
860 |     Cil.resetCIL ();
861 |     Cil.insertImplicitCasts := false;
862 |     let cil_opt =
863 |       try Some (Frontc.parse pt_file ()) with
864 |       | Frontc.ParseError _ -> None
865 |       | Stack_overflow ->
866 |           Logging.log "%s" "Stack overflow";
867 |           None
868 |       | e ->
869 |           Logging.log "%s" (Printexc.to_string e);
870 |           None
871 |     in
872 |     match cil_opt with
873 |     | None -> ()
874 |     | Some cil -> (
875 |         let origin_file_cand = Filename.remove_extension pt_file ^ ".c" in
876 |         let origin_file =
877 |           if Sys.file_exists origin_file_cand then origin_file_cand
878 |           else Option.get origin_file_opt
879 |         in
880 |         Logging.log "Instrument Coverage %s (%s)" origin_file pt_file;
881 |         (* TODO: clean up *)
882 |         Cil.visitCilFile (new findTypeVisitor "_IO_FILE") cil;
883 |         Cil.visitCilFile (new findGVarVisitor "stderr") cil;
884 |         match (!found_type, !found_gvar) with
885 |         | None, _ | _, None -> ()
886 |         | Some io_file, Some _ ->
887 |             let fileptr = Cil.TPtr (Cil.TComp (io_file, []), []) in
888 |             let printf =
889 |               Cil.findOrCreateFunc cil "fprintf"
890 |                 (Cil.TFun
891 |                    ( Cil.voidType,
892 |                      Some
893 |                        [
894 |                          ("stream", fileptr, []);
895 |                          ("format", Cil.charPtrType, []);
896 |                        ],
897 |                      true,
898 |                      [] ))
899 |             in
900 |             let flush =
901 |               Cil.findOrCreateFunc cil "fflush"
902 |                 (Cil.TFun
903 |                    (Cil.voidType, Some [ ("stream", fileptr, []) ], false, []))
904 |             in
905 |             let stream = Cil.makeGlobalVar "__inst_stream" fileptr in
906 |             cil.Cil.globals <-
907 |               Cil.GVarDecl (stream, Cil.locUnknown) :: cil.Cil.globals;
908 |             Cil.visitCilFile (new instrumentVisitor printf flush stream) cil;
909 |             (* Back up the source. Both paths are quoted so that names with
910 |                spaces or shell metacharacters reach cp unharmed. *)
911 |             Unix.system
912 |               (Printf.sprintf "cp %s %s"
913 |                  (Filename.quote origin_file)
914 |                  (Filename.quote
915 |                     (Filename.remove_extension origin_file ^ ".origin.c")))
916 |             |> ignore;
917 |             if
918 |               not
919 |                 (List.mem
920 |                    (Filename.basename origin_file)
921 |                    [ "proc_open.c"; "cast.c" ])
922 |             then (
923 |               let oc = open_out origin_file in
924 |               Cil.dumpFile !Cil.printerForMaincil oc "" cil;
925 |               close_out oc);
926 |             if
927 |               List.mem
928 |                 (Unix.realpath origin_file)
929 |                 [
930 |                   "/experiment/src/gzip.c";
931 |                   "/experiment/src/libtiff/tif_unix.c";
932 |                   "/experiment/src/src/http_auth.c";
933 |                   "/experiment/src/main/main.c";
934 |                   "/experiment/src/version.c";
935 |                 ]
936 |             then append_constructor work_dir origin_file "coverage")
913 | 
914 |   (* Apply [instrument] to every file that [Utils.traverse_pp_file] visits
915 |      under [src_dir], passing along the result of
916 |      [Utils.find_origin_file_opt] for that file. *)
917 |   let run work_dir src_dir =
918 |     let instrument_one pp_file =
919 |       instrument work_dir (Utils.find_origin_file_opt pp_file) pp_file
920 |     in
921 |     Utils.traverse_pp_file instrument_one src_dir
920 | end
921 | 
922 | (* Entry point of the instrumentation: initialise CIL, turn implicit cast
923 |    insertion off and run the instrumentation selected by
924 |    [!Cmdline.instrument] on the "src" directory of [work_dir]. Does nothing
925 |    further for [Cmdline.Nothing]. *)
926 | let run work_dir =
927 |   Cil.initCIL ();
928 |   Cil.insertImplicitCasts := false;
929 |   let src_dir = Filename.concat work_dir "src" in
930 |   !Cmdline.instrument |> function
931 |   | Cmdline.Nothing -> ()
932 |   | Cmdline.DfSan -> DfSan.run work_dir src_dir
933 |   | Cmdline.GSA -> GSA.run work_dir src_dir
934 |   | Cmdline.Coverage -> Coverage.run work_dir src_dir
931 | 


--------------------------------------------------------------------------------
/src/localizer.ml:
--------------------------------------------------------------------------------
  1 | module F = Format
  2 | module LineCoverage = Coverage.LineCoverage
  3 | module LineCoverageInst = Coverage.LineCoverage2
  4 | 
  5 | (* Scored source locations and their printers. *)
  6 | module BugLocation = struct
  7 |   (* (location, score_neg, score_pos, score, score_time), as named by the
  8 |      printers below *)
  9 |   type t = Cil.location * float * float * float * int
 10 | 
 11 |   (* Prints "<file>:<line>\t<score_neg> <score_pos> <score> <score_time>". *)
 12 |   let pp fmt (l, score_neg, score_pos, score, score_time) =
 13 |     let { Cil.file; line; _ } = l in
 14 |     F.fprintf fmt "%s:%d\t%f %f %f %d" file line score_neg score_pos score
 15 |       score_time
 16 | 
 17 |   (* Prints "<file base name>:<line>,<score_pos>,<score_neg>,<score>", the
 18 |      first two scores being truncated to integers. Note that the positive
 19 |      score comes first here. *)
 20 |   let pp_cov fmt (l, score_neg, score_pos, score, _score_time) =
 21 |     let file = Filename.basename l.Cil.file in
 22 |     let pos = truncate score_pos in
 23 |     let neg = truncate score_neg in
 24 |     F.fprintf fmt "%s:%d,%d,%d,%f" file l.Cil.line pos neg score
 25 | 
 26 |   (* Prints a file name as is. *)
 27 |   let pp_file fmt file = F.pp_print_string fmt file
 28 | end
 19 | 
 20 | (* Write to the file [resultname] of [!Cmdline.out_dir] the base names of
 21 |    the files owning at least one location whose first score is non-zero, one
 22 |    name per line and without duplicates. The locations of [bic_locations]
 23 |    are examined first, then those of [parent_locations]. *)
 24 | let print_file bic_locations parent_locations resultname =
 25 |   (* one fold step: remember the file of a location unless already known *)
 26 |   let add_file acc (l, s1, _, _, _) =
 27 |     let file = Filename.basename l.Cil.file in
 28 |     if List.mem file acc || s1 = 0. then acc else file :: acc
 29 |   in
 30 |   let locations =
 31 |     List.fold_left add_file
 32 |       (List.fold_left add_file [] bic_locations)
 33 |       parent_locations
 34 |   in
 35 |   let oc3 = Filename.concat !Cmdline.out_dir resultname |> open_out in
 36 |   let fmt3 = F.formatter_of_out_channel oc3 in
 37 |   List.iter (fun l -> F.fprintf fmt3 "%a\n" BugLocation.pp_file l) locations;
 38 |   (* hand everything the formatter may still hold to the channel before it
 39 |      is closed *)
 40 |   F.pp_print_flush fmt3 ();
 41 |   close_out oc3
 39 | 
 40 | (* Write [locations] to the file [resultname] of [!Cmdline.out_dir], one
 41 |    line per location in the [BugLocation.pp_cov] format, and return
 42 |    [locations] unchanged. *)
 43 | let print_coverage locations resultname =
 44 |   let oc2 = Filename.concat !Cmdline.out_dir resultname |> open_out in
 45 |   let fmt2 = F.formatter_of_out_channel oc2 in
 46 |   List.iter (fun l -> F.fprintf fmt2 "%a\n" BugLocation.pp_cov l) locations;
 47 |   (* hand everything the formatter may still hold to the channel before it
 48 |      is closed *)
 49 |   F.pp_print_flush fmt2 ();
 50 |   close_out oc2;
 51 |   locations
 46 | 
 47 | (* Write [locations] to the file [resultname] of [!Cmdline.out_dir], one
 48 |    line per location in the [BugLocation.pp] format. *)
 49 | let print locations resultname =
 50 |   let oc = Filename.concat !Cmdline.out_dir resultname |> open_out in
 51 |   let fmt = F.formatter_of_out_channel oc in
 52 |   List.iter (fun l -> F.fprintf fmt "%a\n" BugLocation.pp l) locations;
 53 |   (* hand everything the formatter may still hold to the channel before it
 54 |      is closed *)
 55 |   F.pp_print_flush fmt ();
 56 |   close_out oc
 52 | 
 53 | (* Run "cp -rf src <!Cmdline.out_dir>" and wait for it to finish. The copy
 54 |    is best effort: the exit status of cp is deliberately ignored (the
 55 |    failing variants used to be commented out here). *)
 56 | let copy_src () =
 57 |   let pid =
 58 |     Unix.create_process "cp"
 59 |       [| "cp"; "-rf"; "src"; !Cmdline.out_dir |]
 60 |       Unix.stdin Unix.stdout Unix.stderr
 61 |   in
 62 |   (* Wait for this very process; a plain [Unix.wait] could return as soon
 63 |      as any other child terminates. *)
 64 |   ignore (Unix.waitpid [] pid)
 65 | 
 66 | (* Localizer without scoring: run [LineCoverage.run], copy the sources with
 67 |    [copy_src] and return one entry per covered line of every coverage
 68 |    element, all scores being zero. *)
 69 | let dummy_localizer work_dir bug_desc =
 70 |   let coverage = LineCoverage.run work_dir bug_desc in
 71 |   Logging.log "Coverage: %a" LineCoverage.pp coverage;
 72 |   copy_src ();
 73 |   let unscored file line = ({ Cil.file; line; byte = 0 }, 0.0, 0.0, 0.0, 0) in
 74 |   let add_file file lines locs = locs @ List.map (unscored file) lines in
 75 |   let add_elem locs elem =
 76 |     Coverage.StrMap.fold add_file elem.LineCoverage.coverage locs
 77 |   in
 78 |   List.fold_left add_elem [] coverage
 82 | 
 83 | (* Build the per-line coverage spectrum and pass it to the first localizer
 84 |    of [localizer_list].
 85 | 
 86 |    Coverage comes from [LineCoverage.run] when [!Cmdline.gcov] is set and
 87 |    from [LineCoverageInst.run] otherwise. Every covered line of a coverage
 88 |    element whose test name starts with "p" adds 1 to the second score of
 89 |    that line; every covered line of any other element adds 1 to the first
 90 |    score. The remaining two scores stay at zero. For the program "php" the
 91 |    directory "/experiment/src/test/bad" is removed through sudo, and a
 92 |    failure of that removal raises [Failure].
 93 | 
 94 |    Returns the result of the first function of [localizer_list] applied to
 95 |    the spectrum, or the spectrum itself when the list is empty. *)
 96 | let spec_localizer work_dir bug_desc localizer_list =
 97 |   let coverage =
 98 |     if !Cmdline.gcov then LineCoverage.run work_dir bug_desc
 99 |     else LineCoverageInst.run work_dir bug_desc
100 |   in
101 |   Logging.log "Coverage: %a" LineCoverage.pp coverage;
102 |   copy_src ();
103 |   let table = Hashtbl.create 99999 in
104 |   (* compiled once instead of once per coverage element *)
105 |   let regexp_pos = Str.regexp "p.*" in
106 |   List.fold_left
107 |     (fun locs (elem : LineCoverage.elem) ->
108 |       (* contribution of one covered line, as (first score, second score) *)
109 |       let s_neg, s_pos =
110 |         if Str.string_match regexp_pos elem.LineCoverage.test 0 then (0.0, 1.0)
111 |         else (1.0, 0.0)
112 |       in
113 |       Coverage.StrMap.fold
114 |         (fun file lines locs ->
115 |           let new_locs =
116 |             List.rev_map
117 |               (fun line ->
118 |                 (* the last component (a per-line history value in a
119 |                    previous version) is always 0 *)
120 |                 ({ Cil.file; line; byte = 0 }, s_neg, s_pos, 0.0, 0))
121 |               lines
122 |           in
123 |           List.rev_append new_locs locs)
124 |         elem.LineCoverage.coverage locs)
125 |     [] coverage
126 |   |> List.iter (fun (l, s1, s2, s3, s4) ->
127 |          (* sum the contributions per location *)
128 |          match Hashtbl.find_opt table l with
129 |          | Some (new_s1, new_s2, new_s3, new_s4) ->
130 |              Hashtbl.replace table l
131 |                (s1 +. new_s1, s2 +. new_s2, s3 +. new_s3, s4 + new_s4)
132 |          | _ -> Hashtbl.add table l (s1, s2, s3, s4));
133 |   if bug_desc.BugDesc.program = "php" then (
134 |     let pid =
135 |       Unix.create_process "sudo"
136 |         [| "sudo"; "rm"; "-rf"; "/experiment/src/test/bad" |]
137 |         Unix.stdin Unix.stdout Unix.stderr
138 |     in
139 |     (* Wait for this very process; a plain [Unix.wait] could report the
140 |        status of some other child. *)
141 |     match Unix.waitpid [] pid |> snd with
142 |     | Unix.WEXITED 0 -> ()
143 |     | Unix.WEXITED n -> failwith ("Error " ^ string_of_int n ^ ": rm bad failed")
144 |     | _ -> failwith "rm bad failed");
145 | 
146 |   let spec_coverage =
147 |     List.map
148 |       (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4))
149 |       (List.of_seq (Hashtbl.to_seq table))
150 |   in
151 |   match localizer_list with
152 |   | (v, _) :: _ -> v work_dir bug_desc spec_coverage
153 |   | _ -> spec_coverage
142 | 
143 | (* Sort: first score desc, second score asc, last field desc (total order). *)
144 | let prophet_localizer _work_dir _bug_desc locations =
145 |   locations
146 |   |> List.stable_sort (fun (_, s11, s12, _, s14) (_, s21, s22, _, s24) ->
147 |          match (Float.compare s21 s11, Float.compare s12 s22) with
148 |          | 0, 0 -> Int.compare s24 s14
149 |          | 0, c | c, _ -> c)
150 | 
151 | (* Tarantula ranking. The first score of a tuple is nef (covering tests not
152 |    starting with 'p'), the second is nep (covering tests starting with
153 |    'p'). With F = nef / number of 'n'-prefixed test cases and
154 |    P = nep / number of 'p'-prefixed test cases, the score is F / (F + P).
155 |    A ratio with a zero denominator counts as 0.0, so a test suite that
156 |    lacks 'p' or 'n' cases does not turn the scores into nan. The result
157 |    is sorted by score, highest first, and equal scores keep their input
158 |    order. *)
159 | let tarantula_localizer _work_dir bug_desc locations =
160 |   let test_cases = bug_desc.BugDesc.test_cases in
161 |   (* Number of test names that [pattern] matches from position 0. *)
162 |   let count pattern =
163 |     let re = Str.regexp pattern in
164 |     List.length (List.filter (fun t -> Str.string_match re t 0) test_cases)
165 |   in
166 |   let pos_num = float_of_int (count "p.*") in
167 |   let neg_num = float_of_int (count "n.*") in
168 |   let ratio num den = if den = 0. then 0. else num /. den in
169 |   let taran_loc =
170 |     List.map
171 |       (fun (l, s1, s2, _, _) ->
172 |         let nef = s1 and nep = s2 in
173 |         (* nef + nnf is neg_num and nep + nnp is pos_num, so the totals
174 |            are used directly as denominators. *)
175 |         let fail_ratio = ratio nef neg_num in
176 |         let pass_ratio = ratio nep pos_num in
177 |         let score = ratio fail_ratio (fail_ratio +. pass_ratio) in
178 |         (l, s1, s2, score, 0))
179 |       locations
180 |   in
181 |   (* Descending by score; Float.compare stays a total order on nan. *)
182 |   List.stable_sort
183 |     (fun (_, _, _, s13, _) (_, _, _, s23, _) -> Float.compare s23 s13)
184 |     taran_loc
185 | 
186 | (* Ochiai ranking. The first score of a tuple is nef (covering tests not
187 |    starting with 'p'), the second is nep (covering tests starting with
188 |    'p'), and nnf is the number of 'n'-prefixed test cases minus nef.
189 |    The score is nef divided by the square root of
190 |    (nef + nnf) times (nef + nep). When that denominator is not positive
191 |    (for instance without any 'n' test case) the score is 0.0 instead of
192 |    nan or infinity. The result is sorted by score, highest first, and
193 |    equal scores keep their input order. *)
194 | let ochiai_localizer _work_dir bug_desc locations =
195 |   let neg_re = Str.regexp "n.*" in
196 |   let neg_num =
197 |     bug_desc.BugDesc.test_cases
198 |     |> List.filter (fun t -> Str.string_match neg_re t 0)
199 |     |> List.length
200 |     |> float_of_int
201 |   in
202 |   let ochiai_loc =
203 |     List.map
204 |       (fun (l, s1, s2, _, _) ->
205 |         let nef = s1 in
206 |         let nep = s2 in
207 |         let nnf = neg_num -. nef in
208 |         let sub_denom1 = nef +. nnf in
209 |         let sub_denom2 = nef +. nep in
210 |         let denom = sqrt (sub_denom1 *. sub_denom2) in
211 |         (* The comparison is false for nan as well, which then maps to 0. *)
212 |         let score = if denom > 0. then nef /. denom else 0. in
213 |         (l, s1, s2, score, 0))
214 |       locations
215 |   in
216 |   (* Descending by score; Float.compare stays a total order on nan. *)
217 |   List.stable_sort
218 |     (fun (_, _, _, s13, _) (_, _, _, s23, _) -> Float.compare s23 s13)
219 |     ochiai_loc
220 | 
221 | (* Jaccard ranking. The first score of a tuple is read as nef (covering
222 |    tests not starting with 'p') and the second as nep (covering tests
223 |    starting with 'p'); nnf is the number of 'n'-prefixed test cases minus
224 |    nef. The score is nef / (nef + nnf + nep). The result is sorted by
225 |    score, highest first, and equal scores keep their input order. The
226 |    work directory argument is not used; only the test case names of
227 |    [bug_desc] are read. *)
228 | let jaccard_localizer _work_dir bug_desc locations =
229 |   (* How many test names [pattern] matches from position 0. *)
230 |   let count pattern =
231 |     let re = Str.regexp pattern in
232 |     List.fold_left
233 |       (fun acc t -> if Str.string_match re t 0 then succ acc else acc)
234 |       0 bug_desc.BugDesc.test_cases
235 |   in
236 |   let neg_num = count "n.*" in
237 |   let scored (l, s1, s2, _, _) =
238 |     let nef = s1 in
239 |     let nep = s2 in
240 |     (* 'n'-prefixed test cases that are not counted in nef. *)
241 |     let nnf = float_of_int neg_num -. nef in
242 |     let denom = nef +. nnf +. nep in
243 |     let score = nef /. denom in
244 |     (l, s1, s2, score, 0)
245 |   in
246 |   (* Descending order on the fourth (score) component. *)
247 |   let by_score_desc (_, _, _, s13, _) (_, _, _, s23, _) =
248 |     if s23 > s13 then 1 else if s23 = s13 then 0 else -1
249 |   in
250 |   locations
251 |   |> List.map scored
252 |   |> List.stable_sort by_score_desc
253 | 
254 | let diff_localizer work_dir bug_desc localizer_list = (* coverage before and after parent_checkout.sh *)
255 |   Unix.chdir "/experiment/src";
256 |   Unix.create_process "make" [| "make"; "clean" |] Unix.stdin Unix.stdout
257 |     Unix.stderr
258 |   |> ignore;
259 |   (match Unix.wait () |> snd with
260 |   | Unix.WEXITED 0 -> ()
261 |   | Unix.WEXITED n ->
262 |       failwith ("Error " ^ string_of_int n ^ ": make clean failed test")
263 |   | _ -> failwith "make clean failed");
264 |   Unix.create_process "make" [| "make"; "distclean" |] Unix.stdin Unix.stdout
265 |     Unix.stderr
266 |   |> ignore;
267 |   (match Unix.wait () |> snd with
268 |   | Unix.WEXITED 0 -> ()
269 |   | Unix.WEXITED n ->
270 |       failwith ("Error " ^ string_of_int n ^ ": make distclean failed")
271 |   | _ -> failwith "make distclean failed");
272 |   (* Copy the cleaned source tree to /experiment/bic. *)
273 |   Unix.chdir "/experiment";
274 |   Unix.create_process "cp"
275 |     [| "cp"; "-rf"; "src"; "bic" |]
276 |     Unix.stdin Unix.stdout Unix.stderr
277 |   |> ignore;
278 |   (match Unix.wait () |> snd with
279 |   | Unix.WEXITED 0 -> ()
280 |   | Unix.WEXITED n -> failwith ("Error " ^ string_of_int n ^ ": cp failed")
281 |   | _ -> failwith "cp failed");
282 |   (* Plain ./configure, without the coverage flags of Scenario.configure. *)
283 |   Unix.chdir "/experiment/src";
284 |   Unix.create_process "./configure" [| "./configure" |] Unix.stdin Unix.stdout
285 |     Unix.stderr
286 |   |> ignore;
287 |   (match Unix.wait () |> snd with
288 |   | Unix.WEXITED 0 -> ()
289 |   | Unix.WEXITED n ->
290 |       failwith ("Error " ^ string_of_int n ^ ": configure failed")
291 |   | _ -> failwith "configure failed");
292 | 
293 |   Unix.chdir "/experiment";
294 | 
295 |   (*let bic_locations = spec_localizer work_dir bug_desc () in*)
296 |   let table = Hashtbl.create 99999 in
297 |   let table_parent = Hashtbl.create 99999 in
298 |   (* First run: unranked counts (empty localizer list), summed per location. *)
299 |   spec_localizer work_dir bug_desc []
300 |   |> List.iter (fun (l, s1, s2, s3, s4) ->
301 |          match Hashtbl.find_opt table l with
302 |          | Some (new_s1, new_s2, new_s3, new_s4) ->
303 |              Hashtbl.replace table l
304 |                (s1 +. new_s1, s2 +. new_s2, s3 +. new_s3, s4 + new_s4)
305 |          | _ -> Hashtbl.add table l (s1, s2, s3, s4));
306 |   (* NOTE(review): presumably this script checks out the parent revision -- confirm. *)
307 |   Unix.chdir "/experiment";
308 |   Unix.create_process "./parent_checkout.sh"
309 |     [| "./parent_checkout.sh" |]
310 |     Unix.stdin Unix.stdout Unix.stderr
311 |   |> ignore;
312 |   (match Unix.wait () |> snd with
313 |   | Unix.WEXITED 0 -> ()
314 |   | Unix.WEXITED n ->
315 |       failwith ("Error " ^ string_of_int n ^ ": parent script failed test2")
316 |   | _ -> failwith "parent script failed");
317 | 
318 |   Unix.chdir "/experiment/src";
319 |   Unix.create_process "./configure" [| "./configure" |] Unix.stdin Unix.stdout
320 |     Unix.stderr
321 |   |> ignore;
322 |   (match Unix.wait () |> snd with
323 |   | Unix.WEXITED 0 -> ()
324 |   | Unix.WEXITED n ->
325 |       failwith ("Error " ^ string_of_int n ^ ": configure failed")
326 |   | _ -> failwith "configure failed");
327 |   (* Second run, after the script above: counts go to table_parent. *)
328 |   Unix.chdir "/experiment";
329 |   spec_localizer work_dir bug_desc []
330 |   |> List.iter (fun (l, s1, s2, s3, s4) ->
331 |          match Hashtbl.find_opt table_parent l with
332 |          | Some (new_s1, new_s2, new_s3, new_s4) ->
333 |              Hashtbl.replace table_parent l
334 |                (s1 +. new_s1, s2 +. new_s2, s3 +. new_s3, s4 + new_s4)
335 |          | _ -> Hashtbl.add table_parent l (s1, s2, s3, s4));
336 | 
337 |   Unix.chdir "/experiment";
338 |   (*
339 |   let open Yojson.Basic.Util in
340 |   let json = Yojson.Basic.from_file "line_matching.json" in
341 |   let changed_file = json |> member "changed_files" |> to_assoc in
342 |   let unchanged_file =
343 |     json |> member "unchanged_files" |> to_list
344 |     |> List.map (fun a -> a |> to_string)
345 |   in
346 |   *)
347 |   let bic_result =
348 |     List.map
349 |       (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4))
350 |       (List.of_seq (Hashtbl.to_seq table))
351 |   in
352 |   (* Each localizer ranks the first-run counts; print_coverage gets the ranking and a per-engine file name. *)
353 |   List.iter
354 |     (fun (localizer, engine_name) ->
355 |       "coverage_" ^ engine_name ^ "_bic.txt"
356 |       |> (bic_result |> localizer work_dir bug_desc |> print_coverage)
357 |       |> ignore)
358 |     localizer_list;
359 |   (* Below: print_file bic_result (print_coverage <second-run counts> "coverage_parent.txt") "coverage_file.txt". *)
360 |   (*
361 |   let parent_result =
362 |     List.map
363 |       (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4))
364 |       (List.of_seq (Hashtbl.to_seq table_parent))
365 |   in
366 |   *)
367 |   "coverage_file.txt"
368 |   |> ("coverage_parent.txt"
369 |      |> (List.map
370 |            (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4))
371 |            (List.of_seq (Hashtbl.to_seq table_parent))
372 |         |> print_coverage)
373 |      |> print_file bic_result);
374 |   [] (* always the empty list; the results live in the files written above *)
375 | (*
376 |   List.iter
377 |     (fun (l, s1, s2, s3, s4) ->
378 |       let new_l =
379 |         if List.mem l.Cil.file unchanged_file then Some l
380 |         else
381 |           match List.assoc_opt l.Cil.file changed_file with
382 |           | Some v
383 |             when l.Cil.line - 1 < List.length (v |> to_list)
384 |                  && List.nth (v |> to_list) (l.Cil.line - 1) |> to_int <> 0 ->
385 |               Some
386 |                 {
387 |                   Cil.file = l.Cil.file;
388 |                   line = List.nth (v |> to_list) (l.Cil.line - 1) |> to_int;
389 |                   byte = 0;
390 |                 }
391 |           | _ -> None
392 |       in
393 |       if new_l <> None then
394 |         let l = Option.get new_l in
395 |         match Hashtbl.find_opt table l with
396 |         | Some (new_s1, new_s2, new_s3, new_s4) ->
397 |             Hashtbl.replace table l
398 |               (new_s1, s1 +. s2 +. new_s2, s3 +. new_s3, s4 + new_s4)
399 |         | _ -> Hashtbl.add table l (0., s1 +. s2, s3, s4))
400 |     parent_result;
401 | 
402 |   "coverage_diff.txt"
403 |   |> (List.map
404 |         (fun (l, (s1, s2, s3, s4)) -> (l, s1, s2, s3, s4))
405 |         (List.of_seq (Hashtbl.to_seq table))
406 |      |> print_coverage)
407 |   *)
408 | 
409 | (* Compile in the work dir. NOTE(review): skip_compile only gates the log. *)
410 | let unival_compile ({ Scenario.work_dir = home; _ } as scenario) bug_desc =
411 |   Unix.chdir home;
412 |   (if not !Cmdline.skip_compile then Logging.log "Start compile");
413 |   Scenario.compile scenario bug_desc.BugDesc.compiler_type; Unix.chdir home
414 | 
415 | (* Run every test case of [bug_desc] through the test script of the
416 |    scenario. Before each run a "*** new execution ***" header with the test
417 |    name and a 0/1 flag (0 when the name starts with 'p') is appended to
418 |    output.txt in the work dir. The exit status of the script is ignored. *)
419 | let unival_run_test scenario bug_desc =
420 |   Logging.log "Start test";
421 |   let text_file_name =
422 |     Filename.concat scenario.Scenario.work_dir "output.txt"
423 |   in
424 |   let run_one test =
425 |     let oc = open_out_gen [ Open_append; Open_creat ] 0o775 text_file_name in
426 |     let flag = match test.[0] with 'p' -> 0 | _ -> 1 in
427 |     Printf.fprintf oc "*** new execution ***,%s,%d\n" test flag;
428 |     close_out oc;
429 |     let script = scenario.Scenario.test_script in
430 |     let _pid =
431 |       Unix.create_process script [| script; test |] Unix.stdin Unix.stdout
432 |         Unix.stderr
433 |     in
434 |     let _status = Unix.wait () in
435 |     Logging.log "End test %s" test
436 |   in
437 |   List.iter run_one bug_desc.BugDesc.test_cases
438 | 
439 | (* UniVal pipeline: compile, instrument, compile again, run the tests, then
440 |    copy the listed artifacts into localizer-out/ under the work dir. *)
441 | let unival_localizer work_dir bug_desc =
442 |   let scenario = Scenario.init work_dir in
443 |   unival_compile scenario bug_desc;
444 |   Instrument.run scenario.work_dir;
445 |   unival_compile scenario bug_desc;
446 |   unival_run_test scenario bug_desc;
447 |   let in_work_dir = Filename.concat scenario.work_dir in
448 |   let stash filename =
449 |     let _pid =
450 |       Unix.create_process "cp"
451 |         [| "cp"; "-rf"; in_work_dir filename; in_work_dir "localizer-out/" |]
452 |         Unix.stdin Unix.stdout Unix.stderr
453 |     in
454 |     ignore (Unix.wait ())
455 |   in
456 |   List.iter stash
457 |     [ "output.txt"; "CausalMap.txt"; "FaultCandidates.txt"; "src/" ]
458 | 
459 | (* Compile, instrument, and compile again, each build from the work dir. *)
460 | let coverage work_dir bug_desc =
461 |   let scenario = Scenario.init ~stdio_only:true work_dir in
462 |   let build () =
463 |     Unix.chdir scenario.Scenario.work_dir;
464 |     Scenario.compile scenario bug_desc.BugDesc.compiler_type in
465 |   build (); Instrument.run scenario.work_dir; build ()
466 | 
467 | (* Entry point: read the bug description of [work_dir] and dispatch on the
468 |    selected engine. Single-engine rankings are printed to
469 |    result_<engine>.txt; with [Cmdline.bic] set the coverage step is
470 |    [diff_localizer] instead of [spec_localizer]. *)
471 | let run work_dir =
472 |   Logging.log "Start localization";
473 |   let bug_desc = BugDesc.read work_dir in
474 |   Logging.log "Bug desc: %a" BugDesc.pp bug_desc;
475 |   let localizer = if !Cmdline.bic then diff_localizer else spec_localizer in
476 |   (* Rank with one engine and print the result under its own file name. *)
477 |   let rank_with engine name =
478 |     print
479 |       (localizer work_dir bug_desc [ (engine, name) ])
480 |       ("result_" ^ name ^ ".txt")
481 |   in
482 |   match !Cmdline.engine with
483 |   | Cmdline.Dummy ->
484 |       print (dummy_localizer work_dir bug_desc) "result_dummy.txt"
485 |   | Cmdline.Tarantula -> rank_with tarantula_localizer "tarantula"
486 |   | Cmdline.Prophet -> rank_with prophet_localizer "prophet"
487 |   | Cmdline.Jaccard -> rank_with jaccard_localizer "jaccard"
488 |   | Cmdline.Ochiai -> rank_with ochiai_localizer "ochiai"
489 |   | Cmdline.UniVal -> unival_localizer work_dir bug_desc
490 |   | Cmdline.Coverage -> coverage work_dir bug_desc
491 |   | Cmdline.All ->
492 |       (* The returned list is dropped; only side effects remain. *)
493 |       let engines =
494 |         [ (prophet_localizer, "prophet"); (tarantula_localizer, "tarantula");
495 |           (jaccard_localizer, "jaccard"); (ochiai_localizer, "ochiai") ]
496 |       in
497 |       ignore (localizer work_dir bug_desc engines)
498 | 


--------------------------------------------------------------------------------
/src/logging.ml:
--------------------------------------------------------------------------------
 1 | module F = Format
 2 | module P = Printf
 3 | 
 4 | let log_file : out_channel option ref = ref None
 5 | 
 6 | let log_formatter = ref None
 7 | 
 8 | let string_of_current_time () = (* local time as YYYYMMDD-HH:MM:SS *)
 9 |   let tm = Unix.localtime (Unix.time ()) in
10 |   P.sprintf "%d%02d%02d" (tm.tm_year + 1900) (tm.tm_mon + 1) tm.tm_mday
11 |   ^ P.sprintf "-%02d:%02d:%02d" tm.tm_hour tm.tm_min tm.tm_sec
12 | 
13 | (* printf-style logging: timestamp prefix, message, newline, then flush.
14 |    Fails when no log formatter has been installed. *)
15 | let log fmt =
16 |   let finish ppf = F.fprintf ppf "\n"; F.pp_print_flush ppf () in
17 |   match !log_formatter with
18 |   | None -> failwith "Cannot open logfile"
19 |   | Some ppf ->
20 |       let stamp = string_of_current_time () in
21 |       F.fprintf ppf "[%s] " stamp;
22 |       F.kfprintf finish ppf fmt
23 | 


--------------------------------------------------------------------------------
/src/main.ml:
--------------------------------------------------------------------------------
 1 | module F = Format
 2 | 
 3 | (* Create the output and coverage directories under [work_dir] (existing
 4 |    ones are kept) and direct logging to log.txt in the output directory. *)
 5 | let initialize work_dir =
 6 |   let ensure_dir dir =
 7 |     try Unix.mkdir dir 0o775 with Unix.Unix_error (Unix.EEXIST, _, _) -> ()
 8 |   in
 9 |   let out_dir = Filename.concat work_dir !Cmdline.out_dir in
10 |   let cov_dir = Filename.concat work_dir "coverage_data" in
11 |   List.iter ensure_dir [ out_dir; cov_dir; Filename.concat cov_dir "tmp" ];
12 |   print_endline ("Logging to " ^ out_dir);
13 |   let oc = open_out (Filename.concat out_dir "log.txt") in
14 |   Logging.log_file := Some oc;
15 |   Logging.log_formatter := Some (F.formatter_of_out_channel oc)
16 | 
17 | (* Parse the command line, then initialize and run the localizer on the
18 |    given work directory; exit with status 1 when none was supplied. *)
19 | let main () =
20 |   Arg.parse Cmdline.options Cmdline.parse_arg
21 |     "Usage: localizer [options] [work dir]";
22 |   let work_dir =
23 |     match !Cmdline.work_dir with
24 |     | Some dir -> dir
25 |     | None -> prerr_endline "Error: No work directory is given"; exit 1 in
26 |   initialize work_dir; Localizer.run work_dir
27 | 
28 | let _ = main ()
29 | 


--------------------------------------------------------------------------------
/src/scenario.ml:
--------------------------------------------------------------------------------
  1 | type t = { (* file locations of one scenario, as filled in by [init] *)
  2 |   work_dir : string; (* work directory; [init] prefixes the cwd when relative *)
  3 |   compile_script : string; (* compile.sh inside work_dir *)
  4 |   test_script : string; (* test.sh inside work_dir *)
  5 |   coverage_data : string; (* coverage.xml inside work_dir *)
  6 | }
  7 | 
  8 | (* Prepend [preamble] to the contents of [filename], rewriting the file in
  9 |    place. The whole file is read into memory and its channel closed before
 10 |    the file is reopened for writing, so truncation cannot lose the
 11 |    original text. *)
 12 | let file_instrument filename preamble =
 13 |   let ic = open_in filename in
 14 |   let c_code = really_input_string ic (in_channel_length ic) in
 15 |   close_in ic;
 16 |   let instr_c_code = preamble ^ c_code in
 17 |   let oc = open_out filename in
 18 |   output_string oc instr_c_code;
 19 |   close_out oc
 20 | 
 21 | (* Apply [file_instrument] with [preamble] to every ".c" file below
 22 |    [work_dir], skipping symlinks and names listed in [Cmdline.blacklist]. *)
 23 | let file_instrument_all work_dir preamble =
 24 |   let rec walk dir =
 25 |     let visit name =
 26 |       let path = Filename.concat dir name in
 27 |       let is_link = (Unix.lstat path).st_kind = Unix.S_LNK in
 28 |       if is_link || List.mem name !Cmdline.blacklist then ()
 29 |       else if Sys.is_directory path then walk path
 30 |       else if Filename.extension name = ".c" then file_instrument path preamble
 31 |     in
 32 |     Array.iter visit (Sys.readdir dir)
 33 |   in
 34 |   walk work_dir
 35 | 
 36 | (* Build the scenario record for [work_dir], prefixed with the current
 37 |    directory when relative. [stdio_only] is accepted but not used here. *)
 38 | let init ?(stdio_only = false) work_dir =
 39 |   let work_dir =
 40 |     match Filename.is_relative work_dir with
 41 |     | true -> Filename.concat (Unix.getcwd ()) work_dir
 42 |     | false -> work_dir
 43 |   in
 44 |   let under name = Filename.concat work_dir name in
 45 |   let compile_script = under "compile.sh" and test_script = under "test.sh" in
 46 |   let coverage_data = under "coverage.xml" in
 47 |   { work_dir; compile_script; test_script; coverage_data }
 48 | 
 49 | (* Run [compile_script], wait for a child, and fail unless it exited with 0. *)
 50 | let simple_compiler compile_script =
 51 |   let open Unix in
 52 |   ignore (create_process compile_script [| compile_script |] stdin stdout stderr);
 53 |   match snd (wait ()) with
 54 |   | WEXITED 0 -> ()
 55 |   | WEXITED n ->
 56 |       failwith (Printf.sprintf "Error %d: %s failed" n compile_script)
 57 |   | _ -> failwith (compile_script ^ " failed")
 58 | 
 59 | (* Run "make -j" (or "make -jN" when [Cmdline.jobs] is N <> 0) and fail
 60 |    unless the awaited child exited with status 0. *)
 61 | let make () =
 62 |   let n_jobs = match !Cmdline.jobs with 0 -> "" | n -> string_of_int n in
 63 |   let open Unix in
 64 |   let argv = [| "make"; "-j" ^ n_jobs |] in
 65 |   ignore (create_process "make" argv stdin stdout stderr);
 66 |   match snd (wait ()) with
 67 |   | WEXITED 0 -> ()
 68 |   | WEXITED n -> failwith ("Error " ^ string_of_int n ^ ": make failed")
 69 |   | _ -> failwith "make failed"
 70 | 
 71 | (* Run ./configure with CFLAGS, CXXFLAGS and LDFLAGS that switch on
 72 |    --coverage (and -save-temps=obj for the compilers), then fail unless
 73 |    the awaited child exited with status 0. *)
 74 | let configure () =
 75 |   let flags =
 76 |     [ "CFLAGS=--coverage -save-temps=obj -Wno-error";
 77 |       "CXXFLAGS=--coverage -save-temps=obj"; "LDFLAGS=-lgcov --coverage" ]
 78 |   in
 79 |   let argv = Array.of_list ("./configure" :: flags) in
 80 |   ignore (Unix.create_process argv.(0) argv Unix.stdin Unix.stdout Unix.stderr);
 81 |   match snd (Unix.wait ()) with
 82 |   | Unix.WEXITED 0 -> ()
 83 |   | Unix.WEXITED code ->
 84 |       failwith ("Error " ^ string_of_int code ^ ": configure failed")
 85 |   | _ -> failwith "configure failed"
 86 | 
 87 | (* Run "make clean" and fail unless the awaited child exited with status 0. *)
 88 | let make_clean () =
 89 |   let open Unix in
 90 |   ignore (create_process "make" [| "make"; "clean" |] stdin stdout stderr);
 91 |   match snd (wait ()) with
 92 |   | WEXITED 0 -> ()
 93 |   | WEXITED code ->
 94 |       failwith ("Error " ^ string_of_int code ^ ": make clean failed")
 95 |   | _ -> failwith "make clean failed"
 96 | 
 97 | (* Run "make distclean"; fail unless the awaited child exited with 0. *)
 98 | let make_distclean () =
 99 |   let open Unix in
100 |   ignore (create_process "make" [| "make"; "distclean" |] stdin stdout stderr);
101 |   match snd (wait ()) with
102 |   | WEXITED 0 -> ()
103 |   | WEXITED code ->
104 |       failwith ("Error " ^ string_of_int code ^ ": make distclean failed")
105 |   | _ -> failwith "make distclean failed"
106 | 
107 | (* From the src subdirectory: make clean, make distclean, configure, make.
108 |    The process stays in that subdirectory afterwards. *)
109 | let configure_and_make () =
110 |   Unix.chdir "src";
111 |   let steps = [ make_clean; make_distclean; configure; make ] in
112 |   List.iter (fun step -> step ()) steps
113 | 
114 | (* Dispatch on [compiler_type]; anything but the two known names fails. *)
115 | let compile scenario = function
116 |   | "configure-and-make" -> configure_and_make ()
117 |   | "compile" -> simple_compiler scenario.compile_script
118 |   | _ -> failwith "Unknown compiler"
119 | 
120 | (* Run [test_script] on test [name]; wait for a child, ignoring its status. *)
121 | let run_test test_script name =
122 |   let open Unix in
123 |   ignore (create_process test_script [| test_script; name |] stdin stdout stderr);
124 |   ignore (wait ())
125 | 


--------------------------------------------------------------------------------
/src/utils.ml:
--------------------------------------------------------------------------------
 1 | (* Concatenate the strings of [strlist], inserting [delimiter] between
 2 |    consecutive elements. The empty list gives "" and a single element is
 3 |    returned unchanged. Delegates to the standard library instead of
 4 |    building the result through repeated, non-tail-recursive appends. *)
 5 | let join strlist delimiter = String.concat delimiter strlist
 6 | 
 7 | (* "rm -f" [filename] when a sibling ".c" file exists; announce it on stdout. *)
 8 | let remove_unnec_file filename =
 9 |   let c_source = Filename.remove_extension filename ^ ".c" in
10 |   if Sys.file_exists c_source then (
11 |     print_endline ("Remove " ^ filename);
12 |     let open Unix in
13 |     ignore (create_process "rm" [| "rm"; "-f"; filename |] stdin stdout stderr);
14 |     ignore (wait ()))
15 | 
16 | (* Walk [root_dir], printing each path; files with a temp extension go to
17 |    [remove_unnec_file]. Symlinks and ".hg"-suffixed directories are skipped. *)
18 | let rec remove_temp_files root_dir =
19 |   let temp_exts = [ ".i"; ".lo"; ".s"; ".gcno"; ".o"; ".asm" ] in
20 |   let visit file =
21 |     let file_path = Filename.concat root_dir file in
22 |     print_endline file_path;
23 |     let is_link = (Unix.lstat file_path).st_kind = Unix.S_LNK in
24 |     if is_link then ()
25 |     else if Sys.is_directory file_path then (
26 |       if not (Filename.check_suffix file_path ".hg") then
27 |         remove_temp_files file_path)
28 |     else if List.mem (Filename.extension file) temp_exts then
29 |       remove_unnec_file file_path
30 |   in
31 |   Array.iter visit (Sys.readdir root_dir)
32 | 
33 | let dash2under_bar s = String.map (function '-' -> '_' | c -> c) s (* every '-' becomes '_' *)
34 | 
35 | (* Apply [f] to every ".i" file below [root_dir]. Symlinks, directories
36 |    whose path ends in ".libs" or ".hg", and files whose name is on the
37 |    fixed exclusion list are skipped. *)
38 | let rec traverse_pp_file f root_dir =
39 |   let excluded =
40 |     [
41 |       "libldtestplug_la-testplug.i";
42 |       "sysinfo.i";
43 |       "sed_sed-compile.i";
44 |       "sed_sed-regexp.i";
45 |       "sed_sed-execute.i";
46 |       "sed_sed-mbcs.i";
47 |       "sed_sed-sed.i";
48 |       "sed_sed-utils.i";
49 |       "dummy-1522.i";
50 |     ]
51 |   in
52 |   let skip_dir path =
53 |     Filename.check_suffix path ".libs" || Filename.check_suffix path ".hg"
54 |   in
55 |   let visit file =
56 |     let file_path = Filename.concat root_dir file in
57 |     if (Unix.lstat file_path).st_kind = Unix.S_LNK then ()
58 |     else if Sys.is_directory file_path then (
59 |       if not (skip_dir file_path) then traverse_pp_file f file_path)
60 |     else if List.mem (Filename.basename file) excluded then ()
61 |     else if Filename.extension file = ".i" then f file_path
62 |   in
63 |   Array.iter visit (Sys.readdir root_dir)
64 | 
65 | (* Collect the paths below [root_dir] whose base name equals that of
66 |    [filename]. Symlinks and directories whose path ends in "mytest" or
67 |    ".libs" are not followed. *)
68 | let rec find_file filename root_dir =
69 |   let wanted = Filename.basename filename in
70 |   let step paths file =
71 |     let file_path = Filename.concat root_dir file in
72 |     if (Unix.lstat file_path).st_kind = Unix.S_LNK then paths
73 |     else if Sys.is_directory file_path then
74 |       if List.exists (Filename.check_suffix file_path) [ "mytest"; ".libs" ]
75 |       then paths
76 |       else paths @ find_file filename file_path
77 |     else if Filename.basename file_path = wanted then file_path :: paths
78 |     else paths
79 |   in
80 |   Array.fold_left step [] (Sys.readdir root_dir)
81 | 
82 | (* Origin path from the first-line marker of [pp_file]; closes the channel. *)
83 | let find_origin_file_opt pp_file =
84 |   let ic = open_in pp_file in
85 |   let line = try input_line ic with e -> close_in_noerr ic; raise e in
86 |   close_in ic;
87 |   assert (String.starts_with ~prefix:"# 1" line);
88 |   let f = List.nth (String.split_on_char '"' line) 1 in
89 |   try Some (if Filename.is_relative f then Unix.realpath f else f) with _ -> None
90 | 


--------------------------------------------------------------------------------
/src/visualizer.ml:
--------------------------------------------------------------------------------
 1 | module Node = struct (* graph vertices are strings *)
 2 |   include String
 3 |   (* Generic structural hash for the vertex labels. *)
 4 |   let hash = Hashtbl.hash
 5 | end
 6 | 
 7 | module Edge = struct (* edge labels are strings *)
 8 |   type t = string
 9 | 
10 |   let compare = compare (* the polymorphic comparison, used at type string *)
11 | 
12 |   let default = "" (* label used when none is given *)
13 | end
14 | 
15 | module G = struct (* graph type plus the hooks passed to Graph.Graphviz.Dot *)
16 |   include Graph.Persistent.Digraph.ConcreteBidirectionalLabeled (Node) (Edge)
17 | 
18 |   let default_vertex_attributes _ = []
19 | 
20 |   let default_edge_attributes _ = []
21 |   (* Edges labeled "n1" get color 0xff0000, all others 0x000000. *)
22 |   let edge_attributes (_, edge, _) =
23 |     if edge = "n1" then [ `Color 0xff0000 ] else [ `Color 0x000000 ]
24 | 
25 |   let get_subgraph _ = None
26 |   (* Every vertex is drawn with the box shape. *)
27 |   let vertex_attributes _ = [ `Shape `Box ]
28 |   (* Vertex names are wrapped in double quotes. *)
29 |   let vertex_name v = "\"" ^ v ^ "\""
30 | 
31 |   let graph_attributes _ = []
32 | end
33 | 
34 | module Graphviz = Graph.Graphviz.Dot (G)
35 | 
36 | let rec draw ic name pred graph = (* chain the lines of ic as labeled edges *)
37 |   match input_line ic with
38 |   | exception _ -> graph
39 |   | next -> draw ic name next (G.add_edge_e graph (pred, name, next))
40 | 
41 | (* Fold every file named on the command line into one graph, chaining its
42 |    lines from "__START__", and print the graph in DOT form on stdout. *)
43 | let main () =
44 |   let add_file graph file =
45 |     let ic = open_in file in
46 |     let graph = draw ic file "__START__" graph in
47 |     close_in ic; graph
48 |   in
49 |   let files = List.tl (Array.to_list Sys.argv) in
50 |   Graphviz.output_graph stdout (List.fold_left add_file G.empty files)
51 | 
52 | let _ = main ()
53 | 


--------------------------------------------------------------------------------
/test/simple1/Makefile:
--------------------------------------------------------------------------------
1 | all: # build the test program through compile.sh
2 | 	./compile.sh
3 | # clean: delete "bug" and the files matching *.gc* and *.xml in this directory
4 | clean:
5 | 	rm -rf bug *.gc* *.xml
6 | 


--------------------------------------------------------------------------------
/test/simple1/bug_desc.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compiler": {
 3 |     "time-limit": 300,
 4 |     "type": "compile"
 5 |   },
 6 |   "test-harness": {
 7 |     "failing": 1,
 8 |     "passing": 5,
 9 |     "time-limit": 300
10 |   }
11 | }
12 | 


--------------------------------------------------------------------------------
/test/simple1/compile.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd src || exit 1 # never compile from the wrong directory
3 | gcc -o bug --coverage --save-temps -lgcov -g bug.c || exit 1 # a failed build must not exit 0
4 | cd ..
5 | 


--------------------------------------------------------------------------------
/test/simple1/src/bug.c:
--------------------------------------------------------------------------------
 1 | #include <stdio.h>
 2 | #include <stdlib.h>
 3 | #include <string.h>
 4 | /* Returns arg * arg, except that the input 487 yields arg + 1 (test n1 in test.sh feeds 487 and expects the square). */
 5 | int calc(int arg) {
 6 |   if (arg != 487) {
 7 |     return arg * arg;
 8 |   } else {
 9 |     return arg + 1;
10 |   }
11 | }
12 | /* Prints calc(atoi(argv[1])) on its own line; with any other argument count it prints a usage message and returns 1. */
13 | int main(int argc, char *argv[]) {
14 |   if (argc != 2) {
15 |     printf("Please, provide a positive float number.");
16 |     return 1;
17 |   }
18 | 
19 |   int arg = atoi(argv[1]);
20 |   int res = calc(arg);
21 | 
22 |   printf("%d\n", res);
23 |   return 0;
24 | }
25 | 


--------------------------------------------------------------------------------
/test/simple1/test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | # Test driver: "test.sh <name>" exits 0 when the named test passes, 1 otherwise.
 3 | TEST_HOME="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # directory of this script
 4 | BIN=$TEST_HOME/bug # program under test
 5 | # run_test N: succeed when the output of the program for N equals N * N.
 6 | function run_test() {
 7 |   diff -q <($BIN $1) <(expr $1 \* $1) >/dev/null
 8 |   return $?
 9 | }
10 | # Unknown names and failed comparisons fall through to the final "exit 1".
11 | case $1 in
12 | p1) run_test 1 && exit 0 ;;
13 | p2) run_test 12 && exit 0 ;;
14 | p3) run_test 123 && exit 0 ;;
15 | p4) run_test 1234 && exit 0 ;;
16 | p5) run_test 12345 && exit 0 ;;
17 | n1) run_test 487 && exit 0 ;;
18 | esac
19 | exit 1
20 | 


--------------------------------------------------------------------------------
/unival-docker/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM openjdk:8-jdk
 2 | LABEL maintainer="oojahooo <oojahooo@gmail.com>"
 3 | 
 4 | WORKDIR /unival/
 5 | # Keep apt from prompting during the image build.
 6 | ARG DEBIAN_FRONTEND=noninteractive
 7 | RUN apt-get update && apt-get install -y r-base
 8 | # R packages installed into the image.
 9 | RUN Rscript -e "install.packages(c(\"glmnet\", \"ranger\", \"stringdist\", \"dbscan\", \"dplyr\", \"tidyr\"))"
10 | # Put the files of src/ directly into /unival/.
11 | ADD src/* /unival/
12 | 


--------------------------------------------------------------------------------
/unival-docker/src/ProcessDataProfile.java:
--------------------------------------------------------------------------------
 1 | import java.io.BufferedReader;
 2 | import java.io.FileReader;
 3 | import java.io.IOException;
 4 | import java.util.HashMap;
 5 | import java.util.HashSet;
 6 | import java.util.Set;
 7 | 
 8 | public class ProcessDataProfile {
 9 | 
10 |     /**
11 |      * Loads the causal map and the fault candidates, then passes both to
12 |      * StructuredDataCollector.structureData together with the name of the
13 |      * execution profile.
14 |      *
15 |      * @param args unused
16 |      * @throws IOException declared for the collector call
17 |      */
18 |     public static void main(String[] args) throws IOException {
19 |         // Must name the file written by the instrumented program.
20 |         String dataFileName = "output.txt";
21 | 
22 |         HashMap<String, HashSet<String>> causalMap = createCausalMap();
23 |         Set<String> faultCandidates = getFaultCandidates();
24 |         StructuredDataCollector.structureData(dataFileName, causalMap, faultCandidates);
25 |     }
26 | 
27 |     /**
28 |      * Parses CausalMap.txt from the current directory. Each line is split on
29 |      * commas: the first field becomes a key, the remaining fields form its
30 |      * value set. A key seen on several lines keeps only its last line.
31 |      *
32 |      * @return the parsed map, or {@code null} if reading fails
33 |      */
34 |     public static HashMap<String, HashSet<String>> createCausalMap() {
35 |         HashMap<String, HashSet<String>> causalMap = new HashMap<>();
36 |         // try-with-resources closes the reader even when readLine throws.
37 |         try (BufferedReader reader = new BufferedReader(new FileReader("CausalMap.txt"))) {
38 |             String line;
39 |             while ((line = reader.readLine()) != null) {
40 |                 String[] row = line.split(",");
41 |                 HashSet<String> values = new HashSet<>();
42 |                 for (int i = 1; i < row.length; i++) {
43 |                     values.add(row[i]);
44 |                 }
45 |                 causalMap.put(row[0], values);
46 |             }
47 |             return causalMap;
48 |         } catch (IOException e) {
49 |             return null;
50 |         }
51 |     }
52 | 
53 |     /**
54 |      * Reads FaultCandidates.txt from the current directory, one candidate
55 |      * per line.
56 |      *
57 |      * @return the set of lines, or {@code null} if reading fails
58 |      */
59 |     public static Set<String> getFaultCandidates() {
60 |         Set<String> faultCandidates = new HashSet<String>();
61 |         // The reader is closed automatically on every exit path.
62 |         try (BufferedReader reader = new BufferedReader(new FileReader("FaultCandidates.txt"))) {
63 |             String line;
64 |             while ((line = reader.readLine()) != null) {
65 |                 faultCandidates.add(line);
66 |             }
67 |             return faultCandidates;
68 |         } catch (IOException e) {
69 |             return null;
70 |         }
71 |     }
72 | }
72 | 


--------------------------------------------------------------------------------
/unival-docker/src/RFC.R:
--------------------------------------------------------------------------------
  1 | source("RforCFmeansRF.R")
  2 | 
  3 | # 3 RF
  4 | # Fits a ranger forest for `outVarName ~ .` on dataFrame, then predicts on a
  5 | # copy of the data in which every row of treatVarName is set to treatVal.
  6 | # Returns the predictions extracted from the ranger prediction object.
  7 | predCFoutRF <- function(dataFrame, outVarName, treatVarName, treatVal) {
  8 |   library("ranger")
  9 |   model <- ranger(paste(outVarName, " ~ .", sep = ""), data = dataFrame)
 10 |   counterfactual <- data.frame(dataFrame)
 11 |   nValues <- length(counterfactual[[treatVarName]])
 12 |   counterfactual[[treatVarName]] <- rep(treatVal, nValues)
 13 |   predicted <- predict(model, counterfactual)
 14 |   return(predictions(predicted))
 15 | }
 24 | 
 25 | # 3 LM
 26 | # Linear-model counterpart of predCFoutRF: fits `outVarName ~ .` with lm() and
 27 | # predicts on a copy of the data whose treatVarName column is forced to
 28 | # treatVal.
 29 | # NOTE: this file assigns predCFoutLM again further down (lasso variant); once
 30 | # the whole file is sourced, that later definition is the one in effect.
 31 | predCFoutLM <- function(dataFrame, outVarName, treatVarName, treatVal) {
 32 |   fit <- lm(paste(outVarName, " ~ .", sep = ""), data = dataFrame)
 33 |   counterfactual <- data.frame(dataFrame)
 34 |   nValues <- length(counterfactual[[treatVarName]])
 35 |   counterfactual[[treatVarName]] <- rep(treatVal, nValues)
 36 |   return(predict(fit, counterfactual))
 37 | }
 43 | 
 44 | # 3 predict with lasso
 45 | # Lasso counterpart of predCFoutRF. Fits cv.glmnet with column 1 of dataFrame
 46 | # as the response and all remaining columns as predictors, then predicts (at
 47 | # lambda.min) on a copy of the data whose treatVarName column is set to
 48 | # treatVal. outVarName is accepted for signature parity but not used: the
 49 | # response is taken by position.
 50 | # NOTE(review): assumes the outcome is always the first column - confirm
 51 | # against the callers.
 52 | # This assignment replaces the lm-based predCFoutLM defined earlier in the file.
 53 | predCFoutLM <- function(dataFrame, outVarName, treatVarName, treatVal) {
 54 |   library(glmnet)
 55 |   # drop = FALSE keeps a one-predictor frame as a frame (with its column name)
 56 |   # instead of collapsing it to a bare vector.
 57 |   fit.glmnet <- cv.glmnet(
 58 |     x = data.matrix(dataFrame[, -1, drop = FALSE]),
 59 |     data.matrix(dataFrame[, 1]),
 60 |     family = "gaussian"
 61 |   )
 62 |   CFdata <- data.frame(dataFrame)
 63 |   CFdata[[treatVarName]] <- rep(treatVal, length(CFdata[[treatVarName]]))
 64 |   CFout <- predict(
 65 |     fit.glmnet,
 66 |     newx = data.matrix(CFdata[, -1, drop = FALSE]),
 67 |     s = "lambda.min",
 68 |     type = 'response'
 69 |   )
 70 |   # Return explicitly; previously the value only escaped (invisibly) as the
 71 |   # result of the trailing assignment because the return was commented out.
 72 |   return(CFout)
 73 | }
 73 | # Prediction happens here.
 74 | # Fits a probability forest (ranger, probability = TRUE) for `outVarName ~ .`
 75 | # and predicts on a copy of the data whose treatVarName column is set to
 76 | # treatVal. Returns the second column of the predicted probability matrix.
 77 | predCFprobRF <- function(dataFrame, outVarName, treatVarName, treatVal) {
 78 |   library("ranger")
 79 |   model <- ranger(
 80 |     paste(outVarName, " ~ .", sep = ""),
 81 |     data = dataFrame,
 82 |     probability = TRUE
 83 |   )
 84 |   counterfactual <- data.frame(dataFrame)
 85 |   nValues <- length(counterfactual[[treatVarName]])
 86 |   counterfactual[[treatVarName]] <- rep(treatVal, nValues)
 87 |   classProbs <- predictions(predict(model, data = counterfactual))
 88 |   return(classProbs[, 2])
 89 | }
 94 | 
 95 | # 0
 96 | # Trains and returns a ranger forest for `outVarName ~ .` on dataFrame.
 97 | trainCFoutPredRF <- function(dataFrame, outVarName) {
 98 |   library("ranger")
 99 |   modelFormula <- paste(outVarName, " ~ .", sep = "")
100 |   return(ranger(modelFormula, data = dataFrame))
101 | }
105 | 
106 | # Trains and returns a ranger probability forest (probability = TRUE) for
107 | # `outVarName ~ .` on dataFrame.
108 | trainCFprobPredRF <- function(dataFrame, outVarName) {
109 |   library("ranger")
110 |   modelFormula <- paste(outVarName, " ~ .", sep = "")
111 |   return(ranger(modelFormula, data = dataFrame, probability = TRUE))
112 | }
117 | 
118 | # Mean counterfactual outcome (random forest) for the treatment values
119 | # treatVec[minTreat], ..., treatVec[maxTreat].
120 | # Returns a numeric vector of length maxTreat - minTreat + 1 whose k-th entry
121 | # belongs to treatVec[minTreat + k - 1]; an empty vector if maxTreat < minTreat.
122 | CFmeansForTreatRangeRF <-
123 |   function(dataFrame,
124 |            outVarName,
125 |            treatVarName,
126 |            treatVec,
127 |            minTreat,
128 |            maxTreat) {
129 |     if (maxTreat < minTreat) {
130 |       # minTreat:maxTreat would count downwards here; nothing to evaluate.
131 |       return(numeric(0))
132 |     }
133 |     treatIdx <- seq(minTreat, maxTreat)
134 |     CFmeans <- rep(0, length(treatIdx))
135 |     # Store results by position within the range. Indexing CFmeans by the
136 |     # treatVec index itself left leading zeros and a vector of length maxTreat
137 |     # whenever minTreat was greater than 1.
138 |     for (k in seq_along(treatIdx)) {
139 |       CFmeans[k] <-
140 |         mean(predCFoutRF(dataFrame, outVarName, treatVarName, treatVec[treatIdx[k]]))
141 |     }
142 |     return(CFmeans)
143 |   }
138 | 
139 | # 2 RF
140 | # For each treatment value in treatVec, returns the mean of the random-forest
141 | # counterfactual predictions obtained by predCFoutRF for that value.
142 | # Returns a numeric vector with one entry per element of treatVec.
143 | CFmeansForTreatVecRF <-
144 |   function(dataFrame,
145 |            outVarName,
146 |            treatVarName,
147 |            treatVec) {
148 |     CFmeans <- rep(0, length(treatVec))
149 |     # seq_along() yields no iterations for an empty treatVec, whereas
150 |     # 1:length(treatVec) would iterate over c(1, 0).
151 |     for (i in seq_along(treatVec)) {
152 |       CFmeans[i] <-
153 |         mean(predCFoutRF(dataFrame, outVarName, treatVarName, treatVec[i]))
154 |     }
155 |     return(CFmeans)
156 |   }
159 | 
160 | # 2 LM
161 | # For each treatment value in treatVec, returns the mean of the counterfactual
162 | # predictions obtained by predCFoutLM for that value.
163 | # Returns a numeric vector with one entry per element of treatVec.
164 | CFmeansForTreatVecLM <-
165 |   function(dataFrame,
166 |            outVarName,
167 |            treatVarName,
168 |            treatVec) {
169 |     CFmeans <- rep(0, length(treatVec))
170 |     # seq_along() yields no iterations for an empty treatVec, whereas
171 |     # 1:length(treatVec) would iterate over c(1, 0).
172 |     for (i in seq_along(treatVec)) {
173 |       CFmeans[i] <-
174 |         mean(predCFoutLM(dataFrame, outVarName, treatVarName, treatVec[i]))
175 |     }
176 |     return(CFmeans)
177 |   }
179 | 
180 | 
181 | # For each treatment value in treatVec, returns the mean of the counterfactual
182 | # probabilities obtained by predCFprobRF for that value.
183 | # Returns a numeric vector with one entry per element of treatVec.
184 | CFprobsForTreatVecRF <-
185 |   function(dataFrame,
186 |            outVarName,
187 |            treatVarName,
188 |            treatVec) {
189 |     CFprobs <- rep(0, length(treatVec))
190 |     # seq_along() yields no iterations for an empty treatVec, whereas
191 |     # 1:length(treatVec) would iterate over c(1, 0).
192 |     for (i in seq_along(treatVec)) {
193 |       CFprobs[i] <-
194 |         mean(predCFprobRF(dataFrame, outVarName, treatVarName, treatVec[i]))
195 |     }
196 |     return(CFprobs)
197 |   }
199 | 
200 | 
201 | 
202 | # 1 LM for lasso
203 | # CFmeansForDecileBinsLM <- function(dataFrame, outVarName, treatVarName) {
204 | #   fivePercentQuantiles <- quantile(dataFrame[[treatVarName]], prob = seq(0, 1, length = 21), type = 5, na.rm = TRUE)
205 | #   evenQuantiles <- fivePercentQuantiles[seq(2, 20, by=2)]
206 | #
207 | #   vec <- dataFrame[c(treatVarName)]
208 | #   average <- mean(vec[,1])
209 | #   s = 0
210 | #   for (i in 1:nrow(dataFrame[c(treatVarName)])){
211 | #     temp <- vec[i,]
212 | #     s = s + abs(temp - average);
213 | #   }
214 | #   if(s > 0){
215 | #     return(CFmeansForTreatVecLM(dataFrame, outVarName, treatVarName, evenQuantiles))
216 | #   }else{
217 | #     return(-1)
218 | #   }
219 | # }
220 | 
221 | # 1 LM for Lasso
222 | # Evaluates CFmeansForTreatVecLM at the 5%, 15%, ..., 95% quantiles of the
223 | # treatment column (quantiles are taken before the rows are cleaned).
224 | # Returns -1 when no complete rows remain, when mean(dataFrame$Y) is 0, or when
225 | # all but at most three rows carry the median treatment value.
226 | # NOTE: this file assigns CFmeansForDecileBinsLM again right below; once the
227 | # whole file is sourced, that later definition is the one in effect.
228 | CFmeansForDecileBinsLM <- function(dataFrame, outVarName, treatVarName) {
229 |   allCuts <- quantile(
230 |     dataFrame[[treatVarName]],
231 |     probs = seq(0, 1, length.out = 21),
232 |     type = 5,
233 |     na.rm = TRUE
234 |   )
235 |   decileCuts <- allCuts[seq(2, 20, by = 2)]
236 |   # Inf -> NA, then keep only rows without NA/NaN
237 |   cleaned <- lapply(dataFrame, function(x) replace(x, is.infinite(x), NA))
238 |   dataFrame <- do.call(data.frame, cleaned)
239 |   dataFrame <- dataFrame[complete.cases(dataFrame), ]
240 |   if ((nrow(dataFrame) == 0) || (mean(dataFrame$Y) == 0)) {
241 |     return(-1)
242 |   }
243 |   treatValues <- dataFrame[c(treatVarName)][, 1]
244 |   atMedian <- sum(treatValues == median(treatValues))
245 |   if (atMedian < nrow(dataFrame) - 3) {
246 |     return(CFmeansForTreatVecLM(dataFrame, outVarName, treatVarName, decileCuts))
247 |   }
248 |   return(-1)
249 | }
259 | 
260 | # 1 LM
261 | # Evaluates CFmeansForTreatVecLM at the 5%, 15%, ..., 95% quantiles (type 5,
262 | # NAs ignored) of the treatment column. This assignment replaces the lasso
263 | # variant of CFmeansForDecileBinsLM defined just above it in this file.
264 | CFmeansForDecileBinsLM <- function(dataFrame, outVarName, treatVarName) {
265 |   allCuts <- quantile(
266 |     dataFrame[[treatVarName]],
267 |     probs = seq(0, 1, length.out = 21),
268 |     type = 5,
269 |     na.rm = TRUE
270 |   )
271 |   decileCuts <- allCuts[seq(2, 20, by = 2)]
272 |   return(CFmeansForTreatVecLM(dataFrame, outVarName, treatVarName, decileCuts))
273 | }
273 | 
274 | # Quantiles the treatment values: evaluates CFprobsForTreatVecRF at the
275 | # 5%, 15%, ..., 95% quantiles (type 5) of the treatment column. NAs are not
276 | # removed here, so quantile() fails if the column contains any.
277 | CFprobsForDecileBinsRF <- function(dataFrame, outVarName, treatVarName) {
278 |   allCuts <- quantile(
279 |     dataFrame[[treatVarName]],
280 |     probs = seq(0, 1, length.out = 21),
281 |     type = 5
282 |   )
283 |   decileCuts <- allCuts[seq(2, 20, by = 2)]
284 |   return(CFprobsForTreatVecRF(dataFrame, outVarName, treatVarName, decileCuts))
285 | }
284 | # Finds the largest difference between any two entries of CFMeanVec.
285 | # Returns c(maxCon, index1, index2): maxCon is the largest positive difference,
286 | # index1 the position of the larger entry and index2 the position of the
287 | # smaller one. Returns c(0, -1, -1) when no pair differs or when the vector has
288 | # fewer than two entries.
289 | maxContrast <- function(CFMeanVec) {
290 |   maxCon <- 0
291 |   index1 <- -1
292 |   index2 <- -1
293 |   n <- length(CFMeanVec)
294 |   # With fewer than two entries there is no pair to compare; 1:(n - 1) would
295 |   # count downwards and index past the end of the vector.
296 |   if (n < 2) {
297 |     return(c(maxCon, index1, index2))
298 |   }
299 |   for (i in seq_len(n - 1)) {
300 |     for (j in (i + 1):n) {
301 |       if ((CFMeanVec[i] - CFMeanVec[j]) > maxCon) {
302 |         maxCon <- CFMeanVec[i] - CFMeanVec[j]
303 |         index1 <- i
304 |         index2 <- j
305 |       }
306 |       else if ((CFMeanVec[j] - CFMeanVec[i]) > maxCon) {
307 |         maxCon <- CFMeanVec[j] - CFMeanVec[i]
308 |         index1 <- j
309 |         index2 <- i
310 |       }
311 |     }
312 |   }
313 |   return(c(maxCon, index1, index2))
314 | }
308 | 
309 | # Rescales every column of x to the [0, 1] range (min-max scaling) and records
310 | # the original column minima/maxima as attributes of the result.
311 | # From https://stats.stackexchange.com/questions/70801/how-to-normalize-data-to-0-1-range
312 | normalize <- function(x) {
313 |   m <- as.matrix(x)
314 |   colMin <- apply(m, 2, min)
315 |   colMax <- apply(m, 2, max)
316 |   shifted <- sweep(m, 2, colMin, FUN = "-")
317 |   scaled <- sweep(shifted, 2, colMax - colMin, "/")
318 |   attr(scaled, 'normalized:min') <- colMin
319 |   attr(scaled, 'normalized:max') <- colMax
320 |   return(scaled)
321 | }
320 | # Assigns a suspiciousness to each variable: for every column of dataframe,
321 | # runs maxContrast on its (unlisted) values, or uses c(0, 0, 0) when the column
322 | # holds fewer than two values. Returns a 3-row data frame with the same column
323 | # names as the input (rows: contrast, index of max, index of min).
324 | computeSuspiciousness <- function(dataframe) {
325 |   columnNames <- names(dataframe)
326 |   # Seed column so data.frame() has something to bind to; removed below.
327 |   contrasts <- c(1, 1, 1)
328 |   for (col in 1:length(dataframe)) {
329 |     values <- unlist(dataframe[[col]], use.names = FALSE)
330 |     entry <- if (length(values) >= 2) maxContrast(values) else c(0, 0, 0)
331 |     contrasts <- data.frame(contrasts, entry)
332 |   }
333 |   contrasts[, 1] <- NULL
334 |   names(contrasts) <- columnNames
335 |   return(contrasts)
336 | }
337 | 
338 | # Reorders the columns of dataframe by decreasing value of its first row.
339 | getTheBiggest <- function(dataframe) {
340 |   ranking <- order(-dataframe[1, ])
341 |   return(dataframe[ranking])
342 | }
342 | # Debug helper: prints every element of x that compares equal to "NaN" or
343 | # "Inf", then prints x as a matrix. Nothing is replaced; the printed matrix is
344 | # the (invisible) return value.
345 | ditch <- function(x) {
346 |   cells <- as.matrix(x)
347 |   for (cell in cells) {
348 |     if (cell == "NaN") {
349 |       print(cell)
350 |     } else if (cell == "Inf") {
351 |       print(cell)
352 |     }
353 |   }
354 |   print(as.matrix(cells))
355 |   # ifelse(is.infinite(x), 2147483647, x)
356 | }
358 | # 1 RF
359 | # Mean counterfactual outcomes (random forest) over a set of treatment values
360 | # chosen from the treatment column:
361 | #   * names matching "P[0-9]" (predicates): the 0% and 100% quantiles;
362 | #   * numeric columns: the 5%, 15%, ..., 95% quantiles (10 values);
363 | #   * anything else, only when the global includeStrings is 1: string values
364 | #     picked after a DBSCAN clustering on Damerau-Levenshtein distances.
365 | # Rows containing Inf/NA/NaN are dropped first. Returns 0 when no rows remain
366 | # and NULL (invisibly) for non-numeric columns when includeStrings is not 1.
367 | CFmeansForDecileBinsRF <-
368 |   function(dataFrame, outVarName, treatVarName) {
369 |     # replace Inf with NA
370 |     dataFrame <-
371 |       do.call(data.frame, lapply(dataFrame, function(x)
372 |         replace(x, is.infinite(x), NA)))
373 |     # remove NaN and NA
374 |     dataFrame <- dataFrame[complete.cases(dataFrame), ]
375 |     if (nrow(dataFrame) < 1) {
376 |       return(0)
377 |     }
378 |     treatValues <- dataFrame[[treatVarName]]
379 |     if (grepl("P[0-9]", treatVarName, perl = TRUE)) {
380 |       # if it is a predicate only make 2 bins for 1 and 0 cases
381 |       quantiles <-
382 |         quantile(
383 |           treatValues,
384 |           prob = seq(0, 1, length = 2),
385 |           type = 5,
386 |           na.rm = TRUE
387 |         )
388 |       return(CFmeansForTreatVecRF(dataFrame, outVarName, treatVarName, quantiles))
389 |     }
390 |     # `&&` needs length-one operands (an error since R 4.3), so the
391 |     # element-wise is.finite() result is reduced with all(); is.numeric() goes
392 |     # first so non-numeric columns short-circuit to the string branch.
393 |     if (is.numeric(treatValues) && all(is.finite(treatValues))) {
394 |       fivePercentQuantiles <-
395 |         quantile(
396 |           treatValues,
397 |           prob = seq(0, 1, length = 21),
398 |           type = 5,
399 |           na.rm = TRUE
400 |         )
401 |       # Define the bins
402 |       evenQuantiles <-
403 |         fivePercentQuantiles[seq(2, 20, by = 2)] # 10 bins
404 |       # evenQuantiles <- fivePercentQuantiles[seq(2, 20, by = 1)] # 19 bins
405 |       return(CFmeansForTreatVecRF(dataFrame, outVarName, treatVarName, evenQuantiles))
406 |     }
407 |     if (includeStrings == 1) {
408 |       suppressPackageStartupMessages({
409 |         library(stringdist)
410 |         library(dbscan)
411 |       })
412 |       dftemp <- dataFrame
413 |       distmatrix <-
414 |         stringdistmatrix(as.character(dftemp[[treatVarName]]),
415 |                          as.character(dftemp[[treatVarName]]),
416 |                          method = "dl")
417 |       clustering.dbscan <-
418 |         dbscan::dbscan(distmatrix, eps = 0.30, minPts = 10)
419 |       dftemp$clusters <- clustering.dbscan$cluster
420 |       dftemp <-
421 |         aggregate(clusters ~ ., data = dftemp, FUN = median)
422 |       return(CFmeansForTreatVecRF(dataFrame, outVarName, treatVarName, dftemp[[treatVarName]]))
423 |     }
424 |     # Non-numeric treatment with string handling disabled: no result.
425 |     return(invisible(NULL))
426 |   }
425 | 
426 | # Importance score for one bin of runs.
427 | #   S_p_obs, F_p_obs: counts of rows with Y == 0 / Y == 1 in the reference data
428 | #   NumF:             total failure count used to scale the sensitivity
429 | #   dataFrame:        rows falling in the bin (must contain a column Y)
430 | # Returns 2 / (1 / increase + 1 / sensitivity), where
431 | # sensitivity = log(F_p) / log(NumF) and increase is the failure rate inside
432 | # the bin minus the failure rate in the reference data.
433 | computeESP <- function(S_p_obs, F_p_obs, NumF, dataFrame) {
434 |   passedInBin <- nrow(subset(dataFrame, Y == 0))
435 |   failedInBin <- nrow(subset(dataFrame, Y == 1))
436 |   sensitivity <- log(failedInBin) / log(NumF)
437 |   binFailRate <- failedInBin / (passedInBin + failedInBin)
438 |   obsFailRate <- F_p_obs / (S_p_obs + F_p_obs)
439 |   increase <- binFailRate - obsFailRate
440 |   return(2 / ((1 / increase) + 1 / sensitivity))
441 | }
439 | 
440 | # ESP score for one treatment variable.
441 | # The treatment values are split into nine bins around their mean (mu) in
442 | # steps of one standard deviation (tau): below mu - 3*tau, three bins up to mu
443 | # (lower bound inclusive), exactly mu, three bins above mu (upper bound
444 | # inclusive), and above mu + 3*tau. computeESP is evaluated on each bin and the
445 | # absolute value of the largest non-missing score is returned (-Inf when every
446 | # score is missing).
447 | # Returns -1 when no complete rows remain, or when the column is not numeric
448 | # and the global includeStrings is 1; returns NULL (invisibly) when it is not
449 | # numeric and includeStrings is not 1. outVarName is not used; the outcome
450 | # column is referenced as Y.
451 | CFmeansForESP <- function(dataFrame, outVarName, treatVarName) {
452 |   # Failures are counted before the rows are cleaned.
453 |   NumF <- nrow(subset(dataFrame, Y == 1))
454 |   # replace Inf with NA
455 |   dataFrame <-
456 |     do.call(data.frame, lapply(dataFrame, function(x)
457 |       replace(x, is.infinite(x), NA)))
458 |   # remove NaN and NA
459 |   dataFrame <- dataFrame[complete.cases(dataFrame), ]
460 |   if (nrow(dataFrame) < 1) {
461 |     return(-1)
462 |   }
463 |   treatValues <- dataFrame[c(treatVarName)][, 1]
464 |   # `&&` needs length-one operands (an error since R 4.3), so the element-wise
465 |   # is.finite() result is reduced with all().
466 |   if (!(is.numeric(treatValues) && all(is.finite(treatValues)))) {
467 |     if (includeStrings == 1) {
468 |       return(-1)
469 |     }
470 |     return(invisible(NULL))
471 |   }
472 |   mu <- mean(treatValues)
473 |   tau <- sd(treatValues)
474 |   S_p_obs <- nrow(subset(dataFrame, Y == 0))
475 |   F_p_obs <- nrow(subset(dataFrame, Y == 1))
476 |   # One logical mask per bin, in the same order as the original nine cases.
477 |   bins <- list(
478 |     treatValues < mu - 3 * tau,
479 |     treatValues >= mu - 3 * tau & treatValues < mu - 2 * tau,
480 |     treatValues >= mu - 2 * tau & treatValues < mu - tau,
481 |     treatValues >= mu - tau & treatValues < mu,
482 |     treatValues == mu,
483 |     treatValues > mu & treatValues <= mu + tau,
484 |     treatValues > mu + tau & treatValues <= mu + 2 * tau,
485 |     treatValues > mu + 2 * tau & treatValues <= mu + 3 * tau,
486 |     treatValues > mu + 3 * tau
487 |   )
488 |   importance <- vapply(bins, function(inBin) {
489 |     # which() drops NA conditions (tau is NA for a single row), matching the
490 |     # row selection subset() performs.
491 |     computeESP(S_p_obs, F_p_obs, NumF, dataFrame[which(inBin), , drop = FALSE])
492 |   }, numeric(1))
493 |   # Bins whose score is NA/NaN (e.g. empty bins) are ignored.
494 |   importance <- importance[!is.na(importance)]
495 |   if (length(importance) == 0) {
496 |     return(-Inf)
497 |   }
498 |   return(abs(max(importance)))
499 | }
548 | # Fits `outVarName ~ .` on an indicator-coded copy of dataFrame and returns the
549 | # second model coefficient (the first one after the intercept).
550 | # Every column except the first is recoded to 1 where a value is present and
551 | # 0 where it is NA before fitting.
552 | # The model is fitted when the global includeStrings is 1, or otherwise when
553 | # the treatment column is numeric; in the remaining case NULL is returned
554 | # invisibly.
555 | computeBaah <- function(dataFrame, outVarName, treatVarName) {
556 |   if (includeStrings == 1) {
557 |     fitModel <- TRUE
558 |   } else {
559 |     fitModel <- is.numeric(dataFrame[[treatVarName]])
560 |   }
561 |   if (!fitModel) {
562 |     return(invisible(NULL))
563 |   }
564 |   # NOTE(review): the packages are still attached because code outside this
565 |   # function may rely on them being on the search path - confirm and drop.
566 |   suppressPackageStartupMessages({
567 |     library(dplyr)
568 |     library(tidyr)
569 |   })
570 |   # The former `dataFrame %>% mutate_if(..., replace_na, ...)` pipeline was
571 |   # never assigned, so it had no effect and is removed. It must not be
572 |   # "fixed" by assigning it: filling the NAs first would turn every cell
573 |   # into 1 in the indicator coding below.
574 |   dataFrame[-1][!is.na(dataFrame[-1])] <- 1
575 |   dataFrame[-1][is.na(dataFrame[-1])] <- 0
576 |   dataFrame[] <-
577 |     lapply(dataFrame, function(x)
578 |       as.numeric(as.character(x)))
579 |   model <-
580 |     lm(paste(outVarName, " ~ .", sep = ""), data = dataFrame)
581 |   return(model$coefficients[2])
582 | }
589 | 
590 | # is.nan() for data frames: applies is.nan to each column and binds the
591 | # results column-wise into a logical matrix.
592 | is.nan.data.frame <- function(x) {
593 |   perColumn <- lapply(x, is.nan)
594 |   do.call(cbind, perColumn)
595 | }
593 | 
594 | # is.infinite() for data frames: applies is.infinite to each column and binds
595 | # the results column-wise into a logical matrix.
596 | is.infinite.data.frame <- function(x) {
597 |   perColumn <- lapply(x, is.infinite)
598 |   do.call(cbind, perColumn)
599 | }
597 | 
598 | #newoutput2 <- data.frame(t(newoutput))
599 | #colnames(newoutput2) <- newoutput2[1,]
600 | # =============================
601 | # Start HERE
602 | # input: newoutput, outY
603 | args <- commandArgs(trailingOnly = TRUE)
604 | # includeStrings is a global read by the analysis functions above; the value 1
605 | # switches on their string-handling branches. A missing or non-numeric
606 | # argument used to leave it NA, which makes every `if (includeStrings == 1)`
607 | # fail with "missing value where TRUE/FALSE needed"; fall back to 0 instead.
608 | includeStrings <-
609 |   if (length(args) >= 1) suppressWarnings(as.numeric(args[1])) else NA
610 | if (is.na(includeStrings)) {
611 |   includeStrings <- 0
612 | }
613 | # One row per variable: name in the first field, then tab-separated values.
614 | newoutput <-
615 |   read.table(
616 |     "/unival/newoutput.txt",
617 |     sep = "\t",
618 |     quote = "",
619 |     comment.char = "",
620 |     stringsAsFactors = FALSE,
621 |     fill = TRUE
622 |   )
623 | outY <-
624 |   read.table(
625 |     "/unival/outY.txt",
626 |     quote = "\"",
627 |     comment.char = ""
628 |   )
629 | # =============================
630 | # Use the first column as row names, then transpose so that every variable
631 | # becomes a column, and let type.convert pick each column's type.
632 | newoutput <- data.frame(newoutput)
633 | rownames(newoutput) <- newoutput[, 1]
634 | newoutput <- newoutput[,-1]
635 | newoutput <- as.data.frame(t(newoutput), stringsAsFactors = FALSE)
636 | newoutput[] <- lapply(newoutput, type.convert, as.is = TRUE)
637 | # Y <- data.frame(t(outY))
638 | # names(Y) <- c("Y")
639 | # The second column of outY.txt is kept as the outcome Y.
640 | outY <- data.frame(outY[, 2])
641 | names(outY) <- c("Y")
642 | numfldata <- newoutput
643 | #newoutput <-do.call(data.frame, lapply(newoutput, function(x)replace(x,!is.finite(x), NA)))
644 | fault_binerrs_all <- data.frame(outY, newoutput)
645 | # trainCFoutPredRF(TestShimple_fault_binerrs_all, "Y")
646 | # genCFmeansRF_fault_binerrs / genCFmeansESP_fault_binerrs are not defined in
647 | # this file; presumably they come from the sourced RforCFmeansRF.R - verify.
648 | CFmeanResult <- genCFmeansRF_fault_binerrs()
649 | # for RF
650 | maxContrastDF <- computeSuspiciousness(CFmeanResult)
651 | result <- getTheBiggest(maxContrastDF)
652 | # for ESP
653 | resultESP <- getTheBiggest(genCFmeansESP_fault_binerrs())
654 | #maxContrastESP<- compute(resultESP)
655 | #resultESP<-getTheBiggest(maxContrastESP)
656 | #=========For boxplot===============
657 | # cbind the fault rate
658 | faultRate <- sum(outY) / nrow(outY)
659 | tempResultCF <- result
660 | tempResultESP <- resultESP
661 | # The stray trailing comma (an empty argument) after `file` was removed.
662 | write.csv(
663 |   tempResultCF,
664 |   file = "/unival/resultUniVal.csv"
665 | )
666 | write.csv(
667 |   tempResultESP,
668 |   file = "/unival/resultESP.csv"
669 | )
661 | # only the first time
662 | #meanResult <- tempResult
663 | 
664 | # other runs (2nd, 3rd, 4th, 5th time)
665 | #meanResult <- rbind(meanResult, tempResult)
666 | 
667 | # after 5 runs
668 | # meanResult <- rbind(meanResult, colMeans(meanResult))
669 | # meanResult <- meanResult[order(meanResult[6,], decreasing = T)]
670 | 
671 | # output to a excel
672 | #write.csv(Result, file = "/result.csv")
673 | 
674 | #=========================
675 | 
676 | # rbind with each result, run FOR 10 TIMES
677 | #meanResult <- rbind    (meanResult, result[1,])
678 | # remove wrong lines
679 | # meanResult<- meanResult[-c(2),]
680 | # make a copy of meanResult
681 | # meanResultCopy <- meanResult
682 | 
683 | # sort by the mean Y
684 | #meanResult <- rbind(meanResult, colMeans(meanResult))
685 | #meanResult <- meanResult[order(meanResult[6,], decreasing = T)]
686 | # resultForPlot <- resultForPlot[-nrow(resultForPlot),]
687 | # only cares about the top 20 variable in the rank
688 | # resultForPlot <- resultForPlot[,1:20]
689 | # boxplot(resultForPlot, las = 2)
690 | 


--------------------------------------------------------------------------------
/unival-docker/src/StructuredDataCollector.java:
--------------------------------------------------------------------------------
  1 | import java.io.*;
  2 | import java.util.*;
  3 | import java.util.regex.Pattern;
  4 | 
  5 | public class StructuredDataCollector {
  6 |     /**
  7 |      * Converts the raw profile in {@code filePath} into the text tables used by
  8 |      * the later analysis and then generates "RforCFmeansRF.R".
  9 |      *
 10 |      * <p>Files written to the working directory: "outY.txt" (execution index,
 11 |      * tab, fail indicator), "truth.txt" (a "0" per execution), "diff.txt" ("1"
 12 |      * when the fail indicator is "1", else "0"), "newoutput.txt" (one row per
 13 |      * variable whose name is a plain identifier) and "newoutput.txt.full" (one
 14 |      * row per variable, unfiltered). A variable not observed in an execution
 15 |      * gets "NA" for it.
 16 |      *
 17 |      * <p>Input handling kept from the original implementation: the first line
 18 |      * of the file is discarded, and so is the line following every
 19 |      * "*** new execution ***" marker. NOTE(review): presumably header lines -
 20 |      * confirm against the instrumentation output format.
 21 |      *
 22 |      * @param filePath      profile file; data rows have exactly 6
 23 |      *                      comma-separated fields, other rows are ignored
 24 |      * @param causalMap     passed through to genRForCFmeansRF
 25 |      * @param usedVariables passed through to genRForCFmeansRF
 26 |      */
 27 |     public static void structureData(String filePath, HashMap<String, HashSet<String>> causalMap,
 28 |             Set<String> usedVariables) {
 29 |         // "variable_version" -> one entry per finished execution
 30 |         HashMap<String, ArrayList<String>> variableVersionValueArrayMap = new HashMap<>();
 31 |         // try-with-resources closes every stream even when reading or writing
 32 |         // fails; previously an exception leaked all of them.
 33 |         try (BufferedWriter testWrt = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("outY.txt")));
 34 |                 BufferedWriter wrtTruth = new BufferedWriter(
 35 |                         new OutputStreamWriter(new FileOutputStream("truth.txt")));
 36 |                 BufferedWriter wrtDiff = new BufferedWriter(
 37 |                         new OutputStreamWriter(new FileOutputStream("diff.txt")));
 38 |                 BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
 39 |             // The first line is read and dropped (see method comment).
 40 |             reader.readLine();
 41 |             // Values seen since the previous execution marker.
 42 |             HashMap<String, String> variableVersionValueMap = new HashMap<>();
 43 |             int numExecutions = 0;
 44 |             int index = 1;
 45 |             String line;
 46 |             while ((line = reader.readLine()) != null) {
 47 |                 if (line.contains("*** new execution ***")) {
 48 |                     String failIndicator = line.split(",")[2];
 49 |                     testWrt.append(index + "\t" + failIndicator);
 50 |                     testWrt.append('\n');
 51 |                     wrtTruth.write("0");
 52 |                     wrtTruth.write('\n');
 53 |                     wrtDiff.write(failIndicator.equals("1") ? "1" : "0");
 54 |                     wrtDiff.write('\n');
 55 |                     index++;
 56 |                     // Known variables that were not observed this time get "NA".
 57 |                     for (Map.Entry<String, ArrayList<String>> known : variableVersionValueArrayMap.entrySet()) {
 58 |                         if (!variableVersionValueMap.containsKey(known.getKey())) {
 59 |                             known.getValue().add("NA");
 60 |                         }
 61 |                     }
 62 |                     // Observed variables get their value; a variable seen for the
 63 |                     // first time is back-filled with "NA" for earlier executions.
 64 |                     for (Map.Entry<String, String> seen : variableVersionValueMap.entrySet()) {
 65 |                         ArrayList<String> column = variableVersionValueArrayMap.get(seen.getKey());
 66 |                         if (column == null) {
 67 |                             column = new ArrayList<String>(Collections.nCopies(numExecutions, "NA"));
 68 |                             variableVersionValueArrayMap.put(seen.getKey(), column);
 69 |                         }
 70 |                         column.add(seen.getValue());
 71 |                     }
 72 |                     variableVersionValueMap.clear();
 73 |                     // The line after the marker is consumed and ignored, as before.
 74 |                     reader.readLine();
 75 |                     numExecutions++;
 76 |                     continue;
 77 |                 }
 78 |                 String[] row = line.split(",");
 79 |                 if (row.length != 6) {
 80 |                     continue;
 81 |                 }
 82 |                 String variable = row[3];
 83 |                 String version = row[4];
 84 |                 if (variable.startsWith("_")) {
 85 |                     variable = "UNDERSCORE" + variable;
 86 |                 }
 87 |                 variableVersionValueMap.put(variable + "_" + version, encodeObservedValue(row[5]));
 88 |             }
 89 |             // Compiled once instead of once per variable.
 90 |             Pattern identifier = Pattern.compile("^[a-zA-Z_]\\w*$");
 91 |             try (BufferedWriter writer = new BufferedWriter(
 92 |                     new OutputStreamWriter(new FileOutputStream("newoutput.txt")));
 93 |                     BufferedWriter writer2 = new BufferedWriter(
 94 |                             new OutputStreamWriter(new FileOutputStream("newoutput.txt.full")))) {
 95 |                 for (Map.Entry<String, ArrayList<String>> column : variableVersionValueArrayMap.entrySet()) {
 96 |                     if (identifier.matcher(column.getKey()).matches()) {
 97 |                         writeVariableRow(writer, column.getKey(), column.getValue());
 98 |                     }
 99 |                     writeVariableRow(writer2, column.getKey(), column.getValue());
100 |                 }
101 |             }
102 |         } catch (IOException e) {
103 |             e.printStackTrace();
104 |         }
105 |         try {
106 |             genRForCFmeansRF("RforCFmeansRF.R", "fault_binerrs_all", "fault_binerrs", "Y", causalMap,
107 |                     variableVersionValueArrayMap.keySet(), usedVariables);
108 |         } catch (IOException e) {
109 |             e.printStackTrace();
110 |         }
111 |     }
112 | 
113 |     /** Maps the raw last field of a data row to the string stored for that observation. */
114 |     private static String encodeObservedValue(String raw) {
115 |         String text = raw.trim();
116 |         if (text.contains("\n") || text.contains("\t")) {
117 |             // Kept from the original: such values are recorded as empty.
118 |             // NOTE(review): the old code stripped the characters and then
119 |             // discarded the result - confirm that blanking is intended.
120 |             return "";
121 |         }
122 |         if (text.equals("true")) {
123 |             return Double.toString(1.0);
124 |         }
125 |         if (text.equals("false")) {
126 |             return Double.toString(0.0);
127 |         }
128 |         if (text.equalsIgnoreCase("null")) {
129 |             return Double.toString(Double.POSITIVE_INFINITY);
130 |         }
131 |         if (text.contains("/")) {
132 |             // "a/b" with numeric parts is stored as the quotient.
133 |             String[] fract = text.split("/");
134 |             if (fract.length == 2 && isNumeric(fract[0]) && isNumeric(fract[1])) {
135 |                 return Double.toString(Double.parseDouble(fract[0]) / Double.parseDouble(fract[1]));
136 |             }
137 |             return text;
138 |         }
139 |         return isNumeric(text) ? Double.toString(Double.parseDouble(text)) : text;
140 |     }
141 | 
142 |     /** Writes one tab-separated row: the variable name followed by its values. */
143 |     private static void writeVariableRow(BufferedWriter out, String name, List<String> values)
144 |             throws IOException {
145 |         out.write(name);
146 |         for (String value : values) {
147 |             // Numeric strings are re-rendered through double; others pass through.
148 |             out.write("\t" + (isNumeric(value) ? Double.toString(Double.parseDouble(value)) : value));
149 |         }
150 |         out.write("\n");
151 |     }
196 | 
197 |     public static boolean isDigit(String str) {
198 |         return str.matches("-?\\d+(\\.\\d+)?"); // match a number with optional '-' and decimal.
199 |     }
200 | 
201 |     public static boolean isNumeric(String strNum) {
202 |         if (strNum == null) {
203 |             return false;
204 |         }
205 |         try {
206 |             double d = Double.parseDouble(strNum);
207 |         } catch (NumberFormatException nfe) {
208 |             return false;
209 |         }
210 |         return true;
211 |     }
212 | 
213 |     // Method that generates the R function that will be used by the Random forest
214 |     // (RFC.R) script
215 |     private static void genRForCFmeansRF(String RFileName, String varFrameName, String prefix, String outName,
216 |             HashMap<String, HashSet<String>> covariant, Set<String> usedVariables, Set<String> faultCandidates)
217 |             throws IOException {
218 | 
219 |         OutputStream out = new FileOutputStream(RFileName);
220 |         BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
221 |         BufferedWriter testInf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream("info.txt")));
222 |         int variableCounter = 1;
223 | 
224 |         writer.write("genCFmeansRF_" + prefix + " <- function() {\n\n");
225 |         // for RF
226 |         writer.write("results <- list()\n\n");
227 |         writer.write("varnames <- list()\n\n");
228 |         // for esp
229 |         // writer.write("results <- data.frame(row.names = \"mean\")\n\n");
230 | 
231 |         for (String t : covariant.keySet()) {
232 |             System.out.println(t);
233 |             if (!usedVariables.contains(t))// ||!faultCandidates.contains(t)) //Change this accordingly if restrictions
234 |                                            // on which variables are considered exist
235 |                 continue;
236 |             if (t.startsWith("_"))
237 |                 t = "UNDERSCORE" + t;
238 |             // for (Value t : treatNames){
239 |             String vfn = varFrameName;
240 |             // for confounder
241 |             String tfn = prefix + "_" + t + "_treat_df";
242 |             // for no confounder
243 |             String tfn_nocnfd = prefix + "_" + t + "_treat_nocnfd_df";
244 | 
245 |             // // for tfn
246 |             writer.write(
247 |                     tfn + " <- data.frame(" + outName + "=" + vfn + "$" + outName + ", " + t + "=" + vfn + "$" + t);
248 |             HashSet<String> set = covariant.get(t);
249 |             for (String c : set) {
250 |                 if (!usedVariables.contains(c))
251 |                     continue;
252 |                 if (c.startsWith("_"))
253 |                     c = "UNDERSCORE" + c;
254 |                 writer.write(", " + c + "=" + vfn + "$" + c);
255 |             }
256 | 
257 |             // for tfn_nocnfd
258 |             // writer.write(tfn_nocnfd + " <- data.frame(" + outName + "=" + vfn + "$" +
259 |             // outName + ", " + t + "=" + vfn + "$" + t);
260 | 
261 |             writer.write(", stringsAsFactors = FALSE)\n");
262 | 
263 |             // to remove NA
264 |             // writer.write(tfn + " <- " + tfn + "[complete.cases(" + tfn + "),]" + '\n');
265 | 
266 |             // Only treatement, no confounder (ESP)
267 |             // writer.write("results[[\"" + t + "\"]] <- CFmeansForESP(" + tfn_nocnfd + ",
268 |             // \"" + outName + "\", \"" + t + "\"");
269 |             writer.write("numFLOut <- data.frame(" + t + "=" + "numfldata" + "$" + t);
270 |             for (String c : set) {
271 |                 if (!usedVariables.contains(c))
272 |                     continue;
273 |                 if (c.startsWith("_"))
274 |                     c = "UNDERSCORE" + c;
275 |                 writer.write(", " + c + "=" + "numfldata" + "$" + c);
276 |             }
277 |             if (set.isEmpty()) {
278 |                 writer.write(", " + t + "=" + "numfldata" + "$" + t);
279 |             }
280 |             writer.write(")\n");
281 |             // For random forest
282 |             writer.write("id <- rownames(numFLOut)\n");
283 |             writer.write("numFLOut<-cbind(id,numFLOut)\n");
284 |             writer.write("write.table(numFLOut,file=\"./NUMFL/" + variableCounter + ".txt\""
285 |                     + ",quote = F,row.names = F, col.names=T)\n");
286 | 
287 |             writer
288 |                     .write("results[[\"" + t + "\"]] <- CFmeansForDecileBinsRF(" + tfn + ", \"" + outName + "\", \"" + t
289 |                             + "\"");
290 | 
291 |             writer.write(")\n");
292 |             writer
293 |                     .write("varnames[[" + variableCounter + "]] <-  \"" + t + "\"");
294 |             writer.write("\n\n");
295 | 
296 |             variableCounter++;
297 |         }
298 |         // write info for numfl and coverage based methods
299 |         testInf.write(variableCounter + "\n");
300 |         Random rand = new Random();
301 |         int dummy_version = rand.nextInt(variableCounter);
302 |         testInf.write(dummy_version + "\n");
303 |         testInf.close();
304 |         writer.write(
305 |                 "varframe <- data.frame(matrix(unlist(varnames), nrow = length(varnames), byrow=T),stringsAsFactors=FALSE)\n");
306 |         writer.write("names(varframe)<- \" Variables \"\n");
307 |         writer.write("ID<-rownames(varframe)\n");
308 |         writer.write("varframe<-cbind(ID,varframe)\n");
309 |         writer.write("write.csv(varframe, file=\"./NUMFL/numflvariables.csv\",row.names = F)\n\n");
310 |         writer.write("return(results)\n\n");
311 |         writer.write("}\n");
312 |         writer.flush();
313 | 
314 |         writer.write("genCFmeansESP_" + prefix + " <- function() {\n\n");
315 |         // for RF
316 |         writer.write("results <- data.frame(row.names=seq(1, 10))\n\n");
317 |         writer.write("Baah2010 <- data.frame(row.names=\"Baah2010\")\n\n");
318 |         // for esp
319 |         // writer.write("results <- data.frame(row.names = \"mean\")\n\n");
320 | 
321 |         for (String t : covariant.keySet()) {
322 |             System.out.println(t);
323 |             if (!usedVariables.contains(t))
324 |                 continue;
325 |             if (t.startsWith("_"))
326 |                 t = "UNDERSCORE" + t;
327 |             // for (Value t : treatNames){
328 |             String vfn = varFrameName;
329 |             // for confounder
330 |             String tfn = prefix + "_" + t + "_treat_df";
331 |             // for no confounder
332 |             String tfn_nocnfd = prefix + "_" + t + "_treat_nocnfd_df";
333 | 
334 |             // // for tfn
335 |             writer.write(
336 |                     tfn + " <- data.frame(" + outName + "=" + vfn + "$" + outName + ", " + t + "=" + vfn + "$" + t);
337 |             HashSet<String> set = covariant.get(t);
338 |             for (String c : set) {
339 |                 if (!usedVariables.contains(c))
340 |                     continue;
341 |                 if (c.startsWith("_"))
342 |                     c = "UNDERSCORE" + c;
343 |                 writer.write(", " + c + "=" + vfn + "$" + c);
344 |             }
345 | 
346 |             // for tfn_nocnfd
347 |             // writer.write(tfn_nocnfd + " <- data.frame(" + outName + "=" + vfn + "$" +
348 |             // outName + ", " + t + "=" + vfn + "$" + t);
349 | 
350 |             writer.write(", stringsAsFactors = FALSE)\n");
351 | 
352 |             // to remove NA
353 |             // writer.write(tfn + " <- " + tfn + "[complete.cases(" + tfn + "),]" + '\n');
354 | 
355 |             // Only treatement, no confounder (ESP)
356 |             // writer.write("results[[\"" + t + "\"]] <- CFmeansForESP(" + tfn_nocnfd + ",
357 |             // \"" + outName + "\", \"" + t + "\"");
358 | 
359 |             // For random forest
360 |             writer
361 |                     .write("results[[\"" + t + "\"]] <- CFmeansForESP(" + tfn + ", \"" + outName + "\", \"" + t + "\"");
362 |             writer.write(")\n");
363 | 
364 |             writer
365 |                     .write("Baah2010[[\"" + t + "\"]] <- computeBaah(" + tfn + ", \"" + outName + "\", \"" + t + "\"");
366 |             writer.write(")\n\n");
367 |             // For LM and LASSO
368 |             // writer.write("results[[\"" + t + "\"]] <- CFmeansForDecileBinsLM(" + tfn + ",
369 |             // \"" + outName + "\", \"" + t + "\"");
370 | 
371 |         }
372 |         writer.write(
373 |                 "write.csv(getTheBiggest(Baah2010),file = \"/unival/resultBaah2010.csv\")\n\n");
374 | 
375 |         writer.write("return(results)\n\n");
376 |         writer.write("}\n");
377 |         writer.flush();
378 |         writer.close();
379 |     }
380 | }
381 | 


--------------------------------------------------------------------------------