├── .gitignore ├── LICENSE ├── README.md ├── db-scripts ├── clear-database.sh ├── init-joern.sh ├── start-joern.sh └── stop-joern.sh ├── instr ├── _run.sh ├── get_location.py ├── glibc_function_names.txt ├── if_inspection.py ├── instrumentation.py └── joern_con.py ├── joern ├── build.xml └── src │ ├── ddg │ └── DDGCreator.java │ ├── misc │ └── HashMapOfSets.java │ ├── neo4j │ ├── readWriteDB │ │ └── Neo4JDBInterface.java │ └── traversals │ │ └── readWriteDB │ │ └── Traversals.java │ └── tools │ ├── argumentTainter │ ├── ArgumentTainter.java │ ├── ArgumentTainterMain.java │ ├── CallsForFunction.java │ ├── CommandLineInterface.java │ ├── DDGPatcher.java │ ├── DefUseCFGPatcher.java │ └── FunctionPatcher.java │ └── data_flow │ ├── Arg_in_call_return_type.java │ ├── Best_effort_topological_sort.java │ ├── Circle_exception.java │ ├── Data_transfer.java │ ├── Def_tree.java │ ├── Find_all_function_pointers.java │ ├── Find_control_flow_paths.java │ ├── Find_data_paths.java │ ├── Function_sets_parameter.java │ ├── Get_call_graph.java │ ├── Init_glibc_data_trans.java │ ├── Is_parameter_return_type.java │ ├── Joern_db.java │ ├── Location.java │ ├── Pair.java │ ├── Param_data_source.java │ ├── Pickle.java │ ├── Pipeline.java │ ├── Remove_duplicated_edges.java │ ├── Replace_member_edges.java │ ├── Retrace_arg_till_source.java │ ├── Sensitive_sinks.java │ ├── Taint_all.java │ ├── Timecap_queue.java │ ├── Topological_sort.java │ └── User_controlled_sources.java └── preprocess ├── gcc_cmd_transformer.py └── preprocess_it.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | ._.DS_Store 3 | **/.DS_Store 4 | **/._.DS_Store 5 | 6 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jannik Pewny 4 | 5 | Permission is hereby granted, free of charge, to any person 
obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # EvilCoder 2 | 3 | This code accompanies the paper 4 | *EvilCoder: Automated Bug Insertion* 5 | (http://syssec.rub.de/research/publications/evilcoder/) 6 | published at ACSAC'16. 7 | 8 | In a nutshell, the task is to insert a (likely exploitable) bug into the 9 | source code of a given project, 10 | in order to improve bug-finding software by supplying it with examples of 11 | what vulnerable code could look like. 12 | Please have a look at the paper for an overview and further details. 13 | 14 | Most of the Java-code is actually a port from an earlier implementation in Python, 15 | which is why it may look more like Python than Java and is 16 | "integrated" with a flat-hierarchy. 
17 | Also, since it is not runtime-critical, 18 | the last stage (instrumentation) was not ported and is therefore still in Python. 19 | 20 | 21 | ## Basics 22 | We used Debian Jessie for our setup, but any decent Linux should work. 23 | The code depends on JOERN v0.2.5 (http://www.mlsec.org/joern/), 24 | so please follow its excellent install guide. 25 | 26 | ## Compilation 27 | Please copy the content of the */joern/* folder into your joern-installation. 28 | Note that this will overwrite some files, 29 | so please make sure to save a backup of your joern-directory. 30 | In particular, it should change the *build.xml*-file, which contains the 31 | ant-command to compile our added java-code, and modified source files. 32 | For the most part, the changes should be marked. 33 | Our code is in the folder *src/tools/data_flow/*. 34 | Please note that the original argumentTainter-class was written by the authors 35 | of Joern and only modified to accept a file for batch-processing. 36 | 37 | In our setup, the following libraries have to be supplied in */joern/libs/*: 38 | * antlr4-annotations-4.2.1-SNAPSHOT.jar 39 | * antlr4-runtime-4.2.1-SNAPSHOT.jar 40 | * blueprints-core-2.5.0.jar 41 | * blueprints-neo4j2-graph-2.5.0.jar 42 | * commons-cli-1.2.jar 43 | * commons-configuration-1.6.jar 44 | * concurrentlinkedhashmap-lru-1.3.1.jar 45 | * geronimo-jta_1.1_spec-1.1.1.jar 46 | * gremlin-groovy-2.5.0.jar 47 | * gremlin-java-2.5.0.jar 48 | * groovy-1.8.9.jar 49 | * hamcrest-core-1.2.jar 50 | * junit.jar 51 | * lucene-core-3.6.2.jar 52 | * neo4j-cypher-2.1.6.jar 53 | * neo4j-cypher-commons-2.1.6.jar 54 | * neo4j-cypher-compiler-1.9-2.0.3.jar 55 | * neo4j-cypher-compiler-2.0-2.0.3.jar 56 | * neo4j-cypher-compiler-2.1-2.1.6.jar 57 | * neo4j-kernel-2.1.5.jar 58 | * neo4j-kernel-2.1.6.jar 59 | * neo4j-lucene-index-2.1.5.jar 60 | * neo4j-lucene-index-2.1.6.jar 61 | * neo4j-primitive-collections-2.1.5.jar 62 | * neo4j-primitive-collections-2.1.6.jar 63 | * 
parboiled-core-1.1.6.jar 64 | * parboiled-scala_2.10-1.1.6.jar 65 | * pipes-2.5.0.jar 66 | * scala-library-2.10.4.jar 67 | 68 | After that, you should be able to compile the tools-folder. 69 | In particular, you should find the following jar-files in */joern/bin/* afterwards: 70 | * init_glibc_data_trans.jar 71 | * taint_all.jar 72 | * argumentTainter.jar 73 | * replace_member_edges.jar 74 | * remove_duplicated_edges.jar 75 | * find_data_paths.jar 76 | 77 | An example-script for how to run the sequence of tools is in *instr/_run.sh*. 78 | For clarity, the steps are described here, as well. 79 | 80 | 81 | 82 | 83 | ## Input 84 | As input, we expect the folder of a preprocessed C-project. 85 | The scripts supplied in */preprocess/* should help to preprocess the files, 86 | just as it would happen during the compilation of the project. 87 | Some hand-tuning may be necessary, however, since compilation itself is often 88 | non-trivial. 89 | 90 | 91 | ## Step 0. Initialize data-transfer for glibc 92 | *Init_glibc_data_trans.java* 93 | holds information on the data-transfer happening in well-known glibc-functions. 94 | You have to run it to get a Java-serialized version of this dictionary used in 95 | later stages. 96 | 97 | 98 | ## Step 1. Import the code-base into joern 99 | Run */db-scripts/init-joern.sh* with the folder of the preprocessed project as its only argument. 100 | See joern-manual for details. 101 | 102 | 103 | ## Step 2. Determine which arguments influence one another 104 | This is done by *taint_all.jar*. 105 | Various data is generated at this point. 106 | This includes a serialized version of the call-graph, 107 | candidates for function-pointers 108 | and the batch-file of modified function-arguments (generated_taint_all.txt). 109 | 110 | 111 | ## Step 3. Transfer this knowledge into the database 112 | This step makes use of the (modified) argumentTainter.jar. 113 | It marks function-arguments as modified on-call and adjusts the graph accordingly. 114 | 115 | 116 | ## (Step 3.5. 
Tidy up) 117 | This step is not described in the paper. However, we noted some characteristic 118 | errors in the graph after the aforementioned steps, so we run two small tidy-up 119 | scripts (replace_member_edges and remove_duplicated_edges). 120 | 121 | 122 | ## Step 4. Find data-paths between user-controlled sources and sensitive sinks 123 | This step tries to find data-paths from a sensitive sink, 124 | meaning a certain function-argument of a certain sensitive function 125 | (defined in Sensitive_sinks.java) 126 | to a user-controlled source (defined in User_controlled_sources.java). 127 | 128 | Should a data-path (an array of node-IDs) be found, 129 | it is accompanied by an array of the variables, 130 | which "overarch" these nodes (i.e., the name of the variable in the data-transfer 131 | for this step). 132 | 133 | Next, the control-flows between the individual data-flow-nodes are found and printed, as well. 134 | 135 | ## Step 5. Instrumentation 136 | As mentioned, this step was not reimplemented in Java and is therefore not 137 | coupled tightly. 138 | You can pick a random data-flow path and then a random control-flow 139 | path in between those nodes. 140 | Then, you invoke the Python-script *instrumentation.py* with pairs of a control-flow node-ID 141 | and the corresponding overarching variable. 142 | 143 | The script will first find all checks (i.e., if-conditions) 144 | and then filter for the relevant ones, i.e., those 145 | which depend on the current overarched variable. 146 | 147 | Next, it has to be determined whether the if- or the else-branch has to be executed 148 | in order to allow a malicious data-flow. 149 | A simple heuristic, based on which branch terminates the execution, is used here. 150 | 151 | At this point, a possible instrumentation can be chosen. 152 | Lacking a sophisticated code-checking and -manipulation DSL, 153 | we used ad-hoc modification using Python. 
154 | 155 | An individual instrumentation is implemented as a subclass of *Instrumentation*. 156 | First and foremost, it has to analyze the check at hand, 157 | to determine if it can be applied (*possible()*). 158 | If so, it can be chosen at random to perform the instrumentation (*instrument*). 159 | Should you implement your own instrumentations, 160 | don't forget to add them to *get_all_instrumentations()*. 161 | 162 | 163 | # Contact 164 | jannik DOT pewny AT rub DOT de 165 | 166 | -------------------------------------------------------------------------------- /db-scripts/clear-database.sh: -------------------------------------------------------------------------------- 1 | sudo rm -r .joernIndex/ 2 | -------------------------------------------------------------------------------- /db-scripts/init-joern.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | JOERN_HOME=~/joern 3 | NEO4J_HOME=~/neo4j-community-2.1.6 4 | 5 | 6 | if [ "$#" -ne 1 ]; then 7 | echo "Usage: " 8 | exit 9 | fi 10 | 11 | pushd . 12 | cd /home/user/bugdooring_share/joern/ 13 | 14 | #echo "Stopping neo4j server" 15 | sudo $NEO4J_HOME/bin/neo4j stop >>neo4j.log 2>&1 16 | 17 | 18 | rm -rf /home/user/bugdooring_share/joern/.joernIndex 19 | java -jar $JOERN_HOME/bin/joern.jar "$1" | tee joern.log 20 | 21 | sudo $NEO4J_HOME/bin/neo4j start >>neo4j.log 2>&1 22 | 23 | popd 24 | 25 | -------------------------------------------------------------------------------- /db-scripts/start-joern.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | JOERN_HOME=~/joern 3 | NEO4J_HOME=~/neo4j-community-2.1.6 4 | 5 | pushd . 
6 | cd /home/user/bugdooring_share/joern/ 7 | 8 | sudo $NEO4J_HOME/bin/neo4j start >>neo4j.log 2>&1 9 | 10 | popd 11 | 12 | -------------------------------------------------------------------------------- /db-scripts/stop-joern.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | JOERN_HOME=~/joern 3 | NEO4J_HOME=~/neo4j-community-2.1.6 4 | 5 | pushd . 6 | cd /home/user/bugdooring_share/joern/ 7 | 8 | #echo "Stopping neo4j server" 9 | sudo $NEO4J_HOME/bin/neo4j stop >>neo4j.log 2>&1 10 | 11 | popd 12 | 13 | -------------------------------------------------------------------------------- /instr/_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | python init_glibc_data_trans.dat 4 | time java -jar /home/user/joern/bin/init_glibc_data_trans.jar 5 | 6 | time ./scripts/init-joern.sh /home/user/Desktop/data_flow/example_code/busybox/ 7 | 8 | time java -Xmx2048m -jar /home/user/joern/bin/taint_all.jar > taint_ergo.txt 9 | 10 | 11 | sudo /home/user/neo4j-community-2.1.6/bin/neo4j stop 12 | 13 | time java -Xmx2048m -jar /home/user/joern/bin/argumentTainter.jar ./generated_taint_all.txt /home/user/joern/.joernIndex/ > java_taint_ergo.txt 14 | sudo /home/user/neo4j-community-2.1.6/bin/neo4j start 15 | 16 | 17 | time java -Xmx2048m -jar /home/user/joern/bin/replace_member_edges.jar 18 | time java -Xmx2048m -jar /home/user/joern/bin/remove_duplicated_edges.jar 19 | 20 | 21 | time java -Xmx2048m -jar /home/user/joern/bin/find_data_paths.java 2> exceptions.txt 1> ergo.txt 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /instr/get_location.py: -------------------------------------------------------------------------------- 1 | from instrumentation import * 2 | 3 | def get_file_path(joern_db, node): 4 | query = """g.v(%d).in().filter{it.type=='File'}.filepath""" % node['functionId'] 5 | file_path = 
joern_db.runGremlinQuery(query)[0] 6 | if(len(file_path) == 0): 7 | raise Exception("No file path found") 8 | return file_path 9 | 10 | 11 | def get_file_path_from_id(joern_db, node_id): 12 | query = """g.v(%s)""" % node_id 13 | node = joern_db.runGremlinQuery(query) 14 | return get_file_path(joern_db, node) 15 | 16 | 17 | 18 | def get_general_parent(joern_db, node_id): 19 | #AST_EDGE = 'IS_AST_PARENT' 20 | #CFG_EDGE = 'FLOWS_TO' 21 | #USES_EDGE = 'USE' 22 | #DEFINES_EDGE = 'DEF' 23 | #DATA_FLOW_EDGE = 'REACHES' 24 | #FUNCTION_TO_AST_EDGE = 'IS_FUNCTION_OF_AST' 25 | #CFG_TO_FUNCION_EDGE = 'IS_FUNCTION_OF_CFG' // JANNIK 26 | 27 | parent = joern_db.runGremlinQuery("g.v(%s).in(AST_EDGE)" % node_id) 28 | if(parent != []): 29 | # print "ast_parent:", parent 30 | return parent[0]._id 31 | 32 | parent = joern_db.runGremlinQuery("g.v(%s).in(FUNCTION_TO_AST_EDGE)" % node_id) 33 | if(parent != []): 34 | # print "func_ast_parents:", parent 35 | return parent[0]._id 36 | 37 | parent = joern_db.runGremlinQuery("g.v(%s).in(CFG_TO_FUNCTION_EDGE)" % node_id) 38 | if(parent != []): 39 | # print "func_cfg_parents:", parent 40 | return parent[0]._id 41 | raise Exception("Cannot find general parent for " + str(node_id)) 42 | 43 | 44 | def get_function_name_for_node_id(joern_db, node_id): 45 | cur_id = node_id 46 | func_id = None 47 | while(1): 48 | func_id = joern_db.runGremlinQuery("g.v(%s).functionId" % cur_id) 49 | if(func_id != None): 50 | break 51 | cur_id = get_general_parent(joern_db, cur_id) 52 | # print "cur_id:", cur_id 53 | func_name = joern_db.runGremlinQuery("g.v(%s).name" % func_id) 54 | return func_name 55 | 56 | 57 | def get_location_for_node_id(joern_db, node_id): 58 | cur_id = node_id 59 | location = None 60 | while(1): 61 | location = joern_db.runGremlinQuery("g.v(%s).location" % cur_id) 62 | if(location != None): 63 | break 64 | cur_id = get_general_parent(joern_db, cur_id) 65 | # print "cur_id:", cur_id 66 | return location 67 | 68 | 69 | read_files = dict() 70 | 
def get_location_tuple(joern_db, node_id): 71 | global read_files 72 | node = joern_db.runGremlinQuery("g.v(%s)" % node_id) 73 | if(node["type"] in ["CFGExitNode", "Symbol"]): 74 | raise Exception("Cannot find location for CFGExitNode or Symbol") 75 | 76 | file_name = get_file_path_from_id(joern_db, node_id) 77 | func_name = get_function_name_for_node_id(joern_db, node_id) 78 | location = get_location_for_node_id(joern_db, node_id) 79 | line_no = int(location.split(":")[0]) 80 | 81 | if(not file_name in read_files): 82 | f = open(file_name, "r") 83 | data = f.read() 84 | f.close() 85 | read_files[file_name] = data 86 | # extent = read_files[file_name][line_no-1] 87 | loc_splitters = location.split(":") 88 | extent = get_source_range(read_files[file_name], int(loc_splitters[2]), int(loc_splitters[3])) 89 | return (file_name, func_name, line_no, extent) 90 | 91 | 92 | 93 | 94 | 95 | def test_get_location_tuple(joern_db): 96 | # Testing get_location_tuple for libpng, with function "png_do_write_transformations" 97 | all_nodes_of_func = joern_db.runGremlinQuery("""g.V.filter{it.functionId == %s}""" % "130007") 98 | for a in all_nodes_of_func: 99 | if(a["type"] in ["CFGExitNode", "Symbol"]): 100 | continue 101 | print a._id 102 | print get_location_tuple(joern_db, a._id) 103 | sys.exit(1) 104 | 105 | -------------------------------------------------------------------------------- /instr/glibc_function_names.txt: -------------------------------------------------------------------------------- 1 | abort 2 | abs 3 | labs 4 | llabs 5 | imaxabs 6 | fabs 7 | fabsf 8 | fabsl 9 | cabs 10 | cabsf 11 | cabsl 12 | accept 13 | addseverity 14 | sigaction 15 | ngettext 16 | dngettext 17 | dcngettext 18 | aligned_alloc 19 | memalign 20 | posix_memalign 21 | valloc 22 | calloc 23 | obstack_alloc 24 | obstack_copy 25 | obstack_copy0 26 | getpt 27 | grantpt 28 | unlockpt 29 | ptsname 30 | ptsname_r 31 | argp_help 32 | argp_usage 33 | argp_error 34 | argp_failure 35 | argp_state_help 36 | 
argp_parse 37 | argz_create 38 | argz_create_sep 39 | argz_count 40 | argz_extract 41 | argz_stringify 42 | argz_add 43 | argz_add_sep 44 | argz_append 45 | argz_delete 46 | argz_insert 47 | argz_next 48 | argz_replace 49 | lfind 50 | lsearch 51 | bsearch 52 | qsort 53 | aio_read 54 | aio_read64 55 | aio_write 56 | aio_write64 57 | lio_listio 58 | lio_listio64 59 | getauxval 60 | random 61 | srandom 62 | initstate 63 | setstate 64 | random_r 65 | srandom_r 66 | initstate_r 67 | setstate_r 68 | siginterrupt 69 | sigblock 70 | sigsetmask 71 | sigpause 72 | gtty 73 | stty 74 | wait3 75 | backtrace 76 | backtrace_symbols 77 | backtrace_symbols_fd 78 | malloc 79 | sched_setscheduler 80 | sched_getscheduler 81 | sched_setparam 82 | sched_getparam 83 | sched_get_priority_min 84 | sched_get_priority_max 85 | sched_rr_get_interval 86 | sched_yield 87 | signal 88 | sysv_signal 89 | ssignal 90 | fread 91 | fread_unlocked 92 | fwrite 93 | fwrite_unlocked 94 | localtime 95 | localtime_r 96 | gmtime 97 | gmtime_r 98 | mktime 99 | timelocal 100 | timegm 101 | htons 102 | ntohs 103 | htonl 104 | ntohl 105 | sched_getaffinity 106 | sched_setaffinity 107 | clock 108 | glob 109 | glob64 110 | wordexp 111 | wordfree 112 | aio_cancel 113 | aio_cancel64 114 | tolower 115 | toupper 116 | toascii 117 | _tolower 118 | _toupper 119 | realloc 120 | fgetc 121 | fgetwc 122 | fgetc_unlocked 123 | fgetwc_unlocked 124 | getc 125 | getwc 126 | getc_unlocked 127 | getwc_unlocked 128 | getchar 129 | getwchar 130 | getchar_unlocked 131 | getwchar_unlocked 132 | getw 133 | bind_textdomain_codeset 134 | sigpending 135 | islower 136 | isupper 137 | isalpha 138 | isdigit 139 | isalnum 140 | isxdigit 141 | ispunct 142 | isspace 143 | isblank 144 | isgraph 145 | isprint 146 | iscntrl 147 | isascii 148 | wctype 149 | iswctype 150 | iswalnum 151 | iswalpha 152 | iswcntrl 153 | iswdigit 154 | iswgraph 155 | iswlower 156 | iswprint 157 | iswpunct 158 | iswspace 159 | iswupper 160 | iswxdigit 161 | iswblank 162 
| atexit 163 | on_exit 164 | fclose 165 | fcloseall 166 | shutdown 167 | strcoll 168 | wcscoll 169 | strxfrm 170 | wcsxfrm 171 | aio_init 172 | connect 173 | fegetenv 174 | feholdexcept 175 | fesetenv 176 | feupdateenv 177 | feenableexcept 178 | fedisableexcept 179 | fegetexcept 180 | fcntl 181 | setvbuf 182 | setbuf 183 | setbuffer 184 | setlinebuf 185 | __flbf 186 | __fbufsize 187 | __fpending 188 | mbsrtowcs 189 | wcsrtombs 190 | mbsnrtowcs 191 | wcsnrtombs 192 | btowc 193 | wctob 194 | mbrtowc 195 | mbrlen 196 | wcrtomb 197 | memcpy 198 | wmemcpy 199 | mempcpy 200 | wmempcpy 201 | memmove 202 | wmemmove 203 | memccpy 204 | memset 205 | wmemset 206 | strcpy 207 | wcscpy 208 | strncpy 209 | wcsncpy 210 | strdup 211 | wcsdup 212 | strndup 213 | stpcpy 214 | wcpcpy 215 | stpncpy 216 | wcpncpy 217 | strcat 218 | wcscat 219 | strncat 220 | wcsncat 221 | bcopy 222 | bzero 223 | mkdir 224 | pipe 225 | fork 226 | vfork 227 | socket 228 | setkey 229 | encrypt 230 | setkey_r 231 | encrypt_r 232 | ecb_crypt 233 | DES_FAILED 234 | cbc_crypt 235 | des_setparity 236 | pthread_getattr_default_np 237 | pthread_setattr_default_np 238 | unlink 239 | rmdir 240 | remove 241 | fdopen 242 | fileno 243 | fileno_unlocked 244 | IFTODT 245 | DTTOIF 246 | dup 247 | dup2 248 | asprintf 249 | obstack_printf 250 | feof 251 | feof_unlocked 252 | ferror 253 | ferror_unlocked 254 | difftime 255 | l64a 256 | a64l 257 | getenv 258 | secure_getenv 259 | putenv 260 | setenv 261 | unsetenv 262 | clearenv 263 | envz_entry 264 | envz_get 265 | envz_add 266 | envz_merge 267 | envz_strip 268 | strerror 269 | strerror_r 270 | perror 271 | error 272 | error_at_line 273 | warn 274 | vwarn 275 | warnx 276 | vwarnx 277 | err 278 | verr 279 | errx 280 | verrx 281 | clearerr 282 | clearerr_unlocked 283 | execv 284 | execl 285 | execve 286 | execle 287 | execvp 288 | execlp 289 | exp 290 | expf 291 | expl 292 | exp2 293 | exp2f 294 | exp2l 295 | exp10 296 | exp10f 297 | exp10l 298 | pow10 299 | pow10f 300 | 
pow10l 301 | log 302 | logf 303 | logl 304 | log10 305 | log10f 306 | log10l 307 | log2 308 | log2f 309 | log2l 310 | logb 311 | logbf 312 | logbl 313 | ilogb 314 | ilogbf 315 | ilogbl 316 | pow 317 | powf 318 | powl 319 | sqrt 320 | sqrtf 321 | sqrtl 322 | cbrt 323 | cbrtf 324 | cbrtl 325 | hypot 326 | hypotf 327 | hypotl 328 | expm1 329 | expm1f 330 | expm1l 331 | log1p 332 | log1pf 333 | log1pl 334 | cexp 335 | cexpf 336 | cexpl 337 | clog 338 | clogf 339 | clogl 340 | clog10 341 | clog10f 342 | clog10l 343 | csqrt 344 | csqrtf 345 | csqrtl 346 | cpow 347 | cpowf 348 | cpowl 349 | obstack_room 350 | obstack_1grow_fast 351 | obstack_ptr_grow_fast 352 | obstack_int_grow_fast 353 | obstack_blank_fast 354 | mkfifo 355 | copysign 356 | copysignf 357 | copysignl 358 | signbit 359 | nextafter 360 | nextafterf 361 | nextafterl 362 | nexttoward 363 | nexttowardf 364 | nexttowardl 365 | nan 366 | nanf 367 | nanl 368 | chown 369 | fchown 370 | lseek 371 | lseek64 372 | ftell 373 | ftello 374 | ftello64 375 | fseek 376 | fseeko 377 | fseeko64 378 | rewind 379 | truncate 380 | truncate64 381 | ftruncate 382 | ftruncate64 383 | utime 384 | utimes 385 | lutimes 386 | futimes 387 | strtok 388 | wcstok 389 | strtok_r 390 | strsep 391 | basename 392 | basename 393 | dirname 394 | isinf 395 | isinff 396 | isinfl 397 | isnan 398 | isnanf 399 | isnanl 400 | finite 401 | finitef 402 | finitel 403 | fflush 404 | fflush_unlocked 405 | _flushlbf 406 | __fpurge 407 | scanf 408 | wscanf 409 | fscanf 410 | fwscanf 411 | sscanf 412 | swscanf 413 | printf 414 | wprintf 415 | fprintf 416 | fwprintf 417 | sprintf 418 | swprintf 419 | snprintf 420 | asctime 421 | asctime_r 422 | ctime 423 | ctime_r 424 | strftime 425 | wcsftime 426 | strfmon 427 | obstack_free 428 | free 429 | cfree 430 | getdate 431 | getdate_r 432 | iconv_open 433 | iconv_close 434 | iconv 435 | getopt_long 436 | getopt_long_only 437 | obstack_blank 438 | obstack_grow 439 | obstack_grow0 440 | obstack_1grow 441 | 
obstack_ptr_grow 442 | obstack_int_grow 443 | obstack_finish 444 | obstack_object_size 445 | link 446 | hcreate 447 | hdestroy 448 | hsearch 449 | hcreate_r 450 | hdestroy_r 451 | hsearch_r 452 | mcheck 453 | mprobe 454 | ntp_gettime 455 | ntp_adjtime 456 | gettimeofday 457 | settimeofday 458 | adjtime 459 | adjtimex 460 | inet_aton 461 | inet_addr 462 | inet_network 463 | inet_ntoa 464 | inet_makeaddr 465 | inet_lnaof 466 | inet_netof 467 | inet_pton 468 | inet_ntop 469 | gethostname 470 | sethostname 471 | getdomainnname 472 | setdomainname 473 | gethostid 474 | sethostid 475 | gethostbyname 476 | gethostbyname2 477 | gethostbyaddr 478 | gethostbyname_r 479 | gethostbyname2_r 480 | gethostbyaddr_r 481 | sethostent 482 | gethostent 483 | endhostent 484 | ungetc 485 | ungetwc 486 | sinh 487 | sinhf 488 | sinhl 489 | cosh 490 | coshf 491 | coshl 492 | tanh 493 | tanhf 494 | tanhl 495 | csinh 496 | csinhf 497 | csinhl 498 | ccosh 499 | ccoshf 500 | ccoshl 501 | ctanh 502 | ctanhf 503 | ctanhl 504 | asinh 505 | asinhf 506 | asinhl 507 | acosh 508 | acoshf 509 | acoshl 510 | atanh 511 | atanhf 512 | atanhl 513 | casinh 514 | casinhf 515 | casinhl 516 | cacosh 517 | cacoshf 518 | cacoshl 519 | catanh 520 | catanhf 521 | catanhl 522 | ioctl 523 | rand 524 | srand 525 | rand_r 526 | read 527 | pread 528 | pread64 529 | write 530 | pwrite 531 | pwrite64 532 | ctermid 533 | div 534 | ldiv 535 | lldiv 536 | imaxdiv 537 | if_nametoindex 538 | if_indextoname 539 | if_nameindex 540 | if_freenameindex 541 | asin 542 | asinf 543 | asinl 544 | acos 545 | acosf 546 | acosl 547 | atan 548 | atanf 549 | atanl 550 | atan2 551 | atan2f 552 | atan2l 553 | casin 554 | casinf 555 | casinl 556 | cacos 557 | cacosf 558 | cacosl 559 | catan 560 | catanf 561 | catanl 562 | isatty 563 | ttyname 564 | ttyname_r 565 | mbsinit 566 | getrlimit 567 | getrlimit64 568 | setrlimit 569 | setrlimit64 570 | ulimit 571 | vlimit 572 | tcsendbreak 573 | tcdrain 574 | tcflush 575 | tcflow 576 | getline 577 | 
getdelim 578 | fgets 579 | fgetws 580 | fgets_unlocked 581 | fgetws_unlocked 582 | cfgetospeed 583 | cfgetispeed 584 | cfsetospeed 585 | cfsetispeed 586 | cfsetspeed 587 | listen 588 | textdomain 589 | bindtextdomain 590 | login_tty 591 | login 592 | logout 593 | logwtmp 594 | getgrgid 595 | getgrgid_r 596 | getgrnam 597 | getgrnam_r 598 | setnetgrent 599 | getnetgrent 600 | getnetgrent_r 601 | endnetgrent 602 | getpwuid 603 | getpwuid_r 604 | getpwnam 605 | getpwnam_r 606 | strptime 607 | mknod 608 | mallopt 609 | setutent 610 | getutent 611 | endutent 612 | getutid 613 | getutline 614 | pututline 615 | getutent_r 616 | getutid_r 617 | getutline_r 618 | utmpname 619 | updwtmp 620 | regexec 621 | mmap 622 | mmap64 623 | munmap 624 | msync 625 | mremap 626 | madvise 627 | shm_open 628 | shm_unlink 629 | fmin 630 | fminf 631 | fminl 632 | fmax 633 | fmaxf 634 | fmaxl 635 | fdim 636 | fdimf 637 | fdiml 638 | fma 639 | fmaf 640 | fmal 641 | tcgetattr 642 | tcsetattr 643 | globfree 644 | globfree64 645 | mount 646 | umount2 647 | umount 648 | innetgr 649 | getnetbyname 650 | getnetbyaddr 651 | setnetent 652 | getnetent 653 | endnetent 654 | longjmp 655 | sigsetjmp 656 | siglongjmp 657 | mbtowc 658 | wctomb 659 | mblen 660 | mbstowcs 661 | wcstombs 662 | cfmakeraw 663 | exit 664 | frexp 665 | frexpf 666 | frexpl 667 | ldexp 668 | ldexpf 669 | ldexpl 670 | scalb 671 | scalbf 672 | scalbl 673 | scalbn 674 | scalbnf 675 | scalbnl 676 | scalbln 677 | scalblnf 678 | scalblnl 679 | significand 680 | significandf 681 | significandl 682 | fopen 683 | fopen64 684 | freopen 685 | freopen64 686 | __freadable 687 | __fwritable 688 | __freading 689 | __fwriting 690 | opendir 691 | fdopendir 692 | dirfd 693 | open 694 | open64 695 | close 696 | creal 697 | crealf 698 | creall 699 | cimag 700 | cimagf 701 | cimagl 702 | conj 703 | conjf 704 | conjl 705 | carg 706 | cargf 707 | cargl 708 | cproj 709 | cprojf 710 | cprojl 711 | regcomp 712 | mlock 713 | munlock 714 | mlockall 715 | 
munlockall 716 | parse_printf_format 717 | strtod 718 | strtof 719 | strtold 720 | wcstod 721 | wcstof 722 | wcstold 723 | atof 724 | strtol 725 | wcstol 726 | strtoul 727 | wcstoul 728 | strtoll 729 | wcstoll 730 | strtoq 731 | wcstoq 732 | strtoull 733 | wcstoull 734 | strtouq 735 | wcstouq 736 | strtoimax 737 | wcstoimax 738 | strtoumax 739 | wcstoumax 740 | atol 741 | atoi 742 | atoll 743 | pathconf 744 | fpathconf 745 | popen 746 | pclose 747 | uname 748 | fgetpos 749 | fgetpos64 750 | fsetpos 751 | fsetpos64 752 | __ppc_get_timebase 753 | __ppc_get_timebase_freq 754 | __ppc_yield 755 | __ppc_mdoio 756 | __ppc_mdoom 757 | __ppc_set_ppr_med 758 | __ppc_set_ppr_low 759 | __ppc_set_ppr_med_low 760 | printf_size 761 | printf_size_info 762 | obstack_init 763 | fmtmsg 764 | waitpid 765 | wait 766 | wait4 767 | setsid 768 | getsid 769 | getpgrp 770 | getpgid 771 | setpgid 772 | setpgrp 773 | getpid 774 | getppid 775 | sigprocmask 776 | get_nprocs_conf 777 | get_nprocs 778 | getloadavg 779 | times 780 | getprotobyname 781 | getprotobynumber 782 | setprotoent 783 | getprotoent 784 | endprotoent 785 | openpty 786 | forkpty 787 | getpagesize 788 | get_phys_pages 789 | get_avphys_pages 790 | rewinddir 791 | telldir 792 | seekdir 793 | getsockname 794 | stat 795 | stat64 796 | fstat 797 | fstat64 798 | lstat 799 | lstat64 800 | getuid 801 | getgid 802 | geteuid 803 | getegid 804 | getgroups 805 | readdir 806 | readdir_r 807 | readdir64 808 | readdir64_r 809 | closedir 810 | recv 811 | recvfrom 812 | regfree 813 | regerror 814 | register_printf_function 815 | fmod 816 | fmodf 817 | fmodl 818 | drem 819 | dremf 820 | dreml 821 | remainder 822 | remainderf 823 | remainderl 824 | rename 825 | brk 826 | *sbrk 827 | getrusage 828 | vtimes 829 | ceil 830 | ceilf 831 | ceill 832 | floor 833 | floorf 834 | floorl 835 | trunc 836 | truncf 837 | truncl 838 | rint 839 | rintf 840 | rintl 841 | nearbyint 842 | nearbyintf 843 | nearbyintl 844 | round 845 | roundf 846 | roundl 847 | 
lrint 848 | lrintf 849 | lrintl 850 | llrint 851 | llrintf 852 | llrintl 853 | lround 854 | lroundf 855 | lroundl 856 | llround 857 | llroundf 858 | llroundl 859 | modf 860 | modff 861 | modfl 862 | fegetround 863 | fesetround 864 | system 865 | drand48 866 | erand48 867 | lrand48 868 | nrand48 869 | mrand48 870 | jrand48 871 | srand48 872 | seed48 873 | lcong48 874 | drand48_r 875 | erand48_r 876 | lrand48_r 877 | nrand48_r 878 | mrand48_r 879 | jrand48_r 880 | srand48_r 881 | seed48_r 882 | lcong48_r 883 | fgetgrent 884 | fgetgrent_r 885 | setgrent 886 | getgrent 887 | getgrent_r 888 | endgrent 889 | fgetpwent 890 | fgetpwent_r 891 | setpwent 892 | getpwent 893 | getpwent_r 894 | endpwent 895 | scandir 896 | alphasort 897 | versionsort 898 | scandir64 899 | alphasort64 900 | versionsort64 901 | readv 902 | writev 903 | memchr 904 | wmemchr 905 | rawmemchr 906 | memrchr 907 | strchr 908 | wcschr 909 | strchrnul 910 | wcschrnul 911 | strrchr 912 | wcsrchr 913 | strstr 914 | wcsstr 915 | wcswcs 916 | strcasestr 917 | memmem 918 | strspn 919 | wcsspn 920 | strcspn 921 | wcscspn 922 | strpbrk 923 | wcspbrk 924 | index 925 | rindex 926 | semctl 927 | semget 928 | semop 929 | semtimedop 930 | sem_init 931 | sem_destroy 932 | *sem_open 933 | sem_close 934 | sem_unlink 935 | sem_wait 936 | sem_timedwait 937 | sem_trywait 938 | sem_post 939 | sem_getvalue 940 | send 941 | sendto 942 | getservbyname 943 | getservbyport 944 | setservent 945 | getservent 946 | endservent 947 | bind 948 | setegid 949 | setgid 950 | setregid 951 | setgroups 952 | initgroups 953 | getgrouplist 954 | umask 955 | getumask 956 | chmod 957 | fchmod 958 | seteuid 959 | setuid 960 | setreuid 961 | setitimer 962 | getitimer 963 | alarm 964 | setlocale 965 | strsignal 966 | psignal 967 | sigemptyset 968 | sigfillset 969 | sigaddset 970 | sigdelset 971 | sigismember 972 | sigaltstack 973 | sigstack 974 | kill 975 | killpg 976 | raise 977 | gsignal 978 | sigsuspend 979 | time 980 | stime 981 | fputc 982 | 
fputwc 983 | fputc_unlocked 984 | fputwc_unlocked 985 | putc 986 | putwc 987 | putc_unlocked 988 | putwc_unlocked 989 | putchar 990 | putwchar 991 | putchar_unlocked 992 | putwchar_unlocked 993 | fputs 994 | fputws 995 | fputs_unlocked 996 | fputws_unlocked 997 | puts 998 | putw 999 | sleep 1000 | nanosleep 1001 | getsockopt 1002 | setsockopt 1003 | socketpair 1004 | erf 1005 | erff 1006 | erfl 1007 | erfc 1008 | erfcf 1009 | erfcl 1010 | lgamma 1011 | lgammaf 1012 | lgammal 1013 | lgamma_r 1014 | lgammaf_r 1015 | lgammal_r 1016 | gamma 1017 | gammaf 1018 | gammal 1019 | tgamma 1020 | tgammaf 1021 | tgammal 1022 | j0 1023 | j0f 1024 | j0l 1025 | j1 1026 | j1f 1027 | j1l 1028 | jn 1029 | jnf 1030 | jnl 1031 | y0 1032 | y0f 1033 | y0l 1034 | y1 1035 | y1f 1036 | y1l 1037 | yn 1038 | ynf 1039 | ynl 1040 | mallinfo 1041 | feclearexcept 1042 | feraiseexcept 1043 | fetestexcept 1044 | fegetexceptflag 1045 | fesetexceptflag 1046 | aio_error 1047 | aio_error64 1048 | aio_return 1049 | aio_return64 1050 | obstack_base 1051 | obstack_next_free 1052 | obstack_object_size 1053 | fopencookie 1054 | fwide 1055 | flockfile 1056 | ftrylockfile 1057 | funlockfile 1058 | __fsetlocking 1059 | strlen 1060 | wcslen 1061 | strnlen 1062 | wcsnlen 1063 | confstr 1064 | fmemopen 1065 | open_memstream 1066 | memcmp 1067 | wmemcmp 1068 | strcmp 1069 | wcscmp 1070 | strcasecmp 1071 | wcscasecmp 1072 | strncmp 1073 | wcsncmp 1074 | strncasecmp 1075 | wcsncasecmp 1076 | strverscmp 1077 | bcmp 1078 | getsubopt 1079 | symlink 1080 | readlink 1081 | canonicalize_file_name 1082 | realpath 1083 | aio_fsync 1084 | aio_fsync64 1085 | aio_suspend 1086 | aio_suspend64 1087 | sync 1088 | fsync 1089 | fdatasync 1090 | sysconf 1091 | syscall 1092 | sysctl 1093 | ecvt 1094 | fcvt 1095 | gcvt 1096 | qecvt 1097 | qfcvt 1098 | qgcvt 1099 | ecvt_r 1100 | fcvt_r 1101 | qecvt_r 1102 | qfcvt_r 1103 | getcontext 1104 | makecontext 1105 | setcontext 1106 | swapcontext 1107 | tmpfile 1108 | tmpfile64 1109 | tmpnam 
1110 | tmpnam_r 1111 | tempnam 1112 | mktemp 1113 | mkstemp 1114 | mkdtemp 1115 | tcgetpgrp 1116 | tcsetpgrp 1117 | tcgetsid 1118 | _exit 1119 | _Exit 1120 | access 1121 | nl_langinfo 1122 | localeconv 1123 | catopen 1124 | catgets 1125 | catclose 1126 | pthread_key_create 1127 | pthread_key_delete 1128 | *pthread_getspecific 1129 | pthread_setspecific 1130 | tzset 1131 | mtrace 1132 | muntrace 1133 | getpriority 1134 | setpriority 1135 | nice 1136 | gettext 1137 | dgettext 1138 | dcgettext 1139 | tsearch 1140 | tfind 1141 | tdelete 1142 | tdestroy 1143 | twalk 1144 | sin 1145 | sinf 1146 | sinl 1147 | cos 1148 | cosf 1149 | cosl 1150 | tan 1151 | tanf 1152 | tanl 1153 | sincos 1154 | sincosf 1155 | sincosl 1156 | csin 1157 | csinf 1158 | csinl 1159 | ccos 1160 | ccosf 1161 | ccosl 1162 | ctan 1163 | ctanf 1164 | ctanl 1165 | memfrob 1166 | getopt 1167 | pause 1168 | vscanf 1169 | vwscanf 1170 | vfscanf 1171 | vfwscanf 1172 | vsscanf 1173 | vswscanf 1174 | vprintf 1175 | vwprintf 1176 | vfprintf 1177 | vfwprintf 1178 | vsprintf 1179 | vswprintf 1180 | vsnprintf 1181 | vasprintf 1182 | obstack_vprintf 1183 | alloca 1184 | select 1185 | getlogin 1186 | cuserid 1187 | getpeername 1188 | wctrans 1189 | towctrans 1190 | towlower 1191 | towupper 1192 | fnmatch 1193 | getcwd 1194 | getwd 1195 | get_current_dir_name 1196 | chdir 1197 | fchdir 1198 | ftw 1199 | ftw64 1200 | nftw 1201 | nftw64 1202 | putpwent 1203 | setutxent 1204 | getutxent 1205 | endutxent 1206 | getutxid 1207 | getutxline 1208 | pututxline 1209 | utmpxname 1210 | getutmp 1211 | getutmpx 1212 | rpmatch 1213 | closelog 1214 | crypt 1215 | crypt_r 1216 | setfsent 1217 | endfsent 1218 | getfsent 1219 | getfsspec 1220 | getfsfile 1221 | getpass 1222 | setmntent 1223 | endmntent 1224 | getmntent 1225 | getmntent_r 1226 | addmntent 1227 | hasmntopt 1228 | openlog 1229 | setlogmask 1230 | strfry 1231 | syslog 1232 | vsyslog 1233 | -------------------------------------------------------------------------------- 
# /instr/if_inspection.py


def count_error_keywords(src):
    """Count error keywords and secondary error keywords in a piece of code.

    Each keyword is counted at most once, no matter how often it occurs.
    Matching is a plain substring search, so e.g. the text "error" is counted
    both as the primary keyword "error" and as the secondary keyword "err".

    Returns a tuple (nof_error_keywords, nof_secondary_error_keywords).
    """
    error_keywords = "return exit abort throw warning error".split(" ")
    secondary_error_keywords = "warn err signal raise longjmp".split(" ")

    nof_error_keywords = sum(1 for it in error_keywords if it in src)
    nof_secondary_error_keywords = sum(
        1 for it in secondary_error_keywords if it in src
    )

    return nof_error_keywords, nof_secondary_error_keywords


def always_execute_guess(if_src, else_src):
    """Guess whether an if should be instrumented to be always executed.

    Counts error-keywords in the if- and else-body; a primary keyword weighs
    twice as much as a secondary one.

    Return values:
         1: Always execute
         0: Don't know
        -1: Never execute
    """
    if_only_threshold = 2
    if_else_threshold = 2

    if_ek, if_sek = count_error_keywords(if_src)
    else_ek, else_sek = count_error_keywords(else_src)

    # "%s %s" reproduces the output of the Python 2 statement `print a, b`
    # while staying valid syntax on Python 3.
    for label, value in (
        ("error-keywords in if: ", if_ek),
        ("secondary error-keywords in if: ", if_sek),
        ("error-keywords in else: ", else_ek),
        ("secondary error-keywords in else:", else_sek),
    ):
        print("%s %s" % (label, value))

    if_score = 2 * if_ek + if_sek

    if else_src == "":
        # No else-branch: only the if-body can look like an error path.
        if if_score >= if_only_threshold:
            return -1
        return 1

    # With an else-branch, compare both bodies; a small difference is
    # treated as inconclusive.
    val = if_score - (2 * else_ek + else_sek)
    if abs(val) < if_else_threshold:
        return 0
    if val > 0:
        return -1
    return 1


# /instr/instrumentation.py
import sys
from random import randrange


def _say(*parts):
    """Print ``parts`` separated by single spaces.

    Produces the same text as the Python 2 statement ``print a, b, c`` while
    also being valid Python 3 syntax.
    """
    print(" ".join("%s" % (part,) for part in parts))


def read_file(file_name):
    """Return the complete contents of the text file ``file_name``."""
    # The context manager closes the file even if read() raises.
    with open(file_name, "r") as f:
        return f.read()


def get_extent_of_compound(joern_db, compound_id):
    """Return (start, end) spanning all children of a compound statement.

    For some reason, the compound_statement itself has boggled location-info,
    so the extent is derived from the third and fourth ':'-separated fields
    of every child's 'location' property instead.

    If the node has no children, the initial values (largest int, -1) are
    returned unchanged.
    """
    query = """g.v(%s).children()""" % compound_id
    children = joern_db.runGremlinQuery(query)

    # sys.maxint only exists on Python 2; fall back to sys.maxsize elsewhere.
    start = getattr(sys, "maxint", sys.maxsize)
    end = -1
    for c in children:
        location_splitters = c['location'].split(':')
        start = min(start, int(location_splitters[2]))
        end = max(end, int(location_splitters[3]))
    return start, end


def _expand_to_delimiters(data, start, end, opening, closing):
    """Widen the extent [start, end] to its enclosing delimiters.

    Returns the index of the last ``opening`` strictly before ``start`` and
    the index of the first ``closing`` strictly after ``end``.

    Raises ValueError if the extent lies outside ``data`` or a delimiter is
    missing, instead of silently wrapping around through negative indices.
    """
    if start < 1 or start > len(data) or end < -1:
        raise ValueError(
            "extent (%s, %s) lies outside the source text" % (start, end)
        )
    return data.rindex(opening, 0, start), data.index(closing, end + 1)


def get_body_extent(joern_db, body, data):
    """Return (start, end) of the source text of an if-/else-body.

    ``body`` is a query result whose first element is the body node; ``data``
    is the text of the file containing it.  For a CompoundStatement the
    extent is widened to include the surrounding braces; for a simple
    instruction the node's own location is used.
    """
    if body[0]["type"] == "CompoundStatement":
        compound_id = body[0]._id

        # point to first/last char of content
        body_start, body_end = get_extent_of_compound(joern_db, compound_id)

        # rectify body to start at braces
        return _expand_to_delimiters(data, body_start, body_end, "{", "}")

    # simple instruction: point to first/last char of content
    location_splitters = body[0]["location"].split(":")
    return int(location_splitters[2]), int(location_splitters[3])


def get_if_extent(joern_db, if_condition_id):
    """Locate condition, body and (optional) else-body of an if-statement.

    Returns (file_path, extents) where ``extents`` maps "cond_start",
    "cond_end", "body_start", "body_end" and, for an if-else, "else_start",
    "else_end" to character offsets in the file.
    """
    query = """g.v(%s)""" % if_condition_id
    if_condition = joern_db.runGremlinQuery(query)

    query = """g.v(%s).parents()[0].children()""" % if_condition_id
    ifs_childs = joern_db.runGremlinQuery(query)
    assert(len(ifs_childs) in [2, 3])

    # NOTE(review): get_file_path is not defined in the visible part of this
    # file; presumably it is provided elsewhere -- TODO confirm.
    file_path = get_file_path(joern_db, if_condition)
    location_splitters = if_condition['location'].split(':')

    # rectify condition (first/last char of it) to start at the parentheses
    data = read_file(file_path)
    cond_start, cond_end = _expand_to_delimiters(
        data, int(location_splitters[2]), int(location_splitters[3]), "(", ")"
    )

    ret_dict = {"cond_start": cond_start, "cond_end": cond_end}

    if len(ifs_childs) == 3:
        # it's an if-else
        # parent of condition => if; third child: ElseStatement,
        # its child: compound_statement or instruction
        query = """g.v(%s).parents().children()[2].children()[0]""" % if_condition_id
        body = joern_db.runGremlinQuery(query)
        ret_dict["else_start"], ret_dict["else_end"] = get_body_extent(joern_db, body, data)

    # parent of condition => if; second child: compound_statement or instruction
    query = """g.v(%s).parents().children()[1]""" % if_condition_id
    body = joern_db.runGremlinQuery(query)
    ret_dict["body_start"], ret_dict["body_end"] = get_body_extent(joern_db, body, data)

    return file_path, ret_dict


def get_source_range(data, start, end):
    """Return ``data`` from index ``start`` up to and including ``end``."""
    return data[start:end+1]


def get_if_else_source(data, data_dict):
    """Return (if_src, else_src); else_src is "" when there is no else."""
    if_src = get_source_range(data, data_dict["body_start"], data_dict["body_end"])
    else_src = ""
    if "else_start" in data_dict:
        else_src = get_source_range(data, data_dict["else_start"], data_dict["else_end"])
    return if_src, else_src


def instrument_if(joern_db, if_condition_id):
    """Pick a random applicable instrumentation for an if and apply it.

    Exits the process with status 1 when no guess can be made on whether
    the if should always or never be executed.  Raises ValueError when no
    instrumentation is applicable.
    """
    # Sibling module in instr/; imported locally because this file has no
    # file-level import for it.
    import if_inspection

    file_path, data_dict = get_if_extent(joern_db, if_condition_id)
    data = read_file(file_path)
    if_src, else_src = get_if_else_source(data, data_dict)

    guess = if_inspection.always_execute_guess(if_src, else_src)
    if guess == 1:
        always_execute = True
    elif guess == -1:
        always_execute = False
    else:
        print("Dunno")
        sys.exit(1)
    _say("Should always execute:", always_execute)

    # get_all_instrumentations is defined further down in this same module.
    possible = [
        it for it in get_all_instrumentations()
        if it(file_path, data_dict).is_possible(always_execute)
    ]
    _say("Choosing from", len(possible), "possible instrumentations")
    if not possible:
        raise ValueError("no applicable instrumentation")
    random_index = randrange(0, len(possible))
    instr = possible[random_index](file_path, data_dict)
    _say("Instrumenting using:", instr.description)
    instr.instrument()


def find_all_checks(joern_db, path):
    """Return those node ids of ``path`` whose node type is 'Condition'."""
    checks = []
    for p in path:
        ergo = joern_db.runGremlinQuery("g.v(%s).filter{it.type == 'Condition'}" % (p))
        if len(ergo) != 0:
            checks.append(p)
    return checks


def is_relevant_check(joern_db, check_id, overarched_by):
    """Tell whether a condition compares ``overarched_by`` with <, >, <=, >=.

    The condition's code is split at "&&" and "||"; the check is relevant if
    one of the parts contains both the space-delimited symbol and a
    space-delimited comparison operator.
    """
    uses = set(joern_db.runGremlinQuery("""g.v(%s).uses().code""" % check_id))
    if overarched_by not in uses:
        return False

    check_code = joern_db.runGremlinQuery("""g.v(%s).code""" % check_id)
    # Pad every sub-check with spaces so that symbols at its edges match too.
    all_checks = [
        " " + part + " "
        for and_part in check_code.split("&&")
        for part in and_part.split("||")
    ]

    comp_ops = "< > <= >=".split(" ")
    symbol = " " + overarched_by + " "
    for c in all_checks:
        if symbol in c and any(" " + op + " " in c for op in comp_ops):
            return True
    return False


def insert_at_position(original, insert, position):
    """Return ``original`` with ``insert`` spliced in before ``position``."""
    return original[:position] + insert + original[position:]


class Instrumentation(object):
    """Base class of all instrumentations of an if-condition."""

    def __init__(self, file_path, data_dict):
        # Subclasses set these: whether the instrumented if is always (True)
        # or never (False) executed, and a human-readable description.
        self.always_execute = None
        self.description = ""

        self.file_path = file_path
        self.data_dict = data_dict

    def is_possible(self, always_execute):
        """Tell whether this instrumentation yields the requested effect
        and all extents it needs are present in ``data_dict``."""
        if self.always_execute != always_execute:
            return False
        return all(r in self.data_dict for r in ("cond_start", "cond_end"))

    def instrument(self):
        pass

    def _wrap_condition(self, prefix):
        """Print the file with the condition rewritten to
        ``(<prefix><condition>))``, e.g. prefix "0 && (" gives
        ``if(0 && (cond))``."""
        self.cond_start = self.data_dict["cond_start"]
        self.cond_end = self.data_dict["cond_end"]

        data = read_file(self.file_path)
        # Insert at the later position first, so that the earlier offset
        # is still valid for the second insertion.
        data = insert_at_position(data, ")", self.cond_end)
        data = insert_at_position(data, prefix, self.cond_start+1)
        print(data)
        # write it to some file...


class Instrumentation_0_and(Instrumentation):
    """Never execute the if: prefix its condition with ``0 &&``."""

    def __init__(self, file_path, data_dict):
        super(Instrumentation_0_and, self).__init__(file_path, data_dict)
        self.always_execute = False
        self.description = "if(len < 256) => if(0 && (len < 256))"

    def instrument(self):
        self._wrap_condition("0 && (")


class Instrumentation_1_or(Instrumentation):
    """Always execute the if: prefix its condition with ``1 ||``."""

    def __init__(self, file_path, data_dict):
        super(Instrumentation_1_or, self).__init__(file_path, data_dict)
        self.always_execute = True
        self.description = "if(len < 256) => if(1 || (len < 256))"

    def instrument(self):
        self._wrap_condition("1 || (")


# More instrumentations...
def get_all_instrumentations():
    """Return the list of all available instrumentation classes."""
    ret = [
        Instrumentation_0_and,
        Instrumentation_1_or,
    ]
    return ret


def main(argv):
    """Instrument every relevant check on a path given on the command line.

    ``argv[1::2]`` are node ids forming the path, ``argv[2::2]`` the symbols
    that overarch them.
    """
    if len(argv) == 1:
        print("%s %s %s" % ("Usage:", argv[0], "[ ]^+"))
        sys.exit(1)

    # init_joern lives in the sibling module joern_con.py; imported locally
    # because this file has no file-level import for it.
    from joern_con import init_joern
    joern_db = init_joern()

    # A real list (not a lazy map object) is needed for .index() below.
    path = [int(it) for it in argv[1::2]]
    overarch = argv[2::2]

    all_checks = find_all_checks(joern_db, path)
    for c in all_checks:
        # The original referenced an undefined name here; the checks are
        # drawn from `path`, so that is where their position is looked up.
        overarched_by = overarch[path.index(c)-1]
        is_rel = is_relevant_check(joern_db, c, overarched_by)

        if is_rel:
            print("%s %s %s" % (c, "seems to be relevant check:", joern_db.runGremlinQuery("g.v(%s).code" % (c))))
            instrument_if(joern_db, c)


if(__name__ == "__main__"):
    main(sys.argv)


# /instr/joern_con.py
from joern.all import JoernSteps


def init_joern():
    """Connect to the graph database at localhost:7474 and return the handle."""
    joern_db = JoernSteps()
    joern_db.setGraphDbURL("http://localhost:7474/db/data/")
    joern_db.connectToDatabase()
    return joern_db


def queryNodeIndex(joern_db, query):
    """Run ``query`` against the node index and return the result."""
    return joern_db.runGremlinQuery("g.queryNodeIndex('%s')" % (query))


def find_reached_parent(joern_db, node_id, edge_type="DATA_FLOW_EDGE", in_edge = True): # REACHES
    """Find the first node, going up the parents, that has a matching edge.

    Input:  id of the node in question; the edge type to look for; whether
            to look for incoming (True) or outgoing (False) edges.
    Output: node_id   <=> node has such an edge
            -1        <=> node has no parent
            recursive <=> node has exactly one parent
    Raises Exception if a node has more than one parent.
    """
    direction = "in" if in_edge else "out"
    e = joern_db.runGremlinQuery("g.v(%s).%s(%s)" % (node_id, direction, edge_type))
    if len(e) > 0:
        return node_id

    p = joern_db.runGremlinQuery("g.v(%s).parents()" % (node_id))
    if len(p) == 0:
        return -1
    elif len(p) != 1:
        raise Exception("expected only one parent")

    # Pass the caller's edge type and direction on; the original recursion
    # silently fell back to the defaults.
    return find_reached_parent(joern_db, p[0]._id, edge_type, in_edge)


def get_function_id(joern_db, node_id):
    """Return the 'functionId' of the node or of its closest ancestor that
    has one."""
    node = joern_db.runGremlinQuery("g.v(%s)" % (node_id))
    if "functionId" in node:
        return node["functionId"]

    p = joern_db.runGremlinQuery("g.v(%s).parents()" % (node_id))[0]
    return get_function_id(joern_db, p._id)


def get_function_id_by_name(joern_db, func_name):
    """Find the ID of a function-definition, given the function's name.

    Assumes that the function's name is unique; returns -1 if the lookup
    does not yield exactly one function.
    """
    call = joern_db.runGremlinQuery("getFunctionsByName('%s')" % func_name)
    if len(call) != 1:
        return -1
    return call[0]._id


def get_function_ids_by_name(joern_db, func_name):
    """Return the ids of all functions named ``func_name``."""
    calls = joern_db.runGremlinQuery("getFunctionsByName('%s').id" % func_name)
    return calls


# Returns all calls to a function, given the function's name.
# Simple frontend to the getCallsTo-Query.
70 | def get_calls_to(joern_db, func_name): 71 | calls = joern_db.runGremlinQuery("getCallsTo('%s')" % func_name) 72 | return calls 73 | 74 | 75 | 76 | def remove_edge_from_db(joern_db, edge_id): 77 | joern_db.runGremlinQuery("g.e(%s).remove()" % (edge_id)) 78 | 79 | 80 | def properties_to_gremlin_list(properties): 81 | str_properties = "["; 82 | for k in properties.keys(): 83 | str_properties += k + ": \"" + str(properties[k]).replace("\"", "\\\"") + "\", " 84 | if(len(str_properties) > 1): 85 | str_properties = str_properties[0:-2] + "]" 86 | else: 87 | str_properties += "]" 88 | 89 | return str_properties 90 | 91 | 92 | 93 | 94 | def addNode(joern_db, properties): 95 | str_properties = properties_to_gremlin_list(properties) 96 | q = "g.addVertex(null, %s)" % str_properties 97 | # print q 98 | newNode = joern_db.runGremlinQuery(q) 99 | return newNode 100 | 101 | 102 | def addRelationship(joern_db, src, dst, relType, properties): 103 | if(len(properties) == 0): 104 | q = "g.addEdge(null, g.v(%s), g.v(%s), '%s')" % (src, dst, relType) 105 | else: 106 | str_properties = properties_to_gremlin_list(properties) 107 | q = "g.addEdge(null, g.v(%s), g.v(%s), '%s', %s)" % (src, dst, relType, str_properties) 108 | # print q 109 | joern_db.runGremlinQuery(q) 110 | 111 | 112 | 113 | def getNodeById(joern_db, node_id): 114 | node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 115 | return node 116 | 117 | 118 | def getCalleeFromCall(joern_db, node_id): 119 | node = joern_db.runGremlinQuery("g.v(%s).callToCallee()" % (node_id)) 120 | return node 121 | 122 | def getNodeType(joern_db, node_id): 123 | node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 124 | return node["type"] 125 | 126 | def getNodeCode(joern_db, node_id): 127 | node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 128 | return node["code"] 129 | 130 | def getOperatorCode(joern_db, node_id): 131 | node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 132 | return node["operator"] 133 | 134 | def 
getNodeChildNum(joern_db, node_id): 135 | node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 136 | if("childNum" in node): 137 | return int(node["childNum"]) 138 | else: 139 | return 0 140 | 141 | -------------------------------------------------------------------------------- /joern/build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 80 | 81 | 82 | 83 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | -------------------------------------------------------------------------------- /joern/src/ddg/DDGCreator.java: -------------------------------------------------------------------------------- 1 | package ddg; 2 | 3 | import java.util.HashSet; 4 | import java.util.Iterator; 5 | import java.util.List; 6 | 7 | import misc.HashMapOfSets; 8 | import 
ddg.DataDependenceGraph.DDG; 9 | import ddg.DefUseCFG.BatchInserterFactory; 10 | import ddg.DefUseCFG.DefUseCFG; 11 | import ddg.DefUseCFG.DefUseCFGFactory; 12 | 13 | public class DDGCreator 14 | { 15 | 16 | DefUseCFG cfg; 17 | DefUseCFGFactory cfgFactory = new BatchInserterFactory(); 18 | 19 | HashMapOfSets in = new HashMapOfSets(); 20 | HashMapOfSets out = new HashMapOfSets(); 21 | HashMapOfSets gen = new HashMapOfSets(); 22 | HashSet changedNodes; 23 | 24 | private class Definition 25 | { 26 | public Definition(Object aStatement, String aIdentifier) 27 | { 28 | statement = aStatement; 29 | identifier = aIdentifier; 30 | } 31 | 32 | public Object statement; 33 | public String identifier; 34 | }; 35 | 36 | public void setFactory(DefUseCFGFactory aFactory) 37 | { 38 | cfgFactory = aFactory; 39 | } 40 | 41 | public DDG createForFunctionById(Long funcId) 42 | { 43 | DefUseCFG cfg = cfgFactory.create(funcId); 44 | return createForDefUseCFG(cfg); 45 | } 46 | 47 | public DDG createForDefUseCFG(DefUseCFG aCfg) 48 | { 49 | cfg = aCfg; 50 | calculateReachingDefs(); 51 | return createDDGFromReachingDefs(); 52 | } 53 | 54 | private void calculateReachingDefs() 55 | { 56 | initReachingDefs(); 57 | 58 | while (!changedNodes.isEmpty()) 59 | { 60 | 61 | Object currentBlock = popFromChangedNodes(); 62 | 63 | updateIn(currentBlock); 64 | boolean changed = updateOut(currentBlock); 65 | 66 | if (!changed) 67 | continue; 68 | 69 | List children = cfg.getChildBlocks().getListForKey( 70 | currentBlock); 71 | if (children == null) 72 | continue; 73 | 74 | for (Object o : children) 75 | changedNodes.add(o); 76 | 77 | } 78 | 79 | } 80 | 81 | private void initReachingDefs() 82 | { 83 | initOut(); 84 | initGenFromOut(); 85 | changedNodes = new HashSet(); 86 | changedNodes.addAll(cfg.getStatements()); 87 | } 88 | 89 | private Object popFromChangedNodes() 90 | { 91 | Object x = changedNodes.iterator().next(); 92 | changedNodes.remove(x); 93 | return x; 94 | } 95 | 96 | private void initOut() 
97 | { 98 | for (Object statement : cfg.getStatements()) 99 | { 100 | 101 | // this has the nice side-effect that an 102 | // empty hash is created for the statement. 103 | 104 | // JANNIK 105 | // out.removeAllForKey(statement); 106 | HashSet tmp = out.getListForKey(statement); 107 | if (tmp == null) 108 | out.initForKey(statement); 109 | 110 | 111 | List symsDefined = cfg.getSymbolsDefined().getListForKey( 112 | statement); 113 | if (symsDefined == null) 114 | continue; 115 | 116 | for (Object s : symsDefined) 117 | { 118 | String symbol = (String) s; 119 | out.add(statement, new Definition(statement, symbol)); 120 | } 121 | } 122 | } 123 | 124 | private void initGenFromOut() 125 | { 126 | for (Object statement : cfg.getStatements()) 127 | { 128 | for (Object o : out.getListForKey(statement)) 129 | gen.add(statement, o); 130 | } 131 | } 132 | 133 | private void updateIn(Object x) 134 | { 135 | List parents = cfg.getParentBlocks().getListForKey(x); 136 | if (parents == null) 137 | return; 138 | 139 | // JANNIK 140 | // in.removeAllForKey(x); 141 | HashSet tmp = in.getListForKey(x); 142 | if (tmp == null) 143 | in.initForKey(x); 144 | 145 | 146 | // in(x) = union(out(p))_{p in parents(x)} 147 | for (Object parent : parents) 148 | { 149 | HashSet parentOut = out.getListForKey(parent); 150 | if (parentOut == null) 151 | continue; 152 | for (Object o : parentOut) 153 | in.add(x, o); 154 | } 155 | } 156 | 157 | private boolean updateOut(Object x) 158 | { 159 | HashSet listForKey = out.getListForKey(x); 160 | HashSet oldOut = new HashSet(listForKey); 161 | 162 | // JANNIK 163 | // out.removeAllForKey(x); 164 | HashSet tmp = out.getListForKey(x); 165 | if (tmp == null) 166 | out.initForKey(x); 167 | 168 | // in(x) 169 | HashSet inForX = in.getListForKey(x); 170 | if (inForX != null) 171 | { 172 | for (Object o : inForX) 173 | { 174 | out.add(x, o); 175 | } 176 | } 177 | 178 | // -kill(x) 179 | List killX = cfg.getSymbolsDefined().getListForKey(x); 180 | if (killX != 
null) 181 | { 182 | 183 | Iterator it = out.getListForKey(x).iterator(); 184 | while (it.hasNext()) 185 | { 186 | Definition def = (Definition) it.next(); 187 | if (killX.contains(def.identifier)) 188 | it.remove(); 189 | } 190 | 191 | } 192 | 193 | // gen(X) 194 | HashSet genX = gen.getListForKey(x); 195 | 196 | if (genX != null) 197 | { 198 | for (Object o : genX) 199 | { 200 | out.add(x, o); 201 | } 202 | } 203 | 204 | return !oldOut.equals(out.getListForKey(x)); 205 | } 206 | 207 | private DDG createDDGFromReachingDefs() 208 | { 209 | DDG ddg = new DDG(); 210 | 211 | for (Object statement : cfg.getStatements()) 212 | { 213 | HashSet inForBlock = in.getListForKey(statement); 214 | if (inForBlock == null) 215 | continue; 216 | List usedSymbols = cfg.getSymbolsUsed().getListForKey( 217 | statement); 218 | if (usedSymbols == null) 219 | continue; 220 | 221 | for (Object d : inForBlock) 222 | { 223 | Definition def = (Definition) d; 224 | 225 | if (usedSymbols.contains(def.identifier)) 226 | ddg.add(def.statement, statement, def.identifier); 227 | } 228 | } 229 | 230 | return ddg; 231 | } 232 | 233 | } 234 | -------------------------------------------------------------------------------- /joern/src/misc/HashMapOfSets.java: -------------------------------------------------------------------------------- 1 | package misc; 2 | 3 | // This is the same as MultiHashMap. Only difference is 4 | // that LinkedList has been replaced by HashSet. 5 | // Clean this up at some point and use a generic to 6 | // reduce duplication. 
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.Set;

/**
 * Maps each key to a set of values.
 *
 * Keys and values are plain Objects; a key's set is created on first add()
 * or explicitly via initForKey()/removeAllForKey().
 */
public class HashMapOfSets
{

    public HashMap<Object, HashSet<Object>> hashMap = new HashMap<Object, HashSet<Object>>();

    /** Iterator over all (key, value-set) entries. */
    public Iterator<Entry<Object, HashSet<Object>>> getEntrySetIterator()
    {
        return hashMap.entrySet().iterator();
    }

    /** Iterator over all keys. */
    public Iterator<Object> getKeySetIterator()
    {
        return hashMap.keySet().iterator();
    }

    /** Adds val to the set of key, creating the set if necessary. */
    public void add(Object key, Object val)
    {
        HashSet<Object> valList = hashMap.get(key);
        if (valList == null)
        {
            valList = new HashSet<Object>();
            hashMap.put(key, valList);
        }
        valList.add(val);
    }

    /**
     * Adds every (key, value) pair of otherHashMap to this map. Keys whose
     * set in otherHashMap is empty are not created here.
     */
    public void addHashMapOfSets(HashMapOfSets otherHashMap)
    {
        for (Entry<Object, HashSet<Object>> pair : otherHashMap.hashMap.entrySet())
        {
            for (Object val : pair.getValue())
            {
                add(pair.getKey(), val);
            }
        }
    }

    /** Removes val from the set of key; does nothing for an unknown key. */
    public void remove(Object key, Object val)
    {
        HashSet<Object> dstList = hashMap.get(key);
        if (dstList == null)
            return;
        dstList.remove(val);
    }

    // JANNIK
    /** Associates key with a fresh, empty set, replacing any existing one. */
    public void initForKey(Object key)
    {
        hashMap.put(key, new HashSet<Object>());
    }

    /** Replaces the set of key by a fresh, empty set. */
    public void removeAllForKey(Object key)
    {
        hashMap.put(key, new HashSet<Object>());
    }

    /** Total number of values over all keys. */
    public int size()
    {
        int s = 0;
        for (HashSet<Object> values : hashMap.values())
        {
            s += values.size();
        }
        return s;
    }

    /** Returns the set stored for k, or null if k is unknown. */
    public HashSet<Object> getListForKey(Object k)
    {
        return hashMap.get(k);
    }

}

// /joern/src/neo4j/readWriteDB/Neo4JDBInterface.java:
-------------------------------------------------------------------------------- 1 | package neo4j.readWriteDB; 2 | 3 | import java.util.Iterator; 4 | import java.util.Map; 5 | import java.util.Map.Entry; 6 | import java.util.Set; 7 | 8 | import org.neo4j.graphdb.GraphDatabaseService; 9 | import org.neo4j.graphdb.Node; 10 | import org.neo4j.graphdb.Relationship; 11 | import org.neo4j.graphdb.RelationshipType; 12 | import org.neo4j.graphdb.Transaction; 13 | import org.neo4j.graphdb.factory.GraphDatabaseFactory; 14 | import org.neo4j.graphdb.index.Index; 15 | import org.neo4j.graphdb.index.IndexHits; 16 | 17 | public class Neo4JDBInterface 18 | { 19 | // Made it public... 20 | public static GraphDatabaseService graphDb; 21 | public static Index nodeIndex; 22 | // 23 | 24 | static String databaseDir = ""; 25 | 26 | static Transaction tx; 27 | 28 | public static void startTransaction() 29 | { 30 | tx = graphDb.beginTx(); 31 | } 32 | 33 | public static void finishTransaction() 34 | { 35 | tx.success(); 36 | tx.close(); 37 | } 38 | 39 | public static void setDatabaseDir(String aDir) 40 | { 41 | databaseDir = aDir; 42 | } 43 | 44 | public static void openDatabase() 45 | { 46 | 47 | Map conf = ConfigurationGenerator 48 | .generateConfiguration(); 49 | 50 | graphDb = new GraphDatabaseFactory() 51 | .newEmbeddedDatabaseBuilder(databaseDir).setConfig(conf) 52 | .newGraphDatabase(); 53 | 54 | registerShutdownHook(); 55 | startTransaction(); 56 | 57 | nodeIndex = graphDb.index().forNodes("nodeIndex"); 58 | 59 | } 60 | private static void registerShutdownHook() 61 | { 62 | // Registers a shutdown hook for the Neo4j and index service instances 63 | // so that it shuts down nicely when the VM exits (even if you 64 | // "Ctrl-C" the running example before it's completed) 65 | Runtime.getRuntime().addShutdownHook( new Thread() 66 | { 67 | @Override 68 | public void run() 69 | { 70 | graphDb.shutdown(); 71 | } 72 | } ); 73 | } 74 | 75 | 76 | 77 | public static IndexHits 
queryIndex(String query) 78 | { 79 | return nodeIndex.query(query); 80 | } 81 | 82 | public static void closeDatabase() 83 | { 84 | finishTransaction(); 85 | graphDb.shutdown(); 86 | } 87 | 88 | public static Node getNodeById(Long nodeId) 89 | { 90 | return graphDb.getNodeById(nodeId); 91 | } 92 | 93 | public static void removeEdge(long id) 94 | { 95 | graphDb.getRelationshipById(id).delete(); 96 | } 97 | 98 | public static void addRelationship(long src, long dst, 99 | RelationshipType relType, Map properties, boolean check_for_duplicate) 100 | { 101 | Node node = graphDb.getNodeById(src); 102 | 103 | //JANNIK: check, if that relationship already exists 104 | if(check_for_duplicate) 105 | { 106 | Iterable rels = node.getRelationships(); 107 | for (Relationship rel : rels) 108 | { 109 | Node endNode = rel.getEndNode(); 110 | 111 | if (endNode.getId() != dst) 112 | continue; 113 | if (!rel.getType().name().equals(relType.name())) 114 | continue; 115 | 116 | boolean all_equal = true; 117 | Iterable keys = rel.getPropertyKeys(); 118 | Iterator iter = keys.iterator(); 119 | while (iter.hasNext()) 120 | { 121 | String key = iter.next(); 122 | if(!rel.getProperty(key).equals(properties.get(key))) 123 | { 124 | all_equal = false; 125 | break; 126 | } 127 | } 128 | 129 | if(properties != null) 130 | { 131 | keys = properties.keySet(); 132 | iter = keys.iterator(); 133 | while (iter.hasNext()) 134 | { 135 | String key = iter.next(); 136 | if(!rel.getProperty(key).equals(properties.get(key))) 137 | { 138 | all_equal = false; 139 | break; 140 | } 141 | } 142 | } 143 | if (!all_equal) 144 | continue; 145 | 146 | return; 147 | } 148 | } 149 | //System.out.println(String.valueOf(src) + " -> " + String.valueOf(dst) + " " + relType.name()); 150 | 151 | Relationship rel = node.createRelationshipTo(graphDb.getNodeById(dst), 152 | relType); 153 | if (properties == null) 154 | return; 155 | for (Entry entry : properties.entrySet()) 156 | { 157 | rel.setProperty(entry.getKey(), 
entry.getValue()); 158 | } 159 | } 160 | 161 | public static void addRelationship(long src, long dst, 162 | RelationshipType relType, Map properties) 163 | { 164 | Neo4JDBInterface.addRelationship(src, dst, relType, properties, false); 165 | } 166 | 167 | public static Node addNode(Map properties) 168 | { 169 | Node newNode = graphDb.createNode(); 170 | 171 | Set> entrySet = properties.entrySet(); 172 | Iterator> it = entrySet.iterator(); 173 | while(it.hasNext()){ 174 | Entry next = it.next(); 175 | newNode.setProperty(next.getKey(), next.getValue()); 176 | } 177 | 178 | return newNode; 179 | } 180 | 181 | 182 | } 183 | -------------------------------------------------------------------------------- /joern/src/neo4j/traversals/readWriteDB/Traversals.java: -------------------------------------------------------------------------------- 1 | package neo4j.traversals.readWriteDB; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | import misc.Pair; 7 | import neo4j.readWriteDB.Neo4JDBInterface; 8 | 9 | import org.neo4j.graphdb.Direction; 10 | import org.neo4j.graphdb.Node; 11 | import org.neo4j.graphdb.NotFoundException; 12 | import org.neo4j.graphdb.Relationship; 13 | import org.neo4j.graphdb.index.IndexHits; 14 | 15 | import databaseNodes.EdgeTypes; 16 | import databaseNodes.NodeKeys; 17 | import ddg.DataDependenceGraph.DDG; 18 | 19 | public class Traversals 20 | { 21 | 22 | public static IndexHits getStatementsForFunction(Node funcNode) 23 | { 24 | String query = String.format("%s:True AND %s:%d", NodeKeys.IS_CFG_NODE, 25 | NodeKeys.FUNCTION_ID, funcNode.getId()); 26 | 27 | return Neo4JDBInterface.queryIndex(query); 28 | } 29 | 30 | public static List> getSymbolsDefinedByStatement( 31 | Long statementId) 32 | { 33 | Node node = Neo4JDBInterface.getNodeById(statementId); 34 | return getIdAndCodeOfChildrenConnectedBy(node, "DEF"); 35 | } 36 | 37 | public static List> getSymbolsUsedByStatement( 38 | Long statementId) 39 | { 40 | Node node = 
Neo4JDBInterface.getNodeById(statementId); 41 | return getIdAndCodeOfChildrenConnectedBy(node, "USE"); 42 | } 43 | 44 | public static List getCodeOfChildrenConnectedBy(Node node, 45 | String edgeType) 46 | { 47 | List retval = new LinkedList(); 48 | 49 | List children = getChildrenConnectedBy(node, edgeType); 50 | for (Node childNode : children) 51 | { 52 | String childCode = childNode.getProperty(NodeKeys.CODE).toString(); 53 | retval.add(childCode); 54 | } 55 | return retval; 56 | } 57 | 58 | public static List> getIdAndCodeOfChildrenConnectedBy( 59 | Node node, String edgeType) 60 | { 61 | List> retval = new LinkedList>(); 62 | List children = getChildrenConnectedBy(node, edgeType); 63 | 64 | for (Node childNode : children) 65 | { 66 | String childCode = childNode.getProperty(NodeKeys.CODE).toString(); 67 | Pair pair = new Pair(childNode.getId(), 68 | childCode); 69 | retval.add(pair); 70 | } 71 | 72 | return retval; 73 | } 74 | 75 | public static List getChildrenConnectedBy(Node node, String edgeType) 76 | { 77 | List retval = new LinkedList(); 78 | 79 | long nodeId = node.getId(); 80 | 81 | Iterable rels = node.getRelationships(); 82 | for (Relationship rel : rels) 83 | { 84 | if (!rel.getType().name().equals(edgeType)) 85 | continue; 86 | Node childNode = rel.getEndNode(); 87 | if (childNode.getId() == nodeId) 88 | continue; 89 | 90 | retval.add(childNode); 91 | } 92 | return retval; 93 | } 94 | 95 | public static List getParentsConnectedBy(Node node, String edgeType) 96 | { 97 | List retval = new LinkedList(); 98 | 99 | long nodeId = node.getId(); 100 | 101 | Iterable rels = node.getRelationships(); 102 | for (Relationship rel : rels) 103 | { 104 | if (!rel.getType().name().equals(edgeType)) 105 | continue; 106 | Node parentNode = rel.getStartNode(); 107 | if (parentNode.getId() == nodeId) 108 | continue; 109 | 110 | retval.add(parentNode); 111 | } 112 | return retval; 113 | } 114 | 115 | public static List getCallsTo(String source) 116 | { 117 | List retval 
= new LinkedList(); 118 | 119 | // JANNIK 120 | String my = source; 121 | my = my.replace("*", "\\*"); 122 | my = my.replace("(", "\\("); 123 | my = my.replace(")", "\\)"); 124 | my = my.replace("-", "\\-"); 125 | my = my.replace(" ", "\\ "); 126 | 127 | String query = String.format("%s:Callee AND %s:%s", NodeKeys.TYPE, NodeKeys.CODE, my); 128 | IndexHits hits = Neo4JDBInterface.queryIndex(query); 129 | for (Node n : hits) 130 | { 131 | List parents = getParentsConnectedBy(n, "IS_AST_PARENT"); 132 | retval.add(parents.get(0)); 133 | } 134 | return retval; 135 | } 136 | 137 | // public static List getCallsTo(String source) 138 | // { 139 | // List retval = new LinkedList(); 140 | // 141 | // String query = String.format("%s:CallExpression AND %s:%s" + "*", 142 | // NodeKeys.TYPE, NodeKeys.CODE, source); 143 | // IndexHits hits = Neo4JDBInterface.queryIndex(query); 144 | // for (Node n : hits) 145 | // { 146 | // if (n.getProperty(NodeKeys.CODE).toString() 147 | // .startsWith(source + " ")) 148 | // retval.add(n); 149 | // } 150 | // return retval; 151 | // } 152 | 153 | public static List getCallsToForFunction(String source, 154 | long functionId) 155 | { 156 | List retval = new LinkedList(); 157 | // JANNIK 158 | String my = source; 159 | my = my.replace("*", "\\*"); 160 | my = my.replace("(", "\\("); 161 | my = my.replace(")", "\\)"); 162 | my = my.replace("-", "\\-"); 163 | my = my.replace(" ", "\\ "); 164 | 165 | String query = String.format("%s:Callee AND %s:%s AND %s:%s", NodeKeys.TYPE, NodeKeys.FUNCTION_ID, functionId, NodeKeys.CODE, my); 166 | 167 | IndexHits hits = Neo4JDBInterface.queryIndex(query); 168 | for (Node n : hits) 169 | { 170 | List parents = getParentsConnectedBy(n, "IS_AST_PARENT"); 171 | retval.add(parents.get(0)); 172 | } 173 | return retval; 174 | } 175 | 176 | // public static List getCallsToForFunction(String source, 177 | // long functionId) 178 | // { 179 | // List retval = new LinkedList(); 180 | // 181 | // String query = 
String.format("%s:CallExpression AND %s:%d AND %s:%s" 182 | // + "*", NodeKeys.TYPE, NodeKeys.FUNCTION_ID, functionId, 183 | // NodeKeys.CODE, source); 184 | // 185 | // IndexHits hits = Neo4JDBInterface.queryIndex(query); 186 | // for (Node n : hits) 187 | // { 188 | // if (n.getProperty(NodeKeys.CODE).toString() 189 | // .startsWith(source + " ")) 190 | // retval.add(n); 191 | // } 192 | // return retval; 193 | // } 194 | 195 | public static DDG getDDGForFunction(Node funcNode) 196 | { 197 | DDG retval = new DDG(); 198 | for (Node statement : Traversals.getStatementsForFunction(funcNode)) 199 | { 200 | Iterable rels = statement 201 | .getRelationships(Direction.OUTGOING); 202 | long srcId = statement.getId(); 203 | 204 | for (Relationship rel : rels) 205 | { 206 | if (!rel.getType().toString().equals(EdgeTypes.REACHES)) 207 | continue; 208 | long dstId = rel.getEndNode().getId(); 209 | String symbol = rel.getProperty("var").toString(); 210 | retval.add(srcId, dstId, symbol); 211 | } 212 | 213 | } 214 | return retval; 215 | } 216 | 217 | // The two following functions are somewhat disgraceful 218 | // but should work for now. 
219 | 220 | public static Node getStatementForASTNode(Node node) 221 | { 222 | Node n = node; 223 | Node parent = node; 224 | 225 | while (true) 226 | { 227 | 228 | try 229 | { 230 | Object property = n.getProperty(NodeKeys.IS_CFG_NODE); 231 | return n; 232 | } 233 | catch (NotFoundException ex) 234 | { 235 | 236 | } 237 | 238 | Iterable rels = n 239 | .getRelationships(Direction.INCOMING); 240 | for (Relationship rel : rels) 241 | { 242 | parent = rel.getStartNode(); 243 | break; 244 | } 245 | 246 | if (n == parent) 247 | return null; 248 | n = parent; 249 | } 250 | } 251 | 252 | public static String getNthArgCodeOfCall(Node callNode, int n) 253 | { 254 | String nStr = String.format("%d", n); 255 | 256 | Iterable rels = callNode 257 | .getRelationships(Direction.OUTGOING); 258 | for (Relationship rel : rels) 259 | { 260 | 261 | if (!rel.getType().toString().equals(EdgeTypes.IS_AST_PARENT)) 262 | continue; 263 | 264 | Node endNode = rel.getEndNode(); 265 | String childNum; 266 | 267 | try 268 | { 269 | childNum = (String) endNode.getProperty(NodeKeys.CHILD_NUMBER); 270 | } 271 | catch (RuntimeException ex) 272 | { 273 | childNum = "0"; 274 | } 275 | 276 | if (childNum.equals("1")) 277 | { 278 | // found argument list 279 | Node argList = rel.getEndNode(); 280 | Iterable rels2 = argList 281 | .getRelationships(Direction.OUTGOING); 282 | for (Relationship rel2 : rels2) 283 | { 284 | if (!rel2.getType().toString() 285 | .equals(EdgeTypes.IS_AST_PARENT)) 286 | continue; 287 | 288 | String childNum2; 289 | try 290 | { 291 | childNum2 = (String) rel2.getEndNode().getProperty( 292 | NodeKeys.CHILD_NUMBER); 293 | } 294 | catch (RuntimeException ex) 295 | { 296 | childNum2 = "0"; 297 | } 298 | 299 | if (childNum2.equals(nStr)) 300 | return rel2.getEndNode().getProperty(NodeKeys.CODE) 301 | .toString(); 302 | } 303 | } 304 | } 305 | return null; 306 | } 307 | 308 | public static Long getFunctionIdFromASTNode(Node astNode) 309 | { 310 | return 
Long.valueOf(astNode.getProperty(NodeKeys.FUNCTION_ID) 311 | .toString()); 312 | } 313 | 314 | public static IndexHits getFunctionsByName(String functionName) 315 | { 316 | return Neo4JDBInterface.queryIndex(NodeKeys.NAME + ":" + functionName); 317 | } 318 | 319 | public static Node getASTForStatement(Node statement) 320 | { 321 | return statement; 322 | } 323 | 324 | public static String getNodeType(Long nodeId) 325 | { 326 | Node node = Neo4JDBInterface.getNodeById(nodeId); 327 | return node.getProperty(NodeKeys.TYPE).toString(); 328 | 329 | } 330 | 331 | public static String getCalleeFromCall(Long nodeId) 332 | { 333 | Node node = Neo4JDBInterface.getNodeById(nodeId); 334 | Iterable rels = node.getRelationships(); 335 | for (Relationship rel : rels) 336 | { 337 | if (!rel.getType().name().equals(EdgeTypes.IS_AST_PARENT)) 338 | continue; 339 | 340 | Node endNode = rel.getEndNode(); 341 | 342 | if (endNode.getId() == node.getId()) 343 | continue; 344 | 345 | try 346 | { 347 | String childNumStr = (String) endNode 348 | .getProperty(NodeKeys.CHILD_NUMBER); 349 | if (childNumStr.equals("0")) 350 | return endNode.getProperty(NodeKeys.CODE).toString(); 351 | } 352 | catch (RuntimeException ex) 353 | { 354 | return endNode.getProperty(NodeKeys.CODE).toString(); 355 | } 356 | } 357 | return ""; 358 | } 359 | 360 | public static String getNodeCode(long nodeId) 361 | { 362 | Node node = Neo4JDBInterface.getNodeById(nodeId); 363 | return node.getProperty(NodeKeys.CODE).toString(); 364 | } 365 | 366 | public static String getOperatorCode(long nodeId) 367 | { 368 | Node node = Neo4JDBInterface.getNodeById(nodeId); 369 | return node.getProperty(NodeKeys.OPERATOR).toString(); 370 | } 371 | 372 | public static int getNodeChildNum(long nodeId) 373 | { 374 | Node node = Neo4JDBInterface.getNodeById(nodeId); 375 | String childNumStr = (String) node.getProperty(NodeKeys.CHILD_NUMBER, 376 | null); 377 | if (childNumStr == null) 378 | return 0; 379 | return 
Integer.parseInt(childNumStr); 380 | } 381 | 382 | } 383 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/ArgumentTainter.java: -------------------------------------------------------------------------------- 1 | package tools.argumentTainter; 2 | 3 | import java.util.Collection; 4 | import java.util.HashMap; 5 | import java.util.HashSet; 6 | import java.util.List; 7 | 8 | import neo4j.readWriteDB.Neo4JDBInterface; 9 | import neo4j.traversals.readWriteDB.Traversals; 10 | 11 | import org.neo4j.graphdb.Node; 12 | 13 | // Determine functions to patch and hand over 14 | // individual functions to FunctionPatcher 15 | 16 | public class ArgumentTainter 17 | { 18 | 19 | HashMap sourceCallsByFuncId; 20 | Collection functionsToPatch = new HashSet(); 21 | FunctionPatcher functionPatcher = new FunctionPatcher(); 22 | private String source; 23 | 24 | public void initialize(String databaseDir) 25 | { 26 | Neo4JDBInterface.setDatabaseDir(databaseDir); 27 | Neo4JDBInterface.openDatabase(); 28 | } 29 | 30 | public void setSourceToPatch(String sourceToPatch) 31 | { 32 | source = sourceToPatch; 33 | //System.out.println("sourceToPatch: " + sourceToPatch); 34 | functionPatcher.setSourceToPatch(sourceToPatch); 35 | } 36 | 37 | public void setArgToPatch(int taintedArg) 38 | { 39 | //System.out.println("argToPatch: " + taintedArg); 40 | functionPatcher.setArgumentToPatch(taintedArg); 41 | } 42 | 43 | public void patch() 44 | { 45 | determineFunctionsToPatch(source); 46 | //System.out.println("mr> set patched_functions"); 47 | //System.out.println("mr> end patched_functions"); 48 | 49 | for (Long funcId : functionsToPatch) 50 | { 51 | //System.out.println("\nmr> patched_functions patching function with id: " + funcId.toString()); 52 | patchFunction(funcId); 53 | 54 | } 55 | } 56 | 57 | private void determineFunctionsToPatch(String source) 58 | { 59 | List hits = Traversals.getCallsTo(source); 60 | for (Node callASTNode : 
hits) 61 | { 62 | Long functionId = Traversals.getFunctionIdFromASTNode(callASTNode); 63 | functionsToPatch.add(functionId); 64 | } 65 | 66 | } 67 | 68 | public void patchFunction(Long funcId) 69 | { 70 | functionPatcher.reset(); 71 | functionPatcher.patch(funcId); 72 | } 73 | 74 | public void shutdown() 75 | { 76 | Neo4JDBInterface.closeDatabase(); 77 | } 78 | 79 | } 80 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/ArgumentTainterMain.java: -------------------------------------------------------------------------------- 1 | package tools.argumentTainter; 2 | import java.util.Vector; 3 | import java.io.FileInputStream; 4 | import java.io.InputStreamReader; 5 | import java.io.BufferedReader; 6 | 7 | // Parse command line and hand over to to ArgumentTainter 8 | 9 | public class ArgumentTainterMain 10 | { 11 | static Vector sources = new Vector(); 12 | static Vector tainted_args = new Vector(); 13 | static String databaseDir; 14 | 15 | public static void main(String[] args) throws Exception 16 | { 17 | parseCommandLine(args); 18 | 19 | ArgumentTainter argumentTainter = new ArgumentTainter(); 20 | argumentTainter.initialize(databaseDir); 21 | 22 | for(int i=0; i \n"); 37 | System.out.println("[/] holds lines, which contain \n"); 38 | System.exit(1); 39 | } 40 | 41 | String file_name = args[0]; 42 | databaseDir = args[1]; 43 | while(databaseDir.endsWith("/") && databaseDir.length() > 0) 44 | { 45 | databaseDir = databaseDir.substring(0, databaseDir.length()-1); 46 | } 47 | 48 | if(!databaseDir.endsWith(".joernIndex")) 49 | { 50 | System.err.println("[-] DatabaseDir has to end with .joernIndex"); 51 | System.exit(0); 52 | } 53 | 54 | FileInputStream fstream = new FileInputStream(file_name); 55 | BufferedReader br = new BufferedReader(new InputStreamReader(fstream)); 56 | String line; 57 | while((line = br.readLine()) != null) 58 | { 59 | line = line.trim(); 60 | if(line.equals("")) 61 | { 62 | 
continue; 63 | } 64 | 65 | String[] splitters = line.split("\t"); 66 | if(splitters.length != 2) 67 | { 68 | System.err.println("[-] Expected two splitters per line"); 69 | System.exit(0); 70 | } 71 | sources.add(splitters[0]); 72 | tainted_args.add(Integer.parseInt(splitters[1])); 73 | } 74 | br.close(); 75 | } 76 | 77 | } // EOF class 78 | 79 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/CallsForFunction.java: -------------------------------------------------------------------------------- 1 | package tools.argumentTainter; 2 | 3 | import java.util.LinkedList; 4 | import java.util.List; 5 | 6 | public class CallsForFunction 7 | { 8 | 9 | List blocksCallingSource = new LinkedList(); 10 | List symbolsUsed = new LinkedList(); 11 | 12 | public CallsForFunction() 13 | { 14 | } 15 | }; 16 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/CommandLineInterface.java: -------------------------------------------------------------------------------- 1 | package tools.argumentTainter; 2 | 3 | import tools.UtilCommandLineInterface; 4 | 5 | public class CommandLineInterface extends UtilCommandLineInterface 6 | { 7 | 8 | public void printHelp() 9 | { 10 | 11 | } 12 | 13 | public String getSource() 14 | { 15 | return cmd.getArgs()[0]; 16 | } 17 | 18 | public int getArgNum() 19 | { 20 | return Integer.parseInt(cmd.getArgs()[1]); 21 | } 22 | 23 | public int getNumberOfArgs() 24 | { 25 | return cmd.getArgs().length; 26 | } 27 | 28 | public String getDatabaseDir() 29 | { 30 | if(getNumberOfArgs() > 2) 31 | { 32 | return cmd.getArgs()[2]; 33 | } 34 | else 35 | { 36 | return super.getDatabaseDir(); 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/DDGPatcher.java: -------------------------------------------------------------------------------- 1 | package 
tools.argumentTainter; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.Map; 6 | 7 | import neo4j.readWriteDB.Neo4JDBInterface; 8 | import neo4j.traversals.readWriteDB.Traversals; 9 | 10 | import org.neo4j.graphdb.Direction; 11 | import org.neo4j.graphdb.DynamicRelationshipType; 12 | import org.neo4j.graphdb.Node; 13 | import org.neo4j.graphdb.Relationship; 14 | import org.neo4j.graphdb.RelationshipType; 15 | 16 | import databaseNodes.EdgeTypes; 17 | import ddg.DDGCreator; 18 | import ddg.DataDependenceGraph.DDG; 19 | import ddg.DataDependenceGraph.DDGDifference; 20 | import ddg.DataDependenceGraph.DefUseRelation; 21 | import ddg.DefUseCFG.DefUseCFG; 22 | 23 | public class DDGPatcher 24 | { 25 | 26 | DDGDifference diff; 27 | 28 | public void patchDDG(DefUseCFG defUseCFG, Long funcId) 29 | { 30 | Node node = Neo4JDBInterface.getNodeById(funcId); 31 | 32 | DDG oldDDG = Traversals.getDDGForFunction(node); 33 | DDGCreator ddgCreator = new DDGCreator(); 34 | DDG newDDG = ddgCreator.createForDefUseCFG(defUseCFG); 35 | 36 | diff = oldDDG.difference(newDDG); 37 | } 38 | 39 | public void writeChangesToDatabase() 40 | { 41 | removeOldEdges(diff); 42 | addNewEdges(diff); 43 | } 44 | 45 | private void addNewEdges(DDGDifference diff) 46 | { 47 | List relsToAdd = diff.getRelsToAdd(); 48 | for (DefUseRelation rel : relsToAdd) 49 | { 50 | 51 | Map properties = new HashMap(); 52 | properties.put("var", rel.symbol); 53 | RelationshipType relType = DynamicRelationshipType 54 | .withName(EdgeTypes.REACHES); 55 | 56 | Neo4JDBInterface.addRelationship((Long) rel.src, (Long) rel.dst, 57 | relType, properties, true); //JANNIK: added flag 58 | } 59 | } 60 | 61 | private void removeOldEdges(DDGDifference diff) 62 | { 63 | List relsToRemove = diff.getRelsToRemove(); 64 | 65 | for (DefUseRelation rel : relsToRemove) 66 | { 67 | Node srcStatement = Neo4JDBInterface.getNodeById((Long) rel.src); 68 | 69 | Iterable rels = srcStatement 70 | 
.getRelationships(Direction.OUTGOING); 71 | 72 | for (Relationship reachRel : rels) 73 | { 74 | if (!reachRel.getType().name().equals(EdgeTypes.REACHES)) 75 | continue; 76 | 77 | if (reachRel.getEndNode().getId() != (Long) rel.dst) 78 | continue; 79 | 80 | Object var = reachRel.getProperty("var"); 81 | if (var == null || !var.toString().equals(rel.symbol)) 82 | continue; 83 | 84 | Neo4JDBInterface.removeEdge(reachRel.getId()); 85 | } 86 | } 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/DefUseCFGPatcher.java: -------------------------------------------------------------------------------- 1 | package tools.argumentTainter; 2 | 3 | import java.util.Collection; 4 | import java.util.HashMap; 5 | import java.util.LinkedList; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import neo4j.readWriteDB.Neo4JDBInterface; 10 | import neo4j.traversals.readWriteDB.Traversals; 11 | 12 | import org.neo4j.graphdb.DynamicRelationshipType; 13 | import org.neo4j.graphdb.Node; 14 | import org.neo4j.graphdb.RelationshipType; 15 | 16 | import udg.useDefAnalysis.ASTDefUseAnalyzer; 17 | import udg.useDefGraph.ReadWriteDbASTProvider; 18 | import udg.useDefGraph.UseOrDef; 19 | import databaseNodes.EdgeTypes; 20 | import ddg.DefUseCFG.DefUseCFG; 21 | 22 | public class DefUseCFGPatcher 23 | { 24 | 25 | List newlyAddedLinks = new LinkedList(); 26 | DefUseCFG defUseCFG; 27 | ASTDefUseAnalyzer astDefUseAnalyzer = new ASTDefUseAnalyzer(); 28 | 29 | public class DefUseLink 30 | { 31 | public DefUseLink(String aSymbol, Long aStatementId, boolean aIsDef) 32 | { 33 | symbol = aSymbol; 34 | statement = aStatementId; 35 | isDef = aIsDef; 36 | } 37 | 38 | public String symbol; 39 | public long statement; 40 | public boolean isDef; 41 | } 42 | 43 | static final Map EMPTY_PROPERTIES = new HashMap(); 44 | 45 | public void setSourceToPatch(String sourceToPatch, int argToPatch) 46 | { 47 | 
astDefUseAnalyzer.addTaintSource(sourceToPatch, argToPatch); 48 | } 49 | 50 | public Collection getDefUseLinksToAdd() 51 | { 52 | return newlyAddedLinks; 53 | } 54 | 55 | public void patchDefUseCFG(DefUseCFG defUseCFG, 56 | Collection statementsToPatch) 57 | { 58 | 59 | this.defUseCFG = defUseCFG; 60 | newlyAddedLinks.clear(); 61 | 62 | for (Node statement : statementsToPatch) 63 | { 64 | 65 | if(statement == null) continue; 66 | 67 | long statementId = statement.getId(); 68 | 69 | Node node = Traversals.getASTForStatement(statement); 70 | 71 | ReadWriteDbASTProvider astProvider = new ReadWriteDbASTProvider(); 72 | astProvider.setNodeId(node.getId()); 73 | 74 | Collection newDefs = astDefUseAnalyzer 75 | .analyzeAST(astProvider); 76 | 77 | Collection oldDefs = defUseCFG 78 | .getSymbolsDefinedBy(statementId); 79 | updateDefsToAdd(oldDefs, newDefs, statementId); 80 | 81 | } 82 | 83 | } 84 | 85 | private void updateDefsToAdd(Collection oldDefs, 86 | Collection newDefs, Long statementId) 87 | { 88 | for (UseOrDef newDef : newDefs) 89 | { 90 | if (oldDefs.contains(newDef.symbol)) 91 | continue; 92 | if (!newDef.isDef) 93 | continue; 94 | DefUseLink e = new DefUseLink(newDef.symbol, statementId, 95 | newDef.isDef); 96 | // add to newlyAddedLinks 97 | newlyAddedLinks.add(e); 98 | defUseCFG.addSymbolDefined(statementId, newDef.symbol); 99 | 100 | // Add def-links from AST nodes to symbols 101 | long nodeId = ((ReadWriteDbASTProvider) newDef.astProvider) 102 | .getNodeId(); 103 | if (statementId != nodeId) 104 | { 105 | DefUseLink e2 = new DefUseLink(newDef.symbol, nodeId, 106 | newDef.isDef); 107 | newlyAddedLinks.add(e2); 108 | defUseCFG.addSymbolDefined(nodeId, newDef.symbol); 109 | } 110 | 111 | } 112 | } 113 | 114 | public void writeChangesToDatabase() 115 | { 116 | if (defUseCFG == null) 117 | { 118 | return; 119 | } 120 | 121 | for (DefUseLink link : newlyAddedLinks) 122 | { 123 | Long fromId = link.statement; 124 | Long toId = (Long) 
defUseCFG.getIdForSymbol(link.symbol); 125 | 126 | if (toId == null) 127 | { 128 | Map properties = new HashMap(); 129 | Node statementNode = Neo4JDBInterface.getNodeById(link.statement); 130 | 131 | properties.put("functionId", statementNode.getProperty("functionId")); 132 | properties.put("type", "Symbol"); 133 | properties.put("code", link.symbol); 134 | 135 | Node symbolNode = Neo4JDBInterface.addNode(properties); 136 | toId = (Long) symbolNode.getId(); 137 | } 138 | 139 | RelationshipType relType = DynamicRelationshipType 140 | .withName(EdgeTypes.DEF); 141 | Neo4JDBInterface.addRelationship(fromId, toId, relType, null, true); //JANNIK: added flag 142 | } 143 | 144 | 145 | } 146 | 147 | } 148 | -------------------------------------------------------------------------------- /joern/src/tools/argumentTainter/FunctionPatcher.java: -------------------------------------------------------------------------------- 1 | package tools.argumentTainter; 2 | 3 | import java.util.Collection; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | 7 | import neo4j.readWriteDB.Neo4JDBInterface; 8 | import neo4j.traversals.readWriteDB.Traversals; 9 | 10 | import org.neo4j.graphdb.Node; 11 | 12 | import ddg.DefUseCFG.DefUseCFG; 13 | import ddg.DefUseCFG.DefUseCFGFactory; 14 | import ddg.DefUseCFG.ReadWriteDbFactory; 15 | 16 | public class FunctionPatcher 17 | { 18 | 19 | private DefUseCFGFactory defUseGraphFactory = new ReadWriteDbFactory(); 20 | private Collection statementsToPatch = new LinkedList(); 21 | private DefUseCFG defUseCFG = null; 22 | 23 | private String sourceToPatch; 24 | private int argumentToPatch; 25 | 26 | public void setSourceToPatch(String source) 27 | { 28 | sourceToPatch = source; 29 | } 30 | 31 | public void setArgumentToPatch(int argToPatch) 32 | { 33 | argumentToPatch = argToPatch; 34 | } 35 | 36 | public void reset() 37 | { 38 | statementsToPatch.clear(); 39 | defUseCFG = null; 40 | } 41 | 42 | public void patch(Long funcId) 43 | { 44 | 
System.out.print(funcId.toString()); 45 | System.out.print(" "); 46 | determineCallsToPatch(funcId); 47 | retrieveDefUseCFGFromDatabase(funcId); 48 | patchDefUseCFG(); 49 | patchDDG(funcId); 50 | } 51 | 52 | private void determineCallsToPatch(Long funcId) 53 | { 54 | List callNodes = Traversals.getCallsToForFunction(sourceToPatch, 55 | funcId); 56 | for (Node callNode : callNodes) 57 | { 58 | 59 | Node parent = Traversals.getStatementForASTNode(callNode); 60 | 61 | statementsToPatch.add(parent); 62 | } 63 | } 64 | 65 | private void retrieveDefUseCFGFromDatabase(long funcId) 66 | { 67 | defUseCFG = defUseGraphFactory.create(funcId); 68 | } 69 | 70 | private void patchDefUseCFG() 71 | { 72 | DefUseCFGPatcher patcher = new DefUseCFGPatcher(); 73 | patcher.setSourceToPatch(sourceToPatch, argumentToPatch); 74 | patcher.patchDefUseCFG(defUseCFG, statementsToPatch); 75 | patcher.writeChangesToDatabase(); 76 | } 77 | 78 | private void patchDDG(Long funcId) 79 | { 80 | DDGPatcher patcher = new DDGPatcher(); 81 | patcher.patchDDG(defUseCFG, funcId); 82 | patcher.writeChangesToDatabase(); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Arg_in_call_return_type.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | class Arg_in_call_return_type 4 | { 5 | public Boolean is_arg; 6 | public String func_name; 7 | public Long arg_index; 8 | public Long nof_args; 9 | 10 | public Arg_in_call_return_type(Boolean is_arg, String func_name, int arg_index, int nof_args) 11 | { 12 | this.is_arg = is_arg; 13 | this.func_name = func_name; 14 | this.arg_index = new Long(arg_index); 15 | this.nof_args = new Long(nof_args); 16 | } 17 | 18 | } 19 | 20 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Best_effort_topological_sort.java: 
--------------------------------------------------------------------------------
package tools.data_flow;

import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.LinkedList;
import java.util.HashSet;
import java.util.Set;

import java.util.Iterator;

/**
 * Iterates over the keys of a dependency map in the order delivered by
 * {@link Topological_sort}, without giving up when the sorter reports a cycle.
 *
 * Whenever the sorter throws a {@link Circle_exception}, the dependency set of
 * the reported element is replaced by an empty set and a fresh sorter is
 * created over the same map, after which iteration continues.
 *
 * NOTE(review): the generic type arguments in this file were lost in the copy
 * under review and have been restored from usage (element type T, map value
 * type Set&lt;T&gt;, Topological_sort&lt;T&gt;) -- confirm them against
 * Topological_sort.java before merging.
 */
class Best_effort_topological_sort<T extends Comparable<T>> implements Iterable<T>
{
    // Private (shallow) copy of the dependency map handed to the constructor.
    private HashMap<T, Set<T>> data = null;
    // Elements already obtained from the sorter but not yet handed out.
    private LinkedList<T> cur = null;
    // Current sorter; replaced after every reported cycle.
    private Topological_sort<T> t = null;
    // Number of keys in the map at construction time == number of elements to deliver.
    private int data_size;
    // Number of elements delivered so far. Stored on the outer object, so all
    // iterators obtained from iterator() share one cursor.
    private int cur_index;

    /**
     * @param data dependency map; the map itself is copied, the value sets are not.
     */
    public Best_effort_topological_sort(HashMap<T, Set<T>> data)
    {
        this.data = new HashMap<>(data);
        this.cur = new LinkedList<>();
        this.t = new Topological_sort<T>(this.data);
        this.data_size = data.size();
        this.cur_index = 0;
    }

    /**
     * Drops all dependencies of the element the sorter reported as part of a cycle.
     */
    private void remove_dep(T circle_element)
    {
        data.put(circle_element, new HashSet<T>());
    }


    @Override
    public Iterator<T> iterator()
    {
        Iterator<T> it = new Iterator<T>()
        {
            @Override
            public boolean hasNext()
            {
                return cur_index < data_size;
            }

            @Override
            public T next()
            {
                if(cur.size() != 0)
                {
                    // Count an element only when one is actually handed out.
                    // Counting before the refill (as was done previously) made
                    // every refill consume a slot, so hasNext() became false
                    // before all data_size elements had been delivered.
                    cur_index += 1;
                    return cur.pollFirst();
                }

                try
                {
                    cur = new LinkedList<>(t.next_elements());
                    return next();
                }
                catch(Circle_exception e)
                {
                    // Break the cycle at the reported element and restart the sorter.
                    remove_dep((T)(e.circle_element));
                    t = new Topological_sort<T>(data);
                    return next();
                }
            }

            @Override
            public void remove()
            {
                throw new UnsupportedOperationException();
            }
        };
        return it;
    }

    /**
     * Prints the iteration order for a small map containing one plain
     * dependency (1 -> 2) and one cycle (3 <-> 4).
     */
    public static void self_test()
    {
        HashMap<Long, Set<Long>> test_data = new HashMap<>();
        test_data.put(Long.valueOf(1L), new HashSet<Long>());
        test_data.put(Long.valueOf(2L), new HashSet<Long>());
        test_data.put(Long.valueOf(3L), new HashSet<Long>());
        test_data.put(Long.valueOf(4L), new HashSet<Long>());

        test_data.get(Long.valueOf(1L)).add(Long.valueOf(2L));
        test_data.get(Long.valueOf(3L)).add(Long.valueOf(4L));
        test_data.get(Long.valueOf(4L)).add(Long.valueOf(3L));

        Best_effort_topological_sort<Long> tp = new Best_effort_topological_sort<Long>(test_data);
        for(Iterator<Long> iter = tp.iterator(); iter.hasNext();)
        {
            // Should be 2, 1, 3, 4 or 2, 1, 4, 3
            Long it = iter.next();
            System.out.println(it);
        }
    }
}
--------------------------------------------------------------------------------
/joern/src/tools/data_flow/Circle_exception.java:
--------------------------------------------------------------------------------
package tools.data_flow;

/**
 * Carries the element that Best_effort_topological_sort reads back (as
 * circle_element) when it catches this exception while fetching the next
 * elements from its sorter.
 */
class Circle_exception extends Exception
{
    // The offending element; typed Object, so users cast it back to their element type.
    public Object circle_element;

    public Circle_exception(Object circle_element)
    {
        this.circle_element = circle_element;
    }
}

--------------------------------------------------------------------------------
/joern/src/tools/data_flow/Data_transfer.java:
--------------------------------------------------------------------------------
package tools.data_flow;

import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;

/**
 * Lookup table describing, per function name, which arguments receive data
 * from which other arguments. The table is loaded once, when the class is
 * initialised, from the file "data_trans.ser".
 *
 * NOTE(review): the generic type arguments in this file were lost in the copy
 * under review; they are restored here as
 * HashMap&lt;String, List&lt;Pair&lt;Long, List&lt;Long&gt;&gt;&gt;&gt; based on how the values are
 * used below -- confirm against Pair.java and Init_glibc_data_trans.java.
 */
class Data_transfer
{
    // Key: Func name
    // Value: List of data-transfer pairs; each pair (i, [j]): arg i stems from args j
    // Values < 0: Function itself fills it... e.g. fread
    static HashMap<String, List<Pair<Long, List<Long>>>> data_transfer = null;

    static
    {
        // NOTE(review): Pickle.load_from_file is a project helper that is not
        // visible here; if it deserialises arbitrary objects, "data_trans.ser"
        // must come from a trusted source. A missing/invalid file surfaces as
        // a failure of this static initialiser.
        HashMap data = (HashMap)Pickle.load_from_file("data_trans.ser");

        data_transfer = (HashMap<String, List<Pair<Long, List<Long>>>>)data.get("data_trans");

        // NOTE: Irrelevant, as init_glibc_data_trans does this
        // data_transfer["memcpy"] = [(0, [1])]
        // data_transfer["strcpy"] = [(0, [1])]
        // data_transfer["strncpy"] = [(0, [1])]
        // data_transfer["sprintf"] = [(0, [2, 3, 4, 5, 6, 7, 8, 9])]
        // data_transfer["snprintf"] = [(0, [3, 4, 5, 6, 7, 8, 9])]
        // data_transfer["strcat"] = [(0, [0, 1])]
        // data_transfer["sscanf"] = [(0, [2, 3, 4, 5, 6, 7, 8, 9])]
        // data_transfer["fread"] = [(0, [-1])]
    }



    /** Returns the complete table (the live map, not a copy). */
    public static HashMap<String, List<Pair<Long, List<Long>>>> get_data_transfer_dict()
    {
        return data_transfer;
    }


    /**
     * Returns the source list recorded for argument ith_argument of func_name.
     *
     * @throws Exception if the function is unknown or has no entry for that argument
     */
    public static List<Long> get_data_transfer_for_argument(String func_name, Long ith_argument) throws Exception
    {
        List<Pair<Long, List<Long>>> val = get_data_transfer(func_name);
        for(Pair<Long, List<Long>> v : val)
        {
            // Compare by value: both sides are boxed Longs, and "==" on boxed
            // values tests object identity, which fails for equal numbers held
            // in different objects (e.g. values created with new Long(..) or
            // read back from a serialised file).
            if(v.first != null && v.first.equals(ith_argument))
            {
                return v.second;
            }
        }
        throw new Exception("Argument " + ith_argument.toString() + " not found in list " + val.toString());
    }


    /**
     * Returns all data-transfer pairs recorded for func_name.
     *
     * @throws Exception if the table has no entry for func_name
     */
    public static List<Pair<Long, List<Long>>> get_data_transfer(String func_name) throws Exception
    {
        if(!data_transfer.containsKey(func_name))
        {
            throw new Exception("Function not found");
        }
        return data_transfer.get(func_name);
    }


    /** Tells whether the table has an entry for func_name. */
    public static Boolean data_transfer_has(String func_name)
    {
        return data_transfer.containsKey(func_name);
    }


    /**
     * Three-valued lookup:
     *   0  -- func_name is not in the table,
     *   1  -- func_name is in the table and has an entry for ith_argument,
     *  -1  -- func_name is in the table but the lookup for ith_argument failed.
     */
    public static Long data_transfer_sets_arg(String func_name, Long ith_argument)
    {
        if(!data_transfer_has(func_name))
        {
            return Long.valueOf(0L);
        }

        try
        {
            get_data_transfer_for_argument(func_name, ith_argument);
            return Long.valueOf(1L);
        }
        catch(Exception e)
        {
            // The lookup signals "no entry" by throwing; map that to -1.
            return Long.valueOf(-1L);
        }
    }
}

--------------------------------------------------------------------------------
/joern/src/tools/data_flow/Def_tree.java:
--------------------------------------------------------------------------------
package tools.data_flow;

import java.util.List;
import java.util.ArrayList;

/**
 * One node of a tree of (node id, variable name) entries. Each node keeps a
 * link to its parent, a list of child trees, a continue_at_self flag and a
 * call stack.
 *
 * NOTE(review): generic type arguments were lost in the copy under review.
 * sub_trees and the result of path_to_root() are restored from how they are
 * used in this class; the element type of call_stack is not visible here and
 * is therefore left as a raw List -- confirm against the callers.
 */
class Def_tree
{
    public Long node_id;
    public Object var_name;
    public Def_tree parent;
    public Boolean continue_at_self;
    public List call_stack;
    public List<Def_tree> sub_trees;

    /** Full constructor; the new node starts without sub-trees. */
    public Def_tree(Long node_id, Object var_name, Def_tree parent, Boolean continue_at_self, List call_stack)
    {
        this.sub_trees = new ArrayList<>();

        this.call_stack = call_stack;
        this.continue_at_self = continue_at_self;
        this.parent = parent;
        this.var_name = var_name;
        this.node_id = node_id;
    }

    /** Same as the full constructor, with a fresh empty call stack. */
    public Def_tree(Long node_id, Object var_name, Def_tree parent, Boolean continue_at_self)
    {
        this(node_id, var_name, parent, continue_at_self, new ArrayList());
    }

    /** Same as above, with continue_at_self set to false. */
    public Def_tree(Long node_id, Object var_name, Def_tree parent)
    {
        this(node_id, var_name, parent, Boolean.FALSE);
    }


    /** Appends s to this node's children. */
    public void add_sub_tree(Def_tree s)
    {
        sub_trees.add(s);
    }

    /**
     * Returns the (node_id, var_name) pairs from the root down to this node;
     * this node's own pair is the last entry.
     */
    public List<Pair<Long, Object>> path_to_root()
    {
        List<Pair<Long, Object>> trail = (parent == null)
            ? new ArrayList<Pair<Long, Object>>()
            : parent.path_to_root();
        trail.add(new Pair<Long, Object>(node_id, var_name));
        return trail;
    }

    /**
     * Renders the tree as "[(id, name[, call_stack]), [children...]]"; the
     * call stack is only printed when it is not empty.
     */
    public String toString()
    {
        StringBuilder text = new StringBuilder("[(");

        text.append(node_id.toString());
        text.append(", ");
        text.append(var_name.toString());
        if(!call_stack.isEmpty())
        {
            text.append(", ");
            text.append(call_stack.toString());
        }
        text.append("), [");
        for(Def_tree child : sub_trees)
        {
            text.append(child.toString());
        }
        text.append("]]");
        return text.toString();
    }
} // EOF class

--------------------------------------------------------------------------------
/joern/src/tools/data_flow/Find_all_function_pointers.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.HashSet; 6 | import java.util.HashMap; 7 | import java.util.Set; 8 | 9 | import java.nio.file.Paths; 10 | import java.nio.file.Files; 11 | 12 | import org.neo4j.graphdb.Node; 13 | 14 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex; 15 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Edge; 16 | import com.tinkerpop.pipes.PipeFunction; 17 | 18 | class Find_all_function_pointers 19 | { 20 | private static HashSet externally_defined_functions = null; 21 | public static void fill_externally_defined_functions(Joern_db joern_db) 22 | { 23 | // completeType extern void ( ) 24 | // baseType extern void 25 | // type Decl 26 | // identifier extern_func 27 | if(externally_defined_functions != null) return; 28 | 29 | externally_defined_functions = new HashSet<>(); 30 | // decls = joern_db.runGremlinQuery("queryNodeIndex('type:Decl').filter{it.completeType.startsWith('extern') && it.completeType.endsWith(')')}.identifier") 31 | // decls = joern_db.runGremlinQuery("g.V.filter{it.type == 'Decl' && it.completeType.startsWith('extern') && it.completeType.endsWith(')')}.identifier") 32 | 33 | List decl_nodes = Joern_db.queryNodeIndex("type:Decl"); 34 | for(Node d : decl_nodes) 35 | { 36 | String completeType = (String)d.getProperty("completeType"); 37 | if(completeType.startsWith("extern") && completeType.endsWith(")")) 38 | { 39 | String identifier = (String)d.getProperty("identifier"); 40 | externally_defined_functions.add(identifier); 41 | } 42 | } 43 | } 44 | 45 | 46 | public static Boolean func_definition_exists(Joern_db joern_db, String func_name) 47 | { 48 | fill_externally_defined_functions(joern_db); 49 | return externally_defined_functions.contains(func_name); 50 | } 51 | 52 | 53 | private static HashSet 
glibc_function_names = null; 54 | public static void fill_glibc_function_names() throws Exception 55 | { 56 | if(glibc_function_names != null) return; 57 | 58 | String data = new String(Files.readAllBytes(Paths.get("glibc_function_names.txt"))); 59 | String[] lines = data.split("\n", -1); 60 | for(int i=0, i_end = lines.length; i(); 66 | for(String l : lines) 67 | { 68 | if(l.equals("")) continue; 69 | glibc_function_names.add(l); 70 | } 71 | } 72 | 73 | 74 | private static ArrayList assign_expression_codes = null; 75 | public static Boolean is_function_pointer(Joern_db joern_db, String callee_code) throws Exception 76 | { 77 | fill_glibc_function_names(); 78 | 79 | if(glibc_function_names.contains(callee_code)) return false; 80 | 81 | // starts with * ( 82 | if(callee_code.startsWith("* (")) return true; 83 | 84 | // uses a field 85 | if(callee_code.contains(" -> ") || callee_code.contains(" . ")) return true; 86 | 87 | // there exists no func-declaration for it 88 | List func_ids = Joern_db.get_function_ids_by_name(callee_code); 89 | if(func_ids.size() != 0) return false; 90 | 91 | // there exists no func-DEFINITION for it 92 | if(!func_definition_exists(joern_db, callee_code)) return false; 93 | 94 | // there an assign to it 95 | if(assign_expression_codes == null) 96 | { 97 | List assign_expr = Joern_db.queryNodeIndex("type:AssignmentExpr"); 98 | assign_expression_codes = new ArrayList<>(); 99 | for(Node n : assign_expr) 100 | { 101 | assign_expression_codes.add((String)n.getProperty("code")); 102 | } 103 | } 104 | 105 | boolean assigned = false; 106 | for(String a : assign_expression_codes) 107 | { 108 | if(a.contains(callee_code + " =")) 109 | { 110 | assigned = true; 111 | break; 112 | } 113 | } 114 | if(assigned) return true; 115 | 116 | // assigned = joern_db.runGremlinQuery("g.V.filter{it.type == 'AssignmentExpr'}.filter{-1 != it.code.indexOf(' %s = ')}" % callee_code) 117 | // print "assigned:", assigned 118 | // if(len(assigned) != 0): 119 | // return 
True 120 | 121 | return false; 122 | } 123 | 124 | 125 | public static HashMap find_all_function_pointers(Joern_db joern_db) throws Exception 126 | { 127 | List callees = joern_db.queryNodeIndex("type:Callee"); 128 | HashMap > callee_codes = new HashMap<>(); 129 | for(Node c : callees) 130 | { 131 | Long c_id = c.getId(); 132 | String code = (String)c.getProperty("code"); 133 | if(!callee_codes.containsKey(code)) 134 | { 135 | callee_codes.put(code, new HashSet()); 136 | } 137 | callee_codes.get(code).add(c_id); 138 | } 139 | 140 | HashMap func_ptrs = new HashMap<>(); 141 | for(String code : callee_codes.keySet()) 142 | { 143 | if(!is_function_pointer(joern_db, code)) continue; 144 | 145 | // print code, "->", list(callee_codes[code]) 146 | for(Long c_id : callee_codes.get(code)) 147 | { 148 | // id -> Callee -> parents() = CallExpression -> children() ArgumentList -> children(), len 149 | List args = Pipeline.v(c_id).parents().children().has("type", "ArgumentList").children().to_list(); 150 | func_ptrs.put(c_id, new Long(args.size())); 151 | // func_ptrs[code] = len(args) 152 | } 153 | } 154 | // print "len(func_ptrs)", len(func_ptrs) 155 | return func_ptrs; 156 | } 157 | 158 | 159 | 160 | public static HashSet find_all_func_names(Joern_db joern_db) throws Exception 161 | { 162 | fill_glibc_function_names(); 163 | fill_externally_defined_functions(joern_db); 164 | 165 | HashSet all_funcs = new HashSet(glibc_function_names); 166 | for(String it : externally_defined_functions) all_funcs.add(it); 167 | 168 | // get all function-defs, extract their names 169 | List func_defs = joern_db.queryNodeIndex("type:FunctionDef"); 170 | for(Node n : func_defs) 171 | { 172 | String code = (String)n.getProperty("code"); 173 | String[] splitters = code.split(" ", -1); 174 | all_funcs.add(splitters[0]); 175 | } 176 | 177 | return all_funcs; 178 | } 179 | 180 | 181 | public static HashSet all_used_func_names(Joern_db joern_db) throws Exception 182 | { 183 | HashSet all_funcs = 
find_all_func_names(joern_db); 184 | 185 | HashSet used = new HashSet<>(); 186 | 187 | // uses = joern_db.runGremlinQuery("g.V.uses()") 188 | List uses = Joern_db.queryNodeIndex("type:Symbol"); 189 | HashSet checked_already = new HashSet<>(); 190 | for(Node u : uses) 191 | { 192 | Long id = (Long)u.getId(); 193 | if(checked_already.contains(id)) 194 | { 195 | continue; 196 | } 197 | 198 | checked_already.add(id); 199 | String code = (String)u.getProperty("code"); 200 | if(all_funcs.contains(code)) 201 | { 202 | List context_list = Pipeline.v(id).in("USE").to_list(); 203 | if(context_list.isEmpty()) 204 | { 205 | continue; 206 | } 207 | String context = (String)context_list.get(0).getProperty("code"); 208 | if(context.contains(code + " (")) 209 | { 210 | used.add(code); 211 | } 212 | } 213 | } 214 | return used; 215 | 216 | // assigned = set() 217 | // assign_codes = joern_db.runGremlinQuery("g.V.filter{it.type == 'AssignmentExpr'}.code") 218 | // for a in assign_codes: 219 | // a = a.replace(")", "") # func-ptr may be casted 220 | // a = a.split(" ")[-1] # saves endswith-operations, allows "in" 221 | // if(a in all_funcs): 222 | // assigned.add(a) 223 | // 224 | // return assigned 225 | } 226 | 227 | 228 | public static HashSet get_nof_arguments_of_func(Joern_db joern_db, String func_name) 229 | { 230 | HashSet nof_args = new HashSet<>(); 231 | List func_ids = Joern_db.get_function_ids_by_name(func_name); 232 | for(Long f_id : func_ids) 233 | { 234 | // print "f_id:", f_id 235 | // FunctionDef -> ParameterList -> children -> len 236 | List args = Pipeline.v(f_id).functionToAST().children().has("type", "ParameterList").children().to_list(); 237 | if(args.size() > 0) 238 | { 239 | if(args.get(args.size()-1).getProperty("code").equals("...")) 240 | { 241 | for(Long i = new Long(args.size()-1); i<=10; ++i) 242 | { 243 | nof_args.add(i); 244 | } 245 | } 246 | else 247 | { 248 | nof_args.add(new Long(args.size())); 249 | } 250 | } 251 | } 252 | 253 | // if there were 
funcs, but no args => assume length-0 parameter list 254 | if(nof_args.size() == 0 && func_ids.size() > 0) 255 | { 256 | nof_args.add(new Long(0)); 257 | } 258 | 259 | if(nof_args.size() == 0) 260 | { 261 | // Fallback: if we cant find something, allow everything 262 | for(Long i = new Long(0); i<=10; ++i) 263 | { 264 | nof_args.add(i); 265 | } 266 | } 267 | return nof_args; 268 | } 269 | 270 | 271 | public static HashMap > get_func_ptr_candidates(Joern_db joern_db) throws Exception 272 | { 273 | HashMap func_ptrs = find_all_function_pointers(joern_db); 274 | // print "func_ptrs:", func_ptrs 275 | 276 | HashSet used = all_used_func_names(joern_db); 277 | Long of = new Long(used.size()); 278 | Long counter = new Long(0); 279 | HashMap > funcs_using_nof_args = new HashMap<>(); 280 | for(String u : used) 281 | { 282 | ++counter; 283 | System.out.println(counter.toString() + " of " + of.toString() + ": " + u); 284 | System.out.flush(); 285 | HashSet nof_args = get_nof_arguments_of_func(joern_db, u); 286 | for(Long n : nof_args) 287 | { 288 | if(!funcs_using_nof_args.containsKey(n)) 289 | { 290 | funcs_using_nof_args.put(n, new HashSet()); 291 | } 292 | funcs_using_nof_args.get(n).add(u); 293 | } 294 | // print u, list(nof_args) 295 | } 296 | // print funcs_using_nof_args 297 | 298 | 299 | HashMap > func_ptr_candidates = new HashMap<>(); 300 | for(Long callee : func_ptrs.keySet()) 301 | { 302 | func_ptr_candidates.put(callee, new HashSet()); 303 | for(String candidate : funcs_using_nof_args.get(func_ptrs.get(callee))) 304 | { 305 | func_ptr_candidates.get(callee).add(candidate); 306 | } 307 | } 308 | // print "func_ptr_candidates:", func_ptr_candidates 309 | return func_ptr_candidates; 310 | } 311 | 312 | 313 | public static void main(String[] args) throws Exception 314 | { 315 | Joern_db joern_db = new Joern_db(); 316 | joern_db.initialize(); 317 | 318 | // ENSURE follow_field.c is loaded in Database 319 | 
System.out.println(get_func_ptr_candidates(joern_db).toString()); 320 | } 321 | } 322 | 323 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Find_control_flow_paths.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.HashSet; 6 | import java.util.HashMap; 7 | import java.util.Set; 8 | import java.util.Collections; 9 | 10 | import org.neo4j.graphdb.Node; 11 | 12 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex; 13 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Edge; 14 | import com.tinkerpop.pipes.PipeFunction; 15 | import com.tinkerpop.pipes.branch.LoopPipe.LoopBundle; 16 | 17 | class Find_control_flow_paths 18 | { 19 | public static Boolean is_loop_free(List path) 20 | { 21 | // Helper-function for find_all_paths 22 | HashSet as_set = new HashSet<>(path); 23 | return path.size() == as_set.size(); 24 | } 25 | 26 | 27 | // .loop("similar", new PipeFunction, Boolean>() { 28 | // @Override 29 | // public Boolean compute(LoopBundle bundle) { 30 | // return bundle.getLoops() < 4 && bundle.getObject() != v2; 31 | // } 32 | // }) 33 | 34 | 35 | // List edges = Pipeline.Vs().outE("REACHES").property("var").filter( 36 | //new PipeFunction() 37 | // { 38 | // public Boolean compute(String it) {return it.indexOf('.') != -1;} 39 | // } 40 | 41 | 42 | // Find all paths of length between two nodes, using only edges. 
43 | // public static find_all_paths(joern_db, from_id, to_id, length=6, edge_type="CFG_EDGE"): 44 | public static List> find_all_paths(Joern_db joern_db, final Long from_id, final Long to_id, final Long length, String edge_type) 45 | { 46 | // s = "g.v({0:s}).out({3:s}).loop(1){{it.loops<={2:s} && !(it.object.id in [{0:s},{1:s}])}}.filter{{it.id=={1:s}}}.path" 47 | // s = s.format(str(from_id), str(to_id), str(length), edge_type) 48 | List> node_paths = Pipeline.v(from_id).out(edge_type).loop(1, 49 | new PipeFunction, Boolean>() 50 | { 51 | @Override 52 | public Boolean compute(LoopBundle bundle) 53 | { 54 | Long id = bundle.getObject().getId(); 55 | return bundle.getLoops() < length && id != from_id && id != to_id; 56 | } 57 | }).has("id", to_id).path().toList(); 58 | 59 | List> paths = new ArrayList<>(); 60 | for(List p : node_paths) 61 | { 62 | List cur_path = new ArrayList<>(); 63 | for(Neo4j2Vertex n : p) 64 | { 65 | cur_path.add(n.getRawVertex().getId()); 66 | } 67 | paths.add(cur_path); 68 | } 69 | 70 | List> loop_free_paths = new ArrayList<>(); 71 | for(List p : paths) 72 | { 73 | if(is_loop_free(p)) 74 | { 75 | loop_free_paths.add(p); 76 | } 77 | } 78 | 79 | return paths; 80 | } 81 | 82 | public static List> find_all_paths(Joern_db joern_db, Long from_id, Long to_id) 83 | { 84 | return find_all_paths(joern_db, from_id, to_id, new Long(6), "CFG_EDGE"); 85 | } 86 | 87 | public static List> find_all_paths(Joern_db joern_db, Long from_id, Long to_id, Long length) 88 | { 89 | return find_all_paths(joern_db, from_id, to_id, length, "CFG_EDGE"); 90 | } 91 | 92 | // Front-end to the "find_all_paths"-function. 
93 | // Calls it for a range of integers and returns the list of all found paths 94 | public static List> find_all_paths_in_range(Joern_db joern_db, Long from_id, Long to_id, Long min_length, Long max_length, String edge_type) throws Exception 95 | { 96 | if(min_length < 1) throw new Exception("min_length has to be >= 1"); 97 | 98 | List> all_paths = new ArrayList<>(); 99 | for(Long i = min_length; i> paths = find_all_paths(joern_db, from_id, to_id, i, edge_type); 102 | for(List p : paths) 103 | { 104 | all_paths.add(p); 105 | } 106 | } 107 | return all_paths; 108 | } 109 | 110 | public static List> find_all_paths_in_range(Joern_db joern_db, Long from_id, Long to_id, Long min_length, Long max_length) throws Exception 111 | { 112 | return find_all_paths_in_range(joern_db, from_id, to_id, min_length, max_length, "CFG_EDGE"); 113 | } 114 | 115 | public static List> find_all_paths_in_range(Joern_db joern_db, Long from_id, Long to_id, Long min_length) throws Exception 116 | { 117 | return find_all_paths_in_range(joern_db, from_id, to_id, min_length, new Long(6), "CFG_EDGE"); 118 | } 119 | 120 | public static List> find_all_paths_in_range(Joern_db joern_db, Long from_id, Long to_id) throws Exception 121 | { 122 | return find_all_paths_in_range(joern_db, from_id, to_id, new Long(1), new Long(6), "CFG_EDGE"); 123 | } 124 | 125 | 126 | //def tree_like_path_append(all_paths, paths): 127 | // if(all_paths == []): 128 | // return paths[:] 129 | // 130 | // new_paths = [] 131 | // for a in all_paths: 132 | // for p in paths: 133 | // new_paths.append(a + p) 134 | // return new_paths 135 | 136 | //# Find the indices in path, where path[i-1] == path[i] 137 | public static > List find_double_element_indices(List path) 138 | { 139 | List ret = new ArrayList<>(); 140 | for(int i = 1, i_end = path.size(); i with an index occurring in is removed. 151 | // To that end, the indices are sorted and elements are removed from the end. 
152 | public static > List remove_indices(List the_list, List indices) 153 | { 154 | List ret = new ArrayList(the_list); 155 | Collections.sort(indices); 156 | for(int i=indices.size()-1; i>=0; --i) 157 | { 158 | ret.remove(indices.get(i).intValue()); 159 | } 160 | return ret; 161 | } 162 | 163 | 164 | public static > List remove_double_elements(List path) 165 | { 166 | List indices = find_double_element_indices(path); 167 | return remove_indices(path, indices); 168 | } 169 | 170 | // Collect the CFG-member-parents for each node in the path 171 | // Assumption: The first node is run-time wise also the first 172 | public static List find_cfg_parents(Joern_db joern_db, List path) throws Exception 173 | { 174 | List cfg_parents = new ArrayList<>(); 175 | cfg_parents.add(Joern_db.find_reached_parent(path.get(0), "CFG_EDGE", true)); 176 | for(int i=1, i_end = path.size(); i>> find_all_cfg_connections(Joern_db joern_db, List cfg_parents, HashMap> func_id_call_graph) throws Exception 185 | { 186 | // Get the function-ids for each member in a path 187 | List function_ids = new ArrayList<>(); 188 | for(Long cfg_parent : cfg_parents) 189 | { 190 | function_ids.add(Joern_db.get_function_id(cfg_parent)); 191 | } 192 | // print "function_ids:", function_ids 193 | 194 | // print "func_id_call_graph:", func_id_call_graph 195 | 196 | List>> path_chain = new ArrayList<>(); 197 | List> paths = new ArrayList<>(); 198 | 199 | for(Long i = new Long(1), i_end = new Long(cfg_parents.size()); i" + to_id.toString()); 206 | if(from_id == to_id) 207 | { 208 | throw new Exception("from_id == to_id... 
Should not happen"); 209 | } 210 | else if(function_ids.get(from_index.intValue()) != function_ids.get(to_index.intValue())) 211 | { 212 | // print "function_jump", from_id, "->", to_id 213 | Boolean from_calls_to = func_id_call_graph.get(function_ids.get(from_index.intValue()).intValue()).contains(function_ids.get(to_index.intValue())); 214 | Boolean to_calls_from = func_id_call_graph.get(function_ids.get(to_index.intValue()).intValue()).contains(function_ids.get(from_index.intValue())); 215 | // print "from_calls_to:", from_calls_to 216 | // print "to_calls_from:", to_calls_from 217 | 218 | if(from_calls_to && to_calls_from) 219 | { 220 | // Note: In this case, there is a circular dependency. 221 | // This should happend mostly for recursive functions, 222 | // where there is no function-jump in the first place. 223 | throw new Exception("Cannot decide, which function called which"); 224 | } 225 | else if(from_calls_to && !to_calls_from) 226 | { 227 | //print "from-func calls to-func => find CFG-Entry in to-func" 228 | Long cfg_entry_id = ((Node)(Pipeline.v(function_ids.get(to_index.intValue())).out("IS_FUNCTION_OF_CFG").toList().get(0))).getId(); 229 | // print "cfg_entry_id:", cfg_entry_id 230 | paths = find_all_paths_in_range(joern_db, cfg_entry_id, to_id, new Long(1), new Long(30)); 231 | for(int j=0, j_end=paths.size(); j find CFG-Exits in from-func" 240 | List exit_nodes = Joern_db.queryNodeIndex("type:CFGExitNode AND functionId:" + function_ids.get(from_index.intValue()).toString()); 241 | for(Node e : exit_nodes) 242 | { 243 | List> this_paths = find_all_paths_in_range(joern_db, from_id, e.getId(), new Long(1), new Long(30)); 244 | for(int j=0, j_end=this_paths.size(); j p : this_paths) 250 | { 251 | paths.add(p); 252 | } 253 | } 254 | } 255 | else // no calls 256 | { 257 | throw new Exception("No calls found - should not happen"); 258 | } 259 | } 260 | else 261 | { 262 | paths = find_all_paths_in_range(joern_db, from_id, to_id, new Long(1), new 
Long(30)); 263 | // print "paths:", paths 264 | // all_paths = tree_like_path_append(all_paths, paths) 265 | } 266 | 267 | for(int j=0, j_end=paths.size(); j find_var_name_overarch(List path, List cfg_parents, List var_names) 287 | { 288 | List overarch = new ArrayList<>(); 289 | int data_index = 1; 290 | int cfg_index = 1; 291 | 292 | while(cfg_index < path.size()) 293 | { 294 | while(path.get(cfg_index) != cfg_parents.get(data_index)) 295 | { 296 | overarch.add(var_names.get(data_index-1)); 297 | cfg_index += 1; 298 | } 299 | overarch.add(var_names.get(data_index-1)); 300 | data_index += 1; 301 | cfg_index += 1; 302 | } 303 | return overarch; 304 | } 305 | 306 | 307 | public static Long count_paths(List>> path_chain) 308 | { 309 | Long counter = new Long(1); 310 | for(List> paths : path_chain) 311 | { 312 | counter *= paths.size(); 313 | } 314 | return counter; 315 | } 316 | 317 | 318 | public static List get_ith_path(List>> path_chain, int ith_path) 319 | { 320 | List place_values = new ArrayList<>(); 321 | place_values.add(new Long(1)); 322 | 323 | for(int i=1, i_end=path_chain.size(); i indices = new ArrayList<>(path_chain.size()); 332 | Long counter = new Long(ith_path); 333 | for(int i=0, i_end=path_chain.size(); i= place_values.get(i)) 336 | { 337 | Long modulo = counter % place_values.get(i); 338 | Long divisible = (counter - modulo); 339 | indices.set(i, indices.get(i) + divisible / place_values.get(i)); 340 | counter = modulo; 341 | } 342 | } 343 | 344 | List path = new ArrayList<>(); 345 | for(int i =0, i_end=indices.size(); i to_list(int... 
args) 359 | { 360 | List ret = new ArrayList<>(); 361 | for(int arg : args) 362 | { 363 | ret.add(new Long(arg)); 364 | } 365 | return ret; 366 | } 367 | 368 | 369 | public static void get_ith_path() 370 | { 371 | List> one_two = new ArrayList<>(); 372 | one_two.add(to_list(1)); 373 | one_two.add(to_list(2)); 374 | 375 | 376 | List> three_four_five = new ArrayList<>(); 377 | three_four_five.add(to_list(3)); 378 | three_four_five.add(to_list(4)); 379 | three_four_five.add(to_list(5)); 380 | 381 | List>> path_chain = new ArrayList<>(); 382 | path_chain.add(one_two); 383 | path_chain.add(three_four_five); 384 | 385 | Long chain_len = count_paths(path_chain); 386 | System.out.println("chain_len: " + chain_len.toString()); 387 | assert 6 == chain_len; 388 | 389 | List path = null; 390 | path = get_ith_path(path_chain, 0); 391 | System.out.println("path: " + path.toString()); 392 | assert path.equals(to_list(1, 3)); 393 | 394 | path = get_ith_path(path_chain, 1); 395 | System.out.println("path: " + path.toString()); 396 | assert path.equals(to_list(1, 4)); 397 | 398 | path = get_ith_path(path_chain, 2); 399 | System.out.println("path: " + path.toString()); 400 | assert path.equals(to_list(1, 5)); 401 | 402 | path = get_ith_path(path_chain, 3); 403 | System.out.println("path: " + path.toString()); 404 | assert path.equals(to_list(2, 3)); 405 | 406 | path = get_ith_path(path_chain, 4); 407 | System.out.println("path: " + path.toString()); 408 | assert path.equals(to_list(2, 4)); 409 | 410 | path = get_ith_path(path_chain, 5); 411 | System.out.println("path: " + path.toString()); 412 | assert path.equals(to_list(2, 5)); 413 | } 414 | } // EOF class 415 | 416 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Get_call_graph.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.Arrays; 4 | import java.util.Collection; 5 | import 
java.util.ArrayList; 6 | import java.util.List; 7 | import java.util.HashSet; 8 | import java.util.HashMap; 9 | import java.util.Set; 10 | 11 | import org.neo4j.graphdb.Node; 12 | 13 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex; 14 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Edge; 15 | import com.tinkerpop.pipes.PipeFunction; 16 | 17 | 18 | class Get_call_graph 19 | { 20 | // Compute the func-id call graph. 21 | // Key: func-id 22 | // Value: set of callee_func-id 23 | public static HashMap> get_func_id_call_graph(Joern_db joern_db) 24 | { 25 | HashMap> cg = new HashMap<>(); 26 | List funcs = Joern_db.queryNodeIndex("type:Function"); 27 | List func_ids = new ArrayList<>(); 28 | for(Node f : funcs) func_ids.add(f.getId()); 29 | // funcs = joern_db.runGremlinQuery("g.V.filter{it.type == 'Function'}.id") 30 | 31 | for(Long f : func_ids) 32 | { 33 | HashSet calls = new HashSet<>(); 34 | List callees = Pipeline.v(f).functionToStatements().has("type", "CallExpression").callToCallee().to_list(); 35 | 36 | for(Node callee : callees) 37 | { 38 | Long called_func_id = Joern_db.get_function_id_by_name((String)(callee.getProperty("code"))); 39 | if(called_func_id != -1) 40 | { 41 | calls.add(called_func_id); 42 | } 43 | } 44 | 45 | if(!cg.containsKey(f)) 46 | { 47 | cg.put(f, new HashSet()); 48 | } 49 | 50 | for(Long c : calls) 51 | { 52 | cg.get(f).add(c); 53 | } 54 | } 55 | return cg; 56 | } 57 | 58 | 59 | private static String join(Collection collection, String delimiter) 60 | { 61 | String joined = ""; 62 | boolean first = true; 63 | for(String it : collection) 64 | { 65 | joined += it; 66 | if(first) first = false; 67 | else joined += delimiter; 68 | } 69 | return joined; 70 | } 71 | 72 | // Compute the call graph. 
73 | // Key: (func_name, nof_params) 74 | // Value: (func_sig, List of (callee_name, nof_args)) 75 | public static HashMap, Pair>>> get_call_graph(Joern_db joern_db) 76 | { 77 | HashMap, Pair>>> cg = new HashMap<>(); 78 | // funcs = joern_db.runGremlinQuery("g.V.filter{it.type == 'Function'}") 79 | List funcs = Joern_db.queryNodeIndex("type:Function"); 80 | for(Long i = new Long(0), i_end = new Long(funcs.size()); i it = Pipeline.v(f_id).functionToAST().to_list(); 87 | String func_sig = (String)(it.get(0).getProperty("code")); 88 | String func_name = (String)(f.getProperty("name")); 89 | 90 | it = Pipeline.v(f_id).functionToAST().children().has("type", "ParameterList").children().to_list(); 91 | HashSet as_set = new HashSet<>(it); 92 | Long nof_params = new Long(as_set.size()); 93 | 94 | HashSet> calls = new HashSet<>(); 95 | List callees = Pipeline.v(f_id).functionToStatements().has("type", "CallExpression").callToCallee().to_list(); 96 | for(Node callee : callees) 97 | { 98 | String callee_name = (String)(callee.getProperty("code")); 99 | it = Pipeline.v((Long)(callee.getId())).calleeToCall().callToArguments().to_list(); 100 | as_set = new HashSet<>(it); 101 | Long nof_args = new Long(it.size()); 102 | 103 | calls.add(new Pair(callee_name, nof_args)); 104 | } 105 | 106 | Pair key = new Pair<>(func_name, nof_params); 107 | if(cg.containsKey(key)) 108 | { 109 | System.out.println("func-collision for " + func_sig + " and " + cg.get(key).first); 110 | 111 | String now_func_sig = cg.get(key).first; 112 | HashSet> now_calls = cg.get(key).second; 113 | String[] names = now_func_sig.split("\n", -1); 114 | List names_as_list = new ArrayList<>(Arrays.asList(names)); 115 | if(!names_as_list.contains(func_sig)) 116 | { 117 | names_as_list.add(func_sig); 118 | now_func_sig = join(names_as_list, "\n"); 119 | } 120 | 121 | for(Pair c : calls) 122 | { 123 | now_calls.add(c); 124 | } 125 | cg.put(key, new Pair>>(now_func_sig, now_calls)); 126 | } 127 | else 128 | { 129 | 
cg.put(key, new Pair>>(func_sig, calls)); 130 | } 131 | } 132 | return cg; 133 | } 134 | 135 | // Filter out func-signatures 136 | public static HashMap, HashSet>> get_simple_call_graph(HashMap, Pair>>> cg) 137 | { 138 | HashMap, HashSet>> simple = new HashMap<>(); 139 | for(Pair k : cg.keySet()) 140 | { 141 | simple.put(k, cg.get(k).second); 142 | } 143 | return simple; 144 | } 145 | 146 | //## Filter out func-signatures and turn around the edges 147 | //#def get_simple_dual_call_graph(cg): 148 | //# simple = dict() 149 | //# for from_func in cg.keys(): 150 | //# for to_func in cg[from_func][1]: 151 | //# if(not to_func in simple): 152 | //# simple[to_func] = set() 153 | //# simple[to_func].add(from_func) 154 | //# return simple 155 | 156 | 157 | 158 | // Helper-function to print a single entry of the call-graph 159 | public static void print_call_graph_entry(Pair cg_key, HashMap, Pair>>> cg) 160 | { 161 | System.out.println(cg_key.first + "\t(" + cg_key.second.toString() + " params)"); 162 | Pair>> val = cg.get(cg_key); 163 | 164 | System.out.println(val.first); 165 | for(Pair it : val.second) 166 | { 167 | System.out.println("\t->" + it.first + "\t(" + it.second.toString() + "params)"); 168 | } 169 | } 170 | 171 | // Helper-function to print the call-graph 172 | public static void print_call_graph(HashMap, Pair>>> cg) 173 | { 174 | for(Pair func : cg.keySet()) 175 | { 176 | print_call_graph_entry(func, cg); 177 | } 178 | } 179 | 180 | public static void main(String[] args) 181 | { 182 | Joern_db joern_db = new Joern_db(); 183 | joern_db.initialize(); 184 | 185 | HashMap, Pair>>> cg = get_call_graph(joern_db); 186 | print_call_graph(cg); 187 | } 188 | 189 | } // EOF class 190 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Init_glibc_data_trans.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.HashMap; 4 | import 
java.util.List; 5 | import java.util.ArrayList; 6 | import java.util.Arrays; 7 | 8 | class Init_glibc_data_trans 9 | { 10 | public static HashMap>> sets_param = null; 11 | public static HashMap>>> data_trans = null; 12 | 13 | private static List list_helper(int ... args) 14 | { 15 | List ret = new ArrayList<>(); 16 | for(int arg : args) 17 | { 18 | ret.add(new Long(arg)); 19 | } 20 | return ret; 21 | } 22 | 23 | 24 | private static void sets_param_helper(String func_name, List values) 25 | { 26 | if(!sets_param.containsKey(func_name)) 27 | { 28 | sets_param.put(func_name, new HashMap>()); 29 | } 30 | sets_param.get(func_name).put(new Long(values.size()), values); 31 | } 32 | 33 | private static void data_trans_helper(String func_name, int arg_index, List values) 34 | { 35 | if(!data_trans.containsKey(func_name)) 36 | { 37 | data_trans.put(func_name, new ArrayList>>()); 38 | } 39 | data_trans.get(func_name).add(new Pair>(new Long(arg_index), values)); 40 | } 41 | 42 | 43 | public static void main(String[] argv) 44 | { 45 | sets_param = new HashMap<>(); 46 | data_trans = new HashMap<>(); 47 | 48 | // Nothing happens 49 | List nothing_happens = Arrays.asList("abort abs access alloca atof atoi atol bind calloc ceil cos error exit exp fabs fclose fcntl fdopen feof ferror fflush fgetc fileno floor fopen fork fprintf fputc fputs free fseek ftell fwrite getc getenv getopt getpid gettext getuid htonl htons index int sleep isalnum isalpha isdigit isspace localtime log lseek malloc memcmp mmap ntohl ntohs perror pow printf putc putchar puts rand random realloc remove send setlocale setsockopt signal sin sqrt strcasecmp strchr strcmp strdup strerror strlen strncasecmp strncmp strrchr strstr strtok syslog system tolower toupper unlink warn".split(" ", -1)); 50 | 51 | for(String it : nothing_happens) 52 | { 53 | List the_list = new ArrayList<>(); 54 | for(Long i = new Long(1); i<10; ++i) 55 | { 56 | the_list.add(new Long(-1)); 57 | sets_param_helper(it, new ArrayList(the_list)); 
58 | } 59 | } 60 | 61 | //uid_t getuid(void); 62 | //ssize_t send(int sockfd, const void *buf, size_t len, int flags); 63 | //int setsockopt(int sockfd, int level, int optname, const void *optval, socklen_t optlen); 64 | //pid_t fork(void); 65 | //void *mmap(void *addr, size_t length, int " prot ", int " flags, int fd, off_t offset); 66 | //int access(const char *pathname, int mode); 67 | //off_t lseek(int fd, off_t offset, int whence); 68 | //int bind(int sockfd, const struct sockaddr *addr, socklen_t addrlen); 69 | //int fcntl(int fd, int cmd, ... /* arg */ ); 70 | //pid_t getpid(void); 71 | //sighandler_t signal(int signum, sighandler_t handler); 72 | //int unlink(const char *pathname); 73 | //char * gettext (const char * msgid); 74 | //char *getenv(const char *name); 75 | //char *index(const char *s, int c); 76 | //char *setlocale(int category, const char *locale); 77 | //char *strchr(const char *s, int c); 78 | //char *strdup(const char *s); 79 | //char *strerror(int errnum); 80 | //char *strrchr(const char *s, int c); 81 | //char *strstr(const char *haystack, const char *needle); 82 | //char *strtok(char *str, const char *delim); 83 | //double atof(const char *nptr); 84 | //double ceil(double x); 85 | //double cos(double x); 86 | //double exp(double x); 87 | //double fabs(double x); 88 | //double floor(double x); 89 | //double log(double x); 90 | //double pow(double x, double y); 91 | //double sin(double x); 92 | //double sqrt(double x); 93 | //FILE *fdopen(int fd, const char *mode); 94 | //FILE *fopen(const char *path, const char *mode); 95 | //int abs(int j); 96 | //int atoi(const char *nptr); 97 | //int fclose(FILE *fp); 98 | //int feof(FILE *stream); 99 | //int ferror(FILE *stream); 100 | //int fflush(FILE *stream); 101 | //int fgetc(FILE *stream); 102 | //int fileno(FILE *stream); 103 | //int fprintf(FILE *stream, const char *format, ...); 104 | //int fputc(int c, FILE *stream); 105 | //int fputs(const char *s, FILE *stream); 106 | //int fseek(FILE 
*stream, long offset, int whence); 107 | //int getc(FILE *stream); 108 | //int getopt(int argc, char * const argv[], const char *optstring); 109 | //int isalnum(int c); 110 | //int isalpha(int c); 111 | //int isdigit(int c); 112 | //int isspace(int c); 113 | //int memcmp(const void *s1, const void *s2, size_t n); 114 | //int printf(const char *format, ...); 115 | //int putchar(int c); 116 | //int putc(int c, FILE *stream); 117 | //int puts(const char *s); 118 | //int rand(void); 119 | //int remove(const char *pathname); 120 | //int strcasecmp(const char *s1, const char *s2); 121 | //int strcmp(const char *s1, const char *s2); 122 | //int strncasecmp(const char *s1, const char *s2, size_t n); 123 | //int strncmp(const char *s1, const char *s2, size_t n); 124 | //int system(const char *command); 125 | //int tolower(int c); 126 | //int toupper(int c); 127 | //long atol(const char *nptr); 128 | //long ftell(FILE *stream); 129 | //long int random(void); 130 | //size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); 131 | //size_t strlen(const char *s); 132 | //struct tm *localtime(const time_t *timep); 133 | //uint16_t htons(uint16_t hostshort); 134 | //uint16_t ntohs(uint16_t netshort); 135 | //uint32_t htonl(uint32_t hostlong); 136 | //uint32_t ntohl(uint32_t netlong); 137 | //unsigned int sleep(unsigned int seconds); 138 | //void abort(void); 139 | //void *alloca(size_t size); 140 | //void *calloc(size_t nmemb, size_t size); 141 | //void exit(int status); 142 | //void free(void *ptr); 143 | //void *malloc(size_t size); 144 | //void perror(const char *s); 145 | //void *realloc(void *ptr, size_t size); 146 | //void syslog(int priority, const char *format, ...); 147 | //void warn(const char *fmt, ...); 148 | //void error(int status, int errnum, const char *format, ...); 149 | 150 | 151 | 152 | 153 | 154 | //Dunno 155 | //int select(int nfds, fd_set *readfds, fd_set *writefds, fd_set *exceptfds, struct timeval *timeout); 156 | 
sets_param_helper("select", list_helper(-1, -1, -1, -1, -1)); 157 | 158 | 159 | // Sets something 160 | //data_trans[""] = [(,[])] 161 | //sets_param[""][] = [] 162 | // data_trans_helper("", , list_helper()); 163 | //pid_t wait(int *status); 164 | data_trans_helper("wait", 0, list_helper(-1)); 165 | sets_param_helper("wait", list_helper(1)); 166 | //int dup2(int oldfd, int newfd); 167 | data_trans_helper("dup2", 1, list_helper(0)); 168 | sets_param_helper("dup2", list_helper(0, 1)); 169 | //int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact); 170 | data_trans_helper("sigaction", 2, list_helper(1)); 171 | sets_param_helper("sigaction", list_helper(1, -1, 1)); 172 | //int fstat(int fd, struct stat *buf); 173 | data_trans_helper("fstat", 1, list_helper(-1)); 174 | sets_param_helper("fstat", list_helper(1, 1)); 175 | //int gettimeofday(struct timeval *tv, struct timezone *tz); 176 | data_trans_helper("gettimeofday", 0, list_helper(-1)); 177 | data_trans_helper("gettimeofday", 1, list_helper(-1)); 178 | sets_param_helper("gettimeofday", list_helper(1, 1)); 179 | //int accept(int sockfd, struct sockaddr *addr, socklen_t *addrlen); 180 | data_trans_helper("accept", 1, list_helper(-1)); 181 | data_trans_helper("accept", 2, list_helper(-1)); 182 | sets_param_helper("accept", list_helper(1, 1, 1)); 183 | //char *fgets(char *s, int size, FILE *stream); 184 | data_trans_helper("fgets", 0, list_helper(-1)); 185 | sets_param_helper("fgets", list_helper(1, -1, -1)); 186 | //char *strcat(char *dest, const char *src); 187 | data_trans_helper("strcat", 0, list_helper(1)); 188 | sets_param_helper("strcat", list_helper(1, -1)); 189 | //char *strcpy(char *dest, const char *src); 190 | data_trans_helper("strcpy", 0, list_helper(1)); 191 | sets_param_helper("strcpy", list_helper(1, -1)); 192 | //char *strncpy(char *dest, const char *src, size_t n); 193 | data_trans_helper("strncpy", 0, list_helper(1)); 194 | sets_param_helper("strncpy", list_helper(1, -1, 
-1)); 195 | //int snprintf(char *str, size_t size, const char *format, ...); 196 | data_trans_helper("snprintf", 0, list_helper(3, 4, 5, 6, 7, 8, 9)); 197 | sets_param_helper("snprintf", list_helper(1, -1, -1, -1)); 198 | sets_param_helper("snprintf", list_helper(1, -1, -1, -1, -1)); 199 | sets_param_helper("snprintf", list_helper(1, -1, -1, -1, -1, -1)); 200 | sets_param_helper("snprintf", list_helper(1, -1, -1, -1, -1, -1, -1)); 201 | sets_param_helper("snprintf", list_helper(1, -1, -1, -1, -1, -1, -1, -1)); 202 | sets_param_helper("snprintf", list_helper(1, -1, -1, -1, -1, -1, -1, -1, -1)); 203 | //int sprintf(char *str, const char *format, ...); 204 | data_trans_helper("sprintf", 0, list_helper(2, 3, 4, 5, 6, 7, 8, 9)); 205 | sets_param_helper("sprintf", list_helper(1, -1, -1)); 206 | sets_param_helper("sprintf", list_helper(1, -1, -1, -1)); 207 | sets_param_helper("sprintf", list_helper(1, -1, -1, -1, -1)); 208 | sets_param_helper("sprintf", list_helper(1, -1, -1, -1, -1, -1)); 209 | sets_param_helper("sprintf", list_helper(1, -1, -1, -1, -1, -1, -1)); 210 | sets_param_helper("sprintf", list_helper(1, -1, -1, -1, -1, -1, -1, -1)); 211 | sets_param_helper("sprintf", list_helper(1, -1, -1, -1, -1, -1, -1, -1, -1)); 212 | //int sscanf(const char *str, const char *format, ...); 213 | data_trans_helper("sscanf", 2, list_helper(0)); 214 | data_trans_helper("sscanf", 3, list_helper(0)); 215 | data_trans_helper("sscanf", 4, list_helper(0)); 216 | data_trans_helper("sscanf", 5, list_helper(0)); 217 | data_trans_helper("sscanf", 6, list_helper(0)); 218 | data_trans_helper("sscanf", 7, list_helper(0)); 219 | data_trans_helper("sscanf", 8, list_helper(0)); 220 | data_trans_helper("sscanf", 9, list_helper(0)); 221 | 222 | sets_param_helper("sscanf", list_helper(1, -1, 1)); 223 | sets_param_helper("sscanf", list_helper(1, -1, 1, 1)); 224 | sets_param_helper("sscanf", list_helper(1, -1, 1, 1, 1)); 225 | sets_param_helper("sscanf", list_helper(1, -1, 1, 1, 1, 1)); 226 | 
sets_param_helper("sscanf", list_helper(1, -1, 1, 1, 1, 1, 1)); 227 | sets_param_helper("sscanf", list_helper(1, -1, 1, 1, 1, 1, 1, 1)); 228 | sets_param_helper("sscanf", list_helper(1, -1, 1, 1, 1, 1, 1, 1, 1)); 229 | //long int strtol(const char *nptr, char **endptr, int base); 230 | data_trans_helper("strtol", 1, list_helper(-1)); 231 | sets_param_helper("strtol", list_helper(1, 1, -1)); 232 | //size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream); 233 | data_trans_helper("fread", 0, list_helper(-1)); 234 | sets_param_helper("fread", list_helper(1, -1, -1, -1)); 235 | //unsigned long int strtoul(const char *nptr, char **endptr, int base); 236 | data_trans_helper("strtoul", 1, list_helper(-1)); 237 | sets_param_helper("strtoul", list_helper(1, 1, -1)); 238 | //void bzero(void *s, size_t n); 239 | data_trans_helper("bzero", 0, list_helper(-1)); 240 | sets_param_helper("bzero", list_helper(1, -1)); 241 | //void *memcpy(void *dest, const void *src, size_t n); 242 | data_trans_helper("memcpy", 0, list_helper(1)); 243 | sets_param_helper("memcpy", list_helper(1, -1, -1)); 244 | //void *memmove(void *dest, const void *src, size_t n); 245 | data_trans_helper("memmove", 0, list_helper(1)); 246 | sets_param_helper("memmove", list_helper(1, -1, -1)); 247 | //void *memset(void *s, int c, size_t n); 248 | data_trans_helper("memset", 0, list_helper(1)); 249 | sets_param_helper("memset", list_helper(1, -1, -1)); 250 | //int fscanf(FILE *stream, const char *format, ...); 251 | data_trans_helper("fscanf", 2, list_helper(0)); 252 | data_trans_helper("fscanf", 3, list_helper(0)); 253 | data_trans_helper("fscanf", 4, list_helper(0)); 254 | data_trans_helper("fscanf", 5, list_helper(0)); 255 | data_trans_helper("fscanf", 6, list_helper(0)); 256 | data_trans_helper("fscanf", 7, list_helper(0)); 257 | data_trans_helper("fscanf", 8, list_helper(0)); 258 | data_trans_helper("fscanf", 9, list_helper(0)); 259 | 260 | sets_param_helper("fscanf", list_helper(1, -1, 1)); 261 | 
// Plain result holder with three public fields, filled once by the constructor.
// NOTE(review): judging by the field names this reports whether a value is a
// parameter of func_name and, if so, at which index -- confirm against callers.
class Is_parameter_return_type
{
    public Boolean is_param;
    public String func_name;
    // Stored boxed (Long) although the constructor accepts a primitive int.
    public Long param_index;

    // is_param:    stored as given
    // func_name:   stored as given
    // param_index: widened to long and boxed
    public Is_parameter_return_type(Boolean is_param, String func_name, int param_index)
    {
        this.is_param = is_param;
        this.func_name = func_name;
        // Long.valueOf replaces the deprecated Long(long) boxing constructor
        // and may reuse cached instances for small values.
        this.param_index = Long.valueOf(param_index);
    }
}
| 19 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Graph; 20 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex; 21 | 22 | 23 | 24 | public class Joern_db 25 | { 26 | public static Neo4j2Graph g = null; 27 | // public static Pipeline pipe = null; 28 | 29 | public static void initialize(String databaseDir) 30 | { 31 | if(g != null) return; 32 | 33 | Neo4JDBInterface.setDatabaseDir(databaseDir); 34 | Neo4JDBInterface.openDatabase(); 35 | 36 | g = new Neo4j2Graph(Neo4JDBInterface.graphDb); 37 | Pipeline.g = g; 38 | 39 | System.out.println("initialized"); 40 | } 41 | 42 | public static void initialize() 43 | { 44 | initialize("/home/user/bugdooring_share/joern/.joernIndex"); 45 | } 46 | 47 | 48 | private static List IndexHits_to_List(IndexHits hits) 49 | { 50 | List ret = new ArrayList(); 51 | for(Node n : hits) 52 | { 53 | ret.add(n); 54 | } 55 | hits.close(); 56 | return ret; 57 | } 58 | 59 | private static List Neo4j2Vertices_to_Nodes(List vertices) 60 | { 61 | List ret = new ArrayList(); 62 | for(Neo4j2Vertex v : vertices) 63 | { 64 | ret.add(v.getRawVertex()); 65 | } 66 | return ret; 67 | } 68 | 69 | public static List queryNodeIndex(String query) 70 | { 71 | IndexHits result = Neo4JDBInterface.nodeIndex.query(query); 72 | return IndexHits_to_List(result); 73 | } 74 | 75 | public static List getNodesWithTypeAndName(String type, String name) 76 | { 77 | String query = "type:" + type + " AND name:" + name; 78 | return queryNodeIndex(query); 79 | } 80 | 81 | public static List getNodesWithTypeAndCode(String type, String code) 82 | { 83 | String query = "type:" + type + " AND code:" + code; 84 | return queryNodeIndex(query); 85 | } 86 | 87 | public static List getFunctionsByName(String name) 88 | { 89 | return getNodesWithTypeAndName("Function", name); 90 | } 91 | 92 | public static List getCallsTo(String name) 93 | { 94 | List callees = getNodesWithTypeAndCode("Callee", name); 95 | // return Neo4j2Vertices_to_Nodes(new 
Pipeline().start(callees).parents().toList()); 96 | return new Pipeline().start(callees).parents().to_list(); 97 | } 98 | 99 | 100 | 101 | 102 | 103 | private static void print_class_name(Object o) 104 | { 105 | System.out.println(o.getClass().getName()); 106 | } 107 | 108 | 109 | // Find the first parent-id, which has an incoming reaches-edge 110 | // Input: id of node in question 111 | // Output: node_id <=> node has incoming REACHES-edge 112 | // -1 <=> node has no parent 113 | // recursive <=> node has parents 114 | //public static String find_reached_parent(Joern_db joern_db, String node_id, String edge_type="DATA_FLOW_EDGE", boolean in_edge = true) // REACHES 115 | public static Long find_reached_parent(Long node_id, String edge_type, boolean in_edge) throws Exception // REACHES 116 | { 117 | // System.out.println(pipe.start(g.getVertex(Long.valueOf(117))).parents().to_list()); 118 | // System.out.println(pipe.to_list()); 119 | 120 | List e = null; 121 | if(in_edge) 122 | { 123 | e = Pipeline.v(node_id).in(edge_type).to_list(); 124 | } 125 | else 126 | { 127 | e = Pipeline.v(node_id).out(edge_type).to_list(); 128 | } 129 | 130 | if(e.size() > 0) 131 | { 132 | return node_id; 133 | } 134 | 135 | // List p = joern_db.runGremlinQuery("g.v(" + node_id + ").parents()"); 136 | List p = Pipeline.v(node_id).parents().to_list(); 137 | if(p.size() == 0) 138 | { 139 | return new Long(-1); 140 | } 141 | else if(p.size() != 1) 142 | { 143 | throw new Exception("expected only one parent"); 144 | } 145 | 146 | Object ret = p.get(0).getId(); 147 | return find_reached_parent((Long)ret); 148 | } 149 | 150 | public static Long find_reached_parent(Long node_id, String edge_type) throws Exception // REACHES 151 | { 152 | return find_reached_parent(node_id, edge_type, true); // REACHES 153 | } 154 | 155 | public static Long find_reached_parent(Long node_id) throws Exception // REACHES 156 | { 157 | return find_reached_parent(node_id, "REACHES", true); // REACHES 158 | } 159 | 160 
| public static Long get_function_id(Long node_id) 161 | { 162 | List ret = Pipeline.v(node_id).to_list(); 163 | Long function_id = (Long)(((Node)(ret.get(0))).getProperty("functionId")); 164 | if(function_id != null) return function_id; 165 | 166 | ret = Pipeline.v(node_id).parents().to_list(); 167 | Long p_id = (Long)ret.get(0).getId(); 168 | return get_function_id(p_id); 169 | } 170 | 171 | 172 | // Find the ID of a function-definition, the function's name 173 | // Assumes that the function's name is unique 174 | public static Long get_function_id_by_name(String func_name) 175 | { 176 | List calls = getFunctionsByName(func_name); 177 | if(calls.size() != 1) 178 | { 179 | return new Long(-1); 180 | // throw new Exception("expected length 1"); 181 | } 182 | return calls.get(0).getId(); 183 | } 184 | 185 | 186 | public static List get_function_ids_by_name(String func_name) 187 | { 188 | List calls = getFunctionsByName(func_name); 189 | List ids = new ArrayList(); 190 | 191 | for(Node n : calls) 192 | { 193 | ids.add(n.getId()); 194 | } 195 | 196 | return ids; 197 | } 198 | 199 | 200 | 201 | 202 | 203 | // Returns all calls to a function, given the function's name. 204 | // Simple frontend to the getCallsTo-Query. 
205 | public static List get_calls_to(String func_name) 206 | { 207 | return getCallsTo(func_name); 208 | } 209 | 210 | 211 | 212 | public static void remove_edge_from_db(Long edge_id) 213 | { 214 | Pipeline.e(edge_id).remove(); 215 | } 216 | 217 | 218 | //def properties_to_gremlin_list(properties): 219 | // str_properties = "["; 220 | // for k in properties.keys(): 221 | // str_properties += k + ": \"" + str(properties[k]).replace("\"", "\\\"") + "\", " 222 | // if(len(str_properties) > 1): 223 | // str_properties = str_properties[0:-2] + "]" 224 | // else: 225 | // str_properties += "]" 226 | // 227 | // return str_properties 228 | 229 | 230 | //def addNode(joern_db, properties): 231 | // str_properties = properties_to_gremlin_list(properties) 232 | // q = "g.addVertex(null, %s)" % str_properties 233 | //# print q 234 | // newNode = joern_db.runGremlinQuery(q) 235 | // return newNode 236 | 237 | 238 | //def addRelationship(joern_db, src, dst, relType, properties): 239 | // if(len(properties) == 0): 240 | // q = "g.addEdge(null, g.v(%s), g.v(%s), '%s')" % (src, dst, relType) 241 | // else: 242 | // str_properties = properties_to_gremlin_list(properties) 243 | // q = "g.addEdge(null, g.v(%s), g.v(%s), '%s', %s)" % (src, dst, relType, str_properties) 244 | //# print q 245 | // joern_db.runGremlinQuery(q) 246 | 247 | 248 | 249 | //def getNodeById(joern_db, node_id): 250 | // node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 251 | // return node 252 | 253 | 254 | //def getCalleeFromCall(joern_db, node_id): 255 | // node = joern_db.runGremlinQuery("g.v(%s).callToCallee()" % (node_id)) 256 | // return node 257 | 258 | //def getNodeType(joern_db, node_id): 259 | // node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 260 | // return node["type"] 261 | // 262 | //def getNodeCode(joern_db, node_id): 263 | // node = joern_db.runGremlinQuery("g.v(%s)" % (node_id)) 264 | // return node["code"] 265 | 266 | //def getOperatorCode(joern_db, node_id): 267 | // node = 
// Convenience constructor for a location without a source extent.
// file_name, function_name: stored as given
// line: boxed into line_no; extent is set to null
public Location(String file_name, String function_name, int line)
{
    // Delegate to the full constructor so the field assignments live in one
    // place; Long.valueOf replaces the deprecated Long(long) constructor.
    this(file_name, function_name, Long.valueOf(line), null);
}
| // Left out extent on purpose 53 | return true; 54 | } 55 | 56 | 57 | @Override 58 | public String toString() 59 | { 60 | String s = ""; 61 | s += file_name; 62 | s += ":"; 63 | s += function_name; 64 | s += "@"; 65 | s += line_no; 66 | return s; 67 | } 68 | 69 | public static String get_file_path(Joern_db joern_db, Node node) throws Exception 70 | { 71 | List nodes = Pipeline.v((Long)(node.getProperty("functionId"))).in().has("type", "File").to_list(); //.filepath""" % node['functionId'] 72 | if(nodes.size() == 0) 73 | { 74 | throw new Exception("No file path found"); 75 | } 76 | String file_path = (String)(nodes.get(0).getProperty("filepath")); 77 | return file_path; 78 | } 79 | 80 | 81 | public static String get_file_path_from_id(Joern_db joern_db, Long node_id) throws Exception 82 | { 83 | List nodes = Pipeline.v(node_id).to_list(); 84 | return get_file_path(joern_db, nodes.get(0)); 85 | } 86 | 87 | 88 | 89 | public static Long get_general_parent(Joern_db joern_db, Long node_id) throws Exception 90 | { 91 | //AST_EDGE = 'IS_AST_PARENT' 92 | //CFG_EDGE = 'FLOWS_TO' 93 | //USES_EDGE = 'USE' 94 | //DEFINES_EDGE = 'DEF' 95 | //DATA_FLOW_EDGE = 'REACHES' 96 | //FUNCTION_TO_AST_EDGE = 'IS_FUNCTION_OF_AST' 97 | //CFG_TO_FUNCION_EDGE = 'IS_FUNCTION_OF_CFG' // JANNIK 98 | List parent; 99 | 100 | parent = Pipeline.v(node_id).in("IS_AST_PARENT").to_list(); 101 | if(!parent.isEmpty()) 102 | { 103 | // print "ast_parent:", parent 104 | return parent.get(0).getId(); 105 | } 106 | 107 | parent = Pipeline.v(node_id).in("IS_FUNCTION_OF_AST").to_list(); 108 | if(!parent.isEmpty()) 109 | { 110 | // print "func_ast_parents:", parent 111 | return parent.get(0).getId(); 112 | } 113 | 114 | parent = Pipeline.v(node_id).in("IS_FUNCTION_OF_CFG").to_list(); 115 | if(!parent.isEmpty()) 116 | { 117 | // print "func_cfg_parents:", parent 118 | return parent.get(0).getId(); 119 | } 120 | throw new Exception("Cannot find general parent for " + node_id.toString()); 121 | } 122 | 123 | 
public static String get_function_name_for_node_id(Joern_db joern_db, Long node_id) throws Exception 124 | { 125 | Long cur_id = node_id; 126 | Long func_id = null; 127 | while(true) 128 | { 129 | func_id = (Long)(((Node)(Pipeline.v(cur_id).to_list().get(0))).getProperty("functionId")); 130 | if(func_id != null) 131 | { 132 | break; 133 | } 134 | cur_id = get_general_parent(joern_db, cur_id); 135 | // print "cur_id:", cur_id 136 | } 137 | String func_name = (String)(((Node)(Pipeline.v(func_id).to_list().get(0))).getProperty("name")); 138 | return func_name; 139 | } 140 | 141 | 142 | public static String get_location_for_node_id(Joern_db joern_db, Long node_id) throws Exception 143 | { 144 | Long cur_id = node_id; 145 | String location = null; 146 | while(true) 147 | { 148 | List nodes = Pipeline.v(cur_id).to_list(); 149 | Node first = nodes.get(0); 150 | if(first.hasProperty("location")) 151 | { 152 | location = (String)(first.getProperty("location")); 153 | break; 154 | } 155 | cur_id = get_general_parent(joern_db, cur_id); 156 | // print "cur_id:", cur_id 157 | } 158 | return location; 159 | } 160 | 161 | public static HashMap read_files = null; 162 | static 163 | { 164 | read_files = new HashMap<>(); 165 | } 166 | 167 | public static Location get_location_tuple(Joern_db joern_db, Long node_id) throws Exception 168 | { 169 | String the_type = (String)(((Node)(Pipeline.v(node_id).to_list().get(0))).getProperty("type")); 170 | if(the_type.equals("CFGExitNode") || the_type.equals("Symbol")) 171 | { 172 | throw new Exception("Cannot find location for CFGExitNode or Symbol"); 173 | } 174 | String file_name = get_file_path_from_id(joern_db, node_id); 175 | //System.out.println("file_name: " + file_name); 176 | String func_name = get_function_name_for_node_id(joern_db, node_id); 177 | //System.out.println("func_name: " + func_name); 178 | String location = get_location_for_node_id(joern_db, node_id); 179 | //System.out.println("location: " + location); 180 | 181 | 
String[] loc_splitters = location.split(":", -1); 182 | Long line_no = Long.parseLong(loc_splitters[0]); 183 | 184 | if(!read_files.containsKey(file_name)) 185 | { 186 | byte[] encoded = Files.readAllBytes(Paths.get(file_name)); 187 | String data = new String(encoded, StandardCharsets.UTF_8); 188 | read_files.put(file_name, data); 189 | } 190 | // extent = read_files[file_name][line_no-1] 191 | String extent = get_source_range(read_files.get(file_name), Long.parseLong(loc_splitters[2]), Long.parseLong(loc_splitters[3])); 192 | return new Location(file_name, func_name, line_no, extent); 193 | } 194 | 195 | // stolen from instrumentation.py 196 | public static String get_source_range(String data, Long start, Long end) 197 | { 198 | return data.substring(start.intValue(), 1 + end.intValue()); 199 | } 200 | 201 | 202 | 203 | public static void test_get_location_tuple(Joern_db joern_db) throws Exception 204 | { 205 | // Testing get_location_tuple for libpng, with function "png_do_write_transformations" 206 | List all_nodes_of_func = Pipeline.Vs().has("functionId", "130007").to_list(); 207 | for(Node a : all_nodes_of_func) 208 | { 209 | String its_type = (String)(a.getProperty("type")); 210 | if(its_type.equals("CFGExitNode") || its_type.equals("Symbol")) 211 | { 212 | continue; 213 | } 214 | System.out.println(a.getId()); 215 | System.out.println(get_location_tuple(joern_db, new Long(a.getId())).toString()); 216 | } 217 | } 218 | 219 | 220 | } 221 | 222 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Pair.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | public class Pair implements Comparable>, java.io.Serializable 4 | { 5 | public FIRST first; 6 | public SECOND second; 7 | 8 | public Pair(FIRST first, SECOND second) 9 | { 10 | this.first = first; 11 | this.second = second; 12 | } 13 | 14 | private static int compare_helper(Object o1, 
Object o2) 15 | { 16 | return o1 == null ? o2 == null ? 0 : -1 : o2 == null ? +1 : ((Comparable)o1).compareTo(o2); 17 | } 18 | 19 | @Override 20 | public int compareTo(Pair o) 21 | { 22 | int cmp = compare_helper(first, o.first); 23 | return cmp == 0 ? compare_helper(second, o.second) : cmp; 24 | } 25 | 26 | private static int hashcode_helper(Object o) 27 | { 28 | return o == null ? 0 : o.hashCode(); 29 | } 30 | 31 | @Override 32 | public int hashCode() 33 | { 34 | return 65497 * hashcode_helper(first) ^ hashcode_helper(second); 35 | } 36 | 37 | private boolean equal_helper(Object o1, Object o2) 38 | { 39 | return o1 == null ? o2 == null : (o1 == o2 || o1.equals(o2)); 40 | } 41 | 42 | @Override 43 | public boolean equals(Object obj) 44 | { 45 | if(!(obj instanceof Pair)) return false; 46 | if (this == obj) return true; 47 | return equal_helper(first, ((Pair) obj).first) && equal_helper(second, ((Pair) obj).second); 48 | } 49 | 50 | @Override 51 | public String toString() 52 | { 53 | return "(" + first.toString() + ", " + second.toString() + ')'; 54 | } 55 | } // EOF class 56 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Param_data_source.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | class Param_data_source 4 | { 5 | public Long node_id; 6 | public Object var_name; 7 | public Boolean continue_at_self; 8 | 9 | public Param_data_source(Long node_id, Object var_name, Boolean continue_at_self) 10 | { 11 | this.node_id = node_id; 12 | this.var_name = var_name; 13 | this.continue_at_self = continue_at_self; 14 | } 15 | 16 | public String toString() 17 | { 18 | String s = ""; 19 | s += "node_id:" + node_id.toString(); 20 | s += "; " + var_name.toString(); 21 | s += "; " + continue_at_self.toString(); 22 | return s; 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- 
/joern/src/tools/data_flow/Pickle.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.io.*; 4 | 5 | public class Pickle 6 | { 7 | public static Boolean save_to_file(String file_name, Object to_write) 8 | { 9 | try 10 | { 11 | FileOutputStream fos = new FileOutputStream(file_name); 12 | ObjectOutputStream oos = new ObjectOutputStream(fos); 13 | oos.writeObject(to_write); 14 | oos.close(); 15 | fos.close(); 16 | return true; 17 | } 18 | catch(IOException i) 19 | { 20 | i.printStackTrace(); 21 | return false; 22 | } 23 | } 24 | 25 | public static Object load_from_file(String file_name) 26 | { 27 | try 28 | { 29 | FileInputStream fis = new FileInputStream(file_name); 30 | ObjectInputStream ois = new ObjectInputStream(fis); 31 | Object o = ois.readObject(); 32 | ois.close(); 33 | fis.close(); 34 | return o; 35 | } 36 | catch(IOException i) 37 | { 38 | i.printStackTrace(); 39 | return null; 40 | } 41 | catch(ClassNotFoundException c) 42 | { 43 | System.out.println("Class not found:"); 44 | c.printStackTrace(); 45 | return null; 46 | } 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Pipeline.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | import com.tinkerpop.gremlin.java.*; 3 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Graph; 4 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Vertex; 5 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Edge; 6 | import com.tinkerpop.blueprints.Vertex; 7 | 8 | import com.tinkerpop.pipes.PipeFunction; 9 | 10 | import org.neo4j.graphdb.Node; 11 | 12 | import java.util.ArrayList; 13 | import java.util.List; 14 | 15 | public class Pipeline extends GremlinPipeline 16 | { 17 | public static Neo4j2Graph g; 18 | 19 | // Starter-Methods 20 | public static Pipeline v(Long node_id) 21 | { 22 | return new 
Pipeline().start(g.getVertex(node_id)); 23 | } 24 | 25 | public static Pipeline e(Long edge_id) 26 | { 27 | return new Pipeline().start(g.getEdge(edge_id)); 28 | } 29 | 30 | public static Pipeline Vs() 31 | { 32 | return new Pipeline().start_Neo4j(g.getVertices()); 33 | // GremlinPipeline pipe = new GremlinPipeline().start(g.getVertices()); 34 | // return (Pipeline)pipe; 35 | } 36 | 37 | public Pipeline start_Neo4j(Iterable vs) 38 | { 39 | return (Pipeline)(super.start(vs)); 40 | } 41 | 42 | public static Pipeline v(List node_ids) 43 | { 44 | List nodes = new ArrayList<>(); 45 | for(Long id : node_ids) 46 | { 47 | nodes.add(g.getVertex(id).getRawVertex()); 48 | } 49 | return new Pipeline().start(nodes); 50 | } 51 | 52 | 53 | 54 | // Helper 55 | public List to_list() 56 | { 57 | List ret = new ArrayList<>(); 58 | List vertices = super.toList(); 59 | for(Neo4j2Vertex v : vertices) 60 | { 61 | ret.add(v.getRawVertex()); 62 | } 63 | 64 | return ret; 65 | } 66 | 67 | 68 | // JoernSteps 69 | 70 | public Pipeline parents() 71 | { 72 | return (Pipeline)this.in("IS_AST_PARENT"); 73 | } 74 | 75 | public Pipeline children() 76 | { 77 | return (Pipeline)this.out("IS_AST_PARENT"); 78 | } 79 | 80 | public Pipeline defines() 81 | { 82 | return (Pipeline)this.out("DEF"); 83 | } 84 | 85 | 86 | // public Pipeline functionToStatements() 87 | // { 88 | // return (Pipeline)(this.transform( 89 | // new PipeFunction>() 90 | // { 91 | // public List compute(Neo4j2Vertex it) 92 | // { 93 | // return Joern_db.queryNodeIndex("isCFGNode:True AND functionId:" + it.getId().toString()); 94 | // } 95 | // } 96 | // ).scatter()); 97 | // } 98 | 99 | public Pipeline functionToStatements() 100 | { 101 | return (Pipeline)(this.transform( 102 | new PipeFunction>() 103 | { 104 | public List compute(Neo4j2Vertex it) 105 | { 106 | List tmp = Joern_db.queryNodeIndex("isCFGNode:True AND functionId:" + it.getId().toString()); 107 | List ret = new ArrayList<>(); 108 | for(Node n : tmp) 109 | { 110 | 
ret.add(new Neo4j2Vertex(n, Joern_db.g) ); 111 | } 112 | return ret; 113 | } 114 | } 115 | ).scatter()); 116 | } 117 | 118 | 119 | // public Pipeline has(final String property, final Object value) 120 | // { 121 | //return (Pipeline)(this.filter( 122 | //new PipeFunction() 123 | // { 124 | // public Boolean compute(Node it) {return it.getProperty(property).equals(value);} 125 | // } 126 | //)); 127 | // } 128 | 129 | 130 | public Pipeline functionToAST() 131 | { 132 | return (Pipeline)(this.out("IS_FUNCTION_OF_AST")); 133 | } 134 | 135 | public Pipeline callToCallee() 136 | { 137 | return ((Pipeline)(this.out("IS_AST_PARENT"))).has("type", "Callee"); 138 | } 139 | 140 | public Pipeline calleeToCall() 141 | { 142 | return (Pipeline)(this.in("IS_AST_PARENT")); 143 | } 144 | 145 | public Pipeline callToArguments() 146 | { 147 | return (Pipeline)(this.children().has("type", "ArgumentList").children()); 148 | } 149 | 150 | public Pipeline lval() 151 | { 152 | return ((Pipeline)(this.out("IS_AST_PARENT"))).has("childNum", "0"); 153 | } 154 | 155 | public Pipeline rval() 156 | { 157 | return ((Pipeline)(this.out("IS_AST_PARENT"))).has("childNum", "1"); 158 | } 159 | 160 | public Pipeline uses() 161 | { 162 | return (Pipeline)(this.out("USE")); 163 | } 164 | 165 | public Pipeline ithArguments(Long i) 166 | { 167 | return ((Pipeline)(this.callToArguments())).has("childNum", i.toString()); 168 | } 169 | 170 | 171 | 172 | 173 | 174 | // Overwriting original GremlinPipeline-Functions to return Pipeline 175 | public Pipeline start(Neo4j2Vertex object) 176 | { 177 | return (Pipeline)(super.start(object)); 178 | } 179 | 180 | public Pipeline start(Neo4j2Edge object) 181 | { 182 | return (Pipeline)(super.start(object)); 183 | } 184 | 185 | public Pipeline start(List nodes) 186 | { 187 | ArrayList vertices = new ArrayList(); 188 | for(Node n : nodes) 189 | { 190 | vertices.add(new Neo4j2Vertex(n, g)); 191 | } 192 | return (Pipeline)(super.start(vertices)); 193 | } 194 | 195 | 
// Deletes the edge with the given id from the graph reachable through
// joern_db.g.
// Nothing is committed here; making the removal durable is left to the
// caller.
// NOTE(review): getEdge(...) presumably returns null for an unknown id,
// which would surface here as a NullPointerException - confirm.
public static void remove_edge_from_db(Joern_db joern_db, Long edge_id)
{
    joern_db.g.getEdge(edge_id).remove();
}
remove_edge_from_db(joern_db, edge_id); 30 | } 31 | } 32 | 33 | 34 | static Boolean is_same_edge(Neo4j2Edge e1, Neo4j2Edge e2) 35 | { 36 | Neo4j2Vertex e1_s = e1.getProperty("start_node"); 37 | Neo4j2Vertex e2_s = e2.getProperty("start_node"); 38 | if((Long)e1_s.getId() != (Long)e2_s.getId()) return false; 39 | 40 | Neo4j2Vertex e1_e = e1.getProperty("end_node"); 41 | Neo4j2Vertex e2_e = e2.getProperty("end_node"); 42 | if((Long)e1_e.getId() != (Long)e2_e.getId()) return false; 43 | 44 | if(!e1.getProperty("type").equals(e2.getProperty("type"))) return false; 45 | 46 | Set p1 = e1.getPropertyKeys(); 47 | Set p2 = e2.getPropertyKeys(); 48 | if(p1.size() != p2.size()) return false; 49 | for(String it : p1) 50 | { 51 | if(!p2.contains(it)) return false; 52 | if(!e1.getProperty(it).equals(e1.getProperty(it))) return false; 53 | } 54 | 55 | return true; 56 | } 57 | 58 | 59 | static List remove_duplicates(List list) 60 | { 61 | ArrayList result = new ArrayList<>(); 62 | HashSet set = new HashSet<>(); 63 | 64 | for(Neo4j2Edge it : list) 65 | { 66 | if(!set.contains(it)) 67 | { 68 | result.add(it); 69 | set.add(it); 70 | } 71 | } 72 | return result; 73 | } 74 | 75 | 76 | public static void remove_duplicated_edges_of_node(Joern_db joern_db, Long node_id) 77 | { 78 | // List edges = joern_db.runGremlinQuery("g.v(%s).outE().gather{it}" % (node_id)) 79 | List edges = Pipeline.v(node_id).outE().toList(); // left out the gather... 
80 | if(edges.size() == 0) 81 | { 82 | return; 83 | } 84 | edges = remove_duplicates(edges); 85 | // print edges 86 | 87 | HashSet edges_to_remove = new HashSet<>(); 88 | for(int i = 0, i_end = edges.size(); i < i_end; ++i) 89 | { 90 | // print "edges[" + str(i) + "]: ", edges[i]._id 91 | // print "edges[" + str(i) + "] start: ", edges[i].start_node._id 92 | // print "edges[" + str(i) + "] end: ", edges[i].end_node._id 93 | // print "edges[" + str(i) + "] type: ", edges[i].type 94 | // print "edges[" + str(i) + "] properties: ", edges[i].properties 95 | 96 | for(int j = i+1, j_end = edges.size(); j < j_end; ++j) 97 | { 98 | if(is_same_edge(edges.get(i), edges.get(j))) 99 | { 100 | //# remove_edge[k] # run gremlin-query 101 | // print "I'd remove edge", edges[k]._id, "; equal to", edges[i]._id 102 | // print "Remove edge", edges[k]._id, ":", edges[k].start_node._id, "->", edges[k].end_node._id, ":", edges[k].properties["var"] 103 | edges_to_remove.add((Long)edges.get(j).getId()); 104 | } 105 | } 106 | } 107 | 108 | // print "edges_to_remove:", edges_to_remove 109 | remove_edges_from_db(joern_db, edges_to_remove); 110 | } 111 | 112 | 113 | 114 | //def remove_duplicated_edges(joern_db): 115 | // Ns = joern_db.runGremlinQuery("g.V.gather{it.id}") 116 | // Ns = Ns[0] 117 | // 118 | // counter = 0 119 | // of = len(Ns) 120 | // for n in Ns: 121 | // counter += 1 122 | // print counter, "of", of 123 | // remove_duplicated_edges_of_node(joern_db, n) 124 | 125 | 126 | public static void remove_duplicated_edges_in_function(Joern_db joern_db, Long func_id) 127 | { 128 | System.out.print("remove_duplicated_edges_in_function: "); 129 | System.out.println(func_id); 130 | 131 | List Ns = Pipeline.v(func_id).functionToStatements().to_list(); 132 | // Ns = Ns[0] 133 | 134 | Long counter = new Long(0); 135 | Long of = new Long(Ns.size()); 136 | for(Node n : Ns) 137 | { 138 | counter += 1; 139 | System.out.println(counter.toString() + " of " + of.toString()); 140 | 
remove_duplicated_edges_of_node(joern_db, n.getId()); 141 | } 142 | } 143 | 144 | 145 | public static void main(String[] args) throws Exception 146 | { 147 | if(args.length != 1) 148 | { 149 | System.out.println("[/] Usage: \n"); 150 | System.exit(1); 151 | } 152 | String func_name = args[0]; 153 | 154 | Joern_db joern_db = new Joern_db(); 155 | joern_db.initialize(); 156 | 157 | List hits = Joern_db.get_calls_to(func_name); 158 | for(Node h : hits) 159 | { 160 | remove_duplicated_edges_in_function(joern_db, (Long)h.getProperty("functionId")); 161 | } 162 | // remove_duplicated_edges(); 163 | joern_db.g.commit(); 164 | } 165 | } // EOF class 166 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Replace_member_edges.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | //http://localhost:7474/db/data/node/30546/relationships/all/REACHES 4 | //http://localhost:7474/db/data/relationship/43993 5 | 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | 10 | //import com.tinkerpop.blueprints.Vertex; 11 | 12 | import com.tinkerpop.blueprints.impls.neo4j2.Neo4j2Edge; 13 | import com.tinkerpop.pipes.PipeFunction; 14 | 15 | class Replace_member_edges 16 | { 17 | public static void replace_for_edge(Joern_db joern_db, Long edge_id) 18 | { 19 | List ret = Pipeline.e(edge_id).toList(); 20 | String before = ret.get(0).getProperty("var"); 21 | String after = before.replaceAll("\\.", "->"); 22 | 23 | System.out.print("replacing in edge "); 24 | System.out.println(edge_id); 25 | System.out.println("\t" + before); 26 | System.out.println("\t" + after); 27 | // joern_db.runGremlinQuery("g.e(%s).setProperty('var', '%s')" % (edge_id, after)) 28 | joern_db.g.getEdge(edge_id).setProperty("var", after); 29 | 30 | // print joern_db.runGremlinQuery("g.e(%s).var" % edge_id) 31 | } 32 | 33 | 34 | public static List 
get_all_reaches_edges_with_member_access(Joern_db joern_db) 35 | { 36 | List edges = Pipeline.Vs().outE("REACHES").filter( 37 | new PipeFunction() 38 | { 39 | public Boolean compute(Neo4j2Edge it) 40 | { 41 | String var = (String)(it.getProperty("var")); 42 | return var.indexOf('.') != -1; 43 | } 44 | } 45 | ).id().toList(); 46 | return edges; 47 | 48 | // List ids = new ArrayList(); 49 | // for(Neo4j2Edge e : edges) 50 | // { 51 | //Long it = e.getId(); 52 | // ids.add(it); 53 | // } 54 | // return ids; 55 | 56 | // edges = joern_db.runGremlinQuery("g.V.outE(DATA_FLOW_EDGE).has('var').filter{it.var.contains('.')}.id") 57 | // return edges 58 | } 59 | 60 | 61 | public static void main(String[] args) throws Exception 62 | { 63 | Joern_db joern_db = new Joern_db(); 64 | joern_db.initialize(); 65 | 66 | List edges = get_all_reaches_edges_with_member_access(joern_db); 67 | System.out.println(edges.size()); 68 | 69 | for(Long e : edges) 70 | { 71 | replace_for_edge(joern_db, e); 72 | } 73 | 74 | edges = get_all_reaches_edges_with_member_access(joern_db); 75 | System.out.println(edges.size()); 76 | 77 | joern_db.g.commit(); 78 | } 79 | } // EOF class 80 | 81 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Retrace_arg_till_source.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | import java.util.concurrent.Callable; 3 | 4 | import java.util.Set; 5 | import java.util.List; 6 | import org.neo4j.graphdb.Node; 7 | 8 | 9 | 10 | class Retrace_arg_till_source implements Callable 11 | { 12 | private Joern_db joern_db; 13 | public String task_id; 14 | private Set ergo_set; 15 | private Set had_already; 16 | private Long call_id; 17 | private Long ith_argument; 18 | 19 | // Retrace the i-th arg of a certain call to it's source. 20 | // Returns the source and whether that source is user-controlled. 
21 | public Retrace_arg_till_source(Joern_db joern_db, String task_id, Set ergo_set, Set had_already, Long call_id, Long ith_argument) 22 | { 23 | this.joern_db = joern_db; 24 | this.task_id = task_id; 25 | this.ergo_set = ergo_set; 26 | this.had_already = had_already; 27 | this.call_id = call_id; 28 | this.ith_argument = ith_argument; 29 | } 30 | 31 | @Override 32 | public Void call() throws Exception 33 | { 34 | System.out.println("STARTING ON " + task_id + ": " + call_id.toString() + ", " + ith_argument.toString()); 35 | 36 | Node arg = null; 37 | while(true) 38 | { 39 | try 40 | { 41 | arg = Find_data_paths.get_argument_i(joern_db, call_id, ith_argument); 42 | break; 43 | } 44 | catch(Exception e) 45 | { 46 | ; 47 | } 48 | } 49 | // print "arg:", arg 50 | List var_names = Find_data_paths.get_arg_variables(joern_db, arg.getId()); 51 | // print "var_names:", var_names 52 | for(String v : var_names) 53 | { 54 | Find_data_paths.get_defs_of(joern_db, arg.getId(), v, task_id, ergo_set, had_already); 55 | } 56 | 57 | return null; 58 | } 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Sensitive_sinks.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.ArrayList; 6 | 7 | class Sensitive_sinks 8 | { 9 | public static HashMap> sensitive_sinks; 10 | private static List list_helper(int ... 
// Fills the table of sensitive sink functions once, at class-load time.
static
{
    sensitive_sinks = new HashMap<>();

    // Key: Function name
    // Value: the list list_helper builds from the integers given.
    // NOTE(review): these look like 0-based positions of the arguments
    // of interest (malloc -> 0, the length of memcpy -> 2) - confirm
    // against the code that consumes this table.
    sensitive_sinks.put("strcpy", list_helper(1));
    sensitive_sinks.put("memcpy", list_helper(2)); // alternative considered: [1,2]
    //sensitive_sinks.put("png_crc_read", list_helper(1, 2)); // TEST
    sensitive_sinks.put("strncpy", list_helper(2)); // alternative considered: [1,2]
    sensitive_sinks.put("strcat", list_helper(1));
    sensitive_sinks.put("sprintf", list_helper(1));
    sensitive_sinks.put("snprintf", list_helper(2));
    sensitive_sinks.put("malloc", list_helper(0));
    sensitive_sinks.put("realloc", list_helper(1));
    sensitive_sinks.put("calloc", list_helper(0, 1));
    sensitive_sinks.put("fread", list_helper(2));
}
func_ptr_candidates"); 30 | func_ptr_candidates = new HashMap<>(); 31 | 32 | func_ptr_candidates = Find_all_function_pointers.get_func_ptr_candidates(joern_db); //SHORTCUT 33 | } 34 | 35 | 36 | private static Long set_max(Set s) 37 | { 38 | Long max_elem = s.iterator().next(); 39 | for(Long it : s) 40 | { 41 | if(it > max_elem) 42 | { 43 | max_elem = it; 44 | } 45 | } 46 | return max_elem; 47 | } 48 | 49 | public static void main(String[] argv) throws Exception 50 | { 51 | // TODO: passing sets_param_dict accomplishes nothing, 52 | // because to_taint does not seem to get filled in that case. 53 | 54 | if(argv.length > 2) 55 | { 56 | System.out.println("Usage: [full_call_graph_file] [sets_param_dict_file]"); 57 | System.exit(1); 58 | } 59 | 60 | Joern_db joern_db = new Joern_db(); 61 | joern_db.initialize(); 62 | // func_id = get_function_id_by_name(joern_db, "png_set_packswap") 63 | // print function_sets_parameter_i(joern_db, func_id, 0) 64 | // sys.exit(1) 65 | 66 | fill_func_ptr_candidates(joern_db); 67 | HashMap> func_ptr_names = new HashMap<>(); 68 | 69 | for(Long callee_id : func_ptr_candidates.keySet()) //SHORTCUT 70 | { //SHORTCUT 71 | Node n = (Node)(Pipeline.v(callee_id).to_list().get(0)); //SHORTCUT 72 | String code = (String)(n.getProperty("code")); //SHORTCUT 73 | if(!func_ptr_names.containsKey(code)) //SHORTCUT 74 | { //SHORTCUT 75 | func_ptr_names.put(code, new ArrayList()); //SHORTCUT 76 | } //SHORTCUT 77 | func_ptr_names.get(code).add(callee_id); //SHORTCUT 78 | } //SHORTCUT 79 | Pickle.save_to_file("func_ptr_candidates.ser", func_ptr_candidates); 80 | Pickle.save_to_file("func_ptr_names.ser", func_ptr_names); 81 | 82 | 83 | HashMap, Pair>>> full_cg = null; 84 | if(argv.length < 1) 85 | { 86 | System.out.println("Computing call-graph"); 87 | full_cg = Get_call_graph.get_call_graph(joern_db); 88 | Pickle.save_to_file("full_cg.ser", full_cg); 89 | } 90 | else 91 | { 92 | full_cg = (HashMap, Pair>>>)Pickle.load_from_file(argv[1]); 93 | } 94 | 95 | 
HashMap>> sets_param_dict = new HashMap<>(libc_sets_param_dict); 96 | if(argv.length > 1) 97 | { 98 | sets_param_dict = (HashMap>>)Pickle.load_from_file(argv[2]); 99 | } 100 | 101 | 102 | HashMap, HashSet>> CG = Get_call_graph.get_simple_call_graph(full_cg); 103 | 104 | Set> all_funcs = new HashSet<>(); 105 | for(Pair k : CG.keySet()) 106 | { 107 | all_funcs.add(k); 108 | for(Pair it : CG.get(k)) 109 | { 110 | all_funcs.add(it); 111 | } 112 | } 113 | Long all_funcs_counter = new Long(all_funcs.size()); 114 | 115 | 116 | // use topo to get sequence of funcs 117 | Best_effort_topological_sort tp = new Best_effort_topological_sort(CG); 118 | // for each func... 119 | Long counter = new Long(0); 120 | List> to_taint = new ArrayList<>(); 121 | for(Iterator> iter = tp.iterator(); iter.hasNext();) 122 | { 123 | Pair func_info = iter.next(); 124 | 125 | counter += 1; 126 | String func_name = func_info.first; 127 | Long nof_params = func_info.second; 128 | System.out.println("Handling function " + counter.toString() + " of " + all_funcs_counter.toString() + ": " + func_name); 129 | System.out.flush(); 130 | 131 | List sets_param = new ArrayList<>(); 132 | if(sets_param_dict.containsKey(func_name) && sets_param_dict.get(func_name).containsKey(nof_params)) 133 | { 134 | for(Long param : sets_param_dict.get(func_name).get(nof_params)) 135 | { 136 | if(param != 1) 137 | { 138 | continue; 139 | } 140 | System.out.println("tainting param " + param.toString() + " for func " + func_name + ", because sets_param_dict says so"); 141 | to_taint.add(new Pair(func_name, param)); 142 | } 143 | } 144 | else if(Data_transfer.data_transfer_has(func_name)) 145 | { 146 | List>> dt = Data_transfer.get_data_transfer(func_name); 147 | if(dt.size() == 0) 148 | { 149 | continue; 150 | } 151 | HashMap tmp_dic = new HashMap<>(); 152 | for(Pair> it : dt) 153 | { 154 | Long param = it.first; 155 | tmp_dic.put(param, new Long(1)); 156 | System.out.println("tainting param " + param.toString() + " for 
func " + func_name + ", because data_transfer says so"); 157 | to_taint.add(new Pair(func_name, param)); 158 | } 159 | 160 | Long m = Math.max(set_max(tmp_dic.keySet())+1, nof_params); 161 | sets_param = new ArrayList<>(); 162 | for(int i=0; i(); 171 | for(int i=0; i ns = Joern_db.get_function_ids_by_name(func_name); 179 | if(ns.size() == 0) 180 | { 181 | System.out.println("Cannot find definition for function " + func_name); 182 | if(!func_ptr_names.containsKey(func_name)) 183 | { 184 | System.out.println("also not a func-ptr => next try"); 185 | continue; 186 | } 187 | for(Long callee_id : func_ptr_names.get(func_name)) 188 | { 189 | for(String candidate : func_ptr_candidates.get(callee_id)) 190 | { 191 | // if in func-ptr-candidates, add those functions to ns... 192 | List func_ids = Joern_db.get_function_ids_by_name(candidate); 193 | for(Long func_id : func_ids) 194 | { 195 | ns.add(func_id); 196 | } 197 | } 198 | } 199 | } 200 | if(ns.size() == 0) 201 | { 202 | System.out.println("not even func-ptr did give candidates"); 203 | continue; 204 | } 205 | HashSet guesses = new HashSet<>(); 206 | for(Long n : ns) 207 | { 208 | // print "func_name:", func_name 209 | // print "n:", n 210 | Long sets_it = Function_sets_parameter.function_sets_parameter_i(joern_db, n, i, sets_param_dict); 211 | // print "sets_it:", sets_it 212 | guesses.add(sets_it); 213 | } 214 | if(guesses.size() == 1) 215 | { 216 | sets_param.set(i.intValue(), guesses.iterator().next()); 217 | } 218 | else 219 | { 220 | sets_param.set(i.intValue(), set_max(guesses)); // 0 221 | } 222 | 223 | if(sets_param.get(i.intValue()) == 1) 224 | { 225 | System.out.println("tainting param " + i.toString() + " for func " + func_name + ", because it sets this param"); 226 | to_taint.add(new Pair(func_name, i)); 227 | } 228 | } 229 | } 230 | 231 | if(!sets_param_dict.containsKey(func_name)) 232 | { 233 | sets_param_dict.put(func_name, new HashMap>()); 234 | } 235 | 
if(!sets_param_dict.get(func_name).containsKey(nof_params)) 236 | { 237 | System.out.println("func_name, sets_param: " + func_name + " " + sets_param.toString()); 238 | sets_param_dict.get(func_name).put(nof_params, sets_param); 239 | } 240 | } 241 | 242 | for(String it : sets_param_dict.keySet()) 243 | { 244 | if(libc_sets_param_dict.containsKey(it)) 245 | { 246 | continue; 247 | } 248 | 249 | for(Long it2 : sets_param_dict.get(it).keySet()) 250 | { 251 | System.out.println(it + " -> " + sets_param_dict.get(it).get(it2).toString()); 252 | } 253 | } 254 | 255 | Pickle.save_to_file("sets_param_dict.ser", sets_param_dict); 256 | 257 | List> to_taint_unique = new ArrayList<>(); 258 | Set> had = new HashSet<>(); 259 | for(Pair func_param : to_taint) 260 | { 261 | if(had.contains(func_param)) 262 | { 263 | System.out.println("Avoided tainting " + func_param.toString() + " twice"); 264 | } 265 | else 266 | { 267 | had.add(func_param); 268 | to_taint_unique.add(func_param); 269 | } 270 | } 271 | 272 | PrintWriter f = new PrintWriter("generated_taint_all.txt", "UTF-8"); 273 | for(Pair func_param : to_taint_unique) 274 | { 275 | f.print(func_param.first + "\t" + func_param.second.toString() + "\n"); 276 | } 277 | f.close(); 278 | } 279 | } 280 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Timecap_queue.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.concurrent.ExecutorService; 4 | import java.util.concurrent.Executors; 5 | import java.util.concurrent.Future; 6 | import java.util.concurrent.TimeUnit; 7 | import java.util.concurrent.TimeoutException; 8 | 9 | import java.util.LinkedList; 10 | 11 | 12 | class Timecap_queue 13 | { 14 | private LinkedList task_list; 15 | private int timecap_in_seconds; 16 | public Timecap_queue(LinkedList task_list, int nof_threads, int timecap_in_seconds) throws Exception 17 | { 18 | 
if(nof_threads > 1) 19 | { 20 | throw new Exception("nof_threads > 1 is not implemented"); 21 | } 22 | this.task_list = task_list; 23 | this.timecap_in_seconds = timecap_in_seconds; 24 | } 25 | 26 | public void start() throws Exception 27 | { 28 | ExecutorService executor = Executors.newSingleThreadExecutor(); 29 | 30 | while(!task_list.isEmpty()) 31 | { 32 | Retrace_arg_till_source it = task_list.pollFirst(); 33 | 34 | Future future = executor.submit(it); 35 | 36 | try 37 | { 38 | future.get(timecap_in_seconds, TimeUnit.SECONDS); 39 | } 40 | catch(TimeoutException e) 41 | { 42 | future.cancel(true); 43 | System.out.println("[-] Premature termination of " + it.task_id); 44 | } 45 | } 46 | 47 | executor.shutdownNow(); 48 | } 49 | } 50 | 51 | 52 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/Topological_sort.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | import java.util.HashSet; 3 | import java.util.HashMap; 4 | import java.util.LinkedList; 5 | import java.util.List; 6 | import java.util.Set; 7 | 8 | 9 | public class Topological_sort> 10 | { 11 | private HashMap> data; 12 | public Topological_sort(HashMap> data) 13 | { 14 | this.data = data; 15 | } 16 | 17 | public List next_elements() throws Circle_exception 18 | { 19 | List leafs = new LinkedList<>(); 20 | List to_remove = new LinkedList<>(); 21 | for(T node : data.keySet()) 22 | { 23 | if(data.get(node).isEmpty()) 24 | { 25 | leafs.add(node); 26 | to_remove.add(node); 27 | } 28 | } 29 | 30 | for(T node : to_remove) 31 | { 32 | data.remove(node); 33 | } 34 | 35 | if(leafs.isEmpty()) 36 | { 37 | T min_node = null; 38 | int min_edges = Integer.MAX_VALUE; 39 | for(T node : data.keySet()) 40 | { 41 | int its_size = data.get(node).size(); 42 | if(its_size < min_edges) 43 | { 44 | min_edges = its_size; 45 | min_node = node; 46 | } 47 | } 48 | throw new Circle_exception(min_node); 49 | } 
50 | 51 | for(T leaf : leafs) 52 | { 53 | for(T remaining : data.keySet()) 54 | { 55 | data.get(remaining).remove(leaf); 56 | } 57 | } 58 | 59 | return leafs; 60 | } 61 | } 62 | 63 | -------------------------------------------------------------------------------- /joern/src/tools/data_flow/User_controlled_sources.java: -------------------------------------------------------------------------------- 1 | package tools.data_flow; 2 | 3 | import java.util.HashMap; 4 | import java.util.List; 5 | import java.util.ArrayList; 6 | 7 | class User_controlled_sources 8 | { 9 | public static HashMap> user_controlled_funcs; 10 | public static HashMap> user_controlled_in_for; 11 | private static List list_helper(int ... args) 12 | { 13 | List ret = new ArrayList<>(); 14 | for(int arg : args) 15 | { 16 | ret.add(new Long(arg)); 17 | } 18 | return ret; 19 | } 20 | 21 | static 22 | { 23 | user_controlled_funcs = new HashMap<>(); 24 | // Key: Function name 25 | // Value: User controlled argument (negative, if output-parameter) 26 | user_controlled_funcs.put("getenv", list_helper(1)); 27 | user_controlled_funcs.put("gets", list_helper(0)); 28 | user_controlled_funcs.put("fgets", list_helper(0)); 29 | user_controlled_funcs.put("fread", list_helper(0)); 30 | user_controlled_funcs.put("read", list_helper(1)); 31 | user_controlled_funcs.put("recv", list_helper(1)); 32 | user_controlled_funcs.put("recvfrom", list_helper(1)); 33 | //user_controlled_funcs.put("recvmsg", list_helper(1)); 34 | user_controlled_funcs.put("scanf", list_helper(1, 2, 3, 4, 5, 6, 7, 8, 9)); 35 | user_controlled_funcs.put("fscanf", list_helper(2, 3, 4, 5, 6, 7, 8, 9)); 36 | user_controlled_funcs.put("getc", list_helper(1)); 37 | user_controlled_funcs.put("fgetc", list_helper(1)); 38 | 39 | 40 | user_controlled_in_for = new HashMap<>(); 41 | // Key: Function name 42 | // Value: User controlled argument (negative, if output-parameter) 43 | user_controlled_in_for.put("getc", list_helper(1)); 44 | 
def main(argv, other_dir="/home/user/Desktop/preprocess/preprocessed/"):
    """Turn one gcc command line into preprocess-only shell commands.

    For every ``.c`` file named in *argv*, two lines are written to stdout:
    a ``mkdir -p`` for the directory of the output file, and a
    ``gcc -c -E -P`` call that writes the preprocessed source below
    *other_dir*. Nothing is executed here.

    Args:
        argv: The full command line; ``argv[0]`` (the script name) is
            ignored.
        other_dir: Prefix that is prepended, as plain text, to each source
            file name to form the output path.
    """
    # shlex.quote is the Python 3 home of this helper; older interpreters
    # only have it in the pipes module.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    args = list(argv[1:])  # Remove this script's name

    # Extract the c-files and take them out of the option list.
    files = [a for a in args if a.endswith(".c")]
    file_names = set(files)
    args = [a for a in args if a not in file_names]

    # Remove every "-o" together with the argument that follows it.
    i = 1
    while i < len(args):
        if args[i - 1] == "-o":
            args = args[:i - 1] + args[i + 1:]
        else:
            i += 1

    # Options that make no sense when only preprocessing: compile/link
    # switches and dependency-tracking flags ...
    dropped_prefixes = ("-c", "-l", "-L", "-MT", "-MD", "-MP", "-MF")
    # ... and the file operands those flags leave behind, plus objects.
    dropped_suffixes = (".lo", ".Tpo", ".o")
    args = [
        a for a in args
        if not a.startswith(dropped_prefixes)
        and not a.endswith(dropped_suffixes)
    ]

    # build new gcc line
    for file_name in files:
        target_file = other_dir + file_name
        # -E: stop after preprocessing, -P: no linemarkers
        cmd = ["gcc", "-c", "-E", "-P"] + args + [file_name, "-o", target_file]

        target_dir = os.path.dirname(os.path.realpath(target_file))
        # sys.stdout.write behaves the same on Python 2 and 3.
        sys.stdout.write("mkdir -p " + quote(target_dir) + "\n")
        sys.stdout.write(" ".join(quote(c) for c in cmd) + "\n")