├── .gitignore
├── LICENSE
├── README.md
├── docs
│   ├── Makefile
│   ├── make.bat
│   └── source
│       ├── chunking.rst
│       ├── conf.py
│       ├── gettingstarted.rst
│       ├── index.rst
│       ├── installation.rst
│       ├── joerndomain
│       │   ├── __init__.py
│       │   └── domain.py
│       ├── joernnodes.rst
│       └── joernsteps.rst
├── joern
│   ├── __init__.py
│   ├── all.py
│   └── joernsteps
│       ├── __init__.py
│       ├── _constants.groovy
│       ├── cfg.groovy
│       ├── composition.groovy
│       ├── function.groovy
│       ├── info.groovy
│       ├── learning
│       │   ├── __init__.py
│       │   └── neighborhoodHash.groovy
│       ├── lookup.groovy
│       ├── misc.groovy
│       ├── nodeHiding.groovy
│       ├── symbolGraph.groovy
│       ├── syntax
│       │   ├── __init__.py
│       │   ├── assignment.groovy
│       │   ├── ast.groovy
│       │   ├── call.groovy
│       │   ├── match.groovy
│       │   └── param.groovy
│       ├── taintTracking
│       │   ├── __init__.py
│       │   ├── dataflow.groovy
│       │   ├── initGraphs
│       │   │   ├── __init__.py
│       │   │   ├── ast.groovy
│       │   │   ├── conditions.groovy
│       │   │   ├── creation.groovy
│       │   │   ├── decompression.groovy
│       │   │   ├── defStmtsPerArg.groovy
│       │   │   └── localDefTree.groovy
│       │   ├── interproc.groovy
│       │   ├── queryTemplate.groovy
│       │   └── taintedArg.groovy
│       └── typeInference
│           └── local.groovy
├── setup.py
└── testing
    ├── test.py
    └── tests
        ├── PythonJoernTests.py
        ├── __init__.py
        ├── cfgTests.py
        ├── compositionTests.py
        ├── dataFlowTests.py
        ├── initGraphTests.py
        ├── interproc.py
        ├── lookupTests.py
        ├── parsingTests.py
        └── udgTests.py

/.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | # C extensions 4 | *.so 5 | 6 | # Packages 7 | *.egg 8 | *.egg-info 9 | dist 10 | build 11 | eggs 12 | parts 13 | bin 14 | var 15 | sdist 16 | develop-eggs 17 | .installed.cfg 18 | lib 19 | lib64 20 | __pycache__ 21 | 22 | # Installer logs 23 | pip-log.txt 24 | 25 | # Unit test / coverage reports 26 | .coverage 27 | .tox 28 | nosetests.xml 29 | 30 | # Translations 31 | *.mo 32 | 33 | # Mr Developer 34 | .mr.developer.cfg 35 | .project 36 | .pydevproject 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 3, 29 June 2007 3 | 4 | Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> 5 | Everyone is permitted to copy and distribute verbatim copies 6 | of this license document, but changing it is not allowed. 7 | 8 | Preamble 9 | 10 | The GNU General Public License is a free, copyleft license for 11 | software and other kinds of works. 12 | 13 | The licenses for most software and other practical works are designed 14 | to take away your freedom to share and change the works. By contrast, 15 | the GNU General Public License is intended to guarantee your freedom to 16 | share and change all versions of a program--to make sure it remains free 17 | software for all its users. We, the Free Software Foundation, use the 18 | GNU General Public License for most of our software; it applies also to 19 | any other work released this way by its authors. You can apply it to 20 | your programs, too. 21 | 22 | When we speak of free software, we are referring to freedom, not 23 | price. Our General Public Licenses are designed to make sure that you 24 | have the freedom to distribute copies of free software (and charge for 25 | them if you wish), that you receive source code or can get it if you 26 | want it, that you can change the software or use pieces of it in new 27 | free programs, and that you know you can do these things.
28 | 29 | To protect your rights, we need to prevent others from denying you 30 | these rights or asking you to surrender the rights. Therefore, you have 31 | certain responsibilities if you distribute copies of the software, or if 32 | you modify it: responsibilities to respect the freedom of others. 33 | 34 | For example, if you distribute copies of such a program, whether 35 | gratis or for a fee, you must pass on to the recipients the same 36 | freedoms that you received. You must make sure that they, too, receive 37 | or can get the source code. And you must show them these terms so they 38 | know their rights. 39 | 40 | Developers that use the GNU GPL protect your rights with two steps: 41 | (1) assert copyright on the software, and (2) offer you this License 42 | giving you legal permission to copy, distribute and/or modify it. 43 | 44 | For the developers' and authors' protection, the GPL clearly explains 45 | that there is no warranty for this free software. For both users' and 46 | authors' sake, the GPL requires that modified versions be marked as 47 | changed, so that their problems will not be attributed erroneously to 48 | authors of previous versions. 49 | 50 | Some devices are designed to deny users access to install or run 51 | modified versions of the software inside them, although the manufacturer 52 | can do so. This is fundamentally incompatible with the aim of 53 | protecting users' freedom to change the software. The systematic 54 | pattern of such abuse occurs in the area of products for individuals to 55 | use, which is precisely where it is most unacceptable. Therefore, we 56 | have designed this version of the GPL to prohibit the practice for those 57 | products. If such problems arise substantially in other domains, we 58 | stand ready to extend this provision to those domains in future versions 59 | of the GPL, as needed to protect the freedom of users. 60 | 61 | Finally, every program is threatened constantly by software patents. 62 | States should not allow patents to restrict development and use of 63 | software on general-purpose computers, but in those that do, we wish to 64 | avoid the special danger that patents applied to a free program could 65 | make it effectively proprietary. To prevent this, the GPL assures that 66 | patents cannot be used to render the program non-free. 67 | 68 | The precise terms and conditions for copying, distribution and 69 | modification follow. 70 | 71 | TERMS AND CONDITIONS 72 | 73 | 0. Definitions. 74 | 75 | "This License" refers to version 3 of the GNU General Public License. 76 | 77 | "Copyright" also means copyright-like laws that apply to other kinds of 78 | works, such as semiconductor masks. 79 | 80 | "The Program" refers to any copyrightable work licensed under this 81 | License. Each licensee is addressed as "you". "Licensees" and 82 | "recipients" may be individuals or organizations. 83 | 84 | To "modify" a work means to copy from or adapt all or part of the work 85 | in a fashion requiring copyright permission, other than the making of an 86 | exact copy. The resulting work is called a "modified version" of the 87 | earlier work or a work "based on" the earlier work. 88 | 89 | A "covered work" means either the unmodified Program or a work based 90 | on the Program. 
91 | 92 | To "propagate" a work means to do anything with it that, without 93 | permission, would make you directly or secondarily liable for 94 | infringement under applicable copyright law, except executing it on a 95 | computer or modifying a private copy. Propagation includes copying, 96 | distribution (with or without modification), making available to the 97 | public, and in some countries other activities as well. 98 | 99 | To "convey" a work means any kind of propagation that enables other 100 | parties to make or receive copies. Mere interaction with a user through 101 | a computer network, with no transfer of a copy, is not conveying. 102 | 103 | An interactive user interface displays "Appropriate Legal Notices" 104 | to the extent that it includes a convenient and prominently visible 105 | feature that (1) displays an appropriate copyright notice, and (2) 106 | tells the user that there is no warranty for the work (except to the 107 | extent that warranties are provided), that licensees may convey the 108 | work under this License, and how to view a copy of this License. If 109 | the interface presents a list of user commands or options, such as a 110 | menu, a prominent item in the list meets this criterion. 111 | 112 | 1. Source Code. 113 | 114 | The "source code" for a work means the preferred form of the work 115 | for making modifications to it. "Object code" means any non-source 116 | form of a work. 117 | 118 | A "Standard Interface" means an interface that either is an official 119 | standard defined by a recognized standards body, or, in the case of 120 | interfaces specified for a particular programming language, one that 121 | is widely used among developers working in that language. 122 | 123 | The "System Libraries" of an executable work include anything, other 124 | than the work as a whole, that (a) is included in the normal form of 125 | packaging a Major Component, but which is not part of that Major 126 | Component, and (b) serves only to enable use of the work with that 127 | Major Component, or to implement a Standard Interface for which an 128 | implementation is available to the public in source code form. A 129 | "Major Component", in this context, means a major essential component 130 | (kernel, window system, and so on) of the specific operating system 131 | (if any) on which the executable work runs, or a compiler used to 132 | produce the work, or an object code interpreter used to run it. 133 | 134 | The "Corresponding Source" for a work in object code form means all 135 | the source code needed to generate, install, and (for an executable 136 | work) run the object code and to modify the work, including scripts to 137 | control those activities. However, it does not include the work's 138 | System Libraries, or general-purpose tools or generally available free 139 | programs which are used unmodified in performing those activities but 140 | which are not part of the work. For example, Corresponding Source 141 | includes interface definition files associated with source files for 142 | the work, and the source code for shared libraries and dynamically 143 | linked subprograms that the work is specifically designed to require, 144 | such as by intimate data communication or control flow between those 145 | subprograms and other parts of the work. 146 | 147 | The Corresponding Source need not include anything that users 148 | can regenerate automatically from other parts of the Corresponding 149 | Source. 
150 | 151 | The Corresponding Source for a work in source code form is that 152 | same work. 153 | 154 | 2. Basic Permissions. 155 | 156 | All rights granted under this License are granted for the term of 157 | copyright on the Program, and are irrevocable provided the stated 158 | conditions are met. This License explicitly affirms your unlimited 159 | permission to run the unmodified Program. The output from running a 160 | covered work is covered by this License only if the output, given its 161 | content, constitutes a covered work. This License acknowledges your 162 | rights of fair use or other equivalent, as provided by copyright law. 163 | 164 | You may make, run and propagate covered works that you do not 165 | convey, without conditions so long as your license otherwise remains 166 | in force. You may convey covered works to others for the sole purpose 167 | of having them make modifications exclusively for you, or provide you 168 | with facilities for running those works, provided that you comply with 169 | the terms of this License in conveying all material for which you do 170 | not control copyright. Those thus making or running the covered works 171 | for you must do so exclusively on your behalf, under your direction 172 | and control, on terms that prohibit them from making any copies of 173 | your copyrighted material outside their relationship with you. 174 | 175 | Conveying under any other circumstances is permitted solely under 176 | the conditions stated below. Sublicensing is not allowed; section 10 177 | makes it unnecessary. 178 | 179 | 3. Protecting Users' Legal Rights From Anti-Circumvention Law. 180 | 181 | No covered work shall be deemed part of an effective technological 182 | measure under any applicable law fulfilling obligations under article 183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or 184 | similar laws prohibiting or restricting circumvention of such 185 | measures. 186 | 187 | When you convey a covered work, you waive any legal power to forbid 188 | circumvention of technological measures to the extent such circumvention 189 | is effected by exercising rights under this License with respect to 190 | the covered work, and you disclaim any intention to limit operation or 191 | modification of the work as a means of enforcing, against the work's 192 | users, your or third parties' legal rights to forbid circumvention of 193 | technological measures. 194 | 195 | 4. Conveying Verbatim Copies. 196 | 197 | You may convey verbatim copies of the Program's source code as you 198 | receive it, in any medium, provided that you conspicuously and 199 | appropriately publish on each copy an appropriate copyright notice; 200 | keep intact all notices stating that this License and any 201 | non-permissive terms added in accord with section 7 apply to the code; 202 | keep intact all notices of the absence of any warranty; and give all 203 | recipients a copy of this License along with the Program. 204 | 205 | You may charge any price or no price for each copy that you convey, 206 | and you may offer support or warranty protection for a fee. 207 | 208 | 5. Conveying Modified Source Versions. 209 | 210 | You may convey a work based on the Program, or the modifications to 211 | produce it from the Program, in the form of source code under the 212 | terms of section 4, provided that you also meet all of these conditions: 213 | 214 | a) The work must carry prominent notices stating that you modified 215 | it, and giving a relevant date. 
216 | 217 | b) The work must carry prominent notices stating that it is 218 | released under this License and any conditions added under section 219 | 7. This requirement modifies the requirement in section 4 to 220 | "keep intact all notices". 221 | 222 | c) You must license the entire work, as a whole, under this 223 | License to anyone who comes into possession of a copy. This 224 | License will therefore apply, along with any applicable section 7 225 | additional terms, to the whole of the work, and all its parts, 226 | regardless of how they are packaged. This License gives no 227 | permission to license the work in any other way, but it does not 228 | invalidate such permission if you have separately received it. 229 | 230 | d) If the work has interactive user interfaces, each must display 231 | Appropriate Legal Notices; however, if the Program has interactive 232 | interfaces that do not display Appropriate Legal Notices, your 233 | work need not make them do so. 234 | 235 | A compilation of a covered work with other separate and independent 236 | works, which are not by their nature extensions of the covered work, 237 | and which are not combined with it such as to form a larger program, 238 | in or on a volume of a storage or distribution medium, is called an 239 | "aggregate" if the compilation and its resulting copyright are not 240 | used to limit the access or legal rights of the compilation's users 241 | beyond what the individual works permit. Inclusion of a covered work 242 | in an aggregate does not cause this License to apply to the other 243 | parts of the aggregate. 244 | 245 | 6. Conveying Non-Source Forms. 246 | 247 | You may convey a covered work in object code form under the terms 248 | of sections 4 and 5, provided that you also convey the 249 | machine-readable Corresponding Source under the terms of this License, 250 | in one of these ways: 251 | 252 | a) Convey the object code in, or embodied in, a physical product 253 | (including a physical distribution medium), accompanied by the 254 | Corresponding Source fixed on a durable physical medium 255 | customarily used for software interchange. 256 | 257 | b) Convey the object code in, or embodied in, a physical product 258 | (including a physical distribution medium), accompanied by a 259 | written offer, valid for at least three years and valid for as 260 | long as you offer spare parts or customer support for that product 261 | model, to give anyone who possesses the object code either (1) a 262 | copy of the Corresponding Source for all the software in the 263 | product that is covered by this License, on a durable physical 264 | medium customarily used for software interchange, for a price no 265 | more than your reasonable cost of physically performing this 266 | conveying of source, or (2) access to copy the 267 | Corresponding Source from a network server at no charge. 268 | 269 | c) Convey individual copies of the object code with a copy of the 270 | written offer to provide the Corresponding Source. This 271 | alternative is allowed only occasionally and noncommercially, and 272 | only if you received the object code with such an offer, in accord 273 | with subsection 6b. 274 | 275 | d) Convey the object code by offering access from a designated 276 | place (gratis or for a charge), and offer equivalent access to the 277 | Corresponding Source in the same way through the same place at no 278 | further charge. 
You need not require recipients to copy the 279 | Corresponding Source along with the object code. If the place to 280 | copy the object code is a network server, the Corresponding Source 281 | may be on a different server (operated by you or a third party) 282 | that supports equivalent copying facilities, provided you maintain 283 | clear directions next to the object code saying where to find the 284 | Corresponding Source. Regardless of what server hosts the 285 | Corresponding Source, you remain obligated to ensure that it is 286 | available for as long as needed to satisfy these requirements. 287 | 288 | e) Convey the object code using peer-to-peer transmission, provided 289 | you inform other peers where the object code and Corresponding 290 | Source of the work are being offered to the general public at no 291 | charge under subsection 6d. 292 | 293 | A separable portion of the object code, whose source code is excluded 294 | from the Corresponding Source as a System Library, need not be 295 | included in conveying the object code work. 296 | 297 | A "User Product" is either (1) a "consumer product", which means any 298 | tangible personal property which is normally used for personal, family, 299 | or household purposes, or (2) anything designed or sold for incorporation 300 | into a dwelling. In determining whether a product is a consumer product, 301 | doubtful cases shall be resolved in favor of coverage. For a particular 302 | product received by a particular user, "normally used" refers to a 303 | typical or common use of that class of product, regardless of the status 304 | of the particular user or of the way in which the particular user 305 | actually uses, or expects or is expected to use, the product. A product 306 | is a consumer product regardless of whether the product has substantial 307 | commercial, industrial or non-consumer uses, unless such uses represent 308 | the only significant mode of use of the product. 309 | 310 | "Installation Information" for a User Product means any methods, 311 | procedures, authorization keys, or other information required to install 312 | and execute modified versions of a covered work in that User Product from 313 | a modified version of its Corresponding Source. The information must 314 | suffice to ensure that the continued functioning of the modified object 315 | code is in no case prevented or interfered with solely because 316 | modification has been made. 317 | 318 | If you convey an object code work under this section in, or with, or 319 | specifically for use in, a User Product, and the conveying occurs as 320 | part of a transaction in which the right of possession and use of the 321 | User Product is transferred to the recipient in perpetuity or for a 322 | fixed term (regardless of how the transaction is characterized), the 323 | Corresponding Source conveyed under this section must be accompanied 324 | by the Installation Information. But this requirement does not apply 325 | if neither you nor any third party retains the ability to install 326 | modified object code on the User Product (for example, the work has 327 | been installed in ROM). 328 | 329 | The requirement to provide Installation Information does not include a 330 | requirement to continue to provide support service, warranty, or updates 331 | for a work that has been modified or installed by the recipient, or for 332 | the User Product in which it has been modified or installed. 
Access to a 333 | network may be denied when the modification itself materially and 334 | adversely affects the operation of the network or violates the rules and 335 | protocols for communication across the network. 336 | 337 | Corresponding Source conveyed, and Installation Information provided, 338 | in accord with this section must be in a format that is publicly 339 | documented (and with an implementation available to the public in 340 | source code form), and must require no special password or key for 341 | unpacking, reading or copying. 342 | 343 | 7. Additional Terms. 344 | 345 | "Additional permissions" are terms that supplement the terms of this 346 | License by making exceptions from one or more of its conditions. 347 | Additional permissions that are applicable to the entire Program shall 348 | be treated as though they were included in this License, to the extent 349 | that they are valid under applicable law. If additional permissions 350 | apply only to part of the Program, that part may be used separately 351 | under those permissions, but the entire Program remains governed by 352 | this License without regard to the additional permissions. 353 | 354 | When you convey a copy of a covered work, you may at your option 355 | remove any additional permissions from that copy, or from any part of 356 | it. (Additional permissions may be written to require their own 357 | removal in certain cases when you modify the work.) You may place 358 | additional permissions on material, added by you to a covered work, 359 | for which you have or can give appropriate copyright permission. 360 | 361 | Notwithstanding any other provision of this License, for material you 362 | add to a covered work, you may (if authorized by the copyright holders of 363 | that material) supplement the terms of this License with terms: 364 | 365 | a) Disclaiming warranty or limiting liability differently from the 366 | terms of sections 15 and 16 of this License; or 367 | 368 | b) Requiring preservation of specified reasonable legal notices or 369 | author attributions in that material or in the Appropriate Legal 370 | Notices displayed by works containing it; or 371 | 372 | c) Prohibiting misrepresentation of the origin of that material, or 373 | requiring that modified versions of such material be marked in 374 | reasonable ways as different from the original version; or 375 | 376 | d) Limiting the use for publicity purposes of names of licensors or 377 | authors of the material; or 378 | 379 | e) Declining to grant rights under trademark law for use of some 380 | trade names, trademarks, or service marks; or 381 | 382 | f) Requiring indemnification of licensors and authors of that 383 | material by anyone who conveys the material (or modified versions of 384 | it) with contractual assumptions of liability to the recipient, for 385 | any liability that these contractual assumptions directly impose on 386 | those licensors and authors. 387 | 388 | All other non-permissive additional terms are considered "further 389 | restrictions" within the meaning of section 10. If the Program as you 390 | received it, or any part of it, contains a notice stating that it is 391 | governed by this License along with a term that is a further 392 | restriction, you may remove that term. 
If a license document contains 393 | a further restriction but permits relicensing or conveying under this 394 | License, you may add to a covered work material governed by the terms 395 | of that license document, provided that the further restriction does 396 | not survive such relicensing or conveying. 397 | 398 | If you add terms to a covered work in accord with this section, you 399 | must place, in the relevant source files, a statement of the 400 | additional terms that apply to those files, or a notice indicating 401 | where to find the applicable terms. 402 | 403 | Additional terms, permissive or non-permissive, may be stated in the 404 | form of a separately written license, or stated as exceptions; 405 | the above requirements apply either way. 406 | 407 | 8. Termination. 408 | 409 | You may not propagate or modify a covered work except as expressly 410 | provided under this License. Any attempt otherwise to propagate or 411 | modify it is void, and will automatically terminate your rights under 412 | this License (including any patent licenses granted under the third 413 | paragraph of section 11). 414 | 415 | However, if you cease all violation of this License, then your 416 | license from a particular copyright holder is reinstated (a) 417 | provisionally, unless and until the copyright holder explicitly and 418 | finally terminates your license, and (b) permanently, if the copyright 419 | holder fails to notify you of the violation by some reasonable means 420 | prior to 60 days after the cessation. 421 | 422 | Moreover, your license from a particular copyright holder is 423 | reinstated permanently if the copyright holder notifies you of the 424 | violation by some reasonable means, this is the first time you have 425 | received notice of violation of this License (for any work) from that 426 | copyright holder, and you cure the violation prior to 30 days after 427 | your receipt of the notice. 428 | 429 | Termination of your rights under this section does not terminate the 430 | licenses of parties who have received copies or rights from you under 431 | this License. If your rights have been terminated and not permanently 432 | reinstated, you do not qualify to receive new licenses for the same 433 | material under section 10. 434 | 435 | 9. Acceptance Not Required for Having Copies. 436 | 437 | You are not required to accept this License in order to receive or 438 | run a copy of the Program. Ancillary propagation of a covered work 439 | occurring solely as a consequence of using peer-to-peer transmission 440 | to receive a copy likewise does not require acceptance. However, 441 | nothing other than this License grants you permission to propagate or 442 | modify any covered work. These actions infringe copyright if you do 443 | not accept this License. Therefore, by modifying or propagating a 444 | covered work, you indicate your acceptance of this License to do so. 445 | 446 | 10. Automatic Licensing of Downstream Recipients. 447 | 448 | Each time you convey a covered work, the recipient automatically 449 | receives a license from the original licensors, to run, modify and 450 | propagate that work, subject to this License. You are not responsible 451 | for enforcing compliance by third parties with this License. 452 | 453 | An "entity transaction" is a transaction transferring control of an 454 | organization, or substantially all assets of one, or subdividing an 455 | organization, or merging organizations. 
If propagation of a covered 456 | work results from an entity transaction, each party to that 457 | transaction who receives a copy of the work also receives whatever 458 | licenses to the work the party's predecessor in interest had or could 459 | give under the previous paragraph, plus a right to possession of the 460 | Corresponding Source of the work from the predecessor in interest, if 461 | the predecessor has it or can get it with reasonable efforts. 462 | 463 | You may not impose any further restrictions on the exercise of the 464 | rights granted or affirmed under this License. For example, you may 465 | not impose a license fee, royalty, or other charge for exercise of 466 | rights granted under this License, and you may not initiate litigation 467 | (including a cross-claim or counterclaim in a lawsuit) alleging that 468 | any patent claim is infringed by making, using, selling, offering for 469 | sale, or importing the Program or any portion of it. 470 | 471 | 11. Patents. 472 | 473 | A "contributor" is a copyright holder who authorizes use under this 474 | License of the Program or a work on which the Program is based. The 475 | work thus licensed is called the contributor's "contributor version". 476 | 477 | A contributor's "essential patent claims" are all patent claims 478 | owned or controlled by the contributor, whether already acquired or 479 | hereafter acquired, that would be infringed by some manner, permitted 480 | by this License, of making, using, or selling its contributor version, 481 | but do not include claims that would be infringed only as a 482 | consequence of further modification of the contributor version. For 483 | purposes of this definition, "control" includes the right to grant 484 | patent sublicenses in a manner consistent with the requirements of 485 | this License. 486 | 487 | Each contributor grants you a non-exclusive, worldwide, royalty-free 488 | patent license under the contributor's essential patent claims, to 489 | make, use, sell, offer for sale, import and otherwise run, modify and 490 | propagate the contents of its contributor version. 491 | 492 | In the following three paragraphs, a "patent license" is any express 493 | agreement or commitment, however denominated, not to enforce a patent 494 | (such as an express permission to practice a patent or covenant not to 495 | sue for patent infringement). To "grant" such a patent license to a 496 | party means to make such an agreement or commitment not to enforce a 497 | patent against the party. 498 | 499 | If you convey a covered work, knowingly relying on a patent license, 500 | and the Corresponding Source of the work is not available for anyone 501 | to copy, free of charge and under the terms of this License, through a 502 | publicly available network server or other readily accessible means, 503 | then you must either (1) cause the Corresponding Source to be so 504 | available, or (2) arrange to deprive yourself of the benefit of the 505 | patent license for this particular work, or (3) arrange, in a manner 506 | consistent with the requirements of this License, to extend the patent 507 | license to downstream recipients. "Knowingly relying" means you have 508 | actual knowledge that, but for the patent license, your conveying the 509 | covered work in a country, or your recipient's use of the covered work 510 | in a country, would infringe one or more identifiable patents in that 511 | country that you have reason to believe are valid. 
512 | 513 | If, pursuant to or in connection with a single transaction or 514 | arrangement, you convey, or propagate by procuring conveyance of, a 515 | covered work, and grant a patent license to some of the parties 516 | receiving the covered work authorizing them to use, propagate, modify 517 | or convey a specific copy of the covered work, then the patent license 518 | you grant is automatically extended to all recipients of the covered 519 | work and works based on it. 520 | 521 | A patent license is "discriminatory" if it does not include within 522 | the scope of its coverage, prohibits the exercise of, or is 523 | conditioned on the non-exercise of one or more of the rights that are 524 | specifically granted under this License. You may not convey a covered 525 | work if you are a party to an arrangement with a third party that is 526 | in the business of distributing software, under which you make payment 527 | to the third party based on the extent of your activity of conveying 528 | the work, and under which the third party grants, to any of the 529 | parties who would receive the covered work from you, a discriminatory 530 | patent license (a) in connection with copies of the covered work 531 | conveyed by you (or copies made from those copies), or (b) primarily 532 | for and in connection with specific products or compilations that 533 | contain the covered work, unless you entered into that arrangement, 534 | or that patent license was granted, prior to 28 March 2007. 535 | 536 | Nothing in this License shall be construed as excluding or limiting 537 | any implied license or other defenses to infringement that may 538 | otherwise be available to you under applicable patent law. 539 | 540 | 12. No Surrender of Others' Freedom. 541 | 542 | If conditions are imposed on you (whether by court order, agreement or 543 | otherwise) that contradict the conditions of this License, they do not 544 | excuse you from the conditions of this License. If you cannot convey a 545 | covered work so as to satisfy simultaneously your obligations under this 546 | License and any other pertinent obligations, then as a consequence you may 547 | not convey it at all. For example, if you agree to terms that obligate you 548 | to collect a royalty for further conveying from those to whom you convey 549 | the Program, the only way you could satisfy both those terms and this 550 | License would be to refrain entirely from conveying the Program. 551 | 552 | 13. Use with the GNU Affero General Public License. 553 | 554 | Notwithstanding any other provision of this License, you have 555 | permission to link or combine any covered work with a work licensed 556 | under version 3 of the GNU Affero General Public License into a single 557 | combined work, and to convey the resulting work. The terms of this 558 | License will continue to apply to the part which is the covered work, 559 | but the special requirements of the GNU Affero General Public License, 560 | section 13, concerning interaction through a network will apply to the 561 | combination as such. 562 | 563 | 14. Revised Versions of this License. 564 | 565 | The Free Software Foundation may publish revised and/or new versions of 566 | the GNU General Public License from time to time. Such new versions will 567 | be similar in spirit to the present version, but may differ in detail to 568 | address new problems or concerns. 569 | 570 | Each version is given a distinguishing version number. 
If the 571 | Program specifies that a certain numbered version of the GNU General 572 | Public License "or any later version" applies to it, you have the 573 | option of following the terms and conditions either of that numbered 574 | version or of any later version published by the Free Software 575 | Foundation. If the Program does not specify a version number of the 576 | GNU General Public License, you may choose any version ever published 577 | by the Free Software Foundation. 578 | 579 | If the Program specifies that a proxy can decide which future 580 | versions of the GNU General Public License can be used, that proxy's 581 | public statement of acceptance of a version permanently authorizes you 582 | to choose that version for the Program. 583 | 584 | Later license versions may give you additional or different 585 | permissions. However, no additional obligations are imposed on any 586 | author or copyright holder as a result of your choosing to follow a 587 | later version. 588 | 589 | 15. Disclaimer of Warranty. 590 | 591 | THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY 592 | APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT 593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY 594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, 595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 596 | PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM 597 | IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF 598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 599 | 600 | 16. Limitation of Liability. 601 | 602 | IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS 604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY 605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE 606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF 607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD 608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), 609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF 610 | SUCH DAMAGES. 611 | 612 | 17. Interpretation of Sections 15 and 16. 613 | 614 | If the disclaimer of warranty and limitation of liability provided 615 | above cannot be given local legal effect according to their terms, 616 | reviewing courts shall apply local law that most closely approximates 617 | an absolute waiver of all civil liability in connection with the 618 | Program, unless a warranty or assumption of liability accompanies a 619 | copy of the Program in return for a fee. 620 | 621 | END OF TERMS AND CONDITIONS 622 | 623 | How to Apply These Terms to Your New Programs 624 | 625 | If you develop a new program, and you want it to be of the greatest 626 | possible use to the public, the best way to achieve this is to make it 627 | free software which everyone can redistribute and change under these terms. 628 | 629 | To do so, attach the following notices to the program. It is safest 630 | to attach them to the start of each source file to most effectively 631 | state the exclusion of warranty; and each file should have at least 632 | the "copyright" line and a pointer to where the full notice is found. 
633 | 634 | {one line to give the program's name and a brief idea of what it does.} 635 | Copyright (C) {year} {name of author} 636 | 637 | This program is free software: you can redistribute it and/or modify 638 | it under the terms of the GNU General Public License as published by 639 | the Free Software Foundation, either version 3 of the License, or 640 | (at your option) any later version. 641 | 642 | This program is distributed in the hope that it will be useful, 643 | but WITHOUT ANY WARRANTY; without even the implied warranty of 644 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 645 | GNU General Public License for more details. 646 | 647 | You should have received a copy of the GNU General Public License 648 | along with this program. If not, see <http://www.gnu.org/licenses/>. 649 | 650 | Also add information on how to contact you by electronic and paper mail. 651 | 652 | If the program does terminal interaction, make it output a short 653 | notice like this when it starts in an interactive mode: 654 | 655 | {project} Copyright (C) {year} {fullname} 656 | This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 657 | This is free software, and you are welcome to redistribute it 658 | under certain conditions; type `show c' for details. 659 | 660 | The hypothetical commands `show w' and `show c' should show the appropriate 661 | parts of the General Public License. Of course, your program's commands 662 | might be different; for a GUI interface, you would use an "about box". 663 | 664 | You should also get your employer (if you work as a programmer) or school, 665 | if any, to sign a "copyright disclaimer" for the program, if necessary. 666 | For more information on this, and how to apply and follow the GNU GPL, see 667 | <http://www.gnu.org/licenses/>. 668 | 669 | The GNU General Public License does not permit incorporating your program 670 | into proprietary programs. If your program is a subroutine library, you 671 | may consider it more useful to permit linking proprietary applications with 672 | the library. If this is what you want to do, use the GNU Lesser General 673 | Public License instead of this License. But first, please read 674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>. 675 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-joern 2 | == 3 | 4 | Introduction 5 | -- 6 | 7 | A thin Python interface to joern and a set of useful utility 8 | traversals. 9 | 10 | Dependencies: 11 | -- 12 | 13 | + py2neo 2.0.7 (http://py2neo.org/2.0/) 14 | 15 | 16 | ### Installation 17 | 18 | $ sudo pip2 install git+git://github.com/fabsx00/python-joern.git 19 | 20 | ### Example 21 | 22 | The following is a short example script. It connects to the database 23 | and runs a Gremlin traversal to retrieve all nodes with the attribute 24 | 'functionName' set to 'main'. 25 | 26 | ```python 27 | 28 | from joern.all import JoernSteps 29 | 30 | j = JoernSteps() 31 | 32 | j.setGraphDbURL('http://localhost:7474/db/data/') 33 | 34 | # j.addStepsDir('Use this to inject custom steps') 35 | 36 | j.connectToDatabase() 37 | 38 | res = j.runGremlinQuery('g.idx("nodeIndex")[[functionName:"main"]]') 39 | 40 | for r in res: 41 | print r 42 | ``` 43 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line.
5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build2 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build2 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 23 | 24 | help: 25 | @echo "Please use \`make ' where is one of" 26 | @echo " html to make standalone HTML files" 27 | @echo " dirhtml to make HTML files named index.html in directories" 28 | @echo " singlehtml to make a single large HTML file" 29 | @echo " pickle to make pickle files" 30 | @echo " json to make JSON files" 31 | @echo " htmlhelp to make HTML files and a HTML help project" 32 | @echo " qthelp to make HTML files and a qthelp project" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | 49 | clean: 50 | rm -rf $(BUILDDIR)/* 51 | 52 | html: 53 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 54 | @echo 55 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 56 | 57 | dirhtml: 58 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 59 | @echo 60 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 61 | 62 | singlehtml: 63 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 64 | @echo 65 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 66 | 67 | pickle: 68 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 69 | @echo 70 | @echo "Build finished; now you can process the pickle files." 71 | 72 | json: 73 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 74 | @echo 75 | @echo "Build finished; now you can process the JSON files." 76 | 77 | htmlhelp: 78 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 79 | @echo 80 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 81 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
82 | 83 | qthelp: 84 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 85 | @echo 86 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 87 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 88 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/python-joern.qhcp" 89 | @echo "To view the help file:" 90 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/python-joern.qhc" 91 | 92 | devhelp: 93 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 94 | @echo 95 | @echo "Build finished." 96 | @echo "To view the help file:" 97 | @echo "# mkdir -p $$HOME/.local/share/devhelp/python-joern" 98 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/python-joern" 99 | @echo "# devhelp" 100 | 101 | epub: 102 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 103 | @echo 104 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 105 | 106 | latex: 107 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 108 | @echo 109 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 110 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 111 | "(use \`make latexpdf' here to do that automatically)." 112 | 113 | latexpdf: 114 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 115 | @echo "Running LaTeX files through pdflatex..." 116 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 117 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 118 | 119 | latexpdfja: 120 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 121 | @echo "Running LaTeX files through platex and dvipdfmx..." 122 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 123 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 124 | 125 | text: 126 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 127 | @echo 128 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 129 | 130 | man: 131 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 132 | @echo 133 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 134 | 135 | texinfo: 136 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 137 | @echo 138 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 139 | @echo "Run \`make' in that directory to run these through makeinfo" \ 140 | "(use \`make info' here to do that automatically)." 141 | 142 | info: 143 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 144 | @echo "Running Texinfo files through makeinfo..." 145 | make -C $(BUILDDIR)/texinfo info 146 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 147 | 148 | gettext: 149 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 150 | @echo 151 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 152 | 153 | changes: 154 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 155 | @echo 156 | @echo "The overview file is in $(BUILDDIR)/changes." 157 | 158 | linkcheck: 159 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 160 | @echo 161 | @echo "Link check complete; look for any errors in the above output " \ 162 | "or in $(BUILDDIR)/linkcheck/output.txt." 163 | 164 | doctest: 165 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 166 | @echo "Testing of doctests in the sources finished, look at the " \ 167 | "results in $(BUILDDIR)/doctest/output.txt." 168 | 169 | xml: 170 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 171 | @echo 172 | @echo "Build finished. 
The XML files are in $(BUILDDIR)/xml." 173 | 174 | pseudoxml: 175 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 176 | @echo 177 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 178 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build2 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | goto end 41 | ) 42 | 43 | if "%1" == "clean" ( 44 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 45 | del /q /s %BUILDDIR%\* 46 | goto end 47 | ) 48 | 49 | 50 | %SPHINXBUILD% 2> nul 51 | if errorlevel 9009 ( 52 | echo. 53 | echo.The 'sphinx-build2' command was not found. Make sure you have Sphinx 54 | echo.installed, then set the SPHINXBUILD environment variable to point 55 | echo.to the full path of the 'sphinx-build2' executable. Alternatively you 56 | echo.may add the Sphinx directory to PATH. 57 | echo. 58 | echo.If you don't have Sphinx installed, grab it from 59 | echo.http://sphinx-doc.org/ 60 | exit /b 1 61 | ) 62 | 63 | if "%1" == "html" ( 64 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 65 | if errorlevel 1 exit /b 1 66 | echo. 67 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 68 | goto end 69 | ) 70 | 71 | if "%1" == "dirhtml" ( 72 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 76 | goto end 77 | ) 78 | 79 | if "%1" == "singlehtml" ( 80 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 84 | goto end 85 | ) 86 | 87 | if "%1" == "pickle" ( 88 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can process the pickle files. 
92 | goto end 93 | ) 94 | 95 | if "%1" == "json" ( 96 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 97 | if errorlevel 1 exit /b 1 98 | echo. 99 | echo.Build finished; now you can process the JSON files. 100 | goto end 101 | ) 102 | 103 | if "%1" == "htmlhelp" ( 104 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 105 | if errorlevel 1 exit /b 1 106 | echo. 107 | echo.Build finished; now you can run HTML Help Workshop with the ^ 108 | .hhp project file in %BUILDDIR%/htmlhelp. 109 | goto end 110 | ) 111 | 112 | if "%1" == "qthelp" ( 113 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 114 | if errorlevel 1 exit /b 1 115 | echo. 116 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 117 | .qhcp project file in %BUILDDIR%/qthelp, like this: 118 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\python-joern.qhcp 119 | echo.To view the help file: 120 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\python-joern.ghc 121 | goto end 122 | ) 123 | 124 | if "%1" == "devhelp" ( 125 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished. 129 | goto end 130 | ) 131 | 132 | if "%1" == "epub" ( 133 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 137 | goto end 138 | ) 139 | 140 | if "%1" == "latex" ( 141 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 145 | goto end 146 | ) 147 | 148 | if "%1" == "latexpdf" ( 149 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 150 | cd %BUILDDIR%/latex 151 | make all-pdf 152 | cd %BUILDDIR%/.. 153 | echo. 154 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 155 | goto end 156 | ) 157 | 158 | if "%1" == "latexpdfja" ( 159 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 160 | cd %BUILDDIR%/latex 161 | make all-pdf-ja 162 | cd %BUILDDIR%/.. 163 | echo. 164 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 165 | goto end 166 | ) 167 | 168 | if "%1" == "text" ( 169 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 170 | if errorlevel 1 exit /b 1 171 | echo. 172 | echo.Build finished. The text files are in %BUILDDIR%/text. 173 | goto end 174 | ) 175 | 176 | if "%1" == "man" ( 177 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 178 | if errorlevel 1 exit /b 1 179 | echo. 180 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 181 | goto end 182 | ) 183 | 184 | if "%1" == "texinfo" ( 185 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 186 | if errorlevel 1 exit /b 1 187 | echo. 188 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 189 | goto end 190 | ) 191 | 192 | if "%1" == "gettext" ( 193 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 194 | if errorlevel 1 exit /b 1 195 | echo. 196 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 197 | goto end 198 | ) 199 | 200 | if "%1" == "changes" ( 201 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 202 | if errorlevel 1 exit /b 1 203 | echo. 204 | echo.The overview file is in %BUILDDIR%/changes. 205 | goto end 206 | ) 207 | 208 | if "%1" == "linkcheck" ( 209 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 210 | if errorlevel 1 exit /b 1 211 | echo. 
212 | echo.Link check complete; look for any errors in the above output ^ 213 | or in %BUILDDIR%/linkcheck/output.txt. 214 | goto end 215 | ) 216 | 217 | if "%1" == "doctest" ( 218 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 219 | if errorlevel 1 exit /b 1 220 | echo. 221 | echo.Testing of doctests in the sources finished, look at the ^ 222 | results in %BUILDDIR%/doctest/output.txt. 223 | goto end 224 | ) 225 | 226 | if "%1" == "xml" ( 227 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 228 | if errorlevel 1 exit /b 1 229 | echo. 230 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 231 | goto end 232 | ) 233 | 234 | if "%1" == "pseudoxml" ( 235 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 236 | if errorlevel 1 exit /b 1 237 | echo. 238 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 239 | goto end 240 | ) 241 | 242 | :end 243 | -------------------------------------------------------------------------------- /docs/source/chunking.rst: -------------------------------------------------------------------------------- 1 | Chunking 2 | ======== 3 | 4 | Running the same traversal on a large set of start nodes often leads 5 | to unacceptable performance as all nodes and edges touched by the 6 | traversal are kept in server memory before returning results. For 7 | example, the query:: 8 | 9 | getAllStatements().astNodes().id 10 | 11 | which retrieves all astNodes that are part of statements, can already 12 | completely exhaust memory. 13 | 14 | If traversals are independent, the query can be chunked to gain high 15 | performance. The following example code shows how this works:: 16 | 17 | from joern.all import JoernSteps 18 | 19 | j = JoernSteps() 20 | j.connectToDatabase() 21 | 22 | ids = j.runGremlinQuery('getAllStatements.id') 23 | 24 | CHUNK_SIZE = 256 25 | for chunk in j.chunks(ids, CHUNK_SIZE): 26 | 27 | query = """ idListToNodes(%s).astNodes().id """ % (chunk) 28 | 29 | for r in j.runGremlinQuery(query): print r 30 | 31 | This will execute the query in batches of 256 start nodes each. 32 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # python-joern documentation build configuration file, created by 4 | # sphinx-quickstart2 on Fri Jun 6 13:50:56 2014. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | import sys 16 | import os 17 | 18 | # If extensions (or modules to document with autodoc) are in another directory, 19 | # add these directories to sys.path here. If the directory is relative to the 20 | # documentation root, use os.path.abspath to make it absolute, like shown here. 21 | sys.path.insert(0, os.path.abspath('.')) 22 | 23 | # -- General configuration ------------------------------------------------ 24 | 25 | # If your documentation needs a minimal Sphinx version, state it here. 26 | #needs_sphinx = '1.0' 27 | 28 | # Add any Sphinx extension module names here, as strings. They can be 29 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 30 | # ones. 
31 | extensions = [ 32 | 'sphinx.ext.autodoc', 33 | 'sphinx.ext.intersphinx', 34 | 'sphinx.ext.mathjax', 35 | 'sphinx.ext.viewcode', 36 | 'joerndomain' 37 | ] 38 | 39 | # Add any paths that contain templates here, relative to this directory. 40 | templates_path = ['_templates'] 41 | 42 | # The suffix of source filenames. 43 | source_suffix = '.rst' 44 | 45 | # The encoding of source files. 46 | #source_encoding = 'utf-8-sig' 47 | 48 | # The master toctree document. 49 | master_doc = 'index' 50 | 51 | # General information about the project. 52 | project = u'python-joern' 53 | copyright = u'2014, Fabian Yamaguchi' 54 | 55 | # The version info for the project you're documenting, acts as replacement for 56 | # |version| and |release|, also used in various other places throughout the 57 | # built documents. 58 | # 59 | # The short X.Y version. 60 | version = '0.2.5' 61 | # The full version, including alpha/beta/rc tags. 62 | release = '0.2.5' 63 | 64 | # The language for content autogenerated by Sphinx. Refer to documentation 65 | # for a list of supported languages. 66 | #language = None 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | #today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | #today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | exclude_patterns = [] 77 | 78 | # The reST default role (used for this markup: `text`) to use for all 79 | # documents. 80 | #default_role = None 81 | 82 | # If true, '()' will be appended to :func: etc. cross-reference text. 83 | #add_function_parentheses = True 84 | 85 | # If true, the current module name will be prepended to all description 86 | # unit titles (such as .. function::). 87 | #add_module_names = True 88 | 89 | # If true, sectionauthor and moduleauthor directives will be shown in the 90 | # output. They are ignored by default. 91 | #show_authors = False 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = 'sphinx' 95 | 96 | # A list of ignored prefixes for module index sorting. 97 | #modindex_common_prefix = [] 98 | 99 | # If true, keep warnings as "system message" paragraphs in the built documents. 100 | #keep_warnings = False 101 | 102 | 103 | # -- Options for HTML output ---------------------------------------------- 104 | 105 | # The theme to use for HTML and HTML Help pages. See the documentation for 106 | # a list of builtin themes. 107 | html_theme = 'default' 108 | 109 | # Theme options are theme-specific and customize the look and feel of a theme 110 | # further. For a list of options available for each theme, see the 111 | # documentation. 112 | #html_theme_options = {} 113 | 114 | # Add any paths that contain custom themes here, relative to this directory. 115 | #html_theme_path = [] 116 | 117 | # The name for this set of Sphinx documents. If None, it defaults to 118 | # " v documentation". 119 | #html_title = None 120 | 121 | # A shorter title for the navigation bar. Default is the same as html_title. 122 | #html_short_title = None 123 | 124 | # The name of an image file (relative to this directory) to place at the top 125 | # of the sidebar. 126 | #html_logo = None 127 | 128 | # The name of an image file (within the static path) to use as favicon of the 129 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 130 | # pixels large. 
131 | #html_favicon = None 132 | 133 | # Add any paths that contain custom static files (such as style sheets) here, 134 | # relative to this directory. They are copied after the builtin static files, 135 | # so a file named "default.css" will overwrite the builtin "default.css". 136 | html_static_path = ['_static'] 137 | 138 | # Add any extra paths that contain custom files (such as robots.txt or 139 | # .htaccess) here, relative to this directory. These files are copied 140 | # directly to the root of the documentation. 141 | #html_extra_path = [] 142 | 143 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 144 | # using the given strftime format. 145 | #html_last_updated_fmt = '%b %d, %Y' 146 | 147 | # If true, SmartyPants will be used to convert quotes and dashes to 148 | # typographically correct entities. 149 | #html_use_smartypants = True 150 | 151 | # Custom sidebar templates, maps document names to template names. 152 | #html_sidebars = {} 153 | 154 | # Additional templates that should be rendered to pages, maps page names to 155 | # template names. 156 | #html_additional_pages = {} 157 | 158 | # If false, no module index is generated. 159 | #html_domain_indices = True 160 | 161 | # If false, no index is generated. 162 | #html_use_index = True 163 | 164 | # If true, the index is split into individual pages for each letter. 165 | #html_split_index = False 166 | 167 | # If true, links to the reST sources are added to the pages. 168 | #html_show_sourcelink = True 169 | 170 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 171 | #html_show_sphinx = True 172 | 173 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 174 | #html_show_copyright = True 175 | 176 | # If true, an OpenSearch description file will be output, and all pages will 177 | # contain a tag referring to it. The value of this option must be the 178 | # base URL from which the finished HTML is served. 179 | #html_use_opensearch = '' 180 | 181 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 182 | #html_file_suffix = None 183 | 184 | # Output file base name for HTML help builder. 185 | htmlhelp_basename = 'python-joerndoc' 186 | 187 | 188 | # -- Options for LaTeX output --------------------------------------------- 189 | 190 | latex_elements = { 191 | # The paper size ('letterpaper' or 'a4paper'). 192 | #'papersize': 'letterpaper', 193 | 194 | # The font size ('10pt', '11pt' or '12pt'). 195 | #'pointsize': '10pt', 196 | 197 | # Additional stuff for the LaTeX preamble. 198 | #'preamble': '', 199 | } 200 | 201 | # Grouping the document tree into LaTeX files. List of tuples 202 | # (source start file, target name, title, 203 | # author, documentclass [howto, manual, or own class]). 204 | latex_documents = [ 205 | ('index', 'python-joern.tex', u'python-joern Documentation', 206 | u'Fabian Yamaguchi', 'manual'), 207 | ] 208 | 209 | # The name of an image file (relative to this directory) to place at the top of 210 | # the title page. 211 | #latex_logo = None 212 | 213 | # For "manual" documents, if this is true, then toplevel headings are parts, 214 | # not chapters. 215 | #latex_use_parts = False 216 | 217 | # If true, show page references after internal links. 218 | #latex_show_pagerefs = False 219 | 220 | # If true, show URL addresses after external links. 221 | #latex_show_urls = False 222 | 223 | # Documents to append as an appendix to all manuals. 
224 | #latex_appendices = [] 225 | 226 | # If false, no module index is generated. 227 | #latex_domain_indices = True 228 | 229 | 230 | # -- Options for manual page output --------------------------------------- 231 | 232 | # One entry per manual page. List of tuples 233 | # (source start file, name, description, authors, manual section). 234 | man_pages = [ 235 | ('index', 'python-joern', u'python-joern Documentation', 236 | [u'Fabian Yamaguchi'], 1) 237 | ] 238 | 239 | # If true, show URL addresses after external links. 240 | #man_show_urls = False 241 | 242 | 243 | # -- Options for Texinfo output ------------------------------------------- 244 | 245 | # Grouping the document tree into Texinfo files. List of tuples 246 | # (source start file, target name, title, author, 247 | # dir menu entry, description, category) 248 | texinfo_documents = [ 249 | ('index', 'python-joern', u'python-joern Documentation', 250 | u'Fabian Yamaguchi', 'python-joern', 'One line description of project.', 251 | 'Miscellaneous'), 252 | ] 253 | 254 | # Documents to append as an appendix to all manuals. 255 | #texinfo_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | #texinfo_domain_indices = True 259 | 260 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 261 | #texinfo_show_urls = 'footnote' 262 | 263 | # If true, do not generate a @detailmenu in the "Top" node's menu. 264 | #texinfo_no_detailmenu = False 265 | 266 | 267 | # Example configuration for intersphinx: refer to the Python standard library. 268 | intersphinx_mapping = {'http://docs.python.org/': None} 269 | -------------------------------------------------------------------------------- /docs/source/gettingstarted.rst: -------------------------------------------------------------------------------- 1 | Basic Usage 2 | =========== 3 | 4 | Python-joern currently provides a single class, JoernSteps, that 5 | allows to connect to the database server and run queries. The 6 | following is a simple sample script that employs JoernSteps to 7 | configure the database connection, connect to the server and run a 8 | Gremlin query. 9 | 10 | :: 11 | 12 | from joern.all import JoernSteps 13 | 14 | j = JoernSteps() 15 | 16 | j.setGraphDbURL('http://localhost:7474/db/data/') 17 | 18 | # j.addStepsDir('Use this to inject utility traversals') 19 | 20 | j.connectToDatabase() 21 | 22 | res = j.runGremlinQuery('getFunctionsByName("main")') 23 | # res = j.runCypherQuery('...') 24 | 25 | for r in res: print r 26 | 27 | The sample script employs all methods offered by JoernSteps. We now 28 | discuss each of these methods in detail. 29 | 30 | setGraphDbURL(url) 31 | ------------------- 32 | 33 | **Sets the URL of the graph database server.** The REST API of the 34 | Neo4J Database server is exposed on port 7474 by default. If your 35 | server runs on a different port or server, you can use setGraphDbURL 36 | to specify the alternate URL. 37 | 38 | addStepsDir(dirname) 39 | -------------------- 40 | 41 | **Add a source directory for utility traversals.** By default, 42 | python-joern will inject all utility traversals contained in any of 43 | the source files in joern/joernsteps into the database before running 44 | scripts. Additional traversals specific to your application or 45 | analysis are best placed in a separate directory. python-joern can be 46 | instructed to honor this additional directory using addStepsDir. 
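For example, utility traversals kept outside the python-joern tree can be
registered before connecting. The directory path below is purely
illustrative; any directory containing .groovy step definitions will do::

    from joern.all import JoernSteps

    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    # hypothetical directory holding additional Gremlin step definitions
    j.addStepsDir('/home/user/my-joern-steps')
    j.connectToDatabase()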
47 | 48 | connectToDatabase() 49 | ------------------- 50 | 51 | **Connect to the database.** Call this method once the connection has 52 | been configured to connect to the database server. A connection is 53 | required before queries can be executed. 54 | 55 | runGremlinQuery(query) 56 | ----------------------- 57 | 58 | **Run the specified Gremlin query.** The supplied query is executed 59 | and the result is returned. Depending on the query, the result may 60 | have a different data type, however, it is typically an iterable 61 | containing nodes that match the query. 62 | 63 | 64 | runCypherQuery(query) 65 | ----------------------- 66 | 67 | **Run the specified Cypher query.** The supplied query is executed 68 | and the result is returned. Depending on the query, the result may 69 | have a different data type, however, it is typically an iterable 70 | containing nodes that match the query. 71 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. python-joern documentation master file, created by 2 | sphinx-quickstart2 on Fri Jun 6 13:50:56 2014. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | python-joern documentation 7 | ========================== 8 | 9 | Welcome to python-joern's documentation. Python-joern provides a thin 10 | python wrapper for `the code analysis system joern 11 | `_ and a library of utility traversals that 12 | can be used to quickly navigate in the code property graph. If you use 13 | joern, you will almost definitely want to install this library as well. 14 | 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | 19 | installation 20 | gettingstarted 21 | chunking 22 | joernsteps 23 | joernnodes 24 | 25 | You may find answers for more specific questions regarding 26 | python-joern in the mailing list archive at 27 | https://listserv.gwdg.de/mailman/private/joern-users/ . 28 | -------------------------------------------------------------------------------- /docs/source/installation.rst: -------------------------------------------------------------------------------- 1 | Installation 2 | ============= 3 | 4 | Installing python-joern 5 | ------------------------ 6 | 7 | python-joern can be installed using pip:: 8 | 9 | sudo pip2 install git+git://github.com/fabsx00/python-joern.git 10 | 11 | This will (hopefully) automatically take care of installing 12 | dependencies. If dependencies are not installed correctly, you can try 13 | to manually install them as discussed in the following section. 14 | 15 | 16 | Manually Installing Dependencies 17 | ------------------------------------------- 18 | 19 | The following steps are only required if for some reason, installation 20 | of python-joern fails due to unresolved dependencies. 
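One quick way to check whether the dependencies resolved correctly is to try
importing them from Python 2 (python-joern targets Python 2, as the pip2 and
python2 commands in this chapter suggest)::

    python2 -c "import py2neo; from joern.all import JoernSteps; print 'dependencies OK'"

If this command fails, install the packages manually as described below.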
21 | 22 | Install *py2neo 1.6.1* from 23 | 24 | https://pypi.python.org/packages/source/p/py2neo/py2neo-1.6.1.tar.gz 25 | 26 | On Linux and BSD systems, executing the following commands will 27 | typically suffice:: 28 | 29 | wget https://pypi.python.org/packages/source/p/py2neo/py2neo-1.6.1.tar.gz; 30 | tar xfz py2neo-1.6.1.tar.gz; 31 | cd py2neo-1.6.1; 32 | sudo python2 setup.py install; 33 | 34 | Install the gremlin-plugin for neo4j from 35 | https://github.com/fabsx00/py2neo-gremlin/releases/tag/0.1:: 36 | 37 | wget https://github.com/fabsx00/py2neo-gremlin/archive/0.1.tar.gz 38 | tar xfz 0.1.tar.gz 39 | cd py2neo-gremlin-0.1 40 | sudo python2 setup.py install 41 | -------------------------------------------------------------------------------- /docs/source/joerndomain/__init__.py: -------------------------------------------------------------------------------- 1 | def setup(app): 2 | from .domain import JoernDomain 3 | app.add_domain(JoernDomain) 4 | -------------------------------------------------------------------------------- /docs/source/joerndomain/domain.py: -------------------------------------------------------------------------------- 1 | from docutils.parsers.rst import Directive 2 | from docutils import nodes 3 | from sphinx import addnodes 4 | from sphinx.directives import ObjectDescription 5 | from sphinx.util.docfields import Field, GroupedField 6 | from sphinx.util.nodes import make_refnode 7 | from sphinx.domains import Domain, ObjType 8 | from sphinx.roles import XRefRole 9 | 10 | class NodeField(GroupedField): 11 | 12 | is_grouped = True 13 | list_type = nodes.bullet_list 14 | 15 | def __init__(self, name, names=(), label=None, rolename=None): 16 | Field.__init__(self, name, names, label, True, rolename) 17 | self.can_collapse = False 18 | 19 | def make_field(self, types, domain, items): 20 | fieldname = nodes.field_name('', self.label) 21 | listnode = self.list_type() 22 | for fieldarg, content in items: 23 | par = nodes.paragraph() 24 | par += self.make_xref(self.rolename, domain, fieldarg, 25 | nodes.literal) 26 | if content and content[0].children: 27 | par += nodes.Text(' -- ') 28 | par += content 29 | listnode += nodes.list_item('', par) 30 | fieldbody = nodes.field_body('', listnode) 31 | return nodes.field('', fieldname, fieldbody) 32 | 33 | class JoernObj(ObjectDescription): 34 | 35 | def handle_signature(self, sig, signode): 36 | # add prefix like 'traversal' or 'lookup' 37 | if self.desc_annotation: 38 | prefix = '[%s] ' % self.desc_annotation 39 | signode += addnodes.desc_annotation(prefix, prefix) 40 | name = sig.split('(')[0] 41 | signode += addnodes.desc_name(sig, sig) 42 | return name 43 | 44 | def add_target_and_index(self, name, sig, signode): 45 | if name not in self.state.document.ids: 46 | signode['ids'].append(name) 47 | signode['names'].append(name) 48 | self.state.document.note_explicit_target(signode) 49 | objects = self.env.domaindata['joern']['objects'] 50 | objects[name] = (self.env.docname, self.objtype) 51 | indextext = "%s (%s)" % (name, self.desc_annotation) 52 | self.indexnode['entries'].append(('single', indextext, name, '')) 53 | 54 | class JoernTraversal(JoernObj): 55 | 56 | doc_field_types = [ 57 | GroupedField('param', label='Parameter', names=('param',), can_collapse = False), 58 | Field('in*', label='Ingoing nodes', names=('in*',), has_arg = False), 59 | NodeField('in', label='Ingoing nodes', names=('in',), rolename = 'ref'), 60 | Field('out*', label='Outgoing nodes', names=('out*',), has_arg = False), 61 | NodeField('out', 
label='Outgoing nodes', names=('out',), rolename = 'ref'), 62 | ] 63 | 64 | desc_annotation = 'traversal' 65 | 66 | class JoernLookup(JoernObj): 67 | 68 | doc_field_types = [ 69 | GroupedField('param', label='Parameter', names=('param',), can_collapse = False), 70 | Field('out*', label='Outgoing nodes', names=('out*',), has_arg = False), 71 | NodeField('out', label='Outgoing nodes', names=('out',), rolename = 'ref'), 72 | ] 73 | 74 | desc_annotation = 'lookup' 75 | 76 | class JoernNode(JoernObj): 77 | 78 | doc_field_types = [ 79 | GroupedField('prop', label='Properties', names=('prop',), can_collapse = False), 80 | ] 81 | 82 | desc_annotation = 'node' 83 | 84 | class JoernXRefRole(XRefRole): 85 | def process_link(self, env, refnode, has_explicit_title, title, target): 86 | return title, target 87 | 88 | class JoernDomain(Domain): 89 | 90 | label = 'python-joern' 91 | name = 'joern' 92 | 93 | data_version = 1 94 | 95 | initial_data = {'objects': {}} 96 | 97 | object_types = { 98 | 'traversal': ObjType('traversal', 'traversal'), 99 | 'lookup': ObjType('lookup', 'lookup'), 100 | 'node': ObjType('node', 'node'), 101 | } 102 | 103 | directives = { 104 | 'traversal': JoernTraversal, 105 | 'lookup': JoernLookup, 106 | 'node': JoernNode, 107 | } 108 | 109 | roles = { 110 | 'ref': JoernXRefRole(), 111 | } 112 | 113 | def get_objects(self): 114 | for name, (docname, objtype) in self.data['objects'].iteritems(): 115 | yield (name, name, objtype, docname, name, 1) 116 | 117 | def resolve_xref(self, env, fromdocname, builder, type, target, node, contnode): 118 | doc, _ = self.data['objects'].get(target, (None, None)) 119 | if doc: 120 | return make_refnode(builder, fromdocname, doc, target, contnode, target) 121 | -------------------------------------------------------------------------------- /docs/source/joernnodes.rst: -------------------------------------------------------------------------------- 1 | Joern Nodes 2 | =========== 3 | 4 | .. note:: Just a domain test. 5 | 6 | 7 | Abstract syntax tree nodes 8 | -------------------------- 9 | 10 | .. default-domain:: joern 11 | 12 | .. node:: Function 13 | 14 | :param name: The name of the function. 15 | -------------------------------------------------------------------------------- /docs/source/joernsteps.rst: -------------------------------------------------------------------------------- 1 | Start Node Selection 2 | ==================== 3 | 4 | .. default-domain:: joern 5 | 6 | .. lookup:: queryNodeIndex(query) 7 | 8 | Retrieves nodes from the database index using a Lucene query. 9 | 10 | :param query: The Lucene query. 11 | :out*: Arbitrary node 12 | 13 | .. lookup:: getNodesWithTypeAndCode(type, code) 14 | 15 | Retrieves nodes with the given type and code. This is 16 | equivalent to:: 17 | 18 | queryNodeIndex("type:$t AND code:$c") 19 | 20 | .. seealso:: 21 | 22 | - :ref:`queryNodeIndex` 23 | 24 | :param type: The required type. 25 | :param code: The required code string. 26 | :out*: All nodes of the given type. 27 | 28 | .. lookup:: getNodesWithTypeAndName(type, name) 29 | 30 | Retrieves nodes with the given type and name. This is 31 | equivalent to:: 32 | 33 | queryNodeIndex("type:$t AND name:$n") 34 | 35 | .. seealso:: 36 | 37 | - :ref:`queryNodeIndex` 38 | 39 | :param type: The required type. 40 | :param name: The required name 41 | :out*: All nodes of the given type. 42 | 43 | .. lookup:: getFunctionsByName(name) 44 | 45 | Retrieves function nodes with the given name. 
46 | This is equivalent to:: 47 | 48 | getNodesWithTypeAndName('Function', n) 49 | 50 | .. seealso:: 51 | 52 | - :ref:`getNodesWithTypeAndName` 53 | 54 | :param name: The required name of the functions. 55 | :out Function: 56 | 57 | .. lookup:: getFunctionsByFilename(filename) 58 | 59 | Retrieves function nodes with the given filename. 60 | 61 | :param filename: The required filename of the functions. 62 | :out Function: 63 | 64 | .. lookup:: getFunctionsASTsByName(name) 65 | 66 | Retrieves the root node of the abstract syntax tree 67 | by the name of the function. 68 | 69 | :param name: The function name. 70 | :out FunctionDef: 71 | 72 | .. lookup:: getCallsTo(callee): 73 | 74 | Retrieves function calls by the name of the called function. 75 | 76 | :param callee: The name of the called function. 77 | :out CallExpression: 78 | 79 | .. lookup:: getArguments(callee, position) 80 | 81 | Retrieves the i-th argument of a function call by the name of 82 | the called function and the position of the argument. This is 83 | equivalent to:: 84 | 85 | getCallsTo(callee).ithArguments(position) 86 | 87 | .. seealso:: 88 | 89 | - :ref:`getCallsTo` 90 | - :ref:`ithArguments` 91 | 92 | :param callee: The name of the called function. 93 | :param position: The position of the required argument. 94 | :out Argument: 95 | 96 | Traversals 97 | ========== 98 | 99 | Abstract syntax tree traversals 100 | ------------------------------- 101 | 102 | .. traversal:: astNodes() 103 | 104 | Traverses from a abstract syntax tree (AST) node to all child 105 | nodes including the node itself. 106 | 107 | :in*: Arbitrary AST node. 108 | :out*: Arbitrary AST node. 109 | 110 | .. traversal:: parents() 111 | 112 | Traverses from a abstract syntax tree (AST) node to its 113 | parent node. 114 | 115 | .. seealso:: 116 | 117 | - :ref:`children` 118 | 119 | :in*: Arbitrary AST node. 120 | :out*: Arbitrary AST node. 121 | 122 | .. traversal:: children() 123 | 124 | Traverses from a abstract syntax tree (AST) node to all of 125 | its children nodes. 126 | 127 | .. seealso:: 128 | 129 | - :ref:`parents` 130 | - :ref:`ithChildren` 131 | 132 | :in*: Arbitrary AST node. 133 | :out*: Arbitrary AST node. 134 | 135 | .. traversal:: ithChildren(i) 136 | 137 | Traverses from a abstract syntax tree (AST) node to its i-th 138 | children node. 139 | 140 | .. seealso:: 141 | 142 | - :ref:`children` 143 | 144 | :param i: The child number. 145 | :in*: Arbitrary AST node. 146 | :out*: Arbitrary AST node. 147 | 148 | .. traversal:: statements() 149 | 150 | Traverses from a abstract syntax tree (AST) node to its 151 | enclosing statement or predicate node. If the incoming node 152 | is a statement or predicate node, the node itself is returned. 153 | 154 | .. seealso:: 155 | 156 | - :ref:`functionToStatements` 157 | 158 | :in*: Arbitrary AST node. 159 | :out*: Various AST node. 160 | 161 | .. traversal:: functions() 162 | 163 | Traverses from a abstract syntax tree (AST) or symbol node 164 | to the corresponding function. 165 | 166 | :in*: Arbitrary AST node 167 | :in Symbol: 168 | :out Function: 169 | 170 | Traversals for assignment expressions 171 | ------------------------------------- 172 | 173 | .. traversal:: lval 174 | 175 | Traverses from an assignment expression to the left hand side 176 | of the expression. 177 | 178 | .. seealso:: 179 | 180 | - :ref:`rval` 181 | 182 | :in AssignmentExpr: 183 | :out Identifier: 184 | :out MemberAccess: 185 | :out PtrMemberAccess: 186 | :out others: 187 | 188 | .. 
traversal:: rval 189 | 190 | Traverses from an assignment expression to the right hand 191 | side of the expression. 192 | 193 | .. seealso:: 194 | 195 | - :ref:`lval` 196 | 197 | :in AssignmentExpr: 198 | :out*: Various AST nodes. 199 | 200 | Traversals for function calls 201 | ----------------------------- 202 | 203 | .. traversal:: callToArguments() 204 | 205 | Traverses from a function call to its arguments. 206 | This may return an empty pipe. 207 | 208 | :in CallExpression: 209 | :out Argument: 210 | 211 | .. traversal:: calleeToCall() 212 | 213 | Traverses from a callee node to its corresponding 214 | function call. 215 | 216 | :in Callee: 217 | :out CallExpression: 218 | 219 | .. traversal:: argToCall() 220 | 221 | Traverses from an argument node to its corresponding 222 | function call. 223 | 224 | :in Argument: 225 | :out CallExpression: 226 | 227 | 228 | .. traversal:: ithArguments(i) 229 | 230 | Traverses from a function call to its i-th argument. 231 | This may return an empty pipe. 232 | 233 | :param i: The argument number. 234 | :in CallExpression: 235 | :out Argument: 236 | 237 | Traversals for dataflow analysis 238 | -------------------------------- 239 | 240 | .. traversal:: producers(symbols) 241 | 242 | .. traversal:: users(symbols) 243 | 244 | .. traversal:: sources() 245 | 246 | .. traversal:: sinks() 247 | 248 | .. traversal:: astSinks() 249 | 250 | .. traversal:: astSources() 251 | 252 | .. traversal:: unsanitized(sanitizer, src) 253 | 254 | .. traversal:: firstElem() 255 | 256 | .. traversal:: uPath(sanitizer, src) 257 | 258 | Function traversals 259 | ------------------- 260 | 261 | .. traversal:: functionToAST() 262 | 263 | Traverses from a function node to the root node 264 | of the corresponding abstract syntax tree (AST). 265 | 266 | :in Function: 267 | :out FunctionDef: 268 | 269 | .. traversal:: functionToASTNodes() 270 | 271 | Traverses from a function node to all nodes of the 272 | corresponding abstract syntax tree. 273 | 274 | This is equivalent to:: 275 | 276 | _().functionToAST().astNodes() 277 | 278 | .. seealso:: 279 | 280 | - :ref:`functionToAST` 281 | - :ref:`astNodes` 282 | 283 | :in Function: 284 | :out*: Arbitrary AST node. 285 | 286 | .. traversal:: functionToStatements() 287 | 288 | Traverses from a function node to all statements of 289 | the function. 290 | 291 | .. seealso:: 292 | 293 | - :ref:`statements` 294 | 295 | :in Function: 296 | :out*: Arbitrary AST node. 297 | 298 | .. traversal:: functionToASTNodesOfType(type) 299 | 300 | Traverses from a function node to all abstract syntax 301 | tree nodes of the given type. 302 | 303 | :param type: The node type. 304 | :in Function: 305 | :out*: Nodes of the given **type** 306 | 307 | .. traversal:: functionToFile 308 | 309 | Traverses from a function node to the file node of the file, 310 | which contains the function. 311 | 312 | :in Function: 313 | :out File: 314 | 315 | .. Traversals for information retrieval 316 | .. ------------------------------------ 317 | 318 | .. .. traversal:: locations() 319 | 320 | .. Duplicate 321 | .. .. traversal:: functions() 322 | 323 | .. Duplicate 324 | .. .. traversal:: functionToFiles() 325 | 326 | Match traversals 327 | ---------------- 328 | 329 | .. note:: 330 | 331 | Please note, that tradeoffs in efficientcy are made for 332 | increased robustness and ease of formulation 333 | 334 | .. traversal:: match(predicate) 335 | 336 | Traverses from a abstract syntax tree (AST) node to all child 337 | nodes matching the given *predicate*. 
This also includes the 338 | starting node. 339 | 340 | This is equivalent to:: 341 | 342 | _().astNodes().filter(predicate) 343 | 344 | .. seealso:: 345 | 346 | - :ref:`astNodes` 347 | 348 | :param predicate: The closure that is used as the filter 349 | predicate. 350 | :in*: Arbitrary AST node. 351 | :out*: Arbitrary AST node. 352 | 353 | .. traversal:: matchParents(predicate) 354 | 355 | Traverses from a abstract syntax tree (AST) node to all 356 | parent nodes stopping at the enclosing statement and emitting 357 | all nodes matching the given predicate. 358 | 359 | :param predicate: The closure that is used as the filter 360 | predicate. 361 | :in*: Arbitrary AST node. 362 | :out*: Arbitrary AST node. 363 | 364 | .. traversal:: arg(function, position) 365 | 366 | Traverses from a abstract syntax tree (AST) node to all child 367 | nodes emitting all parameters at the given position whose 368 | functions match the given function name. 369 | 370 | :param function: The name of the function whose parameters 371 | are of interest. 372 | :param position: The required position of the parameter. 373 | :in*: Arbitrary AST node. 374 | :out Argument: 375 | 376 | .. traversal:: param(name) 377 | 378 | Traverses from a abstract syntax tree (AST) node to all child 379 | nodes emitting all parameters with the given name. 380 | 381 | :param name: The required name of the parameter. 382 | :in*: Arbitrary AST node. 383 | :out Parameter: 384 | 385 | Traversals for function parameters 386 | ---------------------------------- 387 | 388 | .. traversal:: paramsToNames() 389 | 390 | Traverses from a function parameter to its identifier node. 391 | 392 | .. seealso:: 393 | 394 | - :ref:`paramsToTypes` 395 | - :ref:`param` 396 | 397 | :in Parameter: 398 | :out Identifier: 399 | 400 | .. traversal:: paramsToTypes() 401 | 402 | Traverses from a function parameter to its type. 403 | 404 | .. seealso:: 405 | 406 | - :ref:`paramsToNames` 407 | - :ref:`param` 408 | 409 | 410 | :in Parameter: 411 | :out ParameterType: 412 | 413 | Traversals for symbol graphs 414 | ---------------------------- 415 | 416 | .. traversal:: uses() 417 | 418 | Traverses from a statement to all symbols used by this 419 | statement. 420 | 421 | .. seealso:: 422 | 423 | - :ref:`defines` 424 | 425 | :in Statement: 426 | :out Symbol: 427 | 428 | .. traversal:: defines() 429 | 430 | Traverses from a statement to all symbols defined by this 431 | statement. 432 | 433 | .. seealso:: 434 | 435 | - :ref:`uses` 436 | 437 | :in Statement: 438 | :out Symbol: 439 | 440 | .. traversal:: setBy() 441 | 442 | Traverses from a symbol to all statements that change the value 443 | of this symbol. 444 | 445 | :in Symbol: 446 | :outgoing node type: All AST nodes with the property ``isCFGNode = True``. 447 | 448 | .. traversal:: definitions() 449 | 450 | Traverses from a statement to all nodes affecting any symbol 451 | used within this statement. 
452 | 453 | :in Statement: 454 | :out IdenifierDeclStmt: 455 | :out Parameter: 456 | -------------------------------------------------------------------------------- /joern/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabsx00/python-joern/540cf96cb95168ee9470698f39a530c7e6ce1913/joern/__init__.py -------------------------------------------------------------------------------- /joern/all.py: -------------------------------------------------------------------------------- 1 | from py2neo import Graph 2 | from py2neo.ext.gremlin import Gremlin 3 | from py2neo.packages.httpstream import http 4 | 5 | import os 6 | 7 | DEFAULT_GRAPHDB_URL = "http://localhost:7474/db/data/" 8 | DEFAULT_STEP_DIR = os.path.dirname(__file__) + '/joernsteps/' 9 | 10 | class JoernSteps: 11 | 12 | def __init__(self): 13 | self._initJoernSteps() 14 | self.initCommandSent = False 15 | 16 | # Bump the py2neo socket timeout from 30s, neo4j doesn't kill queries on timeout so might 17 | # as well let the client pick when to stop. 18 | http.socket_timeout = 100000 19 | 20 | def setGraphDbURL(self, url): 21 | """ Sets the graph database URL. By default, 22 | http://localhost:7474/db/data/ is used.""" 23 | self.graphDbURL = url 24 | 25 | def addStepsDir(self, stepsDir): 26 | """Add an additional directory containing steps to be injected 27 | into the server""" 28 | self.stepsDirs.append(stepsDir) 29 | 30 | def connectToDatabase(self): 31 | """ Connects to the database server.""" 32 | self.graphDb = Graph(self.graphDbURL) 33 | self.gremlin = Gremlin(self.graphDb) 34 | 35 | def runGremlinQuery(self, query): 36 | 37 | """ Runs the specified gremlin query on the database. It is 38 | assumed that a connection to the database has been 39 | established. To allow the user-defined steps located in the 40 | joernsteps directory to be used in the query, these step 41 | definitions are sent before the first query.""" 42 | 43 | if not self.initCommandSent: 44 | self.gremlin.execute(self._createInitCommand()) 45 | self.initCommandSent = True 46 | 47 | return self.gremlin.execute(query) 48 | 49 | def runCypherQuery(self, cmd): 50 | """ Runs the specified cypher query on the graph database.""" 51 | return self.graphDb.cypher.execute(cmd) 52 | 53 | def getGraphDbURL(self): 54 | return self.graphDbURL 55 | 56 | """ 57 | Create chunks from a list of ids. 58 | This method is useful when you want to execute many independent 59 | traversals on a large set of start nodes. In that case, you 60 | can retrieve the set of start node ids first, then use 'chunks' 61 | to obtain disjoint subsets that can be passed to idListToNodes. 
62 | """ 63 | def chunks(self, idList, chunkSize): 64 | for i in xrange(0, len(idList), chunkSize): 65 | yield idList[i:i+chunkSize] 66 | 67 | def _initJoernSteps(self): 68 | self.graphDbURL = DEFAULT_GRAPHDB_URL 69 | self.stepsDirs = [DEFAULT_STEP_DIR] 70 | 71 | def _createInitCommand(self): 72 | 73 | initCommand = "" 74 | 75 | for stepsDir in self.stepsDirs: 76 | for (root, dirs, files) in os.walk(stepsDir, followlinks=True): 77 | files.sort() 78 | for f in files: 79 | filename = os.path.join(root, f) 80 | if not filename.endswith('.groovy'): continue 81 | initCommand += file(filename).read() + "\n" 82 | return initCommand 83 | -------------------------------------------------------------------------------- /joern/joernsteps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabsx00/python-joern/540cf96cb95168ee9470698f39a530c7e6ce1913/joern/joernsteps/__init__.py -------------------------------------------------------------------------------- /joern/joernsteps/_constants.groovy: -------------------------------------------------------------------------------- 1 | 2 | Object.metaClass.NODE_INDEX = 'nodeIndex' 3 | 4 | // Node Keys 5 | 6 | Object.metaClass.NODE_TYPE = 'type' 7 | Object.metaClass.NODE_NAME = 'name' 8 | Object.metaClass.NODE_CODE = 'code' 9 | Object.metaClass.NODE_FILEPATH = 'filepath' 10 | 11 | // Node Values 12 | 13 | Object.metaClass.TYPE_CALL = 'CallExpression' 14 | Object.metaClass.TYPE_CALLEE = 'Callee' 15 | Object.metaClass.TYPE_FUNCTION = 'Function' 16 | Object.metaClass.TYPE_ARGLIST = 'ArgumentList' 17 | Object.metaClass.TYPE_ASSIGNMENT = 'AssignmentExpr' 18 | 19 | Object.metaClass.TYPE_IDENTIFIER_DECL_STMT = 'IdentifierDeclStatement' 20 | Object.metaClass.TYPE_PARAMETER = 'Parameter' 21 | 22 | TYPE_FILE = 'File' 23 | 24 | // Edge types 25 | 26 | Object.metaClass.AST_EDGE = 'IS_AST_PARENT' 27 | Object.metaClass.CFG_EDGE = 'FLOWS_TO' 28 | 29 | Object.metaClass.USES_EDGE = 'USE' 30 | Object.metaClass.DEFINES_EDGE = 'DEF' 31 | Object.metaClass.DATA_FLOW_EDGE = 'REACHES' 32 | 33 | Object.metaClass.FUNCTION_TO_AST_EDGE = 'IS_FUNCTION_OF_AST' 34 | 35 | Object.metaClass.FILE_TO_FUNCTION_EDGE = 'IS_FILE_OF' 36 | 37 | // Edge keys 38 | 39 | Object.metaClass.DATA_FLOW_SYMBOL = 'var' 40 | -------------------------------------------------------------------------------- /joern/joernsteps/cfg.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | For an AST node, traverse to the exit-node 4 | of the function 5 | */ 6 | 7 | Gremlin.defineStep('toExitNode', [Vertex,Pipe], { 8 | _().transform{ queryNodeIndex('functionId:' + it.functionId + " AND type:CFGExitNode ") } 9 | .scatter() 10 | }) 11 | 12 | /** 13 | Search the CFG breadth-first so that we can keep track of all nodes we've visited in 14 | the entire search rather than just along the current path (massive optimization for 15 | high branching-factor CFGs, e.g. state machines). 
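   The isInLoop helper defined below builds on this traversal. A usage sketch
   from python-joern (assuming a connected JoernSteps instance j as in
   docs/source/gettingstarted.rst; the function name is illustrative):

     query = "getFunctionsByName('parse_input').functionToStatements().filter{ isInLoop(it) }.code"
     for r in j.runGremlinQuery(query): print r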
16 | */ 17 | Object.metaClass._reachableCfgNodes = { curNodes, visited -> 18 | nextNodes = curNodes._().out('FLOWS_TO').toSet() - visited 19 | if (nextNodes.isEmpty()) { return visited } 20 | 21 | visited.addAll(nextNodes) 22 | return _reachableCfgNodes(nextNodes.toList(), visited) 23 | } 24 | 25 | Gremlin.defineStep('reachableCfgNodes', [Vertex, Pipe], { 26 | _().transform { _reachableCfgNodes(it.statements().toList(), new HashSet())}.scatter() 27 | }) 28 | 29 | Object.metaClass.isInLoop = { it -> 30 | it._().reachableCfgNodes().toSet().contains(it.statements().toList()[0]) 31 | } 32 | -------------------------------------------------------------------------------- /joern/joernsteps/composition.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | Traversals Composition 4 | */ 5 | 6 | /** 7 | OR-composition for lookups: returns a pipe emitting nodes from both 8 | pipes. Please note that if both traversals return the same object, 9 | the object will be emitted twice. You can use 'dedup' to filter 10 | duplicates. 11 | 12 | @param: lookup1 First lookup to perform, a function returning a pipe. 13 | @param: lookup1 Second lookup to perform, a function returning a pipe. 14 | 15 | */ 16 | 17 | Object.metaClass.OR = { lookup1, lookup2 -> 18 | 19 | [1]._().copySplit( _().transform{ lookup1 }.scatter(), 20 | _().transform{ lookup2 }.scatter() 21 | ).fairMerge() 22 | } 23 | 24 | /** 25 | Pipe which filters nodes from functions also matching the traversal 26 | supplied in the closure `cl` 27 | 28 | @param cl The closure containing the traversal 29 | 30 | */ 31 | 32 | Gremlin.defineStep('not', [Vertex,Pipe], { cl, c = [] -> 33 | 34 | X = []; Y = [] 35 | _().aggregate(X) // Watchout! aggregation! 36 | ._emitForFunctions(cl, c) 37 | .functionId.gather{ Y = it; } 38 | .transform{ X }.scatter().filter{ !(it.functionId in Y) } 39 | }) 40 | 41 | 42 | /** 43 | Executes the closure `cl` which is expected to return a 44 | pipe of nodes. Returns a pipe containing all of these nodes 45 | which match the boolean predicate `c`. 46 | 47 | @param cl The closure to execute 48 | @param c The predicate to evaluate on nodes returned by cl. 49 | */ 50 | 51 | Gremlin.defineStep('_emitForFunctions', [Vertex,Pipe], { 52 | cl, c -> 53 | 54 | if(c == []) 55 | c = {it.functionId in ids} 56 | 57 | // aggregation is performed before the 58 | // call because otherwise, we do the 59 | // lookup for each element. 60 | // We should consider offering an 61 | // alternative step that does not aggregate. 62 | 63 | _().functionId.gather() 64 | .transform{ 65 | ids = it; 66 | cl().filter(c) 67 | }.scatter() 68 | }) 69 | 70 | Gremlin.defineStep('pairs', [Vertex,Pipe], { x, y -> 71 | 72 | odd = true; 73 | 74 | _().copySplit(x, y).fairMerge() 75 | .transform{ 76 | if(odd){ 77 | pair = it 78 | odd = false; 79 | return 'none' ; 80 | }else{ 81 | pair = [pair, it] 82 | odd = true; 83 | return pair; 84 | } 85 | }.filter{ it != 'none' } 86 | 87 | }) 88 | -------------------------------------------------------------------------------- /joern/joernsteps/function.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | (Optimized) match-traversals for functions. 
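   Usage sketch from python-joern (assuming a connected JoernSteps instance j
   as in docs/source/gettingstarted.rst; the names 'main' and 'malloc' are
   illustrative):

     # file that defines a given function
     print j.runGremlinQuery("getFunctionsByName('main').functionToFile().filepath")
     # functions containing calls to a given callee
     for r in j.runGremlinQuery("getCallsTo('malloc').functions().name"): print r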
4 | */ 5 | 6 | Gremlin.defineStep("functionToAST", [Vertex,Pipe], { 7 | _().out(FUNCTION_TO_AST_EDGE) 8 | }) 9 | 10 | Gremlin.defineStep("functionToASTNodes", [Vertex,Pipe], { 11 | _().functionToAST().astNodes() 12 | }) 13 | 14 | Gremlin.defineStep("functionToStatements", [Vertex,Pipe],{ 15 | _().transform{ queryNodeIndex('isCFGNode:True AND functionId:' + it.id) } 16 | .scatter() 17 | }) 18 | 19 | Gremlin.defineStep("functionsToASTNodesOfType", [Vertex,Pipe],{ type -> 20 | _().transform{ queryNodeIndex('functionId:' + it.id + " AND $NODE_TYPE:$type") } 21 | .scatter() 22 | }) 23 | 24 | Gremlin.defineStep('functionToFile', [Vertex, Pipe], { 25 | _().in(FILE_TO_FUNCTION_EDGE) 26 | }) 27 | 28 | /** 29 | * For a function node, get callers using `name` property. 30 | **/ 31 | 32 | Gremlin.defineStep('functionToCallers', [Vertex,Pipe], { 33 | _().transform{ 34 | 35 | funcName = it.name 36 | funcName = funcName.split(' ')[-1].trim() 37 | funcName = funcName.replace('*', '') 38 | 39 | getCallsTo(funcName) 40 | }.scatter() 41 | }) 42 | -------------------------------------------------------------------------------- /joern/joernsteps/info.groovy: -------------------------------------------------------------------------------- 1 | 2 | Gremlin.defineStep('locations', [Vertex,Pipe], { 3 | _() 4 | .statements().sideEffect{code = it.code } 5 | .functions().sideEffect{ name = it.name; } 6 | .functionToFiles().sideEffect{ filename = it.filepath; } 7 | .transform{ [code, name, filename] } 8 | }) 9 | 10 | Gremlin.defineStep('functions', [Vertex,Pipe],{ 11 | _().functionId.idsToNodes() 12 | }); 13 | 14 | Gremlin.defineStep("functionToFiles", [Vertex,Pipe], { 15 | _().in(FILE_TO_FUNCTION_EDGE) 16 | }) 17 | 18 | -------------------------------------------------------------------------------- /joern/joernsteps/learning/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabsx00/python-joern/540cf96cb95168ee9470698f39a530c7e6ce1913/joern/joernsteps/learning/__init__.py -------------------------------------------------------------------------------- /joern/joernsteps/learning/neighborhoodHash.groovy: -------------------------------------------------------------------------------- 1 | /** 2 | Implementation of the explicit Neighborhood Hash Kernel for joern. 3 | ------------------------------------------------------------------ 4 | 5 | See: 6 | * http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5360243&tag=1 7 | * http://user.informatik.uni-goettingen.de/~krieck/docs/2013b-aisec.pdf 8 | 9 | */ 10 | 11 | 12 | LABEL_MASK = 0xffff 13 | LABEL_WIDTH = 16 // TODO: calculate this from mask 14 | 15 | 16 | /** 17 | Create sparse feature vectors for ASTs rooted at the 18 | given AST nodes. Currently, we perform two iterations of 19 | neighborhood hashing, meaning that we characterize each node 20 | of the AST by its children up to depth 2. 21 | */ 22 | 23 | Gremlin.defineStep('featureVectors', [Vertex, Pipe], { 24 | _().transform{ 25 | def vec = [:] 26 | labels = NH(NH(NHGraph(it)))[1] 27 | labels.values().each { vec[it] = (vec[it] ?: 0) + 1 } 28 | libsvmString(vec) 29 | } 30 | }) 31 | 32 | /** 33 | Transform a dictionary ("sparse vector") into 34 | a corresponding libsvm line. 35 | */ 36 | 37 | Object.metaClass.libsvmString = { vec -> 38 | vec.entrySet().sort{ x,y -> x.getKey() <=> y.getKey() }. 
39 | inject(""){ acc, val -> acc + val.getKey() + ':' + val.getValue() + ' ' } 40 | } 41 | 42 | /** 43 | Create an initial graph for the neighborhood hash kernel starting 44 | from an AST root node 45 | */ 46 | 47 | Object.metaClass.NHGraph = { it -> 48 | 49 | def children = [:] 50 | def labels = [:] 51 | 52 | def X = it.astNodes().transform{ [it.id, it.astLabel().toList()[0], 53 | it.children().id.toList() ] }.toList() 54 | for (x in X){ 55 | nodeId = x[0] 56 | label = x[1] 57 | childIds = x[2] 58 | 59 | children[nodeId] = childIds 60 | labels[nodeId] = label 61 | } 62 | 63 | [children, labels] 64 | } 65 | 66 | /** 67 | Update an NH-graph, i.e., perform one iteration 68 | of neighborhood hashing. 69 | */ 70 | 71 | Object.metaClass.NH = { it -> 72 | 73 | def children = it[0] 74 | def labels = it[1] 75 | def newLabels = [:] 76 | 77 | def nodeIds = children.keySet() 78 | 79 | for(nodeId in nodeIds){ 80 | nodeLabel = labels[nodeId] 81 | rotNodeLabel = rotate(nodeLabel) 82 | c = children[nodeId] 83 | newLabels[nodeId] = c.inject(rotNodeLabel) { acc, val -> acc ^ labels[val] } 84 | } 85 | 86 | [children, newLabels] 87 | } 88 | 89 | Object.metaClass.rotate = { label -> 90 | ((label << 1) | ((label >>> (LABEL_WIDTH - 1) & 0x1 ))) & LABEL_MASK 91 | } 92 | 93 | Object.metaClass.hashVal = { s -> 94 | s.hashCode() & LABEL_MASK 95 | } 96 | 97 | Gremlin.defineStep('astLabel', [Vertex, Pipe], { 98 | _().transform{ 99 | 100 | if(numChildren(it) != 0 || it.code == null) 101 | hashVal(it.type) 102 | else{ 103 | if(it.type == 'PrimaryExpression' && it.code.matches('[0-9]+')) 104 | return hashVal('A_NUMBER') 105 | 106 | hashVal(it.code) 107 | } 108 | } 109 | }) 110 | -------------------------------------------------------------------------------- /joern/joernsteps/lookup.groovy: -------------------------------------------------------------------------------- 1 | /** 2 | This module contains index lookup functions employed to provide 3 | start node sets for traversals. All of these lookups support wild 4 | cards (you will need to escape spaces though). 5 | 6 | For each index lookup function, we define a corresponding Gremlin 7 | step with the same name which performs the same action as the 8 | lookup-function but returns only matches occuring in the same 9 | functions as the nodes piped to it. 10 | */ 11 | 12 | 13 | /** 14 | Retrieve nodes from index using a Lucene query. 15 | 16 | @param query The lucene query to run 17 | 18 | */ 19 | 20 | Object.metaClass.queryNodeIndex = { query, honorVisibility = true -> 21 | index = g.getRawGraph().index().forNodes(NODE_INDEX) 22 | 23 | try{ 24 | if(honorVisibility) 25 | new Neo4j2VertexIterable(index.query(query), g)._().visible() 26 | else 27 | new Neo4j2VertexIterable(index.query(query), g)._() 28 | }catch(ParseException){ 29 | return []._() 30 | } 31 | } 32 | 33 | /** 34 | Retrieve nodes with given type and code. 35 | 36 | @param type The node type 37 | @param code The node code 38 | 39 | */ 40 | 41 | Object.metaClass.getNodesWithTypeAndCode = { type, code -> 42 | query = "$NODE_TYPE:$type AND $NODE_CODE:$code" 43 | queryNodeIndex(query) 44 | } 45 | 46 | 47 | /** 48 | Retrieve nodes with given type 49 | 50 | @param type The node type 51 | 52 | */ 53 | 54 | Object.metaClass.getNodesWithType = { type -> 55 | query = "$NODE_TYPE:$type" 56 | queryNodeIndex(query) 57 | } 58 | 59 | /** 60 | Retrieve nodes with given type and name. 
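   Like the other lookups in this module, this one accepts Lucene wild cards.
   A sketch from python-joern (assuming a connected JoernSteps instance j; the
   name pattern is illustrative):

     for r in j.runGremlinQuery("getNodesWithTypeAndName('Function', 'parse_*').name"): print r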
61 | 62 | @param type The node type 63 | @param name The node name 64 | 65 | */ 66 | 67 | Object.metaClass.getNodesWithTypeAndName = { type, name, honorVisibility = true -> 68 | query = "$NODE_TYPE:$type AND $NODE_NAME:$name" 69 | queryNodeIndex(query, honorVisibility) 70 | } 71 | 72 | /** 73 | Retrieve functions by name. 74 | 75 | @param name name of the function 76 | 77 | */ 78 | 79 | Object.metaClass.getFunctionsByName = { name, honorVisibility = true -> 80 | getNodesWithTypeAndName(TYPE_FUNCTION, name, honorVisibility) 81 | } 82 | 83 | Object.metaClass.getFunctionsByParameter = { param -> 84 | getNodesWithTypeAndCode(TYPE_PARAMETER, param) 85 | .functions() 86 | } 87 | 88 | Object.metaClass.getFunctionsByFilename = { name, honorVisibility = true -> 89 | query = "$NODE_TYPE:$TYPE_FILE AND $NODE_FILEPATH:$name" 90 | queryNodeIndex(query, honorVisibility) 91 | .out('IS_FILE_OF') 92 | .filter{ it.type == TYPE_FUNCTION } 93 | } 94 | 95 | Object.metaClass.getFunctionsByFileAndName = { filename, name, honorVisibility = true -> 96 | getFunctionsByFilename(filename, honorVisibility) 97 | .filter{ it.name == name } 98 | } 99 | 100 | Object.metaClass.getFilesByName = { filename, honorVisibility = true -> 101 | query = "$NODE_TYPE:$TYPE_FILE AND $NODE_FILEPATH:$filename" 102 | queryNodeIndex(query, honorVisibility) 103 | } 104 | 105 | /** 106 | Retrieve functions by name. 107 | 108 | @param name name of the function 109 | 110 | */ 111 | 112 | Object.metaClass.getFunctionASTsByName = { name -> 113 | getNodesWithTypeAndName(TYPE_FUNCTION, name) 114 | .out(FUNCTION_TO_AST_EDGE) 115 | } 116 | 117 | /** 118 | Retrieve all statements (including conditions) 119 | */ 120 | 121 | Object.metaClass.getAllStatements = { 122 | queryNodeIndex('isCFGNode:True') 123 | } 124 | 125 | /** 126 | Retrieve all conditions 127 | */ 128 | 129 | Object.metaClass.getAllConditions = { 130 | getNodesWithType('Condition') 131 | } 132 | 133 | /** 134 | Retrieve all calls. 135 | 136 | */ 137 | 138 | Object.metaClass.getAllCalls = { 139 | getNodesWithType(TYPE_CALL) 140 | } 141 | 142 | /** 143 | Retrieve calls by name. 144 | 145 | @param callee Name of called function 146 | 147 | */ 148 | 149 | Object.metaClass.getCallsTo = { callee -> 150 | 151 | getNodesWithTypeAndCode(TYPE_CALLEE, callee) 152 | .parents() 153 | 154 | } 155 | 156 | 157 | 158 | /** 159 | Retrieve arguments to functions. Corresponds to the traversal 160 | 'ARG' from the paper. 
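   Usage sketch from python-joern (assuming a connected JoernSteps instance j;
   the callee name is illustrative, and argument positions are passed as
   0-based child numbers in string form):

     # code of the third argument of every memcpy call
     for r in j.runGremlinQuery("getArguments('memcpy', '2').code"): print r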
161 | 162 | @param name Name of called function 163 | @param i Argument index 164 | 165 | */ 166 | 167 | Object.metaClass.getArguments = { name, i -> 168 | getCallsTo(name).ithArguments(i) 169 | } 170 | 171 | Object.metaClass.getConditions = { funcname, regex, filename = null -> 172 | 173 | if(filename == null) 174 | getFunctionASTsByName(funcname).match{ it.type == "Condition" && it.code.matches(regex) } 175 | else 176 | getFunctionsByFileAndName(filename, funcname).functionToAST() 177 | .match{ it.type == "Condition" && it.code.matches(regex) } 178 | } 179 | 180 | 181 | ///////////////////////////////////////////////// 182 | // Corresponding Gremlin Steps // 183 | ///////////////////////////////////////////////// 184 | 185 | Gremlin.defineStep('queryNodeIndex', [Vertex,Pipe], { query, c = [] -> 186 | _()._emitForFunctions({ queryNodeIndex(query) }, c ) 187 | }) 188 | 189 | Gremlin.defineStep('getNodesWithTypeAndCode', [Vertex,Pipe], { type, code, c = [] -> 190 | _()._emitForFunctions({ getNodesWithTypeAndCode(type, code) }, c ) 191 | }) 192 | 193 | Gremlin.defineStep('getNodesWithTypeAndName', [Vertex,Pipe], { type, name, c = [] -> 194 | _()._emitForFunctions({ getNodesWithTypeAndName(type, name) }, c ) 195 | }) 196 | 197 | Gremlin.defineStep('getFunctionsByName', [Vertex,Pipe], { name, c = [] -> 198 | _()._emitForFunctions({ getFunctionsByName(name) }, c ) 199 | }) 200 | 201 | Gremlin.defineStep('getCallsTo', [Vertex,Pipe], { callee, c = [] -> 202 | _()._emitForFunctions({ getCallsTo(callee) }, c ) 203 | }) 204 | 205 | Gremlin.defineStep('getArguments', [Vertex,Pipe], { name, i, c = [] -> 206 | _()._emitForFunctions({ getArguments(name, i) }, c ) 207 | }) 208 | 209 | 210 | 211 | 212 | -------------------------------------------------------------------------------- /joern/joernsteps/misc.groovy: -------------------------------------------------------------------------------- 1 | 2 | Gremlin.defineStep('In', [Vertex, Pipe], { edgeType, key, vals -> 3 | 4 | if(Collection.isAssignableFrom(vals.getClass())){ 5 | filterExpr = { it.getProperty(key) in vals } 6 | }else{ 7 | filterExpr = {it.getProperty(key) == vals} 8 | } 9 | 10 | _().inE(edgeType).filter(filterExpr).outV() 11 | }) 12 | 13 | Gremlin.defineStep('Out', [Vertex, Pipe], { edgeType, key, vals -> 14 | 15 | if(Collection.isAssignableFrom(vals.getClass())){ 16 | filterExpr = { it.getProperty(key) in vals } 17 | }else{ 18 | filterExpr = {it.getProperty(key) == vals} 19 | } 20 | 21 | _().outE(edgeType).filter(filterExpr).inV() 22 | }) 23 | 24 | 25 | /** 26 | Map node ids to nodes 27 | */ 28 | 29 | Gremlin.defineStep('idsToNodes', [Vertex,Pipe], { 30 | _().transform{ g.v(it) }.scatter() 31 | }) 32 | 33 | /** 34 | Map node ids to nodes 35 | */ 36 | 37 | Gremlin.defineStep('idsToEdges', [Vertex,Pipe], { 38 | _().transform{ g.e(it) }.scatter() 39 | }) 40 | 41 | /** 42 | Create nodes from a list of node ids 43 | */ 44 | 45 | Object.metaClass.idListToNodes = { listOfIds -> 46 | _().transform{ listOfIds }.scatter().idsToNodes() 47 | } 48 | 49 | /** 50 | Create nodes from a list of node ids 51 | */ 52 | 53 | Object.metaClass.idListToEdges = { listOfIds -> 54 | _().transform{ listOfIds }.scatter().idsToEdges() 55 | } 56 | 57 | Gremlin.defineStep('isCheck', [Vertex, Pipe], { symbol -> 58 | 59 | _().astNodes().filter{ it.type in ['EqualityExpression', 'RelationalExpression'] } 60 | .filter{ it.code.matches(symbol) } 61 | }) 62 | 63 | 64 | 65 | Gremlin.defineStep('codeContains', [Vertex, Pipe], { symbol -> 66 | _().filter{it.code != null}.filter{ 
it.code.matches(symbol) } 67 | }) 68 | 69 | /** 70 | * Traverse to all API symbols from given AST nodes. 71 | **/ 72 | 73 | Gremlin.defineStep('apiSyms', [Vertex,Pipe], { 74 | 75 | _().match{it.type in ['Callee','IdentifierDeclType', 'Parameter']}.code 76 | }) 77 | 78 | /** 79 | * Like 'flatten' but only flatten by one layer. 80 | * */ 81 | 82 | Object.metaClass.flattenByOne = { lst -> 83 | lst.inject([]) {acc, val-> acc.plus(val)} 84 | } 85 | 86 | Gremlin.defineStep('_or', [Vertex, Pipe], { Object [] closures -> 87 | 88 | _().transform{ 89 | def ret = [] 90 | closures.each{ cl -> 91 | def x = cl(it).toList() 92 | ret.addAll(x) 93 | } 94 | flattenByOne(ret.unique()) 95 | }.scatter() 96 | }) 97 | 98 | 99 | /** 100 | For a given list, create a reverse 101 | index that maps list items to the indices 102 | they occur at. 103 | */ 104 | 105 | Object.metaClass.createReverseIndex = { aList -> 106 | def reverseIndex = [:] 107 | aList.eachWithIndex{ item, i -> 108 | if (!reverseIndex.containsKey(item)){ reverseIndex[item] = [] } 109 | reverseIndex[item] << i 110 | } 111 | reverseIndex 112 | } 113 | 114 | Object.metaClass.compareLists = { x, y -> 115 | if(x == y) return 0 116 | return 1 117 | } 118 | -------------------------------------------------------------------------------- /joern/joernsteps/nodeHiding.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | Return only visible nodes. 4 | */ 5 | 6 | Gremlin.defineStep('visible', [Vertex, Pipe], { 7 | _().filter{ 8 | if(it.type == 'File') 9 | (it.hidden != '1') 10 | else if(it.type == 'Function'){ 11 | l = it.functionToFiles().visible().toList() 12 | it.hidden != '1' && l != [] 13 | }else{ 14 | l = it.functions().visible.toList() 15 | l != [] 16 | } 17 | } 18 | }); 19 | 20 | -------------------------------------------------------------------------------- /joern/joernsteps/symbolGraph.groovy: -------------------------------------------------------------------------------- 1 | /** 2 | Elementary traversals for the function symbol graph 3 | */ 4 | 5 | /** 6 | Get all symbols used by an AST-node/statement. 7 | */ 8 | 9 | Gremlin.defineStep('uses', [Vertex,Pipe], { 10 | _().out(USES_EDGE) 11 | }) 12 | 13 | Gremlin.defineStep('usesFiltered', [Vertex,Pipe], { 14 | 15 | _().transform{ 16 | L = it.out(USES_EDGE).toList(); 17 | L.sort{ a, b -> a.code.size() <=> b.code.size() } 18 | L = L.reverse() 19 | 20 | acc = [] 21 | L.each{ node -> 22 | // if(node.code.startsWith('*')) return; 23 | if(acc.findAll{ it.code.contains(node.code) }.size() != 0) return; 24 | acc << node; 25 | } 26 | 27 | acc 28 | }.scatter() 29 | 30 | }) 31 | 32 | Gremlin.defineStep('defines', [Vertex,Pipe], { 33 | _().out(DEFINES_EDGE) 34 | }) 35 | 36 | /** 37 | Get all statements assigning a value to a symbol. 38 | */ 39 | 40 | Gremlin.defineStep('setBy', [Vertex,Pipe], { 41 | _().in(DEFINES_EDGE) 42 | }) 43 | 44 | /** 45 | Get all definitions affecting an AST-node/statement. 
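   Usage sketch from python-joern (assuming a connected JoernSteps instance j;
   note that getAllConditions() scans the entire database, see the chunking
   documentation for large code bases):

     # declarations and parameters affecting symbols used in conditions
     for r in j.runGremlinQuery("getAllConditions().definitions().code"): print r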
46 | */ 47 | 48 | Gremlin.defineStep('definitions', [Vertex,Pipe], { 49 | _().uses().in(DEFINES_EDGE) 50 | .filter{it.type in [TYPE_IDENTIFIER_DECL_STMT, TYPE_PARAMETER] } 51 | }) 52 | -------------------------------------------------------------------------------- /joern/joernsteps/syntax/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabsx00/python-joern/540cf96cb95168ee9470698f39a530c7e6ce1913/joern/joernsteps/syntax/__init__.py -------------------------------------------------------------------------------- /joern/joernsteps/syntax/assignment.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | (Optimized) Match-traversals for assignments 4 | */ 5 | 6 | Gremlin.defineStep('lval', [Vertex,Pipe], { 7 | _().out(AST_EDGE).filter{ it.childNum == "0" } 8 | }); 9 | 10 | Gremlin.defineStep('rval', [Vertex,Pipe], { 11 | _().out(AST_EDGE).filter{ it.childNum == "1" } 12 | }); 13 | -------------------------------------------------------------------------------- /joern/joernsteps/syntax/ast.groovy: -------------------------------------------------------------------------------- 1 | /** 2 | Elementrary traversals starting at AST nodes. 3 | */ 4 | 5 | /** 6 | Traverse from root of AST to all nodes it contains 7 | (including the node itself) This is refered to as 'TNODES' in the 8 | paper simply because otherwise its definition would not fit in a 9 | column ;) 10 | */ 11 | 12 | 13 | Gremlin.defineStep('astNodes', [Vertex, Pipe], { 14 | _().transform{ 15 | def x = [] as Set; 16 | it.children().loop(1){true}{true} 17 | .store(x).optional(2).transform{x+it}.scatter() 18 | }.scatter() 19 | }) 20 | 21 | /** 22 | Traverse to parent-nodes of AST nodes. 23 | */ 24 | 25 | Gremlin.defineStep('parents', [Vertex, Pipe], { 26 | _().in(AST_EDGE) 27 | }) 28 | 29 | /** 30 | Traverse to child-nodes of AST nodes. 31 | */ 32 | 33 | Gremlin.defineStep('children', [Vertex, Pipe], { 34 | _().out(AST_EDGE) 35 | }) 36 | 37 | /** 38 | Traverse to i'th children. 39 | 40 | @param i The child index 41 | */ 42 | 43 | Gremlin.defineStep('ithChildren', [Vertex, Pipe], { i -> 44 | _().children().filter{ it.childNum == i} 45 | }) 46 | 47 | Object.metaClass.isStatement = { it -> 48 | it.isCFGNode == 'True' 49 | } 50 | 51 | 52 | /** 53 | * Traverse to siblings. 54 | */ 55 | Gremlin.defineStep('siblings', [Vertex, Pipe], { 56 | _().sideEffect{ nodeId = it.id } 57 | .parents() 58 | .children() 59 | .filter{ it.id != nodeId } 60 | }); 61 | 62 | /** 63 | Traverse to statements enclosing supplied AST nodes. This may be 64 | the node itself. 65 | */ 66 | 67 | Gremlin.defineStep('statements', [Vertex,Pipe],{ 68 | _().ifThenElse{isStatement(it)} 69 | { it } 70 | { it.in(AST_EDGE).loop(1){it.object.isCFGNode != 'True'} } 71 | }); 72 | 73 | /** 74 | Get number of children of an AST node. 75 | */ 76 | 77 | Gremlin.defineStep('numChildren', [Vertex, Pipe], { 78 | _().transform{ numChildren(it) } 79 | }) 80 | 81 | Object.metaClass.numChildren = { it -> 82 | it.out('IS_AST_PARENT').toList().size() 83 | } 84 | -------------------------------------------------------------------------------- /joern/joernsteps/syntax/call.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | (Optimized) Match-traversals for Calls. 
4 | */ 5 | 6 | Gremlin.defineStep('callToArguments', [Vertex, Pipe], { 7 | _().children().filter{it.type == TYPE_ARGLIST} 8 | .children() 9 | }) 10 | 11 | Gremlin.defineStep('ithArguments', [Vertex,Pipe], { i -> 12 | _().callToArguments() 13 | .filter{ it.childNum == i } 14 | }) 15 | 16 | 17 | Gremlin.defineStep('argToCall', [Vertex, Pipe], { 18 | _().in(AST_EDGE).in(AST_EDGE) 19 | }) 20 | 21 | Gremlin.defineStep('calleeToCall', [Vertex, Pipe], { 22 | _().in(AST_EDGE) 23 | }) 24 | 25 | Gremlin.defineStep('callToCallee', [Vertex, Pipe],{ 26 | _().out(AST_EDGE).filter{it.type == 'Callee'} 27 | }) 28 | 29 | Gremlin.defineStep('callToAssigns', [Vertex, Pipe], { 30 | _().matchParents{it.type == 'AssignmentExpr'} 31 | }) 32 | -------------------------------------------------------------------------------- /joern/joernsteps/syntax/match.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | Match descriptions as presented in the paper. Please note that 4 | tradeoffs in efficiency are made for increased robustness and ease 5 | of formulation. 6 | */ 7 | 8 | /** 9 | Traverse to all AST nodes in the subtree that match the predicate p. 10 | */ 11 | 12 | Gremlin.defineStep('match', [Vertex, Pipe], { p -> 13 | _().astNodes().filter(p) 14 | }) 15 | 16 | Gremlin.defineStep('matchChildren', [Vertex, Pipe], { p, q={false} -> 17 | _().children().loop(1){ !q(it.object) }{ p(it.object) } 18 | }) 19 | 20 | /** 21 | Walk the tree in the direction of the root 22 | stopping at the enclosing statement and output 23 | all parents that match the supplied predicate. Note that this may 24 | include the enclosing statement node. 25 | */ 26 | 27 | Gremlin.defineStep('matchParents', [Vertex,Pipe], { p, q={false} -> 28 | _().parents().loop(1){it.object.isCFGNode != 'True' && !q(it.object) }{ p(it.object) } 29 | }) 30 | 31 | 32 | /** 33 | For calls to functions whose code starts with f, traverse to the argument with child number i. 34 | */ 35 | 36 | Gremlin.defineStep('arg', [Vertex, Pipe], { f, i -> 37 | _().astNodes().filter{ it.type == 'CallExpression' && it.code.startsWith(f)} 38 | .out(AST_EDGE).filter{ it.childNum == '1' }.out(AST_EDGE).filter{ it.childNum == i} 39 | }) 40 | 41 | /** 42 | Traverse to parameters whose code matches the regular expression x. 43 | */ 44 | 45 | Gremlin.defineStep('param', [Vertex, Pipe], { x -> 46 | p = { it.type == 'Parameter' && it.code.matches(x) } 47 | _().match(p) 48 | 49 | }) 50 | 51 | -------------------------------------------------------------------------------- /joern/joernsteps/syntax/param.groovy: -------------------------------------------------------------------------------- 1 | 2 | Gremlin.defineStep('paramsToNames', [Vertex,Pipe], { 3 | _().children().filter{ it.type != 'ParameterType'} 4 | }) 5 | 6 | Gremlin.defineStep('paramsToTypes', [Vertex,Pipe], { 7 | _().children().filter{ it.type == 'ParameterType'} 8 | }) 9 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabsx00/python-joern/540cf96cb95168ee9470698f39a530c7e6ce1913/joern/joernsteps/taintTracking/__init__.py -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/dataflow.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | Data producers of the statement enclosing an AST-node, limited to a 4 | set N of symbols.
5 | 6 | N A set of symbols of interest 7 | */ 8 | 9 | Gremlin.defineStep('producers', [Vertex,Pipe], { N -> 10 | _().statements().In(DATA_FLOW_EDGE, DATA_FLOW_SYMBOL, N ) 11 | }) 12 | 13 | 14 | /** 15 | Data users of the statement enclosing an AST-node, limited to a 16 | set N of symbols. 17 | 18 | N A set of symbols of interest 19 | */ 20 | 21 | Gremlin.defineStep('users', [Vertex,Pipe], { N -> 22 | _().statements().Out(DATA_FLOW_EDGE, DATA_FLOW_SYMBOL, N ) 23 | }) 24 | 25 | /** 26 | Data producers of the statement enclosing an AST-node. 27 | */ 28 | 29 | Gremlin.defineStep('sources', [Vertex,Pipe], { 30 | _().statements() 31 | .in(DATA_FLOW_EDGE) 32 | }) 33 | 34 | /** 35 | Data consumers of the statement enclosing an AST-node. 36 | */ 37 | 38 | Gremlin.defineStep('sinks', [Vertex,Pipe], { 39 | _().statements() 40 | .out(DATA_FLOW_EDGE) 41 | }) 42 | 43 | /** 44 | Data consumers of variables defined in the given ASTs. 45 | */ 46 | 47 | Gremlin.defineStep('astSinks', [Vertex,Pipe], { 48 | _().transform{ N = it.defines().code.toList(); it.users(N) }.scatter() 49 | }) 50 | 51 | /** 52 | Data sources of variables used in the given ASTs. 53 | */ 54 | 55 | Gremlin.defineStep('astSources', [Vertex,Pipe], { 56 | _().transform{ N = it.used().code.toList(); it.producers(N) }.scatter() 57 | }) 58 | 59 | /** 60 | For a set of destination nodes: all paths in the control flow graph 61 | from data sources where no node on the path redefines the produced 62 | symbol and no node on the path matches a sanitizer description. 63 | 64 | @return A pipe containing valid source nodes 65 | 66 | */ 67 | 68 | Gremlin.defineStep('unsanitized', [Vertex, Pipe], { sanitizer, src = { [1]._() } -> 69 | _().uPath(sanitizer, src).firstElem() 70 | }) 71 | 72 | Gremlin.defineStep('unsanitizedPaths', [Vertex, Pipe], { sanitizer, src = {[1]._() } -> 73 | _().uPath(sanitizer, src) 74 | }) 75 | 76 | Gremlin.defineStep('firstElem', [Vertex, Pipe], { 77 | _().transform{it[0]} 78 | }) 79 | 80 | /** 81 | For a set of destination nodes: all paths in the control flow graph 82 | from data sources where no node on the path redefines the produced 83 | symbol and no node on the path matches a sanitizer description. 84 | 85 | @returns A pipe containing a set of paths for each destination 86 | 87 | */ 88 | 89 | Gremlin.defineStep('uPath', [Vertex, Pipe], { sanitizer, src = { [1]._() } -> 90 | _().sideEffect{ dst = it; } 91 | .usesFiltered().sideEffect{ symbol = it.code } 92 | // .uses().sideEffect{ symbol = it.code } 93 | .transform{ dst.producers([symbol]) }.scatter() 94 | .filter{ src(it).toList() != [] } 95 | .transform{ cfgPaths(symbol, sanitizer, it, dst.statements().toList()[0] ) }.scatter() 96 | 97 | }) 98 | 99 | /** 100 | All paths in the control flow graph from src to dst where 101 | none of the nodes on the path match a sanitizer description and 102 | none of the nodes redefine a given symbol. 103 | 104 | This is `u` in the paper. 
105 | 106 | @returns Returns a set of paths 107 | 108 | */ 109 | 110 | Object.metaClass.cfgPaths = { symbol, sanitizer, src, dst -> 111 | _cfgPaths(symbol, sanitizer, 112 | src, dst, [:], []) 113 | } 114 | 115 | /** 116 | This is `g` in the paper. 117 | 118 | @returns Returns a set of paths 119 | 120 | */ 121 | 122 | Object.metaClass._cfgPaths = {symbol, sanitizer, curNode, dst, visited, path -> 123 | 124 | // return an empty set if this node terminates the path (sanitizer, redefined symbol, or visited twice) 125 | if( ( path != [] ) && isTerminationNode(symbol, sanitizer, curNode, visited)){ 126 | return [] as Set 127 | } 128 | 129 | // return path when destination has been reached 130 | if(curNode == dst){ 131 | return [path + curNode] as Set 132 | } 133 | 134 | 135 | // `h` in the paper is inlined here 136 | 137 | def children = curNode._().out(CFG_EDGE).toList() 138 | def X = [] as Set 139 | def x; 140 | 141 | for(child in children){ 142 | 143 | def curNodeId = curNode.id; 144 | 145 | x = _cfgPaths(symbol, sanitizer, child, dst, 146 | visited + [ (curNodeId) : (visited.get(curNodeId, 0) + 1)], 147 | path + curNode) 148 | 149 | 150 | X += x 151 | 152 | // OPTIMIZATION! 153 | // If we find one path, there's no need to explore the others 154 | if(!x.isEmpty()){ return x } 155 | 156 | // Limit depth of CFG paths to 30 157 | if(path.size() > 30) return [] 158 | 159 | } 160 | 161 | X 162 | } 163 | 164 | /** 165 | Determines whether the node is a termination node. 166 | This is p(s, m, v, V) in the paper. 167 | 168 | @params symbol The symbol of interest (which the block must not define) 169 | @params sanitizer The sanitizer description (a traversal) 170 | @params curNode The node of interest 171 | @params visited The map (multiset) of visited nodes 172 | */ 173 | 174 | Object.metaClass.isTerminationNode = { symbol, sanitizer, curNode, visited -> 175 | 176 | def curNodeId = curNode.id 177 | 178 | sanitizer(curNode, symbol).toList() != [] || 179 | (curNode.defines().filter{ it.code == symbol}.toList() != []) || 180 | (visited.get(curNodeId) == 2) 181 | } 182 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/initGraphs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fabsx00/python-joern/540cf96cb95168ee9470698f39a530c7e6ce1913/joern/joernsteps/taintTracking/initGraphs/__init__.py -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/initGraphs/ast.groovy: -------------------------------------------------------------------------------- 1 | class ASTNode{ 2 | def label; 3 | def children = []; 4 | def g; 5 | def id; 6 | def taintedSymbols; 7 | 8 | ASTNode(nodeId, symbols, graphObj) 9 | { 10 | taintedSymbols = symbols 11 | id = nodeId; 12 | g = graphObj; 13 | def chldren = initLabel(nodeId, symbols) 14 | children = chldren.collect{ new ASTNode(it, symbols, graphObj) } 15 | } 16 | 17 | /** 18 | * This function labels AST nodes. Labeling 19 | * includes normalization. 20 | * */ 21 | 22 | def initLabel(nodeId, symbols) 23 | { 24 | def node = g.v(nodeId) 25 | 26 | // relabel tracked argument to '_ARG_' 27 | if(node.code in symbols){ 28 | label = "QUERYGEN_ARG" 29 | return [] 30 | } 31 | 32 | // Make sure not to walk into arguments.
33 | 34 | // if(node.type == 'CallExpression'){ 35 | // label = 'Callee: ' + node._().callToCallee.code.toList()[0] 36 | // return [] 37 | // } 38 | 39 | def chldren = node.children().id.toList() 40 | if(chldren.size() != 0 || node.code == null){ 41 | label = node.type 42 | }else{ 43 | label = node.code 44 | // Relabel numbers to '_NUM_' 45 | if(node.type == 'PrimaryExpression' && node.code.matches('[0-9]+')) 46 | label = 'QUERYGEN_NUM' 47 | } 48 | chldren 49 | } 50 | 51 | public String getCode() 52 | { 53 | 54 | def nodeCode = g.v(id).code 55 | taintedSymbols.each{ 56 | def quotedSymbol = Pattern.quote(it) 57 | nodeCode = nodeCode.replaceAll('^' + quotedSymbol, '%s') 58 | nodeCode = nodeCode.replaceAll(quotedSymbol + '$', '%s') 59 | nodeCode = nodeCode.replaceAll(" " + quotedSymbol + " ", '%s') 60 | } 61 | 62 | nodeCode = nodeCode.replaceAll('(^| )[0-9]+( |$)', "(\\\\d+)") 63 | nodeCode = nodeCode.replaceAll(' (<|>)=? ', ' (<|>)=? ') 64 | nodeCode = nodeCode.replaceAll(' (==|!=) ', ' (==|!=) ') 65 | nodeCode = nodeCode.replaceAll('! ', '') 66 | nodeCode 67 | } 68 | 69 | } 70 | 71 | /** 72 | * For a given AST node, obtain all children recursively. 73 | **/ 74 | 75 | Object.metaClass.allASTNodes = { astNode -> 76 | def X = [astNode] 77 | X.addAll(astNode.children.collect{ allASTNodes(it) }.flatten()) 78 | X 79 | } 80 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/initGraphs/conditions.groovy: -------------------------------------------------------------------------------- 1 | 2 | class Condition{ 3 | def cndId; 4 | def syms; 5 | def normalizedAST; 6 | def id; 7 | 8 | Condition(i,symbols, graphObj) 9 | { 10 | cndId = i; syms = symbols; 11 | id = cndId.toString() + syms.toString(); 12 | normalizedAST = new ASTNode(i, syms, graphObj) 13 | } 14 | 15 | public boolean equals(java.lang.Object other) 16 | { 17 | if (other == null) return false 18 | if (this.is(other)) return true 19 | if (!(other instanceof Condition)) return false 20 | if (!other.canEqual(this)) return false 21 | if (cndId != other.cndId) return false 22 | if(syms != other.syms) return false 23 | return true 24 | } 25 | 26 | public boolean canEqual(java.lang.Object other) 27 | { 28 | return other instanceof Condition 29 | } 30 | 31 | public String getCode() 32 | { 33 | normalizedAST.getCode() 34 | } 35 | 36 | } 37 | 38 | /** 39 | * For a given invocation, generate checks per argument. 40 | * The result is a list with (narguments + 1) entries, one 41 | * for each argument and a trailing 'other'-group. Each 42 | * of these entries is a list of Conditions. 
43 | * */ 44 | 45 | Object.metaClass.genConditionsPerArg = { taintGraph, invoc -> 46 | def graphlet = taintGraph[invoc[0]] 47 | def args = graphlet.args 48 | checksPerArg_(taintGraph, invoc, graphlet, args) 49 | } 50 | 51 | 52 | Object.metaClass.checksPerArg_ = { def taintGraph, invoc, curGraphlet, args -> 53 | 54 | def edges = curGraphlet.edges 55 | def extEdges = curGraphlet.extEdges 56 | def argToCnd = curGraphlet.argToCnd 57 | def conditions = curGraphlet.conditions 58 | 59 | def parentOthers = [] 60 | 61 | def retval = args.collect{ def arg -> 62 | 63 | def vars = idListToNodes(edges[arg]).code.toList() 64 | def acc = argToCnd[arg].collect{ def c = new Condition(it, vars, g); 65 | curGraphlet.cndIdToObject[it] = c; } 66 | 67 | edges[arg].each{ def var -> edges[var].each{ def stmt -> 68 | 69 | if(!extEdges.containsKey(stmt)){ return } 70 | 71 | def o = extEdges[stmt].findAll{ invoc.contains(it[1]) }; 72 | if(o == []){return } 73 | 74 | def newGraphlet = taintGraph[o[0][1]] 75 | def newNode = o[0][0] 76 | 77 | def r = checksPerArg_(taintGraph, invoc, newGraphlet, [newNode]) 78 | 79 | if(r.size() > 0){ 80 | acc.addAll( flattenByOne(r.take(r.size() - 1))) 81 | parentOthers.addAll(r[r.size() -1]) 82 | } 83 | // acc.unique{ a,b -> a.equals(b) } 84 | 85 | } 86 | } 87 | acc 88 | } 89 | 90 | // now add 'others' 91 | 92 | def others = parentOthers.collect() + conditions.collect{ def c = new Condition(it, [], g); 93 | curGraphlet.cndIdToObject[it] = c; } 94 | args.eachWithIndex{ a, i -> others = others.minus(retval[i])} 95 | 96 | retval << others 97 | retval 98 | 99 | } 100 | 101 | /** 102 | From a node, determine all conditions that control its execution 103 | */ 104 | 105 | Gremlin.defineStep('controllingConditions', [Vertex, Pipe], { order = 1 -> 106 | 107 | _().statements().as('x').in('CONTROLS').loop('x'){it.loops <= order} 108 | {it.object.type == 'Condition'} 109 | .dedup() 110 | }) 111 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/initGraphs/creation.groovy: -------------------------------------------------------------------------------- 1 | 2 | // Referred to as `initialization graph` in the paper 3 | 4 | class TaintGraph{ 5 | def graphlets = [] 6 | def invocations = [] 7 | def callSiteId; 8 | 9 | TaintGraph(gphLets){ 10 | graphlets = gphLets.collect() 11 | } 12 | 13 | } 14 | 15 | /** 16 | Entry point for recursive creation of taint-graphs 17 | for a given call site. The function expects the id 18 | of the corresponding CallExpression database-node. 19 | */ 20 | 21 | Object.metaClass.createInitGraph = { callSiteId -> 22 | 23 | argSet = g.v(callSiteId)._().callToArguments().id.toList() //.sort() 24 | 25 | def tGraph = new TaintGraph(taintGraph_(argSet, [:], 0, 0)); 26 | tGraph.callSiteId = callSiteId 27 | tGraph 28 | } 29 | 30 | /** 31 | * See algorithm for initialization graph creation in the paper. 32 | * */ 33 | 34 | Object.metaClass.taintGraph_ = { def argSet, visited, curIdOffset, depth -> 35 | 36 | def callId, vars, defStmts; 37 | 38 | // maximum depth is four. 39 | if(argSet.size() == 0 || depth == 4) 40 | return [] 41 | 42 | callId = getCallId(argSet.head()) 43 | if(callId in visited) 44 | return [] 45 | 46 | // Create graphlet for this argument set 47 | 48 | def graphlet = createGraphlet(argSet, callId) 49 | 50 | // If there are no leaves, we may be dealing with a global variable 51 | // or a broken parse. 
52 | 53 | // if(graphlet.leaves == []) 54 | // return [] 55 | 56 | def rLeaves = createReverseIndex(graphlet.leaves) 57 | def uniqLeaves = graphlet.leaves.unique(false); 58 | 59 | // Get all leaf nodes that are parameters 60 | // If there are none (termination criterion), return graph 61 | 62 | def retval = [] 63 | def extEdges = [:] 64 | 65 | def paramNodes = uniqLeaves.findAll{ g.v(it).type == 'Parameter' } 66 | def paramNums = paramNodes.collect{ g.v(it)._().childNum.toList().head() } 67 | 68 | if(paramNodes.size() == 0){ 69 | graphlet.extEdges = [:] 70 | return [graphlet] 71 | } 72 | 73 | def callers = g.v(paramNodes[0])._().functions().functionToCallers().id.toList().sort() 74 | 75 | callers.each{ caller -> 76 | 77 | // transform parameterNodes to argumentNodes 78 | // and call taintGraph_ on set of argumentNodes 79 | 80 | def newArgNodes = paramNums.collect{ 81 | def x = g.v(caller)._().ithArguments(it).id.toList() 82 | if(x.size() == 0) return null 83 | x.head() 84 | } 85 | 86 | // Unable to find args for all params, skip this caller. 87 | if(null in newArgNodes) return; 88 | 89 | def graphlets = taintGraph_(newArgNodes, visited.plus([(callId) : 1]), curIdOffset + 1, depth + 1) 90 | 91 | retval.addAll(graphlets) 92 | 93 | if(graphlets.size() == 0) 94 | return // continue 95 | 96 | 97 | // add edges from leaves to argument nodes 98 | paramNodes.eachWithIndex{ paramNode, i -> 99 | argNode = newArgNodes[i] 100 | graphlet.leaves[rLeaves[paramNode]].each{ 101 | if(!extEdges[(it)]){ extEdges[(it)] = [] } 102 | extEdges[(it)] << [argNode,curIdOffset + 1] 103 | } 104 | } 105 | curIdOffset += graphlets.size() 106 | } 107 | 108 | graphlet.extEdges = extEdges 109 | graphlet.leaves = uniqLeaves 110 | 111 | retval.add(0, graphlet) 112 | 113 | return retval 114 | } 115 | 116 | 117 | /** 118 | * For a given node id, return node ids of all 119 | * sub-conditions. This includes the entire condition, 120 | * although the 'Condition'-root node is removed so that 121 | * it makes no difference whether a condition is part 122 | * of a larger condition or makes up the entire condition. 123 | * */ 124 | 125 | Object.metaClass.subConditions = { cnd -> 126 | 127 | def X = [] 128 | def sConditions = g.v(cnd)._().match{it.type in ["OrExpression", "AndExpression"] }.children().id.toList().sort() 129 | def firstChild = g.v(cnd)._().children().id.toList()[0]; 130 | 131 | X << firstChild; 132 | X.addAll(sConditions) 133 | X 134 | } 135 | 136 | Object.metaClass.getCallId = { arg -> 137 | g.v(arg).argToCall().id.toList()[0] 138 | } 139 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/initGraphs/decompression.groovy: -------------------------------------------------------------------------------- 1 | 2 | class Invocation{ 3 | 4 | // Invocations are fully defined by their 5 | // graphletIds. All other members simply cache 6 | // information that can be deduced from 7 | // the invocation. 8 | 9 | def graphletIds = [] 10 | def allGraphlets; 11 | 12 | def defStmtsPerArg = [] 13 | def checksPerArg = [] 14 | 15 | Invocation(ids){ 16 | graphletIds = ids.collect() 17 | } 18 | } 19 | 20 | /** 21 | Extract individual invocations as sub-sets of graphlets. 22 | */ 23 | 24 | /** 25 | Once a taint-graph has been constructed, extract individual 26 | invocations. An invocation is a subset of the graphlet set, i.e., 27 | `taintGraph`. 28 | Property: Parameter-nodes are connected to at most one argument.
29 | */ 30 | 31 | Object.metaClass.decompressInitGraph = { tGraph -> 32 | 33 | def graphlets = tGraph.graphlets 34 | 35 | def invocs = invocations_(graphlets, 0).collect{ new Invocation(it) } 36 | invocs = invocs.unique{ x,y -> compareLists(x.graphletIds, y.graphletIds) } 37 | 38 | invocs.each{ 39 | it.defStmtsPerArg = genDefStmtsPerArg(graphlets, it.graphletIds) 40 | } 41 | 42 | invocs = invocs.unique{ x,y -> compareLists(x.defStmtsPerArg, y.defStmtsPerArg) } 43 | 44 | 45 | invocs.each{ it.allGraphlets = graphlets } 46 | 47 | invocs 48 | } 49 | 50 | Object.metaClass.invocations_ = { def taintGraph, curGraphletId -> 51 | 52 | def ret = [] 53 | 54 | def graphlet = taintGraph[curGraphletId] 55 | def args = graphlet.args 56 | def edges = graphlet.edges 57 | def extEdges = graphlet.extEdges 58 | def leaves = graphlet.leaves 59 | 60 | // If there are no external edges, add a list containing only this 61 | // graphlet to the accumulator. 62 | 63 | if(extEdges == [:]){ return [[curGraphletId]] } 64 | 65 | // Determine all graphlets that are reachable via an external edge 66 | // originating from this graphlet. 67 | 68 | // id[1] is the graphlet-id 69 | 70 | def outGraphlets = extEdges.values().inject([]){ a, val -> a.plus(val) }.collect{ it[1] }.unique() 71 | 72 | // For each reachable graphlet: 73 | 74 | outGraphlets.each{ def id -> 75 | 76 | // Determine its list of invocations. Note that each entry in 77 | // this list is a set of graphlets. 78 | 79 | def invocs = invocations_(taintGraph, id) 80 | invocs.each{ 81 | 82 | // Combine each of these entries with this graphlet and add the 83 | // resulting list to the accumulator. 84 | 85 | ret << [curGraphletId].plus(it) 86 | } 87 | } 88 | ret 89 | } 90 | 91 | /** 92 | * Utility function to iterate over all invocations 93 | * */ 94 | 95 | Object.metaClass.collectForInvocations = { closure -> 96 | def acc = [] 97 | 98 | taintGraphs.each{ tGraph -> 99 | tGraph.invocations.each{ 100 | acc << closure(it) 101 | } 102 | } 103 | acc 104 | } 105 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/initGraphs/defStmtsPerArg.groovy: -------------------------------------------------------------------------------- 1 | Object.metaClass.genDefStmtsPerArg = { taintGraph, invoc -> 2 | 3 | if(invoc.size() == 0) return 4 | 5 | def graphlet = taintGraph[invoc[0]] 6 | def args = graphlet.args 7 | 8 | defStmtsPerArg_(taintGraph, invoc, graphlet, args) 9 | } 10 | 11 | Object.metaClass.defStmtsPerArg_ = { def taintGraph, invoc, curGraphlet, args -> 12 | 13 | def edges = curGraphlet.edges 14 | def extEdges = curGraphlet.extEdges 15 | 16 | args.collect{ def arg -> 17 | 18 | def acc = [] 19 | 20 | // walk to reachable defStmts in this graphlet 21 | 22 | edges[arg].each{ def var -> 23 | edges[var].each{ def stmt -> 24 | 25 | if( !extEdges.containsKey(stmt) ){ 26 | acc << stmt 27 | return 28 | } 29 | 30 | def o = extEdges[stmt].findAll{ invoc.contains(it[1]) }; 31 | 32 | if( o == []){ 33 | acc << stmt; 34 | return 35 | } 36 | 37 | // For those which are parameters, calculate the result of calling; 38 | // there's exactly one external link for this invoc 39 | 40 | def newGraphlet = taintGraph[o[0][1]] 41 | def newNode = o[0][0] 42 | def r = defStmtsPerArg_(taintGraph, invoc, newGraphlet, [newNode]).flatten() 43 | 44 | acc.addAll(r) 45 | 46 | if(r == [] ){ 47 | acc << stmt 48 | } 49 | 50 | } 51 | } 52 | acc 53 | } 54 | } 55 | --------------------------------------------------------------------------------
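The three building blocks above are meant to be chained: createInitGraph builds the initialization graph for a call site, decompressInitGraph extracts its invocations, and genDefStmtsPerArg yields one list of defining statements per argument. A minimal sketch of that flow, assuming a Gremlin console with these steps loaded; the function name 'two_arg_sink_caller_p' is borrowed from the test suite and any call-site id would work in its place:

    // pick a call site to asink(...) inside two_arg_sink_caller_p (name taken from the tests)
    def callSiteId = getFunctionASTsByName('two_arg_sink_caller_p')
            .match{ it.type == 'CallExpression' && it.code.startsWith('asink') }
            .id.toList()[0]

    def tGraph = createInitGraph(callSiteId)      // initialization graph for the call site
    def invocs = decompressInitGraph(tGraph)      // individual invocations (subsets of graphlets)

    // one list of defining-statement ids per argument, for each invocation
    invocs.each{ invoc ->
        println genDefStmtsPerArg(invoc.allGraphlets, invoc.graphletIds)
    }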
/joern/joernsteps/taintTracking/initGraphs/localDefTree.groovy: -------------------------------------------------------------------------------- 1 | 2 | // Referred to as `local def tree` in the paper 3 | 4 | class Graphlet{ 5 | def edges; // edges in the local def tree 6 | def extEdges; // edges to other graphlets 7 | def leaves; 8 | def args; 9 | def conditions; 10 | def argToCnd; 11 | def cndIdToObject = [:] 12 | } 13 | 14 | /** 15 | Create a graphlet: 16 | 17 | argSet: set of argument ids 18 | vars: map from argIds to list of variables used 19 | defStmts: map from variables to def-statements 20 | 21 | The resulting tree is rooted at a callId-node 22 | that is linked to all argument nodes. These are 23 | in turn linked to variable nodes, which are linked 24 | to def-statements. 25 | 26 | */ 27 | 28 | Object.metaClass.createGraphlet = { argSet, callId -> 29 | 30 | (vars, defStmts) = varsAndDefStmts(argSet, callId) 31 | 32 | if(argSet.size() == 0) return; 33 | 34 | def graphlet = new Graphlet() 35 | 36 | // Note: 'symbolsUsed' are all symbols used by the condition, 37 | // not the sub-condition, as usage information is not available 38 | // per sub-tree. 39 | 40 | // For each defStmt, get controlling conditions 41 | 42 | def cndUsesPairs = defStmts.values().flatten().collect{ g.v(it) } 43 | 44 | ._().controllingConditions(1) 45 | .sideEffect{ symbolsUsed = it.usesFiltered().id.toList() } 46 | .transform{ subConditions(it.id) }.scatter() 47 | .transform{ [it, symbolsUsed.collect()] }.toList() 48 | .sort() 49 | 50 | 51 | // For call-site, get controlling conditions 52 | 53 | cndUsesPairs.addAll(g.v(argSet[0]) 54 | ._().controllingConditions(3) 55 | .sideEffect{ symbolsUsed = it.usesFiltered().id.toList() } 56 | .transform{ subConditions(it.id) }.scatter() 57 | .transform{ [it, symbolsUsed.collect()] }.toList() 58 | .sort()) 59 | 60 | cndUsesPairs.sort() 61 | 62 | def conditions = cndUsesPairs.collect{ it[0] } 63 | 64 | 65 | def leafNodes = defStmts.values().flatten().sort().unique() 66 | def edges = [:] 67 | 68 | // create edges from each condition to the variables 69 | // it uses. 70 | 71 | cndUsesPairs.each{ edges[it[0]] = it[1] } 72 | 73 | edges[(callId)] = [] 74 | argSet.each{ edges[(callId)] << it } 75 | 76 | argSet.eachWithIndex{ arg, i -> 77 | if(!edges[(arg)]){ edges[(arg)] = [] } 78 | vars[i].each{ edges[(arg)] << it } 79 | } 80 | 81 | defStmts.each{ varId, stmtIds -> 82 | if(!edges[(varId)]){ edges[(varId)] = [] } 83 | stmtIds.each{ edges[(varId)] << it } 84 | } 85 | 86 | 87 | // create map from conditions to arguments that consume a variable 88 | // used in the condition 89 | 90 | def argToCnd = [:] 91 | 92 | argSet.each{ def arg -> 93 | argToCnd[arg] = [] 94 | conditions.each{ def cond -> 95 | if(!edges[cond].disjoint(edges[arg])){ 96 | argToCnd[arg] << cond 97 | } 98 | } 99 | } 100 | 101 | graphlet.args = argSet 102 | graphlet.edges = edges 103 | graphlet.leaves = leafNodes 104 | graphlet.conditions = conditions 105 | graphlet.argToCnd = argToCnd 106 | 107 | graphlet 108 | } 109 | 110 | Object.metaClass.varsAndDefStmts = { argSet, callId -> 111 | 112 | 113 | // For each argument, determine variables used. 114 | // (The .dedup is a precaution and should not be 115 | // necessary.)
116 | 117 | /* def vars = argSet.collect{ g.v(it)._().usesFiltered().id.dedup().toList() } 118 | def varsCode = argSet.collect{ g.v(it)._().usesFiltered().code.dedup().toList() }.flatten() 119 | def defStmts = [:] 120 | 121 | // For each variable used, determine direct DEF-statements 122 | // (There can be several direct DEF statements for a variable) 123 | 124 | vars.flatten().eachWithIndex { varId, i -> 125 | X = directDefs(callId, varsCode[i]) 126 | if(X.size() > 0){ defStmts[varId] = X } 127 | } 128 | [vars, defStmts]*/ 129 | 130 | 131 | def vars = [] 132 | def defStmts = [:] 133 | def visited = [] 134 | def varsForArg = [] 135 | 136 | // Perform depth-first traversal for each 137 | // argument independently 138 | 139 | argSet.each{ arg -> 140 | 141 | def nodes = [[arg, 0]] 142 | varsForArg = [] 143 | visited = [] 144 | 145 | while(nodes.size() > 0){ 146 | def curNode = nodes.remove(0) 147 | def newNodes = varsAndDefExpand(curNode, varsForArg, defStmts, visited) 148 | nodes.addAll(newNodes.collect()) 149 | } 150 | 151 | vars.add(varsForArg.collect()) 152 | } 153 | 154 | [vars.collect(), defStmts] 155 | } 156 | 157 | Object.metaClass.varsAndDefExpand = { curNode, varsForArg, defStmts, visited -> 158 | 159 | def newDefs = [] 160 | 161 | (nodeId, depth) = curNode 162 | 163 | if(depth == 1) // MAXDEPTH-parameter 164 | return newDefs 165 | 166 | if(nodeId in visited) 167 | return newDefs 168 | visited.add(nodeId) 169 | 170 | def node = g.v(nodeId) 171 | 172 | if(node.type == "Argument"){ 173 | 174 | def symbolNodeIds = getSymbolNodeIds(node) 175 | varsForArg.addAll(symbolNodeIds.collect()) 176 | varsForArg.unique() 177 | def statementId = getCallId(node.id) 178 | newDefs = expandSymbolNodes(symbolNodeIds, statementId, defStmts) 179 | 180 | }else{ 181 | 182 | def symbolNodeIds = getSymbolNodeIds(node) 183 | varsForArg.addAll(symbolNodeIds.collect()) 184 | varsForArg.unique() 185 | def statementId = node._().statements().id.toList()[0] 186 | newDefs = expandSymbolNodes(symbolNodeIds, statementId, defStmts) 187 | 188 | } 189 | 190 | return newDefs.collect{ [it, depth + 1] } 191 | } 192 | 193 | Object.metaClass.getSymbolNodeIds = {node -> 194 | node._().usesFiltered().id.dedup().toList() 195 | } 196 | 197 | Object.metaClass.expandSymbolNodes = { symbolNodeIds, statementId, defStmts -> 198 | 199 | def newDefs = [] 200 | 201 | symbolNodeIds.each{ symbolNodeId -> 202 | def node = g.v(symbolNodeId) 203 | def varCode = node.code 204 | def defsForSymbol = directDefs(statementId, varCode) 205 | 206 | defsForSymbol.each{ it -> 207 | if(defStmts[symbolNodeId] == null) 208 | defStmts[symbolNodeId] = [] 209 | defStmts[symbolNodeId].add(it) 210 | defStmts[symbolNodeId].unique() 211 | } 212 | 213 | newDefs.addAll(defsForSymbol) 214 | } 215 | 216 | newDefs.unique() 217 | return newDefs 218 | } 219 | 220 | 221 | /** 222 | Get directly connected reaching definitions of 223 | variable `variable` and node with given id. 
224 | */ 225 | 226 | Object.metaClass.directDefs = { id, variable -> 227 | g.v(id)._().statements() 228 | .sideEffect{ srcId = it.id; } 229 | .In("REACHES", "var", [variable] ).id.filter{ it != srcId}.toList().sort() 230 | // .backwardSlice([(variable)], 1, ['REACHES']).id.filter{ it != srcId}.toList() 231 | } 232 | 233 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/interproc.groovy: -------------------------------------------------------------------------------- 1 | import java.util.regex.Pattern; 2 | 3 | /****************************************************** 4 | * Steps for interprocedural analysis 5 | * Experimental and subject to change. 6 | ******************************************************/ 7 | 8 | /** 9 | * Identity for non-parameters. 10 | * For parameters, expand into caller arguments. 11 | **/ 12 | 13 | Gremlin.defineStep('expandParameters', [Vertex, Pipe], { 14 | 15 | _().transform{ 16 | if(it.type == 'Parameter'){ 17 | def l = it.parameterToCallerArgs().toList(); 18 | if(l != []) l else it._().toList() 19 | } 20 | else 21 | it._().toList() 22 | }.scatter() 23 | 24 | }) 25 | 26 | /** 27 | For a given parameter, get all nodes of arguments of callers. 28 | */ 29 | 30 | Gremlin.defineStep('parameterToCallerArgs', [Vertex, Pipe], { 31 | _().transform{ 32 | paramNum = it.childNum; 33 | funcName = it.functions().name.toList()[0]; 34 | 35 | funcName = funcName.split(' ')[-1].trim() 36 | funcName = funcName.replace('*', '') 37 | 38 | getCallsTo(funcName).ithArguments(paramNum) 39 | }.scatter() 40 | }) 41 | 42 | 43 | /** 44 | * Identity for nodes that do not contain arguments. 45 | * For arguments, descend into called function 46 | * parameters. 47 | **/ 48 | 49 | Gremlin.defineStep('expandArguments', [Vertex, Pipe], { 50 | _().transform{ 51 | 52 | def args = it.match{ it.type == "Argument"}.toList() 53 | 54 | if(args != []){ 55 | def l = args._().argToParameters().toList(); 56 | if(l != []) l else it._().toList() 57 | }else 58 | it._().toList() 59 | }.scatter() 60 | }) 61 | 62 | Gremlin.defineStep('argToParameters', [Vertex, Pipe], { 63 | _().transform{ 64 | argNum = it.childNum; 65 | def callee = it.argToCall().callToCallee().code.toList()[0] 66 | callee = callee.replace('* ', '') 67 | // callee = callee.split("(::)|(\\.)")[-1].trim() 68 | callee = callee.split(' ')[-1].trim() 69 | 70 | getFunctionASTsByName(callee) 71 | .children().filter{ it.type == "ParameterList"} 72 | .children().filter{ it.childNum == argNum}.toList() 73 | }.scatter() 74 | }) 75 | 76 | 77 | /** 78 | * For a given call-site, return statements in the callee 79 | * that taint arguments, i.e., assign to the function's parameters. 80 | * */ 81 | 82 | Gremlin.defineStep('argTainters', [Vertex,Pipe], { 83 | 84 | _().transform{ 85 | 86 | def params = it.taintedArguments().expandArguments().toList(); 87 | 88 | if(params == []) 89 | return []._() 90 | 91 | symbols = params._().transform{ x = it.code.split(' '); x[1 .. ( x.size()-1)].join(' ') }.toList() 92 | params[0]._().toExitNode().producers(symbols).toList() 93 | }.scatter() 94 | 95 | 96 | }) 97 | 98 | /** 99 | * For a given call-site, return arguments that are tainted. 
100 | * */ 101 | 102 | Gremlin.defineStep('taintedArguments', [Vertex,Pipe], { 103 | _().callToArguments() 104 | .filter{ it.defines().toList() != [] } 105 | }) 106 | 107 | 108 | Gremlin.defineStep('checks', [Vertex,Pipe], { regex -> 109 | 110 | _().as('y').match{ it.type in ['EqualityExpression', 'RelationalExpression', 'PrimaryExpression', 'UnaryOp'] } 111 | .back('y').uses().filter{ it.code.matches('.*' + Pattern.quote(regex) + '.*') } 112 | 113 | }) 114 | 115 | Gremlin.defineStep('checksRaw', [Vertex,Pipe], { regex -> 116 | 117 | _().as('y').match{ it.type in ['EqualityExpression', 'RelationalExpression', 'PrimaryExpression', 'UnaryOp'] } 118 | .back('y').uses().filter{ it.code.matches(regex) } 119 | 120 | 121 | }) 122 | 123 | Gremlin.defineStep('calls', [Vertex,Pipe], { regex -> 124 | 125 | _().match{ it.type in ['Callee'] } 126 | .filter{ it.code.matches('.*' + Pattern.quote(regex) + '.*') } 127 | }) 128 | 129 | Gremlin.defineStep('codeMatches', [Vertex, Pipe], { regex, s -> 130 | s = Pattern.quote(s) 131 | if(regex.contains("%s")) 132 | _().filter{it.code.matches(String.format(regex, s)) } 133 | else 134 | _().filter{it.code.matches(regex) } 135 | }) 136 | 137 | Object.metaClass.NO_RESTRICTION = { a,s -> []} 138 | Object.metaClass.ANY_SOURCE = { [1]._() } 139 | 140 | Object.metaClass.source = { closure -> 141 | return { if(closure(it)) [10] else [] } 142 | } 143 | 144 | Object.metaClass.sourceMatches = { regex -> 145 | return { 146 | if(it.apiSyms().filter{ it.matches(regex) }.toList()) 147 | return [10] 148 | if( it.code.matches(regex) ) 149 | return [10] 150 | return [] 151 | } 152 | } 153 | 154 | /** Unused right now */ 155 | 156 | /** 157 | For a given argument node, determine direct initializers, i.e., 158 | the last statements that tainted any of the variables 159 | within this function or a caller. Note that this traversal 160 | DOES NOT enter the callee. 161 | 162 | */ 163 | 164 | Gremlin.defineStep('argToInitNodes', [Vertex, Pipe], { 165 | 166 | _().argToInitNodesLocal() 167 | 168 | .ifThenElse{ it.type == 'Parameter'} 169 | { it.parameterToCallerArgs().argToInitNodes().scatter() } 170 | { it } 171 | }) 172 | 173 | /** 174 | For a given argument node, determine local initializers. 175 | These may be parameters of the function. 176 | */ 177 | 178 | Gremlin.defineStep('argToInitNodesLocal', [Vertex, Pipe], { 179 | 180 | _().sideEffect{ stmtId = it.statements().id.toList()[0] } 181 | .sliceBackFromArgument(1, ["REACHES"]) 182 | .filter{ it.id != stmtId } 183 | }) 184 | 185 | 186 | Gremlin.defineStep('nonEmpty', [Vertex,Pipe], { closure -> 187 | _().filter{ closure(it).toList() != [] } 188 | }) 189 | 190 | /** 191 | Starting from a sink-node 'it' and for a given 192 | source-description 'sourceDescription', find all 193 | source nodes that match the source description 194 | even across the boundaries of functions. 195 | Elements in the returned list are pairs of the form 196 | [id, isFinalNode] where 'id' is the node's id and 197 | isFinalNode indicates whether no further expansion 198 | of this node was performed.
199 | **/ 200 | 201 | Object.metaClass.getNodesToSrc = { it, sourceDescription, N_LOOPS -> 202 | 203 | _getNodesToSrc(it, sourceDescription, 0, N_LOOPS).unique() 204 | } 205 | 206 | Object.metaClass._getNodesToSrc = { it, src, depth, N_LOOPS -> 207 | 208 | 209 | if(src(it).toList() != [1] && src(it).toList() != []){ 210 | // found src 211 | return [ [it.id,true] ] 212 | } 213 | 214 | if(depth == N_LOOPS){ 215 | if(src(it).toList() == [1]) 216 | return [ [it.id,true] ] 217 | else 218 | return [] 219 | } 220 | 221 | def children = it._().taintedArgExpand() 222 | // .expandParameters().allProducers() 223 | .toList() 224 | 225 | def x = children.collect{ child -> 226 | _getNodesToSrc(child, src, depth + 1, N_LOOPS) 227 | } 228 | .inject([]) {acc, val-> acc.plus(val)} // flatten by one layer 229 | .unique() 230 | 231 | if(x == []) 232 | return [[it.id, true]] 233 | else 234 | return x.plus([[it.id, false]]) 235 | } 236 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/queryTemplate.groovy: -------------------------------------------------------------------------------- 1 | 2 | 3 | /** 4 | * For a given call-site, get all invocations that match 5 | * argument descriptions. `argDescrs` contains a closure 6 | * for each argument. 7 | * */ 8 | 9 | Gremlin.defineStep('taintedArgs', [Vertex, Pipe], { argDescrs -> 10 | 11 | // Before we can do anything, we need to generate 12 | // an initialization graph for the call-site 13 | 14 | _().transform{ 15 | callId = it.id 16 | tGraph = createInitGraph(callId) 17 | 18 | // Check if tainted arg fulfills necessary condition 19 | // if it doesn't, then we can return an empty set 20 | 21 | if(!canBeTainted(tGraph, argDescrs)) 22 | return [] 23 | 24 | // necessary condition is fulfilled. 25 | // now decompress the initialization graph 26 | 27 | invocs = decompressInitGraph(tGraph) 28 | invocs.findAll{ isTainted(it, argDescrs) } 29 | }.scatter() 30 | }) 31 | 32 | /** 33 | * Necessary condition in paper. 34 | * */ 35 | 36 | Object.metaClass.canBeTainted = { tGraph, argDescrs -> 37 | 38 | // In the future, we want to do this per arg, 39 | // doesn't matter right now, it's only 40 | // a necessary condition anyway. 41 | 42 | def leaveNodes = tGraph.graphlets.leaves.flatten() 43 | .collect{ g.v(it) } 44 | 45 | for(it in argDescrs){ 46 | if (leaveNodes.findAll(it) == []) 47 | return false 48 | } 49 | return true 50 | } 51 | 52 | /** 53 | * Sufficient condition in paper 54 | * */ 55 | 56 | Object.metaClass.isTainted = { invoc, argDescrs -> 57 | 58 | for(int i = 0; i < argDescrs.size(); i++){ 59 | f = argDescrs[i] 60 | 61 | // This allows us to handle 'ANY_SOURCE' 62 | // We take it out, meaning that we ask for 63 | // the source to be initialized in some way 64 | // that discards constants. 
65 | 66 | // try{ 67 | // if(invoc.defStmtsPerArg[i] == [] && f() ) 68 | // continue; 69 | 70 | // }catch(RuntimeException r){} 71 | 72 | if(invoc.defStmtsPerArg[i].collect{ g.v(it) }.findAll(f).toList() == []) 73 | return false 74 | } 75 | return true 76 | } 77 | 78 | Gremlin.defineStep('unchecked', [Vertex,Pipe], { argDescrs -> 79 | _().transform{ 80 | 81 | it.checksPerArg = genConditionsPerArg(it.allGraphlets, it.graphletIds) 82 | 83 | def nArgsToSanitize = it.checksPerArg.size() - 1; 84 | 85 | // subtract one because the last one contains conditions unassigned to symbols 86 | for(int i = 0; i < it.checksPerArg.size() -1; i++){ 87 | f = argDescrs[i] 88 | syms = it.checksPerArg[i].syms.flatten() 89 | 90 | if(f == null){ 91 | nArgsToSanitize--; 92 | continue 93 | } 94 | 95 | for(int j = 0; j < syms.size(); j++){ 96 | // if one of the sanitizer-descriptions matches, this is sanitized 97 | X = it.checksPerArg[i].flatten().cndId.collect{ g.v(it) } 98 | .findAll{ x -> f(x, syms[j]) } 99 | if( X != []){ 100 | nArgsToSanitize--; 101 | break; 102 | } 103 | } 104 | 105 | } 106 | 107 | if(nArgsToSanitize == 0) return [] 108 | // none of the sanitizer-descriptions matched 109 | return [it] 110 | }.scatter() 111 | }) 112 | -------------------------------------------------------------------------------- /joern/joernsteps/taintTracking/taintedArg.groovy: -------------------------------------------------------------------------------- 1 | 2 | /** 3 | * For a given call-expression, determine whether its `argNum`'th 4 | * argument is tainted by a statement matching the source description. 5 | * */ 6 | 7 | Gremlin.defineStep('taintedArg', [Vertex, Pipe], { argNum, src = { [1]._() }, N_LOOPS = 4 -> 8 | _().filter{ 9 | argIsTainted(it, argNum, src, N_LOOPS) 10 | } 11 | }) 12 | 13 | Object.metaClass.argIsTainted = { node, argNum, src, N_LOOPS = 2 -> 14 | 15 | node.ithArguments(argNum) 16 | .as('y').taintedArgExpand() 17 | // .expandParameters().allProducers() 18 | .dedup() 19 | .loop('y'){ it.loops <= N_LOOPS && (src(it.object).toList() == [] || src(it.object).toList() == [1] ) } 20 | {true} 21 | // { src(it.object).toList() == [10] } 22 | .filter{ src(it).toList() != [] } 23 | .toList() != [] 24 | } 25 | 26 | Gremlin.defineStep('taintedArgExpand', [Vertex, Pipe], { 27 | _().transform{ 28 | 29 | if(it.type == 'Parameter') 30 | it.expandParameters().toList() 31 | else{ 32 | def l = []; 33 | tainters = it.match{it.type == 'CallExpression'}.argTainters().toList() 34 | l.addAll(tainters) 35 | l.addAll(it.allProducers().toList()) 36 | l 37 | } 38 | 39 | }.scatter() 40 | 41 | }) 42 | 43 | 44 | /** 45 | * For a given node, obtain producers for 46 | * all symbols it uses. 47 | **/ 48 | 49 | Gremlin.defineStep('allProducers', [Vertex, Pipe], { 50 | _().transform{ 51 | it.producers(it.uses().code.toList()) 52 | }.scatter() 53 | }) 54 | -------------------------------------------------------------------------------- /joern/joernsteps/typeInference/local.groovy: -------------------------------------------------------------------------------- 1 | /** 2 | * For a given statement, return all dominating nodes. 3 | */ 4 | Object.metaClass._domset = {stmt -> 5 | def domset = [stmt] as Set 6 | domset += stmt._().in('DOM').loop(1){true}{true}.toList() 7 | return domset 8 | }; 9 | 10 | /** 11 | * For a given statement, return the nearest dominator matching 12 | * the closure 'closure'.
13 | */ 14 | Object.metaClass._domMatch = {stmt, closure -> 15 | def v = stmt; 16 | while (v.type != 'CFGEntryNode') { 17 | if (closure(v)) 18 | return [v] 19 | v = v.in('DOM').next() 20 | } 21 | return [] 22 | } 23 | 24 | Gremlin.defineStep('domset', [Vertex, Pipe], { 25 | _().transform{ _domset(it) }.scatter() 26 | }); 27 | 28 | Gremlin.defineStep('domMatch', [Vertex, Pipe], { closure -> 29 | _().transform{ _domMatch(it, closure) }.scatter() 30 | }); 31 | 32 | /** 33 | * Starting at an identifier traverse to the type node. 34 | */ 35 | Gremlin.defineStep('types', [Vertex, Pipe], { 36 | _().sideEffect{var = it.code}.statements() 37 | .domMatch{!it.match{it.type in ['IdentifierDecl', 'Parameter']}.ithChildren('1').filter{it.code == var}.toList().isEmpty()} 38 | .match{it.type in ['IdentifierDecl', 'Parameter']}.ithChildren('0') 39 | }); 40 | 41 | /** 42 | * Filter identifiers by type. 43 | */ 44 | Gremlin.defineStep('hasType', [Vertex, Pipe], { type -> 45 | _().as('identifier').types() 46 | .filter{it.code == type} 47 | .back('identifier') 48 | }); 49 | 50 | Gremlin.defineStep('matchesType', [Vertex, Pipe], { type -> 51 | _().as('identifier').types() 52 | .filter{it.code.matches(type)} 53 | .back('identifier') 54 | }); 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from setuptools import setup, find_packages 4 | 5 | def read(fname): 6 | return open(os.path.join(os.path.dirname(__file__), fname)).read() 7 | 8 | setup( 9 | name = "joern", 10 | version = "0.1", 11 | author = "Fabian Yamaguchi", 12 | author_email = "fyamagu@gwdg.de", 13 | description = "A python interface to the code analysis tool joern.", 14 | license = "GPLv3", 15 | url = "http://github.com/fabsx00/", 16 | long_description = read('README.md'), 17 | packages = find_packages(), 18 | package_data={"joern": ['joernsteps/*.groovy', 19 | 'joernsteps/learning/*.groovy', 20 | 'joernsteps/syntax/*.groovy', 21 | 'joernsteps/taintTracking/*.groovy', 22 | 'joernsteps/taintTracking/initGraphs/*.groovy', 23 | 'joernsteps/typeInference/*.groovy', 24 | ]}, 25 | install_requires = ['py2neo >= 2.0.7'] 26 | ) 27 | -------------------------------------------------------------------------------- /testing/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # Run sanity checks against test database 3 | 4 | import unittest 5 | from tests import * 6 | 7 | if __name__ == '__main__': 8 | unittest.main() 9 | 10 | -------------------------------------------------------------------------------- /testing/tests/PythonJoernTests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from joern.all import JoernSteps 3 | 4 | class PythonJoernTests(unittest.TestCase): 5 | 6 | def setUp(self): 7 | self.j = JoernSteps() 8 | self.j.connectToDatabase() 9 | 10 | def tearDown(self): 11 | pass 12 | -------------------------------------------------------------------------------- /testing/tests/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from lookupTests import * 3 | from compositionTests import * 4 | from udgTests import * 5 | from dataFlowTests import * 6 | from cfgTests import * 7 | from parsingTests import * 8 | from interproc import * 9 | from initGraphTests import * 10 | 
-------------------------------------------------------------------------------- /testing/tests/cfgTests.py: -------------------------------------------------------------------------------- 1 | from PythonJoernTests import * 2 | 3 | class CFGTests(PythonJoernTests): 4 | 5 | def _numberCFGNodes(self, function): 6 | query = """getFunctionsByName('%s') 7 | .functionToStatements()""" % function 8 | return self.j.runGremlinQuery(query) 9 | 10 | def _numberCFGEdges(self, function): 11 | query = """getFunctionsByName('%s') 12 | .functionToStatements().outE('FLOWS_TO')""" % function 13 | return self.j.runGremlinQuery(query) 14 | 15 | def _CFGConditionEdgeLabels(self, function): 16 | query = """getFunctionsByName('%s') 17 | .functionsToASTNodesOfType('Condition') 18 | .outE('FLOWS_TO').flowLabel""" % function 19 | return self.j.runGremlinQuery(query) 20 | 21 | def _CFGInfiniteNodeEdgeLabels(self, function): 22 | query = """getFunctionsByName('%s') 23 | .functionsToASTNodesOfType('InfiniteForNode') 24 | .outE('FLOWS_TO').flowLabel""" % function 25 | return self.j.runGremlinQuery(query) 26 | 27 | def _isInLoop(self, function, callee): 28 | query = """isInLoop(getFunctionsByName('%s') 29 | .functionsToASTNodesOfType('Callee') 30 | .filter { it.code == '%s' }.toList()[0])""" % (function, callee) 31 | return self.j.runGremlinQuery(query) 32 | 33 | 34 | 35 | def testSwitch1(self): 36 | self.assertEquals(len(self._numberCFGNodes('switch_test1')), 9) 37 | self.assertEquals(len(self._numberCFGEdges('switch_test1')), 11) 38 | labels = self._CFGConditionEdgeLabels('switch_test1') 39 | self.assertIn('case 1', labels) 40 | self.assertIn('case 2', labels) 41 | self.assertIn('case 3', labels) 42 | self.assertEqual(len(labels), 4) 43 | 44 | def testSwitch2(self): 45 | self.assertEquals(len(self._numberCFGNodes('switch_test2')), 10) 46 | self.assertEquals(len(self._numberCFGEdges('switch_test2')), 12) 47 | labels = self._CFGConditionEdgeLabels('switch_test2') 48 | self.assertIn('case 1', labels) 49 | self.assertIn('case 2', labels) 50 | self.assertIn('case 3', labels) 51 | self.assertEqual(len(labels), 4) 52 | 53 | def testSwitch3(self): 54 | self.assertEquals(len(self._numberCFGNodes('switch_test3')), 9) 55 | self.assertEquals(len(self._numberCFGEdges('switch_test3')), 10) 56 | labels = self._CFGConditionEdgeLabels('switch_test3') 57 | self.assertIn('case 1', labels) 58 | self.assertIn('case 2', labels) 59 | self.assertIn('default', labels) 60 | self.assertEqual(len(labels), 3) 61 | 62 | def testSwitch4(self): 63 | self.assertEquals(len(self._numberCFGNodes('switch_test4')), 9) 64 | self.assertEquals(len(self._numberCFGEdges('switch_test4')), 11) 65 | labels = self._CFGConditionEdgeLabels('switch_test4') 66 | self.assertIn("case '1'", labels) 67 | self.assertIn("case '2'", labels) 68 | self.assertIn("case '3'", labels) 69 | self.assertEqual(len(labels), 4) 70 | 71 | def testGoto(self): 72 | self.assertEquals(len(self._numberCFGNodes('goto_test')), 5) 73 | self.assertEquals(len(self._numberCFGEdges('goto_test')), 5) 74 | labels = self._CFGConditionEdgeLabels('goto_test') 75 | self.assertIn('True', labels) 76 | self.assertIn('False', labels) 77 | self.assertEqual(len(labels), 2) 78 | 79 | def testSimpleFor(self): 80 | self.assertEquals(len(self._numberCFGNodes('simple_for_test')), 7) 81 | self.assertEquals(len(self._numberCFGEdges('simple_for_test')), 7) 82 | labels = self._CFGConditionEdgeLabels('simple_for_test') 83 | self.assertIn('True', labels) 84 | self.assertIn('False', labels) 85 | 
self.assertEqual(len(labels), 2) 86 | self.assertTrue(self._isInLoop('simple_for_test', 'A')) 87 | 88 | def testInfiniteFor(self): 89 | self.assertEquals(len(self._numberCFGNodes('infinite_for_test')), 4) 90 | self.assertEquals(len(self._numberCFGEdges('infinite_for_test')), 4) 91 | labels = self._CFGInfiniteNodeEdgeLabels('infinite_for_test') 92 | self.assertIn('True', labels) 93 | self.assertIn('False', labels) 94 | self.assertEqual(len(labels), 2) 95 | self.assertTrue(self._isInLoop('infinite_for_test', 'A')) 96 | 97 | def testFor1(self): 98 | self.assertEquals(len(self._numberCFGNodes('for_test1')), 4) 99 | self.assertEquals(len(self._numberCFGEdges('for_test1')), 4) 100 | labels = self._CFGConditionEdgeLabels('for_test1') 101 | self.assertIn('True', labels) 102 | self.assertIn('False', labels) 103 | self.assertEqual(len(labels), 2) 104 | self.assertTrue(self._isInLoop('for_test1', 'A')) 105 | 106 | def testFor2(self): 107 | self.assertEquals(len(self._numberCFGNodes('for_test2')), 5) 108 | self.assertEquals(len(self._numberCFGEdges('for_test2')), 5) 109 | labels = self._CFGConditionEdgeLabels('for_test2') 110 | self.assertIn('True', labels) 111 | self.assertIn('False', labels) 112 | self.assertEqual(len(labels), 2) 113 | self.assertTrue(self._isInLoop('for_test2', 'A')) 114 | 115 | def testComplexCFG(self): 116 | self.assertEquals(len(self._numberCFGNodes('complex_test')), 29) 117 | self.assertEquals(len(self._numberCFGEdges('complex_test')), 36) 118 | self.assertTrue(self._isInLoop('complex_test', 'A')) 119 | self.assertFalse(self._isInLoop('complex_test', 'B')) 120 | -------------------------------------------------------------------------------- /testing/tests/compositionTests.py: -------------------------------------------------------------------------------- 1 | 2 | from PythonJoernTests import * 3 | 4 | class CompositionTests(PythonJoernTests): 5 | 6 | def testSyntaxOnlyChaining(self): 7 | 8 | # functions calling foo AND bar 9 | 10 | query = "getCallsTo('foo').getCallsTo('bar')" 11 | x = self.j.runGremlinQuery(query) 12 | self.assertEquals(len(x), 1) 13 | 14 | def testNotComposition(self): 15 | 16 | # functions calling foo AND NOT bar 17 | 18 | query = "getCallsTo('foo').not{getCallsTo('bar')}" 19 | x = self.j.runGremlinQuery(query) 20 | self.assertEquals(len(x), 6) 21 | 22 | def testPairsComposition(self): 23 | 24 | query = """queryNodeIndex('type:AssignmentExpr AND code:"x = bar ( y )"') 25 | .pairs( _().lval().code, _().rval().code)""" 26 | x = self.j.runGremlinQuery(query) 27 | self.assertEquals(x[0][0], "x") 28 | self.assertEquals(x[0][1], "bar ( y )") 29 | -------------------------------------------------------------------------------- /testing/tests/dataFlowTests.py: -------------------------------------------------------------------------------- 1 | 2 | from PythonJoernTests import PythonJoernTests 3 | 4 | class DataFlowTests(PythonJoernTests): 5 | 6 | def testSources(self): 7 | query = """getFunctionASTsByName('ddg_simplest_test') 8 | .getCallsTo('foo') 9 | .statements() 10 | .sources().code 11 | """ 12 | x = self.j.runGremlinQuery(query) 13 | self.assertEquals(len(x), 1) 14 | 15 | def testProducers(self): 16 | query = """ getFunctionASTsByName('ddg_simplest_test') 17 | .getCallsTo('foo') 18 | .statements() 19 | .producers(['x']) 20 | """ 21 | x = self.j.runGremlinQuery(query) 22 | self.assertEquals(len(x), 1) 23 | 24 | def testProducersNegative(self): 25 | query = """ getFunctionASTsByName('ddg_simplest_test') 26 | .getCallsTo('foo') 27 | .statements() 28 | 
.producers(['']) 29 | """ 30 | x = self.j.runGremlinQuery(query) 31 | self.assertEquals(len(x), 0) 32 | 33 | def testCfgPaths(self): 34 | 35 | query = """ 36 | 37 | dstNode = getFunctionASTsByName('ddg_simplest_test') 38 | .getCallsTo('foo').statements().toList()[0] 39 | 40 | srcNode = getFunctionASTsByName('ddg_simplest_test') 41 | .getNodesWithTypeAndCode('AssignmentExpr', '*').statements().toList()[0] 42 | 43 | cfgPaths('x', { it, s -> [] } , srcNode, dstNode ) 44 | """ 45 | x = self.j.runGremlinQuery(query) 46 | self.assertEquals(len(x[0]), 2) 47 | 48 | def testUnsanitized(self): 49 | query = """ 50 | 51 | getFunctionASTsByName('ddg_simplest_test') 52 | .getCallsTo('foo') 53 | .statements() 54 | .unsanitized({ it, s -> []}) 55 | """ 56 | x = self.j.runGremlinQuery(query) 57 | self.assertEquals(len(x), 1) 58 | 59 | def testCallTainting(self): 60 | query = """ 61 | getFunctionASTsByName('test_call_tainting'). 62 | getCallsTo('taint_source'). 63 | sinks().code 64 | """ 65 | x = self.j.runGremlinQuery(query) 66 | self.assertEquals(x[0], 'y == 0') 67 | 68 | 69 | def testTwoTaintSources(self): 70 | query = """ 71 | getFunctionASTsByName('two_taint_sources') 72 | .getCallsTo('taint_source'). 73 | sinks().code 74 | """ 75 | x = self.j.runGremlinQuery(query) 76 | self.assertEquals(x[0], 'y == 0') 77 | 78 | def testTwoTaintSources2(self): 79 | query = """ 80 | getFunctionASTsByName('two_taint_sources') 81 | .getCallsTo('second_taint_source'). 82 | sinks().code 83 | """ 84 | x = self.j.runGremlinQuery(query) 85 | self.assertEquals(x[0], 'z == 0') 86 | 87 | def testNotATaintSource(self): 88 | query = """ 89 | getFunctionASTsByName('test_dataFlowFromUntainted') 90 | .getCallsTo('not_a_taint_source') 91 | .sinks().code 92 | """ 93 | x = self.j.runGremlinQuery(query) 94 | self.assertEquals(x, []) 95 | 96 | def testParamTaintByCall(self): 97 | 98 | query = """ 99 | getFunctionASTsByName('testParamTaint') 100 | .getCallsTo('taint_source') 101 | .sinks().code 102 | """ 103 | x = self.j.runGremlinQuery(query) 104 | self.assertEquals(x, ['EXIT']) 105 | 106 | def testParamTaintByAssign(self): 107 | 108 | query = """ 109 | getFunctionASTsByName('testParamTaintAssign') 110 | .match{it.type == 'AssignmentExpr'} 111 | .sinks().code 112 | """ 113 | x = self.j.runGremlinQuery(query) 114 | self.assertEquals(x, ['EXIT']) 115 | 116 | -------------------------------------------------------------------------------- /testing/tests/initGraphTests.py: -------------------------------------------------------------------------------- 1 | 2 | from PythonJoernTests import * 3 | 4 | class InitGraphTests(PythonJoernTests): 5 | 6 | def testCreate1(self): 7 | query = """ 8 | 9 | callSiteId = getFunctionASTsByName("two_arg_sink_caller_p") 10 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 11 | .id.toList()[0]; 12 | 13 | initGraph = createInitGraph(callSiteId) 14 | initGraph.graphlets.size() 15 | 16 | """ 17 | x = self.j.runGremlinQuery(query) 18 | self.assertEquals(x, 3) 19 | 20 | def testCreate2(self): 21 | 22 | query = """ 23 | 24 | callSiteId = getFunctionASTsByName("two_arg_sink_caller") 25 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 26 | .id.toList()[0]; 27 | 28 | initGraph = createInitGraph(callSiteId) 29 | initGraph.graphlets.size() 30 | 31 | """ 32 | x = self.j.runGremlinQuery(query) 33 | self.assertEquals(x, 3) 34 | 35 | def testDecompress1(self): 36 | 37 | query = """ 38 | 39 | callSiteId = getFunctionASTsByName("two_arg_sink_caller_p") 40 | .match{ it.type == 
"CallExpression" && it.code.startsWith('asink') } 41 | .id.toList()[0]; 42 | 43 | initGraph = createInitGraph(callSiteId) 44 | invocs = decompressInitGraph(initGraph) 45 | [ g.v(invocs.defStmtsPerArg[0][0][0]).code, g.v(invocs.defStmtsPerArg[0][1][0]).code] 46 | """ 47 | x = self.j.runGremlinQuery(query) 48 | self.assertTrue(x[0].find('sourceA') != -1) 49 | self.assertTrue(x[1].find('sourceB') != -1) 50 | 51 | def testDecompress2(self): 52 | 53 | query = """ 54 | 55 | callSiteId = getFunctionASTsByName("two_arg_sink_caller") 56 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 57 | .id.toList()[0]; 58 | 59 | initGraph = createInitGraph(callSiteId) 60 | invocs = decompressInitGraph(initGraph) 61 | [ g.v(invocs.defStmtsPerArg[0][0][0]).code, g.v(invocs.defStmtsPerArg[0][1][0]).code] 62 | """ 63 | x = self.j.runGremlinQuery(query) 64 | self.assertTrue(x[0].find('sourceA') == -1) 65 | self.assertTrue(x[1].find('sourceB') != -1) 66 | 67 | def testCanBeTainted1(self): 68 | 69 | query = """ 70 | 71 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} ] 72 | callSiteId = getFunctionASTsByName("two_arg_sink_caller") 73 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 74 | .id.toList()[0]; 75 | initGraph = createInitGraph(callSiteId) 76 | canBeTainted(initGraph, argDescrs) 77 | 78 | """ 79 | x = self.j.runGremlinQuery(query) 80 | self.assertEquals(x, True) 81 | 82 | def testCanBeTainted2(self): 83 | 84 | query = """ 85 | 86 | callSiteId = getFunctionASTsByName("two_arg_sink_caller") 87 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 88 | .id.toList()[0]; 89 | initGraph = createInitGraph(callSiteId) 90 | canBeTainted(initGraph, [{ it.code.contains('sourceX')}, { it.code.contains('sourceB')} ] ) 91 | 92 | """ 93 | x = self.j.runGremlinQuery(query) 94 | self.assertEquals(x, False) 95 | 96 | def testIsTainted1(self): 97 | 98 | query = """ 99 | 100 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} ] 101 | callSiteId = getFunctionASTsByName("two_arg_sink_caller_p") 102 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 103 | .id.toList()[0]; 104 | initGraph = createInitGraph(callSiteId) 105 | invocs = decompressInitGraph(initGraph) 106 | invocs.collect{ isTainted(it, argDescrs) } 107 | 108 | """ 109 | x = self.j.runGremlinQuery(query) 110 | self.assertEquals(x[0], True) 111 | 112 | def testIsTainted2(self): 113 | 114 | query = """ 115 | 116 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} ] 117 | callSiteId = getFunctionASTsByName("two_arg_sink_caller") 118 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 119 | .id.toList()[0]; 120 | initGraph = createInitGraph(callSiteId) 121 | invocs = decompressInitGraph(initGraph) 122 | invocs.collect{ isTainted(it, argDescrs) } 123 | 124 | """ 125 | x = self.j.runGremlinQuery(query) 126 | self.assertEquals(x[0], False) 127 | 128 | 129 | def testTaintedArgs1(self): 130 | 131 | query = """ 132 | 133 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} ] 134 | 135 | getFunctionASTsByName("two_arg_sink_caller_p") 136 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 137 | .taintedArgs(argDescrs) 138 | 139 | """ 140 | x = self.j.runGremlinQuery(query) 141 | self.assertEquals(len(x), 2) 142 | 143 | 144 | def testTaintedArgs2(self): 145 | 146 | query = """ 147 | 148 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} 
] 149 | 150 | getFunctionASTsByName("two_arg_sink_caller") 151 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 152 | .taintedArgs(argDescrs) 153 | 154 | """ 155 | x = self.j.runGremlinQuery(query) 156 | self.assertEquals(len(x), 0) 157 | 158 | def testUnchecked1(self): 159 | 160 | query = """ 161 | 162 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} ] 163 | sanitizerDescrs = [ null, { it,s -> it.code.contains(s) }] 164 | 165 | getFunctionASTsByName("two_arg_sink_caller_p") 166 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 167 | .taintedArgs(argDescrs) 168 | .unchecked(sanitizerDescrs) 169 | 170 | """ 171 | x = self.j.runGremlinQuery(query) 172 | self.assertEquals(len(x), 1) 173 | 174 | def testUnchecked2(self): 175 | 176 | query = """ 177 | 178 | argDescrs = [{ it.code.contains('sourceA')}, { it.code.contains('sourceB')} ] 179 | sanitizerDescrs = [ { it,s -> false }, { it,s -> false } ] 180 | 181 | getFunctionASTsByName("two_arg_sink_caller_p") 182 | .match{ it.type == "CallExpression" && it.code.startsWith('asink') } 183 | .taintedArgs(argDescrs) 184 | .unchecked(sanitizerDescrs) 185 | 186 | """ 187 | x = self.j.runGremlinQuery(query) 188 | self.assertEquals(len(x), 2) 189 | -------------------------------------------------------------------------------- /testing/tests/interproc.py: -------------------------------------------------------------------------------- 1 | from PythonJoernTests import * 2 | 3 | class InterprocTests(PythonJoernTests): 4 | 5 | def testTaintedArgs(self): 6 | query = """ 7 | getFunctionASTsByName("interproc_arg_tainter_test") 8 | .match{ it.type == "CallExpression" && it.code.startsWith('interproc') } 9 | .taintedArguments() 10 | .code 11 | 12 | """ 13 | x = self.j.runGremlinQuery(query) 14 | self.assertEquals(len(x), 1) 15 | 16 | def testArgTainters(self): 17 | query = """ 18 | getFunctionASTsByName("interproc_arg_tainter_test") 19 | .match{ it.type == "CallExpression" && it.code.startsWith('interproc')} 20 | .argTainters() 21 | .code 22 | """ 23 | x = self.j.runGremlinQuery(query) 24 | self.assertEquals(x[0], '* x = source12 ( )') 25 | 26 | def testTaintedArg(self): 27 | query = """ 28 | getFunctionASTsByName("interproc_arg_tainter_test") 29 | .match{ it.type == "CallExpression" && it.code.startsWith('sink12')} 30 | .taintedArg('0', { it -> if(it.code.matches('.*source12.*')) [1] else [] } ) 31 | .code 32 | """ 33 | x = self.j.runGremlinQuery(query) 34 | self.assertEquals(len(x), 1) 35 | 36 | def testGetNodesToSrc(self): 37 | 38 | query = """ 39 | getFunctionASTsByName("interproc_arg_tainter_test") 40 | .match{ it.type == "CallExpression" && it.code.startsWith('sink12')} 41 | .statements() 42 | .transform{ it -> getNodesToSrc(it, { it2 -> if(it2.code.matches('.*source12.*')) [20] else [] } , 4) } 43 | .scatter().transform{ g.v(it[0]).code } 44 | """ 45 | x = self.j.runGremlinQuery(query) 46 | self.assertEquals(x[1], 'interproc_callee ( & x )') 47 | -------------------------------------------------------------------------------- /testing/tests/lookupTests.py: -------------------------------------------------------------------------------- 1 | 2 | from PythonJoernTests import * 3 | 4 | class IndexLookupTests(PythonJoernTests): 5 | 6 | def testGetNodesWithTypeAndCode(self): 7 | 8 | query = 'getNodesWithTypeAndCode("Callee", "bar")' 9 | x = self.j.runGremlinQuery(query) 10 | self.assertEquals(len(x), 1) 11 | 12 | def testGetNodesWithTypeAndName(self): 13 | 14 | query = 
'getNodesWithTypeAndName("Function", "foo")' 15 | x = self.j.runGremlinQuery(query) 16 | self.assertEquals(len(x), 1) 17 | 18 | def testGetFunctionsByName(self): 19 | 20 | query = 'getFunctionsByName("foo")' 21 | x = self.j.runGremlinQuery(query) 22 | self.assertEquals(len(x), 1) 23 | 24 | def testGetCallsTo(self): 25 | 26 | query = 'getCallsTo("bar")' 27 | x = self.j.runGremlinQuery(query) 28 | self.assertTrue(len(x) == 1) 29 | 30 | def testGetArguments(self): 31 | 32 | query = 'getArguments("bar", "0").code' 33 | x = self.j.runGremlinQuery(query) 34 | self.assertEquals(x[0], 'y') 35 | -------------------------------------------------------------------------------- /testing/tests/parsingTests.py: -------------------------------------------------------------------------------- 1 | from PythonJoernTests import * 2 | 3 | class ParsingTests(PythonJoernTests): 4 | 5 | def testNewOperator1(self): 6 | 7 | query = """getFunctionASTsByName('new_operator_test') 8 | .astNodes().filter{ it.isCFGNode == 'True'}.code""" 9 | x = self.j.runGremlinQuery(query) 10 | self.assertEquals(len(x), 3) 11 | 12 | -------------------------------------------------------------------------------- /testing/tests/udgTests.py: -------------------------------------------------------------------------------- 1 | 2 | from PythonJoernTests import * 3 | 4 | class UDGTests(PythonJoernTests): 5 | 6 | def testSimpleDecl(self): 7 | query = """getFunctionASTsByName('udg_test_simple_decl') 8 | .astNodes().filter{it.isCFGNode == 'True'} 9 | .defines().filter{it.code == 'x'} 10 | .code 11 | """ 12 | x = self.j.runGremlinQuery(query) 13 | self.assertEquals(len(x), 1) 14 | 15 | def testDeclWithAssign(self): 16 | query = """getFunctionASTsByName('udg_test_decl_with_assign') 17 | .astNodes().filter{it.isCFGNode == 'True'} 18 | .defines().filter{it.code == 'x'} 19 | .code 20 | """ 21 | x = self.j.runGremlinQuery(query) 22 | self.assertEquals(len(x), 1) 23 | 24 | def testParamDecl(self): 25 | query = """getFunctionASTsByName('udg_test_param_decl') 26 | .astNodes().filter{it.isCFGNode == 'True'} 27 | .defines().filter{it.code == 'x'} 28 | .code 29 | """ 30 | x = self.j.runGremlinQuery(query) 31 | self.assertEquals(len(x), 1) 32 | 33 | def testUntaintedParamUse(self): 34 | 35 | query = """getFunctionASTsByName('udg_test_use_untainted_call') 36 | .astNodes().filter{it.isCFGNode == 'True'} 37 | .defines().filter{it.code == 'x'} 38 | .code 39 | """ 40 | 41 | x = self.j.runGremlinQuery(query) 42 | self.assertEquals(len(x), 0) 43 | 44 | query = """getFunctionASTsByName('udg_test_use_untainted_call') 45 | .astNodes().filter{it.isCFGNode == 'True'} 46 | .uses().filter{it.code == 'x'} 47 | .code 48 | """ 49 | 50 | x = self.j.runGremlinQuery(query) 51 | self.assertEquals(len(x), 1) 52 | 53 | def testStructFieldUse(self): 54 | 55 | query = """getFunctionASTsByName('udg_test_struct_field_use') 56 | .astNodes().filter{it.isCFGNode == 'True'} 57 | .uses() 58 | .code 59 | """ 60 | 61 | x = self.j.runGremlinQuery(query) 62 | self.assertTrue('x . 
y' in x) 63 | self.assertTrue('x' in x) 64 | 65 | def testArrUse(self): 66 | 67 | query = """getFunctionASTsByName('arrUse') 68 | .astNodes().filter{it.isCFGNode == 'True'} 69 | .uses() 70 | .code 71 | """ 72 | 73 | x = self.j.runGremlinQuery(query) 74 | 75 | self.assertTrue('arr' in x) 76 | self.assertTrue('i' in x) 77 | 78 | 79 | def testComplexArg(self): 80 | 81 | query = """getFunctionASTsByName('complexInArgs') 82 | .astNodes().filter{ it.type == 'Argument'} 83 | .uses().code 84 | """ 85 | x = self.j.runGremlinQuery(query) 86 | self.assertEquals(len(x), 3) 87 | 88 | def testStatementContainingCall(self): 89 | 90 | query = """getFunctionASTsByName('complexInArgs') 91 | .astNodes().filter{ it.type == 'Argument'} 92 | .statements() 93 | .uses().code 94 | """ 95 | x = self.j.runGremlinQuery(query) 96 | self.assertEquals(len(x), 5) 97 | 98 | 99 | def testComplexAssign(self): 100 | 101 | query = """getFunctionASTsByName('complexAssign') 102 | .astNodes().filter{ it.type == 'AssignmentExpr'} 103 | .defines().code 104 | """ 105 | x = self.j.runGremlinQuery(query) 106 | 107 | self.assertTrue('* pLtv' in x) 108 | self.assertTrue('pLtv -> u' in x) 109 | self.assertTrue('pLtv -> u . u16' in x) 110 | 111 | def testConditionalExpr(self): 112 | 113 | query = """getFunctionASTsByName('conditional_expr') 114 | .astNodes() 115 | .filter{ it.type == 'Condition'} 116 | .uses() 117 | .code 118 | """ 119 | x = self.j.runGremlinQuery(query) 120 | self.assertEquals(len(x), 1) 121 | 122 | 123 | def testDefEdgeFromTaintedArg(self): 124 | 125 | query = """getFunctionASTsByName('test_call_tainting') 126 | .astNodes() 127 | .filter{ it.type == 'Argument' && it.code == '& y'} 128 | .defines().code 129 | """ 130 | x = self.j.runGremlinQuery(query) 131 | self.assertTrue('y' in x) 132 | 133 | def testPlusEquals(self): 134 | query = """ 135 | getFunctionASTsByName('plusEqualsUse') 136 | .astNodes() 137 | .filter{ it.type == 'ExpressionStatement'} 138 | .out('DEF').code 139 | """ 140 | 141 | x = self.j.runGremlinQuery(query) 142 | self.assertTrue('x' in x) 143 | 144 | def testPlusEqualsExpr(self): 145 | query = """ 146 | getFunctionASTsByName('plusEqualsUse') 147 | .astNodes() 148 | .filter{ it.type == 'AssignmentExpr'} 149 | .out('DEF').code 150 | """ 151 | x = self.j.runGremlinQuery(query) 152 | self.assertEquals(x[0], 'x') 153 | 154 | def testPlusEqualsExprUse(self): 155 | query = """ 156 | getFunctionASTsByName('plusEqualsUse') 157 | .astNodes() 158 | .filter{ it.type == 'AssignmentExpr'} 159 | .out('USE').code 160 | """ 161 | x = self.j.runGremlinQuery(query) 162 | self.assertTrue('x' in x) 163 | 164 | def testPlusPlusDef(self): 165 | query = """ 166 | getFunctionASTsByName('plusplus') 167 | .astNodes() 168 | .filter{ it.type == 'ExpressionStatement'} 169 | .out('DEF').code 170 | """ 171 | 172 | def testPlusPlusDefExpr(self): 173 | query = """ 174 | getFunctionASTsByName('plusplus') 175 | .astNodes() 176 | .filter{ it.type == 'IncDecOp'} 177 | .out('DEF').code 178 | """ 179 | 180 | x = self.j.runGremlinQuery(query) 181 | self.assertTrue('a' in x) 182 | 183 | def testPlusPlusUseExpr(self): 184 | query = """ 185 | getFunctionASTsByName('plusplus') 186 | .astNodes() 187 | .filter{ it.type == 'IncDecOp'} 188 | .out('USE').code 189 | """ 190 | x = self.j.runGremlinQuery(query) 191 | self.assertTrue('a' in x) 192 | 193 | def testAssignToArrayField(self): 194 | query = """ 195 | getFunctionASTsByName('udg_test_assign_to_array_field') 196 | .astNodes().filter{it.isCFGNode == 'True'} 197 | .defines().code 198 | """ 199 | 
200 | x = self.j.runGremlinQuery(query) 201 | 202 | self.assertTrue('* arr' in x) 203 | 204 | def testAssignToExprDef(self): 205 | query = """ 206 | getFunctionASTsByName('udg_test_assign_to_expression') 207 | .astNodes().filter{it.isCFGNode == 'True'} 208 | .defines() 209 | .code 210 | """ 211 | 212 | x = self.j.runGremlinQuery(query) 213 | self.assertTrue('* ( a + b )' in x) 214 | self.assertTrue('* a' in x) 215 | self.assertTrue('* b' in x) 216 | 217 | def testAssignToExprUse(self): 218 | query = """ 219 | getFunctionASTsByName('udg_test_assign_to_expression') 220 | .astNodes().filter{it.isCFGNode == 'True'} 221 | .uses() 222 | .code 223 | """ 224 | 225 | x = self.j.runGremlinQuery(query) 226 | 227 | self.assertTrue('a' in x) 228 | self.assertTrue('b' in x) 229 | 230 | 231 | def testArrDefDef(self): 232 | 233 | query = """ 234 | getFunctionASTsByName('test_buf_def') 235 | .astNodes().filter{it.isCFGNode == 'True'} 236 | .defines().code 237 | """ 238 | 239 | x = self.j.runGremlinQuery(query) 240 | 241 | self.assertTrue('* buf' in x) 242 | self.assertTrue('* i' in x) 243 | 244 | def testArrDefUse(self): 245 | 246 | query = """ 247 | getFunctionASTsByName('test_buf_def') 248 | .astNodes().filter{it.isCFGNode == 'True'} 249 | .uses().code 250 | """ 251 | 252 | x = self.j.runGremlinQuery(query) 253 | 254 | self.assertTrue('buf' in x) 255 | self.assertTrue('i' in x) 256 | 257 | def testNonDerefUnary(self): 258 | 259 | query = """ 260 | getFunctionASTsByName('nonDerefUnary') 261 | .astNodes().filter{it.isCFGNode == 'True'} 262 | .uses().code 263 | """ 264 | 265 | x = self.j.runGremlinQuery(query) 266 | self.assertTrue('a' in x) 267 | --------------------------------------------------------------------------------
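One oversight worth flagging in udgTests.py above: testPlusPlusDef assembles its Gremlin query but never passes it to runGremlinQuery, so the test currently passes without exercising anything. Judging from the neighbouring testPlusEquals and testPlusPlusDefExpr, the intended tail presumably looked like the sketch below (an assumption about intent, not the repository's actual code; it also assumes the 'plusplus' fixture attaches a DEF edge for 'a' to its ExpressionStatement):

    def testPlusPlusDef(self):
        query = """
        getFunctionASTsByName('plusplus')
        .astNodes()
        .filter{ it.type == 'ExpressionStatement'}
        .out('DEF').code
        """
        # Missing in the original test: actually run the query and check the result.
        x = self.j.runGremlinQuery(query)
        self.assertTrue('a' in x)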
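For context, every test above funnels its Gremlin query through self.j.runGremlinQuery, where self.j is presumably a JoernSteps connection set up in PythonJoernTests.py. The same queries can be replayed interactively against a Joern-populated Neo4j instance. The sketch below is a minimal example of doing so by hand; the connection details (setGraphDbURL, the default local REST endpoint) and the presence of a function named foo in the imported code base are assumptions about the local setup, not something these tests configure:

    from joern.all import JoernSteps

    # Connect to the Neo4j server holding the Joern code property graph.
    # The URL is the default REST endpoint; adjust it for non-default setups.
    j = JoernSteps()
    j.setGraphDbURL('http://localhost:7474/db/data/')
    j.connectToDatabase()

    # The same lookup step exercised by lookupTests.testGetFunctionsByName.
    for node in j.runGremlinQuery('getFunctionsByName("foo")'):
        print(node)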