├── README.md ├── class2java.py ├── java-decompiler.jar └── run.py /README.md: -------------------------------------------------------------------------------- 1 | # Extractor-java 2 | 3 | Create a CodeQL database directly from Java source code, without compiling it 4 | 5 | ## Requirements 6 | 7 | 1. CodeQL installed, with the `codeql` command available on PATH 8 | 2. Linux / macOS 9 | 10 | Otherwise you need to manually set the values of variables such as codeql_home and codeql_java_home 11 | 12 | ## Usage 13 | 14 | If you only have a jar, you need to decompile it first to get the Java source code 15 | 16 | ```bash 17 | unzip your.jar 18 | python3 class2java.py dir 19 | ``` 20 | 21 | Generate a database from the Java source code 22 | 23 | ```text 24 | usage: run.py [-h] [-l [LIB ...]] [-ld [LIBDIR ...]] db srcroot 25 | 26 | CodeQL java extractor. 27 | 28 | positional arguments: 29 | db codeql database name 30 | srcroot java source code dir 31 | 32 | optional arguments: 33 | -h, --help show this help message and exit 34 | -l [LIB ...], --lib [LIB ...] 35 | lib path 36 | -ld [LIBDIR ...], --libdir [LIBDIR ...] 37 | lib dir 38 | ``` 39 | 40 | Example 41 | 42 | ```bash 43 | python3 run.py dbname srcroot 44 | python3 run.py dbname srcroot -l lib1.jar lib2.jar 45 | python3 run.py dbname srcroot -ld libdir1 libdir2 46 | ``` -------------------------------------------------------------------------------- /class2java.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import os 4 | import sys 5 | 6 | def decompiler(path): 7 | curdir = os.path.dirname(os.path.realpath(__file__)) 8 | cmd = f"cd {path};java -cp {curdir}/java-decompiler.jar org.jetbrains.java.decompiler.main.decompiler.ConsoleDecompiler -hdc=0 -dgs=1 -rsy=1 -rbr=1 -lit=1 -nls=1 -mpm=60 . ." 
import os
import sys
import subprocess
import glob
import argparse
import platform
import shutil


class Extract:
    """Build a CodeQL Java database from a source tree without compiling it.

    The steps, in the order ``run()`` performs them:

    1. ``codeql database init`` creates an empty database.
    2. A ``javac.args`` file listing the classpath and every ``*.java`` file
       under the source root is written into the database's ``log`` directory.
    3. The Java extractor bundled with CodeQL is started directly on that
       argument file.
    4. ``codeql database finalize`` finishes the database.
    """

    # platform.system().lower() -> prefix of the directory under
    # <codeql_home>/tools/ that is globbed for the bundled JVM.
    _TOOLS_DIR_BY_SYSTEM = {
        'darwin': 'osx',
        'windows': 'win',
        'linux': 'linux',
    }

    def __init__(self, db, srcroot, lib, libdir):
        """Store the settings and collect the classpath entries.

        Args:
            db: Name (path) of the CodeQL database to create.
            srcroot: Root directory of the Java sources.
            lib: Iterable of individual jar paths, or None.
            libdir: Iterable of directories searched recursively for
                ``*.jar`` files, or None.
        """
        self.dbname = db
        self.srcroot = srcroot
        # Copy the list: appending discovered jars must not mutate the
        # caller's argument.
        self.libs = list(lib) if lib else []
        for jar_dir in libdir or []:
            self.libs.extend(glob.glob(f"{jar_dir}/**/*.jar", recursive=True))
        # Set here so the attributes always exist; init_database() and
        # init_env() assign them again once they have run.
        self.dbpath = os.path.realpath(db)
        self.codeql_home = None
        self.codeql_java_home = None

    def init_database(self):
        """Run ``codeql database init``; exit with status 1 if it fails."""
        p = subprocess.run(["codeql", "database", "init", self.dbname,
                            "-l", "java", "--source-root", self.srcroot])
        if p.returncode != 0:
            sys.exit(1)
        self.dbpath = os.path.realpath(self.dbname)
        print(f"[*extract_log*] dbpath : {self.dbpath}")

    def init_env(self):
        """Locate the CodeQL distribution and build the extractor environment.

        Sets ``self.codeql_home`` and ``self.codeql_java_home``.

        Returns:
            dict: the ``CODEQL_*`` variables passed to the Java extractor.

        Exits with an error message when the ``codeql`` executable or the
        JVM directory under ``<codeql_home>/tools/`` cannot be found.
        """
        codeql_path = shutil.which("codeql")
        if codeql_path is None:
            sys.exit("[*extract_log*] codeql executable not found on PATH")
        # Resolve symlinks so that a `codeql` linked into a bin directory
        # still leads to the real distribution directory.
        codeql_home = os.path.dirname(os.path.realpath(codeql_path))
        self.codeql_home = codeql_home
        print(f"[*extract_log*] codeql_home : {codeql_home}")

        system = platform.system().lower()
        tools_prefix = self._TOOLS_DIR_BY_SYSTEM.get(system, system)
        candidates = glob.glob(f"{codeql_home}/tools/{tools_prefix}*/java")
        if not candidates:
            sys.exit("[*extract_log*] no java directory found under "
                     f"{codeql_home}/tools/{tools_prefix}*")
        codeql_java_home = candidates[0]
        self.codeql_java_home = codeql_java_home
        print(f"[*extract_log*] codeql_java_home : {codeql_java_home}")

        env = {
            "CODEQL_DIST": codeql_home,
            "CODEQL_EXTRACTOR_JAVA_LOG_DIR": f"{self.dbpath}/log",
            "CODEQL_EXTRACTOR_JAVA_ROOT": f"{codeql_home}/java",
            "CODEQL_EXTRACTOR_JAVA_SOURCE_ARCHIVE_DIR": f"{self.dbpath}/src",
            "CODEQL_EXTRACTOR_JAVA_TRAP_DIR": f"{self.dbpath}/trap/java",
            "CODEQL_EXTRACTOR_JAVA_WIP_DATABASE": self.dbpath,
            "CODEQL_JAVA_HOME": codeql_java_home
        }
        for key, value in env.items():
            print(f"{key}={value}")
        return env

    def generate_javacargs(self):
        """Write ``<dbpath>/log/javac.args`` for the extractor.

        The file holds one argument per line: ``-Xprefer:source``, an
        optional ``-classpath`` followed by the joined jar list, then every
        ``*.java`` file found recursively under the source root.
        """
        javafiles = glob.glob(f"{self.srcroot}/**/*.java", recursive=True)
        print(len(javafiles))
        log_dir = f"{self.dbpath}/log"
        # Make sure the target directory exists before writing into it.
        os.makedirs(log_dir, exist_ok=True)
        lines = ["-Xprefer:source"]
        if self.libs:
            # os.pathsep is the platform's classpath separator; joining
            # (rather than appending a separator after each entry) avoids a
            # trailing empty entry, which Java reads as the current directory.
            lines += ["-classpath", os.pathsep.join(self.libs)]
        lines += javafiles
        with open(f"{log_dir}/javac.args", "w") as f:
            f.writelines(line + "\n" for line in lines)

    def generate_trap(self):
        """Run the bundled Java extractor on the generated ``javac.args``."""
        env = self.init_env()
        p = subprocess.run(
            [f"{self.codeql_java_home}/bin/java", "-Xmx1024M", "-Xms256M", "-cp",
             f"{self.codeql_home}/java/tools/semmle-extractor-java.jar",
             "com.semmle.extractor.java.JavaExtractor", "--javac-args",
             f"@@@{self.dbpath}/log/javac.args"],
            # Extend the current environment instead of replacing it, so the
            # JVM still sees PATH, HOME, temp-dir settings and the like.
            env={**os.environ, **env})
        if p.returncode != 0:
            # Only warn: the sources are not guaranteed to compile, and
            # run() has always continued to finalize after this step.
            print(f"[*extract_log*] extractor exited with code {p.returncode}")

    def import_trap(self):
        """Import the TRAP files into the dataset (not called by ``run()``)."""
        p = subprocess.run(["codeql", "dataset", "import", f"{self.dbpath}/db-java",
                            f"{self.dbpath}/trap", "-S",
                            f"{self.codeql_home}/java/semmlecode.dbscheme"])
        if p.returncode != 0:
            print(f"[*extract_log*] dataset import exited with code {p.returncode}")

    def finalize(self):
        """Run ``codeql database finalize``; exit with status 1 if it fails."""
        p = subprocess.run(["codeql", "database", "finalize", self.dbpath])
        if p.returncode != 0:
            sys.exit(1)

    def run(self):
        """Execute the whole pipeline: init, argument file, extract, finalize."""
        self.init_database()
        self.generate_javacargs()
        self.generate_trap()
        # self.import_trap()
        self.finalize()


def main():
    """Parse the command line and run the extraction."""
    parser = argparse.ArgumentParser(description='CodeQL java extractor.')
    parser.add_argument('db', help='codeql database name')
    parser.add_argument('srcroot', help='java source code dir')
    parser.add_argument('-l', '--lib', nargs='*', help='lib path')
    parser.add_argument('-ld', '--libdir', nargs='*', help='lib dir')

    # Called with no arguments at all: show the full help instead of
    # argparse's terse "required arguments" error.
    if len(sys.argv) < 2:
        parser.print_help()
        sys.exit()

    args = parser.parse_args()
    print(args)
    print(args.db)
    print(args.srcroot)
    print(args.lib)
    print(args.libdir)
    extractor = Extract(args.db, args.srcroot, args.lib, args.libdir)
    extractor.run()


if __name__ == "__main__":
    main()