├── requirements.txt ├── install_jadx.ps1 ├── install_jadx.sh ├── models └── genai_model.py ├── utils ├── extract_apk_helpers.py └── html_helpers.py ├── config.py ├── run.py └── README.md /requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | anthropic 3 | google-genai 4 | colorama 5 | tiktoken 6 | markdown -------------------------------------------------------------------------------- /install_jadx.ps1: -------------------------------------------------------------------------------- 1 | 2 | $jadxVersion = "1.5.1" 3 | $zipName = "jadx-$jadxVersion.zip" 4 | $downloadUrl = "https://github.com/skylot/jadx/releases/download/v$jadxVersion/$zipName" 5 | $installDir = "installed-tools" 6 | $zipPath = Join-Path $installDir $zipName 7 | 8 | if (-Not (Test-Path $installDir)) { 9 | New-Item -ItemType Directory -Path $installDir | Out-Null 10 | } 11 | 12 | Write-Host "[*] Downloading JADX $jadxVersion..." 13 | Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath 14 | 15 | Write-Host "[*] Unzipping..." 16 | Expand-Archive -LiteralPath $zipPath -DestinationPath $installDir -Force 17 | 18 | Write-Host "[+] JADX $jadxVersion installed in '$installDir'" 19 | -------------------------------------------------------------------------------- /install_jadx.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | JADX_VERSION="1.5.1" 4 | JADX_ZIP="jadx-${JADX_VERSION}.zip" 5 | JADX_URL="https://github.com/skylot/jadx/releases/download/v${JADX_VERSION}/${JADX_ZIP}" 6 | INSTALL_DIR="installed-tools" 7 | DOWNLOAD_PATH="${INSTALL_DIR}/${JADX_ZIP}" 8 | 9 | mkdir -p "$INSTALL_DIR" 10 | 11 | echo "[*] Downloading JADX $JADX_VERSION..." 12 | if command -v curl >/dev/null 2>&1; then 13 | curl -L "$JADX_URL" -o "$DOWNLOAD_PATH" 14 | elif command -v wget >/dev/null 2>&1; then 15 | wget -O "$DOWNLOAD_PATH" "$JADX_URL" 16 | else 17 | echo "[!] Neither curl nor wget is installed. Aborting." 18 | exit 1 19 | fi 20 | 21 | echo "[*] Unzipping..." 22 | unzip -q "$DOWNLOAD_PATH" -d "$INSTALL_DIR" 23 | 24 | echo "[+] JADX $JADX_VERSION installed in '$INSTALL_DIR'" 25 | -------------------------------------------------------------------------------- /models/genai_model.py: -------------------------------------------------------------------------------- 1 | from google import genai 2 | from google.genai import types 3 | from config import * 4 | 5 | 6 | 7 | def scan_code(API, MODEL, instruction, code): 8 | result = "" 9 | count = 1 10 | client = genai.Client(api_key=API,) 11 | 12 | model = MODEL 13 | contents = [ 14 | types.Content( 15 | role="user", 16 | parts=[ 17 | types.Part.from_text(text=code), 18 | ], 19 | ), 20 | ] 21 | tools = [ 22 | types.Tool(google_search=types.GoogleSearch()) 23 | ] 24 | generate_content_config = types.GenerateContentConfig( 25 | tools=tools, 26 | response_mime_type="text/plain", 27 | system_instruction=[ 28 | types.Part.from_text(text=instruction), 29 | ], 30 | ) 31 | 32 | for chunk in client.models.generate_content_stream( 33 | model=model, 34 | contents=contents, 35 | config=generate_content_config, 36 | ): 37 | result += chunk.text 38 | count += 1 39 | return result 40 | -------------------------------------------------------------------------------- /utils/extract_apk_helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import platform 4 | import sys 5 | 6 | def extract_apk_with_jadx(apk_path, output_dir, target_package): 7 | if not os.path.isfile(apk_path): 8 | raise FileNotFoundError(f"APK file not found: {apk_path}") 9 | 10 | if not os.path.exists(output_dir): 11 | os.makedirs(output_dir) 12 | 13 | print(f"Decompiling APK: {apk_path} to {output_dir}") 14 | 15 | try: 16 | jadx_executable = "jadx.bat" if platform.system() == "Windows" else "jadx" 17 | jadx_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 18 | "installed-tools","jadx", "bin", jadx_executable) 19 | 20 | if not os.path.exists(jadx_path): 21 | raise FileNotFoundError(f"JADX not found at: {jadx_path}") 22 | 23 | command = [jadx_path, "-d", output_dir, apk_path] 24 | subprocess.run(command, check=True) 25 | print(f"Decompilation complete. Files saved to: {output_dir}") 26 | except subprocess.CalledProcessError as e: 27 | print(f"Error during decompilation: {e}") 28 | sys.exit(1) 29 | 30 | manifest_path = os.path.join(output_dir, "resources", "AndroidManifest.xml") 31 | strings_path = os.path.join(output_dir, "resources" ,"res", "values", "strings.xml") 32 | 33 | target_classes = [] 34 | for root, _, files in os.walk(output_dir): 35 | for file in files: 36 | if file.endswith(".java"): 37 | package_path = target_package.replace(".", os.sep) 38 | if package_path in os.path.normpath(root): 39 | target_classes.append(os.path.join(root, file)) 40 | 41 | return manifest_path, strings_path, target_classes 42 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # Instruction for the code scanner 2 | instruction = """ 3 | You are a static analysis tool designed to perform a security review of Android application source code. You will analyze the following files: 4 | 5 | 1. Java files (.java) – Review all Java files for security vulnerabilities and weaknesses. 6 | 2. strings.xml – Review the XML file for hardcoded sensitive data, insecure configurations, and improper encoding. 7 | 3. AndroidManifest.xml – Analyze for improper permissions, exposed components, and security misconfigurations. 8 | 4. Once the analysis is complete, respond with "✅ All code scanned. Coded by @X-Vector" 9 | 10 | 11 | Your goal is to identify security flaws in the Android code and provide: 12 | 1. A complete list of all vulnerabilities found. 13 | 2. A clear explanation of each vulnerability. 14 | 3. The CWE ID associated with the issue (e.g., CWE-798 for Hardcoded Credentials). 15 | 4. A severity rating (Low, Medium, High, Critical). 16 | 5. A CVSS Score 3.1 Rating. 17 | 6. The function name and line number where the issue occurs (do not include the full affected code). 18 | 7. A recommended fix or mitigation approach. 19 | 8. URL Reference for the vulnerability (e.g., OWASP, CWE). 20 | 9. Respond with all vulnerabilities in one go (even if it spans multiple messages) and do not ask for input to proceed. 21 | 10. add line between each vulnerability 22 | 23 | Focus on common issues such as: 24 | - Insecure Data Storage (e.g., hardcoded secrets or sensitive information) 25 | - Input Validation & Output Encoding (e.g., improper sanitization) 26 | - All types of Injection (e.g., SQL Injection, XSS, Command Injection) 27 | - Insecure Communication (e.g., unencrypted network traffic) 28 | - Insecure Deserialization 29 | - Insecure Cryptography (e.g., weak encryption methods) 30 | - Improper Permissions (e.g., excessive permissions in `AndroidManifest.xml`) 31 | - Unsafe File Handling or Permissions 32 | - Unsafe WebViews (e.g., unsanitized URLs or JavaScript injection) 33 | 34 | Your output should be structured in Markdown format, with each issue clearly listed and easily understood by developers. Include code snippets, CWE references, and recommendations for fixes. 35 | 36 | If no vulnerability is found, clearly state that. Only focus on security, not code style or performance. 37 | """ 38 | 39 | # API keys for different models 40 | # https://aistudio.google.com/app/apikey 41 | api_keys = { 42 | "GENEAI": "" 43 | } 44 | 45 | # Available models for each key 46 | Models = { 47 | "GENEAI": { 48 | "gemini-2.0-flash": "gemini-2.0-flash" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /utils/html_helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | from colorama import init, Fore, Style 3 | 4 | # Initialize colorama 5 | init(autoreset=True) 6 | 7 | def generate_index_html(output_dir): 8 | """ 9 | Generates an index.html file that links to all the HTML reports in the output directory 10 | with a table structure to display the folder and file hierarchy. 11 | """ 12 | # Create an index.html file 13 | index_file_path = os.path.join(output_dir, "index.html") 14 | with open(index_file_path, "w", encoding="utf-8") as index_file: 15 | # Start the HTML structure 16 | index_file.write(""" 17 | 18 | 19 | Code Scan Reports 20 | 38 | 39 | 40 |

Code Scan Reports

41 | 42 | 43 | 44 | 45 | 46 | 47 | """) 48 | 49 | # Walk through the output directory and create a table row for each HTML file 50 | for root, _, files in os.walk(output_dir): 51 | # Skip the root directory itself 52 | if root == output_dir: 53 | continue 54 | 55 | for file in files: 56 | if file.endswith(".html"): 57 | # Get the folder and file name 58 | folder = os.path.relpath(root, output_dir) 59 | file_name = file 60 | 61 | # Create the link to the HTML file (make filename a clickable link) 62 | file_link = os.path.join(folder, file_name) 63 | 64 | # Write the row to the index file 65 | index_file.write(f""" 66 | 67 | 68 | 69 | 70 | 71 | """) 72 | 73 | # Close the table and HTML structure 74 | index_file.write(""" 75 |
FolderFileLink
{folder}{file_name}{file_name}
76 | 77 | 78 | """) 79 | 80 | print(f"{Fore.GREEN}Index file created at {index_file_path}{Style.RESET_ALL}") 81 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import markdown 5 | from termcolor import colored 6 | from concurrent.futures import ThreadPoolExecutor, as_completed 7 | from config import api_keys, Models, instruction 8 | from models.genai_model import scan_code 9 | from utils.html_helpers import generate_index_html 10 | from utils.extract_apk_helpers import extract_apk_with_jadx 11 | 12 | 13 | def get_available_models(): 14 | return ', '.join(api_keys.keys()) 15 | 16 | 17 | def process_file(file_path, code_content, model_key, model_variant, input_dir, output_dir): 18 | print(colored(f"[+] Scanning {file_path} with model {model_key}...", "cyan")) 19 | 20 | result = scan_code( 21 | api_keys[model_key], 22 | model_variant, 23 | instruction, 24 | code_content 25 | ) 26 | 27 | html_result = markdown.markdown(result) 28 | 29 | relative_path = os.path.relpath(file_path, input_dir) 30 | output_file_dir = os.path.join(output_dir, os.path.dirname(relative_path)) 31 | 32 | os.makedirs(output_file_dir, exist_ok=True) 33 | 34 | markdown_filename = f"{os.path.splitext(os.path.basename(file_path))[0]}.md" 35 | html_filename = f"{os.path.splitext(os.path.basename(file_path))[0]}.html" 36 | 37 | markdown_path = os.path.join(output_file_dir, markdown_filename) 38 | html_path = os.path.join(output_file_dir, html_filename) 39 | 40 | with open(markdown_path, "w", encoding="utf-8") as markdown_file: 41 | markdown_file.write(result) 42 | 43 | with open(html_path, "w", encoding="utf-8") as html_file: 44 | html_file.write(html_result) 45 | 46 | print(colored(f"[✓] Report saved to {markdown_path} and {html_path}", "green")) 47 | return file_path, markdown_path, html_path 48 | 49 | 50 | def process_and_generate_reports(all_pathes, model_key, model_variant, input_dir, output_dir, num_threads): 51 | files_to_process = [] 52 | for path in all_pathes: 53 | if os.path.exists(path): 54 | files_to_process.append((path, open(path, encoding="utf-8").read())) 55 | 56 | with ThreadPoolExecutor(max_workers=num_threads) as executor: 57 | futures = [] 58 | for file_path, code_content in files_to_process: 59 | futures.append(executor.submit(process_file, file_path, code_content, model_key, model_variant, input_dir, output_dir)) 60 | 61 | for future in as_completed(futures): 62 | file_path, markdown_path, html_path = future.result() 63 | 64 | generate_index_html(output_dir) 65 | print(colored(f"[✓] Index file created at {os.path.join(output_dir, 'index.html')}", "green")) 66 | 67 | 68 | def main(): 69 | parser = argparse.ArgumentParser(description="AI Code Scanner") 70 | parser.add_argument("--apk-path", required=True, help="Path to the APK file") 71 | parser.add_argument("--out-dir", required=True, help="Directory to save the decompiled files") 72 | parser.add_argument("--target-package", required=True, help="Target package name to find classes (e.g., 'jakhar.aseem.diva')") 73 | parser.add_argument("--model-name", required=True, help="Model key (e.g., GENEAI, OPENAI).") 74 | parser.add_argument("--report", required=True, help="Directory to save the reports.") 75 | parser.add_argument("--threads", type=int, default=1, choices=range(1, 11), help="Number of threads to use for scanning files (default: 1, max: 10).") 76 | 77 | args = parser.parse_args() 78 | 79 | input_dir = args.out_dir 80 | output_dir = args.report 81 | model_key = args.model_name.upper() 82 | num_threads = args.threads 83 | 84 | if model_key not in api_keys or model_key not in Models: 85 | print(colored(f"[!] Invalid model key: {model_key}", "red")) 86 | print(colored(f"[!] Available model keys are: {get_available_models()}", "yellow")) 87 | sys.exit(1) 88 | 89 | os.makedirs(output_dir, exist_ok=True) 90 | 91 | model_variant = list(Models[model_key].values())[0] 92 | 93 | manifest_path, strings_path, target_classes = extract_apk_with_jadx(args.apk_path, args.out_dir, args.target_package) 94 | all_pathes = [] 95 | all_pathes.append(manifest_path) 96 | all_pathes.append(strings_path) 97 | for target_class in target_classes: 98 | all_pathes.append(target_class) 99 | process_and_generate_reports(all_pathes, model_key, model_variant, input_dir, output_dir, num_threads) 100 | 101 | 102 | if __name__ == "__main__": 103 | main() 104 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Android APK Scanner with AI 2 | 3 | A powerful static analysis tool that uses AI to scan Android APK files for security vulnerabilities and potential issues. The scanner decompiles APK files and analyzes the source code, manifest, and resource files using AI models. 4 | 5 | ## Table of Contents 6 | - [Features](#features) 7 | - [Prerequisites](#prerequisites) 8 | - [Installation](#installation) 9 | - [Usage](#usage) 10 | - [Project Structure](#project-structure) 11 | - [Configuration](#configuration) 12 | - [Output Format](#output-format) 13 | - [Supported AI Models](#supported-ai-models) 14 | - [Security Analysis Coverage](#security-analysis-coverage) 15 | - [Error Handling](#error-handling) 16 | - [Performance](#performance) 17 | - [Contributing](#contributing) 18 | - [License](#license) 19 | - [Acknowledgments](#acknowledgments) 20 | 21 | 22 | 23 | ## Features 24 | 25 | - APK decompilation using JADX 26 | - Multi-threaded file analysis 27 | - Support for multiple AI models (currently supports GEMENAI) 28 | - Comprehensive security vulnerability scanning 29 | - HTML and Markdown report generation 30 | - Cross-platform compatibility (Windows/Linux) 31 | - Interactive report browsing through generated index.html 32 | 33 | 34 | ## Prerequisites 35 | 36 | - Python 3.8 or higher 37 | - JADX decompiler 38 | - Required Python packages (see requirements.txt) 39 | - Valid API key for supported AI models 40 | 41 | ## Installation 42 | 43 | 1. Clone the repository: 44 | ```bash 45 | git clone 46 | cd android-scanner-ai 47 | ``` 48 | 49 | 2. Install required Python packages: 50 | ```bash 51 | pip install -r requirements.txt 52 | ``` 53 | 54 | 3. Set up JADX: 55 | - Windows: Ensure jadx.bat is in the jadx/bin directory 56 | - Linux: Ensure jadx is executable in the jadx/bin directory 57 | ```bash 58 | chmod +x jadx/bin/jadx 59 | ``` 60 | 61 | 4. Configure API Keys in config.py: 62 | - Obtain your API key from [Google AI](https://aistudio.google.com/app/apikey) Studio and add it to the `config.py` file 63 | 64 | 5. Configure Models in config.py: 65 | - Set up your Gemini Model by referring to the [Gemini models](https://ai.google.dev/gemini-api/docs/models) and configure it in the `config.py` file. 66 | 67 | 68 | ## Usage 69 | 70 | Run the scanner using the following command: 71 | 72 | ```bash 73 | python run.py --apk-path \ 74 | --out-dir \ 75 | --target-package \ 76 | --model-name \ 77 | --report \ 78 | --threads 79 | ``` 80 | 81 | ### Arguments 82 | 83 | - `--apk-path`: Path to the APK file to analyze 84 | - `--out-dir`: Directory where decompiled files will be saved 85 | - `--target-package`: Package name to analyze (e.g., 'com.example.app') 86 | - `--model-name`: AI model to use (e.g., 'GENEAI', 'OPENAI') 87 | - `--report`: Directory where analysis reports will be saved 88 | - `--threads`: Number of concurrent analysis threads (1-10, default: 1) 89 | 90 | ## Project Structure 91 | 92 | ``` 93 | android-scanner-ai/ 94 | ├── run.py # Main entry point 95 | ├── config.py # Configuration and API keys 96 | ├── requirements.txt # Python dependencies 97 | ├── jadx/ # JADX decompiler 98 | │ └── bin/ 99 | │ ├── jadx # Linux executable 100 | │ └── jadx.bat # Windows executable 101 | ├── models/ 102 | │ └── genai_model.py # AI model integration 103 | └── utils/ 104 | ├── extract_apk_helpers.py # APK extraction utilities 105 | └── html_helpers.py # Report generation utilities 106 | ``` 107 | 108 | ## Configuration 109 | 110 | Edit `config.py` to configure: 111 | 112 | 1. AI Model API Keys: 113 | ```python 114 | api_keys = { 115 | "GENEAI": "your-api-key-here" 116 | } 117 | ``` 118 | 119 | 2. Available Models: 120 | ```python 121 | Models = { 122 | "GENEAI": { 123 | "gemini-2.0-flash": "gemini-2.0-flash" 124 | } 125 | } 126 | ``` 127 | 128 | 3. Analysis Instructions and Rules 129 | 130 | ## Output Format 131 | 132 | The scanner generates two types of reports for each analyzed file: 133 | 134 | 1. Markdown Report (.md): 135 | ```markdown 136 | ## Vulnerability: [Title] 137 | **Severity**: [Low/Medium/High] 138 | **CWE ID**: [ID] 139 | **Description**: [Details] 140 | **Code Example**: [Relevant Code] 141 | **Recommendation**: [Fix Suggestions] 142 | **References**: [Links] 143 | ``` 144 | 145 | 2. HTML Report: 146 | - Interactive web-based report 147 | - Organized by package structure 148 | - Linked through index.html 149 | 150 | ## Supported AI Models 151 | 152 | Currently supported AI models: 153 | - Google Gemini AI (GENEAI) 154 | - Model: gemini-2.0-flash 155 | 156 | ## Security Analysis Coverage 157 | 158 | The scanner analyzes: 159 | 160 | 1. Java Source Files: 161 | - Security vulnerabilities 162 | - Code weaknesses 163 | - Best practice violations 164 | 165 | 2. AndroidManifest.xml: 166 | - Permission issues 167 | - Component exposure 168 | - Security configurations 169 | 170 | 3. strings.xml: 171 | - Hardcoded credentials 172 | - Sensitive data 173 | - Configuration issues 174 | 175 | ## Error Handling 176 | 177 | The scanner includes robust error handling for: 178 | - Invalid APK files 179 | - Missing JADX executable 180 | - AI model errors 181 | - File system issues 182 | - Threading problems 183 | 184 | ## Performance 185 | 186 | - Multi-threaded analysis (up to 10 threads) 187 | - Concurrent file processing 188 | - Efficient report generation 189 | - Optimized for large APKs 190 | 191 | 192 | ## Acknowledgments 193 | 194 | - JADX decompiler 195 | - Google Gemini AI 196 | --------------------------------------------------------------------------------