├── requirements.txt
├── install_jadx.ps1
├── install_jadx.sh
├── models
    └── genai_model.py
├── utils
    ├── extract_apk_helpers.py
    └── html_helpers.py
├── config.py
├── run.py
└── README.md


/requirements.txt:
--------------------------------------------------------------------------------
1 | openai
2 | anthropic
3 | google-genai
4 | colorama
5 | tiktoken
6 | markdown
7 | termcolor


--------------------------------------------------------------------------------
/install_jadx.ps1:
--------------------------------------------------------------------------------
 1 | 
 2 | $jadxVersion = "1.5.1"
 3 | $zipName = "jadx-$jadxVersion.zip"
 4 | $downloadUrl = "https://github.com/skylot/jadx/releases/download/v$jadxVersion/$zipName"
 5 | $installDir = "installed-tools"
 6 | $zipPath = Join-Path $installDir $zipName
 7 | 
 8 | if (-Not (Test-Path $installDir)) {
 9 |     New-Item -ItemType Directory -Path $installDir | Out-Null
10 | }
11 | 
12 | Write-Host "[*] Downloading JADX $jadxVersion..."
13 | Invoke-WebRequest -Uri $downloadUrl -OutFile $zipPath
14 | 
15 | Write-Host "[*] Unzipping..."
16 | Expand-Archive -LiteralPath $zipPath -DestinationPath $installDir -Force
17 | 
18 | Write-Host "[+] JADX $jadxVersion installed in '$installDir'"
19 | 


--------------------------------------------------------------------------------
/install_jadx.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | JADX_VERSION="1.5.1"
 4 | JADX_ZIP="jadx-${JADX_VERSION}.zip"
 5 | JADX_URL="https://github.com/skylot/jadx/releases/download/v${JADX_VERSION}/${JADX_ZIP}"
 6 | INSTALL_DIR="installed-tools"
 7 | DOWNLOAD_PATH="${INSTALL_DIR}/${JADX_ZIP}"
 8 | 
 9 | mkdir -p "$INSTALL_DIR"
10 | 
11 | echo "[*] Downloading JADX $JADX_VERSION..."
12 | if command -v curl >/dev/null 2>&1; then
13 |   curl -L "$JADX_URL" -o "$DOWNLOAD_PATH"
14 | elif command -v wget >/dev/null 2>&1; then
15 |   wget -O "$DOWNLOAD_PATH" "$JADX_URL"
16 | else
17 |   echo "[!] Neither curl nor wget is installed. Aborting."
18 |   exit 1
19 | fi
20 | 
21 | echo "[*] Unzipping..."
22 | unzip -q "$DOWNLOAD_PATH" -d "$INSTALL_DIR"
23 | 
24 | echo "[+] JADX $JADX_VERSION installed in '$INSTALL_DIR'"
25 | 


--------------------------------------------------------------------------------
/models/genai_model.py:
--------------------------------------------------------------------------------
 1 | from google import genai
 2 | from google.genai import types
 3 | from config import *
 4 | 
 5 | 
 6 | 
 7 | def scan_code(API, MODEL, instruction, code):
 8 |     result = ""
 9 |     count = 1
10 |     client = genai.Client(api_key=API,)
11 | 
12 |     model = MODEL
13 |     contents = [
14 |         types.Content(
15 |             role="user",
16 |             parts=[
17 |                 types.Part.from_text(text=code),
18 |             ],
19 |         ),
20 |     ]
21 |     tools = [
22 |         types.Tool(google_search=types.GoogleSearch())
23 |     ]
24 |     generate_content_config = types.GenerateContentConfig(
25 |         tools=tools,
26 |         response_mime_type="text/plain",
27 |         system_instruction=[
28 |             types.Part.from_text(text=instruction),
29 |         ],
30 |     )
31 | 
32 |     for chunk in client.models.generate_content_stream(
33 |         model=model,
34 |         contents=contents,
35 |         config=generate_content_config,
36 |     ):
37 |         result += chunk.text
38 |         count += 1
39 |     return result
40 | 


--------------------------------------------------------------------------------
/utils/extract_apk_helpers.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import subprocess
 3 | import platform
 4 | import sys
 5 | 
 6 | def extract_apk_with_jadx(apk_path, output_dir, target_package):
 7 |     if not os.path.isfile(apk_path):
 8 |         raise FileNotFoundError(f"APK file not found: {apk_path}")
 9 |     
10 |     if not os.path.exists(output_dir):
11 |         os.makedirs(output_dir)
12 | 
13 |     print(f"Decompiling APK: {apk_path} to {output_dir}")
14 |     
15 |     try:
16 |         jadx_executable = "jadx.bat" if platform.system() == "Windows" else "jadx"
17 |         jadx_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 
18 |                                 "installed-tools","jadx", "bin", jadx_executable)
19 |         
20 |         if not os.path.exists(jadx_path):
21 |             raise FileNotFoundError(f"JADX not found at: {jadx_path}")
22 |             
23 |         command = [jadx_path, "-d", output_dir, apk_path]        
24 |         subprocess.run(command, check=True)
25 |         print(f"Decompilation complete. Files saved to: {output_dir}")
26 |     except subprocess.CalledProcessError as e:
27 |         print(f"Error during decompilation: {e}")
28 |         sys.exit(1)
29 |     
30 |     manifest_path = os.path.join(output_dir, "resources", "AndroidManifest.xml")
31 |     strings_path = os.path.join(output_dir, "resources" ,"res", "values", "strings.xml")
32 |     
33 |     target_classes = []
34 |     for root, _, files in os.walk(output_dir):
35 |         for file in files:
36 |             if file.endswith(".java"):
37 |                 package_path = target_package.replace(".", os.sep)
38 |                 if package_path in os.path.normpath(root):
39 |                     target_classes.append(os.path.join(root, file))
40 |     
41 |     return manifest_path, strings_path, target_classes
42 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | # Instruction for the code scanner
 2 | instruction = """
 3 | You are a static analysis tool designed to perform a security review of Android application source code. You will analyze the following files:
 4 | 
 5 | 1. Java files (.java) – Review all Java files for security vulnerabilities and weaknesses.
 6 | 2. strings.xml – Review the XML file for hardcoded sensitive data, insecure configurations, and improper encoding.
 7 | 3. AndroidManifest.xml – Analyze for improper permissions, exposed components, and security misconfigurations.
 8 | 4. Once the analysis is complete, respond with "✅ All code scanned. Coded by @X-Vector"
 9 | 
10 | 
11 | Your goal is to identify security flaws in the Android code and provide:
12 | 1. A complete list of all vulnerabilities found.
13 | 2. A clear explanation of each vulnerability.
14 | 3. The CWE ID associated with the issue (e.g., CWE-798 for Hardcoded Credentials).
15 | 4. A severity rating (Low, Medium, High, Critical).
16 | 5. A CVSS Score 3.1 Rating.
17 | 6. The function name and line number where the issue occurs (do not include the full affected code).
18 | 7. A recommended fix or mitigation approach.
19 | 8. URL Reference for the vulnerability (e.g., OWASP, CWE).
20 | 9. Respond with all vulnerabilities in one go (even if it spans multiple messages) and do not ask for input to proceed.
21 | 10. add line between each vulnerability
22 | 
23 | Focus on common issues such as:
24 | - Insecure Data Storage (e.g., hardcoded secrets or sensitive information)
25 | - Input Validation & Output Encoding (e.g., improper sanitization)
26 | - All types of Injection (e.g., SQL Injection, XSS, Command Injection)
27 | - Insecure Communication (e.g., unencrypted network traffic)
28 | - Insecure Deserialization
29 | - Insecure Cryptography (e.g., weak encryption methods)
30 | - Improper Permissions (e.g., excessive permissions in `AndroidManifest.xml`)
31 | - Unsafe File Handling or Permissions
32 | - Unsafe WebViews (e.g., unsanitized URLs or JavaScript injection)
33 | 
34 | Your output should be structured in Markdown format, with each issue clearly listed and easily understood by developers. Include code snippets, CWE references, and recommendations for fixes.
35 | 
36 | If no vulnerability is found, clearly state that. Only focus on security, not code style or performance.
37 | """
38 | 
39 | # API keys for different models
40 | # https://aistudio.google.com/app/apikey
41 | api_keys = {
42 |     "GENEAI": "<your_api_key>"
43 | }
44 | 
45 | # Available models for each key
46 | Models = {
47 |     "GENEAI": {
48 |         "gemini-2.0-flash": "gemini-2.0-flash"
49 |     }
50 | }
51 | 


--------------------------------------------------------------------------------
/utils/html_helpers.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from colorama import init, Fore, Style
 3 | 
 4 | # Initialize colorama
 5 | init(autoreset=True)
 6 | 
 7 | def generate_index_html(output_dir):
 8 |     """
 9 |     Generates an index.html file that links to all the HTML reports in the output directory
10 |     with a table structure to display the folder and file hierarchy.
11 |     """
12 |     # Create an index.html file
13 |     index_file_path = os.path.join(output_dir, "index.html")
14 |     with open(index_file_path, "w", encoding="utf-8") as index_file:
15 |         # Start the HTML structure
16 |         index_file.write("""
17 |         <html>
18 |         <head>
19 |             <title>Code Scan Reports</title>
20 |             <style>
21 |                 table {
22 |                     width: 100%;
23 |                     border-collapse: collapse;
24 |                 }
25 |                 th, td {
26 |                     padding: 8px;
27 |                     text-align: left;
28 |                     border: 1px solid #ddd;
29 |                 }
30 |                 th {
31 |                     background-color: #f2f2f2;
32 |                 }
33 |                 h1 {
34 |                     text-align: center;
35 |                     color: #333;
36 |                 }
37 |             </style>
38 |         </head>
39 |         <body>
40 |             <h1>Code Scan Reports</h1>
41 |             <table>
42 |                 <tr>
43 |                     <th>Folder</th>
44 |                     <th>File</th>
45 |                     <th>Link</th>
46 |                 </tr>
47 |         """)
48 | 
49 |         # Walk through the output directory and create a table row for each HTML file
50 |         for root, _, files in os.walk(output_dir):
51 |             # Skip the root directory itself
52 |             if root == output_dir:
53 |                 continue
54 | 
55 |             for file in files:
56 |                 if file.endswith(".html"):
57 |                     # Get the folder and file name
58 |                     folder = os.path.relpath(root, output_dir)
59 |                     file_name = file
60 | 
61 |                     # Create the link to the HTML file (make filename a clickable link)
62 |                     file_link = os.path.join(folder, file_name)
63 | 
64 |                     # Write the row to the index file
65 |                     index_file.write(f"""
66 |                     <tr>
67 |                         <td>{folder}</td>
68 |                         <td>{file_name}</td>
69 |                         <td><a href="{file_link}">{file_name}</a></td>
70 |                     </tr>
71 |                     """)
72 | 
73 |         # Close the table and HTML structure
74 |         index_file.write("""
75 |             </table>
76 |         </body>
77 |         </html>
78 |         """)
79 | 
80 |     print(f"{Fore.GREEN}Index file created at {index_file_path}{Style.RESET_ALL}")
81 | 


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import argparse
  4 | import markdown
  5 | from termcolor import colored
  6 | from concurrent.futures import ThreadPoolExecutor, as_completed
  7 | from config import api_keys, Models, instruction
  8 | from models.genai_model import scan_code
  9 | from utils.html_helpers import generate_index_html
 10 | from utils.extract_apk_helpers import extract_apk_with_jadx
 11 | 
 12 | 
 13 | def get_available_models():
 14 |     return ', '.join(api_keys.keys())
 15 | 
 16 | 
 17 | def process_file(file_path, code_content, model_key, model_variant, input_dir, output_dir):
 18 |     print(colored(f"[+] Scanning {file_path} with model {model_key}...", "cyan"))
 19 | 
 20 |     result = scan_code(
 21 |         api_keys[model_key],
 22 |         model_variant,
 23 |         instruction,
 24 |         code_content
 25 |     )
 26 | 
 27 |     html_result = markdown.markdown(result)
 28 | 
 29 |     relative_path = os.path.relpath(file_path, input_dir)
 30 |     output_file_dir = os.path.join(output_dir, os.path.dirname(relative_path))
 31 | 
 32 |     os.makedirs(output_file_dir, exist_ok=True)
 33 | 
 34 |     markdown_filename = f"{os.path.splitext(os.path.basename(file_path))[0]}.md"
 35 |     html_filename = f"{os.path.splitext(os.path.basename(file_path))[0]}.html"
 36 | 
 37 |     markdown_path = os.path.join(output_file_dir, markdown_filename)
 38 |     html_path = os.path.join(output_file_dir, html_filename)
 39 | 
 40 |     with open(markdown_path, "w", encoding="utf-8") as markdown_file:
 41 |         markdown_file.write(result)
 42 | 
 43 |     with open(html_path, "w", encoding="utf-8") as html_file:
 44 |         html_file.write(html_result)
 45 | 
 46 |     print(colored(f"[✓] Report saved to {markdown_path} and {html_path}", "green"))
 47 |     return file_path, markdown_path, html_path
 48 | 
 49 | 
 50 | def process_and_generate_reports(all_pathes, model_key, model_variant, input_dir, output_dir, num_threads):
 51 |     files_to_process = []
 52 |     for path in all_pathes:
 53 |         if os.path.exists(path):
 54 |             files_to_process.append((path, open(path, encoding="utf-8").read()))
 55 |     
 56 |     with ThreadPoolExecutor(max_workers=num_threads) as executor:
 57 |         futures = []
 58 |         for file_path, code_content in files_to_process:
 59 |             futures.append(executor.submit(process_file, file_path, code_content, model_key, model_variant, input_dir, output_dir))
 60 | 
 61 |         for future in as_completed(futures):
 62 |             file_path, markdown_path, html_path = future.result()
 63 | 
 64 |     generate_index_html(output_dir)
 65 |     print(colored(f"[✓] Index file created at {os.path.join(output_dir, 'index.html')}", "green"))
 66 | 
 67 | 
 68 | def main():
 69 |     parser = argparse.ArgumentParser(description="AI Code Scanner")
 70 |     parser.add_argument("--apk-path", required=True, help="Path to the APK file")
 71 |     parser.add_argument("--out-dir", required=True, help="Directory to save the decompiled files")
 72 |     parser.add_argument("--target-package", required=True, help="Target package name to find classes (e.g., 'jakhar.aseem.diva')")
 73 |     parser.add_argument("--model-name", required=True, help="Model key (e.g., GENEAI, OPENAI).")
 74 |     parser.add_argument("--report", required=True, help="Directory to save the reports.")
 75 |     parser.add_argument("--threads", type=int, default=1, choices=range(1, 11), help="Number of threads to use for scanning files (default: 1, max: 10).")
 76 | 
 77 |     args = parser.parse_args()
 78 | 
 79 |     input_dir = args.out_dir
 80 |     output_dir = args.report
 81 |     model_key = args.model_name.upper()
 82 |     num_threads = args.threads
 83 | 
 84 |     if model_key not in api_keys or model_key not in Models:
 85 |         print(colored(f"[!] Invalid model key: {model_key}", "red"))
 86 |         print(colored(f"[!] Available model keys are: {get_available_models()}", "yellow"))
 87 |         sys.exit(1)
 88 | 
 89 |     os.makedirs(output_dir, exist_ok=True)
 90 | 
 91 |     model_variant = list(Models[model_key].values())[0]
 92 | 
 93 |     manifest_path, strings_path, target_classes = extract_apk_with_jadx(args.apk_path, args.out_dir, args.target_package)
 94 |     all_pathes = []
 95 |     all_pathes.append(manifest_path)
 96 |     all_pathes.append(strings_path)
 97 |     for target_class in target_classes:
 98 |         all_pathes.append(target_class)
 99 |     process_and_generate_reports(all_pathes, model_key, model_variant, input_dir, output_dir, num_threads)
100 | 
101 | 
102 | if __name__ == "__main__":
103 |     main()
104 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Android APK Scanner with AI
  2 | 
  3 | A powerful static analysis tool that uses AI to scan Android APK files for security vulnerabilities and potential issues. The scanner decompiles APK files and analyzes the source code, manifest, and resource files using AI models.
  4 | 
  5 | ## Table of Contents
  6 | - [Features](#features)
  7 | - [Prerequisites](#prerequisites)
  8 | - [Installation](#installation)
  9 | - [Usage](#usage)
 10 | - [Project Structure](#project-structure)
 11 | - [Configuration](#configuration)
 12 | - [Output Format](#output-format)
 13 | - [Supported AI Models](#supported-ai-models)
 14 | - [Security Analysis Coverage](#security-analysis-coverage)
 15 | - [Error Handling](#error-handling)
 16 | - [Performance](#performance)
 17 | - [Contributing](#contributing)
 18 | - [License](#license)
 19 | - [Acknowledgments](#acknowledgments)
 20 | 
 21 | 
 22 | 
 23 | ## Features
 24 | 
 25 | - APK decompilation using JADX
 26 | - Multi-threaded file analysis
 27 | - Support for multiple AI models (currently supports Google Gemini via the `GENEAI` model key)
 28 | - Comprehensive security vulnerability scanning
 29 | - HTML and Markdown report generation
 30 | - Cross-platform compatibility (Windows/Linux)
 31 | - Interactive report browsing through generated index.html
 32 | 
 33 | 
 34 | ## Prerequisites
 35 | 
 36 | - Python 3.8 or higher
 37 | - JADX decompiler
 38 | - Required Python packages (see requirements.txt)
 39 | - Valid API key for supported AI models
 40 | 
 41 | ## Installation
 42 | 
 43 | 1. Clone the repository:
 44 | ```bash
 45 | git clone <repository-url>
 46 | cd android-scanner-ai
 47 | ```
 48 | 
 49 | 2. Install required Python packages:
 50 | ```bash
 51 | pip install -r requirements.txt
 52 | ```
 53 | 
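 54 | 3. Set up JADX (the scanner looks for it under `installed-tools/jadx/bin`; `install_jadx.sh` / `install_jadx.ps1` download JADX into `installed-tools`):
 55 |    - Windows: Ensure jadx.bat is in the installed-tools/jadx/bin directory
 56 |    - Linux: Ensure jadx is executable in the installed-tools/jadx/bin directory
 57 |    ```bash
 58 |    chmod +x installed-tools/jadx/bin/jadx
 59 |    ```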
 60 | 
 61 | 4. Configure API Keys in config.py:
 62 |    - Obtain your API key from [Google AI Studio](https://aistudio.google.com/app/apikey) and add it to the `config.py` file
 63 | 
 64 | 5. Configure Models in config.py:
 65 |    - Set up your Gemini Model by referring to the [Gemini models](https://ai.google.dev/gemini-api/docs/models) and configure it in the `config.py` file.
 66 | 
 67 | 
 68 | ## Usage
 69 | 
 70 | Run the scanner using the following command:
 71 | 
 72 | ```bash
 73 | python run.py --apk-path <path_to_apk> \
 74 |               --out-dir <output_directory> \
 75 |               --target-package <package_name> \
 76 |               --model-name <AI_model> \
 77 |               --report <report_directory> \
 78 |               --threads <number_of_threads>
 79 | ```
 80 | 
 81 | ### Arguments
 82 | 
 83 | - `--apk-path`: Path to the APK file to analyze
 84 | - `--out-dir`: Directory where decompiled files will be saved
 85 | - `--target-package`: Package name to analyze (e.g., 'com.example.app')
 86 | - `--model-name`: AI model key to use; must be a key defined in `config.py` (currently only 'GENEAI')
 87 | - `--report`: Directory where analysis reports will be saved
 88 | - `--threads`: Number of concurrent analysis threads (1-10, default: 1)
 89 | 
 90 | ## Project Structure
 91 | 
 92 | ```
 93 | android-scanner-ai/
 94 | ├── run.py                      # Main entry point
 95 | ├── config.py                   # Configuration and API keys
 96 | ├── requirements.txt            # Python dependencies
 97 | ├── installed-tools/jadx/       # JADX decompiler
 98 | │   └── bin/
 99 | │       ├── jadx                # Linux executable
100 | │       └── jadx.bat            # Windows executable
101 | ├── models/
102 | │   └── genai_model.py          # AI model integration
103 | └── utils/
104 |     ├── extract_apk_helpers.py  # APK extraction utilities
105 |     └── html_helpers.py         # Report generation utilities
106 | ```
107 | 
108 | ## Configuration
109 | 
110 | Edit `config.py` to configure:
111 | 
112 | 1. AI Model API Keys:
113 | ```python
114 | api_keys = {
115 |     "GENEAI": "your-api-key-here"
116 | }
117 | ```
118 | 
119 | 2. Available Models:
120 | ```python
121 | Models = {
122 |     "GENEAI": {
123 |         "gemini-2.0-flash": "gemini-2.0-flash"
124 |     }
125 | }
126 | ```
127 | 
128 | 3. Analysis Instructions and Rules
129 | 
130 | ## Output Format
131 | 
132 | The scanner generates two types of reports for each analyzed file:
133 | 
134 | 1. Markdown Report (.md):
135 | ```markdown
136 | ## Vulnerability: [Title]
137 | **Severity**: [Low/Medium/High/Critical]
138 | **CWE ID**: [ID]
139 | **Description**: [Details]
140 | **Code Example**: [Relevant Code]
141 | **Recommendation**: [Fix Suggestions]
142 | **References**: [Links]
143 | ```
144 | 
145 | 2. HTML Report:
146 | - Interactive web-based report
147 | - Organized by package structure
148 | - Linked through index.html
149 | 
150 | ## Supported AI Models
151 | 
152 | Currently supported AI models:
153 | - Google Gemini AI (GENEAI)
154 |   - Model: gemini-2.0-flash
155 | 
156 | ## Security Analysis Coverage
157 | 
158 | The scanner analyzes:
159 | 
160 | 1. Java Source Files:
161 |    - Security vulnerabilities
162 |    - Code weaknesses
163 |    - Best practice violations
164 | 
165 | 2. AndroidManifest.xml:
166 |    - Permission issues
167 |    - Component exposure
168 |    - Security configurations
169 | 
170 | 3. strings.xml:
171 |    - Hardcoded credentials
172 |    - Sensitive data
173 |    - Configuration issues
174 | 
175 | ## Error Handling
176 | 
177 | The scanner includes robust error handling for:
178 | - Invalid APK files
179 | - Missing JADX executable
180 | - AI model errors
181 | - File system issues
182 | - Threading problems
183 | 
184 | ## Performance
185 | 
186 | - Multi-threaded analysis (up to 10 threads)
187 | - Concurrent file processing
188 | - Efficient report generation
189 | - Optimized for large APKs
190 | 
191 | 
192 | ## Acknowledgments
193 | 
194 | - JADX decompiler
195 | - Google Gemini AI
196 | 


--------------------------------------------------------------------------------