├── .gitignore ├── Animation.gif ├── README.md ├── deeplinkparser.py └── html_report.png /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | scan_results/* -------------------------------------------------------------------------------- /Animation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shapa7276/Android-Deeplink-Parser/df9ba50e0e971c1771b47a4993cff6801d037955/Animation.gif -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Android-Deeplink-Parser 2 | 3 | ## This script parses an APK file and lists all deeplinks of the Android application 4 | 5 | ``` 6 | usage: deeplinkparser.py [-h] -a APK [-f] [-o OUTPUT] 7 | 8 | Android APK Deeplink Scanner 9 | 10 | options: 11 | -h, --help show this help message and exit 12 | -a, --apk APK Path to the APK file to analyze 13 | -f, --force Force rescan even if results already exist 14 | -o, --output OUTPUT Custom output directory for results (default: scan_results) 15 | 16 | Examples: 17 | python script.py -a path/to/app.apk 18 | python script.py --apk app.apk 19 | python script.py -a app.apk --force # Force rescan even if results exist 20 | ``` 21 | # Output is saved in HTML format 22 | ![Alt Text](html_report.png) 23 | 24 | ![Alt Text](Animation.gif) 25 | 26 | 27 | # Example 28 | 29 | ```XML 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | ``` 46 | 47 | # Output 48 | AndroidManifest.xml and strings.xml will be parsed and all deeplinks will be listed as shown below 49 | 50 | ```bash 51 | ------------------------------------com.lol.test.LaunchActivity---------------------------------------------- 52 | 53 | https://example.lol.com/test 54 | lol://lol 55 | word:// 56 | ``` 57 | 58 | # Installation 59 | ``` 60 | Install the apktool 61 | 
"""Android APK deeplink scanner.

Decompiles an APK with apktool, parses ``AndroidManifest.xml`` and the
``res/values*/strings.xml`` files, and reports every deeplink and every
explicitly exported component as a JSON file plus an HTML report.

Installation:
    sudo apt-get install apktool
    git clone https://github.com/Shapa7276/Android-Deeplink-Parser.git

Usage:
    python3 deeplinkparser.py -a facebook.apk

Reference:
    https://developer.android.com/training/app-links/deep-linking
"""

from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError
import xml.dom.minidom
import html
import os
import shutil
import subprocess
import time
import sys
import json
from datetime import datetime
import argparse
from pathlib import Path

# Prefix Android uses for string-resource references in the manifest.
STRING_REF_PREFIX = '@string/'

# Everything that reading + parsing a decompiled XML file can raise:
# missing/unreadable file, undecodable bytes (UnicodeDecodeError is a
# ValueError) and malformed XML.
_XML_LOAD_ERRORS = (OSError, ValueError, ExpatError)

# NOTE(review): the markup and CSS of the original report template were not
# visible in the reviewed copy of this file (only its text and placeholders
# survived). This is a minimal reconstruction around the same headings and
# placeholders -- restore the original styling from the repository if needed.
_HTML_TEMPLATE = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>APK Deeplink Report</title>
<style>
body { font-family: sans-serif; margin: 2em; background: #f5f5f5; }
.container { max-width: 960px; margin: auto; background: #fff; padding: 1.5em; }
.timestamp { color: #666; margin-bottom: 1.5em; }
.activity { margin: 1em 0; padding: 0.5em 1em; border-left: 4px solid #4a90d9; }
.activity-name { font-weight: bold; }
.component { font-family: monospace; padding: 0.2em 0; }
code { word-break: break-all; }
</style>
</head>
<body>
<div class="container">
<h1>Deeplink Report</h1>
<h2>{APK_Name}</h2>
<div class="timestamp">Generated on: {SCAN_DATE}</div>
<div class="section">
<h2>Deeplinks</h2>
{DEEPLINKS_PLACEHOLDER}
</div>
<div class="section">
<h2>Exported Components</h2>
{EXPORTED_PLACEHOLDER}
</div>
</div>
</body>
</html>
"""


class DeeplinkScanner:
    """Scan one APK for deeplinks and exported components.

    Attributes set by the constructor:
        apk_path: path of the APK as given by the caller.
        apk_name: base file name of the APK.
        out_dir: apktool output directory (APK name without extension,
            spaces replaced by underscores), relative to the working directory.
        results_dir: directory receiving the JSON results and HTML report.
        string_resources: cache mapping string-resource names to raw values.
    """

    def __init__(self, apk_path, results_dir="scan_results"):
        """Create a scanner for *apk_path*.

        Args:
            apk_path (str): Path to the APK file.
            results_dir (str): Directory for results; may also be changed
                later by assigning ``scanner.results_dir``.
        """
        self.apk_path = apk_path
        self.apk_name = os.path.basename(apk_path)
        self.out_dir = self.apk_name.rsplit(".", 1)[0].replace(' ', '_')
        self.results_dir = results_dir
        self._results_file_override = None
        self.string_resources = {}  # Cache for string resources
        self.string_resources_loaded = False
        self.resolution_stack = set()

    @property
    def results_file(self):
        """Path of the JSON results file.

        Derived from ``results_dir`` on every access so that changing the
        output directory after construction is honoured; an explicitly
        assigned value takes precedence.
        """
        if self._results_file_override is not None:
            return self._results_file_override
        return os.path.join(self.results_dir, f"{self.out_dir}_results.json")

    @results_file.setter
    def results_file(self, value):
        self._results_file_override = value

    def should_scan(self):
        """Return True when no cached results exist for this APK.

        Creates the results directory as a side effect.
        """
        os.makedirs(self.results_dir, exist_ok=True)
        return not os.path.exists(self.results_file)

    def load_string_resources(self):
        """Load string resources from every ``res/values*`` directory.

        Directories are processed in sorted order with the default
        ``values`` directory last, so the default-locale value of a string
        deterministically wins over localized variants.
        """
        if self.string_resources_loaded:
            return

        res_dir = os.path.join(self.out_dir, 'res')
        try:
            names = [n for n in os.listdir(res_dir) if n.startswith('values')]
        except FileNotFoundError:
            names = []  # APK without resources: nothing to load
        except OSError as e:
            print(f"Error loading string resources: {str(e)}")
            self.string_resources = {}
            return

        for dir_name in sorted(names, key=lambda n: (n == 'values', n)):
            strings_path = os.path.join(res_dir, dir_name, 'strings.xml')
            if os.path.exists(strings_path):
                self._parse_strings_file(strings_path)

        self.string_resources_loaded = True

    def _parse_strings_file(self, file_path):
        """Parse one strings.xml file into ``self.string_resources``.

        Unreadable or malformed files are reported and skipped.
        """
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                strdom = parseString(f.read())
        except _XML_LOAD_ERRORS as e:
            print(f"Error parsing strings file {file_path}: {str(e)}")
            return

        for string_elem in strdom.getElementsByTagName('string'):
            if not string_elem.hasAttribute("name"):
                continue

            # Concatenate plain text and CDATA children only.
            text = ''.join(
                node.data
                for node in string_elem.childNodes
                if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE)
            )

            # Undo the escaping used inside Android string resources.
            text = text.strip()
            text = text.replace("\\'", "'")
            text = text.replace('\\"', '"')
            text = text.replace('\\n', '\n')

            self.string_resources[string_elem.getAttribute("name")] = text

    def resolve_string_reference(self, reference):
        """Resolve a string reference, following nested references.

        Args:
            reference (str): A reference such as '@string/some_name', or a
                literal value.

        Returns:
            str: The literal itself when it is not a reference, the resolved
            value otherwise, or None when the resource is missing, the
            reference chain is circular, or *reference* is not a string.
        """
        if not isinstance(reference, str):
            return None
        if not reference.startswith(STRING_REF_PREFIX):
            return reference

        resource_name = reference[len(STRING_REF_PREFIX):]

        if resource_name in self.resolution_stack:
            print(f"Warning: Circular reference detected for '{resource_name}'")
            return None

        if resource_name not in self.string_resources:
            print(f"Warning: String resource '{resource_name}' not found")
            return None

        # Track names currently being resolved to detect cycles.
        self.resolution_stack.add(resource_name)
        try:
            return self.resolve_string_reference(
                self.string_resources[resource_name]
            )
        finally:
            self.resolution_stack.discard(resource_name)

    def strdomvalue(self, name):
        """Resolve a string resource reference to its final value.

        Args:
            name (str): The reference (e.g. "@string/resource_name").

        Returns:
            str: The resolved value or None if it cannot be resolved.
        """
        if not self.string_resources_loaded:
            self.load_string_resources()

        # Each top-level resolution starts with a clean cycle tracker.
        self.resolution_stack.clear()
        return self.resolve_string_reference(name)

    def _load_manifest(self):
        """Parse the decompiled AndroidManifest.xml and return its DOM."""
        manifest_path = os.path.join(self.out_dir, 'AndroidManifest.xml')
        with open(manifest_path, 'r', encoding='utf-8') as f:
            return parseString(f.read())

    def _resolve_attribute(self, element, attribute):
        """Return *attribute* of *element* with string references resolved.

        Returns None when the attribute is absent or cannot be resolved.
        """
        if not element.hasAttribute(attribute):
            return None
        raw_value = element.getAttribute(attribute)
        if "@string" in raw_value:
            return self.strdomvalue(raw_value)
        return raw_value

    def deeplink(self):
        """Collect deeplinks per activity / activity-alias.

        Returns:
            list[dict]: One ``{"activity": name, "deeplinks": [urls]}`` entry
            per component that declares at least one deeplink; an empty list
            when the manifest cannot be read or parsed.
        """
        try:
            dom = self._load_manifest()
        except _XML_LOAD_ERRORS as e:
            print(f"Error processing deeplinks: {str(e)}")
            return []

        activities = (list(dom.getElementsByTagName('activity'))
                      + list(dom.getElementsByTagName('activity-alias')))

        deeplinks = []
        for activity in activities:
            activity_deeplinks = []
            for intent in activity.getElementsByTagName("intent-filter"):
                # All <data> tags of one intent filter combine with each other.
                data_tags = intent.getElementsByTagName("data")
                if data_tags:
                    activity_deeplinks.extend(
                        self.process_intent_filter_data(data_tags)
                    )

            if activity_deeplinks:
                # A component without a name must not abort the whole scan.
                activity_name = (activity.getAttribute("android:name")
                                 or "(unnamed component)")
                deeplinks.append({
                    "activity": activity_name,
                    # Drop duplicates across filters, keeping first-seen order.
                    "deeplinks": list(dict.fromkeys(activity_deeplinks)),
                })

        return deeplinks

    def process_intent_filter_data(self, dataTags):
        """Combine the <data> tags of one intent filter into URLs.

        Every scheme is combined with every host and every path
        (``android:path``, ``android:pathPrefix``, ``android:pathPattern``).
        A scheme declared without any host yields ``scheme://``.

        Args:
            dataTags: Iterable of <data> DOM elements.

        Returns:
            list[str]: Sorted, de-duplicated URLs.
        """
        schemes = set()
        hosts = set()
        paths = set()

        for data in dataTags:
            scheme = self._resolve_attribute(data, "android:scheme")
            if scheme:
                schemes.add(scheme)

            host = self._resolve_attribute(data, "android:host")
            if host:
                hosts.add(host)

            exact_path = self._resolve_attribute(data, "android:path")
            if exact_path:
                paths.add(exact_path)

            prefix = self._resolve_attribute(data, "android:pathPrefix")
            if prefix:
                # Drop only a literal trailing ".*"; rstrip('.*') would eat
                # any run of trailing dots and asterisks.
                if prefix.endswith('.*'):
                    prefix = prefix[:-2]
                paths.add(prefix)

            pattern = self._resolve_attribute(data, "android:pathPattern")
            if pattern:
                # Show the ".*" wildcard in a more readable form.
                paths.add(pattern.replace(".*", "{wildcard}"))

        urls = set()
        for scheme in schemes:
            if not hosts:
                urls.add(f"{scheme}://")
                continue
            for host in hosts:
                base_url = f"{scheme}://{host}"
                if not paths:
                    urls.add(base_url)
                for path in paths:
                    if not path.startswith('/'):
                        path = '/' + path
                    urls.add(f"{base_url}{path}")

        return sorted(urls)

    def _render_html_report(self, results):
        """Return the HTML report for *results* as a string.

        All values taken from the APK are HTML-escaped.
        """
        deeplinks_html = []
        for entry in results.get('deeplinks', []):
            name = html.escape(str(entry.get('activity', '')))
            items = "\n".join(
                f"<li><code>{html.escape(str(link))}</code></li>"
                for link in entry.get('deeplinks', [])
            )
            deeplinks_html.append(
                '<div class="activity">\n'
                f'<div class="activity-name">{name}</div>\n'
                f'<ul class="deeplinks">\n{items}\n</ul>\n'
                '</div>'
            )

        exported_html = []
        for comp_type, components in results.get('exported_components', {}).items():
            if components:
                exported_html.append(f"<h3>{html.escape(comp_type.title())}</h3>")
                exported_html.extend(
                    f'<div class="component">{html.escape(str(component))}</div>'
                    for component in components
                )

        replacements = {
            "{APK_Name}": html.escape(str(results.get('apk_name', self.apk_name))),
            "{SCAN_DATE}": html.escape(str(results.get('scan_date', ''))),
            "{DEEPLINKS_PLACEHOLDER}": "\n".join(deeplinks_html),
            "{EXPORTED_PLACEHOLDER}": "\n".join(exported_html),
        }
        html_content = _HTML_TEMPLATE
        for placeholder, value in replacements.items():
            html_content = html_content.replace(placeholder, value)
        return html_content

    def generate_html_report(self, results):
        """Write the HTML report for *results* and return its path."""
        os.makedirs(self.results_dir, exist_ok=True)
        report_path = os.path.join(self.results_dir, f"{self.out_dir}_report.html")
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(self._render_html_report(results))
        return report_path

    def process_data_tag(self, data):
        """Build a single URL from one <data> element.

        Returns:
            str: ``scheme://host[path]``, ``scheme://`` when no host is
            declared, or None when the element declares no scheme.
        """
        if not data.attributes:
            return None

        scheme = self._resolve_attribute(data, "android:scheme")
        host = self._resolve_attribute(data, "android:host")

        path = None
        for path_type in ("android:pathPrefix", "android:pathPattern", "android:path"):
            if data.hasAttribute(path_type):
                path = self._resolve_attribute(data, path_type)
                break

        if not scheme:
            return None
        if host:
            return f"{scheme}://{host}{path if path else ''}"
        return f"{scheme}://"

    def exported_components(self):
        """List components that declare ``android:exported="true"``.

        Returns:
            dict[str, list[str]]: Component names keyed by "activities",
            "receivers", "providers" and "services".

        Raises:
            OSError, ValueError, ExpatError: when the manifest cannot be read
            or parsed.
        """
        dom = self._load_manifest()

        components = {
            "activities": (list(dom.getElementsByTagName('activity'))
                           + list(dom.getElementsByTagName('activity-alias'))),
            "receivers": dom.getElementsByTagName('receiver'),
            "providers": dom.getElementsByTagName('provider'),
            "services": dom.getElementsByTagName('service'),
        }

        exported = {comp_type: [] for comp_type in components}
        for comp_type, elements in components.items():
            for element in elements:
                if element.getAttribute("android:exported") == 'true':
                    exported[comp_type].append(
                        element.getAttribute("android:name")
                    )

        return exported

    def _decompile(self):
        """Run apktool on the APK, writing to ``self.out_dir``.

        Raises:
            RuntimeError: when apktool is not installed or exits with an error.
        """
        # shutil.which honours PATHEXT, so apktool.bat is found on Windows.
        apktool = shutil.which("apktool")
        if apktool is None:
            raise RuntimeError("apktool was not found on PATH; install it first")

        # Argument list (no shell): APK paths with spaces or shell
        # metacharacters are passed through verbatim. -f replaces a stale
        # output directory left behind by an interrupted run.
        cmd = [apktool, "d", "-f", self.apk_path, "-o", self.out_dir]
        print(f"Decompiling APK: {' '.join(cmd)}")
        completed = subprocess.run(cmd, check=False)
        if completed.returncode != 0:
            raise RuntimeError(
                f"apktool failed with exit status {completed.returncode}"
            )

    def scan(self, force=False):
        """Scan the APK (or load cached results) and write the HTML report.

        Args:
            force (bool): Rescan even when cached results exist.

        Returns:
            dict: The scan results.
        """
        if not force and not self.should_scan():
            print(f"Results already exist for {self.apk_name}. Loading cached results...")
            results = self.load_results()
        else:
            self._decompile()
            try:
                results = {
                    "apk_name": self.apk_name,
                    "scan_date": datetime.now().isoformat(),
                    "deeplinks": self.deeplink(),
                    "exported_components": self.exported_components()
                }
            finally:
                # Always remove the decompiled tree, even if parsing failed.
                shutil.rmtree(self.out_dir, ignore_errors=True)

            self.save_results(results)

        report_path = self.generate_html_report(results)
        print(f"\nHTML report generated: {report_path}")

        return results

    def save_results(self, results):
        """Write *results* as JSON to ``self.results_file``."""
        # The directory may not exist yet when the cache check was skipped.
        os.makedirs(os.path.dirname(self.results_file) or ".", exist_ok=True)
        with open(self.results_file, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=4)
        print(f"Results saved to {self.results_file}")

    def load_results(self):
        """Load previously saved results from ``self.results_file``."""
        with open(self.results_file, 'r', encoding='utf-8') as f:
            return json.load(f)


def validate_apk(apk_path):
    """Validate that the APK file exists and has an .apk extension.

    Exits the process with status 1 on failure.

    Returns:
        Path: Absolute path of the APK.
    """
    path = Path(apk_path)

    if not path.is_file():
        print(f"Error: APK file not found: {apk_path}")
        sys.exit(1)

    if path.suffix.lower() != '.apk':
        print(f"Error: File does not have .apk extension: {apk_path}")
        sys.exit(1)

    return path.absolute()


def parse_arguments():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(
        description='Android APK Deeplink Scanner',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # %(prog)s is expanded by argparse to the real script name.
        epilog='''
Examples:
    python %(prog)s -a path/to/app.apk
    python %(prog)s --apk app.apk
    python %(prog)s -a app.apk --force  # Force rescan even if results exist
        '''
    )

    parser.add_argument(
        '-a', '--apk',
        required=True,
        help='Path to the APK file to analyze'
    )

    parser.add_argument(
        '-f', '--force',
        action='store_true',
        help='Force rescan even if results already exist'
    )

    parser.add_argument(
        '-o', '--output',
        help='Custom output directory for results (default: scan_results)',
        default='scan_results'
    )

    return parser.parse_args()


def main():
    """Command line entry point."""
    args = parse_arguments()
    apk_path = validate_apk(args.apk)

    try:
        scanner = DeeplinkScanner(
            str(apk_path),
            results_dir=args.output or 'scan_results',
        )

        print(f"\nScanning APK: {apk_path}")
        scanner.scan(force=args.force)

        report_file = os.path.join(
            scanner.results_dir, f"{scanner.out_dir}_report.html"
        )
        print("\nScan completed successfully!")
        print(f"Results saved to: {scanner.results_file}")
        print(f"HTML report available at: {report_file}")

    except Exception as e:  # top-level boundary: report and exit non-zero
        print(f"\nError during scan: {str(e)}")
        sys.exit(1)


if __name__ == "__main__":
    main()
-------------------------------------------------------------------------------- /html_report.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Shapa7276/Android-Deeplink-Parser/df9ba50e0e971c1771b47a4993cff6801d037955/html_report.png --------------------------------------------------------------------------------