├── .github ├── FUNDING.yml └── workflows │ ├── codeql.yml │ └── lint.yml ├── LICENSE ├── README.md ├── furl ├── __init__.py ├── banner.py ├── fetch.py ├── fetch_list.py └── furl.py └── setup.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: maryou # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: [https://saweria.co/Maryou]# Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL Analysis" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "main" ] 20 | schedule: 21 | - cron: '15 12 * * 6' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | # Runner size impacts CodeQL analysis time. To learn more, please see: 27 | # - https://gh.io/recommended-hardware-resources-for-running-codeql 28 | # - https://gh.io/supported-runners-and-hardware-resources 29 | # - https://gh.io/using-larger-runners 30 | # Consider using larger runners for possible analysis time improvements. 31 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 32 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 33 | permissions: 34 | actions: read 35 | contents: read 36 | security-events: write 37 | 38 | strategy: 39 | fail-fast: false 40 | matrix: 41 | language: [ 'python' ] 42 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ] 43 | # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both 44 | # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 45 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 46 | 47 | steps: 48 | - name: Checkout repository 49 | uses: actions/checkout@v3 50 | 51 | # Initializes the CodeQL tools for scanning. 52 | - name: Initialize CodeQL 53 | uses: github/codeql-action/init@v2 54 | with: 55 | languages: ${{ matrix.language }} 56 | # If you wish to specify custom queries, you can do so here or in a config file. 57 | # By default, queries listed here will override any specified in a config file. 58 | # Prefix the list here with "+" to use these queries and those in the config file. 59 | 60 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 61 | # queries: security-extended,security-and-quality 62 | 63 | 64 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 65 | # If this step fails, then you should remove it and run the build manually (see below) 66 | - name: Autobuild 67 | uses: github/codeql-action/autobuild@v2 68 | 69 | # ℹ️ Command-line programs to run using the OS shell. 70 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 71 | 72 | # If the Autobuild fails above, remove it and uncomment the following three lines. 73 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 74 | 75 | # - run: | 76 | # echo "Run, Build Application using script" 77 | # ./location_of_script_within_repo/buildscript.sh 78 | 79 | - name: Perform CodeQL Analysis 80 | uses: github/codeql-action/analyze@v2 81 | with: 82 | category: "/language:${{matrix.language}}" 83 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint Test 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.8", "3.9", "3.10"] 11 | steps: 12 | - uses: actions/checkout@v3 13 | - name: Set up Python ${{ matrix.python-version }} 14 | uses: actions/setup-python@v3 15 | with: 16 | python-version: ${{ matrix.python-version }} 17 | - name: Install dependencies 18 | run: | 19 | python -m pip install --upgrade pip 20 | pip install pylint 21 | pip install requests 22 | pip install colorama 23 | pip install pyfiglet 24 | - name: Analysing the code with pylint 25 | run: | 26 | pylint $(git ls-files '*.py') 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Maryoux 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

Furl

2 |

Mining URLs from Wayback Machine for bug hunting/fuzzing/further probing

3 | 4 |

5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 |

14 | 15 | --- 16 | Furl is a tool for mining URLs from Wayback Machine for bug hunting/fuzzing/further probing 17 | ![ray-so-export (2)](https://github.com/Maryoux/furl/assets/69157007/0ad46660-51d1-47fe-9766-62e070a8234a) 18 | 19 | 20 | # Installation 21 | ```sh 22 | git clone https://github.com/Maryoux/furl.git 23 | cd furl 24 | pip install . 25 | ``` 26 | # Usage 27 | To use `furl`, run this command below: 28 | ``` 29 | furl -d example.com 30 | ``` 31 | -------------------------------------------------------------------------------- /furl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Maryoux/Furl/d9f66d09d5c62b1a3cc48654a8327f4b54e8d72f/furl/__init__.py -------------------------------------------------------------------------------- /furl/banner.py: -------------------------------------------------------------------------------- 1 | """ 2 | Creating a logo using PyFiglet 3 | Maryoux 4 | """ 5 | from colorama import Style 6 | from pyfiglet import Figlet 7 | 8 | def display_banner(): 9 | """Function used to call pyfiglet to create a 'Furl' text""" 10 | custom_fig = Figlet(font='slant') 11 | print(custom_fig.renderText('furl') + Style.RESET_ALL) 12 | print("\tv1.0.2") 13 | print("\tMaryoux\n\n") 14 | -------------------------------------------------------------------------------- /furl/fetch.py: -------------------------------------------------------------------------------- 1 | """ Fetch URLs from Wayback Machine """ 2 | import urllib.parse 3 | import requests 4 | from requests.exceptions import RequestException 5 | 6 | def fetch_parameters_from_wayback(domain, output_filename=None, parameter_value="FUZZ"): 7 | """ 8 | Fetch unique URLs with parameters from the Wayback Machine for a given domain 9 | and append them to an output file or print them to the console. 10 | 11 | Args: 12 | domain (str): The domain to fetch URLs for. 13 | output_filename (str, optional): The name of the output file. If provided, 14 | unique URLs will be saved to this file. If not provided, unique URLs 15 | will be printed to the console. Default is None. 16 | parameter_value (str, optional): The value to replace query parameters with. 17 | Default is "FUZZ". 18 | 19 | Returns: 20 | None 21 | """ 22 | try: 23 | data = get_wayback_data(domain) 24 | 25 | if data: 26 | interesting_extensions = get_interesting_extensions() 27 | urls_with_parameters = filter_urls_with_parameters( 28 | data, interesting_extensions, parameter_value) 29 | 30 | if urls_with_parameters: 31 | process_urls(urls_with_parameters, output_filename) 32 | else: 33 | print(f"No URLs with parameters found for {domain}.") 34 | else: 35 | print(f"No snapshots found for {domain}.") 36 | 37 | except RequestException as e: 38 | print(f"An error occurred during the request for {domain}: {e}") 39 | 40 | def get_wayback_data(domain): 41 | """ 42 | Retrieve Wayback Machine data for a given domain. 43 | 44 | Args: 45 | domain (str): The domain to fetch data for. 46 | 47 | Returns: 48 | list: A list of data records from the Wayback Machine. 49 | """ 50 | wayback_url = ( 51 | f"http://web.archive.org/cdx/search/cdx?url={domain}/*" 52 | "&output=json&fl=original&collapse=urlkey&statuscode:200" 53 | ) 54 | 55 | response = requests.get(wayback_url, timeout=10) 56 | data = response.json() 57 | return data 58 | 59 | def get_interesting_extensions(): 60 | """ 61 | Get a set of file extensions considered interesting. 62 | 63 | Returns: 64 | set: A set of interesting file extensions. 65 | """ 66 | return { 67 | '.html', '.htm', '.php', '.asp', '.aspx', '.jsp', '.js', '.css', 68 | '.xml', '.json', '.rss', '.atom', '.md', '.pdf', '.doc', 69 | '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.csv', '.zip', '.rar', 70 | '.tar', '.gz', '.7z', '.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.ico', 71 | '.webp', '.woff', '.woff2', '.eot', '.ttf', '.otf', '.mp4', '.txt' 72 | } 73 | 74 | def filter_urls_with_parameters(data, interesting_extensions, parameter_value): 75 | """ 76 | Filter URLs with query parameters from Wayback Machine data. 77 | 78 | Args: 79 | data (list): List of data records from the Wayback Machine. 80 | interesting_extensions (set): Set of interesting file extensions. 81 | parameter_value (str): The value to replace query parameters with. 82 | 83 | Returns: 84 | list: List of URLs with modified query parameters. 85 | """ 86 | urls_with_parameters = [] 87 | 88 | for item in data: 89 | url = item[0] 90 | parsed_url = urllib.parse.urlparse(url) 91 | 92 | if ( 93 | parsed_url.query and 94 | any(parsed_url.path.endswith(ext) for ext in interesting_extensions) 95 | ): 96 | query_params = urllib.parse.parse_qs(parsed_url.query, keep_blank_values=True) 97 | 98 | for param_name in query_params: 99 | query_params[param_name] = [parameter_value] 100 | 101 | query_string = urllib.parse.urlencode(query_params, doseq=True) 102 | modified_url = urllib.parse.urlunparse(parsed_url._replace(query=query_string)) 103 | 104 | urls_with_parameters.append(modified_url) 105 | 106 | return urls_with_parameters 107 | 108 | def process_urls(urls, output_filename): 109 | """ 110 | Process URLs by either printing unique ones to the console or saving them to a file. 111 | 112 | Args: 113 | urls (list): List of URLs to process. 114 | output_filename (str): The name of the output file. If provided, 115 | unique URLs will be saved to this file. If not provided, unique URLs 116 | will be printed to the console. 117 | 118 | Returns: 119 | None 120 | """ 121 | unique_urls = list(set(urls)) 122 | count = len(unique_urls) 123 | if output_filename: 124 | with open(output_filename, 'a', encoding='utf-8') as output_file: 125 | for url in unique_urls: 126 | output_file.write(url + '\n') 127 | print(f"Filtered {count} unique URLs saved to {output_filename}") 128 | else: 129 | for url in unique_urls: 130 | print(url) 131 | -------------------------------------------------------------------------------- /furl/fetch_list.py: -------------------------------------------------------------------------------- 1 | """ 2 | Fetching URLs from list of domains 3 | """ 4 | from .fetch import fetch_parameters_from_wayback 5 | 6 | def fetch_parameters_from_list(input_filename, output_filename=None, parameter_value="FUZZ"): 7 | """ 8 | This function is used to fetch parameters from the Wayback Machine 9 | for domains listed in a file and potentially save them to an output file 10 | """ 11 | with open(input_filename, 'r', encoding='utf-8') as input_file: 12 | domains = input_file.read().splitlines() 13 | 14 | for domain in domains: 15 | fetch_parameters_from_wayback(domain, output_filename,parameter_value) 16 | -------------------------------------------------------------------------------- /furl/furl.py: -------------------------------------------------------------------------------- 1 | """ 2 | Furl.py: A script for fetching URLs with parameters from the Wayback Machine. 3 | 4 | Usage: 5 | python furl.py (-d domain.com | -l list.txt) [-o output.txt] 6 | 7 | Options: 8 | -d, --domain Specify a single domain name to search for (e.g., domain.com). 9 | -l, --list Specify a .txt file containing a list of domains, one per line. 10 | -p, --parameter Specify the parameter value. 11 | -o, --output Specify an optional output .txt file name. 12 | """ 13 | import argparse 14 | import sys 15 | from .banner import display_banner 16 | from .fetch import fetch_parameters_from_wayback 17 | from .fetch_list import fetch_parameters_from_list 18 | 19 | def main(): 20 | """ 21 | This function coordinates the overall flow of your script, 22 | from parsing command-line arguments to invoking the core 23 | functionality and controlling the output based on the provided arguments. 24 | """ 25 | display_banner() 26 | parser = argparse.ArgumentParser() 27 | group = parser.add_mutually_exclusive_group(required=True) 28 | group.add_argument('-d', '--domain', help='Single domain name (e.g., domain.com)') 29 | group.add_argument('-l', '--list', help='Path to a .txt file containing a list of domains') 30 | parser.add_argument('-p', '--parameter', help='Specify the parameter value', default='FUZZ') 31 | parser.add_argument('-o', '--output', help='Specify the output .txt file name') 32 | 33 | args = parser.parse_args(args=None if sys.argv[1:] else ['--help']) 34 | 35 | if args.domain: 36 | fetch_parameters_from_wayback(args.domain, args.output, args.parameter) 37 | elif args.list: 38 | fetch_parameters_from_list(args.list, args.output, args.parameter) 39 | 40 | if __name__ == "__main__": 41 | main() 42 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup for Furl""" 2 | from setuptools import setup, find_packages 3 | with open('README.md', encoding='utf-8') as f: 4 | desc = f.read() 5 | setup( 6 | name='furl', 7 | version='1.0.2', 8 | author='Maryoux', 9 | description='Mining URLs parameter from Wayback', 10 | packages=find_packages(), 11 | install_requires=[ 12 | 'colorama', 13 | 'pyfiglet', 14 | 'requests', 15 | ], 16 | entry_points={ 17 | 'console_scripts': [ 18 | 'furl = furl.furl:main' 19 | ] 20 | }, 21 | license='MIT', 22 | long_description=desc, 23 | long_description_content_type='text/plain' # Change to 'text/markdown' if using Markdown 24 | ) 25 | --------------------------------------------------------------------------------