├── .github
├── FUNDING.yml
└── workflows
│ ├── codeql.yml
│ └── lint.yml
├── LICENSE
├── README.md
├── furl
├── __init__.py
├── banner.py
├── fetch.py
├── fetch_list.py
└── furl.py
└── setup.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
# These are supported funding model platforms

github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: maryou # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
# The URL is quoted and separated from the trailing comment by whitespace:
# a '#' only starts a YAML comment when it is preceded by a space.
custom: ['https://saweria.co/Maryou'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
# For most projects, this workflow file will not need changing; you simply need
# to commit it to your repository.
#
# You may wish to alter this file to override the set of languages analyzed,
# or to provide custom queries or build logic.
#
# ******** NOTE ********
# We have attempted to detect the languages in your repository. Please check
# the `language` matrix defined below to confirm you have the correct set of
# supported CodeQL languages.
#
name: "CodeQL Analysis"

on:
  push:
    branches: [ "main" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "main" ]
  schedule:
    - cron: '15 12 * * 6'

jobs:
  analyze:
    name: Analyze
    # Runner size impacts CodeQL analysis time. To learn more, please see:
    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
    #   - https://gh.io/supported-runners-and-hardware-resources
    #   - https://gh.io/using-larger-runners
    # Consider using larger runners for possible analysis time improvements.
    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      fail-fast: false
      matrix:
        language: [ 'python' ]
        # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby', 'swift' ]
        # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both
        # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
        # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      # The CodeQL Action v2 line has been retired; all three CodeQL steps
      # below must use the same major version.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
          # If you wish to specify custom queries, you can do so here or in a config file.
          # By default, queries listed here will override any specified in a config file.
          # Prefix the list here with "+" to use these queries and those in the config file.

          # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
          # queries: security-extended,security-and-quality


      # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift).
      # If this step fails, then you should remove it and run the build manually (see below)
      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      # ℹ️ Command-line programs to run using the OS shell.
      # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun

      #   If the Autobuild fails above, remove it and uncomment the following three lines.
      #   modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.

      # - run: |
      #     echo "Run, Build Application using script"
      #     ./location_of_script_within_repo/buildscript.sh

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          category: "/language:${{matrix.language}}"
83 |
--------------------------------------------------------------------------------
/.github/workflows/lint.yml:
--------------------------------------------------------------------------------
name: Lint Test

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions stay quoted so "3.10" is not read as the float 3.1.
        python-version: ["3.8", "3.9", "3.10"]
    steps:
      # Current major versions of the official actions; the v3 releases
      # run on a Node.js runtime that GitHub has deprecated.
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      # pylint resolves imports, so the runtime dependencies are installed too.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pylint
          pip install requests
          pip install colorama
          pip install pyfiglet
      - name: Analysing the code with pylint
        run: |
          pylint $(git ls-files '*.py')
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Maryoux
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
Furl
2 | Mining URLs from Wayback Machine for bug hunting/fuzzing/further probing
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 | ---
16 | Furl is a tool for mining URLs from the Wayback Machine for bug hunting, fuzzing, and further probing.
17 | 
18 |
19 |
20 | # Installation
21 | ```sh
22 | git clone https://github.com/Maryoux/furl.git
23 | cd furl
24 | pip install .
25 | ```
26 | # Usage
27 | To use `furl`, run the command below:
28 | ```
29 | furl -d example.com
30 | ```
31 |
--------------------------------------------------------------------------------
/furl/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Maryoux/Furl/d9f66d09d5c62b1a3cc48654a8327f4b54e8d72f/furl/__init__.py
--------------------------------------------------------------------------------
/furl/banner.py:
--------------------------------------------------------------------------------
1 | """
2 | Creating a logo using PyFiglet
3 | Maryoux
4 | """
5 | from colorama import Style
6 | from pyfiglet import Figlet
7 |
def display_banner():
    """Print the 'furl' ASCII-art logo followed by the version and author lines."""
    # Render the logo with PyFiglet's 'slant' font.
    logo = Figlet(font='slant').renderText('furl')
    # Append colorama's reset code so later output is not styled.
    print(logo + Style.RESET_ALL)
    for caption in ("\tv1.0.2", "\tMaryoux\n\n"):
        print(caption)
14 |
--------------------------------------------------------------------------------
/furl/fetch.py:
--------------------------------------------------------------------------------
1 | """ Fetch URLs from Wayback Machine """
2 | import urllib.parse
3 | import requests
4 | from requests.exceptions import RequestException
5 |
def fetch_parameters_from_wayback(domain, output_filename=None, parameter_value="FUZZ"):
    """
    Collect parameterised URLs archived for a domain and hand them to the output step.

    The Wayback Machine records for the domain are fetched, reduced to URLs
    that carry a query string (each value replaced by ``parameter_value``),
    and then passed to ``process_urls``. A short message is printed instead
    when there is nothing to report or when the request fails.

    Args:
        domain (str): The domain to fetch URLs for.
        output_filename (str, optional): File to append the unique URLs to.
            When omitted, the URLs are printed to the console. Default is None.
        parameter_value (str, optional): Replacement for every query
            parameter value. Default is "FUZZ".

    Returns:
        None
    """
    try:
        snapshots = get_wayback_data(domain)
        if not snapshots:
            print(f"No snapshots found for {domain}.")
            return

        fuzzable_urls = filter_urls_with_parameters(
            snapshots, get_interesting_extensions(), parameter_value)
        if not fuzzable_urls:
            print(f"No URLs with parameters found for {domain}.")
            return

        process_urls(fuzzable_urls, output_filename)
    except RequestException as e:
        # Request failures are reported, not raised, so a caller looping over
        # several domains can carry on with the next one.
        print(f"An error occurred during the request for {domain}: {e}")
39 |
def get_wayback_data(domain):
    """
    Retrieve Wayback Machine CDX data for every archived URL under a domain.

    Args:
        domain (str): The domain to fetch data for.

    Returns:
        list: The decoded JSON body of the CDX response. Callers read the URL
            from index 0 of each row; with ``output=json`` the first row is
            presumably the field-name header (``["original"]``) -- confirm
            against the CDX API documentation.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts,
            or an HTTP error status.
    """
    # The status restriction must be sent through the `filter` parameter;
    # a bare "statuscode:200" is not a recognised query argument.
    wayback_url = (
        f"http://web.archive.org/cdx/search/cdx?url={domain}/*"
        "&output=json&fl=original&collapse=urlkey&filter=statuscode:200"
    )

    response = requests.get(wayback_url, timeout=10)
    # Surface 4xx/5xx replies as a RequestException subclass instead of trying
    # to JSON-decode an error page.
    response.raise_for_status()
    return response.json()
58 |
def get_interesting_extensions():
    """
    Build the set of file extensions considered interesting.

    The extensions are grouped by kind for readability only; the result is a
    single flat set.

    Returns:
        set: A set of interesting file extensions, each with a leading dot.
    """
    web = ('.html', '.htm', '.php', '.asp', '.aspx', '.jsp', '.js', '.css')
    data = ('.xml', '.json', '.rss', '.atom', '.csv')
    documents = ('.md', '.pdf', '.doc', '.docx', '.ppt', '.pptx', '.xls', '.xlsx', '.txt')
    archives = ('.zip', '.rar', '.tar', '.gz', '.7z')
    images = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.svg', '.ico', '.webp')
    fonts = ('.woff', '.woff2', '.eot', '.ttf', '.otf')
    media = ('.mp4',)
    return {*web, *data, *documents, *archives, *images, *fonts, *media}
73 |
def filter_urls_with_parameters(data, interesting_extensions, parameter_value):
    """
    Keep the URLs that have a query string and an interesting file extension.

    Every query parameter of a kept URL has its value(s) collapsed to a single
    ``parameter_value``; repeated parameter names therefore appear once.

    Args:
        data (list): Records from the Wayback Machine; the URL is read from
            index 0 of each record.
        interesting_extensions (set): Extensions the URL path must end with.
        parameter_value (str): The value to replace query parameters with.

    Returns:
        list: Rewritten URLs in input order (duplicates are not removed here).
    """
    suffixes = tuple(interesting_extensions)
    rewritten = []

    for record in data:
        parts = urllib.parse.urlparse(record[0])

        # Skip URLs with no query string or with an uninteresting path.
        if not parts.query or not parts.path.endswith(suffixes):
            continue

        placeholders = {
            name: [parameter_value]
            for name in urllib.parse.parse_qs(parts.query, keep_blank_values=True)
        }
        new_query = urllib.parse.urlencode(placeholders, doseq=True)
        rewritten.append(urllib.parse.urlunparse(parts._replace(query=new_query)))

    return rewritten
107 |
def process_urls(urls, output_filename):
    """
    Emit the distinct URLs, either to the console or appended to a file.

    Args:
        urls (list): List of URLs to process; duplicates are dropped.
        output_filename (str): Target file name. When truthy, the unique URLs
            are appended to it (one per line) and a summary line is printed.
            Otherwise each unique URL is printed to the console.

    Returns:
        None
    """
    # A set removes duplicates; the resulting order is arbitrary.
    distinct = set(urls)

    if not output_filename:
        for entry in distinct:
            print(entry)
        return

    # Append mode: existing content of the file is kept.
    with open(output_filename, 'a', encoding='utf-8') as sink:
        sink.writelines(entry + '\n' for entry in distinct)
    print(f"Filtered {len(distinct)} unique URLs saved to {output_filename}")
131 |
--------------------------------------------------------------------------------
/furl/fetch_list.py:
--------------------------------------------------------------------------------
1 | """
2 | Fetching URLs from list of domains
3 | """
4 | from .fetch import fetch_parameters_from_wayback
5 |
def fetch_parameters_from_list(input_filename, output_filename=None, parameter_value="FUZZ"):
    """
    Fetch parameterised URLs from the Wayback Machine for each domain in a file.

    Args:
        input_filename (str): Path to a UTF-8 text file with one domain per line.
            Surrounding whitespace is ignored and blank lines are skipped.
        output_filename (str, optional): Passed through to
            ``fetch_parameters_from_wayback`` for every domain. Default is None.
        parameter_value (str, optional): Passed through to
            ``fetch_parameters_from_wayback`` for every domain. Default is "FUZZ".

    Returns:
        None
    """
    with open(input_filename, 'r', encoding='utf-8') as input_file:
        # Strip each entry and drop empty ones: a blank line would otherwise
        # be queried as an empty domain.
        domains = [line.strip() for line in input_file if line.strip()]

    for domain in domains:
        fetch_parameters_from_wayback(domain, output_filename, parameter_value)
16 |
--------------------------------------------------------------------------------
/furl/furl.py:
--------------------------------------------------------------------------------
1 | """
2 | Furl.py: A script for fetching URLs with parameters from the Wayback Machine.
3 |
4 | Usage:
5 | furl (-d domain.com | -l list.txt) [-p value] [-o output.txt]
6 |
7 | Options:
8 | -d, --domain Specify a single domain name to search for (e.g., domain.com).
9 | -l, --list Specify a .txt file containing a list of domains, one per line.
10 | -p, --parameter Specify the parameter value.
11 | -o, --output Specify an optional output .txt file name.
12 | """
13 | import argparse
14 | import sys
15 | from .banner import display_banner
16 | from .fetch import fetch_parameters_from_wayback
17 | from .fetch_list import fetch_parameters_from_list
18 |
def main():
    """
    Command-line entry point.

    Shows the banner, parses the arguments (exactly one of -d/--domain or
    -l/--list is required) and dispatches to the single-domain or the
    domain-list fetcher. With no arguments at all, the help text is shown.
    """
    display_banner()

    cli = argparse.ArgumentParser()
    source = cli.add_mutually_exclusive_group(required=True)
    source.add_argument('-d', '--domain', help='Single domain name (e.g., domain.com)')
    source.add_argument('-l', '--list', help='Path to a .txt file containing a list of domains')
    cli.add_argument('-p', '--parameter', help='Specify the parameter value', default='FUZZ')
    cli.add_argument('-o', '--output', help='Specify the output .txt file name')

    # An empty command line is treated as a request for help.
    argv = sys.argv[1:] or ['--help']
    options = cli.parse_args(argv)

    if options.domain:
        fetch_parameters_from_wayback(options.domain, options.output, options.parameter)
        return
    if options.list:
        fetch_parameters_from_list(options.list, options.output, options.parameter)
39 |
# Run the CLI when this module is executed as the main program.
if __name__ == "__main__":
    main()
42 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | """Setup for Furl"""
from setuptools import setup, find_packages

# The README is reused verbatim as the package's long description.
with open('README.md', encoding='utf-8') as f:
    desc = f.read()

setup(
    name='furl',
    version='1.0.2',
    author='Maryoux',
    description='Mining URLs parameter from Wayback',
    packages=find_packages(),
    install_requires=[
        'colorama',
        'pyfiglet',
        'requests',
    ],
    entry_points={
        # Installs a `furl` command that calls furl.furl:main.
        'console_scripts': [
            'furl = furl.furl:main'
        ]
    },
    license='MIT',
    long_description=desc,
    # README.md is Markdown, so it must be declared as such to be rendered
    # rather than shown as raw text.
    long_description_content_type='text/markdown',
)
25 |
--------------------------------------------------------------------------------