├── README
└── Source2URL

--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/Source2URL:
--------------------------------------------------------------------------------
#!/bin/bash
# Source2URL: This tool scans a source code directory and harvests file paths from it.
# It then makes an HTTP request to each path via a configured proxy.
# The purpose is to aid in content discovery during web assessments.

# Check for command-line arguments
if [ $# -ne 4 ]
then
    echo ""
    echo 'Syntax: Source2URL /some/dir root proxy url'
    echo 'Example: Source2URL ~/downloads/wordpress wordpress localhost:8080 domain.tld'
    echo "The root is the string that separates the parent directories from the part of each path we want to request."
    echo ""
    exit 1
fi

# Define command-line variables
DIR="$1"
ROOT="$2"
PROXY="$3"
URL="$4"

# Clear the temp file
: > ./tempdirlist

# Configure the wget proxy
export http_proxy="$PROXY"

# Notify on start of work
echo ""
echo "Initiating parsing..."

# Use ROOT as the awk delimiter to cut off the parent directories,
# keeping only the part of each file path we want to request
find "$DIR" -type f | awk -F"$ROOT" '{ print $2 }' >> ./tempdirlist

# Prepend the base URL to the paths
sed -e "s|^|$URL|" ./tempdirlist > dirlist.txt

# Feedback
echo "Making queries..."

# Make the HTTP queries quietly, discarding the responses locally;
# wget picks the proxy up from the exported http_proxy variable
while read -r target
do
    wget -q -O /dev/null "$target"
done < dirlist.txt

# Notify on completion.
echo "Process complete..."
--------------------------------------------------------------------------------
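
Usage sketch: the directory contents, proxy address, and resulting list entries below are hypothetical, chosen only to illustrate how the ROOT argument splits each harvested path and how the requests end up in the proxy.

# Hypothetical walkthrough: assume ~/downloads/wordpress contains
#   ~/downloads/wordpress/wp-login.php
#   ~/downloads/wordpress/wp-includes/version.php
# and an intercepting proxy is listening on localhost:8080.
./Source2URL ~/downloads/wordpress wordpress localhost:8080 domain.tld

# awk -F"wordpress" keeps everything after the ROOT string, so tempdirlist holds:
#   /wp-login.php
#   /wp-includes/version.php
#
# sed prepends the URL argument, so dirlist.txt becomes:
#   domain.tld/wp-login.php
#   domain.tld/wp-includes/version.php
#
# wget then fetches each entry with http_proxy=localhost:8080 exported, so every
# request appears in the proxy's history for content discovery.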