├── README
└── Source2URL

--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/Source2URL:
--------------------------------------------------------------------------------
#!/bin/bash
# Source2URL: This tool scans a source code directory and harvests file paths from it.
# It then makes an HTTP request to each path via a configured proxy.
# The purpose is to aid in content discovery during web assessments.

# Check for command-line arguments
if [ $# -ne 4 ]
then
    echo ""
    echo 'Syntax: Source2URL /some/dir root proxy url'
    echo 'Example: Source2URL ~/downloads/wordpress wordpress localhost:8080 domain.tld'
    echo "The root is the string that separates the parent directories from the part of each path we want to request."
    echo ""
    exit 1
fi

# Define command-line variables
DIR="$1"
ROOT="$2"
PROXY="$3"
URL="$4"

# Clear the temp file
: > ./tempdirlist

# Configure the wget proxy
export http_proxy="$PROXY"

# Notify on start of work
echo ""
echo "Initiating parsing..."

# Use ROOT as the awk delimiter to cut off the parent directories,
# keeping only the part of each file path we want to request
find "$DIR" -type f | awk -F"$ROOT" '{ print $2 }' >> ./tempdirlist

# Prepend the base URL to the paths
sed -e "s|^|$URL|" ./tempdirlist > dirlist.txt

# Feedback
echo "Making queries..."

# Make the HTTP queries quietly, discarding the responses locally;
# wget picks the proxy up from the exported http_proxy variable
while read -r target
do
    wget -q -O /dev/null "$target"
done < dirlist.txt

# Notify on completion.
echo "Process complete..."
--------------------------------------------------------------------------------
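
Usage sketch: the directory contents, proxy address, and resulting list entries below are hypothetical, chosen only to illustrate how the ROOT argument splits each harvested path and how the requests end up in the proxy.

# Hypothetical walkthrough: assume ~/downloads/wordpress contains
#   ~/downloads/wordpress/wp-login.php
#   ~/downloads/wordpress/wp-includes/version.php
# and an intercepting proxy is listening on localhost:8080.
./Source2URL ~/downloads/wordpress wordpress localhost:8080 domain.tld

# awk -F"wordpress" keeps everything after the ROOT string, so tempdirlist holds:
#   /wp-login.php
#   /wp-includes/version.php
#
# sed prepends the URL argument, so dirlist.txt becomes:
#   domain.tld/wp-login.php
#   domain.tld/wp-includes/version.php
#
# wget then fetches each entry with http_proxy=localhost:8080 exported, so every
# request appears in the proxy's history for content discovery.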