├── README.md ├── go.mod └── main.go /README.md: -------------------------------------------------------------------------------- 1 | # Wayback Machine Snapshot Fetcher 2 | 3 | ## Overview 4 | 5 | **Waybackfetch** is a command-line tool written in Go that allows users to easily retrieve archived snapshots of web pages from the Internet Archive’s Wayback Machine. With this tool, users can fetch all available snapshot URLs for a given webpage or a list of web pages, enabling easy access to historical versions of content. 6 | 7 | ## Features 8 | 9 | ```console 10 | └─# waybackfetch -h 11 | 12 | 13 | _ _ _ _ _ _______ _ 14 | | || || | | | | | (_______) _ | | 15 | | || || | ____ _ _| | _ ____ ____| | _ _____ ____| |_ ____| | _ 16 | | ||_|| |/ _ | | | | || \ / _ |/ ___) | / ) ___) _ ) _)/ ___) || \ 17 | | |___| ( ( | | |_| | |_) | ( | ( (___| |< (| | ( (/ /| |_( (___| | | | 18 | \______|\_||_|\__ |____/ \_||_|\____)_| \_)_| \____)\___)____)_| |_| 19 | (____/ 20 | 21 | 22 | v1.0 Created by KathanP19 23 | 24 | Usage: 25 | -u Fetch snapshots for a single URL 26 | -l File containing list of URLs to fetch snapshots for 27 | -o Output file to save the results 28 | -d Enable unique snapshot filtering by content digest 29 | --silent Enable silent mode, only print URLs 30 | -h, --help Show this help message and exit 31 | ``` 32 | 33 | 34 | - **Single URL Fetching**: Quickly retrieve all snapshot URLs for a specific web page using the `-u` flag. 35 | - **Batch Processing**: Process multiple URLs by providing a file containing a list of URLs with the `-l` flag. 36 | - **Output Options**: Save the retrieved snapshot URLs to a specified output file using the `-o` flag while also printing the results to the console. 37 | - **Silent Mode**: Enable a clean output experience with the `--silent` flag to display only the results without additional console messages. 
38 | - **Input from Standard Input**: Supports reading URLs directly from standard input, allowing for flexible usage in scripts or pipelines. 39 | - **Filter Duplicate Content**: Filter out URLs with duplicate snapshot content when the `-d` flag is used. 40 | 41 | ## Installation 42 | 43 | 1. Ensure you have Go installed on your machine. 44 | 2. You can install the tool using the following commands: 45 | ```console 46 | go install github.com/KathanP19/waybackfetch@latest 47 | ``` 48 | OR 49 | ```console 50 | git clone https://github.com/KathanP19/waybackfetch.git 51 | cd waybackfetch 52 | go install 53 | ``` 54 | 55 | ## Usage 56 | ```console 57 | # From STDIN 58 | echo "http://testphp.vulnweb.com/login.php" | waybackfetch 59 | 60 | # Fetch snapshots for a single URL 61 | waybackfetch -u <url> 62 | 63 | # Fetch snapshots for a list of URLs from a file 64 | waybackfetch -l <file> 65 | 66 | # Save results to a file while printing to console 67 | waybackfetch -u <url> -o <output-file> 68 | 69 | # Enable silent mode 70 | waybackfetch -u <url> --silent 71 | 72 | # Display help message 73 | waybackfetch -h 74 | ``` 75 | 76 | ## Todo 77 | - [x] Add Duplicate content check. (Feature Added by @dwisiswant0) 78 | 79 | ## Contributing 80 | Contributions are welcome! Please feel free to submit issues or pull requests to enhance the functionality and performance of the tool. 
// Command waybackfetch lists the Wayback Machine snapshot URLs archived for one
// or more web pages.
//
// Target URLs are taken from the -u flag, from a file given with -l, or from
// standard input when it is not a terminal. Snapshot URLs are written to
// standard output and, when -o is set, to the named file as well.
//
// Module path (from go.mod): github.com/KathanP19/waybackfetch, go 1.23.
package main

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"os"
	"strings"
	"time"
)

// ANSI color codes for styling.
const (
	reset  = "\033[0m"
	red    = "\033[31m"
	green  = "\033[32m"
	yellow = "\033[33m"
	cyan   = "\033[36m"
)

const (
	// SnapshotURL is the format of a snapshot link; its two verbs are the
	// snapshot timestamp and the target URL, in that order.
	SnapshotURL = "https://web.archive.org/web/%sif_/%s"

	// cdxEndpoint is the Wayback Machine CDX search endpoint that is queried
	// for the list of captures.
	cdxEndpoint = "https://web.archive.org/cdx/search/cdx"

	// cdxFieldCount is the number of columns requested through the "fl"
	// query parameter: timestamp, original, digest, length.
	cdxFieldCount = 4

	// requestTimeout bounds a whole CDX request, including reading the body,
	// so that a stalled connection cannot hang the tool forever.
	requestTimeout = 2 * time.Minute
)

// httpClient is shared by every request so that connections can be reused
// when many URLs are processed in one run.
var httpClient = &http.Client{Timeout: requestTimeout}

// banner is the ASCII-art logo shown by printBanner.
//
// NOTE(review): the column spacing was reconstructed from the letter shapes;
// compare against the upstream banner if exact spacing matters.
const banner = `

 _  _  _             _                 _     _______              _
| || || |           | |               | |   (_______)   _        | |
| || || | ____ _   _| | _   ____  ____| |  _ _____ ____| |_  ____| | _
| ||_|| |/ _  | | | | || \ / _  |/ ___) | / )  ___) _  )  _)/ ___) || \
| |___| ( ( | | |_| | |_) | ( | ( (___| |< (| |  ( (/ /| |_( (___| | | |
 \______|\_||_|\__  |____/ \_||_|\____)_| \_)_|   \____)\___)____)_| |_|
              (____/

`

// printBanner prints the colored logo followed by the version line and a
// blank line.
func printBanner() {
	fmt.Println(cyan + banner + reset)
	fmt.Println(yellow + " v1.1 Created by KathanP19" + reset)
	fmt.Println()
}

// printUsage prints the banner and the custom help text. It is installed as
// flag.Usage.
//
// NOTE(review): the column alignment of the option lines could not be
// confirmed; verify the spacing against the intended help layout.
func printUsage() {
	printBanner()
	fmt.Println("Usage:")
	fmt.Println(" -u Fetch snapshots for a single URL")
	fmt.Println(" -l File containing list of URLs to fetch snapshots for")
	fmt.Println(" -o Output file to save the results")
	fmt.Println(" -d Enable unique snapshot filtering by content digest")
	fmt.Println(" --silent Enable silent mode, only print URLs")
	fmt.Println(" -h, --help Show this help message and exit")
}

// WaybackResponse holds the rows returned by the Wayback Machine CDX API when
// JSON output is requested. Callers treat the first row as a header.
type WaybackResponse [][]string

// Snapshot is one capture row, with the columns in the order they are
// requested from the API.
type Snapshot struct {
	Timestamp string `json:"timestamp"`
	Original  string `json:"original"`
	Digest    string `json:"digest"`
	Length    string `json:"length"`
}

// fetchCDXRows queries the CDX endpoint for exact matches of targetUrl and
// returns the decoded rows, header row included.
//
// It returns an error when the request fails, when the server answers with a
// status other than 200, or when the body is not the expected JSON.
func fetchCDXRows(targetUrl string) (WaybackResponse, error) {
	u, err := url.Parse(cdxEndpoint)
	if err != nil {
		return nil, fmt.Errorf(red+"error parsing base URL:"+reset+" %w", err)
	}

	q := u.Query()
	q.Set("url", targetUrl)
	q.Set("matchType", "exact")
	q.Set("output", "json")
	q.Set("fl", "timestamp,original,digest,length")
	u.RawQuery = q.Encode()

	resp, err := httpClient.Get(u.String())
	if err != nil {
		return nil, fmt.Errorf(red+"error fetching data:"+reset+" %w", err)
	}
	defer resp.Body.Close()

	// Without this check an HTML or plain-text error page would only show up
	// later as a confusing JSON parse error.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf(red+"unexpected HTTP status from Wayback Machine API:"+reset+" %s", resp.Status)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, fmt.Errorf(red+"error reading response body:"+reset+" %w", err)
	}

	var data WaybackResponse
	if err := json.Unmarshal(body, &data); err != nil {
		return nil, fmt.Errorf(red+"error parsing JSON:"+reset+" %w", err)
	}
	return data, nil
}

// parseSnapshots converts data rows (header already removed) into snapshots.
//
// Rows that do not have exactly cdxFieldCount columns are returned in
// malformed instead of being converted. When uniqOnly is true, only the first
// row seen for each digest is kept.
func parseSnapshots(rows [][]string, uniqOnly bool) (snapshots []Snapshot, malformed [][]string) {
	snapshots = make([]Snapshot, 0, len(rows))
	seen := make(map[string]struct{})

	for _, row := range rows {
		if len(row) != cdxFieldCount {
			malformed = append(malformed, row)
			continue
		}

		digest := row[2]
		if uniqOnly {
			if _, dup := seen[digest]; dup {
				continue
			}
			seen[digest] = struct{}{}
		}

		snapshots = append(snapshots, Snapshot{
			Timestamp: row[0],
			Original:  row[1],
			Digest:    digest,
			Length:    row[3],
		})
	}
	return snapshots, malformed
}

// writeSnapshotURLs writes one snapshot link per line to output, built from
// each snapshot's timestamp and targetUrl.
//
// Writes are buffered and flushed before returning; the first write error, if
// any, is returned.
func writeSnapshotURLs(output io.Writer, snapshots []Snapshot, targetUrl string) error {
	w := bufio.NewWriter(output)
	for _, s := range snapshots {
		// bufio.Writer remembers the first error and reports it from Flush,
		// so the per-line result does not need checking here.
		fmt.Fprintf(w, SnapshotURL+"\n", s.Timestamp, targetUrl)
	}
	if err := w.Flush(); err != nil {
		return fmt.Errorf(red+"error writing results:"+reset+" %w", err)
	}
	return nil
}

// FetchSnapshotUrls fetches all snapshot URLs for targetUrl and writes them,
// one per line, to output.
//
// When silent is false, informational messages (no snapshots found, malformed
// rows skipped) are printed to standard output. When uniqOnly is true,
// snapshots whose content digest was already seen are omitted.
//
// It returns an error when the API request fails, the response cannot be
// decoded, or the results cannot be written.
func FetchSnapshotUrls(targetUrl string, silent bool, output io.Writer, uniqOnly bool) error {
	data, err := fetchCDXRows(targetUrl)
	if err != nil {
		return err
	}

	// A successful response with no rows, or with only the header row,
	// carries no captures.
	if len(data) <= 1 {
		if !silent {
			fmt.Println(yellow + "No snapshots found for the given URL." + reset)
		}
		return nil
	}

	snapshots, malformed := parseSnapshots(data[1:], uniqOnly)
	if !silent {
		for _, row := range malformed {
			fmt.Printf(yellow+"Skipping malformed row: %v\n"+reset, row)
		}
	}

	return writeSnapshotURLs(output, snapshots, targetUrl)
}

// cleanURL trims surrounding whitespace (including a trailing carriage return
// from CRLF files) from an input line and reports whether anything is left.
func cleanURL(line string) (string, bool) {
	trimmed := strings.TrimSpace(line)
	return trimmed, trimmed != ""
}

// processLines calls handle for every non-blank line read from r and returns
// the scanner's error, if any.
func processLines(r io.Reader, handle func(string)) error {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		if target, ok := cleanURL(scanner.Text()); ok {
			handle(target)
		}
	}
	return scanner.Err()
}

// stdinIsPiped reports whether standard input is something other than a
// character device, i.e. a pipe or a redirected file.
func stdinIsPiped() bool {
	stat, err := os.Stdin.Stat()
	if err != nil {
		// Without file info there is nothing safe to read from.
		return false
	}
	return stat.Mode()&os.ModeCharDevice == 0
}

func main() {
	singleURL := flag.String("u", "", "Single URL to fetch snapshots for")
	list := flag.String("l", "", "File containing list of URLs to fetch snapshots for")
	silent := flag.Bool("silent", false, "Enable silent mode, only print URLs")
	outputFile := flag.String("o", "", "Output file to write results")
	unique := flag.Bool("d", false, "Enable unique snapshot filtering by content digest")

	// Custom help message
	flag.Usage = printUsage
	flag.Parse()

	// Print the banner unless silent mode is enabled
	if !*silent {
		printBanner()
	}

	var output io.Writer = os.Stdout
	if *outputFile != "" {
		file, err := os.Create(*outputFile)
		if err != nil {
			fmt.Printf("Error creating output file: %v\n", err)
			return
		}
		// A failed Close on a written file can mean lost data, so report it.
		defer func() {
			if err := file.Close(); err != nil && !*silent {
				fmt.Printf(red+"Error closing output file:"+reset+" %v\n", err)
			}
		}()
		output = io.MultiWriter(os.Stdout, file)
	}

	processURL := func(target string) {
		if !*silent {
			fmt.Printf(green+"\nFetching snapshots for URL:"+reset+" %s\n", target)
		}
		if err := FetchSnapshotUrls(target, *silent, output, *unique); err != nil && !*silent {
			fmt.Println("Error:", err)
		}
	}

	// Input sources are tried in priority order: -u, then -l, then stdin.
	switch {
	case *singleURL != "":
		processURL(*singleURL)

	case *list != "":
		file, err := os.Open(*list)
		if err != nil {
			if !*silent {
				fmt.Printf(red+"Error opening file:"+reset+" %v\n", err)
			}
			return
		}
		defer file.Close()

		if err := processLines(file, processURL); err != nil && !*silent {
			fmt.Printf(red+"Error reading file:"+reset+" %v\n", err)
		}

	case stdinIsPiped():
		if err := processLines(os.Stdin, processURL); err != nil && !*silent {
			fmt.Printf(red+"Error reading stdin:"+reset+" %v\n", err)
		}

	default:
		if !*silent {
			fmt.Println(red + "Please provide -u for a single URL, -l for a list of URLs, or input via stdin" + reset)
		}
		return
	}

	if *outputFile != "" && !*silent {
		fmt.Printf(green+"\nResults have been saved to:"+reset+" %s\n", *outputFile)
	}
}