├── .gitignore ├── figs ├── firmware_arch_distribution.jpg └── firmware_os_distribution.jpg ├── src ├── main.py ├── fw_unpacker.py └── fw_downloader.py ├── dat └── firmware_ftp_list.csv └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | fws 3 | binwalk -------------------------------------------------------------------------------- /figs/firmware_arch_distribution.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WUSTL-CSPL/Firmware-Dataset/HEAD/figs/firmware_arch_distribution.jpg -------------------------------------------------------------------------------- /figs/firmware_os_distribution.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WUSTL-CSPL/Firmware-Dataset/HEAD/figs/firmware_os_distribution.jpg -------------------------------------------------------------------------------- /src/main.py: -------------------------------------------------------------------------------- 1 | from fw_downloader import download_firmware 2 | from fw_unpacker import unpack_firmware 3 | 4 | def main(firmware_urls, save_path): 5 | download_firmware(firmware_urls, save_path) 6 | unpack_firmware(save_path) 7 | 8 | if __name__ == '__main__': 9 | # Three firmware samples 10 | firmware_urls = [ 11 | "https://static.tp-link.com/TL-WR940N(US)_V4_160617_1476690524248q.zip", 12 | "https://static.tp-link.com/resources/software/TL-WR1043ND_V1_140319.zip", 13 | "https://static.tp-link.com/resources/software/TL-WA801ND_V1_130131_beta.zip" 14 | ] 15 | save_path = "../fws" 16 | main(firmware_urls, save_path) 17 | -------------------------------------------------------------------------------- /src/fw_unpacker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | def unpack_firmware(save_path): 5 | for root, _, files in os.walk(save_path): 6 | for file in files: 7 | file_path = os.path.join(root, file) 8 | try: 9 | subprocess.run(['binwalk', '-Mre', '--directory', save_path, file_path], check=True) # Run binwalk for unpacking 10 | print(f"Unpacked file using binwalk: {file_path}") 11 | except subprocess.CalledProcessError as e: 12 | print(f"Failed to unpack file {file_path} using binwalk: {e}") 13 | 14 | 15 | if __name__ == '__main__': 16 | save_path = "../fws" 17 | unpack_firmware(save_path) 18 | -------------------------------------------------------------------------------- /dat/firmware_ftp_list.csv: -------------------------------------------------------------------------------- 1 | vendor,url 2 | zyxel,ftp.zyxel.lv 3 | zyxel,ftp.zyxel.com.tr 4 | zyxel,ftp.zyxel.com 5 | weintek,ftp.weintek.com 6 | tyan,ftp.tyan.com 7 | tiger,tiger.satsale.net 8 | simet,ftp.simet.com.tr 9 | sangoma,ftp.sangoma.com 10 | rinotel,rinotel.com 11 | ral,ftp.ral.ro 12 | proinit,188.138.149.64 13 | pctvsystems,ftp.pctvsystems.com 14 | partner-tech,ftp.partner-tech.eu 15 | netgear,downloads.netgear.com 16 | multitech,ftp.multitech.com 17 | luis,ftp.luis.ru 18 | loks,ftp.loks.lv 19 | infinet,ftp.infinet.ru 20 | geoteam,ftp.geoteam.dk 21 | eutronix,ftp.eutronix.be 22 | epson,download.epson-europe.com 23 | draytek,ftp.draytek.com 24 | dlink,ftp.dlink.ca 25 | dlink,ftp.dlink.by 26 | detewe,aux.detewe.ru 27 | depo-computers,ftp.depo.ru 28 | dd-wrt,ftp.dd-wrt.com 29 | d-link,ftp2.dlink.com 30 | d-link,ftp.d-link.co.za 31 | d-link,ftp.dlink.ru 32 | d-link,ftp.dlink.eu 33 | avm,ftp.avm.de 34 | mamont (FTP search engine),https://www.mmnt.ru/int/get?in=f&st=.bin&qw=firmware,https://www.mmnt.ru/int/get?in=f&st=.bin&qw=firmware -------------------------------------------------------------------------------- /src/fw_downloader.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | 4 | def download_firmware(url_list, save_path): 5 | if not os.path.exists(save_path): 6 | os.makedirs(save_path) # Create the folder if it doesn't exist 7 | 8 | for url in url_list: 9 | try: 10 | # Extract the original file name from the URL 11 | original_filename = os.path.basename(url) 12 | full_save_path = os.path.join(save_path, original_filename) 13 | 14 | # Make the request to download the file 15 | response = requests.get(url, stream=True) 16 | response.raise_for_status() # Check for HTTP errors 17 | 18 | with open(full_save_path, 'wb') as file: 19 | for chunk in response.iter_content(chunk_size=8192): 20 | if chunk: # Filter out keep-alive new chunks 21 | file.write(chunk) 22 | print(f"Firmware downloaded successfully and saved to {full_save_path}") 23 | 24 | except requests.exceptions.RequestException as e: 25 | print(f"Failed to download the firmware from {url}: {e}") 26 | 27 | 28 | if __name__ == '__main__': 29 | firmware_urls = [ 30 | "https://static.tp-link.com/TL-WR940N(US)_V4_160617_1476690524248q.zip", 31 | "https://static.tp-link.com/resources/software/TL-WR1043ND_V1_140319.zip", 32 | "https://static.tp-link.com/resources/software/TL-WA801ND_V1_130131_beta.zip" 33 | ] 34 | save_path = "../fws" 35 | download_firmware(firmware_urls, save_path) 36 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Firmware-Dataset 2 | 3 | 4 | ## Introduction 5 | We collected 16.9 TB of firmware images from the official websites of vendors, open FTP sites, and open-source repositories. Currently, 157,141 firmware images (about 6 TB) from 204 vendors have been pre-processed. The corresponding products of these firmware images are commonly used in consumer markets, such as networking devices, cameras, and smart home devices. The pre-processing for other firmware images is still running since these procedures require a large amount of computation. The pre-processed firmware images are open-source for research purposes, the distribution of their architecture type and OS type is shown in Fig.1. We will continue to update this repository as we collect more firmware images in the future. 6 | 7 |
8 |
9 |
10 |