#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""nsfw_data_downloader: download the images listed in EBazarov's URL collection.

Usage:
    1. Clone https://github.com/EBazarov/nsfw_data_source_urls to a local
       directory.
    2. Point the script at that directory, either by passing it as the first
       command-line argument or by editing DEFAULT_ROOT_DIR below.
    3. Run ``python nsfwDataset.py [root_dir]``.

Every ``*.txt`` file found under the root directory is read as a list of URLs,
one per line, and each URL is downloaded into the directory that contains that
``.txt`` file.
"""

import os
import sys

import requests

# Root directory used when no command-line argument is given.
DEFAULT_ROOT_DIR = "E:\\nsfw_data_source_urls"

# Timeout (seconds) handed to requests.get so a stalled server cannot block
# the whole run indefinitely.
DEFAULT_TIMEOUT = 30


def _file_name_from_url(url):
    """Return the last path segment of *url* without its query or fragment."""
    # "?" is not a legal character in Windows file names, so anything from the
    # query string / fragment onwards must not end up in the file name.
    without_fragment = url.split("#", 1)[0]
    without_query = without_fragment.split("?", 1)[0]
    return without_query.split("/")[-1]


def imageDownload(url, folder, timeout=DEFAULT_TIMEOUT):
    """Download *url* into *folder* unless the target file already exists.

    The file is named after the last path segment of the URL. Failures are
    reported on stdout and never raised, so one bad URL does not abort a
    batch run.

    Args:
        url: Address of the file to download.
        folder: Existing directory the file is written to.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        None.
    """
    file_name = _file_name_from_url(url)
    if not file_name:
        # Blank line or a URL ending in "/": there is nothing to name the file.
        print("skip %r: cannot derive a file name" % url)
        return

    # os.path.join picks the right separator for the current platform.
    file_path = os.path.join(folder, file_name)
    if os.path.exists(file_path):
        print(file_path, "is exists, skip")
        return

    print("Downloading %s from %s" % (file_name, url))
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Only network/HTTP-level errors are swallowed; KeyboardInterrupt and
        # programming errors still propagate.
        print("download fail: %s" % err)
        return

    if response.status_code != 200:
        print("download fail: HTTP %s" % response.status_code)
        return

    try:
        with open(file_path, 'wb') as outfile:
            outfile.write(response.content)
    except OSError as err:
        print("download fail: %s" % err)


def main(rootdir=DEFAULT_ROOT_DIR):
    """Walk *rootdir* and download every URL listed in its ``.txt`` files.

    Args:
        rootdir: Directory holding the cloned URL lists.
    """
    if not os.path.isdir(rootdir):
        sys.exit("root directory not found: %s" % rootdir)

    for dirpath, _dirnames, filenames in os.walk(rootdir):
        for filename in filenames:
            if not filename.endswith(".txt"):
                continue
            path = os.path.join(dirpath, filename)
            # errors="replace" keeps one badly encoded list from aborting the
            # run; a damaged URL simply fails to download.
            with open(path, 'r', encoding="utf-8", errors="replace") as f:
                for line in f:
                    url = line.strip()
                    if url:
                        imageDownload(url, dirpath)


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_ROOT_DIR)