# Repository layout:
#   |-- README.md
#   `-- scraper.py
#
# README.md
# ---------
# # 500px-scraper
# A python web scraper to download full size 500px.com images.
#
# Just specify the URL and Save File name.
#
# Usage :
#     python scraper.py -i 'URL' -s 'SaveFileName'
#
# scraper.py
# ----------
"""Download the full-size image referenced by a 500px.com photo page.

The page is fetched, its ``<meta property="og:image">`` tag is located, and
the image that tag points to is saved under the requested file name.

Usage:
    python scraper.py -i 'URL' -s 'SaveFileName'
"""

import contextlib
import getopt
import sys

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2, which the original script was written for
    from urllib import urlopen, urlretrieve

_USAGE = "scraper.py -i <url> -s <savefile>"


def _parse_args(argv):
    """Return ``(url, save)`` parsed from *argv*; exit on ``-h`` or bad usage."""
    try:
        opts, _ = getopt.getopt(argv, "hi:s:", ["url=", "save="])
    except getopt.GetoptError:
        sys.stderr.write(_USAGE + "\n")
        sys.exit(2)

    url = ""
    save = ""
    for opt, arg in opts:
        if opt == "-h":
            print(_USAGE)
            sys.exit()
        elif opt in ("-i", "--url"):
            url = arg
        elif opt in ("-s", "--save"):
            save = arg

    # Both values are required: without them the download below would be
    # attempted against an empty URL or an empty output path.
    if not url or not save:
        sys.stderr.write(_USAGE + "\n")
        sys.exit(2)
    return url, save


def _find_image_url(html):
    """Return the ``og:image`` URL found in *html*, or ``None`` if absent."""
    # Imported lazily so that ``-h`` and usage errors still work on a machine
    # where the third-party bs4 package is not installed.
    from bs4 import BeautifulSoup

    # Name the parser explicitly; otherwise bs4 picks whichever one happens
    # to be installed and emits a warning.
    soup = BeautifulSoup(html, "html.parser")
    tag = soup.find("meta", property="og:image")
    if tag is None:
        return None
    return tag.get("content")


def main(argv):
    """Fetch the page given by ``-i/--url`` and save its image to ``-s/--save``.

    Args:
        argv: Command-line arguments without the program name.

    Exits with status 2 on a usage error (unknown option, or a missing URL or
    save name) and with status 1 when the page has no ``og:image`` meta tag.
    """
    url, save = _parse_args(argv)
    print("URL : %s" % url)
    print("Save File Name : %s" % save)

    # closing() works on both Python 2 and 3 response objects and guarantees
    # the connection is released even if read() fails.
    with contextlib.closing(urlopen(url)) as response:
        html = response.read()

    image_url = _find_image_url(html)
    if not image_url:
        sys.stderr.write("No og:image meta tag found at %s\n" % url)
        sys.exit(1)
    urlretrieve(image_url, save)


if __name__ == "__main__":
    main(sys.argv[1:])