├── tests ├── __init__.py ├── fixtures │ ├── no_image_page.html │ ├── fragment.html │ ├── page.html │ └── tumblr.html └── test.py ├── haul ├── extenders │ ├── __init__.py │ └── pipeline │ │ ├── __init__.py │ │ ├── pinterest.py │ │ ├── wordpress.py │ │ ├── tumblr.py │ │ └── google.py ├── finders │ ├── __init__.py │ └── pipeline │ │ ├── __init__.py │ │ ├── html.py │ │ └── css.py ├── __init__.py ├── api.py ├── compat.py ├── utils.py ├── settings.py ├── exceptions.py └── core.py ├── requirements_test.txt ├── requirements.txt ├── MANIFEST.in ├── .gitignore ├── .travis.yml ├── HISTORY.rst ├── LICENSE ├── setup.py └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /haul/extenders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /haul/finders/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /haul/finders/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /haul/extenders/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | coveralls 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4 2 | cssutils 3 | html5lib 4 | lxml 5 | requests 6 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE 2 | include README.rst 3 | include HISTORY.rst 4 | include requirements.txt 5 | include requirements_test.txt 6 | -------------------------------------------------------------------------------- /haul/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | __version__ = '1.3.1' 4 | 5 | from .api import find_images 6 | from .core import Haul, HaulResult 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[cod] 2 | 3 | *.egg 4 | *.egg-info 5 | dist 6 | build 7 | sdist 8 | 9 | .coverage 10 | htmlcov/ 11 | 12 | .codeintel 13 | .DS_STORE 14 | -------------------------------------------------------------------------------- /haul/api.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from .core import Haul 4 | 5 | 6 | def find_images(url_or_html, *args, **kwargs): 7 | h = Haul() 8 | 9 | return h.find_images(url_or_html, *args, **kwargs) 10 | -------------------------------------------------------------------------------- /tests/fixtures/no_image_page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello Haul 6 | 7 | 8 |

no image

9 | 10 | 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | 6 | before_install: 7 | - sudo apt-get update -qq 8 | - sudo apt-get install -qq libxml2-dev libxslt1-dev 9 | 10 | install: 11 | - pip install -r requirements.txt --use-mirrors 12 | - pip install -r requirements_test.txt --use-mirrors 13 | 14 | script: 15 | - coverage run --source=haul setup.py test 16 | 17 | notifications: 18 | email: 19 | - vinta.chen@gmail.com 20 | 21 | after_success: 22 | - coveralls 23 | -------------------------------------------------------------------------------- /haul/compat.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """ 4 | haul.compat 5 | ~~~~~~~~~~~ 6 | 7 | This module contains imports and declarations for seamless Python 2 and 8 | Python 3 compatibility. 9 | """ 10 | 11 | import sys 12 | 13 | 14 | _version = sys.version_info 15 | 16 | is_py2 = (_version[0] == 2) 17 | is_py3 = (_version[0] == 3) 18 | 19 | if is_py2: 20 | from urlparse import urljoin, urlparse 21 | 22 | str = unicode 23 | 24 | elif is_py3: 25 | from urllib.parse import urljoin, urlparse 26 | 27 | str = str 28 | -------------------------------------------------------------------------------- /haul/utils.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import cStringIO 4 | import sys 5 | 6 | 7 | def import_module(name): 8 | __import__(name) 9 | 10 | return sys.modules[name] 11 | 12 | 13 | def module_member(name): 14 | mod, member = name.rsplit('.', 1) 15 | module = import_module(mod) 16 | 17 | return getattr(module, member) 18 | 19 | 20 | def read_file(path): 21 | with open (path, 'r') as f: 22 | content = f.read() 23 | 24 | return content 25 | 26 | 27 | def pack_image(self, content): 28 | string_io = cStringIO.StringIO(content) 29 | 30 | return string_io 31 | -------------------------------------------------------------------------------- /HISTORY.rst: -------------------------------------------------------------------------------- 1 | History 2 | ======= 3 | 4 | 1.3.2 (2013-11-05) 5 | ++++++++++++++++++ 6 | 7 | - Bug fixed: `#12 `_ 8 | - Support running tests with `python setup.py test` 9 | 10 | 11 | 1.3.1 (2013-10-24) 12 | ++++++++++++++++++ 13 | 14 | - Add `is_found` attribute for `HaulResult` 15 | - Add `to_ordered_dict()` method for `HaulResult` 16 | - `A demo site on Heroku `_ 17 | 18 | 19 | 1.3.0 (2013-10-16) 20 | ++++++++++++++++++ 21 | 22 | - Use unicode for every string 23 | - Fix running test.py from another directory 24 | - Rename module `models` to `core` 25 | - Remove in_ignorecase() 26 | 27 | 28 | 1.2.0 (2013-10-15) 29 | ++++++++++++++++++ 30 | 31 | - Improve error handling 32 | 33 | 34 | 1.1.0 (2013-10-04) 35 | ++++++++++++++++++ 36 | 37 | - Custom finder / extender pipeline support 38 | 39 | 40 | 1.0.0 (2013-10-03) 41 | ++++++++++++++++++ 42 | 43 | - Initial release 44 | -------------------------------------------------------------------------------- /haul/settings.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser 4 | DEFAULT_PARSER = 'lxml' 5 | 6 | ALLOWED_CONTENT_TYPES = [ 7 | 'text/html', 8 | 'image/', 9 | ] 10 | 11 | FINDER_PIPELINE = ( 12 | 'haul.finders.pipeline.html.img_src_finder', 13 | 'haul.finders.pipeline.html.a_href_finder', 14 | 'haul.finders.pipeline.css.background_image_finder', 15 | ) 16 | 17 | EXTENDER_PIPELINE = ( 18 | 'haul.extenders.pipeline.google.blogspot_s1600_extender', 19 | 'haul.extenders.pipeline.google.ggpht_s1600_extender', 20 | 'haul.extenders.pipeline.google.googleusercontent_s1600_extender', 21 | 'haul.extenders.pipeline.pinterest.original_image_extender', 22 | 'haul.extenders.pipeline.wordpress.original_image_extender', 23 | 'haul.extenders.pipeline.tumblr.media_1280_extender', 24 | 'haul.extenders.pipeline.tumblr.avatar_128_extender', 25 | ) 26 | 27 | SHOULD_JOIN_URL = True 28 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Vinta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /haul/exceptions.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | """ 4 | haul.exceptions 5 | ~~~~~~~~~~~~~~~ 6 | 7 | This module contains the set of Haul's exceptions. 8 | """ 9 | 10 | 11 | class InvalidParameterError(Exception): 12 | """ 13 | Invalid Parameter 14 | """ 15 | 16 | def __init__(self, message): 17 | Exception.__init__(self) 18 | 19 | self.message = message 20 | 21 | def __repr__(self): 22 | return '' % (self.message) 23 | 24 | 25 | class RetrieveError(RuntimeError): 26 | """ 27 | Connection fail or HTTP status code >= 400 28 | """ 29 | 30 | def __init__(self, message): 31 | RuntimeError.__init__(self) 32 | 33 | self.message = message 34 | 35 | def __repr__(self): 36 | return '' % (self.message) 37 | 38 | 39 | class ContentTypeNotSupported(Exception): 40 | """ 41 | ref: settings.ALLOWED_CONTENT_TYPES 42 | """ 43 | 44 | def __init__(self, content_type): 45 | Exception.__init__(self) 46 | 47 | self.content_type = content_type 48 | 49 | def __repr__(self): 50 | return '' % (self.content_type) 51 | -------------------------------------------------------------------------------- /haul/extenders/pipeline/pinterest.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import re 4 | 5 | 6 | def original_image_extender(pipeline_index, 7 | finder_image_urls, 8 | extender_image_urls=[], 9 | *args, **kwargs): 10 | """ 11 | Example: 12 | http://media-cache-ec0.pinimg.com/70x/50/9b/bd/509bbd5c6543d473bc2b49befe75f4c6.jpg 13 | http://media-cache-ec0.pinimg.com/236x/50/9b/bd/509bbd5c6543d473bc2b49befe75f4c6.jpg 14 | http://media-cache-ec0.pinimg.com/736x/50/9b/bd/509bbd5c6543d473bc2b49befe75f4c6.jpg 15 | to 16 | http://media-cache-ec0.pinimg.com/originals/50/9b/bd/509bbd5c6543d473bc2b49befe75f4c6.jpg 17 | """ 18 | 19 | now_extender_image_urls = [] 20 | 21 | search_re = re.compile(r'.com/\d+x/', re.IGNORECASE) 22 | 23 | for image_url in finder_image_urls: 24 | if 'pinimg.com/' in image_url.lower(): 25 | if search_re.search(image_url): 26 | extender_image_url = search_re.sub('.com/originals/', image_url, count=1) 27 | now_extender_image_urls.append(extender_image_url) 28 | 29 | output = {} 30 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 31 | 32 | return output 33 | -------------------------------------------------------------------------------- /haul/extenders/pipeline/wordpress.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import re 4 | 5 | 6 | def original_image_extender(pipeline_index, 7 | finder_image_urls, 8 | extender_image_urls=[], 9 | *args, **kwargs): 10 | """ 11 | Example: 12 | http://fashion-fever.nl/wp-content/upload/2013/09/DSC_0058-110x110.jpg 13 | http://www.wendyslookbook.com/wp-content/uploads/2013/09/Morning-Coffee-Run-7-433x650.jpg 14 | to 15 | http://fashion-fever.nl/wp-content/upload/2013/09/DSC_0058.jpg 16 | http://www.wendyslookbook.com/wp-content/uploads/2013/09/Morning-Coffee-Run-7.jpg 17 | """ 18 | 19 | now_extender_image_urls = [] 20 | 21 | check_re = re.compile(r'wp-content/uploads?/', re.IGNORECASE) 22 | search_re = re.compile(r'(\-\d+x\d+).', re.IGNORECASE) 23 | 24 | for image_url in finder_image_urls: 25 | if check_re.search(image_url): 26 | if search_re.search(image_url): 27 | extender_image_url = search_re.sub('.', image_url) 28 | now_extender_image_urls.append(extender_image_url) 29 | 30 | output = {} 31 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 32 | 33 | return output 34 | -------------------------------------------------------------------------------- /tests/fixtures/fragment.html: -------------------------------------------------------------------------------- 1 |
2 | 3 |

4 | some image 5 | 6 |

7 |
8 | 9 | img with data-src 10 | 11 | 12 | 13 | unicode image url 14 | 15 | 16 |
17 |
18 | 35 | -------------------------------------------------------------------------------- /haul/finders/pipeline/html.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from ...compat import str 4 | 5 | 6 | def img_src_finder(pipeline_index, 7 | soup, 8 | finder_image_urls=[], 9 | *args, **kwargs): 10 | """ 11 | Find image URL in 's src attribute 12 | """ 13 | 14 | now_finder_image_urls = [] 15 | 16 | for img in soup.find_all('img'): 17 | src = img.get('src', None) 18 | if src: 19 | src = str(src) 20 | if (src not in finder_image_urls) and \ 21 | (src not in now_finder_image_urls): 22 | now_finder_image_urls.append(src) 23 | 24 | output = {} 25 | output['finder_image_urls'] = finder_image_urls + now_finder_image_urls 26 | 27 | return output 28 | 29 | 30 | def a_href_finder(pipeline_index, 31 | soup, 32 | finder_image_urls=[], 33 | *args, **kwargs): 34 | """ 35 | Find image URL in 's href attribute 36 | """ 37 | 38 | now_finder_image_urls = [] 39 | 40 | for a in soup.find_all('a'): 41 | href = a.get('href', None) 42 | if href: 43 | href = str(href) 44 | if filter(href.lower().endswith, ('.jpg', '.jpeg', '.gif', '.png')): 45 | if (href not in finder_image_urls) and \ 46 | (href not in now_finder_image_urls): 47 | now_finder_image_urls.append(href) 48 | 49 | output = {} 50 | output['finder_image_urls'] = finder_image_urls + now_finder_image_urls 51 | 52 | return output 53 | -------------------------------------------------------------------------------- /haul/finders/pipeline/css.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import cssutils 4 | 5 | 6 | def background_image_finder(pipeline_index, 7 | soup, 8 | finder_image_urls=[], 9 | *args, **kwargs): 10 | """ 11 | Find image URL in background-image 12 | 13 | Example: 14 |
15 | to 16 | http://distilleryimage10.ak.instagram.com/bde04558a43b11e28e5d22000a1f979a_7.jpg 17 | """ 18 | 19 | now_finder_image_urls = [] 20 | 21 | for tag in soup.find_all(style=True): 22 | style_string = tag['style'] 23 | if 'background-image' in style_string.lower(): 24 | style = cssutils.parseStyle(style_string) 25 | background_image = style.getProperty('background-image') 26 | if background_image: 27 | for property_value in background_image.propertyValue: 28 | background_image_url = str(property_value.value) 29 | if background_image_url: 30 | if (background_image_url not in finder_image_urls) and \ 31 | (background_image_url not in now_finder_image_urls): 32 | now_finder_image_urls.append(background_image_url) 33 | 34 | output = {} 35 | output['finder_image_urls'] = finder_image_urls + now_finder_image_urls 36 | 37 | return output 38 | -------------------------------------------------------------------------------- /tests/fixtures/page.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Hello 中文 6 | 7 | 8 | 9 | 10 |
11 | 12 |

13 | some image 14 | 15 |

16 |
17 | 18 | img with data-src 19 | 20 | 21 | 22 | unicode image url 23 | 24 | 25 |
26 |
27 |
44 |
45 | Created by Vinta 46 |
47 | 48 | 49 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import sys 5 | 6 | try: 7 | from setuptools import setup 8 | except ImportError: 9 | from distutils.core import setup 10 | 11 | 12 | if sys.argv[-1] == 'publish': 13 | os.system('python setup.py sdist upload') 14 | sys.exit() 15 | 16 | long_description = open('README.rst').read() + '\n\n' + open('HISTORY.rst').read() 17 | 18 | license = open('LICENSE').read() 19 | 20 | requirements_lines = [line.strip() for line in open('requirements.txt').readlines()] 21 | install_requires = list(filter(None, requirements_lines)) 22 | 23 | packages = [ 24 | 'haul', 25 | 'haul.finders', 26 | 'haul.finders.pipeline', 27 | 'haul.extenders', 28 | 'haul.extenders.pipeline', 29 | ] 30 | 31 | setup( 32 | name='haul', 33 | version='1.3.2', 34 | description='An Extensible Image Crawler', 35 | long_description=long_description, 36 | keywords='haul web image content scraper parser crawler', 37 | author='Vinta Chen', 38 | author_email='vinta.chen@gmail.com', 39 | url='https://github.com/vinta/Haul', 40 | license=license, 41 | install_requires=install_requires, 42 | include_package_data=True, 43 | packages=packages, 44 | test_suite='tests', 45 | zip_safe=False, 46 | classifiers=( 47 | 'Development Status :: 3 - Alpha', 48 | 'Environment :: Web Environment', 49 | 'Intended Audience :: Developers', 50 | 'License :: OSI Approved :: MIT License', 51 | 'Natural Language :: English', 52 | 'Natural Language :: Chinese (Traditional)', 53 | 'Operating System :: OS Independent', 54 | 'Programming Language :: Python', 55 | 'Programming Language :: Python :: 2', 56 | 'Programming Language :: Python :: 2.7', 57 | 'Topic :: Internet :: WWW/HTTP', 58 | 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 59 | 'Topic :: Multimedia :: Graphics', 60 | 'Topic :: Software Development :: Libraries :: Python Modules', 61 | 'Topic :: Utilities', 62 | ), 63 | ) 64 | -------------------------------------------------------------------------------- /haul/extenders/pipeline/tumblr.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import re 4 | 5 | 6 | def media_1280_extender(pipeline_index, 7 | finder_image_urls, 8 | extender_image_urls=[], 9 | *args, **kwargs): 10 | """ 11 | Example: 12 | http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_250.png 13 | http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_500.png 14 | to 15 | http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_1280.png 16 | """ 17 | 18 | now_extender_image_urls = [] 19 | 20 | search_re = re.compile(r'(tumblr_[a-zA-Z0-9_]+)_(\d+).', re.IGNORECASE) 21 | 22 | for image_url in finder_image_urls: 23 | if 'media.tumblr.com/' in image_url.lower(): 24 | if search_re.search(image_url): 25 | extender_image_url = search_re.sub(r'\1_1280.', image_url) 26 | now_extender_image_urls.append(extender_image_url) 27 | 28 | output = {} 29 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 30 | 31 | return output 32 | 33 | 34 | def avatar_128_extender(pipeline_index, 35 | finder_image_urls, 36 | extender_image_urls=[], 37 | *args, **kwargs): 38 | """ 39 | Example: 40 | http://25.media.tumblr.com/avatar_2909d6610c26_16.png 41 | to 42 | http://25.media.tumblr.com/avatar_2909d6610c26_128.png 43 | """ 44 | 45 | now_extender_image_urls = [] 46 | 47 | search_re = re.compile(r'(avatar_[a-zA-Z0-9_]+)_(\d+).', re.IGNORECASE) 48 | 49 | for image_url in finder_image_urls: 50 | if 'media.tumblr.com/' in image_url.lower(): 51 | if search_re.search(image_url): 52 | extender_image_url = search_re.sub(r'\1_128.', image_url) 53 | now_extender_image_urls.append(extender_image_url) 54 | 55 | output = {} 56 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 57 | 58 | return output 59 | -------------------------------------------------------------------------------- /haul/extenders/pipeline/google.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import re 4 | 5 | 6 | def blogspot_s1600_extender(pipeline_index, 7 | finder_image_urls, 8 | extender_image_urls=[], 9 | *args, **kwargs): 10 | """ 11 | Example: 12 | http://1.bp.blogspot.com/-S97wTYQKbrY/UkWukhKhTKI/AAAAAAAAJ0g/fcRDiqVC8Us/s898/aaPOP+001.jpg 13 | to 14 | http://1.bp.blogspot.com/-S97wTYQKbrY/UkWukhKhTKI/AAAAAAAAJ0g/fcRDiqVC8Us/s1600/aaPOP+001.jpg 15 | """ 16 | 17 | now_extender_image_urls = [] 18 | 19 | search_re = re.compile(r'/s\d+/', re.IGNORECASE) 20 | 21 | for image_url in finder_image_urls: 22 | if 'bp.blogspot.com/' in image_url.lower(): 23 | if search_re.search(image_url): 24 | extender_image_url = search_re.sub('/s1600/', image_url) 25 | now_extender_image_urls.append(extender_image_url) 26 | 27 | output = {} 28 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 29 | 30 | return output 31 | 32 | 33 | def ggpht_s1600_extender(pipeline_index, 34 | finder_image_urls, 35 | extender_image_urls=[], 36 | *args, **kwargs): 37 | """ 38 | Example: 39 | http://lh4.ggpht.com/-fFi-qJRuxeY/UjwHSOTHGOI/AAAAAAAArgE/SWTMT-hXzB4/s640/Celeber-ru-Emma-Watson-Net-A-Porter-The-Edit-Magazine-Photoshoot-2013-01.jpg 40 | to 41 | http://lh4.ggpht.com/-fFi-qJRuxeY/UjwHSOTHGOI/AAAAAAAArgE/SWTMT-hXzB4/s1600/Celeber-ru-Emma-Watson-Net-A-Porter-The-Edit-Magazine-Photoshoot-2013-01.jpg 42 | """ 43 | 44 | now_extender_image_urls = [] 45 | 46 | search_re = re.compile(r'/s\d+/', re.IGNORECASE) 47 | 48 | for image_url in finder_image_urls: 49 | if 'ggpht.com/' in image_url.lower(): 50 | if search_re.search(image_url): 51 | extender_image_url = search_re.sub('/s1600/', image_url) 52 | now_extender_image_urls.append(extender_image_url) 53 | 54 | output = {} 55 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 56 | 57 | return output 58 | 59 | 60 | def googleusercontent_s1600_extender(pipeline_index, 61 | finder_image_urls, 62 | extender_image_urls=[], 63 | *args, **kwargs): 64 | """ 65 | Example: 66 | https://lh6.googleusercontent.com/-T6V-utZHzbE/Ukjn-1MDOSI/AAAAAAAAA3g/H6Qcw1zt4n0/w555-h399-no/2101_aa2cac09d1c6431b8a635d61cd9c4471.jpg 67 | to 68 | https://lh6.googleusercontent.com/-T6V-utZHzbE/Ukjn-1MDOSI/AAAAAAAAA3g/H6Qcw1zt4n0/s1600/2101_aa2cac09d1c6431b8a635d61cd9c4471.jpg 69 | """ 70 | 71 | now_extender_image_urls = [] 72 | 73 | search_re = re.compile(r'/w\d+\-h\d+\-no/', re.IGNORECASE) 74 | 75 | for image_url in finder_image_urls: 76 | if 'googleusercontent.com/' in image_url.lower(): 77 | if search_re.search(image_url): 78 | extender_image_url = search_re.sub('/s1600/', image_url) 79 | now_extender_image_urls.append(extender_image_url) 80 | 81 | output = {} 82 | output['extender_image_urls'] = extender_image_urls + now_extender_image_urls 83 | 84 | return output 85 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Haul 2 | ==== 3 | 4 | .. image:: https://travis-ci.org/vinta/Haul.png?branch=master 5 | :alt: Build Badge 6 | :target: https://travis-ci.org/vinta/Haul 7 | 8 | .. image:: https://coveralls.io/repos/vinta/Haul/badge.png?branch=master 9 | :alt: Coverage Badge 10 | :target: https://coveralls.io/r/vinta/Haul 11 | 12 | .. image:: https://badge.fury.io/py/haul.png 13 | :alt: Version Badge 14 | :target: http://badge.fury.io/py/haul 15 | 16 | .. image:: https://d2weczhvl823v0.cloudfront.net/vinta/haul/trend.png 17 | :alt: Bitdeli Badge 18 | :target: https://bitdeli.com/free 19 | 20 | 21 | Find thumbnails and original images from URL or HTML file. 22 | 23 | Demo 24 | ==== 25 | 26 | `Hauler on Heroku `_ 27 | 28 | Installation 29 | ============ 30 | 31 | on Ubuntu 32 | 33 | .. code-block:: bash 34 | 35 | $ sudo apt-get install build-essential python-dev libxml2-dev libxslt1-dev 36 | $ pip install haul 37 | 38 | on Mac OS X 39 | 40 | .. code-block:: bash 41 | 42 | $ pip install haul 43 | 44 | Fail to install haul? `It is probably caused by lxml `_. 45 | 46 | Usage 47 | ===== 48 | 49 | Find images from ``img src``, ``a href`` and even ``background-image``: 50 | 51 | .. code-block:: python 52 | 53 | import haul 54 | 55 | url = 'http://gibuloto.tumblr.com/post/62525699435/fuck-yeah' 56 | result = haul.find_images(url) 57 | 58 | print(result.image_urls) 59 | """ 60 | output: 61 | [ 62 | 'http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_500.png', 63 | ... 64 | 'http://24.media.tumblr.com/avatar_a3a119b674e2_16.png', 65 | 'http://25.media.tumblr.com/avatar_9b04f54875e1_16.png', 66 | 'http://31.media.tumblr.com/avatar_0acf8f9b4380_16.png', 67 | ] 68 | """ 69 | 70 | Find original (or bigger size) images with ``extend=True``: 71 | 72 | .. code-block:: python 73 | 74 | import haul 75 | 76 | url = 'http://gibuloto.tumblr.com/post/62525699435/fuck-yeah' 77 | result = haul.find_images(url, extend=True) 78 | 79 | print(result.image_urls) 80 | """ 81 | output: 82 | [ 83 | 'http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_500.png', 84 | ... 85 | 'http://24.media.tumblr.com/avatar_a3a119b674e2_16.png', 86 | 'http://25.media.tumblr.com/avatar_9b04f54875e1_16.png', 87 | 'http://31.media.tumblr.com/avatar_0acf8f9b4380_16.png', 88 | # bigger size, extended from above urls 89 | 'http://25.media.tumblr.com/3f5f10d7216f1dd5eacb5eb3e302286a/tumblr_mtpcwdzKBT1qh9n5lo1_1280.png', 90 | ... 91 | 'http://24.media.tumblr.com/avatar_a3a119b674e2_128.png', 92 | 'http://25.media.tumblr.com/avatar_9b04f54875e1_128.png', 93 | 'http://31.media.tumblr.com/avatar_0acf8f9b4380_128.png', 94 | ] 95 | """ 96 | 97 | Advanced Usage 98 | ============== 99 | 100 | Custom finder / extender pipeline: 101 | 102 | .. code-block:: python 103 | 104 | from haul import Haul 105 | from haul.compat import str 106 | 107 | 108 | def img_data_src_finder(pipeline_index, 109 | soup, 110 | finder_image_urls=[], 111 | *args, **kwargs): 112 | """ 113 | Find image URL in 's data-src attribute 114 | """ 115 | 116 | now_finder_image_urls = [] 117 | 118 | for img in soup.find_all('img'): 119 | src = img.get('data-src', None) 120 | if src: 121 | src = str(src) 122 | now_finder_image_urls.append(src) 123 | 124 | output = {} 125 | output['finder_image_urls'] = finder_image_urls + now_finder_image_urls 126 | 127 | return output 128 | 129 | MY_FINDER_PIPELINE = ( 130 | 'haul.finders.pipeline.html.img_src_finder', 131 | 'haul.finders.pipeline.css.background_image_finder', 132 | img_data_src_finder, 133 | ) 134 | 135 | GOOGLE_SITES_EXTENDER_PIEPLINE = ( 136 | 'haul.extenders.pipeline.google.blogspot_s1600_extender', 137 | 'haul.extenders.pipeline.google.ggpht_s1600_extender', 138 | 'haul.extenders.pipeline.google.googleusercontent_s1600_extender', 139 | ) 140 | 141 | url = 'http://fashion-fever.nl/dressing-up/' 142 | h = Haul(parser='lxml', 143 | finder_pipeline=MY_FINDER_PIPELINE, 144 | extender_pipeline=GOOGLE_SITES_EXTENDER_PIEPLINE) 145 | result = h.find_images(url, extend=True) 146 | 147 | Run Tests 148 | ========= 149 | 150 | .. code-block:: bash 151 | 152 | $ python setup.py test 153 | -------------------------------------------------------------------------------- /haul/core.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | from collections import OrderedDict 4 | import mimetypes 5 | import re 6 | 7 | from bs4 import BeautifulSoup 8 | import requests 9 | 10 | from . import exceptions, settings, utils 11 | 12 | 13 | simple_url_re = re.compile(r'^https?://\[?\w', re.IGNORECASE) 14 | 15 | 16 | class Haul(object): 17 | """ 18 | Haul 19 | """ 20 | 21 | def __init__(self, 22 | parser=settings.DEFAULT_PARSER, 23 | finder_pipeline=settings.FINDER_PIPELINE, 24 | extender_pipeline=settings.EXTENDER_PIPELINE): 25 | 26 | self.parser = parser 27 | self.finder_pipeline = finder_pipeline 28 | self.extender_pipeline = extender_pipeline 29 | 30 | self.response = None # via Requests 31 | self.soup = None # via BeautifulSoup 32 | 33 | self._result = None 34 | 35 | def __repr__(self): 36 | return '' % (self.parser) 37 | 38 | @property 39 | def result(self): 40 | if not isinstance(self._result, HaulResult): 41 | self._result = HaulResult() 42 | 43 | return self._result 44 | 45 | def retrieve_url(self, url): 46 | """ 47 | Use requests to fetch remote content 48 | """ 49 | 50 | try: 51 | r = requests.get(url) 52 | except requests.ConnectionError: 53 | raise exceptions.RetrieveError('Connection fail') 54 | 55 | if r.status_code >= 400: 56 | raise exceptions.RetrieveError('Connected, but status code is %s' % (r.status_code)) 57 | 58 | real_url = r.url 59 | content = r.content 60 | 61 | try: 62 | content_type = r.headers['Content-Type'] 63 | except KeyError: 64 | content_type, encoding = mimetypes.guess_type(real_url, strict=False) 65 | 66 | self.response = r 67 | 68 | return content_type.lower(), content 69 | 70 | def parse_html(self, html): 71 | """ 72 | Use BeautifulSoup to parse HTML / XML 73 | http://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use 74 | """ 75 | 76 | soup = BeautifulSoup(html, self.parser) 77 | 78 | title_tag = soup.find('title') 79 | self.result.title = title_tag.string if title_tag else None 80 | 81 | self.soup = soup 82 | 83 | return soup 84 | 85 | def start_finder_pipeline(self, *args, **kwargs): 86 | pipeline_input = { 87 | 'soup': self.soup, 88 | } 89 | pipeline_output = pipeline_input.copy() 90 | 91 | for idx, name in enumerate(self.finder_pipeline): 92 | pipeline_output['pipeline_index'] = idx 93 | pipeline_output['pipeline_break'] = False 94 | 95 | if hasattr(name, '__call__'): 96 | finder_func = name 97 | else: 98 | finder_func = utils.module_member(name) 99 | 100 | output = finder_func(*args, **pipeline_output) 101 | 102 | if isinstance(output, dict): 103 | pipeline_output.update(output) 104 | 105 | if pipeline_output['pipeline_break']: 106 | break 107 | 108 | # remove unnecessary items 109 | pipeline_output.pop('pipeline_index', None) 110 | pipeline_output.pop('pipeline_break', None) 111 | pipeline_output.pop('soup', None) 112 | 113 | self.result.finder_image_urls = pipeline_output.get('finder_image_urls', []) 114 | 115 | return self.result 116 | 117 | def start_extender_pipeline(self, *args, **kwargs): 118 | pipeline_input = { 119 | 'finder_image_urls': self.result.finder_image_urls, 120 | } 121 | pipeline_output = pipeline_input.copy() 122 | 123 | for idx, name in enumerate(self.extender_pipeline): 124 | pipeline_output['pipeline_index'] = idx 125 | pipeline_output['pipeline_break'] = False 126 | 127 | if hasattr(name, '__call__'): 128 | extender_func = name 129 | else: 130 | extender_func = utils.module_member(name) 131 | 132 | output = extender_func(*args, **pipeline_output) 133 | 134 | if isinstance(output, dict): 135 | pipeline_output.update(output) 136 | 137 | if pipeline_output['pipeline_break']: 138 | break 139 | 140 | # remove unnecessary items 141 | pipeline_output.pop('pipeline_index', None) 142 | pipeline_output.pop('pipeline_break', None) 143 | pipeline_output.pop('finder_image_urls', None) 144 | 145 | self.result.extender_image_urls = pipeline_output.get('extender_image_urls', []) 146 | 147 | return self.result 148 | 149 | # API 150 | def find_images(self, url_or_html, extend=False): 151 | url = None 152 | content = None 153 | 154 | try: 155 | is_url = simple_url_re.match(url_or_html) 156 | except TypeError: 157 | raise exceptions.InvalidParameterError('Should be a URL or HTML text') 158 | 159 | if is_url: 160 | url = url_or_html 161 | content_type, content = self.retrieve_url(url) 162 | else: 163 | content_type = 'text/html' 164 | content = url_or_html 165 | 166 | self.result.url = url 167 | self.result.content_type = content_type 168 | 169 | if 'text/html' in content_type: 170 | self.parse_html(content) 171 | 172 | self.start_finder_pipeline() 173 | 174 | if extend: 175 | self.start_extender_pipeline() 176 | 177 | elif 'image/' in content_type: 178 | self.result.finder_image_urls = [str(self.response.url), ] 179 | 180 | if extend: 181 | self.start_extender_pipeline() 182 | 183 | else: 184 | raise exceptions.ContentTypeNotSupported(content_type) 185 | 186 | return self.result 187 | 188 | 189 | class HaulResult(object): 190 | """ 191 | Result of Haul 192 | """ 193 | 194 | def __init__(self): 195 | self.url = None 196 | self.content_type = None 197 | self.title = None 198 | self.finder_image_urls = [] 199 | self.extender_image_urls = [] 200 | 201 | def __repr__(self): 202 | return '' % (self.content_type) 203 | 204 | @property 205 | def is_found(self): 206 | return True if len(self.finder_image_urls) > 0 else False 207 | 208 | @property 209 | def image_urls(self): 210 | """ 211 | Combine finder_image_urls and extender_image_urls, 212 | remove duplicate but keep order 213 | """ 214 | 215 | all_image_urls = self.finder_image_urls[:] 216 | for image_url in self.extender_image_urls: 217 | if image_url not in all_image_urls: 218 | all_image_urls.append(image_url) 219 | 220 | return all_image_urls 221 | 222 | def to_dict(self): 223 | return self.__dict__ 224 | 225 | def to_ordered_dict(self): 226 | order_keys = ( 227 | 'url', 228 | 'content_type', 229 | 'title', 230 | 'finder_image_urls', 231 | 'extender_image_urls', 232 | ) 233 | 234 | d = OrderedDict() 235 | for key in order_keys: 236 | d[key] = getattr(self, key) 237 | 238 | return d 239 | -------------------------------------------------------------------------------- /tests/test.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | 3 | import os 4 | import unittest 5 | 6 | from haul import Haul, HaulResult, exceptions 7 | from haul.utils import read_file 8 | 9 | 10 | TESTS_DIR = os.path.abspath(os.path.join(__file__, '../')) 11 | 12 | 13 | class HaulBaseTestCase(unittest.TestCase): 14 | 15 | def setUp(self): 16 | self.complete_html = read_file(os.path.join(TESTS_DIR, 'fixtures/page.html')) 17 | self.no_image_html = read_file(os.path.join(TESTS_DIR, 'fixtures/no_image_page.html')) 18 | self.fragmented_html = read_file(os.path.join(TESTS_DIR, 'fixtures/fragment.html')) 19 | 20 | self.blogspot_html = read_file(os.path.join(TESTS_DIR, 'fixtures/blogspot.html')) 21 | self.tumblr_html = read_file(os.path.join(TESTS_DIR, 'fixtures/tumblr.html')) 22 | self.wordpress_html = read_file(os.path.join(TESTS_DIR, 'fixtures/wordpress.html')) 23 | 24 | self.webpage_url = 'http://vinta.ws/blog/529' 25 | 26 | self.image_url = 'http://files.heelsfetishism.com/media/heels/2013/09/01/16576_3ce9d1b8c1744319837bab454ed10f0d.jpg' 27 | self.image_url_with_querysting = 'http://files.heelsfetishism.com/media/heels/2013/08/20/2070_566cf1cd44fd4692aa6cca9b3408a97d.jpg?q=test' 28 | 29 | self.amazon_url = 'http://www.amazon.com/dp/B00C67CRRE/' 30 | self.blogspot_url = 'http://atlantic-pacific.blogspot.tw/2013/09/formulaic-dressing.html' 31 | self.blogspot_image_url = 'http://1.bp.blogspot.com/-S97wTYQKbrY/UkWukhKhTKI/AAAAAAAAJ0g/fcRDiqVC8Us/s898/aaPOP+001.jpg' 32 | self.fancy_url = 'http://fancy.com/things/307759676836021549/Patent-Leather-Heels-by-Jimmy-Choo' 33 | self.flickr_url = 'http://www.flickr.com/photos/blurri/4997760101/' 34 | self.instagram_url = 'http://instagram.com/p/YC9A5JQdrS/' 35 | self.pinterest_url = 'http://www.pinterest.com/pin/237987161531161351/' 36 | self.pinterest_image_url = 'http://media-cache-ec0.pinimg.com/736x/50/9b/bd/509bbd5c6543d473bc2b49befe75f4c6.jpg' 37 | self.tumblr_url = 'http://gibuloto.tumblr.com/post/62525699435/fuck-yeah' 38 | self.tumblr_image_url = 'http://31.media.tumblr.com/e199758fc69df7554e64772e970b4fe0/tumblr_ms446vqoA21qbrjcdo1_500.jpg' 39 | self.wordpress_url = 'http://www.wendyslookbook.com/2013/09/morning-coffee-run-tweed-jacket-watermark-plaid/' 40 | self.wordpress_image_url = 'http://www.wendyslookbook.com/wp-content/uploads/2013/09/Morning-Coffee-Run-7-433x650.jpg' 41 | 42 | self.not_exist_url = 'http://domain-not-exist-123.com/' 43 | self.broken_url = 'http://heelsfetishism.com/404/not/found/' 44 | self.not_supported_url = 'https://www.youtube.com/audiolibrary_download?vid=463864fcafcbc5bc' 45 | 46 | 47 | class HaulResultTestCase(HaulBaseTestCase): 48 | 49 | def setUp(self): 50 | super(HaulResultTestCase, self).setUp() 51 | 52 | def test_is_found_true(self): 53 | h = Haul() 54 | hr = h.find_images(self.complete_html) 55 | 56 | self.assertTrue(hr.is_found) 57 | 58 | def test_is_found_false(self): 59 | h = Haul() 60 | hr = h.find_images(self.no_image_html) 61 | 62 | self.assertFalse(hr.is_found) 63 | 64 | 65 | class FindImagesFromHTMLTestCase(HaulBaseTestCase): 66 | 67 | def setUp(self): 68 | super(FindImagesFromHTMLTestCase, self).setUp() 69 | 70 | def test_find_html_document(self): 71 | h = Haul() 72 | hr = h.find_images(self.complete_html) 73 | 74 | self.assertIsInstance(hr, HaulResult) 75 | 76 | image_urls = hr.image_urls 77 | image_urls_count = len(image_urls) 78 | self.assertEqual(image_urls_count, 6) 79 | 80 | def test_find_html_fragment(self): 81 | h = Haul() 82 | hr = h.find_images(self.fragmented_html) 83 | 84 | self.assertIsInstance(hr, HaulResult) 85 | 86 | image_urls = hr.image_urls 87 | image_urls_count = len(image_urls) 88 | self.assertEqual(image_urls_count, 6) 89 | 90 | 91 | class FindImagesFromURLTestCase(HaulBaseTestCase): 92 | 93 | def setUp(self): 94 | super(FindImagesFromURLTestCase, self).setUp() 95 | 96 | def test_find_html_url(self): 97 | h = Haul() 98 | hr = h.find_images(self.webpage_url) 99 | 100 | self.assertIsInstance(hr, HaulResult) 101 | self.assertIn('text/html', hr.content_type) 102 | 103 | def test_fancy_url(self): 104 | h = Haul() 105 | hr = h.find_images(self.fancy_url) 106 | 107 | self.assertIsInstance(hr, HaulResult) 108 | self.assertIn('text/html', hr.content_type) 109 | 110 | def test_find_image_url(self): 111 | h = Haul() 112 | hr = h.find_images(self.image_url) 113 | 114 | self.assertIsInstance(hr, HaulResult) 115 | self.assertIn('image/', hr.content_type) 116 | 117 | 118 | class ExtenderPipelineTestCase(HaulBaseTestCase): 119 | 120 | def setUp(self): 121 | super(ExtenderPipelineTestCase, self).setUp() 122 | 123 | def test_blogspot(self): 124 | h = Haul() 125 | hr = h.find_images(self.blogspot_html, extend=True) 126 | 127 | self.assertIsInstance(hr, HaulResult) 128 | self.assertIn('text/html', hr.content_type) 129 | 130 | def test_tumblr(self): 131 | h = Haul() 132 | hr = h.find_images(self.tumblr_html, extend=True) 133 | 134 | self.assertIsInstance(hr, HaulResult) 135 | self.assertIn('text/html', hr.content_type) 136 | 137 | def test_pinterest_image_url(self): 138 | h = Haul() 139 | hr = h.find_images(self.pinterest_image_url, extend=True) 140 | 141 | self.assertIsInstance(hr, HaulResult) 142 | self.assertIn('image/', hr.content_type) 143 | 144 | image_urls = hr.image_urls 145 | image_urls_count = len(image_urls) 146 | self.assertEqual(image_urls_count, 2) 147 | 148 | def test_tumblr_image_url(self): 149 | h = Haul() 150 | hr = h.find_images(self.tumblr_image_url, extend=True) 151 | 152 | self.assertIsInstance(hr, HaulResult) 153 | self.assertIn('image/', hr.content_type) 154 | 155 | image_urls = hr.image_urls 156 | image_urls_count = len(image_urls) 157 | self.assertEqual(image_urls_count, 2) 158 | 159 | def test_wordpress(self): 160 | h = Haul() 161 | hr = h.find_images(self.wordpress_html, extend=True) 162 | 163 | self.assertIsInstance(hr, HaulResult) 164 | self.assertIn('text/html', hr.content_type) 165 | 166 | 167 | class CustomFinderPipelineTestCase(HaulBaseTestCase): 168 | 169 | def setUp(self): 170 | super(CustomFinderPipelineTestCase, self).setUp() 171 | 172 | def test_find_html_document(self): 173 | from haul.compat import str 174 | 175 | def img_data_src_finder(pipeline_index, 176 | soup, 177 | finder_image_urls=[], 178 | *args, **kwargs): 179 | """ 180 | Find image URL in 's data-src attribute 181 | """ 182 | 183 | now_finder_image_urls = [] 184 | 185 | for img in soup.find_all('img'): 186 | src = img.get('data-src', None) 187 | if src: 188 | src = str(src) 189 | now_finder_image_urls.append(src) 190 | 191 | output = {} 192 | output['finder_image_urls'] = finder_image_urls + now_finder_image_urls 193 | 194 | return output 195 | 196 | FINDER_PIPELINE = ( 197 | 'haul.finders.pipeline.html.img_src_finder', 198 | 'haul.finders.pipeline.html.a_href_finder', 199 | 'haul.finders.pipeline.css.background_image_finder', 200 | img_data_src_finder, 201 | ) 202 | 203 | h = Haul(finder_pipeline=FINDER_PIPELINE) 204 | hr = h.find_images(self.complete_html) 205 | 206 | self.assertIsInstance(hr, HaulResult) 207 | 208 | test_image_url = 'http://files.heelsfetishism.com/media/heels/2013/10/03/18099_307a62430fa045cc9b2124d16de63f33.jpg' 209 | self.assertIn(test_image_url, hr.finder_image_urls) 210 | 211 | image_urls = hr.image_urls 212 | image_urls_count = len(image_urls) 213 | self.assertEqual(image_urls_count, 7) 214 | 215 | 216 | class ExceptionsTestCase(HaulBaseTestCase): 217 | 218 | def setUp(self): 219 | super(ExceptionsTestCase, self).setUp() 220 | 221 | def test_invalid_parameter_error(self): 222 | h = Haul() 223 | 224 | with self.assertRaises(exceptions.InvalidParameterError): 225 | url_or_html = None 226 | h.find_images(url_or_html) 227 | 228 | def test_retrieve_error(self): 229 | h = Haul() 230 | 231 | with self.assertRaises(exceptions.RetrieveError): 232 | h.find_images(self.not_exist_url) 233 | 234 | with self.assertRaises(exceptions.RetrieveError): 235 | h.find_images(self.broken_url) 236 | 237 | def test_content_type_not_supported(self): 238 | h = Haul() 239 | 240 | with self.assertRaises(exceptions.ContentTypeNotSupported): 241 | h.find_images(self.not_supported_url) 242 | 243 | 244 | if __name__ == '__main__': 245 | unittest.main() 246 | -------------------------------------------------------------------------------- /tests/fixtures/tumblr.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Dirty Protocol 8 | 9 | 10 | 11 | 12 | 13 | 14 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 |
94 |
95 |

Dirty Protocol

96 |

97 | 98 |

99 |

100 |
101 |
102 | 103 | 104 |
105 |
106 | September 28, 2013 at 1:43pm
107 | 108 | 109 | 110 | 111 | Home
112 | 113 | Reblogged from the-streetstyle
114 |
115 | 116 | 117 | 118 | FUCK YEAH 119 |

FUCK YEAH

(via patentleather)

120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 |
132 | 133 | 134 | 135 | 136 |
137 |

Notes

138 |

139 | 140 |

    141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 |
  1. 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | gibuloto reblogged this from patentleather and added: 157 |
    158 | 159 |
    160 | 161 | FUCK YEAH 162 |
    163 | 164 | 165 |
  2. 166 | 167 | 168 | 169 | 170 | 171 | 183 | 184 | 185 | 186 | 187 | 188 | 200 | 201 | 202 | 203 | 204 | 205 | 217 | 218 | 219 | 220 | 221 | 222 |
  3. 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 5kyscrap3r reblogged this from perksofbeingyanelfi 231 |
    232 | 233 | 234 | 235 |
  4. 236 | 237 | 238 | 239 | 240 | 241 |
  5. 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | perksofbeingyanelfi reblogged this from the-streetstyle 250 |
    251 | 252 | 253 | 254 |
  6. 255 | 256 | 257 | 258 | 259 | 260 | 272 | 273 | 274 | 275 | 276 | 277 | 289 | 290 | 291 | 292 | 293 | 294 |
  7. 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | oscarsd15 reblogged this from girlwear 303 |
    304 | 305 | 306 | 307 |
  8. 308 | 309 | 310 | 311 | 312 | 313 |
  9. 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | calligraphicsunflowers reblogged this from the-streetstyle 322 |
    323 | 324 | 325 | 326 |
  10. 327 | 328 | 329 | 330 | 331 | 332 |
  11. 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | la-vie-est-belle-xx reblogged this from the-streetstyle 341 |
    342 | 343 | 344 | 345 |
  12. 346 | 347 | 348 | 349 | 350 | 351 | 363 | 364 | 365 | 366 | 367 | 368 |
  13. 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | divinehumans reblogged this from busstopgirl 377 |
    378 | 379 | 380 | 381 |
  14. 382 | 383 | 384 | 385 | 386 | 387 |
  15. 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | flowercrowns-and-sunglasses reblogged this from infinite-momennts 396 |
    397 | 398 | 399 | 400 |
  16. 401 | 402 | 403 | 404 | 405 | 406 |
  17. 407 | 408 | 409 | 410 | 411 | 412 | 413 | 414 | busstopgirl reblogged this from the-streetstyle 415 |
    416 | 417 | 418 | 419 |
  18. 420 | 421 | 422 | 423 | 424 | 425 |
  19. 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | infinite-momennts reblogged this from indodreamcatcher 434 |
    435 | 436 | 437 | 438 |
  20. 439 | 440 | 441 | 442 | 443 | 444 |
  21. 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | indodreamcatcher reblogged this from the-streetstyle 453 |
    454 | 455 | 456 | 457 |
  22. 458 | 459 | 460 | 461 | 462 | 463 | 475 | 476 | 477 | 478 | 479 | 480 | 492 | 493 | 494 | 495 | 496 | 497 |
  23. 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | we-are-enchanted reblogged this from the-streetstyle 506 |
    507 | 508 | 509 | 510 |
  24. 511 | 512 | 513 | 514 | 515 | 516 |
  25. 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | crazygirl-25 reblogged this from the-streetstyle 525 |
    526 | 527 | 528 | 529 |
  26. 530 | 531 | 532 | 533 | 534 | 535 | 547 | 548 | 549 | 550 | 551 | 552 | 564 | 565 | 566 | 567 | 568 | 569 | 581 | 582 | 583 | 584 | 585 | 586 |
  27. 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | cy121 reblogged this from the-streetstyle and added: 595 |
    596 | 597 |
    598 | 599 | Street chic 600 |
    601 | 602 | 603 |
  28. 604 | 605 | 606 | 607 | 608 | 609 |
  29. 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | karennnkim reblogged this from the-streetstyle 618 |
    619 | 620 | 621 | 622 |
  30. 623 | 624 | 625 | 626 | 627 | 628 |
  31. 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | wlgkei reblogged this from the-streetstyle 637 |
    638 | 639 | 640 | 641 |
  32. 642 | 643 | 644 | 645 | 646 | 647 |
  33. 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | cuddlybooboo reblogged this from the-streetstyle 656 |
    657 | 658 | 659 | 660 |
  34. 661 | 662 | 663 | 664 | 665 | 666 |
  35. 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | babesandi reblogged this from the-streetstyle 675 |
    676 | 677 | 678 | 679 |
  36. 680 | 681 | 682 | 683 | 684 | 685 | 697 | 698 | 699 | 700 | 701 | 702 |
  37. 703 | 704 | 705 | 706 | 707 | 708 | 709 | 710 | vier-kleeblatt reblogged this from justnin-i 711 |
    712 | 713 | 714 | 715 |
  38. 716 | 717 | 718 | 719 | 720 | 721 | 733 | 734 | 735 | 736 | 737 | 738 | 750 | 751 | 752 | 753 | 754 | 755 |
  39. 756 | 757 | 758 | 759 | 760 | 761 | 762 | 763 | damaryslujan reblogged this from the-streetstyle 764 |
    765 | 766 | 767 | 768 |
  40. 769 | 770 | 771 | 772 | 773 | 774 |
  41. 775 | 776 | 777 | 778 | 779 | 780 | 781 | 782 | amanda-boie reblogged this from faithhealthlife 783 |
    784 | 785 | 786 | 787 |
  42. 788 | 789 | 790 | 791 | 792 | 793 |
  43. 794 | 795 | 796 | 797 | 798 | 799 | 800 | 801 | justnin-i reblogged this from faithhealthlife 802 |
    803 | 804 | 805 | 806 |
  44. 807 | 808 | 809 | 810 | 811 | 812 |
  45. 813 | 814 | 815 | 816 | 817 | 818 | 819 | 820 | nainpreetode reblogged this from faithhealthlife 821 |
    822 | 823 | 824 | 825 |
  46. 826 | 827 | 828 | 829 | 830 | 831 | 843 | 844 | 845 | 846 | 847 | 848 | 860 | 861 | 862 | 863 | 864 | 865 |
  47. 866 | 867 | 868 | 869 | 870 | 871 | 872 | 873 | heartwhatheart reblogged this from the-streetstyle 874 |
    875 | 876 | 877 | 878 |
  48. 879 | 880 | 881 | 882 | 883 | 884 |
  49. 885 | 886 | 887 | 888 | 889 | 890 | 891 | 892 | rendezvouswithperfection reblogged this from faithhealthlife 893 |
    894 | 895 | 896 | 897 |
  50. 898 | 899 | 900 | 901 | 902 | 903 | 915 | 916 | 917 | 918 | 919 | 920 |
  51. 921 | 922 | 923 | 924 | 925 | 926 | 927 | 928 | no0neisperfect reblogged this from faithhealthlife 929 |
    930 | 931 | 932 | 933 |
  52. 934 | 935 | 936 | 937 | 938 | 939 |
  53. 940 | 941 | 942 | 943 | 944 | 945 | 946 | 947 | truth-booth reblogged this from faithhealthlife 948 |
    949 | 950 | 951 | 952 |
  54. 953 | 954 | 955 | 956 | 957 | 958 | 970 | 971 | 972 | 973 | 974 | 975 |
  55. 976 | 977 | 978 | 979 | 980 | 981 | 982 | 983 | jay-nana reblogged this from faithhealthlife 984 |
    985 | 986 | 987 | 988 |
  56. 989 | 990 | 991 | 992 | 993 | 994 |
  57. 995 | 996 | 997 | 998 | 999 | 1000 | 1001 | 1002 | resolutionobsession reblogged this from faithhealthlife 1003 |
    1004 | 1005 | 1006 | 1007 |
  58. 1008 | 1009 | 1010 | 1011 | 1012 | 1013 |
  59. 1014 | 1015 | 1016 | 1017 | 1018 | 1019 | 1020 | 1021 | haveheartandhealth reblogged this from faithhealthlife 1022 |
    1023 | 1024 | 1025 | 1026 |
  60. 1027 | 1028 | 1029 | 1030 | 1031 | 1032 |
  61. 1033 | 1034 | 1035 | 1036 | 1037 | 1038 | 1039 | 1040 | rayham reblogged this from faithhealthlife 1041 |
    1042 | 1043 | 1044 | 1045 |
  62. 1046 | 1047 | 1048 | 1049 | 1050 | 1051 | 1063 | 1064 | 1068 | 1069 | 1070 |
1071 |

1072 |
1073 | 1074 | 1075 | 1076 | 1077 | 1078 | 1079 | 1080 | 1081 | 1082 | 1083 | 1086 | 1087 | 1088 | 1089 |
1090 |
1091 | 1096 |
1097 |
1098 | 1101 |
1102 |
1103 | 1104 | 1105 | 1106 | 1107 | 1108 | 1109 | 1110 | 1111 | 1130 | 1131 | 1132 | 1133 | 1134 | 1135 | 1136 | 1143 | 1144 | 1145 | 1146 | 1159 | 1162 | 1165 | 1166 | 1167 | 1168 | 1169 | 1170 | --------------------------------------------------------------------------------