├── .coveragerc
├── .github
    └── workflows
    │   └── ci.yml
├── .gitignore
├── .isort.cfg
├── .vscode
    └── settings.json
├── AUTHORS.rst
├── HISTORY.rst
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
    ├── Makefile
    ├── api.rst
    ├── conf.py
    ├── index.rst
    ├── make.bat
    └── usage
    │   ├── advanced_usage.rst
    │   ├── install.rst
    │   └── starting_out.rst
├── lassie
    ├── __init__.py
    ├── api.py
    ├── compat.py
    ├── core.py
    ├── exceptions.py
    ├── filters
    │   ├── __init__.py
    │   ├── apple.py
    │   ├── generic.py
    │   ├── oembed
    │   │   ├── __init__.py
    │   │   └── providers.py
    │   └── social.py
    └── utils.py
├── pyproject.toml
├── requirements.txt
├── setup.py
├── test_requirements.txt
└── tests
    ├── __init__.py
    ├── base.py
    ├── json
        └── youtube
        │   ├── bad_html.json
        │   ├── good.json
        │   ├── no_thumb.json
        │   └── no_type.json
    ├── oembed
        ├── __init__.py
        └── test_youtube.py
    ├── templates
        ├── amp
        │   ├── all_properties.html
        │   ├── bad_json.html
        │   ├── list_image.html
        │   ├── list_image_empty.html
        │   ├── list_image_list.html
        │   ├── list_image_list_str.html
        │   ├── list_image_str.html
        │   ├── list_json.html
        │   ├── list_thumbnail_image.html
        │   ├── str_image.html
        │   ├── str_thumbnail_image.html
        │   ├── thumbnail_image.html
        │   └── video_objects.html
        ├── core
        │   ├── bad_image_dimensions.html
        │   ├── bad_keywords.html
        │   ├── class_setting_is_none.html
        │   ├── class_vs_method_settings.html
        │   ├── empty.html
        │   ├── image_dimensions.html
        │   ├── no_html_tag.html
        │   └── retrieve_all_images.html
        ├── generic
        │   ├── all_properties.html
        │   ├── bad_locale.html
        │   ├── canonical.html
        │   ├── favicon.html
        │   └── no_title.html
        ├── handle_file_content
        │   └── image_file.jpg
        ├── open_graph
        │   ├── all_properties.html
        │   ├── no_og_title_no_og_url.html
        │   ├── og_image_plus_two_body_images.html
        │   └── og_image_relative_url.html
        └── twitter_card
        │   ├── all_properties.html
        │   └── no_og_title_use_twitter_title.html
    ├── test_amp.py
    ├── test_core.py
    ├── test_generic.py
    ├── test_handle_file_content.py
    ├── test_open_graph.py
    └── test_twitter_card.py


/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | omit = ../lassie/compat.py
3 | 
4 | [report]
5 | exclude_lines =
6 |     pragma: no cover
7 | 
8 |     def __repr__
9 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   release:
 6 |     types:
 7 |       - published
 8 | 
 9 | jobs:
10 |   test:
11 |     runs-on: ubuntu-latest
12 |     strategy:
13 |       matrix:
14 |         python-version: [2.7, 3.8]
15 | 
16 |     steps:
17 |       - uses: actions/checkout@v2
18 | 
19 |       - name: Set up Python ${{ matrix.python-version }}
20 |         uses: actions/setup-python@v2
21 |         with:
22 |           python-version: ${{ matrix.python-version }}
23 | 
24 |       - name: Install Dependencies
25 |         run: |
26 |           python -m pip install --upgrade pip
27 |           pip install -r test_requirements.txt
28 | 
29 |       - name: Test
30 |         run: nosetests -v -w tests/ --logging-filter="lassie" --with-cov --cov lassie --cov-config .coveragerc --cov-report term-missing
31 |   publish:  # Builds an sdist and uploads it to PyPI; runs only for releases cut from 'main' after tests pass.
32 |     needs: [test]
33 |     if: github.event_name == 'release' && github.event.release.target_commitish == 'main'
34 |     runs-on: ubuntu-18.04  # NOTE(review): this runner image has been retired by GitHub -- confirm and move to a supported image
35 |     steps:
36 |       - uses: actions/checkout@v2
37 | 
38 |       - uses: actions/setup-python@v2
39 |         with:
40 |           python-version: "3.7"  # quoted so YAML never parses a version as a float (e.g. 3.10 -> 3.1)
41 | 
42 |       - name: Build a source tarball
43 |         run: python setup.py sdist
44 | 
45 |       - name: Publish 📦 to PyPI
46 |         uses: pypa/gh-action-pypi-publish@release/v1  # 'master' branch of this action was sunset; release/v1 is the supported ref
47 |         with:
48 |           password: ${{ secrets.PYPI_API_TOKEN }}
49 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.py[cod]
 2 | 
 3 | # C extensions
 4 | *.so
 5 | 
 6 | # Packages
 7 | *.egg
 8 | *.egg-info
 9 | dist
10 | build
11 | eggs
12 | parts
13 | bin
14 | var
15 | sdist
16 | develop-eggs
17 | .installed.cfg
18 | lib
19 | lib64
20 | __pycache__
21 | 
22 | # Installer logs
23 | pip-log.txt
24 | 
25 | # Unit test / coverage reports
26 | .coverage
27 | .tox
28 | nosetests.xml
29 | 
30 | # Translations
31 | *.mo
32 | 
33 | # Mr Developer
34 | .mr.developer.cfg
35 | .project
36 | .pydevproject
37 | 
38 | .DS_STORE
39 | test.py
40 | docs/_build
41 | 
42 | .venv
43 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
 1 | [settings]
 2 | line_length=99
 3 | multi_line_output=5
 4 | include_trailing_comma=True
 5 | known_future_library=future,pies
 6 | known_standard_library=std,std2
 7 | known_first_party=lassie
 8 | default_section=THIRDPARTY
 9 | indent='    '
10 | sections=FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER
11 | 


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "yaml.customTags": [
 3 |         "!And",
 4 |         "!And sequence",
 5 |         "!If",
 6 |         "!If sequence",
 7 |         "!Not",
 8 |         "!Not sequence",
 9 |         "!Equals",
10 |         "!Equals sequence",
11 |         "!Or",
12 |         "!Or sequence",
13 |         "!FindInMap",
14 |         "!FindInMap sequence",
15 |         "!Base64",
16 |         "!Join",
17 |         "!Join sequence",
18 |         "!Cidr",
19 |         "!Ref",
20 |         "!Sub",
21 |         "!Sub sequence",
22 |         "!GetAtt",
23 |         "!GetAZs",
24 |         "!ImportValue",
25 |         "!ImportValue sequence",
26 |         "!Select",
27 |         "!Select sequence",
28 |         "!Split",
29 |         "!Split sequence"
30 |     ]
31 | }
32 | 


--------------------------------------------------------------------------------
/AUTHORS.rst:
--------------------------------------------------------------------------------
 1 | Lassie is written and maintained by Mike Helmick and various contributors:
 2 | 
 3 | 
 4 | Development Lead
 5 | ----------------
 6 | 
 7 | - Mike Helmick <me@michaelhelmick.com>
 8 | 
 9 | 
10 | Patches and Suggestions
11 | -----------------------
12 | 
13 | - `Ramiro Gómez <https://github.com/yaph>`_ - Made image URLs absolute, image width/heights are more lenient
14 | - `jay754 <https://github.com/jay754>`_ - Updated import in setup.py
15 | - `Mark Beacom <https://github.com/mbeacom>`_ - Update requirements in setup.py
16 | - `John Hobbs <https://github.com/jmhobbs>`_ - Support for canonical link tag
17 | - `Benjamin Kampmann <https://github.com/ligthyear>`_ - Updating requirements, fixing Python 3.5 support
18 | 


--------------------------------------------------------------------------------
/HISTORY.rst:
--------------------------------------------------------------------------------
  1 | .. :changelog:
  2 | 
  3 | History
  4 | -------
  5 | 
  6 | 0.11.11 (2020-12-16)
  7 | ++++++++++++++++++++
  8 | - No changes.
  9 | 
 10 | 0.11.10 (2020-12-16)
 11 | ++++++++++++++++++++
 12 | - Add `html` to response dict when available.
 13 | - Upgrade to GitHub Actions
 14 | 
 15 | 0.11.9 (2020-12-16)
 16 | +++++++++++++++++++
 17 | - Upgrade beautifulsoup4 dependency
 18 | 
 19 | 0.11.8 (2020-12-16)
 20 | +++++++++++++++++++
 21 | - Upgrade requests dependency
 22 | 
 23 | 0.11.7 (2018-08-03)
 24 | +++++++++++++++++++
 25 | - Try and return the "best" url. (#75).
 26 | - Fix issue where AMP image data was a list of strings. (#75).
 27 | 
 28 | 0.11.6 (2018-05-24)
 29 | +++++++++++++++++++
 30 | - Fix issue where AMP images was a list of dictionaries and being identified as an object.
 31 | 
 32 | 0.11.5 (2017-12-27)
 33 | +++++++++++++++++++
 34 | - Pin requests==2.18.4
 35 | 
 36 | 0.11.4 (2017-11-01)
 37 | +++++++++++++++++++
 38 | - Always get oembed AND html data.
 39 | 
 40 | 0.11.3 (2017-11-01)
 41 | +++++++++++++++++++
 42 | - Fix filters.oembed module once lassie is packaged.
 43 | 
 44 | 0.11.0 (2017-11-01)
 45 | +++++++++++++++++++
 46 | - Add support for OEmbed providers (YouTube)
 47 | 
 48 | 0.10.1 (2017-06-02)
 49 | +++++++++++++++++++
 50 | - Remove owl emoji from README.rst so installs on Windows don't fail.
 51 | 
 52 | 0.10.0 (2017-02-03)
 53 | +++++++++++++++++++
 54 | - Fix issue where a website may have malformed HTML and no <html> tag causing soup.html to be None (#60)
 55 | - Updated beautifulsoup4 to 4.5.3
 56 | - Update html5lib to 1.0b10
 57 | 
 58 | 0.9.0 (2017-01-29)
 59 | ++++++++++++++++++
 60 | - Added a default fake user agent to use instead of using python-requests/version (some websites will mark certain user agents as bot attempts)
 61 | - Updated requests to 2.13.0
 62 | 
 63 | 0.8.7 (2016-12-21)
 64 | ++++++++++++++++++
 65 | - Fix Python 3 support
 66 | - Handle empty AMP image lists
 67 | 
 68 | 0.8.6 (2016-11-17)
 69 | ++++++++++++++++++
 70 | - Handle AMP image list of strings vs list of objects
 71 | 
 72 | 0.8.5 (2016-11-03)
 73 | ++++++++++++++++++
 74 | - Handle AMP data that is contained in a list
 75 | - Retrieve videos and thumbnails (as images) from AMP VideoObjects
 76 | 
 77 | 0.8.4 (2016-11-01)
 78 | ++++++++++++++++++
 79 | - Fix issue where AMP images could be lists inside an object
 80 | 
 81 | 0.8.3 (2016-10-21)
 82 | ++++++++++++++++++
 83 | - Fix issue where some keys returned (i.e. description) would not be retrieved if the key existed with an empty value already
 84 | 
 85 | 0.8.2 (2016-09-26)
 86 | ++++++++++++++++++
 87 | - Fix issue where AMP images could be images and not objects
 88 | 
 89 | 0.8.1 (2016-09-26)
 90 | ++++++++++++++++++
 91 | - Add support for AMP "description" attribute
 92 | - Fix issue where an error would be thrown if width/height of an image weren't strings
 93 | - Fix duplicate AMP title request, should have been url
 94 | 
 95 | 0.8.0 (2016-09-26)
 96 | ++++++++++++++++++
 97 | - Add support for links that use AMP
 98 | 
 99 | 0.7.2 (2016-08-01)
100 | ++++++++++++++++++
101 | - Add `status_code` to response dictionary (for "file-like" responses, as well)
102 | 
103 | 0.7.1 (2016-07-27)
104 | ++++++++++++++++++
105 | - Add support for open graph `site_name`
106 | 
107 | 
108 | 0.7.0 (2016-07-01)
109 | ++++++++++++++++++
110 | - Add `status_code` to response dictionary
111 | 
112 | 
113 | 0.6.2 (2015-11-11)
114 | ++++++++++++++++++
115 | - Pinned `requests` library to version 2.8.1
116 | - Pinned `beautifulsoup4` library to version 4.4.1
117 | - Add Python 3.5 to Travis CI build matrix (officially support 3.5)
118 | 
119 | 
120 | 0.6.1 (2015-10-30)
121 | ++++++++++++++++++
122 | - Catch and raise `LassieError` on HEAD requests when `handle_file_content` is passed to the Lassie API
123 | - Pinned `requests` library to version 2.8.0
124 | 
125 | 
126 | 0.6.0 (2015-08-19)
127 | ++++++++++++++++++
128 | - Support for secure url image and videos from Open Graph
129 | - Simplified `merge_settings` and data updating internally
130 | 
131 | 
132 | 0.5.3 (2015-07-02)
133 | ++++++++++++++++++
134 | - Handle when a website doesn't set a value on the "keywords" meta tag
135 | 
136 | 
137 | 0.5.2 (2015-04-16)
138 | ++++++++++++++++++
139 | - Updated `requests` and `beautifulsoup4` library versions
140 | 
141 | 
142 | 0.5.1 (2014-08-05)
143 | ++++++++++++++++++
144 | - Fix issue where headers didn't always have 'Content-Type' key
145 | 
146 | 
147 | 0.5.0 (2014-06-23)
148 | ++++++++++++++++++
149 | - Added ability to `fetch` links that are image files (jpg, gif, png, bmp)
150 | - Renamed `_retreive_content` to `_retrieve_content` because I evidently don't know how to spell correctly
151 | 
152 | 
153 | 0.4.0 (2013-09-30)
154 | ++++++++++++++++++
155 | - Updated `requests` and `beautifulsoup4` library versions
156 | - Added support for manipulating the request, see Advanced Usage docs
157 | - Fixed issue where `lassie.fetch` would break if the page had no title
158 | - Lassie is now more lenient when it comes to width and height values of images (now accepts integers (100) or integers with a px suffix (100px))
159 | - Image URLs for all images are now absolute
160 | 
161 | 0.3.0 (2013-08-15)
162 | ++++++++++++++++++
163 | 
164 | - Added support for `locale` to be returned. If `lang` is specified in the `html` tag and it normalizes to an actual locale, it will be added to the returned data.
165 | - Fixed bug where height was not being returned for body images
166 | - Added test coverage, we're 100% covered! :D
167 | 
168 | 
169 | 0.2.1 (2013-08-13)
170 | ++++++++++++++++++
171 | 
172 | - Remove spaces from the returned keywords list
173 | - Fixed issue where favicon was not being retrieved
174 | - Fixed priority for class level vs method level params
175 | 
176 | 
177 | 0.2.0 (2013-08-06)
178 | ++++++++++++++++++
179 | 
180 | - Fix package error when importing
181 | 
182 | 
183 | 0.1.0 (2013-08-05)
184 | ++++++++++++++++++
185 | 
186 | - Initial Release
187 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2017 Mike Helmick
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst HISTORY.rst LICENSE requirements.txt
2 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | Lassie
 2 | ======
 3 | 
 4 | .. image:: https://img.shields.io/pypi/v/lassie.svg?style=flat-square
 5 |   :target: https://pypi.python.org/pypi/lassie
 6 | 
 7 | .. image:: https://img.shields.io/travis/michaelhelmick/lassie.svg?style=flat-square
 8 |   :target: https://travis-ci.org/michaelhelmick/lassie
 9 | 
10 | .. image:: https://img.shields.io/coveralls/michaelhelmick/lassie/master.svg?style=flat-square
11 |   :target: https://coveralls.io/r/michaelhelmick/lassie?branch=master
12 | 
13 | .. image:: https://img.shields.io/badge/Say%20Thanks!-:)-1EAEDB.svg?style=flat-square
14 |     :target: https://saythanks.io/to/michaelhelmick
15 | 
16 | Lassie is a Python library for retrieving basic content from websites.
17 | 
18 | .. image:: https://i.imgur.com/QrvNfAX.gif
19 | 
20 | Usage
21 | -----
22 | 
23 | .. code-block:: python
24 | 
25 |     >>> import lassie
26 |     >>> lassie.fetch('http://www.youtube.com/watch?v=dQw4w9WgXcQ')
27 |     {
28 |         'description': u'Music video by Rick Astley performing Never Gonna Give You Up. YouTube view counts pre-VEVO: 2,573,462 (C) 1987 PWL',
29 |         'videos': [{
30 |             'src': u'http://www.youtube.com/v/dQw4w9WgXcQ?autohide=1&version=3',
31 |             'height': 480,
32 |             'type': u'application/x-shockwave-flash',
33 |             'width': 640
34 |         }, {
35 |             'src': u'https://www.youtube.com/embed/dQw4w9WgXcQ',
36 |             'height': 480,
37 |             'width': 640
38 |         }],
39 |         'title': u'Rick Astley - Never Gonna Give You Up',
40 |         'url': u'http://www.youtube.com/watch?v=dQw4w9WgXcQ',
41 |         'keywords': [u'Rick', u'Astley', u'Sony', u'BMG', u'Music', u'UK', u'Pop'],
42 |         'images': [{
43 |             'src': u'http://i1.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg?feature=og',
44 |             'type': u'og:image'
45 |         }, {
46 |             'src': u'http://i1.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg',
47 |             'type': u'twitter:image'
48 |         }, {
49 |             'src': u'http://s.ytimg.com/yts/img/favicon-vfldLzJxy.ico',
50 |             'type': u'favicon'
51 |         }, {
52 |             'src': u'http://s.ytimg.com/yts/img/favicon_32-vflWoMFGx.png',
53 |             'type': u'favicon'
54 |         }],
55 |         'locale': u'en_US'
56 |     }
57 | 
58 | Install
59 | -------
60 | 
61 | Install Lassie via `pip <http://www.pip-installer.org/>`_
62 | 
63 | .. code-block:: bash
64 | 
65 |     $ pip install lassie
66 | 
67 | or, with `easy_install <http://pypi.python.org/pypi/setuptools>`_
68 | 
69 | .. code-block:: bash
70 | 
71 |     $ easy_install lassie
72 | 
73 | But, hey... `that's up to you <http://www.pip-installer.org/en/latest/other-tools.html#pip-compared-to-easy-install>`_.
74 | 
75 | Documentation
76 | -------------
77 | 
78 | Documentation can be found here: https://lassie.readthedocs.org/
79 | 
80 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes xml pseudoxml linkcheck doctest
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | dirhtml:
 58 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 61 | 
 62 | singlehtml:
 63 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 66 | 
 67 | pickle:
 68 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 69 | 	@echo
 70 | 	@echo "Build finished; now you can process the pickle files."
 71 | 
 72 | json:
 73 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the JSON files."
 76 | 
 77 | htmlhelp:
 78 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 81 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 82 | 
 83 | qthelp:
 84 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 85 | 	@echo
 86 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 87 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 88 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Lassie.qhcp"
 89 | 	@echo "To view the help file:"
 90 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Lassie.qhc"
 91 | 
 92 | devhelp:
 93 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 94 | 	@echo
 95 | 	@echo "Build finished."
 96 | 	@echo "To view the help file:"
 97 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/Lassie"
 98 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Lassie"
 99 | 	@echo "# devhelp"
100 | 
101 | epub:
102 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | 	@echo
104 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 
106 | latex:
107 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
108 | 	@echo
109 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
110 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
111 | 	      "(use \`make latexpdf' here to do that automatically)."
112 | 
113 | latexpdf:
114 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | 	@echo "Running LaTeX files through pdflatex..."
116 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
117 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 | 
119 | latexpdfja:
120 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
122 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
123 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
124 | 
125 | text:
126 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
127 | 	@echo
128 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
129 | 
130 | man:
131 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
132 | 	@echo
133 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 | 
135 | texinfo:
136 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
137 | 	@echo
138 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
139 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
140 | 	      "(use \`make info' here to do that automatically)."
141 | 
142 | info: # Build Texinfo sources, then run makeinfo through a sub-make (uses the MAKE variable so flags such as -j/-n propagate).
143 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | 	@echo "Running Texinfo files through makeinfo..."
145 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
146 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 | 
148 | gettext:
149 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
150 | 	@echo
151 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 | 
153 | changes:
154 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
155 | 	@echo
156 | 	@echo "The overview file is in $(BUILDDIR)/changes."
157 | 
158 | linkcheck:
159 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
160 | 	@echo
161 | 	@echo "Link check complete; look for any errors in the above output " \
162 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 
164 | doctest:
165 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
166 | 	@echo "Testing of doctests in the sources finished, look at the " \
167 | 	      "results in $(BUILDDIR)/doctest/output.txt."
168 | 
169 | xml:
170 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
171 | 	@echo
172 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 | 
174 | pseudoxml:
175 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
176 | 	@echo
177 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
 1 | .. _api:
 2 | 
 3 | Developer Interface
 4 | ===================
 5 | 
 6 | .. module:: lassie
 7 | 
 8 | This page of the documentation will cover all methods and classes available to the developer.
 9 | 
10 | Core Interface
11 | --------------
12 | 
13 | .. autoclass:: Lassie
14 |    :special-members: __init__
15 |    :inherited-members:
16 | 
17 | Exceptions
18 | ----------
19 | 
20 | .. autoexception:: lassie.LassieError


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Lassie documentation build configuration file, created by
  4 | # sphinx-quickstart on Fri Aug  2 00:23:04 2013.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | import os
 15 | import sys
 16 | 
 17 | import lassie
 18 | 
 19 | # If extensions (or modules to document with autodoc) are in another directory,
 20 | # add these directories to sys.path here. If the directory is relative to the
 21 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 22 | #sys.path.insert(0, os.path.abspath('.'))
 23 | sys.path.insert(0, os.path.abspath('..'))
 24 | 
 25 | 
 26 | # -- General configuration -----------------------------------------------------
 27 | 
 28 | # If your documentation needs a minimal Sphinx version, state it here.
 29 | #needs_sphinx = '1.0'
 30 | 
 31 | # Add any Sphinx extension module names here, as strings. They can be extensions
 32 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 33 | extensions = ['sphinx.ext.autodoc']
 34 | 
 35 | # Add any paths that contain templates here, relative to this directory.
 36 | templates_path = ['_templates']
 37 | 
 38 | # The suffix of source filenames.
 39 | source_suffix = '.rst'
 40 | 
 41 | # The encoding of source files.
 42 | #source_encoding = 'utf-8-sig'
 43 | 
 44 | # The master toctree document.
 45 | master_doc = 'index'
 46 | 
 47 | # General information about the project.
 48 | project = u'Lassie'
 49 | copyright = u'2014, Mike Helmick'
 50 | 
 51 | # The version info for the project you're documenting, acts as replacement for
 52 | # |version| and |release|, also used in various other places throughout the
 53 | # built documents.
 54 | #
 55 | # The short X.Y version.
 56 | version = '0.11.11'
 57 | # The full version, including alpha/beta/rc tags.
 58 | release = '0.11.11'
 59 | 
 60 | # The language for content autogenerated by Sphinx. Refer to documentation
 61 | # for a list of supported languages.
 62 | #language = None
 63 | 
 64 | # There are two options for replacing |today|: either, you set today to some
 65 | # non-false value, then it is used:
 66 | #today = ''
 67 | # Else, today_fmt is used as the format for a strftime call.
 68 | #today_fmt = '%B %d, %Y'
 69 | 
 70 | # List of patterns, relative to source directory, that match files and
 71 | # directories to ignore when looking for source files.
 72 | exclude_patterns = ['_build']
 73 | 
 74 | # The reST default role (used for this markup: `text`) to use for all documents.
 75 | #default_role = None
 76 | 
 77 | # If true, '()' will be appended to :func: etc. cross-reference text.
 78 | #add_function_parentheses = True
 79 | 
 80 | # If true, the current module name will be prepended to all description
 81 | # unit titles (such as .. function::).
 82 | #add_module_names = True
 83 | 
 84 | # If true, sectionauthor and moduleauthor directives will be shown in the
 85 | # output. They are ignored by default.
 86 | #show_authors = False
 87 | 
 88 | # The name of the Pygments (syntax highlighting) style to use.
 89 | pygments_style = 'sphinx'
 90 | 
 91 | # A list of ignored prefixes for module index sorting.
 92 | #modindex_common_prefix = []
 93 | 
 94 | # If true, keep warnings as "system message" paragraphs in the built documents.
 95 | #keep_warnings = False
 96 | 
 97 | 
 98 | # -- Options for HTML output ---------------------------------------------------
 99 | 
100 | # The theme to use for HTML and HTML Help pages.  See the documentation for
101 | # a list of builtin themes.
102 | html_theme = 'default'
103 | 
104 | # Theme options are theme-specific and customize the look and feel of a theme
105 | # further.  For a list of options available for each theme, see the
106 | # documentation.
107 | #html_theme_options = {}
108 | 
109 | # Add any paths that contain custom themes here, relative to this directory.
110 | #html_theme_path = []
111 | 
112 | # The name for this set of Sphinx documents.  If None, it defaults to
113 | # "<project> v<release> documentation".
114 | #html_title = None
115 | 
116 | # A shorter title for the navigation bar.  Default is the same as html_title.
117 | #html_short_title = None
118 | 
119 | # The name of an image file (relative to this directory) to place at the top
120 | # of the sidebar.
121 | #html_logo = None
122 | 
123 | # The name of an image file (within the static path) to use as favicon of the
124 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
125 | # pixels large.
126 | #html_favicon = None
127 | 
128 | # Add any paths that contain custom static files (such as style sheets) here,
129 | # relative to this directory. They are copied after the builtin static files,
130 | # so a file named "default.css" will overwrite the builtin "default.css".
131 | html_static_path = ['_static']
132 | 
133 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
134 | # using the given strftime format.
135 | #html_last_updated_fmt = '%b %d, %Y'
136 | 
137 | # If true, SmartyPants will be used to convert quotes and dashes to
138 | # typographically correct entities.
139 | #html_use_smartypants = True
140 | 
141 | # Custom sidebar templates, maps document names to template names.
142 | #html_sidebars = {}
143 | 
144 | # Additional templates that should be rendered to pages, maps page names to
145 | # template names.
146 | #html_additional_pages = {}
147 | 
148 | # If false, no module index is generated.
149 | #html_domain_indices = True
150 | 
151 | # If false, no index is generated.
152 | #html_use_index = True
153 | 
154 | # If true, the index is split into individual pages for each letter.
155 | #html_split_index = False
156 | 
157 | # If true, links to the reST sources are added to the pages.
158 | #html_show_sourcelink = True
159 | 
160 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
161 | #html_show_sphinx = True
162 | 
163 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
164 | #html_show_copyright = True
165 | 
166 | # If true, an OpenSearch description file will be output, and all pages will
167 | # contain a <link> tag referring to it.  The value of this option must be the
168 | # base URL from which the finished HTML is served.
169 | #html_use_opensearch = ''
170 | 
171 | # This is the file name suffix for HTML files (e.g. ".xhtml").
172 | #html_file_suffix = None
173 | 
174 | # Output file base name for HTML help builder.
175 | htmlhelp_basename = 'Lassiedoc'
176 | 
177 | 
178 | # -- Options for LaTeX output --------------------------------------------------
179 | 
180 | latex_elements = {
181 | # The paper size ('letterpaper' or 'a4paper').
182 | #'papersize': 'letterpaper',
183 | 
184 | # The font size ('10pt', '11pt' or '12pt').
185 | #'pointsize': '10pt',
186 | 
187 | # Additional stuff for the LaTeX preamble.
188 | #'preamble': '',
189 | }
190 | 
191 | # Grouping the document tree into LaTeX files. List of tuples
192 | # (source start file, target name, title, author, documentclass [howto/manual]).
193 | latex_documents = [
194 |   ('index', 'Lassie.tex', u'Lassie Documentation',
195 |    u'Mike Helmick', 'manual'),
196 | ]
197 | 
198 | # The name of an image file (relative to this directory) to place at the top of
199 | # the title page.
200 | #latex_logo = None
201 | 
202 | # For "manual" documents, if this is true, then toplevel headings are parts,
203 | # not chapters.
204 | #latex_use_parts = False
205 | 
206 | # If true, show page references after internal links.
207 | #latex_show_pagerefs = False
208 | 
209 | # If true, show URL addresses after external links.
210 | #latex_show_urls = False
211 | 
212 | # Documents to append as an appendix to all manuals.
213 | #latex_appendices = []
214 | 
215 | # If false, no module index is generated.
216 | #latex_domain_indices = True
217 | 
218 | 
219 | # -- Options for manual page output --------------------------------------------
220 | 
221 | # One entry per manual page. List of tuples
222 | # (source start file, name, description, authors, manual section).
223 | man_pages = [
224 |     ('index', 'lassie', u'Lassie Documentation',
225 |      [u'Mike Helmick'], 1)
226 | ]
227 | 
228 | # If true, show URL addresses after external links.
229 | #man_show_urls = False
230 | 
231 | 
232 | # -- Options for Texinfo output ------------------------------------------------
233 | 
234 | # Grouping the document tree into Texinfo files. List of tuples
235 | # (source start file, target name, title, author,
236 | #  dir menu entry, description, category)
237 | texinfo_documents = [
238 |   ('index', 'Lassie', u'Lassie Documentation',
239 |    u'Mike Helmick', 'Lassie', 'One line description of project.',
240 |    'Miscellaneous'),
241 | ]
242 | 
243 | # Documents to append as an appendix to all manuals.
244 | #texinfo_appendices = []
245 | 
246 | # If false, no module index is generated.
247 | #texinfo_domain_indices = True
248 | 
249 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
250 | #texinfo_show_urls = 'footnote'
251 | 
252 | # If true, do not generate a @detailmenu in the "Top" node's menu.
253 | #texinfo_no_detailmenu = False
254 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. Lassie documentation master file, created by
 2 |    sphinx-quickstart on Fri Aug  2 00:23:04 2013.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Lassie
 7 | ======
 8 | 
 9 |     | Lassie is a Python library for retrieving basic content from websites.
10 | 
11 | Usage
12 | -----
13 | 
14 | .. code-block:: python
15 | 
16 |     >>> import lassie
17 |     >>> lassie.fetch('http://www.youtube.com/watch?v=dQw4w9WgXcQ')
18 |     {
19 |         'description': u'Music video by Rick Astley performing Never Gonna Give You Up. YouTube view counts pre-VEVO: 2,573,462 (C) 1987 PWL',
20 |         'videos': [{
21 |             'src': u'http://www.youtube.com/v/dQw4w9WgXcQ?autohide=1&version=3',
22 |             'height': 480,
23 |             'type': u'application/x-shockwave-flash',
24 |             'width': 640
25 |         }, {
26 |             'src': u'https://www.youtube.com/embed/dQw4w9WgXcQ',
27 |             'height': 480,
28 |             'width': 640
29 |         }],
30 |         'title': u'Rick Astley - Never Gonna Give You Up',
31 |         'url': u'http://www.youtube.com/watch?v=dQw4w9WgXcQ',
32 |         'keywords': [u'Rick', u'Astley', u'Sony', u'BMG', u'Music', u'UK', u'Pop'],
33 |         'images': [{
34 |             'src': u'http://i1.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg?feature=og',
35 |             'type': u'og:image'
36 |         }, {
37 |             'src': u'http://i1.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg',
38 |             'type': u'twitter:image'
39 |         }, {
40 |             'src': u'http://s.ytimg.com/yts/img/favicon-vfldLzJxy.ico',
41 |             'type': u'favicon'
42 |         }, {
43 |             'src': u'http://s.ytimg.com/yts/img/favicon_32-vflWoMFGx.png',
44 |             'type': u'favicon'
45 |         }],
46 |         'locale': u'en_US'
47 |     }
48 | 
49 | 
50 | User Guide
51 | ----------
52 | 
53 | .. toctree::
54 |    :maxdepth: 2
55 | 
56 |    usage/install
57 | 
58 | .. toctree::
59 |    :maxdepth: 2
60 | 
61 |    usage/starting_out
62 | 
63 | .. toctree::
64 |    :maxdepth: 2
65 | 
66 |    usage/advanced_usage
67 | 
68 | 
69 | Lassie API Documentation
70 | ------------------------
71 | 
72 | .. toctree::
73 |    :maxdepth: 2
74 | 
75 |    api
76 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  xml        to make Docutils-native XML files
 37 | 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 38 | 	echo.  linkcheck  to check all external links for integrity
 39 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 40 | 	goto end
 41 | )
 42 | 
 43 | if "%1" == "clean" (
 44 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 45 | 	del /q /s %BUILDDIR%\*
 46 | 	goto end
 47 | )
 48 | 
 49 | 
 50 | %SPHINXBUILD% 2> nul
 51 | if errorlevel 9009 (
 52 | 	echo.
 53 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 54 | 	echo.installed, then set the SPHINXBUILD environment variable to point
 55 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 56 | 	echo.may add the Sphinx directory to PATH.
 57 | 	echo.
 58 | 	echo.If you don't have Sphinx installed, grab it from
 59 | 	echo.http://sphinx-doc.org/
 60 | 	exit /b 1
 61 | )
 62 | 
 63 | if "%1" == "html" (
 64 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "dirhtml" (
 72 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "singlehtml" (
 80 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "pickle" (
 88 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can process the pickle files.
 92 | 	goto end
 93 | )
 94 | 
 95 | if "%1" == "json" (
 96 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 97 | 	if errorlevel 1 exit /b 1
 98 | 	echo.
 99 | 	echo.Build finished; now you can process the JSON files.
100 | 	goto end
101 | )
102 | 
103 | if "%1" == "htmlhelp" (
104 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | 	if errorlevel 1 exit /b 1
106 | 	echo.
107 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | 	goto end
110 | )
111 | 
112 | if "%1" == "qthelp" (
113 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | 	if errorlevel 1 exit /b 1
115 | 	echo.
116 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Lassie.qhcp
119 | 	echo.To view the help file:
120 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Lassie.qhc
121 | 	goto end
122 | )
123 | 
124 | if "%1" == "devhelp" (
125 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | 	if errorlevel 1 exit /b 1
127 | 	echo.
128 | 	echo.Build finished.
129 | 	goto end
130 | )
131 | 
132 | if "%1" == "epub" (
133 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | 	if errorlevel 1 exit /b 1
135 | 	echo.
136 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | 	goto end
138 | )
139 | 
140 | if "%1" == "latex" (
141 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | 	if errorlevel 1 exit /b 1
143 | 	echo.
144 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | 	goto end
146 | )
147 | 
148 | if "%1" == "latexpdf" (
149 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | 	pushd %BUILDDIR%\latex
151 | 	make all-pdf
152 | 	popd
153 | 	echo.
154 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | 	goto end
156 | )
157 | 
158 | if "%1" == "latexpdfja" (
159 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | 	pushd %BUILDDIR%\latex
161 | 	make all-pdf-ja
162 | 	popd
163 | 	echo.
164 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | 	goto end
166 | )
167 | 
168 | if "%1" == "text" (
169 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | 	if errorlevel 1 exit /b 1
171 | 	echo.
172 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
173 | 	goto end
174 | )
175 | 
176 | if "%1" == "man" (
177 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
178 | 	if errorlevel 1 exit /b 1
179 | 	echo.
180 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
181 | 	goto end
182 | )
183 | 
184 | if "%1" == "texinfo" (
185 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
186 | 	if errorlevel 1 exit /b 1
187 | 	echo.
188 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
189 | 	goto end
190 | )
191 | 
192 | if "%1" == "gettext" (
193 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
194 | 	if errorlevel 1 exit /b 1
195 | 	echo.
196 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
197 | 	goto end
198 | )
199 | 
200 | if "%1" == "changes" (
201 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
202 | 	if errorlevel 1 exit /b 1
203 | 	echo.
204 | 	echo.The overview file is in %BUILDDIR%/changes.
205 | 	goto end
206 | )
207 | 
208 | if "%1" == "linkcheck" (
209 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
210 | 	if errorlevel 1 exit /b 1
211 | 	echo.
212 | 	echo.Link check complete; look for any errors in the above output ^
213 | or in %BUILDDIR%/linkcheck/output.txt.
214 | 	goto end
215 | )
216 | 
217 | if "%1" == "doctest" (
218 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
219 | 	if errorlevel 1 exit /b 1
220 | 	echo.
221 | 	echo.Testing of doctests in the sources finished, look at the ^
222 | results in %BUILDDIR%/doctest/output.txt.
223 | 	goto end
224 | )
225 | 
226 | if "%1" == "xml" (
227 | 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
228 | 	if errorlevel 1 exit /b 1
229 | 	echo.
230 | 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
231 | 	goto end
232 | )
233 | 
234 | if "%1" == "pseudoxml" (
235 | 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
236 | 	if errorlevel 1 exit /b 1
237 | 	echo.
238 | 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
239 | 	goto end
240 | )
241 | 
242 | :end
243 | 


--------------------------------------------------------------------------------
/docs/usage/advanced_usage.rst:
--------------------------------------------------------------------------------
  1 | .. _advanced-usage:
  2 | 
  3 | Advanced Usage
  4 | ==============
  5 | 
  6 | This section will cover how to use the ``Lassie`` class to maintain settings across all ``fetch`` calls.
  7 | 
  8 | 
  9 | Class Level Attributes
 10 | ----------------------
 11 | 
 12 | Constructing a ``Lassie`` class and calling ``fetch`` will use all the default params that are available to ``fetch``.
 13 | 
 14 | .. code-block:: python
 15 | 
 16 |     >>> from lassie import Lassie
 17 |     >>> l = Lassie()
 18 | 
 19 |     >>> l.fetch('https://github.com/michaelhelmick')
 20 |     {
 21 |         'images': [{
 22 |             'src': u'https://github.global.ssl.fastly.net/images/modules/logos_page/Octocat.png',
 23 |             'type': u'og:image'
 24 |         }, {
 25 |             'src': u'https://github.com/favicon.ico',
 26 |             'type': u'favicon'
 27 |         }],
 28 |         'url': 'https://github.com/michaelhelmick',
 29 |         'description': u'michaelhelmick has 22 repositories written in Python, Shell, and JavaScript. Follow their code on GitHub.',
 30 |         'videos': [],
 31 |         'title': u'michaelhelmick (Mike Helmick) \xb7 GitHub'
 32 |     }
 33 |     >>> l.fetch('https://github.com/ashibble')
 34 |     {
 35 |         'images': [{
 36 |             'src': u'https://github.global.ssl.fastly.net/images/modules/logos_page/Octocat.png',
 37 |             'type': u'og:image'
 38 |         }, {
 39 |             'src': u'https://github.com/favicon.ico',
 40 |             'type': u'favicon'
 41 |         }],
 42 |         'url': 'https://github.com/ashibble',
 43 |         'description': u'Follow ashibble on GitHub and watch them build beautiful projects.',
 44 |         'videos': [],
 45 |         'title': u'ashibble (Alexander Shibble) \xb7 GitHub'
 46 |     }
 47 | 
 48 | If you decide that you don't want to filter for Open Graph data, instead of declaring ``open_graph=False`` in every ``fetch`` call:
 49 | 
 50 | .. code-block:: python
 51 | 
 52 |     >>> from lassie import Lassie
 53 |     >>> l = Lassie()
 54 |     >>> l.fetch('https://github.com/michaelhelmick', open_graph=False)
 55 |     >>> l.fetch('https://github.com/ashibble', open_graph=False)
 56 | 
 57 | You can use the ``Lassie`` class and set attributes on the class.
 58 | 
 59 | .. code-block:: python
 60 | 
 61 |     >>> from lassie import Lassie
 62 |     >>> l = Lassie()
 63 |     >>> l.open_graph = False
 64 | 
 65 |     >>> l.fetch('https://github.com/michaelhelmick')
 66 |     {
 67 |         'images': [{
 68 |             'src': u'https://github.com/favicon.ico',
 69 |             'type': u'favicon'
 70 |         }],
 71 |         'url': 'https://github.com/michaelhelmick',
 72 |         'description': u'michaelhelmick has 22 repositories written in Python, Shell, and JavaScript. Follow their code on GitHub.',
 73 |         'videos': [],
 74 |         'title': u'michaelhelmick (Mike Helmick) \xb7 GitHub'
 75 |     }
 76 |     >>> l.fetch('https://github.com/ashibble')
 77 |     {
 78 |         'images': [{
 79 |             'src': u'https://github.com/favicon.ico',
 80 |             'type': u'favicon'
 81 |         }],
 82 |         'url': 'https://github.com/ashibble',
 83 |         'description': u'Follow ashibble on GitHub and watch them build beautiful projects.',
 84 |         'videos': [],
 85 |         'title': u'ashibble (Alexander Shibble) \xb7 GitHub'
 86 |     }
 87 | 
 88 | You'll notice the data for the Open Graph properties wasn't returned in the last responses. That's because setting ``open_graph = False`` on the ``Lassie`` instance tells Lassie not to filter for those properties.
 89 | 
 90 | In the edge case that there is a time or two you want to override the class attribute, just pass the parameter to ``fetch`` and Lassie will use that parameter.
 91 | 
 92 | .. code-block:: python
 93 | 
 94 |     >>> from lassie import Lassie
 95 |     >>> l = Lassie()
 96 |     >>> l.open_graph = False
 97 | 
 98 |     >>> l.fetch('https://github.com/michaelhelmick')
 99 |     {
100 |         'images': [{
101 |             'src': u'https://github.com/favicon.ico',
102 |             'type': u'favicon'
103 |         }],
104 |         'url': 'https://github.com/michaelhelmick',
105 |         'description': u'michaelhelmick has 22 repositories written in Python, Shell, and JavaScript. Follow their code on GitHub.',
106 |         'videos': [],
107 |         'title': u'michaelhelmick (Mike Helmick) \xb7 GitHub'
108 |     }
109 |     >>> l.fetch('https://github.com/ashibble', open_graph=True)
110 |     {
111 |         'images': [{
112 |             'src': u'https://github.global.ssl.fastly.net/images/modules/logos_page/Octocat.png',
113 |             'type': u'og:image'
114 |         }, {
115 |             'src': u'https://github.com/favicon.ico',
116 |             'type': u'favicon'
117 |         }],
118 |         'url': 'https://github.com/ashibble',
119 |         'description': u'Follow ashibble on GitHub and watch them build beautiful projects.',
120 |         'videos': [],
121 |         'title': u'ashibble (Alexander Shibble) \xb7 GitHub'
122 |     }
123 | 
124 | 
125 | Manipulate the Request (headers, proxies, etc.)
126 | -----------------------------------------------
127 | 
128 | There are times when you may want to turn SSL verification off, send custom headers, or add proxies for the request to go through.
129 | 
130 | Lassie uses the `requests <http://python-requests.org>`_ library to make web requests. ``requests`` accepts a few parameters to allow developers to manipulate the actual HTTP request.
131 | 
132 | Here is an example of sending custom headers to a lassie request:
133 | 
134 | .. code-block:: python
135 | 
136 |     from lassie import Lassie
137 | 
138 |     l = Lassie()
139 |     l.request_opts = {
140 |         'headers': {
141 |             'User-Agent': 'python lassie'
142 |         }
143 |     }
144 |     l.fetch('http://google.com')
145 | 
146 | Maybe you want to set a request timeout, here's another example:
147 | 
148 | .. code-block:: python
149 | 
150 |     from lassie import Lassie
151 | 
152 |     l = Lassie()
153 |     l.request_opts = {
154 |         'timeout': 10  # 10 seconds
155 |     }
156 | 
157 |     # If the response takes longer than 10 seconds this request will fail
158 |     l.fetch('http://google.com')
159 | 
160 | 
161 | Playing Nice with non-HTML Files
162 | --------------------------------
163 | 
164 | Sometimes, you may want to grab information about an image or other type of file. Although only images are supported, you can retrieve a nicely structured ``dict``.
165 | 
166 | Pass ``handle_file_content=True`` to ``lassie.fetch`` or set it on a ``Lassie`` instance:
167 | 
168 | .. code-block:: python
169 | 
170 |     >>> import lassie
171 | 
172 |     >>> lassie.fetch('https://camo.githubusercontent.com/d19b279de191489445d8cfd39faf93e19ca2df14/68747470733a2f2f692e696d6775722e636f6d2f5172764e6641582e676966', handle_file_content=True)
173 |     {
174 |         'title': '68747470733a2f2f692e696d6775722e636f6d2f5172764e6641582e676966',
175 |         'videos': [],
176 |         'url': 'https://camo.githubusercontent.com/d19b279de191489445d8cfd39faf93e19ca2df14/68747470733a2f2f692e696d6775722e636f6d2f5172764e6641582e676966',
177 |         'images': [{
178 |             'type': 'body_image',
179 |             'src': 'https://camo.githubusercontent.com/d19b279de191489445d8cfd39faf93e19ca2df14/68747470733a2f2f692e696d6775722e636f6d2f5172764e6641582e676966'
180 |         }]
181 |     }
182 | 
183 |     >>> lassie.fetch('http://2.bp.blogspot.com/-vzGgFFtW-VY/Tz-eozaHw3I/AAAAAAAAM3k/OMvxpFYr23s/s1600/The-best-top-desktop-cat-wallpapers-10.jpg', handle_file_content=True)
184 |     {
185 |         'title': 'The-best-top-desktop-cat-wallpapers-10.jpg',
186 |         'images': [{
187 |             'type': 'body_image',
188 |             'src': 'http://2.bp.blogspot.com/-vzGgFFtW-VY/Tz-eozaHw3I/AAAAAAAAM3k/OMvxpFYr23s/s1600/The-best-top-desktop-cat-wallpapers-10.jpg'
189 |         }],
190 |         'videos': [],
191 |         'url': 'http://2.bp.blogspot.com/-vzGgFFtW-VY/Tz-eozaHw3I/AAAAAAAAM3k/OMvxpFYr23s/s1600/The-best-top-desktop-cat-wallpapers-10.jpg'
192 |     }
193 | 


--------------------------------------------------------------------------------
/docs/usage/install.rst:
--------------------------------------------------------------------------------
 1 | .. _install:
 2 | 
 3 | Installation
 4 | ============
 5 | 
 6 | Information on how to properly install Lassie
 7 | 
 8 | *******************************************************************************
 9 | 
10 | Pip or Easy Install
11 | -------------------
12 | 
13 | Install Lassie via `pip <http://www.pip-installer.org/>`_
14 | 
15 | .. code-block:: bash
16 | 
17 |     $ pip install lassie
18 | 
19 | or, with `easy_install <http://pypi.python.org/pypi/setuptools>`_
20 | 
21 | .. code-block:: bash
22 | 
23 |     $ easy_install lassie
24 | 
25 | But, hey... `that's up to you <http://www.pip-installer.org/en/latest/other-tools.html#pip-compared-to-easy-install>`_.
26 | 
27 | 
28 | Source Code
29 | -----------
30 | 
31 | Lassie is actively maintained on GitHub
32 | 
33 | Feel free to clone the repository
34 | 
35 | .. code-block:: bash
36 | 
37 |     $ git clone https://github.com/michaelhelmick/lassie.git
38 | 
39 | `tarball <https://github.com/michaelhelmick/lassie/tarball/master>`_
40 | 
41 | .. code-block:: bash
42 | 
43 |     $ curl -OL https://github.com/michaelhelmick/lassie/tarball/master
44 | 
45 | `zipball <https://github.com/michaelhelmick/lassie/zipball/master>`_
46 | 
47 | .. code-block:: bash
48 | 
49 |     $ curl -OL https://github.com/michaelhelmick/lassie/zipball/master
50 | 
51 | Now that you have the source code, install it into your site-packages directory
52 | 
53 | .. code-block:: bash
54 | 
55 |     $ python setup.py install
56 | 
57 | *******************************************************************************
58 | 
59 | So Lassie is installed! Now, head over to the :ref:`starting out <starting-out>` section.
60 | 


--------------------------------------------------------------------------------
/docs/usage/starting_out.rst:
--------------------------------------------------------------------------------
  1 | .. _starting-out:
  2 | 
  3 | Starting Out
  4 | ============
  5 | 
  6 | This section outlines the most basic uses of Lassie.
  7 | 
  8 | *******************************************************************************
  9 | 
 10 | What Lassie Returns
 11 | -------------------
 12 | 
 13 | Lassie aims to return the most beautifully crafted dictionary of important information about the web page.
 14 | 
 15 | Beginning
 16 | ---------
 17 | 
 18 | So, let's say you want to retrieve details about a YouTube video.
 19 | 
 20 | Specifically: http://www.youtube.com/watch?v=dQw4w9WgXcQ
 21 | 
 22 | .. code-block:: python
 23 | 
 24 |     >>> import lassie
 25 |     >>> lassie.fetch('http://www.youtube.com/watch?v=dQw4w9WgXcQ')
 26 |     {
 27 |         'description': u'Music video by Rick Astley performing Never Gonna Give You Up. YouTube view counts pre-VEVO: 2,573,462 (C) 1987 PWL',
 28 |         'videos': [{
 29 |             'src': u'http://www.youtube.com/v/dQw4w9WgXcQ?version=3&autohide=1',
 30 |             'height': 480,
 31 |             'type': u'application/x-shockwave-flash',
 32 |             'width': 640
 33 |         }, {
 34 |             'src': u'https://www.youtube.com/embed/dQw4w9WgXcQ',
 35 |             'height': 480,
 36 |             'width': 640
 37 |         }],
 38 |         'title': u'Rick Astley - Never Gonna Give You Up',
 39 |         'url': u'http://www.youtube.com/watch?v=dQw4w9WgXcQ',
 40 |         'keywords': [u'Rick', u' Astley', u' Sony', u' BMG', u' Music', u' UK', u' Pop'],
 41 |         'images': [{
 42 |             'src': u'http://i1.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg?feature=og',
 43 |             'type': u'og:image'
 44 |         }, {
 45 |             'src': u'http://i1.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg',
 46 |             'type': u'twitter:image'
 47 |         }, {
 48 |             'src': u'http://s.ytimg.com/yts/img/favicon-vfldLzJxy.ico',
 49 |             'type': u'favicon'
 50 |         }, {
 51 |             'src': u'http://s.ytimg.com/yts/img/favicon_32-vflWoMFGx.png',
 52 |             'type': u'favicon'
 53 |         }],
 54 |         'locale': u'en_US'
 55 |     }
 56 | 
 57 | Or what if you wanted to get information about an article?
 58 | 
 59 | Specifically: http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/
 60 | 
 61 | .. code-block:: python
 62 | 
 63 |     >>> import lassie
 64 |     >>> lassie.fetch('http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/')
 65 |     {
 66 |         'description': u"GitHub has surpassed the 3 million-developer mark, a milestone for the collaborative platform for application development.\xa0GitHub said it happened Monday night on the first day of the company's\xa0all-hands winter summit. Launched\xa0in April 2008, GitHub\xa0celebrated\xa0its first million users in..",
 67 |         'videos': [],
 68 |         'title': u'GitHub Passes The 3 Million Developer Mark | TechCrunch',
 69 |         'url': u'http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/',
 70 |         'locale': u'en_US',
 71 |         'images': [{
 72 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png?w=150',
 73 |             'type': u'og:image'
 74 |         }, {
 75 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png',
 76 |             'type': u'twitter:image'
 77 |         }, {
 78 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/favicon.ico?m=1357660109g',
 79 |             'type': u'favicon'
 80 |         }, {
 81 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/favicon.ico?m=1357660109g',
 82 |             'type': u'favicon'
 83 |         }]
 84 |     }
 85 | 
 86 | Lassie, by default, also filters for content from Twitter Cards and grabs favicons and touch icons.
 87 | 
 88 | Priorities
 89 | ----------
 90 | 
 91 | Open Graph values take priority over other values (Twitter Card data, generic data, etc.).
 92 | 
 93 | In other words, if a website has the title of their page as ``<title>YouTube</title>`` and they have their Open Graph title set ``<meta property="og:title" content="YouTube | A Video Sharing Site" />``
 94 | 
 95 | The value of ``title`` when you ``fetch`` the web page will return as "YouTube | A Video Sharing Site" instead of just "YouTube".
 96 | 
 97 | But what if I don't want open graph data?
 98 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 99 | 
100 | Then pass ``open_graph=False`` to the ``fetch`` method.
101 | 
102 | .. code-block:: python
103 | 
104 |     >>> lassie.fetch('http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/', open_graph=False)
105 |     {
106 |         'description': u"GitHub has surpassed the 3 million-developer mark, a milestone for the collaborative platform for application development.\xa0GitHub said it happened Monday night on the first day of the company's\xa0all-hands winter summit. Launched\xa0in April 2008, GitHub\xa0celebrated\xa0its first million users in..",
107 |         'videos': [],
108 |         'title': u'GitHub Passes The 3 Million Developer Mark | TechCrunch',
109 |         'url': u'http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/',
110 |         'locale': u'en_US',
111 |         'images': [{
112 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png?w=150',
113 |             'type': u'og:image'
114 |         }, {
115 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png',
116 |             'type': u'twitter:image'
117 |         }, {
118 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/favicon.ico?m=1357660109g',
119 |             'type': u'favicon'
120 |         }, {
121 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/favicon.ico?m=1357660109g',
122 |             'type': u'favicon'
123 |         }]
124 |     }
125 | 
126 | If you **don't** want Twitter cards, favicons or touch icons, use any combination of the following parameters and pass them to ``fetch``:
127 | 
128 | - Pass ``twitter_card=False`` to skip filtering the page for Twitter Card data
129 | - Pass ``touch_icon=False`` to exclude the Apple touch icons from being added to the images array
130 | - Pass ``favicon=False`` to exclude the favicon from being added to the images array
131 | 
132 | Obtaining All Images
133 | --------------------
134 | 
135 | Sometimes you might want to obtain a list of all the images on a web page... simple, just pass ``all_images=True`` to ``fetch``.
136 | 
137 | .. code-block:: python
138 | 
139 |     >>> lassie.fetch('http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/', all_images=True)
140 |     {
141 |         'description': u"GitHub has surpassed the 3 million-developer mark, a milestone for the collaborative platform for application development.\xa0GitHub said it happened Monday night on the first day of the company's\xa0all-hands winter summit. Launched\xa0in April 2008, GitHub\xa0celebrated\xa0its first million users in..",
142 |         'videos': [],
143 |         'title': u'GitHub Passes The 3 Million Developer Mark | TechCrunch',
144 |         'url': u'http://techcrunch.com/2013/01/16/github-passes-the-3-million-developer-mark/',
145 |         'locale': u'en_US',
146 |         'images': [{
147 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png?w=150',
148 |             'type': u'og:image'
149 |         }, {
150 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png',
151 |             'type': u'twitter:image'
152 |         }, {
153 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/favicon.ico?m=1357660109g',
154 |             'type': u'favicon'
155 |         }, {
156 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/favicon.ico?m=1357660109g',
157 |             'type': u'favicon'
158 |         }, {
159 |             'src': u'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/site-logo-cutout.png?m=1342508617g',
160 |             'alt': u'',
161 |             'type': u'body_image'
162 |         }, {
163 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/countdown4.jpg?w=640',
164 |             'alt': u'Main Event Page',
165 |             'type': u'body_image'
166 |         }, {
167 |             'src': u'http://2.gravatar.com/avatar/b4e205744ae2f9b44921d103b4d80e54?s=60&d=identicon&r=G',
168 |             'alt': u'',
169 |             'height': 60,
170 |             'type': u'body_image',
171 |             'width': 60
172 |         }, {
173 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/01/github-logo.png?w=300',
174 |             'alt': u'github-logo',
175 |             'height': 300,
176 |             'type': u'body_image',
177 |             'width': 300
178 |         }, {
179 |             'src': u'http://crunchbase.com/assets/images/resized/0001/7208/17208v9-max-150x150.png',
180 |             'alt': u'',
181 |             'type': u'body_image'
182 |         }, {
183 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/tardis-egg.jpg?w=89&h=64&crop=1',
184 |             'alt': '',
185 |             'type': u'body_image'
186 |         }, {
187 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/made-in-space-zero-gravity.jpg?w=89&h=64&crop=1',
188 |             'alt': '',
189 |             'type': u'body_image'
190 |         }, {
191 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/04/apple1.jpg?w=89&h=64&crop=1',
192 |             'alt': '',
193 |             'type': u'body_image'
194 |         }, {
195 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/p9130014.jpg?w=89&h=64&crop=1',
196 |             'alt': '',
197 |             'type': u'body_image'
198 |         }, {
199 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/htc.png?w=89&h=64&crop=1',
200 |             'alt': '',
201 |             'type': u'body_image'
202 |         }, {
203 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/screen-shot-2013-08-13-at-8-18-25-pm.png?w=89&h=64&crop=1',
204 |             'alt': '',
205 |             'type': u'body_image'
206 |         }, {
207 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/24112v5-max-250x250.jpg?w=89&h=63&crop=1',
208 |             'alt': '',
209 |             'type': u'body_image'
210 |         }, {
211 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/surface-14.jpg?w=89&h=64&crop=1',
212 |             'alt': '',
213 |             'type': u'body_image'
214 |         }, {
215 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/sprawl_tuned_robot.jpg?w=89&h=64&crop=1',
216 |             'alt': '',
217 |             'type': u'body_image'
218 |         }, {
219 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/ashton-kutcher-jobs.jpg?w=89&h=64&crop=1',
220 |             'alt': '',
221 |             'type': u'body_image'
222 |         }, {
223 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/facebook-commerce.png?w=89&h=64&crop=1',
224 |             'alt': '',
225 |             'type': u'body_image'
226 |         }, {
227 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/screen-shot-2013-08-14-at-10-23-20-am.png?w=89&h=64&crop=1',
228 |             'alt': '',
229 |             'type': u'body_image'
230 |         }, {
231 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2012/10/ibm_logo.jpg?w=89&h=64&crop=1',
232 |             'alt': '',
233 |             'type': u'body_image'
234 |         }, {
235 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/screen-shot-2013-08-15-at-12-09-16.png?w=89&h=64&crop=1',
236 |             'alt': '',
237 |             'type': u'body_image'
238 |         }, {
239 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/inklogo.jpg?w=89&h=64&crop=1',
240 |             'alt': '',
241 |             'type': u'body_image'
242 |         }, {
243 |             'src': u'http://tctechcrunch2011.files.wordpress.com/2013/08/screen-shot-2013-08-15-at-9-31-21-am.png?w=89&h=64&crop=1',
244 |             'alt': '',
245 |             'type': u'body_image'
246 |         }]
247 |     }
248 | 
249 | *******************************************************************************
250 | 
251 | So, now you know the basics. What if you don't want to declare params *every* time to the ``fetch`` method? Head over to the :ref:`advanced usage <advanced-usage>` section to learn about the ``Lassie`` class.
252 | 


--------------------------------------------------------------------------------
/lassie/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | #     __         ______     ______     ______     __     ______
 4 | #    /\ \       /\  __ \   /\  ___\   /\  ___\   /\ \   /\  ___\
 5 | #    \ \ \____  \ \  __ \  \ \___  \  \ \___  \  \ \ \  \ \  __\
 6 | #     \ \_____\  \ \_\ \_\  \/\_____\  \/\_____\  \ \_\  \ \_____\
 7 | #      \/_____/   \/_/\/_/   \/_____/   \/_____/   \/_/   \/_____/
 8 | 
 9 | """
10 | Lassie
11 | ------
12 | 
13 | Lassie is a Python library for retrieving basic content from websites.
14 | 
15 | """
16 | 
17 | __version__ = '0.11.11'
18 | 
19 | from .api import fetch
20 | from .core import Lassie
21 | from .exceptions import LassieError
22 | 


--------------------------------------------------------------------------------
/lassie/api.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.api
 5 | ~~~~~~~~~~
 6 | 
 7 | This module implements the Lassie API.
 8 | 
 9 | """
10 | 
11 | from .core import Lassie
12 | 
13 | 
def fetch(url, **kwargs):
    """Fetch ``url`` using a freshly created :class:`Lassie <Lassie>` instance.

    A new ``Lassie`` object with default settings is built on every call and
    its ``fetch`` method is invoked with ``url`` and the given keyword
    arguments. The return value is the dictionary produced by that method.

    Priority tree is as follows:
        1. OEmbed
        2. Open Graph
        3. Twitter Card
        4. Other meta content (i.e. description, keywords)

    :param url: URL to send a GET request to
    :param open_graph: (optional) If ``True``, filters web page content for Open Graph meta tags. The content of these properties have top priority on return values.
    :type open_graph: bool
    :param twitter_card: (optional) If ``True``, filters web page content for Twitter Card meta tags
    :type twitter_card: bool
    :param touch_icon: (optional) If ``True``, retrieves Apple touch icons and includes them in the response ``images`` array
    :type touch_icon: bool
    :param favicon: (optional) If ``True``, retrieves any favicon images and includes them in the response ``images`` array
    :type favicon: bool
    :param canonical: (optional) If ``True``, retrieves canonical url from meta tags. Default: False
    :type canonical: bool
    :param all_images: (optional) If ``True``, retrieves images inside web pages body and includes them in the response ``images`` array. Default: False
    :type all_images: bool
    :param parser: (optional) String reference for the parser that BeautifulSoup will use
    :type parser: string
    :param handle_file_content: (optional) If ``True``, lassie will return a generic response when a file is fetched. Default: False
    :type handle_file_content: bool

    """
    return Lassie().fetch(url, **kwargs)
44 | 


--------------------------------------------------------------------------------
/lassie/compat.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.compat
 5 | ~~~~~~~~~~~~~
 6 | 
 7 | This module contains imports and declarations for seamless Python 2 and
 8 | Python 3 compatibility.
 9 | """
10 | 
11 | import sys
12 | 
_ver = sys.version_info

#: ``True`` when the interpreter's major version is 2
is_py2 = (2 == _ver[0])

#: ``True`` when the interpreter's major version is 3
is_py3 = (3 == _ver[0])

# The two flags can never both be true, so the order of the branches does not
# matter. On any other major version none of the names below get defined.
if is_py3:
    from urllib.parse import urljoin, urlparse

    # Re-bind the builtin so ``from .compat import str`` works on Python 3.
    str = str

elif is_py2:
    from urlparse import urljoin, urlparse

    # Text type on Python 2.
    str = unicode
30 | 


--------------------------------------------------------------------------------
/lassie/core.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | lassie.core
  5 | ~~~~~~~~~~~
  6 | 
  7 | This module contains a Lassie object to maintain settings across lassie.
  8 | 
  9 | """
 10 | 
 11 | 
 12 | import json
 13 | import re
 14 | from os.path import basename
 15 | 
 16 | import requests
 17 | from bs4 import BeautifulSoup
 18 | from requests import Request, Session
 19 | 
 20 | from .compat import str, urljoin, urlparse
 21 | from .exceptions import LassieError
 22 | from .filters import FILTER_MAPS
 23 | from .filters.oembed.providers import consumer, parse_oembed_data
 24 | from .utils import (
 25 |     FAKE_USER_AGENT, clean_text, convert_to_int, determine_user_agent, normalize_image_data,
 26 |     normalize_locale,
 27 | )
 28 | 
 29 | REQUEST_OPTS = {
 30 |     'client': ('cert', 'headers', 'hooks', 'max_redirects', 'proxies'),
 31 |     'request': ('timeout', 'allow_redirects', 'stream', 'verify'),
 32 | }
 33 | 
 34 | IMAGE_MIMETYPES = [
 35 |     'image/jpeg', 'image/gif', 'image/bmp', 'image/png'
 36 | ]
 37 | 
 38 | 
 39 | def merge_settings(fetch_setting, class_setting):
 40 |     """Merge settings for ``fetch``, method params have priority."""
 41 |     if fetch_setting is None:
 42 |         return class_setting
 43 |     else:
 44 |         return fetch_setting
 45 | 
 46 | 
 47 | class Lassie(object):
 48 |     __attrs__ = [
 49 |         'open_graph', 'twitter_card', 'touch_icon', 'favicon',
 50 |         'canonical', 'all_images', 'parser', '_retrieve_content',
 51 |         'client'
 52 |     ]
 53 | 
 54 |     def __init__(self):
 55 |         """Instantiates an instance of Lassie."""
 56 |         self.open_graph = True
 57 |         self.twitter_card = True
 58 |         self.touch_icon = True
 59 |         self.favicon = True
 60 |         self.canonical = False
 61 |         self.all_images = False
 62 |         self.parser = 'html5lib'
 63 |         self.handle_file_content = False
 64 |         self.user_agent_set_manually = False
 65 |         self._request_opts = {}
 66 |         self.client = Session()
 67 | 
 68 |     @property
 69 |     def request_opts(self):
 70 |         return self._request_opts
 71 | 
 72 |     @request_opts.setter
 73 |     def request_opts(self, _dict):
 74 |         for k, v in _dict.items():
 75 |             if (k in REQUEST_OPTS['client'] or k in REQUEST_OPTS['request']):
 76 |                 self._request_opts[k] = v
 77 | 
 78 |             if k in REQUEST_OPTS['client']:
 79 |                 setattr(self.client, k, v)
 80 | 
 81 |         if not self.client.headers or not isinstance(self.client.headers, (dict, requests.structures.CaseInsensitiveDict)):
 82 |             self.client.headers = {}
 83 | 
 84 |         self.client.headers = requests.structures.CaseInsensitiveDict(self.client.headers)
 85 | 
 86 |         user_agent = self.client.headers.get('User-Agent')
 87 |         self.client.headers['User-Agent'] = determine_user_agent(user_agent)
 88 | 
 89 |         if user_agent != requests.utils.default_user_agent() and user_agent != FAKE_USER_AGENT:
 90 |             self.user_agent_set_manually = True
 91 |         else:
 92 |             self.user_agent_set_manually = False
 93 | 
 94 |     def __repr__(self):
 95 |         return '<Lassie [parser: %s]>' % (self.parser)
 96 | 
 97 |     def fetch(self, url, open_graph=None, twitter_card=None, touch_icon=None,
 98 |               favicon=None, all_images=None, parser=None, handle_file_content=None,
 99 |               canonical=None):
100 |         """Retrieves content from the specified url, parses it, and returns
101 |         a beautifully crafted dictionary of important information about that
102 |         web page.
103 | 
104 |         Priority tree is as follows:
105 |             1. OEmbed
106 |             2. Open Graph
107 |             3. Twitter Card
108 |             4. Other meta content (i.e. description, keywords)
109 | 
110 |         :param url: URL to send a GET request to
111 |         :param open_graph: (optional) If ``True``, filters web page content for Open Graph meta tags. The content of these properties have top priority on return values.
112 |         :type open_graph: bool
113 |         :param twitter_card: (optional) If ``True``, filters web page content for Twitter Card meta tags
114 |         :type twitter_card: bool
115 |         :param touch_icon: (optional) If ``True``, retrieves Apple touch icons and includes them in the response ``images`` array
116 |         :type touch_icon: bool
117 |         :param favicon: (optional) If ``True``, retrieves any favicon images and includes them in the response ``images`` array
118 |         :type favicon: bool
119 |         :param canonical: (optional) If ``True``, retrieves canonical url from meta tags. Default: False
120 |         :type canonical: bool
121 |         :param all_images: (optional) If ``True``, retrieves images inside web pages body and includes them in the response ``images`` array. Default: False
122 |         :type all_images: bool
123 |         :param parser: (optional) String reference for the parser that BeautifulSoup will use
124 |         :type parser: string
125 |         :param handle_file_content: (optional) If ``True``, lassie will return a generic response when a file is fetched. Default: False
126 |         :type handle_file_content: bool
127 | 
128 |         """
129 | 
130 |         # Set params, method params have priority over class params
131 |         open_graph = merge_settings(open_graph, self.open_graph)
132 |         twitter_card = merge_settings(twitter_card, self.twitter_card)
133 |         touch_icon = merge_settings(touch_icon, self.touch_icon)
134 |         favicon = merge_settings(favicon, self.favicon)
135 |         canonical = merge_settings(canonical, self.canonical)
136 |         all_images = merge_settings(all_images, self.all_images)
137 |         parser = merge_settings(parser, self.parser)
138 |         handle_file_content = merge_settings(handle_file_content, self.handle_file_content)
139 | 
140 |         data = {
141 |             'images': [],
142 |             'videos': [],
143 |         }
144 | 
145 |         has_file_content = False
146 |         content_type = None
147 |         if handle_file_content:
148 |             headers, status_code = self._retrieve_headers(url)
149 |             content_type = headers.get('Content-Type')
150 |             has_file_content = content_type and not 'text/html' in content_type
151 | 
152 |         if has_file_content and content_type:
153 |             has_image_content = content_type in IMAGE_MIMETYPES
154 |             if has_image_content:
155 |                 parsed_url = urlparse(url)
156 |                 data['title'] = basename(parsed_url.path.lstrip('/'))  # TODO: if the url doesn't have an extension, maybe we should match it up to the mimetype and append an ext?
157 |                 data['url'] = url
158 |                 data['images'].append({
159 |                     'type': 'body_image',
160 |                     'src': url,
161 |                 })
162 |         else:
163 |             try:
164 |                 oembed_data, status_code = self._retrieve_oembed_data(url)
165 |                 parse_oembed_data(oembed_data, data)
166 |             except LassieError:
167 |                 oembed_data = None
168 | 
169 |             html, status_code = self._retrieve_content(url)
170 | 
171 |             if not html and not oembed_data:
172 |                 raise LassieError('There was no content to parse.')
173 | 
174 |             if '<html' not in html:
175 |                 html = re.sub(r'(?:<!DOCTYPE(?:\s\w)?>(?:<head>)?)', '<!DOCTYPE html><html>', html)
176 | 
177 |             soup = BeautifulSoup(clean_text(html), parser)
178 | 
179 |             self._filter_amp_data(soup, data, url, all_images)
180 | 
181 |             if open_graph:
182 |                 self._filter_meta_data('open_graph', soup, data, url)
183 | 
184 |             if twitter_card:
185 |                 self._filter_meta_data('twitter_card', soup, data)
186 | 
187 |             self._filter_meta_data('generic', soup, data)
188 | 
189 |             if touch_icon:
190 |                 self._filter_link_tag_data('touch_icon', soup, data, url)
191 | 
192 |             if favicon:
193 |                 self._filter_link_tag_data('favicon', soup, data, url)
194 | 
195 |             if canonical:
196 |                 self._filter_link_tag_data('canonical', soup, data, url)
197 | 
198 |             if all_images:
199 |                 # Maybe filter out 1x1, no "good" way to do this if image doesn't supply
200 |                 # width/height.
201 |                 self._find_all_images(soup, data, url)
202 | 
203 |             # TODO: Find a good place for setting url, title and locale
204 |             if soup.html.get('lang'):
205 |                 lang = soup.html.get('lang')
206 |             else:
207 |                 lang = soup.html.get('xml:lang')
208 | 
209 |             if lang and ('locale' not in data):
210 |                 locale = normalize_locale(lang)
211 |                 if locale:
212 |                     data['locale'] = locale
213 | 
214 |             data_url = data.get('url')
215 |             if not data_url or (data_url in url and len(data_url) < len(url)):
216 |                 data['url'] = url
217 | 
218 |             if ('title' not in data or not data.get('title')) and hasattr(soup.title, 'string'):
219 |                 data['title'] = soup.title.string
220 | 
221 |             data['html'] = html
222 | 
223 |         data['status_code'] = status_code
224 | 
225 |         return data
226 | 
227 |     def _prepare_request(self, method, url, headers, **request_kwargs):
228 |         request = Request(method, url, headers=headers)
229 |         prepped = request.prepare()
230 | 
231 |         if not self.user_agent_set_manually:
232 |             prepped.headers['User-Agent'] = determine_user_agent(prepped.headers.get('User-Agent'))
233 | 
234 |         return prepped
235 | 
236 |     def _retrieve_oembed_data(self, url):  # pragma: no cover
237 |         try:
238 |             response = consumer.embed(url)
239 |             oembed_data = response.getData()
240 |             status_code = 200
241 |         except Exception as e:
242 |             raise LassieError(e)
243 | 
244 |         return oembed_data, status_code
245 | 
246 |     def _retrieve_headers(self, url):  # pragma: no cover
247 |         request_kwargs = self.merge_request_kwargs()
248 | 
249 |         try:
250 |             request = self._prepare_request(
251 |                 'HEAD', url, headers=self.client.headers, **request_kwargs
252 |             )
253 |             response = self.client.send(request, **request_kwargs)
254 |         except requests.exceptions.RequestException as e:
255 |             raise LassieError(e)
256 | 
257 |         return response.headers, response.status_code
258 | 
259 |     def _retrieve_content(self, url):  # pragma: no cover
260 |         request_kwargs = self.merge_request_kwargs()
261 | 
262 |         try:
263 |             request = self._prepare_request(
264 |                 'GET', url, headers=self.client.headers, **request_kwargs
265 |             )
266 |             response = self.client.send(request, **request_kwargs)
267 |         except requests.exceptions.RequestException as e:
268 |             raise LassieError(e)
269 | 
270 |         return response.text, response.status_code
271 | 
272 |     def merge_request_kwargs(self):
273 |         request_kwargs = {}
274 | 
275 |         for k, v in self._request_opts.items():
276 |             if k in REQUEST_OPTS['request']:
277 |                 # Set request specific kwarg
278 |                 request_kwargs[k] = v
279 | 
280 |         return request_kwargs
281 | 
282 |     def _filter_meta_data(self, source, soup, data, url=None):
283 |         """This method filters the web page content for meta tags that match patterns given in the ``FILTER_MAPS``
284 | 
285 |         :param source: The key of the meta dictionary in ``FILTER_MAPS['meta']``
286 |         :type source: string
287 |         :param soup: BeautifulSoup instance to find meta tags
288 |         :type soup: instance
289 |         :param data: The response dictionary to manipulate
290 |         :type data: (dict)
291 | 
292 |         """
293 |         meta = FILTER_MAPS['meta'][source]
294 |         meta_map = meta['map']
295 | 
296 |         html = soup.find_all('meta', {meta['key']: meta['pattern']})
297 | 
298 |         image = {}
299 |         video = {}
300 | 
301 |         for line in html:
302 |             prop = line.get(meta['key'])
303 |             value = line.get('content')
304 |             _prop = meta_map.get(prop)
305 | 
306 |             if prop in meta_map and _prop and not data.get(_prop):
307 |                 # this could be bad in cases where any values that the property
308 |                 # is mapped up to (i.e. "src", "type", etc) are found in ``data``
309 |                 # TODO: Figure out a smoother way to prevent conflicts ^^^^^^^^
310 |                 image_prop = meta['image_key']
311 |                 video_prop = meta['video_key']
312 | 
313 |                 if prop.startswith((image_prop, video_prop)) and \
314 |                 prop.endswith(('width', 'height')):
315 |                     if prop.endswith(('width', 'height')):
316 |                         value = convert_to_int(value)
317 | 
318 |                 if meta_map[prop] == 'locale':
319 |                     locale = normalize_locale(value)
320 |                     if locale:
321 |                         data['locale'] = locale
322 | 
323 |                 if prop == 'keywords':
324 |                     if isinstance(value, str):
325 |                         value = [v.strip() for v in value.split(',')]
326 |                     else:
327 |                         value = []
328 | 
329 |                 if image_prop and prop.startswith(image_prop) and value:
330 |                     # og:image URLs can be relative
331 |                     if prop == 'og:image' and url:
332 |                         value = urljoin(url, value)
333 |                     image[meta_map[prop]] = value
334 |                 elif video_prop and prop.startswith(video_prop) and value:
335 |                     video[meta_map[prop]] = value
336 |                 else:
337 |                     data[meta_map[prop]] = value
338 | 
339 |         if image:
340 |             image['type'] = image_prop
341 |             data['images'].append(image)
342 |         if video:
343 |             data['videos'].append(video)
344 | 
345 |     def _filter_link_tag_data(self, source, soup, data, url):
346 |         """This method filters the web page content for link tags that match patterns given in the ``FILTER_MAPS``
347 | 
348 |         :param source: The key of the meta dictionary in ``FILTER_MAPS['link']``
349 |         :type source: string
350 |         :param soup: BeautifulSoup instance to find meta tags
351 |         :type soup: instance
352 |         :param data: The response dictionary to manipulate
353 |         :type data: (dict)
354 |         :param url: URL used for making an absolute url
355 |         :type url: string
356 | 
357 |         """
358 |         link = FILTER_MAPS['link'][source]
359 | 
360 |         html = soup.find_all('link', {link['key']: link['pattern']})
361 | 
362 |         if link['type'] == 'url':
363 |             for line in html:
364 |                 data['url'] = line.get('href')
365 |         else:
366 |             for line in html:
367 |                 data['images'].append({
368 |                     'src': urljoin(url, line.get('href')),
369 |                     'type': link['type'],
370 |                 })
371 | 
372 |     def _filter_amp_data(self, soup, data, url, all_images):
373 |         amp_scripts = soup.find_all('script', {'type': 'application/ld+json'})
374 |         for script in amp_scripts:
375 |             content = script.contents
376 |             _json = None
377 |             try:
378 |                 _json = json.loads(content[0])
379 |             except (IndexError, ValueError):
380 |                 continue
381 | 
382 |             if _json:
383 |                 if isinstance(_json, list):
384 |                     try:
385 |                         # if the json is a list (see #46),
386 |                         # set _json to the first item which _should_ be an object
387 |                         _json = _json[0]
388 |                     except IndexError:  # pragma: no cover
389 |                         pass
390 | 
391 |                 if isinstance(_json, object):
392 |                     image = _json.get('image')
393 |                     if image:
394 |                         if isinstance(image, str):
395 |                             data['images'].append({
396 |                                 'src': urljoin(url, image),
397 |                             })
398 |                         elif isinstance(image, list) or isinstance(image, object):
399 |                             if isinstance(image, list):
400 |                                 image = image[0]
401 | 
402 |                             try:
403 |                                 image_list = image.get('@list')
404 |                             except AttributeError:
405 |                                 image_list = [image]
406 | 
407 |                             if image_list:
408 |                                 for _image in image_list:
409 |                                     if isinstance(_image, str):
410 |                                         data['images'].append({
411 |                                             'src': urljoin(url, _image),
412 |                                         })
413 |                                     elif isinstance(_image, object):
414 |                                         data['images'].append({
415 |                                             'src': urljoin(url, _image.get('url')),
416 |                                             'width': convert_to_int(_image.get('width')),
417 |                                             'height': convert_to_int(_image.get('height')),
418 |                                         })
419 |                             elif not image_list and image.get('url') and url != image.get('url'):
420 |                                 data['images'].append({
421 |                                     'src': urljoin(url, image.get('url')),
422 |                                     'width': convert_to_int(image.get('width')),
423 |                                     'height': convert_to_int(image.get('height')),
424 |                                 })
425 | 
426 |                     thumbnail_url = _json.get('thumbnailUrl')
427 |                     if thumbnail_url:
428 |                         data['images'].append({
429 |                             'src': urljoin(url, thumbnail_url),
430 |                         })
431 | 
432 |                     _type = _json.get('@type')
433 |                     if _type and _type == 'VideoObject':
434 |                         video_src = _json.get('embedUrl')
435 | 
436 |                         if video_src:
437 |                             data['videos'].append({
438 |                                 'src': video_src,
439 |                                 'width': convert_to_int(_json.get('width')),
440 |                                 'height': convert_to_int(_json.get('height')),
441 |                             })
442 | 
443 |                         thumbnail = _json.get('thumbnail')
444 |                         if thumbnail:
445 |                             if isinstance(thumbnail, str):
446 |                                 data['images'].append({
447 |                                     'src': urljoin(url, thumbnail),
448 |                                 })
449 |                             elif isinstance(thumbnail, object):
450 |                                 if thumbnail.get('@list'):
451 |                                     for _thumbnail in thumbnail.get('@list'):
452 |                                         data['images'].append({
453 |                                             'src': urljoin(url, _thumbnail.get('url')),
454 |                                             'width': convert_to_int(_thumbnail.get('width')),
455 |                                             'height': convert_to_int(_thumbnail.get('height')),
456 |                                         })
457 |                                 else:
458 |                                     data['images'].append({
459 |                                         'src': urljoin(url, thumbnail.get('url')),
460 |                                         'width': convert_to_int(thumbnail.get('width')),
461 |                                         'height': convert_to_int(thumbnail.get('height')),
462 |                                     })
463 | 
464 |                     data['title'] = _json.get('headline', '')
465 |                     data['url'] = _json.get('url', '')
466 |                     data['description'] = _json.get('description', '')
467 | 
468 |         if all_images:
469 |             amp_imgs = soup.find_all('amp-img')
470 |             for image in amp_imgs:
471 |                 item = normalize_image_data(image, url)
472 | 
473 |                 data['images'].append(item)
474 | 
475 |     def _find_all_images(self, soup, data, url):
476 |         """This method finds all images in the web page content
477 | 
478 |         :param soup: BeautifulSoup instance to find meta tags
479 |         :type soup: instance
480 |         :param data: The response dictionary to manipulate
481 |         :type data: (dict)
482 | 
483 |         """
484 |         all_images = soup.find_all('img')
485 |         for image in all_images:
486 |             item = normalize_image_data(image, url)
487 | 
488 |             data['images'].append(item)
489 | 


--------------------------------------------------------------------------------
/lassie/exceptions.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.exceptions
 5 | ~~~~~~~~~~~~~~~~~
 6 | 
 7 | This module contains the set of Lassie exceptions.
 8 | 
 9 | """
10 | 
class LassieError(Exception):
    """Generic catch-all exception for errors raised by Lassie."""
14 | 


--------------------------------------------------------------------------------
/lassie/filters/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.filters
 5 | ~~~~~~~~~~~~~~
 6 | 
 7 | This package contains various filters for parsing content.
 8 | 
 9 | """
10 | 
11 | from .apple import APPLE_MAPS
12 | from .generic import GENERIC_MAPS
13 | from .social import SOCIAL_MAPS
14 | 
# Merge the per-source filter tables into one lookup per tag type.  When two
# tables define the same entry name, the table merged last wins (SOCIAL over
# GENERIC for meta tags, GENERIC over APPLE for link tags).
META_MAPS = dict(GENERIC_MAPS['meta'])
META_MAPS.update(SOCIAL_MAPS['meta'])

LINK_MAPS = dict(APPLE_MAPS['link'])
LINK_MAPS.update(GENERIC_MAPS['link'])

# Combined lookup keyed by tag type.
FILTER_MAPS = {'meta': META_MAPS, 'link': LINK_MAPS}
22 | 


--------------------------------------------------------------------------------
/lassie/filters/apple.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.filters.apple
 5 | ~~~~~~~~~~~~~~~~~~~~
 6 | 
 7 | This module contains Apple related content to help Lassie filter for content.
 8 | 
 9 | """
10 | 
11 | import re
12 | 
13 | from ..compat import str
14 | 
# Filter table describing Apple-specific <link> tags.
APPLE_MAPS = {  # http://i.imgur.com/cla85xT.jpg
    'link': {
        'touch_icon': {
            # Case-insensitive prefix match.  The pattern has no end anchor,
            # so the first alternative already matches
            # "apple-touch-icon-precomposed" as well.
            'pattern': re.compile(r"^(apple-touch-icon|apple-touch-icon-precomposed)", re.I),
            # NOTE(review): how 'key' is interpreted is decided by the code
            # that consumes this table, which is not part of this module.
            'key': 'icon',
            # `str` here is the alias imported from ..compat.
            'type': str('touch_icon'),
        },
    }
}
24 | 


--------------------------------------------------------------------------------
/lassie/filters/generic.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.filters.generic
 5 | ~~~~~~~~~~~~~~~~~~~~~~
 6 | 
 7 | This module contains data about generic type content to help Lassie filter for content.
 8 | 
 9 | """
10 | 
11 | import re
12 | 
13 | from ..compat import str
14 | 
# Filter table for generic (non-vendor-specific) <meta> and <link> tags.
GENERIC_MAPS = {
    'meta': {
        'generic': {
            # Case-insensitive prefix match on the three supported names.
            'pattern': re.compile(r"^(description|keywords|title)", re.I),
            # Source meta name -> key used in the output; identity mapping here.
            'map': {
                'description': 'description',
                'keywords': 'keywords',
                'title': 'title',
            },
            # Empty strings: this group names no image or video key.
            'image_key': '',
            'video_key': '',
            # NOTE(review): presumably the tag attribute that holds the meta
            # name -- confirm against the consumer of this table.
            'key': 'name',
        },
    },
    'link': {
        'favicon': {
            # Plain string pattern (not a compiled regex like the meta entry).
            'pattern': 'icon',
            'key': 'rel',
            # `str` here is the alias imported from ..compat.
            'type': str('favicon'),
        },
        'canonical': {
            'pattern': 'canonical',
            'key': 'rel',
            'type': 'url'
        }
    },
}
42 | 


--------------------------------------------------------------------------------
/lassie/filters/oembed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelhelmick/lassie/1122c719a68c20b847c1963719070e10a3d253dd/lassie/filters/oembed/__init__.py


--------------------------------------------------------------------------------
/lassie/filters/oembed/providers.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.filters.providers
 5 | ~~~~~~~~~~
 6 | 
 7 | This module contains oembed providers and a python oembed consumer.
 8 | 
 9 | """
10 | 
11 | import re
12 | 
13 | import oembed
14 | 
15 | from ...utils import convert_to_int
16 | 
# Matches an http(s) URL inside a larger string; parse_oembed_data uses it to
# pull the embed URL out of the provider's HTML snippet.
# NOTE(review): inside the character class, `$-_` is a range (from '$' to
# '_'), not three literal characters.
HYPERLINK_PATTERN = re.compile(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+')

# oEmbed endpoint URL -> list of URL schemes served by that endpoint.
PROVIDERS = {
    'http://www.youtube.com/oembed': [
        'https?://*.youtube.com/watch*',
        'https?://*.youtube.com/v/*',
        'https?://youtu.be/*',
        'https?://*.youtube.com/user/*',
        'https?://*.youtube.com/*#*/*',
        'https?://m.youtube.com/index*',
        'https?://*.youtube.com/profile*',
        'https?://*.youtube.com/view_play_list*',
        'https?://*.youtube.com/playlist*'
    ]
}

# Module-level consumer, built at import time, with one endpoint registered
# for every entry in PROVIDERS.
consumer = oembed.OEmbedConsumer()
for k, v in PROVIDERS.items():
    endpoint = oembed.OEmbedEndpoint(k, v)
    consumer.addEndpoint(endpoint)
37 | 
38 | 
def parse_oembed_data(oembed_data, data):
    """Parse OEmbed response data to inject into lassie's response dict.

    The raw response is always stored under ``data['oembed']``.  When the
    response declares a ``type``, a non-empty ``title`` overrides
    ``data['title']``.  For ``video`` responses a video entry is appended to
    ``data['videos']`` and, when ``thumbnail_url`` is present, an image entry
    is appended to ``data['images']``.

    :param oembed_data: OEmbed response data.
    :type oembed_data: dict
    :param data: Reference to data variable being updated.
    :type data: dict
    :returns: The same ``data`` dict that was passed in.

    """
    data['oembed'] = oembed_data

    _type = oembed_data.get('type')
    if not _type:
        return data

    title = oembed_data.get('title')
    if title:
        data['title'] = title

    if _type != 'video':
        return data

    video = {
        'width': convert_to_int(oembed_data.get('width')),
        'height': convert_to_int(oembed_data.get('height')),
    }

    keep_video = True
    if oembed_data.get('provider_name') in ('YouTube',):
        # The embed URL is taken from the provider's HTML snippet.  A missing
        # or non-text snippet, or one containing no URL, means the video has
        # no usable source, so the entry is dropped (best effort) instead of
        # hiding every possible error behind a blanket ``except``.
        try:
            match = HYPERLINK_PATTERN.search(oembed_data.get('html'))
        except TypeError:
            match = None

        if match is None:
            keep_video = False
        else:
            video['src'] = match.group(0)

    if keep_video:
        # setdefault keeps this from failing on a dict without a 'videos' list.
        data.setdefault('videos', []).append(video)

    thumbnail_url = oembed_data.get('thumbnail_url')
    if thumbnail_url:
        data.setdefault('images', []).append({
            'width': convert_to_int(oembed_data.get('thumbnail_width')),
            'height': convert_to_int(oembed_data.get('thumbnail_height')),
            'src': thumbnail_url,
        })

    return data
84 | 


--------------------------------------------------------------------------------
/lassie/filters/social.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.filters.social
 5 | ~~~~~~~~~~~~~~~~~~~~~
 6 | 
 7 | This module contains data about social related content to help Lassie filter for content.
 8 | 
 9 | """
10 | 
11 | import re
12 | 
13 | from ..compat import str
14 | 
# Filter table for social-network <meta> tags (Open Graph and Twitter Cards).
# Each group has a case-insensitive prefix pattern and a 'map' from the
# source property name to the key used in the output.
SOCIAL_MAPS = {
    'meta': {
        'open_graph': {  # http://ogp.me/
            'pattern': re.compile(r"^og:", re.I),
            'map': {
                # Page-level properties.
                'og:url': 'url',
                'og:title': 'title',
                'og:description': 'description',
                'og:locale': 'locale',
                'og:site_name': 'site_name',

                # Image properties; og:image and og:image:url both map to 'src'.
                'og:image': 'src',
                'og:image:url': 'src',
                'og:image:secure_url': 'secure_src',
                'og:image:width': 'width',
                'og:image:height': 'height',
                'og:image:type': 'type',

                # Video properties; og:video and og:video:url both map to 'src'.
                'og:video': 'src',
                'og:video:url': 'src',
                'og:video:secure_url': 'secure_src',
                'og:video:width': 'width',
                'og:video:height': 'height',
                'og:video:type': 'type',
            },
            # `str` here is the alias imported from ..compat.
            'image_key': str('og:image'),
            'video_key': str('og:video'),
            # NOTE(review): presumably the tag attribute carrying the property
            # name -- confirm against the consumer of this table.
            'key': 'property',
        },
        'twitter_card': {  # https://dev.twitter.com/docs/cards
            'pattern': re.compile(r"^twitter:", re.I),
            'map': {
                # Page-level properties.
                'twitter:url': 'url',
                'twitter:title': 'title',
                'twitter:description': 'description',

                # Image properties.
                'twitter:image': 'src',
                'twitter:image:width': 'width',
                'twitter:image:height': 'height',

                # Player (video) properties.
                'twitter:player': 'src',
                'twitter:player:width': 'width',
                'twitter:player:height': 'height',
                'twitter:player:content_type': 'type',
            },
            'image_key': str('twitter:image'),
            'video_key': str('twitter:player'),
            'key': 'name',
        },
    }
}
66 | 


--------------------------------------------------------------------------------
/lassie/utils.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | lassie.utils
 5 | ~~~~~~~~~~~~
 6 | 
 7 | This module contains the set of helper functions executed by Lassie methods.
 8 | 
 9 | """
10 | 
11 | import locale
12 | import re
13 | 
14 | from requests.utils import default_user_agent
15 | 
16 | from .compat import str, urljoin
17 | 
# Matches a single carriage return, newline or tab; used by clean_text.
CLEANER = re.compile(r'[\r\n\t]')
# Matches one or more decimal digits.  Not referenced inside this module.
RE_INT = re.compile(r'\d+')
# Browser-like User-Agent that determine_user_agent returns in place of an
# empty value or the requests default.
FAKE_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.20 (KHTML, like Gecko) Version/10.1 Safari/603.1.20'
21 | 
def clean_text(value):
    """Strip carriage returns, line feeds and tabs out of the given content

    :param value: Content to be cleansed
    :type value: string

    """
    cleaned = CLEANER.sub('', value)
    return cleaned
30 | 
def convert_to_int(value):
    """Attempts to convert a specified value to an integer

    Strings may carry surrounding whitespace and a trailing ``px`` unit
    (``'100px'``, ``' 100 px '``).  Falsy input (``None``, ``''``, ``0``) and
    anything that cannot be converted yield ``None``.

    :param value: Content to be converted into an integer
    :type value: string or int
    :returns: The integer value, or ``None`` when no conversion is possible

    """
    if not value:
        return None

    # Apart from numbers also accept values that end with px.  Only a real
    # trailing "px" suffix is removed: str.strip(' px') would treat its
    # argument as a character set and also accept junk such as 'px100'.
    if isinstance(value, str):
        value = value.strip()
        if value.endswith('px'):
            value = value[:-2].rstrip()

    try:
        return int(value)
    except (TypeError, ValueError):
        return None
49 | 
50 | def normalize_locale(value):
51 |     value = value.replace('-', '_')
52 |     the_locale = locale.normalize(value)
53 | 
54 |     if the_locale != value:
55 |         # Should we return the actual locale, returned from the locale lib instead of splitting?
56 |         try:
57 |             return str(the_locale.split('.')[0])
58 |         except IndexError:  # pragma: no cover
59 |             pass
60 |     return None
61 | 
def normalize_image_data(data, url):
    """Build lassie's image dict from an image element

    :param data: Element whose ``src``, ``alt``, ``width`` and ``height``
                 attributes are read through ``.get()``
    :param url: Base URL the element's ``src`` is joined against
    :type url: string
    :returns: dict with ``src``, ``alt`` and ``type`` plus ``width`` and
              ``height`` when those convert to a non-zero integer

    """
    # Create image list then remove duplicate images?
    img = {
        'src': urljoin(url, data.get('src')),
        'alt': data.get('alt', ''),
        'type': u'body_image',
    }

    # Only include width and height if included as an attribute of the element
    for dimension in ('width', 'height'):
        size = convert_to_int(data.get(dimension))
        if size:
            img[dimension] = size

    return img
80 | 
def determine_user_agent(user_agent):
    """Pick the User-Agent string to use

    :param user_agent: Candidate User-Agent value, may be empty
    :returns: ``user_agent`` when it is non-empty and differs from the
              ``requests`` default, otherwise ``FAKE_USER_AGENT``

    """
    if user_agent and user_agent != default_user_agent():
        return user_agent

    return FAKE_USER_AGENT
86 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]  # PEP 508 specifications.
3 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.18.4,<3.0.0
2 | beautifulsoup4>=4.9.0,<4.10.0
3 | html5lib==1.0b10
4 | python-oembed
5 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import os
 4 | import sys
 5 | 
 6 | try:
 7 |     from setuptools import setup
 8 | except ImportError:
 9 |     from distutils.core import setup
10 | 
11 | __version__ = '0.11.11'
12 | 
13 | packages = [
14 |     'lassie',
15 |     'lassie.filters',
16 |     'lassie.filters.oembed'
17 | ]
18 | 
19 | setup(
20 |     name='lassie',
21 |     version=__version__,
22 |     install_requires=open("requirements.txt").read().split("\n"),
23 |     author='Mike Helmick',
24 |     license=open('LICENSE').read(),
25 |     url='https://github.com/michaelhelmick/lassie/tree/master',
26 |     keywords='lassie open graph web content scrape scraper',
27 |     description='Lassie is a Python library for retrieving content from websites and being returned in a pretty format.',
28 |     include_package_data=True,
29 |     packages=packages,
30 |     classifiers=[
31 |         'Development Status :: 4 - Beta',
32 |         'Intended Audience :: Developers',
33 |         'License :: OSI Approved :: MIT License',
34 |         'Topic :: Software Development :: Libraries :: Python Modules',
35 |         'Topic :: Internet',
36 |         'Natural Language :: English',
37 |         'Programming Language :: Python',
38 |         'Programming Language :: Python :: 2.7',
39 |         'Programming Language :: Python :: 3',
40 |         'Programming Language :: Python :: 3.8',
41 |     ]
42 | )
43 | 


--------------------------------------------------------------------------------
/test_requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements.txt
2 | python-coveralls==2.1.0
3 | nose-cov==1.6
4 | mock==1.0.1
5 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelhelmick/lassie/1122c719a68c20b847c1963719070e10a3d253dd/tests/__init__.py


--------------------------------------------------------------------------------
/tests/base.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import mimetypes
 3 | import unittest
 4 | 
 5 | from mock import patch
 6 | 
 7 | from lassie.compat import urlparse
 8 | from lassie.core import Lassie
 9 | from lassie.exceptions import LassieError
10 | 
11 | 
def _mock_retrieve_oembed_data(mock, url):
    """Stand-in for ``Lassie._retrieve_oembed_data`` backed by local fixtures.

    URLs without ``.json`` yield ``({}, 404)``.  Otherwise the URL path is
    resolved under ``./json`` and the parsed JSON is returned with status 200.

    :param mock: First positional argument of the patched method (unused)
    :param url: URL whose path selects the fixture file
    :returns: ``(parsed_json, status_code)``
    :raises LassieError: when the fixture cannot be opened or read

    """
    if '.json' not in url:
        return {}, 404

    try:
        filename = urlparse(url).path
        # The context manager closes the file even when read() fails.
        with open('./json%s' % filename, 'r') as _file:
            content = _file.read()
    except Exception as e:
        raise LassieError(e)

    return json.loads(content), 200
27 | 
28 | 
def _mock_retrieve_content(mock, url):
    """Stand-in for ``Lassie._retrieve_content`` backed by local templates.

    URLs without ``.html`` fall back to ``/generic/all_properties.html``.
    Otherwise the URL path is resolved under ``./templates``.

    :param mock: First positional argument of the patched method (unused)
    :param url: URL whose path selects the template file
    :returns: ``(file_content, 200)``

    """
    if '.html' not in url:
        filename = '/generic/all_properties.html'
    else:
        filename = urlparse(url).path

    # The context manager closes the file even when read() fails.
    with open('./templates%s' % filename, 'r') as _file:
        content = _file.read()

    return content, 200
42 | 
43 | 
def _mock_retrieve_headers(mock, url):
    """Stand-in for ``Lassie._retrieve_headers`` that guesses the content type.

    :param mock: First positional argument of the patched method (unused)
    :param url: URL whose path is used to guess the MIME type
    :returns: ``(headers, 200)`` where ``headers`` holds a ``Content-Type``
              that defaults to ``application/octet-stream``

    """
    path = urlparse(url).path
    guessed_type = mimetypes.guess_type(path)[0]

    return {'Content-Type': guessed_type or 'application/octet-stream'}, 200
54 | 
55 | 
class LassieBaseTestCase(unittest.TestCase):
    """Base test case that swaps Lassie's retrieval methods for local mocks."""

    def setUp(self):
        """Create and start the three patches, in declaration order."""
        self.patch = patch.object(Lassie, '_retrieve_content', _mock_retrieve_content)
        self.patch2 = patch.object(Lassie, '_retrieve_headers', _mock_retrieve_headers)
        self.patch3 = patch.object(Lassie, '_retrieve_oembed_data', _mock_retrieve_oembed_data)

        for patcher in (self.patch, self.patch2, self.patch3):
            patcher.start()

    def tearDown(self):
        """Stop the patches in the same order they were started."""
        for patcher in (self.patch, self.patch2, self.patch3):
            patcher.stop()
70 | 


--------------------------------------------------------------------------------
/tests/json/youtube/bad_html.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author_name": "BadAuthor",
 3 |   "author_url": "https://www.youtube.com/user/BadAuthor",
 4 |   "height": 270,
 5 |   "html": "",
 6 |   "provider_name": "YouTube",
 7 |   "provider_url": "https://www.youtube.com/",
 8 |   "thumbnail_height": 360,
 9 |   "thumbnail_url": "https://i.ytimg.com/vi/nothumb/hqdefault.jpg",
10 |   "thumbnail_width": 480,
11 |   "title": "Bad Author",
12 |   "type": "video",
13 |   "version": "1.0",
14 |   "width": 480
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/json/youtube/good.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author_name": "Himself0890",
 3 |   "author_url": "https://www.youtube.com/user/Himself0890",
 4 |   "height": 270,
 5 |   "html": "<iframe width=\"480\" height=\"270\" src=\"https://www.youtube.com/embed/lVhOLT2xQAc?feature=oembed\" frameborder=\"0\" gesture=\"media\" allowfullscreen></iframe>",
 6 |   "provider_name": "YouTube",
 7 |   "provider_url": "https://www.youtube.com/",
 8 |   "thumbnail_height": 360,
 9 |   "thumbnail_url": "https://i.ytimg.com/vi/lVhOLT2xQAc/hqdefault.jpg",
10 |   "thumbnail_width": 480,
11 |   "title": "Man vs Thunderjaw",
12 |   "type": "video",
13 |   "version": "1.0",
14 |   "width": 480
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/json/youtube/no_thumb.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author_name": "NoThumb",
 3 |   "author_url": "https://www.youtube.com/user/NoThumb",
 4 |   "height": 270,
 5 |   "html": "<iframe width=\"480\" height=\"270\" src=\"https://www.youtube.com/embed/lVhOLT2xQAc?feature=oembed\" frameborder=\"0\" gesture=\"media\" allowfullscreen></iframe>",
 6 |   "provider_name": "YouTube",
 7 |   "provider_url": "https://www.youtube.com/",
 8 |   "title": "Bad Type",
 9 |   "type": "video",
10 |   "version": "1.0",
11 |   "width": 480
12 | }
13 | 


--------------------------------------------------------------------------------
/tests/json/youtube/no_type.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author_name": "BadType",
 3 |   "author_url": "https://www.youtube.com/user/BadType",
 4 |   "height": 270,
 5 |   "html": "",
 6 |   "provider_name": "YouTube",
 7 |   "provider_url": "https://www.youtube.com/",
 8 |   "thumbnail_height": 360,
 9 |   "thumbnail_url": "https://i.ytimg.com/vi/nothumb/hqdefault.jpg",
10 |   "thumbnail_width": 480,
11 |   "title": "Bad Type",
12 |   "type": "",
13 |   "version": "1.0",
14 |   "width": 480
15 | }
16 | 


--------------------------------------------------------------------------------
/tests/oembed/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelhelmick/lassie/1122c719a68c20b847c1963719070e10a3d253dd/tests/oembed/__init__.py


--------------------------------------------------------------------------------
/tests/oembed/test_youtube.py:
--------------------------------------------------------------------------------
 1 | from lassie import Lassie
 2 | 
 3 | from ..base import LassieBaseTestCase
 4 | 
 5 | 
 6 | class LassieOEmbedYouTubeTestCase(LassieBaseTestCase):
 7 |     def test_youtube_good(self):
 8 |         url = 'http://lassie.it/youtube/good.json'
 9 | 
10 |         l = Lassie()
11 |         data = l.fetch(url)
12 | 
13 |         self.assertEqual(len(data['videos']), 1)
14 |         self.assertEqual(len(data['images']), 1)
15 | 
16 |     def test_bad_url(self):
17 |         url = 'http://lassie.it/youtube/bad_url_123456.json'
18 | 
19 |         l = Lassie()
20 |         data = l.fetch(url)
21 | 
22 |         self.assertIsNone(data.get('oembed'))
23 | 
24 |     def test_youtube_bad_html(self):
25 |         url = 'http://lassie.it/youtube/bad_html.json'
26 | 
27 |         l = Lassie()
28 |         data = l.fetch(url)
29 | 
30 |     def test_youtube_no_type(self):
31 |         url = 'http://lassie.it/youtube/no_type.json'
32 | 
33 |         l = Lassie()
34 |         data = l.fetch(url)
35 | 
36 |     def test_youtube_no_thumb(self):
37 |         url = 'http://lassie.it/youtube/no_thumb.json'
38 | 
39 |         l = Lassie()
40 |         data = l.fetch(url)
41 | 
42 |         self.assertEqual(len(data['videos']), 1)
43 |         self.assertEqual(len(data['images']), 0)
44 | 


--------------------------------------------------------------------------------
/tests/templates/amp/bad_json.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         {
 14 |             "@context" "http://schema.org",
 15 |             "@type": "NewsArticle",
 16 |             "url": "http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869",
 17 |             "headline": "Google Glass Is Dead, Long Live Snapchat Spectacles",
 18 |             "author": {
 19 |                 "@type": "Person",
 20 |                 "name": "Rhett Jones"
 21 |             },
 22 |             "datePublished": "2016-09-24T09:10:00-04:00",
 23 |             "dateModified": "2016-09-24T10:50:48-04:00",
 24 |             "publisher": {
 25 |                 "@type": "Organization",
 26 |                 "name": "gizmodo.com",
 27 |                 "logo": {
 28 |                     "@type": "ImageObject",
 29 |                     "url": "https://i.kinja-img.com/gawker-media/image/upload/s--ay4UlTaU--/wvhsuflzmeoo0zr9ex55.png"
 30 |                 }
 31 |             },
 32 |             "mainEntityOfPage": true,
 33 |             "image": {
 34 |                 "@type": "ImageObject",
 35 |                 "url": "https://i.kinja-img.com/gawker-media/image/upload/kjqja8ibxoqy50nebzl8.png",
 36 |                 "width": "636",
 37 |                 "height": "357"
 38 |             }
 39 |         }
 40 |     </script>
 41 |     <style amp-boilerplate>
 42 |         body {
 43 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 44 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 45 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 46 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 47 |         }
 48 | 
 49 |         @-webkit-keyframes -amp-start {
 50 |             from {
 51 |                 visibility: hidden
 52 |             }
 53 |             to {
 54 |                 visibility: visible
 55 |             }
 56 |         }
 57 | 
 58 |         @-moz-keyframes -amp-start {
 59 |             from {
 60 |                 visibility: hidden
 61 |             }
 62 |             to {
 63 |                 visibility: visible
 64 |             }
 65 |         }
 66 | 
 67 |         @-ms-keyframes -amp-start {
 68 |             from {
 69 |                 visibility: hidden
 70 |             }
 71 |             to {
 72 |                 visibility: visible
 73 |             }
 74 |         }
 75 | 
 76 |         @-o-keyframes -amp-start {
 77 |             from {
 78 |                 visibility: hidden
 79 |             }
 80 |             to {
 81 |                 visibility: visible
 82 |             }
 83 |         }
 84 | 
 85 |         @keyframes -amp-start {
 86 |             from {
 87 |                 visibility: hidden
 88 |             }
 89 |             to {
 90 |                 visibility: visible
 91 |             }
 92 |         }
 93 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
 94 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
 95 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
 96 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
 97 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
 98 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
 99 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
100 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
101 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
102 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
103 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
104 | </head>
105 | 
106 | <body class="gizmodo blog-group-gizmodo">
107 | </body>
108 | 
109 | </html>
110 | 


--------------------------------------------------------------------------------
/tests/templates/amp/list_image_list_str.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         {
 14 |             "@context": "http://schema.org",
 15 |             "@type": "NewsArticle",
 16 |             "url": "http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869",
 17 |             "headline": "Google Glass Is Dead, Long Live Snapchat Spectacles",
 18 |             "author": {
 19 |                 "@type": "Person",
 20 |                 "name": "Rhett Jones"
 21 |             },
 22 |             "datePublished": "2016-09-24T09:10:00-04:00",
 23 |             "dateModified": "2016-09-24T10:50:48-04:00",
 24 |             "publisher": {
 25 |                 "@type": "Organization",
 26 |                 "name": "gizmodo.com",
 27 |                 "logo": {
 28 |                     "@type": "ImageObject",
 29 |                     "url": "https://i.kinja-img.com/gawker-media/image/upload/s--ay4UlTaU--/wvhsuflzmeoo0zr9ex55.png"
 30 |                 }
 31 |             },
 32 |             "mainEntityOfPage": true,
 33 |             "image": ["https://i.kinja-img.com/gawker-media/image/upload/s--B9R9piEg--/c_scale,f_auto,fl_progressive,q_80,w_800/yp0cg39t2xpuovogfvy3.jpg"]
 34 |         }
 35 |     </script>
 36 |     <style amp-boilerplate>
 37 |         body {
 38 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 39 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 40 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 41 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 42 |         }
 43 | 
 44 |         @-webkit-keyframes -amp-start {
 45 |             from {
 46 |                 visibility: hidden
 47 |             }
 48 |             to {
 49 |                 visibility: visible
 50 |             }
 51 |         }
 52 | 
 53 |         @-moz-keyframes -amp-start {
 54 |             from {
 55 |                 visibility: hidden
 56 |             }
 57 |             to {
 58 |                 visibility: visible
 59 |             }
 60 |         }
 61 | 
 62 |         @-ms-keyframes -amp-start {
 63 |             from {
 64 |                 visibility: hidden
 65 |             }
 66 |             to {
 67 |                 visibility: visible
 68 |             }
 69 |         }
 70 | 
 71 |         @-o-keyframes -amp-start {
 72 |             from {
 73 |                 visibility: hidden
 74 |             }
 75 |             to {
 76 |                 visibility: visible
 77 |             }
 78 |         }
 79 | 
 80 |         @keyframes -amp-start {
 81 |             from {
 82 |                 visibility: hidden
 83 |             }
 84 |             to {
 85 |                 visibility: visible
 86 |             }
 87 |         }
 88 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
 89 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
 90 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
 91 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
 92 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
 93 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
 94 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
 95 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
 96 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
 97 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
 98 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
 99 | </head>
100 | 
101 | <body class="gizmodo blog-group-gizmodo">
102 |     <div class="container">
103 |         <amp-analytics type="googleanalytics" id="ga-domain">
104 |         </amp-analytics>
105 |     </div>
106 |     <div class="nav__wrap">
107 |         <!-- for affixing to top -->
108 |         <nav class="nav">
109 |             <a class="nav__menu" tabindex="0">
110 |                 <div class="burger"><span></span><span></span><span></span></div>
111 |             </a>
112 |             <div class="nav__inner"><a href="//gizmodo.com" class="logo" title="Gizmodo" data-ga="[Kinja Menu, Logo/Blog Title Click, js_pageType]"><svg class="gmg-logo gmg-logo--gizmodo" width="218" height="32" viewBox="0 0 218 32"><path d="M35.726 28.18V3.8c0-1.267.282-2.217.847-2.85.564-.633 1.294-.95 2.19-.95.92 0 1.668.314 2.24.94.57.626.857 1.58.857 2.86v24.38c0 1.28-.286 2.236-.857 2.87-.572.633-1.32.95-2.24.95-.882 0-1.61-.32-2.18-.96-.57-.64-.857-1.595-.857-2.86zM202.37 32c-8.62 0-15.63-7.174-15.63-15.99 0-8.817 7.01-15.99 15.63-15.99C210.99.02 218 7.193 218 16.01c0 8.816-7.01 15.99-15.63 15.99zm0-25.507c-5.13 0-9.304 4.27-9.304 9.517s4.174 9.517 9.304 9.517 9.302-4.27 9.302-9.517-4.173-9.517-9.302-9.517zM132.2 32c-8.618 0-15.63-7.174-15.63-15.99 0-8.817 7.012-15.99 15.63-15.99 8.62 0 15.632 7.173 15.632 15.99 0 8.816-7.012 15.99-15.63 15.99zm0-25.507c-5.128 0-9.302 4.27-9.302 9.517s4.174 9.517 9.303 9.517c5.13 0 9.304-4.27 9.304-9.517s-4.173-9.517-9.303-9.517zM181.73 16v-.004c0-3.53-1.126-6.793-3.024-9.44-2.76-3.852-7.162-6.394-12.142-6.544-.154-.005-.31-.012-.465-.012l-7.85.01c-1.34 0-2.31.312-2.91.937-.6.624-.9 1.633-.9 3.027v24.052c0 1.394.3 2.402.9 3.027.6.625 1.57.937 2.91.937l7.85.01c.155 0 .31-.008.464-.012 4.98-.15 9.38-2.692 12.142-6.543 1.898-2.648 3.023-5.912 3.025-9.44V16zm-14.522 9.45c-.757.05-1.57.058-2.446.058h-4.103V6.492h4.102c.875 0 1.69.01 2.446.06 2.713.33 5.07 1.86 6.55 4.056 1.032 1.53 1.64 3.38 1.644 5.373v.039c-.004 1.993-.612 3.843-1.643 5.373-1.482 2.197-3.84 3.724-6.552 4.056zM107.718.14c-1.142-.363-2.376.097-3.04 1.133L95.25 15.962 85.83 1.275C85.163.24 83.93-.22 82.786.142c-1.142.364-1.92 1.465-1.92 2.713v26.102c0 1.566 1.213 2.836 2.71 2.836 1.497 0 2.71-1.27 2.71-2.836V12.17l6.693 
10.427.013.02.008.014c.01.013.02.025.027.038.042.062.085.124.132.184.02.026.042.05.063.075.037.043.074.088.114.13l.112.108.082.077c.054.046.108.087.164.128.016.01.03.024.046.035l.007.005c.06.04.12.08.184.116.017.01.034.022.05.03.048.028.097.05.146.073.274.13.562.212.85.242h.007c.057.007.115.008.17.01.033.002.065.004.095.004.033 0 .064-.002.095-.004.058-.002.115-.003.172-.01h.005c.29-.03.577-.112.85-.243.05-.025.1-.046.145-.073l.053-.03c.063-.038.124-.076.184-.117l.007-.005c.016-.01.03-.024.046-.035.056-.04.11-.082.164-.128.03-.025.056-.052.085-.08l.11-.105c.04-.042.076-.087.113-.13.022-.025.044-.05.064-.075.047-.06.09-.122.132-.184.007-.013.017-.025.025-.037l.008-.012.014-.02 6.69-10.428v16.787c0 1.566 1.214 2.836 2.713 2.836 1.496 0 2.71-1.27 2.71-2.836V2.855c0-1.248-.78-2.35-1.92-2.713zM71.64 31.027h-20.27c-1.066 0-2.25-.682-2.69-1.744-.438-1.06-.528-2.505.416-3.656l16.58-19.68H51.976c-1.497 0-2.71-1.327-2.71-2.964S50.478.346 51.975.346h18.47c1.068 0 2.17.174 3.136 1.255.88.988.842 3.12-.145 4.392L57.417 25.1h14.22c1.498 0 2.59 1.327 2.59 2.963 0 1.637-1.092 2.964-2.59 2.964zM15.517 32c-4.498 0-8.577-1.824-11.486-5.137C1.47 23.945 0 20.1 0 16.313 0 4.428 9.656 0 15.743 0c5.985 0 10.378 3.003 10.562 3.13 1.348.935 1.71 2.826.806 4.223-.9 1.392-2.716 1.768-4.063.843-.078-.052-3.195-2.108-7.304-2.108-3.815 0-9.076 2.576-9.076 10.224 0 4.35 3.163 9.483 8.964 9.483 3.265 0 5.56-.89 6.893-1.758v-4.525h-4.52c-1.622 0-2.937-1.363-2.937-3.044 0-1.68 1.315-3.044 2.938-3.044h7.457c1.624 0 2.94 1.363 2.94 3.044v9.053c0 .825-.323 1.612-.893 2.186-1.97 1.98-6.29 4.294-11.99 4.294z" fill="#000" fill-rule="evenodd"/></svg></a></div>
113 |             <div class="overlay"></div><button class="gmg-menu" tabindex="0"><div class="gmg-menu__inner"><svg id="svggroup--gmgavatars" class="hide"><symbol id="gmgavatar-default" viewBox="0 0 16 16"><path d="M3.11 7.522s-.026-.07-.017-.085c.282-.483 1.8-3.095 1.977-3.397.328-.563.7-1.036 1.554-1.04C7.9 2.998 13 3.006 13 3.006s-1.34 2.305-1.542 2.65c-.2.345-.59.95-1.65.93-.96-.02-4.258-.046-4.7-.048-.44 0-1.152.138-1.57.506-.354.31-.428.48-.428.48zM3 7.63s.047-.058.065-.058h3.93c.652 0 1.24.086 1.68.82.65 1.084 2.937 5.053 2.937 5.053H8.546c-.4 0-1.12-.03-1.633-.955-.467-.84-1.836-3.247-2.057-3.63-.22-.38-.7-.926-1.227-1.102C3.18 7.608 3 7.63 3 7.63z" fill="#FFF" fill-rule="evenodd"/></symbol><symbol id="gmgavatar-deadspin" viewBox="0 0 16 16"><path d="M12.448 10.14c-.003.022-.495 2.86-3.22 2.86h-5.88c.003 0-.157-.008-.26-.132-.077-.092-.104-.226-.08-.398.062-.428.518-3.933.76-5.786h3.39l-.477 3.18h2.6l.474-3.18h3.14l-.444 3.455zm.503-3.89H3.82V3h6.515c.955 0 1.667.278 2.116.827.717.874.546 2.165.5 2.424z" fill="#FFF"/></symbol><symbol id="gmgavatar-gawker" viewBox="0 0 16 16"><path d="M12.21 4v3.168h-.195c-.644-1.122-1.66-2.903-3.935-2.903-2.806 0-2.918 2.722-2.918 4.04 0 1.815.225 3.433 2.886 3.433.71 0 2.032-.248 2.194-.892V9l-.565-.61H8.5v-.267H14v.266h-1.017l-.725.623v1.766c0 .15.048.312.13.51l-.098.1c-.08-.083-.226-.13-.452-.13-.87 0-2.274.74-4.08.74C4.598 12 3 10.005 3 8.206 3 5.42 5.564 4 8 4c1.274 0 2.387.594 3.048.594.564 0 .822-.363.985-.594h.176z" fill="#FFF"/></symbol><symbol id="gmgavatar-gizmodo" viewBox="0 0 16 16"><path d="M8.418 13c-1.426 0-2.718-.57-3.64-1.605C3.966 10.483 3.5 9.28 3.5 8.098 3.5 4.384 6.56 3 8.49 3c1.896 0 3.288.938 3.346.978a.964.964 0 0 1 .256 1.32.92.92 0 0 1-1.288.263c-.025-.013-1.012-.656-2.315-.656-1.21 0-2.88.805-2.88 3.196 0 1.36 1.003 2.962 2.84 2.962 1.036 0 1.763-.277 2.186-.548V9.097h-1.43a.94.94 0 0 1-.93-.95.94.94 0 0 1 .93-.952h2.362a.94.94 0 0 1 .93.95v2.83a.962.962 0 0 1-.284.683c-.624.62-1.993 1.342-3.8 
1.342" fill="#FFF"/></symbol><symbol id="gmgavatar-io9" viewBox="0 0 16 16"><path d="M6.62 9.555c-.27 0-.488-.296-.488-.662 0-.365.22-.662.488-.662.27 0 .488.3.488.665 0 .366-.218.662-.488.662zm4.764-1.584c-.267 0-.434-.235-.43-.485.002-.22.157-.484.412-.5.287-.02.46.228.46.51 0 .28-.175.478-.442.478zM6.626 6.437c-1.324 0-2.396 1.11-2.396 2.48 0 1.368 1.072 2.478 2.396 2.478 1.323 0 2.396-1.11 2.396-2.48 0-1.368-1.073-2.478-2.396-2.478zm7.358 1.098c-.132-1.28-1.097-2.356-2.523-2.335-1.59.02-2.404 1.073-2.404 2.254 0 1.232.972 2.152 2.376 1.97a.876.876 0 0 0 .24-.073c.017-.01-.016.33-.51.564-.155.074-.656.144-.69.734-.015.252.153.572.496.702.677.256 1.447-.2 1.64-.343.79-.576 1.158-1.403 1.31-2.212.07-.384.11-.874.07-1.264zM4.67 5.747a.74.74 0 0 1-.732.747.74.74 0 0 1-.733-.747A.74.74 0 0 1 3.938 5a.74.74 0 0 1 .733.747zm-1.78.748c-.345 0-.89.264-.89.89v3.076c0 .13.086.854.852.854.67 0 .985-.564.985-.885l.008-2.947c0-.475-.267-.985-.955-.985z" fill="#FFF"/></symbol><symbol id="gmgavatar-jalopnik" viewBox="0 0 16 16"><path d="M6.93 12.98c-.16.002-.58.02-.8.02H4l.416-4.238H6.57c.22 0 .42-.184.44-.408L7.534 3H11.5l-.554 5.768a5.923 5.923 0 0 1-.146.803s-.443 3.39-3.87 3.41" fill="#FFF"/></symbol><symbol id="gmgavatar-jezebel" viewBox="0 0 16 16"><path d="M6.384 2H9v7.865c0 1-.767 2.647-1.72 3.395-.353.276-.77.584-1.28.74l.002-.614c.26-.284.382-1.113.382-1.765V2z" fill="#FFF"/></symbol><symbol id="gmgavatar-kotaku" viewBox="0 0 16 16"><path d="M6 4.414l-1.404 7.513.88-.033c.972-.037 1.864-.757 2.006-1.6l.992-5.81L6 4.414zm2.29 2.662l2.644.01c.857.005 1.19.303 1.424.77L14 11.826c-.414.365-2.35.24-2.9-1.186l-.6-1.586-.15.002h-.127C9.307 9.07 8.01 8.4 8.29 7.076zM5.213 6.49l.385-2.087-1.34-.038c-1.87-.052-2.385 1.248-2.23 2.094l3.185.03zm4.62.467l3.348-.742c.63-.39.9-1.802.457-2.215l-3.246.8c-.553.302-1.186 1.183-.56 2.157z" fill="#FFF"/></symbol><symbol id="gmgavatar-lifehacker" viewBox="0 0 16 16"><path d="M4 2v12h1.906V2.122L4 2zm3.204 
12V2.076h1.23V5.7c.338-.106.876-.15 1.245-.15 1.303 0 2.32 1.17 2.32 2.496V14h-1.2V8.046c0-.655-.54-1.173-1.2-1.173H8.434V14h-1.23z" fill="#FFF"/></symbol></svg><div class="gmg-menu__bar"><div class="avatar"><svg class="svg-gmgavatar gmgavatar-gizmodo"><use xlink:href="#gmgavatar-gizmodo" /></svg></div><h5>Related Blogs</h5><a class="svg-x-close"><svg class="svg-icon svg-close"><use xlink:href="#iconset-close" /></svg></a></div><ul class="gmg-menu__relatedblogs"><li><a href="//sploid.gizmodo.com" target="_blank">Sploid</a></li><li><a href="//paleofuture.gizmodo.com" target="_blank">Paleofuture</a></li><li><a href="//toyland.gizmodo.com" target="_blank">Toyland</a></li><li><a href="//io9.gizmodo.com" target="_blank">io9</a></li><li><a href="//fieldguide.gizmodo.com" target="_blank">Field Guide</a></li></ul><div class="gmg-menu__bar"><h5>Gawker Media Group Blogs</h5></div><ul class="gmg-menu__groupblogs"><li><a href="http://deadspin.com" target="_blank" class="avatar--deadspin" data-ga="[Kinja Menu, Blogs You May Like Click, Deadspin]"><svg class="svg-gmgavatar gmgavatar-deadspin"><use xlink:href="#gmgavatar-deadspin" /></svg>Deadspin</a></li><li><a href="http://gawker.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Gawker]"><svg class="svg-gmgavatar gmgavatar-gawker"><use xlink:href="#gmgavatar-gawker" /></svg>Gawker</a></li><li><a href="http://gizmodo.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Gizmodo]"><svg class="svg-gmgavatar gmgavatar-gizmodo"><use xlink:href="#gmgavatar-gizmodo" /></svg>Gizmodo</a></li><li><a href="http://jalopnik.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Jalopnik]"><svg class="svg-gmgavatar gmgavatar-jalopnik"><use xlink:href="#gmgavatar-jalopnik" /></svg>Jalopnik</a></li><li><a href="http://jezebel.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Jezebel]"><svg class="svg-gmgavatar gmgavatar-jezebel"><use xlink:href="#gmgavatar-jezebel" 
/></svg>Jezebel</a></li><li><a href="http://kotaku.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Kotaku]"><svg class="svg-gmgavatar gmgavatar-kotaku"><use xlink:href="#gmgavatar-kotaku" /></svg>Kotaku</a></li><li><a href="http://lifehacker.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Lifehacker]"><svg class="svg-gmgavatar gmgavatar-lifehacker"><use xlink:href="#gmgavatar-lifehacker" /></svg>Lifehacker</a></li></ul></div></button>
114 |             <!-- end of gmg-menu -->
115 |         </nav>
116 |     </div>
117 |     <div class="main__content">
118 |         <article class="post">
119 |             <header>
120 |                 <h1 class="headline"><a href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">Google Glass Is Dead, Long Live Snapchat Spectacles</a></h1>
121 |                 <div class="meta">
122 |                     <div class="meta__avatar">
123 |                         <a href="//kinja.com/rhettjonesgizmodo" class="avatar__link">
124 |                             <amp-img class="avatar__img" height="40" width="40" layout="fixed" src="https://i.kinja-img.com/gawker-media/image/upload/s--9m1FTpWG--/c_fill,fl_progressive,g_center,h_80,q_80,w_80/r0j39aslahvyohxyiis0.jpg"></amp-img>
125 |                         </a>
126 |                     </div>
127 |                     <div class="meta__text">
128 |                         <div class="meta__byline"><a href="//kinja.com/rhettjonesgizmodo" class="author">Rhett Jones</a></div><time class="meta__time" datetime="2016-09-24T09:10:00-04:00"><a href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869" target="_self" title="9/24/16 9:10am" >Saturday 9:10am</a></time>
129 |                         <div class="meta__tags">Filed to:<a class="first-tag" href="/tag/spectacle">Spectacle</a></div>
130 |                     </div>
131 |                 </div>
132 |             </header>
133 |             <div class="post-content">
134 |                 <figure>
135 |                     <amp-img src="http://i.kinja-img.com/gawker-media/image/upload/s--5EqibDaO--/c_scale,fl_progressive,q_80,w_800/kjqja8ibxoqy50nebzl8.png" width="800" height="450" alt="Google Glass Is Dead, Long Live Snapchat Spectacles" data-chomp-id="kjqja8ibxoqy50nebzl8"
136 |                         layout="responsive"></amp-img>
137 |                     <figcaption>
138 |                         Photo: Snapchat
139 |                     </figcaption>
140 |                 </figure>
141 |                 <p class="first-text">It seems like it was ages ago that Google Glass was the future that <a href="http://gizmodo.com/google-packs-up-glass-explorer-program-vows-to-try-aga-1679734877">nobody wanted</a>. The wearable tech had at least one bad design flaw—it
142 |                     seemed to get its <a href="http://www.businessinsider.com/i-was-assaulted-for-wearing-google-glass-2014-4" target="_blank">early adopters</a> <a href="http://gizmodo.com/5926570/proof-that-google-glasses-wont-win-you-admiring-glances">punched in the face</a>                    because people didn’t like the camera being pointed at them. Now, <a href="http://www.wsj.com/articles/snapchat-releases-first-hardware-product-spectacles-1474682719" target="_blank">Snapchat thinks</a> people are finally ready for
143 |                     glasses-mounted personal recording devices.</p>
144 |                 <p>Snapchat is betting that it wasn’t so much the fear of being assaulted that killed Google Glass; it’s just that people didn’t want to pay $1500 for the privilege. The <a href="http://gizmodo.com/snapchat-is-ruined-1783205205">millennial-approved</a>                    social network is jumping into the hardware game with its $130 "Spectacles." Rather than trying to do everything a smartphone can, the frames will simply focus on looking "stylish" and recording 10-second bursts of circular video.</p>
145 |                 <div class="ad-unit ad-mobile">
146 |                     <div class="ad-mobile-inner">
147 |                         <p class="ad-label proxima"><small class=" proxima">Advertisement</small></p>
148 |                         <div class="ad-container">
149 |                             <amp-ad width="300" height="250" type="doubleclick" data-loading-strategy="prefer-viewability-over-views" data-slot="/4246/gm.gizmodo.amp" json="{&quot;targeting&quot;:{&quot;page&quot;:&quot;amp&quot;,&quot;postId&quot;:1787034869,&quot;pos&quot;:&quot;amp_1&quot;,&quot;tags&quot;:[&quot;spectacle&quot;,&quot;glassholes&quot;,&quot;google glass&quot;,&quot;snapchat spectacles&quot;,&quot;circular video&quot;,&quot;evan spiegel&quot;],&quot;forcedAdZone&quot;:&quot;&quot;}}">
150 |                             </amp-ad>
151 |                         </div>
152 |                         <p class="ad-label-bottom"></p>
153 |                     </div>
154 |                 </div>
155 |                 <p>The glasses feature a fish-eye lens that captures videos at a 115-degree angle, which is closer to the eyes’ natural field of view. The user taps a button on the hinge, a ring of lights indicates to strangers that they are being filmed
156 |                     and a short clip is recorded. (Good luck with that.) The footage is then automatically pushed to Snapchat memories.</p>
157 |                 <p>CEO <a href="http://gizmodo.com/search?q=Evan+Spiegel">Evan Spiegel</a> recounted to <a href="http://www.wsj.com/articles/snapchat-releases-first-hardware-product-spectacles-1474682719" target="_blank"><em>WSJ Magazine</em></a> the story
158 |                     of his eureka moment with the Spectacles:</p>
159 |                 <blockquote>
160 |                     <p>"It was our first vacation, and we went to Big Sur for a day or two. We were walking through the woods, stepping over logs, looking up at the beautiful trees. And when I got the footage back and watched it, I could see my own memory,
161 |                         through my own eyes—it was unbelievable."</p>
162 |                 </blockquote>
163 |                 <p>Ok, I’ll admit that a demonstration of the circular video is kind of cool:</p>
164 |                 <div class="taboola-mobile-second ad-mobile">
165 |                     <div class="ad-mobile-inner">
166 |                         <p class="ad-label proxima"><small class=" proxima">Sponsored</small></p>
167 |                         <amp-embed width="100" height="100" type="taboola" layout="responsive" heights="(min-width:1082px) 54%, (min-width:572px) 61%, 74%" data-publisher="gawkermedia-network" data-mode="thumbnails-e-third-amp" data-placement="Mobile Mid-Article Third AMP" data-article="auto">
168 |                         </amp-embed>
169 |                         <p class="ad-label-bottom"></p>
170 |                     </div>
171 |                 </div>
172 |                 <p>
173 |                     <amp-twitter width="486" height="657" data-tweetid="779592486461313025" layout="responsive"></amp-twitter>
174 |                 </p>
175 |                 <p>For now, it seems that the company is taking the step into hardware cautiously and it plans to roll out the glasses to the public slowly and get a feel for how much demand is out there. Spiegel also refers to the product as a "toy" to
176 |                     downplay any perception that the company considers this a groundbreaking innovation.</p>
177 |                 <p>Spiegel is also using the launch of his new toy to announce the corporate renaming of Snapchat to just Snap Inc., to help with product searches: "You can search Snapchat or Spectacles for the fun stuff and leave Snap Inc. for the Wall
178 |                     Street crowd."
179 |                 </p>
180 |                 <p>Just in time for October, here’s the summery promotion video for Snapchat Spectacles.</p>
181 |                 <p class="has-video media-large">
182 |                     <span class="clear-both flex-video widescreen">
183 |                     <amp-youtube data-videoid="XqkOFLBSJR8" layout="responsive" width="800" height="450"></amp-youtube>
184 |                 </span>
185 |                 </p>
186 |                 <p>[<a href="http://www.wsj.com/articles/snapchat-releases-first-hardware-product-spectacles-1474682719" target="_blank">WSJ Magazine</a>]</p>
187 |                 <!-- core-decorated -->
188 |             </div>
189 |         </article>
190 |         <div class="amp-comments">
191 |             <amp-iframe width=300 height=300 sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" layout="responsive" frameborder="0" resizable src="https://api.kinja.com/embed/comments/1787034869?blogId=4">
192 |                 <div overflow tabindex=0 role=button aria-label="Read more">Read more!</div>
193 |             </amp-iframe>
194 |             <footer class="referenced-comment__footer-container"><a href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869#js_discussion-region" target="_blank" class="comment-inset__full-discussion icon--svg"><span>View full discussion</span><span class="svg-icon__circle"><svg class="svg-icon small svg-chevron--small chevron--right"><use xlink:href="#iconset-chevron-right--small" /></svg></span></a></footer>
195 |         </div>
196 |         <footer class="footer">
197 |             <div class="subscribe-module"><span class="subscribe-module__copy">Want Gizmodo’s email newsletter?</span><a class="subscribe-module__btn" href="//gizmodo.com/newsletter" target="_blank">Subscribe</a></div>
198 |             <amp-embed width=100 height=100 type=taboola layout=responsive heights="(min-width:1750px) 265%, (min-width:1524px) 271%, (min-width:1350px) 278%, (min-width:1176px) 285%, (min-width:1029px) 294%, (min-width:906px) 304%, (min-width:800px) 315%, (min-width:775px) 703%, (min-width:663px) 717%, (min-width:616px) 732%, (min-width:601px) 749%, (min-width:590px) 925%, (min-width:553px) 942%, (min-width:519px) 965%, (min-width:485px) 985%, (min-width:471px) 1006%, (min-width:450px) 1028%, (min-width:432px) 1062%, (min-width:408px) 1085%, (min-width:384px) 1110%, (min-width:362px) 1136%, (min-width:339px) 1165%, (min-width:326px) 1195%, 1229%"
199 |                 data-publisher="gawkermedia-network" data-mode="thumbnails-a_AMP" data-placement="Below Article Thumbnails AMP" data-target_type="mix" data-article="auto"></amp-embed>
200 |             <div class="footer__links">
201 |                 <ul class="footer__list">
202 |                     <li><a href="/about" data-ga="[Footer, click, about]">About Blog</a></li>
203 |                     <li><a href="http://help.gawker.com/" target="_blank" data-ga="[Footer, click, Help]">Need Help?</a></li>
204 |                     <li><a href="http://legal.kinja.com/content-guidelines-90185358" target="_blank" data-ga="[Footer, click, Content Guidelines]">Content Guide</a></li>
205 |                 </ul>
206 |                 <ul class="footer__list footer__list--small">
207 |                     <li><a href="http://advertising.gawker.com/about/index.php#contact" data-ga="[Footer, click, Permissions]" target="_blank">Permissions</a></li>
208 |                     <li><a href="http://legal.kinja.com/privacy-policy-1750920278" data-ga="[Footer, click, Privacy]" target="_blank">Privacy</a></li>
209 |                     <li><a href="http://legal.kinja.com/kinja-terms-of-use-90161644" data-ga="[Footer, click, Terms]" target="_blank">Terms of Use</a></li>
210 |                     <li><a href="http://advertising.gawker.com/" data-ga="[Footer, click, Advertising]" target="_blank">Advertising</a></li>
211 |                     <li><a href="http://gawker.com/careers" data-ga="[Footer, click, Jobs]" target="_blank">Jobs</a></li>
212 |                     <li><a href="http://feeds.gawker.com/gizmodo/full" data-ga="[Footer, click, RSS]" target="_blank">RSS</a></li>
213 |                 </ul>
214 |             </div>
215 |         </footer>
216 |     </div>
217 | </body>
218 | 
219 | </html>
220 | 


--------------------------------------------------------------------------------
/tests/templates/amp/list_json.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         [{
 14 |             "url": "https://vimeo.com/189955079",
 15 |             "thumbnailUrl": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 16 |             "embedUrl": "https://player.vimeo.com/video/189955079",
 17 |             "name": "Piper",
 18 |             "description": "The Pixar imaginative group examined the inventive procedure in creating Piper, the main character who is an infant sandpiper taking in the ropes out on the shoreline&hellip;",
 19 |             "height": 1080,
 20 |             "width": 1920,
 21 |             "playerType": "HTML5 Flash",
 22 |             "videoQuality": "HD",
 23 |             "duration": "PT00H06M06S",
 24 |             "uploadDate": "2016-11-02T11:48:33-04:00",
 25 |             "thumbnail": {
 26 |                 "@type": "ImageObject",
 27 |                 "url": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 28 |                 "width": 1280,
 29 |                 "height": 720
 30 |             },
 31 |             "author": {
 32 |                 "@type": "Person",
 33 |                 "name": "Citizen Consulting",
 34 |                 "url": "https://vimeo.com/citizenconsulting"
 35 |             },
 36 |             "potentialAction": {
 37 |                 "@type": "ViewAction",
 38 |                 "target": "vimeo://app.vimeo.com/videos/189955079"
 39 |             },
 40 |             "interactionCount": 78252,
 41 |             "keywords": "[Pixar,Short,Piper]",
 42 |             "@type": "VideoObject",
 43 |             "@context": "http://schema.org"
 44 |         }, {
 45 |             "itemListElement": [{
 46 |                 "@type": "ListItem",
 47 |                 "position": 1,
 48 |                 "item": {
 49 |                     "@id": "https://vimeo.com/citizenconsulting",
 50 |                     "name": "Citizen Consulting"
 51 |                 }
 52 |             }, {
 53 |                 "@type": "ListItem",
 54 |                 "position": 2,
 55 |                 "item": {
 56 |                     "@id": "https://vimeo.com/citizenconsulting/videos",
 57 |                     "name": "Videos"
 58 |                 }
 59 |             }],
 60 |             "@type": "BreadcrumbList",
 61 |             "@context": "http://schema.org"
 62 |         }]
 63 |     </script>
 64 |     <style amp-boilerplate>
 65 |         body {
 66 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 67 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 68 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 69 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 70 |         }
 71 | 
 72 |         @-webkit-keyframes -amp-start {
 73 |             from {
 74 |                 visibility: hidden
 75 |             }
 76 |             to {
 77 |                 visibility: visible
 78 |             }
 79 |         }
 80 | 
 81 |         @-moz-keyframes -amp-start {
 82 |             from {
 83 |                 visibility: hidden
 84 |             }
 85 |             to {
 86 |                 visibility: visible
 87 |             }
 88 |         }
 89 | 
 90 |         @-ms-keyframes -amp-start {
 91 |             from {
 92 |                 visibility: hidden
 93 |             }
 94 |             to {
 95 |                 visibility: visible
 96 |             }
 97 |         }
 98 | 
 99 |         @-o-keyframes -amp-start {
100 |             from {
101 |                 visibility: hidden
102 |             }
103 |             to {
104 |                 visibility: visible
105 |             }
106 |         }
107 | 
108 |         @keyframes -amp-start {
109 |             from {
110 |                 visibility: hidden
111 |             }
112 |             to {
113 |                 visibility: visible
114 |             }
115 |         }
116 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
117 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
118 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
119 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
120 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
121 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
122 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
123 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
124 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
125 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
126 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
127 | </head>
128 | 
129 | <body class="gizmodo blog-group-gizmodo">
130 | </body>
131 | 
132 | </html>
133 | 


--------------------------------------------------------------------------------
/tests/templates/amp/list_thumbnail_image.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         [{
 14 |             "url": "https://vimeo.com/189955079",
 15 |             "thumbnailUrl": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 16 |             "embedUrl": "https://player.vimeo.com/video/189955079",
 17 |             "name": "Piper",
 18 |             "description": "The Pixar imaginative group examined the inventive procedure in creating Piper, the main character who is an infant sandpiper taking in the ropes out on the shoreline&hellip;",
 19 |             "height": 1080,
 20 |             "width": 1920,
 21 |             "playerType": "HTML5 Flash",
 22 |             "videoQuality": "HD",
 23 |             "duration": "PT00H06M06S",
 24 |             "uploadDate": "2016-11-02T11:48:33-04:00",
 25 |             "thumbnail": {
 26 |                 "@list": [{
 27 |                     "@type": "ImageObject",
 28 |                     "url": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 29 |                     "width": 1280,
 30 |                     "height": 720
 31 |                 }]
 32 |             },
 33 |             "author": {
 34 |                 "@type": "Person",
 35 |                 "name": "Citizen Consulting",
 36 |                 "url": "https://vimeo.com/citizenconsulting"
 37 |             },
 38 |             "potentialAction": {
 39 |                 "@type": "ViewAction",
 40 |                 "target": "vimeo://app.vimeo.com/videos/189955079"
 41 |             },
 42 |             "interactionCount": 78252,
 43 |             "keywords": "[Pixar,Short,Piper]",
 44 |             "@type": "VideoObject",
 45 |             "@context": "http://schema.org"
 46 |         }, {
 47 |             "itemListElement": [{
 48 |                 "@type": "ListItem",
 49 |                 "position": 1,
 50 |                 "item": {
 51 |                     "@id": "https://vimeo.com/citizenconsulting",
 52 |                     "name": "Citizen Consulting"
 53 |                 }
 54 |             }, {
 55 |                 "@type": "ListItem",
 56 |                 "position": 2,
 57 |                 "item": {
 58 |                     "@id": "https://vimeo.com/citizenconsulting/videos",
 59 |                     "name": "Videos"
 60 |                 }
 61 |             }],
 62 |             "@type": "BreadcrumbList",
 63 |             "@context": "http://schema.org"
 64 |         }]
 65 |     </script>
 66 |     <style amp-boilerplate>
 67 |         body {
 68 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 69 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 70 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 71 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 72 |         }
 73 | 
 74 |         @-webkit-keyframes -amp-start {
 75 |             from {
 76 |                 visibility: hidden
 77 |             }
 78 |             to {
 79 |                 visibility: visible
 80 |             }
 81 |         }
 82 | 
 83 |         @-moz-keyframes -amp-start {
 84 |             from {
 85 |                 visibility: hidden
 86 |             }
 87 |             to {
 88 |                 visibility: visible
 89 |             }
 90 |         }
 91 | 
 92 |         @-ms-keyframes -amp-start {
 93 |             from {
 94 |                 visibility: hidden
 95 |             }
 96 |             to {
 97 |                 visibility: visible
 98 |             }
 99 |         }
100 | 
101 |         @-o-keyframes -amp-start {
102 |             from {
103 |                 visibility: hidden
104 |             }
105 |             to {
106 |                 visibility: visible
107 |             }
108 |         }
109 | 
110 |         @keyframes -amp-start {
111 |             from {
112 |                 visibility: hidden
113 |             }
114 |             to {
115 |                 visibility: visible
116 |             }
117 |         }
118 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
119 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
120 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
121 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
122 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
123 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
124 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
125 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
126 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
127 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
128 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
129 | </head>
130 | 
131 | <body class="gizmodo blog-group-gizmodo">
132 | </body>
133 | 
134 | </html>
135 | 


--------------------------------------------------------------------------------
/tests/templates/amp/str_image.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         {
 14 |             "@context": "http://schema.org",
 15 |             "@type": "NewsArticle",
 16 |             "url": "http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869",
 17 |             "headline": "Google Glass Is Dead, Long Live Snapchat Spectacles",
 18 |             "author": {
 19 |                 "@type": "Person",
 20 |                 "name": "Rhett Jones"
 21 |             },
 22 |             "datePublished": "2016-09-24T09:10:00-04:00",
 23 |             "dateModified": "2016-09-24T10:50:48-04:00",
 24 |             "publisher": {
 25 |                 "@type": "Organization",
 26 |                 "name": "gizmodo.com",
 27 |                 "logo": {
 28 |                     "@type": "ImageObject",
 29 |                     "url": "https://i.kinja-img.com/gawker-media/image/upload/s--ay4UlTaU--/wvhsuflzmeoo0zr9ex55.png"
 30 |                 }
 31 |             },
 32 |             "mainEntityOfPage": true,
 33 |             "image": "https://i.kinja-img.com/gawker-media/image/upload/kjqja8ibxoqy50nebzl8.png"
 34 |         }
 35 |     </script>
 36 |     <style amp-boilerplate>
 37 |         body {
 38 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 39 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 40 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 41 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 42 |         }
 43 | 
 44 |         @-webkit-keyframes -amp-start {
 45 |             from {
 46 |                 visibility: hidden
 47 |             }
 48 |             to {
 49 |                 visibility: visible
 50 |             }
 51 |         }
 52 | 
 53 |         @-moz-keyframes -amp-start {
 54 |             from {
 55 |                 visibility: hidden
 56 |             }
 57 |             to {
 58 |                 visibility: visible
 59 |             }
 60 |         }
 61 | 
 62 |         @-ms-keyframes -amp-start {
 63 |             from {
 64 |                 visibility: hidden
 65 |             }
 66 |             to {
 67 |                 visibility: visible
 68 |             }
 69 |         }
 70 | 
 71 |         @-o-keyframes -amp-start {
 72 |             from {
 73 |                 visibility: hidden
 74 |             }
 75 |             to {
 76 |                 visibility: visible
 77 |             }
 78 |         }
 79 | 
 80 |         @keyframes -amp-start {
 81 |             from {
 82 |                 visibility: hidden
 83 |             }
 84 |             to {
 85 |                 visibility: visible
 86 |             }
 87 |         }
 88 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
 89 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
 90 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
 91 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
 92 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
 93 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
 94 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
 95 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
 96 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
 97 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
 98 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
 99 | </head>
100 | 
101 | <body class="gizmodo blog-group-gizmodo">
102 |     <div class="container">
103 |         <amp-analytics type="googleanalytics" id="ga-domain">
104 |         </amp-analytics>
105 |     </div>
106 |     <div class="nav__wrap">
107 |         <!-- for affixing to top -->
108 |         <nav class="nav">
109 |             <a class="nav__menu" tabindex="0">
110 |                 <div class="burger"><span></span><span></span><span></span></div>
111 |             </a>
112 |             <div class="nav__inner"><a href="//gizmodo.com" class="logo" title="Gizmodo" data-ga="[Kinja Menu, Logo/Blog Title Click, js_pageType]"><svg class="gmg-logo gmg-logo--gizmodo" width="218" height="32" viewBox="0 0 218 32"><path d="M35.726 28.18V3.8c0-1.267.282-2.217.847-2.85.564-.633 1.294-.95 2.19-.95.92 0 1.668.314 2.24.94.57.626.857 1.58.857 2.86v24.38c0 1.28-.286 2.236-.857 2.87-.572.633-1.32.95-2.24.95-.882 0-1.61-.32-2.18-.96-.57-.64-.857-1.595-.857-2.86zM202.37 32c-8.62 0-15.63-7.174-15.63-15.99 0-8.817 7.01-15.99 15.63-15.99C210.99.02 218 7.193 218 16.01c0 8.816-7.01 15.99-15.63 15.99zm0-25.507c-5.13 0-9.304 4.27-9.304 9.517s4.174 9.517 9.304 9.517 9.302-4.27 9.302-9.517-4.173-9.517-9.302-9.517zM132.2 32c-8.618 0-15.63-7.174-15.63-15.99 0-8.817 7.012-15.99 15.63-15.99 8.62 0 15.632 7.173 15.632 15.99 0 8.816-7.012 15.99-15.63 15.99zm0-25.507c-5.128 0-9.302 4.27-9.302 9.517s4.174 9.517 9.303 9.517c5.13 0 9.304-4.27 9.304-9.517s-4.173-9.517-9.303-9.517zM181.73 16v-.004c0-3.53-1.126-6.793-3.024-9.44-2.76-3.852-7.162-6.394-12.142-6.544-.154-.005-.31-.012-.465-.012l-7.85.01c-1.34 0-2.31.312-2.91.937-.6.624-.9 1.633-.9 3.027v24.052c0 1.394.3 2.402.9 3.027.6.625 1.57.937 2.91.937l7.85.01c.155 0 .31-.008.464-.012 4.98-.15 9.38-2.692 12.142-6.543 1.898-2.648 3.023-5.912 3.025-9.44V16zm-14.522 9.45c-.757.05-1.57.058-2.446.058h-4.103V6.492h4.102c.875 0 1.69.01 2.446.06 2.713.33 5.07 1.86 6.55 4.056 1.032 1.53 1.64 3.38 1.644 5.373v.039c-.004 1.993-.612 3.843-1.643 5.373-1.482 2.197-3.84 3.724-6.552 4.056zM107.718.14c-1.142-.363-2.376.097-3.04 1.133L95.25 15.962 85.83 1.275C85.163.24 83.93-.22 82.786.142c-1.142.364-1.92 1.465-1.92 2.713v26.102c0 1.566 1.213 2.836 2.71 2.836 1.497 0 2.71-1.27 2.71-2.836V12.17l6.693 
10.427.013.02.008.014c.01.013.02.025.027.038.042.062.085.124.132.184.02.026.042.05.063.075.037.043.074.088.114.13l.112.108.082.077c.054.046.108.087.164.128.016.01.03.024.046.035l.007.005c.06.04.12.08.184.116.017.01.034.022.05.03.048.028.097.05.146.073.274.13.562.212.85.242h.007c.057.007.115.008.17.01.033.002.065.004.095.004.033 0 .064-.002.095-.004.058-.002.115-.003.172-.01h.005c.29-.03.577-.112.85-.243.05-.025.1-.046.145-.073l.053-.03c.063-.038.124-.076.184-.117l.007-.005c.016-.01.03-.024.046-.035.056-.04.11-.082.164-.128.03-.025.056-.052.085-.08l.11-.105c.04-.042.076-.087.113-.13.022-.025.044-.05.064-.075.047-.06.09-.122.132-.184.007-.013.017-.025.025-.037l.008-.012.014-.02 6.69-10.428v16.787c0 1.566 1.214 2.836 2.713 2.836 1.496 0 2.71-1.27 2.71-2.836V2.855c0-1.248-.78-2.35-1.92-2.713zM71.64 31.027h-20.27c-1.066 0-2.25-.682-2.69-1.744-.438-1.06-.528-2.505.416-3.656l16.58-19.68H51.976c-1.497 0-2.71-1.327-2.71-2.964S50.478.346 51.975.346h18.47c1.068 0 2.17.174 3.136 1.255.88.988.842 3.12-.145 4.392L57.417 25.1h14.22c1.498 0 2.59 1.327 2.59 2.963 0 1.637-1.092 2.964-2.59 2.964zM15.517 32c-4.498 0-8.577-1.824-11.486-5.137C1.47 23.945 0 20.1 0 16.313 0 4.428 9.656 0 15.743 0c5.985 0 10.378 3.003 10.562 3.13 1.348.935 1.71 2.826.806 4.223-.9 1.392-2.716 1.768-4.063.843-.078-.052-3.195-2.108-7.304-2.108-3.815 0-9.076 2.576-9.076 10.224 0 4.35 3.163 9.483 8.964 9.483 3.265 0 5.56-.89 6.893-1.758v-4.525h-4.52c-1.622 0-2.937-1.363-2.937-3.044 0-1.68 1.315-3.044 2.938-3.044h7.457c1.624 0 2.94 1.363 2.94 3.044v9.053c0 .825-.323 1.612-.893 2.186-1.97 1.98-6.29 4.294-11.99 4.294z" fill="#000" fill-rule="evenodd"/></svg></a></div>
113 |             <div class="overlay"></div><button class="gmg-menu" tabindex="0"><div class="gmg-menu__inner"><svg id="svggroup--gmgavatars" class="hide"><symbol id="gmgavatar-default" viewBox="0 0 16 16"><path d="M3.11 7.522s-.026-.07-.017-.085c.282-.483 1.8-3.095 1.977-3.397.328-.563.7-1.036 1.554-1.04C7.9 2.998 13 3.006 13 3.006s-1.34 2.305-1.542 2.65c-.2.345-.59.95-1.65.93-.96-.02-4.258-.046-4.7-.048-.44 0-1.152.138-1.57.506-.354.31-.428.48-.428.48zM3 7.63s.047-.058.065-.058h3.93c.652 0 1.24.086 1.68.82.65 1.084 2.937 5.053 2.937 5.053H8.546c-.4 0-1.12-.03-1.633-.955-.467-.84-1.836-3.247-2.057-3.63-.22-.38-.7-.926-1.227-1.102C3.18 7.608 3 7.63 3 7.63z" fill="#FFF" fill-rule="evenodd"/></symbol><symbol id="gmgavatar-deadspin" viewBox="0 0 16 16"><path d="M12.448 10.14c-.003.022-.495 2.86-3.22 2.86h-5.88c.003 0-.157-.008-.26-.132-.077-.092-.104-.226-.08-.398.062-.428.518-3.933.76-5.786h3.39l-.477 3.18h2.6l.474-3.18h3.14l-.444 3.455zm.503-3.89H3.82V3h6.515c.955 0 1.667.278 2.116.827.717.874.546 2.165.5 2.424z" fill="#FFF"/></symbol><symbol id="gmgavatar-gawker" viewBox="0 0 16 16"><path d="M12.21 4v3.168h-.195c-.644-1.122-1.66-2.903-3.935-2.903-2.806 0-2.918 2.722-2.918 4.04 0 1.815.225 3.433 2.886 3.433.71 0 2.032-.248 2.194-.892V9l-.565-.61H8.5v-.267H14v.266h-1.017l-.725.623v1.766c0 .15.048.312.13.51l-.098.1c-.08-.083-.226-.13-.452-.13-.87 0-2.274.74-4.08.74C4.598 12 3 10.005 3 8.206 3 5.42 5.564 4 8 4c1.274 0 2.387.594 3.048.594.564 0 .822-.363.985-.594h.176z" fill="#FFF"/></symbol><symbol id="gmgavatar-gizmodo" viewBox="0 0 16 16"><path d="M8.418 13c-1.426 0-2.718-.57-3.64-1.605C3.966 10.483 3.5 9.28 3.5 8.098 3.5 4.384 6.56 3 8.49 3c1.896 0 3.288.938 3.346.978a.964.964 0 0 1 .256 1.32.92.92 0 0 1-1.288.263c-.025-.013-1.012-.656-2.315-.656-1.21 0-2.88.805-2.88 3.196 0 1.36 1.003 2.962 2.84 2.962 1.036 0 1.763-.277 2.186-.548V9.097h-1.43a.94.94 0 0 1-.93-.95.94.94 0 0 1 .93-.952h2.362a.94.94 0 0 1 .93.95v2.83a.962.962 0 0 1-.284.683c-.624.62-1.993 1.342-3.8 
1.342" fill="#FFF"/></symbol><symbol id="gmgavatar-io9" viewBox="0 0 16 16"><path d="M6.62 9.555c-.27 0-.488-.296-.488-.662 0-.365.22-.662.488-.662.27 0 .488.3.488.665 0 .366-.218.662-.488.662zm4.764-1.584c-.267 0-.434-.235-.43-.485.002-.22.157-.484.412-.5.287-.02.46.228.46.51 0 .28-.175.478-.442.478zM6.626 6.437c-1.324 0-2.396 1.11-2.396 2.48 0 1.368 1.072 2.478 2.396 2.478 1.323 0 2.396-1.11 2.396-2.48 0-1.368-1.073-2.478-2.396-2.478zm7.358 1.098c-.132-1.28-1.097-2.356-2.523-2.335-1.59.02-2.404 1.073-2.404 2.254 0 1.232.972 2.152 2.376 1.97a.876.876 0 0 0 .24-.073c.017-.01-.016.33-.51.564-.155.074-.656.144-.69.734-.015.252.153.572.496.702.677.256 1.447-.2 1.64-.343.79-.576 1.158-1.403 1.31-2.212.07-.384.11-.874.07-1.264zM4.67 5.747a.74.74 0 0 1-.732.747.74.74 0 0 1-.733-.747A.74.74 0 0 1 3.938 5a.74.74 0 0 1 .733.747zm-1.78.748c-.345 0-.89.264-.89.89v3.076c0 .13.086.854.852.854.67 0 .985-.564.985-.885l.008-2.947c0-.475-.267-.985-.955-.985z" fill="#FFF"/></symbol><symbol id="gmgavatar-jalopnik" viewBox="0 0 16 16"><path d="M6.93 12.98c-.16.002-.58.02-.8.02H4l.416-4.238H6.57c.22 0 .42-.184.44-.408L7.534 3H11.5l-.554 5.768a5.923 5.923 0 0 1-.146.803s-.443 3.39-3.87 3.41" fill="#FFF"/></symbol><symbol id="gmgavatar-jezebel" viewBox="0 0 16 16"><path d="M6.384 2H9v7.865c0 1-.767 2.647-1.72 3.395-.353.276-.77.584-1.28.74l.002-.614c.26-.284.382-1.113.382-1.765V2z" fill="#FFF"/></symbol><symbol id="gmgavatar-kotaku" viewBox="0 0 16 16"><path d="M6 4.414l-1.404 7.513.88-.033c.972-.037 1.864-.757 2.006-1.6l.992-5.81L6 4.414zm2.29 2.662l2.644.01c.857.005 1.19.303 1.424.77L14 11.826c-.414.365-2.35.24-2.9-1.186l-.6-1.586-.15.002h-.127C9.307 9.07 8.01 8.4 8.29 7.076zM5.213 6.49l.385-2.087-1.34-.038c-1.87-.052-2.385 1.248-2.23 2.094l3.185.03zm4.62.467l3.348-.742c.63-.39.9-1.802.457-2.215l-3.246.8c-.553.302-1.186 1.183-.56 2.157z" fill="#FFF"/></symbol><symbol id="gmgavatar-lifehacker" viewBox="0 0 16 16"><path d="M4 2v12h1.906V2.122L4 2zm3.204 
12V2.076h1.23V5.7c.338-.106.876-.15 1.245-.15 1.303 0 2.32 1.17 2.32 2.496V14h-1.2V8.046c0-.655-.54-1.173-1.2-1.173H8.434V14h-1.23z" fill="#FFF"/></symbol></svg><div class="gmg-menu__bar"><div class="avatar"><svg class="svg-gmgavatar gmgavatar-gizmodo"><use xlink:href="#gmgavatar-gizmodo" /></svg></div><h5>Related Blogs</h5><a class="svg-x-close"><svg class="svg-icon svg-close"><use xlink:href="#iconset-close" /></svg></a></div><ul class="gmg-menu__relatedblogs"><li><a href="//sploid.gizmodo.com" target="_blank">Sploid</a></li><li><a href="//paleofuture.gizmodo.com" target="_blank">Paleofuture</a></li><li><a href="//toyland.gizmodo.com" target="_blank">Toyland</a></li><li><a href="//io9.gizmodo.com" target="_blank">io9</a></li><li><a href="//fieldguide.gizmodo.com" target="_blank">Field Guide</a></li></ul><div class="gmg-menu__bar"><h5>Gawker Media Group Blogs</h5></div><ul class="gmg-menu__groupblogs"><li><a href="http://deadspin.com" target="_blank" class="avatar--deadspin" data-ga="[Kinja Menu, Blogs You May Like Click, Deadspin]"><svg class="svg-gmgavatar gmgavatar-deadspin"><use xlink:href="#gmgavatar-deadspin" /></svg>Deadspin</a></li><li><a href="http://gawker.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Gawker]"><svg class="svg-gmgavatar gmgavatar-gawker"><use xlink:href="#gmgavatar-gawker" /></svg>Gawker</a></li><li><a href="http://gizmodo.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Gizmodo]"><svg class="svg-gmgavatar gmgavatar-gizmodo"><use xlink:href="#gmgavatar-gizmodo" /></svg>Gizmodo</a></li><li><a href="http://jalopnik.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Jalopnik]"><svg class="svg-gmgavatar gmgavatar-jalopnik"><use xlink:href="#gmgavatar-jalopnik" /></svg>Jalopnik</a></li><li><a href="http://jezebel.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Jezebel]"><svg class="svg-gmgavatar gmgavatar-jezebel"><use xlink:href="#gmgavatar-jezebel" 
/></svg>Jezebel</a></li><li><a href="http://kotaku.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Kotaku]"><svg class="svg-gmgavatar gmgavatar-kotaku"><use xlink:href="#gmgavatar-kotaku" /></svg>Kotaku</a></li><li><a href="http://lifehacker.com" target="_blank" data-ga="[Kinja Menu, Blogs You May Like Click, Lifehacker]"><svg class="svg-gmgavatar gmgavatar-lifehacker"><use xlink:href="#gmgavatar-lifehacker" /></svg>Lifehacker</a></li></ul></div></button>
114 |             <!-- end of gmg-menu -->
115 |         </nav>
116 |     </div>
117 |     <div class="main__content">
118 |         <article class="post">
119 |             <header>
120 |                 <h1 class="headline"><a href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">Google Glass Is Dead, Long Live Snapchat Spectacles</a></h1>
121 |                 <div class="meta">
122 |                     <div class="meta__avatar">
123 |                         <a href="//kinja.com/rhettjonesgizmodo" class="avatar__link">
124 |                             <amp-img class="avatar__img" height="40" width="40" layout="fixed" src="https://i.kinja-img.com/gawker-media/image/upload/s--9m1FTpWG--/c_fill,fl_progressive,g_center,h_80,q_80,w_80/r0j39aslahvyohxyiis0.jpg" />
125 |                         </a>
126 |                     </div>
127 |                     <div class="meta__text">
128 |                         <div class="meta__byline"><a href="//kinja.com/rhettjonesgizmodo" class="author">Rhett Jones</a></div><time class="meta__time" datetime="2016-09-24T09:10:00-04:00"><a href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869" target="_self" title="9/24/16 9:10am" >Saturday 9:10am</a></time>
129 |                         <div class="meta__tags">Filed to:<a class="first-tag" href="/tag/spectacle">Spectacle</a></div>
130 |                     </div>
131 |                 </div>
132 |             </header>
133 |             <div class="post-content">
134 |                 <figure>
135 |                     <amp-img src="http://i.kinja-img.com/gawker-media/image/upload/s--5EqibDaO--/c_scale,fl_progressive,q_80,w_800/kjqja8ibxoqy50nebzl8.png" width="800" height="450" alt="Google Glass Is Dead, Long Live Snapchat Spectacles" data-chomp-id="kjqja8ibxoqy50nebzl8"
136 |                         layout="responsive"></amp-img>
137 |                     <figcaption>
138 |                         Photo: Snapchat
139 |                     </figcaption>
140 |                 </figure>
141 |                 <p class="first-text">It seems like it was ages ago that Google Glass was the future that <a href="http://gizmodo.com/google-packs-up-glass-explorer-program-vows-to-try-aga-1679734877">nobody wanted</a>. The wearable tech had at least one bad design flaw—it
142 |                     seemed to get its <a href="http://www.businessinsider.com/i-was-assaulted-for-wearing-google-glass-2014-4" target="_blank">early adopters</a> <a href="http://gizmodo.com/5926570/proof-that-google-glasses-wont-win-you-admiring-glances">punched in the face</a>                    because people didn’t like the camera being pointed at them. Now, <a href="http://www.wsj.com/articles/snapchat-releases-first-hardware-product-spectacles-1474682719" target="_blank">Snapchat thinks</a> people are finally ready for
143 |                     glasses-mounted personal recording devices.</p>
144 |                 <p>Snapchat is betting that it wasn’t so much the fear of being assaulted that killed Google Glass; it’s just that people didn’t want to pay $1500 for the privilege. The <a href="http://gizmodo.com/snapchat-is-ruined-1783205205">millennial-approved</a>                    social network is jumping into the hardware game with its $130 "Spectacles." Rather than trying to do everything a smartphone can, the frames will simply focus on looking "stylish" and recording 10-second bursts of circular video.</p>
145 |                 <div class="ad-unit ad-mobile">
146 |                     <div class="ad-mobile-inner">
147 |                         <p class="ad-label proxima"><small class=" proxima">Advertisement</small></p>
148 |                         <div class="ad-container">
149 |                             <amp-ad width="300" height="250" type="doubleclick" data-loading-strategy="prefer-viewability-over-views" data-slot="/4246/gm.gizmodo.amp" json="{&quot;targeting&quot;:{&quot;page&quot;:&quot;amp&quot;,&quot;postId&quot;:1787034869,&quot;pos&quot;:&quot;amp_1&quot;,&quot;tags&quot;:[&quot;spectacle&quot;,&quot;glassholes&quot;,&quot;google glass&quot;,&quot;snapchat spectacles&quot;,&quot;circular video&quot;,&quot;evan spiegel&quot;],&quot;forcedAdZone&quot;:&quot;&quot;}}">
150 |                             </amp-ad>
151 |                         </div>
152 |                         <p class="ad-label-bottom"></p>
153 |                     </div>
154 |                 </div>
155 |                 <p>The glasses feature a fish-eye lens that captures videos at a 115-degree angle, which is closer to the eyes’ natural field of view. The user taps a button on the hinge, a ring of lights indicates to strangers that they are being filmed
156 |                     and a short clip is recorded. (Good luck with that.) The footage is then automatically pushed to Snapchat memories.</p>
157 |                 <p>CEO <a href="http://gizmodo.com/search?q=Evan+Spiegel">Evan Spiegel</a> recounted to <a href="http://www.wsj.com/articles/snapchat-releases-first-hardware-product-spectacles-1474682719" target="_blank"><em>WSJ Magazine</em></a> the story
158 |                     of his eureka moment with the Spectacles:</p>
159 |                 <blockquote>
160 |                     <p>"It was our first vacation, and we went to Big Sur for a day or two. We were walking through the woods, stepping over logs, looking up at the beautiful trees. And when I got the footage back and watched it, I could see my own memory,
161 |                         through my own eyes—it was unbelievable."</p>
162 |                 </blockquote>
163 |                 <p>Ok, I’ll admit that a demonstration of the circular video is kind of cool:</p>
164 |                 <div class="taboola-mobile-second ad-mobile">
165 |                     <div class="ad-mobile-inner">
166 |                         <p class="ad-label proxima"><small class=" proxima">Sponsored</small></p>
167 |                         <amp-embed width="100" height="100" type="taboola" layout="responsive" heights="(min-width:1082px) 54%, (min-width:572px) 61%, 74%" data-publisher="gawkermedia-network" data-mode="thumbnails-e-third-amp" data-placement="Mobile Mid-Article Third AMP" data-article="auto">
168 |                         </amp-embed>
169 |                         <p class="ad-label-bottom"></p>
170 |                     </div>
171 |                 </div>
172 |                 <p>
173 |                     <amp-twitter width="486" height="657" data-tweetid="779592486461313025" layout="responsive"></amp-twitter>
174 |                 </p>
175 |                 <p>For now, it seems that the company is taking the step into hardware cautiously and it plans to roll out the glasses to the public slowly and get a feel for how much demand is out there. Spiegel also refers to the product as a "toy" to
176 |                     downplay any perception that the company considers this a groundbreaking innovation.</p>
177 |                 <p>Spiegel is also using the launch of his new toy to announce the corporate renaming of Snapchat to just Snap Inc., to help with product searches: "You can search Snapchat or Spectacles for the fun stuff and leave Snap Inc. for the Wall
178 |                     Street crowd."
179 |                 </p>
180 |                 <p>Just in time for October, here’s the summery promotion video for Snapchat Spectacles.</p>
181 |                 <p class="has-video media-large">
182 |                     <span class="clear-both flex-video widescreen">
183 |                     <amp-youtube data-videoid="XqkOFLBSJR8" layout="responsive" width="800" height="450"></amp-youtube>
184 |                 </span>
185 |                 </p>
186 |                 <p>[<a href="http://www.wsj.com/articles/snapchat-releases-first-hardware-product-spectacles-1474682719" target="_blank">WSJ Magazine</a>]</p>
187 |                 <!-- core-decorated -->
188 |             </div>
189 |         </article>
190 |         <div class="amp-comments">
191 |             <amp-iframe width=300 height=300 sandbox="allow-scripts allow-same-origin allow-popups allow-popups-to-escape-sandbox" layout="responsive" frameborder="0" resizable src="https://api.kinja.com/embed/comments/1787034869?blogId=4">
192 |                 <div overflow tabindex=0 role=button aria-label="Read more">Read more!</div>
193 |             </amp-iframe>
194 |             <footer class="referenced-comment__footer-container"><a href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869#js_discussion-region" target="_blank" class="comment-inset__full-discussion icon--svg"><span>View full discussion</span><span class="svg-icon__circle"><svg class="svg-icon small svg-chevron--small chevron--right"><use xlink:href="#iconset-chevron-right--small" /></svg></span></a></footer>
195 |         </div>
196 |         <footer class="footer">
197 |             <div class="subscribe-module"><span class="subscribe-module__copy">Want Gizmodo’s email newsletter?</span><a class="subscribe-module__btn" href="//gizmodo.com/newsletter" target="_blank">Subscribe</a></div>
198 |             <amp-embed width=100 height=100 type=taboola layout=responsive heights="(min-width:1750px) 265%, (min-width:1524px) 271%, (min-width:1350px) 278%, (min-width:1176px) 285%, (min-width:1029px) 294%, (min-width:906px) 304%, (min-width:800px) 315%, (min-width:775px) 703%, (min-width:663px) 717%, (min-width:616px) 732%, (min-width:601px) 749%, (min-width:590px) 925%, (min-width:553px) 942%, (min-width:519px) 965%, (min-width:485px) 985%, (min-width:471px) 1006%, (min-width:450px) 1028%, (min-width:432px) 1062%, (min-width:408px) 1085%, (min-width:384px) 1110%, (min-width:362px) 1136%, (min-width:339px) 1165%, (min-width:326px) 1195%, 1229%"
199 |                 data-publisher="gawkermedia-network" data-mode="thumbnails-a_AMP" data-placement="Below Article Thumbnails AMP" data-target_type="mix" data-article="auto"></amp-embed>
200 |             <div class="footer__links">
201 |                 <ul class="footer__list">
202 |                     <li><a href="/about" data-ga="[Footer, click, about]">About Blog</a></li>
203 |                     <li><a href="http://help.gawker.com/" target="_blank" data-ga="[Footer, click, Help]">Need Help?</a></li>
204 |                     <li><a href="http://legal.kinja.com/content-guidelines-90185358" target="_blank" data-ga="[Footer, click, Content Guidelines]">Content Guide</a></li>
205 |                 </ul>
206 |                 <ul class="footer__list footer__list--small">
207 |                     <li><a href="http://advertising.gawker.com/about/index.php#contact" data-ga="[Footer, click, Permissions]" target="_blank">Permissions</a></li>
208 |                     <li><a href="http://legal.kinja.com/privacy-policy-1750920278" data-ga="[Footer, click, Privacy]" target="_blank">Privacy</a></li>
209 |                     <li><a href="http://legal.kinja.com/kinja-terms-of-use-90161644" data-ga="[Footer, click, Terms]" target="_blank">Terms of Use</a></li>
210 |                     <li><a href="http://advertising.gawker.com/" data-ga="[Footer, click, Advertising]" target="_blank">Advertising</a></li>
211 |                     <li><a href="http://gawker.com/careers" data-ga="[Footer, click, Jobs]" target="_blank">Jobs</a></li>
212 |                     <li><a href="http://feeds.gawker.com/gizmodo/full" data-ga="[Footer, click, RSS]" target="_blank">RSS</a></li>
213 |                 </ul>
214 |             </div>
215 |         </footer>
216 |     </div>
217 | </body>
218 | 
219 | </html>
220 | 


--------------------------------------------------------------------------------
/tests/templates/amp/str_thumbnail_image.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         [{
 14 |             "url": "https://vimeo.com/189955079",
 15 |             "thumbnailUrl": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 16 |             "embedUrl": "https://player.vimeo.com/video/189955079",
 17 |             "name": "Piper",
 18 |             "description": "The Pixar imaginative group examined the inventive procedure in creating Piper, the main character who is an infant sandpiper taking in the ropes out on the shoreline&hellip;",
 19 |             "height": 1080,
 20 |             "width": 1920,
 21 |             "playerType": "HTML5 Flash",
 22 |             "videoQuality": "HD",
 23 |             "duration": "PT00H06M06S",
 24 |             "uploadDate": "2016-11-02T11:48:33-04:00",
 25 |             "thumbnail": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 26 |             "author": {
 27 |                 "@type": "Person",
 28 |                 "name": "Citizen Consulting",
 29 |                 "url": "https://vimeo.com/citizenconsulting"
 30 |             },
 31 |             "potentialAction": {
 32 |                 "@type": "ViewAction",
 33 |                 "target": "vimeo://app.vimeo.com/videos/189955079"
 34 |             },
 35 |             "interactionCount": 78252,
 36 |             "keywords": "[Pixar,Short,Piper]",
 37 |             "@type": "VideoObject",
 38 |             "@context": "http://schema.org"
 39 |         }, {
 40 |             "itemListElement": [{
 41 |                 "@type": "ListItem",
 42 |                 "position": 1,
 43 |                 "item": {
 44 |                     "@id": "https://vimeo.com/citizenconsulting",
 45 |                     "name": "Citizen Consulting"
 46 |                 }
 47 |             }, {
 48 |                 "@type": "ListItem",
 49 |                 "position": 2,
 50 |                 "item": {
 51 |                     "@id": "https://vimeo.com/citizenconsulting/videos",
 52 |                     "name": "Videos"
 53 |                 }
 54 |             }],
 55 |             "@type": "BreadcrumbList",
 56 |             "@context": "http://schema.org"
 57 |         }]
 58 |     </script>
 59 |     <style amp-boilerplate>
 60 |         body {
 61 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 62 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 63 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 64 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 65 |         }
 66 | 
 67 |         @-webkit-keyframes -amp-start {
 68 |             from {
 69 |                 visibility: hidden
 70 |             }
 71 |             to {
 72 |                 visibility: visible
 73 |             }
 74 |         }
 75 | 
 76 |         @-moz-keyframes -amp-start {
 77 |             from {
 78 |                 visibility: hidden
 79 |             }
 80 |             to {
 81 |                 visibility: visible
 82 |             }
 83 |         }
 84 | 
 85 |         @-ms-keyframes -amp-start {
 86 |             from {
 87 |                 visibility: hidden
 88 |             }
 89 |             to {
 90 |                 visibility: visible
 91 |             }
 92 |         }
 93 | 
 94 |         @-o-keyframes -amp-start {
 95 |             from {
 96 |                 visibility: hidden
 97 |             }
 98 |             to {
 99 |                 visibility: visible
100 |             }
101 |         }
102 | 
103 |         @keyframes -amp-start {
104 |             from {
105 |                 visibility: hidden
106 |             }
107 |             to {
108 |                 visibility: visible
109 |             }
110 |         }
111 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
112 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
113 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
114 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
115 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
116 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
117 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
118 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
119 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
120 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
121 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
122 | </head>
123 | 
124 | <body class="gizmodo blog-group-gizmodo">
125 | </body>
126 | 
127 | </html>
128 | 


--------------------------------------------------------------------------------
/tests/templates/amp/thumbnail_image.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         [{
 14 |             "url": "https://vimeo.com/189955079",
 15 |             "thumbnailUrl": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 16 |             "embedUrl": "https://player.vimeo.com/video/189955079",
 17 |             "name": "Piper",
 18 |             "description": "The Pixar imaginative group examined the inventive procedure in creating Piper, the main character who is an infant sandpiper taking in the ropes out on the shoreline&hellip;",
 19 |             "height": 1080,
 20 |             "width": 1920,
 21 |             "playerType": "HTML5 Flash",
 22 |             "videoQuality": "HD",
 23 |             "duration": "PT00H06M06S",
 24 |             "uploadDate": "2016-11-02T11:48:33-04:00",
 25 |             "thumbnail": {
 26 |                 "@type": "ImageObject",
 27 |                 "url": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 28 |                 "width": 1280,
 29 |                 "height": 720
 30 |             },
 31 |             "author": {
 32 |                 "@type": "Person",
 33 |                 "name": "Citizen Consulting",
 34 |                 "url": "https://vimeo.com/citizenconsulting"
 35 |             },
 36 |             "potentialAction": {
 37 |                 "@type": "ViewAction",
 38 |                 "target": "vimeo://app.vimeo.com/videos/189955079"
 39 |             },
 40 |             "interactionCount": 78252,
 41 |             "keywords": "[Pixar,Short,Piper]",
 42 |             "@type": "VideoObject",
 43 |             "@context": "http://schema.org"
 44 |         }, {
 45 |             "itemListElement": [{
 46 |                 "@type": "ListItem",
 47 |                 "position": 1,
 48 |                 "item": {
 49 |                     "@id": "https://vimeo.com/citizenconsulting",
 50 |                     "name": "Citizen Consulting"
 51 |                 }
 52 |             }, {
 53 |                 "@type": "ListItem",
 54 |                 "position": 2,
 55 |                 "item": {
 56 |                     "@id": "https://vimeo.com/citizenconsulting/videos",
 57 |                     "name": "Videos"
 58 |                 }
 59 |             }],
 60 |             "@type": "BreadcrumbList",
 61 |             "@context": "http://schema.org"
 62 |         }]
 63 |     </script>
 64 |     <style amp-boilerplate>
 65 |         body {
 66 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 67 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 68 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 69 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 70 |         }
 71 | 
 72 |         @-webkit-keyframes -amp-start {
 73 |             from {
 74 |                 visibility: hidden
 75 |             }
 76 |             to {
 77 |                 visibility: visible
 78 |             }
 79 |         }
 80 | 
 81 |         @-moz-keyframes -amp-start {
 82 |             from {
 83 |                 visibility: hidden
 84 |             }
 85 |             to {
 86 |                 visibility: visible
 87 |             }
 88 |         }
 89 | 
 90 |         @-ms-keyframes -amp-start {
 91 |             from {
 92 |                 visibility: hidden
 93 |             }
 94 |             to {
 95 |                 visibility: visible
 96 |             }
 97 |         }
 98 | 
 99 |         @-o-keyframes -amp-start {
100 |             from {
101 |                 visibility: hidden
102 |             }
103 |             to {
104 |                 visibility: visible
105 |             }
106 |         }
107 | 
108 |         @keyframes -amp-start {
109 |             from {
110 |                 visibility: hidden
111 |             }
112 |             to {
113 |                 visibility: visible
114 |             }
115 |         }
116 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
117 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
118 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
119 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
120 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
121 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
122 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
123 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
124 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
125 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
126 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
127 | </head>
128 | 
129 | <body class="gizmodo blog-group-gizmodo">
130 | </body>
131 | 
132 | </html>
133 | 


--------------------------------------------------------------------------------
/tests/templates/amp/video_objects.html:
--------------------------------------------------------------------------------
  1 | <!doctype html>
  2 | <html amp>
  3 | 
  4 | <head>
  5 |     <meta charset="utf-8">
  6 |     <title>Google Glass Is Dead, Long Live Snapchat Spectacles</title>
  7 |     <link rel="canonical" href="http://gizmodo.com/google-glass-is-dead-long-live-snapchat-spectacles-1787034869">
  8 |     <meta name="viewport" content="width=device-width,minimum-scale=1,initial-scale=1" />
  9 |     <style amp-custom>
 10 | 
 11 |     </style>
 12 |     <script type="application/ld+json">
 13 |         [{
 14 |             "url": "https://vimeo.com/189955079",
 15 |             "thumbnailUrl": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 16 |             "embedUrl": "https://player.vimeo.com/video/189955079",
 17 |             "name": "Piper",
 18 |             "description": "The Pixar imaginative group examined the inventive procedure in creating Piper, the main character who is an infant sandpiper taking in the ropes out on the shoreline&hellip;",
 19 |             "height": 1080,
 20 |             "width": 1920,
 21 |             "playerType": "HTML5 Flash",
 22 |             "videoQuality": "HD",
 23 |             "duration": "PT00H06M06S",
 24 |             "uploadDate": "2016-11-02T11:48:33-04:00",
 25 |             "thumbnail": {
 26 |                 "@type": "ImageObject",
 27 |                 "url": "https://i.vimeocdn.com/filter/overlay?src0=https%3A%2F%2Fi.vimeocdn.com%2Fvideo%2F600453872_1280x720.webp&src1=https%3A%2F%2Ff.vimeocdn.com%2Fimages_v6%2Fshare%2Fplay_icon_overlay.png",
 28 |                 "width": 1280,
 29 |                 "height": 720
 30 |             },
 31 |             "author": {
 32 |                 "@type": "Person",
 33 |                 "name": "Citizen Consulting",
 34 |                 "url": "https://vimeo.com/citizenconsulting"
 35 |             },
 36 |             "potentialAction": {
 37 |                 "@type": "ViewAction",
 38 |                 "target": "vimeo://app.vimeo.com/videos/189955079"
 39 |             },
 40 |             "interactionCount": 78252,
 41 |             "keywords": "[Pixar,Short,Piper]",
 42 |             "@type": "VideoObject",
 43 |             "@context": "http://schema.org"
 44 |         }, {
 45 |             "itemListElement": [{
 46 |                 "@type": "ListItem",
 47 |                 "position": 1,
 48 |                 "item": {
 49 |                     "@id": "https://vimeo.com/citizenconsulting",
 50 |                     "name": "Citizen Consulting"
 51 |                 }
 52 |             }, {
 53 |                 "@type": "ListItem",
 54 |                 "position": 2,
 55 |                 "item": {
 56 |                     "@id": "https://vimeo.com/citizenconsulting/videos",
 57 |                     "name": "Videos"
 58 |                 }
 59 |             }],
 60 |             "@type": "BreadcrumbList",
 61 |             "@context": "http://schema.org"
 62 |         }]
 63 |     </script>
 64 |     <style amp-boilerplate>
 65 |         body {
 66 |             -webkit-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 67 |             -moz-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 68 |             -ms-animation: -amp-start 8s steps(1, end) 0s 1 normal both;
 69 |             animation: -amp-start 8s steps(1, end) 0s 1 normal both
 70 |         }
 71 | 
 72 |         @-webkit-keyframes -amp-start {
 73 |             from {
 74 |                 visibility: hidden
 75 |             }
 76 |             to {
 77 |                 visibility: visible
 78 |             }
 79 |         }
 80 | 
 81 |         @-moz-keyframes -amp-start {
 82 |             from {
 83 |                 visibility: hidden
 84 |             }
 85 |             to {
 86 |                 visibility: visible
 87 |             }
 88 |         }
 89 | 
 90 |         @-ms-keyframes -amp-start {
 91 |             from {
 92 |                 visibility: hidden
 93 |             }
 94 |             to {
 95 |                 visibility: visible
 96 |             }
 97 |         }
 98 | 
 99 |         @-o-keyframes -amp-start {
100 |             from {
101 |                 visibility: hidden
102 |             }
103 |             to {
104 |                 visibility: visible
105 |             }
106 |         }
107 | 
108 |         @keyframes -amp-start {
109 |             from {
110 |                 visibility: hidden
111 |             }
112 |             to {
113 |                 visibility: visible
114 |             }
115 |         }
116 |     </style><noscript><style amp-boilerplate>body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}</style></noscript>
117 |     <script async src="https://cdn.ampproject.org/v0.js"></script>
118 |     <script async custom-element="amp-analytics" src="https://cdn.ampproject.org/v0/amp-analytics-0.1.js"></script>
119 |     <script async custom-element="amp-iframe" src="https://cdn.ampproject.org/v0/amp-iframe-0.1.js"></script>
120 |     <script async custom-element="amp-anim" src="https://cdn.ampproject.org/v0/amp-anim-0.1.js"></script>
121 |     <script async custom-element="amp-youtube" src="https://cdn.ampproject.org/v0/amp-youtube-0.1.js"></script>
122 |     <script async custom-element="amp-facebook" src="https://cdn.ampproject.org/v0/amp-facebook-0.1.js"></script>
123 |     <script async custom-element="amp-vine" src="https://cdn.ampproject.org/v0/amp-vine-0.1.js"></script>
124 |     <script async custom-element="amp-instagram" src="https://cdn.ampproject.org/v0/amp-instagram-0.1.js"></script>
125 |     <script async custom-element="amp-vimeo" src="https://cdn.ampproject.org/v0/amp-vimeo-0.1.js"></script>
126 |     <script async custom-element="amp-twitter" src="https://cdn.ampproject.org/v0/amp-twitter-0.1.js"></script>
127 | </head>
128 | 
129 | <body class="gizmodo blog-group-gizmodo">
130 | </body>
131 | 
132 | </html>
133 | 


--------------------------------------------------------------------------------
/tests/templates/core/bad_image_dimensions.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | All Properties</title>
 6 | 
 7 |     <meta name="description" content="Just a random description of a web page." />
 8 |     <meta name="keywords" content="one, two, three, four, five" />
 9 |     <meta name="title" content="Lassie Generic Test | all_properties" />
10 | </head>
11 | <body>
12 | 
13 | <img src="image.png" width="100in" height="100em">
14 | 
15 | </body>
16 | </html>
17 | 


--------------------------------------------------------------------------------
/tests/templates/core/bad_keywords.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | All Properties</title>
 6 | 
 7 |     <meta name="description" content="A webpage with keywords meta tag but no value." />
 8 |     <meta name="keywords" />
 9 |     <meta name="title" content="Lassie Generic Test | bad_keywords" />
10 | </head>
11 | <body>
12 | 
13 | </body>
14 | </html>
15 | 


--------------------------------------------------------------------------------
/tests/templates/core/class_setting_is_none.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Core Test | Class setting is None</title>
 6 | 
 7 |     <meta property="og:title" content="Lassie Core Test Class setting is None" />
 8 | 
 9 |     <meta property="og:image" content="http://i.imgur.com/cvoR7zv.jpg" />
10 |     <meta property="og:image:width" content="550" />
11 |     <meta property="og:image:height" content="365" />
12 | </head>
13 | <body>
14 | 
15 | </body>
16 | </html>


--------------------------------------------------------------------------------
/tests/templates/core/class_vs_method_settings.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Core Test | Class vs Method settings</title>
 6 | 
 7 |     <meta property="og:title" content="Lassie Core Test Class setting is None" />
 8 | 
 9 |     <meta property="og:image" content="http://i.imgur.com/cvoR7zv.jpg" />
10 |     <meta property="og:image:width" content="550" />
11 |     <meta property="og:image:height" content="365" />
12 | </head>
13 | <body>
14 | 
15 | </body>
16 | </html>


--------------------------------------------------------------------------------
/tests/templates/core/empty.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelhelmick/lassie/1122c719a68c20b847c1963719070e10a3d253dd/tests/templates/core/empty.html


--------------------------------------------------------------------------------
/tests/templates/core/image_dimensions.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | All Properties</title>
 6 | 
 7 |     <meta name="description" content="Just a random description of a web page." />
 8 |     <meta name="keywords" content="one, two, three, four, five" />
 9 |     <meta name="title" content="Lassie Generic Test | all_properties" />
10 | </head>
11 | <body>
12 | 
13 | <img src="image.png" width="100px" height="100px">
14 | <img src="image.png" width="100" height="100px">
15 | <img src="image.png" width=" 100" height="100 ">
16 | <img src="image.png" width="   100  " height="  100   ">
17 | 
18 | </body>
19 | </html>
20 | 


--------------------------------------------------------------------------------
/tests/templates/core/no_html_tag.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <head>
 3 |     <meta charset="utf-8">
 4 |     <title>Lassie Generic Test | No HTML Tag</title>
 5 | 
 6 |     <meta name="description" content="Just a random description of a web page." />
 7 |     <meta name="keywords" content="one, two, three, four, five" />
 8 |     <meta name="title" content="Lassie Generic Test | no_html_tag" />
 9 | </head>
10 | <body>
11 | </body>
12 | </html>
13 | 


--------------------------------------------------------------------------------
/tests/templates/core/retrieve_all_images.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Core Test | Retrieve All Images</title>
 6 | 
 7 |     <meta property="og:title" content="Lassie Core Test Retrieve All Images" />
 8 | 
 9 |     <meta property="og:image" content="http://i.imgur.com/cvoR7zv.jpg" />
10 |     <meta property="og:image:width" content="550" />
11 |     <meta property="og:image:height" content="365" />
12 | </head>
13 | <body>
14 | 
15 | <p>
16 |     <img src="http://i.imgur.com/cvoR7zv.jpg" />
17 | </p>
18 | 
19 | <p>
20 |     <img src="http://i.imgur.com/cvoR7zv.jpg" width="550" height="365" />
21 | </p>
22 | 
23 | </body>
24 | </html>


--------------------------------------------------------------------------------
/tests/templates/generic/all_properties.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | All Properties</title>
 6 | 
 7 |     <meta name="description" content="Just a random description of a web page." />
 8 |     <meta name="keywords" content="one, two, three, four, five" />
 9 |     <meta name="title" content="Lassie Generic Test | all_properties" />
10 | 
11 |     <link rel="canonical" href="http://example.com/canonical/path" />
12 | </head>
13 | <body>
14 | 
15 | </body>
16 | </html>
17 | 


--------------------------------------------------------------------------------
/tests/templates/generic/bad_locale.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="bad-locale">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | Bad Locale</title>
 6 | 
 7 |     <meta name="description" content="Test that if a bad locale is provided, locale is not returned" />
 8 | </head>
 9 | <body>
10 | 
11 | </body>
12 | </html>


--------------------------------------------------------------------------------
/tests/templates/generic/canonical.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | Canonical</title>
 6 | 
 7 |     <link rel="canonical" href="http://example.com/canonical/path" />
 8 | </head>
 9 | <body>
10 | 
11 | </body>
12 | </html>
13 | 


--------------------------------------------------------------------------------
/tests/templates/generic/favicon.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Generic Test | Favicon</title>
 6 | 
 7 |     <link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
 8 | </head>
 9 | <body>
10 | 
11 | </body>
12 | </html>


--------------------------------------------------------------------------------
/tests/templates/generic/no_title.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 | 
 6 |     <meta name="keywords" content="one, two, three, four, five" />
 7 | </head>
 8 | <body>
 9 | 
10 | </body>
11 | </html>


--------------------------------------------------------------------------------
/tests/templates/handle_file_content/image_file.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/michaelhelmick/lassie/1122c719a68c20b847c1963719070e10a3d253dd/tests/templates/handle_file_content/image_file.jpg


--------------------------------------------------------------------------------
/tests/templates/open_graph/all_properties.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Open Graph Test | All Properties</title>
 6 | 
 7 |     <meta property="og:title" content="Lassie Open Graph All Properies Test" />
 8 |     <meta property="og:url" content="http://lassie.it/open_graph/all_properties.html" />
 9 |     <meta property="og:description" content="Just a test template with OG data!" />
10 |     <meta property="og:locale" content="en_US" />
11 |     <meta property="og:site_name" content="Lassie">
12 | 
13 |     <meta property="og:image" content="http://i.imgur.com/cvoR7zv.jpg" />
14 |     <meta property="og:image:width" content="550" />
15 |     <meta property="og:image:height" content="365" />
16 | 
17 |     <meta property="og:video" content="http://www.youtube.com/v/dQw4w9WgXcQ?version=3&amp;autohide=1">
18 |     <meta property="og:video:type" content="application/x-shockwave-flash">
19 |     <meta property="og:video:width" content="640">
20 |     <meta property="og:video:height" content="480">
21 | </head>
22 | <body>
23 | 
24 | </body>
25 | </html>


--------------------------------------------------------------------------------
/tests/templates/open_graph/no_og_title_no_og_url.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Open Graph Test | No og:title, No og:url</title>
 6 | 
 7 |     <meta property="og:description" content="Just a test template with OG data!" />
 8 | </head>
 9 | <body>
10 | 
11 | </body>
12 | </html>


--------------------------------------------------------------------------------
/tests/templates/open_graph/og_image_plus_two_body_images.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Open Graph Test | og:image Plus Two Body Images</title>
 6 | 
 7 |     <meta property="og:image" content="http://i.imgur.com/cvoR7zv.jpg" />
 8 |     <meta property="og:image:width" content="550" />
 9 |     <meta property="og:image:height" content="365" />
10 | </head>
11 | <body>
12 | 
13 | <img src="http://i.imgur.com/3rJyhJN.png" />
14 | <img src="http://i.imgur.com/Viw3WHh.jpg" /> <!-- -___- -->
15 | 
16 | </body>
17 | </html>


--------------------------------------------------------------------------------
/tests/templates/open_graph/og_image_relative_url.html:
--------------------------------------------------------------------------------
1 | <!DOCTYPE html>
2 | <html lang="en-us">
3 | <head>
4 |     <meta charset="utf-8">
5 |     <title>Lassie Open Graph Test | og:image with relative URL</title>
6 |     <meta property="og:image" content="name.jpg" />
7 | </head>
8 | </html>


--------------------------------------------------------------------------------
/tests/templates/twitter_card/all_properties.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Twitter Card Test | All Properties</title>
 6 | 
 7 |     <meta name="twitter:card" content="player">
 8 |     <meta name="twitter:site" content="@youtube">
 9 |     <meta name="twitter:url" content="http://www.youtube.com/watch?v=fWNaR-rxAic">
10 |     <meta name="twitter:title" content="Carly Rae Jepsen - Call Me Maybe">
11 |     <meta name="twitter:description" content="Buy Now! http://smarturl.it/CallMeMaybe Music video by Carly Rae Jepsen performing Call Me Maybe. (C) 2011 604 Records Inc. #VEVOCertified on June 8, 2012. h...">
12 |     <meta name="twitter:image" content="http://i1.ytimg.com/vi/fWNaR-rxAic/maxresdefault.jpg">
13 |     <meta name="twitter:app:name:iphone" content="YouTube">
14 |     <meta name="twitter:app:id:iphone" content="544007664">
15 |     <meta name="twitter:app:name:ipad" content="YouTube">
16 |     <meta name="twitter:app:id:ipad" content="544007664">
17 |     <meta name="twitter:app:url:iphone" content="vnd.youtube://watch/fWNaR-rxAic">
18 |     <meta name="twitter:app:url:ipad" content="vnd.youtube://watch/fWNaR-rxAic">
19 |     <meta name="twitter:app:name:googleplay" content="YouTube">
20 |     <meta name="twitter:app:id:googleplay" content="com.google.android.youtube">
21 |     <meta name="twitter:app:url:googleplay" content="http://www.youtube.com/watch?v=fWNaR-rxAic">
22 |     <meta name="twitter:player" content="https://www.youtube.com/embed/fWNaR-rxAic">
23 |     <meta name="twitter:player:width" content="1920">
24 |     <meta name="twitter:player:height" content="1080">
25 | </head>
26 | <body>
27 | 
28 | </body>
29 | </html>


--------------------------------------------------------------------------------
/tests/templates/twitter_card/no_og_title_use_twitter_title.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en-us">
 3 | <head>
 4 |     <meta charset="utf-8">
 5 |     <title>Lassie Twitter Test | No og:title Use twitter:title</title>
 6 | 
 7 |     <meta property="og:description" content="A test case for Lassie!" />
 8 | 
 9 |     <meta name="twitter:title" content="Lassie Twitter Test | no_og_title_use_twitter_title" />
10 | </head>
11 | <body>
12 | 
13 | </body>
14 | </html>


--------------------------------------------------------------------------------
/tests/test_amp.py:
--------------------------------------------------------------------------------
  1 | from lassie import Lassie
  2 | 
  3 | from .base import LassieBaseTestCase
  4 | 
  5 | 
  6 | class LassieAMPTestCase(LassieBaseTestCase):
  7 |     def test_all_properites(self):
  8 |         url = 'http://lassie.it/amp/all_properties.html'
  9 | 
 10 |         l = Lassie()
 11 |         data = l.fetch(url, all_images=True)
 12 | 
 13 |         self.assertEqual(len(data['images']), 3)
 14 | 
 15 |         title = 'Google Glass Is Dead, Long Live Snapchat Spectacles'
 16 |         self.assertEqual(data['title'], title)
 17 | 
 18 |     def test_bad_json(self):
 19 |         url = 'http://lassie.it/amp/bad_json.html'
 20 | 
 21 |         l = Lassie()
 22 |         data = l.fetch(url)
 23 | 
 24 |         self.assertTrue('amp' in data['url'])
 25 | 
 26 |     def test_str_image(self):
 27 |         url = 'http://lassie.it/amp/str_image.html'
 28 | 
 29 |         l = Lassie()
 30 |         data = l.fetch(url)
 31 | 
 32 |         self.assertEqual(1, len(data['images']))
 33 | 
 34 |     def test_list_image(self):
 35 |         url = 'http://lassie.it/amp/list_image.html'
 36 | 
 37 |         l = Lassie()
 38 |         data = l.fetch(url)
 39 | 
 40 |         self.assertEqual(2, len(data['images']))
 41 | 
 42 |     def test_list_image_list(self):
 43 |         url = 'http://lassie.it/amp/list_image_list.html'
 44 | 
 45 |         l = Lassie()
 46 |         data = l.fetch(url)
 47 | 
 48 |         self.assertEqual(2, len(data['images']))
 49 | 
 50 |     def test_list_image_list_str(self):
 51 |         url = 'http://lassie.it/amp/list_image_list_str.html'
 52 | 
 53 |         l = Lassie()
 54 |         data = l.fetch(url)
 55 | 
 56 |         self.assertEqual(1, len(data['images']))
 57 | 
 58 |     def test_list_image_str(self):
 59 |         url = 'http://lassie.it/amp/list_image_str.html'
 60 | 
 61 |         l = Lassie()
 62 |         data = l.fetch(url)
 63 | 
 64 |         self.assertEqual(1, len(data['images']))
 65 | 
 66 |     def test_list_image_empty(self):
 67 |         url = 'http://lassie.it/amp/list_image_empty.html'
 68 | 
 69 |         l = Lassie()
 70 |         data = l.fetch(url)
 71 | 
 72 |         self.assertEqual(1, len(data['images']))
 73 | 
 74 |     def test_list_json(self):
 75 |         url = 'http://lassie.it/amp/list_json.html'
 76 | 
 77 |         l = Lassie()
 78 |         data = l.fetch(url)
 79 | 
 80 |         self.assertTrue('Pixar' in data['description'])
 81 | 
 82 |     def test_video_objects(self):
 83 |         url = 'http://lassie.it/amp/video_objects.html'
 84 | 
 85 |         l = Lassie()
 86 |         data = l.fetch(url)
 87 | 
 88 |         self.assertEqual(1, len(data['videos']))
 89 | 
 90 |     def test_thumbnail_image(self):
 91 |         url = 'http://lassie.it/amp/thumbnail_image.html'
 92 | 
 93 |         l = Lassie()
 94 |         data = l.fetch(url)
 95 | 
 96 |         self.assertEqual(2, len(data['images']))
 97 | 
 98 |     def test_list_thumbnail_image(self):
 99 |         url = 'http://lassie.it/amp/list_thumbnail_image.html'
100 | 
101 |         l = Lassie()
102 |         data = l.fetch(url)
103 | 
104 |         self.assertEqual(2, len(data['images']))
105 | 
106 |     def test_str_thumbnail_image(self):
107 |         url = 'http://lassie.it/amp/str_thumbnail_image.html'
108 | 
109 |         l = Lassie()
110 |         data = l.fetch(url)
111 | 
112 |         self.assertEqual(2, len(data['images']))
113 | 


--------------------------------------------------------------------------------
/tests/test_core.py:
--------------------------------------------------------------------------------
  1 | from lassie import Lassie, LassieError
  2 | from lassie.utils import FAKE_USER_AGENT
  3 | 
  4 | from .base import LassieBaseTestCase
  5 | 
  6 | 
  7 | class LassieCoreTestCase(LassieBaseTestCase):
  8 |     def test_core_class_vs_method_settings(self):
  9 |         url = 'http://lassie.it/core/class_vs_method_settings.html'
 10 | 
 11 |         l = Lassie()
 12 |         data = l.fetch(url)
 13 | 
 14 |         self.assertEqual(len(data['images']), 1)
 15 | 
 16 |         l.open_graph = False
 17 |         data = l.fetch(url)
 18 | 
 19 |         # open_graph is set to False so there shouldn't be any images in the list this time around
 20 |         self.assertEqual(len(data['images']), 0)
 21 | 
 22 |     def test_core_class_setting_is_none(self):
 23 |         url = 'http://lassie.it/core/class_setting_is_none.html'
 24 | 
 25 |         # This is a really odd use-case where they'd set the class attr to None, but it might happen so oh wellz.
 26 |         l = Lassie()
 27 |         l.open_graph = None
 28 |         data = l.fetch(url, open_graph=False)
 29 | 
 30 |         self.assertEqual(len(data['images']), 0)
 31 | 
 32 |     def test_core_no_content_raises_error(self):
 33 |         url = 'http://lassie.it/core/empty.html'
 34 | 
 35 |         l = Lassie()
 36 |         self.assertRaises(LassieError, l.fetch, url)
 37 | 
 38 |     def test_core_retrieve_all_images(self):
 39 |         url = 'http://lassie.it/core/retrieve_all_images.html'
 40 | 
 41 |         l = Lassie()
 42 |         l.all_images = True
 43 | 
 44 |         data = l.fetch(url)
 45 |         self.assertEqual(len(data['images']), 3)
 46 | 
 47 |         last_image = data['images'][2]
 48 |         self.assertEqual(last_image['width'], 550)
 49 |         self.assertEqual(last_image['height'], 365)
 50 | 
 51 |     def test_image_dimensions(self):
 52 |         url = 'http://lassie.it/core/image_dimensions.html'
 53 | 
 54 |         l = Lassie()
 55 |         data = l.fetch(url, all_images=True)
 56 | 
 57 |         self.assertEqual(len(data['images']), 4)
 58 | 
 59 |         image = data['images'][0]
 60 |         self.assertEqual(image['width'], 100)
 61 |         self.assertEqual(image['height'], 100)
 62 | 
 63 |         image = data['images'][1]
 64 |         self.assertEqual(image['width'], 100)
 65 |         self.assertEqual(image['height'], 100)
 66 | 
 67 |         image = data['images'][2]
 68 |         self.assertEqual(image['width'], 100)
 69 |         self.assertEqual(image['height'], 100)
 70 | 
 71 |         image = data['images'][3]
 72 |         self.assertEqual(image['width'], 100)
 73 |         self.assertEqual(image['height'], 100)
 74 | 
 75 |     def test_bad_image_dimensions(self):
 76 |         url = 'http://lassie.it/core/bad_image_dimensions.html'
 77 | 
 78 |         l = Lassie()
 79 |         data = l.fetch(url, all_images=True)
 80 | 
 81 |         # lassie.utils.convert_to_int will except a TypeError or ValueError and pass (not setting a width/height on the image)
 82 |         image = data['images'][0]
 83 |         self.assertTrue(not 'width' in image)
 84 |         self.assertTrue(not 'height' in image)
 85 | 
 86 |     def test_request_opts(self):
 87 |         l = Lassie()
 88 |         l.request_opts = {
 89 |             'headers': {
 90 |                 'User-Agent': 'lassie python',
 91 |             },
 92 |             'timeout': 3
 93 |         }
 94 | 
 95 |         self.assertTrue(set(('headers', 'timeout')).issubset(l.request_opts))
 96 | 
 97 |         # If they modify one of the keys value, make sure it actually happened
 98 |         l.request_opts['headers'].update({'Content-Type': 'application/json'})
 99 |         self.assertEqual(len(l.request_opts['headers']), 2)
100 |         self.assertTrue(set(('User-Agent', 'Content-Type')).issubset(l.request_opts['headers']))
101 | 
102 |     def test_request_opts_no_headers(self):
103 |         l = Lassie()
104 |         l.request_opts = {
105 |             'headers': {},
106 |             'timeout': 3
107 |         }
108 | 
109 |         # headers should be set to {} then User-Agent should be added
110 |         self.assertTrue(l.client.headers != {})
111 | 
112 |     def test_request_opts_default_user_agent(self):
113 |         l = Lassie()
114 |         l.request_opts = {
115 |             'timeout': 3
116 |         }
117 | 
118 |         # headers should be set to {} then User-Agent should be added
119 |         self.assertTrue(l.client.headers['User-Agent'] == FAKE_USER_AGENT)
120 | 
121 |     def test_bad_request_opts(self):
122 |         l = Lassie()
123 |         l.request_opts = {
124 |             'bad_key': True,
125 |             'headers': {
126 |                 'User-Agent': 'lassie python'
127 |             }
128 |         }
129 | 
130 |         self.assertTrue('bad_key' not in l.request_opts)
131 |         self.assertTrue('headers' in l.request_opts)
132 | 
133 |     def test_core_bad_keywords(self):
134 |         url = 'http://lassie.it/core/bad_keywords.html'
135 | 
136 |         l = Lassie()
137 |         data = l.fetch(url)
138 |         self.assertEqual(data.get('keywords'), [])
139 | 
140 |     def test_merge_request_kwargs(self):
141 |         l = Lassie()
142 |         l.request_opts = {
143 |             'timeout': 3,
144 |         }
145 | 
146 |         request_kwargs = l.merge_request_kwargs()
147 |         self.assertTrue('timeout' in request_kwargs)
148 | 
149 |     def test_prepare_request(self):
150 |         url = 'http://lassie.it/core/bad_keywords.html'
151 | 
152 |         l = Lassie()
153 |         l._prepare_request('HEAD', url=url, headers=l.client.headers)
154 | 
155 |     def test_no_html_tag(self):
156 |         url = 'http://lassie.it/core/no_html_tag.html'
157 | 
158 |         l = Lassie()
159 |         data = l.fetch(url)
160 | 
161 |         self.assertTrue('no_html_tag' in data['title'])
162 | 


--------------------------------------------------------------------------------
/tests/test_generic.py:
--------------------------------------------------------------------------------
 1 | import lassie
 2 | 
 3 | from .base import LassieBaseTestCase
 4 | 
 5 | 
 6 | class LassieTwitterCardTestCase(LassieBaseTestCase):
 7 |     def test_generic_all_properties(self):
 8 |         url = 'http://lassie.it/generic/all_properties.html'
 9 |         data = lassie.fetch(url, canonical=True)
10 | 
11 |         self.assertEqual(data['locale'], 'en_US')
12 |         self.assertEqual(data['title'], 'Lassie Generic Test | all_properties')
13 |         self.assertEqual(data['description'], 'Just a random description of a web page.')
14 |         self.assertEqual(data['url'], 'http://example.com/canonical/path')
15 |         self.assertEqual(len(data['keywords']), 5)
16 | 
17 |     def test_generic_bad_locale(self):
18 |         url = 'http://lassie.it/generic/bad_locale.html'
19 |         data = lassie.fetch(url)
20 | 
21 |         self.assertTrue(not 'locale' in data)
22 | 
23 |     def test_generic_favicon(self):
24 |         url = 'http://lassie.it/generic/favicon.html'
25 |         data = lassie.fetch(url)
26 | 
27 |         self.assertEqual(len(data['images']), 1)
28 |         image = data['images'][0]
29 | 
30 |         self.assertEqual(image['type'], 'favicon')
31 | 
32 |     def test_no_title(self):
33 |         url = 'http://lassie.it/generic/no_title.html'
34 |         data = lassie.fetch(url)
35 | 
36 |         self.assertTrue(not 'title' in data)
37 | 
38 |     def test_canonical(self):
39 |         url = 'http://lassie.it/generic/canonical.html'
40 |         data = lassie.fetch(url, canonical=True)
41 | 
42 |         self.assertEqual(data['url'], 'http://example.com/canonical/path')
43 | 


--------------------------------------------------------------------------------
/tests/test_handle_file_content.py:
--------------------------------------------------------------------------------
 1 | import lassie
 2 | 
 3 | from .base import LassieBaseTestCase
 4 | 
 5 | 
 6 | class LassieFileContentTestCase(LassieBaseTestCase):
 7 |     def test_image_file(self):
 8 |         url = 'http://lassie.it/handle_file_content/image_file.jpg'
 9 |         data = lassie.fetch(url, handle_file_content=True)
10 | 
11 |         self.assertEqual(data['url'], url)
12 |         self.assertEqual(data['title'], 'image_file.jpg')
13 | 
14 |         self.assertEqual(len(data['images']), 1)
15 |         image = data['images'][0]
16 |         self.assertEqual(image['src'], 'http://lassie.it/handle_file_content/image_file.jpg')
17 |         self.assertEqual(image['type'], 'body_image')
18 | 


--------------------------------------------------------------------------------
/tests/test_open_graph.py:
--------------------------------------------------------------------------------
 1 | import lassie
 2 | 
 3 | from .base import LassieBaseTestCase
 4 | 
 5 | 
 6 | class LassieOpenGraphTestCase(LassieBaseTestCase):
 7 |     def test_open_graph_all_properties(self):
 8 |         url = 'http://lassie.it/open_graph/all_properties.html'
 9 |         data = lassie.fetch(url)
10 | 
11 |         self.assertEqual(data['url'], url)
12 |         self.assertEqual(data['title'], 'Lassie Open Graph All Properies Test')
13 |         self.assertEqual(data['description'], 'Just a test template with OG data!')
14 |         self.assertEqual(data['locale'], 'en_US')
15 |         self.assertEqual(data['site_name'], 'Lassie')
16 | 
17 |         self.assertEqual(len(data['images']), 1)
18 |         image = data['images'][0]
19 |         self.assertEqual(image['src'], 'http://i.imgur.com/cvoR7zv.jpg')
20 |         self.assertEqual(image['width'], 550)
21 |         self.assertEqual(image['height'], 365)
22 |         self.assertEqual(image['type'], 'og:image')
23 | 
24 |         self.assertEqual(len(data['videos']), 1)
25 |         video = data['videos'][0]
26 |         self.assertEqual(video['src'], 'http://www.youtube.com/v/dQw4w9WgXcQ?version=3&autohide=1')
27 |         self.assertEqual(video['width'], 640)
28 |         self.assertEqual(video['height'], 480)
29 |         self.assertEqual(video['type'], 'application/x-shockwave-flash')
30 | 
31 |     def test_open_graph_no_og_title_no_og_url(self):
32 |         url = 'http://lassie.it/open_graph/no_og_title_no_og_url.html'
33 |         data = lassie.fetch(url)
34 | 
35 |         self.assertEqual(data['url'], url)
36 |         self.assertEqual(data['title'], 'Lassie Open Graph Test | No og:title, No og:url')
37 | 
38 |     def test_open_graph_og_image_plus_two_body_images(self):
39 |         url = 'http://lassie.it/open_graph/og_image_plus_two_body_images.html'
40 |         data = lassie.fetch(url)
41 | 
42 |         # Try without passing "all_images", then pass it
43 | 
44 |         self.assertEqual(len(data['images']), 1)
45 | 
46 |         data = lassie.fetch(url, all_images=True)
47 | 
48 |         self.assertEqual(len(data['images']), 3)
49 | 
50 |         image_0 = data['images'][0]
51 |         image_1 = data['images'][1]
52 |         image_2 = data['images'][2]
53 |         self.assertEqual(image_0['type'], 'og:image')
54 |         self.assertEqual(image_1['type'], 'body_image')
55 |         self.assertEqual(image_2['type'], 'body_image')
56 | 
57 |     def test_open_graph_og_image_relative_url(self):
58 |         url = 'http://lassie.it/open_graph/og_image_relative_url.html'
59 |         data = lassie.fetch(url)
60 | 
61 |         self.assertEqual(
62 |             data['images'][0]['src'], 'http://lassie.it/open_graph/name.jpg')
63 | 


--------------------------------------------------------------------------------
/tests/test_twitter_card.py:
--------------------------------------------------------------------------------
 1 | import lassie
 2 | 
 3 | from .base import LassieBaseTestCase
 4 | 
 5 | 
 6 | class LassieTwitterCardTestCase(LassieBaseTestCase):
 7 |     def test_twitter_all_properties(self):
 8 |         url = 'http://lassie.it/twitter_card/all_properties.html'
 9 |         data = lassie.fetch(url)
10 |         self.assertEqual(data['url'], 'http://www.youtube.com/watch?v=fWNaR-rxAic')
11 |         self.assertEqual(data['title'], 'Carly Rae Jepsen - Call Me Maybe')
12 |         self.assertEqual(data['description'], 'Buy Now! http://smarturl.it/CallMeMaybe Music video by Carly Rae Jepsen performing Call Me Maybe. (C) 2011 604 Records Inc. #VEVOCertified on June 8, 2012. h...')
13 | 
14 |         self.assertEqual(len(data['images']), 1)
15 |         image = data['images'][0]
16 |         self.assertEqual(image['src'], 'http://i1.ytimg.com/vi/fWNaR-rxAic/maxresdefault.jpg')
17 | 
18 |         self.assertEqual(len(data['videos']), 1)
19 |         video = data['videos'][0]
20 |         self.assertEqual(video['src'], 'https://www.youtube.com/embed/fWNaR-rxAic')
21 |         self.assertEqual(video['width'], 1920)
22 |         self.assertEqual(video['height'], 1080)
23 | 
24 |     def test_twitter_no_og_title_use_twitter_title(self):
25 |         url = 'http://lassie.it/twitter_card/no_og_title_use_twitter_title.html'
26 |         data = lassie.fetch(url)
27 | 
28 |         self.assertEqual(data['description'], 'A test case for Lassie!')
29 |         self.assertEqual(data['title'], 'Lassie Twitter Test | no_og_title_use_twitter_title')
30 | 


--------------------------------------------------------------------------------