├── .github
    └── workflows
    │   └── tests.yaml
├── .gitignore
├── .readthedocs.yaml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
    ├── Makefile
    ├── api.rst
    ├── conf.py
    ├── django.rst
    ├── examples.rst
    ├── flask.rst
    ├── getting_started.rst
    ├── index.rst
    ├── installation.rst
    └── make.bat
├── examples
    ├── __init__.py
    ├── django_ex
    │   ├── __init__.py
    │   ├── manage.py
    │   ├── settings.py
    │   ├── static
    │   │   └── style.css
    │   ├── templates
    │   │   └── example.html
    │   ├── urls.py
    │   └── views.py
    ├── flask_ex
    │   ├── app.py
    │   ├── static
    │   │   └── style.css
    │   └── templates
    │   │   └── example.html
    └── python_ex
    │   └── example.py
├── micawber
    ├── __init__.py
    ├── cache.py
    ├── compat.py
    ├── contrib
    │   ├── __init__.py
    │   ├── mcdjango
    │   │   ├── __init__.py
    │   │   ├── mcdjango_tests
    │   │   │   ├── __init__.py
    │   │   │   ├── models.py
    │   │   │   └── tests.py
    │   │   ├── models.py
    │   │   ├── providers.py
    │   │   ├── templates
    │   │   │   └── micawber
    │   │   │   │   ├── link.html
    │   │   │   │   ├── photo.html
    │   │   │   │   ├── rich.html
    │   │   │   │   └── video.html
    │   │   └── templatetags
    │   │   │   ├── __init__.py
    │   │   │   └── micawber_tags.py
    │   ├── mcflask.py
    │   └── providers.py
    ├── exceptions.py
    ├── parsers.py
    ├── providers.py
    ├── test_utils.py
    └── tests.py
├── runtests.py
└── setup.py


/.github/workflows/tests.yaml:
--------------------------------------------------------------------------------
 1 | name: Tests
 2 | on: [push]
 3 | jobs:
 4 |   tests:
 5 |     name: ${{ matrix.python-version }}
 6 |     runs-on: ubuntu-latest
 7 |     strategy:
 8 |       fail-fast: false  # keep the other matrix jobs running when one version fails
 9 |       matrix:
10 |         python-version: ["3.8", "3.10", "3.12", "3.13"]  # quoted so YAML never reads a version as a float
11 |     steps:
12 |       - uses: actions/checkout@v4
13 |       - uses: actions/setup-python@v5
14 |         with:
15 |           python-version: ${{ matrix.python-version }}
16 |       - name: pip deps
17 |         run: pip install django bs4
18 |       - name: runtests
19 |         run: python runtests.py
20 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[co]


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | build:
3 |   os: ubuntu-22.04
4 |   tools:
5 |     python: "3.11"
6 | sphinx:
7 |   configuration: docs/conf.py
8 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2010 Charles Leifer
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include MANIFEST.in
2 | include LICENSE
3 | include README.rst
4 | include runtests.py
5 | recursive-include micawber/contrib/mcdjango/templates *
6 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | .. image:: http://media.charlesleifer.com/blog/photos/micawber-logo-0.png
 2 | 
 3 | A small library for extracting rich content from urls.
 4 | 
 5 | 
 6 | what does it do?
 7 | ----------------
 8 | 
 9 | micawber supplies a few methods for retrieving rich metadata about a variety of
10 | links, such as links to youtube videos.  micawber also provides functions for
11 | parsing blocks of text and html and replacing links to videos with rich embedded
12 | content.
13 | 
14 | examples
15 | --------
16 | 
17 | here is a quick example:
18 | 
19 | .. code-block:: python
20 | 
21 |     import micawber
22 | 
23 |     # load up rules for some default providers, such as youtube and flickr
24 |     providers = micawber.bootstrap_basic()
25 | 
26 |     providers.request('http://www.youtube.com/watch?v=54XHDUOHuzU')
27 | 
28 |     # returns the following dictionary:
29 |     {
30 |         'author_name': 'pascalbrax',
31 |         'author_url': u'http://www.youtube.com/user/pascalbrax',
32 |         'height': 344,
33 |         'html': u'<iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>',
34 |         'provider_name': 'YouTube',
35 |         'provider_url': 'http://www.youtube.com/',
36 |         'title': 'Future Crew - Second Reality demo - HD',
37 |         'type': u'video',
38 |         'thumbnail_height': 360,
39 |         'thumbnail_url': u'http://i2.ytimg.com/vi/54XHDUOHuzU/hqdefault.jpg',
40 |         'thumbnail_width': 480,
41 |         'url': 'http://www.youtube.com/watch?v=54XHDUOHuzU',
42 |         'width': 459,
43 |         'version': '1.0',
44 |     }
45 | 
46 |     providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU')
47 | 
48 |     # returns the following string:
49 |     this is a test:
50 |     <iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
51 | 
52 |     providers.parse_html('<p>http://www.youtube.com/watch?v=54XHDUOHuzU</p>')
53 | 
54 |     # returns the following html:
55 |     <p><iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&amp;feature=oembed" frameborder="0" allowfullscreen="allowfullscreen"></iframe></p>
56 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # Internal variables.
 11 | PAPEROPT_a4     = -D latex_paper_size=a4
 12 | PAPEROPT_letter = -D latex_paper_size=letter
 13 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 14 | # the i18n builder cannot share the environment and doctrees with the others
 15 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 16 | # None of the targets below produce a file of the same name, so declare them all phony.
 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man texinfo info changes linkcheck doctest gettext
 18 | 
 19 | help:
 20 | 	@echo "Please use \`make <target>' where <target> is one of"
 21 | 	@echo "  html       to make standalone HTML files"
 22 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 23 | 	@echo "  singlehtml to make a single large HTML file"
 24 | 	@echo "  pickle     to make pickle files"
 25 | 	@echo "  json       to make JSON files"
 26 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 27 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 28 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 29 | 	@echo "  epub       to make an epub"
 30 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 31 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 32 | 	@echo "  text       to make text files"
 33 | 	@echo "  man        to make manual pages"
 34 | 	@echo "  texinfo    to make Texinfo files"
 35 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 36 | 	@echo "  gettext    to make PO message catalogs"
 37 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 38 | 	@echo "  linkcheck  to check all external links for integrity"
 39 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 40 | 
 41 | clean:
 42 | 	-rm -rf $(BUILDDIR)/*
 43 | 
 44 | html:
 45 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 46 | 	@echo
 47 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 48 | 
 49 | dirhtml:
 50 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 51 | 	@echo
 52 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 53 | 
 54 | singlehtml:
 55 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 56 | 	@echo
 57 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 58 | 
 59 | pickle:
 60 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 61 | 	@echo
 62 | 	@echo "Build finished; now you can process the pickle files."
 63 | 
 64 | json:
 65 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 66 | 	@echo
 67 | 	@echo "Build finished; now you can process the JSON files."
 68 | 
 69 | htmlhelp:
 70 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 71 | 	@echo
 72 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 73 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 74 | 
 75 | qthelp:
 76 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 77 | 	@echo
 78 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 79 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 80 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/micawber.qhcp"
 81 | 	@echo "To view the help file:"
 82 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/micawber.qhc"
 83 | 
 84 | devhelp:
 85 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 86 | 	@echo
 87 | 	@echo "Build finished."
 88 | 	@echo "To view the help file:"
 89 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/micawber"
 90 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/micawber"
 91 | 	@echo "# devhelp"
 92 | 
 93 | epub:
 94 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 95 | 	@echo
 96 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 97 | 
 98 | latex:
 99 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
100 | 	@echo
101 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
102 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
103 | 	      "(use \`make latexpdf' here to do that automatically)."
104 | 
105 | latexpdf:
106 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
107 | 	@echo "Running LaTeX files through pdflatex..."
108 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
109 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
110 | 
111 | text:
112 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
113 | 	@echo
114 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
115 | 
116 | man:
117 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
118 | 	@echo
119 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
120 | 
121 | texinfo:
122 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
123 | 	@echo
124 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
125 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
126 | 	      "(use \`make info' here to do that automatically)."
127 | 
128 | info:
129 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
130 | 	@echo "Running Texinfo files through makeinfo..."
131 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
132 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
133 | 
134 | gettext:
135 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
136 | 	@echo
137 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
138 | 
139 | changes:
140 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
141 | 	@echo
142 | 	@echo "The overview file is in $(BUILDDIR)/changes."
143 | 
144 | linkcheck:
145 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
146 | 	@echo
147 | 	@echo "Link check complete; look for any errors in the above output " \
148 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
149 | 
150 | doctest:
151 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
152 | 	@echo "Testing of doctests in the sources finished, look at the " \
153 | 	      "results in $(BUILDDIR)/doctest/output.txt."
154 | 


--------------------------------------------------------------------------------
/docs/api.rst:
--------------------------------------------------------------------------------
  1 | .. _api:
  2 | 
  3 | API Documentation
  4 | =================
  5 | 
  6 | Providers
  7 | ---------
  8 | 
  9 | .. py:module:: micawber.providers
 10 | 
 11 | .. py:class:: Provider(endpoint, **kwargs)
 12 | 
 13 |     The :py:class:`Provider` object is responsible for retrieving metadata about
 14 |     a given URL.  It implements a method called :py:meth:`~Provider.request`, which
 15 |     takes a URL and any parameters, which it sends off to an endpoint.  The endpoint
 16 |     should return a JSON dictionary containing metadata about the resource, which is
 17 |     returned to the caller.
 18 | 
 19 |     :param endpoint: the API endpoint which should return information about requested links
 20 |     :param kwargs: any additional url parameters to send to the endpoint on each
 21 |         request, used for providing defaults.  An example use-case might be for
 22 |         providing an API key on each request.
 23 | 
 24 |     .. py:method:: request(url, **extra_params)
 25 | 
 26 |         Retrieve information about the given url.  By default, will make a HTTP
 27 |         GET request to the endpoint.  The url will be sent to the endpoint, along
 28 |         with any parameters specified in the ``extra_params`` and those parameters
 29 |         specified when the class was instantiated.
 30 | 
 31 |         Will raise a :py:class:`ProviderException` in the event the URL is not
 32 |         accessible or the API times out.
 33 | 
 34 |         :param url: URL to retrieve metadata for
 35 |         :param extra_params: additional parameters to pass to the endpoint, for
 36 |             example a maxwidth or an API key.
 37 |         :rtype: a dictionary of JSON data
 38 | 
 39 | 
 40 | .. py:class:: ProviderRegistry([cache=None])
 41 | 
 42 |     A registry for encapsulating a group of :py:class:`Provider` instances,
 43 |     with optional caching support.
 44 | 
 45 |     Handles matching regular expressions to providers.  URLs are sent to the
 46 |     registry via its :py:meth:`~ProviderRegistry.request` method; the registry
 47 |     checks whether it has a provider that matches the URL and, if so, requests
 48 |     the metadata from the provider instance.
 49 | 
 50 |     Exposes methods for parsing various types of text (including HTML), and
 51 |     either rendering oembed media inline or extracting embeddable links.
 52 | 
 53 |     :param cache: the cache simply needs to implement two methods, ``.get(key)`` and ``.set(key, value)``.
 54 | 
 55 |     .. py:method:: register(regex, provider)
 56 | 
 57 |         Register the provider with the following regex.
 58 | 
 59 |         Example:
 60 | 
 61 |         .. code-block:: python
 62 | 
 63 |             registry = ProviderRegistry()
 64 |             registry.register(
 65 |                 r'http://\S*.youtu(\.be|be\.com)/watch\S*',
 66 |                 Provider('http://www.youtube.com/oembed'),
 67 |             )
 68 | 
 69 |         :param regex: a regex for matching URLs of a given type
 70 |         :param provider: a :py:class:`Provider` instance
 71 | 
 72 |     .. py:method:: request(url, **extra_params)
 73 | 
 74 |         Retrieve information about the given url if it matches a regex in the
 75 |         instance's registry.  If no provider matches the URL, a
 76 |         ``ProviderException`` is thrown, otherwise the URL and parameters are
 77 |         dispatched to the matching provider's :py:meth:`Provider.request`
 78 |         method.
 79 | 
 80 |         If a cache was specified, the resulting metadata will be cached.
 81 | 
 82 |         :param url: URL to retrieve metadata for
 83 |         :param extra_params: additional parameters to pass to the endpoint, for
 84 |             example a maxwidth or an API key.
 85 |         :rtype: a dictionary of JSON data
 86 | 
 87 |     .. py:method:: parse_text_full(text[, urlize_all=True[, handler=full_handler[, urlize_params=None[, **params]]]])
 88 | 
 89 |         Parse a block of text, converting *all* links by passing them to the
 90 |         given handler.  Links contained within a block of text (i.e. not on
 91 |         their own line) will be handled as well.
 92 | 
 93 |         Example input and output::
 94 | 
 95 |             IN: 'this is a pic http://example.com/some-pic/'
 96 |             OUT: 'this is a pic <a href="http://example.com/some-pic/"><img src="http://example.com/media/some-pic.jpg" /></a>'
 97 | 
 98 |         :param str text: a string to parse
 99 |         :param bool urlize_all: convert unmatched urls into links
100 |         :param handler: function to use to convert metadata back into a string representation
101 |         :param dict urlize_params: keyword arguments to be used to construct a link
102 |             when a provider is not found and urlize is enabled.
103 |         :param params: any additional parameters to use when requesting metadata, i.e.
104 |             a maxwidth or maxheight.
105 | 
106 |     .. py:method:: parse_text(text[, urlize_all=True[, handler=full_handler[, block_handler=inline_handler[, urlize_params=None[, **params]]]]])
107 | 
108 |         Very similar to :py:meth:`~ProviderRegistry.parse_text_full` except
109 |         URLs *on their own line* are rendered using the given ``handler``,
110 |         whereas URLs within blocks of text are passed to the ``block_handler``.
111 |         The default behavior renders full content for URLs on their own line
112 |         (e.g. a video player), whereas URLs within text are rendered simply as
113 |         links so as not to disrupt the flow of text.
114 | 
115 |         * URLs on their own line are converted into full representations
116 |         * URLs within blocks of text are converted into clickable links
117 | 
118 |         :param str text: a string to parse
119 |         :param bool urlize_all: convert unmatched urls into links
120 |         :param handler: function to use to convert links found on their own line
121 |         :param block_handler: function to use to convert links found within blocks of text
122 |         :param dict urlize_params: keyword arguments to be used to construct a link
123 |             when a provider is not found and urlize is enabled.
124 |         :param params: any additional parameters to use when requesting metadata, i.e.
125 |             a maxwidth or maxheight.
126 | 
127 |     .. py:method:: parse_html(html[, urlize_all=True[, handler=full_handler[, block_handler=inline_handler[, urlize_params=None[, **params]]]]])
128 | 
129 |         Parse HTML intelligently, rendering items on their own within block
130 |         elements as full content (e.g. a video player), whereas URLs within
131 |         text are passed to the ``block_handler`` which by default will render a
132 |         simple link. URLs that are already enclosed within a ``<a>`` tag are
133 |         **skipped over**.
134 | 
135 |         * URLs that are already within <a> tags are passed over
136 |         * URLs on their own in block tags are converted into full representations
137 |         * URLs interspersed with text are converted into clickable links
138 | 
139 |         .. note:: requires BeautifulSoup or beautifulsoup4
140 | 
141 |         :param str html: a string of HTML to parse
142 |         :param bool urlize_all: convert unmatched urls into links
143 |         :param handler: function to use to convert links found on their own within a block element
144 |         :param block_handler: function to use to convert links found within blocks of text
145 |         :param dict urlize_params: keyword arguments to be used to construct a link
146 |             when a provider is not found and urlize is enabled.
147 |         :param params: any additional parameters to use when requesting metadata, i.e.
148 |             a maxwidth or maxheight.
149 | 
150 |     .. py:method:: extract(text, **params)
151 | 
152 |         Extract all URLs from a block of text, and additionally get any
153 |         metadata for URLs we have providers for.
154 | 
155 |         :param str text: a string to parse
156 |         :param params: any additional parameters to use when requesting
157 |             metadata, i.e. a maxwidth or maxheight.
158 |         :rtype: returns a 2-tuple containing a list of all URLs and a dict
159 |             keyed by URL containing any metadata.  If a provider was not found
160 |             for a URL it is not listed in the dictionary.
161 | 
162 |     .. py:method:: extract_html(html, **params)
163 | 
164 |         Extract all URLs from an HTML string, and additionally get any metadata
165 |         for URLs we have providers for.  Equivalent to
166 |         :py:meth:`~ProviderRegistry.extract`, but for HTML input.
167 | 
168 |         .. note:: URLs within <a> tags will not be included.
169 | 
170 |         :param str html: a string to parse
171 |         :param params: any additional parameters to use when requesting
172 |             metadata, i.e. a maxwidth or maxheight.
173 |         :rtype: returns a 2-tuple containing a list of all URLs and a dict
174 |             keyed by URL containing any metadata.  If a provider was not found
175 |             for a URL it is not listed in the dictionary.
176 | 
177 | 
178 | .. py:function:: bootstrap_basic([cache=None[, registry=None]])
179 | 
180 |     Create a :py:class:`ProviderRegistry` and register some basic providers,
181 |     including youtube, flickr, vimeo.
182 | 
183 |     :param cache: an object that implements simple ``get`` and ``set``
184 |     :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used.
185 |     :rtype: a ``ProviderRegistry`` with a handful of providers registered
186 | 
187 | 
188 | .. py:function:: bootstrap_oembed([cache=None[, registry=None[, refresh=False[, **kwargs]]]])
189 | 
190 |     Create a :py:class:`ProviderRegistry` and register as many providers as
191 |     are described in the `oembed.com <https://oembed.com>`_ providers list.
192 | 
193 |     .. note::
194 |         This function makes a request over the internet whenever it is called.
195 | 
196 |     :param cache: an object that implements simple ``get`` and ``set``
197 |     :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used.
198 |     :param bool refresh: force refreshing the provider data rather than attempting to load it from cache (if cache is used).
199 |     :param kwargs: any default keyword arguments to use with providers
200 |     :rtype: a ProviderRegistry with support for the providers listed on oembed.com
201 | 
202 | 
203 | .. py:function:: bootstrap_embedly([cache=None[, registry=None[, refresh=False[, **kwargs]]]])
204 | 
205 |     Create a :py:class:`ProviderRegistry` and register as many providers as
206 |     are supported by `embed.ly <http://embed.ly>`_.  Valid services are
207 |     fetched from http://api.embed.ly/1/services/python and parsed then registered.
208 | 
209 |     .. note::
210 |         This function makes a request over the internet whenever it is called.
211 | 
212 |     :param cache: an object that implements simple ``get`` and ``set``
213 |     :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used.
214 |     :param bool refresh: force refreshing the provider data rather than attempting to load it from cache (if cache is used).
215 |     :param kwargs: any default keyword arguments to use with providers, useful for
216 |         specifying your API key
217 |     :rtype: a ProviderRegistry with support for embed.ly
218 | 
219 |     .. code-block:: python
220 | 
221 |         # if you have an API key, you can specify that here
222 |         pr = bootstrap_embedly(key='my-embedly-key')
223 |         pr.request('http://www.youtube.com/watch?v=54XHDUOHuzU')
224 | 
225 | 
226 | .. py:function:: bootstrap_noembed([cache=None[, registry=None[, refresh=False[, **kwargs]]]])
227 | 
228 |     Create a :py:class:`ProviderRegistry` and register as many providers as
229 |     are supported by `noembed.com <http://noembed.com>`_.  Valid services are
230 |     fetched from http://noembed.com/providers and parsed then registered.
231 | 
232 |     .. note::
233 |         This function makes a request over the internet whenever it is called.
234 | 
235 |     :param cache: an object that implements simple ``get`` and ``set``
236 |     :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used.
237 |     :param bool refresh: force refreshing the provider data rather than attempting to load it from cache (if cache is used).
238 |     :param kwargs: any default keyword arguments to use with providers, useful for
239 |         passing the ``nowrap`` option to noembed.
240 |     :rtype: a ProviderRegistry with support for noembed
241 | 
242 |     .. code-block:: python
243 | 
244 |         # extra keyword arguments, such as nowrap, are passed along to noembed
245 |         pr = bootstrap_noembed(nowrap=1)
246 |         pr.request('http://www.youtube.com/watch?v=54XHDUOHuzU')
247 | 
248 | 
249 | Cache
250 | -----
251 | 
252 | .. py:module:: micawber.cache
253 | 
254 | .. py:class:: Cache()
255 | 
256 |     A reference implementation for the cache interface used by the :py:class:`ProviderRegistry`.
257 | 
258 |     .. code-block:: python
259 | 
260 |         from micawber import Cache, bootstrap_oembed
261 |         cache = Cache()  # Simple in-memory cache.
262 | 
263 |         # Now our oembed provider will cache the responses for each URL we
264 |         # request, which can provide a significant speedup.
265 |         pr = bootstrap_oembed(cache=cache)
266 | 
267 |     .. py:method:: get(key)
268 | 
269 |         Retrieve the key from the cache or ``None`` if not present
270 | 
271 |     .. py:method:: set(key, value)
272 | 
273 |         Set the cache key ``key`` to the given ``value``.
274 | 
275 | .. py:class:: PickleCache([filename='cache.db'])
276 | 
277 |     A cache that uses pickle to store data.
278 | 
279 |     .. note::
280 |         To use this cache class be sure to call :py:meth:`~PickleCache.load` when
281 |         initializing your cache and :py:meth:`~PickleCache.save` before your app
282 |         terminates to persist cached data.
283 | 
284 |     .. py:method:: load()
285 | 
286 |         Load the pickled data into memory
287 | 
288 |     .. py:method:: save()
289 | 
290 |         Store the internal cache to an external file
291 | 
292 | .. py:class:: RedisCache([namespace='micawber'[, timeout=None[, **conn]]])
293 | 
294 |     A cache that uses Redis to store data
295 | 
296 |     .. note:: requires the redis-py library, ``pip install redis``
297 | 
298 |     :param namespace: prefix for cache keys
299 |     :param int timeout: expiration timeout in seconds (optional)
300 |     :param conn: keyword arguments to pass when initializing redis connection
301 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # micawber documentation build configuration file, created by
  4 | # sphinx-quickstart on Tue Apr 17 13:43:41 2012.
  5 | #
  6 | # This file is execfile()d with the current directory set to its containing dir.
  7 | #
  8 | # Note that not all possible configuration values are present in this
  9 | # autogenerated file.
 10 | #
 11 | # All configuration values have a default; values that are commented out
 12 | # serve to show the default.
 13 | 
 14 | import sys, os
 15 | 
 16 | # If extensions (or modules to document with autodoc) are in another directory,
 17 | # add these directories to sys.path here. If the directory is relative to the
 18 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 19 | #sys.path.insert(0, os.path.abspath('.'))
 20 | 
 21 | # -- General configuration -----------------------------------------------------
 22 | 
 23 | # If your documentation needs a minimal Sphinx version, state it here.
 24 | #needs_sphinx = '1.0'
 25 | 
 26 | # Add any Sphinx extension module names here, as strings. They can be extensions
 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 28 | extensions = []
 29 | 
 30 | # Add any paths that contain templates here, relative to this directory.
 31 | templates_path = ['_templates']
 32 | 
 33 | # The suffix of source filenames.
 34 | source_suffix = '.rst'
 35 | 
 36 | # The encoding of source files.
 37 | #source_encoding = 'utf-8-sig'
 38 | 
 39 | # The master toctree document.
 40 | master_doc = 'index'
 41 | 
 42 | # General information about the project.
 43 | project = u'micawber'
 44 | copyright = u'2013, charles leifer'
 45 | 
 46 | # The version info for the project you're documenting, acts as replacement for
 47 | # |version| and |release|, also used in various other places throughout the
 48 | # built documents.
 49 | #
 50 | # The short X.Y version.
 51 | version = '0.3.4'
 52 | # The full version, including alpha/beta/rc tags.
 53 | release = '0.3.4'
 54 | 
 55 | # The language for content autogenerated by Sphinx. Refer to documentation
 56 | # for a list of supported languages.
 57 | #language = None
 58 | 
 59 | # There are two options for replacing |today|: either, you set today to some
 60 | # non-false value, then it is used:
 61 | #today = ''
 62 | # Else, today_fmt is used as the format for a strftime call.
 63 | #today_fmt = '%B %d, %Y'
 64 | 
 65 | # List of patterns, relative to source directory, that match files and
 66 | # directories to ignore when looking for source files.
 67 | exclude_patterns = ['_build']
 68 | 
 69 | # The reST default role (used for this markup: `text`) to use for all documents.
 70 | #default_role = None
 71 | 
 72 | # If true, '()' will be appended to :func: etc. cross-reference text.
 73 | #add_function_parentheses = True
 74 | 
 75 | # If true, the current module name will be prepended to all description
 76 | # unit titles (such as .. function::).
 77 | #add_module_names = True
 78 | 
 79 | # If true, sectionauthor and moduleauthor directives will be shown in the
 80 | # output. They are ignored by default.
 81 | #show_authors = False
 82 | 
 83 | # The name of the Pygments (syntax highlighting) style to use.
 84 | pygments_style = 'sphinx'
 85 | 
 86 | # A list of ignored prefixes for module index sorting.
 87 | #modindex_common_prefix = []
 88 | 
 89 | 
 90 | # -- Options for HTML output ---------------------------------------------------
 91 | 
 92 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 93 | # a list of builtin themes.
 94 | html_theme = 'default'
 95 | 
 96 | # Theme options are theme-specific and customize the look and feel of a theme
 97 | # further.  For a list of options available for each theme, see the
 98 | # documentation.
 99 | #html_theme_options = {}
100 | 
101 | # Add any paths that contain custom themes here, relative to this directory.
102 | #html_theme_path = []
103 | 
104 | # The name for this set of Sphinx documents.  If None, it defaults to
105 | # "<project> v<release> documentation".
106 | #html_title = None
107 | 
108 | # A shorter title for the navigation bar.  Default is the same as html_title.
109 | #html_short_title = None
110 | 
111 | # The name of an image file (relative to this directory) to place at the top
112 | # of the sidebar.
113 | #html_logo = None
114 | 
115 | # The name of an image file (within the static path) to use as favicon of the
116 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
117 | # pixels large.
118 | #html_favicon = None
119 | 
120 | # Add any paths that contain custom static files (such as style sheets) here,
121 | # relative to this directory. They are copied after the builtin static files,
122 | # so a file named "default.css" will overwrite the builtin "default.css".
123 | html_static_path = ['_static']
124 | 
125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
126 | # using the given strftime format.
127 | #html_last_updated_fmt = '%b %d, %Y'
128 | 
129 | # If true, SmartyPants will be used to convert quotes and dashes to
130 | # typographically correct entities.
131 | #html_use_smartypants = True
132 | 
133 | # Custom sidebar templates, maps document names to template names.
134 | #html_sidebars = {}
135 | 
136 | # Additional templates that should be rendered to pages, maps page names to
137 | # template names.
138 | #html_additional_pages = {}
139 | 
140 | # If false, no module index is generated.
141 | #html_domain_indices = True
142 | 
143 | # If false, no index is generated.
144 | #html_use_index = True
145 | 
146 | # If true, the index is split into individual pages for each letter.
147 | #html_split_index = False
148 | 
149 | # If true, links to the reST sources are added to the pages.
150 | #html_show_sourcelink = True
151 | 
152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
153 | #html_show_sphinx = True
154 | 
155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
156 | #html_show_copyright = True
157 | 
158 | # If true, an OpenSearch description file will be output, and all pages will
159 | # contain a <link> tag referring to it.  The value of this option must be the
160 | # base URL from which the finished HTML is served.
161 | #html_use_opensearch = ''
162 | 
163 | # This is the file name suffix for HTML files (e.g. ".xhtml").
164 | #html_file_suffix = None
165 | 
166 | # Output file base name for HTML help builder.
167 | htmlhelp_basename = 'micawberdoc'
168 | 
169 | 
170 | # -- Options for LaTeX output --------------------------------------------------
171 | 
172 | latex_elements = {
173 | # The paper size ('letterpaper' or 'a4paper').
174 | #'papersize': 'letterpaper',
175 | 
176 | # The font size ('10pt', '11pt' or '12pt').
177 | #'pointsize': '10pt',
178 | 
179 | # Additional stuff for the LaTeX preamble.
180 | #'preamble': '',
181 | }
182 | 
183 | # Grouping the document tree into LaTeX files. List of tuples
184 | # (source start file, target name, title, author, documentclass [howto/manual]).
185 | latex_documents = [
186 |   ('index', 'micawber.tex', u'micawber Documentation',
187 |    u'charles leifer', 'manual'),
188 | ]
189 | 
190 | # The name of an image file (relative to this directory) to place at the top of
191 | # the title page.
192 | #latex_logo = None
193 | 
194 | # For "manual" documents, if this is true, then toplevel headings are parts,
195 | # not chapters.
196 | #latex_use_parts = False
197 | 
198 | # If true, show page references after internal links.
199 | #latex_show_pagerefs = False
200 | 
201 | # If true, show URL addresses after external links.
202 | #latex_show_urls = False
203 | 
204 | # Documents to append as an appendix to all manuals.
205 | #latex_appendices = []
206 | 
207 | # If false, no module index is generated.
208 | #latex_domain_indices = True
209 | 
210 | 
211 | # -- Options for manual page output --------------------------------------------
212 | 
213 | # One entry per manual page. List of tuples
214 | # (source start file, name, description, authors, manual section).
215 | man_pages = [
216 |     ('index', 'micawber', u'micawber Documentation',
217 |      [u'charles leifer'], 1)
218 | ]
219 | 
220 | # If true, show URL addresses after external links.
221 | #man_show_urls = False
222 | 
223 | 
224 | # -- Options for Texinfo output ------------------------------------------------
225 | 
226 | # Grouping the document tree into Texinfo files. List of tuples
227 | # (source start file, target name, title, author,
228 | #  dir menu entry, description, category)
229 | texinfo_documents = [
230 |   ('index', 'micawber', u'micawber Documentation',
231 |    u'charles leifer', 'micawber', 'One line description of project.',
232 |    'Miscellaneous'),
233 | ]
234 | 
235 | # Documents to append as an appendix to all manuals.
236 | #texinfo_appendices = []
237 | 
238 | # If false, no module index is generated.
239 | #texinfo_domain_indices = True
240 | 
241 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
242 | #texinfo_show_urls = 'footnote'
243 | 


--------------------------------------------------------------------------------
/docs/django.rst:
--------------------------------------------------------------------------------
  1 | .. _django:
  2 | 
  3 | Django integration
  4 | ==================
  5 | 
  6 | First be sure you have added ``micawber.contrib.mcdjango`` to ``INSTALLED_APPS``
  7 | so that we can use the template filters it defines.
  8 | 
  9 | .. code-block:: python
 10 | 
 11 |     # settings.py
 12 | 
 13 |     INSTALLED_APPS = [
 14 |         # ...
 15 |         'micawber.contrib.mcdjango',
 16 |     ]
 17 | 
 18 | micawber provides 4 template filters for converting URLs contained within
 19 | text or HTML to rich content:
 20 | 
 21 | * :py:func:`~micawber.contrib.mcdjango.oembed` for plain text
 22 | * :py:func:`~micawber.contrib.mcdjango.oembed_html` for html
 23 | * :py:func:`~micawber.contrib.mcdjango.extract_oembed` for extracting url data from plain text
 24 | * :py:func:`~micawber.contrib.mcdjango.extract_oembed_html` for extracting url data from html
 25 | 
 26 | These filters are registered in the ``micawber_tags`` library, which can be
 27 | invoked in your templates:
 28 | 
 29 | .. code-block:: html
 30 | 
 31 |     {% load micawber_tags %}
 32 | 
 33 |     <p>{{ object.body|oembed:"600x600" }}</p>
 34 | 
 35 | Each filter takes the text to parse plus a single optional argument, because Django
 36 | template filters accept at most one argument in addition to the filtered value.
 37 | 
 38 | Piping a string through the ``oembed`` filter (or ``oembed_html``) will convert
 39 | URLs to things like youtube videos into video players.  A couple things to
 40 | understand about the parsers:
 41 | 
 42 | * the plaintext parser (``oembed``) will convert URLs *on their own line* into
 43 |   full images/video-players/etc.  URLs that are interspersed within text will
 44 |   simply be converted into clickable links so as not to disrupt the flow of text.
 45 | * the HTML parser (``oembed_html``) will convert URLs that *are not already links*
 46 |   into full images/video-players/etc. URLs within block elements along with other
 47 |   text will be converted into clickable links as this would likely disrupt the flow
 48 |   of text or produce invalid HTML.
 49 | 
 50 | .. note::
 51 |     You can control how things are rendered -- check out `the default templates <https://github.com/coleifer/micawber/tree/master/micawber/contrib/mcdjango/templates/micawber>`_
 52 |     for reference implementations.
 53 | 
 54 | 
 55 | Django filter API
 56 | -----------------
 57 | 
 58 | .. py:module:: micawber.contrib.mcdjango
 59 | 
 60 | The following filters are exposed via the :py:mod:`micawber.contrib.mcdjango` module:
 61 | 
 62 | .. py:function:: oembed(text[, width_height=None])
 63 | 
 64 |     Parse the given text, rendering URLs as rich media
 65 | 
 66 |     Usage within a django template:
 67 | 
 68 |     .. code-block:: python
 69 | 
 70 |         {{ blog_entry.body|oembed:"600x600" }}
 71 | 
 72 |     :param text: the text to be parsed **do not use HTML**
 73 |     :param width_height: string containing maximum for width and optionally height, of
 74 |         format "WIDTHxHEIGHT" or "WIDTH", e.g. "500x500" or "800"
 75 |     :rtype: parsed text with rich content embedded
 76 | 
 77 | .. py:function:: oembed_html(html[, width_height=None])
 78 | 
 79 |     Exactly the same as above except for usage *with html*
 80 | 
 81 |     Usage within a django template:
 82 | 
 83 |     .. code-block:: python
 84 | 
 85 |         {{ blog_entry.body|markdown|oembed_html:"600x600" }}
 86 | 
 87 | .. py:function:: extract_oembed(text[, width_height=None])
 88 | 
 89 |     Parse the given text, returning a list of 2-tuples containing url and metadata
 90 |     about the url.
 91 | 
 92 |     Usage within a django template:
 93 | 
 94 |     .. code-block:: python
 95 | 
 96 |         {% for url, metadata in blog_entry.body|extract_oembed:"600x600" %}
 97 |           <img src="{{ metadata.thumbnail_url }}" />
 98 |         {% endfor %}
 99 | 
100 |     :param text: the text to be parsed **do not use HTML**
101 |     :param width_height: string containing maximum for width and optionally height, of
102 |         format "WIDTHxHEIGHT" or "WIDTH", e.g. "500x500" or "800"
103 |     :rtype: list of 2-tuples, each containing a URL and a dictionary of metadata
104 | 
105 | .. py:function:: extract_oembed_html(html[, width_height=None])
106 | 
107 |     Exactly the same as above except for usage *with html*
108 | 
109 | 
110 | Extending the filters
111 | ---------------------
112 | 
113 | For simplicity, micawber provides a setting allowing you to create custom template
114 | filters.  An example use case would be to add a template filter that could embed
115 | rich content, but did not automatically "urlize" all links.
116 | 
117 | Extensions are configured in the ``settings`` module and take the form of a list of
118 | 2-tuples containing:
119 | 
120 | 1. the name for the custom filter
121 | 2. a dictionary of keyword arguments to pass in to the ``parse`` function
122 | 
123 | .. code-block:: python
124 | 
125 |     MICAWBER_TEMPLATE_EXTENSIONS = [
126 |         ('oembed_no_urlize', {'urlize_all': False}),
127 |     ]
128 | 
129 | Assume this is our template:
130 | 
131 | .. code-block:: html
132 | 
133 |     {% load micawber_tags %}
134 | 
135 |     DEFAULT:
136 |     {{ "http://foo.com/ and http://bar.com/"|oembed }}
137 | 
138 |     CUSTOM:
139 |     {{ "http://foo.com/ and http://bar.com/"|oembed_no_urlize }}
140 | 
141 | Rendering the above template will produce the following output:
142 | 
143 | .. code-block:: html
144 | 
145 |     DEFAULT:
146 |     <a href="http://foo.com/">http://foo.com/</a> and <a href="http://bar.com/">http://bar.com/</a>
147 | 
148 |     CUSTOM:
149 |     http://foo.com/ and http://bar.com/
150 | 
151 | Some examples of keyword arguments to override are:
152 | 
153 | * providers: a :py:class:`~micawber.providers.ProviderRegistry` instance
154 | * urlize_all (default ``True``): whether to convert *all* URLs to clickable links
155 | * html (default ``False``): whether to parse as plaintext or html
156 | * handler: function used to render metadata as markup
157 | * block_handler: function used to render inline links with rich metadata
158 | * text_fn: function to use when parsing text
159 | * html_fn: function to use when parsing html
160 | 
161 | The magic happens in :py:func:`micawber.contrib.mcdjango.extension` -- check
162 | out the `source code <https://github.com/coleifer/micawber/blob/master/micawber/contrib/mcdjango/__init__.py>`_ for more details.
163 | 
164 | .. note::
165 |     The ``MICAWBER_TEMPLATE_EXTENSIONS`` setting can also be a string path to
166 |     a module and an attribute containing a similar data structure.
167 | 
168 | 
169 | Additional settings
170 | -------------------
171 | 
172 | Providers
173 | ^^^^^^^^^
174 | 
175 | The most important setting to configure is the module / attribute
176 | path to the providers you wish to use.  The attribute can either
177 | be a ProviderRegistry instance or a callable.  The default is:
178 | 
179 | ``MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_basic'``
180 | 
181 | You can use the bootstrap embedly function, but beware this may take a few
182 | seconds to load up:
183 | 
184 | ``MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_embedly'``
185 | 
186 | If you want to use the embedly endpoints and have an API key, you can specify
187 | that in the settings:
188 | 
189 | ``MICAWBER_EMBEDLY_KEY = 'foo'``
190 | 
191 | You can also customize this with your own set of providers.  This must be either
192 | 
193 | * the module path to a :py:class:`~micawber.providers.ProviderRegistry` instance
194 | * the module path to a callable which returns a :py:class:`~micawber.providers.ProviderRegistry` instance
195 | 
196 | Here is a quick example showing a custom ``ProviderRegistry``:
197 | 
198 | .. code-block:: python
199 | 
200 |     # settings.py
201 |     MICAWBER_PROVIDERS = 'my_app.micawber_providers.oembed_providers'
202 | 
203 | .. code-block:: python
204 | 
205 |     # my_app/micawber_providers.py
206 |     from django.core.cache import cache
207 |     from micawber.providers import Provider, bootstrap_basic
208 | 
209 |     oembed_providers = bootstrap_basic(cache)
210 | 
211 |     # add a custom provider
212 |     oembed_providers.register('http://example.com/\S*', Provider('http://example.com/oembed/'))
213 | 
214 | 
215 | Default settings for requests
216 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
217 | 
218 | Because of the limitations of django's template filters, we do not
219 | have the flexibility to pass in multiple arguments to the filters.
220 | Default arguments need to be specified in the settings:
221 | 
222 | .. code-block:: python
223 | 
224 |     MICAWBER_DEFAULT_SETTINGS = {
225 |         'key': 'your-embedly-api-key',
226 |         'maxwidth': 600,
227 |         'maxheight': 600,
228 |     }
229 | 
230 | 
231 | Trying it out in the python shell
232 | ---------------------------------
233 | 
234 | .. code-block:: python
235 | 
236 |     >>> from django.template import Template, Context
237 |     >>> t = Template('{% load micawber_tags %}{{ "http://www.youtube.com/watch?v=mQEWI1cn7HY"|oembed }}')
238 |     >>> t.render(Context())
239 |     u'<iframe width="480" height="270" src="http://www.youtube.com/embed/mQEWI1cn7HY?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>'
240 | 


--------------------------------------------------------------------------------
/docs/examples.rst:
--------------------------------------------------------------------------------
 1 | .. _examples:
 2 | 
 3 | Examples
 4 | ========
 5 | 
 6 | micawber comes with a handful of examples showing usage with
 7 | 
 8 | * :ref:`django <django_example>`
 9 | * :ref:`flask <flask_example>`
10 | * :ref:`simple python script <python_example>`
11 | 
12 | .. _django_example:
13 | 
14 | Django example
15 | --------------
16 | 
17 | The django example is very simple -- it illustrates a single view that renders
18 | text inputted by the user by piping it through the :py:func:`~micawber.contrib.mcdjango.oembed`
19 | filter.  It also shows the output of the :py:func:`~micawber.contrib.mcdjango.extract_oembed`
20 | filter, which returns a list of (URL, metadata) 2-tuples.  There is also an input where
21 | you can experiment with entering HTML.
22 | 
23 | To run the example::
24 | 
25 |     cd examples/django_ex/
26 |     ./manage.py runserver
27 | 
28 | Check out the `example source code <https://github.com/coleifer/micawber/tree/master/examples/django_ex>`_.
29 | 
30 | 
31 | .. _flask_example:
32 | 
33 | Flask example
34 | -------------
35 | 
36 | The flask example is almost identical in terms of functionality to the django example. It 
37 | shows a one-file app with a single view that renders
38 | text inputted by the user by piping it through the :py:func:`~micawber.contrib.mcflask.oembed`
39 | filter.  It also shows the output of the :py:func:`~micawber.contrib.mcflask.extract_oembed`
40 | filter, which returns a 2-tuple of (list of URLs, dictionary of URL -> metadata).  There is also an input where
41 | you can experiment with entering HTML.
42 | 
43 | To run the example::
44 | 
45 |     cd examples/flask_ex/
46 |     python app.py
47 | 
48 | Check out the `example source code <https://github.com/coleifer/micawber/tree/master/examples/flask_ex>`_.
49 | 
50 | .. _python_example:
51 | 
52 | Python example
53 | --------------
54 | 
55 | The python example is a command-line app that shows the use of the :py:class:`micawber.providers.ProviderRegistry`
56 | and :py:func:`micawber.providers.bootstrap_embedly`.  It runs a loop asking the user to input
57 | URLs, outputting rich metadata when possible (view http://embed.ly for a full list of providers).
58 | 
59 | To run the example::
60 | 
61 |     cd examples/python_ex/
62 |     python example.py
63 | 
64 | Check out the `example source code <https://github.com/coleifer/micawber/tree/master/examples/python_ex/example.py>`_.
65 | 


--------------------------------------------------------------------------------
/docs/flask.rst:
--------------------------------------------------------------------------------
 1 | .. _flask:
 2 | 
 3 | Flask integration
 4 | =================
 5 | 
 6 | micawber exposes two Jinja template filters for use in your flask templates:
 7 | 
 8 | * :py:func:`~micawber.contrib.mcflask.oembed`
 9 | * :py:func:`~micawber.contrib.mcflask.extract_oembed`
10 | 
11 | You can add them to your jinja environment by using the helper function:
12 | 
13 | .. code-block:: python
14 | 
15 |     from flask import Flask
16 |     from micawber.providers import bootstrap_basic
17 |     from micawber.contrib.mcflask import add_oembed_filters
18 | 
19 |     app = Flask(__name__)
20 | 
21 |     oembed_providers = bootstrap_basic()
22 |     add_oembed_filters(app, oembed_providers)
23 | 
24 | Now you can use the filters in your templates:
25 | 
26 | .. code-block:: html
27 | 
28 |     {% block content %}
29 |       <p>{{ object.body|oembed(html=False, maxwidth=600, maxheight=600) }}</p>
30 |     {% endblock %}
31 | 
32 | Flask filter API
33 | ----------------
34 | 
35 | .. py:module:: micawber.contrib.mcflask
36 | 
37 | The following filters are exposed via the :py:mod:`micawber.contrib.mcflask` module:
38 | 
39 | .. py:function:: oembed(text, urlize_all=True, html=False, **params)
40 | 
41 |     Parse the given text, rendering URLs as rich media
42 | 
43 |     Usage within a Jinja2 template:
44 | 
45 |     .. code-block:: python
46 | 
47 |         {{ blog_entry.body|oembed(urlize_all=False, maxwidth=600) }}
48 | 
49 |     :param text: the text to be parsed, can be HTML
50 |     :param urlize_all: boolean indicating whether to convert bare links to clickable ones
51 |     :param html: boolean indicating whether text is plaintext or markup
52 |     :param params: any additional keyword arguments, e.g. maxwidth or an api key
53 |     :rtype: parsed text with rich content embedded
54 | 
55 | .. py:function:: extract_oembed(text, html=False, **params)
56 | 
57 |     Returns a 2-tuple containing
58 | 
59 |     * a list of all URLs found within the text (if HTML, all URLs that aren't already links)
60 |     * a dictionary of URL to metadata provided by the API endpoint
61 | 
62 |     .. note::
63 |         Not all URLs listed will have matching entries in the dictionary, since there
64 |         may not be a provider for them.
65 | 
66 |     :param text: the text to be parsed, can be HTML
67 |     :param html: boolean indicating whether text is plaintext or markup
68 |     :param params: any additional keyword arguments, e.g. maxwidth or an api key
69 |     :rtype: 2-tuple containing a list of *all* urls and a dictionary of url -> metadata
70 | 
71 | Adding filters to the Jinja Environment
72 | ---------------------------------------
73 | 
74 | To actually use these filters they must be made available to the application.  Use the
75 | following function to do this sometime after initializing your ``Flask`` app:
76 | 
77 | .. py:function:: add_oembed_filters(app, providers)
78 | 
79 |     Add the ``oembed`` and ``extract_oembed`` filters to the jinja environment
80 | 
81 |     :param app: a flask application
82 |     :param providers: a :py:class:`micawber.providers.ProviderRegistry` instance
83 |     :rtype: (no return value)
84 | 


--------------------------------------------------------------------------------
/docs/getting_started.rst:
--------------------------------------------------------------------------------
  1 | .. _getting_started:
  2 | 
  3 | Getting Started
  4 | ===============
  5 | 
  6 | If you want the dead simple get-me-up-and-running, try the following:
  7 | 
  8 | .. code-block:: python
  9 | 
 10 |     >>> import micawber
 11 |     >>> providers = micawber.bootstrap_basic() # may take a second
 12 |     >>> print(providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU'))
 13 |     this is a test:
 14 |     <iframe width="640" height="360" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
 15 | 
 16 | Using django?  Add ``micawber.contrib.mcdjango`` to your ``INSTALLED_APPS``, then
 17 | in your templates:
 18 | 
 19 | .. code-block:: html
 20 | 
 21 |     {% load micawber_tags %}
 22 |     {# show a video player for the youtube video #}
 23 |     {{ "http://www.youtube.com/watch?v=mQEWI1cn7HY"|oembed }}
 24 | 
 25 | Using flask?  Use the ``add_oembed_filters`` function to register two jinja
 26 | template filters, ``oembed`` and ``extract_oembed``:
 27 | 
 28 | .. code-block:: python
 29 | 
 30 |     from flask import Flask
 31 |     from micawber.providers import bootstrap_basic
 32 |     from micawber.contrib.mcflask import add_oembed_filters
 33 | 
 34 |     app = Flask(__name__)
 35 | 
 36 |     oembed_providers = bootstrap_basic()
 37 |     add_oembed_filters(app, oembed_providers)
 38 | 
 39 | .. code-block:: html
 40 | 
 41 |     {# show a video player for the youtube video #}
 42 |     {{ "http://www.youtube.com/watch?v=mQEWI1cn7HY"|oembed() }}
 43 | 
 44 | Overview
 45 | --------
 46 | 
 47 | micawber is rather simple.  It is built to use the `oembed <http://oembed.com/>`_ spec,
 48 | which is designed for converting URLs into rich, embeddable content.  Many popular sites
 49 | support this, including youtube and flickr.  There is also a 3rd-party service called
 50 | `embedly <http://embed.ly>`_ that can convert many types of links into rich content.
 51 | 
 52 | micawber was designed to make it easy to integrate with these APIs.  There are
 53 | two concepts to understand when using micawber:
 54 | 
 55 | * :py:class:`~micawber.providers.Provider` objects - which describe how to
 56 |   match a URL (based on a regex) to an OEmbed endpoint.
 57 | * :py:class:`~micawber.providers.ProviderRegistry` objects - which encapsulate
 58 |   a collection of providers and expose methods for parsing text and HTML to
 59 |   convert links into media objects.
 60 | 
 61 | 
 62 | Providers
 63 | ---------
 64 | 
 65 | Providers are used to convert URLs into rich metadata.  They have an endpoint
 66 | associated with them and can have any number of arbitrary URL parameters (such
 67 | as API keys) which are used when making API requests.
 68 | 
 69 | Example:
 70 | 
 71 | .. code-block:: python
 72 | 
 73 |     from micawber.providers import Provider
 74 | 
 75 |     youtube = Provider('http://www.youtube.com/oembed')
 76 |     youtube.request('http://www.youtube.com/watch?v=nda_OSWeyn8')
 77 | 
 78 | The above code returns a dictionary containing metadata about the requested
 79 | video, including the markup for an embeddable player::
 80 | 
 81 |     {'author_name': u'botmib',
 82 |      'author_url': u'http://www.youtube.com/user/botmib',
 83 |      'height': 344,
 84 |      'html': u'<iframe width="459" height="344" src="http://www.youtube.com/embed/nda_OSWeyn8?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>',
 85 |      'provider_name': u'YouTube',
 86 |      'provider_url': u'http://www.youtube.com/',
 87 |      'thumbnail_height': 360,
 88 |      'thumbnail_url': u'http://i3.ytimg.com/vi/nda_OSWeyn8/hqdefault.jpg',
 89 |      'thumbnail_width': 480,
 90 |      'title': u'Leprechaun in Mobile, Alabama',
 91 |      'type': u'video',
 92 |      'url': 'http://www.youtube.com/watch?v=nda_OSWeyn8',
 93 |      'version': u'1.0',
 94 |      'width': 459}
 95 | 
 96 | More information can be found in the :py:class:`~micawber.providers.Provider` API docs.
 97 | 
 98 | ProviderRegistry
 99 | ----------------
100 | 
101 | The :py:class:`~micawber.providers.ProviderRegistry` is a way of organizing lists
102 | of providers.  URLs can be requested from the registry and if *any* provider matches
103 | it will be used, otherwise a ``ProviderException`` will be raised.
104 | 
105 | The ``ProviderRegistry`` also supports an optional simple caching mechanism.
106 | 
107 | Here is an excerpt from the code from the :py:func:`micawber.providers.bootstrap_basic` function,
108 | which is handy for grabbing a ``ProviderRegistry`` with a handful of basic providers
109 | pre-populated:
110 | 
111 | .. code-block:: python
112 | 
113 |     def bootstrap_basic(cache=None, registry=None, **params):
114 |         pr = registry or ProviderRegistry(cache)
115 |         pr.register('http://\S*?flickr.com/\S*', Provider('http://www.flickr.com/services/oembed/'))
116 |         pr.register('http://\S*.youtu(\.be|be\.com)/watch\S*', Provider('http://www.youtube.com/oembed'))
117 |         pr.register('http://www.hulu.com/watch/\S*', Provider('http://www.hulu.com/api/oembed.json'))
118 |         return pr
119 | 
120 | As you can see, the :py:meth:`~micawber.providers.ProviderRegistry.register` method takes
121 | two parameters, a regular expression for valid URLs and a ``Provider`` instance.
122 | 
123 | You can use helper functions to get a populated registry:
124 | 
125 | * :py:func:`~micawber.providers.bootstrap_basic`
126 | * :py:func:`~micawber.providers.bootstrap_oembed` - uses oembed.com's official providers list.
127 | * :py:func:`~micawber.providers.bootstrap_embedly`
128 | * :py:func:`~micawber.providers.bootstrap_noembed`
129 | 
130 | The ``bootstrap_oembed``, ``bootstrap_embedly``, and ``bootstrap_noembed``
131 | functions make an HTTP request to the API server asking for a list of supported
132 | providers, so you may experience some latency when using these helpers. For
133 | most WSGI applications this will not be an issue, but if you'd like to speed it
134 | up I suggest fetching the results, storing them in the db or a file, and then
135 | pulling from there.
136 | 
137 | More information can be found in the :py:class:`~micawber.providers.ProviderRegistry` API docs.
138 | 
139 | Parsing Links
140 | ^^^^^^^^^^^^^
141 | 
142 | Replace URLs with rich media:
143 | 
144 | * :py:meth:`~micawber.providers.ProviderRegistry.parse_text`, which converts
145 |   URLs on their own line into a rich media object. Links embedded within blocks
146 |   of text are converted into clickable links.
147 | * :py:meth:`~micawber.providers.ProviderRegistry.parse_html`, which converts
148 |   URLs within HTML into rich media objects or clickable links, depending on the
149 |   context in which the URL is found.
150 | 
151 | A quick example:
152 | 
153 | .. code-block:: python
154 | 
155 |     import micawber
156 | 
157 |     providers = micawber.bootstrap_basic()
158 | 
159 |     providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU')
160 | 
161 | This will result in the following output::
162 | 
163 |     this is a test:
164 |     <iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
165 | 
166 | You can also parse HTML using the :py:meth:`~micawber.providers.ProviderRegistry.parse_html` method:
167 | 
168 | .. code-block:: python
169 | 
170 |     providers.parse_html('<p>http://www.youtube.com/watch?v=54XHDUOHuzU</p>')
171 | 
172 |     # yields the following output:
173 |     <p><iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&amp;feature=oembed" frameborder="0" allowfullscreen="allowfullscreen"></iframe></p>
174 | 
175 | If you would rather extract metadata, there are two functions:
176 | 
177 | * :py:meth:`~micawber.providers.ProviderRegistry.extract`, which finds all URLs
178 |   within a block of text and returns a dictionary of metadata for each.
179 | * :py:meth:`~micawber.providers.ProviderRegistry.extract_html`, which finds
180 |   URLs within HTML and returns a dictionary of metadata for each.
181 | 
182 | The :ref:`API docs <api>` are extensive, so please refer there for a full list
183 | of parameters and functions.
184 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. micawber documentation master file, created by
 2 |    sphinx-quickstart on Tue Apr 17 13:43:41 2012.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | .. image:: http://media.charlesleifer.com/blog/photos/micawber-logo-0.png
 7 | 
 8 | A small library for extracting rich content from urls.
 9 | 
10 | https://github.com/coleifer/micawber
11 | 
12 | 
13 | what does it do?
14 | ----------------
15 | 
16 | micawber supplies a few methods for retrieving rich metadata about a variety of
17 | links, such as links to youtube videos.  micawber also provides functions for
18 | parsing blocks of text and html and replacing links to videos with rich embedded
19 | content.
20 | 
21 | 
22 | examples
23 | --------
24 | 
25 | here is a quick example:
26 | 
27 | .. code-block:: python
28 | 
29 |     import micawber
30 | 
31 |     # load up rules for some default providers, such as youtube and flickr
32 |     providers = micawber.bootstrap_basic()
33 | 
34 |     providers.request('http://www.youtube.com/watch?v=54XHDUOHuzU')
35 | 
36 |     # returns the following dictionary:
37 |     {
38 |         'author_name': 'pascalbrax',
39 |         'author_url': u'http://www.youtube.com/user/pascalbrax',
40 |         'height': 344,
41 |         'html': u'<iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>',
42 |         'provider_name': 'YouTube',
43 |         'provider_url': 'http://www.youtube.com/',
44 |         'title': 'Future Crew - Second Reality demo - HD',
45 |         'type': u'video',
46 |         'thumbnail_height': 360,
47 |         'thumbnail_url': u'http://i2.ytimg.com/vi/54XHDUOHuzU/hqdefault.jpg',
48 |         'thumbnail_width': 480,
49 |         'url': 'http://www.youtube.com/watch?v=54XHDUOHuzU',
50 |         'width': 459,
51 |         'version': '1.0',
52 |     }
53 | 
54 |     providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU')
55 | 
56 |     # returns the following string:
57 |     this is a test:
58 |     <iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&feature=oembed" frameborder="0" allowfullscreen></iframe>
59 | 
60 |     providers.parse_html('<p>http://www.youtube.com/watch?v=54XHDUOHuzU</p>')
61 | 
62 |     # returns the following html:
63 |     <p><iframe width="459" height="344" src="http://www.youtube.com/embed/54XHDUOHuzU?fs=1&amp;feature=oembed" frameborder="0" allowfullscreen="allowfullscreen"></iframe></p>
64 | 
65 | check out the :ref:`getting started <getting_started>` guide for more examples.
66 | 
67 | 
68 | integration with web frameworks
69 | -------------------------------
70 | 
71 | * :ref:`flask <flask>`
72 | * :ref:`django <django>`
73 | 
74 | Contents:
75 | 
76 | .. toctree::
77 |    :maxdepth: 2
78 |    :glob:
79 | 
80 |    installation
81 |    getting_started
82 |    examples
83 |    flask
84 |    django
85 |    api
86 | 
87 | 
88 | Indices and tables
89 | ==================
90 | 
91 | * :ref:`genindex`
92 | * :ref:`modindex`
93 | * :ref:`search`
94 | 
95 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
 1 | .. _installation:
 2 | 
 3 | Installation
 4 | ============
 5 | 
 6 | First, you need to install micawber.
 7 | 
 8 | There are a couple of ways:
 9 | 
10 | Installing with pip
11 | ^^^^^^^^^^^^^^^^^^^
12 | 
13 | ::
14 | 
15 |     pip install micawber
16 |     
17 |     or
18 |     
19 |     pip install -e git+https://github.com/coleifer/micawber.git#egg=micawber
20 | 
21 | 
22 | Installing via git
23 | ^^^^^^^^^^^^^^^^^^
24 | 
25 | ::
26 | 
27 |     git clone https://github.com/coleifer/micawber.git
28 |     cd micawber
29 |     python setup.py test
30 |     sudo python setup.py install
31 | 
32 | 
33 | Adding to your Django Project
34 | --------------------------------
35 | 
36 | After installing, adding micawber to your Django project is a snap.  Simply
37 | add it to your project's ``INSTALLED_APPS`` and run ``syncdb``::
38 |     
39 |     # settings.py
40 |     INSTALLED_APPS = [
41 |         ...
42 |         'micawber.contrib.mcdjango'
43 |     ]
44 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  linkcheck  to check all external links for integrity
 37 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 38 | 	goto end
 39 | )
 40 | 
 41 | if "%1" == "clean" (
 42 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 43 | 	del /q /s %BUILDDIR%\*
 44 | 	goto end
 45 | )
 46 | 
 47 | if "%1" == "html" (
 48 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 49 | 	if errorlevel 1 exit /b 1
 50 | 	echo.
 51 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 52 | 	goto end
 53 | )
 54 | 
 55 | if "%1" == "dirhtml" (
 56 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 57 | 	if errorlevel 1 exit /b 1
 58 | 	echo.
 59 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 60 | 	goto end
 61 | )
 62 | 
 63 | if "%1" == "singlehtml" (
 64 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "pickle" (
 72 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished; now you can process the pickle files.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "json" (
 80 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished; now you can process the JSON files.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "htmlhelp" (
 88 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
 92 | .hhp project file in %BUILDDIR%/htmlhelp.
 93 | 	goto end
 94 | )
 95 | 
 96 | if "%1" == "qthelp" (
 97 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
 98 | 	if errorlevel 1 exit /b 1
 99 | 	echo.
100 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
101 | .qhcp project file in %BUILDDIR%/qthelp, like this:
102 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\micawber.qhcp
103 | 	echo.To view the help file:
104 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\micawber.qhc
105 | 	goto end
106 | )
107 | 
108 | if "%1" == "devhelp" (
109 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
110 | 	if errorlevel 1 exit /b 1
111 | 	echo.
112 | 	echo.Build finished.
113 | 	goto end
114 | )
115 | 
116 | if "%1" == "epub" (
117 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
118 | 	if errorlevel 1 exit /b 1
119 | 	echo.
120 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
121 | 	goto end
122 | )
123 | 
124 | if "%1" == "latex" (
125 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
126 | 	if errorlevel 1 exit /b 1
127 | 	echo.
128 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
129 | 	goto end
130 | )
131 | 
132 | if "%1" == "text" (
133 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
134 | 	if errorlevel 1 exit /b 1
135 | 	echo.
136 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
137 | 	goto end
138 | )
139 | 
140 | if "%1" == "man" (
141 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
142 | 	if errorlevel 1 exit /b 1
143 | 	echo.
144 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
145 | 	goto end
146 | )
147 | 
148 | if "%1" == "texinfo" (
149 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
150 | 	if errorlevel 1 exit /b 1
151 | 	echo.
152 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
153 | 	goto end
154 | )
155 | 
156 | if "%1" == "gettext" (
157 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
158 | 	if errorlevel 1 exit /b 1
159 | 	echo.
160 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
161 | 	goto end
162 | )
163 | 
164 | if "%1" == "changes" (
165 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
166 | 	if errorlevel 1 exit /b 1
167 | 	echo.
168 | 	echo.The overview file is in %BUILDDIR%/changes.
169 | 	goto end
170 | )
171 | 
172 | if "%1" == "linkcheck" (
173 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
174 | 	if errorlevel 1 exit /b 1
175 | 	echo.
176 | 	echo.Link check complete; look for any errors in the above output ^
177 | or in %BUILDDIR%/linkcheck/output.txt.
178 | 	goto end
179 | )
180 | 
181 | if "%1" == "doctest" (
182 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
183 | 	if errorlevel 1 exit /b 1
184 | 	echo.
185 | 	echo.Testing of doctests in the sources finished, look at the ^
186 | results in %BUILDDIR%/doctest/output.txt.
187 | 	goto end
188 | )
189 | 
190 | :end
191 | 


--------------------------------------------------------------------------------
/examples/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/examples/__init__.py


--------------------------------------------------------------------------------
/examples/django_ex/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/examples/django_ex/__init__.py


--------------------------------------------------------------------------------
/examples/django_ex/manage.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from django.core.management import execute_manager
 3 | import imp
 4 | try:
 5 |     imp.find_module('settings') # Assumed to be in the same directory.
 6 | except ImportError:
 7 |     import sys
 8 |     sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n" % __file__)
 9 |     sys.exit(1)
10 | 
11 | import settings
12 | 
13 | if __name__ == "__main__":
14 |     execute_manager(settings)
15 | 


--------------------------------------------------------------------------------
/examples/django_ex/settings.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | #### MICAWBER SETTINGS
 4 | 
 5 | # add a template filter called "oembed_no_urlize" that will not automatically
 6 | # convert URLs to clickable links in the event a provider is not found for
 7 | # the given url
 8 | MICAWBER_TEMPLATE_EXTENSIONS = [
 9 |     ('oembed_no_urlize', {'urlize_all': False}),
10 | ]
11 | 
12 | # by default, micawber will use the "bootstrap_basic" providers, but should you
13 | # wish to use embedly you can try out the second example.  You can also provide
14 | # your own ProviderRegistry with a path to a module and either a callable or
15 | # ProviderRegistry instance
16 | MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_basic'
17 | #MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_embedly'
18 | 
19 | # if you are using embed.ly you can specify an API key that will be used with
20 | # the bootstrap_embedly provider setting
21 | # MICAWBER_EMBEDLY_KEY = 'foofoo'
22 | 
23 | # since template filters are limited to a single optional parameter, you can
24 | # specify defaults, such as a maxwidth you prefer to use or an api key
25 | #MICAWBER_DEFAULT_SETTINGS = {
26 | #    'key': 'your-embedly-api-key',
27 | #    'maxwidth': 600,
28 | #    'maxheight': 600,
29 | #}
30 | 
31 | #### END MICAWBER SETTINGS
32 | 
33 | CURRENT_DIR = os.path.dirname(__file__)
34 | 
35 | DEBUG = True
36 | TEMPLATE_DEBUG = DEBUG
37 | 
38 | DATABASES = {
39 |     'default': {
40 |         'ENGINE': 'django.db.backends.sqlite3',
41 |         'NAME': 'django_ex.db',
42 |     }
43 | }
44 | 
45 | SITE_ID = 1
46 | 
47 | SECRET_KEY = 'fapfapfap'
48 | 
49 | STATIC_URL = '/static/'
50 | STATICFILES_DIRS = (
51 |     os.path.join(CURRENT_DIR, 'static'),
52 | )
53 | 
54 | STATICFILES_FINDERS = (
55 |     'django.contrib.staticfiles.finders.FileSystemFinder',
56 |     'django.contrib.staticfiles.finders.AppDirectoriesFinder',
57 | )
58 | 
59 | 
60 | TEMPLATE_LOADERS = (
61 |     'django.template.loaders.filesystem.Loader',
62 |     'django.template.loaders.app_directories.Loader',
63 | )
64 | 
65 | MIDDLEWARE_CLASSES = (
66 |     'django.middleware.common.CommonMiddleware',
67 |     'django.contrib.sessions.middleware.SessionMiddleware',
68 |     'django.middleware.csrf.CsrfViewMiddleware',
69 |     'django.contrib.auth.middleware.AuthenticationMiddleware',
70 |     'django.contrib.messages.middleware.MessageMiddleware',
71 | )
72 | 
73 | ROOT_URLCONF = 'django_ex.urls'
74 | 
75 | TEMPLATE_DIRS = (
76 |     os.path.join(CURRENT_DIR, 'templates'),
77 | )
78 | 
79 | INSTALLED_APPS = (
80 |     'django.contrib.auth',
81 |     'django.contrib.contenttypes',
82 |     'django.contrib.sessions',
83 |     'django.contrib.sites',
84 |     'django.contrib.staticfiles',
85 |     'micawber.contrib.mcdjango',
86 | )
87 | 


--------------------------------------------------------------------------------
/examples/django_ex/static/style.css:
--------------------------------------------------------------------------------
 1 | body            { font-family: sans-serif; background: #eee; }
 2 | a, h1, h2       { color: #377BA8; }
 3 | h1, h2          { font-family: 'Georgia', serif; margin: 0; }
 4 | h1              { border-bottom: 2px solid #eee; }
 5 | h2              { font-size: 1.2em; }
 6 | 
 7 | .page           { margin: 2em auto; width: 35em; border: 5px solid #ccc;
 8 |                   padding: 0.8em; background: white; }
 9 | .entries        { list-style: none; margin: 0; padding: 0; }
10 | .entries li     { margin: 0.8em 1.2em; }
11 | .entries li h2  { margin-left: -1em; }
12 | .add-entry      { font-size: 0.9em; border-bottom: 1px solid #ccc; }
13 | .add-entry dl   { font-weight: bold; }
14 | .metanav        { text-align: right; font-size: 0.8em; padding: 0.3em;
15 |                   margin-bottom: 1em; background: #fafafa; }
16 | .flash          { background: #CEE5F5; padding: 0.5em;
17 |                   border: 1px solid #AACBE2; }
18 | .error          { background: #F0D6D6; padding: 0.5em; }
19 | 


--------------------------------------------------------------------------------
/examples/django_ex/templates/example.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |   <head>
 4 |     <title>Example</title>
 5 |     <link rel="stylesheet" type="text/css" href="/static/style.css">
 6 |   </head>
 7 |   <body>
 8 |     <div class="page">
 9 |       <h1>Micawber Example</h1>
10 | 
11 |       <h2>Text renderer</h2>
12 |       <form method="get">
13 |         <p>
14 |           <label>Enter some text</label>
15 |         </p>
16 |         <p>
17 |           <textarea cols=50 rows=5 name="text">{{ text }}</textarea>
18 |           <small style="display:block;">Try entering some urls to youtube videos</small>
19 |         </p>
20 |         <p>
21 |           <button type="submit">Render</button>
22 |         </p>
23 |       </form>
24 |       
25 |       {% load micawber_tags %}
26 | 
27 |       {% if text %}
28 |       <h3>Rendered</h3>
29 |       <div>
30 |         {{ text|oembed }}
31 |       </div>
32 | 
33 |       <h3>Extracted data</h3>
34 |       <textarea cols=50 rows=5>
35 |         {{ text|extract_oembed }}
36 |       </textarea>
37 |       {% endif %}
38 | 
39 |       <h2>HTML renderer</h2>
40 |       <form method="get">
41 |         <p>
42 |           <label>Enter some html</label>
43 |         </p>
44 |         <p>{# field is named "html" because the view reads the "html" GET parameter; value is autoescaped so "</textarea>" in the input cannot break out #}
45 |           <textarea cols=50 rows=5 name="html">{{ html }}</textarea>
46 |         </p>
47 |         <p>
48 |           <button type="submit">Render</button>
49 |         </p>
50 |       </form>
51 |       
52 |       {% load micawber_tags %}
53 | 
54 |       {% if html %}
55 |       <h3>Rendered</h3>
56 |       <div>
57 |         {{ html|oembed_html }}
58 |       </div>
59 |       {% endif %}
60 |     </div>
61 |   </body>
62 | </html>
63 | 


--------------------------------------------------------------------------------
/examples/django_ex/urls.py:
--------------------------------------------------------------------------------
1 | from django.conf.urls.defaults import patterns, include, url
2 | 
3 | urlpatterns = patterns('',
4 |     url(r'^$', 'django_ex.views.example_view', name='example_view'),
5 | )
6 | 


--------------------------------------------------------------------------------
/examples/django_ex/views.py:
--------------------------------------------------------------------------------
 1 | from django.shortcuts import render_to_response
 2 | 
 3 | def example_view(request):
 4 |     text = request.GET.get('text', 'http://www.youtube.com/watch?v=nda_OSWeyn8')
 5 |     html = request.GET.get('html', """
 6 | <p>This is a test</p>
 7 | <p>http://www.youtube.com/watch?v=nda_OSWeyn8</p>
 8 | <p>This will get rendered as a link: http://www.youtube.com/watch?v=nda_OSWeyn8</p>
 9 | <p>This will not be modified: <a href="http://www.google.com/">http://www.youtube.com/watch?v=nda_OSWeyn8</a></p>
10 |     """)
11 |     return render_to_response('example.html', dict(
12 |         text=text,
13 |         html=html,
14 |     ))
15 | 


--------------------------------------------------------------------------------
/examples/flask_ex/app.py:
--------------------------------------------------------------------------------
 1 | from flask import Flask, render_template, request
 2 | from micawber.providers import bootstrap_basic
 3 | from micawber.contrib.mcflask import add_oembed_filters
 4 | 
 5 | app = Flask(__name__)
 6 | app.config['DEBUG'] = True
 7 | 
 8 | oembed_providers = bootstrap_basic()
 9 | add_oembed_filters(app, oembed_providers)
10 | 
11 | @app.route('/')
12 | def example_view():
13 |     text = request.args.get('text', 'http://www.youtube.com/watch?v=nda_OSWeyn8')
14 |     html = request.args.get('html', """
15 | <p>This is a test</p>
16 | <p>http://www.youtube.com/watch?v=nda_OSWeyn8</p>
17 | <p>This will get rendered as a link: http://www.youtube.com/watch?v=nda_OSWeyn8</p>
18 | <p>This will not be modified: <a href="http://www.google.com/">http://www.youtube.com/watch?v=nda_OSWeyn8</a></p>
19 |     """)
20 |     return render_template('example.html', text=text, html=html)
21 | 
22 | if __name__ == '__main__':
23 |     app.run()
24 | 


--------------------------------------------------------------------------------
/examples/flask_ex/static/style.css:
--------------------------------------------------------------------------------
 1 | body            { font-family: sans-serif; background: #eee; }
 2 | a, h1, h2       { color: #377BA8; }
 3 | h1, h2          { font-family: 'Georgia', serif; margin: 0; }
 4 | h1              { border-bottom: 2px solid #eee; }
 5 | h2              { font-size: 1.2em; }
 6 | 
 7 | .page           { margin: 2em auto; width: 35em; border: 5px solid #ccc;
 8 |                   padding: 0.8em; background: white; }
 9 | .entries        { list-style: none; margin: 0; padding: 0; }
10 | .entries li     { margin: 0.8em 1.2em; }
11 | .entries li h2  { margin-left: -1em; }
12 | .add-entry      { font-size: 0.9em; border-bottom: 1px solid #ccc; }
13 | .add-entry dl   { font-weight: bold; }
14 | .metanav        { text-align: right; font-size: 0.8em; padding: 0.3em;
15 |                   margin-bottom: 1em; background: #fafafa; }
16 | .flash          { background: #CEE5F5; padding: 0.5em;
17 |                   border: 1px solid #AACBE2; }
18 | .error          { background: #F0D6D6; padding: 0.5em; }
19 | 


--------------------------------------------------------------------------------
/examples/flask_ex/templates/example.html:
--------------------------------------------------------------------------------
 1 | <!doctype html>
 2 | <html>
 3 |   <head>
 4 |     <title>Example</title>
 5 |     <link rel="stylesheet" type="text/css" href="/static/style.css">
 6 |   </head>
 7 |   <body>
 8 |     <div class="page">
 9 |       <h1>Micawber Example</h1>
10 | 
11 |       <h2>Text renderer</h2>
12 |       <form method="get">
13 |         <p>
14 |           <label>Enter some text</label>
15 |         </p>
16 |         <p>
17 |           <textarea cols=50 rows=5 name="text">{{ text }}</textarea>
18 |           <small style="display:block;">Try entering some urls to youtube videos</small>
19 |         </p>
20 |         <p>
21 |           <button type="submit">Render</button>
22 |         </p>
23 |       </form>
24 | 
25 |       {% if text %}
26 |       <h3>Rendered</h3>
27 |       <div>
28 |         {{ text|oembed() }}
29 |       </div>
30 | 
31 |       <h3>Extracted data</h3>
32 |       <textarea cols=50 rows=5>
33 |         {{ text|extract_oembed() }}
34 |       </textarea>
35 |       {% endif %}
36 | 
37 |       <h2>HTML renderer</h2>
38 |       <form method="get">
39 |         <p>
40 |           <label>Enter some html</label>
41 |         </p>
42 |         <p>{# field is named "html" because the view reads request.args "html"; value is autoescaped so "</textarea>" in the input cannot break out #}
43 |           <textarea cols=50 rows=5 name="html">{{ html }}</textarea>
44 |         </p>
45 |         <p>
46 |           <button type="submit">Render</button>
47 |         </p>
48 |       </form>
49 | 
50 |       {% if html %}
51 |       <h3>Rendered</h3>
52 |       <div>
53 |         {{ html|oembed(html=True) }}
54 |       </div>
55 |       {% endif %}
56 |     </div>
57 |   </body>
58 | </html>
59 | 


--------------------------------------------------------------------------------
/examples/python_ex/example.py:
--------------------------------------------------------------------------------
 1 | import pprint
 2 | from micawber import bootstrap_oembed, ProviderException
 3 | try:
 4 |     read_input = raw_input
 5 | except NameError:
 6 |     read_input = input
 7 | 
 8 | def main():
 9 |     print('Please wait, loading providers from oembed.com')
10 |     providers = bootstrap_oembed()
11 | 
12 |     while 1:
13 |         url = read_input('Enter a url (or q to quit): ')
14 |         if url.lower().strip() == 'q':
15 |             break
16 | 
17 |         try:
18 |             result = providers.request(url)
19 |         except ProviderException:
20 |             print('No provider found for that url :/')
21 |         else:
22 |             print('Data for %s\n====================================================' % url)
23 |             pprint.pprint(result)
24 | 
25 | if __name__ == '__main__':
26 |     print('Welcome to the example!')
27 |     main()
28 | 


--------------------------------------------------------------------------------
/micawber/__init__.py:
--------------------------------------------------------------------------------
 1 | __version__ = '0.5.6'
 2 | 
 3 | from micawber.cache import Cache
 4 | from micawber.cache import PickleCache
 5 | from micawber.exceptions import ProviderException
 6 | from micawber.exceptions import InvalidResponseException
 7 | from micawber.parsers import extract
 8 | from micawber.parsers import extract_html
 9 | from micawber.parsers import parse_text
10 | from micawber.parsers import parse_text_full
11 | from micawber.parsers import parse_html
12 | from micawber.providers import Provider
13 | from micawber.providers import ProviderRegistry
14 | from micawber.providers import bootstrap_basic
15 | from micawber.providers import bootstrap_embedly
16 | from micawber.providers import bootstrap_noembed
17 | from micawber.providers import bootstrap_oembed
18 | 


--------------------------------------------------------------------------------
/micawber/cache.py:
--------------------------------------------------------------------------------
 1 | from __future__ import with_statement
 2 | import os
 3 | import pickle
 4 | try:
 5 |     from redis import Redis
 6 | except ImportError:
 7 |     Redis = None
 8 | 
 9 | 
10 | class Cache(object):
11 |     def __init__(self):
12 |         self._cache = {}
13 | 
14 |     def get(self, k):
15 |         return self._cache.get(k)
16 | 
17 |     def set(self, k, v):
18 |         self._cache[k] = v
19 | 
20 | 
21 | class PickleCache(Cache):
22 |     def __init__(self, filename='cache.db'):
23 |         self.filename = filename
24 |         self._cache = self.load()
25 | 
26 |     def load(self):
27 |         if os.path.exists(self.filename):
28 |             with open(self.filename, 'rb') as fh:
29 |                 return pickle.load(fh)
30 |         return {}
31 | 
32 |     def save(self):
33 |         with open(self.filename, 'wb') as fh:
34 |             pickle.dump(self._cache, fh)
35 | 
36 | 
37 | if Redis:
38 |     class RedisCache(Cache):
39 |         """
40 |         :param str namespace: key prefix.
41 |         :param int timeout: expiration timeout in seconds
42 |         """
43 |         def __init__(self, namespace='micawber', timeout=None, **conn):
44 |             self.namespace = namespace
45 |             self.timeout = timeout
46 |             self.conn = Redis(**conn)
47 | 
48 |         def key_fn(self, k):
49 |             return '%s.%s' % (self.namespace, k)
50 | 
51 |         def get(self, k):
52 |             cached = self.conn.get(self.key_fn(k))
53 |             if cached:
54 |                 return pickle.loads(cached)
55 | 
56 |         def set(self, k, v):
57 |             ck, cv = self.key_fn(k), pickle.dumps(v)
58 |             if self.timeout is not None:
59 |                 self.conn.setex(ck, cv, self.timeout)
60 |             else:
61 |                 self.conn.set(ck, cv)
62 | 


--------------------------------------------------------------------------------
/micawber/compat.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | 
  3 | PY3 = sys.version_info >= (3,)
  4 | 
  5 | if PY3:  # Python 3 names for the URL helpers and text types
  6 |     from urllib.request import Request, urlopen, URLError, HTTPError
  7 |     from urllib.parse import urlencode
  8 |     text_type = str
  9 |     string_types = str,  # trailing comma: a one-element tuple
 10 |     def get_charset(response):  # returns the 'charset' parameter read from response.headers
 11 |         return response.headers.get_param('charset')
 12 | else:  # Python 2 equivalents bound to the same names
 13 |     from urllib2 import Request, urlopen, URLError, HTTPError
 14 |     from urllib import urlencode
 15 |     text_type = unicode
 16 |     string_types = basestring,  # trailing comma: a one-element tuple
 17 |     def get_charset(response):  # same lookup via the Python 2 headers method getparam()
 18 |         return response.headers.getparam('charset')
 19 | 
 20 | try:
 21 |     from collections import OrderedDict
 22 | except ImportError:
 23 |     try:
 24 |         from _abcoll import KeysView, ValuesView, ItemsView
 25 |     except ImportError:
 26 |         pass
 27 | 
 28 |     class OrderedDict(dict):
 29 |         'Dictionary that remembers insertion order'
 30 |         # An inherited dict maps keys to values.
 31 |         # The inherited dict provides __getitem__, __len__, __contains__, and get.
 32 |         # The remaining methods are order-aware.
 33 |         # Big-O running times for all methods are the same as for regular dictionaries.
 34 | 
 35 |         # The internal self.__map dictionary maps keys to links in a doubly linked list.
 36 |         # The circular doubly linked list starts and ends with a sentinel element.
 37 |         # The sentinel element never gets deleted (this simplifies the algorithm).
 38 |         # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
 39 | 
 40 |         def __init__(self, *args, **kwds):
 41 |             '''Initialize an ordered dictionary.  Signature is the same as for
 42 |             regular dictionaries, but keyword arguments are not recommended
 43 |             because their insertion order is arbitrary.
 44 | 
 45 |             '''
 46 |             if len(args) > 1:
 47 |                 raise TypeError('expected at most 1 arguments, got %d' % len(args))
 48 |             try:
 49 |                 self.__root
 50 |             except AttributeError:
 51 |                 self.__root = root = []                     # sentinel node
 52 |                 root[:] = [root, root, None]
 53 |                 self.__map = {}
 54 |             self.__update(*args, **kwds)
 55 | 
 56 |         def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
 57 |             'od.__setitem__(i, y) <==> od[i]=y'
 58 |             # Setting a new item creates a new link which goes at the end of the linked
 59 |             # list, and the inherited dictionary is updated with the new key/value pair.
 60 |             if key not in self:
 61 |                 root = self.__root
 62 |                 last = root[0]
 63 |                 last[1] = root[0] = self.__map[key] = [last, root, key]
 64 |             dict_setitem(self, key, value)
 65 | 
 66 |         def __delitem__(self, key, dict_delitem=dict.__delitem__):
 67 |             'od.__delitem__(y) <==> del od[y]'
 68 |             # Deleting an existing item uses self.__map to find the link which is
 69 |             # then removed by updating the links in the predecessor and successor nodes.
 70 |             dict_delitem(self, key)
 71 |             link_prev, link_next, key = self.__map.pop(key)
 72 |             link_prev[1] = link_next
 73 |             link_next[0] = link_prev
 74 | 
 75 |         def __iter__(self):
 76 |             'od.__iter__() <==> iter(od)'
 77 |             root = self.__root
 78 |             curr = root[1]
 79 |             while curr is not root:
 80 |                 yield curr[2]
 81 |                 curr = curr[1]
 82 | 
 83 |         def __reversed__(self):
 84 |             'od.__reversed__() <==> reversed(od)'
 85 |             root = self.__root
 86 |             curr = root[0]
 87 |             while curr is not root:
 88 |                 yield curr[2]
 89 |                 curr = curr[0]
 90 | 
 91 |         def clear(self):
 92 |             'od.clear() -> None.  Remove all items from od.'
 93 |             try:
 94 |                 for node in self.__map.itervalues():
 95 |                     del node[:]
 96 |                 root = self.__root
 97 |                 root[:] = [root, root, None]
 98 |                 self.__map.clear()
 99 |             except AttributeError:
100 |                 pass
101 |             dict.clear(self)
102 | 
103 |         def popitem(self, last=True):
104 |             '''od.popitem() -> (k, v), return and remove a (key, value) pair.
105 |             Pairs are returned in LIFO order if last is true or FIFO order if false.
106 | 
107 |             '''
108 |             if not self:
109 |                 raise KeyError('dictionary is empty')
110 |             root = self.__root
111 |             if last:
112 |                 link = root[0]
113 |                 link_prev = link[0]
114 |                 link_prev[1] = root
115 |                 root[0] = link_prev
116 |             else:
117 |                 link = root[1]
118 |                 link_next = link[1]
119 |                 root[1] = link_next
120 |                 link_next[0] = root
121 |             key = link[2]
122 |             del self.__map[key]
123 |             value = dict.pop(self, key)
124 |             return key, value
125 | 
126 |         # -- the following methods do not depend on the internal structure --
127 | 
128 |         def keys(self):
129 |             'od.keys() -> list of keys in od'
130 |             return list(self)
131 | 
132 |         def values(self):
133 |             'od.values() -> list of values in od'
134 |             return [self[key] for key in self]
135 | 
136 |         def items(self):
137 |             'od.items() -> list of (key, value) pairs in od'
138 |             return [(key, self[key]) for key in self]
139 | 
140 |         def iterkeys(self):
141 |             'od.iterkeys() -> an iterator over the keys in od'
142 |             return iter(self)
143 | 
144 |         def itervalues(self):
145 |             'od.itervalues -> an iterator over the values in od'
146 |             for k in self:
147 |                 yield self[k]
148 | 
149 |         def iteritems(self):
150 |             'od.iteritems -> an iterator over the (key, value) items in od'
151 |             for k in self:
152 |                 yield (k, self[k])
153 | 
154 |         def update(*args, **kwds):
155 |             '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
156 | 
157 |             If E is a dict instance, does:           for k in E: od[k] = E[k]
158 |             If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
159 |             Or if E is an iterable of items, does:   for k, v in E: od[k] = v
160 |             In either case, this is followed by:     for k, v in F.items(): od[k] = v
161 | 
162 |             '''
163 |             if len(args) > 2:
164 |                 raise TypeError('update() takes at most 2 positional '
165 |                                 'arguments (%d given)' % (len(args),))
166 |             elif not args:
167 |                 raise TypeError('update() takes at least 1 argument (0 given)')
168 |             self = args[0]
169 |             # Make progressively weaker assumptions about "other"
170 |             other = ()
171 |             if len(args) == 2:
172 |                 other = args[1]
173 |             if isinstance(other, dict):
174 |                 for key in other:
175 |                     self[key] = other[key]
176 |             elif hasattr(other, 'keys'):
177 |                 for key in other.keys():
178 |                     self[key] = other[key]
179 |             else:
180 |                 for key, value in other:
181 |                     self[key] = value
182 |             for key, value in kwds.items():
183 |                 self[key] = value
184 | 
        __update = update  # let subclasses override update without breaking __init__

        # Unique sentinel used as the default of pop(), so that a caller can
        # still pass None (or any other value) as an explicit default.
        __marker = object()
188 | 
189 |         def pop(self, key, default=__marker):
190 |             '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
191 |             If key is not found, d is returned if given, otherwise KeyError is raised.
192 | 
193 |             '''
194 |             if key in self:
195 |                 result = self[key]
196 |                 del self[key]
197 |                 return result
198 |             if default is self.__marker:
199 |                 raise KeyError(key)
200 |             return default
201 | 
202 |         def setdefault(self, key, default=None):
203 |             'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
204 |             if key in self:
205 |                 return self[key]
206 |             self[key] = default
207 |             return default
208 | 
209 |         def __repr__(self):
210 |             if not self:
211 |                 return '%s()' % (self.__class__.__name__,)
212 |             return '%s(%r)' % (self.__class__.__name__, self.items())
213 | 
214 |         def __reduce__(self):
215 |             'Return state information for pickling'
216 |             items = [[k, self[k]] for k in self]
217 |             inst_dict = vars(self).copy()
218 |             for k in vars(OrderedDict()):
219 |                 inst_dict.pop(k, None)
220 |             if inst_dict:
221 |                 return (self.__class__, (items,), inst_dict)
222 |             return self.__class__, (items,)
223 | 
224 |         def copy(self):
225 |             'od.copy() -> a shallow copy of od'
226 |             return self.__class__(self)
227 | 
228 |         @classmethod
229 |         def fromkeys(cls, iterable, value=None):
230 |             '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
231 |             and values equal to v (which defaults to None).
232 | 
233 |             '''
234 |             d = cls()
235 |             for key in iterable:
236 |                 d[key] = value
237 |             return d
238 | 
239 |         def __eq__(self, other):
240 |             '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
241 |             while comparison to a regular mapping is order-insensitive.
242 | 
243 |             '''
244 |             if isinstance(other, OrderedDict):
245 |                 return len(self)==len(other) and self.items() == other.items()
246 |             return dict.__eq__(self, other)
247 | 
248 |         def __ne__(self, other):
249 |             return not self == other
250 | 
251 |         # -- the following methods are only used in Python 2.7 --
252 | 
253 |         def viewkeys(self):
254 |             "od.viewkeys() -> a set-like object providing a view on od's keys"
255 |             return KeysView(self)
256 | 
257 |         def viewvalues(self):
258 |             "od.viewvalues() -> an object providing a view on od's values"
259 |             return ValuesView(self)
260 | 
261 |         def viewitems(self):
262 |             "od.viewitems() -> a set-like object providing a view on od's items"
263 |             return ItemsView(self)
264 | 


--------------------------------------------------------------------------------
/micawber/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/__init__.py


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/__init__.py:
--------------------------------------------------------------------------------
 1 | from collections.abc import Callable
 2 | from importlib import import_module
 3 | 
 4 | from django import template
 5 | from django.conf import settings
 6 | from django.template.loader import render_to_string
 7 | from django.utils.safestring import mark_safe
 8 | 
 9 | from micawber.compat import string_types
10 | from micawber.parsers import full_handler, inline_handler, parse_text, \
11 |     parse_html, extract, extract_html
12 | 
13 | 
14 | def _load_from_module(path):
15 |     package, attr = path.rsplit('.', 1)
16 |     module = import_module(package)
17 |     return getattr(module, attr)
18 | 
19 | 
# Dotted path of the provider registry (or of a factory returning one); can
# be overridden with the MICAWBER_PROVIDERS setting.
PROVIDERS = getattr(settings, 'MICAWBER_PROVIDERS', 'micawber.contrib.mcdjango.providers.bootstrap_basic')

providers = _load_from_module(PROVIDERS)
# If the path named a callable, call it (without arguments) to obtain the
# providers object that the filters below use.
if isinstance(providers, Callable):
    providers = providers()


# Template library on which every filter defined in this module is registered.
register = template.Library()
28 | 
def django_template_handler(url, response_data, **params):
    """Render an oembed response through a Django template.

    Candidate templates are ``micawber/<provider_name>.html`` (when the
    response carries a provider name) followed by ``micawber/<type>.html``.
    The template context holds ``params``, ``response`` and ``url``; the
    rendered output is stripped and marked safe.
    """
    candidates = []
    for name in (response_data.get('provider_name'), response_data['type']):
        if name:
            candidates.append('micawber/%s.html' % name)
    context = {
        'params': params,
        'response': response_data,
        'url': url,
    }
    rendered = render_to_string(candidates, context)
    return mark_safe(rendered.strip())
42 | 
def fix_width_height(width_height, params):
    """Translate a ``WIDTHxHEIGHT`` or ``WIDTH`` filter argument into params.

    :param width_height: e.g. ``'300x400'``, ``'300'`` or the integer ``300``;
        a falsy value leaves ``params`` untouched.
    :param params: dict that is updated in place.
    :returns: the same ``params`` dict, with ``maxwidth`` (and ``maxheight``
        when a height was given) set as integers. When only a width is given,
        any existing ``maxheight`` entry is removed.
    :raises ValueError: if a dimension is not an integer, or if the argument
        does not split into exactly two parts on ``x``.
    """
    if width_height:
        # An unquoted numeric filter argument (``|oembed:300``) reaches us as
        # an int, for which the ``'x' in ...`` test would raise TypeError.
        width_height = str(width_height)
        if 'x' in width_height:
            params['maxwidth'], params['maxheight'] = [int(n) for n in width_height.split('x')]
        else:
            params['maxwidth'] = int(width_height)
            params.pop('maxheight', None)
    return params
51 | 
def extension(filter_name, providers=providers, urlize_all=True, html=False, handler=django_template_handler,
              block_handler=inline_handler, text_fn=parse_text, html_fn=parse_html, **kwargs):
    """Create an oembed template filter and register it as ``filter_name``.

    :param filter_name: name under which the filter is registered.
    :param providers: provider registry passed to the parser.
    :param urlize_all: passed through to the parser.
    :param html: use ``html_fn`` when true, ``text_fn`` otherwise.
    :param handler: passed through to the parser as its handler.
    :param block_handler: passed through to the parser as its block handler.
    :param kwargs: extra request parameters merged over
        ``MICAWBER_DEFAULT_SETTINGS`` on every call.
    :returns: the filter function ``_extension(s, width_height=None)``.
    """
    fn = html_fn if html else text_fn

    def _extension(s, width_height=None):
        # Work on a copy: updating the dict returned by getattr() in place
        # would modify settings.MICAWBER_DEFAULT_SETTINGS itself, leaking
        # this filter's kwargs and sizes into every later call.
        params = dict(getattr(settings, 'MICAWBER_DEFAULT_SETTINGS', {}))
        params.update(kwargs)
        params = fix_width_height(width_height, params)
        return mark_safe(fn(s, providers, urlize_all, handler, block_handler, **params))
    register.filter(filter_name, _extension)
    return _extension
65 | 
# Built-in filters: ``oembed`` uses the text parser, ``oembed_html`` the HTML
# parser (html=True selects html_fn inside extension()).
oembed = extension('oembed')
oembed_html = extension('oembed_html', html=True)
68 | 
def _extract_oembed(text, width_height=None, html=False):
    """Return ``(url, response)`` pairs for the URLs in ``text`` that resolved.

    :param text: text (or HTML when ``html`` is true) to scan.
    :param width_height: optional ``WIDTHxHEIGHT`` / ``WIDTH`` size argument.
    :param html: use ``extract_html`` instead of ``extract``.
    :returns: list of ``(url, data)`` tuples, in the order the extractor
        reported the URLs, limited to URLs that have response data.
    """
    fn = extract_html if html else extract
    # Copy before fix_width_height() mutates it, so that the dict stored in
    # settings.MICAWBER_DEFAULT_SETTINGS is never modified.
    params = dict(getattr(settings, 'MICAWBER_DEFAULT_SETTINGS', {}))
    params = fix_width_height(width_height, params)
    url_list, url_data = fn(text, providers, **params)
    return [(u, url_data[u]) for u in url_list if u in url_data]
78 | 
@register.filter
def extract_oembed(text, width_height=None):
    """Template filter: (url, response) pairs extracted from plain text."""
    return _extract_oembed(text, width_height, html=False)
82 | 
@register.filter
def extract_oembed_html(text, width_height=None):
    """Template filter: (url, response) pairs extracted from HTML."""
    return _extract_oembed(text, width_height, html=True)
86 | 
# Project-defined filters: MICAWBER_TEMPLATE_EXTENSIONS is either an iterable
# of (filter_name, keyword-arguments dict) pairs or a dotted path to one.
user_extensions = getattr(settings, 'MICAWBER_TEMPLATE_EXTENSIONS', [])
if isinstance(user_extensions, string_types):
    user_extensions = _load_from_module(user_extensions)

# Each pair is registered through extension(), with the dict passed as
# keyword arguments.
for filter_name, filter_params in user_extensions:
    extension(filter_name, **filter_params)
93 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/mcdjango_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/mcdjango_tests/__init__.py


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/mcdjango_tests/models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/mcdjango_tests/models.py


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/mcdjango_tests/tests.py:
--------------------------------------------------------------------------------
  1 | from django.template import Context
  2 | from django.template import Template
  3 | from django.test import TestCase
  4 | 
  5 | from micawber.parsers import parse_text
  6 | from micawber.test_utils import BaseTestCase
  7 | from micawber.test_utils import test_cache
  8 | from micawber.test_utils import test_pr
  9 | from micawber.test_utils import test_pr_cache
 10 | from micawber.test_utils import TestProvider
 11 | 
 12 | 
 13 | class MicawberDjangoTestCase(TestCase, BaseTestCase):
 14 |     def render(self, s, **params):
 15 |         s = '{%% load micawber_tags %%}%s' % s
 16 |         return Template(s).render(Context(params)).strip()
 17 | 
 18 |     def test_oembed_alt(self):
 19 |         from micawber.contrib.mcdjango import extension
 20 | 
 21 |         def custom_handler(url, response_data):
 22 |             return url
 23 | 
 24 |         oembed_alt = extension(
 25 |             'oembed_alt',
 26 |             urlize_all=False,
 27 |             block_handler=custom_handler)
 28 | 
 29 |         text = '\n'.join((
 30 |             'this is the first line',
 31 |             'http://photo-test2',
 32 |             'this is the third line http://photo-test2',
 33 |             'http://photo-test2 this is the fourth line'))
 34 |         rendered = self.render('{{ text|oembed_alt }}', text=text)
 35 |         self.assertEqual(rendered.splitlines(), [
 36 |             'this is the first line',
 37 |             self.full_pairs['http://photo-test2'],
 38 |             'this is the third line http://photo-test2',
 39 |             'http://photo-test2 this is the fourth line',
 40 |         ])
 41 | 
 42 |     def test_fix_wh(self):
 43 |         from micawber.contrib.mcdjango import fix_width_height
 44 |         self.assertEqual(fix_width_height('300x400', {}), {'maxwidth': 300, 'maxheight': 400})
 45 |         self.assertEqual(fix_width_height('300', {}), {'maxwidth': 300})
 46 | 
 47 |     def test_provider_loading(self):
 48 |         from micawber.contrib.mcdjango import providers
 49 |         self.assertEqual(providers, test_pr)
 50 | 
 51 |     def test_oembed_filter_multiline_plain(self):
 52 |         for url, expected in self.full_pairs.items():
 53 |             expected_inline = self.inline_pairs[url]
 54 |             frame = 'this is inline: %s\n%s\nand yet another %s'
 55 | 
 56 |             test_str = frame % (url, url, url)
 57 | 
 58 |             parsed = self.render('{{ test_str|oembed }}', test_str=test_str)
 59 |             self.assertEqual(parsed, frame % (expected_inline, expected, expected_inline))
 60 | 
 61 |     def test_oembed_filter_multiline_html(self):
 62 |         for url, expected in self.full_pairs.items():
 63 |             expected_inline = self.inline_pairs[url]
 64 |             frame = '<p>%s</p>\n<p>this is inline: %s</p>\n<p>\n%s\n</p><p>last test\n%s\n</p>'
 65 | 
 66 |             test_str = frame % (url, url, url, url)
 67 | 
 68 |             parsed = self.render('{{ test_str|oembed_html }}', test_str=test_str)
 69 |             self.assertHTMLEqual(parsed, frame % (expected, expected_inline, expected, expected_inline))
 70 | 
 71 |         for url, expected in self.full_pairs.items():
 72 |             expected_inline = self.inline_pairs[url]
 73 |             frame = '<p><a href="#foo">%s</a></p>\n<p>this is inline: %s</p>\n<p>last test\n%s\n</p>'
 74 | 
 75 |             test_str = frame % (url, url, url)
 76 | 
 77 |             parsed = self.render('{{ test_str|oembed_html }}', test_str=test_str)
 78 |             self.assertHTMLEqual(parsed, frame % (url, expected_inline, expected_inline))
 79 | 
 80 |     def test_urlize(self):
 81 |         u1 = 'http://fappio.com/'
 82 |         u2 = 'http://google.com/fap/'
 83 |         u1h = '<a href="%s">%s</a>' % (u1, u1)
 84 |         u2h = '<a href="%s">%s</a>' % (u2, u2)
 85 |         for url, expected in self.full_pairs.items():
 86 |             expected_inline = self.inline_pairs[url]
 87 |             frame = 'test %s\n%s\n%s\nand another %s'
 88 | 
 89 |             test_str = frame % (u1, u2, url, url)
 90 | 
 91 |             parsed = self.render('{{ test_str|oembed }}', test_str=test_str)
 92 |             self.assertEqual(parsed, frame % (u1h, u2h, expected, expected_inline))
 93 | 
 94 |     def test_oembed_filter_extension(self):
 95 |         for url, expected in self.full_pairs.items():
 96 |             expected_inline = self.inline_pairs[url]
 97 |             frame = 'test http://fappio.com\nhttp://google.com\n%s\nand another %s'
 98 | 
 99 |             test_str = frame % (url, url)
100 | 
101 |             parsed = self.render('{{ test_str|oembed_no_urlize }}', test_str=test_str)
102 |             self.assertEqual(parsed, frame % (expected, expected_inline))
103 | 
104 |     def test_extract_filter(self):
105 |         blank = 'http://fapp.io/foo/'
106 |         frame = 'test %s\n%s\n%s\n%s at last'
107 |         frame_html = '<p>test %s</p><p><a href="foo">%s</a> %s</p><p>%s</p>'
108 | 
109 |         t = """{% for url, data in test_str|extract_oembed %}{{ url }}\n{% endfor %}"""
110 |         t2 = """{% for url, data in test_str|extract_oembed_html %}{{ url }}\n{% endfor %}"""
111 | 
112 |         for url, expected in self.data_pairs.items():
113 |             test_str = frame % (url, blank, url, blank)
114 |             rendered = self.render(t, test_str=test_str)
115 |             self.assertEqual(rendered, url)
116 | 
117 |             test_str = frame_html % (url, blank, url, blank)
118 |             rendered = self.render(t, test_str=test_str)
119 |             self.assertEqual(rendered, url)
120 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/models.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/models.py


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/providers.py:
--------------------------------------------------------------------------------
 1 | from django.conf import settings
 2 | from django.core.cache import cache
 3 | 
 4 | from micawber.providers import bootstrap_basic as _bootstrap_basic, bootstrap_embedly as _bootstrap_embedly
 5 | 
 6 | 
 7 | def bootstrap_basic():
 8 |     return _bootstrap_basic(cache)
 9 | 
def bootstrap_embedly():
    """Return micawber's embedly provider registry, passing it Django's cache.

    When the ``MICAWBER_EMBEDLY_KEY`` setting is present and truthy it is
    forwarded as the ``key`` keyword argument.
    """
    api_key = getattr(settings, 'MICAWBER_EMBEDLY_KEY', None)
    if api_key:
        return _bootstrap_embedly(cache, key=api_key)
    return _bootstrap_embedly(cache)
16 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/templates/micawber/link.html:
--------------------------------------------------------------------------------
1 | <a href="{{ response.url }}" title="{{ response.title }}">{{ response.title }}</a>
2 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/templates/micawber/photo.html:
--------------------------------------------------------------------------------
1 | <a href="{{ response.url }}" title="{{ response.title }}"><img alt="{{ response.title }}" src="{{ response.url }}" /></a>
2 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/templates/micawber/rich.html:
--------------------------------------------------------------------------------
1 | {{ response.html|safe }}
2 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/templates/micawber/video.html:
--------------------------------------------------------------------------------
1 | {{ response.html|safe }}
2 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/templatetags/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/templatetags/__init__.py


--------------------------------------------------------------------------------
/micawber/contrib/mcdjango/templatetags/micawber_tags.py:
--------------------------------------------------------------------------------
1 | from micawber.contrib.mcdjango import register
2 | 


--------------------------------------------------------------------------------
/micawber/contrib/mcflask.py:
--------------------------------------------------------------------------------
 1 | try:
 2 |     from markupsafe import Markup
 3 | except ImportError:
 4 |     from flask import Markup
 5 | from micawber import parse_text, parse_html, extract, extract_html
 6 | 
 7 | 
 8 | def oembed(s, providers, urlize_all=True, html=False, **params):
 9 |     if html:
10 |         fn = parse_html
11 |     else:
12 |         fn = parse_text
13 |     return Markup(fn(s, providers, urlize_all, **params))
14 | 
def extract_oembed(s, providers, html=False, **params):
    """Run ``extract_html`` (when ``html`` is true) or ``extract`` on ``s``.

    The extractor's return value is passed back unchanged.
    """
    extractor = extract_html if html else extract
    return extractor(s, providers, **params)
21 | 
def add_oembed_filters(app, providers):
    """Install ``oembed`` and ``extract_oembed`` filters on ``app.jinja_env``.

    Both filters are closures bound to the given ``providers`` object.
    """
    def _oembed(s, urlize_all=True, html=False, **params):
        return oembed(s, providers, urlize_all, html, **params)

    def _extract_oembed(s, html=False, **params):
        return extract_oembed(s, providers, html, **params)

    filters = app.jinja_env.filters
    filters['oembed'] = _oembed
    filters['extract_oembed'] = _extract_oembed
31 | 


--------------------------------------------------------------------------------
/micawber/contrib/providers.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | from micawber.providers import Provider
 4 | 
 5 | 
 6 | class ImageProvider(Provider):
 7 |     """
 8 |     Simple little hack to render any image URL as an <img> tag, use with care
 9 | 
10 |     Usage:
11 | 
12 |     pr = micawber.bootstrap_basic()
13 |     pr.register(ImageProvider.regex, ImageProvider(''))
14 |     """
15 |     regex = 'http://.+?\.(jpg|gif|png)'
16 | 
17 |     def request(self, url, **params):
18 |         return {
19 |             'url': url,
20 |             'type': 'photo',
21 |             'title': '',
22 |         }
23 | 
24 | 
class GoogleMapsProvider(Provider):
    """
    Render a map URL as an embedded map

    No HTTP request is made: ``request`` builds a ``rich`` response whose
    HTML is an <iframe> pointing at the map.

    Usage:

    pr = micawber.bootstrap_basic()
    pr.register(GoogleMapsProvider.regex, GoogleMapsProvider(''))
    """
    regex = r'^https?://maps.google.com/maps\?([^\s]+)'

    # Query parameters that are copied from the original URL to the embed URL.
    valid_params = ['q', 'z']

    def request(self, url, **params):
        """Build the embed response for a Google Maps URL.

        :param url: URL matching ``regex``.
        :param params: ``maxwidth`` / ``maxheight`` give the iframe size
            (defaults 640 x 480).
        :returns: dict with ``html``, ``width``, ``height``, ``type`` etc.
        """
        url_params = re.match(self.regex, url).groups()[0]
        url_params = url_params.replace('&amp;', '&').split('&')

        map_params = ['output=embed']

        for param in url_params:
            # partition() copes with fragments that have no '=' (for example
            # a trailing '&'); unpacking the result of split('=', 1) raised
            # ValueError on those.
            key, separator, _ = param.partition('=')
            if separator and key in self.valid_params:
                map_params.append(param)

        width = int(params.get('maxwidth', 640))
        height = int(params.get('maxheight', 480))
        html = '<iframe width="%d" height="%d" frameborder="0" scrolling="no" marginheight="0" marginwidth="0" src="http://maps.google.com/maps?%s"></iframe>' % \
            (width, height, '&amp;'.join(map_params))

        return {
            'height': height,
            'html': html,
            'provider_name': 'Google maps',
            'title': '',
            'type': 'rich',
            'version': '1.0',
            'width': width,
        }
63 | 


--------------------------------------------------------------------------------
/micawber/exceptions.py:
--------------------------------------------------------------------------------
class ProviderException(Exception):
    """Base class of the provider-related exceptions in this module."""
3 | 
class ProviderNotFoundException(ProviderException):
    """A ProviderException subclass.

    NOTE(review): presumably raised when no provider is registered for a
    URL -- confirm against the raising code.
    """
6 | 
class InvalidResponseException(ProviderException):
    """A ProviderException subclass.

    NOTE(review): presumably raised when a provider's response cannot be
    used -- confirm against the raising code.
    """
9 | 


--------------------------------------------------------------------------------
/micawber/parsers.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | from .compat import text_type
  3 | try:
  4 |     import simplejson as json
  5 | except ImportError:
  6 |     import json
  7 | 
  8 | bs_kwargs = {}
  9 | try:
 10 |     from BeautifulSoup import BeautifulSoup
 11 |     bs_kwargs = {'convertEntities': BeautifulSoup.HTML_ENTITIES}
 12 |     replace_kwargs = {}
 13 | except ImportError:
 14 |     try:
 15 |         from bs4 import BeautifulSoup
 16 |         bs_kwargs = replace_kwargs = {'features': 'html.parser'}
 17 |     except ImportError:
 18 |         BeautifulSoup = None
 19 | 
 20 | from micawber.exceptions import ProviderException
 21 | 
 22 | 
# An http(s) URL. The final character class omits punctuation such as
# '.', ',', ';', ':', '!', '?' and parentheses, so a URL never ends on one of
# those characters (e.g. the full stop closing a sentence is not captured).
url_pattern = '(https?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_|])'
url_re = re.compile(url_pattern)
# A string that consists of a single URL, optionally surrounded by whitespace.
standalone_url_re = re.compile(r'^\s*' + url_pattern + r'\s*$')

# Tag names treated as block-level by _is_standalone(): a text node holding
# only a URL counts as "standalone" when its parent is one of these.
block_elements = set([
    'address', 'article', 'aside', 'blockquote', 'canvas', 'center', 'dir',
    'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer',
    'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr',
    'isindex', 'li', 'main', 'menu', 'nav', 'noframes', 'noscript', 'ol', 'p',
    'pre', 'section', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
    'ul',
    # Additional elements.
    'button', 'del', 'iframe', 'ins', 'map', 'object', 'script', '[document]',
])

# URLs with one of these tags anywhere among their ancestors are left alone
# (see _inside_skip()).
skip_elements = set(['a', 'pre', 'code', 'input', 'textarea', 'select'])
 39 | 
 40 | 
 41 | def full_handler(url, response_data, **params):
 42 |     if response_data['type'] == 'link':
 43 |         return '<a href="%(url)s" title="%(title)s">%(title)s</a>' % response_data
 44 |     elif response_data['type'] == 'photo':
 45 |         return '<a href="%(url)s" title="%(title)s"><img alt="%(title)s" src="%(url)s" /></a>' % response_data
 46 |     else:
 47 |         return response_data['html']
 48 | 
 49 | def inline_handler(url, response_data, **params):
 50 |     return '<a href="%(url)s" title="%(title)s">%(title)s</a>' % response_data
 51 | 
 52 | def urlize(url, **params):
 53 |     params.setdefault('href', url)
 54 |     param_html = ' '.join('%s="%s"' % (key, value)
 55 |                           for key, value in sorted(params.items()))
 56 |     return '<a %s>%s</a>' % (param_html, url)
 57 | 
 58 | def extract(text, providers, **params):
 59 |     all_urls = set()
 60 |     urls = []
 61 |     extracted_urls = {}
 62 | 
 63 |     for url in re.findall(url_re, text):
 64 |         if url in all_urls:
 65 |             continue
 66 | 
 67 |         all_urls.add(url)
 68 |         urls.append(url)
 69 |         try:
 70 |             extracted_urls[url] = providers.request(url, **params)
 71 |         except ProviderException:
 72 |             pass
 73 | 
 74 |     return urls, extracted_urls
 75 | 
 76 | def parse_text_full(text, providers, urlize_all=True, handler=full_handler,
 77 |                     urlize_params=None, **params):
 78 |     all_urls, extracted_urls = extract(text, providers, **params)
 79 |     replacements = {}
 80 |     urlize_params = urlize_params or {}
 81 | 
 82 |     for url in all_urls:
 83 |         if url in extracted_urls:
 84 |             replacements[url] = handler(url, extracted_urls[url], **params)
 85 |         elif urlize_all:
 86 |             replacements[url] = urlize(url, **urlize_params)
 87 | 
 88 |     # go through the text recording URLs that can be replaced
 89 |     # taking note of their start & end indexes
 90 |     urls = re.finditer(url_re, text)
 91 |     matches = []
 92 |     for match in urls:
 93 |         if match.group() in replacements:
 94 |             matches.append([match.start(), match.end(), match.group()])
 95 | 
 96 |     # replace the URLs in order, offsetting the indices each go
 97 |     for indx, (start, end, url) in enumerate(matches):
 98 |         replacement = replacements[url]
 99 |         difference = len(replacement) - len(url)
100 | 
101 |         # insert the replacement between two slices of text surrounding the
102 |         # original url
103 |         text = text[:start] + replacement + text[end:]
104 | 
105 |         # iterate through the rest of the matches offsetting their indices
106 |         # based on the difference between replacement/original
107 |         for j in range(indx + 1, len(matches)):
108 |             matches[j][0] += difference
109 |             matches[j][1] += difference
110 | 
111 |     return text
112 | 
def parse_text(text, providers, urlize_all=True, handler=full_handler,
               block_handler=inline_handler, urlize_params=None, **params):
    """Parse ``text`` line by line, replacing URLs with rendered content.

    A line consisting solely of a URL is rendered with ``handler`` (or
    urlized when the request fails and ``urlize_all`` is true). Any other
    line goes through ``parse_text_full`` with ``block_handler``, unless
    ``block_handler`` is None, in which case it is kept as is.
    """
    urlize_params = urlize_params or {}
    output = []

    for line in text.splitlines():
        if not standalone_url_re.match(line):
            if block_handler is not None:
                line = parse_text_full(line, providers, urlize_all,
                                       block_handler,
                                       urlize_params=urlize_params, **params)
            output.append(line)
            continue

        url = line.strip()
        try:
            response = providers.request(url, **params)
        except ProviderException:
            if urlize_all:
                line = urlize(url, **urlize_params)
        else:
            line = handler(url, response, **params)
        output.append(line)

    return '\n'.join(output)
136 | 
def parse_html(html, providers, urlize_all=True, handler=full_handler,
               block_handler=inline_handler, soup_class=BeautifulSoup,
               urlize_params=None, **params):
    """Parse ``html`` and replace the URLs found in its text nodes.

    Text nodes inside a skipped element are left alone. A standalone URL
    is rendered with ``handler``, any other one with ``block_handler``.
    Returns the modified document as text.

    Raises Exception when no soup class is available.
    """
    if not soup_class:
        raise Exception('Unable to parse HTML, please install BeautifulSoup '
                        'or beautifulsoup4, or use the text parser')

    soup = soup_class(html, **bs_kwargs)

    for text_node in soup.findAll(text=url_re):
        if _inside_skip(text_node):
            continue

        chosen_handler = handler if _is_standalone(text_node) else block_handler

        # Escape angle brackets so that literal text is not turned into
        # markup when the replacement is parsed again below.
        escaped = text_node.string.replace('<', '&lt;').replace('>', '&gt;')

        replacement = parse_text_full(
            escaped,
            providers,
            urlize_all,
            chosen_handler,
            urlize_params=urlize_params,
            **params)
        text_node.replaceWith(BeautifulSoup(replacement, **replace_kwargs))

    return text_type(soup)
168 | 
def extract_html(html, providers, **params):
    """HTML counterpart of ``extract``.

    Scans the text nodes of ``html`` that contain a URL and are not inside
    a skipped element.

    :returns: ``(urls, extracted)`` -- distinct URLs in order of first
        appearance, and a dict of the responses obtained.
    """
    if not BeautifulSoup:
        raise Exception('Unable to parse HTML, please install BeautifulSoup '
                        'or use the text parser')

    soup = BeautifulSoup(html, **bs_kwargs)
    seen = set()
    ordered = []
    responses = {}

    for text_node in soup.findAll(text=url_re):
        if not _inside_skip(text_node):
            found, found_data = extract(text_type(text_node), providers, **params)
            for candidate in found:
                if candidate not in seen:
                    responses.update(found_data)
                    ordered.append(candidate)
                    seen.add(candidate)

    return ordered, responses
193 | 
def _is_standalone(soup_elem):
    """True if the node is nothing but a URL and its parent is block-level."""
    if not standalone_url_re.match(soup_elem):
        return False
    return soup_elem.parent.name in block_elements
198 | 
def _inside_skip(soup_elem):
    """True if any ancestor of the node is one of the skipped elements."""
    ancestor = soup_elem.parent
    while ancestor is not None:
        if ancestor.name in skip_elements:
            return True
        ancestor = ancestor.parent
    return False
206 | 


--------------------------------------------------------------------------------
/micawber/providers.py:
--------------------------------------------------------------------------------
  1 | import hashlib
  2 | import logging
  3 | import pickle
  4 | import re
  5 | import socket
  6 | import ssl
  7 | from .compat import get_charset
  8 | from .compat import HTTPError
  9 | from .compat import OrderedDict
 10 | from .compat import Request
 11 | from .compat import urlencode
 12 | from .compat import URLError
 13 | from .compat import urlopen
 14 | try:
 15 |     import simplejson as json
 16 |     try:
 17 |         InvalidJson = json.JSONDecodeError
 18 |     except AttributeError:
 19 |         InvalidJson = ValueError
 20 | except ImportError:
 21 |     import json
 22 |     InvalidJson = ValueError
 23 | 
 24 | from micawber.exceptions import InvalidResponseException
 25 | from micawber.exceptions import ProviderException
 26 | from micawber.exceptions import ProviderNotFoundException
 27 | from micawber.parsers import extract
 28 | from micawber.parsers import extract_html
 29 | from micawber.parsers import parse_html
 30 | from micawber.parsers import parse_text
 31 | from micawber.parsers import parse_text_full
 32 | 
 33 | 
 34 | logger = logging.getLogger(__name__)
 35 | 
 36 | 
 37 | class Provider(object):
 38 |     def __init__(self, endpoint, timeout=3.0, user_agent=None, **kwargs):
 39 |         self.endpoint = endpoint
 40 |         self.socket_timeout = timeout
 41 |         self.user_agent = user_agent or 'python-micawber'
 42 |         self.base_params = {'format': 'json'}
 43 |         self.base_params.update(kwargs)
 44 | 
 45 |     def fetch(self, url):
 46 |         req = Request(url, headers={'User-Agent': self.user_agent})
 47 |         try:
 48 |             resp = fetch(req, self.socket_timeout)
 49 |         except URLError:
 50 |             return False
 51 |         except HTTPError:
 52 |             return False
 53 |         except socket.timeout:
 54 |             return False
 55 |         except ssl.SSLError:
 56 |             return False
 57 |         return resp
 58 | 
 59 |     def encode_params(self, url, **extra_params):
 60 |         params = dict(self.base_params)
 61 |         params.update(extra_params)
 62 |         params['url'] = url
 63 |         return urlencode(sorted(params.items()))
 64 | 
 65 |     def request(self, url, **extra_params):
 66 |         encoded_params = self.encode_params(url, **extra_params)
 67 | 
 68 |         endpoint_url = self.endpoint
 69 |         if '?' in endpoint_url:
 70 |             endpoint_url = '%s&%s' % (endpoint_url.rstrip('&'), encoded_params)
 71 |         else:
 72 |             endpoint_url = '%s?%s' % (endpoint_url, encoded_params)
 73 | 
 74 |         response = self.fetch(endpoint_url)
 75 |         if response:
 76 |             return self.handle_response(response, url)
 77 |         else:
 78 |             raise ProviderException('Error fetching "%s"' % endpoint_url)
 79 | 
 80 |     def handle_response(self, response, url):
 81 |         try:
 82 |             json_data = json.loads(response)
 83 |         except InvalidJson as exc:
 84 |             try:
 85 |                 msg = exc.message
 86 |             except AttributeError:
 87 |                 msg = exc.args[0]
 88 |             raise InvalidResponseException(msg)
 89 | 
 90 |         if 'url' not in json_data:
 91 |             json_data['url'] = url
 92 |         if 'title' not in json_data:
 93 |             json_data['title'] = json_data['url']
 94 | 
 95 |         return json_data
 96 | 
 97 | 
 98 | def make_key(*args, **kwargs):
 99 |     return hashlib.md5(pickle.dumps((args, kwargs))).hexdigest()
100 | 
101 | 
def url_cache(fn):
    """
    Decorator that memoizes ``fn(self, url, **params)`` in ``self.cache``.

    When ``self.cache`` is ``None`` the wrapped function is called directly.
    Otherwise the result is looked up under ``make_key(url, params)``; a
    falsy lookup result is treated as a miss, in which case the function is
    called and its result stored.
    """
    # Imported locally so this block does not depend on a module-level import.
    import functools

    # wraps() keeps the decorated method's __name__/__doc__ for introspection.
    @functools.wraps(fn)
    def inner(self, url, **params):
        if self.cache is None:
            return fn(self, url, **params)

        key = make_key(url, params)
        data = self.cache.get(key)
        if not data:
            data = fn(self, url, **params)
            self.cache.set(key, data)
        return data
    return inner
113 | 
114 | 
def fetch(request, timeout=None):
    """
    Open ``request`` and return the decoded response body.

    :param request: a URL string or request object accepted by ``urlopen``.
    :param timeout: optional timeout; only passed on when truthy.
    :returns: the body decoded as text, or ``False`` when the status code is
        outside the 2xx range.

    The response is closed on every path, including non-2xx responses and
    failures while reading or decoding.
    """
    urlopen_params = {}
    if timeout:
        urlopen_params['timeout'] = timeout
    resp = urlopen(request, **urlopen_params)
    try:
        if resp.code < 200 or resp.code >= 300:
            return False

        # by RFC, default HTTP charset is ISO-8859-1
        charset = get_charset(resp) or 'iso-8859-1'
        return resp.read().decode(charset)
    finally:
        resp.close()
129 | 
130 | 
def fetch_cache(cache, url, refresh=False, timeout=None):
    """
    Return the contents of ``url``, consulting ``cache`` first.

    The cache (when given) is read under the key ``'micawber.<url>'`` unless
    ``refresh`` is set. On a miss the URL is fetched and, when a cache is
    available, the result is written back under the same key.
    """
    has_cache = cache is not None

    contents = None
    if has_cache and not refresh:
        contents = cache.get('micawber.%s' % url)
    if contents is not None:
        return contents

    contents = fetch(url, timeout=timeout)
    if has_cache:
        cache.set('micawber.%s' % url, contents)
    return contents
140 | 
141 | 
class ProviderRegistry(object):
    """
    Ordered mapping of URL regexes to providers, with optional caching.

    :param cache: object exposing ``get``/``set``; ``None`` disables caching
        of ``request`` results.
    """
    def __init__(self, cache=None):
        self._registry = OrderedDict()
        self.cache = cache

    def register(self, regex, provider):
        """Associate ``regex`` with ``provider`` (replacing any prior one)."""
        self._registry[regex] = provider

    def unregister(self, regex):
        """Remove ``regex``; raises ``KeyError`` if it is not registered."""
        del self._registry[regex]

    def __iter__(self):
        # Most recently registered patterns are yielded first.
        pairs = list(self._registry.items())
        return iter(pairs[::-1])

    def provider_for_url(self, url):
        """Return the first provider whose regex matches ``url``, or None."""
        candidates = (
            provider for pattern, provider in self if re.match(pattern, url))
        return next(candidates, None)

    @url_cache
    def request(self, url, **params):
        """
        Look up the provider for ``url`` and delegate the request to it.

        :raises ProviderNotFoundException: if no registered regex matches.
        """
        provider = self.provider_for_url(url)
        if not provider:
            raise ProviderNotFoundException(
                'Provider not found for "%s"' % url)
        return provider.request(url, **params)

    # The methods below forward to the module-level parser functions, passing
    # this registry as the provider source.

    def parse_text(self, text, **kwargs):
        return parse_text(text, self, **kwargs)

    def parse_text_full(self, text, **kwargs):
        return parse_text_full(text, self, **kwargs)

    def parse_html(self, html, **kwargs):
        return parse_html(html, self, **kwargs)

    def extract(self, text, **kwargs):
        return extract(text, self, **kwargs)

    def extract_html(self, html, **kwargs):
        return extract_html(html, self, **kwargs)
182 | 
183 | 
def bootstrap_basic(cache=None, registry=None):
    """
    Register a built-in set of well-known oEmbed providers.

    :param cache: cache handed to a newly created ``ProviderRegistry``.
    :param registry: existing registry to populate; a new one is created
        when this is omitted (or falsy).
    :returns: the populated registry.
    """
    # compliments of oembed.com#section7
    pr = registry or ProviderRegistry(cache)

    # (url regex, endpoint) pairs, registered in this order. Every pair gets
    # its own Provider instance.
    known_providers = (
        # c
        (r'http://chirb\.it/\S+', 'http://chirb.it/oembed.json'),
        (r'https?://www\.circuitlab\.com/circuit/\S+', 'https://www.circuitlab.com/circuit/oembed'),

        # d
        (r'https?://(?:www\.)?dailymotion\.com/\S+', 'http://www.dailymotion.com/services/oembed'),

        # f
        (r'https?://\S*?flickr\.com/\S+', 'https://www.flickr.com/services/oembed/'),
        (r'https?://flic\.kr/\S*', 'https://www.flickr.com/services/oembed/'),
        (r'https?://(?:www\.)?funnyordie\.com/videos/\S+', 'http://www.funnyordie.com/oembed'),

        # g
        # 2020-11-04: removed GitHub gist, as it seems to be unsupported now.
        #(r'https?://gist\.github\.com/\S*', 'https://github.com/api/oembed'),

        # h
        (r'http://(?:www\.)hulu\.com/watch/\S+', 'http://www.hulu.com/api/oembed.json'),

        # i
        (r'https?://\S*imgur\.com/\S+', 'https://api.imgur.com/oembed'),
        (r'https?://(www\.)?instagr(\.am|am\.com)/p/\S+', 'http://api.instagram.com/oembed'),

        # m
        (r'http://www\.mobypicture\.com/user/\S*?/view/\S*', 'http://api.mobypicture.com/oEmbed'),
        (r'http://moby\.to/\S*', 'http://api.mobypicture.com/oEmbed'),

        # p
        (r'http://i\S*\.photobucket\.com/albums/\S+', 'http://photobucket.com/oembed'),
        (r'http://gi\S*\.photobucket\.com/groups/\S+', 'http://photobucket.com/oembed'),
        (r'http://www\.polleverywhere\.com/(polls|multiple_choice_polls|free_text_polls)/\S+', 'http://www.polleverywhere.com/services/oembed/'),
        (r'https?://(.+\.)?polldaddy\.com/\S*', 'http://polldaddy.com/oembed/'),

        # s
        (r'https?://(?:www\.)?slideshare\.net/[^\/]+/\S+', 'http://www.slideshare.net/api/oembed/2'),
        (r'https?://slidesha\.re/\S*', 'http://www.slideshare.net/api/oembed/2'),
        (r'http://\S*\.smugmug\.com/\S*', 'http://api.smugmug.com/services/oembed/'),
        (r'https://\S*?soundcloud\.com/\S+', 'http://soundcloud.com/oembed'),
        (r'https?://speakerdeck\.com/\S*', 'https://speakerdeck.com/oembed.json'),
        (r'https?://(?:www\.)?scribd\.com/\S*', 'http://www.scribd.com/services/oembed'),

        # t
        (r'https?://(www\.)tiktok\.com/\S+', 'https://www.tiktok.com/oembed'),
        (r'https?://(www\.)?twitter\.com/\S+/status(es)?/\S+', 'https://publish.twitter.com/oembed'),

        # v
        (r'http://(?:player\.)?vimeo\.com/\S+', 'http://vimeo.com/api/oembed.json'),
        (r'https://(?:player\.)?vimeo\.com/\S+', 'https://vimeo.com/api/oembed.json'),

        # w
        (r'http://\S+\.wordpress\.com/\S+', 'http://public-api.wordpress.com/oembed/'),
        (r'https?://wordpress\.tv/\S+', 'http://wordpress.tv/oembed/'),

        # y
        (r'http://(\S*\.)?youtu(\.be/|be\.com/watch)\S+', 'https://www.youtube.com/oembed'),
        (r'https://(\S*\.)?youtu(\.be/|be\.com/watch)\S+', 'https://www.youtube.com/oembed?scheme=https&'),
    )

    for url_pattern, endpoint in known_providers:
        pr.register(url_pattern, Provider(endpoint))

    return pr
246 | 
247 | 
def bootstrap_embedly(cache=None, registry=None, refresh=False, **params):
    """
    Register every pattern from the embed.ly service list.

    The service list is downloaded (through ``fetch_cache``) and each regex
    in it is registered with its own ``Provider`` pointing at the embed.ly
    oEmbed endpoint. ``params`` are forwarded to every ``Provider``.

    :returns: the populated registry (``registry`` or a new one).
    """
    api_endpoint = 'http://api.embed.ly/1/oembed'
    services_url = 'http://api.embed.ly/1/services/python'

    target = registry or ProviderRegistry(cache)

    # fetch the schema
    raw_schema = fetch_cache(cache, services_url, refresh=refresh)
    for service in json.loads(raw_schema):
        for url_pattern in service['regex']:
            target.register(url_pattern, Provider(api_endpoint, **params))
    return target
262 | 
263 | 
def bootstrap_noembed(cache=None, registry=None, refresh=False, **params):
    """
    Register every pattern from the noembed.com provider list.

    The provider list is downloaded (through ``fetch_cache``) and each entry
    under ``'patterns'`` is registered with its own ``Provider`` pointing at
    the noembed endpoint. ``params`` are forwarded to every ``Provider``.

    :returns: the populated registry (``registry`` or a new one).
    """
    api_endpoint = 'http://noembed.com/embed'
    providers_url = 'http://noembed.com/providers'

    target = registry or ProviderRegistry(cache)

    # fetch the schema
    raw_schema = fetch_cache(cache, providers_url, refresh=refresh)
    for entry in json.loads(raw_schema):
        for url_pattern in entry['patterns']:
            target.register(url_pattern, Provider(api_endpoint, **params))
    return target
278 | 
279 | 
def bootstrap_oembed(cache=None, registry=None, refresh=False, **params):
    """
    Register the providers published in oembed.com's ``providers.json``.

    Each endpoint's URL schemes are converted to regexes and registered with
    a ``Provider`` for that endpoint (shared by all of its schemes). Schemes
    whose converted pattern does not compile are logged and skipped. Two
    YouTube patterns are registered last. ``params`` are forwarded to the
    providers built from the schema.

    :returns: the populated registry (``registry`` or a new one).
    """
    providers_url = 'https://oembed.com/providers.json'
    target = registry or ProviderRegistry(cache)

    # Fetch schema.
    raw_schema = fetch_cache(cache, providers_url, refresh=refresh)

    for entry in json.loads(raw_schema):
        for endpoint in reversed(entry['endpoints']):
            # Endpoints without schemes presumably rely on discovery via
            # <link> tags, which micawber does not support.
            if 'schemes' in endpoint:
                # The endpoint URL may carry a "{format}" placeholder; always
                # ask for JSON.
                endpoint_url = endpoint['url'].replace('{format}', 'json')
                provider = Provider(endpoint_url, **params)

                for scheme in endpoint['schemes']:
                    # A question-mark belongs to the query-string and must be
                    # matched literally. This has to happen before wildcards
                    # are expanded, because the expansion itself contains "?".
                    # Each "*" then becomes: one or more characters that are
                    # not a slash, whitespace, or a querystring separator.
                    pattern = (scheme
                               .replace('?', r'\?')
                               .replace('*', r'[^\/\s\?&]+?'))
                    try:
                        re.compile(pattern)
                    except re.error:
                        logger.exception('oembed.com provider %s regex could not '
                                         'be compiled: %s', endpoint_url, pattern)
                    else:
                        target.register(pattern, provider)

    # Currently oembed.com does not provide patterns for YouTube, so we'll add
    # these ourselves.
    youtube = (
        (r'http://(\S*\.)?youtu(\.be/|be\.com/watch)\S+',
         'https://www.youtube.com/oembed'),
        (r'https://(\S*\.)?youtu(\.be/|be\.com/watch)\S+',
         'https://www.youtube.com/oembed?scheme=https&'),
    )
    for pattern, endpoint_url in youtube:
        target.register(pattern, Provider(endpoint_url))

    return target
328 | 


--------------------------------------------------------------------------------
/micawber/test_utils.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | try:
 3 |     import simplejson as json
 4 | except ImportError:
 5 |     import json
 6 | 
 7 | from micawber import *
 8 | from micawber.parsers import BeautifulSoup, bs_kwargs
 9 | from micawber.providers import make_key
10 | 
11 | 
class TestProvider(Provider):
    """
    Provider stub that serves canned responses instead of using the network.

    ``test_data`` maps a fully-built request URL (endpoint plus encoded query
    string) to the payload that is returned, JSON-encoded, for it.
    """
    test_data = {
        # link
        'link?format=json&url=http%3A%2F%2Flink-test1': {'title': 'test1', 'type': 'link'},
        'link?format=json&url=http%3A%2F%2Flink-test2': {'title': 'test2', 'type': 'link'},

        # photo
        'photo?format=json&url=http%3A%2F%2Fphoto-test1': {'title': 'ptest1', 'url': 'test1.jpg', 'type': 'photo'},
        'photo?format=json&url=http%3A%2F%2Fphoto-test2': {'title': 'ptest2', 'url': 'test2.jpg', 'type': 'photo'},

        # video
        'video?format=json&url=http%3A%2F%2Fvideo-test1': {'title': 'vtest1', 'html': '<test1>video</test1>', 'type': 'video'},
        'video?format=json&url=http%3A%2F%2Fvideo-test2': {'title': 'vtest2', 'html': '<test2>video</test2>', 'type': 'video'},

        # rich
        'rich?format=json&url=http%3A%2F%2Frich-test1': {'title': 'rtest1', 'html': '<test1>rich</test1>', 'type': 'rich'},
        'rich?format=json&url=http%3A%2F%2Frich-test2': {'title': 'rtest2', 'html': '<test2>rich</test2>', 'type': 'rich'},

        # with param
        'link?format=json&url=http%3A%2F%2Flink-test1&width=100': {'title': 'test1', 'type': 'link', 'width': 99},

        # no title
        'photo?format=json&url=http%3A%2F%2Fphoto-notitle': {'url': 'notitle.jpg', 'type': 'photo'},
    }

    def fetch(self, url):
        """Return the canned JSON for ``url``, or False if none exists."""
        try:
            payload = self.test_data[url]
        except KeyError:
            return False
        return json.dumps(payload)
41 | 
def _register_test_providers(registry):
    """Register one TestProvider per oEmbed type on ``registry``."""
    registry.register(r'http://link\S*', TestProvider('link'))
    registry.register(r'http://photo\S*', TestProvider('photo'))
    registry.register(r'http://video\S*', TestProvider('video'))
    registry.register(r'http://rich\S*', TestProvider('rich'))

# Shared fixtures: one registry without a cache, one backed by ``test_cache``.
test_pr = ProviderRegistry()

test_cache = Cache()
test_pr_cache = ProviderRegistry(test_cache)

for pr in (test_pr, test_pr_cache):
    _register_test_providers(pr)
52 | 
class BaseTestCase(unittest.TestCase):
    """
    Common fixtures and assertions for the micawber test-suite.

    ``setUp`` empties the shared ``test_cache`` and builds three lookup
    tables keyed by test URL:

    * ``full_pairs``: expected markup when a URL is rendered in full.
    * ``inline_pairs``: expected markup when a URL is rendered inline.
    * ``data_pairs``: the raw payload the test provider returns.
    """
    def setUp(self):
        test_cache._cache = {}

        self.full_pairs = {
            'http://link-test1': '<a href="http://link-test1" title="test1">test1</a>',
            'http://photo-test2': '<a href="test2.jpg" title="ptest2"><img alt="ptest2" src="test2.jpg" /></a>',
            'http://video-test1': '<test1>video</test1>',
            'http://rich-test2': '<test2>rich</test2>',
            'http://photo-notitle': '<a href="notitle.jpg" title="notitle.jpg"><img alt="notitle.jpg" src="notitle.jpg" /></a>',
        }

        # Same keys as full_pairs (the original listed 'http://rich-test2'
        # twice with identical values; the duplicate entry was dropped).
        self.inline_pairs = {
            'http://link-test1': '<a href="http://link-test1" title="test1">test1</a>',
            'http://photo-test2': '<a href="test2.jpg" title="ptest2">ptest2</a>',
            'http://video-test1': '<a href="http://video-test1" title="vtest1">vtest1</a>',
            'http://rich-test2': '<a href="http://rich-test2" title="rtest2">rtest2</a>',
            'http://photo-notitle': '<a href="notitle.jpg" title="notitle.jpg">notitle.jpg</a>',
        }

        self.data_pairs = {
            'http://link-test1': {'title': 'test1', 'type': 'link'},
            'http://photo-test2': {'title': 'ptest2', 'url': 'test2.jpg', 'type': 'photo'},
            'http://video-test1': {'title': 'vtest1', 'html': '<test1>video</test1>', 'type': 'video'},
            'http://rich-test2': {'title': 'rtest2', 'html': '<test2>rich</test2>', 'type': 'rich'},
            'http://photo-notitle': {'url': 'notitle.jpg', 'type': 'photo'},
        }

    def assertCached(self, url, data, **params):
        """Assert that ``data`` is stored in ``test_cache`` for the request."""
        key = make_key(url, params)
        self.assertIn(key, test_cache._cache)
        self.assertEqual(test_cache._cache[key], data)

    def assertHTMLEqual(self, first, second, msg=None):
        """Compare two markup strings after parsing both with BeautifulSoup."""
        first = BeautifulSoup(first, **bs_kwargs)
        second = BeautifulSoup(second, **bs_kwargs)
        self.assertEqual(first, second, msg)
92 | 


--------------------------------------------------------------------------------
/micawber/tests.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import unittest
  3 | 
  4 | from micawber import *
  5 | from micawber.test_utils import test_pr, test_cache, test_pr_cache, TestProvider, BaseTestCase
  6 | 
  7 | 
  8 | class ProviderTestCase(BaseTestCase):
  9 |     def test_register_unregister(self):
 10 |         pr = ProviderRegistry()
 11 |         provider1 = TestProvider('link')
 12 |         provider2 = TestProvider('link')
 13 |         pr.register('1', provider1)
 14 |         pr.register('2', provider1)
 15 |         pr.register('3', provider2)
 16 |         pr.unregister('2')
 17 |         self.assertEqual(len(pr._registry), 2)
 18 | 
 19 |         # Multiple calls to remove() are OK.
 20 |         self.assertRaises(KeyError, pr.unregister, '2')
 21 | 
 22 |         self.assertEqual(pr.provider_for_url('1'), provider1)
 23 |         self.assertEqual(pr.provider_for_url('2'), None)
 24 |         self.assertEqual(pr.provider_for_url('3'), provider2)
 25 | 
 26 |         pr.unregister('1')
 27 |         pr.unregister('3')
 28 |         self.assertEqual(len(pr._registry), 0)
 29 |         for test_regex in ['1', '2', '3']:
 30 |             self.assertEqual(pr.provider_for_url(test_regex), None)
 31 | 
 32 |     def test_multiple_matches(self):
 33 |         pr = ProviderRegistry()
 34 |         provider1 = TestProvider('link')
 35 |         provider2 = TestProvider('link')
 36 |         pr.register(r'1(\d+)', provider1)
 37 |         pr.register(r'1\d+', provider2)
 38 |         self.assertEqual(pr.provider_for_url('11'), provider2)
 39 |         pr.unregister(r'1\d+')
 40 |         self.assertEqual(pr.provider_for_url('11'), provider1)
 41 | 
 42 |     def test_provider_matching(self):
 43 |         provider = test_pr.provider_for_url('http://link-test1')
 44 |         self.assertFalse(provider is None)
 45 |         self.assertEqual(provider.endpoint, 'link')
 46 | 
 47 |         provider = test_pr.provider_for_url('http://photo-test1')
 48 |         self.assertFalse(provider is None)
 49 |         self.assertEqual(provider.endpoint, 'photo')
 50 | 
 51 |         provider = test_pr.provider_for_url('http://video-test1')
 52 |         self.assertFalse(provider is None)
 53 |         self.assertEqual(provider.endpoint, 'video')
 54 | 
 55 |         provider = test_pr.provider_for_url('http://rich-test1')
 56 |         self.assertFalse(provider is None)
 57 |         self.assertEqual(provider.endpoint, 'rich')
 58 | 
 59 |         provider = test_pr.provider_for_url('http://none-test1')
 60 |         self.assertTrue(provider is None)
 61 | 
 62 |     def test_provider(self):
 63 |         resp = test_pr.request('http://link-test1')
 64 |         self.assertEqual(resp, {'title': 'test1', 'type': 'link', 'url': 'http://link-test1'})
 65 | 
 66 |         resp = test_pr.request('http://photo-test2')
 67 |         self.assertEqual(resp, {'title': 'ptest2', 'type': 'photo', 'url': 'test2.jpg'})
 68 | 
 69 |         resp = test_pr.request('http://video-test1')
 70 |         self.assertEqual(resp, {'title': 'vtest1', 'type': 'video', 'html': '<test1>video</test1>', 'url': 'http://video-test1'})
 71 | 
 72 |         resp = test_pr.request('http://link-test1', width=100)
 73 |         self.assertEqual(resp, {'title': 'test1', 'type': 'link', 'url': 'http://link-test1', 'width': 99})
 74 | 
 75 |         self.assertRaises(ProviderException, test_pr.request, 'http://not-here')
 76 |         self.assertRaises(ProviderException, test_pr.request, 'http://link-test3')
 77 | 
 78 |     def test_caching(self):
 79 |         resp = test_pr_cache.request('http://link-test1')
 80 |         self.assertCached('http://link-test1', resp)
 81 | 
 82 |         # check that its the same as what we tested in the previous case
 83 |         resp2 = test_pr.request('http://link-test1')
 84 |         self.assertEqual(resp, resp2)
 85 | 
 86 |         resp = test_pr_cache.request('http://photo-test2')
 87 |         self.assertCached('http://photo-test2', resp)
 88 | 
 89 |         resp = test_pr_cache.request('http://video-test1')
 90 |         self.assertCached('http://video-test1', resp)
 91 | 
 92 |         self.assertEqual(len(test_cache._cache), 3)
 93 | 
 94 |     def test_caching_params(self):
 95 |         resp = test_pr_cache.request('http://link-test1')
 96 |         self.assertCached('http://link-test1', resp)
 97 | 
 98 |         resp_p = test_pr_cache.request('http://link-test1', width=100)
 99 |         self.assertCached('http://link-test1', resp_p, width=100)
100 | 
101 |         self.assertFalse(resp == resp_p)
102 | 
103 |     def test_invalid_json(self):
104 |         pr = ProviderRegistry()
105 |         class BadProvider(Provider):
106 |             def fetch(self, url):
107 |                 return 'bad'
108 |         pr.register('http://bad', BadProvider('link'))
109 |         self.assertRaises(InvalidResponseException, pr.request, 'http://bad')
110 | 
111 | 
class ParserTestCase(BaseTestCase):
    """Tests for the text/HTML parsing and URL extraction helpers."""

    def test_parse_text_full(self):
        # A bare URL is replaced by its full rendering.
        for link, full in self.full_pairs.items():
            self.assertHTMLEqual(test_pr.parse_text_full(link), full)

        # the parse_text_full will replace even inline content
        sentence = 'this is inline: %s'
        for link, full in self.full_pairs.items():
            result = test_pr.parse_text_full(sentence % link)
            self.assertHTMLEqual(result, sentence % full)

        paragraph = '<p>%s</p>'
        for link, full in self.full_pairs.items():
            result = test_pr.parse_html(paragraph % link)
            self.assertHTMLEqual(result, paragraph % full)

    def test_parse_text(self):
        sentence = 'this is inline: %s'
        for link, inline in self.inline_pairs.items():
            result = test_pr.parse_text(sentence % link)
            self.assertHTMLEqual(result, sentence % inline)

        # We can disable parsing inline links by specifying block_handler=None.
        for link in self.inline_pairs:
            result = test_pr.parse_text(sentence % link, block_handler=None)
            self.assertEqual(result, sentence % link)

        # if the link comes on its own line it gets included in full
        for link, full in self.full_pairs.items():
            self.assertHTMLEqual(test_pr.parse_text(link), full)

            # Specifying block_handler=None only applies to inline links, so
            # the behavior is the same for standalone links.
            result = test_pr.parse_text(link, block_handler=None)
            self.assertHTMLEqual(result, full)

        # links inside block tags will render as inline
        layout = '<p>Testing %s</p>'
        for link, inline in self.inline_pairs.items():
            result = test_pr.parse_html(layout % (link))
            self.assertHTMLEqual(result, layout % (inline))

        untouched_layouts = (
            # links inside <a> tags won't change at all
            '<p><a href="%s">%s</a></p>',
            # links within tags within a tags are fine too
            '<p><a href="%s"><span>%s</span></a></p>',
        )
        for layout in untouched_layouts:
            for link in self.inline_pairs:
                markup = layout % (link, link)
                self.assertHTMLEqual(test_pr.parse_html(markup), markup)

    def test_multiline(self):
        layout = 'this is inline: %s\n%s\nand yet another %s'
        for link, full in self.full_pairs.items():
            inline = self.inline_pairs[link]
            source = layout % (link, link, link)

            result = test_pr.parse_text(source)
            self.assertHTMLEqual(result, layout % (inline, full, inline))

        # On multi-line text, if we specify block_handler=None, only standalone
        # links will be handled.
        for link, full in self.full_pairs.items():
            source = layout % (link, link, link)

            result = test_pr.parse_text(source, block_handler=None)
            self.assertHTMLEqual(result, layout % (link, full, link))

        layout = '%s\nthis is inline: %s\n%s'
        for link, full in self.full_pairs.items():
            inline = self.inline_pairs[link]
            source = layout % (link, link, link)

            result = test_pr.parse_text(source)
            self.assertHTMLEqual(result, layout % (full, inline, full))

        # test mixing multiline with p tags
        layout = '<p>%s</p>\n<p>this is inline: %s</p>\n<p>\n%s\n</p><p>last test\n%s\n</p>'
        for link, full in self.full_pairs.items():
            inline = self.inline_pairs[link]
            source = layout % (link, link, link, link)

            result = test_pr.parse_html(source)
            self.assertHTMLEqual(result, layout % (full, inline, full, inline))

        layout = '<p><a href="#foo">%s</a></p>\n<p>this is inline: %s</p>\n<p>last test\n%s\n</p>'
        for link in self.full_pairs:
            inline = self.inline_pairs[link]
            source = layout % (link, link, link)

            result = test_pr.parse_html(source)
            self.assertHTMLEqual(result, layout % (link, inline, inline))

    def test_multiline_full(self):
        layout = 'this is inline: %s\n%s\nand yet another %s'
        for link, full in self.full_pairs.items():
            source = layout % (link, link, link)

            result = test_pr.parse_text_full(source)
            self.assertHTMLEqual(result, layout % (full, full, full))

    def test_urlize(self):
        # ``plain`` matches no provider, so it can only ever be urlized.
        plain = 'http://fapp.io/foo/'
        plain_link = '<a href="http://fapp.io/foo/">http://fapp.io/foo/</a>'
        for link, full in self.full_pairs.items():
            inline = self.inline_pairs[link]

            layout = 'test %s\n%s\n%s\nand finally %s'
            source = layout % (link, plain, link, plain)

            result = test_pr.parse_text(source)
            self.assertHTMLEqual(result, layout % (inline, plain_link, full, plain_link))

            result = test_pr.parse_text(source, urlize_all=False)
            self.assertHTMLEqual(result, layout % (inline, plain, full, plain))

            result = test_pr.parse_text_full(source)
            self.assertHTMLEqual(result, layout % (full, plain_link, full, plain_link))

            result = test_pr.parse_text_full(source, urlize_all=False)
            self.assertHTMLEqual(result, layout % (full, plain, full, plain))

            result = test_pr.parse_html(source)
            self.assertHTMLEqual(result, layout % (inline, plain_link, inline, plain_link))

            result = test_pr.parse_html(source, urlize_all=False)
            self.assertHTMLEqual(result, layout % (inline, plain, inline, plain))

            layout = '<p>test %s</p>\n<a href="foo">%s</a>\n<a href="foo2">%s</a>\n<p>and finally %s</p>'
            source = layout % (link, plain, link, plain)

            result = test_pr.parse_html(source)
            self.assertHTMLEqual(result, layout % (inline, plain, link, plain_link))

            result = test_pr.parse_html(source, urlize_all=False)
            self.assertHTMLEqual(result, layout % (inline, plain, link, plain))

    def test_urlize_params(self):
        source = 'test http://foo.com/'
        anchor_attrs = {'target': '_blank', 'rel': 'nofollow'}
        wanted = ('test <a href="http://foo.com/" rel="nofollow" target="_blank">'
                  'http://foo.com/</a>')

        # All three parsers must apply the extra anchor attributes.
        parsers = (
            test_pr.parse_text,
            test_pr.parse_text_full,
            test_pr.parse_html,
        )
        for parse in parsers:
            self.assertEqual(parse(source, urlize_params=anchor_attrs), wanted)

    def test_extract(self):
        plain = 'http://fapp.io/foo/'
        text_layout = 'test %s\n%s\n%s\n%s at last'
        html_layout = '<p>test %s</p><p><a href="foo">%s</a> %s</p><p>%s</p>'

        for link, payload in self.data_pairs.items():
            source = text_layout % (link, plain, link, plain)
            found, extracted = test_pr.extract(source)
            self.assertEqual(found, [link, plain])

            # The provider fills in missing url/title; mirror that here.
            payload.setdefault('url', link)
            payload.setdefault('title', payload['url'])
            self.assertEqual(extracted, {link: payload})

            markup = html_layout % (link, link, plain, plain)
            found, extracted = test_pr.extract_html(markup)
            self.assertEqual(found, [link, plain])
            self.assertEqual(extracted, {link: payload})

    def test_outside_of_markup(self):
        layout = '%s<p>testing</p>'
        for link, full in self.full_pairs.items():
            result = test_pr.parse_html(layout % (link))
            self.assertHTMLEqual(result, layout % (full))

    def test_html_entities(self):
        html_layout = '<p>test %s</p><p><a href="foo">%s</a></p>'

        for link, payload in self.data_pairs.items():
            escaped = link.replace('&', '&amp;')
            markup = html_layout % (escaped, escaped)
            found, extracted = test_pr.extract_html(markup)
            self.assertEqual(found, [link])

            payload.setdefault('url', link)
            payload.setdefault('title', payload['url'])
            self.assertEqual(extracted, {link: payload})

            rendered = test_pr.parse_html('<p>%s</p>' % escaped)
            self.assertHTMLEqual(rendered, '<p>%s</p>' % self.full_pairs[link])
320 | 
321 | 
class TestHTMLEntities(BaseTestCase):
    """Tests that ``parse_html`` leaves escaped markup in text untouched."""

    def test_parse_html_entities(self):
        """Parse several snippets containing ``&lt;``/``&gt;`` entities.

        Bare URLs are expected to become anchors while the entities, the
        existing anchors and the surrounding tag structure come back
        exactly as written.
        """
        entity = '&lt;script&gt;&lt;/script&gt;'
        plain = '<p>Test %s</p>' % entity

        cases = (
            # No URL at all: the input should come back unchanged.
            (plain, plain),
            # A single bare URL followed by escaped markup.
            ('<p>http://google.com %s</p>' % entity,
             '<p><a href="http://google.com">http://google.com</a>'
             ' %s</p>' % entity),
            # URLs spread over nested elements, next to an existing anchor.
            ('<p><a href="http://foo.com">http://foo.com</a> http://bar.com '
             '<span>http://baz.com &lt;script&gt; '
             '<b>http://nug.com <i>X &lt;foo&gt;</i></b></span></p>',
             '<p><a href="http://foo.com">http://foo.com</a> '
             '<a href="http://bar.com">http://bar.com</a> '
             '<span><a href="http://baz.com">http://baz.com</a> &lt;script&gt; '
             '<b><a href="http://nug.com">http://nug.com</a> '
             '<i>X &lt;foo&gt;</i></b></span></p>'),
            # URLs between entities and across newlines in one text node.
            ('<p><a href="http://foo.com">http://foo.com</a> http://bar.com '
             '&lt;script&gt; http://baz.com &lt;/script&gt;\n'
             'http://baze.com\n&lt;foo&gt;</p>',
             '<p><a href="http://foo.com">http://foo.com</a> '
             '<a href="http://bar.com">http://bar.com</a> &lt;script&gt; '
             '<a href="http://baz.com">http://baz.com</a> &lt;/script&gt;\n'
             '<a href="http://baze.com">http://baze.com</a>\n'
             '&lt;foo&gt;</p>'),
        )

        for markup, wanted in cases:
            self.assertEqual(test_pr.parse_html(markup), wanted)
352 | 
353 | 
if __name__ == '__main__':
    # When this module is executed directly, hand the command-line
    # arguments to unittest's command-line runner.
    unittest.main(argv=sys.argv)
356 | 


--------------------------------------------------------------------------------
/runtests.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | import sys
 4 | import unittest
 5 | 
 6 | from micawber import tests
 7 | 
 8 | 
 9 | def run_django_tests():
10 |     try:
11 |         import django
12 |     except ImportError:
13 |         print('Skipping django tests')
14 |         return
15 |     else:
16 |         print('Running django integration tests')
17 | 
18 |     providers = 'micawber.contrib.mcdjango.mcdjango_tests.tests.test_pr'
19 |     extensions = (
20 |         ('oembed_no_urlize', {'urlize_all': False}),
21 |     )
22 | 
23 |     from django.conf import settings
24 |     if not settings.configured:
25 |         settings.configure(
26 |             DATABASES={
27 |                 'default': {
28 |                     'ENGINE': 'django.db.backends.sqlite3',
29 |                     },
30 |                 },
31 |             SITE_ID=1,
32 |             INSTALLED_APPS=[
33 |                 'django.contrib.auth',
34 |                 'django.contrib.contenttypes',
35 |                 'django.contrib.sessions',
36 |                 'django.contrib.sites',
37 |                 'micawber.contrib.mcdjango',
38 |                 'micawber.contrib.mcdjango.mcdjango_tests',
39 |             ],
40 |             TEMPLATES=[
41 |                 {
42 |                     'BACKEND': 'django.template.backends.django.DjangoTemplates',
43 |                     'DIRS': [],
44 |                     'APP_DIRS': True,
45 |                     'OPTIONS': {}
46 |                 },
47 |             ],
48 |             MICAWBER_PROVIDERS=providers,
49 |             MICAWBER_TEMPLATE_EXTENSIONS=extensions,
50 |         )
51 |     else:
52 |         settings.MICAWBER_PROVIDERS = providers
53 |         settings.MICAWBER_TEMPLATE_EXTENSIONS = extensions
54 | 
55 |     try:
56 |         from django import setup
57 |     except ImportError:
58 |         pass
59 |     else:
60 |         setup()
61 | 
62 |     from django.test.runner import DiscoverRunner
63 |     parent = os.path.dirname(os.path.abspath(__file__))
64 |     sys.path.insert(0, parent)
65 |     return DiscoverRunner().run_tests(['micawber/contrib/mcdjango'])
66 | 
67 | 
def runtests(*test_args):
    """Run the micawber unit tests, then the Django tests, and exit.

    The tests found in the ``tests`` module run with a verbose text runner,
    followed by ``run_django_tests``. The process exits with status 1 if
    the unit tests recorded any failure or error, or if the Django run
    returned a truthy value; otherwise it exits with status 0.
    ``test_args`` is accepted but not used.
    """
    print("Running micawber tests")
    suite = unittest.TestLoader().loadTestsFromModule(tests)
    outcome = unittest.TextTestRunner(verbosity=2).run(suite)

    core_passed = not (outcome.errors or outcome.failures)
    if core_passed:
        print("All micawber tests passed")

    dj_failures = run_django_tests()

    exit_status = 0 if (core_passed and not dj_failures) else 1
    sys.exit(exit_status)
86 | 
if __name__ == '__main__':
    # Forward the command-line arguments (without the script name) to
    # runtests, which exits the process with the overall status.
    runtests(*sys.argv[1:])
89 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from setuptools import setup, find_packages
 3 | 
 4 | f = open(os.path.join(os.path.dirname(__file__), 'README.rst'))
 5 | readme = f.read()
 6 | f.close()
 7 | 
 8 | setup(
 9 |     name='micawber',
10 |     version='0.5.6',
11 |     description='a small library for extracting rich content from urls',
12 |     long_description=readme,
13 |     author='Charles Leifer',
14 |     author_email='coleifer@gmail.com',
15 |     url='http://github.com/coleifer/micawber/',
16 |     packages=[p for p in find_packages() if not p.startswith('examples')],
17 |     package_data = {
18 |         'micawber': [
19 |             'contrib/mcdjango/templates/micawber/*.html',
20 |         ],
21 |     },
22 |     classifiers=[
23 |         'Development Status :: 4 - Beta',
24 |         'Environment :: Web Environment',
25 |         'Intended Audience :: Developers',
26 |         'License :: OSI Approved :: MIT License',
27 |         'Operating System :: OS Independent',
28 |         'Programming Language :: Python',
29 |         'Programming Language :: Python :: 2.6',
30 |         'Programming Language :: Python :: 2.7',
31 |         'Programming Language :: Python :: 3.2',
32 |         'Programming Language :: Python :: 3.3',
33 |         'Programming Language :: Python :: 3.4',
34 |         'Programming Language :: Python :: 3.5',
35 |         'Programming Language :: Python :: 3.6',
36 |         'Programming Language :: Python :: 3.7',
37 |         'Programming Language :: Python :: 3.8',
38 |         'Programming Language :: Python :: 3.9',
39 |         'Programming Language :: Python :: 3.10',
40 |         'Programming Language :: Python :: 3.11',
41 |         'Framework :: Django',
42 |     ],
43 |     test_suite='runtests.runtests',
44 | )
45 | 


--------------------------------------------------------------------------------