├── .github └── workflows │ └── tests.yaml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── api.rst ├── conf.py ├── django.rst ├── examples.rst ├── flask.rst ├── getting_started.rst ├── index.rst ├── installation.rst └── make.bat ├── examples ├── __init__.py ├── django_ex │ ├── __init__.py │ ├── manage.py │ ├── settings.py │ ├── static │ │ └── style.css │ ├── templates │ │ └── example.html │ ├── urls.py │ └── views.py ├── flask_ex │ ├── app.py │ ├── static │ │ └── style.css │ └── templates │ │ └── example.html └── python_ex │ └── example.py ├── micawber ├── __init__.py ├── cache.py ├── compat.py ├── contrib │ ├── __init__.py │ ├── mcdjango │ │ ├── __init__.py │ │ ├── mcdjango_tests │ │ │ ├── __init__.py │ │ │ ├── models.py │ │ │ └── tests.py │ │ ├── models.py │ │ ├── providers.py │ │ ├── templates │ │ │ └── micawber │ │ │ │ ├── link.html │ │ │ │ ├── photo.html │ │ │ │ ├── rich.html │ │ │ │ └── video.html │ │ └── templatetags │ │ │ ├── __init__.py │ │ │ └── micawber_tags.py │ ├── mcflask.py │ └── providers.py ├── exceptions.py ├── parsers.py ├── providers.py ├── test_utils.py └── tests.py ├── runtests.py └── setup.py /.github/workflows/tests.yaml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: [push] 3 | jobs: 4 | tests: 5 | name: ${{ matrix.python-version }} 6 | runs-on: ubuntu-latest 7 | strategy: 8 | fail-fast: false 9 | matrix: 10 | python-version: [3.8, "3.10", "3.12", "3.13"] 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: actions/setup-python@v2 14 | with: 15 | python-version: ${{ matrix.python-version }} 16 | - name: pip deps 17 | run: pip install django bs4 18 | - name: runtests 19 | run: python runtests.py 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 
-------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | build: 3 | os: ubuntu-22.04 4 | tools: 5 | python: "3.11" 6 | sphinx: 7 | configuration: docs/conf.py 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Charles Leifer 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
20 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include MANIFEST.in 2 | include LICENSE 3 | include README.rst 4 | include runtests.py 5 | recursive-include micawber/contrib/mcdjango/templates * 6 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: http://media.charlesleifer.com/blog/photos/micawber-logo-0.png 2 | 3 | A small library for extracting rich content from urls. 4 | 5 | 6 | what does it do? 7 | ---------------- 8 | 9 | micawber supplies a few methods for retrieving rich metadata about a variety of 10 | links, such as links to youtube videos. micawber also provides functions for 11 | parsing blocks of text and html and replacing links to videos with rich embedded 12 | content. 13 | 14 | examples 15 | -------- 16 | 17 | here is a quick example: 18 | 19 | .. 
code-block:: python 20 | 21 | import micawber 22 | 23 | # load up rules for some default providers, such as youtube and flickr 24 | providers = micawber.bootstrap_basic() 25 | 26 | providers.request('http://www.youtube.com/watch?v=54XHDUOHuzU') 27 | 28 | # returns the following dictionary: 29 | { 30 | 'author_name': 'pascalbrax', 31 | 'author_url': u'http://www.youtube.com/user/pascalbrax', 32 | 'height': 344, 33 | 'html': u'', 34 | 'provider_name': 'YouTube', 35 | 'provider_url': 'http://www.youtube.com/', 36 | 'title': 'Future Crew - Second Reality demo - HD', 37 | 'type': u'video', 38 | 'thumbnail_height': 360, 39 | 'thumbnail_url': u'http://i2.ytimg.com/vi/54XHDUOHuzU/hqdefault.jpg', 40 | 'thumbnail_width': 480, 41 | 'url': 'http://www.youtube.com/watch?v=54XHDUOHuzU', 42 | 'width': 459, 43 | 'version': '1.0', 44 | } 45 | 46 | providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU') 47 | 48 | # returns the following string: 49 | this is a test: 50 | 51 | 52 | providers.parse_html('

http://www.youtube.com/watch?v=54XHDUOHuzU

') 53 | 54 | # returns the following html: 55 |

56 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests 
embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/micawber.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/micawber.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/micawber" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/micawber" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. 
The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 
143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | .. _api: 2 | 3 | API Documentation 4 | ================= 5 | 6 | Providers 7 | --------- 8 | 9 | .. py:module:: micawber.providers 10 | 11 | .. py:class:: Provider(endpoint, **kwargs) 12 | 13 | The :py:class:`Provider` object is responsible for retrieving metadata about 14 | a given URL. It implements a method called :py:meth:`~Provider.request`, which 15 | takes a URL and any parameters, which it sends off to an endpoint. The endpoint 16 | should return a JSON dictionary containing metadata about the resource, which is 17 | returned to the caller. 18 | 19 | :param endpoint: the API endpoint which should return information about requested links 20 | :param kwargs: any additional url parameters to send to the endpoint on each 21 | request, used for providing defaults. An example use-case might be for 22 | providing an API key on each request. 23 | 24 | .. py:method:: request(url, **extra_params) 25 | 26 | Retrieve information about the given url. By default, will make a HTTP 27 | GET request to the endpoint. The url will be sent to the endpoint, along 28 | with any parameters specified in the ``extra_params`` and those parameters 29 | specified when the class was instantiated. 30 | 31 | Will raise a :py:class:`ProviderException` in the event the URL is not 32 | accessible or the API times out. 
33 | 34 | :param url: URL to retrieve metadata for 35 | :param extra_params: additional parameters to pass to the endpoint, for 36 | example a maxwidth or an API key. 37 | :rtype: a dictionary of JSON data 38 | 39 | 40 | .. py:class:: ProviderRegistry([cache=None]) 41 | 42 | A registry for encapsulating a group of :py:class:`Provider` instances, 43 | with optional caching support. 44 | 45 | Handles matching regular expressions to providers. URLs are sent to the 46 | registry via its :py:meth:`~ProviderRegistry.request` method, it checks to 47 | see if it has a provider that matches the URL, and if so, requests the 48 | metadata from the provider instance. 49 | 50 | Exposes methods for parsing various types of text (including HTML), and 51 | either rendering oembed media inline or extracting embeddable links. 52 | 53 | :param cache: the cache simply needs to implement two methods, ``.get(key)`` and ``.set(key, value)``. 54 | 55 | .. py:method:: register(regex, provider) 56 | 57 | Register the provider with the following regex. 58 | 59 | Example: 60 | 61 | .. code-block:: python 62 | 63 | registry = ProviderRegistry() 64 | registry.register( 65 | 'http://\S*.youtu(\.be|be\.com)/watch\S*', 66 | Provider('http://www.youtube.com/oembed'), 67 | ) 68 | 69 | :param regex: a regex for matching URLs of a given type 70 | :param provider: a :py:class:`Provider` instance 71 | 72 | .. py:method:: request(url, **extra_params) 73 | 74 | Retrieve information about the given url if it matches a regex in the 75 | instance's registry. If no provider matches the URL, a 76 | ``ProviderException`` is thrown, otherwise the URL and parameters are 77 | dispatched to the matching provider's :py:meth:`Provider.request` 78 | method. 79 | 80 | If a cache was specified, the resulting metadata will be cached. 81 | 82 | :param url: URL to retrieve metadata for 83 | :param extra_params: additional parameters to pass to the endpoint, for 84 | example a maxwidth or an API key. 
85 | :rtype: a dictionary of JSON data 86 | 87 | .. py:method:: parse_text_full(text[, urlize_all=True[, handler=full_handler[, urlize_params=None[, **params]]]]) 88 | 89 | Parse a block of text, converting *all* links by passing them to the 90 | given handler. Links contained within a block of text (i.e. not on 91 | their own line) will be handled as well. 92 | 93 | Example input and output:: 94 | 95 | IN: 'this is a pic http://example.com/some-pic/' 96 | OUT: 'this is a pic ' 97 | 98 | :param str text: a string to parse 99 | :param bool urlize_all: convert unmatched urls into links 100 | :param handler: function to use to convert metadata back into a string representation 101 | :param dict urlize_params: keyword arguments to be used to construct a link 102 | when a provider is not found and urlize is enabled. 103 | :param params: any additional parameters to use when requesting metadata, i.e. 104 | a maxwidth or maxheight. 105 | 106 | .. py:method:: parse_text(text[, urlize_all=True[, handler=full_handler[, block_handler=inline_handler[, urlize_params=None[, **params]]]]]) 107 | 108 | Very similar to :py:meth:`~ProviderRegistry.parse_text_full` except 109 | URLs *on their own line* are rendered using the given ``handler``, 110 | whereas URLs within blocks of text are passed to the ``block_handler``. 111 | The default behavior renders full content for URLs on their own line 112 | (e.g. a video player), whereas URLs within text are rendered simply as 113 | links so as not to disrupt the flow of text. 
114 | 115 | * URLs on their own line are converted into full representations 116 | * URLs within blocks of text are converted into clickable links 117 | 118 | :param str text: a string to parse 119 | :param bool urlize_all: convert unmatched urls into links 120 | :param handler: function to use to convert links found on their own line 121 | :param block_handler: function to use to convert links found within blocks of text 122 | :param dict urlize_params: keyword arguments to be used to construct a link 123 | when a provider is not found and urlize is enabled. 124 | :param params: any additional parameters to use when requesting metadata, i.e. 125 | a maxwidth or maxheight. 126 | 127 | .. py:method:: parse_html(html[, urlize_all=True[, handler=full_handler[, block_handler=inline_handler[, urlize_params=None[, **params]]]]]) 128 | 129 | Parse HTML intelligently, rendering items on their own within block 130 | elements as full content (e.g. a video player), whereas URLs within 131 | text are passed to the ``block_handler`` which by default will render a 132 | simple link. URLs that are already enclosed within a ``<a>`` tag are 133 | **skipped over**. 134 | 135 | * URLs that are already within ``<a>`` tags are passed over 136 | * URLs on their own in block tags are converted into full representations 137 | * URLs interspersed with text are converted into clickable links 138 | 139 | .. note:: requires BeautifulSoup or beautifulsoup4 140 | 141 | :param str html: a string of HTML to parse 142 | :param bool urlize_all: convert unmatched urls into links 143 | :param handler: function to use to convert links found on their own within a block element 144 | :param block_handler: function to use to convert links found within blocks of text 145 | :param dict urlize_params: keyword arguments to be used to construct a link 146 | when a provider is not found and urlize is enabled. 147 | :param params: any additional parameters to use when requesting metadata, i.e. 
148 | a maxwidth or maxheight. 149 | 150 | .. py:method:: extract(text, **params) 151 | 152 | Extract all URLs from a block of text, and additionally get any 153 | metadata for URLs we have providers for. 154 | 155 | :param str text: a string to parse 156 | :param params: any additional parameters to use when requesting 157 | metadata, i.e. a maxwidth or maxheight. 158 | :rtype: returns a 2-tuple containing a list of all URLs and a dict 159 | keyed by URL containing any metadata. If a provider was not found 160 | for a URL it is not listed in the dictionary. 161 | 162 | .. py:method:: extract_html(html, **params) 163 | 164 | Extract all URLs from an HTML string, and additionally get any metadata 165 | for URLs we have providers for. Like :py:meth:`~ProviderRegistry.extract`, 166 | but for HTML. 167 | 168 | .. note:: URLs within ``<a>`` tags will not be included. 169 | 170 | :param str html: a string to parse 171 | :param params: any additional parameters to use when requesting 172 | metadata, i.e. a maxwidth or maxheight. 173 | :rtype: returns a 2-tuple containing a list of all URLs and a dict 174 | keyed by URL containing any metadata. If a provider was not found 175 | for a URL it is not listed in the dictionary. 176 | 177 | 178 | .. py:function:: bootstrap_basic([cache=None[, registry=None]]) 179 | 180 | Create a :py:class:`ProviderRegistry` and register some basic providers, 181 | including youtube, flickr, vimeo. 182 | 183 | :param cache: an object that implements simple ``get`` and ``set`` 184 | :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used. 185 | :rtype: a ``ProviderRegistry`` with a handful of providers registered 186 | 187 | 188 | .. 
py:function:: bootstrap_oembed([cache=None[, registry=None[, refresh=False[, **kwargs]]]]) 189 | 190 | Create a :py:class:`ProviderRegistry` and register as many providers as 191 | are described in the `oembed.com <https://oembed.com/>`_ providers list. 192 | 193 | .. note:: 194 | This function makes a request over the internet whenever it is called. 195 | 196 | :param cache: an object that implements simple ``get`` and ``set`` 197 | :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used. 198 | :param bool refresh: force refreshing the provider data rather than attempting to load it from cache (if cache is used). 199 | :param kwargs: any default keyword arguments to use with providers 200 | :rtype: a ProviderRegistry with support for the providers listed on oembed.com 201 | 202 | 203 | .. py:function:: bootstrap_embedly([cache=None[, registry=None[, refresh=False[, **kwargs]]]]) 204 | 205 | Create a :py:class:`ProviderRegistry` and register as many providers as 206 | are supported by `embed.ly <http://embed.ly>`_. Valid services are 207 | fetched from http://api.embed.ly/1/services/python and parsed then registered. 208 | 209 | .. note:: 210 | This function makes a request over the internet whenever it is called. 211 | 212 | :param cache: an object that implements simple ``get`` and ``set`` 213 | :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used. 214 | :param bool refresh: force refreshing the provider data rather than attempting to load it from cache (if cache is used). 215 | :param kwargs: any default keyword arguments to use with providers, useful for 216 | specifying your API key 217 | :rtype: a ProviderRegistry with support for embed.ly 218 | 219 | .. 
code-block:: python 220 | 221 | # if you have an API key, you can specify that here 222 | pr = bootstrap_embedly(key='my-embedly-key') 223 | pr.request('http://www.youtube.com/watch?v=54XHDUOHuzU') 224 | 225 | 226 | .. py:function:: bootstrap_noembed([cache=None[, registry=None[, refresh=False[, **kwargs]]]]) 227 | 228 | Create a :py:class:`ProviderRegistry` and register as many providers as 229 | are supported by `noembed.com <http://noembed.com>`_. Valid services are 230 | fetched from http://noembed.com/providers and parsed then registered. 231 | 232 | .. note:: 233 | This function makes a request over the internet whenever it is called. 234 | 235 | :param cache: an object that implements simple ``get`` and ``set`` 236 | :param registry: a ``ProviderRegistry`` instance, which will be updated with the list of supported providers. If not specified, an empty ``ProviderRegistry`` will be used. 237 | :param bool refresh: force refreshing the provider data rather than attempting to load it from cache (if cache is used). 238 | :param kwargs: any default keyword arguments to use with providers, useful for 239 | passing the ``nowrap`` option to noembed. 240 | :rtype: a ProviderRegistry with support for noembed 241 | 242 | .. code-block:: python 243 | 244 | # default keyword arguments, such as noembed's nowrap option, can be specified here 245 | pr = bootstrap_noembed(nowrap=1) 246 | pr.request('http://www.youtube.com/watch?v=54XHDUOHuzU') 247 | 248 | 249 | Cache 250 | ----- 251 | 252 | .. py:module:: micawber.cache 253 | 254 | .. py:class:: Cache() 255 | 256 | A reference implementation for the cache interface used by the :py:class:`ProviderRegistry`. 257 | 258 | .. code-block:: python 259 | 260 | from micawber import Cache, bootstrap_oembed 261 | cache = Cache() # Simple in-memory cache. 262 | 263 | # Now our oembed provider will cache the responses for each URL we 264 | # request, which can provide a significant speedup. 265 | pr = bootstrap_oembed(cache=cache) 266 | 267 | .. 
py:method:: get(key) 268 | 269 | Retrieve the key from the cache or ``None`` if not present 270 | 271 | .. py:method:: set(key, value) 272 | 273 | Set the cache key ``key`` to the given ``value``. 274 | 275 | .. py:class:: PickleCache([filename='cache.db']) 276 | 277 | A cache that uses pickle to store data. 278 | 279 | .. note:: 280 | To use this cache class be sure to call :py:meth:`~PickleCache.load` when 281 | initializing your cache and :py:meth:`~PickleCache.save` before your app 282 | terminates to persist cached data. 283 | 284 | .. py:method:: load() 285 | 286 | Load the pickled data into memory 287 | 288 | .. py:method:: save() 289 | 290 | Store the internal cache to an external file 291 | 292 | .. py:class:: RedisCache([namespace='micawber'[, timeout=None[, **conn]]]) 293 | 294 | A cache that uses Redis to store data 295 | 296 | .. note:: requires the redis-py library, ``pip install redis`` 297 | 298 | :param namespace: prefix for cache keys 299 | :param int timeout: expiration timeout in seconds (optional) 300 | :param conn: keyword arguments to pass when initializing redis connection 301 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # micawber documentation build configuration file, created by 4 | # sphinx-quickstart on Tue Apr 17 13:43:41 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. 
If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | #sys.path.insert(0, os.path.abspath('.')) 20 | 21 | # -- General configuration ----------------------------------------------------- 22 | 23 | # If your documentation needs a minimal Sphinx version, state it here. 24 | #needs_sphinx = '1.0' 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be extensions 27 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 28 | extensions = [] 29 | 30 | # Add any paths that contain templates here, relative to this directory. 31 | templates_path = ['_templates'] 32 | 33 | # The suffix of source filenames. 34 | source_suffix = '.rst' 35 | 36 | # The encoding of source files. 37 | #source_encoding = 'utf-8-sig' 38 | 39 | # The master toctree document. 40 | master_doc = 'index' 41 | 42 | # General information about the project. 43 | project = u'micawber' 44 | copyright = u'2013, charles leifer' 45 | 46 | # The version info for the project you're documenting, acts as replacement for 47 | # |version| and |release|, also used in various other places throughout the 48 | # built documents. 49 | # 50 | # The short X.Y version. 51 | version = '0.3.4' 52 | # The full version, including alpha/beta/rc tags. 53 | release = '0.3.4' 54 | 55 | # The language for content autogenerated by Sphinx. Refer to documentation 56 | # for a list of supported languages. 57 | #language = None 58 | 59 | # There are two options for replacing |today|: either, you set today to some 60 | # non-false value, then it is used: 61 | #today = '' 62 | # Else, today_fmt is used as the format for a strftime call. 63 | #today_fmt = '%B %d, %Y' 64 | 65 | # List of patterns, relative to source directory, that match files and 66 | # directories to ignore when looking for source files. 67 | exclude_patterns = ['_build'] 68 | 69 | # The reST default role (used for this markup: `text`) to use for all documents. 
70 | #default_role = None 71 | 72 | # If true, '()' will be appended to :func: etc. cross-reference text. 73 | #add_function_parentheses = True 74 | 75 | # If true, the current module name will be prepended to all description 76 | # unit titles (such as .. function::). 77 | #add_module_names = True 78 | 79 | # If true, sectionauthor and moduleauthor directives will be shown in the 80 | # output. They are ignored by default. 81 | #show_authors = False 82 | 83 | # The name of the Pygments (syntax highlighting) style to use. 84 | pygments_style = 'sphinx' 85 | 86 | # A list of ignored prefixes for module index sorting. 87 | #modindex_common_prefix = [] 88 | 89 | 90 | # -- Options for HTML output --------------------------------------------------- 91 | 92 | # The theme to use for HTML and HTML Help pages. See the documentation for 93 | # a list of builtin themes. 94 | html_theme = 'default' 95 | 96 | # Theme options are theme-specific and customize the look and feel of a theme 97 | # further. For a list of options available for each theme, see the 98 | # documentation. 99 | #html_theme_options = {} 100 | 101 | # Add any paths that contain custom themes here, relative to this directory. 102 | #html_theme_path = [] 103 | 104 | # The name for this set of Sphinx documents. If None, it defaults to 105 | # " v documentation". 106 | #html_title = None 107 | 108 | # A shorter title for the navigation bar. Default is the same as html_title. 109 | #html_short_title = None 110 | 111 | # The name of an image file (relative to this directory) to place at the top 112 | # of the sidebar. 113 | #html_logo = None 114 | 115 | # The name of an image file (within the static path) to use as favicon of the 116 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 117 | # pixels large. 118 | #html_favicon = None 119 | 120 | # Add any paths that contain custom static files (such as style sheets) here, 121 | # relative to this directory. 
They are copied after the builtin static files, 122 | # so a file named "default.css" will overwrite the builtin "default.css". 123 | html_static_path = ['_static'] 124 | 125 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 126 | # using the given strftime format. 127 | #html_last_updated_fmt = '%b %d, %Y' 128 | 129 | # If true, SmartyPants will be used to convert quotes and dashes to 130 | # typographically correct entities. 131 | #html_use_smartypants = True 132 | 133 | # Custom sidebar templates, maps document names to template names. 134 | #html_sidebars = {} 135 | 136 | # Additional templates that should be rendered to pages, maps page names to 137 | # template names. 138 | #html_additional_pages = {} 139 | 140 | # If false, no module index is generated. 141 | #html_domain_indices = True 142 | 143 | # If false, no index is generated. 144 | #html_use_index = True 145 | 146 | # If true, the index is split into individual pages for each letter. 147 | #html_split_index = False 148 | 149 | # If true, links to the reST sources are added to the pages. 150 | #html_show_sourcelink = True 151 | 152 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 153 | #html_show_sphinx = True 154 | 155 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 156 | #html_show_copyright = True 157 | 158 | # If true, an OpenSearch description file will be output, and all pages will 159 | # contain a tag referring to it. The value of this option must be the 160 | # base URL from which the finished HTML is served. 161 | #html_use_opensearch = '' 162 | 163 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 164 | #html_file_suffix = None 165 | 166 | # Output file base name for HTML help builder. 
167 | htmlhelp_basename = 'micawberdoc' 168 | 169 | 170 | # -- Options for LaTeX output -------------------------------------------------- 171 | 172 | latex_elements = { 173 | # The paper size ('letterpaper' or 'a4paper'). 174 | #'papersize': 'letterpaper', 175 | 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | 179 | # Additional stuff for the LaTeX preamble. 180 | #'preamble': '', 181 | } 182 | 183 | # Grouping the document tree into LaTeX files. List of tuples 184 | # (source start file, target name, title, author, documentclass [howto/manual]). 185 | latex_documents = [ 186 | ('index', 'micawber.tex', u'micawber Documentation', 187 | u'charles leifer', 'manual'), 188 | ] 189 | 190 | # The name of an image file (relative to this directory) to place at the top of 191 | # the title page. 192 | #latex_logo = None 193 | 194 | # For "manual" documents, if this is true, then toplevel headings are parts, 195 | # not chapters. 196 | #latex_use_parts = False 197 | 198 | # If true, show page references after internal links. 199 | #latex_show_pagerefs = False 200 | 201 | # If true, show URL addresses after external links. 202 | #latex_show_urls = False 203 | 204 | # Documents to append as an appendix to all manuals. 205 | #latex_appendices = [] 206 | 207 | # If false, no module index is generated. 208 | #latex_domain_indices = True 209 | 210 | 211 | # -- Options for manual page output -------------------------------------------- 212 | 213 | # One entry per manual page. List of tuples 214 | # (source start file, name, description, authors, manual section). 215 | man_pages = [ 216 | ('index', 'micawber', u'micawber Documentation', 217 | [u'charles leifer'], 1) 218 | ] 219 | 220 | # If true, show URL addresses after external links. 221 | #man_show_urls = False 222 | 223 | 224 | # -- Options for Texinfo output ------------------------------------------------ 225 | 226 | # Grouping the document tree into Texinfo files. 
List of tuples 227 | # (source start file, target name, title, author, 228 | # dir menu entry, description, category) 229 | texinfo_documents = [ 230 | ('index', 'micawber', u'micawber Documentation', 231 | u'charles leifer', 'micawber', 'One line description of project.', 232 | 'Miscellaneous'), 233 | ] 234 | 235 | # Documents to append as an appendix to all manuals. 236 | #texinfo_appendices = [] 237 | 238 | # If false, no module index is generated. 239 | #texinfo_domain_indices = True 240 | 241 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 242 | #texinfo_show_urls = 'footnote' 243 | -------------------------------------------------------------------------------- /docs/django.rst: -------------------------------------------------------------------------------- 1 | .. _django: 2 | 3 | Django integration 4 | ================== 5 | 6 | First be sure you have added ``micawber.contrib.mcdjango`` to ``INSTALLED_APPS`` 7 | so that we can use the template filters it defines. 8 | 9 | .. code-block:: python 10 | 11 | # settings.py 12 | 13 | INSTALLED_APPS = [ 14 | # ... 15 | 'micawber.contrib.mcdjango', 16 | ] 17 | 18 | micawber provides 4 template filters for converting URLs contained within 19 | text or HTML to rich content: 20 | 21 | * :py:func:`~micawber.contrib.mcdjango.oembed` for plain text 22 | * :py:func:`~micawber.contrib.mcdjango.oembed_html` for html 23 | * :py:func:`~micawber.contrib.mcdjango.extract_oembed` for extracting url data from plain text 24 | * :py:func:`~micawber.contrib.mcdjango.extract_oembed_html` for extracting url data from html 25 | 26 | These filters are registered in the ``micawber_tags`` library, which can be 27 | invoked in your templates: 28 | 29 | .. code-block:: html 30 | 31 | {% load micawber_tags %} 32 | 33 |

{{ object.body|oembed:"600x600" }}

34 | 35 | Each filter takes the value being filtered plus at most one optional argument, because 36 | django template filters cannot accept more than one argument. 37 | 38 | Piping a string through the ``oembed`` filter (or ``oembed_html``) will convert 39 | URLs to things like youtube videos into video players. A couple things to 40 | understand about the parsers: 41 | 42 | * the plaintext parser (``oembed``) will convert URLs *on their own line* into 43 | full images/video-players/etc. URLs that are interspersed within text will 44 | simply be converted into clickable links so as not to disrupt the flow of text. 45 | * the HTML parser (``oembed_html``) will convert URLs that *are not already links* 46 | into full images/video-players/etc. URLs within block elements along with other 47 | text will be converted into clickable links, as embedding media there would likely disrupt the flow 48 | of text or produce invalid HTML. 49 | 50 | .. note:: 51 | You can control how things are rendered -- check out `the default templates `_ 52 | for reference implementations. 53 | 54 | 55 | Django filter API 56 | ----------------- 57 | 58 | .. py:module:: micawber.contrib.mcdjango 59 | 60 | The following filters are exposed via the :py:mod:`micawber.contrib.mcdjango` module: 61 | 62 | .. py:function:: oembed(text[, width_height=None]) 63 | 64 | Parse the given text, rendering URLs as rich media 65 | 66 | Usage within a django template: 67 | 68 | .. code-block:: python 69 | 70 | {{ blog_entry.body|oembed:"600x600" }} 71 | 72 | :param text: the text to be parsed **do not use HTML** 73 | :param width_height: string containing maximum for width and optionally height, of 74 | format "WIDTHxHEIGHT" or "WIDTH", e.g. "500x500" or "800" 75 | :rtype: parsed text with rich content embedded 76 | 77 | .. py:function:: oembed_html(html[, width_height=None]) 78 | 79 | Exactly the same as above except for usage *with html* 80 | 81 | Usage within a django template: 82 | 83 | .. 
code-block:: python 84 | 85 | {{ blog_entry.body|markdown|oembed_html:"600x600" }} 86 | 87 | .. py:function:: extract_oembed(text[, width_height=None]) 88 | 89 | Parse the given text, returning a list of 2-tuples containing url and metadata 90 | about the url. 91 | 92 | Usage within a django template: 93 | 94 | .. code-block:: python 95 | 96 | {% for url, metadata in blog_entry.body|extract_oembed:"600x600" %} 97 | 98 | {% endfor %} 99 | 100 | :param text: the text to be parsed **do not use HTML** 101 | :param width_height: string containing maximum for width and optionally height, of 102 | format "WIDTHxHEIGHT" or "WIDTH", e.g. "500x500" or "800" 103 | :rtype: 2-tuples containing the URL and a dictionary of metadata 104 | 105 | .. py:function:: extract_oembed_html(html[, width_height=None]) 106 | 107 | Exactly the same as above except for usage *with html* 108 | 109 | 110 | Extending the filters 111 | --------------------- 112 | 113 | For simplicity, micawber provides a setting allowing you to create custom template 114 | filters. An example use case would be to add a template filter that could embed 115 | rich content, but did not automatically "urlize" all links. 116 | 117 | Extensions are configured in the ``settings`` module and take the form of a list of 118 | 2-tuples containing: 119 | 120 | 1. the name for the custom filter 121 | 2. a dictionary of keyword arguments to pass in to the ``parse`` function 122 | 123 | .. code-block:: python 124 | 125 | MICAWBER_TEMPLATE_EXTENSIONS = [ 126 | ('oembed_no_urlize', {'urlize_all': False}), 127 | ] 128 | 129 | Assume this is our template: 130 | 131 | .. code-block:: html 132 | 133 | {% load micawber_tags %} 134 | 135 | DEFAULT: 136 | {{ "http://foo.com/ and http://bar.com/"|oembed }} 137 | 138 | CUSTOM: 139 | {{ "http://foo.com/ and http://bar.com/"|oembed_no_urlize }} 140 | 141 | Rendering the above template will produce the following output: 142 | 143 | .. code-block:: html 144 | 145 | DEFAULT: 146 |
http://foo.com/ and http://bar.com/ 147 | 148 | CUSTOM: 149 | http://foo.com/ and http://bar.com/ 150 | 151 | Some examples of keyword arguments to override are: 152 | 153 | * providers: a :py:class:`~micawber.providers.ProviderRegistry` instance 154 | * urlize_all (default ``True``): whether to convert *all* URLs to clickable links 155 | * html (default ``False``): whether to parse as plaintext or html 156 | * handler: function used to render metadata as markup 157 | * block_handler: function used to render inline links with rich metadata 158 | * text_fn: function to use when parsing text 159 | * html_fn: function to use when parsing html 160 | 161 | The magic happens in :py:func:`micawber.contrib.mcdjango.extension` -- check 162 | out the `source code `_ for more details. 163 | 164 | .. note:: 165 | The ``MICAWBER_TEMPLATE_EXTENSIONS`` setting can also be a string path to 166 | a module and an attribute containing a similar data structure. 167 | 168 | 169 | Additional settings 170 | ------------------- 171 | 172 | Providers 173 | ^^^^^^^^^ 174 | 175 | The most important setting to configure is the module / attribute 176 | path to the providers you wish to use. The attribute can either 177 | be a ProviderRegistry instance or a callable. The default is: 178 | 179 | ``MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_basic'`` 180 | 181 | You can use the bootstrap embedly function, but beware this may take a few 182 | seconds to load up: 183 | 184 | ``MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_embedly'`` 185 | 186 | If you want to use the embedly endpoints and have an API key, you can specify 187 | that in the settings: 188 | 189 | ``MICAWBER_EMBEDLY_KEY = 'foo'`` 190 | 191 | You can also customize this with your own set of providers. 
This must be either 192 | 193 | * the module path to a :py:class:`~micawber.providers.ProviderRegistry` instance 194 | * the module path to a callable which returns a :py:class:`~micawber.providers.ProviderRegistry` instance 195 | 196 | Here is a quick example showing a custom ``ProviderRegistry``: 197 | 198 | .. code-block:: python 199 | 200 | # settings.py 201 | MICAWBER_PROVIDERS = 'my_app.micawber_providers.oembed_providers' 202 | 203 | .. code-block:: python 204 | 205 | # my_app/micawber_providers.py 206 | from django.core.cache import cache 207 | from micawber.providers import Provider, bootstrap_basic 208 | 209 | oembed_providers = bootstrap_basic(cache) 210 | 211 | # add a custom provider 212 | oembed_providers.register(r'http://example.com/\S*', Provider('http://example.com/oembed/')) 213 | 214 | 215 | Default settings for requests 216 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 217 | 218 | Because of the limitations of django's template filters, we do not 219 | have the flexibility to pass in multiple arguments to the filters. 220 | Default arguments need to be specified in the settings: 221 | 222 | .. code-block:: python 223 | 224 | MICAWBER_DEFAULT_SETTINGS = { 225 | 'key': 'your-embedly-api-key', 226 | 'maxwidth': 600, 227 | 'maxheight': 600, 228 | } 229 | 230 | 231 | Trying it out in the python shell 232 | --------------------------------- 233 | 234 | .. code-block:: python 235 | 236 | >>> from django.template import Template, Context 237 | >>> t = Template('{% load micawber_tags %}{{ "http://www.youtube.com/watch?v=mQEWI1cn7HY"|oembed }}') 238 | >>> t.render(Context()) 239 | u'' 240 | -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | .. 
_examples: 2 | 3 | Examples 4 | ======== 5 | 6 | micawber comes with a handful of examples showing usage with 7 | 8 | * :ref:`django ` 9 | * :ref:`flask ` 10 | * :ref:`simple python script ` 11 | 12 | .. _django_example: 13 | 14 | Django example 15 | -------------- 16 | 17 | The django example is very simple -- it illustrates a single view that renders 18 | text inputted by the user by piping it through the :py:func:`~micawber.contrib.mcdjango.oembed` 19 | filter. It also shows the output of the :py:func:`~micawber.contrib.mcdjango.extract_oembed` 20 | filter which returns a 2-tuple of URL -> metadata. There is also an input where 21 | you can experiment with entering HTML. 22 | 23 | To run the example:: 24 | 25 | cd examples/django_ex/ 26 | ./manage.py runserver 27 | 28 | Check out the `example source code `_. 29 | 30 | 31 | .. _flask_example: 32 | 33 | Flask example 34 | ------------- 35 | 36 | The flask example is almost identical in terms of functionality to the django example. It 37 | shows a one-file app with a single view that renders 38 | text inputted by the user by piping it through the :py:func:`~micawber.contrib.mcflask.oembed` 39 | filter. It also shows the output of the :py:func:`~micawber.contrib.mcflask.extract_oembed` 40 | filter which returns a 2-tuple of URL -> metadata. There is also an input where 41 | you can experiment with entering HTML. 42 | 43 | To run the example:: 44 | 45 | cd examples/flask_ex/ 46 | python app.py 47 | 48 | Check out the `example source code `_. 49 | 50 | .. _python_example: 51 | 52 | Python example 53 | -------------- 54 | 55 | The python example is a command-line app that shows the use of the :py:class:`micawber.providers.ProviderRegistry` 56 | and :py:class:`micawber.providers.bootstrap_embedly`. It runs a loop asking the user to input 57 | URLs, outputting rich metadata when possible (view http://embed.ly for a full list of providers). 
58 | 59 | To run the example:: 60 | 61 | cd examples/python_ex/ 62 | python example.py 63 | 64 | Check out the `example source code `_. 65 | -------------------------------------------------------------------------------- /docs/flask.rst: -------------------------------------------------------------------------------- 1 | .. _flask: 2 | 3 | Flask integration 4 | ================= 5 | 6 | micawber exposes two Jinja template filters for use in your flask templates: 7 | 8 | * :py:func:`~micawber.contrib.mcflask.oembed` 9 | * :py:func:`~micawber.contrib.mcflask.extract_oembed` 10 | 11 | You can add them to your jinja environment by using the helper function: 12 | 13 | .. code-block:: python 14 | 15 | from flask import Flask 16 | from micawber.providers import bootstrap_basic 17 | from micawber.contrib.mcflask import add_oembed_filters 18 | 19 | app = Flask(__name__) 20 | 21 | oembed_providers = bootstrap_basic() 22 | add_oembed_filters(app, oembed_providers) 23 | 24 | Now you can use the filters in your templates: 25 | 26 | .. code-block:: html 27 | 28 | {% block content %} 29 |

{{ object.body|oembed(html=False, maxwidth=600, maxheight=600) }}

30 | {% endblock %} 31 | 32 | Flask filter API 33 | ---------------- 34 | 35 | .. py:module:: micawber.contrib.mcflask 36 | 37 | The following filters are exposed via the :py:mod:`micawber.contrib.mcflask` module: 38 | 39 | .. py:function:: oembed(text, urlize_all=True, html=False, **params) 40 | 41 | Parse the given text, rendering URLs as rich media 42 | 43 | Usage within a Jinja2 template: 44 | 45 | .. code-block:: python 46 | 47 | {{ blog_entry.body|oembed(urlize_all=False, maxwidth=600) }} 48 | 49 | :param text: the text to be parsed, can be HTML 50 | :param urlize_all: boolean indicating whether to convert bare links to clickable ones 51 | :param html: boolean indicating whether text is plaintext or markup 52 | :param params: any additional keyword arguments, e.g. maxwidth or an api key 53 | :rtype: parsed text with rich content embedded 54 | 55 | .. py:function:: extract_oembed(text, html=False, **params) 56 | 57 | Returns a 2-tuple containing 58 | 59 | * a list of all URLs found within the text (if HTML, all URLs that aren't already links) 60 | * a dictionary of URL to metadata provided by the API endpoint 61 | 62 | .. note:: 63 | Not all URLs listed will have matching entries in the dictionary, since there 64 | may not be a provider for them. 65 | 66 | :param text: the text to be parsed, can be HTML 67 | :param html: boolean indicating whether text is plaintext or markup 68 | :param params: any additional keyword arguments, e.g. maxwidth or an api key 69 | :rtype: 2-tuple containing a list of *all* urls and a dictionary of url -> metadata 70 | 71 | Adding filters to the Jinja Environment 72 | --------------------------------------- 73 | 74 | To actually use these filters they must be made available to the application. Use the 75 | following function to do this sometime after initializing your ``Flask`` app: 76 | 77 | .. 
py:function:: add_oembed_filters(app, providers) 78 | 79 | Add the ``oembed`` and ``extract_oembed`` filters to the jinja environment 80 | 81 | :param app: a flask application 82 | :param providers: a :py:class:`micawber.providers.ProviderRegistry` instance 83 | :rtype: (no return value) 84 | -------------------------------------------------------------------------------- /docs/getting_started.rst: -------------------------------------------------------------------------------- 1 | .. _getting_started: 2 | 3 | Getting Started 4 | =============== 5 | 6 | If you want the dead simple get-me-up-and-running, try the following: 7 | 8 | .. code-block:: python 9 | 10 | >>> import micawber 11 | >>> providers = micawber.bootstrap_basic() # may take a second 12 | >>> print(providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU')) 13 | this is a test: 14 | 15 | 16 | Using django? Add ``micawber.contrib.mcdjango`` to your ``INSTALLED_APPS``, then 17 | in your templates: 18 | 19 | .. code-block:: html 20 | 21 | {% load micawber_tags %} 22 | {# show a video player for the youtube video #} 23 | {{ "http://www.youtube.com/watch?v=mQEWI1cn7HY"|oembed }} 24 | 25 | Using flask? Use the ``add_oembed_filters`` function to register two jinja 26 | template filters, ``oembed`` and ``extract_oembed``: 27 | 28 | .. code-block:: python 29 | 30 | from flask import Flask 31 | from micawber.providers import bootstrap_basic 32 | from micawber.contrib.mcflask import add_oembed_filters 33 | 34 | app = Flask(__name__) 35 | 36 | oembed_providers = bootstrap_basic() 37 | add_oembed_filters(app, oembed_providers) 38 | 39 | .. code-block:: html 40 | 41 | {# show a video player for the youtube video #} 42 | {{ "http://www.youtube.com/watch?v=mQEWI1cn7HY"|oembed() }} 43 | 44 | Overview 45 | -------- 46 | 47 | micawber is rather simple. It is built to use the `oembed `_ spec, 48 | which is designed for converting URLs into rich, embeddable content. 
Many popular sites 49 | support this, including youtube and flickr. There is also a 3rd-party service called 50 | `embedly `_ that can convert many types of links into rich content. 51 | 52 | micawber was designed to make it easy to integrate with these APIs. There are 53 | two concepts to understand when using micawber: 54 | 55 | * :py:class:`~micawber.providers.Provider` objects - which describe how to 56 | match a URL (based on a regex) to an OEmbed endpoint. 57 | * :py:class:`~micawber.providers.ProviderRegistry` objects - which encapsulate 58 | a collection of providers and expose methods for parsing text and HTML to 59 | convert links into media objects. 60 | 61 | 62 | Providers 63 | --------- 64 | 65 | Providers are used to convert URLs into rich metadata. They have an endpoint 66 | associated with them and can have any number of arbitrary URL parameters (such 67 | as API keys) which are used when making API requests. 68 | 69 | Example: 70 | 71 | .. code-block:: python 72 | 73 | from micawber.providers import Provider 74 | 75 | youtube = Provider('http://www.youtube.com/oembed') 76 | youtube.request('http://www.youtube.com/watch?v=nda_OSWeyn8') 77 | 78 | The above code returns a dictionary containing metadata about the requested 79 | video, including the markup for an embeddable player:: 80 | 81 | {'author_name': u'botmib', 82 | 'author_url': u'http://www.youtube.com/user/botmib', 83 | 'height': 344, 84 | 'html': u'', 85 | 'provider_name': u'YouTube', 86 | 'provider_url': u'http://www.youtube.com/', 87 | 'thumbnail_height': 360, 88 | 'thumbnail_url': u'http://i3.ytimg.com/vi/nda_OSWeyn8/hqdefault.jpg', 89 | 'thumbnail_width': 480, 90 | 'title': u'Leprechaun in Mobile, Alabama', 91 | 'type': u'video', 92 | 'url': 'http://www.youtube.com/watch?v=nda_OSWeyn8', 93 | 'version': u'1.0', 94 | 'width': 459} 95 | 96 | More information can be found in the :py:class:`~micawber.providers.Provider` API docs. 
97 | 98 | ProviderRegistry 99 | ---------------- 100 | 101 | The :py:class:`~micawber.providers.ProviderRegistry` is a way of organizing lists 102 | of providers. URLs can be requested from the registry and if *any* provider matches 103 | it will be used, otherwise a ``ProviderException`` will be raised. 104 | 105 | The ``ProviderRegistry`` also supports an optional simple caching mechanism. 106 | 107 | Here is an excerpt from the code from the :py:func:`micawber.providers.bootstrap_basic` function, 108 | which is handy for grabbing a ``ProviderRegistry`` with a handful of basic providers 109 | pre-populated: 110 | 111 | .. code-block:: python 112 | 113 | def bootstrap_basic(cache=None, registry=None, **params): 114 | pr = registry or ProviderRegistry(cache) 115 | pr.register('http://\S*?flickr.com/\S*', Provider('http://www.flickr.com/services/oembed/')) 116 | pr.register('http://\S*.youtu(\.be|be\.com)/watch\S*', Provider('http://www.youtube.com/oembed')) 117 | pr.register('http://www.hulu.com/watch/\S*', Provider('http://www.hulu.com/api/oembed.json')) 118 | return pr 119 | 120 | As you can see, the :py:meth:`~micawber.providers.ProviderRegistry.register` method takes 121 | two parameters, a regular expression for valid URLs and a ``Provider`` instance. 122 | 123 | You can use helper functions to get a populated registry: 124 | 125 | * :py:func:`~micawber.providers.bootstrap_basic` 126 | * :py:func:`~micawber.providers.bootstrap_oembed` - uses oembed.com's official providers list. 127 | * :py:func:`~micawber.providers.bootstrap_embedly` 128 | * :py:func:`~micawber.providers.bootstrap_noembed` 129 | 130 | The ``bootstrap_oembed``, ``bootstrap_embedly``, and ``bootstrap_noembed`` 131 | functions make a HTTP request to the API server asking for a list of supported 132 | providers, so you may experience some latency when using these helpers. 
For 133 | most WSGI applications this will not be an issue, but if you'd like to speed it 134 | up I suggest fetching the results, storing them in the db or a file, and then 135 | pulling from there. 136 | 137 | More information can be found in the :py:class:`~micawber.providers.ProviderRegistry` API docs. 138 | 139 | Parsing Links 140 | ^^^^^^^^^^^^^ 141 | 142 | Replace URLs with rich media: 143 | 144 | * :py:meth:`~micawber.providers.ProviderRegistry.parse_text`, which converts 145 | URLs on their own line into a rich media object. Links embedded within blocks 146 | of text are converted into clickable links. 147 | * :py:meth:`~micawber.providers.ProviderRegistry.parse_html`, which converts 148 | URLs within HTML into rich media objects or clickable links, depending on the 149 | context in which the URL is found. 150 | 151 | A quick example: 152 | 153 | .. code-block:: python 154 | 155 | import micawber 156 | 157 | providers = micawber.bootstrap_basic() 158 | 159 | providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU') 160 | 161 | This will result in the following output:: 162 | 163 | this is a test: 164 | 165 | 166 | You can also parse HTML using the :py:meth:`~micawber.providers.ProviderRegistry.parse_html` method: 167 | 168 | .. code-block:: python 169 | 170 | providers.parse_html('

http://www.youtube.com/watch?v=54XHDUOHuzU

') 171 | 172 | # yields the following output: 173 |

174 | 175 | If you would rather extract metadata, there are two functions: 176 | 177 | * :py:meth:`~micawber.providers.ProviderRegistry.extract`, which finds all URLs 178 | within a block of text and returns a dictionary of metadata for each. 179 | * :py:meth:`~micawber.providers.ProviderRegistry.extract_html`, which finds 180 | URLs within HTML and returns a dictionary of metadata for each. 181 | 182 | The :ref:`API docs ` are extensive, so please refer there for a full list 183 | of parameters and functions. 184 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. micawber documentation master file, created by 2 | sphinx-quickstart on Tue Apr 17 13:43:41 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | .. image:: http://media.charlesleifer.com/blog/photos/micawber-logo-0.png 7 | 8 | A small library for extracting rich content from urls. 9 | 10 | https://github.com/coleifer/micawber 11 | 12 | 13 | what does it do? 14 | ---------------- 15 | 16 | micawber supplies a few methods for retrieving rich metadata about a variety of 17 | links, such as links to youtube videos. micawber also provides functions for 18 | parsing blocks of text and html and replacing links to videos with rich embedded 19 | content. 20 | 21 | 22 | examples 23 | -------- 24 | 25 | here is a quick example: 26 | 27 | .. 
code-block:: python 28 | 29 | import micawber 30 | 31 | # load up rules for some default providers, such as youtube and flickr 32 | providers = micawber.bootstrap_basic() 33 | 34 | providers.request('http://www.youtube.com/watch?v=54XHDUOHuzU') 35 | 36 | # returns the following dictionary: 37 | { 38 | 'author_name': 'pascalbrax', 39 | 'author_url': u'http://www.youtube.com/user/pascalbrax', 40 | 'height': 344, 41 | 'html': u'', 42 | 'provider_name': 'YouTube', 43 | 'provider_url': 'http://www.youtube.com/', 44 | 'title': 'Future Crew - Second Reality demo - HD', 45 | 'type': u'video', 46 | 'thumbnail_height': 360, 47 | 'thumbnail_url': u'http://i2.ytimg.com/vi/54XHDUOHuzU/hqdefault.jpg', 48 | 'thumbnail_width': 480, 49 | 'url': 'http://www.youtube.com/watch?v=54XHDUOHuzU', 50 | 'width': 459, 51 | 'version': '1.0', 52 | } 53 | 54 | providers.parse_text('this is a test:\nhttp://www.youtube.com/watch?v=54XHDUOHuzU') 55 | 56 | # returns the following string: 57 | this is a test: 58 | 59 | 60 | providers.parse_html('

http://www.youtube.com/watch?v=54XHDUOHuzU

') 61 | 62 | # returns the following html: 63 |

64 | 65 | check out the :ref:`getting started ` for more examples 66 | 67 | 68 | integration with web frameworks 69 | ------------------------------- 70 | 71 | * :ref:`flask ` 72 | * :ref:`django ` 73 | 74 | Contents: 75 | 76 | .. toctree:: 77 | :maxdepth: 2 78 | :glob: 79 | 80 | installation 81 | getting_started 82 | examples 83 | flask 84 | django 85 | api 86 | 87 | 88 | Indices and tables 89 | ================== 90 | 91 | * :ref:`genindex` 92 | * :ref:`modindex` 93 | * :ref:`search` 94 | 95 | -------------------------------------------------------------------------------- /docs/installation.rst: -------------------------------------------------------------------------------- 1 | .. _installation: 2 | 3 | Installation 4 | ============ 5 | 6 | First, you need to install micawber. 7 | 8 | There are a couple of ways: 9 | 10 | Installing with pip 11 | ^^^^^^^^^^^^^^^^^^^ 12 | 13 | :: 14 | 15 | pip install micawber 16 | 17 | or 18 | 19 | pip install -e git+https://github.com/coleifer/micawber.git#egg=micawber 20 | 21 | 22 | Installing via git 23 | ^^^^^^^^^^^^^^^^^^ 24 | 25 | :: 26 | 27 | git clone https://github.com/coleifer/micawber.git 28 | cd micawber 29 | python setup.py test 30 | sudo python setup.py install 31 | 32 | 33 | Adding to your Django Project 34 | -------------------------------- 35 | 36 | After installing, adding micawber to your project is a snap. Simply 37 | add it to your project's ``INSTALLED_APPS`` and run 'syncdb':: 38 | 39 | # settings.py 40 | INSTALLED_APPS = [ 41 | ... 42 | 'micawber.contrib.mcdjango' 43 | ] 44 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=_build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . 
10 | set I18NSPHINXOPTS=%SPHINXOPTS% . 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. linkcheck to check all external links for integrity 37 | echo. doctest to run all doctests embedded in the documentation if enabled 38 | goto end 39 | ) 40 | 41 | if "%1" == "clean" ( 42 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 43 | del /q /s %BUILDDIR%\* 44 | goto end 45 | ) 46 | 47 | if "%1" == "html" ( 48 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 49 | if errorlevel 1 exit /b 1 50 | echo. 51 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 52 | goto end 53 | ) 54 | 55 | if "%1" == "dirhtml" ( 56 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 57 | if errorlevel 1 exit /b 1 58 | echo. 59 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 60 | goto end 61 | ) 62 | 63 | if "%1" == "singlehtml" ( 64 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 65 | if errorlevel 1 exit /b 1 66 | echo. 
67 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 68 | goto end 69 | ) 70 | 71 | if "%1" == "pickle" ( 72 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 73 | if errorlevel 1 exit /b 1 74 | echo. 75 | echo.Build finished; now you can process the pickle files. 76 | goto end 77 | ) 78 | 79 | if "%1" == "json" ( 80 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 81 | if errorlevel 1 exit /b 1 82 | echo. 83 | echo.Build finished; now you can process the JSON files. 84 | goto end 85 | ) 86 | 87 | if "%1" == "htmlhelp" ( 88 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 89 | if errorlevel 1 exit /b 1 90 | echo. 91 | echo.Build finished; now you can run HTML Help Workshop with the ^ 92 | .hhp project file in %BUILDDIR%/htmlhelp. 93 | goto end 94 | ) 95 | 96 | if "%1" == "qthelp" ( 97 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 98 | if errorlevel 1 exit /b 1 99 | echo. 100 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 101 | .qhcp project file in %BUILDDIR%/qthelp, like this: 102 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\micawber.qhcp 103 | echo.To view the help file: 104 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\micawber.ghc 105 | goto end 106 | ) 107 | 108 | if "%1" == "devhelp" ( 109 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 110 | if errorlevel 1 exit /b 1 111 | echo. 112 | echo.Build finished. 113 | goto end 114 | ) 115 | 116 | if "%1" == "epub" ( 117 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 118 | if errorlevel 1 exit /b 1 119 | echo. 120 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 121 | goto end 122 | ) 123 | 124 | if "%1" == "latex" ( 125 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
129 | goto end 130 | ) 131 | 132 | if "%1" == "text" ( 133 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 134 | if errorlevel 1 exit /b 1 135 | echo. 136 | echo.Build finished. The text files are in %BUILDDIR%/text. 137 | goto end 138 | ) 139 | 140 | if "%1" == "man" ( 141 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 142 | if errorlevel 1 exit /b 1 143 | echo. 144 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 145 | goto end 146 | ) 147 | 148 | if "%1" == "texinfo" ( 149 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 150 | if errorlevel 1 exit /b 1 151 | echo. 152 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 153 | goto end 154 | ) 155 | 156 | if "%1" == "gettext" ( 157 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 158 | if errorlevel 1 exit /b 1 159 | echo. 160 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 161 | goto end 162 | ) 163 | 164 | if "%1" == "changes" ( 165 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 166 | if errorlevel 1 exit /b 1 167 | echo. 168 | echo.The overview file is in %BUILDDIR%/changes. 169 | goto end 170 | ) 171 | 172 | if "%1" == "linkcheck" ( 173 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 174 | if errorlevel 1 exit /b 1 175 | echo. 176 | echo.Link check complete; look for any errors in the above output ^ 177 | or in %BUILDDIR%/linkcheck/output.txt. 178 | goto end 179 | ) 180 | 181 | if "%1" == "doctest" ( 182 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 183 | if errorlevel 1 exit /b 1 184 | echo. 185 | echo.Testing of doctests in the sources finished, look at the ^ 186 | results in %BUILDDIR%/doctest/output.txt. 
187 | goto end 188 | ) 189 | 190 | :end 191 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/examples/__init__.py -------------------------------------------------------------------------------- /examples/django_ex/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/examples/django_ex/__init__.py -------------------------------------------------------------------------------- /examples/django_ex/manage.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from django.core.management import execute_manager 3 | import imp 4 | try: 5 | imp.find_module('settings') # Assumed to be in the same directory. 6 | except ImportError: 7 | import sys 8 | sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. 
It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n" % __file__) 9 | sys.exit(1) 10 | 11 | import settings 12 | 13 | if __name__ == "__main__": 14 | execute_manager(settings) 15 | -------------------------------------------------------------------------------- /examples/django_ex/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | #### MICAWBER SETTINGS 4 | 5 | # add a template filter called "oembed_no_urlize" that will not automatically 6 | # convert URLs to clickable links in the event a provider is not found for 7 | # the given url 8 | MICAWBER_TEMPLATE_EXTENSIONS = [ 9 | ('oembed_no_urlize', {'urlize_all': False}), 10 | ] 11 | 12 | # by default, micawber will use the "bootstrap_basic" providers, but should you 13 | # wish to use embedly you can try out the second example. You can also provide 14 | # your own ProviderRegistry with a path to a module and either a callable or 15 | # ProviderRegistry instance 16 | MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_basic' 17 | #MICAWBER_PROVIDERS = 'micawber.contrib.mcdjango.providers.bootstrap_embedly' 18 | 19 | # if you are using embed.ly you can specify an API key that will be used with 20 | # the bootstrap_embedly provider setting 21 | # MICAWBER_EMBEDLY_KEY = 'foofoo' 22 | 23 | # since template filters are limited to a single optional parameter, you can 24 | # specify defaults, such as a maxwidth you prefer to use or an api key 25 | #MICAWBER_DEFAULT_SETTINGS = { 26 | # 'key': 'your-embedly-api-key', 27 | # 'maxwidth': 600, 28 | # 'maxheight': 600, 29 | #} 30 | 31 | #### END MICAWBER SETTINGS 32 | 33 | CURRENT_DIR = os.path.dirname(__file__) 34 | 35 | DEBUG = True 36 | TEMPLATE_DEBUG = DEBUG 37 | 38 | DATABASES = { 39 | 'default': { 40 | 'ENGINE': 'django.db.backends.sqlite3', 41 | 'NAME': 'django_ex.db', 42 | } 43 | } 44 | 45 | SITE_ID = 1 46 | 47 | SECRET_KEY = 'fapfapfap' 
48 | 49 | STATIC_URL = '/static/' 50 | STATICFILES_DIRS = ( 51 | os.path.join(CURRENT_DIR, 'static'), 52 | ) 53 | 54 | STATICFILES_FINDERS = ( 55 | 'django.contrib.staticfiles.finders.FileSystemFinder', 56 | 'django.contrib.staticfiles.finders.AppDirectoriesFinder', 57 | ) 58 | 59 | 60 | TEMPLATE_LOADERS = ( 61 | 'django.template.loaders.filesystem.Loader', 62 | 'django.template.loaders.app_directories.Loader', 63 | ) 64 | 65 | MIDDLEWARE_CLASSES = ( 66 | 'django.middleware.common.CommonMiddleware', 67 | 'django.contrib.sessions.middleware.SessionMiddleware', 68 | 'django.middleware.csrf.CsrfViewMiddleware', 69 | 'django.contrib.auth.middleware.AuthenticationMiddleware', 70 | 'django.contrib.messages.middleware.MessageMiddleware', 71 | ) 72 | 73 | ROOT_URLCONF = 'django_ex.urls' 74 | 75 | TEMPLATE_DIRS = ( 76 | os.path.join(CURRENT_DIR, 'templates'), 77 | ) 78 | 79 | INSTALLED_APPS = ( 80 | 'django.contrib.auth', 81 | 'django.contrib.contenttypes', 82 | 'django.contrib.sessions', 83 | 'django.contrib.sites', 84 | 'django.contrib.staticfiles', 85 | 'micawber.contrib.mcdjango', 86 | ) 87 | -------------------------------------------------------------------------------- /examples/django_ex/static/style.css: -------------------------------------------------------------------------------- 1 | body { font-family: sans-serif; background: #eee; } 2 | a, h1, h2 { color: #377BA8; } 3 | h1, h2 { font-family: 'Georgia', serif; margin: 0; } 4 | h1 { border-bottom: 2px solid #eee; } 5 | h2 { font-size: 1.2em; } 6 | 7 | .page { margin: 2em auto; width: 35em; border: 5px solid #ccc; 8 | padding: 0.8em; background: white; } 9 | .entries { list-style: none; margin: 0; padding: 0; } 10 | .entries li { margin: 0.8em 1.2em; } 11 | .entries li h2 { margin-left: -1em; } 12 | .add-entry { font-size: 0.9em; border-bottom: 1px solid #ccc; } 13 | .add-entry dl { font-weight: bold; } 14 | .metanav { text-align: right; font-size: 0.8em; padding: 0.3em; 15 | margin-bottom: 1em; background: 
#fafafa; } 16 | .flash { background: #CEE5F5; padding: 0.5em; 17 | border: 1px solid #AACBE2; } 18 | .error { background: #F0D6D6; padding: 0.5em; } 19 | -------------------------------------------------------------------------------- /examples/django_ex/templates/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Example 5 | 6 | 7 | 8 |
9 |

Micawber Example

10 | 11 |

Text renderer

12 |
13 |

14 | 15 |

16 |

17 | 18 | Try entering some urls to youtube videos 19 |

20 |

21 | 22 |

23 |
24 | 25 | {% load micawber_tags %} 26 | 27 | {% if text %} 28 |

Rendered

29 |
30 | {{ text|oembed }} 31 |
32 | 33 |

Extracted data

34 | 37 | {% endif %} 38 | 39 |

HTML renderer

40 |
41 |

42 | 43 |

44 |

45 | 46 |

47 |

48 | 49 |

50 |
51 | 52 | {% load micawber_tags %} 53 | 54 | {% if html %} 55 |

Rendered

56 |
57 | {{ html|oembed_html }} 58 |
59 | {% endif %} 60 |
61 | 62 | 63 | -------------------------------------------------------------------------------- /examples/django_ex/urls.py: -------------------------------------------------------------------------------- 1 | from django.conf.urls.defaults import patterns, include, url 2 | 3 | urlpatterns = patterns('', 4 | url(r'^$', 'django_ex.views.example_view', name='example_view'), 5 | ) 6 | -------------------------------------------------------------------------------- /examples/django_ex/views.py: -------------------------------------------------------------------------------- 1 | from django.shortcuts import render_to_response 2 | 3 | def example_view(request): 4 | text = request.GET.get('text', 'http://www.youtube.com/watch?v=nda_OSWeyn8') 5 | html = request.GET.get('html', """ 6 |

This is a test

7 |

http://www.youtube.com/watch?v=nda_OSWeyn8

8 |

This will get rendered as a link: http://www.youtube.com/watch?v=nda_OSWeyn8

9 |

This will not be modified: http://www.youtube.com/watch?v=nda_OSWeyn8

10 | """) 11 | return render_to_response('example.html', dict( 12 | text=text, 13 | html=html, 14 | )) 15 | -------------------------------------------------------------------------------- /examples/flask_ex/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, request 2 | from micawber.providers import bootstrap_basic 3 | from micawber.contrib.mcflask import add_oembed_filters 4 | 5 | app = Flask(__name__) 6 | app.config['DEBUG'] = True 7 | 8 | oembed_providers = bootstrap_basic() 9 | add_oembed_filters(app, oembed_providers) 10 | 11 | @app.route('/') 12 | def example_view(): 13 | text = request.args.get('text', 'http://www.youtube.com/watch?v=nda_OSWeyn8') 14 | html = request.args.get('html', """ 15 |

This is a test

16 |

http://www.youtube.com/watch?v=nda_OSWeyn8

17 |

This will get rendered as a link: http://www.youtube.com/watch?v=nda_OSWeyn8

18 |

This will not be modified: http://www.youtube.com/watch?v=nda_OSWeyn8

19 | """) 20 | return render_template('example.html', text=text, html=html) 21 | 22 | if __name__ == '__main__': 23 | app.run() 24 | -------------------------------------------------------------------------------- /examples/flask_ex/static/style.css: -------------------------------------------------------------------------------- 1 | body { font-family: sans-serif; background: #eee; } 2 | a, h1, h2 { color: #377BA8; } 3 | h1, h2 { font-family: 'Georgia', serif; margin: 0; } 4 | h1 { border-bottom: 2px solid #eee; } 5 | h2 { font-size: 1.2em; } 6 | 7 | .page { margin: 2em auto; width: 35em; border: 5px solid #ccc; 8 | padding: 0.8em; background: white; } 9 | .entries { list-style: none; margin: 0; padding: 0; } 10 | .entries li { margin: 0.8em 1.2em; } 11 | .entries li h2 { margin-left: -1em; } 12 | .add-entry { font-size: 0.9em; border-bottom: 1px solid #ccc; } 13 | .add-entry dl { font-weight: bold; } 14 | .metanav { text-align: right; font-size: 0.8em; padding: 0.3em; 15 | margin-bottom: 1em; background: #fafafa; } 16 | .flash { background: #CEE5F5; padding: 0.5em; 17 | border: 1px solid #AACBE2; } 18 | .error { background: #F0D6D6; padding: 0.5em; } 19 | -------------------------------------------------------------------------------- /examples/flask_ex/templates/example.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Example 5 | 6 | 7 | 8 |
9 |

Micawber Example

10 | 11 |

Text renderer

12 |
13 |

14 | 15 |

16 |

17 | 18 | Try entering some urls to youtube videos 19 |

20 |

21 | 22 |

23 |
24 | 25 | {% if text %} 26 |

Rendered

27 |
28 | {{ text|oembed() }} 29 |
30 | 31 |

Extracted data

32 | 35 | {% endif %} 36 | 37 |

HTML renderer

38 |
39 |

40 | 41 |

42 |

43 | 44 |

45 |

46 | 47 |

48 |
49 | 50 | {% if html %} 51 |

Rendered

52 |
53 | {{ html|oembed(html=True) }} 54 |
55 | {% endif %} 56 |
57 | 58 | 59 | -------------------------------------------------------------------------------- /examples/python_ex/example.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | from micawber import bootstrap_oembed, ProviderException 3 | try: 4 | read_input = raw_input 5 | except NameError: 6 | read_input = input 7 | 8 | def main(): 9 | print('Please wait, loading providers from oembed.com') 10 | providers = bootstrap_oembed() 11 | 12 | while 1: 13 | url = read_input('Enter a url (or q to quit): ') 14 | if url.lower().strip() == 'q': 15 | break 16 | 17 | try: 18 | result = providers.request(url) 19 | except ProviderException: 20 | print('No provider found for that url :/') 21 | else: 22 | print('Data for %s\n====================================================' % url) 23 | pprint.pprint(result) 24 | 25 | if __name__ == '__main__': 26 | print('Welcome to the example!') 27 | main() 28 | -------------------------------------------------------------------------------- /micawber/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.5.6' 2 | 3 | from micawber.cache import Cache 4 | from micawber.cache import PickleCache 5 | from micawber.exceptions import ProviderException 6 | from micawber.exceptions import InvalidResponseException 7 | from micawber.parsers import extract 8 | from micawber.parsers import extract_html 9 | from micawber.parsers import parse_text 10 | from micawber.parsers import parse_text_full 11 | from micawber.parsers import parse_html 12 | from micawber.providers import Provider 13 | from micawber.providers import ProviderRegistry 14 | from micawber.providers import bootstrap_basic 15 | from micawber.providers import bootstrap_embedly 16 | from micawber.providers import bootstrap_noembed 17 | from micawber.providers import bootstrap_oembed 18 | -------------------------------------------------------------------------------- 
# micawber/cache.py
"""Cache backends used to store provider responses.

Every backend exposes the same two-method interface: ``get(k)`` returns the
cached value or ``None``, and ``set(k, v)`` stores a value.
"""
import os
import pickle
try:
    from redis import Redis
except ImportError:
    # redis is optional; RedisCache is only defined when it is importable.
    Redis = None


class Cache(object):
    """In-memory cache backed by a plain dictionary."""

    def __init__(self):
        self._cache = {}

    def get(self, k):
        """Return the value stored under ``k``, or ``None`` when absent."""
        return self._cache.get(k)

    def set(self, k, v):
        """Store ``v`` under ``k``, replacing any previous value."""
        self._cache[k] = v


class PickleCache(Cache):
    """Dictionary cache that can be persisted to a pickle file.

    The file is read once at construction time; nothing is written until
    :meth:`save` is called explicitly.

    :param str filename: path of the pickle file.
    """

    def __init__(self, filename='cache.db'):
        self.filename = filename
        self._cache = self.load()

    def load(self):
        """Return the dictionary stored in ``self.filename`` (``{}`` if the
        file does not exist)."""
        if os.path.exists(self.filename):
            # WARNING: unpickling executes arbitrary code embedded in the
            # file; only point this cache at files you created yourself.
            with open(self.filename, 'rb') as fh:
                return pickle.load(fh)
        return {}

    def save(self):
        """Write the current cache contents to ``self.filename``."""
        with open(self.filename, 'wb') as fh:
            pickle.dump(self._cache, fh)


if Redis:
    class RedisCache(Cache):
        """
        Cache that stores pickled values in Redis.

        :param str namespace: key prefix.
        :param int timeout: expiration timeout in seconds
        :param conn: keyword arguments passed straight to ``Redis()``.
        """
        def __init__(self, namespace='micawber', timeout=None, **conn):
            self.namespace = namespace
            self.timeout = timeout
            self.conn = Redis(**conn)

        def key_fn(self, k):
            """Return the namespaced Redis key for ``k``."""
            return '%s.%s' % (self.namespace, k)

        def get(self, k):
            """Return the unpickled value for ``k``, or ``None`` if missing."""
            cached = self.conn.get(self.key_fn(k))
            if cached is not None:
                # WARNING: see PickleCache.load -- the Redis instance must be
                # trusted, since unpickling can execute arbitrary code.
                return pickle.loads(cached)
            return None

        def set(self, k, v):
            """Pickle ``v`` and store it under ``k``, honouring ``timeout``."""
            ck, cv = self.key_fn(k), pickle.dumps(v)
            # SET ... EX is used instead of SETEX because the positional
            # order of SETEX's (time, value) arguments differs between
            # redis-py releases; the keyword form is unambiguous. ``ex=None``
            # means "no expiry", so a single call covers both cases.
            self.conn.set(ck, cv, ex=self.timeout)
# micawber/compat.py
"""Python 2 / Python 3 compatibility helpers.

Exposes a uniform set of names (``Request``, ``urlopen``, ``URLError``,
``HTTPError``, ``urlencode``, ``text_type``, ``string_types``,
``get_charset`` and ``OrderedDict``) regardless of interpreter version.
"""
import sys

PY3 = sys.version_info >= (3,)

if PY3:
    from urllib.request import Request, urlopen, URLError, HTTPError
    from urllib.parse import urlencode
    text_type = str
    string_types = str,
    def get_charset(response):
        """Return the ``charset`` parameter of the response's content type."""
        return response.headers.get_param('charset')
else:
    from urllib2 import Request, urlopen, URLError, HTTPError
    from urllib import urlencode
    text_type = unicode
    string_types = basestring,
    def get_charset(response):
        """Return the ``charset`` parameter of the response's content type."""
        return response.headers.getparam('charset')

try:
    from collections import OrderedDict
except ImportError:
    # Pure-Python fallback, only defined when the standard library does not
    # provide collections.OrderedDict.
    try:
        from _abcoll import KeysView, ValuesView, ItemsView
    except ImportError:
        # Without these the view* methods at the bottom of the class raise
        # NameError when called; everything else keeps working.
        pass

    class OrderedDict(dict):
        'Dictionary that remembers insertion order'
        # An inherited dict maps keys to values.
        # The inherited dict provides __getitem__, __len__, __contains__, and get.
        # The remaining methods are order-aware.
        # Big-O running times for all methods are the same as for regular dictionaries.

        # The internal self.__map dictionary maps keys to links in a doubly linked list.
        # The circular doubly linked list starts and ends with a sentinel element.
        # The sentinel element never gets deleted (this simplifies the algorithm).
        # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

        def __init__(self, *args, **kwds):
            '''Initialize an ordered dictionary.  Signature is the same as for
            regular dictionaries, but keyword arguments are not recommended
            because their insertion order is arbitrary.

            '''
            if len(args) > 1:
                raise TypeError('expected at most 1 arguments, got %d' % len(args))
            try:
                self.__root
            except AttributeError:
                self.__root = root = []                     # sentinel node
                root[:] = [root, root, None]
                self.__map = {}
            self.__update(*args, **kwds)

        def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
            'od.__setitem__(i, y) <==> od[i]=y'
            # Setting a new item creates a new link which goes at the end of the linked
            # list, and the inherited dictionary is updated with the new key/value pair.
            if key not in self:
                root = self.__root
                last = root[0]
                last[1] = root[0] = self.__map[key] = [last, root, key]
            dict_setitem(self, key, value)

        def __delitem__(self, key, dict_delitem=dict.__delitem__):
            'od.__delitem__(y) <==> del od[y]'
            # Deleting an existing item uses self.__map to find the link which is
            # then removed by updating the links in the predecessor and successor nodes.
            dict_delitem(self, key)
            link_prev, link_next, key = self.__map.pop(key)
            link_prev[1] = link_next
            link_next[0] = link_prev

        def __iter__(self):
            'od.__iter__() <==> iter(od)'
            root = self.__root
            curr = root[1]
            while curr is not root:
                yield curr[2]
                curr = curr[1]

        def __reversed__(self):
            'od.__reversed__() <==> reversed(od)'
            root = self.__root
            curr = root[0]
            while curr is not root:
                yield curr[2]
                curr = curr[0]

        def clear(self):
            'od.clear() -> None.  Remove all items from od.'
            try:
                # values() rather than itervalues(): the latter only exists on
                # Python 2 dicts, and the AttributeError it raises elsewhere
                # would be swallowed below, leaving the linked list stale.
                for node in self.__map.values():
                    del node[:]
                root = self.__root
                root[:] = [root, root, None]
                self.__map.clear()
            except AttributeError:
                # __map / __root do not exist yet (clear() before __init__).
                pass
            dict.clear(self)

        def popitem(self, last=True):
            '''od.popitem() -> (k, v), return and remove a (key, value) pair.
            Pairs are returned in LIFO order if last is true or FIFO order if false.

            '''
            if not self:
                raise KeyError('dictionary is empty')
            root = self.__root
            if last:
                link = root[0]
                link_prev = link[0]
                link_prev[1] = root
                root[0] = link_prev
            else:
                link = root[1]
                link_next = link[1]
                root[1] = link_next
                link_next[0] = root
            key = link[2]
            del self.__map[key]
            value = dict.pop(self, key)
            return key, value

        # -- the following methods do not depend on the internal structure --

        def keys(self):
            'od.keys() -> list of keys in od'
            return list(self)

        def values(self):
            'od.values() -> list of values in od'
            return [self[key] for key in self]

        def items(self):
            'od.items() -> list of (key, value) pairs in od'
            return [(key, self[key]) for key in self]

        def iterkeys(self):
            'od.iterkeys() -> an iterator over the keys in od'
            return iter(self)

        def itervalues(self):
            'od.itervalues -> an iterator over the values in od'
            for k in self:
                yield self[k]

        def iteritems(self):
            'od.iteritems -> an iterator over the (key, value) items in od'
            for k in self:
                yield (k, self[k])

        def update(*args, **kwds):
            '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

            If E is a dict instance, does:           for k in E: od[k] = E[k]
            If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
            Or if E is an iterable of items, does:   for k, v in E: od[k] = v
            In either case, this is followed by:     for k, v in F.items(): od[k] = v

            '''
            # ``self`` is taken from *args so that a keyword argument named
            # "self" can still be used as a regular key.
            if len(args) > 2:
                raise TypeError('update() takes at most 2 positional '
                                'arguments (%d given)' % (len(args),))
            elif not args:
                raise TypeError('update() takes at least 1 argument (0 given)')
            self = args[0]
            # Make progressively weaker assumptions about "other"
            other = ()
            if len(args) == 2:
                other = args[1]
            if isinstance(other, dict):
                for key in other:
                    self[key] = other[key]
            elif hasattr(other, 'keys'):
                for key in other.keys():
                    self[key] = other[key]
            else:
                for key, value in other:
                    self[key] = value
            for key, value in kwds.items():
                self[key] = value

        __update = update  # let subclasses override update without breaking __init__

        __marker = object()

        def pop(self, key, default=__marker):
            '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
            If key is not found, d is returned if given, otherwise KeyError is raised.

            '''
            if key in self:
                result = self[key]
                del self[key]
                return result
            if default is self.__marker:
                raise KeyError(key)
            return default

        def setdefault(self, key, default=None):
            'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
            if key in self:
                return self[key]
            self[key] = default
            return default

        def __repr__(self):
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())

        def __reduce__(self):
            'Return state information for pickling'
            items = [[k, self[k]] for k in self]
            inst_dict = vars(self).copy()
            # Drop the private bookkeeping attributes; they are rebuilt by
            # __init__ when the items are re-inserted on unpickling.
            for k in vars(OrderedDict()):
                inst_dict.pop(k, None)
            if inst_dict:
                return (self.__class__, (items,), inst_dict)
            return self.__class__, (items,)

        def copy(self):
            'od.copy() -> a shallow copy of od'
            return self.__class__(self)

        @classmethod
        def fromkeys(cls, iterable, value=None):
            '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
            and values equal to v (which defaults to None).

            '''
            d = cls()
            for key in iterable:
                d[key] = value
            return d

        def __eq__(self, other):
            '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
            while comparison to a regular mapping is order-insensitive.

            '''
            if isinstance(other, OrderedDict):
                return len(self)==len(other) and self.items() == other.items()
            return dict.__eq__(self, other)

        def __ne__(self, other):
            return not self == other

        # -- the following methods are only used in Python 2.7 --

        def viewkeys(self):
            "od.viewkeys() -> a set-like object providing a view on od's keys"
            return KeysView(self)

        def viewvalues(self):
            "od.viewvalues() -> an object providing a view on od's values"
            return ValuesView(self)

        def viewitems(self):
            "od.viewitems() -> a set-like object providing a view on od's items"
            return ItemsView(self)
# micawber/contrib/mcdjango/__init__.py
"""Django integration: template filters that render or extract oembed data.

Importing this module loads the provider registry named by
``settings.MICAWBER_PROVIDERS`` and registers the ``oembed``,
``oembed_html``, ``extract_oembed`` and ``extract_oembed_html`` filters, plus
any filters listed in ``settings.MICAWBER_TEMPLATE_EXTENSIONS``.
"""
from collections.abc import Callable
from importlib import import_module

from django import template
from django.conf import settings
from django.template.loader import render_to_string
from django.utils.safestring import mark_safe

from micawber.compat import string_types
from micawber.parsers import full_handler, inline_handler, parse_text, \
    parse_html, extract, extract_html


def _load_from_module(path):
    """Import and return the attribute named by the dotted ``path``."""
    package, attr = path.rsplit('.', 1)
    module = import_module(package)
    return getattr(module, attr)


PROVIDERS = getattr(settings, 'MICAWBER_PROVIDERS', 'micawber.contrib.mcdjango.providers.bootstrap_basic')

# The setting may name either a registry instance or a factory returning one.
providers = _load_from_module(PROVIDERS)
if isinstance(providers, Callable):
    providers = providers()


register = template.Library()

def django_template_handler(url, response_data, **params):
    """Render an oembed response with a ``micawber/<name>.html`` template.

    Candidate templates are tried in order: one named after the response's
    ``provider_name`` (when present), then one named after its ``type``.
    """
    names = (
        response_data.get('provider_name'),
        response_data['type'],
    )
    template_names = ['micawber/%s.html' % name for name in names if name]
    return mark_safe(
        render_to_string(
            template_names,
            {'params': params,
             'response': response_data,
             'url': url,
            }).strip())

def fix_width_height(width_height, params):
    """Apply a ``"W"`` or ``"WxH"`` filter argument to ``params``.

    ``params`` is updated in place and also returned. A bare width sets
    ``maxwidth`` and removes any ``maxheight``; a falsy ``width_height``
    leaves ``params`` untouched.
    """
    if width_height:
        if 'x' in width_height:
            params['maxwidth'], params['maxheight'] = [int(n) for n in width_height.split('x')]
        else:
            params['maxwidth'] = int(width_height)
            params.pop('maxheight', None)
    return params

def _default_params():
    """Return a fresh copy of ``settings.MICAWBER_DEFAULT_SETTINGS``.

    A copy is required because callers go on to mutate the result; mutating
    the settings dict itself would leak one call's parameters into every
    later filter invocation.
    """
    return dict(getattr(settings, 'MICAWBER_DEFAULT_SETTINGS', {}))

def extension(filter_name, providers=providers, urlize_all=True, html=False, handler=django_template_handler,
              block_handler=inline_handler, text_fn=parse_text, html_fn=parse_html, **kwargs):
    """Create a template filter, register it as ``filter_name`` and return it.

    ``html`` selects ``html_fn`` over ``text_fn`` as the parser. Extra
    ``kwargs`` are merged over the default settings and forwarded to the
    parser on every call, together with the optional ``width_height`` filter
    argument.
    """
    if html:
        fn = html_fn
    else:
        fn = text_fn
    def _extension(s, width_height=None):
        params = _default_params()
        params.update(kwargs)
        params = fix_width_height(width_height, params)
        return mark_safe(fn(s, providers, urlize_all, handler, block_handler, **params))
    register.filter(filter_name, _extension)
    return _extension

oembed = extension('oembed')
oembed_html = extension('oembed_html', html=True)

def _extract_oembed(text, width_height=None, html=False):
    """Return ``(url, data)`` pairs for each URL in ``text`` that has data.

    Pairs follow the order of the URL list returned by the extractor.
    """
    if html:
        fn = extract_html
    else:
        fn = extract
    params = fix_width_height(width_height, _default_params())
    url_list, url_data = fn(text, providers, **params)
    return [(u, url_data[u]) for u in url_list if u in url_data]

@register.filter
def extract_oembed(text, width_height=None):
    """Template filter: extract oembed data from plain text."""
    return _extract_oembed(text, width_height)

@register.filter
def extract_oembed_html(text, width_height=None):
    """Template filter: extract oembed data from HTML."""
    return _extract_oembed(text, width_height, True)

# User-defined filters: either a list of (name, kwargs) pairs or a dotted
# path to such a list.
user_extensions = getattr(settings, 'MICAWBER_TEMPLATE_EXTENSIONS', [])
if isinstance(user_extensions, string_types):
    user_extensions = _load_from_module(user_extensions)

for filter_name, filter_params in user_extensions:
    extension(filter_name, **filter_params)
width_height) 82 | 83 | @register.filter 84 | def extract_oembed_html(text, width_height=None): 85 | return _extract_oembed(text, width_height, True) 86 | 87 | user_extensions = getattr(settings, 'MICAWBER_TEMPLATE_EXTENSIONS', []) 88 | if isinstance(user_extensions, string_types): 89 | user_extensions = _load_from_module(user_extensions) 90 | 91 | for filter_name, filter_params in user_extensions: 92 | extension(filter_name, **filter_params) 93 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/mcdjango_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/mcdjango_tests/__init__.py -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/mcdjango_tests/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/mcdjango_tests/models.py -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/mcdjango_tests/tests.py: -------------------------------------------------------------------------------- 1 | from django.template import Context 2 | from django.template import Template 3 | from django.test import TestCase 4 | 5 | from micawber.parsers import parse_text 6 | from micawber.test_utils import BaseTestCase 7 | from micawber.test_utils import test_cache 8 | from micawber.test_utils import test_pr 9 | from micawber.test_utils import test_pr_cache 10 | from micawber.test_utils import TestProvider 11 | 12 | 13 | class MicawberDjangoTestCase(TestCase, BaseTestCase): 14 | def render(self, s, **params): 15 | s = '{%% load micawber_tags %%}%s' % s 16 | return Template(s).render(Context(params)).strip() 
17 | 18 | def test_oembed_alt(self): 19 | from micawber.contrib.mcdjango import extension 20 | 21 | def custom_handler(url, response_data): 22 | return url 23 | 24 | oembed_alt = extension( 25 | 'oembed_alt', 26 | urlize_all=False, 27 | block_handler=custom_handler) 28 | 29 | text = '\n'.join(( 30 | 'this is the first line', 31 | 'http://photo-test2', 32 | 'this is the third line http://photo-test2', 33 | 'http://photo-test2 this is the fourth line')) 34 | rendered = self.render('{{ text|oembed_alt }}', text=text) 35 | self.assertEqual(rendered.splitlines(), [ 36 | 'this is the first line', 37 | self.full_pairs['http://photo-test2'], 38 | 'this is the third line http://photo-test2', 39 | 'http://photo-test2 this is the fourth line', 40 | ]) 41 | 42 | def test_fix_wh(self): 43 | from micawber.contrib.mcdjango import fix_width_height 44 | self.assertEqual(fix_width_height('300x400', {}), {'maxwidth': 300, 'maxheight': 400}) 45 | self.assertEqual(fix_width_height('300', {}), {'maxwidth': 300}) 46 | 47 | def test_provider_loading(self): 48 | from micawber.contrib.mcdjango import providers 49 | self.assertEqual(providers, test_pr) 50 | 51 | def test_oembed_filter_multiline_plain(self): 52 | for url, expected in self.full_pairs.items(): 53 | expected_inline = self.inline_pairs[url] 54 | frame = 'this is inline: %s\n%s\nand yet another %s' 55 | 56 | test_str = frame % (url, url, url) 57 | 58 | parsed = self.render('{{ test_str|oembed }}', test_str=test_str) 59 | self.assertEqual(parsed, frame % (expected_inline, expected, expected_inline)) 60 | 61 | def test_oembed_filter_multiline_html(self): 62 | for url, expected in self.full_pairs.items(): 63 | expected_inline = self.inline_pairs[url] 64 | frame = '

%s

\n

this is inline: %s

\n

\n%s\n

last test\n%s\n

' 65 | 66 | test_str = frame % (url, url, url, url) 67 | 68 | parsed = self.render('{{ test_str|oembed_html }}', test_str=test_str) 69 | self.assertHTMLEqual(parsed, frame % (expected, expected_inline, expected, expected_inline)) 70 | 71 | for url, expected in self.full_pairs.items(): 72 | expected_inline = self.inline_pairs[url] 73 | frame = '

%s

\n

this is inline: %s

\n

last test\n%s\n

' 74 | 75 | test_str = frame % (url, url, url) 76 | 77 | parsed = self.render('{{ test_str|oembed_html }}', test_str=test_str) 78 | self.assertHTMLEqual(parsed, frame % (url, expected_inline, expected_inline)) 79 | 80 | def test_urlize(self): 81 | u1 = 'http://fappio.com/' 82 | u2 = 'http://google.com/fap/' 83 | u1h = '%s' % (u1, u1) 84 | u2h = '%s' % (u2, u2) 85 | for url, expected in self.full_pairs.items(): 86 | expected_inline = self.inline_pairs[url] 87 | frame = 'test %s\n%s\n%s\nand another %s' 88 | 89 | test_str = frame % (u1, u2, url, url) 90 | 91 | parsed = self.render('{{ test_str|oembed }}', test_str=test_str) 92 | self.assertEqual(parsed, frame % (u1h, u2h, expected, expected_inline)) 93 | 94 | def test_oembed_filter_extension(self): 95 | for url, expected in self.full_pairs.items(): 96 | expected_inline = self.inline_pairs[url] 97 | frame = 'test http://fappio.com\nhttp://google.com\n%s\nand another %s' 98 | 99 | test_str = frame % (url, url) 100 | 101 | parsed = self.render('{{ test_str|oembed_no_urlize }}', test_str=test_str) 102 | self.assertEqual(parsed, frame % (expected, expected_inline)) 103 | 104 | def test_extract_filter(self): 105 | blank = 'http://fapp.io/foo/' 106 | frame = 'test %s\n%s\n%s\n%s at last' 107 | frame_html = '

test %s

%s %s

%s

' 108 | 109 | t = """{% for url, data in test_str|extract_oembed %}{{ url }}\n{% endfor %}""" 110 | t2 = """{% for url, data in test_str|extract_oembed_html %}{{ url }}\n{% endfor %}""" 111 | 112 | for url, expected in self.data_pairs.items(): 113 | test_str = frame % (url, blank, url, blank) 114 | rendered = self.render(t, test_str=test_str) 115 | self.assertEqual(rendered, url) 116 | 117 | test_str = frame_html % (url, blank, url, blank) 118 | rendered = self.render(t, test_str=test_str) 119 | self.assertEqual(rendered, url) 120 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/models.py -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/providers.py: -------------------------------------------------------------------------------- 1 | from django.conf import settings 2 | from django.core.cache import cache 3 | 4 | from micawber.providers import bootstrap_basic as _bootstrap_basic, bootstrap_embedly as _bootstrap_embedly 5 | 6 | 7 | def bootstrap_basic(): 8 | return _bootstrap_basic(cache) 9 | 10 | def bootstrap_embedly(): 11 | key = getattr(settings, 'MICAWBER_EMBEDLY_KEY', None) 12 | params = {} 13 | if key: 14 | params['key'] = key 15 | return _bootstrap_embedly(cache, **params) 16 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/templates/micawber/link.html: -------------------------------------------------------------------------------- 1 | {{ response.title }} 2 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/templates/micawber/photo.html: 
-------------------------------------------------------------------------------- 1 | {{ response.title }} 2 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/templates/micawber/rich.html: -------------------------------------------------------------------------------- 1 | {{ response.html|safe }} 2 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/templates/micawber/video.html: -------------------------------------------------------------------------------- 1 | {{ response.html|safe }} 2 | -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/templatetags/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coleifer/micawber/1e8b8af3cc3c3f2a7d597a100b4de71533d24ceb/micawber/contrib/mcdjango/templatetags/__init__.py -------------------------------------------------------------------------------- /micawber/contrib/mcdjango/templatetags/micawber_tags.py: -------------------------------------------------------------------------------- 1 | from micawber.contrib.mcdjango import register 2 | -------------------------------------------------------------------------------- /micawber/contrib/mcflask.py: -------------------------------------------------------------------------------- 1 | try: 2 | from markupsafe import Markup 3 | except ImportError: 4 | from flask import Markup 5 | from micawber import parse_text, parse_html, extract, extract_html 6 | 7 | 8 | def oembed(s, providers, urlize_all=True, html=False, **params): 9 | if html: 10 | fn = parse_html 11 | else: 12 | fn = parse_text 13 | return Markup(fn(s, providers, urlize_all, **params)) 14 | 15 | def extract_oembed(s, providers, html=False, **params): 16 | if html: 17 | fn = extract_html 18 | else: 19 | fn = extract 20 | return fn(s, providers, **params) 21 | 22 | def 
add_oembed_filters(app, providers): 23 | def _oembed(s, urlize_all=True, html=False, **params): 24 | return oembed(s, providers, urlize_all, html, **params) 25 | 26 | def _extract_oembed(s, html=False, **params): 27 | return extract_oembed(s, providers, html, **params) 28 | 29 | app.jinja_env.filters['oembed'] = _oembed 30 | app.jinja_env.filters['extract_oembed'] = _extract_oembed 31 | -------------------------------------------------------------------------------- /micawber/contrib/providers.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from micawber.providers import Provider 4 | 5 | 6 | class ImageProvider(Provider): 7 | """ 8 | Simple little hack to render any image URL as an tag, use with care 9 | 10 | Usage: 11 | 12 | pr = micawber.bootstrap_basic() 13 | pr.register(ImageProvider.regex, ImageProvider('')) 14 | """ 15 | regex = 'http://.+?\.(jpg|gif|png)' 16 | 17 | def request(self, url, **params): 18 | return { 19 | 'url': url, 20 | 'type': 'photo', 21 | 'title': '', 22 | } 23 | 24 | 25 | class GoogleMapsProvider(Provider): 26 | """ 27 | Render a map URL as an embedded map 28 | 29 | Usage: 30 | 31 | pr = micawber.bootstrap_basic() 32 | pr.register(GoogleMapsProvider.regex, GoogleMapsProvider('')) 33 | """ 34 | regex = r'^https?://maps.google.com/maps\?([^\s]+)' 35 | 36 | valid_params = ['q', 'z'] 37 | 38 | def request(self, url, **params): 39 | url_params = re.match(self.regex, url).groups()[0] 40 | url_params = url_params.replace('&', '&').split('&') 41 | 42 | map_params = ['output=embed'] 43 | 44 | for param in url_params: 45 | k, v = param.split('=', 1) 46 | if k in self.valid_params: 47 | map_params.append(param) 48 | 49 | width = int(params.get('maxwidth', 640)) 50 | height = int(params.get('maxheight', 480)) 51 | html = '' % \ 52 | (width, height, '&'.join(map_params)) 53 | 54 | return { 55 | 'height': height, 56 | 'html': html, 57 | 'provider_name': 'Google maps', 58 | 'title': '', 59 | 
'type': 'rich', 60 | 'version': '1.0', 61 | 'width': width, 62 | } 63 | -------------------------------------------------------------------------------- /micawber/exceptions.py: -------------------------------------------------------------------------------- 1 | class ProviderException(Exception): 2 | pass 3 | 4 | class ProviderNotFoundException(ProviderException): 5 | pass 6 | 7 | class InvalidResponseException(ProviderException): 8 | pass 9 | -------------------------------------------------------------------------------- /micawber/parsers.py: -------------------------------------------------------------------------------- 1 | import re 2 | from .compat import text_type 3 | try: 4 | import simplejson as json 5 | except ImportError: 6 | import json 7 | 8 | bs_kwargs = {} 9 | try: 10 | from BeautifulSoup import BeautifulSoup 11 | bs_kwargs = {'convertEntities': BeautifulSoup.HTML_ENTITIES} 12 | replace_kwargs = {} 13 | except ImportError: 14 | try: 15 | from bs4 import BeautifulSoup 16 | bs_kwargs = replace_kwargs = {'features': 'html.parser'} 17 | except ImportError: 18 | BeautifulSoup = None 19 | 20 | from micawber.exceptions import ProviderException 21 | 22 | 23 | url_pattern = '(https?://[-A-Za-z0-9+&@#/%?=~_()|!:,.;]*[-A-Za-z0-9+&@#/%=~_|])' 24 | url_re = re.compile(url_pattern) 25 | standalone_url_re = re.compile(r'^\s*' + url_pattern + r'\s*$') 26 | 27 | block_elements = set([ 28 | 'address', 'article', 'aside', 'blockquote', 'canvas', 'center', 'dir', 29 | 'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 30 | 'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 31 | 'isindex', 'li', 'main', 'menu', 'nav', 'noframes', 'noscript', 'ol', 'p', 32 | 'pre', 'section', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 33 | 'ul', 34 | # Additional elements. 
35 | 'button', 'del', 'iframe', 'ins', 'map', 'object', 'script', '[document]', 36 | ]) 37 | 38 | skip_elements = set(['a', 'pre', 'code', 'input', 'textarea', 'select']) 39 | 40 | 41 | def full_handler(url, response_data, **params): 42 | if response_data['type'] == 'link': 43 | return '%(title)s' % response_data 44 | elif response_data['type'] == 'photo': 45 | return '%(title)s' % response_data 46 | else: 47 | return response_data['html'] 48 | 49 | def inline_handler(url, response_data, **params): 50 | return '%(title)s' % response_data 51 | 52 | def urlize(url, **params): 53 | params.setdefault('href', url) 54 | param_html = ' '.join('%s="%s"' % (key, value) 55 | for key, value in sorted(params.items())) 56 | return '%s' % (param_html, url) 57 | 58 | def extract(text, providers, **params): 59 | all_urls = set() 60 | urls = [] 61 | extracted_urls = {} 62 | 63 | for url in re.findall(url_re, text): 64 | if url in all_urls: 65 | continue 66 | 67 | all_urls.add(url) 68 | urls.append(url) 69 | try: 70 | extracted_urls[url] = providers.request(url, **params) 71 | except ProviderException: 72 | pass 73 | 74 | return urls, extracted_urls 75 | 76 | def parse_text_full(text, providers, urlize_all=True, handler=full_handler, 77 | urlize_params=None, **params): 78 | all_urls, extracted_urls = extract(text, providers, **params) 79 | replacements = {} 80 | urlize_params = urlize_params or {} 81 | 82 | for url in all_urls: 83 | if url in extracted_urls: 84 | replacements[url] = handler(url, extracted_urls[url], **params) 85 | elif urlize_all: 86 | replacements[url] = urlize(url, **urlize_params) 87 | 88 | # go through the text recording URLs that can be replaced 89 | # taking note of their start & end indexes 90 | urls = re.finditer(url_re, text) 91 | matches = [] 92 | for match in urls: 93 | if match.group() in replacements: 94 | matches.append([match.start(), match.end(), match.group()]) 95 | 96 | # replace the URLs in order, offsetting the indices each go 97 | for indx, 
(start, end, url) in enumerate(matches): 98 | replacement = replacements[url] 99 | difference = len(replacement) - len(url) 100 | 101 | # insert the replacement between two slices of text surrounding the 102 | # original url 103 | text = text[:start] + replacement + text[end:] 104 | 105 | # iterate through the rest of the matches offsetting their indices 106 | # based on the difference between replacement/original 107 | for j in range(indx + 1, len(matches)): 108 | matches[j][0] += difference 109 | matches[j][1] += difference 110 | 111 | return text 112 | 113 | def parse_text(text, providers, urlize_all=True, handler=full_handler, 114 | block_handler=inline_handler, urlize_params=None, **params): 115 | lines = text.splitlines() 116 | parsed = [] 117 | urlize_params = urlize_params or {} 118 | 119 | for line in lines: 120 | if standalone_url_re.match(line): 121 | url = line.strip() 122 | try: 123 | response = providers.request(url, **params) 124 | except ProviderException: 125 | if urlize_all: 126 | line = urlize(url, **urlize_params) 127 | else: 128 | line = handler(url, response, **params) 129 | elif block_handler is not None: 130 | line = parse_text_full(line, providers, urlize_all, block_handler, 131 | urlize_params=urlize_params, **params) 132 | 133 | parsed.append(line) 134 | 135 | return '\n'.join(parsed) 136 | 137 | def parse_html(html, providers, urlize_all=True, handler=full_handler, 138 | block_handler=inline_handler, soup_class=BeautifulSoup, 139 | urlize_params=None, **params): 140 | 141 | if not soup_class: 142 | raise Exception('Unable to parse HTML, please install BeautifulSoup ' 143 | 'or beautifulsoup4, or use the text parser') 144 | 145 | soup = soup_class(html, **bs_kwargs) 146 | 147 | for url in soup.findAll(text=url_re): 148 | if not _inside_skip(url): 149 | if _is_standalone(url): 150 | url_handler = handler 151 | else: 152 | url_handler = block_handler 153 | 154 | url_unescaped = (url.string 155 | .replace('<', '<') 156 | .replace('>', '>')) 
157 | 158 | replacement = parse_text_full( 159 | url_unescaped, 160 | providers, 161 | urlize_all, 162 | url_handler, 163 | urlize_params=urlize_params, 164 | **params) 165 | url.replaceWith(BeautifulSoup(replacement, **replace_kwargs)) 166 | 167 | return text_type(soup) 168 | 169 | def extract_html(html, providers, **params): 170 | if not BeautifulSoup: 171 | raise Exception('Unable to parse HTML, please install BeautifulSoup ' 172 | 'or use the text parser') 173 | 174 | soup = BeautifulSoup(html, **bs_kwargs) 175 | all_urls = set() 176 | urls = [] 177 | extracted_urls = {} 178 | 179 | for url in soup.findAll(text=url_re): 180 | if _inside_skip(url): 181 | continue 182 | 183 | block_all, block_ext = extract(text_type(url), providers, **params) 184 | for extracted_url in block_all: 185 | if extracted_url in all_urls: 186 | continue 187 | 188 | extracted_urls.update(block_ext) 189 | urls.append(extracted_url) 190 | all_urls.add(extracted_url) 191 | 192 | return urls, extracted_urls 193 | 194 | def _is_standalone(soup_elem): 195 | if standalone_url_re.match(soup_elem): 196 | return soup_elem.parent.name in block_elements 197 | return False 198 | 199 | def _inside_skip(soup_elem): 200 | parent = soup_elem.parent 201 | while parent is not None: 202 | if parent.name in skip_elements: 203 | return True 204 | parent = parent.parent 205 | return False 206 | -------------------------------------------------------------------------------- /micawber/providers.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import logging 3 | import pickle 4 | import re 5 | import socket 6 | import ssl 7 | from .compat import get_charset 8 | from .compat import HTTPError 9 | from .compat import OrderedDict 10 | from .compat import Request 11 | from .compat import urlencode 12 | from .compat import URLError 13 | from .compat import urlopen 14 | try: 15 | import simplejson as json 16 | try: 17 | InvalidJson = json.JSONDecodeError 18 | 
except AttributeError: 19 | InvalidJson = ValueError 20 | except ImportError: 21 | import json 22 | InvalidJson = ValueError 23 | 24 | from micawber.exceptions import InvalidResponseException 25 | from micawber.exceptions import ProviderException 26 | from micawber.exceptions import ProviderNotFoundException 27 | from micawber.parsers import extract 28 | from micawber.parsers import extract_html 29 | from micawber.parsers import parse_html 30 | from micawber.parsers import parse_text 31 | from micawber.parsers import parse_text_full 32 | 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | 37 | class Provider(object): 38 | def __init__(self, endpoint, timeout=3.0, user_agent=None, **kwargs): 39 | self.endpoint = endpoint 40 | self.socket_timeout = timeout 41 | self.user_agent = user_agent or 'python-micawber' 42 | self.base_params = {'format': 'json'} 43 | self.base_params.update(kwargs) 44 | 45 | def fetch(self, url): 46 | req = Request(url, headers={'User-Agent': self.user_agent}) 47 | try: 48 | resp = fetch(req, self.socket_timeout) 49 | except URLError: 50 | return False 51 | except HTTPError: 52 | return False 53 | except socket.timeout: 54 | return False 55 | except ssl.SSLError: 56 | return False 57 | return resp 58 | 59 | def encode_params(self, url, **extra_params): 60 | params = dict(self.base_params) 61 | params.update(extra_params) 62 | params['url'] = url 63 | return urlencode(sorted(params.items())) 64 | 65 | def request(self, url, **extra_params): 66 | encoded_params = self.encode_params(url, **extra_params) 67 | 68 | endpoint_url = self.endpoint 69 | if '?' 
in endpoint_url: 70 | endpoint_url = '%s&%s' % (endpoint_url.rstrip('&'), encoded_params) 71 | else: 72 | endpoint_url = '%s?%s' % (endpoint_url, encoded_params) 73 | 74 | response = self.fetch(endpoint_url) 75 | if response: 76 | return self.handle_response(response, url) 77 | else: 78 | raise ProviderException('Error fetching "%s"' % endpoint_url) 79 | 80 | def handle_response(self, response, url): 81 | try: 82 | json_data = json.loads(response) 83 | except InvalidJson as exc: 84 | try: 85 | msg = exc.message 86 | except AttributeError: 87 | msg = exc.args[0] 88 | raise InvalidResponseException(msg) 89 | 90 | if 'url' not in json_data: 91 | json_data['url'] = url 92 | if 'title' not in json_data: 93 | json_data['title'] = json_data['url'] 94 | 95 | return json_data 96 | 97 | 98 | def make_key(*args, **kwargs): 99 | return hashlib.md5(pickle.dumps((args, kwargs))).hexdigest() 100 | 101 | 102 | def url_cache(fn): 103 | def inner(self, url, **params): 104 | if self.cache is not None: 105 | key = make_key(url, params) 106 | data = self.cache.get(key) 107 | if not data: 108 | data = fn(self, url, **params) 109 | self.cache.set(key, data) 110 | return data 111 | return fn(self, url, **params) 112 | return inner 113 | 114 | 115 | def fetch(request, timeout=None): 116 | urlopen_params = {} 117 | if timeout: 118 | urlopen_params['timeout'] = timeout 119 | resp = urlopen(request, **urlopen_params) 120 | if resp.code < 200 or resp.code >= 300: 121 | return False 122 | 123 | # by RFC, default HTTP charset is ISO-8859-1 124 | charset = get_charset(resp) or 'iso-8859-1' 125 | 126 | content = resp.read().decode(charset) 127 | resp.close() 128 | return content 129 | 130 | 131 | def fetch_cache(cache, url, refresh=False, timeout=None): 132 | contents = None 133 | if cache is not None and not refresh: 134 | contents = cache.get('micawber.%s' % url) 135 | if contents is None: 136 | contents = fetch(url, timeout=timeout) 137 | if cache is not None: 138 | cache.set('micawber.%s' % 
url, contents) 139 | return contents 140 | 141 | 142 | class ProviderRegistry(object): 143 | def __init__(self, cache=None): 144 | self._registry = OrderedDict() 145 | self.cache = cache 146 | 147 | def register(self, regex, provider): 148 | self._registry[regex] = provider 149 | 150 | def unregister(self, regex): 151 | del self._registry[regex] 152 | 153 | def __iter__(self): 154 | return iter(reversed(list(self._registry.items()))) 155 | 156 | def provider_for_url(self, url): 157 | for regex, provider in self: 158 | if re.match(regex, url): 159 | return provider 160 | 161 | @url_cache 162 | def request(self, url, **params): 163 | provider = self.provider_for_url(url) 164 | if provider: 165 | return provider.request(url, **params) 166 | raise ProviderNotFoundException('Provider not found for "%s"' % url) 167 | 168 | def parse_text(self, text, **kwargs): 169 | return parse_text(text, self, **kwargs) 170 | 171 | def parse_text_full(self, text, **kwargs): 172 | return parse_text_full(text, self, **kwargs) 173 | 174 | def parse_html(self, html, **kwargs): 175 | return parse_html(html, self, **kwargs) 176 | 177 | def extract(self, text, **kwargs): 178 | return extract(text, self, **kwargs) 179 | 180 | def extract_html(self, html, **kwargs): 181 | return extract_html(html, self, **kwargs) 182 | 183 | 184 | def bootstrap_basic(cache=None, registry=None): 185 | # complements of oembed.com#section7 186 | pr = registry or ProviderRegistry(cache) 187 | 188 | # c 189 | pr.register(r'http://chirb\.it/\S+', Provider('http://chirb.it/oembed.json')) 190 | pr.register(r'https?://www\.circuitlab\.com/circuit/\S+', Provider('https://www.circuitlab.com/circuit/oembed')) 191 | 192 | # d 193 | pr.register(r'https?://(?:www\.)?dailymotion\.com/\S+', Provider('http://www.dailymotion.com/services/oembed')) 194 | 195 | # f 196 | pr.register(r'https?://\S*?flickr\.com/\S+', Provider('https://www.flickr.com/services/oembed/')) 197 | pr.register(r'https?://flic\.kr/\S*', 
Provider('https://www.flickr.com/services/oembed/')) 198 | pr.register(r'https?://(?:www\.)?funnyordie\.com/videos/\S+', Provider('http://www.funnyordie.com/oembed')) 199 | 200 | # g 201 | # 2020-11-04: removed GitHub gist, as it seems to be unsupported now. 202 | #pr.register(r'https?://gist\.github\.com/\S*', Provider('https://github.com/api/oembed')) 203 | 204 | # h 205 | pr.register(r'http://(?:www\.)hulu\.com/watch/\S+', Provider('http://www.hulu.com/api/oembed.json')) 206 | 207 | # i 208 | pr.register(r'https?://\S*imgur\.com/\S+', Provider('https://api.imgur.com/oembed')), 209 | pr.register(r'https?://(www\.)?instagr(\.am|am\.com)/p/\S+', Provider('http://api.instagram.com/oembed')) 210 | 211 | # m 212 | pr.register(r'http://www\.mobypicture\.com/user/\S*?/view/\S*', Provider('http://api.mobypicture.com/oEmbed')) 213 | pr.register(r'http://moby\.to/\S*', Provider('http://api.mobypicture.com/oEmbed')) 214 | 215 | # p 216 | pr.register(r'http://i\S*\.photobucket\.com/albums/\S+', Provider('http://photobucket.com/oembed')) 217 | pr.register(r'http://gi\S*\.photobucket\.com/groups/\S+', Provider('http://photobucket.com/oembed')) 218 | pr.register(r'http://www\.polleverywhere\.com/(polls|multiple_choice_polls|free_text_polls)/\S+', Provider('http://www.polleverywhere.com/services/oembed/')) 219 | pr.register(r'https?://(.+\.)?polldaddy\.com/\S*', Provider('http://polldaddy.com/oembed/')) 220 | 221 | # s 222 | pr.register(r'https?://(?:www\.)?slideshare\.net/[^\/]+/\S+', Provider('http://www.slideshare.net/api/oembed/2')) 223 | pr.register(r'https?://slidesha\.re/\S*', Provider('http://www.slideshare.net/api/oembed/2')) 224 | pr.register(r'http://\S*\.smugmug\.com/\S*', Provider('http://api.smugmug.com/services/oembed/')) 225 | pr.register(r'https://\S*?soundcloud\.com/\S+', Provider('http://soundcloud.com/oembed')) 226 | pr.register(r'https?://speakerdeck\.com/\S*', Provider('https://speakerdeck.com/oembed.json')), 227 | 
pr.register(r'https?://(?:www\.)?scribd\.com/\S*', Provider('http://www.scribd.com/services/oembed')) 228 | 229 | # t 230 | pr.register(r'https?://(www\.)tiktok\.com/\S+', Provider('https://www.tiktok.com/oembed')) 231 | pr.register(r'https?://(www\.)?twitter\.com/\S+/status(es)?/\S+', Provider('https://publish.twitter.com/oembed')) 232 | 233 | # v 234 | pr.register(r'http://(?:player\.)?vimeo\.com/\S+', Provider('http://vimeo.com/api/oembed.json')) 235 | pr.register(r'https://(?:player\.)?vimeo\.com/\S+', Provider('https://vimeo.com/api/oembed.json')) 236 | 237 | # w 238 | pr.register(r'http://\S+\.wordpress\.com/\S+', Provider('http://public-api.wordpress.com/oembed/')) 239 | pr.register(r'https?://wordpress\.tv/\S+', Provider('http://wordpress.tv/oembed/')) 240 | 241 | # y 242 | pr.register(r'http://(\S*\.)?youtu(\.be/|be\.com/watch)\S+', Provider('https://www.youtube.com/oembed')) 243 | pr.register(r'https://(\S*\.)?youtu(\.be/|be\.com/watch)\S+', Provider('https://www.youtube.com/oembed?scheme=https&')) 244 | 245 | return pr 246 | 247 | 248 | def bootstrap_embedly(cache=None, registry=None, refresh=False, **params): 249 | endpoint = 'http://api.embed.ly/1/oembed' 250 | schema_url = 'http://api.embed.ly/1/services/python' 251 | 252 | pr = registry or ProviderRegistry(cache) 253 | 254 | # fetch the schema 255 | contents = fetch_cache(cache, schema_url, refresh=refresh) 256 | json_data = json.loads(contents) 257 | 258 | for provider_meta in json_data: 259 | for regex in provider_meta['regex']: 260 | pr.register(regex, Provider(endpoint, **params)) 261 | return pr 262 | 263 | 264 | def bootstrap_noembed(cache=None, registry=None, refresh=False, **params): 265 | endpoint = 'http://noembed.com/embed' 266 | schema_url = 'http://noembed.com/providers' 267 | 268 | pr = registry or ProviderRegistry(cache) 269 | 270 | # fetch the schema 271 | contents = fetch_cache(cache, schema_url, refresh=refresh) 272 | json_data = json.loads(contents) 273 | 274 | for provider_meta in 
json_data: 275 | for regex in provider_meta['patterns']: 276 | pr.register(regex, Provider(endpoint, **params)) 277 | return pr 278 | 279 | 280 | def bootstrap_oembed(cache=None, registry=None, refresh=False, **params): 281 | schema_url = 'https://oembed.com/providers.json' 282 | pr = registry or ProviderRegistry(cache) 283 | 284 | # Fetch schema. 285 | contents = fetch_cache(cache, schema_url, refresh=refresh) 286 | json_data = json.loads(contents) 287 | 288 | for item in json_data: 289 | for endpoint in reversed(item['endpoints']): 290 | # Possibly this provider only supports discovery via tags, 291 | # which is not supported by micawber. 292 | if 'schemes' not in endpoint: 293 | continue 294 | 295 | # Consists of one or more schemes, a destination URL and optionally 296 | # a format, e.g. "json". 297 | url = endpoint['url'] 298 | if '{format}' in url: 299 | url = url.replace('{format}', 'json') 300 | 301 | provider = Provider(url, **params) 302 | for scheme in endpoint['schemes']: 303 | # If a question-mark is being used, it is for the query-string 304 | # and should be treated as a literal. 305 | scheme = scheme.replace('?', r'\?') 306 | 307 | # Transform the raw pattern into a reasonable regex. Match one 308 | # or more of any character that is not a slash, whitespace, or 309 | # a parameter used for separating querystring/url params. 310 | pattern = scheme.replace('*', r'[^\/\s\?&]+?') 311 | try: 312 | re.compile(pattern) 313 | except re.error: 314 | logger.exception('oembed.com provider %s regex could not ' 315 | 'be compiled: %s', url, pattern) 316 | continue 317 | 318 | pr.register(pattern, provider) 319 | 320 | # Currently oembed.com does not provide patterns for YouTube, so we'll add 321 | # these ourselves. 
322 | pr.register(r'http://(\S*\.)?youtu(\.be/|be\.com/watch)\S+', 323 | Provider('https://www.youtube.com/oembed')) 324 | pr.register(r'https://(\S*\.)?youtu(\.be/|be\.com/watch)\S+', 325 | Provider('https://www.youtube.com/oembed?scheme=https&')) 326 | 327 | return pr 328 | -------------------------------------------------------------------------------- /micawber/test_utils.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | try: 3 | import simplejson as json 4 | except ImportError: 5 | import json 6 | 7 | from micawber import * 8 | from micawber.parsers import BeautifulSoup, bs_kwargs 9 | from micawber.providers import make_key 10 | 11 | 12 | class TestProvider(Provider): 13 | test_data = { 14 | # link 15 | 'link?format=json&url=http%3A%2F%2Flink-test1': {'title': 'test1', 'type': 'link'}, 16 | 'link?format=json&url=http%3A%2F%2Flink-test2': {'title': 'test2', 'type': 'link'}, 17 | 18 | # photo 19 | 'photo?format=json&url=http%3A%2F%2Fphoto-test1': {'title': 'ptest1', 'url': 'test1.jpg', 'type': 'photo'}, 20 | 'photo?format=json&url=http%3A%2F%2Fphoto-test2': {'title': 'ptest2', 'url': 'test2.jpg', 'type': 'photo'}, 21 | 22 | # video 23 | 'video?format=json&url=http%3A%2F%2Fvideo-test1': {'title': 'vtest1', 'html': 'video', 'type': 'video'}, 24 | 'video?format=json&url=http%3A%2F%2Fvideo-test2': {'title': 'vtest2', 'html': 'video', 'type': 'video'}, 25 | 26 | # rich 27 | 'rich?format=json&url=http%3A%2F%2Frich-test1': {'title': 'rtest1', 'html': 'rich', 'type': 'rich'}, 28 | 'rich?format=json&url=http%3A%2F%2Frich-test2': {'title': 'rtest2', 'html': 'rich', 'type': 'rich'}, 29 | 30 | # with param 31 | 'link?format=json&url=http%3A%2F%2Flink-test1&width=100': {'title': 'test1', 'type': 'link', 'width': 99}, 32 | 33 | # no title 34 | 'photo?format=json&url=http%3A%2F%2Fphoto-notitle': {'url': 'notitle.jpg', 'type': 'photo'}, 35 | } 36 | 37 | def fetch(self, url): 38 | if url in self.test_data: 39 | return 
json.dumps(self.test_data[url]) 40 | return False 41 | 42 | test_pr = ProviderRegistry() 43 | 44 | test_cache = Cache() 45 | test_pr_cache = ProviderRegistry(test_cache) 46 | 47 | for pr in (test_pr, test_pr_cache): 48 | pr.register(r'http://link\S*', TestProvider('link')) 49 | pr.register(r'http://photo\S*', TestProvider('photo')) 50 | pr.register(r'http://video\S*', TestProvider('video')) 51 | pr.register(r'http://rich\S*', TestProvider('rich')) 52 | 53 | class BaseTestCase(unittest.TestCase): 54 | def setUp(self): 55 | test_cache._cache = {} 56 | 57 | self.full_pairs = { 58 | 'http://link-test1': 'test1', 59 | 'http://photo-test2': 'ptest2', 60 | 'http://video-test1': 'video', 61 | 'http://rich-test2': 'rich', 62 | 'http://photo-notitle': 'notitle.jpg', 63 | } 64 | 65 | self.inline_pairs = { 66 | 'http://link-test1': 'test1', 67 | 'http://photo-test2': 'ptest2', 68 | 'http://video-test1': 'vtest1', 69 | 'http://rich-test2': 'rtest2', 70 | 'http://rich-test2': 'rtest2', 71 | 'http://photo-notitle': 'notitle.jpg', 72 | } 73 | 74 | self.data_pairs = { 75 | 'http://link-test1': {'title': 'test1', 'type': 'link'}, 76 | 'http://photo-test2': {'title': 'ptest2', 'url': 'test2.jpg', 'type': 'photo'}, 77 | 'http://video-test1': {'title': 'vtest1', 'html': 'video', 'type': 'video'}, 78 | 'http://rich-test2': {'title': 'rtest2', 'html': 'rich', 'type': 'rich'}, 79 | 'http://photo-notitle': {'url': 'notitle.jpg', 'type': 'photo'}, 80 | } 81 | 82 | def assertCached(self, url, data, **params): 83 | key = make_key(url, params) 84 | self.assertTrue(key in test_cache._cache) 85 | self.assertEqual(test_cache._cache[key], data) 86 | 87 | 88 | def assertHTMLEqual(self, first, second, msg=None): 89 | first = BeautifulSoup(first, **bs_kwargs) 90 | second = BeautifulSoup(second, **bs_kwargs) 91 | self.assertEqual(first, second, msg) 92 | -------------------------------------------------------------------------------- /micawber/tests.py: 
-------------------------------------------------------------------------------- 1 | import sys 2 | import unittest 3 | 4 | from micawber import * 5 | from micawber.test_utils import test_pr, test_cache, test_pr_cache, TestProvider, BaseTestCase 6 | 7 | 8 | class ProviderTestCase(BaseTestCase): 9 | def test_register_unregister(self): 10 | pr = ProviderRegistry() 11 | provider1 = TestProvider('link') 12 | provider2 = TestProvider('link') 13 | pr.register('1', provider1) 14 | pr.register('2', provider1) 15 | pr.register('3', provider2) 16 | pr.unregister('2') 17 | self.assertEqual(len(pr._registry), 2) 18 | 19 | # Multiple calls to remove() are OK. 20 | self.assertRaises(KeyError, pr.unregister, '2') 21 | 22 | self.assertEqual(pr.provider_for_url('1'), provider1) 23 | self.assertEqual(pr.provider_for_url('2'), None) 24 | self.assertEqual(pr.provider_for_url('3'), provider2) 25 | 26 | pr.unregister('1') 27 | pr.unregister('3') 28 | self.assertEqual(len(pr._registry), 0) 29 | for test_regex in ['1', '2', '3']: 30 | self.assertEqual(pr.provider_for_url(test_regex), None) 31 | 32 | def test_multiple_matches(self): 33 | pr = ProviderRegistry() 34 | provider1 = TestProvider('link') 35 | provider2 = TestProvider('link') 36 | pr.register(r'1(\d+)', provider1) 37 | pr.register(r'1\d+', provider2) 38 | self.assertEqual(pr.provider_for_url('11'), provider2) 39 | pr.unregister(r'1\d+') 40 | self.assertEqual(pr.provider_for_url('11'), provider1) 41 | 42 | def test_provider_matching(self): 43 | provider = test_pr.provider_for_url('http://link-test1') 44 | self.assertFalse(provider is None) 45 | self.assertEqual(provider.endpoint, 'link') 46 | 47 | provider = test_pr.provider_for_url('http://photo-test1') 48 | self.assertFalse(provider is None) 49 | self.assertEqual(provider.endpoint, 'photo') 50 | 51 | provider = test_pr.provider_for_url('http://video-test1') 52 | self.assertFalse(provider is None) 53 | self.assertEqual(provider.endpoint, 'video') 54 | 55 | provider = 
test_pr.provider_for_url('http://rich-test1') 56 | self.assertFalse(provider is None) 57 | self.assertEqual(provider.endpoint, 'rich') 58 | 59 | provider = test_pr.provider_for_url('http://none-test1') 60 | self.assertTrue(provider is None) 61 | 62 | def test_provider(self): 63 | resp = test_pr.request('http://link-test1') 64 | self.assertEqual(resp, {'title': 'test1', 'type': 'link', 'url': 'http://link-test1'}) 65 | 66 | resp = test_pr.request('http://photo-test2') 67 | self.assertEqual(resp, {'title': 'ptest2', 'type': 'photo', 'url': 'test2.jpg'}) 68 | 69 | resp = test_pr.request('http://video-test1') 70 | self.assertEqual(resp, {'title': 'vtest1', 'type': 'video', 'html': 'video', 'url': 'http://video-test1'}) 71 | 72 | resp = test_pr.request('http://link-test1', width=100) 73 | self.assertEqual(resp, {'title': 'test1', 'type': 'link', 'url': 'http://link-test1', 'width': 99}) 74 | 75 | self.assertRaises(ProviderException, test_pr.request, 'http://not-here') 76 | self.assertRaises(ProviderException, test_pr.request, 'http://link-test3') 77 | 78 | def test_caching(self): 79 | resp = test_pr_cache.request('http://link-test1') 80 | self.assertCached('http://link-test1', resp) 81 | 82 | # check that its the same as what we tested in the previous case 83 | resp2 = test_pr.request('http://link-test1') 84 | self.assertEqual(resp, resp2) 85 | 86 | resp = test_pr_cache.request('http://photo-test2') 87 | self.assertCached('http://photo-test2', resp) 88 | 89 | resp = test_pr_cache.request('http://video-test1') 90 | self.assertCached('http://video-test1', resp) 91 | 92 | self.assertEqual(len(test_cache._cache), 3) 93 | 94 | def test_caching_params(self): 95 | resp = test_pr_cache.request('http://link-test1') 96 | self.assertCached('http://link-test1', resp) 97 | 98 | resp_p = test_pr_cache.request('http://link-test1', width=100) 99 | self.assertCached('http://link-test1', resp_p, width=100) 100 | 101 | self.assertFalse(resp == resp_p) 102 | 103 | def 
test_invalid_json(self): 104 | pr = ProviderRegistry() 105 | class BadProvider(Provider): 106 | def fetch(self, url): 107 | return 'bad' 108 | pr.register('http://bad', BadProvider('link')) 109 | self.assertRaises(InvalidResponseException, pr.request, 'http://bad') 110 | 111 | 112 | class ParserTestCase(BaseTestCase): 113 | def test_parse_text_full(self): 114 | for url, expected in self.full_pairs.items(): 115 | parsed = test_pr.parse_text_full(url) 116 | self.assertHTMLEqual(parsed, expected) 117 | 118 | # the parse_text_full will replace even inline content 119 | for url, expected in self.full_pairs.items(): 120 | parsed = test_pr.parse_text_full('this is inline: %s' % url) 121 | self.assertHTMLEqual(parsed, 'this is inline: %s' % expected) 122 | 123 | for url, expected in self.full_pairs.items(): 124 | parsed = test_pr.parse_html('

%s

' % url) 125 | self.assertHTMLEqual(parsed, '

%s

' % expected) 126 | 127 | def test_parse_text(self): 128 | for url, expected in self.inline_pairs.items(): 129 | parsed = test_pr.parse_text('this is inline: %s' % url) 130 | self.assertHTMLEqual(parsed, 'this is inline: %s' % expected) 131 | 132 | # We can disable parsing inline links by specifying block_handler=None. 133 | for url, expected in self.inline_pairs.items(): 134 | parsed = test_pr.parse_text('this is inline: %s' % url, block_handler=None) 135 | self.assertEqual(parsed, 'this is inline: %s' % url) 136 | 137 | # if the link comes on its own line it gets included in full 138 | for url, expected in self.full_pairs.items(): 139 | parsed = test_pr.parse_text(url) 140 | self.assertHTMLEqual(parsed, expected) 141 | 142 | # Specifying block_handler=None only applies to inline links, so 143 | # the behavior is the same for standalone links. 144 | parsed = test_pr.parse_text(url, block_handler=None) 145 | self.assertHTMLEqual(parsed, expected) 146 | 147 | # links inside block tags will render as inline 148 | frame = '

Testing %s

' 149 | for url, expected in self.inline_pairs.items(): 150 | parsed = test_pr.parse_html(frame % (url)) 151 | self.assertHTMLEqual(parsed, frame % (expected)) 152 | 153 | # links inside tags won't change at all 154 | frame = '

%s

' 155 | for url, expected in self.inline_pairs.items(): 156 | parsed = test_pr.parse_html(frame % (url, url)) 157 | self.assertHTMLEqual(parsed, frame % (url, url)) 158 | 159 | # links within tags within a tags are fine too 160 | frame = '

%s

' 161 | for url, expected in self.inline_pairs.items(): 162 | parsed = test_pr.parse_html(frame % (url, url)) 163 | self.assertHTMLEqual(parsed, frame % (url, url)) 164 | 165 | def test_multiline(self): 166 | for url, expected in self.full_pairs.items(): 167 | expected_inline = self.inline_pairs[url] 168 | frame = 'this is inline: %s\n%s\nand yet another %s' 169 | 170 | test_str = frame % (url, url, url) 171 | 172 | parsed = test_pr.parse_text(test_str) 173 | self.assertHTMLEqual(parsed, frame % (expected_inline, expected, expected_inline)) 174 | 175 | # On multi-line text, if we specify block_handler=None, only standalone 176 | # links will be handled. 177 | for url, expected in self.full_pairs.items(): 178 | frame = 'this is inline: %s\n%s\nand yet another %s' 179 | test_str = frame % (url, url, url) 180 | 181 | parsed = test_pr.parse_text(test_str, block_handler=None) 182 | self.assertHTMLEqual(parsed, frame % (url, expected, url)) 183 | 184 | for url, expected in self.full_pairs.items(): 185 | expected_inline = self.inline_pairs[url] 186 | frame = '%s\nthis is inline: %s\n%s' 187 | 188 | test_str = frame % (url, url, url) 189 | 190 | parsed = test_pr.parse_text(test_str) 191 | self.assertHTMLEqual(parsed, frame % (expected, expected_inline, expected)) 192 | 193 | # test mixing multiline with p tags 194 | for url, expected in self.full_pairs.items(): 195 | expected_inline = self.inline_pairs[url] 196 | frame = '

%s

\n

this is inline: %s

\n

\n%s\n

last test\n%s\n

' 197 | 198 | test_str = frame % (url, url, url, url) 199 | 200 | parsed = test_pr.parse_html(test_str) 201 | self.assertHTMLEqual(parsed, frame % (expected, expected_inline, expected, expected_inline)) 202 | 203 | for url, expected in self.full_pairs.items(): 204 | expected_inline = self.inline_pairs[url] 205 | frame = '

%s

\n

this is inline: %s

\n

last test\n%s\n

' 206 | 207 | test_str = frame % (url, url, url) 208 | 209 | parsed = test_pr.parse_html(test_str) 210 | self.assertHTMLEqual(parsed, frame % (url, expected_inline, expected_inline)) 211 | 212 | def test_multiline_full(self): 213 | for url, expected in self.full_pairs.items(): 214 | frame = 'this is inline: %s\n%s\nand yet another %s' 215 | 216 | test_str = frame % (url, url, url) 217 | 218 | parsed = test_pr.parse_text_full(test_str) 219 | self.assertHTMLEqual(parsed, frame % (expected, expected, expected)) 220 | 221 | def test_urlize(self): 222 | blank = 'http://fapp.io/foo/' 223 | blank_e = 'http://fapp.io/foo/' 224 | for url, expected in self.full_pairs.items(): 225 | expected_inline = self.inline_pairs[url] 226 | frame = 'test %s\n%s\n%s\nand finally %s' 227 | 228 | test_str = frame % (url, blank, url, blank) 229 | 230 | parsed = test_pr.parse_text(test_str) 231 | self.assertHTMLEqual(parsed, frame % (expected_inline, blank_e, expected, blank_e)) 232 | 233 | parsed = test_pr.parse_text(test_str, urlize_all=False) 234 | self.assertHTMLEqual(parsed, frame % (expected_inline, blank, expected, blank)) 235 | 236 | parsed = test_pr.parse_text_full(test_str) 237 | self.assertHTMLEqual(parsed, frame % (expected, blank_e, expected, blank_e)) 238 | 239 | parsed = test_pr.parse_text_full(test_str, urlize_all=False) 240 | self.assertHTMLEqual(parsed, frame % (expected, blank, expected, blank)) 241 | 242 | parsed = test_pr.parse_html(test_str) 243 | self.assertHTMLEqual(parsed, frame % (expected_inline, blank_e, expected_inline, blank_e)) 244 | 245 | parsed = test_pr.parse_html(test_str, urlize_all=False) 246 | self.assertHTMLEqual(parsed, frame % (expected_inline, blank, expected_inline, blank)) 247 | 248 | frame = '

test %s

\n%s\n%s\n

and finally %s

' 249 | 250 | test_str = frame % (url, blank, url, blank) 251 | 252 | parsed = test_pr.parse_html(test_str) 253 | self.assertHTMLEqual(parsed, frame % (expected_inline, blank, url, blank_e)) 254 | 255 | parsed = test_pr.parse_html(test_str, urlize_all=False) 256 | self.assertHTMLEqual(parsed, frame % (expected_inline, blank, url, blank)) 257 | 258 | def test_urlize_params(self): 259 | text = 'test http://foo.com/' 260 | urlize_params = {'target': '_blank', 'rel': 'nofollow'} 261 | exp = ('test ' 262 | 'http://foo.com/') 263 | 264 | result = test_pr.parse_text(text, urlize_params=urlize_params) 265 | self.assertEqual(result, exp) 266 | 267 | result = test_pr.parse_text_full(text, urlize_params=urlize_params) 268 | self.assertEqual(result, exp) 269 | 270 | result = test_pr.parse_html(text, urlize_params=urlize_params) 271 | self.assertEqual(result, exp) 272 | 273 | def test_extract(self): 274 | blank = 'http://fapp.io/foo/' 275 | frame = 'test %s\n%s\n%s\n%s at last' 276 | frame_html = '

test %s

%s %s

%s

' 277 | 278 | for url, expected in self.data_pairs.items(): 279 | text = frame % (url, blank, url, blank) 280 | all_urls, extracted = test_pr.extract(text) 281 | self.assertEqual(all_urls, [url, blank]) 282 | 283 | if 'url' not in expected: 284 | expected['url'] = url 285 | if 'title' not in expected: 286 | expected['title'] = expected['url'] 287 | self.assertEqual(extracted, {url: expected}) 288 | 289 | html = frame_html % (url, url, blank, blank) 290 | all_urls, extracted = test_pr.extract_html(html) 291 | self.assertEqual(all_urls, [url, blank]) 292 | 293 | if 'url' not in expected: 294 | expected['url'] = url 295 | self.assertEqual(extracted, {url: expected}) 296 | 297 | def test_outside_of_markup(self): 298 | frame = '%s

testing

' 299 | for url, expected in self.full_pairs.items(): 300 | parsed = test_pr.parse_html(frame % (url)) 301 | self.assertHTMLEqual(parsed, frame % (expected)) 302 | 303 | def test_html_entities(self): 304 | frame_html = '

test %s

%s

' 305 | 306 | for url, expected in self.data_pairs.items(): 307 | esc_url = url.replace('&', '&') 308 | html = frame_html % (esc_url, esc_url) 309 | all_urls, extracted = test_pr.extract_html(html) 310 | self.assertEqual(all_urls, [url]) 311 | 312 | if 'url' not in expected: 313 | expected['url'] = url 314 | if 'title' not in expected: 315 | expected['title'] = expected['url'] 316 | self.assertEqual(extracted, {url: expected}) 317 | 318 | rendered = test_pr.parse_html('

%s

' % esc_url) 319 | self.assertHTMLEqual(rendered, '

%s

' % self.full_pairs[url]) 320 | 321 | 322 | class TestHTMLEntities(BaseTestCase): 323 | def test_parse_html_entities(self): 324 | e = '<script></script>' 325 | p = '

Test %s

' % e 326 | self.assertEqual(test_pr.parse_html(p), p) 327 | 328 | a = '

http://google.com %s

' % e 329 | self.assertEqual(test_pr.parse_html(a), 330 | '

http://google.com' 331 | ' %s

' % e) 332 | 333 | h = ('

http://foo.com http://bar.com ' 334 | 'http://baz.com <script> ' 335 | 'http://nug.com X <foo>

') 336 | self.assertEqual(test_pr.parse_html(h), ( 337 | '

http://foo.com ' 338 | 'http://bar.com ' 339 | 'http://baz.com <script> ' 340 | 'http://nug.com ' 341 | 'X <foo>

')) 342 | 343 | h = ('

http://foo.com http://bar.com ' 344 | '<script> http://baz.com </script>\n' 345 | 'http://baze.com\n<foo>

') 346 | self.assertEqual(test_pr.parse_html(h), ( 347 | '

http://foo.com ' 348 | 'http://bar.com <script> ' 349 | 'http://baz.com </script>\n' 350 | 'http://baze.com\n' 351 | '<foo>

def run_django_tests():
    """Run the Django integration tests for ``micawber.contrib.mcdjango``.

    Django settings are configured on the fly when nothing has configured
    them yet; otherwise only the two micawber settings are overridden.

    Returns:
        ``None`` when Django cannot be imported (the suite is skipped),
        otherwise the value returned by ``DiscoverRunner().run_tests()``.
        ``runtests()`` treats any truthy value as a failure.
    """
    try:
        import django
    except ImportError:
        print('Skipping django tests')
        return
    else:
        print('Running django integration tests')

    providers = 'micawber.contrib.mcdjango.mcdjango_tests.tests.test_pr'
    extensions = (
        ('oembed_no_urlize', {'urlize_all': False}),
    )

    from django.conf import settings
    if not settings.configured:
        settings.configure(
            DATABASES={
                'default': {
                    'ENGINE': 'django.db.backends.sqlite3',
                },
            },
            SITE_ID=1,
            INSTALLED_APPS=[
                'django.contrib.auth',
                'django.contrib.contenttypes',
                'django.contrib.sessions',
                'django.contrib.sites',
                'micawber.contrib.mcdjango',
                'micawber.contrib.mcdjango.mcdjango_tests',
            ],
            TEMPLATES=[
                {
                    'BACKEND': 'django.template.backends.django.DjangoTemplates',
                    'DIRS': [],
                    'APP_DIRS': True,
                    'OPTIONS': {}
                },
            ],
            MICAWBER_PROVIDERS=providers,
            MICAWBER_TEMPLATE_EXTENSIONS=extensions,
        )
    else:
        settings.MICAWBER_PROVIDERS = providers
        settings.MICAWBER_TEMPLATE_EXTENSIONS = extensions

    # setup() is only called when this Django release exposes it.
    try:
        from django import setup
    except ImportError:
        pass
    else:
        setup()

    from django.test.runner import DiscoverRunner
    project_root = os.path.dirname(os.path.abspath(__file__))
    sys.path.insert(0, project_root)
    # Anchor the test label to this script's directory so discovery does not
    # depend on the current working directory.
    test_dir = os.path.join(project_root, 'micawber', 'contrib', 'mcdjango')
    return DiscoverRunner().run_tests([test_dir])


def runtests(*test_args):
    """Run the core micawber tests, then the Django tests, and exit.

    ``test_args`` is accepted so command-line arguments can be forwarded,
    but it is not used.

    Raises:
        SystemExit: always. The status is 1 when the core suite was not
            successful or the Django run reported a truthy result, else 0.
    """
    print("Running micawber tests")
    suite = unittest.TestLoader().loadTestsFromModule(tests)
    result = unittest.TextTestRunner(verbosity=2).run(suite)

    # wasSuccessful() covers failures, errors and unexpected successes.
    core_ok = result.wasSuccessful()
    if core_ok:
        print("All micawber tests passed")

    # Always run the Django suite, even when the core suite failed.
    dj_failures = run_django_tests()

    sys.exit(1 if (not core_ok or dj_failures) else 0)


if __name__ == '__main__':
    runtests(*sys.argv[1:])
import io
import os

from setuptools import setup, find_packages

# Read the long description through a context manager so the handle is closed
# even if read() raises, and decode it as UTF-8 rather than relying on the
# locale default. io.open is used because the classifiers below still
# advertise Python 2.
readme_path = os.path.join(os.path.dirname(__file__), 'README.rst')
with io.open(readme_path, encoding='utf-8') as f:
    readme = f.read()

setup(
    name='micawber',
    version='0.5.6',
    description='a small library for extracting rich content from urls',
    long_description=readme,
    author='Charles Leifer',
    author_email='coleifer@gmail.com',
    url='http://github.com/coleifer/micawber/',
    # The example projects are not part of the distribution.
    packages=[p for p in find_packages() if not p.startswith('examples')],
    package_data = {
        'micawber': [
            'contrib/mcdjango/templates/micawber/*.html',
        ],
    },
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.2',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Framework :: Django',
    ],
    test_suite='runtests.runtests',
)