├── .gitignore
├── .travis.yml
├── LICENSE
├── MANIFEST.in
├── README.rst
├── docs
    ├── Makefile
    ├── _static
    │   ├── chair-python.png
    │   └── readability-python.png
    ├── auth.rst
    ├── conf.py
    ├── index.rst
    ├── make.bat
    ├── parser.rst
    ├── python_chair.psd
    ├── python_logo.psd
    └── reader.rst
├── readability
    ├── __init__.py
    ├── auth.py
    ├── clients.py
    ├── core.py
    ├── tests
    │   ├── __init__.py
    │   ├── content
    │   │   └── test_post_content.html
    │   ├── test_auth.py
    │   ├── test_clients.py
    │   ├── test_parser.py
    │   └── test_utils.py
    └── utils.py
├── setup.py
└── tox.ini


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | readability/tests/settings.py
3 | *.egg-info
4 | docs/_build/*
5 | build/
6 | dist/
7 | .tox/
8 | .cache/
9 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: python
 3 | python:
 4 |     - "2.7"
 5 |     - "3.5"
 6 |     - "pypy"
 7 |     - "pypy3"
 8 | 
 9 | install:
10 |     - pip install .
11 | 
12 | script: py.test
13 | 
14 | before_install:
15 |       pip install codecov
16 | after_success:
17 |       codecov
18 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2014 by Readability, LLC
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include README.rst LICENSE
2 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | .. image:: https://badge.fury.io/py/readability-api.png
 2 |     :target: http://badge.fury.io/py/readability-api
 3 | 
 4 | .. image:: https://travis-ci.org/arc90/python-readability-api.png
 5 |     :target: https://travis-ci.org/arc90/python-readability-api
 6 | 
 7 | readability-api
 8 | ===============
 9 | 
10 | readability-api is the official python client for Readability. It provides
11 | access to both the Parser API and the Reader API.
12 | 
13 | The latest version can be installed via pip:
14 | 
15 | .. code-block:: bash
16 |     
17 |     pip install readability-api
18 | 
19 | Please refer to the `official docs
20 | <https://readability-python-library.readthedocs.org/en/latest/>`_ for more
21 | information and examples.
22 | 
23 | 
24 | Tests
25 | -----
26 | 
27 | Valid Parser, Reader, username, and password must be set as environment
28 | variables before running the tests. This test suite runs against the live
29 | Readability API and also serves as integration tests. We recommend creating a
30 | separate testing user account on Readability to avoid disturbing your reading
31 | list. **Note:** These tests do reset the bookmarks library of the provided user
32 | when complete. They should *not* be run on your primary user account!
33 | 
34 | .. code-block:: bash
35 | 
36 |     # If you don't have it
37 |     pip install tox
38 | 
39 |     export READABILITY_CONSUMER_KEY='...'
40 |     export READABILITY_CONSUMER_SECRET='...'
41 |     export READABILITY_PARSER_TOKEN='...'
42 |     export READABILITY_USERNAME='...'
43 |     export READABILITY_PASSWORD='...'
44 | 
45 |     tox
46 | 
47 | 
48 | API Keys and Access
49 | -------------------
50 | 
51 | Don't have Readability API keys? You can find them on `your Readability account
52 | settings page <https://www.readability.com/account/api>`_.
53 | 
54 | 
55 | Licensing
56 | ---------
57 | 
58 | The code for readability-api is licensed under the `MIT License
59 | <http://opensource.org/licenses/MIT>`_
60 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile for Sphinx documentation
  2 | #
  3 | 
  4 | # You can set these variables from the command line.
  5 | SPHINXOPTS    =
  6 | SPHINXBUILD   = sphinx-build
  7 | PAPER         =
  8 | BUILDDIR      = _build
  9 | 
 10 | # User-friendly check for sphinx-build
 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 13 | endif
 14 | 
 15 | # Internal variables.
 16 | PAPEROPT_a4     = -D latex_paper_size=a4
 17 | PAPEROPT_letter = -D latex_paper_size=letter
 18 | ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 19 | # the i18n builder cannot share the environment and doctrees with the others
 20 | I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 21 | 
 22 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml
 23 | 
 24 | help:
 25 | 	@echo "Please use \`make <target>' where <target> is one of"
 26 | 	@echo "  html       to make standalone HTML files"
 27 | 	@echo "  dirhtml    to make HTML files named index.html in directories"
 28 | 	@echo "  singlehtml to make a single large HTML file"
 29 | 	@echo "  pickle     to make pickle files"
 30 | 	@echo "  json       to make JSON files"
 31 | 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 32 | 	@echo "  qthelp     to make HTML files and a qthelp project"
 33 | 	@echo "  devhelp    to make HTML files and a Devhelp project"
 34 | 	@echo "  epub       to make an epub"
 35 | 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 36 | 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 37 | 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 38 | 	@echo "  text       to make text files"
 39 | 	@echo "  man        to make manual pages"
 40 | 	@echo "  texinfo    to make Texinfo files"
 41 | 	@echo "  info       to make Texinfo files and run them through makeinfo"
 42 | 	@echo "  gettext    to make PO message catalogs"
 43 | 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 44 | 	@echo "  xml        to make Docutils-native XML files"
 45 | 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 46 | 	@echo "  linkcheck  to check all external links for integrity"
 47 | 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 48 | 
 49 | clean:
 50 | 	rm -rf $(BUILDDIR)/*
 51 | 
 52 | html:
 53 | 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 54 | 	@echo
 55 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 56 | 
 57 | dirhtml:
 58 | 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 59 | 	@echo
 60 | 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 61 | 
 62 | singlehtml:
 63 | 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 64 | 	@echo
 65 | 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 66 | 
 67 | pickle:
 68 | 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 69 | 	@echo
 70 | 	@echo "Build finished; now you can process the pickle files."
 71 | 
 72 | json:
 73 | 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 74 | 	@echo
 75 | 	@echo "Build finished; now you can process the JSON files."
 76 | 
 77 | htmlhelp:
 78 | 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 79 | 	@echo
 80 | 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 81 | 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 82 | 
 83 | qthelp:
 84 | 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 85 | 	@echo
 86 | 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 87 | 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 88 | 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/ReadabilityAPIPythonLibrary.qhcp"
 89 | 	@echo "To view the help file:"
 90 | 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/ReadabilityAPIPythonLibrary.qhc"
 91 | 
 92 | devhelp:
 93 | 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 94 | 	@echo
 95 | 	@echo "Build finished."
 96 | 	@echo "To view the help file:"
 97 | 	@echo "# mkdir -p $$HOME/.local/share/devhelp/ReadabilityAPIPythonLibrary"
 98 | 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/ReadabilityAPIPythonLibrary"
 99 | 	@echo "# devhelp"
100 | 
101 | epub:
102 | 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
103 | 	@echo
104 | 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
105 | 
106 | latex:
107 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
108 | 	@echo
109 | 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
110 | 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
111 | 	      "(use \`make latexpdf' here to do that automatically)."
112 | 
113 | latexpdf:
114 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
115 | 	@echo "Running LaTeX files through pdflatex..."
116 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
117 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
118 | 
119 | latexpdfja:
120 | 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
121 | 	@echo "Running LaTeX files through platex and dvipdfmx..."
122 | 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
123 | 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
124 | 
125 | text:
126 | 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
127 | 	@echo
128 | 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
129 | 
130 | man:
131 | 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
132 | 	@echo
133 | 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
134 | 
135 | texinfo:
136 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
137 | 	@echo
138 | 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
139 | 	@echo "Run \`make' in that directory to run these through makeinfo" \
140 | 	      "(use \`make info' here to do that automatically)."
141 | 
142 | info:
143 | 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
144 | 	@echo "Running Texinfo files through makeinfo..."
145 | 	$(MAKE) -C $(BUILDDIR)/texinfo info
146 | 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
147 | 
148 | gettext:
149 | 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
150 | 	@echo
151 | 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
152 | 
153 | changes:
154 | 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
155 | 	@echo
156 | 	@echo "The overview file is in $(BUILDDIR)/changes."
157 | 
158 | linkcheck:
159 | 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
160 | 	@echo
161 | 	@echo "Link check complete; look for any errors in the above output " \
162 | 	      "or in $(BUILDDIR)/linkcheck/output.txt."
163 | 
164 | doctest:
165 | 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
166 | 	@echo "Testing of doctests in the sources finished, look at the " \
167 | 	      "results in $(BUILDDIR)/doctest/output.txt."
168 | 
169 | xml:
170 | 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
171 | 	@echo
172 | 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
173 | 
174 | pseudoxml:
175 | 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
176 | 	@echo
177 | 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
178 | 


--------------------------------------------------------------------------------
/docs/_static/chair-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReadabilityHoldings/python-readability-api/64077ba7408f1b826defdd0242aa854db8f765c7/docs/_static/chair-python.png


--------------------------------------------------------------------------------
/docs/_static/readability-python.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReadabilityHoldings/python-readability-api/64077ba7408f1b826defdd0242aa854db8f765c7/docs/_static/readability-python.png


--------------------------------------------------------------------------------
/docs/auth.rst:
--------------------------------------------------------------------------------
 1 | Authentication
 2 | ==============
 3 | 
 4 | .. |three-legged-twitter| raw:: html
 5 | 
 6 |     <a href="https://dev.twitter.com/oauth/3-legged" target="_blank">three-legged oAuth flow</a>
 7 | 
 8 | Authentication can be accomplished through either a |three-legged-twitter| or
 9 | via xAuth where a username and password are exchanged directly for a user token
10 | and secret.
11 | 
12 | That token and secret are then used to sign requests on behalf of the user.  A
13 | user's credentials should never be stored and are not needed. You should favor
14 | a three legged auth flow if your application can support it. For testing
15 | purposes, or for applications where a redirect flow is prohibitive, you can use
16 | the xauth class to generate the token pair needed to sign Reader API requests.
17 | 
18 | 
19 | 
20 | Client Documentation
21 | --------------------
22 | 
23 | .. autoclass:: readability.auth.xauth
24 |     :members:
25 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import alabaster
  3 | import os
  4 | import sys
  5 | 
  6 | sys.path.insert(0, os.path.abspath('..'))  # repo root (parent of docs/), so autodoc can import the `readability` package
  7 | 
  8 | # -- General configuration -----------------------------------------------------
  9 | 
 10 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode', 'alabaster']
 11 | 
 12 | # Add any paths that contain templates here, relative to this directory.
 13 | templates_path = ['_templates']
 14 | 
 15 | # The suffix of source filenames.
 16 | source_suffix = '.rst'
 17 | 
 18 | # The encoding of source files.
 19 | #source_encoding = 'utf-8-sig'
 20 | 
 21 | # The master toctree document.
 22 | master_doc = 'index'
 23 | 
 24 | # General information about the project.
 25 | project = u'Readability Python API'
 26 | copyright = u'2015, Readability, LLC'
 27 | 
 28 | # The version info for the project you're documenting, acts as replacement for
 29 | # |version| and |release|, also used in various other places throughout the
 30 | # built documents.
 31 | #
 32 | # The short X.Y version.
 33 | version = '1.0.0'
 34 | # The full version, including alpha/beta/rc tags.
 35 | release = version
 36 | 
 37 | # The language for content autogenerated by Sphinx. Refer to documentation
 38 | # for a list of supported languages.
 39 | #language = None
 40 | 
 41 | # There are two options for replacing |today|: either, you set today to some
 42 | # non-false value, then it is used:
 43 | #today = ''
 44 | # Else, today_fmt is used as the format for a strftime call.
 45 | #today_fmt = '%B %d, %Y'
 46 | 
 47 | # List of patterns, relative to source directory, that match files and
 48 | # directories to ignore when looking for source files.
 49 | exclude_patterns = ['_build']
 50 | 
 51 | # The reST default role (used for this markup: `text`) to use for all documents.
 52 | #default_role = None
 53 | 
 54 | # If true, '()' will be appended to :func: etc. cross-reference text.
 55 | #add_function_parentheses = True
 56 | 
 57 | # If true, the current module name will be prepended to all description
 58 | # unit titles (such as .. function::).
 59 | #add_module_names = True
 60 | 
 61 | # If true, sectionauthor and moduleauthor directives will be shown in the
 62 | # output. They are ignored by default.
 63 | #show_authors = False
 64 | 
 65 | # The name of the Pygments (syntax highlighting) style to use.
 66 | pygments_style = 'sphinx'
 67 | 
 68 | # A list of ignored prefixes for module index sorting.
 69 | #modindex_common_prefix = []
 70 | 
 71 | # If true, keep warnings as "system message" paragraphs in the built documents.
 72 | #keep_warnings = False
 73 | 
 74 | 
 75 | # -- Options for HTML output ---------------------------------------------------
 76 | 
 77 | html_theme = 'alabaster'
 78 | html_theme_path = [alabaster.get_path()]
 79 | html_theme_options = {
 80 |     'logo': 'chair-python.png',
 81 |     'description': "Official Python client for Readability's Parser and Reader APIs.",
 82 |     'github_user': 'arc90',
 83 |     'github_repo': 'python-readability-api',
 84 |     'travis_button': 'true',
 85 |     'github_banner': 'true',
 86 | }
 87 | html_sidebars = {
 88 | '**': [
 89 |     'about.html',
 90 |     'navigation.html',
 91 |     'searchbox.html',
 92 |     ]
 93 | }
 94 | 
 95 | 
 96 | # The name for this set of Sphinx documents.  If None, it defaults to
 97 | # "<project> v<release> documentation".
 98 | #html_title = None
 99 | 
100 | # A shorter title for the navigation bar.  Default is the same as html_title.
101 | #html_short_title = None
102 | 
103 | # The name of an image file (relative to this directory) to place at the top
104 | # of the sidebar.
105 | #html_logo = None
106 | 
107 | # The name of an image file (within the static path) to use as favicon of the
108 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
109 | # pixels large.
110 | #html_favicon = None
111 | 
112 | # Add any paths that contain custom static files (such as style sheets) here,
113 | # relative to this directory. They are copied after the builtin static files,
114 | # so a file named "default.css" will overwrite the builtin "default.css".
115 | html_static_path = ['_static']
116 | 
117 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
118 | # using the given strftime format.
119 | #html_last_updated_fmt = '%b %d, %Y'
120 | 
121 | # If true, SmartyPants will be used to convert quotes and dashes to
122 | # typographically correct entities.
123 | #html_use_smartypants = True
124 | 
125 | # Custom sidebar templates, maps document names to template names.
126 | #html_sidebars = {}
127 | 
128 | # Additional templates that should be rendered to pages, maps page names to
129 | # template names.
130 | #html_additional_pages = {}
131 | 
132 | # If false, no module index is generated.
133 | #html_domain_indices = True
134 | 
135 | # If false, no index is generated.
136 | #html_use_index = True
137 | 
138 | # If true, the index is split into individual pages for each letter.
139 | #html_split_index = False
140 | 
141 | # If true, links to the reST sources are added to the pages.
142 | #html_show_sourcelink = True
143 | 
144 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
145 | #html_show_sphinx = True
146 | 
147 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
148 | #html_show_copyright = True
149 | 
150 | # If true, an OpenSearch description file will be output, and all pages will
151 | # contain a <link> tag referring to it.  The value of this option must be the
152 | # base URL from which the finished HTML is served.
153 | #html_use_opensearch = ''
154 | 
155 | # This is the file name suffix for HTML files (e.g. ".xhtml").
156 | #html_file_suffix = None
157 | 
158 | # Output file base name for HTML help builder.
159 | htmlhelp_basename = 'ReadabilityAPIPythonLibrarydoc'
160 | 
161 | 
162 | # -- Options for LaTeX output --------------------------------------------------
163 | 
164 | latex_elements = {
165 | # The paper size ('letterpaper' or 'a4paper').
166 | #'papersize': 'letterpaper',
167 | 
168 | # The font size ('10pt', '11pt' or '12pt').
169 | #'pointsize': '10pt',
170 | 
171 | # Additional stuff for the LaTeX preamble.
172 | #'preamble': '',
173 | }
174 | 
175 | # Grouping the document tree into LaTeX files. List of tuples
176 | # (source start file, target name, title, author, documentclass [howto/manual]).
177 | latex_documents = [
178 |   ('index', 'ReadabilityAPIPythonLibrary.tex', u'Readability API Python Library Documentation',
179 |    u'Readability, LLC', 'manual'),
180 | ]
181 | 
182 | # The name of an image file (relative to this directory) to place at the top of
183 | # the title page.
184 | #latex_logo = None
185 | 
186 | # For "manual" documents, if this is true, then toplevel headings are parts,
187 | # not chapters.
188 | #latex_use_parts = False
189 | 
190 | # If true, show page references after internal links.
191 | #latex_show_pagerefs = False
192 | 
193 | # If true, show URL addresses after external links.
194 | #latex_show_urls = False
195 | 
196 | # Documents to append as an appendix to all manuals.
197 | #latex_appendices = []
198 | 
199 | # If false, no module index is generated.
200 | #latex_domain_indices = True
201 | 
202 | 
203 | # -- Options for manual page output --------------------------------------------
204 | 
205 | # One entry per manual page. List of tuples
206 | # (source start file, name, description, authors, manual section).
207 | man_pages = [
208 |     ('index', 'readabilityapipythonlibrary', u'Readability API Python Library Documentation',
209 |      [u'Readability, LLC'], 1)
210 | ]
211 | 
212 | # If true, show URL addresses after external links.
213 | #man_show_urls = False
214 | 
215 | 
216 | # -- Options for Texinfo output ------------------------------------------------
217 | 
218 | # Grouping the document tree into Texinfo files. List of tuples
219 | # (source start file, target name, title, author,
220 | #  dir menu entry, description, category)
221 | texinfo_documents = [
222 |   ('index', 'ReadabilityAPIPythonLibrary', u'Readability API Python Library Documentation',
223 |    u'Readability, LLC', 'ReadabilityAPIPythonLibrary', 'One line description of project.',
224 |    'Miscellaneous'),
225 | ]
226 | 
227 | # Documents to append as an appendix to all manuals.
228 | #texinfo_appendices = []
229 | 
230 | # If false, no module index is generated.
231 | #texinfo_domain_indices = True
232 | 
233 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
234 | #texinfo_show_urls = 'footnote'
235 | 
236 | # If true, do not generate a @detailmenu in the "Top" node's menu.
237 | #texinfo_no_detailmenu = False
238 | 


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
  1 | Readability Python API
  2 | ======================
  3 | 
  4 | .. |parser-docs| raw:: html
  5 | 
  6 |     <a href="https://www.readability.com/developers/api/parser" target="_blank">Parser</a>
  7 | 
  8 | .. |reader-docs| raw:: html
  9 | 
 10 |     <a href="https://www.readability.com/developers/api/reader" target="_blank">Reader</a>
 11 | 
 12 | .. |repo-link| raw:: html
 13 | 
 14 |     <a href="https://github.com/arc90/python-readability-api" target="_blank">Github</a>
 15 | 
 16 | .. |pypi-link| raw:: html
 17 | 
 18 |     <a href="https://pypi.python.org/pypi/readability-api/" target="_blank">PyPI</a>
 19 | 
 20 | Version |version|
 21 | 
 22 | The official Python client library for the Readability |parser-docs| and
 23 | |reader-docs| APIs.
 24 | 
 25 | Development of the readability-api package is hosted on |repo-link|. The
 26 | package itself is hosted on |pypi-link| and can easily be installed using pip.
 27 | 
 28 | 
 29 | Version 1.0.0 Notice
 30 | --------------------
 31 | 
 32 | Version 1.0 and up have fundamentally changed the objects returned by calls to
 33 | the API. The underlying `requests.Response
 34 | <http://docs.python-requests.org/en/latest/api/#requests.Response>`_ objects
 35 | are returned which greatly increases transparency and ease of development.
 36 | 
 37 | This is a departure from the 0.x releases which provided wrapped objects and
 38 | hid the http request mechanics. These releases also did not use the Requests
 39 | library. Version 1.0 also transitions to using |requests-oauthlib| for oAuth
 40 | support.
 41 | 
 42 | In addition, 1.x introduces python3 support (woohoo!)
 43 | 
 44 | .. |requests-oauthlib| raw:: html
 45 |     
 46 |     <a href="https://github.com/requests/requests-oauthlib" target="_blank">requests-oauthlib</a>
 47 | 
 48 | 
 49 | Installation
 50 | ------------
 51 | 
 52 | .. code-block:: bash
 53 | 
 54 |     pip install readability-api
 55 | 
 56 | 
 57 | Examples
 58 | --------
 59 | 
 60 | Getting a user's favorite bookmarks is easy.
 61 | 
 62 | .. code-block:: python
 63 | 
 64 |     from readability import ReaderClient
 65 | 
 66 |     # If no client credentials are passed to ReaderClient's constructor, they
 67 |     # will be looked for in your environment variables 
 68 |     client = ReaderClient(token_key="a user's key", token_secret="a user's secret")
 69 |     bookmarks_response = client.get_bookmarks(favorite=True)
 70 | 
 71 |     print(bookmarks_response.json())
 72 |     >>> {'bookmarks': [{'user_id': 9999, 'read_percent': u'0.00', ... }
 73 | 
 74 | See :class:`readability.ReaderClient` docs for a complete list of
 75 | available functionality.
 76 | 
 77 | 
 78 | .. code-block:: python
 79 | 
 80 |    from readability import ParserClient
 81 | 
 82 |    parser_client = ParserClient('your_parser_token')
 83 |    parser_response = parser_client.get_article('http://paulgraham.com/altair.html')
 84 |    article = parser_response.json()
 85 | 
 86 |    print(article['title'])
 87 |    >>> "What Microsoft Is this the Altair Basic of?"
 88 | 
 89 |    print(article['content'])
 90 |    >>> "<div><p>February 2015<p>One of the most valuable exercises you can try if you ..."
 91 | 
 92 | See :class:`readability.ParserClient` docs for a complete list of
 93 | available functionality.
 94 | 
 95 | 
 96 | .. toctree::
 97 |     :hidden:
 98 | 
 99 |     Authentication <auth>
100 |     ReaderClient <reader>
101 |     ParserClient <parser>
102 | 


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
  1 | @ECHO OFF
  2 | 
  3 | REM Command file for Sphinx documentation
  4 | 
  5 | if "%SPHINXBUILD%" == "" (
  6 | 	set SPHINXBUILD=sphinx-build
  7 | )
  8 | set BUILDDIR=_build
  9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 10 | set I18NSPHINXOPTS=%SPHINXOPTS% .
 11 | if NOT "%PAPER%" == "" (
 12 | 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 13 | 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 14 | )
 15 | 
 16 | if "%1" == "" goto help
 17 | 
 18 | if "%1" == "help" (
 19 | 	:help
 20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
 21 | 	echo.  html       to make standalone HTML files
 22 | 	echo.  dirhtml    to make HTML files named index.html in directories
 23 | 	echo.  singlehtml to make a single large HTML file
 24 | 	echo.  pickle     to make pickle files
 25 | 	echo.  json       to make JSON files
 26 | 	echo.  htmlhelp   to make HTML files and a HTML help project
 27 | 	echo.  qthelp     to make HTML files and a qthelp project
 28 | 	echo.  devhelp    to make HTML files and a Devhelp project
 29 | 	echo.  epub       to make an epub
 30 | 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 31 | 	echo.  text       to make text files
 32 | 	echo.  man        to make manual pages
 33 | 	echo.  texinfo    to make Texinfo files
 34 | 	echo.  gettext    to make PO message catalogs
 35 | 	echo.  changes    to make an overview over all changed/added/deprecated items
 36 | 	echo.  xml        to make Docutils-native XML files
 37 | 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 38 | 	echo.  linkcheck  to check all external links for integrity
 39 | 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 40 | 	goto end
 41 | )
 42 | 
 43 | if "%1" == "clean" (
 44 | 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 45 | 	del /q /s %BUILDDIR%\*
 46 | 	goto end
 47 | )
 48 | 
 49 | 
 50 | %SPHINXBUILD% 2> nul
 51 | if errorlevel 9009 (
 52 | 	echo.
 53 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 54 | 	echo.installed, then set the SPHINXBUILD environment variable to point
 55 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 56 | 	echo.may add the Sphinx directory to PATH.
 57 | 	echo.
 58 | 	echo.If you don't have Sphinx installed, grab it from
 59 | 	echo.http://sphinx-doc.org/
 60 | 	exit /b 1
 61 | )
 62 | 
 63 | if "%1" == "html" (
 64 | 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 65 | 	if errorlevel 1 exit /b 1
 66 | 	echo.
 67 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 68 | 	goto end
 69 | )
 70 | 
 71 | if "%1" == "dirhtml" (
 72 | 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 73 | 	if errorlevel 1 exit /b 1
 74 | 	echo.
 75 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 76 | 	goto end
 77 | )
 78 | 
 79 | if "%1" == "singlehtml" (
 80 | 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 81 | 	if errorlevel 1 exit /b 1
 82 | 	echo.
 83 | 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 84 | 	goto end
 85 | )
 86 | 
 87 | if "%1" == "pickle" (
 88 | 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 89 | 	if errorlevel 1 exit /b 1
 90 | 	echo.
 91 | 	echo.Build finished; now you can process the pickle files.
 92 | 	goto end
 93 | )
 94 | 
 95 | if "%1" == "json" (
 96 | 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 97 | 	if errorlevel 1 exit /b 1
 98 | 	echo.
 99 | 	echo.Build finished; now you can process the JSON files.
100 | 	goto end
101 | )
102 | 
103 | if "%1" == "htmlhelp" (
104 | 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
105 | 	if errorlevel 1 exit /b 1
106 | 	echo.
107 | 	echo.Build finished; now you can run HTML Help Workshop with the ^
108 | .hhp project file in %BUILDDIR%/htmlhelp.
109 | 	goto end
110 | )
111 | 
112 | if "%1" == "qthelp" (
113 | 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
114 | 	if errorlevel 1 exit /b 1
115 | 	echo.
116 | 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
117 | .qhcp project file in %BUILDDIR%/qthelp, like this:
118 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\ReadabilityAPIPythonLibrary.qhcp
119 | 	echo.To view the help file:
120 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\ReadabilityAPIPythonLibrary.qhc
121 | 	goto end
122 | )
123 | 
124 | if "%1" == "devhelp" (
125 | 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
126 | 	if errorlevel 1 exit /b 1
127 | 	echo.
128 | 	echo.Build finished.
129 | 	goto end
130 | )
131 | 
132 | if "%1" == "epub" (
133 | 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
134 | 	if errorlevel 1 exit /b 1
135 | 	echo.
136 | 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
137 | 	goto end
138 | )
139 | 
140 | if "%1" == "latex" (
141 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
142 | 	if errorlevel 1 exit /b 1
143 | 	echo.
144 | 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
145 | 	goto end
146 | )
147 | 
148 | if "%1" == "latexpdf" (
149 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
150 | 	cd %BUILDDIR%/latex
151 | 	make all-pdf
152 | 	cd %BUILDDIR%/..
153 | 	echo.
154 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
155 | 	goto end
156 | )
157 | 
158 | if "%1" == "latexpdfja" (
159 | 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
160 | 	cd %BUILDDIR%/latex
161 | 	make all-pdf-ja
162 | 	cd %BUILDDIR%/..
163 | 	echo.
164 | 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
165 | 	goto end
166 | )
167 | 
168 | if "%1" == "text" (
169 | 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
170 | 	if errorlevel 1 exit /b 1
171 | 	echo.
172 | 	echo.Build finished. The text files are in %BUILDDIR%/text.
173 | 	goto end
174 | )
175 | 
176 | if "%1" == "man" (
177 | 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
178 | 	if errorlevel 1 exit /b 1
179 | 	echo.
180 | 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
181 | 	goto end
182 | )
183 | 
184 | if "%1" == "texinfo" (
185 | 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
186 | 	if errorlevel 1 exit /b 1
187 | 	echo.
188 | 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
189 | 	goto end
190 | )
191 | 
192 | if "%1" == "gettext" (
193 | 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
194 | 	if errorlevel 1 exit /b 1
195 | 	echo.
196 | 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
197 | 	goto end
198 | )
199 | 
200 | if "%1" == "changes" (
201 | 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
202 | 	if errorlevel 1 exit /b 1
203 | 	echo.
204 | 	echo.The overview file is in %BUILDDIR%/changes.
205 | 	goto end
206 | )
207 | 
208 | if "%1" == "linkcheck" (
209 | 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
210 | 	if errorlevel 1 exit /b 1
211 | 	echo.
212 | 	echo.Link check complete; look for any errors in the above output ^
213 | or in %BUILDDIR%/linkcheck/output.txt.
214 | 	goto end
215 | )
216 | 
217 | if "%1" == "doctest" (
218 | 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
219 | 	if errorlevel 1 exit /b 1
220 | 	echo.
221 | 	echo.Testing of doctests in the sources finished, look at the ^
222 | results in %BUILDDIR%/doctest/output.txt.
223 | 	goto end
224 | )
225 | 
226 | if "%1" == "xml" (
227 | 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
228 | 	if errorlevel 1 exit /b 1
229 | 	echo.
230 | 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
231 | 	goto end
232 | )
233 | 
234 | if "%1" == "pseudoxml" (
235 | 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
236 | 	if errorlevel 1 exit /b 1
237 | 	echo.
238 | 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
239 | 	goto end
240 | )
241 | 
242 | :end
243 | 


--------------------------------------------------------------------------------
/docs/parser.rst:
--------------------------------------------------------------------------------
 1 | Parser API Client
 2 | =================
 3 | 
 4 | The `Parser API
 5 | <http://readability.com/developers/api/parser>`_ is an API for programmatically
 6 | extracting content and metadata from html documents. Unlike the Reader API, the
 7 | Parser API does not require OAuth authentication but rather a single `token`
 8 | query parameter that must be used to sign every request. You can find your
 9 | token by visiting `your Readability account settings page <https://www.readability.com/settings/account>`_.
10 | 
11 | This `token` can then be passed to the constructor or can be set via
12 | environment variables.
13 | 
14 | .. code-block:: bash
15 | 
16 |     export READABILITY_PARSER_TOKEN='your parser token here'
17 | 
18 | .. code-block:: python
19 | 
20 |     from readability import ParserClient
21 |     client = ParserClient(token='your parser token')
22 | 
23 | Under the hood, the `ParserClient` uses the popular `requests
24 | <http://docs.python-requests.org/en/latest/>`_ library. The objects returned by
25 | client calls are instances of `requests.Response
26 | <http://docs.python-requests.org/en/latest/api/#requests.Response>`_.
27 | 
28 | 
29 | 
30 | Client Documentation
31 | --------------------
32 | 
33 | .. autoclass:: readability.ParserClient
34 |     :members:
35 | 


--------------------------------------------------------------------------------
/docs/python_chair.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReadabilityHoldings/python-readability-api/64077ba7408f1b826defdd0242aa854db8f765c7/docs/python_chair.psd


--------------------------------------------------------------------------------
/docs/python_logo.psd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ReadabilityHoldings/python-readability-api/64077ba7408f1b826defdd0242aa854db8f765c7/docs/python_logo.psd


--------------------------------------------------------------------------------
/docs/reader.rst:
--------------------------------------------------------------------------------
 1 | Reader API Client
 2 | =================
 3 | 
 4 | .. |reader-docs| raw:: html
 5 | 
 6 |     <a href="https://www.readability.com/developers/api/reader" target="_blank">Reader API</a>
 7 | 
 8 | .. |account-settings-page| raw:: html
 9 | 
10 |     <a href="https://www.readability.com/settings/account" target="_blank">your Readability account settings page</a>
11 | 
12 | The |reader-docs| client requires four pieces of credential data. A consumer
13 | key and consumer secret can be obtained from |account-settings-page|. In
14 | addition to client credentials, a user's token key and token secret must also
15 | be used for authentication. For more information regarding auth, visit the
16 | `Authentication <auth.html>`_ section of the docs.
17 | 
18 | Your client key and secret can be passed to the constructor directly or set via
19 | environment variables:
20 | 
21 | .. code-block:: bash
22 | 
23 |     export READABILITY_CONSUMER_KEY='your consumer key'
24 |     export READABILITY_CONSUMER_SECRET='your consumer secret'
24 | 
25 | Under the hood, the `ReaderClient` uses the popular `requests
26 | <http://docs.python-requests.org/en/latest/>`_ library. The objects returned by
27 | the ``ReaderClient`` are instances of `requests.Response <http://docs.python-requests.org/en/latest/api/#requests.Response>`_.
28 | 
29 | 
30 | Client Documentation
31 | --------------------
32 | 
33 | .. autoclass:: readability.ReaderClient
34 |     :members:
35 | 


--------------------------------------------------------------------------------
/readability/__init__.py:
--------------------------------------------------------------------------------
1 | # Public interface for the readability package
2 | 
3 | from .clients import ParserClient, ReaderClient
4 | from .auth import xauth
5 | 


--------------------------------------------------------------------------------
/readability/auth.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | readability.auth
 5 | ~~~~~~~~~~~~~~~~
 6 | 
 7 | This module provides the xauth functionality for the Readability
 8 | Reader API.
 9 | 
10 | """
11 | from __future__ import unicode_literals
12 | 
13 | import logging
14 | 
15 | try:
16 |     from urllib.parse import urlencode
17 | except ImportError:
18 |     from urllib import urlencode
19 | try:
20 |     from urllib.parse import parse_qs
21 | except ImportError:
22 |     from urlparse import parse_qs
23 | 
24 | 
25 | import requests
26 | 
27 | from oauthlib.oauth1 import Client
28 | 
29 | from readability.clients import DEFAULT_READER_URL_TEMPLATE
30 | from readability.core import required_from_env
31 | 
logger = logging.getLogger(__name__)
# Path of the xauth token endpoint, relative to the Reader API base URL.
ACCESS_TOKEN_URL = 'oauth/access_token/'



def xauth(base_url_template=DEFAULT_READER_URL_TEMPLATE, **xargs):
    """
    Returns an OAuth token tuple that can be used with clients.ReaderClient.

    The credentials are exchanged for a token by POSTing a body-signed
    OAuth1 request to the ``oauth/access_token/`` resource.

    :param base_url_template: Template for generating Readability API urls.
    :param consumer_key:  Readability consumer key, otherwise read from READABILITY_CONSUMER_KEY.
    :param consumer_secret: Readability consumer secret, otherwise read from READABILITY_CONSUMER_SECRET.
    :param username: A username, otherwise read from READABILITY_USERNAME.
    :param password: A password, otherwise read from READABILITY_PASSWORD.
    :returns: a ``(oauth_token, oauth_token_secret)`` tuple of strings.
    :raises ValueError: if a required credential is missing, or if the
        response does not contain a token (invalid credentials).

    """
    consumer_key = xargs.get('consumer_key') or required_from_env('READABILITY_CONSUMER_KEY')
    consumer_secret = xargs.get('consumer_secret') or required_from_env('READABILITY_CONSUMER_SECRET')
    username = xargs.get('username') or required_from_env('READABILITY_USERNAME')
    password = xargs.get('password') or required_from_env('READABILITY_PASSWORD')

    client = Client(consumer_key, client_secret=consumer_secret, signature_type='BODY')
    url = base_url_template.format(ACCESS_TOKEN_URL)
    params = {
        'x_auth_username': username,
        'x_auth_password': password,
        'x_auth_mode': 'client_auth'
    }

    # With signature_type='BODY' the OAuth parameters are appended to the
    # form-encoded body, so the request has to be declared as a form post.
    uri, headers, body = client.sign(
        url,
        http_method='POST',
        body=urlencode(params),
        headers={'Content-Type': 'application/x-www-form-urlencoded'})

    logger.debug('POST to %s.', uri)
    # `body` is an already-encoded string, so requests will not add a
    # Content-Type on its own; forward the headers produced by the signer.
    response = requests.post(uri, data=body, headers=headers)

    token = parse_qs(response.content)
    try:
        # parse_qs gives us back a dict where each value is a list; the
        # first entry of each list is the value we want.
        return (
            token[b'oauth_token'][0].decode(),
            token[b'oauth_token_secret'][0].decode(),
        )
    except KeyError:
        raise ValueError('Invalid Credentials.')
80 | 


--------------------------------------------------------------------------------
/readability/clients.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | readability.clients
  5 | ~~~~~~~~~~~~~~~~~~~
  6 | 
  7 | This module provides clients for the Reader and Parser APIs.
  8 | 
  9 | """
 10 | 
 11 | import logging
 12 | 
 13 | try:
 14 |     from urllib.parse import urlencode
 15 | except ImportError:
 16 |     from urllib import urlencode
 17 | 
 18 | import requests
 19 | 
 20 | from requests_oauthlib import OAuth1Session
 21 | 
 22 | from readability.core import required_from_env
 23 | from readability.utils import filter_args_to_dict
 24 | 
 25 | logger = logging.getLogger(__name__)
 26 | DEFAULT_READER_URL_TEMPLATE = 'https://www.readability.com/api/rest/v1/{}'
 27 | DEFAULT_PARSER_URL_TEMPLATE = 'https://www.readability.com/api/content/v1/{}'
 28 | ACCEPTED_BOOKMARK_FILTERS = [
 29 |     'added_since',
 30 |     'added_until',
 31 |     'archive',
 32 |     'archived_since',
 33 |     'archived_until',
 34 |     'domain',
 35 |     'favorite',
 36 |     'only_deleted',
 37 |     'opened_since',
 38 |     'opened_until',
 39 |     'page',
 40 |     'per_page',
 41 |     'tags',
 42 |     'updated_since',
 43 |     'updated_until',
 44 | ]
 45 | 
 46 | 
 47 | 
 48 | class ReaderClient(object):
 49 |     """
 50 |     Client for interacting with the Readability Reader API.
 51 | 
 52 |     Docs can be found at `http://www.readability.com/developers/api/reader`.
 53 |     """
 54 |     def __init__(self, token_key, token_secret,
 55 |         base_url_template=DEFAULT_READER_URL_TEMPLATE, **xargs):
 56 |         """
 57 |         Initialize the ReaderClient.
 58 | 
 59 |         :param consumer_key: Reader API key, otherwise read from READABILITY_CONSUMER_KEY.
 60 |         :param consumer_secret: Reader API secret, otherwise read from READABILITY_CONSUMER_SECRET.
 61 |         :param token_key: Readability user token key
 62 |         :param token_secret: Readability user token secret
 63 |         :param base_url_template (optional): Template used to build URL to
 64 |             which requests will be sent. This shouldn't need to be passed as the
 65 |             main purpose for it is testing environments that the user probably
 66 |             doesn't have access to (staging, local dev, etc).
 67 | 
 68 |         """
 69 |         consumer_key = xargs.get('consumer_key') or required_from_env('READABILITY_CONSUMER_KEY')
 70 |         consumer_secret = xargs.get('consumer_secret') or required_from_env('READABILITY_CONSUMER_SECRET')
 71 | 
 72 |         self.base_url_template = base_url_template
 73 |         self.oauth_session = OAuth1Session(consumer_key, consumer_secret, token_key, token_secret)
 74 | 
 75 |     def get(self, url):
 76 |         """
 77 |         Make a HTTP GET request to the Reader API.
 78 | 
 79 |         :param url: url to which to make a GET request.
 80 |         """
 81 |         logger.debug('Making GET request to %s', url)
 82 |         return self.oauth_session.get(url)
 83 | 
 84 |     def post(self, url, post_params=None):
 85 |         """
 86 |         Make a HTTP POST request to the Reader API.
 87 | 
 88 |         :param url: url to which to make a POST request.
 89 |         :param post_params: parameters to be sent in the request's body.
 90 |         """
 91 |         params = urlencode(post_params)
 92 |         logger.debug('Making POST request to %s with body %s', url, params)
 93 |         return self.oauth_session.post(url, data=params)
 94 | 
 95 |     def delete(self, url):
 96 |         """
 97 |         Make a HTTP DELETE request to the Readability API.
 98 | 
 99 |         :param url: The url to which to send a DELETE request.
100 |         """
101 |         logger.debug('Making DELETE request to %s', url)
102 |         return self.oauth_session.delete(url)
103 | 
104 |     def _generate_url(self, resource, query_params=None):
105 |         """
106 |         Generate a Readability URL to the given resource.
107 | 
108 |         :param resource: the path to the resource that the request should
109 |             go to.
110 |         :param query_params (optional): a dict of query params that should
111 |             be added to the url.
112 |         """
113 |         if query_params:
114 |             resource = '{0}?{1}'.format(
115 |                 resource, urlencode(query_params))
116 | 
117 |         return self.base_url_template.format(resource)
118 | 
119 |     def get_article(self, article_id):
120 |         """
121 |         Get a single article represented by `article_id`.
122 | 
123 |         :param article_id: ID of the article to retrieve.
124 |         """
125 |         url = self._generate_url('articles/{0}'.format(article_id))
126 |         return self.get(url)
127 | 
128 |     def get_bookmarks(self, **filters):
129 |         """
130 |         Get Bookmarks for the current user.
131 | 
132 |         Filters:
133 | 
134 |         :param archive: Filter Bookmarks returned by archived status.
135 |         :param favorite: Filter Bookmarks returned by favorite status.
136 |         :param domain: Filter Bookmarks returned by a domain.
137 |         :param added_since: Filter bookmarks by date added (since this date).
138 |         :param added_until: Filter bookmarks by date added (until this date).
139 |         :param opened_since: Filter bookmarks by date opened (since this date).
140 |         :param opened_until: Filter bookmarks by date opened (until this date).
141 |         :param archived_since: Filter bookmarks by date archived (since this date.)
142 |         :param archived_until: Filter bookmarks by date archived (until this date.)
143 |         :param updated_since: Filter bookmarks by date updated (since this date.)
144 |         :param updated_until: Filter bookmarks by date updated (until this date.)
145 |         :param page: What page of results to return. Default is 1.
146 |         :param per_page: How many results to return per page. Default is 20, max is 50.
147 |         :param only_deleted: Return only bookmarks that this user has deleted.
148 |         :param tags: Comma separated string of tags to filter bookmarks.
149 |         """
150 |         filter_dict = filter_args_to_dict(filters, ACCEPTED_BOOKMARK_FILTERS)
151 |         url = self._generate_url('bookmarks', query_params=filter_dict)
152 |         return self.get(url)
153 | 
154 |     def get_bookmark(self, bookmark_id):
155 |         """
156 |         Get a single bookmark represented by `bookmark_id`.
157 | 
158 |         The requested bookmark must belong to the current user.
159 | 
160 |         :param bookmark_id: ID of the bookmark to retrieve.
161 |         """
162 |         url = self._generate_url('bookmarks/{0}'.format(bookmark_id))
163 |         return self.get(url)
164 | 
165 |     def add_bookmark(self, url, favorite=False, archive=False, allow_duplicates=True):
166 |         """
167 |         Adds given bookmark to the authenticated user.
168 | 
169 |         :param url: URL of the article to bookmark
170 |         :param favorite: whether or not the bookmark should be favorited
171 |         :param archive: whether or not the bookmark should be archived
172 |         :param allow_duplicates: whether or not to allow duplicate bookmarks to
173 |             be created for a given url
174 |         """
175 |         rdb_url = self._generate_url('bookmarks')
176 |         params = {
177 |             "url": url,
178 |             "favorite": int(favorite),
179 |             "archive": int(archive),
180 |             "allow_duplicates": int(allow_duplicates)
181 |         }
182 |         return self.post(rdb_url, params)
183 | 
184 |     def update_bookmark(self, bookmark_id, favorite=None, archive=None, read_percent=None):
185 |         """
186 |         Updates given bookmark. The requested bookmark must belong to the
187 |         current user.
188 | 
189 |         :param bookmark_id: ID of the bookmark to update.
190 |         :param favorite (optional): Whether this article is favorited or not.
191 |         :param archive (optional): Whether this article is archived or not.
192 |         :param read_percent (optional): The read progress made in this article,
193 |             where 1.0 means the bottom and 0.0 means the very top.
194 |         """
195 |         rdb_url = self._generate_url('bookmarks/{0}'.format(bookmark_id))
196 |         params = {}
197 |         if favorite is not None:
198 |             params['favorite'] = 1 if favorite == True else 0
199 |         if archive is not None:
200 |             params['archive'] = 1 if archive == True else 0
201 |         if read_percent is not None:
202 |             try:
203 |                 params['read_percent'] = float(read_percent)
204 |             except ValueError:
205 |                 pass
206 |         return self.post(rdb_url, params)
207 | 
208 |     def favorite_bookmark(self, bookmark_id):
209 |         """
210 |         Favorites given bookmark. The requested bookmark must belong to the
211 |         current user.
212 | 
213 |         :param bookmark_id: ID of the bookmark to favorite.
214 |         """
215 |         return self.update_bookmark(bookmark_id, favorite=True)
216 | 
217 |     def archive_bookmark(self, bookmark_id):
218 |         """
219 |         Archives given bookmark. The requested bookmark must belong to the
220 |         current user.
221 | 
222 |         :param bookmark_id: ID of the bookmark to archive.
223 |         """
224 |         return self.update_bookmark(bookmark_id, archive=True)
225 | 
226 |     def set_read_percent_of_bookmark(self, bookmark_id, read_percent):
227 |         """
228 |         Set the read percentage of given bookmark. The requested bookmark must
229 |         belong to the current user.
230 | 
231 |         :param bookmark_id: ID of the bookmark to update.
232 |         :param read_percent: The read progress made in this article,
233 |           where 1.0 means the bottom and 0.0 means the very top.
234 |         """
235 |         return self.update_bookmark(bookmark_id, read_percent=read_percent)
236 | 
237 |     def delete_bookmark(self, bookmark_id):
238 |         """
239 |         Delete a single bookmark represented by `bookmark_id`.
240 | 
241 |         The requested bookmark must belong to the current user.
242 | 
243 |         :param bookmark_id: ID of the bookmark to delete.
244 |         """
245 |         url = self._generate_url('bookmarks/{0}'.format(bookmark_id))
246 |         return self.delete(url)
247 | 
248 |     def get_bookmark_tags(self, bookmark_id):
249 |         """
250 |         Retrieve tags that have been applied to a bookmark.
251 | 
252 |         The requested bookmark must belong to the current user.
253 | 
254 |         :param bookmark_id: ID of the bookmark to delete.
255 |         """
256 |         url = self._generate_url('bookmarks/{0}/tags'.format(bookmark_id))
257 |         return self.get(url)
258 | 
259 |     def add_tags_to_bookmark(self, bookmark_id, tags):
260 |         """
261 |         Add tags to to a bookmark.
262 | 
263 |         The identified bookmark must belong to the current user.
264 | 
265 |         :param bookmark_id: ID of the bookmark to delete.
266 |         :param tags: Comma separated tags to be applied.
267 |         """
268 |         url = self._generate_url('bookmarks/{0}/tags'.format(bookmark_id))
269 |         params = dict(tags=tags)
270 |         return self.post(url, params)
271 | 
272 |     def delete_tag_from_bookmark(self, bookmark_id, tag_id):
273 |         """
274 |         Remove a single tag from a bookmark.
275 | 
276 |         The identified bookmark must belong to the current user.
277 | 
278 |         :param bookmark_id: ID of the bookmark to delete.
279 |         """
280 |         url = self._generate_url('bookmarks/{0}/tags/{1}'.format(
281 |             bookmark_id, tag_id))
282 |         return self.delete(url)
283 | 
284 |     def get_tag(self, tag_id):
285 |         """
286 |         Get a single tag represented by `tag_id`.
287 | 
288 |         The requested tag must belong to the current user.
289 | 
290 |         :param tag_id: ID fo the tag to retrieve.
291 |         """
292 |         url = self._generate_url('tags/{0}'.format(tag_id))
293 |         return self.get(url)
294 | 
295 |     def get_tags(self):
296 |         """
297 |         Get all tags belonging to the current user.
298 |         """
299 |         url = self._generate_url('tags')
300 |         return self.get(url)
301 | 
302 |     def get_user(self):
303 |         """
304 |         Retrives the current user.
305 |         """
306 |         url = self._generate_url('users/_current')
307 |         return self.get(url)
308 | 
309 | 
class ParserClient(object):
    """
    Client for interacting with the Readability Parser API.

    Docs can be found at `http://www.readability.com/developers/api/parser`.
    """
    def __init__(self, base_url_template=DEFAULT_PARSER_URL_TEMPLATE, **xargs):
        """
        Initialize client.

        :param token: parser API token, otherwise read from READABILITY_PARSER_TOKEN.
        :param base_url_template (optional): Template used to build URL to
            which requests will be sent. This shouldn't need to be passed as the
            main purpose for it is testing environments that the user probably
            doesn't have access to (staging, local dev, etc).
        """
        logger.debug('Initializing ParserClient with base url template %s',
            base_url_template)

        self.token = xargs.get('token', None) or required_from_env('READABILITY_PARSER_TOKEN')
        self.base_url_template = base_url_template

    def get(self, url):
        """
        Make an HTTP GET request to the Parser API.

        :param url: url to which to make the request
        """
        logger.debug('Making GET request to %s', url)
        return requests.get(url)

    def head(self, url):
        """
        Make an HTTP HEAD request to the Parser API.

        :param url: url to which to make the request
        """
        logger.debug('Making HEAD request to %s', url)
        return requests.head(url)

    def post(self, url, post_params=None):
        """
        Make an HTTP POST request to the Parser API.

        The client's token is added to the body as the `token` field.

        :param url: url to which to make the request
        :param post_params (optional): POST data to send along. Expected to
            be a dict; it is copied, so the caller's dict is left untouched.
        """
        # Work on a copy: adding the token must not leak into the caller's
        # dict, and the None default must not crash on item assignment.
        payload = dict(post_params or {})
        payload['token'] = self.token
        params = urlencode(payload)
        logger.debug('Making POST request to %s with body %s', url, params)
        return requests.post(url, data=params)

    def _generate_url(self, resource, query_params=None):
        """
        Build the url to resource.

        The token is always the first query parameter, followed by any
        `query_params`.

        :param resource: Name of the resource that is being called. Options are
            `''` (empty string) for root resource, `'parser'`, `'confidence'`.
        :param query_params (optional): Data to be passed as query parameters.
        """
        # Encode the token like every other parameter so that reserved
        # characters in it cannot corrupt the query string.
        query = urlencode({'token': self.token})
        if query_params:
            query = '{0}&{1}'.format(query, urlencode(query_params))
        return self.base_url_template.format('{0}?{1}'.format(resource, query))

    def get_root(self):
        """
        Send a GET request to the root resource of the Parser API.
        """
        url = self._generate_url('')
        return self.get(url)

    def get_article(self, url=None, article_id=None, max_pages=25):
        """
        Send a GET request to the `parser` endpoint of the parser API to get
        back the representation of an article.

        The article can be identified by either a URL or an id that exists
        in Readability.

        Note that either the `url` or `article_id` param should be passed.

        :param url (optional): The url of an article whose content is wanted.
        :param article_id (optional): The id of an article in the Readability
            system whose content is wanted.
        :param max_pages: The maximum number of pages to parse and combine.
            The default is 25.
        """
        query_params = {}
        if url is not None:
            query_params['url'] = url
        if article_id is not None:
            query_params['article_id'] = article_id
        query_params['max_pages'] = max_pages
        url = self._generate_url('parser', query_params=query_params)
        return self.get(url)

    def post_article_content(self, content, url, max_pages=25):
        """
        POST content to be parsed to the Parser API.

        Note: Even when POSTing content, a url must still be provided.

        :param content: the content to be parsed
        :param url: the url that represents the content
        :param max_pages (optional): the maximum number of pages to parse
            and combine. Default is 25.
        """
        params = {
            'doc': content,
            'max_pages': max_pages
        }
        url = self._generate_url('parser', {"url": url})
        return self.post(url, post_params=params)

    def get_article_status(self, url=None, article_id=None):
        """
        Send a HEAD request to the `parser` endpoint of the parser API to
        get the article's status.

        Returned is a `requests.Response` object. The id and status for the
        article can be extracted from the `X-Article-Id` and `X-Article-Status`
        headers.

        Note that either the `url` or `article_id` param should be passed.

        :param url (optional): The url of an article whose status is wanted.
        :param article_id (optional): The id of an article in the Readability
            system whose status is wanted.
        """
        query_params = {}
        if url is not None:
            query_params['url'] = url
        if article_id is not None:
            query_params['article_id'] = article_id
        url = self._generate_url('parser', query_params=query_params)
        return self.head(url)

    def get_confidence(self, url=None, article_id=None):
        """
        Send a GET request to the `confidence` endpoint of the Parser API.

        Note that either the `url` or `article_id` param should be passed.

        :param url (optional): The url of the article to query.
        :param article_id (optional): The id of the article in the
            Readability system to query.
        """
        query_params = {}
        if url is not None:
            query_params['url'] = url
        if article_id is not None:
            query_params['article_id'] = article_id
        url = self._generate_url('confidence', query_params=query_params)
        return self.get(url)
465 | 


--------------------------------------------------------------------------------
/readability/core.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | 
 4 | def required_from_env(key):
 5 |     """
 6 |     Retrieve a required variable from the current environment variables.
 7 | 
 8 |     Raises a ValueError if the env variable is not found or has no value.
 9 | 
10 |     """
11 |     val = os.environ.get(key)
12 |     if not val:
13 |         raise ValueError(
14 |             "Required argument '{}' not supplied and not found in environment variables".format(key))
15 |     return val
16 | 


--------------------------------------------------------------------------------
/readability/tests/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | test_root = os.path.dirname(os.path.realpath(__file__))
 4 | 
 5 | 
 6 | def load_test_content(filename):
 7 |     """
 8 |     Load the contents of the file for use in tests.
 9 | 
10 |     Useful for canned responses / post content
11 |     """
12 |     with open(os.path.join(test_root, filename), 'r') as testfile:
13 |         return testfile.read()
14 | 


--------------------------------------------------------------------------------
/readability/tests/content/test_post_content.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
  2 | <html xmlns="http://www.w3.org/1999/xhtml"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  3 | <title>Readability v1 Parser API</title><style type="text/css">
  4 |                     body {
  5 |                         font-family: sans-serif;
  6 |                         font: 0.8em/1.4 Arial, sans-serif;
  7 |                         margin: 2em 6em;
  8 |                         width: 65em;
  9 |                     }
 10 |                     pre {
 11 |                         font-family: Courier, monospace;
 12 |                         font-weight: 500;
 13 |                         font-size: 0.8em;
 14 |                         background-color: #eef;
 15 |                         padding: 1em;
 16 |                     }
 17 |                     .methods {
 18 |                         background-color: #e4e4e4;
 19 |                         margin-top: .4em;
 20 |                         padding: .6em;
 21 |                     }
 22 |                     .methods h4 {
 23 |                         border-bottom: 1px solid #fff;
 24 |                         padding: .1em 0;
 25 |                         margin-bottom: .4em;
 26 |                         color: #0b3c97;
 27 |                         font-size: 1.1em;
 28 |                     }
 29 |                     .methods h6 {
 30 |                         color: #666;
 31 |                         text-transform: lowercase;
 32 |                         margin: .6em 0 .3em;
 33 |                     }
 34 |                     .resource {
 35 |                         margin-bottom: 2em;
 36 |                         margin-top: .4em;
 37 |                     }
 38 |                     .resource h3 {
 39 |                         margin-bottom: .4em;
 40 |                         font-size: 1.4em;
 41 |                         color: #ff5700;
 42 |                     }
 43 |                     h1 {
 44 |                         font-size: 2.5em;
 45 |                     }
 46 |                     h2 {
 47 |                         border-bottom: 1px solid black;
 48 |                         margin-top: 1em;
 49 |                         color: #666;
 50 |                         margin-bottom: 0.5em;
 51 |                         font-size: 2em;
 52 |                     }
 53 |                     h3 {
 54 |                         font-size: 1.75em;
 55 |                         margin: 0.6em 0;
 56 |                     }
 57 |                     h4 {
 58 |                         color: #666;
 59 |                         margin: 0;
 60 |                         padding: 0.3em 0;
 61 |                         border-bottom: 2px solid white;
 62 |                     }
 63 |                     h6 {
 64 |                         font-size: 1.1em;
 65 |                         color: #99a;
 66 |                         margin: 0.5em 0em 0.25em 0em;
 67 |                     }
 68 |                     dd {
 69 |                         margin-left: 1em;
 70 |                     }
 71 |                     tt {
 72 |                         font-size: 1.2em;
 73 |                     }
 74 |                     table {
 75 |                         margin-bottom: 0.5em;
 76 |                         width: 100%;
 77 |                         border-collapse: collapse;
 78 |                     }
 79 |                     th {
 80 |                         text-align: left;
 81 |                         font-weight: normal;
 82 |                         color: black;
 83 |                         border-bottom: 1px solid black;
 84 |                         padding: 3px 6px;
 85 |                     }
 86 |                     td {
 87 |                         padding: 3px 6px;
 88 |                         vertical-align: top;
 89 |                         background-color: #f6f6ff;
 90 |                         font-size: 0.85em;
 91 |                     }
 92 |                     td p {
 93 |                         margin: 0px;
 94 |                     }
 95 |                     ul {
 96 |                         padding-left: 1.75em;
 97 |                     }
 98 |                     p + ul, p + ol, p + dl {
 99 |                         margin-top: 0em;
100 |                     }
101 |                     .optional {
102 |                         font-weight: normal;
103 |                         opacity: 0.75;
104 |                     }
105 |                 </style><link href="prettify/prettify.css" type="text/css" rel="stylesheet"></link><script type="text/javascript" src="prettify/prettify.js"></script></head><body onload="prettyPrint()"><h1>Readability v1 Parser API</h1>
106 |         <section>
107 |             <h2 id="authentication">Authentication</h2>
108 |             <p>
109 |                 Requests to the Parser API are not signed like an OAuth
110 |                 request.  The Parser token is simply passed as a POST or GET
111 |                 parameter depending on the request type. Be careful not to
112 |                 reveal this token, requests directly to the Parser API should
113 |                 not be made on the client device but rather proxied to keep the
114 |                 API token secure.
115 |             </p>
116 |         </section>
117 | 
118 |         <section>
119 |             <h2 id="quick-start">Quick Start</h2>
120 |             <p class="section-intro">
121 |                         Here's how to pull an article's content from the Readability Parser API:
122 |             </p>
123 |             <h4>Request</h4>
124 |             <pre>GET /api/content/v1/parser?url=http://blog.readability.com/2011/02/step-up-be-heard-readability-ideas/&amp;token=1b830931777ac7c2ac954e9f0d67df437175e66e</pre>
125 |             <h4>Response</h4>
126 |             <pre>
127 | HTTP/1.0 200 OK
128 | {
129 |     "content": "&lt;div class=\"article-text\"&gt;\n&lt;p&gt;I'm idling outside Diamante's, [snip] ...&lt;/p&gt;&lt;/div&gt;",
130 |     "domain": "www.gq.com",
131 |     "author": "Rafi Kohan",
132 |     "url": "http://www.gq.com/sports/profiles/201202/david-diamante-interview-cigar-lounge-brooklyn-new-jersey-nets?currentPage=all",
133 |     "short_url": "http://rdd.me/g3jcb1sr",
134 |     "title": "Blowing Smoke with Boxing's Big Voice",
135 |     "excerpt": "I'm idling outside Diamante's, a cigar lounge in Fort Greene, waiting for David Diamante, and soon I smell him coming. It's late January but warm. A motorcycle growls down the Brooklyn side street,&amp;hellip;",
136 |     "direction": "ltr",
137 |     "word_count": 2892,
138 |     "total_pages": 1,
139 |     "date_published": null,
140 |     "dek": "Announcer &lt;strong&gt;David Diamante&lt;/strong&gt;, the new voice of the New Jersey (soon Brooklyn) Nets, has been calling boxing matches for years. On the side, he owns a cigar lounge in the heart of Brooklyn. We talk with Diamante about his new gig and the fine art of cigars",
141 |     "lead_image_url": "http://www.gq.com/images/entertainment/2012/02/david-diamante/diamante-628.jpg",
142 |     "next_page_id": null,
143 |     "rendered_pages": 1
144 | }
145 | </pre>
146 |         </section>
147 | 
148 |         <section>
149 |             <h2 id="data-formats">Data Formats</h2>
150 |             <p>
151 |                 All requests are, by default, provided as JSON. You may also pass "?format=xml" in the URL to convert this into XML data to be consumed.
152 |             </p>
153 |         </section>
154 | 
155 |     <h3>Resources, Representations &amp; Errors</h3><ul><li><a href="#resources">Resources</a><ul><li><a href="#idp3728">https://readability.com/api/content/v1/</a></li><li><a href="#idp4080">https://readability.com/api/content/v1/parser</a></li><li><a href="#idp39744">https://readability.com/api/content/v1/confidence</a></li></ul></li><li><a href="#representations">Representations</a><ul><li><a href="#https://readability.com/api/content/v1#rootRepresentation">Example root representation. (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#articleRepresentation">Example article representation. (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#confidenceRepresentation">Example confidence representation. (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#confidenceRepresentationJsonp">Example confidence representation as jsonp. (application/json)</a></li></ul></li><li><a href="#faults">Errors</a><ul><li><a href="#https://readability.com/api/content/v1#error_400">400 Bad Request (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_401">401 Authorization Required (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_500">500 Internal Server Error (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_404">404 Not Found (application/json)</a></li></ul></li></ul><h2 id="resources">Resources</h2><div class="resource"><h3 id="idp3728">/</h3><h6>Methods</h6><div class="methods"><div class="method"><h4 id="idp5008">GET</h4>
156 |                     Retrieve the base API URI - information about subresources.
157 |                 <h6>request header parameters</h6><table><tr><th style="width: 25%">parameter</th><th style="width: 20%">value</th><th>description</th></tr><tr><td><p><strong>Authorization</strong></p></td><td><p><em><a href="" title=""></a></em><small> (required)</small></p></td><td></td></tr></table><p><em>available response representations:</em></p><ul><li><a href="#https://readability.com/api/content/v1#rootRepresentation">Example root representation. (application/json)</a></li></ul></div></div></div><div class="resource"><h3 id="idp4080">/parser?token<span class="optional">&amp;url</span><span class="optional">&amp;id</span><span class="optional">&amp;max_pages</span></h3><h6>Methods</h6><div class="methods"><div class="method"><h4 id="idp36384">GET</h4>
158 |                     Parse an article
159 |                 <h6>request query parameters</h6><table><tr><th style="width: 25%">parameter</th><th style="width: 20%">value</th><th>description</th></tr><tr><td><p><strong>token</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em><small> (required)</small></p></td><td></td></tr><tr><td><p><strong>url</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>The URL of an article to return the content for.</td></tr><tr><td><p><strong>id</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>The ID of an article to return the content for.</td></tr><tr><td><p><strong>max_pages</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#integer">integer</a></em></p></td><td>The maximum number of pages to parse and combine. Default is 25.</td></tr></table><p><em>available response representations:</em></p><ul><li><a href="#https://readability.com/api/content/v1#articleRepresentation">Example article representation. (application/json)</a></li></ul><p><em>potential faults:</em></p><ul><li><a href="#https://readability.com/api/content/v1#error_400">400 Bad Request (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_401">401 Authorization Required (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_500">500 Internal Server Error (application/json)</a></li></ul></div><div class="method"><h4 id="idp63552">HEAD</h4>
160 |                     <p>
161 |                         Retrieve the Content Status of an article. This is useful if you want to save yourself from POSTing a large html document. You can do a HEAD request on the resource, and check for the status of the article in the X-Article-Status header. <strong>Additionally, if we've never seen the article before, we'll return a 404, which also means you should POST.</strong>
162 |                     </p>
163 |                 <h6>request query parameters</h6><table><tr><th style="width: 25%">parameter</th><th style="width: 20%">value</th><th>description</th></tr><tr><td><p><strong>token</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em><small> (required)</small></p></td><td></td></tr><tr><td><p><strong>url</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>The URL of an article to check.</td></tr><tr><td><p><strong>id</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>The ID of an article to check.</td></tr></table><h6>response header parameters</h6><table><tr><th style="width: 25%">parameter</th><th style="width: 20%">value</th><th>description</th></tr><tr><td><p><strong>X-Article-Id</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>
164 |                         <p>The ID of the article within Readability.</p>
165 |                     </td></tr><tr><td><p><strong>X-Article-Status</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>
166 |                         <p>The status of the content in Readability. One of:</p>
167 |                         <dl>
168 |                         <dt>INVALID</dt>
169 |                         <dd>We were unable to parse this URL for some reason. <em>Recommendation: Fail</em></dd>
170 |                         <dt>UNRETRIEVED</dt>
171 |                         <dd>We know of this article, but have not yet retrieved its content, or the cache has expired. <em>Recommendation: POST content to us</em></dd>
172 |                         <dt>PROVIDED_BY_USER</dt>
173 |                         <dd>We have retrieved the content for this URL from at least one user. <em>Recommendation: POST content to us</em></dd>
174 |                         <dt>VALIDATED_BY_USERS</dt>
175 |                         <dd>We have retrieved the content for this URL from multiple users, and have validated it. <em>Recommendation: GET the content from us.</em></dd>
176 |                         <dt>FETCHED</dt>
177 |                         <dd>We fetched the content for this URL manually, and it has been cached. <em>Recommendation: GET the content from us.</em></dd>
178 |                         </dl>
179 |                     </td></tr></table><p><em>potential faults:</em></p><ul><li><a href="#https://readability.com/api/content/v1#error_400">400 Bad Request (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_401">401 Authorization Required (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_404">404 Not Found (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_500">500 Internal Server Error (application/json)</a></li></ul></div></div></div><div class="resource"><h3 id="idp39744">/confidence?url<span class="optional">&amp;callback</span></h3><h6>Methods</h6><div class="methods"><div class="method"><h4 id="idp89296">GET</h4>Detect the confidence with which Readability could parse a given URL. Does not require a token.<h6>request query parameters</h6><table><tr><th style="width: 25%">parameter</th><th style="width: 20%">value</th><th>description</th></tr><tr><td><p><strong>url</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em><small> (required)</small></p></td><td>The URL of an article to return the confidence for.</td></tr><tr><td><p><strong>callback</strong></p></td><td><p><em><a href="http://www.w3.org/TR/xmlschema-2/#string">string</a></em></p></td><td>The jsonp callback function name.</td></tr></table><p><em>available response representations:</em></p><ul><li><a href="#https://readability.com/api/content/v1#confidenceRepresentation">Example confidence representation. (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#confidenceRepresentationJsonp">Example confidence representation as jsonp. 
(application/json)</a></li></ul><p><em>potential faults:</em></p><ul><li><a href="#https://readability.com/api/content/v1#error_400">400 Bad Request (application/json)</a></li><li><a href="#https://readability.com/api/content/v1#error_500">500 Internal Server Error (application/json)</a></li></ul></div></div></div><h2 id="representations">Representations</h2><h3 id="https://readability.com/api/content/v1#rootRepresentation">Example root representation. (application/json)</h3>
180 |             <pre xmlns="http://research.sun.com/wadl/2006/10" class="prettyprint">
181 | {
182 |     "resources": {
183 |         "parser": {
184 |             "description": "The Content Parser Resource",
185 |             "href": "/api/content/v1/parser"
186 |         }
187 |     }
188 | }
189 |             </pre>
190 |         <h3 id="https://readability.com/api/content/v1#articleRepresentation">Example article representation. (application/json)</h3>
191 |             <pre xmlns="http://research.sun.com/wadl/2006/10" class="prettyprint">
192 | {
193 |     "content": "&lt;div class=\"article-text\"&gt;\n&lt;p&gt;I'm idling outside Diamante's, [snip] ...&lt;/p&gt;&lt;/div&gt;",
194 |     "domain": "www.gq.com",
195 |     "author": "Rafi Kohan",
196 |     "url": "http://www.gq.com/sports/profiles/201202/david-diamante-interview-cigar-lounge-brooklyn-new-jersey-nets?currentPage=all",
197 |     "short_url": "http://rdd.me/g3jcb1sr",
198 |     "title": "Blowing Smoke with Boxing's Big Voice",
199 |     "excerpt": "I'm idling outside Diamante's, a cigar lounge in Fort Greene, waiting for David Diamante, and soon I smell him coming. It's late January but warm. A motorcycle growls down the Brooklyn side street,&amp;hellip;",
200 |     "direction": "ltr",
201 |     "word_count": 2892,
202 |     "total_pages": 1,
203 |     "date_published": null,
204 |     "dek": "Announcer &lt;strong&gt;David Diamante&lt;/strong&gt;, the new voice of the New Jersey (soon Brooklyn) Nets, has been calling boxing matches for years. On the side, he owns a cigar lounge in the heart of Brooklyn. We talk with Diamante about his new gig and the fine art of cigars",
205 |     "lead_image_url": "http://www.gq.com/images/entertainment/2012/02/david-diamante/diamante-628.jpg",
206 |     "next_page_id": null,
207 |     "rendered_pages": 1
208 | }
209 | 
210 | </pre>
211 |         <h3 id="https://readability.com/api/content/v1#confidenceRepresentation">Example confidence representation. (application/json)</h3>
212 |             <pre xmlns="http://research.sun.com/wadl/2006/10" class="prettyprint">
213 | {
214 |     "url": "http://www.gq.com/article/12",
215 |     "confidence": 0.7
216 | }
217 | 
218 | </pre>
219 |         <h3 id="https://readability.com/api/content/v1#confidenceRepresentationJsonp">Example confidence representation as jsonp. (application/json)</h3>
220 |             <pre xmlns="http://research.sun.com/wadl/2006/10" class="prettyprint">
221 | callback({
222 |     "url": "http://www.gq.com/article/12",
223 |     "confidence": 0.7
224 | });
225 | 
226 | </pre>
227 |         <h2 id="faults">Errors</h2><h3 id="https://readability.com/api/content/v1#error_400">400 Bad Request (application/json)</h3>
228 |             The server could not understand your request. Verify that request parameters (and content, if any) are valid.
229 |         <h3 id="https://readability.com/api/content/v1#error_401">401 Authorization Required (application/json)</h3>
230 |             <p>
231 |                 Authentication failed or was not provided. Verify that you have sent valid ixDirectory credentials via HTTP Basic.
232 |             </p>
233 |             <p>A 'Www-Authenticate' challenge header will be sent with this type of error response.</p>
234 |         <h3 id="https://readability.com/api/content/v1#error_500">500 Internal Server Error (application/json)</h3>
235 |             An unknown error has occurred.
236 |         <h3 id="https://readability.com/api/content/v1#error_404">404 Not Found (application/json)</h3>
237 |             The resource that you requested does not exist.
238 |     </body>
239 | </html>
240 | 


--------------------------------------------------------------------------------
/readability/tests/test_auth.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | # Bad hack. I only installed unittest2 locally in my virtualenv
 4 | # for Python 2.6.7
 5 | try:
 6 |     import unittest2 as unittest
 7 | except ImportError:
 8 |     import unittest
 9 | 
10 | 
11 | from readability import xauth
12 | 
13 | 
14 | class XAuthTestCase(unittest.TestCase):
15 |     """
16 |     Test XAuth functionality.
17 |     """
18 |     def test_bad_base_url(self):
19 |         """
20 |         If given a bad base url template, the request to the
21 |         ACCESS_TOKEN_URL should fail and an exception be raised.
22 |         """
23 |         token = None
24 |         with self.assertRaises(Exception):
25 |             token = xauth(base_url_template='https://arc90.com/{0}')
26 |         self.assertEqual(token, None)
27 | 
28 |     def test_bad_consumer_key(self):
29 |         """
30 |         If given a bad consumer key, the `xauth` method should raise
31 |         an exception.
32 |         """
33 |         token = None
34 |         with self.assertRaises(Exception):
35 |             token = xauth(consumer_key='bad consumer key')
36 |         self.assertEqual(token, None)
37 | 
38 |     def test_bad_consumer_secret(self):
39 |         """
40 |         If given a bad consumer key, the `xauth` method should raise
41 |         an exception.
42 |         """
43 |         token = None
44 |         with self.assertRaises(Exception):
45 |             token = xauth(consumer_secret='bad consumer secret')
46 |         self.assertEqual(token, None)
47 | 
48 |     def test_bad_username(self):
49 |         """
50 |         If given a bad username, an exception should be raised.
51 |         """
52 |         token = None
53 |         with self.assertRaises(Exception):
54 |             token = xauth(username='bad username')
55 |         self.assertEqual(token, None)
56 | 
57 |     def test_bad_password(self):
58 |         """
59 |         If given a bad password, an exception should be raised.
60 |         """
61 |         token = None
62 |         with self.assertRaises(Exception):
63 |             token = xauth(password='badpassword')
64 |         self.assertEqual(token, None)
65 | 
66 |     def test_successful_auth(self):
67 |         """
68 |         Test getting a token with proper creds
69 |         """
70 |         # Credentials should be set as environment variables when running tests
71 |         token = xauth()
72 |         self.assertEqual(len(token), 2)
73 | 
74 | 
75 | if __name__ == '__main__':  # run the tests when executed as a script
76 |     unittest.main()
77 | 


--------------------------------------------------------------------------------
/readability/tests/test_clients.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | import os
  3 | try:
  4 |     import unittest2 as unittest
  5 | except ImportError:
  6 |     import unittest
  7 | try:
  8 |     from unittest.mock import patch
  9 | except ImportError as e:
 10 |     from mock import patch
 11 | 
 12 | from readability import xauth, ReaderClient, ParserClient
 13 | 
 14 | 
 15 | class ClientInitTest(unittest.TestCase):
 16 |     """
 17 |     Test that passing tokens to the constructor bypasses looking in ENV.
 18 | 
 19 |     """
 20 |     def setUp(self):
 21 |         self.env_cache = {}
 22 |         for var in ['READABILITY_PARSER_TOKEN', 'READABILITY_CONSUMER_KEY', 'READABILITY_CONSUMER_SECRET']:
 23 |             if var in os.environ:
 24 |                 self.env_cache[var] = os.environ[var]
 25 |                 del os.environ[var]
 26 | 
 27 |     def tearDown(self):
 28 |         for key, val in self.env_cache.items():
 29 |             os.environ[key] = val
 30 | 
 31 |     def test_reader(self):
 32 |         """
 33 |         Test that passing tokens to the constructor bypasses looking in ENV.
 34 | 
 35 |         """
 36 |         with patch('readability.core.required_from_env') as mock:
 37 |             ReaderClient(
 38 |                 consumer_key='consumer_key',
 39 |                 consumer_secret='consumer_secret',
 40 |                 # Fake xauth since we wont be actually making calls for this test
 41 |                 token_key='token_key',
 42 |                 token_secret='token_secret')
 43 |             self.assertEqual(mock.call_count, 0)
 44 | 
 45 |     def test_parser(self):
 46 |         with patch('readability.core.required_from_env') as mock:
 47 |             ParserClient(token='token')
 48 |             self.assertEqual(mock.call_count, 0)
 49 | 
 50 | class ReaderClientNoBookmarkTest(unittest.TestCase):
 51 |     """
 52 |     Tests for the Readability ReaderClient class that need no bookmarks.
 53 |     """
 54 |     def setUp(self):
 55 |         """
 56 |         Need to get a token for each test.
 57 | 
 58 |         """
 59 |         token_key, token_secret = xauth()
 60 |         self.reader_client = ReaderClient(token_key, token_secret)
 61 | 
 62 |     def test_get_article(self):
 63 |         """
 64 |         Test the `get_article` method.
 65 |         """
 66 |         article_id = 'orrspy2p'
 67 |         response = self.reader_client.get_article(article_id)
 68 |         self.assertEqual(response.status_code, 200)
 69 | 
 70 |         # spot check some keys
 71 |         some_expected_keys = set(['direction', 'title', 'url', 'excerpt',
 72 |             'content', 'processed', 'short_url', 'date_published'])
 73 |         keys_set = set(response.json().keys())
 74 |         self.assertTrue(some_expected_keys.issubset(keys_set))
 75 | 
 76 |     def test_get_article_404(self):
 77 |         """
 78 |         Try getting an article that doesn't exist.
 79 |         """
 80 |         article_id = 'antidisestablishmentarianism'
 81 |         response = self.reader_client.get_article(article_id)
 82 |         self.assertEqual(response.status_code, 404)
 83 | 
 84 |     def test_get_user(self):
 85 |         """
 86 |         Test getting user data
 87 |         """
 88 |         user_response = self.reader_client.get_user()
 89 |         self.assertEqual(user_response.status_code, 200)
 90 |         some_expected_keys = set(['username', 'first_name', 'last_name',
 91 |             'date_joined', 'email_into_address'])
 92 |         received_keys = set(user_response.json().keys())
 93 |         self.assertTrue(some_expected_keys.issubset(received_keys))
 94 | 
 95 |     def test_get_empty_tags(self):
 96 |         """
 97 |         Test getting an empty set of tags. Since there are no bookmarks
 98 |         present in this test, there should be no tags.
 99 |         """
100 |         tag_response = self.reader_client.get_tags()
101 |         self.assertEqual(tag_response.status_code, 200)
102 |         response_json = tag_response.json()
103 |         self.assertTrue('tags' in response_json)
104 |         self.assertEqual(len(response_json['tags']), 0)
105 | 
106 | 
107 | class ReaderClientSingleBookmarkTest(unittest.TestCase):
108 |     """
109 |     Tests that only need one bookmark
110 |     """
111 |     def setUp(self):
112 |         """
113 |         Get a client and add a bookmark
114 |         """
115 |         token_key, token_secret = xauth()
116 |         self.reader_client = ReaderClient(token_key=token_key, token_secret=token_secret)
117 |         self.url = 'http://www.theatlantic.com/technology/archive/2013/01/the-never-before-told-story-of-the-worlds-first-computer-art-its-a-sexy-dame/267439/'
118 |         add_response = self.reader_client.add_bookmark(self.url)
119 |         self.assertTrue(add_response.status_code in [201, 202])
120 | 
121 |     def tearDown(self):
122 |         """
123 |         Remove all added bookmarks.
124 |         """
125 |         for bm in self.reader_client.get_bookmarks().json()['bookmarks']:
126 |             del_response = self.reader_client.delete_bookmark(bm['id'])
127 |             self.assertEqual(del_response.status_code, 204)
128 | 
129 |     def test_get_bookmark(self):
130 |         """
131 |         Test getting one bookmark by id
132 |         """
133 |         bookmark_id = self._get_bookmark_data()['id']
134 | 
135 |         bm_response = self.reader_client.get_bookmark(bookmark_id)
136 |         self.assertEqual(bm_response.status_code, 200)
137 |         some_expected_keys = set(['article', 'user_id', 'favorite', 'id'])
138 |         received_keys = set(bm_response.json().keys())
139 |         self.assertTrue(some_expected_keys.issubset(received_keys))
140 | 
141 |     def test_bookmark_tag_functionality(self):
142 |         """
143 |         Test adding, fetching and deleting tags on a bookmark.
144 |         """
145 |         bookmark_id = self._get_bookmark_data()['id']
146 | 
147 |         # test getting empty tags
148 |         tag_response = self.reader_client.get_bookmark_tags(bookmark_id)
149 |         self.assertEqual(tag_response.status_code, 200)
150 |         self.assertEqual(len(tag_response.json()['tags']), 0)
151 | 
152 |         # test adding tags
153 |         tags = ['tag', 'another tag']
154 |         tag_string = ', '.join(tags)
155 |         tag_add_response = \
156 |             self.reader_client.add_tags_to_bookmark(bookmark_id, tag_string)
157 |         self.assertEqual(tag_add_response.status_code, 202)
158 | 
159 |         # re-fetch tags. should have 2
160 |         retag_response = self.reader_client.get_bookmark_tags(bookmark_id)
161 |         self.assertEqual(retag_response.status_code, 200)
162 |         self.assertEqual(len(retag_response.json()['tags']), 2)
163 |         for tag in retag_response.json()['tags']:
164 |             self.assertTrue(tag['text'] in tags)
165 | 
166 |         # test getting tags for user
167 |         user_tag_resp = self.reader_client.get_tags()
168 |         self.assertEqual(user_tag_resp.status_code, 200)
169 |         self.assertEqual(len(user_tag_resp.json()['tags']), 2)
170 |         for tag in user_tag_resp.json()['tags']:
171 |             self.assertTrue(tag['text'] in tags)
172 | 
173 |             # test getting a single tag while we're here
174 |             single_tag_resp = self.reader_client.get_tag(tag['id'])
175 |             self.assertEqual(single_tag_resp.status_code, 200)
176 |             self.assertTrue('applied_count' in single_tag_resp.json())
177 |             self.assertTrue('id' in single_tag_resp.json())
178 |             self.assertTrue('text' in single_tag_resp.json())
179 | 
180 |         # delete tags
181 |         for tag in retag_response.json()['tags']:
182 |             del_response = self.reader_client.delete_tag_from_bookmark(
183 |                 bookmark_id, tag['id'])
184 |             self.assertEqual(del_response.status_code, 204)
185 | 
186 |         # check that tags are gone
187 |         tag_response = self.reader_client.get_bookmark_tags(bookmark_id)
188 |         self.assertEqual(tag_response.status_code, 200)
189 |         self.assertEqual(len(tag_response.json()['tags']), 0)
190 | 
191 |     def _get_bookmark_data(self):
192 |         """
193 |         Convenience method to get a single bookmark's data.
194 |         """
195 |         bm_response = self.reader_client.get_bookmarks()
196 |         self.assertEqual(bm_response.status_code, 200)
197 |         bm_response_json = bm_response.json()
198 |         self.assertTrue(len(bm_response_json['bookmarks']) > 0)
199 |         return bm_response_json['bookmarks'][0]
200 | 
201 | 
202 | class ReaderClientMultipleBookmarkTest(unittest.TestCase):
203 |     """
204 |     Tests for bookmark functionality
205 |     """
206 |     def setUp(self):
207 |         """
208 |         Add a few bookmarks.
209 |         """
210 |         token_key, token_secret = xauth()
211 |         self.reader_client = ReaderClient(token_key=token_key, token_secret=token_secret)
212 | 
213 |         self.urls = [
214 |             'http://www.theatlantic.com/technology/archive/2013/01/the-never-before-told-story-of-the-worlds-first-computer-art-its-a-sexy-dame/267439/',
215 |             'http://www.theatlantic.com/business/archive/2013/01/why-smart-poor-students-dont-apply-to-selective-colleges-and-how-to-fix-it/272490/',
216 |         ]
217 | 
218 |         self.favorite_urls = [
219 |             'http://www.theatlantic.com/sexes/archive/2013/01/the-lonely-existence-of-mel-feit-mens-rights-advocate/267413/',
220 |             'http://www.theatlantic.com/technology/archive/2013/01/women-in-combat-an-idea-whose-time-has-come-aided-by-technology/272483/'
221 |         ]
222 | 
223 |         self.archive_urls = [
224 |             'http://www.theatlantic.com/business/archive/2013/01/what-economics-can-and-cant-tell-us-about-the-legacy-of-legal-abortion/267459/',
225 |             'http://www.theatlantic.com/business/archive/2013/01/5-ways-to-understand-just-how-absurd-spains-26-unemployment-rate-is/272502/'
226 |         ]
227 | 
228 |         self.all_urls = self.urls + self.favorite_urls + self.archive_urls
229 | 
230 |         for url in self.urls:
231 |             response = self.reader_client.add_bookmark(url)
232 |             self.assertTrue(response.status_code in [201, 202])
233 | 
234 |         for url in self.favorite_urls:
235 |             response = self.reader_client.add_bookmark(url, favorite=True)
236 |             self.assertTrue(response.status_code in [201, 202])
237 | 
238 |         for url in self.archive_urls:
239 |             response = self.reader_client.add_bookmark(url, archive=True)
240 |             self.assertTrue(response.status_code in [201, 202])
241 | 
242 |     def test_get_bookmarks(self):
243 |         """
244 |         Test getting all bookmarks
245 |         """
246 |         response = self.reader_client.get_bookmarks()
247 |         self.assertEqual(response.status_code, 200)
248 |         self.assertEqual(
249 |             len(response.json()['bookmarks']), len(self.all_urls))
250 | 
251 |         # test favorite bookmarks
252 |         response = self.reader_client.get_bookmarks(favorite=True)
253 |         self.assertEqual(response.status_code, 200)
254 |         self.assertEqual(
255 |             len(response.json()['bookmarks']), len(self.favorite_urls))
256 |         for bm in response.json()['bookmarks']:
257 |             self.assertTrue(bm['article']['url'] in self.favorite_urls)
258 | 
259 |         # test archive bookmarks
260 |         response = self.reader_client.get_bookmarks(archive=True)
261 |         self.assertEqual(response.status_code, 200)
262 |         self.assertEqual(
263 |             len(response.json()['bookmarks']), len(self.archive_urls))
264 |         for bm in response.json()['bookmarks']:
265 |             self.assertTrue(bm['article']['url'] in self.archive_urls)
266 | 
267 |     def tearDown(self):
268 |         """
269 |         Remove all added bookmarks.
270 |         """
271 |         for bm in self.reader_client.get_bookmarks().json()['bookmarks']:
272 |             del_response = self.reader_client.delete_bookmark(bm['id'])
273 |             self.assertEqual(del_response.status_code, 204)
274 | 
275 | 
276 | if __name__ == '__main__':
277 |     unittest.main(warnings='ignore')
278 | 


--------------------------------------------------------------------------------
/readability/tests/test_parser.py:
--------------------------------------------------------------------------------
  1 | try:
  2 |     import unittest2 as unittest
  3 | except ImportError:
  4 |     import unittest
  5 | 
  6 | from readability import ParserClient
  7 | from readability.clients import DEFAULT_PARSER_URL_TEMPLATE
  8 | from readability.core import required_from_env
  9 | from readability.tests import load_test_content
 10 | 
 11 | class ParserClientTest(unittest.TestCase):
 12 |     """
 13 |     Test case for the Parser Client
 14 |     """
 15 |     def setUp(self):
 16 |         self.parser_token = required_from_env('READABILITY_PARSER_TOKEN')
 17 |         self.parser_client = ParserClient(token=self.parser_token)
 18 |         self.test_url = 'https://en.wikipedia.org/wiki/Mark_Twain'
 19 | 
 20 |     def test_generate_url(self):
 21 |         """
 22 |         Test the clients ability to generate urls to endpoints.
 23 |         """
 24 |         # Test root resource
 25 |         expected_url = DEFAULT_PARSER_URL_TEMPLATE.format('')
 26 |         expected_url = '{}?token={}'.format(expected_url, self.parser_token)
 27 |         generated_url = self.parser_client._generate_url('')
 28 |         self.assertEqual(generated_url, expected_url)
 29 | 
 30 |         # Test parser resource
 31 |         expected_url = '{base_url}?token={token}&url=http%3A%2F%2Fwww.google.biz%2Fblog.html'.format(
 32 |             base_url=DEFAULT_PARSER_URL_TEMPLATE.format('parser'),
 33 |             token=self.parser_token)
 34 |         params = {'url': 'http://www.google.biz/blog.html'}
 35 |         generated_url = self.parser_client._generate_url(
 36 |             'parser', query_params=params)
 37 | 
 38 |         self.assertEqual(generated_url, expected_url)
 39 | 
 40 |     def test_get_root(self):
 41 |         """
 42 |         Test the client's ability to hit the root endpoint.
 43 |         """
 44 |         response = self.parser_client.get_root()
 45 | 
 46 |         expected_keys = set(['resources', ])
 47 |         self.assertEqual(set(response.json().keys()), expected_keys)
 48 | 
 49 |     def test_get_confidence(self):
 50 |         """
 51 |         Test the client's ability to hit the confidence endpoint.
 52 |         """
 53 |         # hit without an article_id or url. Should get an error.
 54 |         response = self.parser_client.get_confidence()
 55 |         self.assertEqual(response.status_code, 400)
 56 | 
 57 |         expected_keys = set(['url', 'confidence'])
 58 | 
 59 |         response = self.parser_client.get_confidence(url=self.test_url)
 60 |         self.assertEqual(response.status_code, 200)
 61 |         self.assertEqual(set(response.json().keys()), expected_keys)
 62 |         # confidence for wikipedia should be over .5
 63 |         self.assertTrue(response.json()['confidence'] >= .5)
 64 | 
 65 |     def test_get_article_status(self):
 66 |         """
 67 |         Test the client's ability to hit the parser endpoint with a HEAD
 68 |         """
 69 |         # hit without an article_id or url. Should get an error.
 70 |         response = self.parser_client.get_confidence()
 71 |         self.assertEqual(response.status_code, 400)
 72 | 
 73 |         response = self.parser_client.get_article_status(url=self.test_url)
 74 |         self.assertEqual(response.status_code, 200)
 75 |         self.assertTrue(response.headers.get('x-article-status') is not None)
 76 |         self.assertTrue(response.headers.get('x-article-id') is not None)
 77 | 
 78 |     def test_get_article(self):
 79 |         """
 80 |         Test the client's ability to hit the parser endpoint with a GET
 81 |         """
 82 |         # test with incorrect params
 83 |         response = self.parser_client.get_article()
 84 |         self.assertEqual(response.status_code, 400)
 85 | 
 86 |         response = self.parser_client.get_article(url=self.test_url)
 87 |         self.assertEqual(response.status_code, 200)
 88 | 
 89 |         some_expected_keys = set(['content', 'domain', 'author', 'word_count',
 90 |             'title', 'total_pages'])
 91 |         self.assertTrue(
 92 |             some_expected_keys.issubset(set(response.json().keys())))
 93 | 
 94 |     def test_post_article_content(self):
 95 |         """
 96 |         Test the client's ability to hit the parser endpoint with a POST
 97 |         request.
 98 |         """
 99 |         content = load_test_content('content/test_post_content.html')
100 |         url = 'http://thisisaurlthatdoesntmatterbutmustbepassedanyway.com/article.html'
101 |         response = self.parser_client.post_article_content(content, url)
102 |         self.assertEqual(response.status_code, 200)
103 | 
104 | 
105 | if __name__ == '__main__':
106 |     unittest.main()
107 | 


--------------------------------------------------------------------------------
/readability/tests/test_utils.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # Bad hack. I only installed unittest2 locally in my virtualenv
  3 | # for Python 2.6.7
  4 | try:
  5 |     import unittest2 as unittest
  6 | except ImportError:
  7 |     import unittest
  8 | 
  9 | from unittest import TestCase
 10 | from datetime import datetime
 11 | 
 12 | from readability.utils import \
 13 |     cast_datetime_filter, cast_integer_filter, filter_args_to_dict
 14 | 
 15 | 
 16 | class CastDatetimeFilterTestCase(unittest.TestCase):
 17 |     """
 18 |     Tests for the `cast_datetime_filter` function.
 19 |     """
 20 |     def test_int(self):
 21 |         """
 22 |         Pass an int. Should raise a `ValueError`
 23 |         """
 24 |         with self.assertRaises(ValueError):
 25 |             cast_datetime_filter(1)
 26 | 
 27 |     def test_non_iso_string(self):
 28 |         """
 29 |         Pass a string that's not in ISO format. Should get a string back
 30 |         that's in ISO format.
 31 |         """
 32 |         date_string = '08-03-2010'
 33 |         expected_iso = cast_datetime_filter(date_string)
 34 |         self.assertEqual(expected_iso, '2010-08-03T00:00:00')
 35 | 
 36 |     def test_datetime_object(self):
 37 |         """
 38 |         Pass a datetime object. Should get a string back in ISO format.
 39 |         """
 40 |         now = datetime.now()
 41 |         expected_output = now.isoformat()
 42 |         actual_output = cast_datetime_filter(now)
 43 |         self.assertEqual(actual_output, expected_output)
 44 | 
 45 | 
 46 | class CastIntegerFilter(unittest.TestCase):
 47 |     """
 48 |     Test for the `cast_integer_filter` function.
 49 |     """
 50 |     def test_int(self):
 51 |         """
 52 |         Pass an int. Should get it back.
 53 |         """
 54 |         value_to_cast = 1
 55 |         output = cast_integer_filter(value_to_cast)
 56 |         self.assertEqual(value_to_cast, output)
 57 | 
 58 |     def test_false(self):
 59 |         """
 60 |         Pass a boolean False. Should get a 0 back.
 61 |         """
 62 |         output = cast_integer_filter(False)
 63 |         expected_output = 0
 64 |         self.assertEqual(output, expected_output)
 65 | 
 66 |     def test_true(self):
 67 |         """
 68 |         Pass a boolean True. Should get a 1 back.
 69 |         """
 70 |         output = cast_integer_filter(True)
 71 |         expected_output = 1
 72 |         self.assertEqual(output, expected_output)
 73 | 
 74 |     def test_numeric_string(self):
 75 |         """
 76 |         Pass a numeric string. Should get the integer version back.
 77 |         """
 78 |         numeric_string = '123'
 79 |         expected_output = 123
 80 |         output = cast_integer_filter(numeric_string)
 81 |         self.assertEqual(expected_output, output)
 82 | 
 83 | 
 84 | class FilterArgsToDictTestCase(unittest.TestCase):
 85 |     """
 86 |     Test for the `filter_args_to_dict` function.
 87 |     """
 88 |     def test_all_bad_filter_keys(self):
 89 |         """
 90 |         Pass a dict whose keys are not in the acceptable filter list.
 91 | 
 92 |         Should get an empty dict back.
 93 |         """
 94 |         filters = {
 95 |             'date_deleted': '08-08-2010',
 96 |             'date_updated': '08-08-2011',
 97 |             'liked': 1
 98 |         }
 99 | 
100 |         acceptable_filters = ['favorite', 'archive']
101 |         expected_empty = filter_args_to_dict(filters, acceptable_filters)
102 |         self.assertEqual(expected_empty, {})
103 | 
104 |     def test_some_bad_filter_keys(self):
105 |         """
106 |         Pass a mixture of good and bad filter keys.
107 |         """
108 |         filters = {
109 |             'favorite': True,
110 |             'archive': False
111 |         }
112 |         bad_filters = {
113 |             'date_deleted': '08-08-2010',
114 |             'date_updated': '08-08-2011',
115 |             'liked': 1
116 |         }
117 |         acceptable_filter_keys = ['favorite', 'archive']
118 | 
119 |         # add bad filters to filters dict
120 |         filters.update(bad_filters)
121 |         filter_dict = filter_args_to_dict(filters, acceptable_filter_keys)
122 |         self.assertEqual(set(filter_dict.keys()), set(acceptable_filter_keys))
123 | 
124 |     def test_casting_of_integer_filters(self):
125 |         """
126 |         Pass keys that correspond to integer filters.
127 |         """
128 |         filters = {
129 |             'favorite': True,
130 |             'archive': False
131 |         }
132 |         acceptable_filter_keys = filters.keys()
133 |         filter_dict = filter_args_to_dict(filters, acceptable_filter_keys)
134 |         self.assertEqual(set(filter_dict.keys()), set(acceptable_filter_keys))
135 |         self.assertEqual(filter_dict['favorite'], 1)
136 |         self.assertEqual(filter_dict['archive'], 0)
137 | 
138 |     def test_casting_of_datetime_filters(self):
139 |         """
140 |         Pass keys that correspond to datetime filters.
141 |         """
142 |         now = datetime.now()
143 |         filters = {
144 |             'archived_since': '08-08-2010',
145 |             'favorited_since': now
146 |         }
147 |         acceptable_filter_keys = filters.keys()
148 |         filter_dict = filter_args_to_dict(filters, acceptable_filter_keys)
149 |         self.assertEqual(set(filter_dict.keys()), set(acceptable_filter_keys))
150 |         self.assertEqual(filter_dict['archived_since'], '2010-08-08T00:00:00')
151 |         self.assertEqual(filter_dict['favorited_since'], now.isoformat())
152 | 
153 | 
154 | if __name__ == '__main__':
155 |     unittest.main()
156 | 


--------------------------------------------------------------------------------
/readability/utils.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | readability.utils
  5 | ~~~~~~~~~~~~~~~~~
  6 | 
  7 | This module provides various utils to the rest of the package.
  8 | 
  9 | """
 10 | 
 11 | import logging
 12 | 
 13 | from datetime import datetime
 14 | 
 15 | from dateutil.parser import parse as parse_datetime
 16 | 
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | 
 21 | # map of filter names to a data type. This is used to map names to a
 22 | # casting function when needed.
 23 | filter_type_map = {
 24 |     'added_since': 'datetime',
 25 |     'added_until': 'datetime',
 26 |     'archive': 'int',
 27 |     'archived_since': 'datetime',
 28 |     'archived_until': 'datetime',
 29 |     'exclude_accessibility': 'string',
 30 |     'favorite': 'int',
 31 |     'favorited_since': 'datetime',
 32 |     'favorited_until': 'datetime',
 33 |     'domain': 'string',
 34 |     'only_delete': 'int',
 35 |     'opened_since': 'datetime',
 36 |     'opened_until': 'datetime',
 37 |     'order': 'string',
 38 |     'page': 'int',
 39 |     'per_page': 'int',
 40 |     'tags': 'string',
 41 |     'updated_since': 'datetime',
 42 |     'updated_until': 'datetime',
 43 | }
 44 | 
 45 | 
 46 | def cast_datetime_filter(value):
 47 |     """Cast a datetime filter value.
 48 | 
 49 |     :param value: string representation of a value that needs to be casted to
 50 |         a `datetime` object.
 51 | 
 52 |     """
 53 |     if isinstance(value, str):
 54 |         dtime = parse_datetime(value)
 55 | 
 56 |     elif isinstance(value, datetime):
 57 |         dtime = value
 58 |     else:
 59 |         raise ValueError('Received value of type {0}'.format(type(value)))
 60 | 
 61 |     return dtime.isoformat()
 62 | 
 63 | 
 64 | def cast_integer_filter(value):
 65 |     """Cast an integer filter value.
 66 | 
 67 |     These are usually booleans in Python but they need to be sent as
 68 |     1s and 0s to the API.
 69 | 
 70 |     :param value: value to be cast with `int()` (usually a boolean)
 71 |     """
 72 |     return int(value)
 73 | 
 74 | 
 75 | def filter_args_to_dict(filter_dict, accepted_filter_keys=[]):
 76 |     """Cast and validate filter args.
 77 | 
 78 |     :param filter_dict: Filter kwargs
 79 |     :param accepted_filter_keys: List of keys that are acceptable to use.
 80 | 
 81 |     """
 82 |     out_dict = {}
 83 |     for k, v in filter_dict.items():
 84 |         # make sure that the filter k is acceptable
 85 |         # and that there is a value associated with the key
 86 |         if k not in accepted_filter_keys or v is None:
 87 |             logger.debug(
 88 |                 'Filter was not in accepted_filter_keys or value is None.')
 89 |             # skip it
 90 |             continue
 91 |         filter_type = filter_type_map.get(k, None)
 92 | 
 93 |         if filter_type is None:
 94 |             logger.debug('Filter key not foud in map.')
 95 |             # hmm, this was an acceptable filter type but not in the map...
 96 |             # Going to skip it.
 97 |             continue
 98 | 
 99 |         # map of casting funcitons to filter types
100 |         filter_cast_map = {
101 |             'int': cast_integer_filter,
102 |             'datetime': cast_datetime_filter
103 |         }
104 |         cast_function = filter_cast_map.get(filter_type, None)
105 | 
106 |         # if we get a cast function, call it with v. If not, just use v.
107 |         if cast_function:
108 |             out_value = cast_function(v)
109 |         else:
110 |             out_value = v
111 |         out_dict[k] = out_value
112 | 
113 |     return out_dict
114 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- coding: utf-8 -*-
 3 | import sys
 4 | 
 5 | from setuptools import setup
 6 | 
 7 | required = [
 8 |     'pytest',
 9 |     'requests',
10 |     'requests_oauthlib',
11 |     'httplib2==0.19.0',
12 |     'python-dateutil',
13 | ]
14 | 
15 | # Python 2 dependencies
16 | if sys.version_info[0] == 2:
17 |     required += [
18 |         'mock',
19 |     ]
20 | 
21 | setup(
22 |     name='readability-api',
23 |     version='1.0.2',
24 |     description='Python client for the Readability Reader and Parser APIs.',
25 |     long_description=open('README.rst').read(),
26 |     author='The Readability Team',
27 |     author_email='philip@readability.com',
28 |     url='https://github.com/arc90/python-readability-api',
29 |     packages=['readability'],
30 |     install_requires=required,
31 |     license='MIT',
32 |     classifiers=(
33 |         'Development Status :: 5 - Production/Stable',
34 |         'Intended Audience :: Developers',
35 |         'Natural Language :: English',
36 |         'License :: OSI Approved :: MIT License',
37 |         'Programming Language :: Python',
38 |         'Programming Language :: Python :: 2.7',
39 |         'Programming Language :: Python :: 3.5',
40 |         'Programming Language :: Python :: Implementation :: PyPy',
41 |     ),
42 | )
43 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py27, py35, pypy, pypy3
 3 | 
 4 | [testenv]
 5 | commands = py.test
 6 | deps =
 7 |     pytest
 8 |     requests
 9 |     requests_oauthlib
10 |     httplib2==0.9.1
11 |     python-dateutil
12 |     mock
13 | passenv =
14 |     READABILITY_CONSUMER_KEY
15 |     READABILITY_CONSUMER_SECRET
16 |     READABILITY_PARSER_TOKEN
17 |     READABILITY_PASSWORD
18 |     READABILITY_USERNAME
19 | 


--------------------------------------------------------------------------------