├── README ├── docs ├── changes.txt ├── api.txt ├── future.txt ├── testing.txt ├── attributes.txt ├── traversing.txt ├── index.txt ├── css.txt ├── tips.txt ├── ajax.txt ├── manipulating.txt ├── Makefile └── conf.py ├── MANIFEST.in ├── .gitignore ├── pyquery ├── test.html ├── __init__.py ├── tests.txt ├── rules.py ├── ajax.py ├── cssselectpatch.py ├── test.py └── pyquery.py ├── setup.cfg ├── CHANGES.txt ├── TODO.txt ├── LICENSE.txt ├── setup.py ├── README.md ├── README.rst ├── bootstrap.py └── bootstrap-py3k.py /README: -------------------------------------------------------------------------------- 1 | README.md -------------------------------------------------------------------------------- /docs/changes.txt: -------------------------------------------------------------------------------- 1 | News 2 | ===== 3 | 4 | .. include:: ../CHANGES.txt 5 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include pyquery *.txt 2 | include README.txt 3 | include CHANGES.txt 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | develop-eggs/ 2 | bin/ 3 | dist/ 4 | build/ 5 | parts/ 6 | docs/_build/ 7 | .installed.cfg 8 | *.egg-info 9 | *.pyc 10 | *.swp 11 | *~ 12 | -------------------------------------------------------------------------------- /pyquery/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |

Hello world !

4 | 5 |

6 | hello python ! 7 |

8 | 9 | 10 | -------------------------------------------------------------------------------- /docs/api.txt: -------------------------------------------------------------------------------- 1 | :mod:`~pyquery.pyquery` -- PyQuery complete API 2 | ================================================ 3 | 4 | .. automodule:: pyquery.pyquery 5 | 6 | .. autoclass:: PyQuery 7 | :members: 8 | 9 | 10 | -------------------------------------------------------------------------------- /pyquery/__init__.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | # 3 | # Copyright (C) 2008 - Olivier Lauzanne 4 | # 5 | # Distributed under the BSD license, see LICENSE.txt 6 | 7 | import sys 8 | 9 | try: 10 | import webob 11 | except ImportError: 12 | from .pyquery import PyQuery 13 | else: 14 | from .ajax import PyQuery 15 | 16 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [nosetests] 2 | with-doctest=true 3 | verbosity=3 4 | 5 | [aliases] 6 | sphinx = build_sphinx 7 | release = sdist --formats=zip,gztar register upload build_sphinx upload_sphinx 8 | 9 | [build_sphinx] 10 | source-dir = docs/ 11 | build-dir = docs/_build 12 | all_files = 1 13 | 14 | [upload_sphinx] 15 | upload-dir = docs/_build/html 16 | -------------------------------------------------------------------------------- /docs/future.txt: -------------------------------------------------------------------------------- 1 | Future 2 | ------- 3 | 4 | - SELECTORS: done 5 | 6 | - ATTRIBUTES: done 7 | 8 | - CSS: done 9 | 10 | - HTML: done 11 | 12 | - MANIPULATING: missing the wrapInner method 13 | 14 | - TRAVERSING: about half done 15 | 16 | - EVENTS: nothing to do with server side might be used later for automatic ajax 17 | 18 | - CORE UI EFFECTS: did hide and show the rest doesn't really makes sense on 19 | server side 20 | 21 | - AJAX: some with wsgi app 22 | 23 | -------------------------------------------------------------------------------- /docs/testing.txt: -------------------------------------------------------------------------------- 1 | Testing 2 | ------- 3 | 4 | If you want to run the tests that you can see above you should do:: 5 | 6 | $ hg clone https://bitbucket.org/olauzanne/pyquery/ 7 | $ cd pyquery 8 | $ python bootstrap.py 9 | $ bin/buildout 10 | $ bin/test 11 | 12 | You can build the Sphinx documentation by doing:: 13 | 14 | $ cd docs 15 | $ make html 16 | 17 | If you don't already have lxml installed use this line:: 18 | 19 | $ STATIC_DEPS=true bin/buildout 20 | 21 | 22 | -------------------------------------------------------------------------------- /CHANGES.txt: -------------------------------------------------------------------------------- 1 | 1.1 2 | --- 3 | 4 | 5 | 6 | 1.0 7 | --- 8 | fix issues 24 9 | 10 | 0.7 11 | --- 12 | 13 | Python 3 compatible 14 | 15 | Add __unicode__ method 16 | 17 | Add root and encoding attribute 18 | 19 | fix issues 19, 20, 22, 23 20 | 21 | 0.6.1 22 | ------ 23 | 24 | Move README.txt at package root 25 | 26 | Add CHANGES.txt and add it to long_description 27 | 28 | 0.6 29 | ---- 30 | 31 | Added PyQuery.outerHtml 32 | 33 | Added PyQuery.fn 34 | 35 | Added PyQuery.map 36 | 37 | Change PyQuery.each behavior to reflect jQuery api 38 | 39 | 40 | -------------------------------------------------------------------------------- /TODO.txt: 
-------------------------------------------------------------------------------- 1 | # todo.txt 2 | 3 | * Don't choke on doctypes 4 | * Iteration yields PyQuery objects, not lxml nodes 5 | * Uniformly support selectors (CSS3 + jQuery extensions) 6 | * Support the jQuery API more uniformly: 7 | * Pass a modified version of the Sizzle and/or jQuery test suite 8 | * Remove pyquery.ajax? It's rather outside the core purpose of this library. I'd rather have something focused on fluent DOM parsing than a Swiss Army knife. 9 | 10 | 11 | 12 | ## Welp. Also. 13 | 14 | https://bitbucket.org/olauzanne/pyquery/issues?status=new&status=open 15 | -------------------------------------------------------------------------------- /docs/attributes.txt: -------------------------------------------------------------------------------- 1 | Attributes 2 | ---------- 3 | 4 | 5 | You can play with the attributes using the jQuery API:: 6 | 7 | >>> p = pq('

')('p') 8 | >>> p.attr("id") 9 | 'hello' 10 | >>> p.attr("id", "plop") 11 | [] 12 | >>> p.attr("id", "hello") 13 | [] 14 | 15 | 16 | Or in a more pythonic way:: 17 | 18 | >>> p.attr.id = "plop" 19 | >>> p.attr.id 20 | 'plop' 21 | >>> p.attr["id"] = "ola" 22 | >>> p.attr["id"] 23 | 'ola' 24 | >>> p.attr(id='hello', class_='hello2') 25 | [] 26 | >>> p.attr.class_ 27 | 'hello2' 28 | >>> p.attr.class_ = 'hello' 29 | 30 | 31 | -------------------------------------------------------------------------------- /docs/traversing.txt: -------------------------------------------------------------------------------- 1 | Traversing 2 | ---------- 3 | 4 | Some jQuery traversal methods are supported. Here are a few examples. 5 | 6 | You can filter the selection list using a string selector:: 7 | 8 | >>> d = pq('

') 9 | >>> d('p').filter('.hello') 10 | [] 11 | 12 | It is possible to select a single element with eq:: 13 | 14 | >>> d('p').eq(0) 15 | [] 16 | 17 | You can find nested elements:: 18 | 19 | >>> d('p').find('a') 20 | [, ] 21 | >>> d('p').eq(1).find('a') 22 | [] 23 | 24 | Breaking out of a level of traversal is also supported using end:: 25 | 26 | >>> d('p').find('a').end() 27 | [, ] 28 | >>> d('p').eq(0).end() 29 | [, ] 30 | >>> d('p').filter(lambda i: i == 1).end() 31 | [, ] 32 | 33 | 34 | -------------------------------------------------------------------------------- /pyquery/tests.txt: -------------------------------------------------------------------------------- 1 | 2 | Assume spaces normalization:: 3 | 4 | >>> pq('
').text() 5 | '' 6 | 7 | >>> print(pq('
  • toto
  • tata
').text()) 8 | toto tata 9 | 10 | Complex wrapping:: 11 | 12 | >>> d = pq('
youhou
') 13 | >>> s = d('span') 14 | >>> s is d 15 | False 16 | >>> s.wrap('
') 17 | [
] 18 | 19 | We get the original doc with new node:: 20 | 21 | >>> print(d) 22 |
youhou
23 | 24 | Complex wrapAll:: 25 | 26 | >>> doc = pq('
Heyyou !
') 27 | >>> s = doc('span') 28 | >>> s.wrapAll('
') 29 | [] 30 | 31 | >>> print(doc) 32 |
Heyyou !
33 | -------------------------------------------------------------------------------- /docs/index.txt: -------------------------------------------------------------------------------- 1 | .. include:: ../README.txt 2 | 3 | Full documentation 4 | ================== 5 | 6 | .. toctree:: 7 | :maxdepth: 1 8 | 9 | attributes 10 | css 11 | manipulating 12 | traversing 13 | api 14 | ajax 15 | tips 16 | testing 17 | future 18 | changes 19 | 20 | More documentation 21 | ================== 22 | 23 | First there is the Sphinx documentation `here`_. 24 | Then for more documentation about the API you can use the `jquery website`_. 25 | The reference I'm now using for the API is ... the `color cheat sheet`_. 26 | Then you can always look at the `code`_. 27 | 28 | .. _jquery website: http://docs.jquery.com/ 29 | .. _code: http://www.bitbucket.org/olauzanne/pyquery/src/tip/pyquery/pyquery.py 30 | .. _color cheat sheet: http://colorcharge.com/wp-content/uploads/2007/12/jquery12_colorcharge.png 31 | .. _here: http://packages.python.org/pyquery/ 32 | 33 | Indices and tables 34 | ================== 35 | 36 | * :ref:`genindex` 37 | * :ref:`modindex` 38 | * :ref:`search` 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /docs/css.txt: -------------------------------------------------------------------------------- 1 | CSS 2 | --- 3 | 4 | .. Initialize tests 5 | 6 | >>> from pyquery import PyQuery 7 | >>> p = PyQuery('

')('p') 8 | 9 | You can play with css classes:: 10 | 11 | >>> p.addClass("toto") 12 | [] 13 | >>> p.toggleClass("titi toto") 14 | [] 15 | >>> p.removeClass("titi") 16 | [] 17 | 18 | Or the css style:: 19 | 20 | >>> p.css("font-size", "15px") 21 | [] 22 | >>> p.attr("style") 23 | 'font-size: 15px' 24 | >>> p.css({"font-size": "17px"}) 25 | [] 26 | >>> p.attr("style") 27 | 'font-size: 17px' 28 | 29 | Same thing the pythonic way ('_' characters are translated to '-'):: 30 | 31 | >>> p.css.font_size = "16px" 32 | >>> p.attr.style 33 | 'font-size: 16px' 34 | >>> p.css['font-size'] = "15px" 35 | >>> p.attr.style 36 | 'font-size: 15px' 37 | >>> p.css(font_size="16px") 38 | [] 39 | >>> p.attr.style 40 | 'font-size: 16px' 41 | >>> p.css = {"font-size": "17px"} 42 | >>> p.attr.style 43 | 'font-size: 17px' 44 | 45 | 46 | -------------------------------------------------------------------------------- /pyquery/rules.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | try: 3 | from deliverance.pyref import PyReference 4 | from deliverance import rules 5 | from ajax import PyQuery as pq 6 | except ImportError: 7 | pass 8 | else: 9 | class PyQuery(rules.AbstractAction): 10 | """Python function""" 11 | name = 'py' 12 | def __init__(self, source_location, pyref): 13 | self.source_location = source_location 14 | self.pyref = pyref 15 | 16 | def apply(self, content_doc, theme_doc, resource_fetcher, log): 17 | self.pyref(pq([content_doc]), pq([theme_doc]), resource_fetcher, log) 18 | 19 | @classmethod 20 | def from_xml(cls, el, source_location): 21 | """Parses and instantiates the class from an element""" 22 | pyref = PyReference.parse_xml( 23 | el, source_location=source_location, 24 | default_function='transform') 25 | return cls(source_location, pyref) 26 | 27 | rules._actions['pyquery'] = PyQuery 28 | 29 | def deliverance_proxy(): 30 | import deliverance.proxycommand 31 | deliverance.proxycommand.main() 32 | -------------------------------------------------------------------------------- /docs/tips.txt: -------------------------------------------------------------------------------- 1 | Tips 2 | ==== 3 | 4 | Making links absolute 5 | --------------------- 6 | 7 | You can make links absolute which can be usefull for screen scrapping:: 8 | 9 | >>> d = pq(url='http://www.w3.org/', parser='html') 10 | >>> d('a[accesskey="0"]').attr('href') 11 | '/Help/' 12 | >>> d.make_links_absolute() 13 | [] 14 | >>> d('a[accesskey="0"]').attr('href') 15 | 'http://www.w3.org/Help/' 16 | 17 | Using different parsers 18 | ----------------------- 19 | 20 | By default pyquery uses the lxml xml parser and then if it doesn't work goes on 21 | to try the html parser from lxml.html. The xml parser can sometimes be 22 | problematic when parsing xhtml pages because the parser will not raise an error 23 | but give an unusable tree (on w3c.org for example). 24 | 25 | You can also choose which parser to use explicitly:: 26 | 27 | >>> pq('

toto

', parser='xml') 28 | [] 29 | >>> pq('

toto

', parser='html') 30 | [] 31 | >>> pq('

toto

', parser='html_fragments') 32 | [

] 33 | 34 | The html and html_fragments parsers are the ones from lxml.html. 35 | 36 | 37 | -------------------------------------------------------------------------------- /docs/ajax.txt: -------------------------------------------------------------------------------- 1 | :mod:`pyquery.ajax` -- PyQuery AJAX extension 2 | ============================================= 3 | 4 | .. automodule:: pyquery.ajax 5 | 6 | 7 | .. fake imports 8 | 9 | >>> from ajax import PyQuery as pq 10 | 11 | You can query a wsgi app if `WebOb`_ is installed (it's not a pyquery 12 | dependency). In this example the test app returns a simple input at `/` and a 13 | submit button at `/submit`:: 14 | 15 | >>> d = pq('

', app=input_app) 16 | >>> d.append(d.get('/')) 17 | [
] 18 | >>> print d 19 |
20 | 21 | The app is also available in new nodes:: 22 | 23 | >>> d.get('/').app is d.app is d('form').app 24 | True 25 | 26 | You can also request another path:: 27 | 28 | >>> d.append(d.get('/submit')) 29 | [
] 30 | >>> print d 31 |
32 | 33 | If `Paste`_ is installed, you are able to get url directly with a `Proxy`_ app:: 34 | 35 | >>> a = d.get('http://pyquery.org/') 36 | >>> a 37 | [] 38 | 39 | You can retrieve the app response:: 40 | 41 | >>> print a.response.status 42 | 200 OK 43 | 44 | The response attribute is a `WebOb`_ `Response`_ 45 | 46 | .. _webob: http://pythonpaste.org/webob/ 47 | .. _response: http://pythonpaste.org/webob/#response 48 | .. _paste: http://pythonpaste.org/ 49 | .. _proxy: http://pythonpaste.org/modules/proxy.html#paste.proxy.Proxy 50 | 51 | Api 52 | --- 53 | 54 | .. autoclass:: PyQuery 55 | :members: 56 | 57 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2008 - Olivier Lauzanne 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | 1. Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in 12 | the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | 3. Neither the name of Infrae nor the names of its contributors may 16 | be used to endorse or promote products derived from this software 17 | without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INFRAE OR 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | # 3 | # Copyright (C) 2008 - Olivier Lauzanne 4 | # Copyright (C) 2011 - David Schoonover 5 | # 6 | # Distributed under the BSD license, see LICENSE.txt 7 | 8 | from setuptools import setup, find_packages 9 | import sys, os 10 | 11 | def read(*names): 12 | values = dict() 13 | for name in names: 14 | filename = name+'.txt' 15 | if os.path.isfile(filename): 16 | value = open(name+'.txt').read() 17 | else: 18 | value = '' 19 | values[name] = value 20 | return values 21 | 22 | long_description=""" 23 | %(README)s 24 | 25 | See http://packages.python.org/pyquery/ for the full documentation 26 | 27 | News 28 | ==== 29 | 30 | %(CHANGES)s 31 | 32 | """ % read('README', 'CHANGES') 33 | 34 | version = '1.1' 35 | 36 | setup(name='pyquery', 37 | version=version, 38 | description='A jQuery-like library for Python', 39 | long_description=long_description, 40 | classifiers=[ 41 | "Intended Audience :: Developers", 42 | "Development Status :: 5 - Production/Stable", 43 | "Programming Language :: Python :: 2", 44 | "Programming Language :: Python :: 3", 45 | ], 46 | keywords='jquery html xml', 47 | maintainer='David Schoonover', 48 | maintainer_email='dsc@less.ly', 49 | author='Olivier Lauzanne', 50 | author_email='olauzanne@gmail.com', 51 | url='https://github.com/dsc/pyquery', 52 | license='BSD', 53 | packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), 54 | include_package_data=True, 55 | zip_safe=False, 56 | install_requires=[ 57 | 'lxml>=2.1' 58 | ], 59 | test_requires=['nose'], 60 | test_suite='nose.collector', 61 | entry_points=""" 62 | # -*- Entry points: -*- 63 | """, 64 | ) 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyQuery: a jQuery-like library for Python 2 | 3 | PyQuery allows you to make [jQuery](http://jquery.com)-style CSS-selector queries on XML/HTML 4 | documents. The API is intended to match [jQuery's API](http://api.jquery.com) whenever possible, 5 | though it has been made more Pythonic where appropriate. 6 | 7 | This [project](https://github.com/dsc/pyquery) is a fork of the [original](http://www.bitbucket.org/olauzanne/pyquery) 8 | PyQuery developed by Olivier Lauzanne in 2008; it is maintained by [David Schoonover](mailto:dsc@less.ly). Feedback and bug 9 | reports are both very welcome over on [github](https://github.com/dsc/pyquery/issues). 10 | 11 | 12 | ## Quickstart 13 | 14 | You can use the PyQuery class to load an xml document from a string, a lxml 15 | document, from a file or from an url: 16 | 17 | >>> from pyquery import PyQuery as pq 18 | >>> from lxml import etree 19 | >>> import urllib 20 | >>> d = pq("") 21 | >>> d = pq(etree.fromstring("")) 22 | >>> d = pq(url='http://google.com/') 23 | >>> # d = pq(url='http://google.com/', opener=lambda url: urllib.urlopen(url).read()) 24 | >>> d = pq(filename=path_to_html_file) 25 | 26 | Now `d` is like the `$` object in jQuery: 27 | 28 | >>> d("#hello") 29 | [] 30 | >>> p = d("#hello") 31 | >>> print(p.html()) 32 | Hello world ! 33 | >>> p.html("you know
Python rocks") 34 | [] 35 | >>> print(p.html()) 36 | you know Python rocks 37 | >>> print(p.text()) 38 | you know Python rocks 39 | 40 | You can use some of the pseudo classes that are available in jQuery but that 41 | are not standard in css such as `:first`, `:last`, `:even`, `:odd`, `:eq`, 42 | `:lt`, `:gt`, `:checked`, `:selected`, and `:file`. 43 | 44 | >>> d('p:first') 45 | [] 46 | 47 | 48 | ## Notes 49 | 50 | * PyQuery uses lxml for fast XML and HTML manipulation. 51 | * This is not a library to produce or interact with JavaScript code. If 52 | that's what you need, check out 53 | -------------------------------------------------------------------------------- /docs/manipulating.txt: -------------------------------------------------------------------------------- 1 | Manipulating 2 | ------------ 3 | 4 | You can also add content to the end of tags:: 5 | 6 | >>> d = pq('

you know Python rocks

') 7 | >>> d('p').append(' check out reddit') 8 | [] 9 | >>> print d 10 |

you know Python rocks check out reddit

11 | 12 | Or to the beginning:: 13 | 14 | >>> p = d('p') 15 | >>> p.prepend('check out reddit') 16 | [] 17 | >>> p.html() 18 | u'check out reddityou know ...' 19 | 20 | Prepend or append an element into another:: 21 | 22 | >>> d = pq('') 23 | >>> p.prependTo(d('#test')) 24 | [] 25 | >>> d('#test').html() 26 | u'

>> p.insertAfter(d('#test')) 31 | [] 32 | >>> d('#test').html() 33 | u'python !' 34 | 35 | Or before:: 36 | 37 | >>> p.insertBefore(d('#test')) 38 | [] 39 | >>> d('body').html() 40 | u'

...' 41 | 42 | Doing something for each element:: 43 | 44 | >>> p.each(lambda e: e.addClass('hello2')) 45 | [] 46 | 47 | Remove an element:: 48 | 49 | >>> d = pq('

Yeah!

python rocks !

') 50 | >>> d.remove('p#id') 51 | [] 52 | >>> d('p#id') 53 | [] 54 | 55 | Remove what's inside the selection:: 56 | 57 | >>> d('p').empty() 58 | [

] 59 | 60 | And you can get back the modified html:: 61 | 62 | >>> print d 63 |

64 | 65 | You can also generate HTML from scratch:: 66 | 67 | >>> from pyquery import PyQuery as pq 68 | >>> print pq('

Yeah !
').addClass('myclass') + pq('cool') 69 |
Yeah !
cool 70 | 71 | 72 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PyQuery: a jQuery-like library for Python 2 | ========================================= 3 | 4 | PyQuery allows you to make `jQuery`_-style CSS-selector queries on XML/HTML documents. 5 | The API is intended to match `jQuery's API`_ whenever possible, 6 | though it has been made more Pythonic where appropriate. 7 | 8 | This `project`_ is a fork of the `original`_ PyQuery developed by Olivier Lauzanne in 2008; 9 | it is maintained by `David Schoonover`_. Feedback and bug reports are 10 | both very welcome over on `github`_. 11 | 12 | .. _jQuery: http://jquery.com 13 | .. _jQuery's API: http://api.jquery.com 14 | .. _project: https://github.com/dsc/pyquery 15 | .. _original: http://www.bitbucket.org/olauzanne/pyquery 16 | .. _David Schoonover: mailto:dsc@less.ly 17 | .. _github: https://github.com/dsc/pyquery/issues 18 | 19 | 20 | Quickstart 21 | ========== 22 | 23 | You can use the PyQuery class to load an xml document from a string, a lxml 24 | document, from a file or from an url:: 25 | 26 | >>> from pyquery import PyQuery as pq 27 | >>> from lxml import etree 28 | >>> import urllib 29 | >>> d = pq("") 30 | >>> d = pq(etree.fromstring("")) 31 | >>> d = pq(url='http://google.com/') 32 | >>> # d = pq(url='http://google.com/', opener=lambda url: urllib.urlopen(url).read()) 33 | >>> d = pq(filename=path_to_html_file) 34 | 35 | Now d is like the $ in jQuery:: 36 | 37 | >>> d("#hello") 38 | [] 39 | >>> p = d("#hello") 40 | >>> print(p.html()) 41 | Hello world ! 42 | >>> p.html("you know Python rocks") 43 | [] 44 | >>> print(p.html()) 45 | you know Python rocks 46 | >>> print(p.text()) 47 | you know Python rocks 48 | 49 | You can use some of the pseudo classes that are available in jQuery but that 50 | are not standard in css such as :first :last :even :odd :eq :lt :gt :checked 51 | :selected :file:: 52 | 53 | >>> d('p:first') 54 | [] 55 | 56 | Notes 57 | ===== 58 | 59 | * PyQuery uses lxml for fast XML and HTML manipulation. 60 | * This is not a library to produce or interact with JavaScript code. If that's what you need, check out 61 | 62 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = ../bin/sphinx-build 7 | PAPER = 8 | 9 | # Internal variables. 10 | PAPEROPT_a4 = -D latex_paper_size=a4 11 | PAPEROPT_letter = -D latex_paper_size=letter 12 | ALLSPHINXOPTS = -d _build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 
13 | 14 | .PHONY: help clean html web pickle htmlhelp latex changes linkcheck 15 | 16 | help: 17 | @echo "Please use \`make ' where is one of" 18 | @echo " html to make standalone HTML files" 19 | @echo " pickle to make pickle files" 20 | @echo " json to make JSON files" 21 | @echo " htmlhelp to make HTML files and a HTML help project" 22 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 23 | @echo " changes to make an overview over all changed/added/deprecated items" 24 | @echo " linkcheck to check all external links for integrity" 25 | 26 | clean: 27 | -rm -rf _build/* 28 | 29 | html: 30 | mkdir -p _build/html _build/doctrees 31 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) _build/html 32 | @echo 33 | @echo "Build finished. The HTML pages are in _build/html." 34 | 35 | open: html 36 | open _build/html/index.html 37 | 38 | pickle: 39 | mkdir -p _build/pickle _build/doctrees 40 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) _build/pickle 41 | @echo 42 | @echo "Build finished; now you can process the pickle files." 43 | 44 | web: pickle 45 | 46 | json: 47 | mkdir -p _build/json _build/doctrees 48 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) _build/json 49 | @echo 50 | @echo "Build finished; now you can process the JSON files." 51 | 52 | htmlhelp: 53 | mkdir -p _build/htmlhelp _build/doctrees 54 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) _build/htmlhelp 55 | @echo 56 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 57 | ".hhp project file in _build/htmlhelp." 58 | 59 | latex: 60 | mkdir -p _build/latex _build/doctrees 61 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) _build/latex 62 | @echo 63 | @echo "Build finished; the LaTeX files are in _build/latex." 64 | @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ 65 | "run these through (pdf)latex." 66 | 67 | changes: 68 | mkdir -p _build/changes _build/doctrees 69 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) _build/changes 70 | @echo 71 | @echo "The overview file is in _build/changes." 72 | 73 | linkcheck: 74 | mkdir -p _build/linkcheck _build/doctrees 75 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) _build/linkcheck 76 | @echo 77 | @echo "Link check complete; look for any errors in the above output " \ 78 | "or in _build/linkcheck/output.txt." 
79 | -------------------------------------------------------------------------------- /pyquery/ajax.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import sys 3 | from .pyquery import PyQuery as Base 4 | from .pyquery import no_default 5 | 6 | if sys.version_info < (3,): 7 | from webob import Request, Response 8 | 9 | try: 10 | from paste.proxy import Proxy 11 | except ImportError: 12 | Proxy = no_default 13 | 14 | class PyQuery(Base): 15 | 16 | def __init__(self, *args, **kwargs): 17 | if 'response' in kwargs: 18 | self.response = kwargs.pop('response') 19 | else: 20 | self.response = Response() 21 | if 'app' in kwargs: 22 | self.app = kwargs.pop('app') 23 | if len(args) == 0: 24 | args = [[]] 25 | else: 26 | self.app = no_default 27 | Base.__init__(self, *args, **kwargs) 28 | if self._parent is not no_default: 29 | self.app = self._parent.app 30 | 31 | def _wsgi_get(self, path_info, **kwargs): 32 | if path_info.startswith('/'): 33 | if 'app' in kwargs: 34 | app = kwargs.pop('app') 35 | elif self.app is not no_default: 36 | app = self.app 37 | else: 38 | raise ValueError('There is no app available') 39 | else: 40 | if Proxy is not no_default: 41 | app = Proxy(path_info) 42 | path_info = '/' 43 | else: 44 | raise ImportError('Paste is not installed') 45 | 46 | if 'environ' in kwargs: 47 | environ = kwargs.pop('environ').copy() 48 | else: 49 | environ = {} 50 | if path_info: 51 | kwargs['PATH_INFO'] = path_info 52 | environ.update(kwargs) 53 | 54 | # unsuported (came from Deliverance) 55 | for key in ['HTTP_ACCEPT_ENCODING', 'HTTP_IF_MATCH', 'HTTP_IF_UNMODIFIED_SINCE', 56 | 'HTTP_RANGE', 'HTTP_IF_RANGE']: 57 | if key in environ: 58 | del environ[key] 59 | 60 | req = Request(environ) 61 | resp = req.get_response(app) 62 | status = resp.status.split() 63 | ctype = resp.content_type.split(';')[0] 64 | if status[0] not in '45' and ctype == 'text/html': 65 | body = resp.body 66 | else: 67 | body = [] 68 | result = self.__class__(body, 69 | parent=self._parent, 70 | app=self.app, # always return self.app 71 | response=resp) 72 | return result 73 | 74 | def get(self, path_info, **kwargs): 75 | """GET a path from wsgi app or url 76 | """ 77 | kwargs['REQUEST_METHOD'] = 'GET' 78 | return self._wsgi_get(path_info, **kwargs) 79 | 80 | def post(self, path_info, **kwargs): 81 | """POST a path from wsgi app or url 82 | """ 83 | kwargs['REQUEST_METHOD'] = 'POST' 84 | return self._wsgi_get(path_info, **kwargs) 85 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # pyquery documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Dec 6 13:08:03 2008. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # The contents of this file are pickled, so don't put values in the namespace 9 | # that aren't pickleable (module imports are okay, they're removed automatically). 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If your extensions are in another directory, add it here. If the directory 17 | # is relative to the documentation root, use os.path.abspath to make it 18 | # absolute, like shown here. 
19 | #sys.path.append(os.path.abspath('.')) 20 | 21 | # General configuration 22 | # --------------------- 23 | 24 | # Add any Sphinx extension module names here, as strings. They can be extensions 25 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 26 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.doctest'] 27 | 28 | # Add any paths that contain templates here, relative to this directory. 29 | templates_path = ['_templates'] 30 | 31 | # The suffix of source filenames. 32 | source_suffix = '.txt' 33 | 34 | # The encoding of source files. 35 | #source_encoding = 'utf-8' 36 | 37 | # The master toctree document. 38 | master_doc = 'index' 39 | 40 | # General information about the project. 41 | project = u'pyquery' 42 | copyright = u'2008, Olivier Lauzanne' 43 | 44 | # The version info for the project you're documenting, acts as replacement for 45 | # |version| and |release|, also used in various other places throughout the 46 | # built documents. 47 | # 48 | # The short X.Y version. 49 | version = '0.5' 50 | # The full version, including alpha/beta/rc tags. 51 | release = version 52 | 53 | # The language for content autogenerated by Sphinx. Refer to documentation 54 | # for a list of supported languages. 55 | #language = None 56 | 57 | # There are two options for replacing |today|: either, you set today to some 58 | # non-false value, then it is used: 59 | #today = '' 60 | # Else, today_fmt is used as the format for a strftime call. 61 | #today_fmt = '%B %d, %Y' 62 | 63 | # List of documents that shouldn't be included in the build. 64 | #unused_docs = [] 65 | 66 | # List of directories, relative to source directory, that shouldn't be searched 67 | # for source files. 68 | exclude_trees = ['.build'] 69 | 70 | # The reST default role (used for this markup: `text`) to use for all documents. 71 | #default_role = None 72 | 73 | # If true, '()' will be appended to :func: etc. cross-reference text. 74 | #add_function_parentheses = True 75 | 76 | # If true, the current module name will be prepended to all description 77 | # unit titles (such as .. function::). 78 | #add_module_names = True 79 | 80 | # If true, sectionauthor and moduleauthor directives will be shown in the 81 | # output. They are ignored by default. 82 | #show_authors = False 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = 'sphinx' 86 | 87 | 88 | # Options for HTML output 89 | # ----------------------- 90 | 91 | # The style sheet to use for HTML and HTML Help pages. A file of that name 92 | # must exist either in Sphinx' static/ path, or in one of the custom paths 93 | # given in html_static_path. 94 | html_style = 'default.css' 95 | 96 | # The name for this set of Sphinx documents. If None, it defaults to 97 | # " v documentation". 98 | #html_title = None 99 | 100 | # A shorter title for the navigation bar. Default is the same as html_title. 101 | #html_short_title = None 102 | 103 | # The name of an image file (relative to this directory) to place at the top 104 | # of the sidebar. 105 | #html_logo = None 106 | 107 | # The name of an image file (within the static path) to use as favicon of the 108 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 109 | # pixels large. 110 | #html_favicon = None 111 | 112 | # Add any paths that contain custom static files (such as style sheets) here, 113 | # relative to this directory. 
They are copied after the builtin static files, 114 | # so a file named "default.css" will overwrite the builtin "default.css". 115 | html_static_path = ['_static'] 116 | 117 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 118 | # using the given strftime format. 119 | #html_last_updated_fmt = '%b %d, %Y' 120 | 121 | # If true, SmartyPants will be used to convert quotes and dashes to 122 | # typographically correct entities. 123 | #html_use_smartypants = True 124 | 125 | # Custom sidebar templates, maps document names to template names. 126 | #html_sidebars = {} 127 | 128 | # Additional templates that should be rendered to pages, maps page names to 129 | # template names. 130 | #html_additional_pages = {} 131 | 132 | # If false, no module index is generated. 133 | #html_use_modindex = True 134 | 135 | # If false, no index is generated. 136 | #html_use_index = True 137 | 138 | # If true, the index is split into individual pages for each letter. 139 | #html_split_index = False 140 | 141 | # If true, the reST sources are included in the HTML build as _sources/. 142 | #html_copy_source = True 143 | 144 | # If true, an OpenSearch description file will be output, and all pages will 145 | # contain a tag referring to it. The value of this option must be the 146 | # base URL from which the finished HTML is served. 147 | #html_use_opensearch = '' 148 | 149 | # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). 150 | #html_file_suffix = '' 151 | 152 | # Output file base name for HTML help builder. 153 | htmlhelp_basename = 'pyquerydoc' 154 | 155 | 156 | # Options for LaTeX output 157 | # ------------------------ 158 | 159 | # The paper size ('letter' or 'a4'). 160 | #latex_paper_size = 'letter' 161 | 162 | # The font size ('10pt', '11pt' or '12pt'). 163 | #latex_font_size = '10pt' 164 | 165 | # Grouping the document tree into LaTeX files. List of tuples 166 | # (source start file, target name, title, author, document class [howto/manual]). 167 | latex_documents = [ 168 | ('index', 'pyquery.tex', ur'pyquery Documentation', 169 | ur'Olivier Lauzanne', 'manual'), 170 | ] 171 | 172 | # The name of an image file (relative to this directory) to place at the top of 173 | # the title page. 174 | #latex_logo = None 175 | 176 | # For "manual" documents, if this is true, then toplevel headings are parts, 177 | # not chapters. 178 | #latex_use_parts = False 179 | 180 | # Additional stuff for the LaTeX preamble. 181 | #latex_preamble = '' 182 | 183 | # Documents to append as an appendix to all manuals. 184 | #latex_appendices = [] 185 | 186 | # If false, no module index is generated. 
187 | #latex_use_modindex = True 188 | 189 | # Custom stuff 190 | 191 | from os import path 192 | pkg_dir = path.abspath(__file__).split('/docs')[0] 193 | setup = path.join(pkg_dir, 'setup.py') 194 | if path.isfile(setup): 195 | for line_ in open(setup): 196 | if line_.startswith("version"): 197 | version = line_.split('=')[-1] 198 | version = version.strip() 199 | version = version.strip("'\"") 200 | release = version 201 | break 202 | del pkg_dir, setup, path 203 | 204 | -------------------------------------------------------------------------------- /pyquery/cssselectpatch.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | # 3 | # Copyright (C) 2008 - Olivier Lauzanne 4 | # 5 | # Distributed under the BSD license, see LICENSE.txt 6 | from lxml.cssselect import Pseudo, XPathExpr, XPathExprOr, Function, css_to_xpath, Element 7 | from lxml import cssselect 8 | 9 | class JQueryPseudo(Pseudo): 10 | """This class is used to implement the css pseudo classes 11 | (:first, :last, ...) that are not defined in the css standard, 12 | but are defined in the jquery API. 13 | """ 14 | def _xpath_first(self, xpath): 15 | """Matches the first selected element. 16 | """ 17 | xpath.add_post_condition('position() = 1') 18 | return xpath 19 | 20 | def _xpath_last(self, xpath): 21 | """Matches the last selected element. 22 | """ 23 | xpath.add_post_condition('position() = last()') 24 | return xpath 25 | 26 | def _xpath_even(self, xpath): 27 | """Matches even elements, zero-indexed. 28 | """ 29 | # the first element is 1 in xpath and 0 in python and js 30 | xpath.add_post_condition('position() mod 2 = 1') 31 | return xpath 32 | 33 | def _xpath_odd(self, xpath): 34 | """Matches odd elements, zero-indexed. 35 | """ 36 | xpath.add_post_condition('position() mod 2 = 0') 37 | return xpath 38 | 39 | def _xpath_checked(self, xpath): 40 | """Matches odd elements, zero-indexed. 41 | """ 42 | xpath.add_condition("@checked and name(.) = 'input'") 43 | return xpath 44 | 45 | def _xpath_selected(self, xpath): 46 | """Matches all elements that are selected. 47 | """ 48 | xpath.add_condition("@selected and name(.) = 'option'") 49 | return xpath 50 | 51 | def _xpath_disabled(self, xpath): 52 | """Matches all elements that are disabled. 53 | """ 54 | xpath.add_condition("@disabled") 55 | return xpath 56 | 57 | def _xpath_enabled(self, xpath): 58 | """Matches all elements that are enabled. 59 | """ 60 | xpath.add_condition("not(@disabled) and name(.) = 'input'") 61 | return xpath 62 | 63 | def _xpath_file(self, xpath): 64 | """Matches all input elements of type file. 65 | """ 66 | xpath.add_condition("@type = 'file' and name(.) = 'input'") 67 | return xpath 68 | 69 | def _xpath_input(self, xpath): 70 | """Matches all input elements. 71 | """ 72 | xpath.add_condition("(name(.) = 'input' or name(.) = 'select') " 73 | + "or (name(.) = 'textarea' or name(.) = 'button')") 74 | return xpath 75 | 76 | def _xpath_button(self, xpath): 77 | """Matches all button input elements and the button element. 78 | """ 79 | xpath.add_condition("(@type = 'button' and name(.) = 'input') " 80 | + "or name(.) = 'button'") 81 | return xpath 82 | 83 | def _xpath_radio(self, xpath): 84 | """Matches all radio input elements. 85 | """ 86 | xpath.add_condition("@type = 'radio' and name(.) = 'input'") 87 | return xpath 88 | 89 | def _xpath_text(self, xpath): 90 | """Matches all text input elements. 91 | """ 92 | xpath.add_condition("@type = 'text' and name(.) 
= 'input'") 93 | return xpath 94 | 95 | def _xpath_checkbox(self, xpath): 96 | """Matches all checkbox input elements. 97 | """ 98 | xpath.add_condition("@type = 'checkbox' and name(.) = 'input'") 99 | return xpath 100 | 101 | def _xpath_password(self, xpath): 102 | """Matches all password input elements. 103 | """ 104 | xpath.add_condition("@type = 'password' and name(.) = 'input'") 105 | return xpath 106 | 107 | def _xpath_submit(self, xpath): 108 | """Matches all submit input elements. 109 | """ 110 | xpath.add_condition("@type = 'submit' and name(.) = 'input'") 111 | return xpath 112 | 113 | def _xpath_image(self, xpath): 114 | """Matches all image input elements. 115 | """ 116 | xpath.add_condition("@type = 'image' and name(.) = 'input'") 117 | return xpath 118 | 119 | def _xpath_reset(self, xpath): 120 | """Matches all reset input elements. 121 | """ 122 | xpath.add_condition("@type = 'reset' and name(.) = 'input'") 123 | return xpath 124 | 125 | def _xpath_header(self, xpath): 126 | """Matches all header elelements (h1, ..., h6) 127 | """ 128 | # this seems kind of brute-force, is there a better way? 129 | xpath.add_condition("(name(.) = 'h1' or name(.) = 'h2' or name (.) = 'h3') " 130 | + "or (name(.) = 'h4' or name (.) = 'h5' or name(.) = 'h6')") 131 | return xpath 132 | 133 | def _xpath_parent(self, xpath): 134 | """Match all elements that contain other elements 135 | """ 136 | xpath.add_condition("count(child::*) > 0") 137 | return xpath 138 | 139 | def _xpath_empty(self, xpath): 140 | """Match all elements that do not contain other elements 141 | """ 142 | xpath.add_condition("count(child::*) = 0") 143 | return xpath 144 | 145 | cssselect.Pseudo = JQueryPseudo 146 | 147 | class JQueryFunction(Function): 148 | """Represents selector:name(expr) that are present in JQuery but not in the 149 | css standard. 150 | """ 151 | def _xpath_eq(self, xpath, expr): 152 | """Matches a single element by its index. 153 | """ 154 | xpath.add_post_condition('position() = %s' % int(expr+1)) 155 | return xpath 156 | 157 | def _xpath_gt(self, xpath, expr): 158 | """Matches all elements with an index over the given one. 159 | """ 160 | xpath.add_post_condition('position() > %s' % int(expr+1)) 161 | return xpath 162 | 163 | def _xpath_lt(self, xpath, expr): 164 | """Matches all elements with an index below the given one. 
165 | """ 166 | xpath.add_post_condition('position() < %s' % int(expr+1)) 167 | return xpath 168 | 169 | def _xpath_contains(self, xpath, expr): 170 | """Matches all elements that contain the given text 171 | """ 172 | xpath.add_post_condition("contains(text(), '%s')" % str(expr)) 173 | return xpath 174 | 175 | cssselect.Function = JQueryFunction 176 | 177 | class AdvancedXPathExpr(XPathExpr): 178 | def __init__(self, prefix=None, path=None, element='*', condition=None, 179 | post_condition=None, star_prefix=False): 180 | self.prefix = prefix 181 | self.path = path 182 | self.element = element 183 | self.condition = condition 184 | self.post_condition = post_condition 185 | self.star_prefix = star_prefix 186 | 187 | def add_post_condition(self, post_condition): 188 | if self.post_condition: 189 | self.post_condition = '%s and (%s)' % (self.post_condition, 190 | post_condition) 191 | else: 192 | self.post_condition = post_condition 193 | 194 | def __str__(self): 195 | path = XPathExpr.__str__(self) 196 | if self.post_condition: 197 | path = '(%s)[%s]' % (path, self.post_condition) 198 | return path 199 | 200 | def join(self, combiner, other): 201 | XPathExpr.join(self, combiner, other) 202 | self.post_condition = other.post_condition 203 | 204 | cssselect.XPathExpr = AdvancedXPathExpr 205 | 206 | class AdvancedXPathExprOr(XPathExprOr): 207 | def __init__(self, items, prefix=None): 208 | self.prefix = prefix = prefix or '' 209 | self.items = items 210 | self.prefix_prepended = False 211 | 212 | def __str__(self): 213 | if not self.prefix_prepended: 214 | # We cannot prepend the prefix at __init__ since it's legal to 215 | # modify it after construction. And because __str__ can be called 216 | # multiple times we have to take care not to prepend it twice. 217 | prefix = self.prefix or '' 218 | for item in self.items: 219 | item.prefix = prefix+(item.prefix or '') 220 | self.prefix_prepended = True 221 | return ' | '.join([str(i) for i in self.items]) 222 | 223 | cssselect.XPathExprOr = AdvancedXPathExprOr 224 | 225 | class JQueryElement(Element): 226 | """ 227 | Represents namespace|element 228 | """ 229 | 230 | def xpath(self): 231 | if self.namespace == '*': 232 | el = self.element 233 | else: 234 | # FIXME: Should we lowercase here? 235 | el = '%s:%s' % (self.namespace, self.element) 236 | return AdvancedXPathExpr(element=el) 237 | 238 | cssselect.Element = JQueryElement 239 | 240 | def selector_to_xpath(selector, prefix='descendant-or-self::'): 241 | """JQuery selector to xpath. 242 | """ 243 | selector = selector.replace('[@', '[') 244 | return css_to_xpath(selector, prefix) 245 | -------------------------------------------------------------------------------- /bootstrap.py: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | # 3 | # Copyright (c) 2006 Zope Foundation and Contributors. 4 | # All Rights Reserved. 5 | # 6 | # This software is subject to the provisions of the Zope Public License, 7 | # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. 8 | # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED 9 | # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 10 | # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS 11 | # FOR A PARTICULAR PURPOSE. 
12 | # 13 | ############################################################################## 14 | """Bootstrap a buildout-based project 15 | 16 | Simply run this script in a directory containing a buildout.cfg. 17 | The script accepts buildout command-line options, so you can 18 | use the -c option to specify an alternate configuration file. 19 | """ 20 | 21 | import os, shutil, sys, tempfile, textwrap, urllib, urllib2, subprocess 22 | from optparse import OptionParser 23 | 24 | if sys.platform == 'win32': 25 | def quote(c): 26 | if ' ' in c: 27 | return '"%s"' % c # work around spawn lamosity on windows 28 | else: 29 | return c 30 | else: 31 | quote = str 32 | 33 | # See zc.buildout.easy_install._has_broken_dash_S for motivation and comments. 34 | stdout, stderr = subprocess.Popen( 35 | [sys.executable, '-Sc', 36 | 'try:\n' 37 | ' import ConfigParser\n' 38 | 'except ImportError:\n' 39 | ' print 1\n' 40 | 'else:\n' 41 | ' print 0\n'], 42 | stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 43 | has_broken_dash_S = bool(int(stdout.strip())) 44 | 45 | # In order to be more robust in the face of system Pythons, we want to 46 | # run without site-packages loaded. This is somewhat tricky, in 47 | # particular because Python 2.6's distutils imports site, so starting 48 | # with the -S flag is not sufficient. However, we'll start with that: 49 | if not has_broken_dash_S and 'site' in sys.modules: 50 | # We will restart with python -S. 51 | args = sys.argv[:] 52 | args[0:0] = [sys.executable, '-S'] 53 | args = map(quote, args) 54 | os.execv(sys.executable, args) 55 | # Now we are running with -S. We'll get the clean sys.path, import site 56 | # because distutils will do it later, and then reset the path and clean 57 | # out any namespace packages from site-packages that might have been 58 | # loaded by .pth files. 59 | clean_path = sys.path[:] 60 | import site 61 | sys.path[:] = clean_path 62 | for k, v in sys.modules.items(): 63 | if k in ('setuptools', 'pkg_resources') or ( 64 | hasattr(v, '__path__') and 65 | len(v.__path__)==1 and 66 | not os.path.exists(os.path.join(v.__path__[0],'__init__.py'))): 67 | # This is a namespace package. Remove it. 68 | sys.modules.pop(k) 69 | 70 | is_jython = sys.platform.startswith('java') 71 | 72 | setuptools_source = 'http://peak.telecommunity.com/dist/ez_setup.py' 73 | distribute_source = 'http://python-distribute.org/distribute_setup.py' 74 | 75 | # parsing arguments 76 | def normalize_to_url(option, opt_str, value, parser): 77 | if value: 78 | if '://' not in value: # It doesn't smell like a URL. 79 | value = 'file://%s' % ( 80 | urllib.pathname2url( 81 | os.path.abspath(os.path.expanduser(value))),) 82 | if opt_str == '--download-base' and not value.endswith('/'): 83 | # Download base needs a trailing slash to make the world happy. 84 | value += '/' 85 | else: 86 | value = None 87 | name = opt_str[2:].replace('-', '_') 88 | setattr(parser.values, name, value) 89 | 90 | usage = '''\ 91 | [DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options] 92 | 93 | Bootstraps a buildout-based project. 94 | 95 | Simply run this script in a directory containing a buildout.cfg, using the 96 | Python that you want bin/buildout to use. 97 | 98 | Note that by using --setup-source and --download-base to point to 99 | local resources, you can keep this script from going over the network. 
100 | ''' 101 | 102 | parser = OptionParser(usage=usage) 103 | parser.add_option("-v", "--version", dest="version", 104 | help="use a specific zc.buildout version") 105 | parser.add_option("-d", "--distribute", 106 | action="store_true", dest="use_distribute", default=False, 107 | help="Use Distribute rather than Setuptools.") 108 | parser.add_option("--setup-source", action="callback", dest="setup_source", 109 | callback=normalize_to_url, nargs=1, type="string", 110 | help=("Specify a URL or file location for the setup file. " 111 | "If you use Setuptools, this will default to " + 112 | setuptools_source + "; if you use Distribute, this " 113 | "will default to " + distribute_source +".")) 114 | parser.add_option("--download-base", action="callback", dest="download_base", 115 | callback=normalize_to_url, nargs=1, type="string", 116 | help=("Specify a URL or directory for downloading " 117 | "zc.buildout and either Setuptools or Distribute. " 118 | "Defaults to PyPI.")) 119 | parser.add_option("--eggs", 120 | help=("Specify a directory for storing eggs. Defaults to " 121 | "a temporary directory that is deleted when the " 122 | "bootstrap script completes.")) 123 | parser.add_option("-t", "--accept-buildout-test-releases", 124 | dest='accept_buildout_test_releases', 125 | action="store_true", default=False, 126 | help=("Normally, if you do not specify a --version, the " 127 | "bootstrap script and buildout gets the newest " 128 | "*final* versions of zc.buildout and its recipes and " 129 | "extensions for you. If you use this flag, " 130 | "bootstrap and buildout will get the newest releases " 131 | "even if they are alphas or betas.")) 132 | parser.add_option("-c", None, action="store", dest="config_file", 133 | help=("Specify the path to the buildout configuration " 134 | "file to be used.")) 135 | 136 | options, args = parser.parse_args() 137 | 138 | # if -c was provided, we push it back into args for buildout's main function 139 | if options.config_file is not None: 140 | args += ['-c', options.config_file] 141 | 142 | if options.eggs: 143 | eggs_dir = os.path.abspath(os.path.expanduser(options.eggs)) 144 | else: 145 | eggs_dir = tempfile.mkdtemp() 146 | 147 | if options.setup_source is None: 148 | if options.use_distribute: 149 | options.setup_source = distribute_source 150 | else: 151 | options.setup_source = setuptools_source 152 | 153 | if options.accept_buildout_test_releases: 154 | args.append('buildout:accept-buildout-test-releases=true') 155 | args.append('bootstrap') 156 | 157 | try: 158 | import pkg_resources 159 | import setuptools # A flag. Sometimes pkg_resources is installed alone. 160 | if not hasattr(pkg_resources, '_distribute'): 161 | raise ImportError 162 | except ImportError: 163 | ez_code = urllib2.urlopen( 164 | options.setup_source).read().replace('\r\n', '\n') 165 | ez = {} 166 | exec ez_code in ez 167 | setup_args = dict(to_dir=eggs_dir, download_delay=0) 168 | if options.download_base: 169 | setup_args['download_base'] = options.download_base 170 | if options.use_distribute: 171 | setup_args['no_fake'] = True 172 | ez['use_setuptools'](**setup_args) 173 | if 'pkg_resources' in sys.modules: 174 | reload(sys.modules['pkg_resources']) 175 | import pkg_resources 176 | # This does not (always?) update the default working set. We will 177 | # do it. 
178 | for path in sys.path: 179 | if path not in pkg_resources.working_set.entries: 180 | pkg_resources.working_set.add_entry(path) 181 | 182 | cmd = [quote(sys.executable), 183 | '-c', 184 | quote('from setuptools.command.easy_install import main; main()'), 185 | '-mqNxd', 186 | quote(eggs_dir)] 187 | 188 | if not has_broken_dash_S: 189 | cmd.insert(1, '-S') 190 | 191 | find_links = options.download_base 192 | if not find_links: 193 | find_links = os.environ.get('bootstrap-testing-find-links') 194 | if find_links: 195 | cmd.extend(['-f', quote(find_links)]) 196 | 197 | if options.use_distribute: 198 | setup_requirement = 'distribute' 199 | else: 200 | setup_requirement = 'setuptools' 201 | ws = pkg_resources.working_set 202 | setup_requirement_path = ws.find( 203 | pkg_resources.Requirement.parse(setup_requirement)).location 204 | env = dict( 205 | os.environ, 206 | PYTHONPATH=setup_requirement_path) 207 | 208 | requirement = 'zc.buildout' 209 | version = options.version 210 | if version is None and not options.accept_buildout_test_releases: 211 | # Figure out the most recent final version of zc.buildout. 212 | import setuptools.package_index 213 | _final_parts = '*final-', '*final' 214 | def _final_version(parsed_version): 215 | for part in parsed_version: 216 | if (part[:1] == '*') and (part not in _final_parts): 217 | return False 218 | return True 219 | index = setuptools.package_index.PackageIndex( 220 | search_path=[setup_requirement_path]) 221 | if find_links: 222 | index.add_find_links((find_links,)) 223 | req = pkg_resources.Requirement.parse(requirement) 224 | if index.obtain(req) is not None: 225 | best = [] 226 | bestv = None 227 | for dist in index[req.project_name]: 228 | distv = dist.parsed_version 229 | if _final_version(distv): 230 | if bestv is None or distv > bestv: 231 | best = [dist] 232 | bestv = distv 233 | elif distv == bestv: 234 | best.append(dist) 235 | if best: 236 | best.sort() 237 | version = best[-1].version 238 | if version: 239 | requirement = '=='.join((requirement, version)) 240 | cmd.append(requirement) 241 | 242 | if is_jython: 243 | import subprocess 244 | exitcode = subprocess.Popen(cmd, env=env).wait() 245 | else: # Windows prefers this, apparently; otherwise we would prefer subprocess 246 | exitcode = os.spawnle(*([os.P_WAIT, sys.executable] + cmd + [env])) 247 | if exitcode != 0: 248 | sys.stdout.flush() 249 | sys.stderr.flush() 250 | print ("An error occurred when trying to install zc.buildout. " 251 | "Look above this message for any errors that " 252 | "were output by easy_install.") 253 | sys.exit(exitcode) 254 | 255 | ws.add_entry(eggs_dir) 256 | ws.require(requirement) 257 | import zc.buildout.buildout 258 | zc.buildout.buildout.main(args) 259 | if not options.eggs: # clean up temporary egg directory 260 | shutil.rmtree(eggs_dir) 261 | -------------------------------------------------------------------------------- /bootstrap-py3k.py: -------------------------------------------------------------------------------- 1 | ############################################################################## 2 | # 3 | # Copyright (c) 2006 Zope Foundation and Contributors. 4 | # All Rights Reserved. 5 | # 6 | # This software is subject to the provisions of the Zope Public License, 7 | # Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution. 
8 | # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED 9 | # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 10 | # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS 11 | # FOR A PARTICULAR PURPOSE. 12 | # 13 | ############################################################################## 14 | """Bootstrap a buildout-based project 15 | 16 | Simply run this script in a directory containing a buildout.cfg. 17 | The script accepts buildout command-line options, so you can 18 | use the -c option to specify an alternate configuration file. 19 | """ 20 | 21 | import os, shutil, sys, tempfile, textwrap 22 | try: 23 | import urllib.request as urllib2 24 | except ImportError: 25 | import urllib2 26 | import subprocess 27 | from optparse import OptionParser 28 | 29 | if sys.platform == 'win32': 30 | def quote(c): 31 | if ' ' in c: 32 | return '"%s"' % c # work around spawn lamosity on windows 33 | else: 34 | return c 35 | else: 36 | quote = str 37 | 38 | # See zc.buildout.easy_install._has_broken_dash_S for motivation and comments. 39 | stdout, stderr = subprocess.Popen( 40 | [sys.executable, '-S', '-c', 41 | 'try:\n' 42 | ' import pickle\n' 43 | 'except ImportError:\n' 44 | ' print(1)\n' 45 | 'else:\n' 46 | ' print(0)\n'], 47 | stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() 48 | has_broken_dash_S = bool(int(stdout.strip())) 49 | 50 | # In order to be more robust in the face of system Pythons, we want to 51 | # run without site-packages loaded. This is somewhat tricky, in 52 | # particular because Python 2.6's distutils imports site, so starting 53 | # with the -S flag is not sufficient. However, we'll start with that: 54 | if not has_broken_dash_S and 'site' in sys.modules: 55 | # We will restart with python -S. 56 | args = sys.argv[:] 57 | args[0:0] = [sys.executable, '-S'] 58 | args = list(map(quote, args)) 59 | os.execv(sys.executable, args) 60 | 61 | # Now we are running with -S. We'll get the clean sys.path, import site 62 | # because distutils will do it later, and then reset the path and clean 63 | # out any namespace packages from site-packages that might have been 64 | # loaded by .pth files. 65 | clean_path = sys.path[:] 66 | import site 67 | sys.path[:] = clean_path 68 | for k, v in list(sys.modules.items()): 69 | if k in ('setuptools', 'pkg_resources') or ( 70 | hasattr(v, '__path__') and 71 | len(v.__path__)==1 and 72 | not os.path.exists(os.path.join(v.__path__[0],'__init__.py'))): 73 | # This is a namespace package. Remove it. 74 | sys.modules.pop(k) 75 | 76 | is_jython = sys.platform.startswith('java') 77 | 78 | setuptools_source = 'http://peak.telecommunity.com/dist/ez_setup.py' 79 | distribute_source = 'http://python-distribute.org/distribute_setup.py' 80 | 81 | # parsing arguments 82 | def normalize_to_url(option, opt_str, value, parser): 83 | if value: 84 | if '://' not in value: # It doesn't smell like a URL. 85 | value = 'file://%s' % ( 86 | urllib2.pathname2url( 87 | os.path.abspath(os.path.expanduser(value))),) 88 | if opt_str == '--download-base' and not value.endswith('/'): 89 | # Download base needs a trailing slash to make the world happy. 90 | value += '/' 91 | else: 92 | value = None 93 | name = opt_str[2:].replace('-', '_') 94 | setattr(parser.values, name, value) 95 | 96 | usage = '''\ 97 | [DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options] 98 | 99 | Bootstraps a buildout-based project. 
100 | 101 | Simply run this script in a directory containing a buildout.cfg, using the 102 | Python that you want bin/buildout to use. 103 | 104 | Note that by using --setup-source and --download-base to point to 105 | local resources, you can keep this script from going over the network. 106 | ''' 107 | 108 | parser = OptionParser(usage=usage) 109 | parser.add_option("-v", "--version", dest="version", 110 | help="use a specific zc.buildout version") 111 | parser.add_option("--setup-version", dest="setup_version", 112 | help="The version of setuptools or distribute to use.") 113 | parser.add_option("-d", "--distribute", 114 | action="store_true", dest="use_distribute", 115 | default= sys.version_info[0] >= 3, 116 | help="Use Distribute rather than Setuptools.") 117 | parser.add_option("--setup-source", action="callback", dest="setup_source", 118 | callback=normalize_to_url, nargs=1, type="string", 119 | help=("Specify a URL or file location for the setup file. " 120 | "If you use Setuptools, this will default to " + 121 | setuptools_source + "; if you use Distribute, this " 122 | "will default to " + distribute_source +".")) 123 | parser.add_option("--download-base", action="callback", dest="download_base", 124 | callback=normalize_to_url, nargs=1, type="string", 125 | help=("Specify a URL or directory for downloading " 126 | "zc.buildout and either Setuptools or Distribute. " 127 | "Defaults to PyPI.")) 128 | parser.add_option("--eggs", 129 | help=("Specify a directory for storing eggs. Defaults to " 130 | "a temporary directory that is deleted when the " 131 | "bootstrap script completes.")) 132 | parser.add_option("-t", "--accept-buildout-test-releases", 133 | dest='accept_buildout_test_releases', 134 | action="store_true", 135 | default=sys.version_info[0] > 2, 136 | help=("Normally, if you do not specify a --version, the " 137 | "bootstrap script and buildout gets the newest " 138 | "*final* versions of zc.buildout and its recipes and " 139 | "extensions for you. If you use this flag, " 140 | "bootstrap and buildout will get the newest releases " 141 | "even if they are alphas or betas.")) 142 | parser.add_option("-c", None, action="store", dest="config_file", 143 | help=("Specify the path to the buildout configuration " 144 | "file to be used.")) 145 | 146 | options, args = parser.parse_args() 147 | 148 | # if -c was provided, we push it back into args for buildout's main function 149 | if options.config_file is not None: 150 | args += ['-c', options.config_file] 151 | 152 | if options.eggs: 153 | eggs_dir = os.path.abspath(os.path.expanduser(options.eggs)) 154 | else: 155 | eggs_dir = tempfile.mkdtemp() 156 | 157 | if options.setup_source is None: 158 | if options.use_distribute: 159 | options.setup_source = distribute_source 160 | else: 161 | options.setup_source = setuptools_source 162 | 163 | if options.accept_buildout_test_releases: 164 | args.append('buildout:accept-buildout-test-releases=true') 165 | args.append('bootstrap') 166 | 167 | try: 168 | import pkg_resources 169 | import setuptools # A flag. Sometimes pkg_resources is installed alone. 
170 | if not hasattr(pkg_resources, '_distribute'): 171 | raise ImportError 172 | except ImportError: 173 | ez_code = urllib2.urlopen( 174 | options.setup_source).read().replace('\r\n'.encode(), '\n'.encode()) 175 | ez = {} 176 | exec(ez_code, ez) 177 | setup_args = dict(to_dir=eggs_dir, download_delay=0) 178 | if options.download_base: 179 | setup_args['download_base'] = options.download_base 180 | if options.setup_version: 181 | setup_args['version'] = options.setup_version 182 | if options.use_distribute: 183 | setup_args['no_fake'] = True 184 | ez['use_setuptools'](**setup_args) 185 | if 'pkg_resources' in sys.modules: 186 | if sys.version_info[0] >= 3: 187 | import imp 188 | reload_ = imp.reload 189 | else: 190 | reload_ = reload 191 | 192 | reload_(sys.modules['pkg_resources']) 193 | import pkg_resources 194 | # This does not (always?) update the default working set. We will 195 | # do it. 196 | for path in sys.path: 197 | if path not in pkg_resources.working_set.entries: 198 | pkg_resources.working_set.add_entry(path) 199 | 200 | cmd = [quote(sys.executable), 201 | '-c', 202 | quote('from setuptools.command.easy_install import main; main()'), 203 | '-mqNxd', 204 | quote(eggs_dir)] 205 | 206 | if not has_broken_dash_S: 207 | cmd.insert(1, '-S') 208 | 209 | find_links = options.download_base 210 | if not find_links: 211 | find_links = os.environ.get('bootstrap-testing-find-links') 212 | if find_links: 213 | cmd.extend(['-f', quote(find_links)]) 214 | 215 | if options.use_distribute: 216 | setup_requirement = 'distribute' 217 | else: 218 | setup_requirement = 'setuptools' 219 | ws = pkg_resources.working_set 220 | setup_requirement_path = ws.find( 221 | pkg_resources.Requirement.parse(setup_requirement)).location 222 | env = dict( 223 | os.environ, 224 | PYTHONPATH=setup_requirement_path) 225 | 226 | requirement = 'zc.buildout' 227 | version = options.version 228 | if version is None and not options.accept_buildout_test_releases: 229 | # Figure out the most recent final version of zc.buildout. 230 | import setuptools.package_index 231 | _final_parts = '*final-', '*final' 232 | def _final_version(parsed_version): 233 | for part in parsed_version: 234 | if (part[:1] == '*') and (part not in _final_parts): 235 | return False 236 | return True 237 | index = setuptools.package_index.PackageIndex( 238 | search_path=[setup_requirement_path]) 239 | if find_links: 240 | index.add_find_links((find_links,)) 241 | req = pkg_resources.Requirement.parse(requirement) 242 | if index.obtain(req) is not None: 243 | best = [] 244 | bestv = None 245 | for dist in index[req.project_name]: 246 | distv = dist.parsed_version 247 | if _final_version(distv): 248 | if bestv is None or distv > bestv: 249 | best = [dist] 250 | bestv = distv 251 | elif distv == bestv: 252 | best.append(dist) 253 | if best: 254 | best.sort() 255 | version = best[-1].version 256 | if version: 257 | requirement = '=='.join((requirement, version)) 258 | cmd.append(requirement) 259 | 260 | if is_jython: 261 | import subprocess 262 | exitcode = subprocess.Popen(cmd, env=env).wait() 263 | else: # Windows prefers this, apparently; otherwise we would prefer subprocess 264 | exitcode = os.spawnle(*([os.P_WAIT, sys.executable] + cmd + [env])) 265 | if exitcode != 0: 266 | sys.stdout.flush() 267 | sys.stderr.flush() 268 | print("An error occurred when trying to install zc.buildout. 
" 269 | "Look above this message for any errors that " 270 | "were output by easy_install.") 271 | sys.exit(exitcode) 272 | 273 | ws.add_entry(eggs_dir) 274 | ws.require(requirement) 275 | import zc.buildout.buildout 276 | zc.buildout.buildout.main(args) 277 | if not options.eggs: # clean up temporary egg directory 278 | shutil.rmtree(eggs_dir) 279 | -------------------------------------------------------------------------------- /pyquery/test.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | # 3 | # Copyright (C) 2008 - Olivier Lauzanne 4 | # 5 | # Distributed under the BSD license, see LICENSE.txt 6 | from lxml import etree 7 | import unittest 8 | import doctest 9 | import socket 10 | import sys 11 | import os 12 | 13 | PY3k = sys.version_info >= (3,) 14 | 15 | if PY3k: 16 | from io import StringIO 17 | import pyquery 18 | from pyquery.pyquery import PyQuery as pq 19 | from http.client import HTTPConnection 20 | pqa = pq 21 | else: 22 | from cStringIO import StringIO 23 | import pyquery 24 | from httplib import HTTPConnection 25 | from webob import Request, Response, exc 26 | from pyquery import PyQuery as pq 27 | from ajax import PyQuery as pqa 28 | 29 | socket.setdefaulttimeout(1) 30 | 31 | try: 32 | conn = HTTPConnection("pyquery.org:80") 33 | conn.request("GET", "/") 34 | response = conn.getresponse() 35 | except (socket.timeout, socket.error): 36 | GOT_NET=False 37 | else: 38 | GOT_NET=True 39 | 40 | 41 | def with_net(func): 42 | if GOT_NET: 43 | return func 44 | 45 | def not_py3k(func): 46 | if not PY3k: 47 | return func 48 | 49 | dirname = os.path.dirname(os.path.abspath(pyquery.__file__)) 50 | docs = os.path.join(os.path.dirname(dirname), 'docs') 51 | path_to_html_file = os.path.join(dirname, 'test.html') 52 | 53 | def input_app(environ, start_response): 54 | resp = Response() 55 | req = Request(environ) 56 | if req.path_info == '/': 57 | resp.body = '' 58 | elif req.path_info == '/submit': 59 | resp.body = '' 60 | else: 61 | resp.body = '' 62 | return resp(environ, start_response) 63 | 64 | class TestReadme(doctest.DocFileCase): 65 | path = os.path.join(dirname, '..', 'README.txt') 66 | 67 | def __init__(self, *args, **kwargs): 68 | parser = doctest.DocTestParser() 69 | doc = open(self.path).read() 70 | test = parser.get_doctest(doc, globals(), '', self.path, 0) 71 | doctest.DocFileCase.__init__(self, test, optionflags=doctest.ELLIPSIS) 72 | 73 | def setUp(self): 74 | test = self._dt_test 75 | test.globs.update(globals()) 76 | 77 | for filename in os.listdir(docs): 78 | if filename.endswith('.txt'): 79 | if not GOT_NET and filename in ('ajax.txt', 'tips.txt'): 80 | continue 81 | if PY3k and filename in ('ajax.txt',): 82 | continue 83 | klass_name = 'Test%s' % filename.replace('.txt', '').title() 84 | path = os.path.join(docs, filename) 85 | exec('%s = type("%s", (TestReadme,), dict(path=path))' % (klass_name, klass_name)) 86 | 87 | class TestTests(doctest.DocFileCase): 88 | path = os.path.join(dirname, 'tests.txt') 89 | 90 | def __init__(self, *args, **kwargs): 91 | parser = doctest.DocTestParser() 92 | doc = open(self.path).read() 93 | test = parser.get_doctest(doc, globals(), '', self.path, 0) 94 | doctest.DocFileCase.__init__(self, test, optionflags=doctest.ELLIPSIS) 95 | 96 | class TestUnicode(unittest.TestCase): 97 | 98 | @not_py3k 99 | def test_unicode(self): 100 | xml = pq(unicode("

é

", 'utf-8')) 101 | self.assertEqual(unicode(xml), unicode("

é

", 'utf-8')) 102 | self.assertEqual(type(xml.html()), unicode) 103 | self.assertEqual(str(xml), '

é

') 104 | 105 | 106 | class TestSelector(unittest.TestCase): 107 | klass = pq 108 | html = """ 109 | 110 | 111 |
node1
112 |
node2
113 |
node3
114 | 115 | 116 | """ 117 | 118 | html2 = """ 119 | 120 | 121 |
node1
122 | 123 | 124 | """ 125 | 126 | html3 = """ 127 | 128 | 129 |
node1
130 |
node2
131 |
node3
132 | 133 | 134 | """ 135 | 136 | html4 = """ 137 | 138 | 139 |
140 | 141 | 142 | 143 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 |
158 | 159 | 160 | """ 161 | 162 | html5 = """ 163 | 164 | 165 |

Heading 1

166 |

Heading 2

167 |

Heading 3

168 |

Heading 4

169 |
Heading 5
170 |
Heading 6
171 | 172 | 173 | """ 174 | 175 | @not_py3k 176 | def test_get_root(self): 177 | doc = pq('

') 178 | self.assertEqual(isinstance(doc.root, etree._ElementTree), True) 179 | self.assertEqual(doc.encoding, 'UTF-8') 180 | 181 | def test_selector_from_doc(self): 182 | doc = etree.fromstring(self.html) 183 | assert len(self.klass(doc)) == 1 184 | assert len(self.klass('div', doc)) == 3 185 | assert len(self.klass('div#node2', doc)) == 1 186 | 187 | def test_selector_from_html(self): 188 | assert len(self.klass(self.html)) == 1 189 | assert len(self.klass('div', self.html)) == 3 190 | assert len(self.klass('div#node2', self.html)) == 1 191 | 192 | def test_selector_from_obj(self): 193 | e = self.klass(self.html) 194 | assert len(e('div')) == 3 195 | assert len(e('div#node2')) == 1 196 | 197 | def test_selector_from_html_from_obj(self): 198 | e = self.klass(self.html) 199 | assert len(e('div', self.html2)) == 1 200 | assert len(e('div#node2', self.html2)) == 0 201 | 202 | def test_class(self): 203 | e = self.klass(self.html) 204 | assert isinstance(e, self.klass) 205 | n = e('div', self.html2) 206 | assert isinstance(n, self.klass) 207 | assert n._parent is e 208 | 209 | def test_pseudo_classes(self): 210 | e = self.klass(self.html) 211 | self.assertEqual(e('div:first').text(), 'node1') 212 | self.assertEqual(e('div:last').text(), 'node3') 213 | self.assertEqual(e('div:even').text(), 'node1 node3') 214 | self.assertEqual(e('div div:even').text(), None) 215 | self.assertEqual(e('body div:even').text(), 'node1 node3') 216 | self.assertEqual(e('div:gt(0)').text(), 'node2 node3') 217 | self.assertEqual(e('div:lt(1)').text(), 'node1') 218 | self.assertEqual(e('div:eq(2)').text(), 'node3') 219 | 220 | #test on the form 221 | e = self.klass(self.html4) 222 | assert len(e(':disabled')) == 1 223 | assert len(e('input:enabled')) == 9 224 | assert len(e(':selected')) == 1 225 | assert len(e(':checked')) == 2 226 | assert len(e(':file')) == 1 227 | assert len(e(':input')) == 12 228 | assert len(e(':button')) == 2 229 | assert len(e(':radio')) == 3 230 | assert len(e(':checkbox')) == 3 231 | 232 | #test on other elements 233 | e = self.klass(self.html5) 234 | assert len(e(":header")) == 6 235 | assert len(e(":parent")) == 2 236 | assert len(e(":empty")) == 6 237 | assert len(e(":contains('Heading')")) == 6 238 | 239 | def test_on_the_fly_dom_creation(self): 240 | e = self.klass(self.html) 241 | assert e('

Hello world

').text() == 'Hello world' 242 | assert e('').text() == None 243 | 244 | class TestTraversal(unittest.TestCase): 245 | klass = pq 246 | html = """ 247 | 248 | 249 |
node1
250 |
node2 booyah
251 | 252 | 253 | """ 254 | 255 | def test_filter(self): 256 | assert len(self.klass('div', self.html).filter('.node3')) == 1 257 | assert len(self.klass('div', self.html).filter('#node2')) == 1 258 | assert len(self.klass('div', self.html).filter(lambda i: i == 0)) == 1 259 | 260 | d = pq('

Hello warming world

') 261 | self.assertEqual(d('strong').filter(lambda el: True), []) 262 | 263 | def test_not(self): 264 | assert len(self.klass('div', self.html).not_('.node3')) == 1 265 | 266 | def test_is(self): 267 | assert self.klass('div', self.html).is_('.node3') 268 | assert not self.klass('div', self.html).is_('.foobazbar') 269 | 270 | def test_find(self): 271 | assert len(self.klass('#node1', self.html).find('span')) == 1 272 | assert len(self.klass('#node2', self.html).find('span')) == 2 273 | assert len(self.klass('div', self.html).find('span')) == 3 274 | 275 | def test_each(self): 276 | doc = self.klass(self.html) 277 | doc('span').each(lambda: doc(this).wrap("")) 278 | assert len(doc('em')) == 3 279 | 280 | def test_map(self): 281 | def ids_minus_one(i, elem): 282 | return int(self.klass(elem).attr('id')[-1]) - 1 283 | assert self.klass('div', self.html).map(ids_minus_one) == [0, 1] 284 | 285 | d = pq('

Hello warming world

') 286 | self.assertEqual(d('strong').map(lambda i,el: pq(this).text()), []) 287 | 288 | def test_end(self): 289 | assert len(self.klass('div', self.html).find('span').end()) == 2 290 | assert len(self.klass('#node2', self.html).find('span').end()) == 1 291 | 292 | def test_closest(self): 293 | assert len(self.klass('#node1 span', self.html).closest('body')) == 1 294 | assert self.klass('#node2', self.html).closest('.node3').attr('id') == 'node2' 295 | assert self.klass('.node3', self.html).closest('form') == [] 296 | 297 | class TestOpener(unittest.TestCase): 298 | 299 | def test_custom_opener(self): 300 | def opener(url): 301 | return '
' 302 | 303 | doc = pq(url='http://example.com', opener=opener) 304 | assert len(doc('.node')) == 1, doc 305 | 306 | class TestHasClass(unittest.TestCase): 307 | def test_child_has_class(self): 308 | doc = pq("""
""") 309 | assert doc('#test').hasClass('on') 310 | assert not doc('#test').hasClass('off') 311 | 312 | class TestCallback(unittest.TestCase): 313 | html = """ 314 |
    315 |
  1. Coffee
  2. 316 |
  3. Tea
  4. 317 |
  5. Milk
  6. 318 |
319 | """ 320 | 321 | def test_S_this_inside_callback(self): 322 | S = pq(self.html) 323 | self.assertEqual(S('li').map(lambda i, el: S(this).html()), ['Coffee', 'Tea', 'Milk']) 324 | 325 | def test_parameterless_callback(self): 326 | S = pq(self.html) 327 | self.assertEqual(S('li').map(lambda: S(this).html()), ['Coffee', 'Tea', 'Milk']) 328 | 329 | def application(environ, start_response): 330 | req = Request(environ) 331 | response = Response() 332 | if req.method == 'GET': 333 | response.body = '
Yeah !
' 334 | else: 335 | response.body = 'Yeah !' 336 | return response(environ, start_response) 337 | 338 | def secure_application(environ, start_response): 339 | if 'REMOTE_USER' not in environ: 340 | return exc.HTTPUnauthorized('vomis')(environ, start_response) 341 | return application(environ, start_response) 342 | 343 | class TestAjaxSelector(TestSelector): 344 | klass = pqa 345 | 346 | @not_py3k 347 | @with_net 348 | def test_proxy(self): 349 | e = self.klass([]) 350 | val = e.get('http://pyquery.org/') 351 | assert len(val('body')) == 1, (str(val.response), val) 352 | 353 | @not_py3k 354 | def test_get(self): 355 | e = self.klass(app=application) 356 | val = e.get('/') 357 | assert len(val('pre')) == 1, val 358 | 359 | @not_py3k 360 | def test_secure_get(self): 361 | e = self.klass(app=secure_application) 362 | val = e.get('/', environ=dict(REMOTE_USER='gawii')) 363 | assert len(val('pre')) == 1, val 364 | val = e.get('/', REMOTE_USER='gawii') 365 | assert len(val('pre')) == 1, val 366 | 367 | @not_py3k 368 | def test_secure_get_not_authorized(self): 369 | e = self.klass(app=secure_application) 370 | val = e.get('/') 371 | assert len(val('pre')) == 0, val 372 | 373 | @not_py3k 374 | def test_post(self): 375 | e = self.klass(app=application) 376 | val = e.post('/') 377 | assert len(val('a')) == 1, val 378 | 379 | @not_py3k 380 | def test_subquery(self): 381 | e = self.klass(app=application) 382 | n = e('div') 383 | val = n.post('/') 384 | assert len(val('a')) == 1, val 385 | 386 | class TestManipulating(unittest.TestCase): 387 | html = ''' 388 |
389 | TestMy link text 390 | My link text 2 391 |
392 | ''' 393 | 394 | def test_remove(self): 395 | d = pq(self.html) 396 | d('img').remove() 397 | val = d('a:first').html() 398 | assert val == 'Test My link text', repr(val) 399 | val = d('a:last').html() 400 | assert val == ' My link text 2', repr(val) 401 | 402 | class TestHTMLParser(unittest.TestCase): 403 | xml = "
I'm valid XML
" 404 | html = ''' 405 |
406 | TestimageMy link text 407 | imageMy link text 2 408 | Behind you, a three-headed HTML‐Entity! 409 |
410 | ''' 411 | def test_parser_persistance(self): 412 | d = pq(self.xml, parser='xml') 413 | self.assertRaises(etree.XMLSyntaxError, lambda: d.after(self.html)) 414 | d = pq(self.xml, parser='html') 415 | d.after(self.html) # this should not fail 416 | 417 | 418 | @not_py3k 419 | def test_soup_parser(self): 420 | d = pq('Hello</head><body onload=crash()>Hi all<p>', parser='soup') 421 | self.assertEqual(str(d), '<html><meta/><head><title>HelloHi all

') 422 | 423 | def test_replaceWith(self): 424 | expected = '''

425 | TestimageMy link text 426 | imageMy link text 2 427 | Behind you, a three-headed HTML&dash;Entity! 428 |
''' 429 | d = pq(self.html) 430 | d('img').replaceWith('image') 431 | val = d.__html__() 432 | assert val == expected, (repr(val), repr(expected)) 433 | 434 | def test_replaceWith_with_function(self): 435 | expected = '''
436 | TestimageMy link text 437 | imageMy link text 2 438 | Behind you, a three-headed HTML&dash;Entity! 439 |
''' 440 | d = pq(self.html) 441 | d('a').replaceWith(lambda i, e: pq(e).html()) 442 | val = d.__html__() 443 | assert val == expected, (repr(val), repr(expected)) 444 | 445 | class TestWebScrapping(unittest.TestCase): 446 | @with_net 447 | def test_get(self): 448 | d = pq('http://www.theonion.com/search/', {'q': 'inconsistency'}, method='get') 449 | self.assertEqual(d('input[name=q]:last').val(), 'inconsistency') 450 | self.assertEqual(d('.news-in-brief h3').text(), 'Slight Inconsistency Found In Bible') 451 | 452 | @with_net 453 | def test_post(self): 454 | d = pq('http://www.theonion.com/search/', {'q': 'inconsistency'}, method='post') 455 | self.assertEqual(d('input[name=q]:last').val(), '') # the onion does not search on post 456 | 457 | if __name__ == '__main__': 458 | fails, total = unittest.main() 459 | if fails == 0: 460 | print('OK') 461 | -------------------------------------------------------------------------------- /pyquery/pyquery.py: -------------------------------------------------------------------------------- 1 | #-*- coding:utf-8 -*- 2 | # 3 | # Copyright (C) 2008 - Olivier Lauzanne 4 | # 5 | # Distributed under the BSD license, see LICENSE.txt 6 | from .cssselectpatch import selector_to_xpath 7 | from copy import deepcopy 8 | from lxml import etree 9 | import lxml.html 10 | import sys 11 | 12 | PY3k = sys.version_info >= (3,) 13 | 14 | if PY3k: 15 | from urllib.request import urlopen 16 | from urllib.parse import urlencode 17 | from urllib.parse import urljoin 18 | basestring = (str, bytes) 19 | unicode = str 20 | else: 21 | from urllib2 import urlopen 22 | from urllib import urlencode 23 | from urlparse import urljoin 24 | 25 | def func_globals(f): 26 | return f.__globals__ if PY3k else f.func_globals 27 | 28 | def func_code(f): 29 | return f.__code__ if PY3k else f.func_code 30 | 31 | def fromstring(context, parser=None, custom_parser=None): 32 | """use html parser if we don't have clean xml 33 | """ 34 | if hasattr(context, 'read') and hasattr(context.read, '__call__'): 35 | meth = 'parse' 36 | else: 37 | meth = 'fromstring' 38 | if custom_parser is None: 39 | if parser is None: 40 | try: 41 | result = getattr(etree, meth)(context) 42 | except etree.XMLSyntaxError: 43 | result = getattr(lxml.html, meth)(context) 44 | if isinstance(result, etree._ElementTree): 45 | return [result.getroot()] 46 | else: 47 | return [result] 48 | elif parser == 'xml': 49 | custom_parser = getattr(etree, meth) 50 | elif parser == 'html': 51 | custom_parser = getattr(lxml.html, meth) 52 | elif parser == 'soup': 53 | from lxml.html import soupparser 54 | custom_parser = getattr(lxml.html.soupparser, meth) 55 | elif parser == 'html_fragments': 56 | custom_parser = lxml.html.fragments_fromstring 57 | else: 58 | ValueError('No such parser: "%s"' % parser) 59 | 60 | result = custom_parser(context) 61 | if type(result) is list: 62 | return result 63 | elif isinstance(result, etree._ElementTree): 64 | return [result.getroot()] 65 | else: 66 | return [result] 67 | 68 | def callback(func, *args): 69 | return func(*args[:func_code(func).co_argcount]) 70 | 71 | class NoDefault(object): 72 | def __repr__(self): 73 | """clean representation in Sphinx""" 74 | return '' 75 | 76 | no_default = NoDefault() 77 | del NoDefault 78 | 79 | class FlexibleElement(object): 80 | """property to allow a flexible api""" 81 | def __init__(self, pget, pset=no_default, pdel=no_default): 82 | self.pget = pget 83 | self.pset = pset 84 | self.pdel = pdel 85 | def __get__(self, instance, klass): 86 | class _element(object): 
87 | """real element to support set/get/del attr and item and js call 88 | style""" 89 | def __call__(prop, *args, **kwargs): 90 | return self.pget(instance, *args, **kwargs) 91 | __getattr__ = __getitem__ = __setattr__ = __setitem__ = __call__ 92 | def __delitem__(prop, name): 93 | if self.pdel is not no_default: 94 | return self.pdel(instance, name) 95 | else: 96 | raise NotImplementedError() 97 | __delattr__ = __delitem__ 98 | def __repr__(prop): 99 | return '' % self.pget.__name__ 100 | return _element() 101 | def __set__(self, instance, value): 102 | if self.pset is not no_default: 103 | self.pset(instance, value) 104 | else: 105 | raise NotImplementedError() 106 | 107 | class PyQuery(list): 108 | """The main class 109 | """ 110 | def __init__(self, *args, **kwargs): 111 | html = None 112 | elements = [] 113 | self._base_url = None 114 | self.parser = kwargs.get('parser', None) 115 | if 'parser' in kwargs: 116 | del kwargs['parser'] 117 | if len(args) >= 1 and isinstance(args[0], basestring) \ 118 | and args[0].startswith('http://'): 119 | kwargs['url'] = args[0] 120 | if len(args) >= 2: 121 | kwargs['data'] = args[1] 122 | args = [] 123 | 124 | if 'parent' in kwargs: 125 | self._parent = kwargs.pop('parent') 126 | else: 127 | self._parent = no_default 128 | 129 | if kwargs: 130 | # specific case to get the dom 131 | if 'filename' in kwargs: 132 | html = open(kwargs['filename']) 133 | elif 'url' in kwargs: 134 | url = kwargs.pop('url') 135 | if 'opener' in kwargs: 136 | opener = kwargs.pop('opener') 137 | html = opener(url) 138 | else: 139 | method = kwargs.get('method') 140 | data = kwargs.get('data') 141 | if type(data) in (dict, list, tuple): 142 | data = urlencode(data) 143 | 144 | if isinstance(method, basestring) and method.lower() == 'get' and data: 145 | if '?' not in url: 146 | url += '?' 147 | elif url[-1] not in ('?', '&'): 148 | url += '&' 149 | url += data 150 | data = None 151 | 152 | if data and PY3k: 153 | data = data.encode('utf-8') 154 | 155 | html = urlopen(url, data) 156 | if not self.parser: 157 | self.parser = 'html' 158 | self._base_url = url 159 | else: 160 | raise ValueError('Invalid keyword arguments %s' % kwargs) 161 | elements = fromstring(html, self.parser) 162 | else: 163 | # get nodes 164 | 165 | # determine context and selector if any 166 | selector = context = no_default 167 | length = len(args) 168 | if len(args) == 1: 169 | context = args[0] 170 | elif len(args) == 2: 171 | selector, context = args 172 | else: 173 | raise ValueError("You can't do that." 
+\ 174 | " Please, provide arguments") 175 | 176 | # get context 177 | if isinstance(context, basestring): 178 | try: 179 | elements = fromstring(context, self.parser) 180 | except Exception: 181 | raise ValueError(context) 182 | elif isinstance(context, self.__class__): 183 | # copy 184 | elements = context[:] 185 | elif isinstance(context, list): 186 | elements = context 187 | elif isinstance(context, etree._Element): 188 | elements = [context] 189 | 190 | # select nodes 191 | if elements and selector is not no_default: 192 | xpath = selector_to_xpath(selector) 193 | results = [tag.xpath(xpath) for tag in elements] 194 | # Flatten the results 195 | elements = [] 196 | for r in results: 197 | elements.extend(r) 198 | 199 | list.__init__(self, elements) 200 | 201 | def __call__(self, *args): 202 | """return a new PyQuery instance 203 | """ 204 | length = len(args) 205 | if length == 0: 206 | raise ValueError('You must provide at least a selector') 207 | if args[0] == '': 208 | return self.__class__([]) 209 | if len(args) == 1 and isinstance(args[0], str) and not args[0].startswith('<'): 210 | args += (self,) 211 | result = self.__class__(*args, **dict(parent=self)) 212 | return result 213 | 214 | # keep original list api prefixed with _ 215 | _append = list.append 216 | _extend = list.extend 217 | 218 | # improve pythonic api 219 | def __add__(self, other): 220 | assert isinstance(other, self.__class__) 221 | return self.__class__(self[:] + other[:]) 222 | 223 | def extend(self, other): 224 | assert isinstance(other, self.__class__) 225 | self._extend(other[:]) 226 | 227 | def __str__(self): 228 | """xml representation of current nodes:: 229 | 230 | >>> xml = PyQuery('', parser='html_fragments') 231 | >>> print(str(xml)) 232 | 233 | 234 | """ 235 | if PY3k: 236 | return ''.join([etree.tostring(e, encoding=str) for e in self]) 237 | else: 238 | return ''.join([etree.tostring(e) for e in self]) 239 | 240 | def __unicode__(self): 241 | """xml representation of current nodes""" 242 | return unicode('').join([etree.tostring(e, encoding=unicode) for e in self]) 243 | 244 | def __html__(self): 245 | """html representation of current nodes:: 246 | 247 | >>> html = PyQuery('', parser='html_fragments') 248 | >>> print(html.__html__()) 249 | 250 | 251 | """ 252 | return unicode('').join([lxml.html.tostring(e, encoding=unicode) for e in self]) 253 | 254 | def __repr__(self): 255 | r = [] 256 | try: 257 | for el in self: 258 | c = el.get('class') 259 | c = c and '.' 
+ '.'.join(c.split(' ')) or '' 260 | id = el.get('id') 261 | id = id and '#' + id or '' 262 | r.append('<%s%s%s>' % (el.tag, id, c)) 263 | return '[' + (', '.join(r)) + ']' 264 | except AttributeError: 265 | if PY3k: 266 | return list.__repr__(self) 267 | else: 268 | for el in self: 269 | if isinstance(el, unicode): 270 | r.append(el.encode('utf-8')) 271 | else: 272 | r.append(el) 273 | return repr(r) 274 | 275 | 276 | @property 277 | def root(self): 278 | """return the xml root element 279 | """ 280 | if self._parent is not no_default: 281 | return self._parent.getroottree() 282 | return self[0].getroottree() 283 | 284 | @property 285 | def encoding(self): 286 | """return the xml encoding of the root element 287 | """ 288 | root = self.root 289 | if root is not None: 290 | return self.root.docinfo.encoding 291 | 292 | ############## 293 | # Traversing # 294 | ############## 295 | 296 | def _filter_only(self, selector, elements, reverse=False, unique=False): 297 | """Filters the selection set only, as opposed to also including 298 | descendants. 299 | """ 300 | if selector is None: 301 | results = elements 302 | else: 303 | xpath = selector_to_xpath(selector, 'self::') 304 | results = [] 305 | for tag in elements: 306 | results.extend(tag.xpath(xpath)) 307 | if reverse: 308 | results.reverse() 309 | if unique: 310 | result_list = results 311 | results = [] 312 | for item in result_list: 313 | if not item in results: 314 | results.append(item) 315 | return self.__class__(results, **dict(parent=self)) 316 | 317 | def parent(self, selector=None): 318 | return self._filter_only(selector, [e.getparent() for e in self if e.getparent() is not None], unique = True) 319 | 320 | def prev(self, selector=None): 321 | return self._filter_only(selector, [e.getprevious() for e in self if e.getprevious() is not None]) 322 | 323 | def next(self, selector=None): 324 | return self._filter_only(selector, [e.getnext() for e in self if e.getnext() is not None]) 325 | 326 | def _traverse(self, method): 327 | for e in self: 328 | current = getattr(e, method)() 329 | while current is not None: 330 | yield current 331 | current = getattr(current, method)() 332 | 333 | def _traverse_parent_topdown(self): 334 | for e in self: 335 | this_list = [] 336 | current = e.getparent() 337 | while current is not None: 338 | this_list.append(current) 339 | current = current.getparent() 340 | this_list.reverse() 341 | for j in this_list: 342 | yield j 343 | 344 | def _nextAll(self): 345 | return [e for e in self._traverse('getnext')] 346 | 347 | def nextAll(self, selector=None): 348 | """ 349 | >>> d = PyQuery('

Hi

Bye

') 350 | >>> d('p:last').nextAll() 351 | [] 352 | """ 353 | return self._filter_only(selector, self._nextAll()) 354 | 355 | def _prevAll(self): 356 | return [e for e in self._traverse('getprevious')] 357 | 358 | def prevAll(self, selector=None): 359 | """ 360 | >>> d = PyQuery('

Hi

Bye

') 361 | >>> d('p:last').prevAll() 362 | [] 363 | """ 364 | return self._filter_only(selector, self._prevAll(), reverse = True) 365 | 366 | def siblings(self, selector=None): 367 | """ 368 | >>> d = PyQuery('

Hi

Bye

') 369 | >>> d('.hello').siblings() 370 | [

, ] 371 | >>> d('.hello').siblings('img') 372 | [] 373 | """ 374 | return self._filter_only(selector, self._prevAll() + self._nextAll()) 375 | 376 | def parents(self, selector=None): 377 | """ 378 | >>> d = PyQuery('

Hi

Bye

') 379 | >>> d('p').parents() 380 | [] 381 | >>> d('.hello').parents('span') 382 | [] 383 | >>> d('.hello').parents('p') 384 | [] 385 | """ 386 | return self._filter_only( 387 | selector, 388 | [e for e in self._traverse_parent_topdown()], 389 | unique = True 390 | ) 391 | 392 | def children(self, selector=None): 393 | """Filter elements that are direct children of self using optional selector. 394 | 395 | >>> d = PyQuery('

Hi

Bye

') 396 | >>> d 397 | [] 398 | >>> d.children() 399 | [,

] 400 | >>> d.children('.hello') 401 | [] 402 | """ 403 | elements = [child for tag in self for child in tag.getchildren()] 404 | return self._filter_only(selector, elements) 405 | 406 | def closest(self, selector=None): 407 | """ 408 | >>> d = PyQuery('

This is a test

') 409 | >>> d('strong').closest('div') 410 | [] 411 | >>> d('strong').closest('.hello') 412 | [] 413 | >>> d('strong').closest('form') 414 | [] 415 | """ 416 | result = [] 417 | for current in self: 418 | while current is not None and not self.__class__(current).is_(selector): 419 | current = current.getparent() 420 | if current is not None: 421 | result.append(current) 422 | return self.__class__(result, **dict(parent=self)) 423 | 424 | def filter(self, selector): 425 | """Filter elements in self using selector (string or function). 426 | 427 | >>> d = PyQuery('

Hi

Bye

') 428 | >>> d('p') 429 | [,

] 430 | >>> d('p').filter('.hello') 431 | [] 432 | >>> d('p').filter(lambda i: i == 1) 433 | [

] 434 | >>> d('p').filter(lambda i: PyQuery(this).text() == 'Hi') 435 | [] 436 | """ 437 | if not hasattr(selector, '__call__'): 438 | return self._filter_only(selector, self) 439 | else: 440 | elements = [] 441 | try: 442 | for i, this in enumerate(self): 443 | func_globals(selector)['this'] = this 444 | if callback(selector, i): 445 | elements.append(this) 446 | finally: 447 | f_globals = func_globals(selector) 448 | if 'this' in f_globals: 449 | del f_globals['this'] 450 | return self.__class__(elements, **dict(parent=self)) 451 | 452 | def not_(self, selector): 453 | """Return elements that don't match the given selector. 454 | 455 | >>> d = PyQuery('

Hi

Bye

') 456 | >>> d('p').not_('.hello') 457 | [

] 458 | """ 459 | exclude = set(self.__class__(selector, self)) 460 | return self.__class__([e for e in self if e not in exclude], **dict(parent=self)) 461 | 462 | def is_(self, selector): 463 | """Returns True if selector matches at least one current element, else False:: 464 | 465 | >>> d = PyQuery('

Hi

Bye

') 466 | >>> d('p').eq(0).is_('.hello') 467 | True 468 | 469 | >>> d('p').eq(1).is_('.hello') 470 | False 471 | 472 | .. 473 | """ 474 | return bool(self.__class__(selector, self)) 475 | 476 | def find(self, selector): 477 | """Find elements using selector traversing down from self:: 478 | 479 | >>> m = '

Whoah!

there

' 480 | >>> d = PyQuery(m) 481 | >>> d('p').find('em') 482 | [, ] 483 | >>> d('p').eq(1).find('em') 484 | [] 485 | 486 | .. 487 | """ 488 | xpath = selector_to_xpath(selector) 489 | results = [child.xpath(xpath) for tag in self for child in tag.getchildren()] 490 | # Flatten the results 491 | elements = [] 492 | for r in results: 493 | elements.extend(r) 494 | return self.__class__(elements, **dict(parent=self)) 495 | 496 | def eq(self, index): 497 | """Return PyQuery of only the element with the provided index:: 498 | 499 | >>> d = PyQuery('

Hi

Bye

') 500 | >>> d('p').eq(0) 501 | [] 502 | >>> d('p').eq(1) 503 | [

] 504 | >>> d('p').eq(2) 505 | [] 506 | 507 | .. 508 | """ 509 | # Use slicing to silently handle out of bounds indexes 510 | items = self[index:index+1] 511 | return self.__class__(items, **dict(parent=self)) 512 | 513 | def each(self, func): 514 | """apply func on each nodes 515 | """ 516 | try: 517 | for i, element in enumerate(self): 518 | func_globals(func)['this'] = element 519 | if callback(func, i, element) == False: 520 | break 521 | finally: 522 | f_globals = func_globals(func) 523 | if 'this' in f_globals: 524 | del f_globals['this'] 525 | return self 526 | 527 | def map(self, func): 528 | """Returns a new PyQuery after transforming current items with func. 529 | 530 | func should take two arguments - 'index' and 'element'. Elements can 531 | also be referred to as 'this' inside of func:: 532 | 533 | >>> d = PyQuery('

Hi there

Bye


') 534 | >>> d('p').map(lambda i, e: PyQuery(e).text()) 535 | ['Hi there', 'Bye'] 536 | 537 | >>> d('p').map(lambda i, e: len(PyQuery(this).text())) 538 | [8, 3] 539 | 540 | >>> d('p').map(lambda i, e: PyQuery(this).text().split()) 541 | ['Hi', 'there', 'Bye'] 542 | 543 | """ 544 | items = [] 545 | try: 546 | for i, element in enumerate(self): 547 | func_globals(func)['this'] = element 548 | result = callback(func, i, element) 549 | if result is not None: 550 | if not isinstance(result, list): 551 | items.append(result) 552 | else: 553 | items.extend(result) 554 | finally: 555 | f_globals = func_globals(func) 556 | if 'this' in f_globals: 557 | del f_globals['this'] 558 | return self.__class__(items, **dict(parent=self)) 559 | 560 | @property 561 | def length(self): 562 | return len(self) 563 | 564 | def size(self): 565 | return len(self) 566 | 567 | def end(self): 568 | """Break out of a level of traversal and return to the parent level. 569 | 570 | >>> m = '

Whoah!

there

' 571 | >>> d = PyQuery(m) 572 | >>> d('p').eq(1).find('em').end().end() 573 | [

,

] 574 | """ 575 | return self._parent 576 | 577 | ############## 578 | # Attributes # 579 | ############## 580 | def attr(self, *args, **kwargs): 581 | """Attributes manipulation 582 | """ 583 | 584 | mapping = {'class_': 'class', 'for_': 'for'} 585 | 586 | attr = value = no_default 587 | length = len(args) 588 | if length == 1: 589 | attr = args[0] 590 | attr = mapping.get(attr, attr) 591 | elif length == 2: 592 | attr, value = args 593 | attr = mapping.get(attr, attr) 594 | elif kwargs: 595 | attr = {} 596 | for k, v in kwargs.items(): 597 | attr[mapping.get(k, k)] = v 598 | else: 599 | raise ValueError('Invalid arguments %s %s' % (args, kwargs)) 600 | 601 | if not self: 602 | return None 603 | elif isinstance(attr, dict): 604 | for tag in self: 605 | for key, value in attr.items(): 606 | tag.set(key, value) 607 | elif value is no_default: 608 | return self[0].get(attr) 609 | elif value is None or value == '': 610 | return self.removeAttr(attr) 611 | else: 612 | for tag in self: 613 | tag.set(attr, value) 614 | return self 615 | 616 | def removeAttr(self, name): 617 | """Remove an attribute:: 618 | 619 | >>> d = PyQuery('

') 620 | >>> d.removeAttr('id') 621 | [
] 622 | 623 | .. 624 | """ 625 | for tag in self: 626 | del tag.attrib[name] 627 | return self 628 | 629 | attr = FlexibleElement(pget=attr, pdel=removeAttr) 630 | 631 | ####### 632 | # CSS # 633 | ####### 634 | def height(self, value=no_default): 635 | """set/get height of element 636 | """ 637 | return self.attr('height', value) 638 | 639 | def width(self, value=no_default): 640 | """set/get width of element 641 | """ 642 | return self.attr('width', value) 643 | 644 | def hasClass(self, name): 645 | """Return True if element has class:: 646 | 647 | >>> d = PyQuery('
') 648 | >>> d.hasClass('myclass') 649 | True 650 | 651 | .. 652 | """ 653 | for tag in self: 654 | classes = set((tag.get('class') or '').split()) 655 | if name in classes: 656 | return True 657 | return False 658 | 659 | def addClass(self, value): 660 | """Add a css class to elements:: 661 | 662 | >>> d = PyQuery('
') 663 | >>> d.addClass('myclass') 664 | [] 665 | 666 | .. 667 | """ 668 | for tag in self: 669 | values = value.split(' ') 670 | classes = set((tag.get('class') or '').split()) 671 | classes = classes.union(values) 672 | classes.difference_update(['']) 673 | tag.set('class', ' '.join(classes)) 674 | return self 675 | 676 | def removeClass(self, value): 677 | """Remove a css class from elements:: 678 | 679 | >>> d = PyQuery('
') 680 | >>> d.removeClass('myclass') 681 | [
] 682 | 683 | .. 684 | """ 685 | for tag in self: 686 | values = value.split(' ') 687 | classes = set((tag.get('class') or '').split()) 688 | classes.difference_update(values) 689 | classes.difference_update(['']) 690 | tag.set('class', ' '.join(classes)) 691 | return self 692 | 693 | def toggleClass(self, value): 694 | """Toggle a css class on elements 695 | 696 | >>> d = PyQuery('
') 697 | >>> d.toggleClass('myclass') 698 | [] 699 | 700 | """ 701 | for tag in self: 702 | values = set(value.split(' ')) 703 | classes = set((tag.get('class') or '').split()) 704 | values_to_add = values.difference(classes) 705 | classes.difference_update(values) 706 | classes = classes.union(values_to_add) 707 | classes.difference_update(['']) 708 | tag.set('class', ' '.join(classes)) 709 | return self 710 | 711 | def css(self, *args, **kwargs): 712 | """css attributes manipulation 713 | """ 714 | 715 | attr = value = no_default 716 | length = len(args) 717 | if length == 1: 718 | attr = args[0] 719 | elif length == 2: 720 | attr, value = args 721 | elif kwargs: 722 | attr = kwargs 723 | else: 724 | raise ValueError('Invalid arguments %s %s' % (args, kwargs)) 725 | 726 | if isinstance(attr, dict): 727 | for tag in self: 728 | stripped_keys = [key.strip().replace('_', '-') 729 | for key in attr.keys()] 730 | current = [el.strip() 731 | for el in (tag.get('style') or '').split(';') 732 | if el.strip() 733 | and not el.split(':')[0].strip() in stripped_keys] 734 | for key, value in attr.items(): 735 | key = key.replace('_', '-') 736 | current.append('%s: %s' % (key, value)) 737 | tag.set('style', '; '.join(current)) 738 | elif isinstance(value, basestring): 739 | attr = attr.replace('_', '-') 740 | for tag in self: 741 | current = [el.strip() 742 | for el in (tag.get('style') or '').split(';') 743 | if el.strip() 744 | and not el.split(':')[0].strip() == attr.strip()] 745 | current.append('%s: %s' % (attr, value)) 746 | tag.set('style', '; '.join(current)) 747 | return self 748 | 749 | css = FlexibleElement(pget=css, pset=css) 750 | 751 | ################### 752 | # CORE UI EFFECTS # 753 | ################### 754 | def hide(self): 755 | """remove display:none to elements style 756 | 757 | >>> print(PyQuery('
').hide()) 758 |
759 | 760 | """ 761 | return self.css('display', 'none') 762 | 763 | def show(self): 764 | """add display:block to elements style 765 | 766 | >>> print(PyQuery('
').show()) 767 |
768 | 769 | """ 770 | return self.css('display', 'block') 771 | 772 | ######## 773 | # HTML # 774 | ######## 775 | def val(self, value=no_default): 776 | """Set the attribute value:: 777 | 778 | >>> d = PyQuery('') 779 | >>> d.val('Youhou') 780 | [] 781 | 782 | Get the attribute value:: 783 | 784 | >>> d.val() 785 | 'Youhou' 786 | 787 | """ 788 | return self.attr('value', value) 789 | 790 | def html(self, value=no_default): 791 | """Get or set the html representation of sub nodes. 792 | 793 | Get the text value:: 794 | 795 | >>> d = PyQuery('
toto
') 796 | >>> print(d.html()) 797 | toto 798 | 799 | Set the text value:: 800 | 801 | >>> d.html('Youhou !') 802 | [
] 803 | >>> print(d) 804 |
Youhou !
805 | """ 806 | if value is no_default: 807 | if not self: 808 | return None 809 | tag = self[0] 810 | children = tag.getchildren() 811 | if not children: 812 | return tag.text 813 | html = tag.text or '' 814 | html += unicode('').join([etree.tostring(e, encoding=unicode) for e in children]) 815 | return html 816 | else: 817 | if isinstance(value, self.__class__): 818 | new_html = unicode(value) 819 | elif isinstance(value, basestring): 820 | new_html = value 821 | elif not value: 822 | new_html = '' 823 | else: 824 | raise ValueError(type(value)) 825 | 826 | for tag in self: 827 | for child in tag.getchildren(): 828 | tag.remove(child) 829 | root = fromstring(unicode('') + new_html + unicode(''), self.parser)[0] 830 | children = root.getchildren() 831 | if children: 832 | tag.extend(children) 833 | tag.text = root.text 834 | tag.tail = root.tail 835 | return self 836 | 837 | def outerHtml(self): 838 | """Get the html representation of the first selected element:: 839 | 840 | >>> d = PyQuery('
toto rocks
') 841 | >>> print(d('span')) 842 | toto rocks 843 | >>> print(d('span').outerHtml()) 844 | toto 845 | 846 | >>> S = PyQuery('

Only me & myself

') 847 | >>> print(S('b').outerHtml()) 848 | me 849 | 850 | .. 851 | """ 852 | 853 | if not self: 854 | return None 855 | e0 = self[0] 856 | if e0.tail: 857 | e0 = deepcopy(e0) 858 | e0.tail = '' 859 | return lxml.html.tostring(e0, encoding=unicode) 860 | 861 | def text(self, value=no_default): 862 | """Get or set the text representation of sub nodes. 863 | 864 | Get the text value:: 865 | 866 | >>> doc = PyQuery('
tototata
') 867 | >>> print(doc.text()) 868 | toto tata 869 | 870 | Set the text value:: 871 | 872 | >>> doc.text('Youhou !') 873 | [
] 874 | >>> print(doc) 875 |
Youhou !
876 | 877 | """ 878 | 879 | if value is no_default: 880 | if not self: 881 | return None 882 | 883 | text = [] 884 | 885 | def add_text(tag, no_tail=False): 886 | if tag.text: 887 | text.append(tag.text) 888 | for child in tag.getchildren(): 889 | add_text(child) 890 | if not no_tail and tag.tail: 891 | text.append(tag.tail) 892 | 893 | for tag in self: 894 | add_text(tag, no_tail=True) 895 | return ' '.join([t.strip() for t in text if t.strip()]) 896 | 897 | for tag in self: 898 | for child in tag.getchildren(): 899 | tag.remove(child) 900 | tag.text = value 901 | return self 902 | 903 | ################ 904 | # Manipulating # 905 | ################ 906 | 907 | def _get_root(self, value): 908 | if isinstance(value, basestring): 909 | root = fromstring(unicode('') + value + unicode(''), self.parser)[0] 910 | elif isinstance(value, etree._Element): 911 | root = self.__class__(value) 912 | elif isinstance(value, PyQuery): 913 | root = value 914 | else: 915 | raise TypeError( 916 | 'Value must be string, PyQuery or Element. Got %r' % value) 917 | if hasattr(root, 'text') and isinstance(root.text, basestring): 918 | root_text = root.text 919 | else: 920 | root_text = '' 921 | return root, root_text 922 | 923 | def append(self, value): 924 | """append value to each nodes 925 | """ 926 | root, root_text = self._get_root(value) 927 | for i, tag in enumerate(self): 928 | if len(tag) > 0: # if the tag has children 929 | last_child = tag[-1] 930 | if not last_child.tail: 931 | last_child.tail = '' 932 | last_child.tail += root_text 933 | else: 934 | if not tag.text: 935 | tag.text = '' 936 | tag.text += root_text 937 | if i > 0: 938 | root = deepcopy(list(root)) 939 | tag.extend(root) 940 | root = tag[-len(root):] 941 | return self 942 | 943 | def appendTo(self, value): 944 | """append nodes to value 945 | """ 946 | value.append(self) 947 | return self 948 | 949 | def prepend(self, value): 950 | """prepend value to nodes 951 | """ 952 | root, root_text = self._get_root(value) 953 | for i, tag in enumerate(self): 954 | if not tag.text: 955 | tag.text = '' 956 | if len(root) > 0: 957 | root[-1].tail = tag.text 958 | tag.text = root_text 959 | else: 960 | tag.text = root_text + tag.text 961 | if i > 0: 962 | root = deepcopy(list(root)) 963 | tag[:0] = root 964 | root = tag[:len(root)] 965 | return self 966 | 967 | def prependTo(self, value): 968 | """prepend nodes to value 969 | """ 970 | value.prepend(self) 971 | return self 972 | 973 | def after(self, value): 974 | """add value after nodes 975 | """ 976 | root, root_text = self._get_root(value) 977 | for i, tag in enumerate(self): 978 | if not tag.tail: 979 | tag.tail = '' 980 | tag.tail += root_text 981 | if i > 0: 982 | root = deepcopy(list(root)) 983 | parent = tag.getparent() 984 | index = parent.index(tag) + 1 985 | parent[index:index] = root 986 | root = parent[index:len(root)] 987 | return self 988 | 989 | def insertAfter(self, value): 990 | """insert nodes after value 991 | """ 992 | value.after(self) 993 | return self 994 | 995 | def before(self, value): 996 | """insert value before nodes 997 | """ 998 | root, root_text = self._get_root(value) 999 | for i, tag in enumerate(self): 1000 | previous = tag.getprevious() 1001 | if previous != None: 1002 | if not previous.tail: 1003 | previous.tail = '' 1004 | previous.tail += root_text 1005 | else: 1006 | parent = tag.getparent() 1007 | if not parent.text: 1008 | parent.text = '' 1009 | parent.text += root_text 1010 | if i > 0: 1011 | root = deepcopy(list(root)) 1012 | parent = tag.getparent() 
1013 | index = parent.index(tag) 1014 | parent[index:index] = root 1015 | root = parent[index:len(root)] 1016 | return self 1017 | 1018 | def insertBefore(self, value): 1019 | """insert nodes before value 1020 | """ 1021 | value.before(self) 1022 | return self 1023 | 1024 | def wrap(self, value): 1025 | """A string of HTML that will be created on the fly and wrapped around 1026 | each target:: 1027 | 1028 | >>> d = PyQuery('youhou') 1029 | >>> d.wrap('
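A minimal usage sketch of the PyQuery API defined in pyquery/pyquery.py above.
This is not part of the package source: it only illustrates the selection,
traversal and attribute methods documented in the docstrings, assuming the
package (and its lxml dependency) is importable. The markup, variable names
and attribute values below are made up for the example::

    # Hypothetical example; the markup and names here are illustrative only.
    from pyquery.pyquery import PyQuery as pq

    # A string context is parsed by fromstring(), which tries lxml.etree and
    # falls back to the lxml.html parser when the input is not clean XML.
    d = pq('<div id="box"><p class="hello">Hi</p><p>Bye</p></div>')

    d('p').eq(0).text()                    # 'Hi'   (selector, then eq/text)
    d('p').filter('.hello')                # [<p.hello>]
    d('p').map(lambda i, e: pq(e).text())  # ['Hi', 'Bye']

    # Class and attribute setters return the PyQuery object, so calls chain.
    d('#box').addClass('seen').attr('data-done', '1')
    d('#box').attr('class')                # 'seen'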
') 1030 | [
] 1031 | >>> print(d) 1032 |
youhou
1033 | 1034 | """ 1035 | assert isinstance(value, basestring) 1036 | value = fromstring(value)[0] 1037 | nodes = [] 1038 | for tag in self: 1039 | wrapper = deepcopy(value) 1040 | # FIXME: using iterchildren is probably not optimal 1041 | if not wrapper.getchildren(): 1042 | wrapper.append(deepcopy(tag)) 1043 | else: 1044 | childs = [c for c in wrapper.iterchildren()] 1045 | child = childs[-1] 1046 | child.append(deepcopy(tag)) 1047 | nodes.append(wrapper) 1048 | 1049 | parent = tag.getparent() 1050 | if parent is not None: 1051 | for t in parent.iterchildren(): 1052 | if t is tag: 1053 | t.addnext(wrapper) 1054 | parent.remove(t) 1055 | break 1056 | self[:] = nodes 1057 | return self 1058 | 1059 | def wrapAll(self, value): 1060 | """Wrap all the elements in the matched set into a single wrapper element:: 1061 | 1062 | >>> d = PyQuery('
Heyyou !
') 1063 | >>> print(d('span').wrapAll('
')) 1064 |
Heyyou !
1065 | 1066 | .. 1067 | """ 1068 | if not self: 1069 | return self 1070 | 1071 | assert isinstance(value, basestring) 1072 | value = fromstring(value)[0] 1073 | wrapper = deepcopy(value) 1074 | if not wrapper.getchildren(): 1075 | child = wrapper 1076 | else: 1077 | childs = [c for c in wrapper.iterchildren()] 1078 | child = childs[-1] 1079 | 1080 | replace_childs = True 1081 | parent = self[0].getparent() 1082 | if parent is None: 1083 | parent = no_default 1084 | 1085 | # add nodes to wrapper and check parent 1086 | for tag in self: 1087 | child.append(deepcopy(tag)) 1088 | if tag.getparent() is not parent: 1089 | replace_childs = False 1090 | 1091 | # replace nodes i parent if possible 1092 | if parent is not no_default and replace_childs: 1093 | childs = [c for c in parent.iterchildren()] 1094 | if len(childs) == len(self): 1095 | for tag in self: 1096 | parent.remove(tag) 1097 | parent.append(wrapper) 1098 | 1099 | self[:] = [wrapper] 1100 | return self 1101 | 1102 | def replaceWith(self, value): 1103 | """replace nodes by value 1104 | """ 1105 | if hasattr(value, '__call__'): 1106 | for i, element in enumerate(self): 1107 | self.__class__(element).before(value(i, element) + (element.tail or '')) 1108 | parent = element.getparent() 1109 | parent.remove(element) 1110 | else: 1111 | for tag in self: 1112 | self.__class__(tag).before(value + (tag.tail or '')) 1113 | parent = tag.getparent() 1114 | parent.remove(tag) 1115 | return self 1116 | 1117 | def replaceAll(self, expr): 1118 | """replace nodes by expr 1119 | """ 1120 | if self._parent is no_default: 1121 | raise ValueError( 1122 | 'replaceAll can only be used with an object with parent') 1123 | self._parent(expr).replaceWith(self) 1124 | return self 1125 | 1126 | def clone(self): 1127 | """return a copy of nodes 1128 | """ 1129 | self[:] = [deepcopy(tag) for tag in self] 1130 | return self 1131 | 1132 | def empty(self): 1133 | """remove nodes content 1134 | """ 1135 | for tag in self: 1136 | tag.text = None 1137 | tag[:] = [] 1138 | return self 1139 | 1140 | def remove(self, expr=no_default): 1141 | """remove nodes 1142 | 1143 | >>> d = PyQuery('
Maybe she does NOT know
') 1144 | >>> d('strong').remove() 1145 | [] 1146 | >>> print(d) 1147 |
Maybe she does know
1148 | """ 1149 | if expr is no_default: 1150 | for tag in self: 1151 | parent = tag.getparent() 1152 | if parent is not None: 1153 | if tag.tail: 1154 | prev = tag.getprevious() 1155 | if prev is None: 1156 | if not parent.text: 1157 | parent.text = '' 1158 | parent.text += ' ' + tag.tail 1159 | else: 1160 | if not prev.tail: 1161 | prev.tail = '' 1162 | prev.tail += ' ' + tag.tail 1163 | parent.remove(tag) 1164 | else: 1165 | results = self.__class__(expr, self) 1166 | results.remove() 1167 | return self 1168 | 1169 | class Fn(object): 1170 | """Hook for defining custom function (like the jQuery.fn) 1171 | 1172 | >>> PyQuery.fn.listOuterHtml = lambda: this.map(lambda i, el: PyQuery(this).outerHtml()) 1173 | >>> S = PyQuery('
  1. Coffee
  2. Tea
  3. Milk
') 1174 | >>> S('li').listOuterHtml() 1175 | ['
  • Coffee
  • ', '
  • Tea
  • ', '
  • Milk
  • '] 1176 | 1177 | """ 1178 | def __setattr__(self, name, func): 1179 | def fn(self, *args): 1180 | func_globals(func)['this'] = self 1181 | return func(*args) 1182 | fn.__name__ = name 1183 | setattr(PyQuery, name, fn) 1184 | fn = Fn() 1185 | 1186 | ##################################################### 1187 | # Additional methods that are not in the jQuery API # 1188 | ##################################################### 1189 | 1190 | @property 1191 | def base_url(self): 1192 | """Return the url of current html document or None if not available. 1193 | """ 1194 | if self._base_url is not None: 1195 | return self._base_url 1196 | if self._parent is not no_default: 1197 | return self._parent.base_url 1198 | 1199 | def make_links_absolute(self, base_url=None): 1200 | """Make all links absolute. 1201 | """ 1202 | if base_url is None: 1203 | base_url = self.base_url 1204 | if base_url is None: 1205 | raise ValueError('You need a base URL to make your links' 1206 | 'absolute. It can be provided by the base_url parameter.') 1207 | 1208 | self('a').each(lambda: self(this).attr('href', urljoin(base_url, self(this).attr('href')))) 1209 | return self 1210 | --------------------------------------------------------------------------------