├── .gitignore
├── .hgignore
├── .hgtags
├── .travis.yml
├── CHANGES.txt
├── CREDITS.txt
├── DD.py
├── IDEAS.txt
├── INSTALL.txt
├── LICENSES.txt
├── MANIFEST.in
├── Makefile
├── README.rst
├── TODO.txt
├── benchmark
    ├── bench_etree.py
    ├── bench_objectify.py
    ├── bench_xpath.py
    ├── bench_xslt.py
    └── benchbase.py
├── bisect_crashes.py
├── buildlibxml.py
├── doc
    ├── FAQ.txt
    ├── api.txt
    ├── build.txt
    ├── capi.txt
    ├── compatibility.txt
    ├── cssselect.txt
    ├── docstructure.py
    ├── element_classes.txt
    ├── elementsoup.txt
    ├── extensions.txt
    ├── html
    │   ├── flattr-badge-large.png
    │   ├── paypal_btn_donateCC_LG.gif
    │   ├── proxies.png
    │   ├── python-xml-title.png
    │   ├── python-xml.png
    │   ├── style.css
    │   └── tagpython-big.png
    ├── html5parser.txt
    ├── intro.txt
    ├── licenses
    │   ├── BSD.txt
    │   ├── GPL.txt
    │   ├── ZopePublicLicense.txt
    │   └── elementtree.txt
    ├── lxml-source-howto.txt
    ├── lxml.mgp
    ├── lxml2.txt
    ├── lxmlhtml.txt
    ├── main.txt
    ├── memorymanagement.txt
    ├── mkhtml.py
    ├── mklatex.py
    ├── objectify.txt
    ├── parsing.txt
    ├── performance.txt
    ├── pubkey.asc
    ├── resolvers.txt
    ├── rest2html.py
    ├── rest2latex.py
    ├── s5
    │   ├── Makefile
    │   ├── ep2008
    │   │   ├── atom-example.xml
    │   │   ├── atom.py
    │   │   ├── atom.rng
    │   │   ├── atomgen.py
    │   │   └── proxies.png
    │   ├── lxml-ep2008.txt
    │   ├── rst2s5.py
    │   ├── tagpython.png
    │   └── ui
    │   │   └── default
    │   │       ├── blank.gif
    │   │       ├── bodybg.gif
    │   │       ├── framing.css
    │   │       ├── iepngfix.htc
    │   │       ├── lxml-logo64.png
    │   │       ├── opera.css
    │   │       ├── outline.css
    │   │       ├── pretty.css
    │   │       ├── print.css
    │   │       ├── s5-core.css
    │   │       ├── slides.css
    │   │       ├── slides.js
    │   │       └── tagpython.png
    ├── sax.txt
    ├── test.xml
    ├── tutorial.txt
    ├── valgrind.txt
    ├── validation.txt
    └── xpathxslt.txt
├── ez_setup.py
├── fake_pyrex
    └── Pyrex
    │   ├── Distutils
    │       ├── __init__.py
    │       └── build_ext.py
    │   └── __init__.py
├── samples
    ├── simple-ns.xml
    └── simple.xml
├── selftest.py
├── selftest2.py
├── setup.py
├── setupinfo.py
├── src
    ├── local_doctest.py
    └── lxml
    │   ├── ElementInclude.py
    │   ├── __init__.py
    │   ├── _elementpath.py
    │   ├── apihelpers.pxi
    │   ├── builder.py
    │   ├── classlookup.pxi
    │   ├── cleanup.pxi
    │   ├── cssselect.py
    │   ├── cvarargs.pxd
    │   ├── debug.pxi
    │   ├── docloader.pxi
    │   ├── doctestcompare.py
    │   ├── dtd.pxi
    │   ├── extensions.pxi
    │   ├── html
    │       ├── ElementSoup.py
    │       ├── __init__.py
    │       ├── _diffcommand.py
    │       ├── _html5builder.py
    │       ├── _setmixin.py
    │       ├── builder.py
    │       ├── clean.py
    │       ├── defs.py
    │       ├── diff.py
    │       ├── formfill.py
    │       ├── html5parser.py
    │       ├── soupparser.py
    │       ├── tests
    │       │   ├── __init__.py
    │       │   ├── feedparser-data
    │       │   │   ├── entry_content_applet.data
    │       │   │   ├── entry_content_blink.data
    │       │   │   ├── entry_content_crazy.data
    │       │   │   ├── entry_content_embed.data
    │       │   │   ├── entry_content_frame.data
    │       │   │   ├── entry_content_iframe.data
    │       │   │   ├── entry_content_link.data
    │       │   │   ├── entry_content_meta.data
    │       │   │   ├── entry_content_object.data
    │       │   │   ├── entry_content_onabort.data
    │       │   │   ├── entry_content_onblur.data
    │       │   │   ├── entry_content_onchange.data
    │       │   │   ├── entry_content_onclick.data
    │       │   │   ├── entry_content_ondblclick.data
    │       │   │   ├── entry_content_onerror.data
    │       │   │   ├── entry_content_onfocus.data
    │       │   │   ├── entry_content_onkeydown.data
    │       │   │   ├── entry_content_onkeypress.data
    │       │   │   ├── entry_content_onkeyup.data
    │       │   │   ├── entry_content_onload.data
    │       │   │   ├── entry_content_onmousedown.data
    │       │   │   ├── entry_content_onmouseout.data
    │       │   │   ├── entry_content_onmouseover.data
    │       │   │   ├── entry_content_onmouseup.data
    │       │   │   ├── entry_content_onreset.data
    │       │   │   ├── entry_content_onresize.data
    │       │   │   ├── entry_content_onsubmit.data
    │       │   │   ├── entry_content_onunload.data
    │       │   │   ├── entry_content_script.data
    │       │   │   ├── entry_content_script_cdata.data
    │       │   │   ├── entry_content_script_inline.data
    │       │   │   └── entry_content_style.data
    │       │   ├── hackers-org-data
    │       │   │   ├── background-image-plus.data
    │       │   │   ├── background-image-with-unicoded.data
    │       │   │   ├── downlevel-hidden.data
    │       │   │   ├── html-plus-time.data
    │       │   │   ├── javascript-link.data
    │       │   │   ├── style-comment.data
    │       │   │   ├── style-expression.data
    │       │   │   ├── style-import.data
    │       │   │   ├── style-js-tag.data
    │       │   │   ├── style-url-js.data
    │       │   │   ├── xml-data-island.data
    │       │   │   ├── xml-embedded-js.data
    │       │   │   └── xml-namespace.data.BROKEN
    │       │   ├── test_autolink.py
    │       │   ├── test_autolink.txt
    │       │   ├── test_basic.py
    │       │   ├── test_basic.txt
    │       │   ├── test_clean.py
    │       │   ├── test_clean.txt
    │       │   ├── test_clean_embed.txt
    │       │   ├── test_diff.py
    │       │   ├── test_diff.txt
    │       │   ├── test_elementsoup.py
    │       │   ├── test_feedparser_data.py
    │       │   ├── test_formfill.py
    │       │   ├── test_formfill.txt
    │       │   ├── test_forms.py
    │       │   ├── test_forms.txt
    │       │   ├── test_frames.py
    │       │   ├── test_html5parser.py
    │       │   ├── test_rewritelinks.py
    │       │   ├── test_rewritelinks.txt
    │       │   ├── test_xhtml.py
    │       │   ├── test_xhtml.txt
    │       │   └── transform_feedparser_data.py
    │       └── usedoctest.py
    │   ├── includes
    │       ├── __init__.py
    │       ├── c14n.pxd
    │       ├── config.pxd
    │       ├── dtdvalid.pxd
    │       ├── etree_defs.h
    │       ├── etreepublic.pxd
    │       ├── htmlparser.pxd
    │       ├── relaxng.pxd
    │       ├── schematron.pxd
    │       ├── tree.pxd
    │       ├── uri.pxd
    │       ├── xinclude.pxd
    │       ├── xmlerror.pxd
    │       ├── xmlparser.pxd
    │       ├── xmlschema.pxd
    │       ├── xpath.pxd
    │       └── xslt.pxd
    │   ├── isoschematron
    │       ├── __init__.py
    │       └── resources
    │       │   ├── rng
    │       │       └── iso-schematron.rng
    │       │   └── xsl
    │       │       ├── RNG2Schtrn.xsl
    │       │       ├── XSD2Schtrn.xsl
    │       │       └── iso-schematron-xslt1
    │       │           ├── iso_abstract_expand.xsl
    │       │           ├── iso_dsdl_include.xsl
    │       │           ├── iso_schematron_message.xsl
    │       │           ├── iso_schematron_skeleton_for_xslt1.xsl
    │       │           ├── iso_svrl_for_xslt1.xsl
    │       │           └── readme.txt
    │   ├── iterparse.pxi
    │   ├── lxml.etree.pyx
    │   ├── lxml.objectify.pyx
    │   ├── nsclasses.pxi
    │   ├── objectpath.pxi
    │   ├── parser.pxi
    │   ├── parsertarget.pxi
    │   ├── proxy.pxi
    │   ├── public-api.pxi
    │   ├── pyclasslookup.py
    │   ├── python.pxd
    │   ├── readonlytree.pxi
    │   ├── relaxng.pxi
    │   ├── sax.py
    │   ├── saxparser.pxi
    │   ├── schematron.pxi
    │   ├── serializer.pxi
    │   ├── tests
    │       ├── __init__.py
    │       ├── common_imports.py
    │       ├── dummy_http_server.py
    │       ├── include
    │       │   └── test_xinclude.xml
    │       ├── shakespeare.html
    │       ├── test-document.xslt
    │       ├── test-string.xml
    │       ├── test.dtd
    │       ├── test.sch
    │       ├── test.xml
    │       ├── test.xsd
    │       ├── test1.rng
    │       ├── test1.xslt
    │       ├── test2.rng
    │       ├── test2.xslt
    │       ├── test_broken.xml
    │       ├── test_builder.py
    │       ├── test_classlookup.py
    │       ├── test_css.py
    │       ├── test_dtd.py
    │       ├── test_elementtree.py
    │       ├── test_errors.py
    │       ├── test_etree.py
    │       ├── test_htmlparser.py
    │       ├── test_http_io.py
    │       ├── test_import.xsd
    │       ├── test_inc.xsd
    │       ├── test_incremental_xmlfile.py
    │       ├── test_io.py
    │       ├── test_isoschematron.py
    │       ├── test_nsclasses.py
    │       ├── test_objectify.py
    │       ├── test_pyclasslookup.py
    │       ├── test_relaxng.py
    │       ├── test_sax.py
    │       ├── test_schematron.py
    │       ├── test_threading.py
    │       ├── test_unicode.py
    │       ├── test_xmlschema.py
    │       ├── test_xpathevaluator.py
    │       └── test_xslt.py
    │   ├── usedoctest.py
    │   ├── xinclude.pxi
    │   ├── xmlerror.pxi
    │   ├── xmlid.pxi
    │   ├── xmlschema.pxi
    │   ├── xpath.pxi
    │   ├── xslt.pxi
    │   └── xsltext.pxi
├── test.py
├── tools
    └── xpathgrep.py
├── tox.ini
├── update-error-constants.py
├── valgrind-python.supp
├── version.txt
└── versioninfo.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | *.pyc
 2 | .tox
 3 | build
 4 | libs
 5 | *.egg-info
 6 | *.so
 7 | src/lxml/includes/lxml-version.h
 8 | src/lxml/lxml.etree.c
 9 | src/lxml/lxml.etree.h
10 | src/lxml/lxml.etree_api.h
11 | src/lxml/lxml.objectify.c
12 | 


--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
 1 | syntax: glob
 2 | 
 3 | *.pyc
 4 | *.pyo
 5 | __pycache__
 6 | 
 7 | build/
 8 | dist/
 9 | cython_debug/
10 | .git/
11 | .gitrev
12 | .coverage
13 | funding.txt
14 | .tox
15 | *.orig
16 | *.rej
17 | *.dep
18 | *.swp
19 | *.so
20 | *.o
21 | *~
22 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
 1 | 40fdc2efbcf833c2d2de7a1ebff7cc0b634e3a0a lxml-2.3
 2 | ea513f9a9811ee9b3991a1df0319b197b361e5cb lxml-0.5.1
 3 | e0fa117052c57bb83d005b962ff8788605efeadc lxml-0.6
 4 | 802f612635d91469d9430bee819713ce7ecb30e2 lxml-0.7
 5 | 1623013df810d6b4363dd1daf9f7f6fe5603f458 lxml-0.9
 6 | 11e79f443fed94d91f90c8080a2c8a8afeb1ae94 lxml-1.0.beta
 7 | a37777a46c55ae77a78266f57c3b6bca2ca04c5f lxml-1.0
 8 | 6a117f91ff2ac2824aeed1ccc87512608d131f47 lxml-1.1
 9 | 782bc8d9146fd9666879ace31cb4cf541a390173 lxml-1.1alpha
10 | 4144bbe6f24822a7ce5392130b2c98354cd9847e lxml-1.1beta
11 | 8205702eda77bb4a23d6789cc5ee94b4d36e65d4 lxml-1.2
12 | 4d410818a0e10638bb5eb5b54a37350a3477629b lxml-2.0
13 | 1dabace6188ee89b433f30b3838b6f2129698ba5 lxml-2.0alpha1
14 | 9dec5f9222aea1b9c531cfcc7e68d2c328394247 lxml-2.0alpha2
15 | b7873fce37031508d6fb115d8c79abad00b9f219 lxml-2.0alpha3
16 | 2ae894916b47710bbc79c139ff9f4a861ca5815c lxml-2.0alpha4
17 | c81c85642ca9eafe85630c46ef828828e692842e lxml-2.0alpha5
18 | 7a9b9811fadbd32b34d2b3e07901e213daca98d3 lxml-2.0alpha6
19 | 68ba2cbfc422d59bcba09216a7707153bd58b2e8 lxml-2.0beta1
20 | b1389dfc312b7d438fd673f7f7ee75d892ef81d7 lxml-2.0beta2
21 | 9b2be5208b1ecf4cfe5fd3cd0de4a396267abd97 lxml-2.0.1
22 | c5790462867c207a3c78dd510055589bf2950f9d lxml-2.1alpha1
23 | d1f3cf7d078796553de3b276db580796d5aeb048 lxml-2.1beta1
24 | fb891a783f270aefd03df44105677d49d765f2b2 lxml-2.1beta2
25 | 714552f48a53c2555994b6c56deb3de1e7ee702e lxml-2.1beta3
26 | 89227c4d5809f866f4a54a791d2452dc0ccc8d3b lxml-2.1
27 | e38e2a1162010841eebb60174be16797e0d34a87 lxml-2.2
28 | 3f730df23e58592418e22572fea5d8dfce7cf87d lxml-2.2.1
29 | 376b4baba7c91b98fae1ebd07592b21e5e535ba8 lxml-2.2.2
30 | 405a1fb3486ee8a9be7f37fd000553df21b95d5e lxml-2.3alpha1
31 | 901463d324cda95df28b2cab3ee86f715103fa84 lxml-2.3alpha2
32 | c9fef2d447ca436a83fa41183b73ccc825052f48 lxml-2.3beta1
33 | 5f5143534860cfba7fbe3ceab98dc749e79a4fc3 lxml-2.3.1
34 | 65ce4c8efb51013363dcc7318c847fc2f28f2eb2 lxml-2.3.2
35 | 945b29e5b54abf07897b46ebcb6d2227c05f8137 lxml-2.3.3
36 | cf0980063266b383d0403759993536eaa18ebe93 lxml-2.3.4
37 | c161cb55f4d4ebd93f5aee72ed73f267155bd894 lxml-2.3.5
38 | 66c66707c7d8a89b99a24bb29c791dbe9dc860f1 lxml-2.3.6
39 | 36e5b10c3ae6256e613554e7d71c34de0d71f385 lxml-3.0beta1
40 | 5bd7af62e93207ff58d54fc83f96b078b621eef8 lxml-3.0alpha2
41 | 6d41ed7c4b756792c9be44a5b8a383c10718016f lxml-3.0alpha1
42 | 60dd2d56701944e05c7655d1f47c56657d7837b3 lxml-3.0
43 | 22efeb405c9c4fb326541f56e431fd8e2686c435 lxml-3.0.1
44 | 714ab3c31e40ab2fef58e2be523de1ef4cb2a8a0 lxml-3.0.2
45 | 3e04be8a649395b193c70dd7d0e2b2b5acecb563 lxml-3.1beta1
46 | 91c436e11e2a822154fef48abe64274646cfde45 lxml-3.1.0
47 | e408b1f0eca00cb226acd28cd169988f9690067f lxml-3.1.1
48 | 862039d37b73e0250c9d8af6e5a689f6fe6321cd lxml-3.1.2
49 | 76262b9d449e75624b9dea745364f87e2e99b2a3 lxml-3.2.0
50 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - 2.5
 5 |   - 2.6
 6 |   - 2.7
 7 |   - 3.2
 8 |   - 3.3
 9 |   - pypy
10 | 
11 | install:
12 |     pip install --use-mirrors cython
13 | 
14 | script:
15 |   - python setup.py clean
16 |   - python setup.py build_ext --inplace
17 |   - make test
18 | 
19 | matrix:
20 |   allow_failures:
21 |     - python: pypy
22 | 


--------------------------------------------------------------------------------
/CREDITS.txt:
--------------------------------------------------------------------------------
 1 | =======
 2 | Credits
 3 | =======
 4 | 
 5 | Main contributors
 6 | =================
 7 | 
 8 | Stefan Behnel
 9 |        main developer and maintainer
10 | 
11 | Martijn Faassen
12 |        creator of lxml and initial main developer
13 | 
14 | Ian Bicking
15 |        creator and maintainer of lxml.html
16 | 
17 | Holger Joukl
18 |        ISO-Schematron support, development on lxml.objectify, bug reports, feedback
19 | 
20 | Simon Sapin
21 |        external maintenance and development of the cssselect package
22 | 
23 | Marc-Antoine Parent
24 |        XPath extension function help and patches
25 | 
26 | Olivier Grisel
27 |        improved (c)ElementTree compatibility patches, 
28 |        website improvements.
29 | 
30 | Kasimier Buchcik
31 |        help with specs and libxml2
32 | 
33 | Florian Wagner
34 |        help with copy.deepcopy support, bug reporting
35 |  
36 | Emil Kroymann
37 |        help with encoding support, bug reporting
38 |  
39 | Paul Everitt
40 |        bug reporting, feedback on API design
41 | 
42 | Victor Ng
43 |        Discussions on memory management strategies, vlibxml2
44 | 
45 | Robert Kern
46 |        feedback on API design
47 | 
48 | Andreas Pakulat
49 |        rpath linking support, doc improvements
50 | 
51 | David Sankel
52 |        building statically on Windows
53 | 
54 | Marcin Kasperski
55 |        PDF documentation generation
56 | 
57 | Sidnei da Silva
58 |        official MS Windows builds
59 | 
60 | Pascal Oberndörfer
61 |        official Mac-OS builds
62 | 
63 | ... and lots of other people who contributed to lxml by reporting
64 | bugs, discussing its functionality or blaming the docs for the bugs in
65 | their code.  Thank you all, user feedback and discussions form a very
66 | important part of an Open Source project!
67 | 
68 | 
69 | Special thanks go to:
70 | =====================
71 | 
72 | * Daniel Veillard and the libxml2 project for a great XML library.
73 | 
74 | * Fredrik Lundh for ElementTree, its API, and the competition through
75 |   cElementTree.
76 | 
77 | * Greg Ewing (Pyrex) and Robert Bradshaw et al. (Cython) for the
78 |   binding technology.
79 | 
80 | * Jonathan Stoppani for hosting the new mailing list on lxml.de.
81 | 
82 | * the codespeak crew, in particular Philipp von Weitershausen and
83 |   Holger Krekel for originally hosting lxml on codespeak.net
84 | 


--------------------------------------------------------------------------------
/IDEAS.txt:
--------------------------------------------------------------------------------
 1 | Things to try out when life permits
 2 | ===================================
 3 | 
 4 | * zlib-based parsing/serialising of compressed in-memory data
 5 | 
 6 |   * requires a libxml2 I/O OutputBuffer with appropriate I/O functions
 7 |     that call into the zlib compression routines
 8 | 
 9 | * lzma-based parsing/serialising of compressed in-memory data
10 | 
11 |   * requires a libxml2 I/O OutputBuffer with appropriate I/O functions
12 |     that call into the lzma compression routines
13 | 
14 |   * advantage over zlib: probably faster and better compression
15 | 
16 |   * maybe embed the lzma C sources in the distro
17 |     http://www.7-zip.org/sdk.html
18 | 
19 | * generating XML using the ``with`` statement
20 | 
21 |   http://comments.gmane.org/gmane.comp.python.general/579950?set_lines=100000
22 | 
23 | * parse-time validation against a user provided DTD
24 | 
25 |   * currently only works for XML Schema
26 | 
27 | * somehow integrate RelaxNG compact notation (rnc versus rng)
28 | 
29 |   * currently not supported by libxml2 (patch exists)
30 | 
31 | * support subclassing XSLTAccessControl to provide custom per-URL
32 |   access check methods
33 | 
34 |   * maybe custom resolvers are enough, or can be combined with this?
35 | 
36 | * reimplement iterparse() using the libxml2 xmlReader API
37 | 
38 |   * Advantage: the implementation can be made safer than the current
39 |     SAX implementation, as the parser would not interact with the
40 |     Python-level tree.
41 | 
42 |   * Disadvantage: the tree has to be built manually. In the current
43 |     SAX based implementation, libxml2 does it for us.
44 | 
45 | * rewrite iterparse() to accept a parser as argument instead of being
46 |   one
47 | 
48 |   * disadvantage: iterparse() can't deal with all parser options
49 | 
50 | * provide an HTMLParser wrapper that handles broken encodings in broken
51 |   HTML better, e.g. using BeautifulSoup's "unicode dammit" analyser
52 | 
53 | * expose namespace prefixes through the QName class
54 | 
55 | 


--------------------------------------------------------------------------------
/LICENSES.txt:
--------------------------------------------------------------------------------
 1 | lxml is copyright Infrae and distributed under the BSD license (see
 2 | doc/licenses/BSD.txt), with the following exceptions:
 3 | 
 4 | Some code, such as selftest.py, selftest2.py and
 5 | src/lxml/_elementpath.py are derived from ElementTree and
 6 | cElementTree. See doc/licenses/elementtree.txt for the license text.
 7 | 
 8 | lxml.cssselect and lxml.html are copyright Ian Bicking and distributed
 9 | under the BSD license (see doc/licenses/BSD.txt).
10 | 
11 | test.py, the test-runner script, is GPL and copyright Shuttleworth
12 | Foundation. See doc/licenses/GPL.txt. It is believed the unchanged
13 | inclusion of test.py to run the unit test suite falls under the
14 | "aggregation" clause of the GPL and thus does not affect the license
15 | of the rest of the package.
16 | 
17 | The doctest.py module is taken from the Python library and falls under
18 | the PSF Python License.
19 | 
20 | The isoschematron implementation uses several XSL and RelaxNG resources:
21 |  * The (XML syntax) RelaxNG schema for schematron, copyright International
22 |    Organization for Standardization (see 
23 |    src/lxml/isoschematron/resources/rng/iso-schematron.rng for the license
24 |    text)
25 |  * The skeleton iso-schematron-xslt1 pure-xslt schematron implementation
26 |    xsl stylesheets, copyright Rick Jelliffe and Academia Sinica Computing
27 |    Center, Taiwan (see the xsl files here for the license text: 
28 |    src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/)
29 |  * The xsd/rng schema schematron extraction xsl transformations are unlicensed
30 |    and copyright the respective authors as noted (see 
31 |    src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl and
32 |    src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl)
33 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | exclude *.py
 2 | include setup.py ez_setup.py setupinfo.py versioninfo.py buildlibxml.py
 3 | include test.py selftest.py selftest2.py
 4 | include update-error-constants.py
 5 | include MANIFEST.in Makefile version.txt
 6 | include CHANGES.txt CREDITS.txt INSTALL.txt LICENSES.txt README.rst TODO.txt
 7 | recursive-include src *.pyx *.pxd *.pxi *.py
 8 | recursive-include src/lxml lxml.etree.c lxml.objectify.c
 9 | recursive-include src/lxml lxml.etree.h lxml.etree_api.h etree_defs.h
10 | recursive-include src/lxml/isoschematron *.rng *.xsl *.txt
11 | recursive-include src/lxml/tests *.rng *.xslt *.xml *.dtd *.xsd *.sch *.html *.txt
12 | recursive-include src/lxml/html/tests *.data *.txt
13 | recursive-include samples *.xml
14 | recursive-include benchmark *.py
15 | recursive-include doc *.txt *.html *.css *.xml *.mgp pubkey.asc tagpython*.png Makefile
16 | recursive-include doc/s5/ui *.gif *.htc *.png *.js
17 | recursive-include doc/s5/ep2008 *.py *.png *.rng
18 | recursive-include fake_pyrex *.py
19 | include doc/*.py
20 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | PYTHON?=python
  2 | PYTHON3?=python3
  3 | TESTFLAGS=-p -v
  4 | TESTOPTS=
  5 | SETUPFLAGS=
  6 | LXMLVERSION=`cat version.txt`
  7 | 
  8 | PY2_WITH_CYTHON=$(shell $(PYTHON)  -c 'import Cython.Compiler' >/dev/null 2>/dev/null && echo " --with-cython" || true)
  9 | PY3_WITH_CYTHON=$(shell $(PYTHON3) -c 'import Cython.Compiler' >/dev/null 2>/dev/null && echo " --with-cython" || true)
 10 | 
 11 | all: inplace
 12 | 
 13 | # Build in-place
 14 | inplace:
 15 | 	$(PYTHON) setup.py $(SETUPFLAGS) build_ext -i $(PY2_WITH_CYTHON)
 16 | 
 17 | build:
 18 | 	$(PYTHON) setup.py $(SETUPFLAGS) build $(PY2_WITH_CYTHON)
 19 | 
 20 | test_build: build
 21 | 	$(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS)
 22 | 
 23 | test_inplace: inplace
 24 | 	$(PYTHON) test.py $(TESTFLAGS) $(TESTOPTS)
 25 | 	PYTHONPATH=src:$(PYTHONPATH) $(PYTHON) selftest.py
 26 | 	PYTHONPATH=src:$(PYTHONPATH) $(PYTHON) selftest2.py
 27 | 
 28 | test_inplace3: inplace
 29 | 	$(PYTHON3) setup.py $(SETUPFLAGS) build_ext -i $(PY3_WITH_CYTHON)
 30 | 	$(PYTHON3) test.py $(TESTFLAGS) $(TESTOPTS)
 31 | 	PYTHONPATH=src:$(PYTHONPATH) $(PYTHON3) selftest.py
 32 | 	PYTHONPATH=src:$(PYTHONPATH) $(PYTHON3) selftest2.py
 33 | 
 34 | valgrind_test_inplace: inplace
 35 | 	valgrind --tool=memcheck --leak-check=full --num-callers=30 --suppressions=valgrind-python.supp \
 36 | 		$(PYTHON) test.py
 37 | 
 38 | gdb_test_inplace: inplace
 39 | 	@echo -e "file $(PYTHON)\nrun test.py" > .gdb.command
 40 | 	gdb -x .gdb.command -d src -d src/lxml
 41 | 
 42 | bench_inplace: inplace
 43 | 	$(PYTHON) benchmark/bench_etree.py -i
 44 | 	$(PYTHON) benchmark/bench_xpath.py -i
 45 | 	$(PYTHON) benchmark/bench_xslt.py -i
 46 | 	$(PYTHON) benchmark/bench_objectify.py -i
 47 | 
 48 | ftest_build: build
 49 | 	$(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS)
 50 | 
 51 | ftest_inplace: inplace
 52 | 	$(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS)
 53 | 
 54 | apihtml: inplace
 55 | 	rm -fr doc/html/api
 56 | 	@[ -x "`which epydoc`" ] \
 57 | 		&& (cd src && echo "Generating API docs ..." && \
 58 | 			PYTHONPATH=. epydoc -v --docformat "restructuredtext en" \
 59 | 			-o ../doc/html/api --exclude='[.]html[.]tests|[.]_' \
 60 | 			--exclude-introspect='[.]usedoctest' \
 61 | 			--name "lxml API" --url / lxml/) \
 62 | 		|| (echo "not generating epydoc API documentation")
 63 | 
 64 | website: inplace
 65 | 	PYTHONPATH=src:$(PYTHONPATH) $(PYTHON) doc/mkhtml.py doc/html . ${LXMLVERSION}
 66 | 
 67 | html: inplace website apihtml s5
 68 | 
 69 | s5:
 70 | 	$(MAKE) -C doc/s5 slides
 71 | 
 72 | apipdf: inplace
 73 | 	rm -fr doc/pdf
 74 | 	mkdir -p doc/pdf
 75 | 	@[ -x "`which epydoc`" ] \
 76 | 		&& (cd src && echo "Generating API docs ..." && \
 77 | 			PYTHONPATH=. epydoc -v --latex --docformat "restructuredtext en" \
 78 | 			-o ../doc/pdf --exclude='([.]html)?[.]tests|[.]_' \
 79 | 			--exclude-introspect='html[.]clean|[.]usedoctest' \
 80 | 			--name "lxml API" --url / lxml/) \
 81 | 		|| (echo "not generating epydoc API documentation")
 82 | 
 83 | pdf: apipdf
 84 | 	$(PYTHON) doc/mklatex.py doc/pdf . ${LXMLVERSION}
 85 | 	(cd doc/pdf && pdflatex lxmldoc.tex \
 86 | 		    && pdflatex lxmldoc.tex \
 87 | 		    && pdflatex lxmldoc.tex)
 88 | 	@pdfopt doc/pdf/lxmldoc.pdf doc/pdf/lxmldoc-${LXMLVERSION}.pdf
 89 | 	@echo "PDF available as doc/pdf/lxmldoc-${LXMLVERSION}.pdf"
 90 | 
 91 | # At least two pdflatex runs are needed to build the correct table of contents; the pdf target above runs three.
 92 | 
 93 | test: test_inplace
 94 | 
 95 | test3: test_inplace3
 96 | 
 97 | valtest: valgrind_test_inplace
 98 | 
 99 | gdbtest: gdb_test_inplace
100 | 
101 | bench: bench_inplace
102 | 
103 | ftest: ftest_inplace
104 | 
105 | clean:
106 | 	find . \( -name '*.o' -o -name '*.so' -o -name '*.py[cod]' -o -name '*.dll' \) -exec rm -f {} \;
107 | 	rm -rf build
108 | 
109 | docclean:
110 | 	$(MAKE) -C doc/s5 clean
111 | 	rm -f doc/html/*.html
112 | 	rm -fr doc/html/api
113 | 	rm -fr doc/pdf
114 | 
115 | realclean: clean docclean
116 | 	find . -name '*.c' -exec rm -f {} \;
117 | 	rm -f TAGS
118 | 	$(PYTHON) setup.py clean -a --without-cython
119 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | What is lxml?
 2 | =============
 3 | 
 4 | lxml is the most feature-rich and easy-to-use library for processing XML and HTML in the Python language.
 5 | It's also very fast and memory friendly, just so you know.
 6 | 
 7 | For an introduction and further documentation, see `doc/main.txt`_.
 8 | 
 9 | For installation information, see `INSTALL.txt`_.
10 | 
11 | 
12 | Support the project
13 | -------------------
14 | 
15 | lxml has been downloaded from the `Python Package Index`_ more than
16 | two million times and is also available directly in many package
17 | distributions, e.g. for Linux or MacOS-X.
18 | 
19 | .. _`Python Package Index`: https://pypi.python.org/pypi/lxml
20 | 
21 | Most people who use lxml do so because they like using it.
22 | You can show us that you like it by blogging about your experience
23 | with it and linking to the project website.
24 | 
25 | If you are using lxml for your work and feel like giving a bit of
26 | your own benefit back to support the project, consider sending us
27 | money through PayPal that we can use for fixing bugs in the software
28 | and improving its features and documentation.  Please read the Legal
29 | Notice below, at the bottom of this page.  Thank you for your support.
30 | 
31 | .. class:: center
32 | 
33 |   |Donate|_
34 | 
35 | .. _Donate: https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=R56JE3VCPDA9N
36 | 
37 | Alternatively, if you prefer expressing your appreciation in a monthly
38 | dose of pennies rather than a dedicated donation, you can also use
39 | Flattr to do so.
40 | 
41 | .. class:: center
42 | 
43 |   |FlattrLink|_
44 | 
45 | .. _FlattrLink: https://flattr.com/thing/268156/lxml-The-Python-XML-Toolkit
46 | 
47 | Note that Flattr keeps 10% of the transactions for itself, which is money
48 | you pay that will not reach us.  Do not send any larger amounts through
49 | Flattr.  Use PayPal for donations instead, or `contact Stefan Behnel`_ for
50 | other ways to support the lxml project, as well as commercial consulting,
51 | customisations and trainings on lxml and fast Python XML processing.
52 | 
53 | .. |Donate| image:: https://github.com/lxml/lxml/raw/master/doc/html/paypal_btn_donateCC_LG.gif
54 |             :width: 160
55 |             :height: 47
56 |             :alt: Donate to the lxml project
57 | 
58 | .. |FlattrLink| image:: https://github.com/lxml/lxml/raw/master/doc/html/flattr-badge-large.png
59 |                 :width: 93
60 |                 :height: 20
61 |                 :alt: Flattr the lxml project
62 | 
63 | .. _`contact Stefan Behnel`: http://consulting.behnel.de/
64 | .. _`doc/main.txt`: http://lxml.de/
65 | .. _`INSTALL.txt`: http://lxml.de/installation.html
66 | 
67 | 
68 | Legal Notice for Donations
69 | --------------------------
70 | 
71 | Any donation that you make to the lxml project is voluntary and
72 | is not a fee for any services, goods, or advantages.  By making
73 | a donation to the lxml project, you acknowledge that we have the
74 | right to use the money you donate in any lawful way and for any
75 | lawful purpose we see fit and we are not obligated to disclose
76 | the way and purpose to any party unless required by applicable
77 | law.  Although lxml is free software, to the best of our knowledge
78 | the lxml project does not have any tax exempt status.  The lxml
79 | project is neither a registered non-profit corporation nor a
80 | registered charity in any country.  Your donation may or may not
81 | be tax-deductible; please consult your tax advisor in this matter.
82 | We will not publish or disclose your name and/or e-mail address
83 | without your consent, unless required by applicable law.  Your
84 | donation is non-refundable.
85 | 


--------------------------------------------------------------------------------
/TODO.txt:
--------------------------------------------------------------------------------
 1 | ===============
 2 | ToDo's for lxml
 3 | ===============
 4 | 
 5 | lxml
 6 | ====
 7 | 
 8 | In general
 9 | ----------
10 | 
11 | * more testing on multi-threading
12 | 
13 | * better exception messages for XPath and schemas based on error log,
14 |   e.g. missing namespace mappings in XPath
15 | 
16 | * when building statically, compile everything into one shared library
17 |   instead of one for lxml.etree and one for lxml.objectify to prevent
18 |   the redundant static linking of the library dependencies.
19 | 
20 | * more testing on input/output of encoded filenames, including custom
21 |   resolvers, relative XSLT imports, ...
22 | 
23 | * always use '<string>' as URL when tree was parsed from string? (can libxml2
24 |   handle this?)
25 | 
26 | * follow PEP 8 in API naming (avoidCamelCase in_favour_of_underscores)
27 | 
28 | * use per-call or per-thread error logs in XSLT/XPath/etc. to keep the
29 |   messages separate, especially in exceptions
30 | 
31 | * add 'nsmap' parameter to cleanup_namespaces()
32 | 
33 | 
34 | Entities
35 | --------
36 | 
37 | * clean support for entities (is the Entity element class enough?)
38 | 
39 | 
40 | Objectify
41 | ---------
42 | 
43 | * emulate setting special __attributes__ on ObjectifiedElement's as Python
44 |   attributes, not XML children
45 | 


--------------------------------------------------------------------------------
/benchmark/bench_objectify.py:
--------------------------------------------------------------------------------
  1 | import sys, copy
  2 | from itertools import *
  3 | 
  4 | import benchbase
  5 | from benchbase import (with_attributes, with_text, onlylib,
  6 |                        serialized, children, nochange)
  7 | 
  8 | ############################################################
  9 | # Benchmarks
 10 | ############################################################
 11 | 
 12 | class BenchMark(benchbase.TreeBenchMark):
 13 |     repeat100  = range(100)
 14 |     repeat1000 = range(1000)
 15 |     repeat3000 = range(3000)
 16 | 
 17 |     def __init__(self, lib):
 18 |         from lxml import etree, objectify
 19 |         self.objectify = objectify
 20 |         parser = etree.XMLParser(remove_blank_text=True)
 21 |         lookup = objectify.ObjectifyElementClassLookup()
 22 |         parser.setElementClassLookup(lookup)
 23 |         super(BenchMark, self).__init__(etree, parser)
 24 | 
 25 |     @nochange
 26 |     def bench_attribute(self, root):
 27 |         "1 2 4"
 28 |         for i in self.repeat3000:
 29 |             root.zzzzz
 30 | 
 31 |     def bench_attribute_assign_int(self, root):
 32 |         "1 2 4"
 33 |         for i in self.repeat3000:
 34 |             root.XYZ = 5
 35 | 
 36 |     def bench_attribute_assign_string(self, root):
 37 |         "1 2 4"
 38 |         for i in self.repeat3000:
 39 |             root.XYZ = "5"
 40 | 
 41 |     @nochange
 42 |     def bench_attribute_cached(self, root):
 43 |         "1 2 4"
 44 |         cache = root.zzzzz
 45 |         for i in self.repeat3000:
 46 |             root.zzzzz
 47 | 
 48 |     @nochange
 49 |     def bench_attributes_deep(self, root):
 50 |         "1 2 4"
 51 |         for i in self.repeat3000:
 52 |             root.zzzzz['{cdefg}a00001']
 53 | 
 54 |     @nochange
 55 |     def bench_attributes_deep_cached(self, root):
 56 |         "1 2 4"
 57 |         cache1 = root.zzzzz
 58 |         cache2 = cache1['{cdefg}a00001']
 59 |         for i in self.repeat3000:
 60 |             root.zzzzz['{cdefg}a00001']
 61 | 
 62 |     @nochange
 63 |     def bench_objectpath(self, root):
 64 |         "1 2 4"
 65 |         path = self.objectify.ObjectPath(".zzzzz")
 66 |         for i in self.repeat3000:
 67 |             path(root)
 68 | 
 69 |     @nochange
 70 |     def bench_objectpath_deep(self, root):
 71 |         "1 2 4"
 72 |         path = self.objectify.ObjectPath(".zzzzz.{cdefg}a00001")
 73 |         for i in self.repeat3000:
 74 |             path(root)
 75 | 
 76 |     @nochange
 77 |     def bench_objectpath_deep_cached(self, root):
 78 |         "1 2 4"
 79 |         cache1 = root.zzzzz
 80 |         cache2 = cache1['{cdefg}a00001']
 81 |         path = self.objectify.ObjectPath(".zzzzz.{cdefg}a00001")
 82 |         for i in self.repeat3000:
 83 |             path(root)
 84 | 
 85 |     @with_text(text=True, utext=True, no_text=True)
 86 |     def bench_annotate(self, root):
 87 |         self.objectify.annotate(root)
 88 | 
 89 |     @nochange
 90 |     def bench_descendantpaths(self, root):
 91 |         root.descendantpaths()
 92 | 
 93 |     @nochange
 94 |     @with_text(text=True)
 95 |     def bench_type_inference(self, root):
 96 |         "1 2 4"
 97 |         el = root.aaaaa
 98 |         for i in self.repeat1000:
 99 |             el.getchildren()
100 | 
101 |     @nochange
102 |     @with_text(text=True)
103 |     def bench_type_inference_annotated(self, root):
104 |         "1 2 4"
105 |         el = root.aaaaa
106 |         self.objectify.annotate(el)
107 |         for i in self.repeat1000:
108 |             el.getchildren()
109 | 
110 |     @nochange
111 |     @children
112 |     def bench_elementmaker(self, children):
113 |         E = self.objectify.E
114 |         for child in children:
115 |             root = E.this(
116 |                 "test",
117 |                 E.will(
118 |                     E.do("nothing"),
119 |                     E.special,
120 |                     )
121 |                 )
122 | 
123 | if __name__ == '__main__':  # executed as a script: hand the benchmark class to benchbase.main()
124 |     benchbase.main(BenchMark)
125 | 


--------------------------------------------------------------------------------
/benchmark/bench_xpath.py:
--------------------------------------------------------------------------------
 1 | import sys, copy
 2 | from itertools import *
 3 | 
 4 | import benchbase
 5 | from benchbase import with_attributes, with_text, onlylib, serialized, children, nochange
 6 | 
 7 | ############################################################
 8 | # Benchmarks
 9 | ############################################################
10 | 
11 | class XPathBenchMark(benchbase.TreeBenchMark):  # XPath benchmarks; every method is marked onlylib('lxe')
12 |     @nochange
13 |     @onlylib('lxe')
14 |     @children
15 |     def bench_xpath_class(self, children):  # build the XPath object once, then call it on each child
16 |         xpath = self.etree.XPath("./*[1]")
17 |         for child in children:
18 |             xpath(child)
19 | 
20 |     @nochange
21 |     @onlylib('lxe')
22 |     @children
23 |     def bench_xpath_class_repeat(self, children):  # build a new XPath object for every child
24 |         for child in children:
25 |             xpath = self.etree.XPath("./*[1]")
26 |             xpath(child)
27 | 
28 |     @nochange
29 |     @onlylib('lxe')
30 |     def bench_xpath_element(self, root):  # one XPathElementEvaluator bound to root, evaluated once per child
31 |         xpath = self.etree.XPathElementEvaluator(root)
32 |         for child in root:  # NOTE(review): ``child`` is unused; the evaluator stays bound to root -- confirm intent
33 |             xpath.evaluate("./*[1]")
34 | 
35 |     @nochange
36 |     @onlylib('lxe')
37 |     @children
38 |     def bench_xpath_method(self, children):  # child.xpath() with the expression passed as a string each time
39 |         for child in children:
40 |             child.xpath("./*[1]")
41 | 
42 |     @nochange
43 |     @onlylib('lxe')
44 |     @children
45 |     def bench_multiple_xpath_or(self, children):  # three tags selected by a single union ('|') expression
46 |         xpath = self.etree.XPath(".//p:a00001|.//p:b00001|.//p:c00001",
47 |                                  namespaces={'p':'cdefg'})
48 |         for child in children:
49 |             xpath(child)
50 | 
51 |     @nochange
52 |     @onlylib('lxe')
53 |     @children
54 |     def bench_multiple_iter_tag(self, children):  # the same three tags, collected by three separate iter() passes
55 |         for child in children:
56 |             list(child.iter("{cdefg}a00001"))
57 |             list(child.iter("{cdefg}b00001"))
58 |             list(child.iter("{cdefg}c00001"))
59 | 
60 |     @nochange
61 |     @onlylib('lxe')
62 |     @children
63 |     def bench_xpath_old_extensions(self, children):  # extension function supplied through the ``extensions`` mapping
64 |         def return_child(_, elements):  # first child of the first element, or () for an empty node set
65 |             if elements:
66 |                 return elements[0][0]
67 |             else:
68 |                 return ()
69 |         extensions = {("test", "child") : return_child}  # key: (namespace URI, function name) as used in "t:child(.)"
70 |         xpath = self.etree.XPath("t:child(.)", namespaces={"t":"test"},
71 |                                  extensions=extensions)
72 |         for child in children:
73 |             xpath(child)
74 | 
75 |     @nochange
76 |     @onlylib('lxe')
77 |     @children
78 |     def bench_xpath_extensions(self, children):  # extension function registered in a FunctionNamespace instead
79 |         def return_child(_, elements):  # same helper as in bench_xpath_old_extensions
80 |             if elements:
81 |                 return elements[0][0]
82 |             else:
83 |                 return ()
84 |         self.etree.FunctionNamespace("testns")["t"] = return_child  # registered under namespace "testns" as "t"
85 | 
86 |         try:
87 |             xpath = self.etree.XPath("test:t(.)", namespaces={"test":"testns"})
88 |             for child in children:
89 |                 xpath(child)
90 |         finally:
91 |             del self.etree.FunctionNamespace("testns")["t"]  # always remove the registration again
92 | 
93 | if __name__ == '__main__':  # executed as a script: hand the benchmark class to benchbase.main()
94 |     benchbase.main(XPathBenchMark)
95 | 


--------------------------------------------------------------------------------
/benchmark/bench_xslt.py:
--------------------------------------------------------------------------------
 1 | import sys, copy
 2 | from itertools import *
 3 | 
 4 | import benchbase
 5 | from benchbase import with_attributes, with_text, onlylib, serialized
 6 | 
 7 | ############################################################
 8 | # Benchmarks
 9 | ############################################################
10 | 
11 | class XSLTBenchMark(benchbase.TreeBenchMark):  # XSLT benchmarks; both methods are marked onlylib('lxe')
12 |     @onlylib('lxe')
13 |     def bench_xslt_extensions_old(self, root):  # stylesheet that calls a Python extension function; run 10 times
14 |         tree = self.etree.XML("""\
15 | <xsl:stylesheet version="1.0"
16 |    xmlns:l="test"
17 |    xmlns:testns="testns"
18 |    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
19 |   <l:data>TEST</l:data>
20 |   <xsl:template match="/">
21 |     <l:result>
22 |       <xsl:for-each select="*/*">
23 |         <xsl:copy-of select="testns:child(.)"/>
24 |       </xsl:for-each>
25 |     </l:result>
26 |   </xsl:template>
27 | </xsl:stylesheet>
28 | """)
29 |         def return_child(_, elements):  # extension function: first child of the first node passed in
30 |             return elements[0][0]
31 | 
32 |         extensions = {('testns', 'child') : return_child}  # key matches the stylesheet's testns:child() call
33 | 
34 |         transform = self.etree.XSLT(tree, extensions)  # stylesheet is compiled once, outside the loop
35 |         for i in range(10):
36 |             transform(root)
37 | 
38 |     @onlylib('lxe')
39 |     def bench_xslt_document(self, root):  # stylesheet that reads its own <l:data> through document(''); run once
40 |         transform = self.etree.XSLT(self.etree.XML("""\
41 | <xsl:stylesheet version="1.0"
42 |    xmlns:l="test"
43 |    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
44 |   <l:data>TEST</l:data>
45 |   <xsl:template match="/">
46 |     <l:result>
47 |       <xsl:for-each select="*/*">
48 |         <l:test><xsl:copy-of select="document('')//l:data/text()"/></l:test>
49 |       </xsl:for-each>
50 |     </l:result>
51 |   </xsl:template>
52 | </xsl:stylesheet>
53 | """))
54 |         transform(root)
55 | 
56 | if __name__ == '__main__':  # executed as a script: hand the benchmark class to benchbase.main()
57 |     benchbase.main(XSLTBenchMark)
58 | 


--------------------------------------------------------------------------------
/bisect_crashes.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | import sys
 4 | import unittest
 5 | 
 6 | # make sure we import test.py from the right place
 7 | script_path = os.path.abspath(os.path.dirname(sys.argv[0]))  # directory containing this script
 8 | sys.path.insert(0, script_path)
 9 | # the 'src' directory next to the script goes on the import path as well
10 | test_base_path = os.path.join(script_path, 'src')
11 | sys.path.insert(1, test_base_path)
12 | # these imports must stay below the sys.path changes above
13 | import test
14 | from DD import DD
15 | # module-wide runner options, read by find_tests() and DDTester._test()
16 | cfg = test.Options()
17 | cfg.verbosity = 0
18 | cfg.basedir = test_base_path
19 | cfg.unit_tests = True
20 | 
21 | def write(line, *args):
22 |     if args:
23 |         line = line % args
24 |     sys.stderr.write(line + '\n')
25 | 
26 | 
27 | def find_tests():
28 |     test_files = test.get_test_files(cfg)
29 |     return test.get_test_cases(test_files, cfg)
30 | 
31 | class DDTester(DD):
32 |     def _test(self, test_cases):
33 |         if not test_cases:
34 |             return self.PASS
35 |         write('Running subset of %d tests %s',
36 |               len(test_cases), self.coerce(test_cases))
37 |         test_cases = [ item[-1] for item in test_cases ]
38 |         pid = os.fork()
39 |         if not pid:
40 |             # child executes tests
41 |             runner = test.CustomTestRunner(cfg, None)
42 |             suite = unittest.TestSuite()
43 |             suite.addTests(test_cases)
44 |             os._exit( not runner.run(suite).wasSuccessful() )
45 |         cid, retval = os.waitpid(pid, 0)
46 |         if retval:
47 |             write('exit status: %d, signal: %d', retval >> 8, retval % 0xFF)
48 |         if (retval % 0xFF) > 2: # signal received?
49 |             return self.FAIL
50 |         return self.PASS
51 | 
52 |     def coerce(self, test_cases):
53 |         if not test_cases:
54 |             return '[]'
55 |         test_cases = [ item[-1] for item in test_cases ]
56 |         return '[%s .. %s]' % (test_cases[0].id(), test_cases[-1].id())
57 | 
58 | def dd_tests():
59 |     tests = find_tests()
60 |     write('Found %d tests', len(tests))
61 |     dd = DDTester()
62 |     min_tests = dd.ddmin( list(enumerate(tests)) )
63 |     return [ item[-1] for item in min_tests ]
64 | 
65 | if __name__ == '__main__':
66 |     write('Failing tests:\n%s', '\n'.join([test.id() for test in dd_tests()]))
67 | 


--------------------------------------------------------------------------------
/doc/capi.txt:
--------------------------------------------------------------------------------
 1 | ==============================
 2 | The public C-API of lxml.etree
 3 | ==============================
 4 | 
 5 | As of version 1.1, lxml.etree provides a public C-API.  This allows external
 6 | C extensions to efficiently access public functions and classes of lxml,
 7 | without going through the Python API.
 8 | 
 9 | The API is described in the file `etreepublic.pxd`_, which is directly
10 | c-importable by extension modules implemented in Pyrex_ or Cython_.
11 | 
12 | .. _`etreepublic.pxd`: https://github.com/lxml/lxml/blob/master/src/lxml/include/etreepublic.pxd
13 | .. _Cython: http://cython.org
14 | .. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
15 | 
16 | .. contents::
17 | ..
18 |    1  Writing external modules in Cython
19 |    2  Writing external modules in C
20 | 
21 | 
22 | Writing external modules in Cython
23 | ----------------------------------
24 | 
25 | This is the easiest way of extending lxml at the C level.  A Cython_
26 | (or Pyrex_) module should start like this::
27 | 
28 |     # My Cython extension
29 | 
30 |     # import the public functions and classes of lxml.etree
31 |     cimport etreepublic as cetree
32 | 
33 |     # import the lxml.etree module in Python
34 |     cdef object etree
35 |     from lxml import etree
36 | 
37 |     # initialize the access to the C-API of lxml.etree
38 |     cetree.import_lxml__etree()
39 | 
40 | From this line on, you can access all public functions of lxml.etree
41 | from the ``cetree`` namespace like this::
42 | 
43 |     # build a tag name from namespace and element name
44 |     py_tag = cetree.namespacedNameFromNsName("http://some/url", "myelement")
45 | 
46 | Public lxml classes are easily subclassed.  For example, to implement
47 | and set a new default element class, you can write Cython code like
48 | the following::
49 | 
50 |     from etreepublic cimport ElementBase
51 |     cdef class NewElementClass(ElementBase):
52 |          def set_value(self, myval):
53 |              self.set("my_attribute", myval)
54 | 
55 |     etree.set_element_class_lookup(
56 |          etree.DefaultElementClassLookup(element=NewElementClass))
57 | 
58 | 
59 | Writing external modules in C
60 | -----------------------------
61 | 
62 | If you really feel like it, you can also interface with lxml.etree straight
63 | from C code.  All you have to do is include the header file for the public
64 | API, import the ``lxml.etree`` module and then call the import function:
65 | 
66 | .. sourcecode:: c
67 | 
68 |     /* My C extension */
69 | 
70 |     /* common includes */
71 |     #include "Python.h"
72 |     #include "stdio.h"
73 |     #include "string.h"
74 |     #include "stdarg.h"
75 |     #include "libxml/xmlversion.h"
76 |     #include "libxml/encoding.h"
77 |     #include "libxml/hash.h"
78 |     #include "libxml/tree.h"
79 |     #include "libxml/xmlIO.h"
80 |     #include "libxml/xmlsave.h"
81 |     #include "libxml/globals.h"
82 |     #include "libxml/xmlstring.h"
83 | 
84 |     /* lxml.etree specific includes */
85 |     #include "lxml-version.h"
86 |     #include "etree_defs.h"
87 |     #include "etree.h"
88 | 
89 |     /* setup code */
90 |     import_lxml__etree()
91 | 
92 | Note that including ``etree.h`` does not automatically include the
93 | header files it requires.  Note also that the above list of common
94 | includes may not be sufficient.
95 | 


--------------------------------------------------------------------------------
/doc/cssselect.txt:
--------------------------------------------------------------------------------
  1 | ==============
  2 | lxml.cssselect
  3 | ==============
  4 | 
  5 | lxml supports a number of interesting languages for tree traversal and element
  6 | selection.  The most important is obviously XPath_, but there is also
  7 | ObjectPath_ in the `lxml.objectify`_ module.  The newest child of this family
  8 | is `CSS selection`_, which is made available in the form of the ``lxml.cssselect``
  9 | module.
 10 | 
 11 | Although it started its life in lxml, cssselect_ is now an independent project.
 12 | It translates CSS selectors to XPath 1.0 expressions that can be used with
 13 | lxml's XPath engine.  ``lxml.cssselect`` adds a few convenience shortcuts into
 14 | that package.
 15 | 
 16 | 
 17 | .. _XPath: xpathxslt.html#xpath
 18 | .. _ObjectPath: objectify.html#objectpath
 19 | .. _`lxml.objectify`: objectify.html
 20 | .. _`CSS selection`: http://www.w3.org/TR/CSS21/selector.html
 21 | .. _cssselect: http://packages.python.org/cssselect/
 22 | 
 23 | .. contents::
 24 | ..
 25 |    1  The CSSSelector class
 26 |    2  The cssselect method
 27 |    3  Supported Selectors
 28 |    4  Namespaces
 29 | 
 30 | 
 31 | The CSSSelector class
 32 | =====================
 33 | 
 34 | The most important class in the ``lxml.cssselect`` module is ``CSSSelector``.  It
 35 | provides the same interface as the XPath_ class, but accepts a CSS selector
 36 | expression as input:
 37 | 
 38 | .. sourcecode:: pycon
 39 | 
 40 |     >>> from lxml.cssselect import CSSSelector
 41 |     >>> sel = CSSSelector('div.content')
 42 |     >>> sel  #doctest: +ELLIPSIS
 43 |     <CSSSelector ... for 'div.content'>
 44 |     >>> sel.css
 45 |     'div.content'
 46 | 
 47 | The selector actually compiles to XPath, and you can see the
 48 | expression by inspecting the object:
 49 | 
 50 | .. sourcecode:: pycon
 51 | 
 52 |     >>> sel.path
 53 |     "descendant-or-self::div[@class and contains(concat(' ', normalize-space(@class), ' '), ' content ')]"
 54 | 
 55 | To use the selector, simply call it with a document or element
 56 | object:
 57 | 
 58 | .. sourcecode:: pycon
 59 | 
 60 |     >>> from lxml.etree import fromstring
 61 |     >>> h = fromstring('''<div id="outer">
 62 |     ...   <div id="inner" class="content body">
 63 |     ...       text
 64 |     ...   </div></div>''')
 65 |     >>> [e.get('id') for e in sel(h)]
 66 |     ['inner']
 67 | 
 68 | Using ``CSSSelector`` is equivalent to translating with ``cssselect``
 69 | and using the ``XPath`` class:
 70 | 
 71 | .. sourcecode:: pycon
 72 | 
 73 |     >>> from cssselect import GenericTranslator
 74 |     >>> from lxml.etree import XPath
 75 |     >>> sel = XPath(GenericTranslator().css_to_xpath('div.content'))
 76 | 
 77 | ``CSSSelector`` takes a ``translator`` parameter to let you choose which
 78 | translator to use. It can be ``'xml'`` (the default), ``'xhtml'``, ``'html'``
 79 | or a `Translator object`_.
 80 | 
 81 | .. _Translator object: http://packages.python.org/cssselect/#cssselect.GenericTranslator
 82 | 
 83 | 
 84 | The cssselect method
 85 | ====================
 86 | 
 87 | lxml ``Element`` objects have a ``cssselect`` convenience method.
 88 | 
 89 | .. sourcecode:: pycon
 90 | 
 91 |     >>> h.cssselect('div.content') == sel(h)
 92 |     True
 93 | 
 94 | Note however that pre-compiling the expression with the ``CSSSelector`` or
 95 | ``XPath`` class can provide a substantial speedup.
 96 | 
 97 | The method also accepts a ``translator`` parameter. On ``HtmlElement``
 98 | objects, the default is changed to ``'html'``.
 99 | 
100 | 
101 | Supported Selectors
102 | ===================
103 | 
104 | Most `Level 3`_ selectors are supported. The details are in the
105 | `cssselect documentation`_.
106 | 
107 | .. _Level 3: http://www.w3.org/TR/2011/REC-css3-selectors-20110929/
108 | .. _cssselect documentation: http://packages.python.org/cssselect/#supported-selectors
109 | 
110 | 
111 | Namespaces
112 | ==========
113 | 
114 | In CSS you can use ``namespace-prefix|element``, similar to
115 | ``namespace-prefix:element`` in an XPath expression.  In fact, it maps
116 | one-to-one, and the same rules are used to map namespace prefixes to
117 | namespace URIs: the ``CSSSelector`` class accepts a dictionary as its
118 | ``namespaces`` argument.
119 | 


--------------------------------------------------------------------------------
/doc/docstructure.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import os
 3 | 
 4 | if os.path.exists(os.path.join(os.path.dirname(__file__), '..', 'funding.txt')):  # optional page
 5 |     funding = ('../funding.txt',)
 6 | else:
 7 |     funding = ()
 8 | # (section title, entries) pairs; '@...' entries appear to be named links rather than files -- confirm in mkhtml.py
 9 | SITE_STRUCTURE = [
10 |     ('lxml', ('main.txt', 'intro.txt', '../INSTALL.txt', # 'lxml2.txt',
11 |               'performance.txt', 'compatibility.txt', 'FAQ.txt') + funding),
12 |     ('Developing with lxml', ('tutorial.txt', '@API reference',
13 |                               'api.txt', 'parsing.txt',
14 |                               'validation.txt', 'xpathxslt.txt',
15 |                               'objectify.txt', 'lxmlhtml.txt',
16 |                               'cssselect.txt', 'elementsoup.txt',
17 |                               'html5parser.txt')),
18 |     ('Extending lxml', ('resolvers.txt', 'extensions.txt',
19 |                         'element_classes.txt', 'sax.txt', 'capi.txt')),
20 |     ('Developing lxml', ('build.txt', 'lxml-source-howto.txt',
21 |                          '@Release Changelog', '../CREDITS.txt')),
22 |     ]
23 | # NOTE(review): '@Release Changelog' above has no entry in HREF_MAP -- presumably handled elsewhere; confirm
24 | HREF_MAP = {
25 |     "API reference" : "api/index.html"
26 | }
27 | # presumably maps a source file's base name to the name of its generated page -- confirm in mkhtml.py
28 | BASENAME_MAP = {
29 |     'main' : 'index',
30 |     'INSTALL' : 'installation',
31 |     'CREDITS' : 'credits',
32 | }
33 | 


--------------------------------------------------------------------------------
/doc/html/flattr-badge-large.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/html/flattr-badge-large.png


--------------------------------------------------------------------------------
/doc/html/paypal_btn_donateCC_LG.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/html/paypal_btn_donateCC_LG.gif


--------------------------------------------------------------------------------
/doc/html/proxies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/html/proxies.png


--------------------------------------------------------------------------------
/doc/html/python-xml-title.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/html/python-xml-title.png


--------------------------------------------------------------------------------
/doc/html/python-xml.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/html/python-xml.png


--------------------------------------------------------------------------------
/doc/html/tagpython-big.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/html/tagpython-big.png


--------------------------------------------------------------------------------
/doc/html5parser.txt:
--------------------------------------------------------------------------------
 1 | ===============
 2 | html5lib Parser
 3 | ===============
 4 | 
 5 | `html5lib`_ is a Python package that implements the HTML5 parsing algorithm
 6 | which is heavily influenced by current browsers and based on the `WHATWG
 7 | HTML5 specification`_.
 8 | 
 9 | .. _html5lib: http://code.google.com/p/html5lib/
10 | .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
11 | .. _WHATWG HTML5 specification: http://www.whatwg.org/specs/web-apps/current-work/
12 | 
13 | lxml can benefit from the parsing capabilities of `html5lib` through
14 | the ``lxml.html.html5parser`` module.  It provides a similar interface
15 | to the ``lxml.html`` module by providing ``fromstring()``,
16 | ``parse()``, ``document_fromstring()``, ``fragment_fromstring()`` and
17 | ``fragments_fromstring()`` that work like the regular html parsing
18 | functions.
19 | 
20 | 
21 | Differences to regular HTML parsing
22 | ===================================
23 | 
24 | There are a few differences in the returned tree to the regular HTML
25 | parsing functions from ``lxml.html``.  html5lib normalizes some elements
26 | and element structures to a common format.  For example, even if a table
27 | does not have a ``tbody``, html5lib will inject one automatically:
28 | 
29 | .. sourcecode:: pycon
30 | 
31 |     >>> from lxml.html import tostring, html5parser
32 |     >>> tostring(html5parser.fromstring("<table><td>foo"))
33 |     '<table><tbody><tr><td>foo</td></tr></tbody></table>'
34 | 
35 | Also the parameters the functions accept are different.
36 | 
37 | 
38 | Function Reference
39 | ==================
40 | 
41 | ``parse(filename_url_or_file)``:
42 |     Parses the named file or url, or if the object has a ``.read()``
43 |     method, parses from that.
44 | 
45 | ``document_fromstring(html, guess_charset=True)``:
46 |     Parses a document from the given string.  This always creates a
47 |     correct HTML document, which means the parent node is ``<html>``,
48 |     and there is a body and possibly a head.
49 | 
50 |     If a bytestring is passed and ``guess_charset`` is true the chardet
51 |     library (if installed) will guess the charset if ambiguities exist.
52 | 
53 | ``fragment_fromstring(string, create_parent=False, guess_charset=False)``:
54 |     Returns an HTML fragment from a string.  The fragment must contain
55 |     just a single element, unless ``create_parent`` is given;
56 |     e.g., ``fragment_fromstring(string, create_parent='div')`` will
57 |     wrap the element in a ``<div>``.  If ``create_parent`` is true the
58 |     default parent tag (div) is used.
59 | 
60 |     If a bytestring is passed and ``guess_charset`` is true the chardet
61 |     library (if installed) will guess the charset if ambiguities exist.
62 | 
63 | ``fragments_fromstring(string, no_leading_text=False, guess_charset=False, parser=None)``:
64 |     Returns a list of the elements found in the fragment.  The first item in
65 |     the list may be a string.  If ``no_leading_text`` is true, then it will
66 |     be an error if there is leading text, and it will always be a list of
67 |     only elements.
68 | 
69 |     If a bytestring is passed and ``guess_charset`` is true the chardet
70 |     library (if installed) will guess the charset if ambiguities exist.
71 | 
72 | ``fromstring(string)``:
73 |     Returns ``document_fromstring`` or ``fragment_fromstring``, based
74 |     on whether the string looks like a full document, or just a
75 |     fragment.
76 | 
77 | Additionally all parsing functions accept a ``parser`` keyword argument
78 | that can be set to a custom parser instance.  To create custom parsers
79 | you can subclass the ``HTMLParser`` and ``XHTMLParser`` from the same
80 | module.  Note that these are the parser classes provided by html5lib.
81 | 


--------------------------------------------------------------------------------
/doc/intro.txt:
--------------------------------------------------------------------------------
 1 | Why lxml?
 2 | =========
 3 | 
 4 | .. contents::
 5 | ..
 6 |    1  Motto
 7 |    2  Aims
 8 | 
 9 | 
10 | Motto
11 | -----
12 | 
13 | "the thrills without the strangeness"
14 | 
15 | To explain the motto:
16 | 
17 | "Programming with libxml2 is like the thrilling embrace of an exotic stranger.
18 | It seems to have the potential to fulfill your wildest dreams, but there's a
19 | nagging voice somewhere in your head warning you that you're about to get
20 | screwed in the worst way."  (`a quote by Mark Pilgrim`_)
21 | 
22 | Mark Pilgrim was describing in particular the experience a Python programmer
23 | has when dealing with libxml2.  The default Python bindings of libxml2 are
24 | fast, thrilling, powerful, and your code might fail in some horrible way that
25 | you really shouldn't have to worry about when writing Python code.  lxml
26 | combines the power of libxml2 with the ease of use of Python.
27 | 
28 | .. _`a quote by Mark Pilgrim`: http://diveintomark.org/archives/2004/02/18/libxml2
29 | 
30 | 
31 | Aims
32 | ----
33 | 
34 | The C libraries libxml2_ and libxslt_ have huge benefits:
35 | 
36 | * Standards-compliant XML support.
37 | 
38 | * Support for (broken) HTML.
39 | 
40 | * Full-featured.
41 | 
42 | * Actively maintained by XML experts.
43 | 
44 | * fast. fast! FAST!
45 | 
46 | .. _libxml2: http://www.xmlsoft.org
47 | 
48 | .. _libxslt: http://xmlsoft.org/XSLT
49 | 
50 | 
51 | These libraries already ship with Python bindings, but these Python bindings
52 | mimic the C-level interface.  This yields a number of problems:
53 | 
54 | * very low level and C-ish (not Pythonic).
55 | 
56 | * underdocumented and huge, you get lost in them.
57 | 
58 | * UTF-8 in API, instead of Python unicode strings.
59 | 
60 | * Can easily cause segfaults from Python.
61 | 
62 | * Require manual memory management!
63 | 
64 | 
65 | lxml is a new Python binding for libxml2 and libxslt, completely independent
66 | from these existing Python bindings.  Its aims:
67 | 
68 | * Pythonic API.
69 | 
70 | * Documented.
71 | 
72 | * Use Python unicode strings in API.
73 | 
74 | * Safe (no segfaults).
75 | 
76 | * No manual memory management!
77 | 
78 | lxml aims to provide a Pythonic API by following as much as possible the
79 | `ElementTree API`_.  We're trying to avoid inventing too many new APIs, or you
80 | having to learn new things -- XML is complicated enough.
81 | 
82 | .. _`ElementTree API`: http://effbot.org/zone/element-index.htm
83 | 


--------------------------------------------------------------------------------
/doc/licenses/BSD.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2004 Infrae. All rights reserved.
 2 | 
 3 | Redistribution and use in source and binary forms, with or without
 4 | modification, are permitted provided that the following conditions are
 5 | met:
 6 | 
 7 |   1. Redistributions of source code must retain the above copyright
 8 |      notice, this list of conditions and the following disclaimer.
 9 |    
10 |   2. Redistributions in binary form must reproduce the above copyright
11 |      notice, this list of conditions and the following disclaimer in
12 |      the documentation and/or other materials provided with the
13 |      distribution.
14 | 
15 |   3. Neither the name of Infrae nor the names of its contributors may
16 |      be used to endorse or promote products derived from this software
17 |      without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INFRAE OR
23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 


--------------------------------------------------------------------------------
/doc/licenses/ZopePublicLicense.txt:
--------------------------------------------------------------------------------
 1 | Zope Public License (ZPL) Version 2.0
 2 | -----------------------------------------------
 3 | 
 4 | This software is Copyright (c) Zope Corporation (tm) and
 5 | Contributors. All rights reserved.
 6 | 
 7 | This license has been certified as open source. It has also
 8 | been designated as GPL compatible by the Free Software
 9 | Foundation (FSF).
10 | 
11 | Redistribution and use in source and binary forms, with or
12 | without modification, are permitted provided that the
13 | following conditions are met:
14 | 
15 | 1. Redistributions in source code must retain the above
16 |    copyright notice, this list of conditions, and the following
17 |    disclaimer.
18 | 
19 | 2. Redistributions in binary form must reproduce the above
20 |    copyright notice, this list of conditions, and the following
21 |    disclaimer in the documentation and/or other materials
22 |    provided with the distribution.
23 | 
24 | 3. The name Zope Corporation (tm) must not be used to
25 |    endorse or promote products derived from this software
26 |    without prior written permission from Zope Corporation.
27 | 
28 | 4. The right to distribute this software or to use it for
29 |    any purpose does not give you the right to use Servicemarks
30 |    (sm) or Trademarks (tm) of Zope Corporation. Use of them is
31 |    covered in a separate agreement (see
32 |    http://www.zope.com/Marks).
33 | 
34 | 5. If any files are modified, you must cause the modified
35 |    files to carry prominent notices stating that you changed
36 |    the files and the date of any change.
37 | 
38 | Disclaimer
39 | 
40 |   THIS SOFTWARE IS PROVIDED BY ZOPE CORPORATION ``AS IS''
41 |   AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
42 |   NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
43 |   AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN
44 |   NO EVENT SHALL ZOPE CORPORATION OR ITS CONTRIBUTORS BE
45 |   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
46 |   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 |   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
48 |   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 |   HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
50 |   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
51 |   OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
52 |   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
53 |   DAMAGE.
54 | 
55 | 
56 | This software consists of contributions made by Zope
57 | Corporation and many individuals on behalf of Zope
58 | Corporation.  Specific attributions are listed in the
59 | accompanying credits file.
60 | 


--------------------------------------------------------------------------------
/doc/licenses/elementtree.txt:
--------------------------------------------------------------------------------
 1 | The ElementTree / XML Toys Library is
 2 | 
 3 | Copyright (c) 1999-2003 by Secret Labs AB
 4 | Copyright (c) 1999-2003 by Fredrik Lundh
 5 | 
 6 | By obtaining, using, and/or copying this software and/or its
 7 | associated documentation, you agree that you have read, understood,
 8 | and will comply with the following terms and conditions:
 9 | 
10 | Permission to use, copy, modify, and distribute this software and its
11 | associated documentation for any purpose and without fee is hereby
12 | granted, provided that the above copyright notice appears in all
13 | copies, and that both that copyright notice and this permission notice
14 | appear in supporting documentation, and that the name of Secret Labs
15 | AB or the author not be used in advertising or publicity pertaining to
16 | distribution of the software without specific, written prior
17 | permission.
18 | 
19 | SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
20 | THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 | FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
22 | ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
23 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
24 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
25 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
26 | 


--------------------------------------------------------------------------------
/doc/lxml.mgp:
--------------------------------------------------------------------------------
  1 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  2 | %deffont "standard" xfont "helvetica-medium-r"
  3 | %deffont "thick" xfont "helvetica-bold-r"
  4 | %deffont "typewriter" xfont "courier-medium-r"
  5 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  6 | %%
  7 | %% Default settings per each line numbers.
  8 | %%
  9 | %default 1 area 90 90, leftfill, size 2, fore "gray20", back "white", font "standard", hgap 0
 10 | %default 2 size 7, vgap 10, prefix " ", ccolor "blue"
 11 | %default 3 size 2, bar "gray70", vgap 10
 12 | %default 4 size 5, fore "gray20", vgap 30, prefix " ", font "standard"
 13 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 14 | %%
 15 | %% Default settings that are applied to TAB-indented lines.
 16 | %%
 17 | %tab 1 size 5, vgap 40, prefix "  ", icon box "red" 50
 18 | %tab 2 size 4, vgap 40, prefix "      ", icon arc "yellow" 50
 19 | %tab 3 size 3, vgap 40, prefix "            ", icon delta3 "white" 40
 20 | %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 21 | %page
 22 | 
 23 | lxml - a sane Python wrapper for libxml
 24 | 
 25 | 
 26 | 
 27 | %center
 28 | Martijn Faassen, Infrae
 29 | faassen@infrae.com
 30 | 
 31 | %page
 32 | 
 33 | The C library libxml has huge benefits
 34 | 
 35 | 
 36 | 	Standards-compliant XML support
 37 | 
 38 | 	full-featured
 39 | 
 40 | 	actively maintained by XML experts
 41 | 
 42 | 	fast. fast! FAST!
 43 | 
 44 | %page
 45 | 
 46 | Features of libxml
 47 | 
 48 | 
 49 | 	Parsing
 50 | 
 51 | 	Tree based (DOM-ish) XML structure
 52 | 
 53 | 	XPath support
 54 | 
 55 | 	XSLT support (libxslt)
 56 | 
 57 | 	Relax NG (schema) support
 58 | 
 59 | 	And more
 60 | 
 61 | %page
 62 | 
 63 | But libxml already has Python bindings!
 64 | 
 65 | 
 66 | 	very low level and C-ish (not Pythonic)
 67 | 
 68 | 	underdocumented. huge, you get lost in them
 69 | 
 70 | 	works with UTF-8, not native Python unicode
 71 | 
 72 | 	can cause segfaults from Python
 73 | 
 74 | 	have to do manual memory management!
 75 | 
 76 | %page
 77 | 
 78 | lxml is a new Python binding for libxml
 79 | 
 80 | Aims (read: TODOS)
 81 | 
 82 | 	Pythonic API
 83 | 
 84 | 	Documented
 85 | 
 86 | 	Use Python unicode strings in API
 87 | 
 88 | 	Safe (no segfaults)
 89 | 
 90 | 	No manual memory management!
 91 | 
 92 | %page
 93 | 
 94 | Tradeoffs
 95 | 
 96 | 
 97 | 	Slower because of better wrapping.
 98 | 
 99 | 	But libxml is so fast this likely doesn't matter much.
100 | 
101 | 	Not all features of libxml exposed (unless you help)
102 | 
103 | %page
104 | 
105 | What is there now - Proof of concept
106 | 
107 | 
108 | 	Automatic destruction of documents (refcounted)
109 | 
110 | 	Start of ElementTree style API for tree
111 | 
112 | %page
113 | 
114 | Future
115 | 
116 | 
117 | 	Fix bugs, add features
118 | 
119 | 	Moving into svn repository on codespeak.net
120 | 
121 | 	Help!
122 | 
123 | 


--------------------------------------------------------------------------------
/doc/memorymanagement.txt:
--------------------------------------------------------------------------------
 1 | Memory management
 2 | =================
 3 | 
 4 | There can be two types of nodes:
 5 | 
 6 | * those connected to an existing tree
 7 | 
 8 | * those unconnected. These may be the top node of a tree
 9 | 
10 | Nodes consist of a C-level libxml2 node, Node for short, and
11 | optionally a Python-level proxy node, Proxy. Zero, one or more Proxies can
12 | exist for a single Node.
13 | 
14 | Proxies are garbage collected automatically by Python. Nodes are not
15 | garbage collected at all. Instead, explicit mechanisms exist for
16 | Nodes to clear them and the tree they may be the top of.
17 | 
18 | A Node can be safely freed when:
19 | 
20 | * no Proxy is connected to this Node
21 | 
22 | * no Proxy can be created for this Node
23 | 
24 | A Proxy cannot be created for a Node when:
25 | 
26 | * no Proxy exists for nodes that are connected to that Node
27 | 
28 | This is the case when:
29 | 
30 | * the Node is in a tree that has no Proxy connected to any of the nodes.
31 | 
32 | This means that the whole tree in such a condition can be freed.
33 | 
34 | Detecting whether a Node is in a tree that has no Proxies connected to
35 | it can be done by relying on Python's garbage collection
36 | algorithm. Each Proxy can have a reference to the Proxy that points to
37 | the top of the tree. In case of a document tree, this reference is to
38 | the Document Proxy. When no more references exist in the system to the
39 | top Proxy, this means no more Proxies exist that point to the Node
40 | tree the top Proxy is the top of. If this Node tree is unconnected;
41 | i.e. it is not a subtree, this means that tree can be safely garbage
42 | collected.
43 | 
44 | A special case exists for document references. Each Proxy will always
45 | have a reference to the Document Proxy, as any Node will have such a
46 | reference to the Document Node. This means that a Document Node can
47 | only be garbage collected when no more Proxies at all exist anymore
48 | which refer to the Document. This is a separate system from the
49 | top-Node references, even though the top-node in many cases will be
50 | the Document. This is because there is no way to get to a node that is
51 | not connected to the Document tree from a Document Proxy.
52 | 
53 | This approach requires a system that can keep track of the top of the
54 | tree in any case. Usually this is simple: when a Proxy gets connected,
55 | the tree top becomes the tree top of whatever node it is connected
56 | to. 
57 | 
58 | Sometimes this is more difficult: a Proxy may exist pointing to a node
59 | in a subtree that just got connected. The top reference cannot be
60 | updated. This is a problem in the following case:
61 | 
62 |     a
63 |   b    c         h
64 | d  e  f  g     i  j
65 |               k
66 | 
67 | Now imagine we have a proxy to k, K, and a proxy to i, I. They both
68 | have a pointer to proxy H.
69 | 
70 | Now imagine i gets moved under g through proxy I. Proxy I will have an
71 | updated pointer to proxy A. However, proxy K cannot be updated and still
72 | points to H, from which it is now in fact disconnected.
73 | 
74 | Proxy H cannot be removed now until proxy A is removed. In addition,
75 | proxy A has a refcount that is too low because proxy K doesn't point
76 | to it but should.
77 | 
78 | Another strategy involves having a reference count on the underlying
79 | nodes, one per proxy. A node can only be freed if there is no
80 | descendant-or-self that has the refcount higher than 0. A node, when
81 | no more Python references to it exist, will check for refcounts first.
82 | The drawback of this is potentially heavy tree-walking each time a proxy
83 | can be removed.
84 | 


--------------------------------------------------------------------------------
/doc/pubkey.asc:
--------------------------------------------------------------------------------
 1 | -----BEGIN PGP PUBLIC KEY BLOCK-----
 2 | Version: GnuPG v1.4.2 (GNU/Linux)
 3 | 
 4 | mQGiBEQf3JQRBACciSqxoX0q3VurkRENVVtG/pVqtFh/d2CohbVJlLCrO4s7nnPj
 5 | CTfZFt6tmykZjsLJl24XpEJt0O/C0jLcaBqvXVgVvRXHz4DjEYYuQF4LPthhI4MA
 6 | 4T7ExptX4lU5g3BVJ46vPU8uRBbbxarBRas9rYewgnrYKWpZZCa7yMq+9wCgnyyR
 7 | Si4E3viLwi77jda135nA6vcD/iqu8zIl9/dFuUcOvxJrhrm+UdY72puZ1TVczSAH
 8 | GOqMjrKkfyHlaJh/ZzWENpTZIfOdVhy7Chvva18vH4Wz7jKj5UeIpRrBvjAD28r3
 9 | Y3W5bfsnpPkvDOyU1vqBsw4q+/250GXEX0JqV2Rbf5yLVgEZPdGrswO460dr4UVS
10 | 8RS0BACYTmyrz57AugHc5tRkqNw6o7ux2deOT0c3AbUcOWtOocGumCsUf+M1nOrc
11 | VWkeBWTv4HIIiecWYY/KwIemTthQGjxywaZDxOlBT0BOL/+vfYTq/plZULXr+g90
12 | rSe82+kLl9N5onkBDJKeDIcJDzRoxIRPV1i0Om/5JBI4jmUnv7QnU3RlZmFuIEJl
13 | aG5lbCA8c2NvZGVyQHVzZXJzLmJlcmxpb3MuZGU+iF8EExECACAFAkQiqKYCGwMG
14 | CwkIBwMCBBUCCAMEFgIDAQIeAQIXgAAKCRANPVNpCNOgHi+2AJ0a0JH8iP3RqrOL
15 | JefvHz1dSl3MxACYo7Ma6CeIgsGnyaSSdNOmNVXn+IhGBBARAgAGBQJEIqk0AAoJ
16 | ELO5mMzzmgZbmCcAoKZ2En1IlsxBpaPPxgWYrUOWfc6hAKCBWODMMOYptCBkSrjg
17 | m3gsrjHgYbQsU3RlZmFuIEJlaG5lbCA8c2NvZGVyQHVzZXJzLnNvdXJjZWZvcmdl
18 | Lm5ldD6IYAQTEQIAIAUCRB/clAIbAwYLCQgHAwIEFQIIAwQWAgMBAh4BAheAAAoJ
19 | EA09U2kI06Aen2YAn0hvuDs+Gslq9vPRFFbsFNJI40PmAJ0chjiiEy0xV5C+n6YX
20 | XFuldRDILYhGBBARAgAGBQJEIp4AAAoJELO5mMzzmgZbgKQAn3pWrmFdj8YaEyuR
21 | tEjKVZJDQ6ZVAJ0Y1igwADT40BPra+G/xiLa3YbCrrkCDQREH9ynEAgAiR4/0r0d
22 | doViNECfSLClllu5K0Bo1SEiMtvVNC3sJYgVzBddD8Xn8UAdjyAgmaL5FC2FsNQu
23 | RxxKkNlHNYCq8ZSWtZaL2MQ+SyMUyHv6VXVCGuSW0COpzbx58u+SZpjyESJ1kaZc
24 | 73SaIw6kv/dVQHjurwmlo1lg3dLZ3PG08WGCYUMqkkv2K+J7+puzE2Cjo31gTq4s
25 | LYDCV26wjVQ6BqT2EcHQhVEjh0xq5ugc908cr/2FQAKkTifEbF+OVBGWiFMGgri+
26 | 6+G54/BV/RakpvNCFYBiZHn/M9mQaWt7XoTmnEQ1ldq5KNlRhkqnQRF/NK5VpGcQ
27 | 29As28aqpZTECwADBgf/WlRvBRI1Q1eIv2falEv7C6sOxqc3kr5z1uUBTRG5v9t6
28 | ff9k/J4oC6cnQx00GK3ZR8ija6bl8zwu+0m0M3rW49Krb1rsiT7r4ahOZ7p9RRro
29 | oG3NbUJYgMG10D1nxpaioYqa/m+PpILJM0wfYZZEuX0xkZcOB24yb+J7EIcGR09T
30 | mMd5sXtdTU+w/p7Xi2cP61uQ8qixyHBH8E06qgW2JtVFV9rGn7CNUOvkNaUBRnY5
31 | QxhdkvKJRx7voOLYWZFUBIWgto+6vmTgKmc2Ho6qddzME9UgwUNcknRgm0cf6Cxr
32 | 6zPtxZl8a6KemjQcK7kARSmMNCDkqp/Pohe519A5vYhJBBgRAgAJBQJEH9ynAhsM
33 | AAoJEA09U2kI06Aesv4AnjiVQVLzqnNS/64vvMMP1UARY3HtAJ90YxNGhRNIhWYL
34 | UU16oJlGD/9M1Q==
35 | =gWy2
36 | -----END PGP PUBLIC KEY BLOCK-----
37 | 


--------------------------------------------------------------------------------
/doc/rest2html.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | """
 4 | A minimal front end to the Docutils Publisher, producing HTML with
 5 | Pygments syntax highlighting.
 6 | """
 7 | 
 8 | # Set to True if you want inline CSS styles instead of classes
 9 | INLINESTYLES = False
10 | 
11 | 
12 | try:
13 |     import locale
14 |     locale.setlocale(locale.LC_ALL, '')
15 | except:
16 |     pass
17 | 
18 | # set up Pygments
19 | 
20 | from pygments.formatters import HtmlFormatter
21 | 
22 | # The default formatter
23 | DEFAULT = HtmlFormatter(noclasses=INLINESTYLES, cssclass='syntax')
24 | 
25 | # Add name -> formatter pairs for every variant you want to use
26 | VARIANTS = {
27 |     # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True),
28 | }
29 | 
30 | 
31 | from docutils import nodes
32 | from docutils.parsers.rst import directives
33 | 
34 | from pygments import highlight
35 | from pygments.lexers import get_lexer_by_name, TextLexer
36 | 
37 | def pygments_directive(name, arguments, options, content, lineno,
38 |                        content_offset, block_text, state, state_machine):
39 |     try:
40 |         lexer = get_lexer_by_name(arguments[0])
41 |     except ValueError, e:
42 |         # no lexer found - use the text one instead of an exception
43 |         lexer = TextLexer()
44 |     # take an arbitrary option if more than one is given
45 |     formatter = options and VARIANTS[options.keys()[0]] or DEFAULT
46 |     parsed = highlight(u'\n'.join(content), lexer, formatter)
47 |     return [nodes.raw('', parsed, format='html')]
48 | 
49 | pygments_directive.arguments = (1, 0, 1)
50 | pygments_directive.content = 1
51 | pygments_directive.options = dict([(key, directives.flag) for key in VARIANTS])
52 | 
53 | directives.register_directive('sourcecode', pygments_directive)
54 | 
55 | 
56 | # run the generation
57 | 
58 | from docutils.core import publish_cmdline, default_description
59 | 
60 | description = ('Generates (X)HTML documents from standalone reStructuredText '
61 |                'sources.  ' + default_description)
62 | 
63 | publish_cmdline(writer_name='html', description=description)
64 | 


--------------------------------------------------------------------------------
/doc/rest2latex.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | # Testing:
 4 | #    python rest2latex.py objectify.txt > latex/objectify.tex
 5 | 
 6 | """
 7 | A minimal front end to the Docutils Publisher, producing LaTeX with
 8 | some syntax highlighting.
 9 | """
10 | 
11 | # Set to True if you want inline CSS styles instead of classes
12 | INLINESTYLES = False
13 | 
14 | 
15 | try:
16 |     import locale
17 |     locale.setlocale(locale.LC_ALL, '')
18 | except:
19 |     pass
20 | 
21 | # set up Pygments
22 | 
23 | from pygments.formatters import LatexFormatter
24 | 
25 | # The default formatter
26 | DEFAULT = LatexFormatter()
27 | 
28 | # Add name -> formatter pairs for every variant you want to use
29 | VARIANTS = {
30 |     # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True),
31 | }
32 | 
33 | 
34 | from docutils import nodes
35 | from docutils.parsers.rst import directives
36 | 
37 | from pygments import highlight
38 | from pygments.lexers import get_lexer_by_name, TextLexer
39 | 
40 | def pygments_directive(name, arguments, options, content, lineno,
41 |                        content_offset, block_text, state, state_machine):
42 |     try:
43 |         lexer = get_lexer_by_name(arguments[0])
44 |     except ValueError, e:
45 |         # no lexer found - use the text one instead of an exception
46 |         lexer = TextLexer()
47 |     # take an arbitrary option if more than one is given
48 |     formatter = options and VARIANTS[options.keys()[0]] or DEFAULT
49 |     parsed = highlight(u'\n'.join(content), lexer, formatter)
50 |     return [nodes.raw('', parsed, format='latex')]
51 | 
52 | pygments_directive.arguments = (1, 0, 1)
53 | pygments_directive.content = 1
54 | pygments_directive.options = dict([(key, directives.flag) for key in VARIANTS])
55 | 
56 | directives.register_directive('sourcecode', pygments_directive)
57 | 
58 | 
59 | # run the generation
60 | 
61 | from docutils.core import publish_cmdline, default_description
62 | 
63 | description = ('Generates LaTeX documents from standalone reStructuredText '
64 |                'sources.  ' + default_description)
65 | 
66 | publish_cmdline(writer_name='latex2e', description=description)
67 | 


--------------------------------------------------------------------------------
/doc/s5/Makefile:
--------------------------------------------------------------------------------
 1 | PYTHON?=python
 2 | 
 3 | SLIDES=$(subst .txt,.html,$(wildcard *.txt))
 4 | 
 5 | slides: $(SLIDES)
 6 | 
 7 | %.html: %.txt
 8 | 	$(PYTHON) rst2s5.py --current-slide --language=en $< $@
 9 | 
10 | clean:
11 | 	rm -f *~ $(SLIDES)
12 | 


--------------------------------------------------------------------------------
/doc/s5/ep2008/atom-example.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <feed xmlns="http://www.w3.org/2005/Atom">
 3 | 
 4 |   <title>Example Feed</title>
 5 |   <link href="http://example.org/"/>
 6 |   <updated>2003-12-13T18:30:02Z</updated>
 7 |   <author>
 8 |     <name>John Doe</name>
 9 |   </author>
10 |   <id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
11 | 
12 |   <entry>
13 |     <title>Atom-Powered Robots Run Amok</title>
14 |     <link href="http://example.org/2003/12/13/atom03"/>
15 |     <id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
16 |     <updated>2003-12-13T18:30:02Z</updated>
17 |     <summary>Some text.</summary>
18 |   </entry>
19 | 
20 | </feed>
21 | 


--------------------------------------------------------------------------------
/doc/s5/ep2008/atomgen.py:
--------------------------------------------------------------------------------
 1 | # atomgen.py
 2 | 
 3 | import os.path
 4 | 
 5 | from lxml import etree
 6 | from lxml.builder import ElementMaker
 7 | 
 8 | ATOM_NAMESPACE = "http://www.w3.org/2005/Atom"
 9 | 
10 | A = ElementMaker(namespace=ATOM_NAMESPACE,
11 |                  nsmap={None : ATOM_NAMESPACE})
12 | 
13 | feed      = A.feed
14 | entry     = A.entry
15 | title     = A.title
16 | author    = A.author
17 | name      = A.name
18 | link      = A.link
19 | summary   = A.summary
20 | id        = A.id
21 | updated   = A.updated
22 | # ... and so on and so forth ...
23 | 
24 | 
25 | # plus a little validation function: isvalid()
26 | isvalid = etree.RelaxNG(
27 |     file=os.path.join(os.path.abspath(os.path.dirname(__file__)), "atom.rng"))
28 | 


--------------------------------------------------------------------------------
/doc/s5/ep2008/proxies.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/s5/ep2008/proxies.png


--------------------------------------------------------------------------------
/doc/s5/rst2s5.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | """
 3 |     The Pygments reStructuredText directive
 4 |     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 5 | 
 6 |     This fragment is a Docutils_ 0.5 directive that renders source code
 7 |     (to HTML only, currently) via Pygments.
 8 | 
 9 |     To use it, adjust the options below and copy the code into a module
10 |     that you import on initialization.  The code then automatically
11 |     registers a ``sourcecode`` directive that you can use instead of
12 |     normal code blocks like this::
13 | 
14 |         .. sourcecode:: python
15 | 
16 |             My code goes here.
17 | 
18 |     If you want to have different code styles, e.g. one with line numbers
19 |     and one without, add formatters with their names in the VARIANTS dict
20 |     below.  You can invoke them instead of the DEFAULT one by using a
21 |     directive option::
22 | 
23 |         .. sourcecode:: python
24 |             :linenos:
25 | 
26 |             My code goes here.
27 | 
28 |     Look at the `directive documentation`_ to get all the gory details.
29 | 
30 |     .. _Docutils: http://docutils.sf.net/
31 |     .. _directive documentation:
32 |        http://docutils.sourceforge.net/docs/howto/rst-directives.html
33 | 
34 |     :copyright: Copyright 2006-2009 by the Pygments team, see AUTHORS.
35 |     :license: BSD, see LICENSE for details.
36 | """
37 | 
38 | # Options
39 | # ~~~~~~~
40 | 
41 | # Set to True if you want inline CSS styles instead of classes
42 | INLINESTYLES = False
43 | STYLE = "fruity"
44 | 
45 | from pygments.formatters import HtmlFormatter
46 | 
47 | # The default formatter
48 | DEFAULT = HtmlFormatter(noclasses=INLINESTYLES, style=STYLE)
49 | 
50 | # Add name -> formatter pairs for every variant you want to use
51 | VARIANTS = {
52 |     # 'linenos': HtmlFormatter(noclasses=INLINESTYLES, linenos=True),
53 | }
54 | 
55 | 
56 | from docutils import nodes
57 | from docutils.parsers.rst import directives, Directive
58 | 
59 | from pygments import highlight
60 | from pygments.lexers import get_lexer_by_name, TextLexer
61 | 
62 | class Pygments(Directive):
63 |     """ Source code syntax hightlighting.
64 |     """
65 |     required_arguments = 1
66 |     optional_arguments = 0
67 |     final_argument_whitespace = True
68 |     option_spec = dict([(key, directives.flag) for key in VARIANTS])
69 |     has_content = True
70 | 
71 |     def run(self):
72 |         self.assert_has_content()
73 |         try:
74 |             lexer = get_lexer_by_name(self.arguments[0])
75 |         except ValueError:
76 |             # no lexer found - use the text one instead of an exception
77 |             lexer = TextLexer()
78 |         # take an arbitrary option if more than one is given
79 |         formatter = self.options and VARIANTS[self.options.keys()[0]] or DEFAULT
80 | 
81 | #        print >>open('ui/default/pygments.css', 'w'), formatter.get_style_defs('.highlight')
82 |         parsed = highlight(u'\n'.join(self.content), lexer, formatter)
83 |         return [nodes.raw('', parsed, format='html')]
84 | 
85 | directives.register_directive('sourcecode', Pygments)
86 | 
87 | from docutils.core import publish_cmdline, default_description
88 | 
89 | description = ('Generates S5 (X)HTML slideshow documents from standalone '
90 |                'reStructuredText sources.  ' + default_description)
91 | 
92 | publish_cmdline(writer_name='s5', description=description)
93 | 


--------------------------------------------------------------------------------
/doc/s5/tagpython.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/s5/tagpython.png


--------------------------------------------------------------------------------
/doc/s5/ui/default/blank.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/s5/ui/default/blank.gif


--------------------------------------------------------------------------------
/doc/s5/ui/default/bodybg.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/s5/ui/default/bodybg.gif


--------------------------------------------------------------------------------
/doc/s5/ui/default/framing.css:
--------------------------------------------------------------------------------
 1 | /* The following styles size, place, and layer the slide components.
 2 |    Edit these if you want to change the overall slide layout.
 3 |    The commented lines can be uncommented (and modified, if necessary) 
 4 |     to help you with the rearrangement process. */
 5 | 
 6 | /* target = 1024x768 */
 7 | 
 8 | div#header, div#footer, .slide {width: 100%; top: 0; left: 0;}
 9 | div#header {top: 0; height: 3em; z-index: 1;}
10 | div#footer {top: auto; bottom: 0; height: 2.5em; z-index: 5;}
11 | .slide {top: 0; width: 92%; padding: 3.5em 4% 4%; z-index: 2;  list-style: none;}
12 | div#controls {left: 50%; bottom: 0; width: 50%; z-index: 100;}
13 | div#controls form {position: absolute; bottom: 0; right: 0; width: 100%;
14 |   margin: 0;}
15 | #currentSlide {position: absolute; width: 10%; left: 45%; bottom: 1em; z-index: 10;}
16 | html>body #currentSlide {position: fixed;}
17 | 
18 | /*
19 | div#header {background: #FCC;}
20 | div#footer {background: #CCF;}
21 | div#controls {background: #BBD;}
22 | div#currentSlide {background: #FFC;}
23 | */
24 | 


--------------------------------------------------------------------------------
/doc/s5/ui/default/iepngfix.htc:
--------------------------------------------------------------------------------
 1 | <public:component>
 2 | <public:attach event="onpropertychange" onevent="doFix()" />
 3 | 
 4 | <script>
 5 | 
 6 | // IE5.5+ PNG Alpha Fix v1.0 by Angus Turnbull http://www.twinhelix.com
 7 | // Free usage permitted as long as this notice remains intact.
 8 | 
 9 | // This must be a path to a blank image. That's all the configuration you need here.
10 | var blankImg = 'v11rc1/default/blank.gif';
11 | 
12 | var f = 'DXImageTransform.Microsoft.AlphaImageLoader';
13 | 
14 | function filt(s, m) {  // set the AlphaImageLoader filter to src s / sizingMethod m, or disable it when s is falsy
15 |  if (filters[f]) {  // filter already present on this element: toggle and reconfigure it
16 |   filters[f].enabled = s ? true : false;
17 |   if (s) with (filters[f]) { src = s; sizingMethod = m }
18 |  } else if (s) style.filter = 'progid:'+f+'(src="'+s+'",sizingMethod="'+m+'")';  // not present yet: add it through style.filter
19 | }
20 | 
21 | function doFix() {  // route a PNG src or PNG background image through the AlphaImageLoader filter
22 |  if ((parseFloat(navigator.userAgent.match(/MSIE (\S+)/)[1]) < 5.5) ||  // do nothing on MSIE versions below 5.5
23 |   (event && !/(background|src)/.test(event.propertyName))) return;  // ignore property changes not naming background or src
24 | 
25 |  if (tagName == 'IMG') {
26 |   if ((/\.png$/i).test(src)) {  // PNG image: show it through the filter and swap src for the blank image
27 |    filt(src, 'image');  // was 'scale'
28 |    src = blankImg;
29 |   } else if (src.indexOf(blankImg) < 0) filt();  // src is neither a PNG nor the blank image: disable the filter
30 |  } else if (style.backgroundImage) {
31 |   if (style.backgroundImage.match(/^url[("']+(.*\.png)[)"']+$/i)) {  // background is a url(...png) value
32 |    var s = RegExp.$1;  // the captured PNG path
33 |    style.backgroundImage = '';
34 |    filt(s, 'crop');
35 |   } else filt();  // non-PNG background: disable the filter
36 |  }
37 | }
38 | 
39 | doFix();
40 | 
41 | </script>
42 | </public:component>


--------------------------------------------------------------------------------
/doc/s5/ui/default/lxml-logo64.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/s5/ui/default/lxml-logo64.png


--------------------------------------------------------------------------------
/doc/s5/ui/default/opera.css:
--------------------------------------------------------------------------------
1 | /* DO NOT CHANGE THESE unless you really want to break Opera Show */
2 | .slide {
3 | 	visibility: visible !important;
4 | 	position: static !important;
5 | 	page-break-before: always;
6 | }
7 | #slide0 {page-break-before: avoid;}
8 | 


--------------------------------------------------------------------------------
/doc/s5/ui/default/outline.css:
--------------------------------------------------------------------------------
 1 | /* don't change this unless you want the layout stuff to show up in the outline view! */
 2 | 
 3 | .layout div, #footer *, #controlForm * {display: none;}
 4 | #footer, #controls, #controlForm, #navLinks, #toggle {
 5 |   display: block; visibility: visible; margin: 0; padding: 0;}
 6 | #toggle {float: right; padding: 0.5em;}
 7 | html>body #toggle {position: fixed; top: 0; right: 0;}
 8 | 
 9 | /* making the outline look pretty-ish */
10 | 
11 | #slide0 h1, #slide0 h2, #slide0 h3, #slide0 h4 {border: none; margin: 0;}
12 | #slide0 h1 {padding-top: 1.5em;}
13 | .slide h1 {margin: 1.5em 0 0; padding-top: 0.25em;
14 |   border-top: 1px solid #888; border-bottom: 1px solid #AAA;}
15 | #toggle {border: 1px solid; border-width: 0 0 1px 1px; background: #FFF;}
16 | 


--------------------------------------------------------------------------------
/doc/s5/ui/default/print.css:
--------------------------------------------------------------------------------
 1 | /* The following rule is necessary to have all slides appear in print! DO NOT REMOVE IT! */
 2 | .slide, ul {page-break-inside: avoid; visibility: visible !important;}
 3 | h1 {page-break-after: avoid;}
 4 | 
 5 | body {font-size: 12pt; background: white;}
 6 | * {color: black;}
 7 | 
 8 | #slide0 h1 {font-size: 200%; border: none; margin: 0.5em 0 0.25em;}
 9 | #slide0 h3 {margin: 0; padding: 0;}
10 | #slide0 h4 {margin: 0 0 0.5em; padding: 0;}
11 | #slide0 {margin-bottom: 3em;}
12 | 
13 | h1 {border-top: 2pt solid gray; border-bottom: 1px dotted silver;}
14 | .extra {background: transparent !important;}
15 | div.extra, pre.extra, .example {font-size: 10pt; color: #333;}
16 | ul.extra a {font-weight: bold;}
17 | p.example {display: none;}
18 | 
19 | #header {display: none;}
20 | #footer h1 {margin: 0; border-bottom: 1px solid; color: gray; font-style: italic;}
21 | #footer h2, #controls {display: none;}
22 | 
23 | /* The following rule keeps the layout stuff out of print.  Remove at your own risk! */
24 | .layout, .layout * {display: none !important;}
25 | 


--------------------------------------------------------------------------------
/doc/s5/ui/default/s5-core.css:
--------------------------------------------------------------------------------
 1 | /* Do not edit or override these styles! The system will likely break if you do. */
 2 | 
 3 | div#header, div#footer, div#controls, .slide {position: absolute;}
 4 | html>body div#header, html>body div#footer, 
 5 |   html>body div#controls, html>body .slide {position: fixed;}
 6 | .handout {display: none;}
 7 | .layout {display: block;}
 8 | .slide, .hideme, .incremental {visibility: hidden;}
 9 | #slide0 {visibility: visible;}
10 | 


--------------------------------------------------------------------------------
/doc/s5/ui/default/slides.css:
--------------------------------------------------------------------------------
1 | @import url(s5-core.css); /* required to make the slide show run at all */
2 | @import url(framing.css); /* sets basic placement and size of slide components */
3 | @import url(pretty.css);  /* stuff that makes the slides look better than blah */


--------------------------------------------------------------------------------
/doc/s5/ui/default/tagpython.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/doc/s5/ui/default/tagpython.png


--------------------------------------------------------------------------------
/doc/sax.txt:
--------------------------------------------------------------------------------
  1 | Sax support
  2 | ===========
  3 | 
  4 | In this document we'll describe lxml's SAX support.  lxml has support for
  5 | producing SAX events for an ElementTree or Element.  lxml can also turn SAX
  6 | events into an ElementTree.  The SAX API used by lxml is compatible with that
  7 | in the Python core (xml.sax), so is useful for interfacing lxml with code that
  8 | uses the Python core SAX facilities.
  9 | 
 10 | .. contents::
 11 | .. 
 12 |    1  Building a tree from SAX events
 13 |    2  Producing SAX events from an ElementTree or Element
 14 |    3  Interfacing with pulldom/minidom
 15 | 
 16 | ..
 17 |   >>> try: from StringIO import StringIO
 18 |   ... except ImportError:
 19 |   ...    from io import BytesIO
 20 |   ...    def StringIO(s):
 21 |   ...        if isinstance(s, str): s = s.encode("UTF-8")
 22 |   ...        return BytesIO(s)
 23 | 
 24 | 
 25 | Building a tree from SAX events
 26 | -------------------------------
 27 | 
 28 | First of all, lxml has support for building a new tree given SAX events.  To
 29 | do this, we use the special SAX content handler defined by lxml named
 30 | ``lxml.sax.ElementTreeContentHandler``:
 31 | 
 32 | .. sourcecode:: pycon
 33 | 
 34 |   >>> import lxml.sax
 35 |   >>> handler = lxml.sax.ElementTreeContentHandler()
 36 | 
 37 | Now let's fire some SAX events at it:
 38 | 
 39 | .. sourcecode:: pycon
 40 | 
 41 |   >>> handler.startElementNS((None, 'a'), 'a', {})
 42 |   >>> handler.startElementNS((None, 'b'), 'b', {(None, 'foo'): 'bar'})
 43 |   >>> handler.characters('Hello world')
 44 |   >>> handler.endElementNS((None, 'b'), 'b')
 45 |   >>> handler.endElementNS((None, 'a'), 'a')
 46 | 
 47 | This constructs an equivalent tree.  You can access it through the ``etree``
 48 | property of the handler:
 49 | 
 50 | .. sourcecode:: pycon
 51 | 
 52 |   >>> tree = handler.etree
 53 |   >>> lxml.etree.tostring(tree.getroot())
 54 |   b'<a><b foo="bar">Hello world</b></a>'
 55 | 
 56 | By passing a ``makeelement`` function to the constructor of
 57 | ``ElementTreeContentHandler``, e.g. the one of a parser you configured, you
 58 | can determine which element class lookup scheme should be used.
 59 | 
 60 | 
 61 | Producing SAX events from an ElementTree or Element
 62 | ---------------------------------------------------
 63 | 
 64 | Let's make a tree we can generate SAX events for:
 65 | 
 66 | .. sourcecode:: pycon
 67 | 
 68 |   >>> f = StringIO('<a><b>Text</b></a>')
 69 |   >>> tree = lxml.etree.parse(f)
 70 | 
 71 | To see whether the correct SAX events are produced, we'll write a custom
 72 | content handler:
 73 | 
 74 | .. sourcecode:: pycon
 75 | 
 76 |   >>> from xml.sax.handler import ContentHandler
 77 |   >>> class MyContentHandler(ContentHandler):
 78 |   ...     def __init__(self):
 79 |   ...         self.a_amount = 0
 80 |   ...         self.b_amount = 0
 81 |   ...         self.text = None
 82 |   ...
 83 |   ...     def startElementNS(self, name, qname, attributes):
 84 |   ...         uri, localname = name
 85 |   ...         if localname == 'a':
 86 |   ...             self.a_amount += 1
 87 |   ...         if localname == 'b':
 88 |   ...             self.b_amount += 1
 89 |   ...
 90 |   ...     def characters(self, data):
 91 |   ...         self.text = data
 92 | 
 93 | Note that it only defines the startElementNS() method and not startElement().
 94 | The SAX event generator in lxml.sax currently only supports namespace-aware
 95 | processing.
 96 | 
 97 | To test the content handler, we can produce SAX events from the tree:
 98 | 
 99 | .. sourcecode:: pycon
100 | 
101 |   >>> handler = MyContentHandler()
102 |   >>> lxml.sax.saxify(tree, handler)
103 | 
104 | This is what we expect:
105 | 
106 | .. sourcecode:: pycon
107 | 
108 |   >>> handler.a_amount
109 |   1
110 |   >>> handler.b_amount
111 |   1
112 |   >>> handler.text
113 |   'Text'
114 | 
115 | 
116 | Interfacing with pulldom/minidom
117 | --------------------------------
118 | 
119 | lxml.sax is a simple way to interface with the standard XML support in the
120 | Python library.  Note, however, that this is a one-way solution, as Python's
121 | DOM implementation cannot generate SAX events from a DOM tree.
122 | 
123 | You can use xml.dom.pulldom to build a minidom from lxml:
124 | 
125 | .. sourcecode:: pycon
126 | 
127 |   >>> from xml.dom.pulldom import SAX2DOM
128 |   >>> handler = SAX2DOM()
129 |   >>> lxml.sax.saxify(tree, handler)
130 | 
131 | PullDOM makes the result available through the ``document`` attribute:
132 | 
133 | .. sourcecode:: pycon
134 | 
135 |   >>> dom = handler.document
136 |   >>> print(dom.firstChild.localName)
137 |   a
138 | 


--------------------------------------------------------------------------------
/doc/test.xml:
--------------------------------------------------------------------------------
1 | <a/>
2 | 


--------------------------------------------------------------------------------
/doc/valgrind.txt:
--------------------------------------------------------------------------------
1 | The command used to run the tests with valgrind:
2 | 
3 | valgrind --tool=memcheck --leak-check=full --suppressions=valgrind-python.supp python2.7 test.py
4 | 


--------------------------------------------------------------------------------
/fake_pyrex/Pyrex/Distutils/__init__.py:
--------------------------------------------------------------------------------
1 | # work around broken setuptools monkey patching
2 | 


--------------------------------------------------------------------------------
/fake_pyrex/Pyrex/Distutils/build_ext.py:
--------------------------------------------------------------------------------
# Placeholder value: it only makes the name ``build_ext`` exist in this fake
# module (presumably so that importing it from Pyrex.Distutils.build_ext
# succeeds -- see the setuptools work-around note in the package __init__).
build_ext = "yes, it's there!"
2 | 


--------------------------------------------------------------------------------
/fake_pyrex/Pyrex/__init__.py:
--------------------------------------------------------------------------------
1 | # work around broken setuptools monkey patching
2 | 


--------------------------------------------------------------------------------
/samples/simple-ns.xml:
--------------------------------------------------------------------------------
1 | <root xmlns='http://namespace/'>
2 |    <element key='value'>text</element>
3 |    <element>text</element>tail
4 |    <empty-element/>
5 | </root>
6 | 


--------------------------------------------------------------------------------
/samples/simple.xml:
--------------------------------------------------------------------------------
1 | <root>
2 |    <element key='value'>text</element>
3 |    <element>text</element>tail
4 |    <empty-element/>
5 | </root>
6 | 


--------------------------------------------------------------------------------
/selftest.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/selftest.py


--------------------------------------------------------------------------------
/src/lxml/__init__.py:
--------------------------------------------------------------------------------
 1 | # this is a package
 2 | 
 3 | def get_include():
 4 |     """
 5 |     Returns a list of header include paths (for lxml itself, libxml2
 6 |     and libxslt) needed to compile C code against lxml if it was built
 7 |     with statically linked libraries.
 8 |     """
 9 |     import os
10 |     lxml_path = __path__[0]
11 |     include_path = os.path.join(lxml_path, 'includes')
12 |     includes = [include_path, lxml_path]
13 | 
14 |     for name in os.listdir(include_path):
15 |         path = os.path.join(include_path, name)
16 |         if os.path.isdir(path):
17 |             includes.append(path)
18 | 
19 |     return includes
20 | 
21 | 


--------------------------------------------------------------------------------
/src/lxml/cssselect.py:
--------------------------------------------------------------------------------
  1 | """CSS Selectors based on XPath.
  2 | 
  3 | This module supports selecting XML/HTML tags based on CSS selectors.
  4 | See the `CSSSelector` class for details.
  5 | 
  6 | This is a thin wrapper around cssselect 0.7 or later.
  7 | """
  8 | 
  9 | import sys
 10 | from lxml import etree
 11 | 
 12 | ## Work-around the lack of absolute import in Python 2.4
 13 | #from __future__ import absolute_import
 14 | #from cssselect import ...
try:
    # Import by name through __import__() so that the external top-level
    # ``cssselect`` package is requested (see the work-around note above).
    external_cssselect = __import__('cssselect')
except ImportError:
    raise ImportError('cssselect seems not to be installed. '
                      'See http://packages.python.org/cssselect/')

# Re-export the exception classes of the external package from this module.
SelectorSyntaxError = external_cssselect.SelectorSyntaxError
ExpressionError = external_cssselect.ExpressionError
SelectorError = external_cssselect.SelectorError


__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
           'CSSSelector']
 28 | 
 29 | 
 30 | class LxmlTranslator(external_cssselect.GenericTranslator):
 31 |     """
 32 |     A custom CSS selector to XPath translator with lxml-specific extensions.
 33 |     """
 34 |     def xpath_contains_function(self, xpath, function):
 35 |         # Defined there, removed in later drafts:
 36 |         # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
 37 |         if function.argument_types() not in (['STRING'], ['IDENT']):
 38 |             raise ExpressionError(
 39 |                 "Expected a single string or ident for :contains(), got %r"
 40 |                 % function.arguments)
 41 |         value = function.arguments[0].value
 42 |         return xpath.add_condition(
 43 |             'contains(__lxml_internal_css:lower-case(string(.)), %s)'
 44 |             % self.xpath_literal(value.lower()))
 45 | 
 46 | 
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.

    Combines ``LxmlTranslator`` with the ``HTMLTranslator`` of the external
    cssselect package; no methods are added here.
    """
 51 | 
 52 | 
 53 | def _make_lower_case(context, s):
 54 |     return s.lower()
 55 | 
# Register _make_lower_case as the XPath extension function "lower-case" in
# its own function namespace.  The prefix set here is the one used by the
# contains() condition that LxmlTranslator.xpath_contains_function() builds.
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case
 59 | 
 60 | 
 61 | class CSSSelector(etree.XPath):
 62 |     """A CSS selector.
 63 | 
 64 |     Usage::
 65 | 
 66 |         >>> from lxml import etree, cssselect
 67 |         >>> select = cssselect.CSSSelector("a tag > child")
 68 | 
 69 |         >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
 70 |         >>> [ el.tag for el in select(root) ]
 71 |         ['child']
 72 | 
 73 |     To use CSS namespaces, you need to pass a prefix-to-namespace
 74 |     mapping as ``namespaces`` keyword argument::
 75 | 
 76 |         >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
 77 |         >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
 78 |         ...                                   namespaces={'rdf': rdfns})
 79 | 
 80 |         >>> rdf = etree.XML((
 81 |         ...     '<root xmlns:rdf="%s">'
 82 |         ...       '<rdf:Description>blah</rdf:Description>'
 83 |         ...     '</root>') % rdfns)
 84 |         >>> [(el.tag, el.text) for el in select_ns(rdf)]
 85 |         [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]
 86 | 
 87 |     """
 88 |     def __init__(self, css, namespaces=None, translator='xml'):
 89 |         if translator == 'xml':
 90 |             translator = LxmlTranslator()
 91 |         elif translator == 'html':
 92 |             translator = LxmlHTMLTranslator()
 93 |         elif translator == 'xhtml':
 94 |             translator = LxmlHTMLTranslator(xhtml=True)
 95 |         path = translator.css_to_xpath(css)
 96 |         etree.XPath.__init__(self, path, namespaces=namespaces)
 97 |         self.css = css
 98 | 
 99 |     def __repr__(self):
100 |         return '<%s %s for %r>' % (
101 |             self.__class__.__name__,
102 |             hex(abs(id(self)))[2:],
103 |             self.css)
104 | 


--------------------------------------------------------------------------------
/src/lxml/cvarargs.pxd:
--------------------------------------------------------------------------------
# Declarations of the C variable-argument-list API taken from <stdarg.h>.
cdef extern from "stdarg.h":
    ctypedef void *va_list
    void va_start(va_list ap, void *last) nogil
    void va_end(va_list ap) nogil

# Helpers declared in the "etree_defs.h" header that take a va_list and
# return an int / a char* (presumably the next argument via va_arg() --
# confirm in etree_defs.h).
cdef extern from "etree_defs.h":
    cdef int va_int(va_list ap) nogil
    cdef char *va_charptr(va_list ap) nogil
9 | 


--------------------------------------------------------------------------------
/src/lxml/debug.pxi:
--------------------------------------------------------------------------------
 1 | 
 2 | @cython.final
 3 | @cython.internal
 4 | cdef class _MemDebug:
 5 |     """Debugging support for the memory allocation in libxml2.
 6 |     """
 7 |     def bytes_used(self):
 8 |         """bytes_used(self)
 9 | 
10 |         Returns the total amount of memory (in bytes) currently used by libxml2.
11 |         Note that libxml2 constrains this value to a C int, which limits
12 |         the accuracy on 64 bit systems.
13 |         """
14 |         return tree.xmlMemUsed()
15 | 
16 |     def blocks_used(self):
17 |         """blocks_used(self)
18 | 
19 |         Returns the total number of memory blocks currently allocated by libxml2.
20 |         Note that libxml2 constrains this value to a C int, which limits
21 |         the accuracy on 64 bit systems.
22 |         """
23 |         return tree.xmlMemBlocks()
24 | 
25 |     def dict_size(self):
26 |         """dict_size(self)
27 | 
28 |         Returns the current size of the global name dictionary used by libxml2
29 |         for the current thread.  Each thread has its own dictionary.
30 |         """
31 |         c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL)
32 |         if c_dict is NULL:
33 |             raise MemoryError()
34 |         return tree.xmlDictSize(c_dict)
35 | 
36 |     def dump(self, output_file=None, byte_count=None):
37 |         """dump(self, output_file=None, byte_count=None)
38 | 
39 |         Dumps the current memory blocks allocated by libxml2 to a file.
40 | 
41 |         The optional parameter 'output_file' specifies the file path.  It defaults
42 |         to the file ".memorylist" in the current directory.
43 | 
44 |         The optional parameter 'byte_count' limits the number of bytes in the dump.
45 |         Note that this parameter is ignored when lxml is compiled against a libxml2
46 |         version before 2.7.0.
47 |         """
48 |         cdef Py_ssize_t c_count
49 |         if output_file is None:
50 |             output_file = b'.memorylist'
51 |         elif isinstance(output_file, unicode):
52 |             output_file.encode(sys.getfilesystemencoding())
53 | 
54 |         f = stdio.fopen(output_file, "w")
55 |         if f is NULL:
56 |             raise IOError("Failed to create file %s" % output_file.decode(sys.getfilesystemencoding()))
57 |         try:
58 |             if tree.LIBXML_VERSION < 20700:
59 |                 tree.xmlMemDisplay(f)
60 |             elif byte_count is None:
61 |                 tree.xmlMemDisplay(f)
62 |             else:
63 |                 c_count = byte_count
64 |                 tree.xmlMemDisplayLast(f, c_count)
65 |         finally:
66 |             stdio.fclose(f)
67 | 
68 |     def show(self, output_file=None, block_count=None):
69 |         """show(self, output_file=None, block_count=None)
70 | 
71 |         Dumps the current memory blocks allocated by libxml2 to a file.
72 |         The output file format is suitable for line diffing.
73 | 
74 |         The optional parameter 'output_file' specifies the file path.  It defaults
75 |         to the file ".memorydump" in the current directory.
76 | 
77 |         The optional parameter 'block_count' limits the number of blocks
78 |         in the dump.
79 |         """
80 |         if output_file is None:
81 |             output_file = b'.memorydump'
82 |         elif isinstance(output_file, unicode):
83 |             output_file.encode(sys.getfilesystemencoding())
84 | 
85 |         f = stdio.fopen(output_file, "w")
86 |         if f is NULL:
87 |             raise IOError("Failed to create file %s" % output_file.decode(sys.getfilesystemencoding()))
88 |         try:
89 |             tree.xmlMemShow(f, block_count if block_count is not None else tree.xmlMemBlocks())
90 |         finally:
91 |             stdio.fclose(f)
92 | 
93 | memory_debugger = _MemDebug()
94 | 


--------------------------------------------------------------------------------
/src/lxml/html/ElementSoup.py:
--------------------------------------------------------------------------------
 1 | __doc__ = """Legacy interface to the BeautifulSoup HTML parser.
 2 | """
 3 | 
 4 | __all__ = ["parse", "convert_tree"]
 5 | 
 6 | from soupparser import convert_tree, parse as _parse
 7 | 
 8 | def parse(file, beautifulsoup=None, makeelement=None):
 9 |     root = _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement)
10 |     return root.getroot()
11 | 


--------------------------------------------------------------------------------
/src/lxml/html/_diffcommand.py:
--------------------------------------------------------------------------------
 1 | import optparse
 2 | import sys
 3 | import re
 4 | import os
 5 | from lxml.html.diff import htmldiff
 6 | 
 7 | description = """\
 8 | """
 9 | 
10 | parser = optparse.OptionParser(
11 |     usage="%prog [OPTIONS] FILE1 FILE2\n"
12 |     "%prog --annotate [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...",
13 |     description=description,
14 |     )
15 | 
16 | parser.add_option(
17 |     '-o', '--output',
18 |     metavar="FILE",
19 |     dest="output",
20 |     default="-",
21 |     help="File to write the difference to",
22 |     )
23 | 
24 | parser.add_option(
25 |     '-a', '--annotation',
26 |     action="store_true",
27 |     dest="annotation",
28 |     help="Do an annotation")
29 | 
def main(args=None):
    """Run the HTML diff command line tool.

    *args* is the list of command line arguments; it defaults to
    ``sys.argv[1:]``.  With the annotation option set, the call is handed
    over to ``annotate()``.  Otherwise exactly two input files are
    required (the process exits with status 1 if not); the bodies of both
    documents are diffed and the result, wrapped in everything that
    surrounds the body of the second document, is written to the
    configured output ("-" means standard output).
    """
    if args is None:
        args = sys.argv[1:]
    options, args = parser.parse_args(args)
    if options.annotation:
        return annotate(options, args)
    if len(args) != 2:
        print('Error: you must give two files')
        parser.print_help()
        sys.exit(1)
    file1, file2 = args
    input1 = read_file(file1)
    input2 = read_file(file2)
    # Only the body of the first document takes part in the diff; head and
    # tail of the output come from the second document.
    body1 = split_body(input1)[1]
    pre, body2, post = split_body(input2)
    result = htmldiff(body1, body2)
    result = pre + result + post
    if options.output == '-':
        if not result.endswith('\n'):
            result += '\n'
        sys.stdout.write(result)
    else:
        f = open(options.output, 'wb')
        try:
            f.write(result)
        finally:
            # close the output file even if writing fails
            f.close()
55 | 
def read_file(filename):
    """Return the complete content of *filename*.

    The special name ``'-'`` reads from standard input.  Any other name
    is opened in binary mode.  Raises ``OSError`` if the file does not
    exist.
    """
    if filename == '-':
        return sys.stdin.read()
    if not os.path.exists(filename):
        raise OSError(
            "Input file %s does not exist" % filename)
    f = open(filename, 'rb')
    try:
        return f.read()
    finally:
        # release the file handle even if reading fails
        f.close()
67 | 
# Opening and closing <body> tags, matched case-insensitively; re.S lets the
# non-greedy ".*?" run across line breaks inside a tag.
body_start_re = re.compile(
    r"<body.*?>", re.I|re.S)
body_end_re = re.compile(
    r"</body.*?>", re.I|re.S)


def split_body(html):
    """Split *html* into ``(pre, body, post)``.

    ``pre`` is everything up to and including the opening ``<body>`` tag,
    ``post`` is everything from the closing ``</body>`` tag onwards, and
    ``body`` is what lies in between.  When a tag is missing, the
    corresponding part is empty, so a bare fragment is returned as the
    body.
    """
    # Empty slices of the input serve as defaults: they have the same string
    # type as *html* and keep both names bound when a tag is not found.
    pre = post = html[:0]
    match = body_start_re.search(html)
    if match:
        pre = html[:match.end()]
        html = html[match.end():]
    match = body_end_re.search(html)
    if match:
        post = html[match.start():]
        html = html[:match.start()]
    return pre, html, post
83 | 
def annotate(options, args):
    """Placeholder for the annotation mode: print a notice and exit with
    status 1.  Both arguments are currently unused."""
    notice = "Not yet implemented"
    print(notice)
    sys.exit(1)
87 |     
88 | 


--------------------------------------------------------------------------------
/src/lxml/html/_html5builder.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Legacy module - don't use in new code!
  3 | 
  4 | html5lib now has its own proper implementation.
  5 | 
  6 | This module implements a tree builder for html5lib that generates lxml
  7 | html element trees.  This module uses camelCase as it follows the
  8 | html5lib style guide.
  9 | """
 10 | 
 11 | from html5lib.treebuilders import _base, etree as etree_builders
 12 | from lxml import html, etree
 13 | 
 14 | 
 15 | class DocumentType(object):
 16 | 
 17 |     def __init__(self, name, publicId, systemId):
 18 |         self.name = name
 19 |         self.publicId = publicId
 20 |         self.systemId = systemId
 21 | 
 22 | class Document(object):
 23 | 
 24 |     def __init__(self):
 25 |         self._elementTree = None
 26 |         self.childNodes = []
 27 | 
 28 |     def appendChild(self, element):
 29 |         self._elementTree.getroot().addnext(element._element)
 30 | 
 31 | 
 32 | class TreeBuilder(_base.TreeBuilder):
 33 |     documentClass = Document
 34 |     doctypeClass = DocumentType
 35 |     elementClass = None
 36 |     commentClass = None
 37 |     fragmentClass = Document
 38 | 
 39 |     def __init__(self, *args, **kwargs):
 40 |         html_builder = etree_builders.getETreeModule(html, fullTree=False)
 41 |         etree_builder = etree_builders.getETreeModule(etree, fullTree=False)
 42 |         self.elementClass = html_builder.Element
 43 |         self.commentClass = etree_builder.Comment
 44 |         _base.TreeBuilder.__init__(self, *args, **kwargs)
 45 | 
 46 |     def reset(self):
 47 |         _base.TreeBuilder.reset(self)
 48 |         self.rootInserted = False
 49 |         self.initialComments = []
 50 |         self.doctype = None
 51 | 
 52 |     def getDocument(self):
 53 |         return self.document._elementTree
 54 | 
 55 |     def getFragment(self):
 56 |         fragment = []
 57 |         element = self.openElements[0]._element
 58 |         if element.text:
 59 |             fragment.append(element.text)
 60 |         fragment.extend(element.getchildren())
 61 |         if element.tail:
 62 |             fragment.append(element.tail)
 63 |         return fragment
 64 | 
 65 |     def insertDoctype(self, name, publicId, systemId):
 66 |         doctype = self.doctypeClass(name, publicId, systemId)
 67 |         self.doctype = doctype
 68 | 
 69 |     def insertComment(self, data, parent=None):
 70 |         if not self.rootInserted:
 71 |             self.initialComments.append(data)
 72 |         else:
 73 |             _base.TreeBuilder.insertComment(self, data, parent)
 74 | 
 75 |     def insertRoot(self, name):
 76 |         buf = []
 77 |         if self.doctype and self.doctype.name:
 78 |             buf.append('<!DOCTYPE %s' % self.doctype.name)
 79 |             if self.doctype.publicId is not None or self.doctype.systemId is not None:
 80 |                 buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId,
 81 |                                                   self.doctype.systemId))
 82 |             buf.append('>')
 83 |         buf.append('<html></html>')
 84 |         root = html.fromstring(''.join(buf))
 85 | 
 86 |         # Append the initial comments:
 87 |         for comment in self.initialComments:
 88 |             root.addprevious(etree.Comment(comment))
 89 | 
 90 |         # Create the root document and add the ElementTree to it
 91 |         self.document = self.documentClass()
 92 |         self.document._elementTree = root.getroottree()
 93 | 
 94 |         # Add the root element to the internal child/open data structures
 95 |         root_element = self.elementClass(name)
 96 |         root_element._element = root
 97 |         self.document.childNodes.append(root_element)
 98 |         self.openElements.append(root_element)
 99 | 
100 |         self.rootInserted = True
101 | 


--------------------------------------------------------------------------------
/src/lxml/html/_setmixin.py:
--------------------------------------------------------------------------------
  1 | class SetMixin(object):
  2 | 
  3 |     """
  4 |     Mix-in for sets.  You must define __iter__, add, remove
  5 |     """
  6 | 
  7 |     def __len__(self):
  8 |         length = 0
  9 |         for item in self:
 10 |             length += 1
 11 |         return length
 12 | 
 13 |     def __contains__(self, item):
 14 |         for has_item in self:
 15 |             if item == has_item:
 16 |                 return True
 17 |         return False
 18 | 
 19 |     def issubset(self, other):
 20 |         for item in other:
 21 |             if item not in self:
 22 |                 return False
 23 |         return True
 24 | 
 25 |     __le__ = issubset
 26 | 
 27 |     def issuperset(self, other):
 28 |         for item in self:
 29 |             if item not in other:
 30 |                 return False
 31 |         return True
 32 | 
 33 |     __ge__ = issuperset
 34 | 
 35 |     def union(self, other):
 36 |         return self | other
 37 | 
 38 |     def __or__(self, other):
 39 |         new = self.copy()
 40 |         new |= other
 41 |         return new
 42 |     
 43 |     def intersection(self, other):
 44 |         return self & other
 45 | 
 46 |     def __and__(self, other):
 47 |         new = self.copy()
 48 |         new &= other
 49 |         return new
 50 | 
 51 |     def difference(self, other):
 52 |         return self - other
 53 | 
 54 |     def __sub__(self, other):
 55 |         new = self.copy()
 56 |         new -= other
 57 |         return new
 58 | 
 59 |     def symmetric_difference(self, other):
 60 |         return self ^ other
 61 | 
 62 |     def __xor__(self, other):
 63 |         new = self.copy()
 64 |         new ^= other
 65 |         return new
 66 | 
 67 |     def copy(self):
 68 |         return set(self)
 69 | 
 70 |     def update(self, other):
 71 |         for item in other:
 72 |             self.add(item)
 73 | 
 74 |     def __ior__(self, other):
 75 |         self.update(other)
 76 |         return self
 77 | 
 78 |     def intersection_update(self, other):
 79 |         for item in self:
 80 |             if item not in other:
 81 |                 self.remove(item)
 82 | 
 83 |     def __iand__(self, other):
 84 |         self.intersection_update(other)
 85 |         return self
 86 | 
 87 |     def difference_update(self, other):
 88 |         for item in other:
 89 |             if item in self:
 90 |                 self.remove(item)
 91 | 
 92 |     def __isub__(self, other):
 93 |         self.difference_update(other)
 94 |         return self
 95 | 
 96 |     def symmetric_difference_update(self, other):
 97 |         for item in other:
 98 |             if item in self:
 99 |                 self.remove(item)
100 |             else:
101 |                 self.add(item)
102 | 
103 |     def __ixor__(self, other):
104 |         self.symmetric_difference_update(other)
105 |         return self
106 | 
107 |     def discard(self, item):
108 |         try:
109 |             self.remove(item)
110 |         except KeyError:
111 |             pass
112 | 
113 |     def clear(self):
114 |         for item in list(self):
115 |             self.remove(item)
116 | 


--------------------------------------------------------------------------------
/src/lxml/html/builder.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------------------
  2 | # The ElementTree toolkit is
  3 | # Copyright (c) 1999-2004 by Fredrik Lundh
  4 | # --------------------------------------------------------------------
  5 | 
  6 | """
  7 | A set of HTML generator tags for building HTML documents.
  8 | 
  9 | Usage::
 10 | 
 11 |     >>> from lxml.html.builder import *
 12 |     >>> html = HTML(
 13 |     ...            HEAD( TITLE("Hello World") ),
 14 |     ...            BODY( CLASS("main"),
 15 |     ...                  H1("Hello World !")
 16 |     ...            )
 17 |     ...        )
 18 | 
 19 |     >>> import lxml.etree
 20 |     >>> print lxml.etree.tostring(html, pretty_print=True)
 21 |     <html>
 22 |       <head>
 23 |         <title>Hello World</title>
 24 |       </head>
 25 |       <body class="main">
 26 |         <h1>Hello World !</h1>
 27 |       </body>
 28 |     </html>
 29 | 
 30 | """
 31 | 
 32 | from lxml.builder import ElementMaker
 33 | from lxml.html import html_parser
 34 | 
 35 | E = ElementMaker(makeelement=html_parser.makeelement)
 36 | 
 37 | # elements
 38 | A = E.a # anchor
 39 | ABBR = E.abbr # abbreviated form (e.g., WWW, HTTP, etc.)
 40 | ACRONYM = E.acronym # 
 41 | ADDRESS = E.address # information on author
 42 | APPLET = E.applet # Java applet (DEPRECATED)
 43 | AREA = E.area # client-side image map area
 44 | B = E.b # bold text style
 45 | BASE = E.base # document base URI
 46 | BASEFONT = E.basefont # base font size (DEPRECATED)
 47 | BDO = E.bdo # I18N BiDi over-ride
 48 | BIG = E.big # large text style
 49 | BLOCKQUOTE = E.blockquote # long quotation
 50 | BODY = E.body # document body
 51 | BR = E.br # forced line break
 52 | BUTTON = E.button # push button
 53 | CAPTION = E.caption # table caption
 54 | CENTER = E.center # shorthand for DIV align=center (DEPRECATED)
 55 | CITE = E.cite # citation
 56 | CODE = E.code # computer code fragment
 57 | COL = E.col # table column
 58 | COLGROUP = E.colgroup # table column group
 59 | DD = E.dd # definition description
 60 | DEL = getattr(E, 'del') # deleted text
 61 | DFN = E.dfn # instance definition
 62 | DIR = E.dir # directory list (DEPRECATED)
 63 | DIV = E.div # generic language/style container
 64 | DL = E.dl # definition list
 65 | DT = E.dt # definition term
 66 | EM = E.em # emphasis
 67 | FIELDSET = E.fieldset # form control group
 68 | FONT = E.font # local change to font (DEPRECATED)
 69 | FORM = E.form # interactive form
 70 | FRAME = E.frame # subwindow
 71 | FRAMESET = E.frameset # window subdivision
 72 | H1 = E.h1 # heading
 73 | H2 = E.h2 # heading
 74 | H3 = E.h3 # heading
 75 | H4 = E.h4 # heading
 76 | H5 = E.h5 # heading
 77 | H6 = E.h6 # heading
 78 | HEAD = E.head # document head
 79 | HR = E.hr # horizontal rule
 80 | HTML = E.html # document root element
 81 | I = E.i # italic text style
 82 | IFRAME = E.iframe # inline subwindow
 83 | IMG = E.img # Embedded image
 84 | INPUT = E.input # form control
 85 | INS = E.ins # inserted text
 86 | ISINDEX = E.isindex # single line prompt (DEPRECATED)
 87 | KBD = E.kbd # text to be entered by the user
 88 | LABEL = E.label # form field label text
 89 | LEGEND = E.legend # fieldset legend
 90 | LI = E.li # list item
 91 | LINK = E.link # a media-independent link
 92 | MAP = E.map # client-side image map
 93 | MENU = E.menu # menu list (DEPRECATED)
 94 | META = E.meta # generic metainformation
 95 | NOFRAMES = E.noframes # alternate content container for non frame-based rendering
 96 | NOSCRIPT = E.noscript # alternate content container for non script-based rendering
 97 | OBJECT = E.object # generic embedded object
 98 | OL = E.ol # ordered list
 99 | OPTGROUP = E.optgroup # option group
100 | OPTION = E.option # selectable choice
101 | P = E.p # paragraph
102 | PARAM = E.param # named property value
103 | PRE = E.pre # preformatted text
104 | Q = E.q # short inline quotation
105 | S = E.s # strike-through text style (DEPRECATED)
106 | SAMP = E.samp # sample program output, scripts, etc.
107 | SCRIPT = E.script # script statements
108 | SELECT = E.select # option selector
109 | SMALL = E.small # small text style
110 | SPAN = E.span # generic language/style container
111 | STRIKE = E.strike # strike-through text (DEPRECATED)
112 | STRONG = E.strong # strong emphasis
113 | STYLE = E.style # style info
114 | SUB = E.sub # subscript
115 | SUP = E.sup # superscript
116 | TABLE = E.table # table
117 | TBODY = E.tbody # table body
118 | TD = E.td # table data cell
119 | TEXTAREA = E.textarea # multi-line text field
120 | TFOOT = E.tfoot # table footer
121 | TH = E.th # table header cell
122 | THEAD = E.thead # table header
123 | TITLE = E.title # document title
124 | TR = E.tr # table row
125 | TT = E.tt # teletype or monospaced text style
126 | U = E.u # underlined text style (DEPRECATED)
127 | UL = E.ul # unordered list
128 | VAR = E.var # instance of a variable or program argument
129 | 
130 | # attributes (helpers are provided only for names that are Python reserved words)
131 | ATTR = dict  # alias of the built-in dict
132 | def CLASS(v): return {'class': v}  # 'class' is a Python keyword; returns the one-item dict {'class': v}
133 | def FOR(v): return {'for': v}  # 'for' is a Python keyword; returns the one-item dict {'for': v}
134 | 


--------------------------------------------------------------------------------
/src/lxml/html/defs.py:
--------------------------------------------------------------------------------
  1 | # FIXME: this should all be confirmed against what a DTD says
  2 | # (probably in a test; this may not match the DTD exactly, but we
  3 | # should document just how it differs).
  4 | 
  5 | # Data taken from http://www.w3.org/TR/html401/index/elements.html
  6 | # and http://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements
  7 | # for html5_tags.
  8 | 
  9 | try:
 10 |     frozenset  # probe only: raises NameError when the built-in does not exist
 11 | except NameError:
 12 |     from sets import Set as frozenset  # fallback binding (NOTE: sets.Set is a mutable set type)
 13 | 
 14 | # Every name defined below is a frozenset of lower-case tag or attribute names.
 15 | empty_tags = frozenset([
 16 |     'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
 17 |     'img', 'input', 'isindex', 'link', 'meta', 'param'])
 18 | 
 19 | deprecated_tags = frozenset([
 20 |     'applet', 'basefont', 'center', 'dir', 'font', 'isindex',
 21 |     'menu', 's', 'strike', 'u'])
 22 | 
 23 | # archive actually takes a space-separated list of URIs
 24 | link_attrs = frozenset([
 25 |     'action', 'archive', 'background', 'cite', 'classid',
 26 |     'codebase', 'data', 'href', 'longdesc', 'profile', 'src',
 27 |     'usemap',
 28 |     # Not standard:
 29 |     'dynsrc', 'lowsrc',
 30 |     ])
 31 | 
 32 | # Not in the HTML 4 spec:
 33 | # onerror, onresize
 34 | event_attrs = frozenset([
 35 |     'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror',
 36 |     'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload',
 37 |     'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover',
 38 |     'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit',
 39 |     'onunload',
 40 |     ])
 41 | 
 42 | safe_attrs = frozenset([
 43 |     'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align',
 44 |     'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff',
 45 |     'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan',
 46 |     'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype',
 47 |     'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id',
 48 |     'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method',
 49 |     'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly',
 50 |     'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape',
 51 |     'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title',
 52 |     'type', 'usemap', 'valign', 'value', 'vspace', 'width'])
 53 | 
 54 | # From http://htmlhelp.com/reference/html40/olist.html
 55 | top_level_tags = frozenset([
 56 |     'html', 'head', 'body', 'frameset',
 57 |     ])
 58 | 
 59 | head_tags = frozenset([
 60 |     'base', 'isindex', 'link', 'meta', 'script', 'style', 'title',
 61 |     ])
 62 | 
 63 | general_block_tags = frozenset([
 64 |     'address',
 65 |     'blockquote',
 66 |     'center',
 67 |     'del',
 68 |     'div',
 69 |     'h1',
 70 |     'h2',
 71 |     'h3',
 72 |     'h4',
 73 |     'h5',
 74 |     'h6',
 75 |     'hr',
 76 |     'ins',
 77 |     'isindex',
 78 |     'noscript',
 79 |     'p',
 80 |     'pre',
 81 |     ])
 82 | 
 83 | list_tags = frozenset([
 84 |     'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul',
 85 |     ])
 86 | 
 87 | table_tags = frozenset([
 88 |     'table', 'caption', 'colgroup', 'col',
 89 |     'thead', 'tfoot', 'tbody', 'tr', 'td', 'th',
 90 |     ])
 91 | 
 92 | # just this one from
 93 | # http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm
 94 | block_tags = general_block_tags | list_tags | table_tags | frozenset([
 95 |     # Partial form tags
 96 |     'fieldset', 'form', 'legend', 'optgroup', 'option',
 97 |     ])
 98 | 
 99 | form_tags = frozenset([
100 |     'form', 'button', 'fieldset', 'legend', 'input', 'label',
101 |     'select', 'optgroup', 'option', 'textarea',
102 |     ])
103 | 
104 | special_inline_tags = frozenset([
105 |     'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe',
106 |     'img', 'map', 'area', 'object', 'param', 'q', 'script',
107 |     'span', 'sub', 'sup',
108 |     ])
109 | 
110 | phrase_tags = frozenset([
111 |     'abbr', 'acronym', 'cite', 'code', 'del', 'dfn', 'em',
112 |     'ins', 'kbd', 'samp', 'strong', 'var',
113 |     ])
114 | 
115 | font_style_tags = frozenset([
116 |     'b', 'big', 'i', 's', 'small', 'strike', 'tt', 'u',
117 |     ])
118 | 
119 | frame_tags = frozenset([
120 |     'frameset', 'frame', 'noframes',
121 |     ])
122 |     
123 | html5_tags = frozenset([  # 'embed' is also listed in special_inline_tags
124 |     'article', 'aside', 'audio', 'canvas', 'command', 'datalist',
125 |     'details', 'embed', 'figcaption', 'figure', 'footer', 'header',
126 |     'hgroup', 'keygen', 'mark', 'math', 'meter', 'nav', 'output',
127 |     'progress', 'rp', 'rt', 'ruby', 'section', 'source', 'summary',
128 |     'svg', 'time', 'track', 'video', 'wbr'
129 |     ])
130 | 
131 | # These tags aren't standard
132 | nonstandard_tags = frozenset(['blink', 'marquee'])
133 | # NOTE(review): frame_tags is not part of the union below, so 'frame' and 'noframes' are absent from tags - confirm this is intended
134 | tags = (top_level_tags | head_tags | general_block_tags | list_tags
135 |         | table_tags | form_tags | special_inline_tags | phrase_tags
136 |         | font_style_tags | nonstandard_tags | html5_tags)
137 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/__init__.py:
--------------------------------------------------------------------------------
1 | #
2 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_applet.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains applet
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options: 
4 | 
5 | <div>safe<applet code="foo.class" codebase="http://example.com/"></applet> <b>description</b></div>
6 | ----------
7 | <div>safe <b>description</b></div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_blink.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains blink
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe description'
3 | Options: 
4 | Notes: <div> wrapper
5 | 
6 | <div><blink>safe</blink> description</div>
7 | ----------
8 | <div>safe description</div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_crazy.data:
--------------------------------------------------------------------------------
 1 | Description: entry content is crazy
 2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'Crazy HTML -' + u'- Can Your Regex Parse This?\n\n\n\n<!-' + u'- <script> -' + u'->\n\n<!-' + u'- \n\t<script> \n-' + u'->\n\n\n\nfunction executeMe()\n{\n\n\n\n\n/* \n<h1>Did The Javascript Execute?</h1>\n<div>\nI will execute here, too, if you mouse over me\n</div>'
 3 | Options: -page_structure
 4 | Notes: for some reason the comments in the expected field are acting weird
 5 | 
 6 | 
 7 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
 8 | 
 9 | <html xmlns="http://www.w3.org/1999/xhtml">
10 | <head>
11 | <title>Crazy HTML -- Can Your Regex Parse This?</title>
12 | 
13 | </head>
14 | <body    notRealAttribute="value"onload="executeMe();"foo="bar"
15 | 
16 | >
17 | <!-- <script> -->
18 | 
19 | <!-- 
20 | 	<script> 
21 | -->
22 | 
23 | </script>
24 | 
25 | 
26 | <script
27 | 
28 | 
29 | >
30 | 
31 | function executeMe()
32 | {
33 | 
34 | 
35 | 
36 | 
37 | /* <script> 
38 | function am_i_javascript()
39 | {
40 | 	var str = "Some innocuously commented out stuff";
41 | }
42 | < /script>
43 | */
44 | 
45 | 	
46 | 	
47 | 	
48 | 	
49 | 	
50 | 	
51 | 	
52 | 	
53 | 	alert("Executed");
54 | }
55 | 
56 |                                    </script
57 | 
58 | 
59 | 
60 | >
61 | <h1>Did The Javascript Execute?</h1>
62 | <div notRealAttribute="value
63 | "onmouseover="
64 | executeMe();
65 | "foo="bar">
66 | I will execute here, too, if you mouse over me
67 | </div>
68 | 
69 | </body>
70 | 
71 | </html>
72 | 
73 | ----------
74 | <html>
75 |  <head>
76 |   <title>Crazy HTML -- Can Your Regex Parse This?</title>
77 |  </head>
78 |  <body>
79 | <h1>Did The Javascript Execute?</h1>
80 | <div>
81 | I will execute here, too, if you mouse over me
82 | </div>
83 |  </body>
84 | </html>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_embed.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains embed
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options: 
4 | Notes: <div> wrapper, close <embed> tag (not closing it lost the <b> tag)
5 | 
6 | <div>safe<embed src="http://example.com/"></embed> <b>description</b></div>
7 | ----------
8 | <div>safe <b>description</b></div>
9 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_frame.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains frameset
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options: 
4 | 
5 | <div>safe<frameset rows="*"><frame src="http://example.com/"></frameset> <b>description</b></div>
6 | ----------
7 | <div>safe <b>description</b></div>
8 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_iframe.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains iframe
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options: 
4 | Notes: div wrapper, close <iframe>
5 | 
6 | <div>safe<iframe src="http://example.com/"></iframe> <b>description</b></iframe></div>
7 | ----------
8 | <div>safe <b>description</b></div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_link.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains link
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options:
4 | 
5 | <div>safe<link rel="stylesheet" type="text/css" href="http://example.com/evil.css"> <b>description</b></div>
6 | ----------
7 | <div>safe <b>description</b></div>
8 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_meta.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains meta
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options: 
4 | 
5 | <div>safe<meta http-equiv="Refresh" content="0; URL=http://example.com/"> <b>description</b></div>
6 | ----------
7 | <div>safe <b>description</b></div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_object.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains object
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe <b>description</b>'
3 | Options: 
4 | Notes: div wrapper, close <object>
5 | 
6 | <div>safe<object classid="clsid:C932BA85-4374-101B-A56C-00AA003668DC"></object> <b>description</b></div>
7 | ----------
8 | <div>safe <b>description</b></div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onabort.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onabort
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onabort="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onblur.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onblur
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onblur="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onchange.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onchange
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onchange="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onclick.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onclick
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onclick="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_ondblclick.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains ondblclick
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: javascript
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" ondblclick="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onerror.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onerror
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onerror="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onfocus.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onfocus
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onfocus="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onkeydown.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onkeydown
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onkeydown="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onkeypress.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onkeypress
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onkeypress="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onkeyup.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onkeyup
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onkeyup="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onload.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onload
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onload="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onmousedown.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onmousedown
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onmousedown="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onmouseout.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onmouseout
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onmouseout="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onmouseover.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onmouseover
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onmouseover="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onmouseup.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onmouseup
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onmouseup="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onreset.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onreset
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onreset="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onresize.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onresize
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onresize="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onsubmit.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onsubmit
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onsubmit="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_onunload.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains onunload
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<img src="http://www.ragingplatypus.com/i/cam-full.jpg" />'
3 | Options: 
4 | 
5 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" onunload="location.href='http://www.ragingplatypus.com/';" />
6 | ----------
7 | <img src="http://www.ragingplatypus.com/i/cam-full.jpg" />


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_script.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains script
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe description'
3 | Options: 
4 | 
5 | <div>safe<script type="text/javascript">location.href='http:/'+'/example.com/';</script> description</div>
6 | ----------
7 | <div>safe description</div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_script_cdata.data:
--------------------------------------------------------------------------------
 1 | Description: entry content contains script (cdata)
 2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'safe description'
 3 | Options: 
 4 | Notes: div wrapper.  Currently not working because of how HTML() is parsing the CDATA (not in a useful way)
 5 |        The resulting code is safe, it just includes crap from the <script> tag (but not the script tag
 6 |        itself).
 7 | Ignore: true
 8 | 
 9 | <div>
10 |   <![CDATA[safe<script type="text/javascript">location.href='http:/'+'/example.com/';</script> description]]>
11 | </div>
12 | ----------
13 | <div>safe description</div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_script_inline.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains script (inline)
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<div>safe description</div>'
3 | Options: 
4 | 
5 | <div xmlns="http://www.w3.org/1999/xhtml">safe<script type="text/javascript">location.href='http:/'+'/example.com/';</script> description</div>
6 | ----------
7 | <div>safe description</div>


--------------------------------------------------------------------------------
/src/lxml/html/tests/feedparser-data/entry_content_style.data:
--------------------------------------------------------------------------------
1 | Description: entry content contains style
2 | Expect: not bozo and entries[0]['content'][0]['value'] == u'<a href="http://www.ragingplatypus.com/">never trust your upstream platypus</a>'
3 | Options: style
4 | 
5 | <a href="http://www.ragingplatypus.com/" style="display:block; position:absolute; left:0; top:0; width:100%; height:100%; z-index:1; background-color:black; background-image:url(http://www.ragingplatypus.com/i/cam-full.jpg); background-x:center; background-y:center; background-repeat:repeat;">never trust your upstream platypus</a>
6 | ----------
7 | <a href="http://www.ragingplatypus.com/">never trust your upstream platypus</a>


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/background-image-plus.data:
--------------------------------------------------------------------------------
1 | Description: I built a quick XSS fuzzer to detect any erroneous characters that are allowed after the open parenthesis but before the JavaScript directive in IE and Netscape 8.1 in secure site mode. These are in decimal but you can include hex and add padding of course. (Any of the following chars can be used: 1-32, 34, 39, 160, 8192-8.13, 12288, 65279)
2 |     http://ha.ckers.org/xss.html#XSS_DIV_background-image_plus
3 | Options: -safe_attrs_only
4 | Notes: As you see, the CSS gets corrupted, but I don't really care that much.
5 | 
6 | <DIV STYLE="background-image: url(javascript:alert('XSS'))">text</div>
7 | ----------
8 | <div style="background-image: url(">text</div>
9 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/background-image-with-unicoded.data:
--------------------------------------------------------------------------------
 1 | Description: DIV background-image with unicoded XSS exploit (this has been modified slightly to obfuscate the url parameter). The original vulnerability was found by Renaud Lifchitz as a vulnerability in Hotmail.
 2 |     http://ha.ckers.org/xss.html#XSS_DIV_background_image_unicode
 3 | Options: -safe_attrs_only
 4 | Ignore: true
 5 | Notes: I don't understand how this exploit works.  It seems like the description actually refers to
 6 |        the unicode you'd import, but why that matters I don't know.
 7 | 
 8 | <DIV STYLE="background-image:\0075\0072\006C\0028'\006a\0061\0076\0061\0073\0063\0072\0069\0070\0074\003a\0061\006c\0065\0072\0074\0028.1027\0058.1053\0053\0027\0029'\0029">text</div>
 9 | ----------
10 | <div style="background-image: ">text</div>
11 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/downlevel-hidden.data:
--------------------------------------------------------------------------------
 1 | Description: Downlevel-Hidden block (only works in IE5.0 and later and Netscape 8.1 in IE rendering engine mode). Some websites consider anything inside a comment block to be safe, so that it does not need to be removed, which allows our Cross Site Scripting vector. Or the system could add comment tags around something to attempt to render it harmless. As we can see, that probably wouldn't do the job.
 2 |     http://ha.ckers.org/xss.html#XSS_Downlevel-Hidden
 3 | Options: -comments, -processing_instructions
 4 | 
 5 | <div><!--[if gte IE 4]>
 6 | <SCRIPT>alert('XSS');</SCRIPT>
 7 | <![endif]--></div>
 8 | ----------
 9 | <div></div>
10 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/html-plus-time.data:
--------------------------------------------------------------------------------
 1 | Description: HTML+TIME in XML. This is how Grey Magic hacked Hotmail and Yahoo!. This only works in Internet Explorer and Netscape 8.1 in IE rendering engine mode and remember that you need to be between HTML and BODY tags for this to work
 2 |     http://ha.ckers.org/xss.html#XSS_HTML_plus_time
 3 | Ignore: true
 4 | Notes: I don't understand the vector here, or how this is supposed to work.
 5 | 
 6 | <div>
 7 | <t:set attributeName="innerHTML" to="XSS&lt;SCRIPT DEFER&gt;alert(&quot;XSS&quot;)&lt;/SCRIPT&gt;">
 8 | </BODY></HTML></div>
 9 | ----------
10 | <div>
11 | <t:set attributeName="innerHTML" to="XSS&lt;SCRIPT DEFER&gt;alert(&quot;XSS&quot;)&lt;/SCRIPT&gt;">
12 | </BODY></HTML>x</div>
13 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/javascript-link.data:
--------------------------------------------------------------------------------
 1 | Description: javascript: in many forms
 2 | 
 3 | <div>
 4 |  <a href="java
 5 | script:alert()">x</a>
 6 |  <a href="j a v a s c r i p t:alert()">x</a>
 7 |  <a href="jscript
 8 | :alert()">x</a>
 9 | </div>
10 | ----------
11 | <div>
12 |  <a href="">x</a>
13 |  <a href="">x</a>
14 |  <a href="">x</a>
15 | </div>
16 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/style-comment.data:
--------------------------------------------------------------------------------
1 | Description: to break up expression (Thanks to Roman Ivanov for this one)
2 |     http://ha.ckers.org/xss.html#XSS_STYLE_comment
3 | Options: -safe_attrs_only
4 | Notes: Because of the suspicious stuff in there, the style is removed entirely
5 | 
6 | <IMG STYLE="xss:expr/*XSS*/ession(alert('XSS'))">
7 | ----------
8 | <img>
9 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/style-expression.data:
--------------------------------------------------------------------------------
 1 | Description: (this is really a hybrid of the above XSS vectors, but it really does show how hard STYLE tags can be to parse apart, like above this can send IE into a loop)
 2 |     http://ha.ckers.org/xss.html#XSS_IMG_STYLE_expression
 3 | Options: -safe_attrs_only
 4 | Notes: Modified to avoid a parsing in libxml2 that ruins the XSS (the " marks).  
 5 |        Also there seemed to be an extra "p" in exppression
 6 | 
 7 | <div><img style="xss: ex/*<A STYLE='no\xss:noxss(*//*);
 8 | xss:&#101;x&#x2F;*XSS*//*/*/pression(alert('XSS'))"></div>
 9 | ----------
10 | <div><img></div>
11 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/style-import.data:
--------------------------------------------------------------------------------
1 | Description: tags with broken up JavaScript for XSS (this XSS at times sends IE into an infinite loop of alerts)
2 |     http://ha.ckers.org/xss.html#XSS_STYLE
3 | Options: -safe_attrs_only
4 | 
5 | <div><STYLE>@im\port'\ja\vasc\ript:alert("XSS")';</STYLE></div>
6 | ----------
7 | <div><style>/* deleted */</style></div>
8 | 
9 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/style-js-tag.data:
--------------------------------------------------------------------------------
1 | Description: (Older versions of Netscape only)
2 |     http://ha.ckers.org/xss.html#XSS_STYLE_tag
3 | Options: -safe_attrs_only
4 | 
5 | <div><STYLE TYPE="text/javascript">alert('XSS');</STYLE></div>
6 | ----------
7 | <div></div>
8 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/style-url-js.data:
--------------------------------------------------------------------------------
1 | Description: http://ha.ckers.org/xss.html#XSS_STYLE_background-image
2 | Options: -style, -safe_attrs_only
3 | Notes: The CSS is messed up here, but so it goes
4 | 
5 | <div><STYLE>.XSS{background-image:url("javascript:alert('XSS')");}</STYLE><A CLASS=XSS></A></div>
6 | ----------
7 | <div><style>.XSS{background-image:url("");}</style><a class="XSS"></a></div>
8 | 
9 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/xml-data-island.data:
--------------------------------------------------------------------------------
 1 | Description: XML data island with comment obfuscation (this is another take on the same exploit that doesn't use CDATA fields, but rather uses comments to break up the javascript directive)
 2 |     http://ha.ckers.org/xss.html#XSS_XML_data_island_comment
 3 | Ignore: true
 4 | Notes: I don't understand the vector here.  Maybe datasrc should be filtered?
 5 | 
 6 | <div><XML ID="xss"><I><B>&lt;IMG SRC="javas<!-- -->cript:alert('XSS')"&gt;</B></I></XML>
 7 | <SPAN DATASRC="#xss" DATAFLD="B" DATAFORMATAS="HTML"></SPAN></div>
 8 | ----------
 9 | <div><XML ID="xss"><I><B>&lt;IMG SRC="javas<!-- -->cript:alert('XSS')"&gt;</B></I></XML>
10 | <SPAN DATASRC="#xss" DATAFLD="B" DATAFORMATAS="HTML"></SPAN>x</div>
11 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/xml-embedded-js.data:
--------------------------------------------------------------------------------
 1 | Description: Locally hosted XML with embedded JavaScript#XSS_Local_XML that is generated using an XML data island. This is the same as above but instead refers to a locally hosted (must be on the same server) XML file that contains your cross site scripting vector. You can see the result here <http://ha.ckers.org/xssxmltest.html>
 2 |     http://ha.ckers.org/xss.html#XSS_Local_XML
 3 | 
 4 | <div><XML SRC="xsstest.xml" ID=I></XML>
 5 | <SPAN DATASRC=#I DATAFLD=C DATAFORMATAS=HTML></SPAN></div>
 6 | ----------
 7 | <div>
 8 |  <span></span>
 9 | </div>
10 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/hackers-org-data/xml-namespace.data.BROKEN:
--------------------------------------------------------------------------------
 1 | Description: XML namespace. The htc file must be located on the same server as your XSS vector
 2 |     http://ha.ckers.org/xss.html#XSS_XML_namespace
 3 | Note: I don't completely understand the vector here.  page_structure is what does this.
 4 | 
 5 | <HTML xmlns:xss>
 6 |   <body>
 7 |     <?import namespace="xss" implementation="http://ha.ckers.org/xss.htc">
 8 |     <xss:xss>XSS</xss:xss>
 9 |   </body>
10 | </HTML>
11 | ----------
12 | <HTML>
13 |   <body>
14 |     <div>XSS</div>
15 |   </body>
16 | </HTML>
17 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_autolink.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest
 3 | 
 4 | def test_suite():
 5 |     suite = unittest.TestSuite()
 6 |     if sys.version_info >= (2,4):
 7 |         suite.addTests([make_doctest('test_autolink.txt')])
 8 |     return suite
 9 | 
10 | if __name__ == '__main__':
11 |     unittest.main()
12 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_autolink.txt:
--------------------------------------------------------------------------------
 1 | This tests autolink::
 2 | 
 3 |     >>> from lxml.html import usedoctest
 4 |     >>> from lxml.html.clean import autolink_html
 5 |     >>> print(autolink_html('''
 6 |     ... <div>Link here: http://test.com/foo.html.</div>
 7 |     ... '''))
 8 |     <div>Link here: <a href="http://test.com/foo.html">http://test.com/foo.html</a>.</div>
 9 |     >>> print(autolink_html('''
10 |     ... <div>Mail me at mailto:ianb@test.com or http://myhome.com</div>
11 |     ... '''))
12 |     <div>Mail me at <a href="mailto:ianb@test.com">ianb@test.com</a>
13 |     or <a href="http://myhome.com">http://myhome.com</a></div>
14 |     >>> print(autolink_html('''
15 |     ... <div>The <b>great</b> thing is the http://link.com links <i>and</i>
16 |     ... the http://foobar.com links.</div>'''))
17 |     <div>The <b>great</b> thing is the <a href="http://link.com">http://link.com</a> links <i>and</i>
18 |     the <a href="http://foobar.com">http://foobar.com</a> links.</div>
19 |     >>> print(autolink_html('''
20 |     ... <div>Link: &lt;http://foobar.com&gt;</div>'''))
21 |     <div>Link: &lt;<a href="http://foobar.com">http://foobar.com</a>&gt;</div>
22 |     >>> print(autolink_html('''
23 |     ... <div>Link: (http://foobar.com)</div>'''))
24 |     <div>Link: (<a href="http://foobar.com">http://foobar.com</a>)</div>
25 | 
26 | Parentheses are tricky, we'll do our best::
27 | 
28 |     >>> print(autolink_html('''
29 |     ... <div>(Link: http://en.wikipedia.org/wiki/PC_Tools_(Central_Point_Software))</div>
30 |     ... '''))
31 |     <div>(Link: <a href="http://en.wikipedia.org/wiki/PC_Tools_(Central_Point_Software)">http://en.wikipedia.org/wiki/PC_Tools_(Central_Point_Software)</a>)</div>
32 |     >>> print(autolink_html('''
33 |     ... <div>... a link: http://foo.com)</div>
34 |     ... '''))
35 |     <div>... a link: <a href="http://foo.com">http://foo.com</a>)</div>
36 | 
37 | Some cases that won't be caught (on purpose)::
38 | 
39 |     >>> print(autolink_html('''
40 |     ... <div>A link to http://localhost/foo/bar won't, but a link to
41 |     ...  http://test.com will</div>'''))
42 |     <div>A link to http://localhost/foo/bar won't, but a link to
43 |     <a href="http://test.com">http://test.com</a> will</div>
44 |     >>> print(autolink_html('''
45 |     ... <div>A link in <textarea>http://test.com</textarea></div>'''))
46 |     <div>A link in <textarea>http://test.com</textarea></div>
47 |     >>> print(autolink_html('''
48 |     ... <div>A link in <a href="http://foo.com">http://bar.com</a></div>'''))
49 |     <div>A link in <a href="http://foo.com">http://bar.com</a></div>
50 |     >>> print(autolink_html('''
51 |     ... <div>A link in <code>http://foo.com</code> or
52 |     ... <span class="nolink">http://bar.com</span></div>'''))
53 |     <div>A link in <code>http://foo.com</code> or
54 |     <span class="nolink">http://bar.com</span></div>
55 | 
56 | There's also a word wrapping function, that should probably be run
57 | after autolink::
58 | 
59 |     >>> from lxml.html.clean import word_break_html
60 |     >>> def pascii(s):
61 |     ...     print(s.encode('ascii', 'xmlcharrefreplace').decode('ascii'))
62 |     >>> pascii(word_break_html( u'''
63 |     ... <div>Hey you
64 |     ... 12345678901234567890123456789012345678901234567890</div>'''))
65 |     <div>Hey you
66 |     1234567890123456789012345678901234567890&#8203;1234567890</div>
67 | 
68 | Not everything is broken:
69 | 
70 |     >>> pascii(word_break_html('''
71 |     ... <div>Hey you
72 |     ... <code>12345678901234567890123456789012345678901234567890</code></div>'''))
73 |     <div>Hey you
74 |     <code>12345678901234567890123456789012345678901234567890</code></div>
75 |     >>> pascii(word_break_html('''
76 |     ... <a href="12345678901234567890123456789012345678901234567890">text</a>'''))
77 |     <a href="12345678901234567890123456789012345678901234567890">text</a>
78 | 
79 |     
80 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_basic.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest, doctest
 3 | import lxml.html
 4 | 
 5 | def test_suite():
 6 |     suite = unittest.TestSuite()
 7 |     if sys.version_info >= (2,4):
 8 |         suite.addTests([make_doctest('test_basic.txt')])
 9 |     suite.addTests([doctest.DocTestSuite(lxml.html)])
10 |     return suite
11 | 
12 | if __name__ == '__main__':
13 |     unittest.main()
14 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_clean.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest
 3 | from lxml.etree import LIBXML_VERSION
 4 | 
 5 | import lxml.html
 6 | from lxml.html.clean import Cleaner
 7 | 
 8 | class CleanerTest(unittest.TestCase):
 9 |     def test_allow_tags(self):
10 |         html = """
11 |             <html>
12 |             <head>
13 |             </head>
14 |             <body>
15 |             <p>some text</p>
16 |             <table>
17 |             <tr>
18 |             <td>hello</td><td>world</td>
19 |             </tr>
20 |             <tr>
21 |             <td>hello</td><td>world</td>
22 |             </tr>
23 |             </table>
24 |             <img>
25 |             </body>
26 |             </html>
27 |             """
28 | 
29 |         html_root = lxml.html.document_fromstring(html)
30 |         cleaner = Cleaner(
31 |             remove_unknown_tags = False,
32 |             allow_tags = ['table', 'tr', 'td'])
33 |         result = cleaner.clean_html(html_root)
34 | 
35 |         self.assertEqual(12-5+1, len(list(result.iter())))
36 | 
37 |     def test_safe_attrs_included(self):
38 |         html = """<p><span style="color: #00ffff;">Cyan</span></p>"""
39 | 
40 |         safe_attrs=set(lxml.html.defs.safe_attrs)
41 |         safe_attrs.add('style')
42 | 
43 |         cleaner = Cleaner(
44 |             safe_attrs_only=True,
45 |             safe_attrs=safe_attrs)
46 |         result = cleaner.clean_html(html)
47 | 
48 |         self.assertEqual(html, result)
49 | 
50 |     def test_safe_attrs_excluded(self):
51 |         html = """<p><span style="color: #00ffff;">Cyan</span></p>"""
52 |         expected = """<p><span>Cyan</span></p>"""
53 | 
54 |         safe_attrs=set()
55 | 
56 |         cleaner = Cleaner(
57 |             safe_attrs_only=True,
58 |             safe_attrs=safe_attrs)
59 |         result = cleaner.clean_html(html)
60 | 
61 |         self.assertEqual(expected, result)
62 | 
63 | def test_suite():
64 |     suite = unittest.TestSuite()
65 |     if sys.version_info >= (2,4):
66 |         suite.addTests([make_doctest('test_clean.txt')])
67 |         if LIBXML_VERSION >= (2,6,31):
68 |             suite.addTests([make_doctest('test_clean_embed.txt')])
69 |     suite.addTests(unittest.makeSuite(CleanerTest))
70 |     return suite
71 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_clean_embed.txt:
--------------------------------------------------------------------------------
 1 | THIS FAILS IN libxml2 2.6.29 AND 2.6.30 !!
 2 | 
 3 | 
 4 | >>> from lxml.html import fromstring, tostring
 5 | >>> from lxml.html.clean import clean, clean_html, Cleaner
 6 | >>> from lxml.html import usedoctest
 7 | 
 8 | >>> def tostring(el):  # work-around for Py3 'bytes' type
 9 | ...     from lxml.html import tostring
10 | ...     s = tostring(el)
11 | ...     if not isinstance(s, str):
12 | ...         s = s.decode('UTF-8')
13 | ...     return s
14 | 
15 | >>> doc_embed = '''<div>
16 | ... <embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
17 | ... <embed src="http://anothersite.com/v/another"></embed>
18 | ... <script src="http://www.youtube.com/example.js"></script>
19 | ... <script src="/something-else.js"></script>
20 | ... </div>'''
21 | >>> print(tostring(fromstring(doc_embed)))
22 | <div>
23 | <embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
24 | <embed src="http://anothersite.com/v/another"></embed>
25 | <script src="http://www.youtube.com/example.js"></script>
26 | <script src="/something-else.js"></script>
27 | </div>
28 | >>> print(Cleaner().clean_html(doc_embed))
29 | <div>
30 | </div>
31 | >>> print(Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed))
32 | <div>
33 | <embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
34 | </div>
35 | >>> print(Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed))
36 | <div>
37 | <embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
38 | <script src="http://www.youtube.com/example.js"></script>
39 | </div>
40 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_diff.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest, doctest
 3 | 
 4 | from lxml.html import diff
 5 | 
 6 | def test_suite():
 7 |     suite = unittest.TestSuite()
 8 |     if sys.version_info >= (2,4):
 9 |         suite.addTests([make_doctest('test_diff.txt'),
10 |                         doctest.DocTestSuite(diff)])
11 |     return suite
12 | 
13 | if __name__ == '__main__':
14 |     unittest.main()
15 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_elementsoup.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest, HelperTestCase
 3 | 
 4 | try:
 5 |     import BeautifulSoup
 6 |     BS_INSTALLED = True
 7 | except ImportError:
 8 |     BS_INSTALLED = False
 9 | 
10 | if BS_INSTALLED:
11 |     class SoupParserTestCase(HelperTestCase):
12 |         from lxml.html import soupparser
13 | 
14 |         def test_broken_attribute(self):
15 |             html = """\
16 |               <html><head></head><body>
17 |                 <form><input type='text' disabled size='10'></form>
18 |               </body></html>
19 |             """
20 |             root = self.soupparser.fromstring(html)
21 |             self.assertTrue(root.find('.//input').get('disabled') is not None)
22 | 
23 | 
24 | def test_suite():
25 |     suite = unittest.TestSuite()
26 |     if BS_INSTALLED:
27 |         suite.addTests([unittest.makeSuite(SoupParserTestCase)])
28 |         if sys.version_info[0] < 3:
29 |             suite.addTests([make_doctest('../../../../doc/elementsoup.txt')])
30 |     return suite
31 | 
32 | if __name__ == '__main__':
33 |     unittest.main()
34 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_feedparser_data.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | import re
 4 | try:
 5 |     from rfc822 import Message
 6 | except ImportError:
 7 |     # Python 3
 8 |     from email import message_from_file as Message
 9 | import unittest
10 | from lxml.tests.common_imports import doctest
11 | if sys.version_info >= (2,4):
12 |     from lxml.doctestcompare import LHTMLOutputChecker
13 | 
14 | from lxml.html.clean import clean, Cleaner
15 | 
16 | # Directories (next to this module) that are scanned for ``*.data`` test files.
17 | feed_dirs = [
18 |     os.path.join(os.path.dirname(__file__), 'feedparser-data'),
19 |     os.path.join(os.path.dirname(__file__), 'hackers-org-data'),
20 |     ]
21 | # Run of five or more dashes: separates a test's input from its expected output.
22 | bar_re = re.compile(r"-----+")
21 | 
22 | class DummyInput:
23 |     def __init__(self, **kw):
24 |         for name, value in kw.items():
25 |             setattr(self, name, value)
26 | 
27 | class FeedTestCase(unittest.TestCase):
28 | 
29 |     def __init__(self, filename):
30 |         self.filename = filename
31 |         unittest.TestCase.__init__(self)
32 | 
33 |     def parse(self):
34 |         f = open(self.filename, 'r')
35 |         headers = Message(f)
36 |         c = f.read()
37 |         f.close()
38 |         if not c.strip():
39 |             c = headers.get_payload()
40 |         if not headers.keys():
41 |             raise Exception(
42 |                 "File %s has no headers" % self.filename)
43 |         self.description = headers['Description']
44 |         self.expect = headers.get('Expect', '')
45 |         self.ignore = headers.get('Ignore')
46 |         self.options = [
47 |             o.strip() for o in headers.get('Options', '').split(',')
48 |             if o.strip()]
49 |         parts = bar_re.split(c)
50 |         self.input = parts[0].rstrip() + '\n'
51 |         if parts[1:]:
52 |             self.expect = parts[1].rstrip() + '\n'
53 |         else:
54 |             self.expect = None
55 | 
56 |     def runTest(self):
57 |         self.parse()
58 |         if self.ignore:
59 |             # We've marked this test to be ignored.
60 |             return
61 |         kw = {}
62 |         for name in self.options:
63 |             if name.startswith('-'):
64 |                 kw[name[1:]] = False
65 |             else:
66 |                 kw[name] = True
67 |         if kw.get('clean', True):
68 |             transformed = Cleaner(**kw).clean_html(self.input)
69 |         else:
70 |             transformed = self.input
71 |         assert self.expect is not None, (
72 |             "No expected output in %s" % self.filename)
73 |         checker = LHTMLOutputChecker()
74 |         if not checker.check_output(self.expect, transformed, 0):
75 |             result = checker.output_difference(
76 |                 DummyInput(want=self.expect), transformed, 0)
77 |             #result += '\noptions: %s %r' % (', '.join(self.options), kw)
78 |             #result += repr(transformed)
79 |             raise Exception("\n"+result)
80 | 
81 |     def shortDescription(self):
82 |         return self.filename
83 | 
84 | def test_suite():
85 |     suite = unittest.TestSuite()
86 |     if sys.version_info >= (2,4):
87 |         for dir in feed_dirs:
88 |             for fn in os.listdir(dir):
89 |                 fn = os.path.join(dir, fn)
90 |                 if fn.endswith('.data'):
91 |                     case = FeedTestCase(fn)
92 |                     suite.addTests([case])
93 |                     # This is my lazy way of stopping on first error:
94 |                     try:
95 |                         case.runTest()
96 |                     except:
97 |                         break
98 |     return suite
99 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_formfill.py:
--------------------------------------------------------------------------------
1 | import unittest, sys
2 | from lxml.tests.common_imports import make_doctest
3 | 
4 | def test_suite():
5 |     suite = unittest.TestSuite()
6 |     if sys.version_info >= (2,4):
7 |         suite.addTests([make_doctest('test_formfill.txt')])
8 |     return suite
9 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_formfill.txt:
--------------------------------------------------------------------------------
  1 | Some basic imports:
  2 | 
  3 |     >>> from lxml.html import usedoctest
  4 |     >>> from lxml.html.formfill import fill_form_html
  5 | 
  6 | The simplest kind of filling is just filling an input with a value:
  7 | 
  8 |     >>> print(fill_form_html('''
  9 |     ... <form><input type="text" name="foo"></form>''', dict(foo='bar')))
 10 |     <form><input type="text" name="foo" value="bar"></form>
 11 |     
 12 | You can also fill multiple inputs, like:
 13 | 
 14 |     >>> print(fill_form_html('''
 15 |     ... <form>
 16 |     ...   <input type="text" name="foo">
 17 |     ...   <input type="text" name="foo">
 18 |     ... </form>''', dict(foo=['bar1', 'bar2'])))
 19 |     <form>
 20 |       <input type="text" name="foo" value="bar1">
 21 |       <input type="text" name="foo" value="bar2">
 22 |     </form>
 23 | 
 24 | Checkboxes can work either as boolean true/false, or be selected based
 25 | on their inclusion in a set of values::
 26 | 
 27 |     >>> print(fill_form_html('''
 28 |     ... <form>
 29 |     ...   Would you like to be spammed?
 30 |     ...   <input type="checkbox" name="spam_me"> <br>
 31 |     ...   Spam you'd like to receive:<br>
 32 |     ...   Viagra spam:
 33 |     ...       <input type="checkbox" name="type" value="viagra"><br>
 34 |     ...   Stock spam:
 35 |     ...       <input type="checkbox" name="type" value="stock"><br>
 36 |     ...   Other spam:
 37 |     ...       <input type="checkbox" name="type" value="other"><br>
 38 |     ...   <input type="submit" value="Spam!">
 39 |     ... </form>''', dict(spam_me=True, type=['viagra', 'other'])))
 40 |     <form>
 41 |       Would you like to be spammed?
 42 |       <input type="checkbox" name="spam_me" checked> <br>
 43 |       Spam you'd like to receive:<br>
 44 |       Viagra spam:
 45 |           <input type="checkbox" name="type" value="viagra" checked><br>
 46 |       Stock spam:
 47 |           <input type="checkbox" name="type" value="stock"><br>
 48 |       Other spam:
 49 |           <input type="checkbox" name="type" value="other" checked><br>
 50 |       <input type="submit" value="Spam!">
 51 |     </form>
 52 | 
 53 | FIXME: I need to test more of this.  But I'm lazy and want to use the
 54 | coverage report for some of this.
 55 | 
 56 | 
 57 | This module also allows you to add error messages to the form.  The errors
 58 | add an "error" class to the input fields, and any labels if the field
 59 | has a label.  It also inserts an error message into the form, using a
 60 | function you can provide (or the default function).
 61 | 
 62 | Example::
 63 | 
 64 |     >>> from lxml.html.formfill import insert_errors_html
 65 |     >>> print(insert_errors_html('''
 66 |     ... <form>
 67 |     ...   <fieldset id="fieldset">
 68 |     ...     <input name="v1"><br>
 69 |     ...     <label for="v2">label</label>
 70 |     ...     <input name="v2" id="v2"><br>
 71 |     ...   </fieldset>
 72 |     ...   <input name="v3" class="foo">
 73 |     ...   <input name="v3" class="foo">
 74 |     ...   <input name="v4">
 75 |     ...   <input name="v4">
 76 |     ... </form>''', {
 77 |     ...   'v1': "err1",
 78 |     ...   'v2': "err2",
 79 |     ...   'v3': [None, "err3-2"],
 80 |     ...   'v4': "err4",
 81 |     ...   None: 'general error',
 82 |     ...   '#fieldset': 'area error',
 83 |     ... }))
 84 |     <form>
 85 |       <div class="error-message error-block">general error</div>
 86 |       <fieldset id="fieldset" class="error">
 87 |         <div class="error-message error-block">area error</div>
 88 |         <div class="error-message">err1</div>
 89 |         <input name="v1" class="error"><br>
 90 |         <label for="v2" class="error">label</label>
 91 |         <div class="error-message">err2</div>
 92 |         <input name="v2" id="v2" class="error"><br>
 93 |       </fieldset>
 94 |       <input name="v3" class="foo">
 95 |       <div class="error-message">err3-2</div>
 96 |       <input name="v3" class="foo error">
 97 |       <div class="error-message">err4</div>
 98 |       <input name="v4" class="error">
 99 |       <input name="v4">
100 |     </form>
101 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_forms.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest
 3 | 
 4 | def test_suite():
 5 |     suite = unittest.TestSuite()
 6 |     if sys.version_info >= (2,4):
 7 |         suite.addTests([make_doctest('test_forms.txt')])
 8 |     return suite
 9 | 
10 | if __name__ == '__main__':
11 |     unittest.main()
12 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_frames.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest, doctest
 3 | import lxml.html
 4 | from lxml.html import html_parser, XHTML_NAMESPACE
 5 | 
 6 | class FrameTest(unittest.TestCase):
 7 | 
 8 |     def test_parse_fragments_fromstring(self):
 9 |         parser = lxml.html.HTMLParser(encoding='utf-8', remove_comments=True)
10 |         html = """<frameset>
11 |             <frame src="main.php" name="srcpg" id="srcpg" frameborder="0" rolling="Auto" marginwidth="" marginheight="0">
12 |         </frameset>"""
13 |         etree_document = lxml.html.fragments_fromstring(html, parser=parser)
14 |         self.assertEqual(len(etree_document), 1)
15 |         root = etree_document[0]
16 |         self.assertEqual(root.tag, "frameset")
17 |         frame_element = root[0]
18 |         self.assertEqual(frame_element.tag, 'frame')
19 | 
20 |     def test_parse_fromstring(self):
21 |         parser = lxml.html.HTMLParser(encoding='utf-8', remove_comments=True)
22 |         html = """<html><frameset>
23 |             <frame src="main.php" name="srcpg" id="srcpg" frameborder="0" rolling="Auto" marginwidth="" marginheight="0">
24 |         </frameset></html>"""
25 |         etree_document = lxml.html.fromstring(html, parser=parser)
26 |         self.assertEqual(etree_document.tag, 'html')
27 |         self.assertEqual(len(etree_document), 1)
28 |         frameset_element = etree_document[0]
29 |         self.assertEqual(len(frameset_element), 1)
30 |         frame_element = frameset_element[0]
31 |         self.assertEqual(frame_element.tag, 'frame')
32 | 
33 | 
34 | def test_suite():
35 |     loader = unittest.TestLoader()
36 |     return loader.loadTestsFromModule(sys.modules[__name__])


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_rewritelinks.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest
 3 | 
 4 | def test_suite():
 5 |     suite = unittest.TestSuite()
 6 |     if sys.version_info >= (2,4):
 7 |         suite.addTests([make_doctest('test_rewritelinks.txt')])
 8 |     return suite
 9 | 
10 | if __name__ == '__main__':
11 |     unittest.main()
12 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_xhtml.py:
--------------------------------------------------------------------------------
 1 | import unittest, sys
 2 | from lxml.tests.common_imports import make_doctest
 3 | import lxml.html
 4 | 
 5 | def test_suite():
 6 |     suite = unittest.TestSuite()
 7 |     suite.addTests([make_doctest('test_xhtml.txt')])
 8 |     return suite
 9 | 
10 | if __name__ == '__main__':
11 |     unittest.main()
12 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/test_xhtml.txt:
--------------------------------------------------------------------------------
 1 |     >>> from lxml.html import document_fromstring, fragment_fromstring, tostring
 2 | 
 3 | lxml.html has two parsers, one for HTML, one for XHTML:
 4 | 
 5 |     >>> from lxml.html import HTMLParser, XHTMLParser
 6 |     >>> html = "<html><body><p>Hi!</p></body></html>"
 7 | 
 8 |     >>> root = document_fromstring(html, parser=HTMLParser())
 9 |     >>> print(root.tag)
10 |     html
11 | 
12 |     >>> root = document_fromstring(html, parser=XHTMLParser())
13 |     >>> print(root.tag)
14 |     html
15 | 
16 | There are two functions for converting between HTML and XHTML:
17 | 
18 |     >>> from lxml.html import xhtml_to_html, html_to_xhtml
19 | 
20 |     >>> doc = document_fromstring(html, parser=HTMLParser())
21 |     >>> tostring(doc)
22 |     b'<html><body><p>Hi!</p></body></html>'
23 | 
24 |     >>> html_to_xhtml(doc)
25 |     >>> tostring(doc)
26 |     b'<html:html xmlns:html="http://www.w3.org/1999/xhtml"><html:body><html:p>Hi!</html:p></html:body></html:html>'
27 | 
28 |     >>> xhtml_to_html(doc)
29 |     >>> tostring(doc)
30 |     b'<html xmlns:html="http://www.w3.org/1999/xhtml"><body><p>Hi!</p></body></html>'
31 | 


--------------------------------------------------------------------------------
/src/lxml/html/tests/transform_feedparser_data.py:
--------------------------------------------------------------------------------
  1 | """
  2 | This takes the feedparser tests from here:
  3 | 
  4 |   http://feedparser.org/tests/wellformed/sanitize/
  5 | 
  6 | and rewrites them to be easier to handle (not using the internal model
  7 | of feedparser).  The input format is::
  8 | 
  9 |   <!--
 10 |   Description: {description}
 11 |   Expect: {expression}
 12 |   -->
 13 |   ...
 14 |   <content ...>{content}</content>
 15 |   ...
 16 | 
 17 | The Expect expression is checked for
 18 | ``entries[0]['content'][0]['value'] == {data}``.
 19 | 
 20 | The output format is::
 21 | 
 22 |   Description: {description}
 23 |   Expect: {expression} (if data couldn't be parsed)
 24 |   Options: 
 25 | 
 26 |   {content, unescaped}
 27 |   ----------
 28 |   {data, unescaped, if found}
 29 | 
 30 | """
 31 | 
 32 | import re
 33 | import os
 34 | import traceback
 35 | 
# Captures the text following "Description:" up to the end of that line.
_desc_re = re.compile(r'\s*Description:\s*(.*)')
# Captures the whole expectation expression following "Expect:".
_expect_re = re.compile(r'\s*Expect:\s*(.*)')
# Matches entry-level expectations, either ``entries[0]['<key>'] == <value>``
# or ``entries[0]['<key>'][0]['value'] == <value>``; group 1 is <value>.
_data_expect_re = re.compile(r"entries\[0\]\['[^']+'\](?:\[0\]\['value'\])?\s*==\s*(.*)")
# Matches feed-level expectations ``feed['<key>'] == <value>``; group 1 is
# <value>.
_feed_data_expect_re = re.compile(r"feed\['[^']+'\]\s*==\s*(.*)")
 40 | 
 41 | def parse_content(content):
 42 |     match = _desc_re.search(content)
 43 |     desc = match.group(1)
 44 |     match = _expect_re.search(content)
 45 |     expect = match.group(1)
 46 |     data = None
 47 |     for regex in [_data_expect_re, _feed_data_expect_re]:
 48 |         match = regex.search(expect)
 49 |         if match:
 50 |             # Icky, but I'll trust it
 51 |             data = eval(match.group(1).strip())
 52 |             break
 53 |     c = None
 54 |     for tag in ['content', 'summary', 'title', 'copyright', 'tagline', 'info', 'subtitle', 'fullitem', 'body', 'description', 'content:encoded']:
 55 |         regex = re.compile(r"<%s.*?>(.*)</%s>" % (tag, tag), re.S)
 56 |         match = regex.search(content)
 57 |         if match:
 58 |             c = match.group(1)
 59 |             break
 60 |     assert c is not None
 61 |     # Seems like body isn't quoted
 62 |     if tag != 'body':
 63 |         c = c.replace('&lt;', '<')
 64 |         c = c.replace('&amp;', '&')
 65 |     # FIXME: I should really do more unescaping...
 66 |     return {
 67 |         'Description': desc,
 68 |         'Expect': expect,
 69 |         'data': data,
 70 |         'content': c}
 71 | 
 72 | def serialize_content(d):
 73 |     s = '''\
 74 | Description: %(Description)s
 75 | Expect: %(Expect)s
 76 | Options: 
 77 | 
 78 | %(content)s
 79 | ''' % d
 80 |     if d.get('data') is not None:
 81 |         s += '----------\n%s' % d['data']
 82 |     return s
 83 | 
 84 | def translate_file(filename):
 85 |     f = open(filename, 'rb')
 86 |     c = f.read()
 87 |     f.close()
 88 |     try:
 89 |         output = serialize_content(parse_content(c))
 90 |     except:
 91 |         print('Bad data in %s:' % filename)
 92 |         print(c)
 93 |         traceback.print_exc()
 94 |         print('-'*60)
 95 |         return
 96 |     new = os.path.splitext(filename)[0] + '.data'
 97 |     f = open(new, 'wb')
 98 |     f.write(output)
 99 |     f.close()
100 | 
def translate_all(dir):
    """Run ``translate_file`` on every ``.xml`` entry directly inside ``dir``."""
    candidates = [os.path.join(dir, name) for name in os.listdir(dir)]
    for path in candidates:
        if not path.endswith('.xml'):
            continue
        translate_file(path)
106 |         
if __name__ == '__main__':
    # Convert every test file in the 'feedparser-data' directory located
    # next to this script.
    translate_all(os.path.join(os.path.dirname(__file__), 'feedparser-data'))
110 | 
111 | 


--------------------------------------------------------------------------------
/src/lxml/html/usedoctest.py:
--------------------------------------------------------------------------------
 1 | """Doctest module for HTML comparison.
 2 | 
 3 | Usage::
 4 | 
 5 |    >>> import lxml.html.usedoctest
 6 |    >>> # now do your HTML doctests ...
 7 | 
 8 | See `lxml.doctestcompare`.
 9 | """
10 | 
11 | from lxml import doctestcompare
12 | 
13 | doctestcompare.temp_install(html=True, del_module=__name__)
14 | 


--------------------------------------------------------------------------------
/src/lxml/includes/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aglyzov/lxml/98efdc08f9886af20b48b225873f96a636f06056/src/lxml/includes/__init__.py


--------------------------------------------------------------------------------
/src/lxml/includes/c14n.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes.tree cimport xmlDoc, xmlOutputBuffer, xmlChar
 2 | from lxml.includes.xpath cimport xmlNodeSet
 3 | 
# Declarations of the C14N document serialisation functions from libxml2's
# "libxml/c14n.h".  All three take the same document / node set / exclusive /
# prefix list / with_comments arguments and differ only in the output target.
cdef extern from "libxml/c14n.h":
    # Output target: the xmlChar* that doc_txt_ptr points to.
    cdef int xmlC14NDocDumpMemory(xmlDoc* doc,
                                  xmlNodeSet* nodes,
                                  int exclusive,
                                  xmlChar** inclusive_ns_prefixes,
                                  int with_comments,
                                  xmlChar** doc_txt_ptr) nogil

    # Output target: a file name, with an additional compression argument.
    cdef int xmlC14NDocSave(xmlDoc* doc,
                            xmlNodeSet* nodes,
                            int exclusive,
                            xmlChar** inclusive_ns_prefixes,
                            int with_comments,
                            char* filename,
                            int compression) nogil

    # Output target: an existing xmlOutputBuffer.
    cdef int xmlC14NDocSaveTo(xmlDoc* doc,
                              xmlNodeSet* nodes,
                              int exclusive,
                              xmlChar** inclusive_ns_prefixes,
                              int with_comments,
                              xmlOutputBuffer* buffer) nogil
26 | 
27 | 


--------------------------------------------------------------------------------
/src/lxml/includes/config.pxd:
--------------------------------------------------------------------------------
# Boolean feature flags made available to Cython code from the C header
# "etree_defs.h".
cdef extern from "etree_defs.h":
    cdef bint ENABLE_THREADING
    cdef bint ENABLE_SCHEMATRON
4 | 


--------------------------------------------------------------------------------
/src/lxml/includes/dtdvalid.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes cimport tree
 2 | from lxml.includes.tree cimport xmlDoc, xmlDtd
 3 | 
# Declarations for DTD validation from libxml2's "libxml/valid.h".
cdef extern from "libxml/valid.h":
    # Declared without fields: only used through pointers.
    ctypedef struct xmlValidCtxt

    # Allocation / release of a validation context.
    cdef xmlValidCtxt* xmlNewValidCtxt() nogil
    cdef void xmlFreeValidCtxt(xmlValidCtxt* cur) nogil

    # Validates ``doc`` against ``dtd`` using the given context.
    cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd) nogil
11 | 


--------------------------------------------------------------------------------
/src/lxml/includes/htmlparser.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes.tree cimport xmlDoc, xmlDict
 2 | from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback
 3 | from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1
 4 | from lxml.includes.xmlerror cimport xmlError
 5 | 
# Declarations of the HTML parser API from libxml2's "libxml/HTMLparser.h".
# The HTML functions operate on the same xmlParserCtxt type that is
# cimported from lxml.includes.xmlparser.
cdef extern from "libxml/HTMLparser.h":
    # Parser option flags, passed as the ``options`` int of the functions
    # declared below.
    ctypedef enum htmlParserOption:
        HTML_PARSE_NOERROR    # suppress error reports
        HTML_PARSE_NOWARNING  # suppress warning reports
        HTML_PARSE_PEDANTIC   # pedantic error reporting
        HTML_PARSE_NOBLANKS   # remove blank nodes
        HTML_PARSE_NONET      # Forbid network access
        # libxml2 2.6.21+ only:
        HTML_PARSE_RECOVER    # Relaxed parsing
        HTML_PARSE_COMPACT    # compact small text nodes

    xmlSAXHandlerV1 htmlDefaultSAXHandler

    # Parser context creation (memory buffer, file, push parser).
    cdef xmlParserCtxt* htmlCreateMemoryParserCtxt(
        char* buffer, int size) nogil
    cdef xmlParserCtxt* htmlCreateFileParserCtxt(
        char* filename, char* encoding) nogil
    cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax,
                                                 void* user_data,
                                                 char* chunk, int size,
                                                 char* filename, int enc) nogil
    # Context release, reset and configuration, plus the parse entry points
    # that work on an already configured context.
    cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) nogil
    cdef void htmlCtxtReset(xmlParserCtxt* ctxt) nogil
    cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) nogil
    cdef int htmlParseDocument(xmlParserCtxt* ctxt) nogil
    cdef int htmlParseChunk(xmlParserCtxt* ctxt, 
                            char* chunk, int size, int terminate) nogil

    # Readers that take an existing context plus an options int and return
    # an xmlDoc*; they differ in the input source (file name, string, I/O
    # callbacks, memory buffer).
    cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt,
                                  char* filename, char* encoding,
                                  int options) nogil
    cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt,
                                 char* buffer, char* URL, char* encoding,
                                 int options) nogil
    cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt, 
                                xmlInputReadCallback ioread, 
                                xmlInputCloseCallback ioclose, 
                                void* ioctx,
                                char* URL, char* encoding,
                                int options) nogil
    cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt,
                                    char* buffer, int size,
                                    char* filename, char* encoding,
                                    int options) nogil
50 | 


--------------------------------------------------------------------------------
/src/lxml/includes/relaxng.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes cimport tree
 2 | from lxml.includes.tree cimport xmlDoc
 3 | from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
 4 | 
# Declarations of the RelaxNG schema API from libxml2's "libxml/relaxng.h".
cdef extern from "libxml/relaxng.h":
    # The three struct types are declared without fields: they are only
    # handled through pointers.
    ctypedef struct xmlRelaxNG
    ctypedef struct xmlRelaxNGParserCtxt
    
    ctypedef struct xmlRelaxNGValidCtxt
    
    # Validation error codes, with their numeric values spelled out.
    ctypedef enum xmlRelaxNGValidErr:
        XML_RELAXNG_OK = 0
        XML_RELAXNG_ERR_MEMORY = 1
        XML_RELAXNG_ERR_TYPE = 2
        XML_RELAXNG_ERR_TYPEVAL = 3
        XML_RELAXNG_ERR_DUPID = 4
        XML_RELAXNG_ERR_TYPECMP = 5
        XML_RELAXNG_ERR_NOSTATE = 6
        XML_RELAXNG_ERR_NODEFINE = 7
        XML_RELAXNG_ERR_LISTEXTRA = 8
        XML_RELAXNG_ERR_LISTEMPTY = 9
        XML_RELAXNG_ERR_INTERNODATA = 10
        XML_RELAXNG_ERR_INTERSEQ = 11
        XML_RELAXNG_ERR_INTEREXTRA = 12
        XML_RELAXNG_ERR_ELEMNAME = 13
        XML_RELAXNG_ERR_ATTRNAME = 14
        XML_RELAXNG_ERR_ELEMNONS = 15
        XML_RELAXNG_ERR_ATTRNONS = 16
        XML_RELAXNG_ERR_ELEMWRONGNS = 17
        XML_RELAXNG_ERR_ATTRWRONGNS = 18
        XML_RELAXNG_ERR_ELEMEXTRANS = 19
        XML_RELAXNG_ERR_ATTREXTRANS = 20
        XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
        XML_RELAXNG_ERR_NOELEM = 22
        XML_RELAXNG_ERR_NOTELEM = 23
        XML_RELAXNG_ERR_ATTRVALID = 24
        XML_RELAXNG_ERR_CONTENTVALID = 25
        XML_RELAXNG_ERR_EXTRACONTENT = 26
        XML_RELAXNG_ERR_INVALIDATTR = 27
        XML_RELAXNG_ERR_DATAELEM = 28
        XML_RELAXNG_ERR_VALELEM = 29
        XML_RELAXNG_ERR_LISTELEM = 30
        XML_RELAXNG_ERR_DATATYPE = 31
        XML_RELAXNG_ERR_VALUE = 32
        XML_RELAXNG_ERR_LIST = 33
        XML_RELAXNG_ERR_NOGRAMMAR = 34
        XML_RELAXNG_ERR_EXTRADATA = 35
        XML_RELAXNG_ERR_LACKDATA = 36
        XML_RELAXNG_ERR_INTERNAL = 37
        XML_RELAXNG_ERR_ELEMWRONG = 38
        XML_RELAXNG_ERR_TEXTWRONG = 39
        
    # Creation of parser/validation contexts, schema parsing and document
    # validation, followed by the matching free functions.
    cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) nogil
    cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) nogil
    cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) nogil
    cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) nogil
    cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) nogil
    cdef void xmlRelaxNGFree(xmlRelaxNG* schema) nogil
    cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) nogil
    cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) nogil

    # Registration of a structured error callback (plus a user pointer) on a
    # validation or parser context.
    cdef void xmlRelaxNGSetValidStructuredErrors(
        xmlRelaxNGValidCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) nogil
    cdef void xmlRelaxNGSetParserStructuredErrors(
        xmlRelaxNGParserCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) nogil
66 | 


--------------------------------------------------------------------------------
/src/lxml/includes/schematron.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes cimport tree, xmlerror
 2 | from lxml.includes.tree cimport xmlDoc, xmlDtd
 3 | 
 4 | cdef extern from "libxml/schematron.h":
 5 |     ctypedef struct xmlSchematron
 6 |     ctypedef struct xmlSchematronParserCtxt
 7 |     ctypedef struct xmlSchematronValidCtxt
 8 | 
 9 |     ctypedef enum xmlSchematronValidOptions:
10 |         XML_SCHEMATRON_OUT_QUIET     =    1 # quiet no report
11 |         XML_SCHEMATRON_OUT_TEXT      =    2 # build a textual report
12 |         XML_SCHEMATRON_OUT_XML       =    4 # output SVRL
13 |         XML_SCHEMATRON_OUT_ERROR     =    8 # output via xmlStructuredErrorFunc
14 |         XML_SCHEMATRON_OUT_FILE      =  256 # output to a file descriptor
15 |         XML_SCHEMATRON_OUT_BUFFER    =  512 # output to a buffer
16 |         XML_SCHEMATRON_OUT_IO        = 1024 # output to I/O mechanism
17 | 
18 |     cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(
19 |         xmlDoc* doc) nogil
20 |     cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(
21 |         char* filename) nogil
22 |     cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(
23 |         xmlSchematron* schema, int options) nogil
24 | 
25 |     cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) nogil
26 |     cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
27 |                                       xmlDoc* instance) nogil
28 | 
29 |     cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) nogil
30 |     cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) nogil
31 |     cdef void xmlSchematronFree(xmlSchematron* schema) nogil
32 |     cdef void xmlSchematronSetValidStructuredErrors(
33 |         xmlSchematronValidCtxt* ctxt,
34 |         xmlerror.xmlStructuredErrorFunc error_func, void *data)
35 | 


--------------------------------------------------------------------------------
/src/lxml/includes/uri.pxd:
--------------------------------------------------------------------------------
# Declarations for URI parsing from libxml2's "libxml/uri.h".
cdef extern from "libxml/uri.h":
    # Declared without fields: only handled through pointers.
    ctypedef struct xmlURI

    # Declared nogil, consistent with the other libxml2 declarations under
    # lxml/includes, so they can also be called without holding the GIL.
    cdef xmlURI* xmlParseURI(char* str) nogil
    cdef void xmlFreeURI(xmlURI* uri) nogil
6 | 


--------------------------------------------------------------------------------
/src/lxml/includes/xinclude.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes.tree cimport xmlDoc, xmlNode
 2 | 
# Declarations of the XInclude processing API from libxml2's
# "libxml/xinclude.h".
cdef extern from "libxml/xinclude.h":

    # Declared without fields: only handled through pointers.
    ctypedef struct xmlXIncludeCtxt

    # Processing entry points for a whole document (xmlDoc*) or a subtree
    # (xmlNode*), each with and without an extra parser options argument.
    cdef int xmlXIncludeProcess(xmlDoc* doc) nogil
    cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) nogil
    cdef int xmlXIncludeProcessTree(xmlNode* doc) nogil
    cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts) nogil

    # Context-based variant: create a context for a document, then set flags
    # on it or process a node through it.
    cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) nogil
    cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) nogil
    cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) nogil

    # libxml2 >= 2.6.27
    cdef int xmlXIncludeProcessFlagsData(
        xmlDoc* doc, int flags, void* data) nogil
19 | 


--------------------------------------------------------------------------------
/src/lxml/includes/xmlschema.pxd:
--------------------------------------------------------------------------------
 1 | from lxml.includes.tree cimport xmlDoc
 2 | from lxml.includes.xmlparser cimport xmlSAXHandler
 3 | from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
 4 | 
 5 | cdef extern from "libxml/xmlschemas.h":
 6 |     ctypedef struct xmlSchema
 7 |     ctypedef struct xmlSchemaParserCtxt
 8 | 
 9 |     ctypedef struct xmlSchemaSAXPlugStruct
10 |     ctypedef struct xmlSchemaValidCtxt
11 | 
12 |     ctypedef enum xmlSchemaValidOption:
13 |         XML_SCHEMA_VAL_VC_I_CREATE = 1
14 | 
15 |     cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil
16 |     cdef void xmlSchemaSetParserStructuredErrors(xmlSchemaParserCtxt* ctxt,
17 |         xmlStructuredErrorFunc serror, void *ctx)
18 |     cdef void xmlSchemaSetValidStructuredErrors(xmlSchemaValidCtxt* ctxt,
19 |         xmlStructuredErrorFunc serror, void *ctx)
20 | 
21 |     cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil
22 |     cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) nogil
23 |     cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) nogil
24 |     cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) nogil
25 |     cdef void xmlSchemaFree(xmlSchema* schema) nogil
26 |     cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil
27 |     cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil
28 |     cdef int xmlSchemaSetValidOptions(xmlSchemaValidCtxt* ctxt,
29 |                                       int options) nogil
30 | 
31 |     cdef xmlSchemaSAXPlugStruct* xmlSchemaSAXPlug(xmlSchemaValidCtxt* ctxt,
32 |                                                   xmlSAXHandler** sax,
33 |                                                   void** data) nogil
34 |     cdef int xmlSchemaSAXUnplug(xmlSchemaSAXPlugStruct* sax_plug)
35 |     cdef int xmlSchemaIsValid(xmlSchemaValidCtxt* ctxt)
36 | 


--------------------------------------------------------------------------------
/src/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 | 	Stylesheet for extracting Schematron information from a RELAX-NG schema.
 4 | 	Based on the stylesheet for extracting Schematron information from W3C XML Schema.
 5 | 	Created by Eddie Robertsson 2002/06/01
 6 |         2009/12/10      hj: changed Schematron namespace to ISO URI (Holger Joukl)
 7 | -->
 8 | <xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
 9 | xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:rng="http://relaxng.org/ns/structure/1.0">
10 | 	<!-- Set the output to be XML with an XML declaration and use indentation -->
11 | 	<xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
12 | 	<!-- -->
13 | 	<!-- match schema and call recursive template to extract included schemas -->
14 | 	<!-- -->
15 | 	<xsl:template match="/rng:grammar | /rng:element">
16 | 		<!-- call the schema definition template ... -->
17 | 		<xsl:call-template name="gatherSchema">
18 | 			<!-- ... with current node as the $schemas parameter ... -->
19 | 			<xsl:with-param name="schemas" select="."/>
20 | 			<!-- ... and any includes in the $includes parameter -->
21 | 			<xsl:with-param name="includes" select="document(/rng:grammar/rng:include/@href
22 | | //rng:externalRef/@href)"/>
23 | 		</xsl:call-template>
24 | 	</xsl:template>
25 | 	<!-- -->
26 | 	<!-- gather all included schemas into a single parameter variable -->
27 | 	<!-- -->
28 | 	<xsl:template name="gatherSchema">
29 | 		<xsl:param name="schemas"/>
30 | 		<xsl:param name="includes"/>
31 | 		<xsl:choose>
32 | 			<xsl:when test="count($schemas) &lt; count($schemas | $includes)">
33 | 				<!-- when $includes includes something new, recurse ... -->
34 | 				<xsl:call-template name="gatherSchema">
35 | 					<!-- ... with current $includes added to the $schemas parameter ... -->
36 | 					<xsl:with-param name="schemas" select="$schemas | $includes"/>
37 | 					<!-- ... and any *new* includes in the $includes parameter -->
38 | 					<xsl:with-param name="includes" select="document($includes/rng:grammar/rng:include/@href
39 | | $includes//rng:externalRef/@href)"/>
40 | 				</xsl:call-template>
41 | 			</xsl:when>
42 | 			<xsl:otherwise>
43 | 				<!-- we have the complete set of included schemas, so now let's output the embedded schematron -->
44 | 				<xsl:call-template name="output">
45 | 					<xsl:with-param name="schemas" select="$schemas"/>
46 | 				</xsl:call-template>
47 | 			</xsl:otherwise>
48 | 		</xsl:choose>
49 | 	</xsl:template>
50 | 	<!-- -->
51 | 	<!-- output the schematron information -->
52 | 	<!-- -->
53 | 	<xsl:template name="output">
54 | 		<xsl:param name="schemas"/>
55 | 		<!-- -->
56 | 		<sch:schema>
57 | 			<!-- get header-type elements - eg title and especially ns -->
58 | 			<!-- title (just one) -->
59 | 			<xsl:copy-of select="$schemas//sch:title[1]"/>
60 | 			<!-- get remaining schematron schema children -->
61 | 			<!-- get non-blank namespace elements, dropping duplicates -->
62 | 			<xsl:for-each select="$schemas//sch:ns">
63 | 				<xsl:if test="generate-id(.) = generate-id($schemas//sch:ns[@prefix = current()/@prefix][1])">
64 | 					<xsl:copy-of select="."/>
65 | 				</xsl:if>
66 | 			</xsl:for-each>
67 | 			<xsl:copy-of select="$schemas//sch:phase"/>
68 | 			<xsl:copy-of select="$schemas//sch:pattern"/>
69 | 			<sch:diagnostics>
70 | 				<xsl:copy-of select="$schemas//sch:diagnostics/*"/>
71 | 			</sch:diagnostics>
72 | 		</sch:schema>
73 | 	</xsl:template>
74 | 	<!-- -->
75 | </xsl:transform>
76 | 


--------------------------------------------------------------------------------
/src/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 |         based on an original transform by Eddie Robertsson
 4 |         2001/04/21      fn: added support for included schemas
 5 |         2001/06/27      er: changed XMl Schema prefix from xsd: to xs: and changed to the Rec namespace
 6 |         2009/12/10      hj: changed Schematron namespace to ISO URI (Holger Joukl)
 7 | -->
 8 | <xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" 
 9 | xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:xs="http://www.w3.org/2001/XMLSchema">
10 |         <!-- Set the output to be XML with an XML declaration and use indentation -->
11 |         <xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
12 |         <!-- -->
13 |         <!-- match schema and call recursive template to extract included schemas -->
14 |         <!-- -->
15 |         <xsl:template match="xs:schema">
16 |                 <!-- call the schema definition template ... -->
17 |                 <xsl:call-template name="gatherSchema">
18 |                         <!-- ... with the current root as the $schemas parameter ... -->
19 |                         <xsl:with-param name="schemas" select="/"/>
20 |                         <!-- ... and any includes in the $includes parameter -->
21 |                         <xsl:with-param name="includes" 
22 | 						select="document(/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
23 |                 </xsl:call-template>
24 |         </xsl:template>
25 |         <!-- -->
26 |         <!-- gather all included schemas into a single parameter variable -->
27 |         <!-- -->
28 |         <xsl:template name="gatherSchema">
29 |                 <xsl:param name="schemas"/>
30 |                 <xsl:param name="includes"/>
31 |                 <xsl:choose>
32 |                         <xsl:when test="count($schemas) &lt; count($schemas | $includes)">
33 |                                 <!-- when $includes includes something new, recurse ... -->
34 |                                 <xsl:call-template name="gatherSchema">
35 |                                         <!-- ... with current $includes added to the $schemas parameter ... -->
36 |                                         <xsl:with-param name="schemas" select="$schemas | $includes"/>
37 |                                         <!-- ... and any *new* includes in the $includes parameter -->
38 |                                         <xsl:with-param name="includes" 
39 | 										select="document($includes/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
40 |                                 </xsl:call-template>
41 |                         </xsl:when>
42 |                         <xsl:otherwise>
43 |                                 <!-- we have the complete set of included schemas, 
44 | 								so now let's output the embedded schematron -->
45 |                                 <xsl:call-template name="output">
46 |                                         <xsl:with-param name="schemas" select="$schemas"/>
47 |                                 </xsl:call-template>
48 |                         </xsl:otherwise>
49 |                 </xsl:choose>
50 |         </xsl:template>
51 |         <!-- -->
52 |         <!-- output the schematron information -->
53 |         <!-- -->
54 |         <xsl:template name="output">
55 |                 <xsl:param name="schemas"/>
56 |                 <!-- -->
57 |                 <sch:schema>
58 |                         <!-- get header-type elements - eg title and especially ns -->
59 |                         <!-- title (just one) -->
60 |                         <xsl:copy-of select="$schemas//xs:appinfo/sch:title[1]"/>
61 |                         <!-- get remaining schematron schema children -->
62 |                         <!-- get non-blank namespace elements, dropping duplicates -->
63 |                         <xsl:for-each select="$schemas//xs:appinfo/sch:ns">
64 |                                 <xsl:if test="generate-id(.) = 
65 | 								generate-id($schemas//xs:appinfo/sch:ns[@prefix = current()/@prefix][1])">
66 |                                         <xsl:copy-of select="."/>
67 |                                 </xsl:if>
68 |                         </xsl:for-each>
69 |                         <xsl:copy-of select="$schemas//xs:appinfo/sch:phase"/>
70 |                         <xsl:copy-of select="$schemas//xs:appinfo/sch:pattern"/>
71 |                         <sch:diagnostics>
72 |                                 <xsl:copy-of select="$schemas//xs:appinfo/sch:diagnostics/*"/>
73 |                         </sch:diagnostics>
74 |                 </sch:schema>
75 |         </xsl:template>
76 |         <!-- -->
77 | </xsl:transform>
78 | 


--------------------------------------------------------------------------------
/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" ?><?xar XSLT?>
 2 | <!-- Implmentation for the Schematron XML Schema Language.
 3 | 	http://www.ascc.net/xml/resource/schematron/schematron.html
 4 |  
 5 |  Copyright (c) 2000,2001 Rick Jelliffe and Academia Sinica Computing Center, Taiwan
 6 | 
 7 |  This software is provided 'as-is', without any express or implied warranty. 
 8 |  In no event will the authors be held liable for any damages arising from 
 9 |  the use of this software.
10 | 
11 |  Permission is granted to anyone to use this software for any purpose, 
12 |  including commercial applications, and to alter it and redistribute it freely,
13 |  subject to the following restrictions:
14 | 
15 |  1. The origin of this software must not be misrepresented; you must not claim
16 |  that you wrote the original software. If you use this software in a product, 
17 |  an acknowledgment in the product documentation would be appreciated but is 
18 |  not required.
19 | 
20 |  2. Altered source versions must be plainly marked as such, and must not be 
21 |  misrepresented as being the original software.
22 | 
23 |  3. This notice may not be removed or altered from any source distribution.
24 | -->
25 | 
26 | <!-- Schematron message -->
27 | 
28 | <xsl:stylesheet
29 |    version="1.0"
30 |    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
31 |    xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias">
32 | 
33 | <xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/>
34 | 
35 | <xsl:template name="process-prolog">
36 |    <axsl:output method="text" />
37 | </xsl:template>
38 | 
39 | <!-- use default rule for process-root:  copy contents / ignore title -->
40 | <!-- use default rule for process-pattern: ignore name and see -->
41 | <!-- use default rule for process-name:  output name -->
42 | <!-- use default rule for process-assert and process-report:
43 |      call process-message -->
44 | 
45 | <xsl:template name="process-message">
46 |    <xsl:param name="pattern" />
47 |    <xsl:param name="role" />
48 |    <axsl:message>
49 |       <xsl:apply-templates mode="text"  
50 |       /> (<xsl:value-of select="$pattern" />
51 |       <xsl:if test="$role"> / <xsl:value-of select="$role" />
52 |       </xsl:if>)</axsl:message>
53 | </xsl:template>
54 | 
55 | </xsl:stylesheet>


--------------------------------------------------------------------------------
/src/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt:
--------------------------------------------------------------------------------
 1 | ISO SCHEMATRON 2009
 2 | 
 3 | XSLT implementation by Rick Jelliffe with assistance from members of the Schematron-love-in mailing list.
 4 | 
 5 | 2009-03-18
 6 | 
 7 | Two distributions are available. One is for XSLT1 engines. 
 8 | The other is for XSLT2 engines, such as SAXON 9.
 9 | 
10 | 
11 | This version of Schematron splits the process into a pipeline of several different XSLT stages.
12 | 
13 | 1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.  
14 | This is a macro processor to assemble the schema from various parts. 
15 | If your schema is not in separate parts, you can skip this stage.
16 | 
17 | 2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.  
18 | This is a macro processor to convert abstract patterns to real patterns. 
19 | If your schema does not use abstract patterns, you can skip this
20 | stage.
21 | 
22 | 3) Third, compile the Schematron schema into an XSLT script. 
23 | This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl 
24 | (which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl).
25 | However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
26 | If your schema uses Schematron phases, supply these as command line/invocation parameters
27 | to this process.
28 | 
29 | 4) Fourth, run the script generated by stage 3 against the document being validated.
30 | If you are using the SVRL script, then the output of validation will be an XML document.
31 | If your schema uses Schematron parameters, supply these as command line/invocation parameters
32 | to this process. 
33 | 
34 | 
35 | The XSLT2 distribution also features several next generation features, 
36 | such as validating multiple documents. See the source code for details.
37 | 
38 | Schematron assertions can be written in any language, of course; the file
39 | sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
40 | in English, and this can be used as template to localize the skeleton's
41 | error messages. Note that typically programming errors in Schematron are XPath
42 | errors, which requires localized messages from the XSLT engine.
43 | 
44 | ANT
45 | ---
46 | To give an example of how to process a document, here is a sample ANT task.
47 | 
48 | <target  name="schematron-compile-test" >
49 | 
50 | 	   <!-- expand inclusions -->
51 | 	   <xslt basedir="test/schematron"
52 | 	   		style="iso_dsdl_include.xsl" in="test.sch"  out="test1.sch"> 
53 | 	   				<classpath>
54 | 	   					<pathelement location="${lib.dir}/saxon9.jar"/>
55 | 	   				</classpath>
56 | 	   </xslt>
57 | 
58 | 	   <!-- expand abstract patterns -->
59 | 	   <xslt basedir="test/schematron"
60 | 	   		style="iso_abstract_expand.xsl" in="test1.sch"  out="test2.sch"> 
61 | 	   				<classpath>
62 | 	   					<pathelement location="${lib.dir}/saxon9.jar"/>
63 | 	   				</classpath>
64 | 	   </xslt>
65 | 
66 | 
67 | 
68 | 	   <!-- compile it -->
69 | 	   <xslt basedir="test/schematron"
70 | 	   		style="iso_svrl_for_xslt2.xsl" in="test2.sch"  out="test.xsl"> 
71 | 	   				<classpath>
72 | 	   					<pathelement location="${lib.dir}/saxon9.jar"/>
73 | 	   				</classpath>
74 | 	   </xslt>
75 | 	   
76 | 	   <!-- validate -->
77 | 	   <xslt basedir="test/schematron"
78 | 		   		style="test.xsl" in="instance.xml"  out="instance.svrlt"> 
79 | 		   				<classpath>
80 | 		   					<pathelement location="${lib.dir}/saxon9.jar"/>
81 | 		   				</classpath>
82 | 	</xslt>
83 | 		</target>


--------------------------------------------------------------------------------
/src/lxml/pyclasslookup.py:
--------------------------------------------------------------------------------
# dummy module for backwards compatibility

# Absolute import: the implicit relative form ``from etree import ...`` is
# only resolved on Python 2 and raises ImportError on Python 3.
from lxml.etree import PythonElementClassLookup
4 | 


--------------------------------------------------------------------------------
/src/lxml/tests/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | The lxml test suite for lxml, ElementTree and cElementTree.
3 | """
4 | 
5 | 


--------------------------------------------------------------------------------
/src/lxml/tests/dummy_http_server.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Simple HTTP request dumper for tests in Python 2.5+.
 3 | """
 4 | 
 5 | import sys
 6 | from contextlib import contextmanager
 7 | 
 8 | try:
 9 |     import urlparse
10 | except ImportError:
11 |     # Python 3
12 |     import urllib.parse as urlparse
13 | 
14 | 
@contextmanager
def webserver(app, port=0, host=None):
    """Context manager entry point for the 'with' statement.

    Serves the WSGI application 'app' from a background thread while the
    'with' body runs and yields the base URL of the server
    ('http://host:port/').

    Pass 0 as port number to dynamically allocate a free port.
    The host defaults to '127.0.0.1'.

    Usage:

    with webserver(wsgi_app_function, 8080) as host_url:
        do_ws_calls(host_url)
    """
    server = build_web_server(app, port, host or '127.0.0.1')
    try:
        # read the address back from the socket: the port may have been
        # allocated dynamically
        host, port = server.socket.getsockname()

        import threading
        thread = threading.Thread(target=server.serve_forever,
                                  kwargs={'poll_interval': 0.5})
        thread.setDaemon(True)
        thread.start()
        try:
            yield 'http://%s:%s/' % (host, port)  # yield control to 'with' body
        finally:
            # only reached once the serving thread was started, as
            # shutdown() waits for the serve_forever() loop to finish
            server.shutdown()
    finally:
        # shutdown() only stops the loop; the listening socket has to be
        # released separately
        server.server_close()
38 | 
39 | 
40 | try:
41 |     from SocketServer import ThreadingMixIn
42 | except ImportError:
43 |     # Python 3
44 |     from socketserver import ThreadingMixIn
45 | 
46 | import wsgiref.simple_server as wsgiserver
class WebServer(ThreadingMixIn, wsgiserver.WSGIServer):
    """A web server that starts a new thread for each request.

    The mix-in must precede the server class in the list of bases: it works
    by overriding process_request(), and that override is only found first
    in the method resolution order when the mix-in comes first.
    """
    # request threads are daemons, like the thread that runs the server
    # loop, so they cannot keep the interpreter alive after a test run
    daemon_threads = True
50 | 
51 | 
class _RequestHandler(wsgiserver.WSGIRequestHandler):
    """Request handler that avoids stderr and does not log requests."""

    def get_stderr(self):
        """Return sys.stdout so that nothing is written to stderr."""
        replacement_stream = sys.stdout
        return replacement_stream

    def log_message(self, format, *args):
        """Drop the log message instead of writing it anywhere."""
        return None
60 | 
61 | 
def build_web_server(app, port, host=None):
    """Create a WSGI server for 'app' on the given port and host.

    A missing or empty host is passed on as ''.  The server is built from
    the WebServer and _RequestHandler classes and is returned without
    being started.
    """
    bind_host = host if host else ''
    return wsgiserver.make_server(
        bind_host, port, app,
        server_class=WebServer,
        handler_class=_RequestHandler)
68 | 
69 | 
class HTTPRequestCollector(object):
    """WSGI application that records requests and sends a fixed response.

    Each call appends a (path, query_items) tuple to 'requests', where
    query_items is the list of (name, value) pairs parsed from the query
    string.
    """

    def __init__(self, response_data, response_code=200, headers=()):
        """Set up the fixed response.

        response_data -- the body returned for every request
        response_code -- the status code used in the status line
        headers -- iterable of (name, value) response headers, or None
        """
        self.requests = []
        self.response_code = response_code
        self.response_data = response_data
        self.headers = list(headers or ())

    def __call__(self, environ, start_response):
        """Record path and query of the request, then send the response."""
        # QUERY_STRING may be missing from a WSGI environment; fall back
        # to '' so that parse_qsl() never receives None
        query_string = environ.get('QUERY_STRING') or ''
        self.requests.append((
            environ.get('PATH_INFO'),
            urlparse.parse_qsl(query_string)))
        # the reason phrase is always 'OK', whatever the status code is
        start_response('%s OK' % self.response_code, self.headers)
        return [self.response_data]
83 | 


--------------------------------------------------------------------------------
/src/lxml/tests/include/test_xinclude.xml:
--------------------------------------------------------------------------------
1 | <doc xmlns:xi="http://www.w3.org/2001/XInclude">
2 | <foo/>
3 | <xi:include href="../test.xml" />
4 | </doc>


--------------------------------------------------------------------------------
/src/lxml/tests/test-document.xslt:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <xsl:stylesheet version="1.0"
 3 |    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
 4 | 
 5 |   <xsl:template match="/">
 6 |     <test>
 7 |       <xsl:copy-of select="document('')"/>
 8 |     </test>
 9 |   </xsl:template>
10 | </xsl:stylesheet>
11 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test-string.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <a>Søk på nettet</a>
3 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test.dtd:
--------------------------------------------------------------------------------
 1 | <!ELEMENT a (b)>
 2 | <!ATTLIST a
 3 |     default (valueA|valueB) "valueA"
 4 | >
 5 | 
 6 | <!ELEMENT b EMPTY>
 7 | <!ATTLIST b
 8 |     default (valueA|valueB) "valueB"
 9 | >
10 | 
11 | <!ENTITY c "&#42;">
12 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test.sch:
--------------------------------------------------------------------------------
1 | <sch:schema xmlns:sch="http://purl.oclc.org/dsdl/schematron">
2 |   <sch:pattern id="number_of_entries">
3 |     <sch:title>mandatory number_of_entries tests</sch:title>
4 |     <sch:rule context="number_of_entries">
5 |       <sch:assert test="text()=count(../entries/entry)">[ERROR] number_of_entries (<sch:value-of select="."/>) must equal the number of entries/entry elements (<sch:value-of select="count(../entries/entry)"/>)</sch:assert>
6 |     </sch:rule>
7 |   </sch:pattern>
8 | </sch:schema>
9 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test.xml:
--------------------------------------------------------------------------------
1 | <!DOCTYPE a SYSTEM "test.dtd">
2 | <a><b></b></a>
3 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test.xsd:
--------------------------------------------------------------------------------
1 | <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">
2 |   <xsd:element name="a" type="AType"/>
3 |   <xsd:complexType name="AType">
4 |     <xsd:sequence>
5 |       <xsd:element name="b" type="xsd:string" />
6 |     </xsd:sequence>
7 |   </xsd:complexType>
8 | </xsd:schema>
9 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test1.rng:
--------------------------------------------------------------------------------
1 | <grammar ns="http://www.w3.org/1999/xhtml"
2 |          xmlns="http://relaxng.org/ns/structure/1.0">
3 | 
4 | <include href="test2.rng"/>
5 | 
6 | </grammar>
7 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test1.xslt:
--------------------------------------------------------------------------------
 1 | <xsl:stylesheet 
 2 |   xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 3 | <xsl:include href="test2.xslt" />
 4 | 
 5 | <xsl:template match="/">
 6 | <p>Foo</p>
 7 | </xsl:template>
 8 | 
 9 | </xsl:stylesheet>
10 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test2.rng:
--------------------------------------------------------------------------------
 1 | <grammar xmlns="http://relaxng.org/ns/structure/1.0">
 2 | 
 3 | <start>
 4 |   <ref name="a" />
 5 | </start>
 6 | 
 7 | <define name="a">
 8 |   <element name="a">
 9 |   <text/>
10 |   </element>
11 | </define>
12 | 
13 | </grammar>
14 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test2.xslt:
--------------------------------------------------------------------------------
1 | <xsl:stylesheet 
2 |   xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
3 | 
4 | <xsl:template match="hello">
5 | <p>hello</p>
6 | </xsl:template>
7 | 
8 | </xsl:stylesheet>
9 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_broken.xml:
--------------------------------------------------------------------------------
1 | <a><b></c></b></a>


--------------------------------------------------------------------------------
/src/lxml/tests/test_builder.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import unittest
 3 | 
 4 | """
 5 | Tests that ElementMaker works properly.
 6 | """
 7 | 
 8 | import sys, os.path
 9 | from lxml import etree
10 | from lxml.builder import E
11 | 
12 | this_dir = os.path.dirname(__file__)
13 | if this_dir not in sys.path:
14 |     sys.path.insert(0, this_dir) # needed for Py3
15 | 
16 | from common_imports import HelperTestCase, BytesIO, _bytes
17 | 
class BuilderTestCase(HelperTestCase):
    """Checks which argument types the E element factory accepts."""
    etree = etree

    def test_build_from_xpath_result(self):
        # an instance of a str subclass ends up as the element text
        class StringSubclass(str):
            pass

        element = E.b(StringSubclass('Hello'))
        serialised = etree.tostring(element)
        self.assertEqual(_bytes('<b>Hello</b>'), serialised)

    def test_unknown_type_raises(self):
        # an object of an unrelated type is refused with a TypeError
        class UnknownType(object):
            pass

        unsupported = UnknownType()
        self.assertRaises(TypeError, E.b, unsupported)
30 | 
31 | 
def test_suite():
    """Return a suite with all tests of BuilderTestCase."""
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test*' methods
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(BuilderTestCase)])
    return suite

if __name__ == '__main__':
    print('to test use test.py %s' % __file__)
39 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_css.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | 
 3 | import lxml.html
 4 | 
 5 | from lxml.tests.common_imports import doctest, HelperTestCase, skipif
 6 | 
 7 | try:
 8 |     import cssselect
 9 | except ImportError:
10 |     cssselect = None
11 | 
12 | 
13 | HTML = '''
14 | <div>
15 |   <a href="foo">link</a>
16 |   <a>anchor</a>
17 | </div>
18 | '''
19 | 
20 | 
class CSSTestCase(HelperTestCase):
    """Checks Element.cssselect() against the small HTML fragment above."""

    pytestmark = skipif('cssselect is None')

    def test_cssselect(self):
        div, = lxml.html.fromstring(HTML).xpath('//div')

        def count(selector, expected_count, **kwargs):
            # the selector must match exactly 'expected_count' elements
            matches = div.cssselect(selector, **kwargs)
            self.assertEqual(len(matches), expected_count)

        for selector, expected in (('div', 1), ('a', 2), ('em', 0)):
            count(selector, expected)

        # Element names are case-insensitive in HTML
        count('DIV', 1)
        # ... but not in XHTML or XML
        for language in ('xhtml', 'xml'):
            count('DIV', 0, translator=language)

        # :contains() is case-insensitive in lxml
        for selector in (':contains("link")', ':contains("LInk")'):
            count(selector, 2)  # div, a
        # Whatever the document language
        for language in ('xhtml', 'xml'):
            count(':contains("LInk")', 2, translator=language)
        # ... but not in upstream cssselect
        import cssselect
        for selector, expected in ((':contains("link")', 2),
                                   (':contains("LInk")', 0)):
            count(selector, expected, translator=cssselect.HTMLTranslator())
51 | 
52 | 
def test_suite():
    """Return the doctests of lxml.cssselect plus CSSTestCase.

    The suite is empty when the external 'cssselect' package cannot be
    imported.
    """
    suite = unittest.TestSuite()
    try:
        import cssselect
    except ImportError:
        # no 'cssselect' installed
        print("Skipping tests in lxml.cssselect - external cssselect package is not installed")
        return suite

    import lxml.cssselect
    suite.addTests(doctest.DocTestSuite(lxml.cssselect))
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test*' methods
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(CSSTestCase)])
    return suite
66 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_errors.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import unittest, doctest
 3 | 
 4 | # These tests check that error handling in the Pyrex code is
 5 | # complete.
 6 | # It is likely that if there are errors, instead of failing the code
 7 | # will simply crash.
 8 | 
 9 | import sys, gc, os.path
10 | from lxml import etree
11 | 
12 | this_dir = os.path.dirname(__file__)
13 | if this_dir not in sys.path:
14 |     sys.path.insert(0, this_dir) # needed for Py3
15 | 
16 | from common_imports import HelperTestCase
17 | 
class ErrorTestCase(HelperTestCase):
    """Error handling checks: failures must raise, not crash."""
    etree = etree

    def test_bad_element(self):
        # attrib argument of Element() should be a dictionary, so if
        # we pass a string we should get an error.
        make_element = self.etree.Element
        self.assertRaises(TypeError, make_element, 'a', 'b')

    def test_empty_parse(self):
        # an empty input string is a syntax error
        parse_string = etree.fromstring
        self.assertRaises(etree.XMLSyntaxError, parse_string, '')

    def test_element_cyclic_gc_none(self):
        # test if cyclic reference can crash etree
        Element = self.etree.Element
        gc.collect()

        none_refs_before = sys.getrefcount(None)

        # a list that contains two elements and itself
        cycle = [Element('name'), Element('name')]
        cycle.append(cycle)

        del cycle
        gc.collect()

        # collecting the cycle must leave the refcount of None untouched
        self.assertEqual(sys.getrefcount(None), none_refs_before)
43 | 
def test_suite():
    """Return a suite with all tests of ErrorTestCase."""
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test*' methods
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(ErrorTestCase)])
    return suite

if __name__ == '__main__':
    print('to test use test.py %s' % __file__)
51 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_http_io.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | 
  3 | """
  4 | Web IO test cases that need Python 2.5+ (wsgiref)
  5 | """
  6 | 
  7 | from __future__ import with_statement
  8 | 
  9 | import unittest
 10 | import textwrap
 11 | import os
 12 | import sys
 13 | import gzip
 14 | 
 15 | this_dir = os.path.dirname(__file__)
 16 | if this_dir not in sys.path:
 17 |     sys.path.insert(0, this_dir)  # needed for Py3
 18 | 
 19 | from .common_imports import (
 20 |     etree, HelperTestCase, BytesIO, _bytes)
 21 | from .dummy_http_server import webserver, HTTPRequestCollector
 22 | 
 23 | 
 24 | class HttpIOTestCase(HelperTestCase):
 25 |     etree = etree
 26 | 
 27 |     def _parse_from_http(self, data, code=200, headers=None, parser=None):
 28 |         handler = HTTPRequestCollector(data, code, headers)
 29 |         with webserver(handler) as host_url:
 30 |             tree = self.etree.parse(host_url + 'TEST', parser=parser)
 31 |         self.assertEqual([('/TEST', [])], handler.requests)
 32 |         return tree
 33 | 
 34 |     def test_http_client(self):
 35 |         tree = self._parse_from_http(_bytes('<root><a/></root>'))
 36 |         self.assertEqual('root', tree.getroot().tag)
 37 |         self.assertEqual('a', tree.getroot()[0].tag)
 38 | 
 39 |     def test_http_client_404(self):
 40 |         try:
 41 |             self._parse_from_http(_bytes('<root/>'), code=404)
 42 |         except IOError:
 43 |             self.assertTrue(True)
 44 |         else:
 45 |             self.assertTrue(False, "expected IOError")
 46 | 
 47 |     def test_http_client_gzip(self):
 48 |         f = BytesIO()
 49 |         gz = gzip.GzipFile(fileobj=f, mode='w', filename='test.xml')
 50 |         gz.write(_bytes('<root><a/></root>'))
 51 |         gz.close()
 52 |         data = f.getvalue()
 53 |         del f, gz
 54 | 
 55 |         headers = [('Content-Encoding', 'gzip')]
 56 |         tree = self._parse_from_http(data, headers=headers)
 57 |         self.assertEqual('root', tree.getroot().tag)
 58 |         self.assertEqual('a', tree.getroot()[0].tag)
 59 | 
 60 |     def test_parser_input_mix(self):
 61 |         data = _bytes('<root><a/></root>')
 62 |         handler = HTTPRequestCollector(data)
 63 | 
 64 |         with webserver(handler) as host_url:
 65 |             tree = self.etree.parse(host_url)
 66 |             root = tree.getroot()
 67 |             self.assertEqual('a', root[0].tag)
 68 | 
 69 |             root = self.etree.fromstring(data)
 70 |             self.assertEqual('a', root[0].tag)
 71 | 
 72 |             tree = self.etree.parse(host_url)
 73 |             root = tree.getroot()
 74 |             self.assertEqual('a', root[0].tag)
 75 | 
 76 |             root = self.etree.fromstring(data)
 77 |             self.assertEqual('a', root[0].tag)
 78 | 
 79 |         root = self.etree.fromstring(data)
 80 |         self.assertEqual('a', root[0].tag)
 81 | 
 82 |     def test_network_dtd(self):
 83 |         data = [_bytes(textwrap.dedent(s)) for s in [
 84 |             # XML file
 85 |             '''\
 86 |             <?xml version="1.0"?>
 87 |             <!DOCTYPE root SYSTEM "./file.dtd">
 88 |             <root>&myentity;</root>
 89 |             ''',
 90 |             # DTD
 91 |             '<!ENTITY myentity "DEFINED">',
 92 |         ]]
 93 | 
 94 |         responses = []
 95 |         def handler(environ, start_response):
 96 |             start_response('200 OK', [])
 97 |             return [responses.pop()]
 98 | 
 99 |         with webserver(handler) as host_url:
100 |             # DTD network loading enabled
101 |             responses = data[::-1]
102 |             tree = self.etree.parse(
103 |                 host_url + 'dir/test.xml',
104 |                 parser=self.etree.XMLParser(
105 |                     load_dtd=True, no_network=False))
106 |             self.assertFalse(responses)  # all read
107 |             root = tree.getroot()
108 |             self.assertEqual('DEFINED', root.text)
109 | 
110 |             # DTD network loading disabled
111 |             responses = data[::-1]
112 |             try:
113 |                 self.etree.parse(
114 |                     host_url + 'dir/test.xml',
115 |                     parser=self.etree.XMLParser(
116 |                         load_dtd=True, no_network=True))
117 |             except self.etree.XMLSyntaxError:
118 |                 self.assertTrue("myentity" in str(sys.exc_info()[1]))
119 |             else:
120 |                 self.assertTrue(False)
121 |             self.assertEqual(1, len(responses))  # DTD not read
122 | 
123 | 
def test_suite():
    """Return a suite with all tests of HttpIOTestCase."""
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test*' methods
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(HttpIOTestCase)])
    return suite


if __name__ == '__main__':
    print('to test use test.py %s' % __file__)
132 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_import.xsd:
--------------------------------------------------------------------------------
 1 | <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
 2 | 	    targetNamespace="http://codespeak.net/lxml/schema/ns1"
 3 | 	    xmlns:a="http://codespeak.net/lxml/schema/ns"
 4 | 	    >
 5 |   <xsd:import
 6 | 	namespace="http://codespeak.net/lxml/schema/ns"
 7 | 	schemaLocation="test_inc.xsd" />
 8 | 
 9 |   <xsd:element name="x" type="a:AType"/>
10 | </xsd:schema>
11 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_inc.xsd:
--------------------------------------------------------------------------------
 1 | <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
 2 | 	    xmlns="http://codespeak.net/lxml/schema/ns"
 3 | 	    targetNamespace="http://codespeak.net/lxml/schema/ns">
 4 |   <xsd:element name="a" type="AType"/>
 5 |   <xsd:complexType name="AType">
 6 |     <xsd:sequence>
 7 |       <xsd:element name="b" type="xsd:string" />
 8 |     </xsd:sequence>
 9 |   </xsd:complexType>
10 | </xsd:schema>
11 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_schematron.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | 
 3 | """
 4 | Test cases related to Schematron parsing and validation
 5 | """
 6 | 
 7 | import unittest, sys, os.path
 8 | 
 9 | this_dir = os.path.dirname(__file__)
10 | if this_dir not in sys.path:
11 |     sys.path.insert(0, this_dir) # needed for Py3
12 | 
13 | from common_imports import etree, HelperTestCase, fileInTestDir
14 | from common_imports import doctest, make_doctest
15 | 
class ETreeSchematronTestCase(HelperTestCase):
    """Tests for building etree.Schematron validators and validating with them."""

    def test_schematron(self):
        schema_tree = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" >
     <pattern name="Open model">
          <rule context="AAA">
               <assert test="BBB"> BBB element is not present</assert>
               <assert test="CCC"> CCC element is not present</assert>
          </rule>
     </pattern>
     <pattern name="Closed model">
          <rule context="AAA">
               <assert test="BBB"> BBB element is not present</assert>
               <assert test="CCC"> CCC element is not present</assert>
               <assert test="count(BBB|CCC) = count (*)">There is an extra element</assert>
          </rule>
     </pattern>
</schema>
''')
        # the second document has an extra <DDD/> child
        tree_valid = self.parse('<AAA><BBB/><CCC/></AAA>')
        tree_invalid = self.parse('<AAA><BBB/><CCC/><DDD/></AAA>')

        schematron = etree.Schematron(schema_tree)
        self.assertTrue(schematron.validate(tree_valid))
        self.assertFalse(schematron.validate(tree_invalid))

    def test_schematron_elementtree_error(self):
        # an ElementTree without a root element is not accepted
        empty_tree = etree.ElementTree()
        self.assertRaises(ValueError, etree.Schematron, empty_tree)

    def test_schematron_invalid_schema(self):
        # a pattern without any rules
        schema_tree = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" >
     <pattern name="Open model">
     </pattern>
</schema>
''')
        self.assertRaises(
            etree.SchematronParseError, etree.Schematron, schema_tree)

    def test_schematron_invalid_schema_empty(self):
        # a schema without any patterns
        schema_tree = self.parse('''\
<schema xmlns="http://purl.oclc.org/dsdl/schematron" />
''')
        self.assertRaises(
            etree.SchematronParseError, etree.Schematron, schema_tree)

    def test_schematron_invalid_schema_namespace(self):
        # segfault
        schema_tree = self.parse('''\
<schema xmlns="mynamespace" />
''')
        self.assertRaises(
            etree.SchematronParseError, etree.Schematron, schema_tree)
68 | 
69 | 
def test_suite():
    """Return ETreeSchematronTestCase plus the doctests of doc/validation.txt."""
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test*' methods
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(
            ETreeSchematronTestCase)])
    suite.addTests(
        [make_doctest('../../../doc/validation.txt')])
    return suite

if __name__ == '__main__':
    print('to test use test.py %s' % __file__)
79 | 


--------------------------------------------------------------------------------
/src/lxml/tests/test_unicode.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | import unittest, doctest, sys, os.path
 3 | 
 4 | this_dir = os.path.dirname(__file__)
 5 | if this_dir not in sys.path:
 6 |     sys.path.insert(0, this_dir) # needed for Py3
 7 | 
 8 | from common_imports import StringIO, etree, SillyFileLike, HelperTestCase
 9 | from common_imports import _str, _bytes
10 | 
11 | try:
12 |     unicode
13 | except NameError:
14 |     unicode = str
15 | 
16 | ascii_uni = _str('a')
17 | 
18 | klingon = _bytes("\\uF8D2").decode("unicode_escape") # not valid for XML names
19 | 
20 | invalid_tag = _str("test") + klingon
21 | 
22 | uni = _bytes('\\xc3\\u0680\\u3120').decode("unicode_escape") # some non-ASCII characters
23 | 
24 | uxml = _bytes("<test><title>test \\xc3\\xa1\\u3120</title><h1>page \\xc3\\xa1\\u3120 title</h1></test>"
25 |               ).decode("unicode_escape")
26 | 
class UnicodeTestCase(HelperTestCase):
    """Unicode strings as documents, tag names, namespaces, attributes and text."""

    def test_unicode_xml(self):
        document = _str('<p>%s</p>') % uni
        root = etree.XML(document)
        self.assertEqual(uni, root.text)

    def test_unicode_xml_broken(self):
        # expects ValueError for a unicode string with an encoding declaration
        declaration = _str('<?xml version="1.0" encoding="UTF-8"?>')
        body = _str('<p>%s</p>') % uni
        self.assertRaises(ValueError, etree.XML, declaration + body)

    def test_unicode_tag(self):
        element = etree.Element(uni)
        self.assertEqual(uni, element.tag)

    def test_unicode_tag_invalid(self):
        # sadly, Klingon is not well-formed
        make_element = etree.Element
        self.assertRaises(ValueError, make_element, invalid_tag)

    def test_unicode_nstag(self):
        qualified_tag = _str("{http://abc/}%s") % uni
        element = etree.Element(qualified_tag)
        self.assertEqual(qualified_tag, element.tag)

    def test_unicode_ns_invalid(self):
        # namespace URIs must conform to RFC 3986
        qualified_tag = _str("{http://%s/}abc") % uni
        self.assertRaises(ValueError, etree.Element, qualified_tag)

    def test_unicode_nstag_invalid(self):
        # sadly, Klingon is not well-formed
        qualified_tag = _str("{http://abc/}%s") % invalid_tag
        self.assertRaises(ValueError, etree.Element, qualified_tag)

    def test_unicode_qname(self):
        expected = _str("{%s}%s") % (uni, uni)
        qname = etree.QName(uni, uni)
        self.assertEqual(qname.text, expected)
        self.assertEqual(unicode(qname), expected)

    def test_unicode_qname_invalid(self):
        make_qname = etree.QName
        self.assertRaises(ValueError, make_qname, invalid_tag)

    def test_unicode_attr(self):
        element = etree.Element('foo', {'bar': uni})
        self.assertEqual(uni, element.attrib['bar'])

    def test_unicode_comment(self):
        comment = etree.Comment(uni)
        self.assertEqual(uni, comment.text)

    def test_unicode_parse_stringio(self):
        source = StringIO(_str('<p>%s</p>') % uni)
        root = etree.parse(source).getroot()
        self.assertEqual(uni, root.text)

##     disabled test, kept for reference:
##     def test_parse_fileobject_unicode(self):
##         # parse unicode from unnamed file object (not supported by ElementTree)
##         f = SillyFileLike(uxml)
##         root = etree.parse(f).getroot()
##         self.assertEqual(unicode(etree.tostring(root, 'UTF-8'), 'UTF-8'),
##                           uxml)
87 | 
def test_suite():
    """Return a suite with all tests of UnicodeTestCase."""
    suite = unittest.TestSuite()
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
    # the default loader collects the same 'test*' methods
    suite.addTests(
        [unittest.defaultTestLoader.loadTestsFromTestCase(UnicodeTestCase)])
    return suite
92 | 


--------------------------------------------------------------------------------
/src/lxml/usedoctest.py:
--------------------------------------------------------------------------------
 1 | """Doctest module for XML comparison.
 2 | 
 3 | Usage::
 4 | 
 5 |    >>> import lxml.usedoctest
 6 |    >>> # now do your XML doctests ...
 7 | 
 8 | See `lxml.doctestcompare`
 9 | """
10 | 
11 | from lxml import doctestcompare
12 | 
13 | doctestcompare.temp_install(del_module=__name__)
14 | 


--------------------------------------------------------------------------------
/src/lxml/xinclude.pxi:
--------------------------------------------------------------------------------
 1 | # XInclude processing
 2 | 
 3 | from lxml.includes cimport xinclude
 4 | 
 5 | class XIncludeError(LxmlError):
 6 |     u"""Error during XInclude processing.
 7 |     """
 8 |     pass
 9 | 
cdef class XInclude:
    u"""XInclude(self)
    XInclude processor.

    Create an instance and call it on an Element to run XInclude
    processing.
    """
    # log that is connected while a call runs; created in __init__()
    cdef _ErrorLog _error_log
    def __init__(self):
        self._error_log = _ErrorLog()

    property error_log:
        # hands out a copy of the log, not the internal log object
        def __get__(self):
            assert self._error_log is not None, "XInclude instance not initialised"
            return self._error_log.copy()

    def __call__(self, _Element node not None):
        u"__call__(self, node)"
        # We cannot pass the XML_PARSE_NOXINCNODE option as this would free
        # the XInclude nodes - there may still be Python references to them!
        # Therefore, we allow XInclude nodes to be converted to
        # XML_XINCLUDE_START nodes.  XML_XINCLUDE_END nodes are added as
        # siblings.  Tree traversal will simply ignore them as they are not
        # typed as elements.  The included fragment is added between the two,
        # i.e. as a sibling, which does not conflict with traversal.
        cdef int result
        _assertValidNode(node)
        # same message as in the error_log property (this one used to name
        # the XPath evaluator by mistake)
        assert self._error_log is not None, "XInclude instance not initialised"
        self._error_log.connect()
        __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(
            node._doc._parser)
        with nogil:
            # use the parse options of the document's parser, if it has one
            if node._doc._parser is not None:
                result = xinclude.xmlXIncludeProcessTreeFlags(
                    node._c_node, node._doc._parser._parse_options)
            else:
                result = xinclude.xmlXIncludeProcessTree(node._c_node)
        __GLOBAL_PARSER_CONTEXT.popImpliedContext()
        self._error_log.disconnect()

        # -1 is the failure result of the libxml2 call
        if result == -1:
            raise XIncludeError(
                self._error_log._buildExceptionMessage(
                    u"XInclude processing failed"),
                self._error_log)
55 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | # Tox (http://tox.testrun.org/) is a tool for running tests
 2 | # in multiple virtualenvs. This configuration file will run the
 3 | # test suite on all supported python versions. To use it, "pip install tox"
 4 | # and then run "tox" from this directory.
 5 | 
 6 | [tox]
 7 | envlist = py24, py25, py26, py27, py30, py31, py32, py33
 8 | 
 9 | [testenv]
10 | commands =
11 |     {envpython} setup.py clean
12 |     {envpython} setup.py build_ext --inplace
13 |     make test
14 | deps =
15 |     Cython>=0.17.2
16 | 


--------------------------------------------------------------------------------
/version.txt:
--------------------------------------------------------------------------------
1 | 3.2.1
2 | 


--------------------------------------------------------------------------------