├── .flake8 ├── .github ├── FUNDING.yml ├── dependabot.yml └── workflows │ ├── build.yml │ └── codeql.yml ├── .gitignore ├── AUTHORS ├── CHANGELOG.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── Makefile ├── conf.py └── index.rst ├── examples.py ├── poetry.lock ├── pyproject.toml ├── setup.py ├── tests ├── res │ ├── figs.xml │ ├── foo.xml │ ├── pom.xml │ ├── some.xslt │ ├── unicode.xml │ └── xxe.xml └── test_untangle.py ├── untangle.py └── untangle.rst /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git,__pycache__,docs/conf.py,old,build,dist,.tox 3 | ignore = E501 4 | -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: stchris 2 | liberapay: stchris 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | schedule: 11 | interval: "daily" 12 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: push 4 | 5 | jobs: 6 | test: 7 | name: Test Python ${{ matrix.python-version }} (${{ matrix.os }}) 8 | strategy: 9 | matrix: 10 | python-version: ["3.7", "3.8", "3.9", "3.10"] 11 | os: [ubuntu-latest, macos-latest, windows-latest] 12 | runs-on: ${{ matrix.os }} 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v3 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | 21 | - name: Install dependencies 22 | run: make setup 23 | 24 | - name: Lint 25 | run: make lint 26 | 27 | - name: Run tests 28 | run: make test 29 | 30 | release: 31 | name: Release 32 | needs: test 33 | runs-on: ubuntu-latest 34 | steps: 35 | - uses: actions/checkout@v3 36 | 37 | - name: Set up Python ${{ matrix.python-version }} 38 | uses: actions/setup-python@v3 39 | with: 40 | python-version: "3.10" 41 | 42 | - name: Install requirements 43 | run: python -m pip install wheel setuptools build 44 | 45 | - name: Build a distribution 46 | run: python -m build 47 | 48 | - name: Publish package to TestPyPI 49 | uses: pypa/gh-action-pypi-publish@master 50 | with: 51 | user: __token__ 52 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 53 | repository_url: https://test.pypi.org/legacy/ 54 | skip_existing: true 55 | 56 | - name: Publish package to PyPI 57 | uses: pypa/gh-action-pypi-publish@master 58 | if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') 59 | with: 60 | user: __token__ 61 | password: ${{ secrets.PYPI_API_TOKEN }} 62 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "main" ] 20 | schedule: 21 | - cron: '39 0 * * 1' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Use only 'java' to analyze code written in Java, Kotlin or both 38 | # Use only 'javascript' to analyze code written in JavaScript, TypeScript or both 39 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 40 | 41 | steps: 42 | - name: Checkout repository 43 | uses: actions/checkout@v3 44 | 45 | # Initializes the CodeQL tools for scanning. 46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@v2 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 53 | 54 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 55 | # queries: security-extended,security-and-quality 56 | 57 | 58 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java). 59 | # If this step fails, then you should remove it and run the build manually (see below) 60 | - name: Autobuild 61 | uses: github/codeql-action/autobuild@v2 62 | 63 | # ℹ️ Command-line programs to run using the OS shell. 64 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 65 | 66 | # If the Autobuild fails above, remove it and uncomment the following three lines. 67 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 68 | 69 | # - run: | 70 | # echo "Run, Build Application using script" 71 | # ./location_of_script_within_repo/buildscript.sh 72 | 73 | - name: Perform CodeQL Analysis 74 | uses: github/codeql-action/analyze@v2 75 | with: 76 | category: "/language:${{matrix.language}}" 77 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/** 2 | dist/** 3 | deb_dist/** 4 | debian/** 5 | **/*.pyc 6 | *.pyc 7 | *.DS_Store 8 | MANIFEST 9 | untangle.egg-info 10 | .tox/ 11 | tox.init 12 | untangle-*.tar.gz 13 | .cache/* 14 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Christian Stefanescu 2 | 3 | # Contributions from: 4 | 5 | Florian Idelberger 6 | Apalala 7 | Reverb Chu 8 | Henrikki Tenkanen 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | Changelog 2 | --------- 3 | 4 | Unreleased 5 | 6 | 1.2.1 7 | - (SECURITY) Use [defusedxml](https://github.com/tiran/defusedxml) to prevent XML SAX vulnerabilities ([#94](https://github.com/stchris/untangle/pull/94)) 8 | 9 | 1.2.0 10 | - (SECURITY) Prevent XML SAX vulnerability: External Entities injection ([#60](https://github.com/stchris/untangle/issues/60)) 11 | - support for python keywords as element names ([#43](https://github.com/stchris/untangle/pull/43)) 12 | - support Element truthiness on Python 3 ([#68](https://github.com/stchris/untangle/pull/68/)) 13 | - dropped support for Python 3.4-3.6 and pypy, untangle currently support Python 3.7-3.10 14 | - fixed setup.py warning ([#77](https://github.com/stchris/untangle/pull/77/)) 15 | 16 | - dropped support for Python 2.6, 3.3 17 | - formatted code with black 18 | - flake8 linter enforced in CI 19 | - `main` is now the default branch 20 | - switch to Github Actions 21 | - switch to poetry and pytest 22 | 23 | 1.1.1 24 | - added generic SAX feature toggle ([#26](https://github.com/stchris/untangle/pull/26)) 25 | - added support for `hasattribute`/`getattribute` ([#15](https://github.com/stchris/untangle/pull/15)) 26 | - added support for `len()` on parsed objects ([https://github.com/stchris/untangle/commit/31f3078]()) 27 | - fixed a potential bug when trying to detect URLs ([https://github.com/stchris/untangle/commit/cfa11d16]()) 28 | - include CDATA in `str` representation ([https://github.com/stchris/untangle/commit/63aaa]()) 29 | - added support for parsing file-like objects ([#9](https://github.com/stchris/untangle/issues/9)) 30 | - dropped support for Python 3.2 (untangle now supports Python versions 2.6, 2.7, 3.3, 3.4, 3.5, 3.6 and pypy) 31 | - improved unit test coverage and quality 32 | - better documentation and examples for accessing cdata 33 | 34 | 1.1.0 35 | - __dir__ support for untangled objects 36 | - code cleanups 37 | 38 | 1.0.0 39 | - first official release 40 | 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 2 | 3 | Permission is hereby granted, free of charge, to any person 4 | obtaining a copy of this software and associated documentation 5 | files (the "Software"), to deal in the Software without 6 | restriction, including without limitation the rights to use, 7 | copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | copies of the Software, and to permit persons to whom the 9 | Software is furnished to do so, subject to the following 10 | conditions: 11 | 12 | The above copyright notice and this permission notice shall be 13 | included in all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 17 | OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 18 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 19 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 20 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | .DEFAULT_GOAL=compile 3 | 4 | compile: 5 | python -m compileall -q untangle.py tests/tests.py 6 | 7 | setup: 8 | python -m pip install poetry 9 | poetry install 10 | 11 | lint: 12 | poetry run flake8 . 13 | poetry run black --check . 14 | 15 | test: 16 | poetry run pytest -v 17 | 18 | # needs python-stdeb 19 | package_deb: 20 | python setup.py --command-packages=stdeb.command bdist_deb 21 | 22 | clean: 23 | rm -rf deb_dist/ 24 | rm -rf debian/ 25 | rm -rf dist/ 26 | rm -f untangle-*.tar.gz 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | untangle 2 | ======== 3 | 4 | [![Build Status](https://github.com/stchris/untangle/actions/workflows/build.yml/badge.svg)](https://github.com/stchris/untangle/actions) 5 | [![PyPi version](https://img.shields.io/pypi/v/untangle.svg)](https://pypi.python.org/pypi/untangle) 6 | Code style: black 7 | 8 | [Documentation](http://readthedocs.org/docs/untangle/en/latest/) 9 | 10 | * Converts XML to a Python object. 11 | * Siblings with similar names are grouped into a list. 12 | * Children can be accessed with ``parent.child``, attributes with ``element['attribute']``. 13 | * You can call the ``parse()`` method with a filename, an URL or an XML string. 14 | * Substitutes ``-``, ``.`` and ``:`` with ``_`` ```` can be accessed with ``foobar.foo_bar``, ```` can be accessed with ``foo_bar_baz`` and ```` can be accessed with ``foo_bar.foo_baz`` 15 | * Works with Python 3.7 - 3.10 16 | 17 | Installation 18 | ------------ 19 | 20 | With pip: 21 | ``` 22 | pip install untangle 23 | ``` 24 | 25 | With conda: 26 | ``` 27 | conda install -c conda-forge untangle 28 | ``` 29 | 30 | Conda feedstock maintained by @htenkanen. Issues and questions about conda-forge packaging / installation can be done [here](https://github.com/conda-forge/untangle-feedstock/issues). 31 | 32 | Usage 33 | ----- 34 | (See and run examples.py or this blog post: [Read XML painlessly](http://pythonadventures.wordpress.com/2011/10/30/read-xml-painlessly/) for more info) 35 | 36 | ```python 37 | import untangle 38 | obj = untangle.parse(resource) 39 | ``` 40 | 41 | ``resource`` can be: 42 | 43 | * a URL 44 | * a filename 45 | * an XML string 46 | 47 | Running the above code and passing this XML: 48 | 49 | ```xml 50 | 51 | 52 | 53 | 54 | ``` 55 | allows it to be navigated from the ``untangle``d object like this: 56 | 57 | ```python 58 | obj.root.child['name'] # u'child1' 59 | ``` 60 | 61 | Changelog 62 | --------- 63 | 64 | see CHANGELOG.md 65 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # Internal variables. 11 | PAPEROPT_a4 = -D latex_paper_size=a4 12 | PAPEROPT_letter = -D latex_paper_size=letter 13 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 14 | # the i18n builder cannot share the environment and doctrees with the others 15 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 16 | 17 | .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext 18 | 19 | help: 20 | @echo "Please use \`make ' where is one of" 21 | @echo " html to make standalone HTML files" 22 | @echo " dirhtml to make HTML files named index.html in directories" 23 | @echo " singlehtml to make a single large HTML file" 24 | @echo " pickle to make pickle files" 25 | @echo " json to make JSON files" 26 | @echo " htmlhelp to make HTML files and a HTML help project" 27 | @echo " qthelp to make HTML files and a qthelp project" 28 | @echo " devhelp to make HTML files and a Devhelp project" 29 | @echo " epub to make an epub" 30 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 31 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 32 | @echo " text to make text files" 33 | @echo " man to make manual pages" 34 | @echo " texinfo to make Texinfo files" 35 | @echo " info to make Texinfo files and run them through makeinfo" 36 | @echo " gettext to make PO message catalogs" 37 | @echo " changes to make an overview of all changed/added/deprecated items" 38 | @echo " linkcheck to check all external links for integrity" 39 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 40 | 41 | clean: 42 | -rm -rf $(BUILDDIR)/* 43 | 44 | html: 45 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 46 | @echo 47 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 48 | 49 | dirhtml: 50 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 51 | @echo 52 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 53 | 54 | singlehtml: 55 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 56 | @echo 57 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 58 | 59 | pickle: 60 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 61 | @echo 62 | @echo "Build finished; now you can process the pickle files." 63 | 64 | json: 65 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 66 | @echo 67 | @echo "Build finished; now you can process the JSON files." 68 | 69 | htmlhelp: 70 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 71 | @echo 72 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 73 | ".hhp project file in $(BUILDDIR)/htmlhelp." 74 | 75 | qthelp: 76 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 77 | @echo 78 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 79 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 80 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/untangle.qhcp" 81 | @echo "To view the help file:" 82 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/untangle.qhc" 83 | 84 | devhelp: 85 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 86 | @echo 87 | @echo "Build finished." 88 | @echo "To view the help file:" 89 | @echo "# mkdir -p $$HOME/.local/share/devhelp/untangle" 90 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/untangle" 91 | @echo "# devhelp" 92 | 93 | epub: 94 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 95 | @echo 96 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 97 | 98 | latex: 99 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 100 | @echo 101 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 102 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 103 | "(use \`make latexpdf' here to do that automatically)." 104 | 105 | latexpdf: 106 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 107 | @echo "Running LaTeX files through pdflatex..." 108 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 109 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 110 | 111 | text: 112 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 113 | @echo 114 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 115 | 116 | man: 117 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 118 | @echo 119 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 120 | 121 | texinfo: 122 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 123 | @echo 124 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 125 | @echo "Run \`make' in that directory to run these through makeinfo" \ 126 | "(use \`make info' here to do that automatically)." 127 | 128 | info: 129 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 130 | @echo "Running Texinfo files through makeinfo..." 131 | make -C $(BUILDDIR)/texinfo info 132 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 133 | 134 | gettext: 135 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 136 | @echo 137 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 138 | 139 | changes: 140 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 141 | @echo 142 | @echo "The overview file is in $(BUILDDIR)/changes." 143 | 144 | linkcheck: 145 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 146 | @echo 147 | @echo "Link check complete; look for any errors in the above output " \ 148 | "or in $(BUILDDIR)/linkcheck/output.txt." 149 | 150 | doctest: 151 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 152 | @echo "Testing of doctests in the sources finished, look at the " \ 153 | "results in $(BUILDDIR)/doctest/output.txt." 154 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # 3 | # untangle documentation build configuration file, created by 4 | # sphinx-quickstart on Fri Apr 6 16:05:20 2012. 5 | # 6 | # This file is execfile()d with the current directory set to its containing dir. 7 | # 8 | # Note that not all possible configuration values are present in this 9 | # autogenerated file. 10 | # 11 | # All configuration values have a default; values that are commented out 12 | # serve to show the default. 13 | 14 | import sys, os 15 | 16 | # If extensions (or modules to document with autodoc) are in another directory, 17 | # add these directories to sys.path here. If the directory is relative to the 18 | # documentation root, use os.path.abspath to make it absolute, like shown here. 19 | sys.path.insert(0, os.path.abspath("..")) 20 | import untangle 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | # needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ["sphinx.ext.autodoc"] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ["_templates"] 33 | 34 | # The suffix of source filenames. 35 | source_suffix = ".rst" 36 | 37 | # The encoding of source files. 38 | # source_encoding = 'utf-8-sig' 39 | 40 | # The master toctree document. 41 | master_doc = "index" 42 | 43 | # General information about the project. 44 | project = "untangle" 45 | copyright = "2012, Christian Stefanescu" 46 | 47 | # The version info for the project you're documenting, acts as replacement for 48 | # |version| and |release|, also used in various other places throughout the 49 | # built documents. 50 | # 51 | # The short X.Y version. 52 | version = untangle.__version__ 53 | # The full version, including alpha/beta/rc tags. 54 | release = untangle.__version__ 55 | 56 | # The language for content autogenerated by Sphinx. Refer to documentation 57 | # for a list of supported languages. 58 | # language = None 59 | 60 | # There are two options for replacing |today|: either, you set today to some 61 | # non-false value, then it is used: 62 | # today = '' 63 | # Else, today_fmt is used as the format for a strftime call. 64 | # today_fmt = '%B %d, %Y' 65 | 66 | # List of patterns, relative to source directory, that match files and 67 | # directories to ignore when looking for source files. 68 | exclude_patterns = ["_build"] 69 | 70 | # The reST default role (used for this markup: `text`) to use for all documents. 71 | # default_role = None 72 | 73 | # If true, '()' will be appended to :func: etc. cross-reference text. 74 | # add_function_parentheses = True 75 | 76 | # If true, the current module name will be prepended to all description 77 | # unit titles (such as .. function::). 78 | # add_module_names = True 79 | 80 | # If true, sectionauthor and moduleauthor directives will be shown in the 81 | # output. They are ignored by default. 82 | # show_authors = False 83 | 84 | # The name of the Pygments (syntax highlighting) style to use. 85 | pygments_style = "sphinx" 86 | 87 | # A list of ignored prefixes for module index sorting. 88 | # modindex_common_prefix = [] 89 | 90 | 91 | # -- Options for HTML output --------------------------------------------------- 92 | 93 | # The theme to use for HTML and HTML Help pages. See the documentation for 94 | # a list of builtin themes. 95 | html_theme = "default" 96 | 97 | # Theme options are theme-specific and customize the look and feel of a theme 98 | # further. For a list of options available for each theme, see the 99 | # documentation. 100 | # html_theme_options = {} 101 | 102 | # Add any paths that contain custom themes here, relative to this directory. 103 | # html_theme_path = [] 104 | 105 | # The name for this set of Sphinx documents. If None, it defaults to 106 | # " v documentation". 107 | # html_title = None 108 | 109 | # A shorter title for the navigation bar. Default is the same as html_title. 110 | # html_short_title = None 111 | 112 | # The name of an image file (relative to this directory) to place at the top 113 | # of the sidebar. 114 | # html_logo = None 115 | 116 | # The name of an image file (within the static path) to use as favicon of the 117 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 118 | # pixels large. 119 | # html_favicon = None 120 | 121 | # Add any paths that contain custom static files (such as style sheets) here, 122 | # relative to this directory. They are copied after the builtin static files, 123 | # so a file named "default.css" will overwrite the builtin "default.css". 124 | html_static_path = ["_static"] 125 | 126 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 127 | # using the given strftime format. 128 | # html_last_updated_fmt = '%b %d, %Y' 129 | 130 | # If true, SmartyPants will be used to convert quotes and dashes to 131 | # typographically correct entities. 132 | # html_use_smartypants = True 133 | 134 | # Custom sidebar templates, maps document names to template names. 135 | # html_sidebars = {} 136 | 137 | # Additional templates that should be rendered to pages, maps page names to 138 | # template names. 139 | # html_additional_pages = {} 140 | 141 | # If false, no module index is generated. 142 | # html_domain_indices = True 143 | 144 | # If false, no index is generated. 145 | # html_use_index = True 146 | 147 | # If true, the index is split into individual pages for each letter. 148 | # html_split_index = False 149 | 150 | # If true, links to the reST sources are added to the pages. 151 | # html_show_sourcelink = True 152 | 153 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 154 | # html_show_sphinx = True 155 | 156 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 157 | # html_show_copyright = True 158 | 159 | # If true, an OpenSearch description file will be output, and all pages will 160 | # contain a tag referring to it. The value of this option must be the 161 | # base URL from which the finished HTML is served. 162 | # html_use_opensearch = '' 163 | 164 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 165 | # html_file_suffix = None 166 | 167 | # Output file base name for HTML help builder. 168 | htmlhelp_basename = "untangledoc" 169 | 170 | 171 | # -- Options for LaTeX output -------------------------------------------------- 172 | 173 | latex_elements = { 174 | # The paper size ('letterpaper' or 'a4paper'). 175 | #'papersize': 'letterpaper', 176 | # The font size ('10pt', '11pt' or '12pt'). 177 | #'pointsize': '10pt', 178 | # Additional stuff for the LaTeX preamble. 179 | #'preamble': '', 180 | } 181 | 182 | # Grouping the document tree into LaTeX files. List of tuples 183 | # (source start file, target name, title, author, documentclass [howto/manual]). 184 | latex_documents = [ 185 | ( 186 | "index", 187 | "untangle.tex", 188 | "untangle Documentation", 189 | "Christian Stefanescu", 190 | "manual", 191 | ), 192 | ] 193 | 194 | # The name of an image file (relative to this directory) to place at the top of 195 | # the title page. 196 | # latex_logo = None 197 | 198 | # For "manual" documents, if this is true, then toplevel headings are parts, 199 | # not chapters. 200 | # latex_use_parts = False 201 | 202 | # If true, show page references after internal links. 203 | # latex_show_pagerefs = False 204 | 205 | # If true, show URL addresses after external links. 206 | # latex_show_urls = False 207 | 208 | # Documents to append as an appendix to all manuals. 209 | # latex_appendices = [] 210 | 211 | # If false, no module index is generated. 212 | # latex_domain_indices = True 213 | 214 | 215 | # -- Options for manual page output -------------------------------------------- 216 | 217 | # One entry per manual page. List of tuples 218 | # (source start file, name, description, authors, manual section). 219 | man_pages = [ 220 | ("index", "untangle", "untangle Documentation", ["Christian Stefanescu"], 1) 221 | ] 222 | 223 | # If true, show URL addresses after external links. 224 | # man_show_urls = False 225 | 226 | 227 | # -- Options for Texinfo output ------------------------------------------------ 228 | 229 | # Grouping the document tree into Texinfo files. List of tuples 230 | # (source start file, target name, title, author, 231 | # dir menu entry, description, category) 232 | texinfo_documents = [ 233 | ( 234 | "index", 235 | "untangle", 236 | "untangle Documentation", 237 | "Christian Stefanescu", 238 | "untangle", 239 | "One line description of project.", 240 | "Miscellaneous", 241 | ), 242 | ] 243 | 244 | # Documents to append as an appendix to all manuals. 245 | # texinfo_appendices = [] 246 | 247 | # If false, no module index is generated. 248 | # texinfo_domain_indices = True 249 | 250 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 251 | # texinfo_show_urls = 'footnote' 252 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. untangle documentation master file, created by 2 | sphinx-quickstart on Fri Apr 6 16:05:20 2012. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | untangle: Convert XML to Python objects 7 | ======================================= 8 | 9 | `untangle `_ is a tiny Python library which converts an XML 10 | document to a Python object. It is available under the `MIT license `_. 11 | 12 | .. contents:: 13 | 14 | Usage 15 | ----- 16 | .. module:: untangle 17 | 18 | untangle has a very simple API. You just need to call the 19 | parse function to get back a Python object. The parameter 20 | can be: 21 | 22 | * a string 23 | * a filename 24 | * a URL 25 | 26 | .. autofunction:: parse 27 | If you are looking for information on a specific function, class or method, this part of the documentation is for you. 28 | 29 | The object you get back represents the complete XML document. Child elements can be accessed with ``parent.child``, attributes with ``element['attribute']``. Siblings with similar names are grouped into a list. 30 | 31 | Example 32 | ------- 33 | 34 | Considering this XML document: :: 35 | 36 | 37 | 38 | 39 | 40 | 41 | and assuming it's available in a variable called `xml`, we could use untangle like this: :: 42 | 43 | doc = untangle.parse(xml) 44 | child_name = doc.root.child['name'] # 'child1' 45 | 46 | For text/data inbetween tags, this is described as cdata. After specifying the relevant element as explained above, the data/cdata can be accessed by adding ".cdata" (without the quotes) to the end of your dictionary call. 47 | 48 | For more examples, have a look at (and launch) `examples.py `_. 49 | 50 | Installation 51 | ------------ 52 | 53 | :: 54 | 55 | pip install untangle 56 | 57 | Alternatively, you can install untangle with conda from conda-forge: :: 58 | 59 | conda install -c conda-forge untangle 60 | 61 | Motivation 62 | ---------- 63 | 64 | untangle is available for that use case, where you have a 20-line XML file you got back from an API and you just need to extract some values out of it. You might not want to use regular expressions, but just as well you might not want to install a complex libxml2-based solution (and look up its terse API). 65 | 66 | Performance and memory usage might be bad, but these tradeoffs were made in order to allow a simple API and no external dependencies. See also: Limitations_. 67 | 68 | 69 | Limitations 70 | ----------- 71 | 72 | untangle trades features for a simple API, which is why untangle substitutes ``-``, ``.`` and ``:`` with ``_``: 73 | 74 | * ```` can be accessed with ``foobar.foo_bar`` 75 | * ```` can be accessed with ``foo_bar_baz`` 76 | * ```` can be accessed with ``foo_bar.foo_baz`` 77 | 78 | Encoding 79 | --------- 80 | 81 | Be aware that with certain characters or maybe also depending on the python version you might get an error on accessing specific attributes, such as ``UnicodeEncodeError: 'ascii' codec can't encode character u'\xfc' in position 385: ordinal not in range(128)`` 82 | In most cases it should be enough to import the sys module, and set utf-8 as encoding, with: :: 83 | 84 | import sys 85 | reload(sys) # just to be sure 86 | sys.setdefaultencoding('utf-8') 87 | 88 | SAX features 89 | ------------ 90 | 91 | It is possible to pass specific SAX features to the handler used by untangle, for instance: :: 92 | 93 | untangle.parse(my_xml, feature_external_ges=False) 94 | 95 | This will toggle the SAX handler feature described `here `_. 96 | 97 | Changelog 98 | --------- 99 | 100 | see https://github.com/stchris/untangle/blob/main/CHANGELOG.md 101 | 102 | 103 | Indices and tables 104 | ================== 105 | 106 | * :ref:`genindex` 107 | * :ref:`modindex` 108 | * :ref:`search` 109 | -------------------------------------------------------------------------------- /examples.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | """ 4 | Usage examples for untangle 5 | """ 6 | 7 | import untangle 8 | 9 | 10 | def access(): 11 | """ 12 | Shows basic attribute access and node navigation. 13 | """ 14 | o = untangle.parse('This is cdata') 15 | return "Node id = %s, subnode value = %s" % (o.node["id"], o.node.subnode["value"]) 16 | 17 | 18 | def siblings_list(): 19 | """ 20 | Shows child element iteration 21 | """ 22 | o = untangle.parse( 23 | """ 24 | 25 | 26 | 27 | 28 | 29 | """ 30 | ) 31 | return ",".join([child["name"] for child in o.root.child]) 32 | 33 | 34 | def access_cdata(): 35 | """ 36 | Shows how to handle CDATA elements 37 | """ 38 | o = untangle.parse('This is cdata') 39 | return "%s" % (o.node.cdata) 40 | 41 | 42 | examples = [ 43 | ( 44 | "Access children with parent.children and" 45 | ' attributes with element["attribute"]', 46 | access, 47 | ), 48 | ("Access siblings as list", siblings_list), 49 | ("Access cdata text or other data", access_cdata), 50 | ] 51 | 52 | if __name__ == "__main__": 53 | for description, func in examples: 54 | print("=" * 70) 55 | print(description) 56 | print("=" * 70) 57 | print() 58 | print(func()) 59 | print() 60 | 61 | # vim: set expandtab ts=4 sw=4: 62 | -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- 1 | [[package]] 2 | name = "atomicwrites" 3 | version = "1.4.0" 4 | description = "Atomic file writes." 5 | category = "dev" 6 | optional = false 7 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 8 | 9 | [[package]] 10 | name = "attrs" 11 | version = "21.4.0" 12 | description = "Classes Without Boilerplate" 13 | category = "dev" 14 | optional = false 15 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 16 | 17 | [package.extras] 18 | dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit", "cloudpickle"] 19 | docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] 20 | tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "cloudpickle"] 21 | tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "cloudpickle"] 22 | 23 | [[package]] 24 | name = "black" 25 | version = "22.6.0" 26 | description = "The uncompromising code formatter." 27 | category = "dev" 28 | optional = false 29 | python-versions = ">=3.6.2" 30 | 31 | [package.dependencies] 32 | click = ">=8.0.0" 33 | mypy-extensions = ">=0.4.3" 34 | pathspec = ">=0.9.0" 35 | platformdirs = ">=2" 36 | tomli = {version = ">=1.1.0", markers = "python_full_version < \"3.11.0a7\""} 37 | typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\" and implementation_name == \"cpython\""} 38 | typing-extensions = {version = ">=3.10.0.0", markers = "python_version < \"3.10\""} 39 | 40 | [package.extras] 41 | colorama = ["colorama (>=0.4.3)"] 42 | d = ["aiohttp (>=3.7.4)"] 43 | jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] 44 | uvloop = ["uvloop (>=0.15.2)"] 45 | 46 | [[package]] 47 | name = "build" 48 | version = "0.8.0" 49 | description = "A simple, correct PEP 517 build frontend" 50 | category = "dev" 51 | optional = false 52 | python-versions = ">=3.6" 53 | 54 | [package.dependencies] 55 | colorama = {version = "*", markers = "os_name == \"nt\""} 56 | importlib-metadata = {version = ">=0.22", markers = "python_version < \"3.8\""} 57 | packaging = ">=19.0" 58 | pep517 = ">=0.9.1" 59 | tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} 60 | 61 | [package.extras] 62 | docs = ["furo (>=2021.08.31)", "sphinx (>=4.0,<5.0)", "sphinx-argparse-cli (>=1.5)", "sphinx-autodoc-typehints (>=1.10)"] 63 | test = ["filelock (>=3)", "pytest (>=6.2.4)", "pytest-cov (>=2.12)", "pytest-mock (>=2)", "pytest-rerunfailures (>=9.1)", "pytest-xdist (>=1.34)", "toml (>=0.10.0)", "wheel (>=0.36.0)", "setuptools (>=42.0.0)", "setuptools (>=56.0.0)"] 64 | typing = ["importlib-metadata (>=4.6.4)", "mypy (==0.950)", "typing-extensions (>=3.7.4.3)"] 65 | virtualenv = ["virtualenv (>=20.0.35)"] 66 | 67 | [[package]] 68 | name = "click" 69 | version = "8.1.3" 70 | description = "Composable command line interface toolkit" 71 | category = "dev" 72 | optional = false 73 | python-versions = ">=3.7" 74 | 75 | [package.dependencies] 76 | colorama = {version = "*", markers = "platform_system == \"Windows\""} 77 | importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} 78 | 79 | [[package]] 80 | name = "colorama" 81 | version = "0.4.5" 82 | description = "Cross-platform colored terminal text." 83 | category = "dev" 84 | optional = false 85 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 86 | 87 | [[package]] 88 | name = "defusedxml" 89 | version = "0.7.1" 90 | description = "XML bomb protection for Python stdlib modules" 91 | category = "main" 92 | optional = false 93 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 94 | 95 | [[package]] 96 | name = "flake8" 97 | version = "4.0.1" 98 | description = "the modular source code checker: pep8 pyflakes and co" 99 | category = "dev" 100 | optional = false 101 | python-versions = ">=3.6" 102 | 103 | [package.dependencies] 104 | importlib-metadata = {version = "<4.3", markers = "python_version < \"3.8\""} 105 | mccabe = ">=0.6.0,<0.7.0" 106 | pycodestyle = ">=2.8.0,<2.9.0" 107 | pyflakes = ">=2.4.0,<2.5.0" 108 | 109 | [[package]] 110 | name = "importlib-metadata" 111 | version = "4.2.0" 112 | description = "Read metadata from Python packages" 113 | category = "dev" 114 | optional = false 115 | python-versions = ">=3.6" 116 | 117 | [package.dependencies] 118 | typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} 119 | zipp = ">=0.5" 120 | 121 | [package.extras] 122 | docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] 123 | testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] 124 | 125 | [[package]] 126 | name = "iniconfig" 127 | version = "1.1.1" 128 | description = "iniconfig: brain-dead simple config-ini parsing" 129 | category = "dev" 130 | optional = false 131 | python-versions = "*" 132 | 133 | [[package]] 134 | name = "mccabe" 135 | version = "0.6.1" 136 | description = "McCabe checker, plugin for flake8" 137 | category = "dev" 138 | optional = false 139 | python-versions = "*" 140 | 141 | [[package]] 142 | name = "mypy-extensions" 143 | version = "0.4.3" 144 | description = "Experimental type system extensions for programs checked with the mypy typechecker." 145 | category = "dev" 146 | optional = false 147 | python-versions = "*" 148 | 149 | [[package]] 150 | name = "packaging" 151 | version = "21.3" 152 | description = "Core utilities for Python packages" 153 | category = "dev" 154 | optional = false 155 | python-versions = ">=3.6" 156 | 157 | [package.dependencies] 158 | pyparsing = ">=2.0.2,<3.0.5 || >3.0.5" 159 | 160 | [[package]] 161 | name = "pathspec" 162 | version = "0.9.0" 163 | description = "Utility library for gitignore style pattern matching of file paths." 164 | category = "dev" 165 | optional = false 166 | python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,>=2.7" 167 | 168 | [[package]] 169 | name = "pep517" 170 | version = "0.12.0" 171 | description = "Wrappers to build Python packages using PEP 517 hooks" 172 | category = "dev" 173 | optional = false 174 | python-versions = "*" 175 | 176 | [package.dependencies] 177 | importlib_metadata = {version = "*", markers = "python_version < \"3.8\""} 178 | tomli = {version = ">=1.1.0", markers = "python_version >= \"3.6\""} 179 | zipp = {version = "*", markers = "python_version < \"3.8\""} 180 | 181 | [[package]] 182 | name = "platformdirs" 183 | version = "2.5.2" 184 | description = "A small Python module for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." 185 | category = "dev" 186 | optional = false 187 | python-versions = ">=3.7" 188 | 189 | [package.extras] 190 | docs = ["furo (>=2021.7.5b38)", "proselint (>=0.10.2)", "sphinx-autodoc-typehints (>=1.12)", "sphinx (>=4)"] 191 | test = ["appdirs (==1.4.4)", "pytest-cov (>=2.7)", "pytest-mock (>=3.6)", "pytest (>=6)"] 192 | 193 | [[package]] 194 | name = "pluggy" 195 | version = "1.0.0" 196 | description = "plugin and hook calling mechanisms for python" 197 | category = "dev" 198 | optional = false 199 | python-versions = ">=3.6" 200 | 201 | [package.dependencies] 202 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 203 | 204 | [package.extras] 205 | dev = ["pre-commit", "tox"] 206 | testing = ["pytest", "pytest-benchmark"] 207 | 208 | [[package]] 209 | name = "py" 210 | version = "1.11.0" 211 | description = "library with cross-python path, ini-parsing, io, code, log facilities" 212 | category = "dev" 213 | optional = false 214 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 215 | 216 | [[package]] 217 | name = "pycodestyle" 218 | version = "2.8.0" 219 | description = "Python style guide checker" 220 | category = "dev" 221 | optional = false 222 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" 223 | 224 | [[package]] 225 | name = "pyflakes" 226 | version = "2.4.0" 227 | description = "passive checker of Python programs" 228 | category = "dev" 229 | optional = false 230 | python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" 231 | 232 | [[package]] 233 | name = "pyparsing" 234 | version = "3.0.9" 235 | description = "pyparsing module - Classes and methods to define and execute parsing grammars" 236 | category = "dev" 237 | optional = false 238 | python-versions = ">=3.6.8" 239 | 240 | [package.extras] 241 | diagrams = ["railroad-diagrams", "jinja2"] 242 | 243 | [[package]] 244 | name = "pytest" 245 | version = "7.1.2" 246 | description = "pytest: simple powerful testing with Python" 247 | category = "dev" 248 | optional = false 249 | python-versions = ">=3.7" 250 | 251 | [package.dependencies] 252 | atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} 253 | attrs = ">=19.2.0" 254 | colorama = {version = "*", markers = "sys_platform == \"win32\""} 255 | importlib-metadata = {version = ">=0.12", markers = "python_version < \"3.8\""} 256 | iniconfig = "*" 257 | packaging = "*" 258 | pluggy = ">=0.12,<2.0" 259 | py = ">=1.8.2" 260 | tomli = ">=1.0.0" 261 | 262 | [package.extras] 263 | testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] 264 | 265 | [[package]] 266 | name = "tomli" 267 | version = "2.0.1" 268 | description = "A lil' TOML parser" 269 | category = "dev" 270 | optional = false 271 | python-versions = ">=3.7" 272 | 273 | [[package]] 274 | name = "typed-ast" 275 | version = "1.5.4" 276 | description = "a fork of Python 2 and 3 ast modules with type comment support" 277 | category = "dev" 278 | optional = false 279 | python-versions = ">=3.6" 280 | 281 | [[package]] 282 | name = "typing-extensions" 283 | version = "4.2.0" 284 | description = "Backported and Experimental Type Hints for Python 3.7+" 285 | category = "dev" 286 | optional = false 287 | python-versions = ">=3.7" 288 | 289 | [[package]] 290 | name = "zipp" 291 | version = "3.8.0" 292 | description = "Backport of pathlib-compatible object wrapper for zip files" 293 | category = "dev" 294 | optional = false 295 | python-versions = ">=3.7" 296 | 297 | [package.extras] 298 | docs = ["sphinx", "jaraco.packaging (>=9)", "rst.linker (>=1.9)"] 299 | testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy (>=0.9.1)"] 300 | 301 | [metadata] 302 | lock-version = "1.1" 303 | python-versions = "^3.7" 304 | content-hash = "b27722b23f8379a524e0e979e75811a164f0b50a672c44b764c020e1b7779fc4" 305 | 306 | [metadata.files] 307 | atomicwrites = [ 308 | {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, 309 | {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, 310 | ] 311 | attrs = [ 312 | {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, 313 | {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, 314 | ] 315 | black = [ 316 | {file = "black-22.6.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f586c26118bc6e714ec58c09df0157fe2d9ee195c764f630eb0d8e7ccce72e69"}, 317 | {file = "black-22.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b270a168d69edb8b7ed32c193ef10fd27844e5c60852039599f9184460ce0807"}, 318 | {file = "black-22.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6797f58943fceb1c461fb572edbe828d811e719c24e03375fd25170ada53825e"}, 319 | {file = "black-22.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c85928b9d5f83b23cee7d0efcb310172412fbf7cb9d9ce963bd67fd141781def"}, 320 | {file = "black-22.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:f6fe02afde060bbeef044af7996f335fbe90b039ccf3f5eb8f16df8b20f77666"}, 321 | {file = "black-22.6.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:cfaf3895a9634e882bf9d2363fed5af8888802d670f58b279b0bece00e9a872d"}, 322 | {file = "black-22.6.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94783f636bca89f11eb5d50437e8e17fbc6a929a628d82304c80fa9cd945f256"}, 323 | {file = "black-22.6.0-cp36-cp36m-win_amd64.whl", hash = "sha256:2ea29072e954a4d55a2ff58971b83365eba5d3d357352a07a7a4df0d95f51c78"}, 324 | {file = "black-22.6.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:e439798f819d49ba1c0bd9664427a05aab79bfba777a6db94fd4e56fae0cb849"}, 325 | {file = "black-22.6.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:187d96c5e713f441a5829e77120c269b6514418f4513a390b0499b0987f2ff1c"}, 326 | {file = "black-22.6.0-cp37-cp37m-win_amd64.whl", hash = "sha256:074458dc2f6e0d3dab7928d4417bb6957bb834434516f21514138437accdbe90"}, 327 | {file = "black-22.6.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a218d7e5856f91d20f04e931b6f16d15356db1c846ee55f01bac297a705ca24f"}, 328 | {file = "black-22.6.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:568ac3c465b1c8b34b61cd7a4e349e93f91abf0f9371eda1cf87194663ab684e"}, 329 | {file = "black-22.6.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6c1734ab264b8f7929cef8ae5f900b85d579e6cbfde09d7387da8f04771b51c6"}, 330 | {file = "black-22.6.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9a3ac16efe9ec7d7381ddebcc022119794872abce99475345c5a61aa18c45ad"}, 331 | {file = "black-22.6.0-cp38-cp38-win_amd64.whl", hash = "sha256:b9fd45787ba8aa3f5e0a0a98920c1012c884622c6c920dbe98dbd05bc7c70fbf"}, 332 | {file = "black-22.6.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:7ba9be198ecca5031cd78745780d65a3f75a34b2ff9be5837045dce55db83d1c"}, 333 | {file = "black-22.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a3db5b6409b96d9bd543323b23ef32a1a2b06416d525d27e0f67e74f1446c8f2"}, 334 | {file = "black-22.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:560558527e52ce8afba936fcce93a7411ab40c7d5fe8c2463e279e843c0328ee"}, 335 | {file = "black-22.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b154e6bbde1e79ea3260c4b40c0b7b3109ffcdf7bc4ebf8859169a6af72cd70b"}, 336 | {file = "black-22.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:4af5bc0e1f96be5ae9bd7aaec219c901a94d6caa2484c21983d043371c733fc4"}, 337 | {file = "black-22.6.0-py3-none-any.whl", hash = "sha256:ac609cf8ef5e7115ddd07d85d988d074ed00e10fbc3445aee393e70164a2219c"}, 338 | {file = "black-22.6.0.tar.gz", hash = "sha256:6c6d39e28aed379aec40da1c65434c77d75e65bb59a1e1c283de545fb4e7c6c9"}, 339 | ] 340 | build = [ 341 | {file = "build-0.8.0-py3-none-any.whl", hash = "sha256:19b0ed489f92ace6947698c3ca8436cb0556a66e2aa2d34cd70e2a5d27cd0437"}, 342 | {file = "build-0.8.0.tar.gz", hash = "sha256:887a6d471c901b1a6e6574ebaeeebb45e5269a79d095fe9a8f88d6614ed2e5f0"}, 343 | ] 344 | click = [ 345 | {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, 346 | {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, 347 | ] 348 | colorama = [ 349 | {file = "colorama-0.4.5-py2.py3-none-any.whl", hash = "sha256:854bf444933e37f5824ae7bfc1e98d5bce2ebe4160d46b5edf346a89358e99da"}, 350 | {file = "colorama-0.4.5.tar.gz", hash = "sha256:e6c6b4334fc50988a639d9b98aa429a0b57da6e17b9a44f0451f930b6967b7a4"}, 351 | ] 352 | defusedxml = [ 353 | {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, 354 | {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, 355 | ] 356 | flake8 = [ 357 | {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, 358 | {file = "flake8-4.0.1.tar.gz", hash = "sha256:806e034dda44114815e23c16ef92f95c91e4c71100ff52813adf7132a6ad870d"}, 359 | ] 360 | importlib-metadata = [ 361 | {file = "importlib_metadata-4.2.0-py3-none-any.whl", hash = "sha256:057e92c15bc8d9e8109738a48db0ccb31b4d9d5cfbee5a8670879a30be66304b"}, 362 | {file = "importlib_metadata-4.2.0.tar.gz", hash = "sha256:b7e52a1f8dec14a75ea73e0891f3060099ca1d8e6a462a4dff11c3e119ea1b31"}, 363 | ] 364 | iniconfig = [ 365 | {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, 366 | {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, 367 | ] 368 | mccabe = [ 369 | {file = "mccabe-0.6.1-py2.py3-none-any.whl", hash = "sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42"}, 370 | {file = "mccabe-0.6.1.tar.gz", hash = "sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"}, 371 | ] 372 | mypy-extensions = [ 373 | {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, 374 | {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, 375 | ] 376 | packaging = [ 377 | {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, 378 | {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, 379 | ] 380 | pathspec = [ 381 | {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, 382 | {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, 383 | ] 384 | pep517 = [ 385 | {file = "pep517-0.12.0-py2.py3-none-any.whl", hash = "sha256:dd884c326898e2c6e11f9e0b64940606a93eb10ea022a2e067959f3a110cf161"}, 386 | {file = "pep517-0.12.0.tar.gz", hash = "sha256:931378d93d11b298cf511dd634cf5ea4cb249a28ef84160b3247ee9afb4e8ab0"}, 387 | ] 388 | platformdirs = [ 389 | {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, 390 | {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, 391 | ] 392 | pluggy = [ 393 | {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, 394 | {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, 395 | ] 396 | py = [ 397 | {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, 398 | {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, 399 | ] 400 | pycodestyle = [ 401 | {file = "pycodestyle-2.8.0-py2.py3-none-any.whl", hash = "sha256:720f8b39dde8b293825e7ff02c475f3077124006db4f440dcbc9a20b76548a20"}, 402 | {file = "pycodestyle-2.8.0.tar.gz", hash = "sha256:eddd5847ef438ea1c7870ca7eb78a9d47ce0cdb4851a5523949f2601d0cbbe7f"}, 403 | ] 404 | pyflakes = [ 405 | {file = "pyflakes-2.4.0-py2.py3-none-any.whl", hash = "sha256:3bb3a3f256f4b7968c9c788781e4ff07dce46bdf12339dcda61053375426ee2e"}, 406 | {file = "pyflakes-2.4.0.tar.gz", hash = "sha256:05a85c2872edf37a4ed30b0cce2f6093e1d0581f8c19d7393122da7e25b2b24c"}, 407 | ] 408 | pyparsing = [ 409 | {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, 410 | {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, 411 | ] 412 | pytest = [ 413 | {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, 414 | {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, 415 | ] 416 | tomli = [ 417 | {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, 418 | {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, 419 | ] 420 | typed-ast = [ 421 | {file = "typed_ast-1.5.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:669dd0c4167f6f2cd9f57041e03c3c2ebf9063d0757dc89f79ba1daa2bfca9d4"}, 422 | {file = "typed_ast-1.5.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:211260621ab1cd7324e0798d6be953d00b74e0428382991adfddb352252f1d62"}, 423 | {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:267e3f78697a6c00c689c03db4876dd1efdfea2f251a5ad6555e82a26847b4ac"}, 424 | {file = "typed_ast-1.5.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c542eeda69212fa10a7ada75e668876fdec5f856cd3d06829e6aa64ad17c8dfe"}, 425 | {file = "typed_ast-1.5.4-cp310-cp310-win_amd64.whl", hash = "sha256:a9916d2bb8865f973824fb47436fa45e1ebf2efd920f2b9f99342cb7fab93f72"}, 426 | {file = "typed_ast-1.5.4-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:79b1e0869db7c830ba6a981d58711c88b6677506e648496b1f64ac7d15633aec"}, 427 | {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a94d55d142c9265f4ea46fab70977a1944ecae359ae867397757d836ea5a3f47"}, 428 | {file = "typed_ast-1.5.4-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:183afdf0ec5b1b211724dfef3d2cad2d767cbefac291f24d69b00546c1837fb6"}, 429 | {file = "typed_ast-1.5.4-cp36-cp36m-win_amd64.whl", hash = "sha256:639c5f0b21776605dd6c9dbe592d5228f021404dafd377e2b7ac046b0349b1a1"}, 430 | {file = "typed_ast-1.5.4-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cf4afcfac006ece570e32d6fa90ab74a17245b83dfd6655a6f68568098345ff6"}, 431 | {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ed855bbe3eb3715fca349c80174cfcfd699c2f9de574d40527b8429acae23a66"}, 432 | {file = "typed_ast-1.5.4-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:6778e1b2f81dfc7bc58e4b259363b83d2e509a65198e85d5700dfae4c6c8ff1c"}, 433 | {file = "typed_ast-1.5.4-cp37-cp37m-win_amd64.whl", hash = "sha256:0261195c2062caf107831e92a76764c81227dae162c4f75192c0d489faf751a2"}, 434 | {file = "typed_ast-1.5.4-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:2efae9db7a8c05ad5547d522e7dbe62c83d838d3906a3716d1478b6c1d61388d"}, 435 | {file = "typed_ast-1.5.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7d5d014b7daa8b0bf2eaef684295acae12b036d79f54178b92a2b6a56f92278f"}, 436 | {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:370788a63915e82fd6f212865a596a0fefcbb7d408bbbb13dea723d971ed8bdc"}, 437 | {file = "typed_ast-1.5.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:4e964b4ff86550a7a7d56345c7864b18f403f5bd7380edf44a3c1fb4ee7ac6c6"}, 438 | {file = "typed_ast-1.5.4-cp38-cp38-win_amd64.whl", hash = "sha256:683407d92dc953c8a7347119596f0b0e6c55eb98ebebd9b23437501b28dcbb8e"}, 439 | {file = "typed_ast-1.5.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4879da6c9b73443f97e731b617184a596ac1235fe91f98d279a7af36c796da35"}, 440 | {file = "typed_ast-1.5.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3e123d878ba170397916557d31c8f589951e353cc95fb7f24f6bb69adc1a8a97"}, 441 | {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ebd9d7f80ccf7a82ac5f88c521115cc55d84e35bf8b446fcd7836eb6b98929a3"}, 442 | {file = "typed_ast-1.5.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:98f80dee3c03455e92796b58b98ff6ca0b2a6f652120c263efdba4d6c5e58f72"}, 443 | {file = "typed_ast-1.5.4-cp39-cp39-win_amd64.whl", hash = "sha256:0fdbcf2fef0ca421a3f5912555804296f0b0960f0418c440f5d6d3abb549f3e1"}, 444 | {file = "typed_ast-1.5.4.tar.gz", hash = "sha256:39e21ceb7388e4bb37f4c679d72707ed46c2fbf2a5609b8b8ebc4b067d977df2"}, 445 | ] 446 | typing-extensions = [ 447 | {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, 448 | {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, 449 | ] 450 | zipp = [ 451 | {file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"}, 452 | {file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"}, 453 | ] 454 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "untangle" 3 | version = "1.2.1" 4 | description = "Converts XML to Python objects" 5 | authors = ["Christian Stefanescu "] 6 | license = "MIT" 7 | readme = "README.md" 8 | 9 | [tool.poetry.dependencies] 10 | python = "^3.7" 11 | defusedxml = "^0.7.1" 12 | 13 | [tool.poetry.dev-dependencies] 14 | pytest = "^7.1.2" 15 | flake8 = "^4.0.1" 16 | black = "^22.6.0" 17 | build = "^0.8.0" 18 | setuptools = "^62.6.0" 19 | wheel = "^0.37.1" 20 | 21 | [build-system] 22 | requires = ["poetry-core>=1.0.0"] 23 | build-backend = "poetry.core.masonry.api" 24 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import untangle 5 | 6 | from setuptools import setup, find_packages 7 | from pathlib import Path 8 | 9 | long_description = (Path(__file__).parent / "README.md").read_text() 10 | 11 | setup( 12 | name="untangle", 13 | packages=find_packages(), 14 | version=untangle.__version__, 15 | description="Convert XML documents into Python objects", 16 | long_description_content_type="text/markdown", 17 | long_description=long_description, 18 | author="Christian Stefanescu", 19 | author_email="hello@stchris.net", 20 | url="http://github.com/stchris//untangle", 21 | py_modules=["untangle"], 22 | install_requires=["defusedxml"], 23 | include_package_data=True, 24 | license="MIT", 25 | classifiers=[ 26 | "Development Status :: 5 - Production/Stable", 27 | "Intended Audience :: Developers", 28 | "Natural Language :: English", 29 | "License :: OSI Approved :: MIT License", 30 | "Programming Language :: Python", 31 | "Programming Language :: Python :: 3.7", 32 | "Programming Language :: Python :: 3.8", 33 | "Programming Language :: Python :: 3.9", 34 | "Programming Language :: Python :: 3.10", 35 | ], 36 | ) 37 | 38 | # vim: set expandtab ts=4 sw=4: 39 | -------------------------------------------------------------------------------- /tests/res/figs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | value1 5 | value2 6 | 7 | 8 | value 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /tests/res/foo.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Target-1 8 | 0.0.0.0 9 | cody 10 | cody 11 | cody 12 | cody 13 | 14 | 2 15 | 16 | 17 | 18 | Target-2 19 | 0.0.0.0 20 | cody 21 | cody 22 | cody 23 | cody 24 | 2 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /tests/res/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | com.atlassian.confluence.plugin.base 9 | confluence-plugin-base 10 | 17 11 | 12 | 13 | 4.0.0 14 | com.this.that.groupId 15 | artifactId 16 | 0.1 17 | 18 | 19 | atlassian-plugin 20 | 21 | 22 | ${pom.groupId}.${pom.artifactId} 23 | 24 | 25 | 2.9 26 | 27 | 1.4.1 28 | 29 | 2.9 30 | 31 | 32 | 33 | 37 | 38 | 39 | 47 | 48 | 49 | 56 | 57 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /tests/res/some.xslt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 10 | 11 | 12 |
8 | 9 |
13 |
14 | 15 | 16 | 17 | 18 |

19 | 20 |

21 |
22 | 23 | 24 |

25 | 26 |

27 |
28 |
29 | -------------------------------------------------------------------------------- /tests/res/unicode.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ðÒÉ×ÅÔ ÍÉÒ 5 | 6 | 7 | ðÕÎËÔ 1 8 | http://example1.com 9 | 10 | 11 | ðÕÎËÔ 2 12 | http://example2.com 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /tests/res/xxe.xml: -------------------------------------------------------------------------------- 1 | 3 | ]> 4 | 5 | -------------------------------------------------------------------------------- /tests/test_untangle.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | import unittest 5 | import untangle 6 | import xml 7 | 8 | import defusedxml 9 | 10 | 11 | class FromStringTestCase(unittest.TestCase): 12 | """Basic parsing tests with input as string""" 13 | 14 | def test_basic(self): 15 | o = untangle.parse("") 16 | self.assertTrue(o is not None) 17 | self.assertTrue(o.a is not None) 18 | self.assertTrue(o.a.b is not None) 19 | self.assertTrue(o.a.c is not None) 20 | self.assertTrue("a" in o) 21 | self.assertTrue("b" in o.a) 22 | self.assertTrue("c" in o.a) 23 | self.assertTrue("d" not in o.a) 24 | 25 | def test_basic_with_decl(self): 26 | o = untangle.parse("") 27 | self.assertTrue(o is not None) 28 | self.assertTrue(o.a is not None) 29 | self.assertTrue(o.a.b is not None) 30 | self.assertTrue(o.a.c is not None) 31 | self.assertTrue("a" in o) 32 | self.assertTrue("b" in o.a) 33 | self.assertTrue("c" in o.a) 34 | self.assertTrue("d" not in o.a) 35 | 36 | def test_truthiness(self): 37 | o = untangle.parse("") 38 | self.assertTrue(o) 39 | self.assertTrue(o.a) 40 | self.assertTrue(o.a.b) 41 | self.assertTrue(o.a.c) 42 | 43 | def test_with_attributes(self): 44 | o = untangle.parse( 45 | """ 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | """ 59 | ) 60 | self.assertEqual("Tomato soup", o.Soup["name"]) 61 | self.assertEqual(1, int(o.Soup["version"])) 62 | self.assertEqual("1l", o.Soup.Ingredients.Water["qty"]) 63 | self.assertTrue(o.Soup.Instructions.add_ingredients is not None) 64 | 65 | def test_grouping(self): 66 | o = untangle.parse( 67 | """ 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | """ 79 | ) 80 | self.assertTrue(o.root) 81 | 82 | children = o.root.child 83 | self.assertEqual(3, len(children)) 84 | self.assertEqual("child1", children[0]["name"]) 85 | self.assertEqual("sub1", children[0].subchild["name"]) 86 | self.assertEqual(2, len(children[2].subchild)) 87 | self.assertEqual("sub2", children[2].subchild[0]["name"]) 88 | 89 | def test_single_root(self): 90 | self.assertTrue(untangle.parse("")) 91 | 92 | def test_attribute_protocol(self): 93 | o = untangle.parse( 94 | """ 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | """ 106 | ) 107 | try: 108 | self.assertEqual(None, o.root.child.inexistent) 109 | self.fail("Was able to access inexistent child as None") 110 | except AttributeError: 111 | pass # this is the expected error 112 | except IndexError: 113 | self.fail("Caught IndexError quen expecting AttributeError") 114 | 115 | self.assertTrue(hasattr(o.root, "child")) 116 | self.assertFalse(hasattr(o.root, "inexistent")) 117 | 118 | self.assertEqual("child1", getattr(o.root, "child")[0]["name"]) 119 | 120 | def test_python_keyword(self): 121 | o = untangle.parse("") 122 | self.assertTrue(o is not None) 123 | self.assertTrue(o.class_ is not None) 124 | self.assertTrue(o.class_.return_ is not None) 125 | self.assertTrue(o.class_.pass_ is not None) 126 | self.assertTrue(o.class_.None_ is not None) 127 | 128 | 129 | class InvalidTestCase(unittest.TestCase): 130 | """Test corner cases""" 131 | 132 | def test_invalid_xml(self): 133 | self.assertRaises(xml.sax.SAXParseException, untangle.parse, "") 134 | 135 | def test_empty_xml(self): 136 | self.assertRaises(ValueError, untangle.parse, "") 137 | 138 | def test_none_xml(self): 139 | self.assertRaises(ValueError, untangle.parse, None) 140 | 141 | 142 | class PomXmlTestCase(unittest.TestCase): 143 | """Tests parsing a Maven pom.xml""" 144 | 145 | def setUp(self): 146 | self.o = untangle.parse("tests/res/pom.xml") 147 | 148 | def test_parent(self): 149 | project = self.o.project 150 | self.assertTrue(project) 151 | 152 | parent = project.parent 153 | self.assertTrue(parent) 154 | self.assertEqual("com.atlassian.confluence.plugin.base", parent.groupId) 155 | self.assertEqual("confluence-plugin-base", parent.artifactId) 156 | self.assertEqual("17", parent.version) 157 | 158 | self.assertEqual("4.0.0", project.modelVersion) 159 | self.assertEqual("com.this.that.groupId", project.groupId) 160 | 161 | self.assertEqual("", project.name) 162 | self.assertEqual( 163 | "${pom.groupId}.${pom.artifactId}", project.properties.atlassian_plugin_key 164 | ) 165 | self.assertEqual("1.4.1", project.properties.atlassian_product_test_lib_version) 166 | self.assertEqual("2.9", project.properties.atlassian_product_data_version) 167 | 168 | def test_lengths(self): 169 | self.assertEqual(1, len(self.o)) 170 | self.assertEqual(8, len(self.o.project)) 171 | self.assertEqual(3, len(self.o.project.parent)) 172 | self.assertEqual(4, len(self.o.project.properties)) 173 | 174 | 175 | class NamespaceTestCase(unittest.TestCase): 176 | """Tests for XMLs with namespaces""" 177 | 178 | def setUp(self): 179 | self.o = untangle.parse("tests/res/some.xslt") 180 | 181 | def test_namespace(self): 182 | self.assertTrue(self.o) 183 | 184 | stylesheet = self.o.xsl_stylesheet 185 | self.assertTrue(stylesheet) 186 | self.assertEqual("1.0", stylesheet["version"]) 187 | 188 | template = stylesheet.xsl_template[0] 189 | self.assertTrue(template) 190 | self.assertEqual("math", template["match"]) 191 | self.assertEqual("compact", template.table["class"]) 192 | self.assertEqual("compact vam", template.table.tr.xsl_for_each.td["class"]) 193 | self.assertEqual( 194 | untangle.Element("", ""), 195 | template.table.tr.xsl_for_each.td.xsl_apply_templates, 196 | ) 197 | 198 | last_template = stylesheet.xsl_template[-1] 199 | self.assertTrue(last_template) 200 | self.assertEqual("m_var", last_template["match"]) 201 | self.assertEqual("compact tac formula italic", last_template.p["class"]) 202 | self.assertEqual( 203 | untangle.Element("xsl_apply_templates", ""), 204 | last_template.p.xsl_apply_templates, 205 | ) 206 | 207 | 208 | class IterationTestCase(unittest.TestCase): 209 | """Tests various cases of iteration over child nodes.""" 210 | 211 | def test_multiple_children(self): 212 | """Regular case of iteration.""" 213 | o = untangle.parse("") 214 | cnt = 0 215 | for i in o.a.b: 216 | cnt += 1 217 | self.assertEqual(2, cnt) 218 | 219 | def test_single_child(self): 220 | """Special case when there is only a single child element. 221 | Does not work without an __iter__ implemented. 222 | """ 223 | o = untangle.parse("") 224 | cnt = 0 225 | for i in o.a.b: 226 | cnt += 1 227 | self.assertEqual(1, cnt) 228 | 229 | 230 | class TwimlTestCase(unittest.TestCase): 231 | """Github Issue #5: can't dir the parsed object""" 232 | 233 | def test_twiml_dir(self): 234 | xml = """ 235 | 236 | 238 | http://example.com/barcall_message_url.wav 239 | 240 | http://example.com/calls/1/twiml?event=start 241 | 242 | """ 243 | o = untangle.parse(xml) 244 | self.assertEqual(["Response"], dir(o)) 245 | resp = o.Response 246 | self.assertEqual(["Gather", "Redirect"], dir(resp)) 247 | gather = resp.Gather 248 | redir = resp.Redirect 249 | self.assertEqual(["Play"], dir(gather)) 250 | self.assertEqual([], dir(redir)) 251 | self.assertEqual( 252 | "http://example.com/calls/1/twiml?event=start", o.Response.Redirect.cdata 253 | ) 254 | 255 | 256 | class UnicodeTestCase(unittest.TestCase): 257 | """Github issue #8: UnicodeEncodeError""" 258 | 259 | def test_unicode_file(self): 260 | o = untangle.parse("tests/res/unicode.xml") 261 | self.assertEqual("ðÒÉ×ÅÔ ÍÉÒ", o.page.menu.name) 262 | 263 | def test_lengths(self): 264 | o = untangle.parse("tests/res/unicode.xml") 265 | self.assertEqual(1, len(o)) 266 | self.assertEqual(1, len(o.page)) 267 | self.assertEqual(2, len(o.page.menu)) 268 | self.assertEqual(2, len(o.page.menu.items)) 269 | self.assertEqual(2, len(o.page.menu.items.item)) 270 | self.assertEqual(0, len(o.page.menu.items.item[0].name)) 271 | self.assertEqual(0, len(o.page.menu.items.item[1].name)) 272 | 273 | def test_unicode_string(self): 274 | o = untangle.parse("valüé ◔‿◔") 275 | self.assertEqual("valüé ◔‿◔", o.Element.cdata) 276 | 277 | def test_unicode_element(self): 278 | o = untangle.parse("") 279 | self.assertTrue(o is not None) 280 | self.assertTrue(o.Francés is not None) 281 | 282 | 283 | class FileObjects(unittest.TestCase): 284 | """Test reading from file-like objects""" 285 | 286 | def test_file_object(self): 287 | with open("tests/res/pom.xml") as pom_file: 288 | o = untangle.parse(pom_file) 289 | project = o.project 290 | self.assertTrue(project) 291 | 292 | parent = project.parent 293 | self.assertTrue(parent) 294 | self.assertEqual("com.atlassian.confluence.plugin.base", parent.groupId) 295 | self.assertEqual("confluence-plugin-base", parent.artifactId) 296 | self.assertEqual("17", parent.version) 297 | 298 | 299 | class Foo(object): 300 | """Used in UntangleInObjectsTestCase""" 301 | 302 | def __init__(self): 303 | self.doc = untangle.parse('foo') 304 | 305 | 306 | class UntangleInObjectsTestCase(unittest.TestCase): 307 | """tests usage of untangle in classes""" 308 | 309 | def test_object(self): 310 | foo = Foo() 311 | self.assertEqual("1", foo.doc.a.b["x"]) 312 | self.assertEqual("foo", foo.doc.a.b.cdata) 313 | 314 | 315 | class UrlStringTestCase(unittest.TestCase): 316 | """tests is_url() function""" 317 | 318 | def test_is_url(self): 319 | self.assertFalse(untangle.is_url("foo")) 320 | self.assertFalse(untangle.is_url("httpfoo")) 321 | self.assertFalse(untangle.is_url(7)) 322 | self.assertTrue(untangle.is_url("http://foobar")) 323 | self.assertTrue(untangle.is_url("https://foobar")) 324 | 325 | 326 | class TestSaxHandler(unittest.TestCase): 327 | """Tests the SAX ContentHandler""" 328 | 329 | def test_empty_handler(self): 330 | h = untangle.Handler() 331 | self.assertRaises(IndexError, h.endElement, "foo") 332 | self.assertRaises(IndexError, h.characters, "bar") 333 | 334 | def test_handler(self): 335 | h = untangle.Handler() 336 | h.startElement("foo", {}) 337 | h.endElement("foo") 338 | self.assertEqual("foo", h.root.children[0]._name) 339 | 340 | def test_cdata(self): 341 | h = untangle.Handler() 342 | h.startElement("foo", {}) 343 | h.characters("baz") 344 | self.assertEqual("baz", h.root.children[0].cdata) 345 | 346 | 347 | class FigsTestCase(unittest.TestCase): 348 | def test_figs(self): 349 | doc = untangle.parse("tests/res/figs.xml") 350 | expected_pairs = [("key1", "value1"), ("key2", "value2"), ("key", "value")] 351 | pairs = [] 352 | for group in doc.props.children: 353 | for prop in group.children: 354 | pairs.append((prop["key"], prop.cdata)) 355 | assert expected_pairs == pairs 356 | 357 | 358 | class ParserFeatureTestCase(unittest.TestCase): 359 | """Tests adding xml.sax parser features via parse()""" 360 | 361 | # External DTD that will never be loadable (invalid address) 362 | bad_dtd_xml = """ 363 | 364 | """ 365 | 366 | def test_valid_feature(self): 367 | # xml.sax.handler.feature_external_ges -> load external general (text) 368 | # entities, such as DTDs 369 | with self.assertRaises(defusedxml.common.ExternalReferenceForbidden): 370 | untangle.parse(self.bad_dtd_xml) 371 | 372 | def test_invalid_feature(self): 373 | with self.assertRaises(AttributeError): 374 | untangle.parse(self.bad_dtd_xml, invalid_feature=True) 375 | 376 | def test_invalid_external_dtd(self): 377 | with self.assertRaises(defusedxml.common.ExternalReferenceForbidden): 378 | untangle.parse(self.bad_dtd_xml) 379 | 380 | 381 | class TestEquals(unittest.TestCase): 382 | def test_equals(self): 383 | a = untangle.Element("a", "1") 384 | b = untangle.Element("b", "1") 385 | self.assertTrue(a == b) 386 | 387 | def test_list_equals(self): 388 | a = untangle.Element("a", "1") 389 | b = untangle.Element("b", "1") 390 | listA = [a, b] 391 | c = untangle.Element("c", "1") 392 | self.assertTrue(c in listA) 393 | 394 | 395 | class TestExternalEntityExpansion(unittest.TestCase): 396 | def test_xxe(self): 397 | # from https://pypi.org/project/defusedxml/#external-entity-expansion-remote 398 | with self.assertRaises(defusedxml.common.EntitiesForbidden): 399 | untangle.parse("tests/res/xxe.xml") 400 | 401 | 402 | if __name__ == "__main__": 403 | unittest.main() 404 | 405 | # vim: set expandtab ts=4 sw=4 406 | -------------------------------------------------------------------------------- /untangle.py: -------------------------------------------------------------------------------- 1 | """ 2 | untangle 3 | 4 | Converts xml to python objects. 5 | 6 | The only method you need to call is parse() 7 | 8 | Partially inspired by xml2obj 9 | (http://code.activestate.com/recipes/149368-xml2obj/) 10 | 11 | Author: Christian Stefanescu (http://0chris.com) 12 | License: MIT License - http://www.opensource.org/licenses/mit-license.php 13 | """ 14 | import os 15 | import keyword 16 | from defusedxml.sax import make_parser 17 | from xml.sax import handler 18 | 19 | 20 | try: 21 | from StringIO import StringIO 22 | except ImportError: 23 | from io import StringIO 24 | try: 25 | from types import StringTypes 26 | 27 | def is_string(x): 28 | return isinstance(x, StringTypes) 29 | 30 | except ImportError: 31 | 32 | def is_string(x): 33 | return isinstance(x, str) 34 | 35 | 36 | __version__ = "1.2.1" 37 | 38 | 39 | class Element(object): 40 | """ 41 | Representation of an XML element. 42 | """ 43 | 44 | def __init__(self, name, attributes): 45 | self._name = name 46 | self._attributes = attributes 47 | self.children = [] 48 | self.is_root = False 49 | self.cdata = "" 50 | 51 | def add_child(self, element): 52 | """ 53 | Store child elements. 54 | """ 55 | self.children.append(element) 56 | 57 | def add_cdata(self, cdata): 58 | """ 59 | Store cdata 60 | """ 61 | self.cdata = self.cdata + cdata 62 | 63 | def get_attribute(self, key): 64 | """ 65 | Get attributes by key 66 | """ 67 | return self._attributes.get(key) 68 | 69 | def get_elements(self, name=None): 70 | """ 71 | Find a child element by name 72 | """ 73 | if name: 74 | return [e for e in self.children if e._name == name] 75 | else: 76 | return self.children 77 | 78 | def __getitem__(self, key): 79 | return self.get_attribute(key) 80 | 81 | def __getattr__(self, key): 82 | matching_children = [x for x in self.children if x._name == key] 83 | if matching_children: 84 | if len(matching_children) == 1: 85 | self.__dict__[key] = matching_children[0] 86 | return matching_children[0] 87 | else: 88 | self.__dict__[key] = matching_children 89 | return matching_children 90 | else: 91 | raise AttributeError("'%s' has no attribute '%s'" % (self._name, key)) 92 | 93 | def __hasattribute__(self, name): 94 | if name in self.__dict__: 95 | return True 96 | return any(x._name == name for x in self.children) 97 | 98 | def __iter__(self): 99 | yield self 100 | 101 | def __str__(self): 102 | return "Element <%s> with attributes %s, children %s and cdata %s" % ( 103 | self._name, 104 | self._attributes, 105 | self.children, 106 | self.cdata, 107 | ) 108 | 109 | def __repr__(self): 110 | return "Element(name = %s, attributes = %s, cdata = %s)" % ( 111 | self._name, 112 | self._attributes, 113 | self.cdata, 114 | ) 115 | 116 | def __bool__(self): 117 | return self.is_root or self._name is not None 118 | 119 | __nonzero__ = __bool__ 120 | 121 | def __eq__(self, val): 122 | return self.cdata == val 123 | 124 | def __dir__(self): 125 | children_names = [x._name for x in self.children] 126 | return children_names 127 | 128 | def __len__(self): 129 | return len(self.children) 130 | 131 | def __contains__(self, key): 132 | return key in dir(self) 133 | 134 | 135 | class Handler(handler.ContentHandler): 136 | """ 137 | SAX handler which creates the Python object structure out of ``Element``s 138 | """ 139 | 140 | def __init__(self): 141 | self.root = Element(None, None) 142 | self.root.is_root = True 143 | self.elements = [] 144 | 145 | def startElement(self, name, attributes): 146 | name = name.replace("-", "_") 147 | name = name.replace(".", "_") 148 | name = name.replace(":", "_") 149 | 150 | # adding trailing _ for keywords 151 | if keyword.iskeyword(name): 152 | name += "_" 153 | 154 | attrs = dict() 155 | for k, v in attributes.items(): 156 | attrs[k] = v 157 | element = Element(name, attrs) 158 | if len(self.elements) > 0: 159 | self.elements[-1].add_child(element) 160 | else: 161 | self.root.add_child(element) 162 | self.elements.append(element) 163 | 164 | def endElement(self, name): 165 | self.elements.pop() 166 | 167 | def characters(self, cdata): 168 | self.elements[-1].add_cdata(cdata) 169 | 170 | 171 | def parse(filename, **parser_features): 172 | """ 173 | Interprets the given string as a filename, URL or XML data string, 174 | parses it and returns a Python object which represents the given 175 | document. 176 | 177 | Extra arguments to this function are treated as feature values that are 178 | passed to ``parser.setFeature()``. For example, ``feature_external_ges=False`` 179 | will set ``xml.sax.handler.feature_external_ges`` to False, disabling 180 | the parser's inclusion of external general (text) entities such as DTDs. 181 | 182 | Raises ``ValueError`` if the first argument is None / empty string. 183 | 184 | Raises ``AttributeError`` if a requested xml.sax feature is not found in 185 | ``xml.sax.handler``. 186 | 187 | Raises ``xml.sax.SAXParseException`` if something goes wrong 188 | during parsing. 189 | 190 | Raises ``defusedxml.common.EntitiesForbidden`` 191 | or ``defusedxml.common.ExternalReferenceForbidden`` 192 | when a potentially malicious entity load is attempted. See also 193 | https://github.com/tiran/defusedxml#attack-vectors 194 | """ 195 | if filename is None or (is_string(filename) and filename.strip()) == "": 196 | raise ValueError("parse() takes a filename, URL or XML string") 197 | parser = make_parser() 198 | for feature, value in parser_features.items(): 199 | parser.setFeature(getattr(handler, feature), value) 200 | sax_handler = Handler() 201 | parser.setContentHandler(sax_handler) 202 | if is_string(filename) and (os.path.exists(filename) or is_url(filename)): 203 | parser.parse(filename) 204 | else: 205 | if hasattr(filename, "read"): 206 | parser.parse(filename) 207 | else: 208 | parser.parse(StringIO(filename)) 209 | 210 | return sax_handler.root 211 | 212 | 213 | def is_url(string): 214 | """ 215 | Checks if the given string starts with 'http(s)'. 216 | """ 217 | try: 218 | return string.startswith("http://") or string.startswith("https://") 219 | except AttributeError: 220 | return False 221 | 222 | 223 | # vim: set expandtab ts=4 sw=4: 224 | -------------------------------------------------------------------------------- /untangle.rst: -------------------------------------------------------------------------------- 1 | untangle 2 | -------- 3 | .. image:: https://github.com/stchris/untangle/actions/workflows/build.yml/badge.svg 4 | 5 | untangle parses an XML document and returns a Python object which makes it 6 | easy to access the data you want. 7 | 8 | Example: 9 | 10 | :: 11 | 12 | import untangle 13 | obj = untangle.parse('') 14 | assert obj.root.child['name'] == u'child1' 15 | 16 | See http://github.com/stchris/untangle and 17 | http://readthedocs.org/docs/untangle/en/latest/ 18 | --------------------------------------------------------------------------------