├── .coveragerc ├── .gitignore ├── .gitlab-ci.yml ├── .travis.yml ├── CHANGES ├── LICENSE ├── MANIFEST.in ├── README.rst ├── docs ├── _static │ └── custom.css ├── _templates │ └── layout.html ├── changelog.rst ├── conf.py ├── css3.rst ├── extending.rst ├── hacking.rst ├── index.rst └── parsing.rst ├── setup.cfg ├── setup.py └── tinycss ├── __init__.py ├── color3.py ├── css21.py ├── decoding.py ├── fonts3.py ├── page3.py ├── parsing.py ├── speedups.pyx ├── tests ├── __init__.py ├── speed.py ├── test_api.py ├── test_color3.py ├── test_css21.py ├── test_decoding.py ├── test_fonts3.py ├── test_page3.py └── test_tokenizer.py ├── token_data.py ├── tokenizer.py └── version.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | 4 | [report] 5 | exclude_lines = 6 | pragma: no cover 7 | def __repr__ 8 | except ImportError 9 | omit = 10 | tinycss/tests/speed.py 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.c 3 | *.so 4 | *.egg-info 5 | /.coverage 6 | /htmlcov 7 | /build 8 | /dist 9 | /.tox 10 | /MANIFEST 11 | /docs/_build 12 | /env 13 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | before_script: 2 | - pip install -U setuptools 3 | - pip install Cython 4 | - pip install --upgrade -e .[test] 5 | 6 | .before_script_alpine: &before_alpine 7 | before_script: 8 | - apk add --no-cache openssl gcc musl-dev 9 | - pip install Cython setuptools 10 | - pip install --upgrade -e .[test] 11 | 12 | .test_template: &test 13 | script: 14 | - python setup.py test 15 | 16 | python 2.7alpine: 17 | image: python:2.7-alpine 18 | <<: *before_alpine 19 | <<: *test 20 | 21 | python 3.3alpine: 22 | image: python:3.3-alpine 23 | <<: *before_alpine 
24 | <<: *test 25 | 26 | python 3.4alpine: 27 | image: python:3.4-alpine 28 | <<: *before_alpine 29 | <<: *test 30 | 31 | python 3.5alpine: 32 | image: python:3.5-alpine 33 | <<: *before_alpine 34 | <<: *test 35 | 36 | python 3.6alpine: 37 | image: python:3.6-alpine 38 | <<: *before_alpine 39 | <<: *test 40 | 41 | python pypy: 42 | image: pypy:2 43 | <<: *test 44 | 45 | python pypy3: 46 | image: pypy:3 47 | <<: *test 48 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.7" 5 | - "3.3" 6 | - "3.4" 7 | - "3.5" 8 | - "pypy" 9 | 10 | install: 11 | - pip install Cython 12 | - pip install --upgrade -e .[test] 13 | 14 | script: 15 | - python setup.py test 16 | -------------------------------------------------------------------------------- /CHANGES: -------------------------------------------------------------------------------- 1 | tinycss changelog 2 | ================= 3 | 4 | 5 | Version 0.4 6 | ----------- 7 | 8 | Released on 2016-09-23. 9 | 10 | * Add an __eq__ operator to Token object. 11 | * Support Fonts 3. 12 | 13 | 14 | 15 | Version 0.3 16 | ----------- 17 | 18 | Released on 2012-09-18. 19 | 20 | * Fix a bug when parsing \5c (an escaped backslash.) 21 | 22 | 23 | 24 | Version 0.2 25 | ----------- 26 | 27 | Released on 2012-04-27. 28 | 29 | **Breaking changes:** 30 | 31 | * Remove the ``selectors3`` module. The functionality has moved to the 32 | `cssselect `_ project. 33 | * Simplify the API for :func:`~tinycss.make_parser`. 34 | 35 | 36 | Version 0.1.1 37 | ------------- 38 | 39 | Released on 2012-04-06. 40 | 41 | Bug fixes: 42 | 43 | * Error handling on unexpected end of stylesheet in an at-rule head 44 | * Fix the installation on ASCII-only locales 45 | 46 | 47 | Version 0.1 48 | ----------- 49 | 50 | Released on 2012-04-05. 51 | 52 | First release. 
Parser support for CSS 2.1, Selectors 3, Color 3 and 53 | Paged Media 3. 54 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 by Simon Sapin. 2 | 3 | Some rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | * The names of the contributors may not be used to endorse or 18 | promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst CHANGES LICENSE tox.ini .coveragerc tinycss/speedups.c 2 | recursive-include docs * 3 | prune docs/_build 4 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | tinycss: CSS parser for Python 2 | ============================== 3 | 4 | *tinycss* is a complete yet simple CSS parser for Python. It supports the full 5 | syntax and error handling for CSS 2.1 as well as some CSS 3 modules: 6 | 7 | * CSS Color 3 8 | * CSS Fonts 3 9 | * CSS Paged Media 3 10 | 11 | It is designed to be easy to extend for new CSS modules and syntax, 12 | and integrates well with cssselect_ for Selectors 3 support. 13 | 14 | Quick facts: 15 | 16 | * Free software: BSD licensed 17 | * Compatible with Python 2.7 and 3.x 18 | * Latest documentation `on python.org`_ 19 | * Source, issues and pull requests `on Github`_ 20 | * Releases `on PyPI`_ 21 | * Install with ``pip install tinycss`` 22 | 23 | .. _cssselect: http://packages.python.org/cssselect/ 24 | .. _on python.org: http://packages.python.org/tinycss/ 25 | .. _on Github: https://github.com/SimonSapin/tinycss/ 26 | .. 
_on PyPI: http://pypi.python.org/pypi/tinycss 27 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | div.body { 2 | text-align: left; 3 | } 4 | div.document p, div.document ul { 5 | margin-top: 0; 6 | margin-bottom: 1em; 7 | } 8 | div.document ul ul { 9 | margin-top: 0; 10 | margin-bottom: .5em; 11 | } 12 | .field-name { 13 | padding-right: .5em; 14 | } 15 | table.field-list p, table.field-list ul { 16 | margin-bottom: .5em; 17 | } 18 | table { 19 | border-collapse: collapse; 20 | margin-bottom: 1em; 21 | } 22 | table.docutils td, table.docutils th { 23 | padding: .2em .5em; 24 | } 25 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block extrahead %} 3 | 4 | {% endblock %} 5 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGES 2 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # 4 | # tinycss documentation build configuration file, created by 5 | # sphinx-quickstart on Tue Mar 27 14:20:34 2012. 6 | # 7 | # This file is execfile()d with the current directory set to its containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 
14 | 15 | import sys, os 16 | 17 | # If extensions (or modules to document with autodoc) are in another directory, 18 | # add these directories to sys.path here. If the directory is relative to the 19 | # documentation root, use os.path.abspath to make it absolute, like shown here. 20 | #sys.path.insert(0, os.path.abspath('.')) 21 | 22 | # -- General configuration ----------------------------------------------------- 23 | 24 | # If your documentation needs a minimal Sphinx version, state it here. 25 | #needs_sphinx = '1.0' 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be extensions 28 | # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 29 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 30 | 'sphinx.ext.viewcode', 'sphinx.ext.doctest'] 31 | 32 | # Add any paths that contain templates here, relative to this directory. 33 | templates_path = ['_templates'] 34 | 35 | # The suffix of source filenames. 36 | source_suffix = '.rst' 37 | 38 | # The encoding of source files. 39 | #source_encoding = 'utf-8-sig' 40 | 41 | # The master toctree document. 42 | master_doc = 'index' 43 | 44 | # General information about the project. 45 | project = 'tinycss' 46 | copyright = '2012, Simon Sapin' 47 | 48 | # The version info for the project you're documenting, acts as replacement for 49 | # |version| and |release|, also used in various other places throughout the 50 | # built documents. 51 | # 52 | # The full version, including alpha/beta/rc tags. 53 | #release = '0.1dev' 54 | import re 55 | with open(os.path.join(os.path.dirname(__file__), '..', 56 | 'tinycss', 'version.py')) as init_py: 57 | release = re.search("VERSION = '([^']+)'", init_py.read()).group(1) 58 | # The short X.Y version. 59 | version = release.rstrip('dev') 60 | 61 | # The language for content autogenerated by Sphinx. Refer to documentation 62 | # for a list of supported languages. 
63 | #language = None 64 | 65 | # There are two options for replacing |today|: either, you set today to some 66 | # non-false value, then it is used: 67 | #today = '' 68 | # Else, today_fmt is used as the format for a strftime call. 69 | #today_fmt = '%B %d, %Y' 70 | 71 | # List of patterns, relative to source directory, that match files and 72 | # directories to ignore when looking for source files. 73 | exclude_patterns = ['_build'] 74 | 75 | # The reST default role (used for this markup: `text`) to use for all documents. 76 | #default_role = None 77 | 78 | # If true, '()' will be appended to :func: etc. cross-reference text. 79 | #add_function_parentheses = True 80 | 81 | # If true, the current module name will be prepended to all description 82 | # unit titles (such as .. function::). 83 | #add_module_names = True 84 | 85 | # If true, sectionauthor and moduleauthor directives will be shown in the 86 | # output. They are ignored by default. 87 | #show_authors = False 88 | 89 | # The name of the Pygments (syntax highlighting) style to use. 90 | pygments_style = 'sphinx' 91 | 92 | # A list of ignored prefixes for module index sorting. 93 | #modindex_common_prefix = [] 94 | 95 | 96 | # -- Options for HTML output --------------------------------------------------- 97 | 98 | # The theme to use for HTML and HTML Help pages. See the documentation for 99 | # a list of builtin themes. 100 | #html_theme = 'agogo' 101 | 102 | # Theme options are theme-specific and customize the look and feel of a theme 103 | # further. For a list of options available for each theme, see the 104 | # documentation. 105 | #html_theme_options = {} 106 | 107 | # Add any paths that contain custom themes here, relative to this directory. 108 | #html_theme_path = [] 109 | 110 | # The name for this set of Sphinx documents. If None, it defaults to 111 | # " v documentation". 112 | #html_title = None 113 | 114 | # A shorter title for the navigation bar. Default is the same as html_title. 
115 | #html_short_title = None 116 | 117 | # The name of an image file (relative to this directory) to place at the top 118 | # of the sidebar. 119 | #html_logo = None 120 | 121 | # The name of an image file (within the static path) to use as favicon of the 122 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 123 | # pixels large. 124 | #html_favicon = None 125 | 126 | # Add any paths that contain custom static files (such as style sheets) here, 127 | # relative to this directory. They are copied after the builtin static files, 128 | # so a file named "default.css" will overwrite the builtin "default.css". 129 | html_static_path = ['_static'] 130 | 131 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 132 | # using the given strftime format. 133 | #html_last_updated_fmt = '%b %d, %Y' 134 | 135 | # If true, SmartyPants will be used to convert quotes and dashes to 136 | # typographically correct entities. 137 | #html_use_smartypants = True 138 | 139 | # Custom sidebar templates, maps document names to template names. 140 | #html_sidebars = {} 141 | 142 | # Additional templates that should be rendered to pages, maps page names to 143 | # template names. 144 | #html_additional_pages = {} 145 | 146 | # If false, no module index is generated. 147 | #html_domain_indices = True 148 | 149 | # If false, no index is generated. 150 | #html_use_index = True 151 | 152 | # If true, the index is split into individual pages for each letter. 153 | #html_split_index = False 154 | 155 | # If true, links to the reST sources are added to the pages. 156 | #html_show_sourcelink = True 157 | 158 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 159 | #html_show_sphinx = True 160 | 161 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
162 | #html_show_copyright = True 163 | 164 | # If true, an OpenSearch description file will be output, and all pages will 165 | # contain a tag referring to it. The value of this option must be the 166 | # base URL from which the finished HTML is served. 167 | #html_use_opensearch = '' 168 | 169 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 170 | #html_file_suffix = None 171 | 172 | # Output file base name for HTML help builder. 173 | htmlhelp_basename = 'tinycssdoc' 174 | 175 | 176 | # -- Options for LaTeX output -------------------------------------------------- 177 | 178 | latex_elements = { 179 | # The paper size ('letterpaper' or 'a4paper'). 180 | #'papersize': 'letterpaper', 181 | 182 | # The font size ('10pt', '11pt' or '12pt'). 183 | #'pointsize': '10pt', 184 | 185 | # Additional stuff for the LaTeX preamble. 186 | #'preamble': '', 187 | } 188 | 189 | # Grouping the document tree into LaTeX files. List of tuples 190 | # (source start file, target name, title, author, documentclass [howto/manual]). 191 | latex_documents = [ 192 | ('index', 'tinycss.tex', 'tinycss Documentation', 193 | 'Simon Sapin', 'manual'), 194 | ] 195 | 196 | # The name of an image file (relative to this directory) to place at the top of 197 | # the title page. 198 | #latex_logo = None 199 | 200 | # For "manual" documents, if this is true, then toplevel headings are parts, 201 | # not chapters. 202 | #latex_use_parts = False 203 | 204 | # If true, show page references after internal links. 205 | #latex_show_pagerefs = False 206 | 207 | # If true, show URL addresses after external links. 208 | #latex_show_urls = False 209 | 210 | # Documents to append as an appendix to all manuals. 211 | #latex_appendices = [] 212 | 213 | # If false, no module index is generated. 214 | #latex_domain_indices = True 215 | 216 | 217 | # -- Options for manual page output -------------------------------------------- 218 | 219 | # One entry per manual page. 
List of tuples 220 | # (source start file, name, description, authors, manual section). 221 | man_pages = [ 222 | ('index', 'tinycss', 'tinycss Documentation', 223 | ['Simon Sapin'], 1) 224 | ] 225 | 226 | # If true, show URL addresses after external links. 227 | #man_show_urls = False 228 | 229 | 230 | # -- Options for Texinfo output ------------------------------------------------ 231 | 232 | # Grouping the document tree into Texinfo files. List of tuples 233 | # (source start file, target name, title, author, 234 | # dir menu entry, description, category) 235 | texinfo_documents = [ 236 | ('index', 'tinycss', 'tinycss Documentation', 237 | 'Simon Sapin', 'tinycss', 'One line description of project.', 238 | 'Miscellaneous'), 239 | ] 240 | 241 | # Documents to append as an appendix to all manuals. 242 | #texinfo_appendices = [] 243 | 244 | # If false, no module index is generated. 245 | #texinfo_domain_indices = True 246 | 247 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 248 | #texinfo_show_urls = 'footnote' 249 | 250 | 251 | # Example configuration for intersphinx: refer to the Python standard library. 252 | intersphinx_mapping = {'http://docs.python.org/': None} 253 | -------------------------------------------------------------------------------- /docs/css3.rst: -------------------------------------------------------------------------------- 1 | CSS 3 Modules 2 | ============= 3 | 4 | .. _selectors3: 5 | 6 | Selectors 3 7 | ----------- 8 | 9 | .. currentmodule:: tinycss.css21 10 | 11 | On :attr:`RuleSet.selector`, the :meth:`~.token_data.TokenList.as_css` method 12 | can be used to serialize a selector back to an Unicode string. 13 | 14 | >>> import tinycss 15 | >>> stylesheet = tinycss.make_parser().parse_stylesheet( 16 | ... 
'div.error, #root > section:first-letter { color: red }') 17 | >>> selector_string = stylesheet.rules[0].selector.as_css() 18 | >>> selector_string 19 | 'div.error, #root > section:first-letter' 20 | 21 | This string can be parsed by cssselect_. The parsed objects have information 22 | about pseudo-elements and selector specificity. 23 | 24 | .. _cssselect: http://packages.python.org/cssselect/ 25 | 26 | >>> import cssselect 27 | >>> selectors = cssselect.parse(selector_string) 28 | >>> [s.specificity() for s in selectors] 29 | [(0, 1, 1), (1, 0, 2)] 30 | >>> [s.pseudo_element for s in selectors] 31 | [None, 'first-letter'] 32 | 33 | These objects can in turn be translated to XPath expressions. Note that 34 | the translation ignores pseudo-elements, you have to account for them 35 | somehow or reject selectors with pseudo-elements. 36 | 37 | >>> xpath = cssselect.HTMLTranslator().selector_to_xpath(selectors[1]) 38 | >>> xpath 39 | "descendant-or-self::*[@id = 'root']/section" 40 | 41 | Finally, the XPath expressions can be used with lxml_ to find the matching 42 | elements. 43 | 44 | >>> from lxml import etree 45 | >>> compiled_selector = etree.XPath(xpath) 46 | >>> document = etree.fromstring('''
47 | ... 48 | ...
49 | ... Lorem
ipsum
50 | ...
51 | ...
''') 52 | >>> [el.get('id') for el in compiled_selector(document)] 53 | ['head', 'content'] 54 | 55 | .. _lxml: http://lxml.de/xpathxslt.html#xpath 56 | 57 | Find more details in the `cssselect documentation`_. 58 | 59 | .. _cssselect documentation: http://packages.python.org/cssselect/ 60 | 61 | 62 | .. module:: tinycss.color3 63 | 64 | Color 3 65 | ------- 66 | 67 | This module implements parsing for the ** values, as defined in 68 | `CSS 3 Color `_. 69 | 70 | The (deprecated) CSS2 system colors are not supported, but you can 71 | easily test for them if you want as they are simple ``IDENT`` tokens. 72 | For example:: 73 | 74 | if token.type == 'IDENT' and token.value == 'ButtonText': 75 | return ... 76 | 77 | All other values types *are* supported: 78 | 79 | * Basic, extended (X11) and transparent color keywords; 80 | * 3-digit and 6-digit hexadecimal notations; 81 | * ``rgb()``, ``rgba()``, ``hsl()`` and ``hsla()`` functional notations. 82 | * ``currentColor`` 83 | 84 | This module does not integrate with a parser class. Instead, it provides 85 | a function that can parse tokens as found in :attr:`.css21.Declaration.value`, 86 | for example. 87 | 88 | .. autofunction:: parse_color 89 | .. autofunction:: parse_color_string 90 | .. autoclass:: RGBA 91 | 92 | 93 | .. module:: tinycss.page3 94 | 95 | Paged Media 3 96 | ------------- 97 | 98 | .. autoclass:: CSSPage3Parser 99 | .. autoclass:: MarginRule 100 | 101 | 102 | .. module:: tinycss.fonts3 103 | 104 | Fonts 3 105 | ------- 106 | 107 | .. autoclass:: CSSFonts3Parser 108 | .. autoclass:: FontFaceRule 109 | .. autoclass:: FontFeatureValuesRule 110 | .. autoclass:: FontFeatureRule 111 | 112 | 113 | Other CSS modules 114 | ----------------- 115 | 116 | To add support for new CSS syntax, see :ref:`extending`. 117 | -------------------------------------------------------------------------------- /docs/extending.rst: -------------------------------------------------------------------------------- 1 | .. 
_extending: 2 | 3 | Extending the parser 4 | ==================== 5 | 6 | Modules such as :mod:`.page3` extend the CSS 2.1 parser to add support for 7 | CSS 3 syntax. 8 | They do so by sub-classing :class:`.css21.CSS21Parser` and overriding/extending 9 | some of its methods. In fact, the parser is made of methods in a class 10 | (rather than a set of functions) solely to enable this kind of sub-classing. 11 | 12 | tinycss is designed to enable you to have parser subclasses outside of 13 | tinycss, without monkey-patching. If however the syntax you added is for a 14 | W3C specification, consider including your subclass in a new tinycss module 15 | and send a pull request: see :ref:`hacking`. 16 | 17 | 18 | .. currentmodule:: tinycss.css21 19 | 20 | Example: star hack 21 | ------------------ 22 | 23 | .. _star hack: https://en.wikipedia.org/wiki/CSS_filter#Star_hack 24 | 25 | The `star hack`_ uses invalid declarations that are only parsed by some 26 | versions of Internet Explorer. By default, tinycss ignores invalid 27 | declarations and logs an error. 28 | 29 | >>> from tinycss.css21 import CSS21Parser 30 | >>> css = '#elem { width: [W3C Model Width]; *width: [BorderBox Model]; }' 31 | >>> stylesheet = CSS21Parser().parse_stylesheet(css) 32 | >>> stylesheet.errors 33 | [ParseError('Parse error at 1:35, expected a property name, got DELIM',)] 34 | >>> [decl.name for decl in stylesheet.rules[0].declarations] 35 | ['width'] 36 | 37 | If for example a minifier based on tinycss wants to support the star hack, 38 | it can by extending the parser:: 39 | 40 | >>> class CSSStarHackParser(CSS21Parser): 41 | ... def parse_declaration(self, tokens): 42 | ... has_star_hack = (tokens[0].type == 'DELIM' and tokens[0].value == '*') 43 | ... if has_star_hack: 44 | ... tokens = tokens[1:] 45 | ... declaration = super(CSSStarHackParser, self).parse_declaration(tokens) 46 | ... declaration.has_star_hack = has_star_hack 47 | ... return declaration 48 | ... 
49 | >>> stylesheet = CSSStarHackParser().parse_stylesheet(css) 50 | >>> stylesheet.errors 51 | [] 52 | >>> [(d.name, d.has_star_hack) for d in stylesheet.rules[0].declarations] 53 | [('width', False), ('width', True)] 54 | 55 | This class extends the :meth:`~CSS21Parser.parse_declaration` method. 56 | It removes any ``*`` delimiter :class:`~.token_data.Token` at the start of 57 | a declaration, and adds a ``has_star_hack`` boolean attribute on parsed 58 | :class:`Declaration` objects: ``True`` if a ``*`` was removed, ``False`` for 59 | “normal” declarations. 60 | 61 | 62 | Parser methods 63 | -------------- 64 | 65 | In addition to methods of the user API (see :ref:`parsing`), here 66 | are the methods of the CSS 2.1 parser that can be overridden or extended: 67 | 68 | .. automethod:: CSS21Parser.parse_rules 69 | .. automethod:: CSS21Parser.read_at_rule 70 | .. automethod:: CSS21Parser.parse_at_rule 71 | .. automethod:: CSS21Parser.parse_media 72 | .. automethod:: CSS21Parser.parse_page_selector 73 | .. automethod:: CSS21Parser.parse_declarations_and_at_rules 74 | .. automethod:: CSS21Parser.parse_ruleset 75 | .. automethod:: CSS21Parser.parse_declaration_list 76 | .. automethod:: CSS21Parser.parse_declaration 77 | .. automethod:: CSS21Parser.parse_value_priority 78 | 79 | Unparsed at-rules 80 | ----------------- 81 | 82 | .. autoclass:: AtRule 83 | 84 | 85 | .. module:: tinycss.parsing 86 | 87 | Parsing helper functions 88 | ------------------------ 89 | 90 | The :mod:`tinycss.parsing` module contains helper functions for parsing 91 | tokens into a more structured form: 92 | 93 | .. autofunction:: strip_whitespace 94 | .. autofunction:: split_on_comma 95 | .. autofunction:: validate_value 96 | .. autofunction:: validate_block 97 | .. autofunction:: validate_any 98 | -------------------------------------------------------------------------------- /docs/hacking.rst: -------------------------------------------------------------------------------- 1 | .. 
_hacking: 2 | 3 | Hacking tinycss 4 | =============== 5 | 6 | .. highlight:: sh 7 | 8 | Bugs and feature requests 9 | ------------------------- 10 | 11 | Bug reports, feature requests and other issues should go to the 12 | `tinycss issue tracker`_ on Github. Any suggestion or feedback is welcome. 13 | Please include in full any error message, traceback or other detail that 14 | could be helpful. 15 | 16 | .. _tinycss issue tracker: https://github.com/SimonSapin/tinycss/issues 17 | 18 | 19 | Installing the development version 20 | ---------------------------------- 21 | 22 | First, get the latest git version:: 23 | 24 | git clone https://github.com/SimonSapin/tinycss.git 25 | cd tinycss 26 | 27 | You will need Cython_ and pytest_. Installing in a virtualenv_ is recommended:: 28 | 29 | virtualenv env 30 | . env/bin/activate 31 | pip install Cython pytest 32 | 33 | .. _Cython: http://cython.org/ 34 | .. _pytest: http://pytest.org/ 35 | .. _virtualenv: http://www.virtualenv.org/ 36 | 37 | Then, install tinycss in-place with pip’s *editable mode*. This will also 38 | build the accelerators:: 39 | 40 | pip install -e . 41 | 42 | 43 | Running the test suite 44 | ---------------------- 45 | 46 | Once you have everything installed (see above), just run pytest from the 47 | *tinycss* directory:: 48 | 49 | py.test 50 | 51 | If the accelerators are not available for some reason, use the 52 | ``TINYCSS_SKIP_SPEEDUPS_TESTS`` environment variable:: 53 | 54 | TINYCSS_SKIP_SPEEDUPS_TESTS=1 py.test 55 | 56 | If you get test failures on a fresh git clone, something may have gone wrong 57 | during the installation. Otherwise, you probably found a bug. Please 58 | `report it <#bugs-and-feature-requests>`_. 
59 | 60 | 61 | Test in multiple Python versions with tox 62 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 63 | 64 | tox_ automatically creates virtualenvs for various Python versions and 65 | runs the test suite there:: 66 | 67 | pip install tox 68 | 69 | Change to the project’s root directory and just run:: 70 | 71 | tox 72 | 73 | .. _tox: http://tox.testrun.org/ 74 | 75 | tinycss comes with a pre-configured ``tox.ini`` file to test in CPython 76 | 2.6, 2.7, 3.1 and 3.2 as well as PyPy. You can change that with the ``-e`` 77 | parameter:: 78 | 79 | tox -e py27,py32 80 | 81 | If you use ``--`` in the arguments passed to tox, further arguments 82 | are passed to the underlying ``py.test`` command:: 83 | 84 | tox -- -x --pdb 85 | 86 | 87 | Building the documentation 88 | -------------------------- 89 | 90 | This documentation is made with Sphinx_:: 91 | 92 | pip install Sphinx 93 | 94 | .. _Sphinx: http://sphinx.pocoo.org/ 95 | 96 | To build the HTML version of the documentation, change to the project’s root 97 | directory and run:: 98 | 99 | python setup.py build_sphinx 100 | 101 | The built HTML files are in ``docs/_build/html``. 102 | 103 | 104 | Making a patch and a pull request 105 | --------------------------------- 106 | 107 | If you would like to see something included in tinycss, please fork 108 | `the repository `_ on Github 109 | and make a pull request. Make sure to include tests for your change. 110 | 111 | 112 | Mailing-list 113 | ------------ 114 | 115 | tinycss does not have a mailing-list of its own for now, but the 116 | `WeasyPrint mailing-list `_ 117 | is appropriate to discuss it. 118 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. 
include:: ../README.rst 2 | 3 | 4 | Requirements 5 | ------------ 6 | 7 | `tinycss is tested `_ on CPython 2.7, 3.3, 8 | 3.4 and 3.5 as well as PyPy 5.3 and PyPy3 2.4; it should work on any 9 | implementation of **Python 2.7 or later version (including 3.x)** of the 10 | language. 11 | 12 | Cython_ is used for optional accelerators but is only required for 13 | development versions on tinycss. 14 | 15 | .. _Cython: http://cython.org/ 16 | 17 | 18 | Installation 19 | ------------ 20 | 21 | Installing with `pip `_ should Just Work: 22 | 23 | .. code-block:: sh 24 | 25 | pip install tinycss 26 | 27 | The release tarballs contain pre-*cythoned* C files for the accelerators: 28 | you will not need Cython to install like this. 29 | If the accelerators fail to build for some reason, tinycss will 30 | print a warning and fall back to a pure-Python installation. 31 | 32 | 33 | Documentation 34 | ------------- 35 | 36 | .. Have this page in the sidebar, but do not show a link to itself here: 37 | 38 | .. toctree:: 39 | :hidden: 40 | 41 | self 42 | 43 | .. toctree:: 44 | :maxdepth: 2 45 | 46 | parsing 47 | css3 48 | extending 49 | hacking 50 | changelog 51 | -------------------------------------------------------------------------------- /docs/parsing.rst: -------------------------------------------------------------------------------- 1 | Parsing with tinycss 2 | ==================== 3 | 4 | .. highlight:: python 5 | 6 | Quickstart 7 | ---------- 8 | 9 | Import *tinycss*, make a parser object with the features you want, 10 | and parse a stylesheet: 11 | 12 | .. doctest:: 13 | 14 | >>> import tinycss 15 | >>> parser = tinycss.make_parser('page3') 16 | >>> stylesheet = parser.parse_stylesheet_bytes(b'''@import "foo.css"; 17 | ... p.error { color: red } @lorem-ipsum; 18 | ... 
repeated every time for brevity)
autoclass:: Declaration() 78 | 79 | 80 | Tokens 81 | ------ 82 | 83 | Some parts of a stylesheet (such as selectors in CSS 2.1 or property values) 84 | are not parsed by tinycss. They appear as tokens instead. 85 | 86 | .. module:: tinycss.token_data 87 | 88 | .. autoclass:: TokenList() 89 | :member-order: bysource 90 | :members: 91 | .. autoclass:: Token() 92 | :members: 93 | .. autoclass:: tinycss.speedups.CToken() 94 | .. autoclass:: ContainerToken() 95 | :members: 96 | 97 | .. autoclass:: FunctionToken() 98 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [build_sphinx] 2 | source-dir = docs 3 | build-dir = docs/_build 4 | #all_files = 1 5 | 6 | [upload_sphinx] # Sphinx-PyPI-upload 7 | upload-dir = docs/_build/html 8 | 9 | [aliases] 10 | test = pytest 11 | 12 | [tool:pytest] 13 | addopts = --flake8 --isort --cov --ignore=test/cairosvg_reference 14 | norecursedirs = dist .cache .git build *.egg-info .eggs venv cairosvg_reference 15 | flake8-ignore = docs/conf.py ALL 16 | isort_ignore = 17 | docs/conf.py 18 | setup.py 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import re 3 | import sys 4 | from distutils.errors import ( 5 | CCompilerError, DistutilsExecError, DistutilsPlatformError) 6 | from setuptools import Extension, setup 7 | 8 | try: 9 | from Cython.Distutils import build_ext 10 | import Cython.Compiler.Version 11 | CYTHON_INSTALLED = True 12 | except ImportError: 13 | from distutils.command.build_ext import build_ext 14 | CYTHON_INSTALLED = False 15 | 16 | 17 | ext_errors = (CCompilerError, DistutilsExecError, DistutilsPlatformError) 18 | if sys.platform == 'win32' and sys.version_info > (2, 6): 19 | # 2.6's distutils.msvc9compiler can raise an IOError when failing to 20 | # 
find the compiler 21 | ext_errors += (IOError,) 22 | 23 | 24 | class BuildFailed(Exception): 25 | pass 26 | 27 | 28 | class ve_build_ext(build_ext): 29 | # This class allows C extension building to fail. 30 | 31 | def run(self): 32 | try: 33 | build_ext.run(self) 34 | except DistutilsPlatformError: 35 | raise BuildFailed 36 | 37 | def build_extension(self, ext): 38 | try: 39 | build_ext.build_extension(self, ext) 40 | except ext_errors: 41 | raise BuildFailed 42 | 43 | 44 | ROOT = os.path.dirname(__file__) 45 | with open(os.path.join(ROOT, 'tinycss', 'version.py')) as fd: 46 | VERSION = re.search("VERSION = '([^']+)'", fd.read()).group(1) 47 | 48 | with open(os.path.join(ROOT, 'README.rst'), 'rb') as fd: 49 | README = fd.read().decode('utf8') 50 | 51 | 52 | needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv) 53 | pytest_runner = ['pytest-runner'] if needs_pytest else [] 54 | 55 | 56 | def run_setup(with_extension): 57 | if with_extension: 58 | extension_path = os.path.join('tinycss', 'speedups') 59 | if CYTHON_INSTALLED: 60 | extension_path += '.pyx' 61 | print('Building with Cython %s.' % Cython.Compiler.Version.version) 62 | else: 63 | extension_path += '.c' 64 | if not os.path.exists(extension_path): 65 | print("WARNING: Trying to build without Cython, but " 66 | "pre-generated '%s' does not seem to be available." 
67 | % extension_path) 68 | else: 69 | print('Building without Cython.') 70 | kwargs = dict( 71 | cmdclass=dict(build_ext=ve_build_ext), 72 | ext_modules=[Extension('tinycss.speedups', 73 | sources=[extension_path])], 74 | ) 75 | else: 76 | kwargs = dict() 77 | 78 | setup( 79 | name='tinycss', 80 | version=VERSION, 81 | url='http://tinycss.readthedocs.io/', 82 | license='BSD', 83 | author='Simon Sapin', 84 | author_email='simon.sapin@exyr.org', 85 | description='tinycss is a complete yet simple CSS parser for Python.', 86 | long_description=README, 87 | classifiers=[ 88 | 'Development Status :: 4 - Beta', 89 | 'Intended Audience :: Developers', 90 | 'License :: OSI Approved :: BSD License', 91 | 'Programming Language :: Python :: 2', 92 | 'Programming Language :: Python :: 2.7', 93 | 'Programming Language :: Python :: 3', 94 | 'Programming Language :: Python :: 3.3', 95 | 'Programming Language :: Python :: 3.4', 96 | 'Programming Language :: Python :: 3.5', 97 | 'Programming Language :: Python :: Implementation :: CPython', 98 | 'Programming Language :: Python :: Implementation :: PyPy', 99 | ], 100 | setup_requires=pytest_runner, 101 | tests_require=[ 102 | 'pytest-cov', 'pytest-flake8', 'pytest-isort', 'pytest-runner'], 103 | extras_require={'test': ( 104 | 'pytest-runner', 'pytest-cov', 'pytest-flake8', 'pytest-isort')}, 105 | packages=['tinycss', 'tinycss.tests'], 106 | **kwargs 107 | ) 108 | 109 | 110 | IS_PYPY = hasattr(sys, 'pypy_translation_info') 111 | try: 112 | run_setup(not IS_PYPY) 113 | except BuildFailed: 114 | BUILD_EXT_WARNING = ('WARNING: The extension could not be compiled, ' 115 | 'speedups are not enabled.') 116 | print('*' * 75) 117 | print(BUILD_EXT_WARNING) 118 | print('Failure information, if any, is above.') 119 | print('Retrying the build without the Cython extension now.') 120 | print('*' * 75) 121 | 122 | run_setup(False) 123 | 124 | print('*' * 75) 125 | print(BUILD_EXT_WARNING) 126 | print('Plain-Python installation succeeded.') 127 | 
print('*' * 75) 128 | -------------------------------------------------------------------------------- /tinycss/__init__.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss 4 | ------- 5 | 6 | A CSS parser, and nothing else. 7 | 8 | :copyright: (c) 2012 by Simon Sapin. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from .version import VERSION 13 | 14 | from .css21 import CSS21Parser 15 | from .page3 import CSSPage3Parser 16 | from .fonts3 import CSSFonts3Parser 17 | 18 | 19 | __version__ = VERSION 20 | 21 | PARSER_MODULES = { 22 | 'page3': CSSPage3Parser, 23 | 'fonts3': CSSFonts3Parser, 24 | } 25 | 26 | 27 | def make_parser(*features, **kwargs): 28 | """Make a parser object with the chosen features. 29 | 30 | :param features: 31 | Positional arguments are base classes the new parser class will extend. 32 | The string ``'page3'`` is accepted as short for 33 | :class:`~page3.CSSPage3Parser`. 34 | The string ``'fonts3'`` is accepted as short for 35 | :class:`~fonts3.CSSFonts3Parser`. 36 | :param kwargs: 37 | Keyword arguments are passed to the parser’s constructor. 38 | :returns: 39 | An instance of a new subclass of :class:`CSS21Parser` 40 | 41 | """ 42 | if features: 43 | bases = tuple(PARSER_MODULES.get(f, f) for f in features) 44 | parser_class = type('CustomCSSParser', bases + (CSS21Parser,), {}) 45 | else: 46 | parser_class = CSS21Parser 47 | return parser_class(**kwargs) 48 | -------------------------------------------------------------------------------- /tinycss/color3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.colors3 4 | --------------- 5 | 6 | Parser for CSS 3 color values 7 | http://www.w3.org/TR/css3-color/ 8 | 9 | This module does not provide anything that integrates in a parser class, 10 | only functions that parse single tokens from (eg.) a property value. 
* Every other value (including keywords, HSL and HSLA) is converted
return its value clipped to the 0..1 range.
or None if the function token content does not match the description above.
(2, re.compile(r'^#([\da-f])([\da-f])([\da-f])$', re.I).match), 193 |     (1, re.compile(r'^#([\da-f]{2})([\da-f]{2})([\da-f]{2})$', re.I).match),
169)), 245 | ('darkgreen', (0, 100, 0)), 246 | ('darkgrey', (169, 169, 169)), 247 | ('darkkhaki', (189, 183, 107)), 248 | ('darkmagenta', (139, 0, 139)), 249 | ('darkolivegreen', (85, 107, 47)), 250 | ('darkorange', (255, 140, 0)), 251 | ('darkorchid', (153, 50, 204)), 252 | ('darkred', (139, 0, 0)), 253 | ('darksalmon', (233, 150, 122)), 254 | ('darkseagreen', (143, 188, 143)), 255 | ('darkslateblue', (72, 61, 139)), 256 | ('darkslategray', (47, 79, 79)), 257 | ('darkslategrey', (47, 79, 79)), 258 | ('darkturquoise', (0, 206, 209)), 259 | ('darkviolet', (148, 0, 211)), 260 | ('deeppink', (255, 20, 147)), 261 | ('deepskyblue', (0, 191, 255)), 262 | ('dimgray', (105, 105, 105)), 263 | ('dimgrey', (105, 105, 105)), 264 | ('dodgerblue', (30, 144, 255)), 265 | ('firebrick', (178, 34, 34)), 266 | ('floralwhite', (255, 250, 240)), 267 | ('forestgreen', (34, 139, 34)), 268 | ('fuchsia', (255, 0, 255)), 269 | ('gainsboro', (220, 220, 220)), 270 | ('ghostwhite', (248, 248, 255)), 271 | ('gold', (255, 215, 0)), 272 | ('goldenrod', (218, 165, 32)), 273 | ('gray', (128, 128, 128)), 274 | ('green', (0, 128, 0)), 275 | ('greenyellow', (173, 255, 47)), 276 | ('grey', (128, 128, 128)), 277 | ('honeydew', (240, 255, 240)), 278 | ('hotpink', (255, 105, 180)), 279 | ('indianred', (205, 92, 92)), 280 | ('indigo', (75, 0, 130)), 281 | ('ivory', (255, 255, 240)), 282 | ('khaki', (240, 230, 140)), 283 | ('lavender', (230, 230, 250)), 284 | ('lavenderblush', (255, 240, 245)), 285 | ('lawngreen', (124, 252, 0)), 286 | ('lemonchiffon', (255, 250, 205)), 287 | ('lightblue', (173, 216, 230)), 288 | ('lightcoral', (240, 128, 128)), 289 | ('lightcyan', (224, 255, 255)), 290 | ('lightgoldenrodyellow', (250, 250, 210)), 291 | ('lightgray', (211, 211, 211)), 292 | ('lightgreen', (144, 238, 144)), 293 | ('lightgrey', (211, 211, 211)), 294 | ('lightpink', (255, 182, 193)), 295 | ('lightsalmon', (255, 160, 122)), 296 | ('lightseagreen', (32, 178, 170)), 297 | ('lightskyblue', (135, 206, 250)), 298 | 
('lightslategray', (119, 136, 153)), 299 | ('lightslategrey', (119, 136, 153)), 300 | ('lightsteelblue', (176, 196, 222)), 301 | ('lightyellow', (255, 255, 224)), 302 | ('lime', (0, 255, 0)), 303 | ('limegreen', (50, 205, 50)), 304 | ('linen', (250, 240, 230)), 305 | ('magenta', (255, 0, 255)), 306 | ('maroon', (128, 0, 0)), 307 | ('mediumaquamarine', (102, 205, 170)), 308 | ('mediumblue', (0, 0, 205)), 309 | ('mediumorchid', (186, 85, 211)), 310 | ('mediumpurple', (147, 112, 219)), 311 | ('mediumseagreen', (60, 179, 113)), 312 | ('mediumslateblue', (123, 104, 238)), 313 | ('mediumspringgreen', (0, 250, 154)), 314 | ('mediumturquoise', (72, 209, 204)), 315 | ('mediumvioletred', (199, 21, 133)), 316 | ('midnightblue', (25, 25, 112)), 317 | ('mintcream', (245, 255, 250)), 318 | ('mistyrose', (255, 228, 225)), 319 | ('moccasin', (255, 228, 181)), 320 | ('navajowhite', (255, 222, 173)), 321 | ('navy', (0, 0, 128)), 322 | ('oldlace', (253, 245, 230)), 323 | ('olive', (128, 128, 0)), 324 | ('olivedrab', (107, 142, 35)), 325 | ('orange', (255, 165, 0)), 326 | ('orangered', (255, 69, 0)), 327 | ('orchid', (218, 112, 214)), 328 | ('palegoldenrod', (238, 232, 170)), 329 | ('palegreen', (152, 251, 152)), 330 | ('paleturquoise', (175, 238, 238)), 331 | ('palevioletred', (219, 112, 147)), 332 | ('papayawhip', (255, 239, 213)), 333 | ('peachpuff', (255, 218, 185)), 334 | ('peru', (205, 133, 63)), 335 | ('pink', (255, 192, 203)), 336 | ('plum', (221, 160, 221)), 337 | ('powderblue', (176, 224, 230)), 338 | ('purple', (128, 0, 128)), 339 | ('red', (255, 0, 0)), 340 | ('rosybrown', (188, 143, 143)), 341 | ('royalblue', (65, 105, 225)), 342 | ('saddlebrown', (139, 69, 19)), 343 | ('salmon', (250, 128, 114)), 344 | ('sandybrown', (244, 164, 96)), 345 | ('seagreen', (46, 139, 87)), 346 | ('seashell', (255, 245, 238)), 347 | ('sienna', (160, 82, 45)), 348 | ('silver', (192, 192, 192)), 349 | ('skyblue', (135, 206, 235)), 350 | ('slateblue', (106, 90, 205)), 351 | ('slategray', (112, 
128, 144)), 352 | ('slategrey', (112, 128, 144)), 353 | ('snow', (255, 250, 250)), 354 | ('springgreen', (0, 255, 127)), 355 | ('steelblue', (70, 130, 180)), 356 | ('tan', (210, 180, 140)), 357 | ('teal', (0, 128, 128)), 358 | ('thistle', (216, 191, 216)), 359 | ('tomato', (255, 99, 71)), 360 | ('turquoise', (64, 224, 208)), 361 | ('violet', (238, 130, 238)), 362 | ('wheat', (245, 222, 179)), 363 | ('white', (255, 255, 255)), 364 | ('whitesmoke', (245, 245, 245)), 365 | ('yellow', (255, 255, 0)), 366 | ('yellowgreen', (154, 205, 50)), 367 | ] 368 | 369 | 370 | # (r, g, b, a) in 0..1 or a string marker 371 | SPECIAL_COLOR_KEYWORDS = { 372 | 'currentcolor': 'currentColor', 373 | 'transparent': RGBA(0., 0., 0., 0.), 374 | } 375 | 376 | 377 | # RGBA namedtuples of (r, g, b, a) in 0..1 or a string marker 378 | COLOR_KEYWORDS = SPECIAL_COLOR_KEYWORDS.copy() 379 | COLOR_KEYWORDS.update( 380 | # 255 maps to 1, 0 to 0, the rest is linear. 381 | (keyword, RGBA(r / 255., g / 255., b / 255., 1.)) 382 | for keyword, (r, g, b) in itertools.chain( 383 | BASIC_COLOR_KEYWORDS, EXTENDED_COLOR_KEYWORDS)) 384 | -------------------------------------------------------------------------------- /tinycss/css21.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.css21 4 | ------------- 5 | 6 | Parser for CSS 2.1 7 | http://www.w3.org/TR/CSS21/syndata.html 8 | 9 | :copyright: (c) 2012 by Simon Sapin. 10 | :license: BSD, see LICENSE for more details. 
11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | from itertools import chain, islice 16 | 17 | from .decoding import decode 18 | from .parsing import ( 19 | ParseError, remove_whitespace, split_on_comma, strip_whitespace, 20 | validate_any, validate_value) 21 | from .token_data import TokenList 22 | from .tokenizer import tokenize_grouped 23 | 24 | 25 | # stylesheet : [ CDO | CDC | S | statement ]*; 26 | # statement : ruleset | at-rule; 27 | # at-rule : ATKEYWORD S* any* [ block | ';' S* ]; 28 | # block : '{' S* [ any | block | ATKEYWORD S* | ';' S* ]* '}' S*; 29 | # ruleset : selector? '{' S* declaration? [ ';' S* declaration? ]* '}' S*; 30 | # selector : any+; 31 | # declaration : property S* ':' S* value; 32 | # property : IDENT; 33 | # value : [ any | block | ATKEYWORD S* ]+; 34 | # any : [ IDENT | NUMBER | PERCENTAGE | DIMENSION | STRING 35 | # | DELIM | URI | HASH | UNICODE-RANGE | INCLUDES 36 | # | DASHMATCH | ':' | FUNCTION S* [any|unused]* ')' 37 | # | '(' S* [any|unused]* ')' | '[' S* [any|unused]* ']' 38 | # ] S*; 39 | # unused : block | ATKEYWORD S* | ';' S* | CDO S* | CDC S*; 40 | 41 | 42 | class Stylesheet(object): 43 | """ 44 | A parsed CSS stylesheet. 45 | 46 | .. attribute:: rules 47 | 48 | A mixed list, in source order, of :class:`RuleSet` and various 49 | at-rules such as :class:`ImportRule`, :class:`MediaRule` 50 | and :class:`PageRule`. 51 | Use their :obj:`at_keyword` attribute to distinguish them. 52 | 53 | .. attribute:: errors 54 | 55 | A list of :class:`~.parsing.ParseError`. Invalid rules and declarations 56 | are ignored, with the details logged in this list. 57 | 58 | .. attribute:: encoding 59 | 60 | The character encoding that was used to decode the stylesheet 61 | from bytes, or ``None`` for Unicode stylesheets. 
62 | 63 | """ 64 | def __init__(self, rules, errors, encoding): 65 | self.rules = rules 66 | self.errors = errors 67 | self.encoding = encoding 68 | 69 | def __repr__(self): 70 | return '<{0.__class__.__name__} {1} rules {2} errors>'.format( 71 | self, len(self.rules), len(self.errors)) 72 | 73 | 74 | class AtRule(object): 75 | """ 76 | An unparsed at-rule. 77 | 78 | .. attribute:: at_keyword 79 | 80 | The normalized (lower-case) at-keyword as a string. Eg: ``'@page'`` 81 | 82 | .. attribute:: head 83 | 84 | The part of the at-rule between the at-keyword and the ``{`` 85 | marking the body, or the ``;`` marking the end of an at-rule without 86 | a body. A :class:`~.token_data.TokenList`. 87 | 88 | .. attribute:: body 89 | 90 | The content of the body between ``{`` and ``}`` as a 91 | :class:`~.token_data.TokenList`, or ``None`` if there is no body 92 | (ie. if the rule ends with ``;``). 93 | 94 | The head was validated against the core grammar but **not** the body, 95 | as the body might contain declarations. In case of an error in a 96 | declaration, parsing should continue from the next declaration. 97 | The whole rule should not be ignored as it would be for an error 98 | in the head. 99 | 100 | These at-rules are expected to be parsed further before reaching 101 | the user API. 102 | 103 | """ 104 | def __init__(self, at_keyword, head, body, line, column): 105 | self.at_keyword = at_keyword 106 | self.head = TokenList(head) 107 | self.body = TokenList(body) if body is not None else body 108 | self.line = line 109 | self.column = column 110 | 111 | def __repr__(self): 112 | return ('<{0.__class__.__name__} {0.line}:{0.column} {0.at_keyword}>' 113 | .format(self)) 114 | 115 | 116 | class RuleSet(object): 117 | """A ruleset. 118 | 119 | .. attribute:: at_keyword 120 | 121 | Always ``None``. Helps to tell rulesets apart from at-rules. 122 | 123 | .. attribute:: selector 124 | 125 | The selector as a :class:`~.token_data.TokenList`. 
126 | In CSS 3, this is actually called a selector group. 127 | 128 | ``rule.selector.as_css()`` gives the selector as a string. 129 | This string can be used with *cssselect*, see :ref:`selectors3`. 130 | 131 | .. attribute:: declarations 132 | 133 | The list of :class:`Declaration`, in source order. 134 | 135 | """ 136 | 137 | at_keyword = None 138 | 139 | def __init__(self, selector, declarations, line, column): 140 | self.selector = TokenList(selector) 141 | self.declarations = declarations 142 | self.line = line 143 | self.column = column 144 | 145 | def __repr__(self): 146 | return ('<{0.__class__.__name__} at {0.line}:{0.column} {1}>' 147 | .format(self, self.selector.as_css())) 148 | 149 | 150 | class Declaration(object): 151 | """A property declaration. 152 | 153 | .. attribute:: name 154 | 155 | The property name as a normalized (lower-case) string. 156 | 157 | .. attribute:: value 158 | 159 | The property value as a :class:`~.token_data.TokenList`. 160 | 161 | The value is not parsed. UAs using tinycss may only support 162 | some properties or some values and tinycss does not know which. 163 | They need to parse values themselves and ignore declarations with 164 | unknown or unsupported properties or values, and fall back 165 | on any previous declaration. 166 | 167 | :mod:`tinycss.color3` parses color values, but other values 168 | will need specific parsing/validation code. 169 | 170 | .. attribute:: priority 171 | 172 | Either the string ``'important'`` or ``None``. 173 | 174 | """ 175 | def __init__(self, name, value, priority, line, column): 176 | self.name = name 177 | self.value = TokenList(value) 178 | self.priority = priority 179 | self.line = line 180 | self.column = column 181 | 182 | def __repr__(self): 183 | priority = ' !' 
The list of :class:`RuleSet` and various at-rules inside the @media
Remove any valid @charset at the beginning of a token stream.
323 | 324 | Note that property values are still not parsed, as UAs using this 325 | parser may only support some properties or some values. 326 | 327 | Currently the parser holds no state. It being a class only allows 328 | subclassing and overriding its methods. 329 | 330 | """ 331 | 332 | # User API: 333 | 334 | def parse_stylesheet_file(self, css_file, protocol_encoding=None, 335 | linking_encoding=None, document_encoding=None): 336 | """Parse a stylesheet from a file or filename. 337 | 338 | Character encoding-related parameters and behavior are the same 339 | as in :meth:`parse_stylesheet_bytes`. 340 | 341 | :param css_file: 342 | Either a file (any object with a :meth:`~file.read` method) 343 | or a filename. 344 | :return: 345 | A :class:`Stylesheet`. 346 | 347 | """ 348 | if hasattr(css_file, 'read'): 349 | css_bytes = css_file.read() 350 | else: 351 | with open(css_file, 'rb') as fd: 352 | css_bytes = fd.read() 353 | return self.parse_stylesheet_bytes(css_bytes, protocol_encoding, 354 | linking_encoding, document_encoding) 355 | 356 | def parse_stylesheet_bytes(self, css_bytes, protocol_encoding=None, 357 | linking_encoding=None, document_encoding=None): 358 | """Parse a stylesheet from a byte string. 359 | 360 | The character encoding is determined from the passed metadata and the 361 | ``@charset`` rule in the stylesheet (if any). 362 | If no encoding information is available or decoding fails, 363 | decoding defaults to UTF-8 and then fall back on ISO-8859-1. 364 | 365 | :param css_bytes: 366 | A CSS stylesheet as a byte string. 367 | :param protocol_encoding: 368 | The "charset" parameter of a "Content-Type" HTTP header (if any), 369 | or similar metadata for other protocols. 370 | :param linking_encoding: 371 | ```` or other metadata from the linking mechanism 372 | (if any) 373 | :param document_encoding: 374 | Encoding of the referring style sheet or document (if any) 375 | :return: 376 | A :class:`Stylesheet`. 
377 | 378 | """ 379 | css_unicode, encoding = decode(css_bytes, protocol_encoding, 380 | linking_encoding, document_encoding) 381 | return self.parse_stylesheet(css_unicode, encoding=encoding) 382 | 383 | def parse_stylesheet(self, css_unicode, encoding=None): 384 | """Parse a stylesheet from an Unicode string. 385 | 386 | :param css_unicode: 387 | A CSS stylesheet as an unicode string. 388 | :param encoding: 389 | The character encoding used to decode the stylesheet from bytes, 390 | if any. 391 | :return: 392 | A :class:`Stylesheet`. 393 | 394 | """ 395 | tokens = tokenize_grouped(css_unicode) 396 | if encoding: 397 | tokens = _remove_at_charset(tokens) 398 | rules, errors = self.parse_rules(tokens, context='stylesheet') 399 | return Stylesheet(rules, errors, encoding) 400 | 401 | def parse_style_attr(self, css_source): 402 | """Parse a "style" attribute (eg. of an HTML element). 403 | 404 | This method only accepts Unicode as the source (HTML) document 405 | is supposed to handle the character encoding. 406 | 407 | :param css_source: 408 | The attribute value, as an unicode string. 409 | :return: 410 | A tuple of the list of valid :class:`Declaration` and 411 | a list of :class:`~.parsing.ParseError`. 412 | """ 413 | return self.parse_declaration_list(tokenize_grouped(css_source)) 414 | 415 | # API for subclasses: 416 | 417 | def parse_rules(self, tokens, context): 418 | """Parse a sequence of rules (rulesets and at-rules). 419 | 420 | :param tokens: 421 | An iterable of tokens. 422 | :param context: 423 | Either ``'stylesheet'`` or an at-keyword such as ``'@media'``. 424 | (Most at-rules are only allowed in some contexts.) 425 | :return: 426 | A tuple of a list of parsed rules and a list of 427 | :class:`~.parsing.ParseError`. 
428 | 429 | """ 430 | rules = [] 431 | errors = [] 432 | tokens = iter(tokens) 433 | for token in tokens: 434 | if token.type not in ('S', 'CDO', 'CDC'): 435 | try: 436 | if token.type == 'ATKEYWORD': 437 | rule = self.read_at_rule(token, tokens) 438 | result = self.parse_at_rule( 439 | rule, rules, errors, context) 440 | rules.append(result) 441 | else: 442 | rule, rule_errors = self.parse_ruleset(token, tokens) 443 | rules.append(rule) 444 | errors.extend(rule_errors) 445 | except ParseError as exc: 446 | errors.append(exc) 447 | # Skip the entire rule 448 | return rules, errors 449 | 450 | def read_at_rule(self, at_keyword_token, tokens): 451 | """Read an at-rule from a token stream. 452 | 453 | :param at_keyword_token: 454 | The ATKEYWORD token that starts this at-rule 455 | You may have read it already to distinguish the rule 456 | from a ruleset. 457 | :param tokens: 458 | An iterator of subsequent tokens. Will be consumed just enough 459 | for one at-rule. 460 | :return: 461 | An unparsed :class:`AtRule`. 462 | :raises: 463 | :class:`~.parsing.ParseError` if the head is invalid for the core 464 | grammar. The body is **not** validated. See :class:`AtRule`. 465 | 466 | """ 467 | # CSS syntax is case-insensitive 468 | at_keyword = at_keyword_token.value.lower() 469 | head = [] 470 | # For the ParseError in case `tokens` is empty: 471 | token = at_keyword_token 472 | for token in tokens: 473 | if token.type in '{;': 474 | break 475 | # Ignore white space just after the at-keyword. 476 | else: 477 | head.append(token) 478 | # On unexpected end of stylesheet, pretend that a ';' was there 479 | head = strip_whitespace(head) 480 | for head_token in head: 481 | validate_any(head_token, 'at-rule head') 482 | body = token.content if token.type == '{' else None 483 | return AtRule(at_keyword, head, body, 484 | at_keyword_token.line, at_keyword_token.column) 485 | 486 | def parse_at_rule(self, rule, previous_rules, errors, context): 487 | """Parse an at-rule. 
488 | 489 | Subclasses that override this method must use ``super()`` and 490 | pass its return value for at-rules they do not know. 491 | 492 | In CSS 2.1, this method handles @charset, @import, @media and @page 493 | rules. 494 | 495 | :param rule: 496 | An unparsed :class:`AtRule`. 497 | :param previous_rules: 498 | The list of at-rules and rulesets that have been parsed so far 499 | in this context. This list can be used to decide if the current 500 | rule is valid. (For example, @import rules are only allowed 501 | before anything but a @charset rule.) 502 | :param context: 503 | Either ``'stylesheet'`` or an at-keyword such as ``'@media'``. 504 | (Most at-rules are only allowed in some contexts.) 505 | :raises: 506 | :class:`~.parsing.ParseError` if the rule is invalid. 507 | :return: 508 | A parsed at-rule 509 | 510 | """ 511 | if rule.at_keyword == '@page': 512 | if context != 'stylesheet': 513 | raise ParseError(rule, '@page rule not allowed in ' + context) 514 | selector, specificity = self.parse_page_selector(rule.head) 515 | if rule.body is None: 516 | raise ParseError( 517 | rule, 'invalid {0} rule: missing block'.format( 518 | rule.at_keyword)) 519 | declarations, at_rules, rule_errors = \ 520 | self.parse_declarations_and_at_rules(rule.body, '@page') 521 | errors.extend(rule_errors) 522 | return PageRule(selector, specificity, declarations, at_rules, 523 | rule.line, rule.column) 524 | 525 | elif rule.at_keyword == '@media': 526 | if context != 'stylesheet': 527 | raise ParseError(rule, '@media rule not allowed in ' + context) 528 | if not rule.head: 529 | raise ParseError(rule, 'expected media types for @media') 530 | media = self.parse_media(rule.head) 531 | if rule.body is None: 532 | raise ParseError( 533 | rule, 'invalid {0} rule: missing block'.format( 534 | rule.at_keyword)) 535 | rules, rule_errors = self.parse_rules(rule.body, '@media') 536 | errors.extend(rule_errors) 537 | return MediaRule(media, rules, rule.line, rule.column) 538 | 539 | 
elif rule.at_keyword == '@import': 540 | if context != 'stylesheet': 541 | raise ParseError( 542 | rule, '@import rule not allowed in ' + context) 543 | for previous_rule in previous_rules: 544 | if previous_rule.at_keyword not in ('@charset', '@import'): 545 | if previous_rule.at_keyword: 546 | type_ = 'an {0} rule'.format(previous_rule.at_keyword) 547 | else: 548 | type_ = 'a ruleset' 549 | raise ParseError( 550 | previous_rule, 551 | '@import rule not allowed after ' + type_) 552 | head = rule.head 553 | if not head: 554 | raise ParseError( 555 | rule, 'expected URI or STRING for @import rule') 556 | if head[0].type not in ('URI', 'STRING'): 557 | raise ParseError( 558 | rule, 'expected URI or STRING for @import rule, got ' + 559 | head[0].type) 560 | uri = head[0].value 561 | media = self.parse_media(strip_whitespace(head[1:])) 562 | if rule.body is not None: 563 | # The position of the ';' token would be best, but we don’t 564 | # have it anymore here. 565 | raise ParseError(head[-1], "expected ';', got a block") 566 | return ImportRule(uri, media, rule.line, rule.column) 567 | 568 | elif rule.at_keyword == '@charset': 569 | raise ParseError(rule, 'mis-placed or malformed @charset rule') 570 | 571 | else: 572 | raise ParseError( 573 | rule, 'unknown at-rule in {0} context: {1}'.format( 574 | context, rule.at_keyword)) 575 | 576 | def parse_media(self, tokens): 577 | """For CSS 2.1, parse a list of media types. 578 | 579 | Media Queries are expected to override this. 
580 | 581 | :param tokens: 582 | A list of tokens 583 | :raises: 584 | :class:`~.parsing.ParseError` on invalid media types/queries 585 | :returns: 586 | For CSS 2.1, a list of media types as strings 587 | """ 588 | if not tokens: 589 | return ['all'] 590 | media_types = [] 591 | for part in split_on_comma(remove_whitespace(tokens)): 592 | types = [token.type for token in part] 593 | if types == ['IDENT']: 594 | media_types.append(part[0].value) 595 | else: 596 | raise ParseError( 597 | tokens[0], 'expected a media type' + 598 | ((', got ' + ', '.join(types)) if types else '')) 599 | return media_types 600 | 601 | def parse_page_selector(self, tokens): 602 | """Parse an @page selector. 603 | 604 | :param tokens: 605 | An iterable of token, typically from the ``head`` attribute of 606 | an unparsed :class:`AtRule`. 607 | :returns: 608 | A page selector. For CSS 2.1, this is ``'first'``, ``'left'``, 609 | ``'right'`` or ``None``. 610 | :raises: 611 | :class:`~.parsing.ParseError` on invalid selectors 612 | 613 | """ 614 | if not tokens: 615 | return None, (0, 0) 616 | if (len(tokens) == 2 and tokens[0].type == ':' and 617 | tokens[1].type == 'IDENT'): 618 | pseudo_class = tokens[1].value 619 | specificity = { 620 | 'first': (1, 0), 'left': (0, 1), 'right': (0, 1), 621 | }.get(pseudo_class) 622 | if specificity: 623 | return pseudo_class, specificity 624 | raise ParseError(tokens[0], 'invalid @page selector') 625 | 626 | def parse_declarations_and_at_rules(self, tokens, context): 627 | """Parse a mixed list of declarations and at rules, as found eg. 628 | in the body of an @page rule. 629 | 630 | Note that to add supported at-rules inside @page, 631 | :class:`~.page3.CSSPage3Parser` extends :meth:`parse_at_rule`, 632 | not this method. 633 | 634 | :param tokens: 635 | An iterable of token, typically from the ``body`` attribute of 636 | an unparsed :class:`AtRule`. 637 | :param context: 638 | An at-keyword such as ``'@page'``. 
639 | (Most at-rules are only allowed in some contexts.) 640 | :returns: 641 | A tuple of: 642 | 643 | * A list of :class:`Declaration` 644 | * A list of parsed at-rules (empty for CSS 2.1) 645 | * A list of :class:`~.parsing.ParseError` 646 | 647 | """ 648 | at_rules = [] 649 | declarations = [] 650 | errors = [] 651 | tokens = iter(tokens) 652 | for token in tokens: 653 | if token.type == 'ATKEYWORD': 654 | try: 655 | rule = self.read_at_rule(token, tokens) 656 | result = self.parse_at_rule( 657 | rule, at_rules, errors, context) 658 | at_rules.append(result) 659 | except ParseError as err: 660 | errors.append(err) 661 | elif token.type != 'S': 662 | declaration_tokens = [] 663 | while token and token.type != ';': 664 | declaration_tokens.append(token) 665 | token = next(tokens, None) 666 | if declaration_tokens: 667 | try: 668 | declarations.append( 669 | self.parse_declaration(declaration_tokens)) 670 | except ParseError as err: 671 | errors.append(err) 672 | return declarations, at_rules, errors 673 | 674 | def parse_ruleset(self, first_token, tokens): 675 | """Parse a ruleset: a selector followed by declaration block. 676 | 677 | :param first_token: 678 | The first token of the ruleset (probably of the selector). 679 | You may have read it already to distinguish the rule 680 | from an at-rule. 681 | :param tokens: 682 | an iterator of subsequent tokens. Will be consumed just enough 683 | for one ruleset. 684 | :return: 685 | a tuple of a :class:`RuleSet` and an error list. 686 | The errors are recovered :class:`~.parsing.ParseError` in 687 | declarations. (Parsing continues from the next declaration on such 688 | errors.) 689 | :raises: 690 | :class:`~.parsing.ParseError` if the selector is invalid for the 691 | core grammar. 692 | Note a that a selector can be valid for the core grammar but 693 | not for CSS 2.1 or another level. 
694 | 695 | """ 696 | selector = [] 697 | for token in chain([first_token], tokens): 698 | if token.type == '{': 699 | # Parse/validate once we’ve read the whole rule 700 | selector = strip_whitespace(selector) 701 | if not selector: 702 | raise ParseError(first_token, 'empty selector') 703 | for selector_token in selector: 704 | validate_any(selector_token, 'selector') 705 | declarations, errors = self.parse_declaration_list( 706 | token.content) 707 | ruleset = RuleSet(selector, declarations, 708 | first_token.line, first_token.column) 709 | return ruleset, errors 710 | else: 711 | selector.append(token) 712 | raise ParseError(token, 'no declaration block found for ruleset') 713 | 714 | def parse_declaration_list(self, tokens): 715 | """Parse a ``;`` separated declaration list. 716 | 717 | You may want to use :meth:`parse_declarations_and_at_rules` (or 718 | some other method that uses :func:`parse_declaration` directly) 719 | instead if you have not just declarations in the same context. 720 | 721 | :param tokens: 722 | an iterable of tokens. Should stop at (before) the end 723 | of the block, as marked by ``}``. 724 | :return: 725 | a tuple of the list of valid :class:`Declaration` and a list 726 | of :class:`~.parsing.ParseError` 727 | 728 | """ 729 | # split at ';' 730 | parts = [] 731 | this_part = [] 732 | for token in tokens: 733 | if token.type == ';': 734 | parts.append(this_part) 735 | this_part = [] 736 | else: 737 | this_part.append(token) 738 | parts.append(this_part) 739 | 740 | declarations = [] 741 | errors = [] 742 | for tokens in parts: 743 | tokens = strip_whitespace(tokens) 744 | if tokens: 745 | try: 746 | declarations.append(self.parse_declaration(tokens)) 747 | except ParseError as exc: 748 | errors.append(exc) 749 | # Skip the entire declaration 750 | return declarations, errors 751 | 752 | def parse_declaration(self, tokens): 753 | """Parse a single declaration. 754 | 755 | :param tokens: 756 | an iterable of at least one token. 
Should stop at (before) 757 | the end of the declaration, as marked by a ``;`` or ``}``. 758 | Empty declarations (ie. consecutive ``;`` with only white space 759 | in-between) should be skipped earlier and not passed to 760 | this method. 761 | :returns: 762 | a :class:`Declaration` 763 | :raises: 764 | :class:`~.parsing.ParseError` if the tokens do not match the 765 | 'declaration' production of the core grammar. 766 | 767 | """ 768 | tokens = iter(tokens) 769 | 770 | name_token = next(tokens) # assume there is at least one 771 | if name_token.type == 'IDENT': 772 | # CSS syntax is case-insensitive 773 | property_name = name_token.value.lower() 774 | else: 775 | raise ParseError( 776 | name_token, 'expected a property name, got {0}'.format( 777 | name_token.type)) 778 | 779 | token = name_token # In case ``tokens`` is now empty 780 | for token in tokens: 781 | if token.type == ':': 782 | break 783 | elif token.type != 'S': 784 | raise ParseError( 785 | token, "expected ':', got {0}".format(token.type)) 786 | else: 787 | raise ParseError(token, "expected ':'") 788 | 789 | value = strip_whitespace(list(tokens)) 790 | if not value: 791 | raise ParseError(token, 'expected a property value') 792 | validate_value(value) 793 | value, priority = self.parse_value_priority(value) 794 | return Declaration( 795 | property_name, value, priority, name_token.line, name_token.column) 796 | 797 | def parse_value_priority(self, tokens): 798 | """Separate any ``!important`` marker at the end of a property value. 799 | 800 | :param tokens: 801 | A list of tokens for the property value. 802 | :returns: 803 | A tuple of the actual property value (a list of tokens) 804 | and the :attr:`~Declaration.priority`. 
805 | """ 806 | value = list(tokens) 807 | # Walk the token list from the end 808 | token = value.pop() 809 | if token.type == 'IDENT' and token.value.lower() == 'important': 810 | while value: 811 | token = value.pop() 812 | if token.type == 'DELIM' and token.value == '!': 813 | # Skip any white space before the '!' 814 | while value and value[-1].type == 'S': 815 | value.pop() 816 | if not value: 817 | raise ParseError( 818 | token, 'expected a value before !important') 819 | return value, 'important' 820 | # Skip white space between '!' and 'important' 821 | elif token.type != 'S': 822 | break 823 | return tokens, None 824 | -------------------------------------------------------------------------------- /tinycss/decoding.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.decoding 4 | ---------------- 5 | 6 | Decoding stylesheets from bytes to Unicode. 7 | http://www.w3.org/TR/CSS21/syndata.html#charset 8 | 9 | :copyright: (c) 2012 by Simon Sapin. 10 | :license: BSD, see LICENSE for more details. 11 | """ 12 | 13 | from __future__ import unicode_literals 14 | 15 | import operator 16 | import re 17 | from binascii import unhexlify 18 | 19 | __all__ = ['decode'] # Everything else is implementation detail 20 | 21 | 22 | def decode(css_bytes, protocol_encoding=None, 23 | linking_encoding=None, document_encoding=None): 24 | """ 25 | Determine the character encoding from the passed metadata and the 26 | ``@charset`` rule in the stylesheet (if any); and decode accordingly. 27 | If no encoding information is available or decoding fails, 28 | decoding defaults to UTF-8 and then fall back on ISO-8859-1. 29 | 30 | :param css_bytes: 31 | a CSS stylesheet as a byte string 32 | :param protocol_encoding: 33 | The "charset" parameter of a "Content-Type" HTTP header (if any), 34 | or similar metadata for other protocols. 
35 | :param linking_encoding: 36 | ```` or other metadata from the linking mechanism 37 | (if any) 38 | :param document_encoding: 39 | Encoding of the referring style sheet or document (if any) 40 | :return: 41 | A tuple of an Unicode string, with any BOM removed, and the 42 | encoding that was used. 43 | 44 | """ 45 | if protocol_encoding: 46 | css_unicode = try_encoding(css_bytes, protocol_encoding) 47 | if css_unicode is not None: 48 | return css_unicode, protocol_encoding 49 | for encoding, pattern in ENCODING_MAGIC_NUMBERS: 50 | match = pattern(css_bytes) 51 | if match: 52 | has_at_charset = isinstance(encoding, tuple) 53 | if has_at_charset: 54 | extract, endianness = encoding 55 | encoding = extract(match.group(1)) 56 | # Get an ASCII-only unicode value. 57 | # This is the only thing that works on both Python 2 and 3 58 | # for bytes.decode() 59 | # Non-ASCII encoding names are invalid anyway, 60 | # but make sure they stay invalid. 61 | encoding = encoding.decode('ascii', 'replace') 62 | encoding = encoding.replace('\ufffd', '?') 63 | if encoding.replace('-', '').replace('_', '').lower() in [ 64 | 'utf16', 'utf32']: 65 | encoding += endianness 66 | encoding = encoding.encode('ascii', 'replace').decode('ascii') 67 | css_unicode = try_encoding(css_bytes, encoding) 68 | if css_unicode and not (has_at_charset and not 69 | css_unicode.startswith('@charset "')): 70 | return css_unicode, encoding 71 | break 72 | for encoding in [linking_encoding, document_encoding]: 73 | if encoding: 74 | css_unicode = try_encoding(css_bytes, encoding) 75 | if css_unicode is not None: 76 | return css_unicode, encoding 77 | css_unicode = try_encoding(css_bytes, 'UTF-8') 78 | if css_unicode is not None: 79 | return css_unicode, 'UTF-8' 80 | return try_encoding(css_bytes, 'ISO-8859-1', fallback=False), 'ISO-8859-1' 81 | 82 | 83 | def try_encoding(css_bytes, encoding, fallback=True): 84 | if fallback: 85 | try: 86 | css_unicode = css_bytes.decode(encoding) 87 | # LookupError means 
unknown encoding 88 | except (UnicodeDecodeError, LookupError): 89 | return None 90 | else: 91 | css_unicode = css_bytes.decode(encoding) 92 | if css_unicode and css_unicode[0] == '\ufeff': 93 | # Remove any Byte Order Mark 94 | css_unicode = css_unicode[1:] 95 | return css_unicode 96 | 97 | 98 | def hex2re(hex_data): 99 | return re.escape(unhexlify(hex_data.replace(' ', '').encode('ascii'))) 100 | 101 | 102 | class Slicer(object): 103 | """Slice()[start:stop:end] == slice(start, stop, end)""" 104 | def __getitem__(self, slice_): 105 | return operator.itemgetter(slice_) 106 | 107 | 108 | Slice = Slicer() 109 | 110 | 111 | # List of (bom_size, encoding, pattern) 112 | # bom_size is in bytes and can be zero 113 | # encoding is a string or (slice_, endianness) for "as specified" 114 | # slice_ is a slice object.How to extract the specified 115 | 116 | ENCODING_MAGIC_NUMBERS = [ 117 | ((Slice[:], ''), re.compile( 118 | hex2re('EF BB BF 40 63 68 61 72 73 65 74 20 22') + 119 | b'([^\x22]*?)' + 120 | hex2re('22 3B')).match), 121 | 122 | ('UTF-8', re.compile( 123 | hex2re('EF BB BF')).match), 124 | 125 | ((Slice[:], ''), re.compile( 126 | hex2re('40 63 68 61 72 73 65 74 20 22') + 127 | b'([^\x22]*?)' + 128 | hex2re('22 3B')).match), 129 | 130 | ((Slice[1::2], '-BE'), re.compile( 131 | hex2re('FE FF 00 40 00 63 00 68 00 61 00 72 00 73 00 65 00' 132 | '74 00 20 00 22') + 133 | b'((\x00[^\x22])*?)' + 134 | hex2re('00 22 00 3B')).match), 135 | 136 | ((Slice[1::2], '-BE'), re.compile( 137 | hex2re('00 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00' 138 | '20 00 22') + 139 | b'((\x00[^\x22])*?)' + 140 | hex2re('00 22 00 3B')).match), 141 | 142 | ((Slice[::2], '-LE'), re.compile( 143 | hex2re('FF FE 40 00 63 00 68 00 61 00 72 00 73 00 65 00 74' 144 | '00 20 00 22 00') + 145 | b'(([^\x22]\x00)*?)' + 146 | hex2re('22 00 3B 00')).match), 147 | 148 | ((Slice[::2], '-LE'), re.compile( 149 | hex2re('40 00 63 00 68 00 61 00 72 00 73 00 65 00 74 00 20' 150 | '00 22 00') + 151 | 
b'(([^\x22]\x00)*?)' + 152 | hex2re('22 00 3B 00')).match), 153 | 154 | ((Slice[3::4], '-BE'), re.compile( 155 | hex2re('00 00 FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00' 156 | '00 00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00' 157 | '00 74 00 00 00 20 00 00 00 22') + 158 | b'((\x00\x00\x00[^\x22])*?)' + 159 | hex2re('00 00 00 22 00 00 00 3B')).match), 160 | 161 | ((Slice[3::4], '-BE'), re.compile( 162 | hex2re('00 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00' 163 | '00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00' 164 | '00 20 00 00 00 22') + 165 | b'((\x00\x00\x00[^\x22])*?)' + 166 | hex2re('00 00 00 22 00 00 00 3B')).match), 167 | 168 | 169 | # Python does not support 2143 or 3412 endianness, AFAIK. 170 | # I guess we could fix it up ourselves but meh. Patches welcome. 171 | 172 | # ((Slice[2::4], '-2143'), re.compile( 173 | # hex2re('00 00 FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00' 174 | # '00 61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00' 175 | # '74 00 00 00 20 00 00 00 22 00') + 176 | # b'((\x00\x00[^\x22]\x00)*?)' + 177 | # hex2re('00 00 22 00 00 00 3B 00')).match), 178 | 179 | # ((Slice[2::4], '-2143'), re.compile( 180 | # hex2re('00 00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00' 181 | # '00 72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00' 182 | # '20 00 00 00 22 00') + 183 | # b'((\x00\x00[^\x22]\x00)*?)' + 184 | # hex2re('00 00 22 00 00 00 3B 00')).match), 185 | 186 | # ((Slice[1::4], '-3412'), re.compile( 187 | # hex2re('FE FF 00 00 00 40 00 00 00 63 00 00 00 68 00 00 00' 188 | # '61 00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74' 189 | # '00 00 00 20 00 00 00 22 00 00') + 190 | # b'((\x00[^\x22]\x00\x00)*?)' + 191 | # hex2re('00 22 00 00 00 3B 00 00')).match), 192 | 193 | # ((Slice[1::4], '-3412'), re.compile( 194 | # hex2re('00 40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00' 195 | # '72 00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20' 196 | # '00 00 00 22 00 00') + 197 | # b'((\x00[^\x22]\x00\x00)*?)' + 198 | # hex2re('00 22 00 00 
00 3B 00 00')).match), 199 | 200 | ((Slice[::4], '-LE'), re.compile( 201 | hex2re('FF FE 00 00 40 00 00 00 63 00 00 00 68 00 00 00 61' 202 | '00 00 00 72 00 00 00 73 00 00 00 65 00 00 00 74 00' 203 | '00 00 20 00 00 00 22 00 00 00') + 204 | b'(([^\x22]\x00\x00\x00)*?)' + 205 | hex2re('22 00 00 00 3B 00 00 00')).match), 206 | 207 | ((Slice[::4], '-LE'), re.compile( 208 | hex2re('40 00 00 00 63 00 00 00 68 00 00 00 61 00 00 00 72' 209 | '00 00 00 73 00 00 00 65 00 00 00 74 00 00 00 20 00' 210 | '00 00 22 00 00 00') + 211 | b'(([^\x22]\x00\x00\x00)*?)' + 212 | hex2re('22 00 00 00 3B 00 00 00')).match), 213 | 214 | ('UTF-32-BE', re.compile( 215 | hex2re('00 00 FE FF')).match), 216 | 217 | ('UTF-32-LE', re.compile( 218 | hex2re('FF FE 00 00')).match), 219 | 220 | # ('UTF-32-2143', re.compile( 221 | # hex2re('00 00 FF FE')).match), 222 | 223 | # ('UTF-32-3412', re.compile( 224 | # hex2re('FE FF 00 00')).match), 225 | 226 | ('UTF-16-BE', re.compile( 227 | hex2re('FE FF')).match), 228 | 229 | ('UTF-16-LE', re.compile( 230 | hex2re('FF FE')).match), 231 | 232 | 233 | # Some of there are supported by Python, but I didn’t bother. 234 | # You know the story with patches ... 
235 | 236 | # # as specified, transcoded from EBCDIC to ASCII 237 | # ('as_specified-EBCDIC', re.compile( 238 | # hex2re('7C 83 88 81 99 A2 85 A3 40 7F') 239 | # + b'([^\x7F]*?)' 240 | # + hex2re('7F 5E')).match), 241 | 242 | # # as specified, transcoded from IBM1026 to ASCII 243 | # ('as_specified-IBM1026', re.compile( 244 | # hex2re('AE 83 88 81 99 A2 85 A3 40 FC') 245 | # + b'([^\xFC]*?)' 246 | # + hex2re('FC 5E')).match), 247 | 248 | # # as specified, transcoded from GSM 03.38 to ASCII 249 | # ('as_specified-GSM_03.38', re.compile( 250 | # hex2re('00 63 68 61 72 73 65 74 20 22') 251 | # + b'([^\x22]*?)' 252 | # + hex2re('22 3B')).match), 253 | ] 254 | -------------------------------------------------------------------------------- /tinycss/fonts3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.colors3 4 | --------------- 5 | 6 | Parser for CSS 3 Fonts syntax: 7 | https://www.w3.org/TR/css-fonts-3/ 8 | 9 | Adds support for font-face and font-feature-values rules. 10 | 11 | :copyright: (c) 2016 by Kozea. 12 | :license: BSD, see LICENSE for more details. 13 | """ 14 | 15 | from __future__ import division, unicode_literals 16 | 17 | from .css21 import CSS21Parser, ParseError 18 | 19 | 20 | class FontFaceRule(object): 21 | """A parsed at-rule for font faces. 22 | 23 | .. attribute:: at_keyword 24 | 25 | Always ``'@font-face'``. 26 | 27 | .. attribute:: declarations 28 | 29 | A list of :class:`~.css21.Declaration` objects. 30 | 31 | .. attribute:: line 32 | 33 | Source line where this was read. 34 | 35 | .. attribute:: column 36 | 37 | Source column where this was read. 
38 | 39 | """ 40 | 41 | def __init__(self, at_keyword, declarations, line, column): 42 | assert at_keyword == '@font-face' 43 | self.at_keyword = at_keyword 44 | self.declarations = declarations 45 | self.line = line 46 | self.column = column 47 | 48 | 49 | class FontFeatureValuesRule(object): 50 | """A parsed at-rule for font feature values. 51 | 52 | .. attribute:: at_keyword 53 | 54 | Always ``'@font-feature-values'``. 55 | 56 | .. attribute:: line 57 | 58 | Source line where this was read. 59 | 60 | .. attribute:: column 61 | 62 | Source column where this was read. 63 | 64 | .. attribute:: at_rules 65 | 66 | The list of parsed at-rules inside the @font-feature-values block, in 67 | source order. 68 | 69 | .. attribute:: family_names 70 | 71 | A list of strings representing font families. 72 | 73 | """ 74 | 75 | def __init__(self, at_keyword, at_rules, family_names, line, column): 76 | assert at_keyword == '@font-feature-values' 77 | self.at_keyword = at_keyword 78 | self.family_names = family_names 79 | self.at_rules = at_rules 80 | self.line = line 81 | self.column = column 82 | 83 | 84 | class FontFeatureRule(object): 85 | """A parsed at-rule for font features. 86 | 87 | .. attribute:: at_keyword 88 | 89 | One of the 16 following strings: 90 | 91 | * ``@stylistic`` 92 | * ``@styleset`` 93 | * ``@character-variant`` 94 | * ``@swash`` 95 | * ``@ornaments`` 96 | * ``@annotation`` 97 | 98 | .. attribute:: declarations 99 | 100 | A list of :class:`~.css21.Declaration` objects. 101 | 102 | .. attribute:: line 103 | 104 | Source line where this was read. 105 | 106 | .. attribute:: column 107 | 108 | Source column where this was read. 109 | 110 | """ 111 | 112 | def __init__(self, at_keyword, declarations, line, column): 113 | self.at_keyword = at_keyword 114 | self.declarations = declarations 115 | self.line = line 116 | self.column = column 117 | 118 | 119 | class CSSFonts3Parser(CSS21Parser): 120 | """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Fonts`_ syntax. 
121 | 122 | .. _CSS 3 Fonts: https://www.w3.org/TR/css-fonts-3/ 123 | 124 | """ 125 | 126 | FONT_FEATURE_VALUES_AT_KEYWORDS = [ 127 | '@stylistic', 128 | '@styleset', 129 | '@character-variant', 130 | '@swash', 131 | '@ornaments', 132 | '@annotation', 133 | ] 134 | 135 | def parse_at_rule(self, rule, previous_rules, errors, context): 136 | if rule.at_keyword == '@font-face': 137 | if rule.head: 138 | raise ParseError( 139 | rule.head[0], 140 | 'unexpected {0} token in {1} rule header'.format( 141 | rule.head[0].type, rule.at_keyword)) 142 | declarations, body_errors = self.parse_declaration_list(rule.body) 143 | errors.extend(body_errors) 144 | return FontFaceRule( 145 | rule.at_keyword, declarations, rule.line, rule.column) 146 | elif rule.at_keyword == '@font-feature-values': 147 | family_names = tuple( 148 | self.parse_font_feature_values_family_names(rule.head)) 149 | at_rules, body_errors = ( 150 | self.parse_rules(rule.body or [], '@font-feature-values')) 151 | errors.extend(body_errors) 152 | return FontFeatureValuesRule( 153 | rule.at_keyword, at_rules, family_names, 154 | rule.line, rule.column) 155 | elif rule.at_keyword in self.FONT_FEATURE_VALUES_AT_KEYWORDS: 156 | if context != '@font-feature-values': 157 | raise ParseError( 158 | rule, '{0} rule not allowed in {1}'.format( 159 | rule.at_keyword, context)) 160 | declarations, body_errors = self.parse_declaration_list(rule.body) 161 | errors.extend(body_errors) 162 | return FontFeatureRule( 163 | rule.at_keyword, declarations, rule.line, rule.column) 164 | return super(CSSFonts3Parser, self).parse_at_rule( 165 | rule, previous_rules, errors, context) 166 | 167 | def parse_font_feature_values_family_names(self, tokens): 168 | """Parse an @font-feature-values selector. 169 | 170 | :param tokens: 171 | An iterable of token, typically from the ``head`` attribute of 172 | an unparsed :class:`AtRule`. 173 | :returns: 174 | A generator of strings representing font families. 
175 | :raises: 176 | :class:`~.parsing.ParseError` on invalid selectors 177 | 178 | """ 179 | family = '' 180 | current_string = False 181 | for token in tokens: 182 | if token.type == 'DELIM' and token.value == ',' and family: 183 | yield family 184 | family = '' 185 | current_string = False 186 | elif token.type == 'STRING' and not family and ( 187 | current_string is False): 188 | family = token.value 189 | current_string = True 190 | elif token.type == 'IDENT' and not current_string: 191 | if family: 192 | family += ' ' 193 | family += token.value 194 | elif token.type != 'S': 195 | family = '' 196 | break 197 | if family: 198 | yield family 199 | else: 200 | raise ParseError(token, 'invalid @font-feature-values selector') 201 | -------------------------------------------------------------------------------- /tinycss/page3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.page3 4 | ------------------ 5 | 6 | Support for CSS 3 Paged Media syntax: 7 | http://dev.w3.org/csswg/css3-page/ 8 | 9 | Adds support for named page selectors and margin rules. 10 | 11 | :copyright: (c) 2012 by Simon Sapin. 12 | :license: BSD, see LICENSE for more details. 13 | """ 14 | 15 | from __future__ import division, unicode_literals 16 | 17 | from .css21 import CSS21Parser, ParseError 18 | 19 | 20 | class MarginRule(object): 21 | """A parsed at-rule for margin box. 22 | 23 | .. attribute:: at_keyword 24 | 25 | One of the 16 following strings: 26 | 27 | * ``@top-left-corner`` 28 | * ``@top-left`` 29 | * ``@top-center`` 30 | * ``@top-right`` 31 | * ``@top-right-corner`` 32 | * ``@bottom-left-corner`` 33 | * ``@bottom-left`` 34 | * ``@bottom-center`` 35 | * ``@bottom-right`` 36 | * ``@bottom-right-corner`` 37 | * ``@left-top`` 38 | * ``@left-middle`` 39 | * ``@left-bottom`` 40 | * ``@right-top`` 41 | * ``@right-middle`` 42 | * ``@right-bottom`` 43 | 44 | .. 
attribute:: declarations 45 | 46 | A list of :class:`~.css21.Declaration` objects. 47 | 48 | .. attribute:: line 49 | 50 | Source line where this was read. 51 | 52 | .. attribute:: column 53 | 54 | Source column where this was read. 55 | 56 | """ 57 | 58 | def __init__(self, at_keyword, declarations, line, column): 59 | self.at_keyword = at_keyword 60 | self.declarations = declarations 61 | self.line = line 62 | self.column = column 63 | 64 | 65 | class CSSPage3Parser(CSS21Parser): 66 | """Extend :class:`~.css21.CSS21Parser` for `CSS 3 Paged Media`_ syntax. 67 | 68 | .. _CSS 3 Paged Media: http://dev.w3.org/csswg/css3-page/ 69 | 70 | Compared to CSS 2.1, the ``at_rules`` and ``selector`` attributes of 71 | :class:`~.css21.PageRule` objects are modified: 72 | 73 | * ``at_rules`` is not always empty, it is a list of :class:`MarginRule` 74 | objects. 75 | 76 | * ``selector``, instead of a single string, is a tuple of the page name 77 | and the pseudo class. Each of these may be a ``None`` or a string. 78 | 79 | +--------------------------+------------------------+ 80 | | CSS | Parsed selectors | 81 | +==========================+========================+ 82 | | .. code-block:: css | .. 
code-block:: python | 83 | | | | 84 | | @page {} | (None, None) | 85 | | @page :first {} | (None, 'first') | 86 | | @page chapter {} | ('chapter', None) | 87 | | @page table:right {} | ('table', 'right') | 88 | +--------------------------+------------------------+ 89 | 90 | """ 91 | 92 | PAGE_MARGIN_AT_KEYWORDS = [ 93 | '@top-left-corner', 94 | '@top-left', 95 | '@top-center', 96 | '@top-right', 97 | '@top-right-corner', 98 | '@bottom-left-corner', 99 | '@bottom-left', 100 | '@bottom-center', 101 | '@bottom-right', 102 | '@bottom-right-corner', 103 | '@left-top', 104 | '@left-middle', 105 | '@left-bottom', 106 | '@right-top', 107 | '@right-middle', 108 | '@right-bottom', 109 | ] 110 | 111 | def parse_at_rule(self, rule, previous_rules, errors, context): 112 | if rule.at_keyword in self.PAGE_MARGIN_AT_KEYWORDS: 113 | if context != '@page': 114 | raise ParseError( 115 | rule, '{0} rule not allowed in {1}'.format( 116 | rule.at_keyword, context)) 117 | if rule.head: 118 | raise ParseError( 119 | rule.head[0], 120 | 'unexpected {0} token in {1} rule header'.format( 121 | rule.head[0].type, rule.at_keyword)) 122 | declarations, body_errors = self.parse_declaration_list(rule.body) 123 | errors.extend(body_errors) 124 | return MarginRule( 125 | rule.at_keyword, declarations, rule.line, rule.column) 126 | return super(CSSPage3Parser, self).parse_at_rule( 127 | rule, previous_rules, errors, context) 128 | 129 | def parse_page_selector(self, head): 130 | """Parse an @page selector. 131 | 132 | :param head: 133 | The ``head`` attribute of an unparsed :class:`AtRule`. 134 | :returns: 135 | A page selector. For CSS 2.1, this is 'first', 'left', 'right' 136 | or None. 'blank' is added by GCPM. 
137 | :raises: 138 | :class`~parsing.ParseError` on invalid selectors 139 | 140 | """ 141 | if not head: 142 | return (None, None), (0, 0, 0) 143 | if head[0].type == 'IDENT': 144 | name = head.pop(0).value 145 | while head and head[0].type == 'S': 146 | head.pop(0) 147 | if not head: 148 | return (name, None), (1, 0, 0) 149 | name_specificity = (1,) 150 | else: 151 | name = None 152 | name_specificity = (0,) 153 | if (len(head) == 2 and head[0].type == ':' and 154 | head[1].type == 'IDENT'): 155 | pseudo_class = head[1].value 156 | specificity = { 157 | 'first': (1, 0), 'blank': (1, 0), 158 | 'left': (0, 1), 'right': (0, 1), 159 | }.get(pseudo_class) 160 | if specificity: 161 | return (name, pseudo_class), (name_specificity + specificity) 162 | raise ParseError(head[0], 'invalid @page selector') 163 | -------------------------------------------------------------------------------- /tinycss/parsing.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.parsing 4 | --------------- 5 | 6 | Utilities for parsing lists of tokens. 7 | 8 | :copyright: (c) 2012 by Simon Sapin. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from __future__ import unicode_literals 13 | 14 | 15 | # TODO: unit tests 16 | 17 | def split_on_comma(tokens): 18 | """Split a list of tokens on commas, ie ``,`` DELIM tokens. 19 | 20 | Only "top-level" comma tokens are splitting points, not commas inside a 21 | function or other :class:`ContainerToken`. 22 | 23 | :param tokens: 24 | An iterable of :class:`~.token_data.Token` or 25 | :class:`~.token_data.ContainerToken`. 
def strip_whitespace(tokens):
    """Remove whitespace at the beginning and end of a token list.

    Whitespace tokens in-between other tokens in the list are preserved.

    :param tokens:
        A list of :class:`~.token_data.Token` or
        :class:`~.token_data.ContainerToken`.
    :return:
        A new sub-sequence of the list.

    """
    start = 0
    end = len(tokens)
    # Advance past leading whitespace tokens.
    while start < end and tokens[start].type == 'S':
        start += 1
    # Back up past trailing whitespace tokens.
    while end > start and tokens[end - 1].type == 'S':
        end -= 1
    return tokens[start:end]
def validate_any(token, context):
    """Check one token against the 'any' production of the core grammar.

    :param token: a single token
    :param context: a string for the 'unexpected in ...' message
    :raises:
        :class:`ParseError` if this is an invalid token for the
        'any' production of the core grammar.

    """
    kind = token.type
    if kind in ('FUNCTION', '(', '['):
        # A container token is valid when all of its content is.
        for child in token.content:
            validate_any(child, kind)
        return
    if kind in ('S', 'IDENT', 'DIMENSION', 'PERCENTAGE', 'NUMBER',
                'INTEGER', 'URI', 'DELIM', 'STRING', 'HASH', ':',
                'UNICODE-RANGE'):
        return
    adjective = 'unmatched' if kind in ('}', ')', ']') else 'unexpected'
    raise ParseError(
        token, '{0} {1} token in {2}'.format(adjective, kind, context))
159 | 160 | """ 161 | def __init__(self, subject, reason): 162 | self.line = subject.line 163 | self.column = subject.column 164 | self.reason = reason 165 | super(ParseError, self).__init__( 166 | 'Parse error at {0.line}:{0.column}, {0.reason}'.format(self)) 167 | -------------------------------------------------------------------------------- /tinycss/speedups.pyx: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.speedups 4 | ---------------- 5 | 6 | Cython module for speeding up inner loops. 7 | 8 | Right now only :func:`tokenize_flat` has a second implementation. 9 | 10 | :copyright: (c) 2010 by Simon Sapin. 11 | :license: BSD, see LICENSE for more details. 12 | """ 13 | 14 | from __future__ import unicode_literals 15 | 16 | from .token_data import ( 17 | COMPILED_TOKEN_REGEXPS, UNICODE_UNESCAPE, NEWLINE_UNESCAPE, 18 | SIMPLE_UNESCAPE, FIND_NEWLINES, TOKEN_DISPATCH) 19 | 20 | 21 | COMPILED_TOKEN_INDEXES = dict( 22 | (name, i) for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS)) 23 | 24 | 25 | cdef class CToken: 26 | """A token built by the Cython speedups. Identical to 27 | :class:`~.token_data.Token`. 28 | 29 | """ 30 | is_container = False 31 | 32 | cdef public object type, _as_css, value, unit 33 | cdef public Py_ssize_t line, column 34 | 35 | def __init__(self, type_, css_value, value, unit, line, column): 36 | self.type = type_ 37 | self._as_css = css_value 38 | self.value = value 39 | self.unit = unit 40 | self.line = line 41 | self.column = column 42 | 43 | def as_css(self): 44 | """ 45 | Return as an Unicode string the CSS representation of the token, 46 | as parsed in the source. 
47 | """ 48 | return self._as_css 49 | 50 | def __repr__(self): 51 | return ('' 52 | .format(self, self.unit or '')) 53 | 54 | 55 | def tokenize_flat(css_source, int ignore_comments=1): 56 | """ 57 | :param css_source: 58 | CSS as an unicode string 59 | :param ignore_comments: 60 | if true (the default) comments will not be included in the 61 | return value 62 | :return: 63 | An iterator of :class:`Token` 64 | 65 | """ 66 | # Make these local variable to avoid global lookups in the loop 67 | tokens_dispatch = TOKEN_DISPATCH 68 | compiled_token_indexes = COMPILED_TOKEN_INDEXES 69 | compiled_tokens = COMPILED_TOKEN_REGEXPS 70 | unicode_unescape = UNICODE_UNESCAPE 71 | newline_unescape = NEWLINE_UNESCAPE 72 | simple_unescape = SIMPLE_UNESCAPE 73 | find_newlines = FIND_NEWLINES 74 | 75 | # Use the integer indexes instead of string markers 76 | cdef Py_ssize_t BAD_COMMENT = compiled_token_indexes['BAD_COMMENT'] 77 | cdef Py_ssize_t BAD_STRING = compiled_token_indexes['BAD_STRING'] 78 | cdef Py_ssize_t PERCENTAGE = compiled_token_indexes['PERCENTAGE'] 79 | cdef Py_ssize_t DIMENSION = compiled_token_indexes['DIMENSION'] 80 | cdef Py_ssize_t ATKEYWORD = compiled_token_indexes['ATKEYWORD'] 81 | cdef Py_ssize_t FUNCTION = compiled_token_indexes['FUNCTION'] 82 | cdef Py_ssize_t COMMENT = compiled_token_indexes['COMMENT'] 83 | cdef Py_ssize_t NUMBER = compiled_token_indexes['NUMBER'] 84 | cdef Py_ssize_t STRING = compiled_token_indexes['STRING'] 85 | cdef Py_ssize_t IDENT = compiled_token_indexes['IDENT'] 86 | cdef Py_ssize_t HASH = compiled_token_indexes['HASH'] 87 | cdef Py_ssize_t URI = compiled_token_indexes['URI'] 88 | cdef Py_ssize_t DELIM = -1 89 | 90 | cdef Py_ssize_t pos = 0 91 | cdef Py_ssize_t line = 1 92 | cdef Py_ssize_t column = 1 93 | cdef Py_ssize_t source_len = len(css_source) 94 | cdef Py_ssize_t n_tokens = len(compiled_tokens) 95 | cdef Py_ssize_t length, next_pos, type_ 96 | cdef CToken token 97 | 98 | tokens = [] 99 | while pos < source_len: 100 | char = 
css_source[pos] 101 | if char in ':;{}()[]': 102 | type_ = -1 # not parsed further anyway 103 | type_name = char 104 | css_value = char 105 | else: 106 | codepoint = min(ord(char), 160) 107 | for type_, type_name, regexp in tokens_dispatch[codepoint]: 108 | match = regexp(css_source, pos) 109 | if match: 110 | # First match is the longest. See comments on TOKENS above. 111 | css_value = match.group() 112 | break 113 | else: 114 | # No match. 115 | # "Any other character not matched by the above rules, 116 | # and neither a single nor a double quote." 117 | # ... but quotes at the start of a token are always matched 118 | # by STRING or BAD_STRING. So DELIM is any single character. 119 | type_ = DELIM 120 | type_name = 'DELIM' 121 | css_value = char 122 | length = len(css_value) 123 | next_pos = pos + length 124 | 125 | # A BAD_COMMENT is a comment at EOF. Ignore it too. 126 | if not (ignore_comments and type_ in (COMMENT, BAD_COMMENT)): 127 | # Parse numbers, extract strings and URIs, unescape 128 | unit = None 129 | if type_ == DIMENSION: 130 | value = match.group(1) 131 | value = float(value) if '.' in value else int(value) 132 | unit = match.group(2) 133 | unit = simple_unescape(unit) 134 | unit = unicode_unescape(unit) 135 | unit = unit.lower() # normalize 136 | elif type_ == PERCENTAGE: 137 | value = css_value[:-1] 138 | value = float(value) if '.' in value else int(value) 139 | unit = '%' 140 | elif type_ == NUMBER: 141 | value = css_value 142 | if '.' 
in value: 143 | value = float(value) 144 | else: 145 | value = int(value) 146 | type_name = 'INTEGER' 147 | elif type_ in (IDENT, ATKEYWORD, HASH, FUNCTION): 148 | value = simple_unescape(css_value) 149 | value = unicode_unescape(value) 150 | elif type_ == URI: 151 | value = match.group(1) 152 | if value and value[0] in '"\'': 153 | value = value[1:-1] # Remove quotes 154 | value = newline_unescape(value) 155 | value = simple_unescape(value) 156 | value = unicode_unescape(value) 157 | elif type_ == STRING: 158 | value = css_value[1:-1] # Remove quotes 159 | value = newline_unescape(value) 160 | value = simple_unescape(value) 161 | value = unicode_unescape(value) 162 | # BAD_STRING can only be one of: 163 | # * Unclosed string at the end of the stylesheet: 164 | # Close the string, but this is not an error. 165 | # Make it a "good" STRING token. 166 | # * Unclosed string at the (unescaped) end of the line: 167 | # Close the string, but this is an error. 168 | # Leave it as a BAD_STRING, don’t bother parsing it. 
def assert_errors(errors, expected_errors):
    """Check each error message for an expected substring.

    Only substrings are compared, not complete error messages.
    """
    assert len(errors) == len(expected_errors)
    for got, fragment in zip(errors, expected_errors):
        assert fragment in str(got)
@contextlib.contextmanager
def install_tokenizer(name):
    """Temporarily monkey-patch the module-level tokenizer.

    Within the ``with`` block, ``tokenizer.tokenize_flat`` is replaced by
    the implementation named *name* (e.g. 'cython_tokenize_flat' or
    'python_tokenize_flat') so that parsing can be timed against it.
    """
    original = tokenizer.tokenize_flat
    try:
        tokenizer.tokenize_flat = getattr(tokenizer, name)
        yield
    finally:
        # Always restore the original, even if the timed code raises.
        tokenizer.tokenize_flat = original
def time(function):
    """Return the best wall-clock time for *function*, in milliseconds.

    Runs ``timeit`` with the module-level ``TIMEIT_REPEAT`` /
    ``TIMEIT_NUMBER`` settings and keeps the fastest repetition.
    """
    timings = timeit.Timer(function).repeat(TIMEIT_REPEAT, TIMEIT_NUMBER)
    return int(min(timings) * 1000)
def test_make_parser():
    """Check that make_parser() composes parser classes as documented."""
    class MyParser(object):
        def __init__(self, some_config):
            self.some_config = some_config

    # Every documented calling convention: no args, a module name,
    # a class, extra base classes, and keyword configuration.
    parsers = [
        make_parser(),
        make_parser('page3'),
        make_parser(CSSPage3Parser),
        make_parser(MyParser, some_config=42),
        make_parser(CSSPage3Parser, MyParser, some_config=42),
        make_parser(MyParser, 'page3', some_config=42),
    ]

    # Which of the parsers above should inherit CSSPage3Parser...
    for parser, exp in zip(parsers, [False, True, True, False, True, True]):
        assert isinstance(parser, CSSPage3Parser) == exp

    # ...and which should inherit the user-supplied MyParser.
    for parser, exp in zip(parsers, [False, False, False, True, True, True]):
        assert isinstance(parser, MyParser) == exp

    for parser in parsers[3:]:
        assert parser.some_config == 42

    # Extra or missing named parameters
    raises(TypeError, make_parser, some_config=4)
    raises(TypeError, make_parser, 'page3', some_config=4)
    raises(TypeError, make_parser, MyParser)
    raises(TypeError, make_parser, MyParser, some_config=4, other_config=7)
8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.color3 import hsl_to_rgb, parse_color_string 15 | 16 | 17 | @pytest.mark.parametrize(('css_source', 'expected_result'), [ 18 | ('', None), 19 | (' /* hey */\n', None), 20 | ('4', None), 21 | ('top', None), 22 | ('/**/transparent', (0, 0, 0, 0)), 23 | ('transparent', (0, 0, 0, 0)), 24 | (' transparent\n', (0, 0, 0, 0)), 25 | ('TransParent', (0, 0, 0, 0)), 26 | ('currentColor', 'currentColor'), 27 | ('CURRENTcolor', 'currentColor'), 28 | ('current_Color', None), 29 | 30 | ('black', (0, 0, 0, 1)), 31 | ('white', (1, 1, 1, 1)), 32 | ('fuchsia', (1, 0, 1, 1)), 33 | ('cyan', (0, 1, 1, 1)), 34 | ('CyAn', (0, 1, 1, 1)), 35 | ('darkkhaki', (189 / 255., 183 / 255., 107 / 255., 1)), 36 | 37 | ('#', None), 38 | ('#f', None), 39 | ('#ff', None), 40 | ('#fff', (1, 1, 1, 1)), 41 | ('#ffg', None), 42 | ('#ffff', None), 43 | ('#fffff', None), 44 | ('#ffffff', (1, 1, 1, 1)), 45 | ('#fffffg', None), 46 | ('#fffffff', None), 47 | ('#ffffffff', None), 48 | ('#fffffffff', None), 49 | 50 | ('#cba987', (203 / 255., 169 / 255., 135 / 255., 1)), 51 | ('#CbA987', (203 / 255., 169 / 255., 135 / 255., 1)), 52 | ('#1122aA', (17 / 255., 34 / 255., 170 / 255., 1)), 53 | ('#12a', (17 / 255., 34 / 255., 170 / 255., 1)), 54 | 55 | ('rgb(203, 169, 135)', (203 / 255., 169 / 255., 135 / 255., 1)), 56 | ('RGB(255, 255, 255)', (1, 1, 1, 1)), 57 | ('rgB(0, 0, 0)', (0, 0, 0, 1)), 58 | ('rgB(0, 51, 255)', (0, .2, 1, 1)), 59 | ('rgb(0,51,255)', (0, .2, 1, 1)), 60 | ('rgb(0\t, 51 ,255)', (0, .2, 1, 1)), 61 | ('rgb(/* R */0, /* G */51, /* B */255)', (0, .2, 1, 1)), 62 | ('rgb(-51, 306, 0)', (-.2, 1.2, 0, 1)), # out of 0..1 is allowed 63 | 64 | ('rgb(42%, 3%, 50%)', (.42, .03, .5, 1)), 65 | ('RGB(100%, 100%, 100%)', (1, 1, 1, 1)), 66 | ('rgB(0%, 0%, 0%)', (0, 0, 0, 1)), 67 | ('rgB(10%, 20%, 30%)', (.1, .2, .3, 1)), 68 | ('rgb(10%,20%,30%)', (.1, .2, .3, 1)), 69 | ('rgb(10%\t, 20% ,30%)', (.1, .2, .3, 1)), 70 
| ('rgb(/* R */10%, /* G */20%, /* B */30%)', (.1, .2, .3, 1)), 71 | ('rgb(-12%, 110%, 1400%)', (-.12, 1.1, 14, 1)), # out of 0..1 is allowed 72 | 73 | ('rgb(10%, 50%, 0)', None), 74 | ('rgb(255, 50%, 0%)', None), 75 | ('rgb(0, 0 0)', None), 76 | ('rgb(0, 0, 0deg)', None), 77 | ('rgb(0, 0, light)', None), 78 | ('rgb()', None), 79 | ('rgb(0)', None), 80 | ('rgb(0, 0)', None), 81 | ('rgb(0, 0, 0, 0)', None), 82 | ('rgb(0%)', None), 83 | ('rgb(0%, 0%)', None), 84 | ('rgb(0%, 0%, 0%, 0%)', None), 85 | ('rgb(0%, 0%, 0%, 0)', None), 86 | 87 | ('rgba(0, 0, 0, 0)', (0, 0, 0, 0)), 88 | ('rgba(203, 169, 135, 0.3)', (203 / 255., 169 / 255., 135 / 255., 0.3)), 89 | ('RGBA(255, 255, 255, 0)', (1, 1, 1, 0)), 90 | ('rgBA(0, 51, 255, 1)', (0, 0.2, 1, 1)), 91 | ('rgba(0, 51, 255, 1.1)', (0, 0.2, 1, 1)), 92 | ('rgba(0, 51, 255, 37)', (0, 0.2, 1, 1)), 93 | ('rgba(0, 51, 255, 0.42)', (0, 0.2, 1, 0.42)), 94 | ('rgba(0, 51, 255, 0)', (0, 0.2, 1, 0)), 95 | ('rgba(0, 51, 255, -0.1)', (0, 0.2, 1, 0)), 96 | ('rgba(0, 51, 255, -139)', (0, 0.2, 1, 0)), 97 | 98 | ('rgba(42%, 3%, 50%, 0.3)', (.42, .03, .5, 0.3)), 99 | ('RGBA(100%, 100%, 100%, 0)', (1, 1, 1, 0)), 100 | ('rgBA(0%, 20%, 100%, 1)', (0, 0.2, 1, 1)), 101 | ('rgba(0%, 20%, 100%, 1.1)', (0, 0.2, 1, 1)), 102 | ('rgba(0%, 20%, 100%, 37)', (0, 0.2, 1, 1)), 103 | ('rgba(0%, 20%, 100%, 0.42)', (0, 0.2, 1, 0.42)), 104 | ('rgba(0%, 20%, 100%, 0)', (0, 0.2, 1, 0)), 105 | ('rgba(0%, 20%, 100%, -0.1)', (0, 0.2, 1, 0)), 106 | ('rgba(0%, 20%, 100%, -139)', (0, 0.2, 1, 0)), 107 | 108 | ('rgba(255, 255, 255, 0%)', None), 109 | ('rgba(10%, 50%, 0, 1)', None), 110 | ('rgba(255, 50%, 0%, 1)', None), 111 | ('rgba(0, 0, 0 0)', None), 112 | ('rgba(0, 0, 0, 0deg)', None), 113 | ('rgba(0, 0, 0, light)', None), 114 | ('rgba()', None), 115 | ('rgba(0)', None), 116 | ('rgba(0, 0, 0)', None), 117 | ('rgba(0, 0, 0, 0, 0)', None), 118 | ('rgba(0%)', None), 119 | ('rgba(0%, 0%)', None), 120 | ('rgba(0%, 0%, 0%)', None), 121 | ('rgba(0%, 0%, 0%, 0%)', None), 122 | 
('rgba(0%, 0%, 0%, 0%, 0%)', None), 123 | 124 | ('HSL(0, 0%, 0%)', (0, 0, 0, 1)), 125 | ('hsL(0, 100%, 50%)', (1, 0, 0, 1)), 126 | ('hsl(60, 100%, 37.5%)', (0.75, 0.75, 0, 1)), 127 | ('hsl(780, 100%, 37.5%)', (0.75, 0.75, 0, 1)), 128 | ('hsl(-300, 100%, 37.5%)', (0.75, 0.75, 0, 1)), 129 | ('hsl(300, 50%, 50%)', (0.75, 0.25, 0.75, 1)), 130 | 131 | ('hsl(10, 50%, 0)', None), 132 | ('hsl(50%, 50%, 0%)', None), 133 | ('hsl(0, 0% 0%)', None), 134 | ('hsl(30deg, 100%, 100%)', None), 135 | ('hsl(0, 0%, light)', None), 136 | ('hsl()', None), 137 | ('hsl(0)', None), 138 | ('hsl(0, 0%)', None), 139 | ('hsl(0, 0%, 0%, 0%)', None), 140 | 141 | ('HSLA(-300, 100%, 37.5%, 1)', (0.75, 0.75, 0, 1)), 142 | ('hsLA(-300, 100%, 37.5%, 12)', (0.75, 0.75, 0, 1)), 143 | ('hsla(-300, 100%, 37.5%, 0.2)', (0.75, 0.75, 0, .2)), 144 | ('hsla(-300, 100%, 37.5%, 0)', (0.75, 0.75, 0, 0)), 145 | ('hsla(-300, 100%, 37.5%, -3)', (0.75, 0.75, 0, 0)), 146 | 147 | ('hsla(10, 50%, 0, 1)', None), 148 | ('hsla(50%, 50%, 0%, 1)', None), 149 | ('hsla(0, 0% 0%, 1)', None), 150 | ('hsla(30deg, 100%, 100%, 1)', None), 151 | ('hsla(0, 0%, light, 1)', None), 152 | ('hsla()', None), 153 | ('hsla(0)', None), 154 | ('hsla(0, 0%)', None), 155 | ('hsla(0, 0%, 0%, 50%)', None), 156 | ('hsla(0, 0%, 0%, 1, 0%)', None), 157 | 158 | ('cmyk(0, 0, 0, 0)', None), 159 | ]) 160 | def test_color(css_source, expected_result): 161 | result = parse_color_string(css_source) 162 | if isinstance(result, tuple): 163 | for got, expected in zip(result, expected_result): 164 | # Compensate for floating point errors: 165 | assert abs(got - expected) < 1e-10 166 | for i, attr in enumerate(['red', 'green', 'blue', 'alpha']): 167 | assert getattr(result, attr) == result[i] 168 | else: 169 | assert result == expected_result 170 | 171 | 172 | @pytest.mark.parametrize(('hsl', 'expected_rgb'), [ 173 | # http://en.wikipedia.org/wiki/HSL_and_HSV#Examples 174 | ((0, 0, 100 ), (1, 1, 1 )), # noqa 175 | ((127, 0, 100 ), (1, 1, 1 )), # noqa 176 | ((0, 
0, 50 ), (0.5, 0.5, 0.5 )), # noqa 177 | ((127, 0, 50 ), (0.5, 0.5, 0.5 )), # noqa 178 | ((0, 0, 0 ), (0, 0, 0 )), # noqa 179 | ((127, 0, 0 ), (0, 0, 0 )), # noqa 180 | ((0, 100, 50 ), (1, 0, 0 )), # noqa 181 | ((60, 100, 37.5), (0.75, 0.75, 0 )), # noqa 182 | ((780, 100, 37.5), (0.75, 0.75, 0 )), # noqa 183 | ((-300, 100, 37.5), (0.75, 0.75, 0 )), # noqa 184 | ((120, 100, 25 ), (0, 0.5, 0 )), # noqa 185 | ((180, 100, 75 ), (0.5, 1, 1 )), # noqa 186 | ((240, 100, 75 ), (0.5, 0.5, 1 )), # noqa 187 | ((300, 50, 50 ), (0.75, 0.25, 0.75 )), # noqa 188 | ((61.8, 63.8, 39.3), (0.628, 0.643, 0.142)), # noqa 189 | ((251.1, 83.2, 51.1), (0.255, 0.104, 0.918)), # noqa 190 | ((134.9, 70.7, 39.6), (0.116, 0.675, 0.255)), # noqa 191 | ((49.5, 89.3, 49.7), (0.941, 0.785, 0.053)), # noqa 192 | ((283.7, 77.5, 54.2), (0.704, 0.187, 0.897)), # noqa 193 | ((14.3, 81.7, 62.4), (0.931, 0.463, 0.316)), # noqa 194 | ((56.9, 99.1, 76.5), (0.998, 0.974, 0.532)), # noqa 195 | ((162.4, 77.9, 44.7), (0.099, 0.795, 0.591)), # noqa 196 | ((248.3, 60.1, 37.3), (0.211, 0.149, 0.597)), # noqa 197 | ((240.5, 29, 60.7), (0.495, 0.493, 0.721)), # noqa 198 | ]) 199 | def test_hsl(hsl, expected_rgb): 200 | for got, expected in zip(hsl_to_rgb(*hsl), expected_rgb): 201 | # Compensate for floating point errors and Wikipedia’s rounding: 202 | assert abs(got - expected) < 0.001 203 | -------------------------------------------------------------------------------- /tinycss/tests/test_css21.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for the CSS 2.1 parser 4 | ---------------------------- 5 | 6 | :copyright: (c) 2012 by Simon Sapin. 7 | :license: BSD, see LICENSE for more details. 8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import io 14 | import os 15 | import tempfile 16 | 17 | import pytest 18 | from tinycss.css21 import CSS21Parser 19 | 20 | from . 
from .test_tokenizer import jsonify


def parse_bytes(css_bytes, kwargs):
    """Parse a stylesheet directly from a byte string."""
    return CSS21Parser().parse_stylesheet_bytes(css_bytes, **kwargs)


def parse_bytesio_file(css_bytes, kwargs):
    """Parse a stylesheet from an in-memory binary file object."""
    css_file = io.BytesIO(css_bytes)
    return CSS21Parser().parse_stylesheet_file(css_file, **kwargs)


def parse_filename(css_bytes, kwargs):
    """Parse a stylesheet from an actual file on disk, given by name."""
    css_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        css_file.write(css_bytes)
        # Windows can not open the filename a second time while
        # it is still open for writing.
        css_file.close()
        return CSS21Parser().parse_stylesheet_file(css_file.name, **kwargs)
    finally:
        os.remove(css_file.name)


@pytest.mark.parametrize(('css_bytes', 'kwargs', 'expected_result', 'parse'), [
    params + (parse,)
    for parse in [parse_bytes, parse_bytesio_file, parse_filename]
    for params in [
        ('@import "é";'.encode('utf8'), {}, 'é'),
        ('@import "é";'.encode('utf16'), {}, 'é'),  # with a BOM
        ('@import "é";'.encode('latin1'), {}, 'é'),
        ('@import "£";'.encode('Shift-JIS'), {}, '\x81\x92'),  # lat1 mojibake
        ('@charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {}, '£'),
        (' @charset "Shift-JIS";@import "£";'.encode('Shift-JIS'), {},
         '\x81\x92'),
        ('@import "£";'.encode('Shift-JIS'),
         {'document_encoding': 'Shift-JIS'}, '£'),
        ('@import "£";'.encode('Shift-JIS'),
         {'document_encoding': 'utf8'}, '\x81\x92'),
        ('@charset "utf8"; @import "£";'.encode('utf8'),
         {'document_encoding': 'latin1'}, '£'),
        # Mojibake yay!
        (' @charset "utf8"; @import "é";'.encode('utf8'),
         {'document_encoding': 'latin1'}, 'Ã©'),
        ('@import "é";'.encode('utf8'), {'document_encoding': 'latin1'},
         'Ã©'),
    ]
])
def test_bytes(css_bytes, kwargs, expected_result, parse):
    """Decoding is consistent regardless of how the bytes are fed in."""
    stylesheet = parse(css_bytes, kwargs)
    assert stylesheet.rules[0].at_keyword == '@import'
    assert stylesheet.rules[0].uri == expected_result


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', 0, []),
    ('foo {}', 1, []),
    ('foo{} @lipsum{} bar{}', 2,
     ['unknown at-rule in stylesheet context: @lipsum']),
    ('@charset "ascii"; foo {}', 1, []),
    (' @charset "ascii"; foo {}', 1, [
        'mis-placed or malformed @charset rule']),
    ('@charset ascii; foo {}', 1, ['mis-placed or malformed @charset rule']),
    ('foo {} @charset "ascii";', 1, ['mis-placed or malformed @charset rule']),
])
def test_at_rules(css_source, expected_rules, expected_errors):
    """Unknown or mis-placed at-rules are reported and dropped."""
    # Pass 'encoding' to allow @charset
    stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
    assert_errors(stylesheet.errors, expected_errors)
    result = len(stylesheet.rules)
    assert result == expected_rules


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),

    ('foo{} /* hey */\n@bar;@baz{}',
     [('foo', []), ('@bar', [], None), ('@baz', [], [])], []),

    ('@import "foo.css"/**/;', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('@import "foo.css"/**/', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('@import "foo.css', [
        ('@import', [('STRING', 'foo.css')], None)], []),

    ('{}', [], ['empty selector']),

    ('a{b:4}', [('a', [('b', [('INTEGER', 4)])])], []),

    ('@page {\t b: 4; @margin}', [('@page', [], [
        ('S', '\t '), ('IDENT', 'b'), (':', ':'), ('S', ' '), ('INTEGER', 4),
        (';', ';'), ('S', ' '), ('ATKEYWORD', '@margin'),
    ])], []),

    ('foo', [], ['no declaration block found']),

    ('foo @page {} bar {}', [('bar', [])],
     ['unexpected ATKEYWORD token in selector']),

    ('foo { content: "unclosed string;\n color:red; ; margin/**/\n: 2cm; }',
     [('foo', [('margin', [('DIMENSION', 2)])])],
     ['unexpected BAD_STRING token in property value']),

    ('foo { 4px; bar: 12% }',
     [('foo', [('bar', [('PERCENTAGE', 12)])])],
     ['expected a property name, got DIMENSION']),

    ('foo { bar! 3cm auto ; baz: 7px }',
     [('foo', [('baz', [('DIMENSION', 7)])])],
     ["expected ':', got DELIM"]),

    ('foo { bar ; baz: {("}"/* comment */) {0@fizz}} }',
     [('foo', [('baz', [('{', [
         ('(', [('STRING', '}')]), ('S', ' '),
         ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
     ])])])],
     ["expected ':'"]),

    ('foo { bar: ; baz: not(z) }',
     [('foo', [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])])],
     ['expected a property value']),

    ('foo { bar: (]) ; baz: U+20 }',
     [('foo', [('baz', [('UNICODE-RANGE', 'U+20')])])],
     ['unmatched ] token in (']),
])
def test_core_parser(css_source, expected_rules, expected_errors):
    """Core parsing and error recovery, with at-rules kept unparsed."""
    class CoreParser(CSS21Parser):
        """A parser that always accepts unparsed at-rules."""
        def parse_at_rule(self, rule, stylesheet_rules, errors, context):
            return rule

    stylesheet = CoreParser().parse_stylesheet(css_source)
    assert_errors(stylesheet.errors, expected_errors)
    result = [
        (rule.at_keyword, list(jsonify(rule.head)),
         list(jsonify(rule.body))
         if rule.body is not None else None)
        if rule.at_keyword else
        (rule.selector.as_css(), [
            (decl.name, list(jsonify(decl.value)))
            for decl in rule.declarations])
        for rule in stylesheet.rules
    ]
    assert result == expected_rules


@pytest.mark.parametrize(('css_source', 'expected_declarations',
                          'expected_errors'), [
    (' /* hey */\n', [], []),

    ('b:4', [('b', [('INTEGER', 4)])], []),

    ('{b:4}', [], ['expected a property name, got {']),

    ('b:4} c:3', [], ['unmatched } token in property value']),

    (' 4px; bar: 12% ',
     [('bar', [('PERCENTAGE', 12)])],
     ['expected a property name, got DIMENSION']),

    ('bar! 3cm auto ; baz: 7px',
     [('baz', [('DIMENSION', 7)])],
     ["expected ':', got DELIM"]),

    ('foo; bar ; baz: {("}"/* comment */) {0@fizz}}',
     [('baz', [('{', [
         ('(', [('STRING', '}')]), ('S', ' '),
         ('{', [('INTEGER', 0), ('ATKEYWORD', '@fizz')])
     ])])],
     ["expected ':'", "expected ':'"]),

    ('bar: ; baz: not(z)',
     [('baz', [('FUNCTION', 'not', [('IDENT', 'z')])])],
     ['expected a property value']),

    ('bar: (]) ; baz: U+20',
     [('baz', [('UNICODE-RANGE', 'U+20')])],
     ['unmatched ] token in (']),
])
def test_parse_style_attr(css_source, expected_declarations, expected_errors):
    """Declarations in an HTML ``style`` attribute, without braces."""
    declarations, errors = CSS21Parser().parse_style_attr(css_source)
    assert_errors(errors, expected_errors)
    result = [(decl.name, list(jsonify(decl.value)))
              for decl in declarations]
    assert result == expected_declarations


@pytest.mark.parametrize(('css_source', 'expected_declarations',
                          'expected_errors'), [
    (' /* hey */\n', [], []),

    ('a:1; b:2',
     [('a', [('INTEGER', 1)], None), ('b', [('INTEGER', 2)], None)], []),

    ('a:1 important; b: important',
     [('a', [('INTEGER', 1), ('S', ' '), ('IDENT', 'important')], None),
      ('b', [('IDENT', 'important')], None)],
     []),

    ('a:1 !important; b:2',
     [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
     []),

    # '!important' is matched case-insensitively, with escapes.
    ('a:1!\t Im\\50 O\\RTant; b:2',
     [('a', [('INTEGER', 1)], 'important'), ('b', [('INTEGER', 2)], None)],
     []),

    ('a: !important; b:2',
     [('b', [('INTEGER', 2)], None)],
     ['expected a value before !important']),

])
def test_important(css_source, expected_declarations, expected_errors):
    """``!important`` is parsed into the declaration priority."""
    declarations, errors = CSS21Parser().parse_style_attr(css_source)
    assert_errors(errors, expected_errors)
    result = [(decl.name, list(jsonify(decl.value)), decl.priority)
              for decl in declarations]
    assert result == expected_declarations


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),
    ('@import "foo.css";', [('foo.css', ['all'])], []),
    ('@import url(foo.css);', [('foo.css', ['all'])], []),
    ('@import "foo.css" screen, print;',
     [('foo.css', ['screen', 'print'])], []),
    ('@charset "ascii"; @import "foo.css"; @import "bar.css";',
     [('foo.css', ['all']), ('bar.css', ['all'])], []),
    ('foo {} @import "foo.css";',
     [], ['@import rule not allowed after a ruleset']),
    ('@page {} @import "foo.css";',
     [], ['@import rule not allowed after an @page rule']),
    ('@import ;',
     [], ['expected URI or STRING for @import rule']),
    ('@import foo.css;',
     [], ['expected URI or STRING for @import rule, got IDENT']),
    ('@import "foo.css" {}',
     [], ["expected ';', got a block"]),
])
def test_at_import(css_source, expected_rules, expected_errors):
    """@import rules: URI, media list, and ordering constraints."""
    # Pass 'encoding' to allow @charset
    stylesheet = CSS21Parser().parse_stylesheet(css_source, encoding='utf8')
    assert_errors(stylesheet.errors, expected_errors)

    result = [
        (rule.uri, rule.media)
        for rule in stylesheet.rules
        if rule.at_keyword == '@import'
    ]
    assert result == expected_rules


@pytest.mark.parametrize(('css', 'expected_result', 'expected_errors'), [
    ('@page {}', (None, (0, 0), []), []),
    ('@page:first {}', ('first', (1, 0), []), []),
    ('@page :left{}', ('left', (0, 1), []), []),
    ('@page\t\n:right {}', ('right', (0, 1), []), []),
    ('@page :last {}', None, ['invalid @page selector']),
    ('@page : right {}', None, ['invalid @page selector']),
    ('@page table:left {}', None, ['invalid @page selector']),

    ('@page;', None, ['invalid @page rule: missing block']),
    ('@page { a:1; ; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     []),
    ('@page { a:1; c: ; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['expected a property value']),
    ('@page { a:1; @top-left {} b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['unknown at-rule in @page context: @top-left']),
    ('@page { a:1; @top-left {}; b: 2 }',
     (None, (0, 0), [('a', [('INTEGER', 1)]), ('b', [('INTEGER', 2)])]),
     ['unknown at-rule in @page context: @top-left']),
])
def test_at_page(css, expected_result, expected_errors):
    """@page rules: selector, specificity and declarations (CSS 2.1)."""
    stylesheet = CSS21Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if expected_result is None:
        assert not stylesheet.rules
    else:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@page'
        assert rule.at_rules == []  # in CSS 2.1
        result = (
            rule.selector,
            rule.specificity,
            [(decl.name, list(jsonify(decl.value)))
             for decl in rule.declarations],
        )
        assert result == expected_result


@pytest.mark.parametrize(('css_source', 'expected_rules', 'expected_errors'), [
    (' /* hey */\n', [], []),
    ('@media all {}', [(['all'], [])], []),
    ('@media screen, print {}', [(['screen', 'print'], [])], []),
    ('@media all;', [], ['invalid @media rule: missing block']),
    ('@media {}', [], ['expected media types for @media']),
    ('@media 4 {}', [], ['expected a media type, got INTEGER']),
    ('@media , screen {}', [], ['expected a media type']),
    ('@media screen, {}', [], ['expected a media type']),
    ('@media screen print {}', [],
     ['expected a media type, got IDENT, IDENT']),

    ('@media all { @page { a: 1 } @media; @import; foo { a: 1 } }',
     [(['all'], [('foo', [('a', [('INTEGER', 1)])])])],
     ['@page rule not allowed in @media',
      '@media rule not allowed in @media',
      '@import rule not allowed in @media']),

])
def test_at_media(css_source, expected_rules, expected_errors):
    """@media rules: media list and nested rulesets."""
    stylesheet = CSS21Parser().parse_stylesheet(css_source)
    assert_errors(stylesheet.errors, expected_errors)

    for rule in stylesheet.rules:
        assert rule.at_keyword == '@media'
    result = [
        (rule.media, [
            (sub_rule.selector.as_css(), [
                (decl.name, list(jsonify(decl.value)))
                for decl in sub_rule.declarations])
            for sub_rule in rule.rules
        ])
        for rule in stylesheet.rules
    ]
    assert result == expected_rules


# ---------------------------------------------------------------------------
# tinycss/tests/test_decoding.py
# ---------------------------------------------------------------------------
# coding: utf-8
"""
    Tests for decoding bytes to Unicode
    -----------------------------------

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""
8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.decoding import decode 15 | 16 | 17 | def params(css, encoding, use_bom=False, expect_error=False, **kwargs): 18 | """Nicer syntax to make a tuple.""" 19 | return css, encoding, use_bom, expect_error, kwargs 20 | 21 | 22 | @pytest.mark.parametrize(('css', 'encoding', 'use_bom', 'expect_error', 23 | 'kwargs'), [ 24 | params('', 'utf8'), # default to utf8 25 | params('𐂃', 'utf8'), 26 | params('é', 'latin1'), # utf8 fails, fall back on ShiftJIS 27 | params('£', 'ShiftJIS', expect_error=True), 28 | params('£', 'ShiftJIS', protocol_encoding='Shift-JIS'), 29 | params('£', 'ShiftJIS', linking_encoding='Shift-JIS'), 30 | params('£', 'ShiftJIS', document_encoding='Shift-JIS'), 31 | params('£', 'ShiftJIS', protocol_encoding='utf8', 32 | document_encoding='ShiftJIS'), 33 | params('@charset "utf8"; £', 'ShiftJIS', expect_error=True), 34 | params('@charset "utf£8"; £', 'ShiftJIS', expect_error=True), 35 | params('@charset "unknown-encoding"; £', 'ShiftJIS', expect_error=True), 36 | params('@charset "utf8"; £', 'ShiftJIS', document_encoding='ShiftJIS'), 37 | params('£', 'ShiftJIS', linking_encoding='utf8', 38 | document_encoding='ShiftJIS'), 39 | params('@charset "utf-32"; 𐂃', 'utf-32-be'), 40 | params('@charset "Shift-JIS"; £', 'ShiftJIS'), 41 | params('@charset "ISO-8859-8"; £', 'ShiftJIS', expect_error=True), 42 | params('𐂃', 'utf-16-le', expect_error=True), # no BOM 43 | params('𐂃', 'utf-16-le', use_bom=True), 44 | params('𐂃', 'utf-32-be', expect_error=True), 45 | params('𐂃', 'utf-32-be', use_bom=True), 46 | params('𐂃', 'utf-32-be', document_encoding='utf-32-be'), 47 | params('𐂃', 'utf-32-be', linking_encoding='utf-32-be'), 48 | params('@charset "utf-32-le"; 𐂃', 'utf-32-be', 49 | use_bom=True, expect_error=True), 50 | # protocol_encoding takes precedence over @charset 51 | params('@charset "ISO-8859-8"; £', 'ShiftJIS', 52 | protocol_encoding='Shift-JIS'), 53 | 
params('@charset "unknown-encoding"; £', 'ShiftJIS', 54 | protocol_encoding='Shift-JIS'), 55 | params('@charset "Shift-JIS"; £', 'ShiftJIS', 56 | protocol_encoding='utf8'), 57 | # @charset takes precedence over document_encoding 58 | params('@charset "Shift-JIS"; £', 'ShiftJIS', 59 | document_encoding='ISO-8859-8'), 60 | # @charset takes precedence over linking_encoding 61 | params('@charset "Shift-JIS"; £', 'ShiftJIS', 62 | linking_encoding='ISO-8859-8'), 63 | # linking_encoding takes precedence over document_encoding 64 | params('£', 'ShiftJIS', 65 | linking_encoding='Shift-JIS', document_encoding='ISO-8859-8'), 66 | ]) 67 | def test_decode(css, encoding, use_bom, expect_error, kwargs): 68 | # Workaround PyPy and CPython 3.0 bug: https://bugs.pypy.org/issue1094 69 | css = css.encode('utf16').decode('utf16') 70 | if use_bom: 71 | source = '\ufeff' + css 72 | else: 73 | source = css 74 | css_bytes = source.encode(encoding) 75 | result, result_encoding = decode(css_bytes, **kwargs) 76 | if expect_error: 77 | assert result != css, 'Unexpected unicode success' 78 | else: 79 | assert result == css, 'Unexpected unicode error' 80 | -------------------------------------------------------------------------------- /tinycss/tests/test_fonts3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for the Fonts 3 parser 4 | ---------------------------- 5 | 6 | :copyright: (c) 2016 by Kozea. 7 | :license: BSD, see LICENSE for more details. 8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.fonts3 import CSSFonts3Parser 15 | 16 | from . 
import assert_errors 17 | from .test_tokenizer import jsonify 18 | 19 | 20 | @pytest.mark.parametrize(('css', 'expected_family_names', 'expected_errors'), [ 21 | ('@font-feature-values foo {}', ('foo',), []), 22 | ('@font-feature-values Foo Test {}', ('Foo Test',), []), 23 | ('@font-feature-values \'Foo Test\' {}', ('Foo Test',), []), 24 | ('@font-feature-values Foo Test, Foo Lol, "Foo tooo"', ( 25 | 'Foo Test', 'Foo Lol', 'Foo tooo'), []), 26 | ('@font-feature-values Foo , Foo lol {}', ('Foo', 'Foo lol'), []), 27 | ('@font-feature-values Foo , "Foobar" , Lol {}', ( 28 | 'Foo', 'Foobar', 'Lol'), []), 29 | ('@font-feature-values Foo, {}', None, [ 30 | 'invalid @font-feature-values selector']), 31 | ('@font-feature-values ,Foo {}', None, [ 32 | 'invalid @font-feature-values selector']), 33 | ('@font-feature-values Test,"Foo", {}', None, [ 34 | 'invalid @font-feature-values selector']), 35 | ('@font-feature-values Test "Foo" {}', None, [ 36 | 'invalid @font-feature-values selector']), 37 | ('@font-feature-values Test Foo, Test "bar", "foo" {}', None, [ 38 | 'invalid @font-feature-values selector']), 39 | ('@font-feature-values Test/Foo {}', None, [ 40 | 'invalid @font-feature-values selector']), 41 | ('@font-feature-values /Foo {}', None, [ 42 | 'invalid @font-feature-values selector']), 43 | ('@font-feature-values #Foo {}', None, [ 44 | 'invalid @font-feature-values selector']), 45 | # TODO: this currently works but should not work 46 | # ('@font-feature-values test@foo {}', None, [ 47 | # 'invalid @font-feature-values selector']), 48 | ('@font-feature-values Hawaii 5-0 {}', None, [ 49 | 'invalid @font-feature-values selector']), 50 | ]) 51 | def test_font_feature_values_selectors(css, expected_family_names, 52 | expected_errors): 53 | stylesheet = CSSFonts3Parser().parse_stylesheet(css) 54 | assert_errors(stylesheet.errors, expected_errors) 55 | 56 | if stylesheet.rules: 57 | assert len(stylesheet.rules) == 1 58 | rule = stylesheet.rules[0] 59 | assert 
rule.at_keyword == '@font-feature-values' 60 | assert rule.family_names == expected_family_names 61 | 62 | 63 | @pytest.mark.parametrize(('css', 'expected_declarations', 'expected_errors'), [ 64 | ('@font-face {}', [], []), 65 | ('@font-face test { src: "lol"; font-family: "bar" }', None, [ 66 | 'unexpected IDENT token in @font-face rule header']), 67 | ('@font-face { src: "lol"; font-family: "bar" }', [ 68 | ('src', [('STRING', 'lol')]), 69 | ('font-family', [('STRING', 'bar')])], []), 70 | ('@font-face { src: "lol"; font-family: "bar"; src: "baz" }', [ 71 | ('src', [('STRING', 'lol')]), 72 | ('font-family', [('STRING', 'bar')]), 73 | ('src', [('STRING', 'baz')])], []), 74 | ]) 75 | def test_font_face_content(css, expected_declarations, expected_errors): 76 | stylesheet = CSSFonts3Parser().parse_stylesheet(css) 77 | assert_errors(stylesheet.errors, expected_errors) 78 | 79 | def declarations(rule): 80 | return [(decl.name, list(jsonify(decl.value))) 81 | for decl in rule.declarations] 82 | 83 | if expected_declarations is None: 84 | assert stylesheet.rules == [] 85 | assert expected_errors 86 | else: 87 | assert len(stylesheet.rules) == 1 88 | rule = stylesheet.rules[0] 89 | assert rule.at_keyword == '@font-face' 90 | assert declarations(rule) == expected_declarations 91 | 92 | 93 | @pytest.mark.parametrize( 94 | ('css', 'expected_rules', 'expected_errors'), [ 95 | ('''@annotation{}''', None, [ 96 | '@annotation rule not allowed in stylesheet']), 97 | ('''@font-feature-values foo {}''', None, []), 98 | ('''@font-feature-values foo { 99 | @swash { ornate: 1; } 100 | @styleset { double-W: 14; sharp-terminals: 16 1; } 101 | }''', [ 102 | ('@swash', [('ornate', [('INTEGER', 1)])]), 103 | ('@styleset', [ 104 | ('double-w', [('INTEGER', 14)]), 105 | ('sharp-terminals', [ 106 | ('INTEGER', 16), ('S', ' '), ('INTEGER', 1)])])], []), 107 | ('''@font-feature-values foo { 108 | @swash { ornate: 14; } 109 | @unknown { test: 1; } 110 | }''', [('@swash', [('ornate', 
[('INTEGER', 14)])])], [ 111 | 'unknown at-rule in @font-feature-values context: @unknown']), 112 | ('''@font-feature-values foo { 113 | @annotation{boxed:1} 114 | bad: 2; 115 | @brokenstylesetbecauseofbadabove { sharp: 1} 116 | @styleset { sharp-terminals: 16 1; @bad {}} 117 | @styleset { @bad {} top-ignored: 3; top: 9000} 118 | really-bad 119 | }''', [ 120 | ('@annotation', [('boxed', [('INTEGER', 1)])]), 121 | ('@styleset', [ 122 | ('sharp-terminals', [ 123 | ('INTEGER', 16), ('S', ' '), ('INTEGER', 1)])]), 124 | ('@styleset', [('top', [('INTEGER', 9000)])])], [ 125 | 'unexpected ; token in selector', 126 | 'expected a property name, got ATKEYWORD', 127 | 'expected a property name, got ATKEYWORD', 128 | 'no declaration block found for ruleset']), 129 | ]) 130 | def test_font_feature_values_content(css, expected_rules, expected_errors): 131 | stylesheet = CSSFonts3Parser().parse_stylesheet(css) 132 | assert_errors(stylesheet.errors, expected_errors) 133 | 134 | if expected_rules is not None: 135 | assert len(stylesheet.rules) == 1 136 | rule = stylesheet.rules[0] 137 | assert rule.at_keyword == '@font-feature-values' 138 | 139 | rules = [ 140 | (at_rule.at_keyword, [ 141 | (decl.name, list(jsonify(decl.value))) 142 | for decl in at_rule.declarations]) 143 | for at_rule in rule.at_rules] if rule.at_rules else None 144 | assert rules == expected_rules 145 | -------------------------------------------------------------------------------- /tinycss/tests/test_page3.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for the Paged Media 3 parser 4 | ---------------------------------- 5 | 6 | :copyright: (c) 2012 by Simon Sapin. 7 | :license: BSD, see LICENSE for more details. 8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import pytest 14 | from tinycss.page3 import CSSPage3Parser 15 | 16 | from . 
from .test_tokenizer import jsonify


@pytest.mark.parametrize(('css', 'expected_selector',
                          'expected_specificity', 'expected_errors'), [
    ('@page {}', (None, None), (0, 0, 0), []),

    ('@page :first {}', (None, 'first'), (0, 1, 0), []),
    ('@page:left{}', (None, 'left'), (0, 0, 1), []),
    ('@page :right {}', (None, 'right'), (0, 0, 1), []),
    ('@page :blank{}', (None, 'blank'), (0, 1, 0), []),
    ('@page :last {}', None, None, ['invalid @page selector']),
    ('@page : first {}', None, None, ['invalid @page selector']),

    ('@page foo:first {}', ('foo', 'first'), (1, 1, 0), []),
    ('@page bar :left {}', ('bar', 'left'), (1, 0, 1), []),
    (r'@page \26:right {}', ('&', 'right'), (1, 0, 1), []),

    ('@page foo {}', ('foo', None), (1, 0, 0), []),
    (r'@page \26 {}', ('&', None), (1, 0, 0), []),

    ('@page foo fist {}', None, None, ['invalid @page selector']),
    ('@page foo, bar {}', None, None, ['invalid @page selector']),
    ('@page foo&first {}', None, None, ['invalid @page selector']),
])
def test_selectors(css, expected_selector, expected_specificity,
                   expected_errors):
    """Page selectors: ``(page name, pseudo-class)`` plus specificity."""
    stylesheet = CSSPage3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    if stylesheet.rules:
        assert len(stylesheet.rules) == 1
        rule = stylesheet.rules[0]
        assert rule.at_keyword == '@page'
        selector = rule.selector
        assert rule.specificity == expected_specificity
    else:
        selector = None
    assert selector == expected_selector


@pytest.mark.parametrize(('css', 'expected_declarations',
                          'expected_rules', 'expected_errors'), [
    ('@page {}', [], [], []),
    ('@page { foo: 4; bar: z }',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])], [], []),
    ('''@page { foo: 4;
        @top-center { content: "Awesome Title" }
        @bottom-left { content: counter(page) }
        bar: z
    }''',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
     [('@top-center', [('content', [('STRING', 'Awesome Title')])]),
      ('@bottom-left', [('content', [
          ('FUNCTION', 'counter', [('IDENT', 'page')])])])],
     []),
    ('''@page { foo: 4;
        @bottom-top { content: counter(page) }
        bar: z
    }''',
     [('foo', [('INTEGER', 4)]), ('bar', [('IDENT', 'z')])],
     [],
     ['unknown at-rule in @page context: @bottom-top']),

    ('@page{} @top-right{}', [], [], [
        '@top-right rule not allowed in stylesheet']),
    ('@page{ @top-right 4 {} }', [], [], [
        'unexpected INTEGER token in @top-right rule header']),
    # Not much error recovery tests here. This should be covered in test_css21
])
def test_content(css, expected_declarations, expected_rules, expected_errors):
    """@page content: declarations plus margin at-rules."""
    stylesheet = CSSPage3Parser().parse_stylesheet(css)
    assert_errors(stylesheet.errors, expected_errors)

    def declarations(rule):
        return [(decl.name, list(jsonify(decl.value)))
                for decl in rule.declarations]

    assert len(stylesheet.rules) == 1
    rule = stylesheet.rules[0]
    assert rule.at_keyword == '@page'
    assert declarations(rule) == expected_declarations
    rules = [(margin_rule.at_keyword, declarations(margin_rule))
             for margin_rule in rule.at_rules]
    assert rules == expected_rules


# ---------------------------------------------------------------------------
# tinycss/tests/test_tokenizer.py
# ---------------------------------------------------------------------------
# coding: utf-8
"""
    Tests for the tokenizer
    -----------------------

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""
8 | """ 9 | 10 | 11 | from __future__ import unicode_literals 12 | 13 | import os 14 | import sys 15 | 16 | import pytest 17 | from tinycss.tokenizer import ( 18 | cython_tokenize_flat, python_tokenize_flat, regroup) 19 | 20 | 21 | def test_speedups(): 22 | is_pypy = hasattr(sys, 'pypy_translation_info') 23 | env_skip_tests = os.environ.get('TINYCSS_SKIP_SPEEDUPS_TESTS') 24 | # pragma: no cover 25 | if is_pypy or env_skip_tests: 26 | return 27 | assert cython_tokenize_flat is not None, ( 28 | 'Cython speedups are not installed, related tests will ' 29 | 'be skipped. Set the TINYCSS_SKIP_SPEEDUPS_TESTS environment ' 30 | 'variable if this is expected.') 31 | 32 | 33 | @pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [ 34 | (tokenize,) + test_data 35 | for tokenize in (python_tokenize_flat, cython_tokenize_flat) 36 | for test_data in [ 37 | ('', []), 38 | ('red -->', [('IDENT', 'red'), ('S', ' '), ('CDC', '-->')]), 39 | # Longest match rule: no CDC 40 | ('red-->', [('IDENT', 'red--'), ('DELIM', '>')]), 41 | (r'p[example="foo(int x) { this.x = x;}"]', [ 42 | ('IDENT', 'p'), 43 | ('[', '['), 44 | ('IDENT', 'example'), 45 | ('DELIM', '='), 46 | ('STRING', 'foo(int x) { this.x = x;}'), 47 | (']', ']')]), 48 | 49 | # Numbers are parsed 50 | ('42 .5 -4pX 1.25em 30%', [ 51 | ('INTEGER', 42), ('S', ' '), 52 | ('NUMBER', .5), ('S', ' '), 53 | # units are normalized to lower-case: 54 | ('DIMENSION', -4, 'px'), ('S', ' '), 55 | ('DIMENSION', 1.25, 'em'), ('S', ' '), 56 | ('PERCENTAGE', 30, '%')]), 57 | 58 | # URLs are extracted 59 | ('url(foo.png)', [('URI', 'foo.png')]), 60 | ('url("foo.png")', [('URI', 'foo.png')]), 61 | 62 | # Escaping 63 | 64 | (r'/* Comment with a \ backslash */', [ 65 | ('COMMENT', '/* Comment with a \ backslash */')]), # Unchanged 66 | 67 | # backslash followed by a newline in a string: ignored 68 | ('"Lorem\\\nIpsum"', [('STRING', 'LoremIpsum')]), 69 | 70 | # backslash followed by a newline outside a string: stands for itself 71 | 
('Lorem\\\nIpsum', [ 72 | ('IDENT', 'Lorem'), ('DELIM', '\\'), 73 | ('S', '\n'), ('IDENT', 'Ipsum')]), 74 | 75 | # Cancel the meaning of special characters 76 | (r'"Lore\m Ipsum"', [('STRING', 'Lorem Ipsum')]), # or not specal 77 | (r'"Lorem \49psum"', [('STRING', 'Lorem Ipsum')]), 78 | (r'"Lorem \49 psum"', [('STRING', 'Lorem Ipsum')]), 79 | (r'"Lorem\"Ipsum"', [('STRING', 'Lorem"Ipsum')]), 80 | (r'"Lorem\\Ipsum"', [('STRING', r'Lorem\Ipsum')]), 81 | (r'"Lorem\5c Ipsum"', [('STRING', r'Lorem\Ipsum')]), 82 | (r'Lorem\+Ipsum', [('IDENT', 'Lorem+Ipsum')]), 83 | (r'Lorem+Ipsum', [ 84 | ('IDENT', 'Lorem'), ('DELIM', '+'), ('IDENT', 'Ipsum')]), 85 | (r'url(foo\).png)', [('URI', 'foo).png')]), 86 | 87 | # Unicode and backslash escaping 88 | ('\\26 B', [('IDENT', '&B')]), 89 | ('\\&B', [('IDENT', '&B')]), 90 | ('@\\26\tB', [('ATKEYWORD', '@&B')]), 91 | ('@\\&B', [('ATKEYWORD', '@&B')]), 92 | ('#\\26\nB', [('HASH', '#&B')]), 93 | ('#\\&B', [('HASH', '#&B')]), 94 | ('\\26\r\nB(', [('FUNCTION', '&B(')]), 95 | ('\\&B(', [('FUNCTION', '&B(')]), 96 | (r'12.5\000026B', [('DIMENSION', 12.5, '&b')]), 97 | (r'12.5\0000263B', [('DIMENSION', 12.5, '&3b')]), # max 6 digits 98 | (r'12.5\&B', [('DIMENSION', 12.5, '&b')]), 99 | (r'"\26 B"', [('STRING', '&B')]), 100 | (r"'\000026B'", [('STRING', '&B')]), 101 | (r'"\&B"', [('STRING', '&B')]), 102 | (r'url("\26 B")', [('URI', '&B')]), 103 | (r'url(\26 B)', [('URI', '&B')]), 104 | (r'url("\&B")', [('URI', '&B')]), 105 | (r'url(\&B)', [('URI', '&B')]), 106 | (r'Lorem\110000Ipsum', [('IDENT', 'Lorem\uFFFDIpsum')]), 107 | 108 | # Bad strings 109 | 110 | # String ends at EOF without closing: no error, parsed 111 | ('"Lorem\\26Ipsum', [('STRING', 'Lorem&Ipsum')]), 112 | # Unescaped newline: ends the string, error, unparsed 113 | ('"Lorem\\26Ipsum\n', [ 114 | ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n')]), 115 | # Tokenization restarts after the newline, so the second " starts 116 | # a new string (which ends at EOF without errors, as above.) 
# NOTE(review): this chunk starts mid-way through the parametrize data for
# test_tokens; only the entries visible in this chunk are reproduced here —
# the full data list continues above, outside this chunk.
@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('"Lorem\\26Ipsum\ndolor" sit', [
            ('BAD_STRING', r'"Lorem\26Ipsum'), ('S', '\n'),
            ('IDENT', 'dolor'), ('STRING', ' sit')]),
    ]])
def test_tokens(tokenize, css_source, expected_tokens):
    """Check the (type, value[, unit]) tuples produced for each source."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    sources = [css_source]
    if sys.version_info[0] < 3:
        # On Python 2.x, ASCII-only bytestrings can be used
        # where Unicode is expected.
        sources.append(css_source.encode('ascii'))
    for css_source in sources:
        tokens = tokenize(css_source, ignore_comments=False)
        result = [
            (token.type, token.value) + (
                () if token.unit is None else (token.unit,))
            for token in tokens
        ]
        assert result == expected_tokens


@pytest.mark.parametrize('tokenize', [
    python_tokenize_flat, cython_tokenize_flat])
def test_positions(tokenize):
    """Test the reported line/column position of each token."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css = '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }'
    tokens = tokenize(css, ignore_comments=False)
    result = [(token.type, token.line, token.column) for token in tokens]
    assert result == [
        ('COMMENT', 1, 1), ('S', 2, 9),
        ('IDENT', 3, 1), ('S', 3, 2), ('{', 3, 3),
        ('S', 3, 4), ('IDENT', 4, 5), (':', 4, 10),
        ('S', 4, 11), ('IDENT', 4, 12), (';', 4, 15), ('S', 4, 16),
        ('IDENT', 4, 17), (':', 4, 24), ('S', 4, 25), ('STRING', 4, 26),
        ('S', 5, 5), ('}', 5, 6)]


@pytest.mark.parametrize(('tokenize', 'css_source', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        ('', []),
        (r'Lorem\26 "i\psum"4px', [
            ('IDENT', 'Lorem&'), ('STRING', 'ipsum'), ('DIMENSION', 4)]),

        ('not([[lorem]]{ipsum (42)})', [
            ('FUNCTION', 'not', [
                ('[', [
                    ('[', [
                        ('IDENT', 'lorem'),
                    ]),
                ]),
                ('{', [
                    ('IDENT', 'ipsum'),
                    ('S', ' '),
                    ('(', [
                        ('INTEGER', 42),
                    ])
                ])
            ])]),

        # Close everything at EOF, no error
        ('a[b{"d', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('STRING', 'd'),
                ]),
            ]),
        ]),

        # Any remaining ), ] or } token is a nesting error
        ('a[b{d]e}', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                    (']', ']'),  # The error is visible here
                    ('IDENT', 'e'),
                ]),
            ]),
        ]),
        # ref:
        ('a[b{d}e]', [
            ('IDENT', 'a'),
            ('[', [
                ('IDENT', 'b'),
                ('{', [
                    ('IDENT', 'd'),
                ]),
                ('IDENT', 'e'),
            ]),
        ]),
    ]])
def test_token_grouping(tokenize, css_source, expected_tokens):
    """Check the tree structure produced by regroup()."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = regroup(tokenize(css_source, ignore_comments=False))
    result = list(jsonify(tokens))
    assert result == expected_tokens


def jsonify(tokens):
    """Turn tokens into "JSON-compatible" data structures."""
    for token in tokens:
        if token.type == 'FUNCTION':
            yield (token.type, token.function_name,
                   list(jsonify(token.content)))
        elif token.is_container:
            yield token.type, list(jsonify(token.content))
        else:
            yield token.type, token.value


@pytest.mark.parametrize(('tokenize', 'ignore_comments', 'expected_tokens'), [
    (tokenize,) + test_data
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        (False, [
            ('COMMENT', '/* lorem */'),
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
                ('COMMENT', '/* sit */'),
            ]),
            ('BAD_COMMENT', '/* amet')
        ]),
        (True, [
            ('S', ' '),
            ('IDENT', 'ipsum'),
            ('[', [
                ('IDENT', 'dolor'),
            ]),
        ]),
    ]])
def test_comments(tokenize, ignore_comments, expected_tokens):
    """Comments (and a BAD_COMMENT at EOF) are kept or dropped on request."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    css_source = '/* lorem */ ipsum[dolor/* sit */]/* amet'
    tokens = regroup(tokenize(css_source, ignore_comments))
    result = list(jsonify(tokens))
    assert result == expected_tokens


@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        r'p[example="foo(int x) { this.x = x;}"]',
        '"Lorem\\26Ipsum\ndolor" sit',
        '/* Lorem\nipsum */\fa {\n    color: red;\tcontent: "dolor\\\fsit" }',
        'not([[lorem]]{ipsum (42)})',
        'a[b{d]e}',
        'a[b{"d',
    ]])
def test_token_serialize_css(tokenize, css_source):
    """as_css() must round-trip the source, flat or grouped."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    for _regroup in [regroup, lambda x: x]:
        tokens = _regroup(tokenize(css_source, ignore_comments=False))
        result = ''.join(token.as_css() for token in tokens)
        assert result == css_source


@pytest.mark.parametrize(('tokenize', 'css_source'), [
    (tokenize, test_data)
    for tokenize in (python_tokenize_flat, cython_tokenize_flat)
    for test_data in [
        '(8, foo, [z])', '[8, foo, (z)]', '{8, foo, [z]}', 'func(8, foo, [z])'
    ]
])
def test_token_api(tokenize, css_source):
    """A grouped source is a single container with the expected content."""
    if tokenize is None:  # pragma: no cover
        pytest.skip('Speedups not available')
    tokens = list(regroup(tokenize(css_source)))
    assert len(tokens) == 1
    token = tokens[0]
    expected_len = 7  # 2 spaces, 2 commas, 3 others.
    assert len(token.content) == expected_len
302 | assert len(token.content) == expected_len 303 | -------------------------------------------------------------------------------- /tinycss/token_data.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | tinycss.token_data 4 | ------------------ 5 | 6 | Shared data for both implementations (Cython and Python) of the tokenizer. 7 | 8 | :copyright: (c) 2012 by Simon Sapin. 9 | :license: BSD, see LICENSE for more details. 10 | """ 11 | 12 | from __future__ import unicode_literals 13 | 14 | import functools 15 | import operator 16 | import re 17 | import string 18 | import sys 19 | 20 | # * Raw strings with the r'' notation are used so that \ do not need 21 | # to be escaped. 22 | # * Names and regexps are separated by a tabulation. 23 | # * Macros are re-ordered so that only previous definitions are needed. 24 | # * {} are used for macro substitution with ``string.Formatter``, 25 | # so other uses of { or } have been doubled. 26 | # * The syntax is otherwise compatible with re.compile. 27 | # * Some parentheses were added to add capturing groups. 28 | # (in unicode, DIMENSION and URI) 29 | 30 | # *** Willful violation: *** 31 | # Numbers can take a + or - sign, but the sign is a separate DELIM token. 32 | # Since comments are allowed anywhere between tokens, this makes 33 | # the following this is valid. It means 10 negative pixels: 34 | # margin-top: -/**/10px 35 | 36 | # This makes parsing numbers a pain, so instead we’ll do the same is Firefox 37 | # and make the sign part as of the 'num' macro. The above CSS will be invalid. 38 | # See discussion: 39 | # http://lists.w3.org/Archives/Public/www-style/2011Oct/0028.html 40 | MACROS = r''' 41 | nl \n|\r\n|\r|\f 42 | w [ \t\r\n\f]* 43 | nonascii [^\0-\237] 44 | unicode \\([0-9a-f]{{1,6}})(\r\n|[ \n\r\t\f])? 
45 | simple_escape [^\n\r\f0-9a-f] 46 | escape {unicode}|\\{simple_escape} 47 | nmstart [_a-z]|{nonascii}|{escape} 48 | nmchar [_a-z0-9-]|{nonascii}|{escape} 49 | name {nmchar}+ 50 | ident [-]?{nmstart}{nmchar}* 51 | num [-+]?(?:[0-9]*\.[0-9]+|[0-9]+) 52 | string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" 53 | string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\' 54 | string {string1}|{string2} 55 | badstring1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\\? 56 | badstring2 \'([^\n\r\f\\']|\\{nl}|{escape})*\\? 57 | badstring {badstring1}|{badstring2} 58 | badcomment1 \/\*[^*]*\*+([^/*][^*]*\*+)* 59 | badcomment2 \/\*[^*]*(\*+[^/*][^*]*)* 60 | badcomment {badcomment1}|{badcomment2} 61 | baduri1 url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w} 62 | baduri2 url\({w}{string}{w} 63 | baduri3 url\({w}{badstring} 64 | baduri {baduri1}|{baduri2}|{baduri3} 65 | '''.replace(r'\0', '\0').replace(r'\237', '\237') 66 | 67 | # Removed these tokens. Instead, they’re tokenized as two DELIM each. 68 | # INCLUDES ~= 69 | # DASHMATCH |= 70 | # They are only used in selectors but selectors3 also have ^=, *= and $=. 71 | # We don’t actually parse selectors anyway 72 | 73 | # Re-ordered so that the longest match is always the first. 74 | # For example, "url('foo')" matches URI, BAD_URI, FUNCTION and IDENT, 75 | # but URI would always be a longer match than the others. 76 | TOKENS = r''' 77 | S [ \t\r\n\f]+ 78 | 79 | URI url\({w}({string}|([!#$%&*-\[\]-~]|{nonascii}|{escape})*){w}\) 80 | BAD_URI {baduri} 81 | FUNCTION {ident}\( 82 | UNICODE-RANGE u\+[0-9a-f?]{{1,6}}(-[0-9a-f]{{1,6}})? 
83 | IDENT {ident} 84 | 85 | ATKEYWORD @{ident} 86 | HASH #{name} 87 | 88 | DIMENSION ({num})({ident}) 89 | PERCENTAGE {num}% 90 | NUMBER {num} 91 | 92 | STRING {string} 93 | BAD_STRING {badstring} 94 | 95 | COMMENT \/\*[^*]*\*+([^/*][^*]*\*+)*\/ 96 | BAD_COMMENT {badcomment} 97 | 98 | : : 99 | ; ; 100 | { \{{ 101 | } \}} 102 | ( \( 103 | ) \) 104 | [ \[ 105 | ] \] 106 | CDO 108 | ''' 109 | 110 | 111 | # Strings with {macro} expanded 112 | COMPILED_MACROS = {} 113 | 114 | 115 | COMPILED_TOKEN_REGEXPS = [] # [(name, regexp.match)] ordered 116 | COMPILED_TOKEN_INDEXES = {} # {name: i} helper for the C speedups 117 | 118 | 119 | # Indexed by codepoint value of the first character of a token. 120 | # Codepoints >= 160 (aka nonascii) all use the index 160. 121 | # values are (i, name, regexp.match) 122 | TOKEN_DISPATCH = [] 123 | 124 | 125 | try: 126 | unichr 127 | except NameError: 128 | # Python 3 129 | unichr = chr 130 | unicode = str 131 | 132 | 133 | def _init(): 134 | """Import-time initialization.""" 135 | COMPILED_MACROS.clear() 136 | for line in MACROS.splitlines(): 137 | if line.strip(): 138 | name, value = line.split('\t') 139 | COMPILED_MACROS[name.strip()] = '(?:%s)' \ 140 | % value.format(**COMPILED_MACROS) 141 | 142 | COMPILED_TOKEN_REGEXPS[:] = ( 143 | ( 144 | name.strip(), 145 | re.compile( 146 | value.format(**COMPILED_MACROS), 147 | # Case-insensitive when matching eg. 
uRL(foo) 148 | # but preserve the case in extracted groups 149 | re.I 150 | ).match 151 | ) 152 | for line in TOKENS.splitlines() 153 | if line.strip() 154 | for name, value in [line.split('\t')] 155 | ) 156 | 157 | COMPILED_TOKEN_INDEXES.clear() 158 | for i, (name, regexp) in enumerate(COMPILED_TOKEN_REGEXPS): 159 | COMPILED_TOKEN_INDEXES[name] = i 160 | 161 | dispatch = [[] for i in range(161)] 162 | for chars, names in [ 163 | (' \t\r\n\f', ['S']), 164 | ('uU', ['URI', 'BAD_URI', 'UNICODE-RANGE']), 165 | # \ is an escape outside of another token 166 | (string.ascii_letters + '\\_-' + unichr(160), ['FUNCTION', 'IDENT']), 167 | (string.digits + '.+-', ['DIMENSION', 'PERCENTAGE', 'NUMBER']), 168 | ('@', ['ATKEYWORD']), 169 | ('#', ['HASH']), 170 | ('\'"', ['STRING', 'BAD_STRING']), 171 | ('/', ['COMMENT', 'BAD_COMMENT']), 172 | ('<', ['CDO']), 173 | ('-', ['CDC']), 174 | ]: 175 | for char in chars: 176 | dispatch[ord(char)].extend(names) 177 | for char in ':;{}()[]': 178 | dispatch[ord(char)] = [char] 179 | 180 | TOKEN_DISPATCH[:] = ( 181 | [ 182 | (index,) + COMPILED_TOKEN_REGEXPS[index] 183 | for name in names 184 | for index in [COMPILED_TOKEN_INDEXES[name]] 185 | ] 186 | for names in dispatch 187 | ) 188 | 189 | 190 | _init() 191 | 192 | 193 | def _unicode_replace(match, int=int, unichr=unichr, maxunicode=sys.maxunicode): 194 | codepoint = int(match.group(1), 16) 195 | if codepoint <= maxunicode: 196 | return unichr(codepoint) 197 | else: 198 | return '\N{REPLACEMENT CHARACTER}' # U+FFFD 199 | 200 | 201 | UNICODE_UNESCAPE = functools.partial( 202 | re.compile(COMPILED_MACROS['unicode'], re.I).sub, 203 | _unicode_replace) 204 | 205 | NEWLINE_UNESCAPE = functools.partial( 206 | re.compile(r'()\\' + COMPILED_MACROS['nl']).sub, 207 | '') 208 | 209 | SIMPLE_UNESCAPE = functools.partial( 210 | re.compile(r'\\(%s)' % COMPILED_MACROS['simple_escape'], re.I).sub, 211 | # Same as r'\1', but faster on CPython 212 | operator.methodcaller('group', 1)) 213 | 214 | 
class Token(object):
    """A single atomic token.

    .. attribute:: is_container

        Always ``False``.
        Helps to tell :class:`Token` apart from :class:`ContainerToken`.

    .. attribute:: type

        The type of token as a string:

        ``S``
            A sequence of white space

        ``IDENT``
            An identifier: a name that does not start with a digit.
            A name is a sequence of letters, digits, ``_``, ``-``, escaped
            characters and non-ASCII characters. Eg: ``margin-left``

        ``HASH``
            ``#`` followed immediately by a name. Eg: ``#ff8800``

        ``ATKEYWORD``
            ``@`` followed immediately by an identifier. Eg: ``@page``

        ``URI``
            Eg: ``url(foo)`` The content may or may not be quoted.

        ``UNICODE-RANGE``
            ``U+`` followed by one or two hexadecimal
            Unicode codepoints. Eg: ``U+20-00FF``

        ``INTEGER``
            An integer with an optional ``+`` or ``-`` sign

        ``NUMBER``
            A non-integer number with an optional ``+`` or ``-`` sign

        ``DIMENSION``
            An integer or number followed immediately by an
            identifier (the unit). Eg: ``12px``

        ``PERCENTAGE``
            An integer or number followed immediately by ``%``

        ``STRING``
            A string, quoted with ``"`` or ``'``

        ``:`` or ``;``
            That character.

        ``DELIM``
            A single character not matched in another token. Eg: ``,``

        See the source of the :mod:`.token_data` module for the precise
        regular expressions that match various tokens.

        Note that other token types exist in the early tokenization steps,
        but these are ignored, are syntax errors, or are later transformed
        into :class:`ContainerToken` or :class:`FunctionToken`.

    .. attribute:: value

        The parsed value:

        * INTEGER, NUMBER, PERCENTAGE or DIMENSION tokens: the numeric value
          as an int or float.
        * STRING tokens: the unescaped string without quotes
        * URI tokens: the unescaped URI without quotes or
          ``url(`` and ``)`` markers.
        * IDENT, ATKEYWORD or HASH tokens: the unescaped token,
          with ``@`` or ``#`` markers left as-is
        * Other tokens: same as :attr:`as_css`

        *Unescaped* refers to the various escaping methods based on the
        backslash ``\\`` character in CSS syntax.

    .. attribute:: unit

        * DIMENSION tokens: the normalized (unescaped, lower-case)
          unit name as a string. eg. ``'px'``
        * PERCENTAGE tokens: the string ``'%'``
        * Other tokens: ``None``

    .. attribute:: line

        The line number in the CSS source of the start of this token.

    .. attribute:: column

        The column number (inside a source line) of the start of this token.

    """
    is_container = False
    __slots__ = 'type', '_as_css', 'value', 'unit', 'line', 'column'

    def __init__(self, type_, css_value, value, unit, line, column):
        self.type = type_
        self._as_css = css_value
        self.value = value
        self.unit = unit
        self.line = line
        self.column = column

    def as_css(self):
        """
        Return as an Unicode string the CSS representation of the token,
        as parsed in the source.
        """
        return self._as_css

    def __repr__(self):
        # The angle-bracketed format string was lost to markup stripping
        # in this listing; restored from the visible .format(self, ...) call.
        return ('<Token {0.type} at {0.line}:{0.column} {0.value!r}{1}>'
                .format(self, self.unit or ''))

    def __eq__(self, other):
        if type(self) != type(other):
            raise TypeError(
                'Cannot compare {0} and {1}'.format(type(self), type(other)))
        # BUG FIX: the previous version called all() with four positional
        # arguments (a TypeError at runtime: all() takes one iterable) and
        # read a non-existent ``type_`` attribute (the slot is ``type``).
        # Tokens compare equal on everything except their source position.
        # NOTE(review): defining __eq__ leaves __hash__ as None on Python 3,
        # making Token unhashable — confirm this is intended.
        return (
            self.type == other.type and
            self._as_css == other._as_css and
            self.value == other.value and
            self.unit == other.unit)
395 | """ 396 | parts = [self._css_start] 397 | parts.extend(token.as_css() for token in self.content) 398 | parts.append(self._css_end) 399 | return ''.join(parts) 400 | 401 | format_string = '' 402 | 403 | def __repr__(self): 404 | return (self.format_string + ' {0.content}').format(self) 405 | 406 | 407 | class FunctionToken(ContainerToken): 408 | """A specialized :class:`ContainerToken` for a ``FUNCTION`` group. 409 | Has an additional attribute: 410 | 411 | .. attribute:: function_name 412 | 413 | The unescaped name of the function, with the ``(`` marker removed. 414 | 415 | """ 416 | __slots__ = 'function_name', 417 | 418 | def __init__(self, type_, css_start, css_end, function_name, content, 419 | line, column): 420 | super(FunctionToken, self).__init__( 421 | type_, css_start, css_end, content, line, column) 422 | # Remove the ( marker: 423 | self.function_name = function_name[:-1] 424 | 425 | format_string = ('') 427 | 428 | 429 | class TokenList(list): 430 | """ 431 | A mixed list of :class:`~.token_data.Token` and 432 | :class:`~.token_data.ContainerToken` objects. 433 | 434 | This is a subclass of the builtin :class:`~builtins.list` type. 435 | It can be iterated, indexed and sliced as usual, but also has some 436 | additional API: 437 | 438 | """ 439 | @property 440 | def line(self): 441 | """The line number in the CSS source of the first token.""" 442 | return self[0].line 443 | 444 | @property 445 | def column(self): 446 | """The column number (inside a source line) of the first token.""" 447 | return self[0].column 448 | 449 | def as_css(self): 450 | """ 451 | Return as an Unicode string the CSS representation of the tokens, 452 | as parsed in the source. 
# ---- tinycss/tokenizer.py ----
# coding: utf-8
"""
    tinycss.tokenizer
    -----------------

    Tokenizer for the CSS core syntax:
    http://www.w3.org/TR/CSS21/syndata.html#tokenization

    This is the pure-python implementation. See also speedups.pyx

    :copyright: (c) 2012 by Simon Sapin.
    :license: BSD, see LICENSE for more details.
"""

from __future__ import unicode_literals

from . import token_data


def tokenize_flat(
        css_source, ignore_comments=True,
        # Make these local variable to avoid global lookups in the loop
        tokens_dispatch=token_data.TOKEN_DISPATCH,
        unicode_unescape=token_data.UNICODE_UNESCAPE,
        newline_unescape=token_data.NEWLINE_UNESCAPE,
        simple_unescape=token_data.SIMPLE_UNESCAPE,
        find_newlines=token_data.FIND_NEWLINES,
        Token=token_data.Token,
        len=len,
        int=int,
        float=float,
        list=list,
        _None=None):
    """
    :param css_source:
        CSS as an unicode string
    :param ignore_comments:
        if true (the default) comments will not be included in the
        return value
    :return:
        An iterator of :class:`Token`

    """
    pos = 0
    line = 1
    column = 1
    source_len = len(css_source)
    tokens = []
    while pos < source_len:
        char = css_source[pos]
        if char in ':;{}()[]':
            # Single-character punctuation: the token type is the character.
            type_ = char
            css_value = char
        else:
            codepoint = min(ord(char), 160)
            for _index, type_, regexp in tokens_dispatch[codepoint]:
                match = regexp(css_source, pos)
                if match:
                    # First match is the longest. See comments on TOKENS above.
                    css_value = match.group()
                    break
            else:
                # No match.
                # "Any other character not matched by the above rules,
                #  and neither a single nor a double quote."
                # ... but quotes at the start of a token are always matched
                # by STRING or BAD_STRING. So DELIM is any single character.
                type_ = 'DELIM'
                css_value = char
        length = len(css_value)
        next_pos = pos + length

        # A BAD_COMMENT is a comment at EOF. Ignore it too.
        if not (ignore_comments and type_ in ('COMMENT', 'BAD_COMMENT')):
            # Parse numbers, extract strings and URIs, unescape
            unit = _None
            if type_ == 'DIMENSION':
                value = match.group(1)
                value = float(value) if '.' in value else int(value)
                unit = match.group(2)
                unit = simple_unescape(unit)
                unit = unicode_unescape(unit)
                unit = unit.lower()  # normalize
            elif type_ == 'PERCENTAGE':
                value = css_value[:-1]
                value = float(value) if '.' in value else int(value)
                unit = '%'
            elif type_ == 'NUMBER':
                value = css_value
                if '.' in value:
                    value = float(value)
                else:
                    value = int(value)
                    type_ = 'INTEGER'
            elif type_ in ('IDENT', 'ATKEYWORD', 'HASH', 'FUNCTION'):
                value = simple_unescape(css_value)
                value = unicode_unescape(value)
            elif type_ == 'URI':
                value = match.group(1)
                if value and value[0] in '"\'':
                    value = value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            elif type_ == 'STRING':
                value = css_value[1:-1]  # Remove quotes
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            # BAD_STRING can only be one of:
            # * Unclosed string at the end of the stylesheet:
            #   Close the string, but this is not an error.
            #   Make it a "good" STRING token.
            # * Unclosed string at the (unescaped) end of the line:
            #   Close the string, but this is an error.
            #   Leave it as a BAD_STRING, don't bother parsing it.
            # See http://www.w3.org/TR/CSS21/syndata.html#parsing-errors
            elif type_ == 'BAD_STRING' and next_pos == source_len:
                type_ = 'STRING'
                value = css_value[1:]  # Remove quote
                value = newline_unescape(value)
                value = simple_unescape(value)
                value = unicode_unescape(value)
            else:
                value = css_value
            tokens.append(Token(type_, css_value, value, unit, line, column))

        pos = next_pos
        newlines = list(find_newlines(css_value))
        if newlines:
            line += len(newlines)
            # Add 1 to have lines start at column 1, not 0
            column = length - newlines[-1].end() + 1
        else:
            column += length
    return tokens


def regroup(tokens):
    """
    Match pairs of tokens: () [] {} function()
    (Strings in "" or '' are taken care of by the tokenizer.)

    Opening tokens are replaced by a :class:`ContainerToken`.
    Closing tokens are removed. Unmatched closing tokens are invalid
    but left as-is. All nested structures that are still open at
    the end of the stylesheet are implicitly closed.

    :param tokens:
        a *flat* iterable of tokens, as returned by :func:`tokenize_flat`.
    :return:
        A tree of tokens.

    """
    # "global" objects for the inner recursion
    pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'}
    tokens = iter(tokens)
    eof = [False]

    def _regroup_inner(stop_at=None, tokens=tokens, pairs=pairs, eof=eof,
                       ContainerToken=token_data.ContainerToken,
                       FunctionToken=token_data.FunctionToken):
        for token in tokens:
            type_ = token.type
            if type_ == stop_at:
                return

            end = pairs.get(type_)
            if end is None:
                yield token  # Not a grouping token
            else:
                assert not isinstance(token, ContainerToken), (
                    'Token looks already grouped: {0}'.format(token))
                content = list(_regroup_inner(end))
                if eof[0]:
                    end = ''  # Implicit end of structure at EOF.
                if type_ == 'FUNCTION':
                    yield FunctionToken(token.type, token.as_css(), end,
                                        token.value, content,
                                        token.line, token.column)
                else:
                    yield ContainerToken(token.type, token.as_css(), end,
                                         content,
                                         token.line, token.column)
        else:
            eof[0] = True  # end of file/stylesheet
    return _regroup_inner()
154 | 155 | """ 156 | # "global" objects for the inner recursion 157 | pairs = {'FUNCTION': ')', '(': ')', '[': ']', '{': '}'} 158 | tokens = iter(tokens) 159 | eof = [False] 160 | 161 | def _regroup_inner(stop_at=None, tokens=tokens, pairs=pairs, eof=eof, 162 | ContainerToken=token_data.ContainerToken, 163 | FunctionToken=token_data.FunctionToken): 164 | for token in tokens: 165 | type_ = token.type 166 | if type_ == stop_at: 167 | return 168 | 169 | end = pairs.get(type_) 170 | if end is None: 171 | yield token # Not a grouping token 172 | else: 173 | assert not isinstance(token, ContainerToken), ( 174 | 'Token looks already grouped: {0}'.format(token)) 175 | content = list(_regroup_inner(end)) 176 | if eof[0]: 177 | end = '' # Implicit end of structure at EOF. 178 | if type_ == 'FUNCTION': 179 | yield FunctionToken(token.type, token.as_css(), end, 180 | token.value, content, 181 | token.line, token.column) 182 | else: 183 | yield ContainerToken(token.type, token.as_css(), end, 184 | content, 185 | token.line, token.column) 186 | else: 187 | eof[0] = True # end of file/stylesheet 188 | return _regroup_inner() 189 | 190 | 191 | def tokenize_grouped(css_source, ignore_comments=True): 192 | """ 193 | :param css_source: 194 | CSS as an unicode string 195 | :param ignore_comments: 196 | if true (the default) comments will not be included in the 197 | return value 198 | :return: 199 | An iterator of :class:`Token` 200 | 201 | """ 202 | return regroup(tokenize_flat(css_source, ignore_comments)) 203 | 204 | 205 | # Optional Cython version of tokenize_flat 206 | # Make both versions available with explicit names for tests. 207 | python_tokenize_flat = tokenize_flat 208 | try: 209 | from . 
import speedups 210 | except ImportError: 211 | cython_tokenize_flat = None 212 | else: 213 | cython_tokenize_flat = speedups.tokenize_flat 214 | # Default to the Cython version if available 215 | tokenize_flat = cython_tokenize_flat 216 | -------------------------------------------------------------------------------- /tinycss/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.4' 2 | --------------------------------------------------------------------------------